@datagrok-libraries/bio 5.3.0 → 5.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/utils/atomic-works.d.ts +2 -0
- package/src/utils/atomic-works.d.ts.map +1 -0
- package/src/utils/atomic-works.js +354 -0
- package/src/utils/const.d.ts +48 -0
- package/src/utils/const.d.ts.map +1 -0
- package/src/utils/const.js +29 -0
- package/src/utils/monomer-library.d.ts +43 -0
- package/src/utils/monomer-library.d.ts.map +1 -0
- package/src/utils/monomer-library.js +154 -0
- package/src/utils/monomer-utils.d.ts +10 -0
- package/src/utils/monomer-utils.d.ts.map +1 -0
- package/src/utils/monomer-utils.js +125 -0
- package/src/utils/notation-converter.d.ts.map +1 -1
- package/src/utils/notation-converter.js +5 -1
- package/src/utils/to-atomic-level.d.ts +3 -0
- package/src/utils/to-atomic-level.d.ts.map +1 -0
- package/src/utils/to-atomic-level.js +1009 -0
- package/src/utils/units-handler.d.ts +4 -0
- package/src/utils/units-handler.d.ts.map +1 -1
- package/src/utils/units-handler.js +6 -8
- package/tsconfig.json +1 -1
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
/** HELM associated sdf libraries with monomer processing*/
|
|
2
|
+
export class MonomerLibrary {
|
|
3
|
+
constructor(sdf) {
|
|
4
|
+
this.monomerFields = [
|
|
5
|
+
'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',
|
|
6
|
+
];
|
|
7
|
+
this.library = {};
|
|
8
|
+
this.monomers = [];
|
|
9
|
+
const sdfReader = new SDFReader();
|
|
10
|
+
const data = sdfReader.getColls(sdf);
|
|
11
|
+
this.monomerFields.forEach((f) => {
|
|
12
|
+
if (!(f in data))
|
|
13
|
+
throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);
|
|
14
|
+
if (data[f].length != data.molecule.length)
|
|
15
|
+
throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);
|
|
16
|
+
});
|
|
17
|
+
for (let i = 0; i < data.molecule.length; i++) {
|
|
18
|
+
const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);
|
|
19
|
+
const entry = {
|
|
20
|
+
mol: data.molecule[i],
|
|
21
|
+
type: 'Peptide',
|
|
22
|
+
code: data.MonomerCode[i],
|
|
23
|
+
analogueCode: data.MonomerNaturalAnalogCode[i],
|
|
24
|
+
linkages: linkData,
|
|
25
|
+
};
|
|
26
|
+
const name = data.MonomerCode[i] !== '.' ? data.MonomerCode[i] : data.MonomerName[i];
|
|
27
|
+
this.library[name] = entry;
|
|
28
|
+
this.monomers.push(name);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
/** getting full monomer information from monomer library
|
|
32
|
+
* @param {string} name
|
|
33
|
+
* @return {MonomerEntry}
|
|
34
|
+
*/
|
|
35
|
+
getMonomerEntry(name) {
|
|
36
|
+
if (!this.monomers.includes(name))
|
|
37
|
+
throw new Error(`Monomer library do not contain ${name} monomer`);
|
|
38
|
+
return this.library[name];
|
|
39
|
+
}
|
|
40
|
+
/** getting mol as string for monomer
|
|
41
|
+
* @param {string} name
|
|
42
|
+
* @return {string}
|
|
43
|
+
*/
|
|
44
|
+
getMonomerMol(name) {
|
|
45
|
+
if (!this.monomers.includes(name))
|
|
46
|
+
throw new Error(`Monomer library do not contain ${name} monomer`);
|
|
47
|
+
const entry = this.library[name];
|
|
48
|
+
let monomerMol = entry.mol.replace(/M RGP .+\n/, '');
|
|
49
|
+
//order matters
|
|
50
|
+
const links = Object.keys(entry.linkages);
|
|
51
|
+
for (const link of links)
|
|
52
|
+
monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');
|
|
53
|
+
return monomerMol;
|
|
54
|
+
}
|
|
55
|
+
/** getting the list of the minomers available in library*/
|
|
56
|
+
get monomerNames() {
|
|
57
|
+
return this.monomers;
|
|
58
|
+
}
|
|
59
|
+
static get id() {
|
|
60
|
+
return MonomerLibrary.libName;
|
|
61
|
+
}
|
|
62
|
+
getLinkData(mol, caps, name) {
|
|
63
|
+
var _a;
|
|
64
|
+
const rawData = mol.match(/M RGP .+/);
|
|
65
|
+
if (rawData === null)
|
|
66
|
+
throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);
|
|
67
|
+
const types = {};
|
|
68
|
+
(_a = caps.split('\n')) === null || _a === void 0 ? void 0 : _a.forEach((e) => {
|
|
69
|
+
types[e.match(/\d+/)[0]] = e.match(/(?<=\])\w+/)[0];
|
|
70
|
+
});
|
|
71
|
+
const data = rawData[0].replace('M RGP ', '').split(/\s+/);
|
|
72
|
+
const res = {};
|
|
73
|
+
for (let i = 0; i < parseInt(data[0]); i++) {
|
|
74
|
+
const code = parseInt(data[2 * i + 2]);
|
|
75
|
+
let type = '';
|
|
76
|
+
switch (code) {
|
|
77
|
+
case 1:
|
|
78
|
+
type = 'N-terminal';
|
|
79
|
+
break;
|
|
80
|
+
case 2:
|
|
81
|
+
type = 'C-terminal';
|
|
82
|
+
break;
|
|
83
|
+
case 3:
|
|
84
|
+
type = 'branch';
|
|
85
|
+
break;
|
|
86
|
+
default:
|
|
87
|
+
break;
|
|
88
|
+
}
|
|
89
|
+
res[type] = { atomNumber: parseInt(data[2 * i + 1]), type: types[code] };
|
|
90
|
+
}
|
|
91
|
+
return res;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
MonomerLibrary.libName = 'monomerLibrary';
|
|
95
|
+
//TODO: merge with Chem version
|
|
96
|
+
class SDFReader {
|
|
97
|
+
constructor() {
|
|
98
|
+
this.dataColls = { 'molecule': [] };
|
|
99
|
+
}
|
|
100
|
+
getColls(content) {
|
|
101
|
+
this.read(content);
|
|
102
|
+
return this.dataColls;
|
|
103
|
+
}
|
|
104
|
+
read(content) {
|
|
105
|
+
content = content.replaceAll('\r', ''); //equalize old and new sdf standards
|
|
106
|
+
let startIndex = content.indexOf('$$$$', 0);
|
|
107
|
+
this.parse(content, 0, startIndex, (name, val) => {
|
|
108
|
+
this.dataColls[name] = [];
|
|
109
|
+
this.dataColls[name].push(val);
|
|
110
|
+
});
|
|
111
|
+
startIndex += 5;
|
|
112
|
+
while (startIndex > -1 && startIndex < content.length)
|
|
113
|
+
startIndex = this.readNext(content, startIndex);
|
|
114
|
+
}
|
|
115
|
+
readNext(content, startIndex) {
|
|
116
|
+
const nextStartIndex = content.indexOf('$$$$', startIndex);
|
|
117
|
+
if (nextStartIndex === -1) {
|
|
118
|
+
return -1;
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
this.parse(content, startIndex, nextStartIndex, (name, val) => {
|
|
122
|
+
this.dataColls[name].push(val);
|
|
123
|
+
});
|
|
124
|
+
}
|
|
125
|
+
if (nextStartIndex > -1)
|
|
126
|
+
return nextStartIndex + 5;
|
|
127
|
+
return nextStartIndex;
|
|
128
|
+
}
|
|
129
|
+
parse(content, start, end, handler) {
|
|
130
|
+
const molEnd = +content.indexOf('M END\n', start) + 7;
|
|
131
|
+
let localEnd = start;
|
|
132
|
+
this.dataColls['molecule'].push(content.substring(start, molEnd));
|
|
133
|
+
start = molEnd;
|
|
134
|
+
while (localEnd < end) {
|
|
135
|
+
start = content.indexOf('> <', localEnd);
|
|
136
|
+
if (start === -1)
|
|
137
|
+
return;
|
|
138
|
+
start += 3;
|
|
139
|
+
localEnd = content.indexOf('>\n', start);
|
|
140
|
+
if (localEnd === -1)
|
|
141
|
+
return;
|
|
142
|
+
const propertyName = content.substring(start, localEnd);
|
|
143
|
+
start = localEnd + 2;
|
|
144
|
+
localEnd = content.indexOf('\n', start);
|
|
145
|
+
if (localEnd === -1)
|
|
146
|
+
localEnd = end;
|
|
147
|
+
else if (content[localEnd + 1] != '\n')
|
|
148
|
+
localEnd = content.indexOf('\n', localEnd + 1);
|
|
149
|
+
handler(propertyName, content.substring(start, localEnd));
|
|
150
|
+
localEnd += 2;
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"monomer-library.js","sourceRoot":"","sources":["monomer-library.ts"],"names":[],"mappings":"AASA,2DAA2D;AAC3D,MAAM,OAAO,cAAc;IAWzB,YAAY,GAAW;QARf,kBAAa,GAAa;YAChC,UAAU,EAAE,aAAa,EAAE,0BAA0B,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,EAAE,eAAe;SACpH,CAAC;QAEM,YAAO,GAAmB,EAAE,CAAC;QAE7B,aAAQ,GAAa,EAAE,CAAC;QAG9B,MAAM,SAAS,GAAG,IAAI,SAAS,EAAE,CAAC;QAClC,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACrC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YAC/B,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;gBACd,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,mCAAmC,CAAC,CAAC;YAE7F,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM;gBACxC,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,0CAA0C,CAAC,CAAC;QACtG,CAAC,CAAC,CAAC;QAEH,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;YAC7C,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9F,MAAM,KAAK,GAAG;gBACZ,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACrB,IAAI,EAAE,SAAS;gBACf,IAAI,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC;gBACzB,YAAY,EAAE,IAAI,CAAC,wBAAwB,CAAC,CAAC,CAAC;gBAC9C,QAAQ,EAAE,QAAQ;aACnB,CAAC;YAEF,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YACrF,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;YAC3B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SAC1B;IACH,CAAC;IAED;;;OAGG;IACI,eAAe,CAAC,IAAY;QACjC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,UAAU,CAAC,CAAC;QAEpE,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED;;;OAGG;IACI,aAAa,CAAC,IAAY;QAC/B,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,UAAU,CAAC,CAAC;QAGpE,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACjC,IAAI,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;QAEvD,eAAe;QACf,MAAM,KAAK,GAAG,MAAM,C
AAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAC1C,KAAK,MAAM,IAAI,IAAI,KAAK;YACtB,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,GAAG,CAAC,CAAC;QAGzE,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,2DAA2D;IAC3D,IAAI,YAAY;QACd,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED,MAAM,KAAK,EAAE;QACX,OAAO,cAAc,CAAC,OAAO,CAAC;IAChC,CAAC;IAEO,WAAW,CAAC,GAAW,EAAE,IAAY,EAAE,IAAY;;QACzD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QACxC,IAAI,OAAO,KAAK,IAAI;YAClB,MAAM,IAAI,KAAK,CAAC,qCAAqC,IAAI,mBAAmB,CAAC,CAAC;QAEhF,MAAM,KAAK,GAA+B,EAAE,CAAC;QAC7C,MAAA,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,0CAAE,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YAC9B,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,YAAY,CAAE,CAAC,CAAC,CAAC,CAAC;QACxD,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC7D,MAAM,GAAG,GAAa,EAAE,CAAC;QACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAC1C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACvC,IAAI,IAAI,GAAG,EAAE,CAAC;YACd,QAAQ,IAAI,EAAE;gBACd,KAAK,CAAC;oBACJ,IAAI,GAAG,YAAY,CAAC;oBACpB,MAAM;gBACR,KAAK,CAAC;oBACJ,IAAI,GAAG,YAAY,CAAC;oBACpB,MAAM;gBACR,KAAK,CAAC;oBACJ,IAAI,GAAG,QAAQ,CAAC;oBAChB,MAAM;gBACR;oBACE,MAAM;aACP;YACD,GAAG,CAAC,IAAI,CAAC,GAAG,EAAC,UAAU,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,EAAC,CAAC;SACxE;QAED,OAAO,GAAG,CAAC;IACb,CAAC;;AA9GM,sBAAO,GAAG,gBAAgB,CAAC;AAiHpC,+BAA+B;AAC/B,MAAM,SAAS;IAGb;QACE,IAAI,CAAC,SAAS,GAAG,EAAC,UAAU,EAAE,EAAE,EAAC,CAAC;IACpC,CAAC;IAED,QAAQ,CAAC,OAAe;QACtB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnB,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,IAAI,CAAC,OAAe;QAClB,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,oCAAoC;QAC5E,IAAI,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAC5C,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,IAAY,EAAE,GAAW,EAAQ,EAAE;YACrE,IAAI,CAAC,SAAS
,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACjC,CAAC,CAAC,CAAC;QACH,UAAU,IAAI,CAAC,CAAC;QAChB,OAAO,UAAU,GAAG,CAAC,CAAC,IAAI,UAAU,GAAG,OAAO,CAAC,MAAM;YACnD,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IACpD,CAAC;IAED,QAAQ,CAAC,OAAe,EAAE,UAAkB;QAC1C,MAAM,cAAc,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,UAAU,CAAC,CAAC;QAC3D,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE;YACzB,OAAO,CAAC,CAAC,CAAC;SACX;aAAM;YACL,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,UAAU,EAAE,cAAc,EAC5C,CAAC,IAAY,EAAE,GAAW,EAAQ,EAAE;gBAClC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACjC,CAAC,CAAC,CAAC;SACN;QAED,IAAI,cAAc,GAAG,CAAC,CAAC;YACrB,OAAO,cAAc,GAAG,CAAC,CAAC;QAG5B,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,KAAa,EAAE,GAAW,EAAE,OAA4C;QAC7F,MAAM,MAAM,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC;QACvD,IAAI,QAAQ,GAAG,KAAK,CAAC;QACrB,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;QAElE,KAAK,GAAG,MAAM,CAAC;QACf,OAAO,QAAQ,GAAG,GAAG,EAAE;YACrB,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACzC,IAAI,KAAK,KAAK,CAAC,CAAC;gBACd,OAAO;YAGT,KAAK,IAAI,CAAC,CAAC;YACX,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACzC,IAAI,QAAQ,KAAK,CAAC,CAAC;gBACjB,OAAO;YAGT,MAAM,YAAY,GAAG,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC;YAErB,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YACxC,IAAI,QAAQ,KAAK,CAAC,CAAC;gBACjB,QAAQ,GAAG,GAAG,CAAC;iBACZ,IAAI,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC,IAAI,IAAI;gBACpC,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC;YAEjD,OAAO,CAAC,YAAY,EAAE,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;YAC1D,QAAQ,IAAI,CAAC,CAAC;SACf;IACH,CAAC;CACF","sourcesContent":["export type MonomerEntry = {\n  mol: string,\n  type: string,\n  analogueCode: string,\n  linkages: { [link: string]: { atomNumber: number, type: string } }\n};\nexport type MonomerEntries = { [name: string]: MonomerEntry };\nexport type LinkData 
= { [link: string]: { atomNumber: number, type: string } };\n\n/** HELM associated sdf libraries with monomer processing*/\nexport class MonomerLibrary {\n  static libName = 'monomerLibrary';\n\n  private monomerFields: string[] = [\n    'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',\n  ];\n\n  private library: MonomerEntries = {};\n\n  private monomers: string[] = [];\n\n  constructor(sdf: string) {\n    const sdfReader = new SDFReader();\n    const data = sdfReader.getColls(sdf);\n    this.monomerFields.forEach((f) => {\n      if (!(f in data))\n        throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);\n\n      if (data[f].length != data.molecule.length)\n        throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);\n    });\n\n    for (let i = 0; i < data.molecule.length; i++) {\n      const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);\n      const entry = {\n        mol: data.molecule[i],\n        type: 'Peptide',\n        code: data.MonomerCode[i],\n        analogueCode: data.MonomerNaturalAnalogCode[i],\n        linkages: linkData,\n      };\n\n      const name = data.MonomerCode[i] !== '.' ? 
data.MonomerCode[i] : data.MonomerName[i];\n      this.library[name] = entry;\n      this.monomers.push(name);\n    }\n  }\n\n  /** getting full monomer information from monomer library\n   * @param {string} name\n   * @return {MonomerEntry}\n   */\n  public getMonomerEntry(name: string): MonomerEntry {\n    if (!this.monomers.includes(name))\n      throw new Error(`Monomer library do not contain ${name} monomer`);\n\n    return this.library[name];\n  }\n\n  /** getting mol as string for monomer\n   * @param {string} name\n   * @return {string}\n   */\n  public getMonomerMol(name: string): string {\n    if (!this.monomers.includes(name))\n      throw new Error(`Monomer library do not contain ${name} monomer`);\n\n\n    const entry = this.library[name];\n    let monomerMol = entry.mol.replace(/M  RGP  .+\\n/, '');\n\n    //order matters\n    const links = Object.keys(entry.linkages);\n    for (const link of links)\n      monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');\n\n\n    return monomerMol;\n  }\n\n  /** getting the list of the minomers available in library*/\n  get monomerNames(): string[] {\n    return this.monomers;\n  }\n\n  static get id(): string {\n    return MonomerLibrary.libName;\n  }\n\n  private getLinkData(mol: string, caps: string, name: string): LinkData {\n    const rawData = mol.match(/M  RGP  .+/);\n    if (rawData === null)\n      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);\n\n    const types: { [code: string]: string } = {};\n    caps.split('\\n')?.forEach((e) => {\n      types[e.match(/\\d+/)![0]] = e.match(/(?<=\\])\\w+/)![0];\n    });\n\n    const data = rawData[0].replace('M  RGP  ', '').split(/\\s+/);\n    const res: LinkData = {};\n    for (let i = 0; i < parseInt(data[0]); i++) {\n      const code = parseInt(data[2 * i + 2]);\n      let type = '';\n      switch (code) {\n      case 1:\n        type = 'N-terminal';\n        break;\n      case 2:\n        type = 'C-terminal';\n  
      break;\n      case 3:\n        type = 'branch';\n        break;\n      default:\n        break;\n      }\n      res[type] = {atomNumber: parseInt(data[2 * i + 1]), type: types[code]};\n    }\n\n    return res;\n  }\n}\n\n//TODO: merge with Chem version\nclass SDFReader {\n  dataColls: { [_: string]: string [] };\n\n  constructor() {\n    this.dataColls = {'molecule': []};\n  }\n\n  getColls(content: string): { [_: string]: string[] } {\n    this.read(content);\n    return this.dataColls;\n  }\n\n  read(content: string): void {\n    content = content.replaceAll('\\r', ''); //equalize old and new sdf standards\n    let startIndex = content.indexOf('$$$$', 0);\n    this.parse(content, 0, startIndex, (name: string, val: string): void => { // TODO: type\n      this.dataColls[name] = [];\n      this.dataColls[name].push(val);\n    });\n    startIndex += 5;\n    while (startIndex > -1 && startIndex < content.length)\n      startIndex = this.readNext(content, startIndex);\n  }\n\n  readNext(content: string, startIndex: number): number {\n    const nextStartIndex = content.indexOf('$$$$', startIndex);\n    if (nextStartIndex === -1) {\n      return -1;\n    } else {\n      this.parse(content, startIndex, nextStartIndex,\n        (name: string, val: string): void => {\n          this.dataColls[name].push(val);\n        });\n    }\n\n    if (nextStartIndex > -1)\n      return nextStartIndex + 5;\n\n\n    return nextStartIndex;\n  }\n\n  parse(content: string, start: number, end: number, handler: (name: string, val: string) => void): void {\n    const molEnd = +content.indexOf('M  END\\n', start) + 7;\n    let localEnd = start;\n    this.dataColls['molecule'].push(content.substring(start, molEnd));\n\n    start = molEnd;\n    while (localEnd < end) {\n      start = content.indexOf('> <', localEnd);\n      if (start === -1)\n        return;\n\n\n      start += 3;\n      localEnd = content.indexOf('>\\n', start);\n      if (localEnd === -1)\n        return;\n\n\n      
const propertyName = content.substring(start, localEnd);\n      start = localEnd + 2;\n\n      localEnd = content.indexOf('\\n', start);\n      if (localEnd === -1)\n        localEnd = end;\n      else if (content[localEnd + 1] != '\\n')\n        localEnd = content.indexOf('\\n', localEnd + 1);\n\n      handler(propertyName, content.substring(start, localEnd));\n      localEnd += 2;\n    }\n  }\n}\n"]}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
/** Path string of the HELM core library JSON file. NOTE(review): how the path is resolved is not visible here — confirm against the consumer. */
export declare const HELM_CORE_LIB_FILENAME = "/data/HELMCoreLibrary.json";
|
|
3
|
+
/** Builds a string column named 'encodedMolecules' in which each monomer of every sequence of `col` is replaced by one code point; the declared `null` result stands for running out of code points. */
export declare function encodeMonomers(col: DG.Column): DG.Column | null;
|
|
4
|
+
/** For every row of `col`, returns the list of monomer-library entries of its monomers (one inner array per row); returns `null` as soon as a monomer is missing from the library. */
export declare function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null;
|
|
5
|
+
/** Single-cell variant of getMolfilesFromSeq: the result holds exactly one inner array, for the sequence in `cell`; returns `null` when a monomer is missing from the library. */
export declare function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null;
|
|
6
|
+
/** Builds a dictionary keyed by monomer symbol from the library items whose polymerType is 'PEPTIDE'; each value holds the HELM core fields copied from the item. */
export declare function createMomomersMolDict(lib: any[]): {
|
|
7
|
+
[key: string]: string | any;
|
|
8
|
+
};
|
|
9
|
+
/** Converts a table loaded from an SDF monomer library into an array of monomer objects, one per table row, with R-group caps expanded into R-group descriptor objects. */
export declare function createJsonMonomerLibFromSdf(table: DG.DataFrame): any;
|
|
10
|
+
//# sourceMappingURL=monomer-utils.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"monomer-utils.d.ts","sourceRoot":"","sources":["monomer-utils.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAUtC,eAAO,MAAM,sBAAsB,+BAA+B,CAAC;AAEnE,wBAAgB,cAAc,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,GAAG,EAAE,CAAC,MAAM,GAAG,IAAI,CAwB/D;AAED,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,GAAG,EAAE,EAAE,GAAG,IAAI,CAuB3F;AAED,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,iBAAiB,EAAE,GAAG,EAAE,GAAG,GAAG,EAAE,EAAE,GAAG,IAAI,CAoBhG;AAED,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG;IAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,GAAG,GAAG,CAAA;CAAE,CAYjF;AAED,wBAAgB,2BAA2B,CAAC,KAAK,EAAE,EAAE,CAAC,SAAS,GAAG,GAAG,CA8BpE"}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
// import * as ui from 'datagrok-api/ui';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
// import {WebLogo, SplitterFunc} from '../../src/viewers/web-logo';
|
|
5
|
+
import { HELM_CORE_FIELDS, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, SDF_MONOMER_NAME } from './const';
|
|
6
|
+
// import {UnitsHandler} from './units-handler';
|
|
7
|
+
import * as bio from '../../index';
|
|
8
|
+
/** Path string of the HELM core library JSON file. NOTE(review): not referenced by the functions visible in this file — confirm the consumer and how the path is resolved. */
export const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';
|
|
9
|
+
export function encodeMonomers(col) {
|
|
10
|
+
let encodeSymbol = MONOMER_ENCODE_MIN;
|
|
11
|
+
const monomerSymbolDict = {};
|
|
12
|
+
const units = col.tags[DG.TAGS.UNITS];
|
|
13
|
+
const sep = col.getTag("separator" /* bio.TAGS.separator */);
|
|
14
|
+
const splitterFunc = bio.getSplitter(units, sep);
|
|
15
|
+
const encodedStringArray = [];
|
|
16
|
+
for (let i = 0; i < col.length; ++i) {
|
|
17
|
+
let encodedMonomerStr = '';
|
|
18
|
+
const monomers = splitterFunc(col.get(i));
|
|
19
|
+
monomers.forEach((m) => {
|
|
20
|
+
if (!monomerSymbolDict[m]) {
|
|
21
|
+
if (encodeSymbol > MONOMER_ENCODE_MAX) {
|
|
22
|
+
grok.shell.error(`Not enough symbols to encode monomers`);
|
|
23
|
+
return null;
|
|
24
|
+
}
|
|
25
|
+
monomerSymbolDict[m] = encodeSymbol;
|
|
26
|
+
encodeSymbol++;
|
|
27
|
+
}
|
|
28
|
+
encodedMonomerStr += String.fromCodePoint(monomerSymbolDict[m]);
|
|
29
|
+
});
|
|
30
|
+
encodedStringArray.push(encodedMonomerStr);
|
|
31
|
+
}
|
|
32
|
+
return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
|
|
33
|
+
}
|
|
34
|
+
export function getMolfilesFromSeq(col, monomersLibObject) {
|
|
35
|
+
const units = col.tags[DG.TAGS.UNITS];
|
|
36
|
+
const sep = col.getTag('separator');
|
|
37
|
+
const splitterFunc = bio.getSplitter(units, sep);
|
|
38
|
+
const monomersDict = createMomomersMolDict(monomersLibObject);
|
|
39
|
+
const molFiles = [];
|
|
40
|
+
for (let i = 0; i < col.length; ++i) {
|
|
41
|
+
const macroMolecule = col.get(i);
|
|
42
|
+
const monomers = splitterFunc(macroMolecule);
|
|
43
|
+
const molFilesForSeq = [];
|
|
44
|
+
for (let j = 0; j < monomers.length; ++j) {
|
|
45
|
+
if (monomers[j]) {
|
|
46
|
+
if (!monomersDict[monomers[j]]) {
|
|
47
|
+
grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. Structure cannot be created`);
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
50
|
+
// what is the reason of double conversion?
|
|
51
|
+
molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
molFiles.push(molFilesForSeq);
|
|
55
|
+
}
|
|
56
|
+
return molFiles;
|
|
57
|
+
}
|
|
58
|
+
export function getMolfilesFromSingleSeq(cell, monomersLibObject) {
|
|
59
|
+
const units = cell.column.tags[DG.TAGS.UNITS];
|
|
60
|
+
const sep = cell.column.getTag('separator');
|
|
61
|
+
const splitterFunc = bio.getSplitter(units, sep);
|
|
62
|
+
const monomersDict = createMomomersMolDict(monomersLibObject);
|
|
63
|
+
const molFiles = [];
|
|
64
|
+
const macroMolecule = cell.value;
|
|
65
|
+
const monomers = splitterFunc(macroMolecule);
|
|
66
|
+
const molFilesForSeq = [];
|
|
67
|
+
for (let j = 0; j < monomers.length; ++j) {
|
|
68
|
+
if (monomers[j]) {
|
|
69
|
+
if (!monomersDict[monomers[j]]) {
|
|
70
|
+
grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. Structure cannot be created`);
|
|
71
|
+
return null;
|
|
72
|
+
}
|
|
73
|
+
molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
molFiles.push(molFilesForSeq);
|
|
77
|
+
return molFiles;
|
|
78
|
+
}
|
|
79
|
+
export function createMomomersMolDict(lib) {
|
|
80
|
+
const dict = {};
|
|
81
|
+
lib.forEach((it) => {
|
|
82
|
+
if (it['polymerType'] === 'PEPTIDE') {
|
|
83
|
+
const monomerObject = {};
|
|
84
|
+
HELM_CORE_FIELDS.forEach((field) => {
|
|
85
|
+
monomerObject[field] = it[field];
|
|
86
|
+
});
|
|
87
|
+
dict[it["symbol" /* HELM_FIELDS.SYMBOL */]] = monomerObject;
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
return dict;
|
|
91
|
+
}
|
|
92
|
+
export function createJsonMonomerLibFromSdf(table) {
|
|
93
|
+
const resultLib = [];
|
|
94
|
+
for (let i = 0; i < table.rowCount; i++) {
|
|
95
|
+
const monomer = {};
|
|
96
|
+
Object.keys(jsonSdfMonomerLibDict).forEach((key) => {
|
|
97
|
+
if (key === "symbol" /* HELM_FIELDS.SYMBOL */) {
|
|
98
|
+
const monomerSymbol = table.get(jsonSdfMonomerLibDict[key], i);
|
|
99
|
+
monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;
|
|
100
|
+
}
|
|
101
|
+
else if (key === "rgroups" /* HELM_FIELDS.RGROUPS */) {
|
|
102
|
+
const rgroups = table.get(jsonSdfMonomerLibDict[key], i).split('\n');
|
|
103
|
+
const jsonRgroups = [];
|
|
104
|
+
rgroups.forEach((g) => {
|
|
105
|
+
const rgroup = {};
|
|
106
|
+
const altAtom = g.substring(g.lastIndexOf(']') + 1);
|
|
107
|
+
const radicalNum = g.match(/\[R(\d+)\]/)[1];
|
|
108
|
+
rgroup["capGroupSmiles" /* RGROUP_FIELDS.CAP_GROUP_SMILES */] = altAtom === 'H' ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;
|
|
109
|
+
rgroup["alternateId" /* RGROUP_FIELDS.ALTER_ID */] = altAtom === 'H' ? `R${radicalNum}-H` : `R${radicalNum}-OH`;
|
|
110
|
+
rgroup["capGroupName" /* RGROUP_FIELDS.CAP_GROUP_NAME */] = altAtom === 'H' ? `H` : `OH`;
|
|
111
|
+
rgroup["label" /* RGROUP_FIELDS.LABEL */] = `R${radicalNum}`;
|
|
112
|
+
jsonRgroups.push(rgroup);
|
|
113
|
+
});
|
|
114
|
+
monomer[key] = jsonRgroups;
|
|
115
|
+
}
|
|
116
|
+
else {
|
|
117
|
+
if (jsonSdfMonomerLibDict[key])
|
|
118
|
+
monomer[key] = table.get(jsonSdfMonomerLibDict[key], i);
|
|
119
|
+
}
|
|
120
|
+
});
|
|
121
|
+
resultLib.push(monomer);
|
|
122
|
+
}
|
|
123
|
+
return resultLib;
|
|
124
|
+
}
|
|
125
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"monomer-utils.js","sourceRoot":"","sources":["monomer-utils.ts"],"names":[],"mappings":"AAAA,yCAAyC;AACzC,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,KAAK,IAAI,MAAM,mBAAmB,CAAC;AAE1C,oEAAoE;AACpE,OAAO,EAAc,gBAAgB,EAAiB,qBAAqB,EACzE,kBAAkB,EAAE,kBAAkB,EAAE,gBAAgB,EAAC,MAAM,SAAS,CAAC;AAC3E,gDAAgD;AAEhD,OAAO,KAAK,GAAG,MAAM,aAAa,CAAC;AAEnC,MAAM,CAAC,MAAM,sBAAsB,GAAG,4BAA4B,CAAC;AAEnE,MAAM,UAAU,cAAc,CAAC,GAAc;IAC3C,IAAI,YAAY,GAAG,kBAAkB,CAAC;IACtC,MAAM,iBAAiB,GAA8B,EAAE,CAAC;IACxD,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,sCAAoB,CAAC;IAC3C,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,kBAAkB,GAAG,EAAE,CAAC;IAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACnC,IAAI,iBAAiB,GAAG,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE;YACrB,IAAI,CAAC,iBAAiB,CAAC,CAAC,CAAC,EAAE;gBACzB,IAAI,YAAY,GAAG,kBAAkB,EAAE;oBACrC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;oBAC1D,OAAO,IAAI,CAAC;iBACb;gBACD,iBAAiB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC;gBACpC,YAAY,EAAE,CAAC;aAChB;YACD,iBAAiB,IAAI,MAAM,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;QACH,kBAAkB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;KAC5C;IACD,OAAO,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,kBAAkB,EAAE,kBAAkB,CAAC,CAAC;AACvE,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAc,EAAE,iBAAwB;IACzE,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACtC,MAAM,GAAG,GAAG,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IACpC,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,YAAY,GAAG,qBAAqB,CAAC,iBAAiB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACnC,MAAM,aAAa,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACjC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;QAC7C,MAAM,cAAc,GAAG,EAAE,CAAC;QAC1B,KAAK,I
AAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;YACxC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE;gBACf,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;oBAC9B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,QAAQ,CAAC,CAAC,CAAC,0DAA0D,CAAC,CAAC;oBACrG,OAAO,IAAI,CAAC;iBACb;gBACD,2CAA2C;gBAC3C,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5E;SACF;QACD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;KAC/B;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,IAAa,EAAE,iBAAwB;IAC9E,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAO,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAqB,GAAG,CAAC,WAAW,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACnE,MAAM,YAAY,GAAG,qBAAqB,CAAC,iBAAiB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,EAAE,CAAC;IACpB,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC;IACjC,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;IAC7C,MAAM,cAAc,GAAG,EAAE,CAAC;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,EAAE,CAAC,EAAE;QACxC,IAAI,QAAQ,CAAC,CAAC,CAAC,EAAE;YACf,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;gBAC9B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,QAAQ,CAAC,CAAC,CAAC,0DAA0D,CAAC,CAAC;gBACrG,OAAO,IAAI,CAAC;aACb;YACD,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;SAC5E;KACF;IACD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC9B,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,GAAU;IAC9C,MAAM,IAAI,GAAoC,EAAE,CAAC;IACjD,GAAG,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE;QACjB,IAAI,EAAE,CAAC,aAAa,CAAC,KAAK,SAAS,EAAE;YACnC,MAAM,aAAa,GAA2B,EAAE,CAAC;YACjD,gBAAgB,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;gBACjC,aAAa,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;YACH,IAAI,CAAC,EAAE,mCAAoB,CAAC,GAAG,aAAa,CAAC;SAC9C;IACH,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,2BAA2B,CAAC,KAAmB;IAC7D,MAAM,SAAS,GAAG,EAAE,CAAC;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,EAAE,EAAE;QACv
C,MAAM,OAAO,GAAoC,EAAE,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE;YACjD,IAAI,GAAG,sCAAuB,EAAE;gBAC9B,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC/D,OAAO,CAAC,GAAG,CAAC,GAAG,aAAa,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,gBAAgB,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;aACvF;iBAAM,IAAI,GAAG,wCAAwB,EAAE;gBACtC,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,qBAAqB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACrE,MAAM,WAAW,GAAU,EAAE,CAAC;gBAC9B,OAAO,CAAC,OAAO,CAAC,CAAC,CAAS,EAAE,EAAE;oBAC5B,MAAM,MAAM,GAAoC,EAAE,CAAC;oBACnD,MAAM,OAAO,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;oBACpD,MAAM,UAAU,GAAG,CAAC,CAAC,KAAK,CAAC,YAAY,CAAE,CAAC,CAAC,CAAC,CAAC;oBAC7C,MAAM,uDAAgC,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,MAAM,UAAU,MAAM,CAAC,CAAC,CAAC,OAAO,UAAU,GAAG,CAAC;oBACzG,MAAM,4CAAwB,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,UAAU,IAAI,CAAC,CAAC,CAAC,IAAI,UAAU,KAAK,CAAC;oBAC5F,MAAM,mDAA8B,GAAG,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;oBACpE,MAAM,mCAAqB,GAAG,IAAI,UAAU,EAAE,CAAC;oBAC/C,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC3B,CAAC,CAAC,CAAC;gBACH,OAAO,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC;aAC5B;iBAAM;gBACL,IAAK,qBAAyD,CAAC,GAAG,CAAC;oBACjE,OAAO,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,GAAG,CAAE,qBAAyD,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;aAChG;QACH,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;KACzB;IACD,OAAO,SAAS,CAAC;AACnB,CAAC","sourcesContent":["// import * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\nimport * as grok from 'datagrok-api/grok';\n\n// import {WebLogo, SplitterFunc} from '../../src/viewers/web-logo';\nimport {HELM_FIELDS, HELM_CORE_FIELDS, RGROUP_FIELDS, jsonSdfMonomerLibDict,\n  MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, SDF_MONOMER_NAME} from './const';\n// import {UnitsHandler} from './units-handler';\n\nimport * as bio from '../../index';\n\nexport const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';\n\nexport function 
encodeMonomers(col: DG.Column): DG.Column | null {\n  let encodeSymbol = MONOMER_ENCODE_MIN;\n  const monomerSymbolDict: { [key: string]: number } = {};\n  const units = col.tags[DG.TAGS.UNITS];\n  const sep = col.getTag(bio.TAGS.separator);\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const encodedStringArray = [];\n  for (let i = 0; i < col.length; ++i) {\n    let encodedMonomerStr = '';\n    const monomers = splitterFunc(col.get(i));\n    monomers.forEach((m) => {\n      if (!monomerSymbolDict[m]) {\n        if (encodeSymbol > MONOMER_ENCODE_MAX) {\n          grok.shell.error(`Not enough symbols to encode monomers`);\n          return null;\n        }\n        monomerSymbolDict[m] = encodeSymbol;\n        encodeSymbol++;\n      }\n      encodedMonomerStr += String.fromCodePoint(monomerSymbolDict[m]);\n    });\n    encodedStringArray.push(encodedMonomerStr);\n  }\n  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);\n}\n\nexport function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {\n  const units = col.tags[DG.TAGS.UNITS];\n  const sep = col.getTag('separator');\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const monomersDict = createMomomersMolDict(monomersLibObject);\n  const molFiles = [];\n  for (let i = 0; i < col.length; ++i) {\n    const macroMolecule = col.get(i);\n    const monomers = splitterFunc(macroMolecule);\n    const molFilesForSeq = [];\n    for (let j = 0; j < monomers.length; ++j) {\n      if (monomers[j]) {\n        if (!monomersDict[monomers[j]]) {\n          grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. 
Structure cannot be created`);\n          return null;\n        }\n        // what is the reason of double conversion?\n        molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));\n      }\n    }\n    molFiles.push(molFilesForSeq);\n  }\n  return molFiles;\n}\n\nexport function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null {\n  const units = cell.column.tags[DG.TAGS.UNITS];\n  const sep = cell.column!.getTag('separator');\n  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);\n  const monomersDict = createMomomersMolDict(monomersLibObject);\n  const molFiles = [];\n  const macroMolecule = cell.value;\n  const monomers = splitterFunc(macroMolecule);\n  const molFilesForSeq = [];\n  for (let j = 0; j < monomers.length; ++j) {\n    if (monomers[j]) {\n      if (!monomersDict[monomers[j]]) {\n        grok.shell.warning(`Monomer ${monomers[j]} is missing in HELM library. Structure cannot be created`);\n        return null;\n      }\n      molFilesForSeq.push(JSON.parse(JSON.stringify(monomersDict[monomers[j]])));\n    }\n  }\n  molFiles.push(molFilesForSeq);\n  return molFiles;\n}\n\nexport function createMomomersMolDict(lib: any[]): { [key: string]: string | any } {\n  const dict: { [key: string]: string | any } = {};\n  lib.forEach((it) => {\n    if (it['polymerType'] === 'PEPTIDE') {\n      const monomerObject: { [key: string]: any } = {};\n      HELM_CORE_FIELDS.forEach((field) => {\n        monomerObject[field] = it[field];\n      });\n      dict[it[HELM_FIELDS.SYMBOL]] = monomerObject;\n    }\n  });\n  return dict;\n}\n\nexport function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {\n  const resultLib = [];\n  for (let i = 0; i < table.rowCount; i++) {\n    const monomer: { [key: string]: string | any } = {};\n    Object.keys(jsonSdfMonomerLibDict).forEach((key) => {\n      if (key === HELM_FIELDS.SYMBOL) {\n        const monomerSymbol = table.get(jsonSdfMonomerLibDict[key], i);\n 
       monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;\n      } else if (key === HELM_FIELDS.RGROUPS) {\n        const rgroups = table.get(jsonSdfMonomerLibDict[key], i).split('\\n');\n        const jsonRgroups: any[] = [];\n        rgroups.forEach((g: string) => {\n          const rgroup: { [key: string]: string | any } = {};\n          const altAtom = g.substring(g.lastIndexOf(']') + 1);\n          const radicalNum = g.match(/\\[R(\\d+)\\]/)![1];\n          rgroup[RGROUP_FIELDS.CAP_GROUP_SMILES] = altAtom === 'H' ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;\n          rgroup[RGROUP_FIELDS.ALTER_ID] = altAtom === 'H' ? `R${radicalNum}-H` : `R${radicalNum}-OH`;\n          rgroup[RGROUP_FIELDS.CAP_GROUP_NAME] = altAtom === 'H' ? `H` : `OH`;\n          rgroup[RGROUP_FIELDS.LABEL] = `R${radicalNum}`;\n          jsonRgroups.push(rgroup);\n        });\n        monomer[key] = jsonRgroups;\n      } else {\n        if ((jsonSdfMonomerLibDict as { [key: string]: string | any })[key])\n          monomer[key] = table.get((jsonSdfMonomerLibDict as { [key: string]: string | any })[key], i);\n      }\n    });\n    resultLib.push(monomer);\n  }\n  return resultLib;\n}\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;
|
|
1
|
+
{"version":3,"file":"notation-converter.d.ts","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAiC,QAAQ,EAAe,YAAY,EAAO,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,qBAAa,iBAAkB,SAAQ,YAAY;IACjD,OAAO,CAAC,SAAS,CAA6B;IAE9C,SAAS,KAAK,QAAQ,IAAI,YAAY,CAIrC;IAEM,OAAO,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE1C,WAAW,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAE9C,MAAM,CAAC,cAAc,EAAE,QAAQ,GAAG,OAAO;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,uBAAuB;IAoB/B;;;;;OAKG;IACH,OAAO,CAAC,eAAe;IAiBvB,OAAO,CAAC,mBAAmB;IAkB3B;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAa,EAAE,MAAM,EACrB,eAAe,GAAE,MAAM,GAAG,IAAW,GACpC,MAAM;IAOT;;;;;OAKG;IACH,OAAO,CAAC,aAAa;IAgBrB;;;;;OAKG;IACH,OAAO,CAAC,uBAAuB;IA6B/B;;;;;;;;OAQG;IACH,OAAO,CAAC,WAAW;IAwDnB,OAAO,CAAC,sBAAsB;IAK9B;;;;;OAKG;IACI,OAAO,CAAC,WAAW,EAAE,QAAQ,EAAE,YAAY,GAAE,MAAM,GAAG,IAAW,GAAG,EAAE,CAAC,MAAM;gBAmBjE,GAAG,EAAE,EAAE,CAAC,MAAM;CAGlC"}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
1
2
|
import { UnitsHandler } from './units-handler';
|
|
2
3
|
import { getSplitterForColumn, getStats } from './macromolecule';
|
|
3
4
|
/** Class for handling conversion of notation systems in Macromolecule columns */
|
|
@@ -35,6 +36,7 @@ export class NotationConverter extends UnitsHandler {
|
|
|
35
36
|
}
|
|
36
37
|
return fastaMonomersArray.join(separator);
|
|
37
38
|
});
|
|
39
|
+
newColumn.setTag(DG.TAGS.UNITS, "separator" /* NOTATION.SEPARATOR */);
|
|
38
40
|
newColumn.setTag("separator" /* TAGS.separator */, separator);
|
|
39
41
|
return newColumn;
|
|
40
42
|
}
|
|
@@ -98,6 +100,7 @@ export class NotationConverter extends UnitsHandler {
|
|
|
98
100
|
const sourcePolymer = this.column.get(idx);
|
|
99
101
|
return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);
|
|
100
102
|
});
|
|
103
|
+
newColumn.setTag(DG.TAGS.UNITS, "helm" /* NOTATION.HELM */);
|
|
101
104
|
return newColumn;
|
|
102
105
|
}
|
|
103
106
|
/**
|
|
@@ -132,6 +135,7 @@ export class NotationConverter extends UnitsHandler {
|
|
|
132
135
|
}
|
|
133
136
|
return fastaMonomersArray.join('');
|
|
134
137
|
});
|
|
138
|
+
newColumn.setTag(DG.TAGS.UNITS, "fasta" /* NOTATION.FASTA */);
|
|
135
139
|
return newColumn;
|
|
136
140
|
}
|
|
137
141
|
/**
|
|
@@ -218,4 +222,4 @@ export class NotationConverter extends UnitsHandler {
|
|
|
218
222
|
return this.convertHelm(tgtNotation, tgtSeparator);
|
|
219
223
|
}
|
|
220
224
|
}
|
|
221
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAKA,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAC,oBAAoB,EAAE,QAAQ,EAA4C,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IA0PjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QA1PL,cAAS,GAAwB,IAAI,CAAC;IA2P9C,CAAC;IAzPD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAEM,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACzE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,mCAAiB,SAAS,CAAC,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CA
AC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAEhD,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAE7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,EAAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,M
AAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBAC3C,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACjD;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,YAAY,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBACrD,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,iFAAiF;QACjF,MAAM,QAAQ,GAAiB,oBAAoB,CAAC,SAAS,CAAC,CAAC;QAC/D,MAAM,KAAK,GAAgB,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG
,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,SAAS,CAAC,MAAM,+BAAe,OAAO,CAAC,CAAC;QAExC,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {UnitsHandler} from './units-handler';\nimport {getSplitterForColumn, getStats, NOTATION, SeqColStats, SplitterFunc, TAGS} from './macromolecule';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: SplitterFunc | null = null;\n\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = getSplitterForColumn(this.column);\n    return this._splitter;\n  }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === 
NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag(TAGS.separator, separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? 
')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return UnitsHandler._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ): string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const 
sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = UnitsHandler._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain calls 
of isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        UnitsHandler._defaultGapSymbolsDict.FASTA :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === UnitsHandler._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n          // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    const splitter: SplitterFunc = getSplitterForColumn(newColumn);\n    const stats: SeqColStats = 
getStats(newColumn, 5, splitter);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    newColumn.setTag(TAGS.aligned, aligned);\n\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
225
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"notation-converter.js","sourceRoot":"","sources":["notation-converter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AAEtC,OAAO,EAAC,YAAY,EAAC,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAC,oBAAoB,EAAE,QAAQ,EAA4C,MAAM,iBAAiB,CAAC;AAE1G,iFAAiF;AACjF,MAAM,OAAO,iBAAkB,SAAQ,YAAY;IA6PjD,YAAmB,GAAc;QAC/B,KAAK,CAAC,GAAG,CAAC,CAAC;QA7PL,cAAS,GAAwB,IAAI,CAAC;IA8P9C,CAAC;IA5PD,IAAc,QAAQ;QACpB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI;YACzB,IAAI,CAAC,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAEM,OAAO,CAAC,cAAwB,IAAa,OAAO,cAAc,iCAAmB,CAAC,CAAC,CAAC;IAExF,WAAW,CAAC,cAAwB,IAAa,OAAO,cAAc,yCAAuB,CAAC,CAAC,CAAC;IAEhG,MAAM,CAAC,cAAwB,IAAa,OAAO,cAAc,+BAAkB,CAAC,CAAC,CAAC;IAE7F;;;;;;OAMG;IACK,uBAAuB,CAAC,SAAiB,EAAE,iBAAgC,IAAI;QACrF,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAEzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,sCAAoB,CAAC;QACxD,sDAAsD;QACtD,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC1C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;YACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAClD,IAAI,kBAAkB,CAAC,CAAC,CAAC,KAAK,cAAc;oBAC1C,kBAAkB,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;aACzE;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,uCAAqB,CAAC;QACpD,SAAS,CAAC,MAAM,mCAAiB,SAAS,CAAC,CAAC;QAC5C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,eAAe;QACrB,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;YACvC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;gBACxB,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;oBAChC,SAAS,CAAC,CAAC,6CAA6C;QAE9D,IAAI,MAAM,KAAK,SAAS;YACtB,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;QAErD,MAAM,OAAO,GAAG,MAAM,CAAC;QACvB,MAAM,WAAW,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACzC,CAAC,IAAI,CAAC,KAAK
,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QACxD,MAAM,YAAY,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,0BAA0B;QAC3F,OAAO,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IACtD,CAAC;IAED,mDAAmD;IAC3C,mBAAmB,CACzB,aAAqB,EACrB,eAAuB,EACvB,MAAc,EACd,WAAmB,EACnB,YAAoB,EACpB,OAAe;QAEf,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAClD,MAAM,gBAAgB,GAAa,YAAY,CAAC,GAAG,CAAC,CAAC,EAAU,EAAE,EAAE;YACjE,IAAI,EAAE,KAAK,eAAe;gBACxB,OAAO,YAAY,CAAC,sBAAsB,CAAC,IAAI,CAAC;;gBAEhD,OAAO,GAAG,WAAW,GAAG,EAAE,GAAG,YAAY,EAAE,CAAC;QAChD,CAAC,CAAC,CAAC;QACH,OAAO,GAAG,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,EAAE,CAAC;IAC5D,CAAC;IAED;;;;;;;OAOG;IACI,mBAAmB,CACxB,aAAqB,EACrB,kBAAiC,IAAI;QAErC,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAC1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAC5E,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAe,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC9G,CAAC;IAED;;;;;OAKG;IACK,aAAa,CAAC,kBAAiC,IAAI;QACzD,IAAI,eAAe,KAAK,IAAI;YAC1B,eAAe,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAE1C,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAE5E,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,4BAAe,CAAC;QACnD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC3C,OAAO,IAAI,CAAC,mBAAmB,CAAC,aAAa,EAAE,eAAgB,EAAE,MAAM,EAAE,WAAW,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAC/G,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,6BAAgB,CAAC;QAC/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;OAKG;IACK,uBAAuB,CAAC,iBAAgC,IAAI;QAClE,IAAI,cAAc,KAAK,IAAI;YACzB,cAAc,GAAG,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC;QAE7D,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,8BAAgB,CAAC;QACpD,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAC9C,sCAAsC;YACtC,MAAM,mBAAmB,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC;YAC5D,MAAM,kBAAkB,GAAa,E
AAE,CAAC;YACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,mBAAmB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBACnD,MAAM,IAAI,GAAG,mBAAmB,CAAC,CAAC,CAAC,CAAC;gBACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;oBACrB,kBAAkB,CAAC,IAAI,CAAC,cAAe,CAAC,CAAC;iBAC1C;qBAAM,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBAC1B,wCAAwC;oBACxC,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,kBAAkB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAClC;qBAAM;oBACL,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;iBAC/B;aACF;YACD,OAAO,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;QACH,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,+BAAiB,CAAC;QAChD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;OAQG;IACK,WAAW,CACjB,WAAmB,EACnB,eAAuB,EAAE,EACzB,eAA8B,IAAI;QAElC,mEAAmE;QACnE,wEAAwE;QACxE,IAAI,YAAY,KAAK,IAAI,EAAE;YACzB,YAAY,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,CAAC,CAAC,CAAC;gBACtD,YAAY,CAAC,sBAAsB,CAAC,KAAK,CAAC,CAAC;gBAC3C,YAAY,CAAC,sBAAsB,CAAC,SAAS,CAAC;SACjD;QAED,IAAI,IAAI,CAAC,WAAW,CAAC,WAAuB,CAAC,IAAI,YAAY,KAAK,EAAE;YAClE,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAEhC,MAAM,cAAc,GAAG,iBAAiB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC,WAAuB,CAAC,CAAC;QAC7D,wCAAwC;QACxC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAW,EAAE,EAAE;YAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAEzC,mEAAmE;YACnE,mEAAmE;YACnE,0CAA0C;YAC1C,MAAM,YAAY,GAAG,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,WAAW,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEpF,iCAAiC;YACjC,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAClD,MAAM,gBAAgB,GAAa,EAAE,CAAC;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC9C,IAAI,IAAI,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;gBAC7B,IAAI,YAAY;oBACd,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;gBAC1C,IAAI,IAAI,KAAK,YAAY,CAAC,sBAAsB,CAAC,IAAI,EAAE;oBACrD,gBAAgB,CAAC,IAAI,CAAC,YAAa,CAAC,CAAC;iBACtC;qBAAM,IAAI,IAAI,CAAC,OAAO,CAAC,WAAuB,CAAC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;oBACnE,2DAA2D;oBAC3D,MAAM,OAAO,GAAG,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC;oBACjC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;iBAChC;qBAAM;oBACL,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;i
BAC7B;aACF;YACD,OAAO,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC7C,CAAC,CAAC,CAAC;QAEH,iFAAiF;QACjF,MAAM,QAAQ,GAAiB,oBAAoB,CAAC,SAAS,CAAC,CAAC;QAC/D,MAAM,KAAK,GAAgB,QAAQ,CAAC,SAAS,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC;QAC5D,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC;QACrD,SAAS,CAAC,MAAM,+BAAe,OAAO,CAAC,CAAC;QAExC,OAAO,SAAS,CAAC;IACnB,CAAC;IAEO,sBAAsB;QAC5B,iDAAiD;QACjD,OAAO,IAAI,CAAC,YAAY,sCAAoB,CAAC;IAC/C,CAAC;IAED;;;;;OAKG;IACI,OAAO,CAAC,WAAqB,EAAE,eAA8B,IAAI;QACtE,sBAAsB;QACtB,IAAI,IAAI,CAAC,QAAQ,KAAK,WAAW;YAC/B,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YACxD,MAAM,IAAI,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAEpD,IAAI,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,IAAI,YAAY,KAAK,IAAI;YAC1E,OAAO,IAAI,CAAC,uBAAuB,CAAC,YAAY,CAAC,CAAC;aAC/C,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC;YACzE,OAAO,IAAI,CAAC,aAAa,EAAE,CAAC;aACzB,IAAI,IAAI,CAAC,WAAW,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;YACtD,OAAO,IAAI,CAAC,uBAAuB,EAAE,CAAC;aACnC,IAAI,IAAI,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,mBAAmB;YACtE,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;aAClC,iDAAiD;YACpD,OAAO,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,YAAa,CAAC,CAAC;IACxD,CAAC;CAKF","sourcesContent":["/* Do not change these import lines to match external modules in webpack configuration */\nimport * as grok from 'datagrok-api/grok';\nimport * as ui from 'datagrok-api/ui';\nimport * as DG from 'datagrok-api/dg';\n\nimport {UnitsHandler} from './units-handler';\nimport {getSplitterForColumn, getStats, NOTATION, SeqColStats, SplitterFunc, TAGS} from './macromolecule';\n\n/** Class for handling conversion of notation systems in Macromolecule columns */\nexport class NotationConverter extends UnitsHandler {\n  private _splitter: SplitterFunc | null = null;\n\n  protected get splitter(): SplitterFunc {\n    if (this._splitter === null)\n      this._splitter = getSplitterForColumn(this.column);\n    return 
this._splitter;\n  }\n\n  public toFasta(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.FASTA; }\n\n  public toSeparator(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.SEPARATOR; }\n\n  public toHelm(targetNotation: NOTATION): boolean { return targetNotation === NOTATION.HELM; }\n\n  /**\n   * Convert a Macromolecule column from FASTA to SEPARATOR notation\n   *\n   * @param {string} separator  A specific separator to be used\n   * @param {string} fastaGapSymbol  Gap symbol in FASTA, '-' by default\n   * @return {DG.Column}        A new column in SEPARATOR notation\n   */\n  private convertFastaToSeparator(separator: string, fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = this.defaultGapSymbol;\n\n    const newColumn = this.getNewColumn(NOTATION.SEPARATOR);\n    // assign the values to the newly created empty column\n    newColumn.init((idx: number) => {\n      const fastaPolymer = this.column.get(idx);\n      const fastaMonomersArray = this.splitter(fastaPolymer);\n      for (let i = 0; i < fastaMonomersArray.length; i++) {\n        if (fastaMonomersArray[i] === fastaGapSymbol)\n          fastaMonomersArray[i] = UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n      }\n      return fastaMonomersArray.join(separator);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.SEPARATOR);\n    newColumn.setTag(TAGS.separator, separator);\n    return newColumn;\n  }\n\n  /**\n   * Get the wrapper strings for HELM, depending on the type of the\n   * macromolecule (peptide, DNA, RNA)\n   *\n   * @return {string[]} Array of wrappers\n   */\n  private getHelmWrappers(): string[] {\n    const prefix = (this.isDna()) ? 'DNA1{' :\n      (this.isRna()) ? 'RNA1{' :\n        (this.isPeptide()) ? 
'PEPTIDE1{' :\n          'Unknown'; // this case should be handled as exceptional\n\n    if (prefix === 'Unknown')\n      throw new Error('Neither peptide, nor nucleotide');\n\n    const postfix = '}$$$';\n    const leftWrapper = (this.isDna()) ? 'D(' :\n      (this.isRna()) ? 'R(' : ''; // no wrapper for peptides\n    const rightWrapper = (this.isDna() || this.isRna()) ? ')P' : ''; // no wrapper for peptides\n    return [prefix, leftWrapper, rightWrapper, postfix];\n  }\n\n  // A helper function for converting strings to HELM\n  private convertToHelmHelper(\n    sourcePolymer: string,\n    sourceGapSymbol: string,\n    prefix: string,\n    leftWrapper: string,\n    rightWrapper: string,\n    postfix: string\n  ): string {\n    const monomerArray = this.splitter(sourcePolymer);\n    const monomerHelmArray: string[] = monomerArray.map((mm: string) => {\n      if (mm === sourceGapSymbol)\n        return UnitsHandler._defaultGapSymbolsDict.HELM;\n      else\n        return `${leftWrapper}${mm}${rightWrapper}`;\n    });\n    return `${prefix}${monomerHelmArray.join('.')}${postfix}`;\n  }\n\n  /**\n   * Convert a string with SEPARATOR/FASTA notation to HELM\n   *\n   * @param {string} sourcePolymer  A string to be converted\n   * @param {string | null} sourceGapSymbol  An optional gap symbol, set to\n   * default values ('-' for FASTA and '' for SEPARATOR) unless specified\n   * @return {string}  The target HELM string\n   */\n  public convertStringToHelm(\n    sourcePolymer: string,\n    sourceGapSymbol: string | null = null\n  ): string {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n    return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol, prefix, leftWrapper, rightWrapper, postfix);\n  }\n\n  /**\n   * Convert a column to HELM\n   *\n   * @param {string | null} sourceGapSymbol\n   * @return {DG.Column}\n   */\n  private 
convertToHelm(sourceGapSymbol: string | null = null): DG.Column {\n    if (sourceGapSymbol === null)\n      sourceGapSymbol = this.defaultGapSymbol;\n\n    const [prefix, leftWrapper, rightWrapper, postfix] = this.getHelmWrappers();\n\n    const newColumn = this.getNewColumn(NOTATION.HELM);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const sourcePolymer = this.column.get(idx);\n      return this.convertToHelmHelper(sourcePolymer, sourceGapSymbol!, prefix, leftWrapper, rightWrapper, postfix);\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.HELM);\n    return newColumn;\n  }\n\n  /**\n   * Convert SEPARATOR column to FASTA notation\n   *\n   * @param {string | null} fastaGapSymbol Optional gap symbol for FASTA\n   * @return {DG.Column}  Converted column\n   */\n  private convertSeparatorToFasta(fastaGapSymbol: string | null = null): DG.Column {\n    if (fastaGapSymbol === null)\n      fastaGapSymbol = UnitsHandler._defaultGapSymbolsDict.FASTA;\n\n    const newColumn = this.getNewColumn(NOTATION.FASTA);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const separatorPolymer = this.column.get(idx);\n      // items can be monomers or separators\n      const separatorItemsArray = this.splitter(separatorPolymer);\n      const fastaMonomersArray: string[] = [];\n      for (let i = 0; i < separatorItemsArray.length; i++) {\n        const item = separatorItemsArray[i];\n        if (item.length === 0) {\n          fastaMonomersArray.push(fastaGapSymbol!);\n        } else if (item.length > 1) {\n          // the case of a multi-character monomer\n          const monomer = '[' + item + ']';\n          fastaMonomersArray.push(monomer);\n        } else {\n          fastaMonomersArray.push(item);\n        }\n      }\n      return fastaMonomersArray.join('');\n    });\n    newColumn.setTag(DG.TAGS.UNITS, NOTATION.FASTA);\n    return newColumn;\n  }\n\n  /**\n   *  Convert HELM column 
to FASTA/SEPARATOR\n   *\n   * @param {string} tgtNotation    Target notation: FASTA or SEPARATOR\n   * @param {string} tgtSeparator   Optional target separator (for HELM ->\n   * @param {string | null} tgtGapSymbol   Optional target gap symbol\n   * SEPARATOR)\n   * @return {DG.Column} Converted column\n   */\n  private convertHelm(\n    tgtNotation: string,\n    tgtSeparator: string = '',\n    tgtGapSymbol: string | null = null\n  ): DG.Column {\n    // This function must not contain calls of isDna() and isRna(), for\n    // source helm columns may contain RNA, DNA and PT across different rows\n    if (tgtGapSymbol === null) {\n      tgtGapSymbol = (this.toFasta(tgtNotation as NOTATION)) ?\n        UnitsHandler._defaultGapSymbolsDict.FASTA :\n        UnitsHandler._defaultGapSymbolsDict.SEPARATOR;\n    }\n\n    if (this.toSeparator(tgtNotation as NOTATION) && tgtSeparator === '')\n      tgtSeparator = this.separator;\n\n    const helmWrappersRe = /(R\\(|D\\(|\\)|P)/g;\n    const newColumn = this.getNewColumn(tgtNotation as NOTATION);\n    // assign the values to the empty column\n    newColumn.init((idx: number) => {\n      const helmPolymer = this.column.get(idx);\n\n      // we cannot use isDna() or isRna() because source helm columns can\n      // contain DNA, RNA and PT in different cells, so the corresponding\n      // tags cannot be set for the whole column\n      const isNucleotide = helmPolymer.startsWith('DNA') || helmPolymer.startsWith('RNA');\n\n      // items can be monomers or helms\n      const helmItemsArray = this.splitter(helmPolymer);\n      const tgtMonomersArray: string[] = [];\n      for (let i = 0; i < helmItemsArray.length; i++) {\n        let item = helmItemsArray[i];\n        if (isNucleotide)\n          item = item.replace(helmWrappersRe, '');\n        if (item === UnitsHandler._defaultGapSymbolsDict.HELM) {\n          tgtMonomersArray.push(tgtGapSymbol!);\n        } else if (this.toFasta(tgtNotation as NOTATION) && item.length > 1) {\n   
       // the case of a multi-character monomer converted to FASTA\n          const monomer = '[' + item + ']';\n          tgtMonomersArray.push(monomer);\n        } else {\n          tgtMonomersArray.push(item);\n        }\n      }\n      return tgtMonomersArray.join(tgtSeparator);\n    });\n\n    // TAGS.aligned is mandatory for columns of NOTATION.FASTA and NOTATION.SEPARATOR\n    const splitter: SplitterFunc = getSplitterForColumn(newColumn);\n    const stats: SeqColStats = getStats(newColumn, 5, splitter);\n    const aligned = stats.sameLength ? 'SEQ.MSA' : 'SEQ';\n    newColumn.setTag(TAGS.aligned, aligned);\n\n    return newColumn;\n  }\n\n  private convertHelmToSeparator(): DG.Column {\n    // TODO: implementatioreturn this.getNewColumn();\n    return this.getNewColumn(NOTATION.SEPARATOR);\n  }\n\n  /** Dispatcher method for notation conversion\n   *\n   * @param {NOTATION} tgtNotation   Notation we want to convert to\n   * @param {string | null} tgtSeparator   Possible separator\n   * @return {DG.Column}                Converted column\n   */\n  public convert(tgtNotation: NOTATION, tgtSeparator: string | null = null): DG.Column {\n    // possible exceptions\n    if (this.notation === tgtNotation)\n      throw new Error('tgt notation is invalid');\n    if (this.toSeparator(tgtNotation) && tgtSeparator === null)\n      throw new Error('tgt separator is not specified');\n\n    if (this.isFasta() && this.toSeparator(tgtNotation) && tgtSeparator !== null)\n      return this.convertFastaToSeparator(tgtSeparator);\n    else if ((this.isFasta() || this.isSeparator()) && this.toHelm(tgtNotation))\n      return this.convertToHelm();\n    else if (this.isSeparator() && this.toFasta(tgtNotation))\n      return this.convertSeparatorToFasta();\n    else if (this.isHelm() && this.toFasta(tgtNotation)) // the case of HELM\n      return this.convertHelm(tgtNotation);\n    else // this.isHelm() && this.toSeparator(tgtNotation)\n      return this.convertHelm(tgtNotation, 
tgtSeparator!);\n  }\n\n  public constructor(col: DG.Column) {\n    super(col);\n  }\n}\n"]}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"to-atomic-level.d.ts","sourceRoot":"","sources":["to-atomic-level.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AA4GtC,wBAAsB,cAAc,CAClC,EAAE,EAAE,EAAE,CAAC,SAAS,EAAE,WAAW,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,eAAe,EAAE,GAAG,EAAE,GACvE,OAAO,CAAC,IAAI,CAAC,CAyDf"}
|