@datagrok/bio 2.0.25 → 2.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/const.ts DELETED
@@ -1,30 +0,0 @@
1
- import * as ui from 'datagrok-api/ui';
2
- import * as grok from 'datagrok-api/grok';
3
- import * as DG from 'datagrok-api/dg';
4
-
5
/**
 * Maps each field of a JSON monomer-library entry to the SDF column holding
 * the same information. Fields mapped to `null` have no SDF counterpart.
 * Key order is significant for consumers that iterate with `Object.keys`.
 */
export const jsonSdfMonomerLibDict = {
  monomerType: null,
  smiles: null,
  name: 'MonomerName',
  author: null,
  molfile: 'molecule',
  naturalAnalog: 'MonomerNaturalAnalogCode',
  rgroups: 'MonomerCaps',
  createDate: null,
  id: null,
  polymerType: 'MonomerType',
  symbol: 'MonomerCode',
};

/** JSON field holding the list of R-groups of a monomer. */
export const RGROUP_FIELD = 'rgroups';
/** R-group property: SMILES of the capping group. */
export const CAP_GROUP_SMILES = 'capGroupSmiles';
/** R-group property: alternate identifier. */
export const RGROUP_ALTER_ID = 'alternateId';
/** R-group property: name of the capping group. */
export const CAP_GROUP_NAME = 'capGroupName';
/** R-group property: label. */
export const RGROUP_LABEL = 'label';
/** JSON field holding the monomer symbol. */
export const MONOMER_SYMBOL = 'symbol';
/** SDF column holding the monomer name. */
export const SDF_MONOMER_NAME = 'MonomerName';

// Range of hex numbers used in the PepSea library to encode monomers
// (both bounds are usable code points).
export const MONOMER_ENCODE_MIN = 0x100;
export const MONOMER_ENCODE_MAX = 0x40A;
30
-
@@ -1,199 +0,0 @@
1
/** Attachment points of a monomer, keyed by link name. */
export type LinkData = { [link: string]: { atomNumber: number, type: string } };

/** A single monomer record of the library. */
export type MonomerEntry = {
  /** Molecule block text of the monomer. */
  mol: string;
  /** Polymer type of the monomer. */
  type: string;
  /** Code of the natural analogue of the monomer. */
  analogueCode: string;
  /** Attachment points of the monomer. */
  linkages: LinkData;
};

/** Monomer records keyed by monomer name. */
export type MonomerEntries = { [name: string]: MonomerEntry };
9
-
10
/** HELM-associated monomer library compiled from the text of an SDF file. */
export class MonomerLibrary {
  static libName = 'monomerLibrary';

  /** SDF columns that must be present and hold one value per molecule record. */
  private monomerFields: string[] = [
    'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',
  ];

  /** Monomer records keyed by monomer name. */
  private library: MonomerEntries = {};

  /** Monomer names in the order they were read. */
  private monomers: string[] = [];

  /** Compiles the library from SDF text.
   * @param {string} sdf content of the SDF file
   * @throws {Error} when a required field is absent, is not given for every
   * monomer, or a monomer has no R-group line / a malformed cap line
   */
  constructor(sdf: string) {
    const data = new SDFReader().getColls(sdf);
    for (const f of this.monomerFields) {
      if (!(f in data))
        throw new Error(`Monomer library was not compiled: ${f} field is absent in provided file`);

      if (data[f].length != data.molecule.length)
        throw new Error(`Monomer library was not compiled: ${f} field is not presented for each monomer`);
    }

    for (let i = 0; i < data.molecule.length; i++) {
      const linkData = this.getLinkData(data.molecule[i], data.MonomerCaps[i], data.MonomerName[i]);
      const entry = {
        mol: data.molecule[i],
        type: 'Peptide',
        code: data.MonomerCode[i],
        analogueCode: data.MonomerNaturalAnalogCode[i],
        linkages: linkData,
      };

      // A '.' code means "no code"; such monomers are registered under their name.
      const name = data.MonomerCode[i] !== '.' ? data.MonomerCode[i] : data.MonomerName[i];
      this.library[name] = entry;
      this.monomers.push(name);
    }
  }

  /** getting full monomer information from monomer library
   * @param {string} name
   * @return {MonomerEntry}
   * @throws {Error} when the library has no monomer with this name
   */
  public getMonomerEntry(name: string): MonomerEntry {
    if (!this.monomers.includes(name))
      throw new Error(`Monomer library do not contain ${name} monomer`);

    return this.library[name];
  }

  /** getting mol as string for monomer: the R-group line is removed and each
   * 'R#' placeholder is replaced with the cap of the corresponding linkage
   * @param {string} name
   * @return {string}
   * @throws {Error} when the library has no monomer with this name
   */
  public getMonomerMol(name: string): string {
    const entry = this.getMonomerEntry(name);
    // Tolerates both the padded ('M  RGP  ...') and single-space spellings.
    let monomerMol = entry.mol.replace(/M +RGP +.+\n/, '');

    // order matters: every linkage consumes the next 'R#' placeholder
    for (const link of Object.keys(entry.linkages))
      monomerMol = monomerMol.replace('R#', entry.linkages[link].type + ' ');

    return monomerMol;
  }

  /** getting the list of the monomers available in library*/
  get monomerNames(): string[] {
    return this.monomers;
  }

  static get id(): string {
    return MonomerLibrary.libName;
  }

  /** Extracts attachment points from the R-group line of a molecule block.
   * @param {string} mol molecule block
   * @param {string} caps cap lines, one per R-group (e.g. '[R1]H')
   * @param {string} name monomer name, used in error messages
   * @return {LinkData}
   */
  private getLinkData(mol: string, caps: string, name: string): LinkData {
    const rgpLine = mol.match(/M +RGP +(.+)/);
    if (rgpLine === null)
      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);

    // R-group number -> cap, e.g. '[R1]H' gives {'1': 'H'}
    const types: { [code: string]: string } = {};
    for (const cap of caps.split('\n')) {
      if (cap.trim() === '')
        continue; // blank lines (e.g. a trailing newline) carry no cap

      const code = cap.match(/\d+/);
      const capType = cap.match(/(?<=\])\w+/);
      if (code === null || capType === null)
        throw new Error(`Monomer library was not compiled: ${name} entry has malformed cap '${cap}'`);

      types[code[0]] = capType[0];
    }

    const linkNames: { [code: number]: string } = {1: 'N-terminal', 2: 'C-terminal', 3: 'branch'};

    // Layout after the prefix: <count> then <atom number> <R-group number> pairs.
    const data = rgpLine[1].trim().split(/\s+/);
    const count = parseInt(data[0]);
    const res: LinkData = {};
    for (let i = 0; i < count; i++) {
      const code = parseInt(data[2 * i + 2]);
      // R-group numbers other than 1..3 are stored under the empty link name.
      const type = linkNames[code] ?? '';
      res[type] = {atomNumber: parseInt(data[2 * i + 1]), type: types[code]};
    }

    return res;
  }
}
124
-
125
//TODO: merge with Chem version
/**
 * Minimal SDF reader: collects the molecule block and the data items of every
 * record into per-name columns (`dataColls`), one value per record.
 * An instance accumulates results, so use a fresh reader per file.
 */
class SDFReader {
  /** Column name -> values; 'molecule' holds the molecule blocks. */
  dataColls: { [_: string]: string [] };

  constructor() {
    this.dataColls = {'molecule': []};
  }

  /** Reads `content` and returns the collected columns. */
  getColls(content: string): { [_: string]: string[] } {
    this.read(content);
    return this.dataColls;
  }

  /** Parses all records of `content`; records are delimited by '$$$$' lines. */
  read(content: string): void {
    content = content.replace(/\r/g, ''); //equalize old and new sdf standards
    const firstEnd = content.indexOf('$$$$', 0);
    // The first record defines (resets) the property columns.
    this.parse(content, 0, firstEnd, (name: string, val: string): void => {
      this.dataColls[name] = [val];
    });
    if (firstEnd === -1)
      return; // a single record without a terminator

    let startIndex = firstEnd + 5; // skip '$$$$' and its line break
    while (startIndex > -1 && startIndex < content.length)
      startIndex = this.readNext(content, startIndex);
  }

  /**
   * Parses the record starting at `startIndex`.
   * @return start of the following record, or -1 when no terminated record is left
   */
  readNext(content: string, startIndex: number): number {
    const nextStartIndex = content.indexOf('$$$$', startIndex);
    if (nextStartIndex === -1)
      return -1;

    this.parse(content, startIndex, nextStartIndex, (name: string, val: string): void => {
      // A property may first appear in a later record: create its column on demand.
      if (!(name in this.dataColls))
        this.dataColls[name] = [];
      this.dataColls[name].push(val);
    });
    return nextStartIndex + 5;
  }

  /**
   * Parses one record, `content[start, end)`: pushes its molecule block to the
   * 'molecule' column and reports every data item to `handler`.
   * A negative `end` means "up to the end of content".
   * Only data of this record is read; a value spans all lines up to the first
   * blank line (or the record end), without the trailing line break.
   */
  parse(content: string, start: number, end: number, handler: (name: string, val: string) => void): void {
    const recordEnd = end < 0 ? content.length : Math.min(end, content.length);
    const record = content.substring(start, recordEnd);
    // Data header line, e.g. '> <NAME>' or '>  <NAME>  (1)'.
    const headerRe = /^> *<([^>\n]*)>[^\n]*\n/gm;

    // The molecule block runs through its 'M  END' line (any number of spaces accepted).
    const terminator = /^M +END[ \t]*(?:\n|$)/m.exec(record);
    let molEnd: number;
    if (terminator !== null) {
      molEnd = terminator.index + terminator[0].length;
    } else {
      // No terminator: take everything before the first data header.
      const firstHeader = headerRe.exec(record);
      molEnd = firstHeader !== null ? firstHeader.index : record.length;
    }
    this.dataColls['molecule'].push(record.substring(0, molEnd));

    headerRe.lastIndex = molEnd;
    let header: RegExpExecArray | null;
    while ((header = headerRe.exec(record)) !== null) {
      const lines: string[] = [];
      let pos = headerRe.lastIndex;
      while (pos < record.length) {
        let lineEnd = record.indexOf('\n', pos);
        if (lineEnd === -1)
          lineEnd = record.length;
        if (lineEnd === pos)
          break; // a blank line terminates the value
        lines.push(record.substring(pos, lineEnd));
        pos = lineEnd + 1;
      }
      handler(header[1], lines.join('\n'));
      headerRe.lastIndex = pos;
    }
  }
}
@@ -1,135 +0,0 @@
1
- import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
- import * as DG from 'datagrok-api/dg';
4
- import * as bio from '@datagrok-libraries/bio';
5
-
6
- import {
7
- CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
8
- RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
9
- } from '../const';
10
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
11
-
12
/** Location of the HELM core monomer library JSON file. */
export const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';
/** Library field holding the monomer symbol. */
export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';
/** Library field holding the monomer molfile. */
export const HELM_CORE_LIB_MOLFILE = 'molfile';
/** Library fields copied into the per-monomer dictionary entries. */
export const HELM_CORE_FIELDS = [HELM_CORE_LIB_MONOMER_SYMBOL, HELM_CORE_LIB_MOLFILE, 'rgroups', 'name'];
16
-
17
-
18
/**
 * Encodes every sequence of `col` as a string with one character per monomer.
 * Each distinct monomer gets its own code point, assigned in order of first
 * appearance from the range MONOMER_ENCODE_MIN..MONOMER_ENCODE_MAX.
 * @param {DG.Column} col column with sequences; its units and separator tags select the splitter
 * @return {DG.Column | null} string column 'encodedMolecules', or null (after
 * reporting an error to the shell) when there are more distinct monomers than code points
 */
export function encodeMonomers(col: DG.Column): DG.Column | null {
  let encodeSymbol = MONOMER_ENCODE_MIN;
  // A Map keeps monomers named like inherited object members ('constructor', ...) safe.
  const monomerSymbolDict = new Map<string, number>();
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag(bio.TAGS.separator);
  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
  const encodedStringArray: string[] = [];
  for (let i = 0; i < col.length; ++i) {
    let encodedMonomerStr = '';
    const monomers = splitterFunc(col.get(i));
    // A plain loop (not forEach) so that `return null` leaves the function itself.
    for (let j = 0; j < monomers.length; ++j) {
      const m = monomers[j];
      let code = monomerSymbolDict.get(m);
      if (code === undefined) {
        if (encodeSymbol > MONOMER_ENCODE_MAX) {
          grok.shell.error(`Not enough symbols to encode monomers`);
          return null;
        }
        code = encodeSymbol++;
        monomerSymbolDict.set(m, code);
      }
      encodedMonomerStr += String.fromCodePoint(code);
    }
    encodedStringArray.push(encodedMonomerStr);
  }
  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
}
43
-
44
/**
 * Collects, for every sequence of `col`, independent copies of the library
 * records of its monomers. Empty monomers are skipped.
 * @param {DG.Column} col column with sequences
 * @param {any[]} monomersLibObject monomer library records
 * @return {any[][] | null} one list of records per row, or null (after a shell
 * warning) as soon as a monomer is not found in the library
 */
export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {
  const splitSequence: bio.SplitterFunc = bio.getSplitter(col.tags[DG.TAGS.UNITS], col.getTag('separator'));
  const libraryBySymbol = createMomomersMolDict(monomersLibObject);
  const result: any[][] = [];
  for (let rowIdx = 0; rowIdx < col.length; ++rowIdx) {
    const symbols = splitSequence(col.get(rowIdx));
    const rowRecords: any[] = [];
    for (let pos = 0; pos < symbols.length; ++pos) {
      const symbol = symbols[pos];
      if (!symbol)
        continue;

      const libRecord = libraryBySymbol[symbol];
      if (!libRecord) {
        grok.shell.warning(`Monomer ${symbol} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      // JSON round trip: callers get a deep copy they can modify freely
      rowRecords.push(JSON.parse(JSON.stringify(libRecord)));
    }
    result.push(rowRecords);
  }
  return result;
}
67
-
68
/**
 * Collects independent copies of the library records of the monomers of the
 * sequence held in `cell`. Empty monomers are skipped.
 * @param {DG.Cell} cell cell with the sequence; splitter settings come from its column tags
 * @param {any[]} monomersLibObject monomer library records
 * @return {any[][] | null} a single-element list holding the records of the
 * sequence, or null (after a shell warning) when a monomer is not in the library
 */
export function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null {
  const seqColumn = cell.column!;
  const splitSequence: bio.SplitterFunc =
    bio.getSplitter(seqColumn.tags[DG.TAGS.UNITS], seqColumn.getTag('separator'));
  const libraryBySymbol = createMomomersMolDict(monomersLibObject);
  const symbols = splitSequence(cell.value);
  const seqRecords: any[] = [];
  for (let pos = 0; pos < symbols.length; ++pos) {
    const symbol = symbols[pos];
    if (!symbol)
      continue;

    const libRecord = libraryBySymbol[symbol];
    if (!libRecord) {
      grok.shell.warning(`Monomer ${symbol} is missing in HELM library. Structure cannot be created`);
      return null;
    }
    // JSON round trip: callers get a deep copy they can modify freely
    seqRecords.push(JSON.parse(JSON.stringify(libRecord)));
  }
  return [seqRecords];
}
89
-
90
/**
 * Builds a symbol -> record dictionary from the 'PEPTIDE' records of a monomer
 * library. Each record keeps only the HELM_CORE_FIELDS; when a symbol occurs
 * more than once, the last record wins.
 * @param {any[]} lib monomer library records
 * @return {{ [key: string]: string | any }} records keyed by monomer symbol
 */
export function createMomomersMolDict(lib: any[]): { [key: string]: string | any } {
  const bySymbol: { [key: string]: string | any } = {};
  lib
    .filter((record) => record['polymerType'] === 'PEPTIDE')
    .forEach((record) => {
      const trimmed: { [key: string]: any } =
        Object.fromEntries(HELM_CORE_FIELDS.map((field) => [field, record[field]]));
      bySymbol[record[HELM_CORE_LIB_MONOMER_SYMBOL]] = trimmed;
    });
  return bySymbol;
}
103
-
104
-
105
/**
 * Converts a table read from an SDF monomer library into a list of JSON
 * monomer records, using jsonSdfMonomerLibDict to map JSON fields to columns.
 *  - symbol: taken from the mapped column; '.' falls back to the monomer name column;
 *  - rgroups: every non-blank cap line (e.g. '[R1]H') becomes an R-group object;
 *    a missing cell gives an empty list;
 *  - other fields: copied when a column is mapped, otherwise left out.
 * @param {DG.DataFrame} table table with the SDF columns
 * @return {any} array of monomer records
 * @throws {Error} when a non-blank cap line has no R-group label like '[R1]'
 */
export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
  const sdfColumns = jsonSdfMonomerLibDict as { [key: string]: string | any };
  const rgroupLabelRe = /\[R(\d+)\]/;
  const resultLib = [];
  for (let i = 0; i < table.rowCount; i++) {
    const monomer: { [key: string]: string | any } = {};
    for (const key of Object.keys(jsonSdfMonomerLibDict)) {
      if (key === MONOMER_SYMBOL) {
        const monomerSymbol = table.get(jsonSdfMonomerLibDict[key], i);
        monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, i) : monomerSymbol;
      } else if (key === RGROUP_FIELD) {
        const caps: string = table.get(jsonSdfMonomerLibDict[key], i) ?? '';
        const jsonRgroups: any[] = [];
        for (const g of caps.split('\n')) {
          if (g.trim() === '')
            continue; // blank lines (e.g. a trailing newline) carry no R-group

          const label = g.match(rgroupLabelRe);
          if (label === null)
            throw new Error(`Monomer cap '${g}' has no R-group label like [R1]`);

          const radicalNum = label[1];
          // Everything after the last ']' is the capping atom: 'H' or anything else (treated as OH).
          const isHydrogen = g.substring(g.lastIndexOf(']') + 1).trim() === 'H';
          const rgroup: { [key: string]: string | any } = {};
          rgroup[CAP_GROUP_SMILES] = isHydrogen ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`;
          rgroup[RGROUP_ALTER_ID] = isHydrogen ? `R${radicalNum}-H` : `R${radicalNum}-OH`;
          rgroup[CAP_GROUP_NAME] = isHydrogen ? `H` : `OH`;
          rgroup[RGROUP_LABEL] = `R${radicalNum}`;
          jsonRgroups.push(rgroup);
        }
        monomer[key] = jsonRgroups;
      } else if (sdfColumns[key]) {
        monomer[key] = table.get(sdfColumns[key], i);
      }
    }
    resultLib.push(monomer);
  }
  return resultLib;
}