@datagrok/bio 1.5.4 → 1.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/detectors.js +11 -0
- package/dist/package-test.js +354 -14
- package/dist/package.js +345 -12
- package/package.json +5 -4
- package/src/package.ts +23 -2
- package/src/tests/activity-cliffs-tests.ts +1 -0
- package/src/tests/detectors-test.ts +11 -1
- package/src/utils/atomic-works.ts +252 -0
- package/src/utils/utils.ts +40 -0
- package/tsconfig.json +1 -1
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@datagrok/bio",
|
|
3
3
|
"beta": false,
|
|
4
4
|
"friendlyName": "Bio",
|
|
5
|
-
"version": "1.5.
|
|
5
|
+
"version": "1.5.7",
|
|
6
6
|
"description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
|
|
7
7
|
"repository": {
|
|
8
8
|
"type": "git",
|
|
@@ -11,14 +11,15 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"@biowasm/aioli": ">=2.4.0",
|
|
14
|
-
"@datagrok-libraries/bio": "^2.
|
|
14
|
+
"@datagrok-libraries/bio": "^2.4.1",
|
|
15
15
|
"@datagrok-libraries/utils": "^1.0.0",
|
|
16
|
-
"@datagrok-libraries/ml": "^2.0.
|
|
16
|
+
"@datagrok-libraries/ml": "^2.0.9",
|
|
17
17
|
"cash-dom": "latest",
|
|
18
18
|
"datagrok-api": "^1.4.12",
|
|
19
19
|
"dayjs": "latest",
|
|
20
20
|
"ts-loader": "^9.2.5",
|
|
21
|
-
"typescript": "^4.4.2"
|
|
21
|
+
"typescript": "^4.4.2",
|
|
22
|
+
"openchemlib": "6.0.1"
|
|
22
23
|
},
|
|
23
24
|
"devDependencies": {
|
|
24
25
|
"@types/jest": "^27.0.0",
|
package/src/package.ts
CHANGED
|
@@ -16,6 +16,8 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
|
|
|
16
16
|
import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
17
17
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
18
18
|
import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
|
|
19
|
+
import { getMolfilesFromSeq, HELM_CORE_LIB_FILENAME } from './utils/utils';
|
|
20
|
+
import {getMacroMol} from './utils/atomic-works';
|
|
19
21
|
|
|
20
22
|
//name: sequenceAlignment
|
|
21
23
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
@@ -50,7 +52,7 @@ export function vdRegionViewer() {
|
|
|
50
52
|
//top-menu: Bio | Sequence Activity Cliffs...
|
|
51
53
|
//name: Sequence Activity Cliffs
|
|
52
54
|
//description: detect activity cliffs
|
|
53
|
-
//input: dataframe
|
|
55
|
+
//input: dataframe table [Input data table]
|
|
54
56
|
//input: column sequence {semType: Macromolecule}
|
|
55
57
|
//input: column activities
|
|
56
58
|
//input: double similarity = 80 [Similarity cutoff]
|
|
@@ -66,6 +68,7 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
|
|
|
66
68
|
df,
|
|
67
69
|
sequence,
|
|
68
70
|
axesNames,
|
|
71
|
+
'Activity cliffs',
|
|
69
72
|
activities,
|
|
70
73
|
similarity,
|
|
71
74
|
'Levenshtein',
|
|
@@ -101,11 +104,29 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
101
104
|
if (plotEmbeddings) {
|
|
102
105
|
for (const v of grok.shell.views) {
|
|
103
106
|
if (v.name === table.name)
|
|
104
|
-
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
|
|
107
|
+
(v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
105
108
|
}
|
|
106
109
|
}
|
|
107
110
|
};
|
|
108
111
|
|
|
112
|
+
//top-menu: Bio | Molfiles From HELM...
|
|
113
|
+
//name: Molfiles From HELM
|
|
114
|
+
//description: returns molfiles for each monomer from HELM library
|
|
115
|
+
//input: dataframe df [Input data table]
|
|
116
|
+
//input: column sequence {semType: Macromolecule}
|
|
117
|
+
export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
  // Reads the HELM core monomer library shipped with the package, resolves every monomer of every
  // row of `sequence` to its molfile, assembles one combined molfile per row with getMacroMol, and
  // appends the results to `df` as a string column named 'regenerated'.
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
  const monomersLibDf = DG.DataFrame.fromJson(monomersLibFile);
  const atomicCodes = getMolfilesFromSeq(sequence, monomersLibDf);

  // getMolfilesFromSeq returns null (after showing a warning) when a monomer is not present in the
  // library. There is nothing to assemble in that case, so leave `df` untouched rather than
  // dereferencing null.
  if (atomicCodes === null)
    return;

  const result: string[] = atomicCodes.map((molBlocks) => getMacroMol(molBlocks));
  df.columns.add(DG.Column.fromStrings('regenerated', result));
}
|
|
128
|
+
|
|
129
|
+
|
|
109
130
|
//top-menu: Bio | MSA...
|
|
110
131
|
//name: MSA
|
|
111
132
|
//input: dataframe table
|
|
@@ -94,6 +94,7 @@ MWRSWY-CKHP
|
|
|
94
94
|
msaComplex = 'MsaComplex',
|
|
95
95
|
idCsv = 'IdCsv',
|
|
96
96
|
sarSmallCsv = 'SarSmallCsv',
|
|
97
|
+
HelmCsv = 'HelmCsv',
|
|
97
98
|
}
|
|
98
99
|
|
|
99
100
|
const samples: { [key: string]: string } = {
|
|
@@ -102,6 +103,7 @@ MWRSWY-CKHP
|
|
|
102
103
|
'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
|
|
103
104
|
'IdCsv': 'System:AppData/Bio/samples/id.csv',
|
|
104
105
|
'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
|
|
106
|
+
'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
|
|
105
107
|
};
|
|
106
108
|
|
|
107
109
|
const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
|
|
@@ -194,6 +196,14 @@ MWRSWY-CKHP
|
|
|
194
196
|
test('samplesSarSmallCsvNegativeSmiles', async () => {
|
|
195
197
|
await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
|
|
196
198
|
});
|
|
199
|
+
|
|
200
|
+
test('samplesHelmCsvHELM', async () => {
|
|
201
|
+
await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
|
|
202
|
+
});
|
|
203
|
+
|
|
204
|
+
test('samplesHelmCsvNegativeActivity', async () => {
|
|
205
|
+
await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
|
|
206
|
+
});
|
|
197
207
|
});
|
|
198
208
|
|
|
199
209
|
export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
@@ -203,7 +213,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
203
213
|
expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
|
|
204
214
|
}
|
|
205
215
|
|
|
206
|
-
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
|
|
216
|
+
export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
|
|
207
217
|
const df: DG.DataFrame = await readDf();
|
|
208
218
|
|
|
209
219
|
const col: DG.Column = df.col(colName)!;
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import * as OCL from 'openchemlib/full.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Builds one V3000 molfile out of the molfiles of consecutive monomers.
 *
 * Every monomer block is first passed through rotateBackboneV3000 and the rotated blocks are then
 * joined by linkV3000. The rotated blocks are collected in a new array, so the array passed in by
 * the caller is not modified.
 *
 * @param molBlocks molfiles of the monomers, in sequence order
 * @returns the combined molfile as produced by linkV3000
 */
export function getMacroMol(molBlocks: string[]): string {
  const rotated = molBlocks.map((block) => rotateBackboneV3000(block));
  return linkV3000(rotated);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Converts a monomer molfile to V3000 and re-orients it so that its two terminal atoms lie on the
 * x axis.
 *
 * The atom numbered 1 (called "five prime" below) and the atom numbered `natom` (called
 * "three prime") are used as the two ends. The molecule is translated so that the midpoint of the
 * two ends is at the origin, rotated so that the five-prime atom lies on the negative x axis, and
 * finally shifted along x so that the five-prime atom sits at x = 0.
 *
 * @param molecule molfile of a single monomer, in any format OCL.Molecule.fromMolfile accepts
 * @returns V3000 molblock with rewritten x/y atom coordinates
 */
function rotateBackboneV3000(molecule: string): string {
  let molBlock = OCL.Molecule.fromMolfile(molecule).toMolfileV3();
  const coordinates = extractAtomDataV3000(molBlock);
  const natom = coordinates.atomIndex.length;

  const indexFivePrime = coordinates.atomIndex.indexOf(1);
  const indexThreePrime = coordinates.atomIndex.indexOf(natom);

  const xCenter = (coordinates.x[indexThreePrime] + coordinates.x[indexFivePrime]) / 2;
  const yCenter = (coordinates.y[indexThreePrime] + coordinates.y[indexFivePrime]) / 2;

  // place to center
  for (let i = 0; i < natom; i++) {
    coordinates.x[i] -= xCenter;
    coordinates.y[i] -= yCenter;
  }

  const xFive = coordinates.x[indexFivePrime];
  const yFive = coordinates.y[indexFivePrime];

  let angle = 0;
  if (xFive === 0)
    angle = yFive > coordinates.y[indexThreePrime] ? Math.PI / 2 : 3 * Math.PI / 2;
  else if (yFive === 0)
    angle = xFive > coordinates.x[indexThreePrime] ? Math.PI : 0;
  else {
    // The loop below rotates counter-clockwise by `angle`, so a point at polar angle phi ends up
    // at phi + angle. Choosing angle = PI - phi puts the five-prime atom on the negative x axis,
    // which is what the two axis-aligned branches above do as well. atan2 is required here:
    // atan(y/x) cannot tell opposite quadrants apart and previously left the backbone off the
    // x axis whenever y/x was negative.
    angle = Math.PI - Math.atan2(yFive, xFive);
  }

  const cos = Math.cos(angle);
  const sin = Math.sin(angle);

  for (let i = 0; i < natom; i++) {
    const xAdd = coordinates.x[i];
    coordinates.x[i] = xAdd * cos - coordinates.y[i] * sin;
    coordinates.y[i] = xAdd * sin + coordinates.y[i] * cos;
  }

  // place to right: shift so that the five-prime atom is at x = 0
  const xShift = coordinates.x[indexFivePrime];
  for (let i = 0; i < natom; i++)
    coordinates.x[i] -= xShift;

  // rewrite molBlock: splice the new x and y values into each atom line in turn
  let index = molBlock.indexOf('M V30 BEGIN ATOM'); // V3000 index for atoms coordinates
  index = molBlock.indexOf('\n', index);
  let indexEnd = index;
  for (let i = 0; i < natom; i++) {
    index = molBlock.indexOf('V30', index) + 4; // start of the atom number
    index = molBlock.indexOf(' ', index) + 1; // start of the atom type
    index = molBlock.indexOf(' ', index) + 1; // start of x
    indexEnd = molBlock.indexOf(' ', index) + 1; // start of y
    indexEnd = molBlock.indexOf(' ', indexEnd); // space that follows y

    molBlock = molBlock.slice(0, index) +
      coordinates.x[i] + ' ' + coordinates.y[i] +
      molBlock.slice(indexEnd);

    index = molBlock.indexOf('\n', index) + 1;
  }

  return molBlock;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Reads the atom block of a V3000 molblock.
 *
 * For each of the `natom` atom lines (count taken from extractAtomsBondsNumbersV3000) the first
 * four space-separated fields after the 'V30' marker are read: atom number, atom type, x and y.
 *
 * @param molBlock V3000 molblock text
 * @returns parallel arrays `atomIndex`, `atomType`, `x` and `y`, one entry per atom line
 */
function extractAtomDataV3000(molBlock: string) {
  const atomCount = extractAtomsBondsNumbersV3000(molBlock).natom;

  // fieldStart/fieldEnd delimit the field read most recently
  let fieldStart = molBlock.indexOf('\n', molBlock.indexOf('M V30 BEGIN ATOM'));
  let fieldEnd = fieldStart;

  // Moves to the field that begins right after the current one and returns its text.
  const nextField = (): string => {
    fieldStart = fieldEnd + 1;
    fieldEnd = molBlock.indexOf(' ', fieldStart);
    return molBlock.substring(fieldStart, fieldEnd);
  };

  const atomIndex: number[] = new Array(atomCount);
  const atomType: string[] = new Array(atomCount);
  const x: number[] = new Array(atomCount);
  const y: number[] = new Array(atomCount);

  for (let k = 0; k < atomCount; k++) {
    // position on the blank that follows 'V30' so that nextField() starts at the atom number
    fieldEnd = molBlock.indexOf('V30', fieldStart) + 3;

    atomIndex[k] = parseInt(nextField());
    atomType[k] = nextField();
    x[k] = parseFloat(nextField());
    y[k] = parseFloat(nextField());

    // jump to the beginning of the next line
    fieldStart = molBlock.indexOf('\n', fieldStart) + 1;
  }

  return {atomIndex, atomType, x, y};
}
|
|
105
|
+
|
|
106
|
+
/**
 * Concatenates V3000 monomer molblocks into one V3000 molfile.
 *
 * For every monomer the atom and bond lines are renumbered by the running atom/bond offsets and
 * the x coordinates are shifted by the accumulated x extent of the preceding monomers. For every
 * monomer except the first, the line of the atom numbered 1 is dropped; bond references to it are
 * renumbered like any other and therefore resolve to the last atom number of the preceding
 * monomer. Atoms listed in MDLV30/STEABS collections are gathered and written out as a single
 * collection.
 *
 * The input array is not modified. When no stereo atoms were collected the COLLECTION section is
 * omitted (previously an unterminated 'ATOMS=(0 -' line was written), and an empty input yields a
 * COUNTS line with zero atoms.
 *
 * @param molBlocks V3000 molblocks of the monomers, in sequence order
 * @returns the combined molfile text
 */
function linkV3000(molBlocks: string[]): string {
  const collectionPrefix = 'M V30 MDLV30/STEABS ATOMS=(';
  const entries = 4; // atom numbers written per collection line

  let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
  macroMolBlock += ' 0 0 0 0 0 0 999 V3000\n';
  macroMolBlock += 'M V30 BEGIN CTAB\n';
  let atomBlock = '';
  let bondBlock = '';
  const collection: number[] = [];
  let natom = 0;
  let nbond = 0;
  let xShift = 0;

  for (let i = 0; i < molBlocks.length; i++) {
    // join continuation lines ("-\nM V30 ") so that every record sits on one line
    let block = molBlocks[i].replaceAll('(-\nM V30 ', '(')
      .replaceAll('-\nM V30 ', '').replaceAll(' )', ')');
    const numbers = extractAtomsBondsNumbersV3000(block);
    const coordinates = extractAtomDataV3000(block);

    let indexAtoms = block.indexOf('M V30 BEGIN ATOM'); // V3000 index for atoms coordinates
    indexAtoms = block.indexOf('\n', indexAtoms);
    let index = indexAtoms;
    let indexEnd = indexAtoms;

    // the same x offset applies to every atom of this monomer
    const totalShift = xShift - coordinates.x[0];

    for (let j = 0; j < numbers.natom; j++) {
      if (coordinates.atomIndex[j] !== 1 || i === 0) {
        // rewrite atom number
        index = block.indexOf('V30', index) + 4;
        indexEnd = block.indexOf(' ', index);
        const atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
        block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

        // rewrite coordinates (skip the atom number and atom type fields first)
        index = block.indexOf(' ', index) + 1;
        index = block.indexOf(' ', index) + 1;
        indexEnd = block.indexOf(' ', index);

        let coordinate = Math.round(10000 * (parseFloat(block.substring(index, indexEnd)) + totalShift)) / 10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf(' ', index) + 1;
        indexEnd = block.indexOf(' ', index);
        coordinate = Math.round(10000 * (parseFloat(block.substring(index, indexEnd)))) / 10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf('\n', index) + 1;
      } else {
        // atom 1 of a non-first monomer: cut its whole line, including the preceding newline
        index = block.indexOf('M V30', index) - 1;
        indexEnd = block.indexOf('\n', index + 1);
        block = block.slice(0, index) + block.slice(indexEnd);
      }
    }

    const indexAtomsEnd = block.indexOf('M V30 END ATOM');
    atomBlock += block.substring(indexAtoms + 1, indexAtomsEnd);

    let indexBonds = block.indexOf('M V30 BEGIN BOND'); // V3000 index for bonds
    indexBonds = block.indexOf('\n', indexBonds);
    index = indexBonds;
    indexEnd = indexBonds;

    for (let j = 0; j < numbers.nbond; j++) {
      // rewrite bond number
      index = block.indexOf('V30', index) + 4;
      indexEnd = block.indexOf(' ', index);
      const bondNumber = parseInt(block.substring(index, indexEnd)) + nbond;
      block = block.slice(0, index) + bondNumber + block.slice(indexEnd);

      // rewrite atom pair in bond (skip the bond number and bond type fields first)
      index = block.indexOf(' ', index) + 1;
      index = block.indexOf(' ', index) + 1;
      indexEnd = block.indexOf(' ', index);
      let atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
      block = block.slice(0, index) + atomNumber + block.slice(indexEnd);
      index = block.indexOf(' ', index) + 1;
      // the second atom may be the last field on the line
      indexEnd = Math.min(block.indexOf('\n', index), block.indexOf(' ', index));
      atomNumber = parseInt(block.substring(index, indexEnd)) + natom;
      block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

      index = block.indexOf('\n', index) + 1;
    }

    const indexBondEnd = block.indexOf('M V30 END BOND');
    bondBlock += block.substring(indexBonds + 1, indexBondEnd);

    let indexCollection = block.indexOf(collectionPrefix); // V3000 index for collections
    while (indexCollection !== -1) {
      // The text between the prefix and ')' is "<count> <atom> <atom> ...".
      // Drop empty tokens, then drop the leading count.
      indexCollection += collectionPrefix.length;
      const collectionEnd = block.indexOf(')', indexCollection);
      const collectionEntries = block.substring(indexCollection, collectionEnd)
        .split(' ').filter((e) => e !== '').slice(1);
      collectionEntries.forEach((e) => {
        collection.push(parseInt(e) + natom);
      });
      indexCollection = block.indexOf(collectionPrefix, collectionEnd);
    }

    natom += numbers.natom - 1;
    nbond += numbers.nbond;
    xShift += coordinates.x[numbers.natom - 1] - coordinates.x[0];
  }

  // collection lines: `entries` atom numbers per line, ' -' continues, ')' closes the list
  let collectionBlock = '';
  if (collection.length > 0) {
    const rows: string[] = [];
    for (let start = 0; start < collection.length; start += entries)
      rows.push('M V30 ' + collection.slice(start, start + entries).join(' '));
    collectionBlock = collectionPrefix + collection.length + ' -\n' + rows.join(' -\n') + ')\n';
  }

  // generate file
  // every monomer added natom - 1 to the offset; the +1 accounts for atom 1 of the first monomer
  const totalAtoms = molBlocks.length > 0 ? natom + 1 : 0;
  macroMolBlock += 'M V30 COUNTS ' + totalAtoms + ' ' + nbond + ' 0 0 0\n';
  macroMolBlock += 'M V30 BEGIN ATOM\n';
  macroMolBlock += atomBlock;
  macroMolBlock += 'M V30 END ATOM\n';
  macroMolBlock += 'M V30 BEGIN BOND\n';
  macroMolBlock += bondBlock;
  macroMolBlock += 'M V30 END BOND\n';
  if (collectionBlock !== '') {
    macroMolBlock += 'M V30 BEGIN COLLECTION\n';
    macroMolBlock += collectionBlock;
    macroMolBlock += 'M V30 END COLLECTION\n';
  }
  macroMolBlock += 'M V30 END CTAB\n';
  macroMolBlock += 'M END\n';

  return macroMolBlock;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Reads the atom and bond counts from the COUNTS line of a V3000 molblock.
 *
 * The two integers following the first 'COUNTS' keyword are returned. Any amount of whitespace
 * between the keyword and the numbers is accepted, and both '\n' and '\r\n' line endings work.
 *
 * @param molBlock V3000 molblock text
 * @returns `natom` and `nbond` as written on the COUNTS line
 * @throws Error when the molblock has no COUNTS keyword followed by two integers
 */
function extractAtomsBondsNumbersV3000(molBlock: string): {natom: number, nbond: number} {
  const counts = /COUNTS\s+(\d+)\s+(\d+)/.exec(molBlock);
  if (counts === null)
    throw new Error('V3000 molblock does not contain a valid COUNTS line');

  return {natom: parseInt(counts[1], 10), nbond: parseInt(counts[2], 10)};
}
|
|
252
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import { WebLogo, SplitterFunc } from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
3
|
+
import * as grok from 'datagrok-api/grok';
|
|
4
|
+
|
|
5
|
+
export const HELM_CORE_LIB_MONOMER_COL = 'symbol';
|
|
6
|
+
export const HELM_CORE_LIB_MOLFILE_COL = 'molfile';
|
|
7
|
+
export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
|
|
8
|
+
|
|
9
|
+
/**
 * Resolves every monomer of every sequence in `col` to its molfile from the monomer library.
 *
 * Sequences are split with the splitter WebLogo provides for the column's units tag and
 * 'separator' tag. Empty monomers are skipped. If a monomer has no entry in the library, a warning
 * is shown and the whole call returns null.
 *
 * @param col column holding the sequences
 * @param monomersLib monomer library table, as consumed by createMomomersMolDict
 * @returns for each row, the molfiles of its monomers in order, or null if a monomer is unknown
 */
export function getMolfilesFromSeq(col: DG.Column, monomersLib: DG.DataFrame): string[][] | null {
  const units = col.tags[DG.TAGS.UNITS];
  const separator = col.getTag('separator');
  const split: SplitterFunc = WebLogo.getSplitter(units, separator);
  const molfileBySymbol = createMomomersMolDict(monomersLib);

  const molFiles: string[][] = [];
  for (let row = 0; row < col.length; ++row) {
    const rowMolfiles: string[] = [];
    for (const monomer of split(col.get(row))) {
      if (!monomer)
        continue;

      const molfile = molfileBySymbol[monomer];
      if (!molfile) {
        grok.shell.warning(`Monomer ${monomer} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      rowMolfiles.push(molfile);
    }
    molFiles.push(rowMolfiles);
  }
  return molFiles;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Builds a lookup from monomer symbol to molfile out of a monomer library table.
 *
 * Keys are read from the HELM_CORE_LIB_MONOMER_COL column and values from the
 * HELM_CORE_LIB_MOLFILE_COL column, row by row; a later row with the same symbol overwrites an
 * earlier one. The returned object has no prototype, so lookups of symbols such as 'constructor'
 * or 'toString' yield undefined instead of an inherited Object member.
 *
 * @param lib monomer library table
 * @returns dictionary mapping monomer symbol to molfile text
 * @throws Error when either of the two required columns is absent from `lib`
 */
export function createMomomersMolDict(lib: DG.DataFrame): {[key: string]: string} {
  const monomersCol = lib.col(HELM_CORE_LIB_MONOMER_COL);
  const molCol = lib.col(HELM_CORE_LIB_MOLFILE_COL);
  if (monomersCol == null || molCol == null) {
    throw new Error(
      `Monomer library must contain '${HELM_CORE_LIB_MONOMER_COL}' and '${HELM_CORE_LIB_MOLFILE_COL}' columns`);
  }

  const dict: {[key: string]: string} = Object.create(null);
  for (let i = 0; i < lib.rowCount; ++i)
    dict[monomersCol.get(i)] = molCol.get(i);

  return dict;
}
|
package/tsconfig.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
// "incremental": true, /* Enable incremental compilation */
|
|
7
7
|
"target": "es6", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
|
|
8
8
|
"module": "es2020", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
|
|
9
|
-
"lib": ["es2020", "dom"], /* Specify library files to be included in the compilation. */
|
|
9
|
+
"lib": ["es2020", "dom", "ES2021.String"], /* Specify library files to be included in the compilation. */
|
|
10
10
|
// "allowJs": true, /* Allow javascript files to be compiled. */
|
|
11
11
|
// "checkJs": true, /* Report errors in .js files. */
|
|
12
12
|
// "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */
|