@datagrok/bio 1.5.4 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@datagrok/bio",
3
3
  "beta": false,
4
4
  "friendlyName": "Bio",
5
- "version": "1.5.4",
5
+ "version": "1.5.7",
6
6
  "description": "Bio is a [package](https://datagrok.ai/help/develop/develop#packages) for the [Datagrok](https://datagrok.ai) platform",
7
7
  "repository": {
8
8
  "type": "git",
@@ -11,14 +11,15 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "@biowasm/aioli": ">=2.4.0",
14
- "@datagrok-libraries/bio": "^2.3.1",
14
+ "@datagrok-libraries/bio": "^2.4.1",
15
15
  "@datagrok-libraries/utils": "^1.0.0",
16
- "@datagrok-libraries/ml": "^2.0.8",
16
+ "@datagrok-libraries/ml": "^2.0.9",
17
17
  "cash-dom": "latest",
18
18
  "datagrok-api": "^1.4.12",
19
19
  "dayjs": "latest",
20
20
  "ts-loader": "^9.2.5",
21
- "typescript": "^4.4.2"
21
+ "typescript": "^4.4.2",
22
+ "openchemlib": "6.0.1"
22
23
  },
23
24
  "devDependencies": {
24
25
  "@types/jest": "^27.0.0",
package/src/package.ts CHANGED
@@ -16,6 +16,8 @@ import {getEmbeddingColsNames, sequenceSpace} from './utils/sequence-space';
16
16
  import {AvailableMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
17
17
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
18
18
  import {sequenceGetSimilarities, drawTooltip} from './utils/sequence-activity-cliffs';
19
+ import { getMolfilesFromSeq, HELM_CORE_LIB_FILENAME } from './utils/utils';
20
+ import {getMacroMol} from './utils/atomic-works';
19
21
 
20
22
  //name: sequenceAlignment
21
23
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
@@ -50,7 +52,7 @@ export function vdRegionViewer() {
50
52
  //top-menu: Bio | Sequence Activity Cliffs...
51
53
  //name: Sequence Activity Cliffs
52
54
  //description: detect activity cliffs
53
- //input: dataframe df [Input data table]
55
+ //input: dataframe table [Input data table]
54
56
  //input: column sequence {semType: Macromolecule}
55
57
  //input: column activities
56
58
  //input: double similarity = 80 [Similarity cutoff]
@@ -66,6 +68,7 @@ export async function activityCliffs(df: DG.DataFrame, sequence: DG.Column, acti
66
68
  df,
67
69
  sequence,
68
70
  axesNames,
71
+ 'Activity cliffs',
69
72
  activities,
70
73
  similarity,
71
74
  'Levenshtein',
@@ -101,11 +104,29 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
101
104
  if (plotEmbeddings) {
102
105
  for (const v of grok.shell.views) {
103
106
  if (v.name === table.name)
104
- (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1]});
107
+ (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
105
108
  }
106
109
  }
107
110
  };
108
111
 
112
+ //top-menu: Bio | Molfiles From HELM...
113
+ //name: Molfiles From HELM
114
+ //description: returns molfiles for each monomer from HELM library
115
+ //input: dataframe df [Input data table]
116
+ //input: column sequence {semType: Macromolecule}
117
export async function molfilesFromHELM(df: DG.DataFrame, sequence: DG.Column): Promise<void> {
  // Builds one linked molfile per row of `sequence` from the monomer molfiles of the
  // HELM core library shipped with the package, and appends them to `df` as a new
  // string column named 'regenerated'.
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
  const monomersLibDf = DG.DataFrame.fromJson(monomersLibFile);

  // getMolfilesFromSeq returns null (after showing a warning) when a monomer is not
  // in the library; in that case there is nothing to add to the table.
  const atomicCodes = getMolfilesFromSeq(sequence, monomersLibDf);
  if (atomicCodes === null)
    return;

  const result: string[] = atomicCodes.map((monomerMolfiles) => getMacroMol(monomerMolfiles));

  df.columns.add(DG.Column.fromStrings('regenerated', result));
}
128
+
129
+
109
130
  //top-menu: Bio | MSA...
110
131
  //name: MSA
111
132
  //input: dataframe table
@@ -26,6 +26,7 @@ category('activityCliffs', async () => {
26
26
  actCliffsDf,
27
27
  actCliffsDf.col('MSA')!,
28
28
  axesNames,
29
+ 'Activity cliffs',
29
30
  actCliffsDf.col('Activity')!,
30
31
  50,
31
32
  'Levenshtein',
@@ -94,6 +94,7 @@ MWRSWY-CKHP
94
94
  msaComplex = 'MsaComplex',
95
95
  idCsv = 'IdCsv',
96
96
  sarSmallCsv = 'SarSmallCsv',
97
+ HelmCsv = 'HelmCsv',
97
98
  }
98
99
 
99
100
  const samples: { [key: string]: string } = {
@@ -102,6 +103,7 @@ MWRSWY-CKHP
102
103
  'MsaComplex': 'System:AppData/Bio/samples/sample_MSA.csv',
103
104
  'IdCsv': 'System:AppData/Bio/samples/id.csv',
104
105
  'SarSmallCsv': 'System:AppData/Bio/samples/sar-small.csv',
106
+ 'HelmCsv': 'System:AppData/Bio/samples/sample_HELM.csv',
105
107
  };
106
108
 
107
109
  const _samplesDfs: { [key: string]: Promise<DG.DataFrame> } = {};
@@ -194,6 +196,14 @@ MWRSWY-CKHP
194
196
  test('samplesSarSmallCsvNegativeSmiles', async () => {
195
197
  await _testNeg(readSamplesCsv(Samples.sarSmallCsv), 'smiles');
196
198
  });
199
+
200
+ test('samplesHelmCsvHELM', async () => {
201
+ await _testPos(readSamplesCsv(Samples.HelmCsv), 'HELM', 'HELM', null);
202
+ });
203
+
204
+ test('samplesHelmCsvNegativeActivity', async () => {
205
+ await _testNeg(readSamplesCsv(Samples.HelmCsv), 'Activity');
206
+ });
197
207
  });
198
208
 
199
209
  export async function _testNeg(readDf: DfReaderFunc, colName: string) {
@@ -203,7 +213,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
203
213
  expect(col.semType === DG.SEMTYPE.MACROMOLECULE, false);
204
214
  }
205
215
 
206
- export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string) {
216
+ export async function _testPos(readDf: DfReaderFunc, colName: string, units: string, separator: string | null = null) {
207
217
  const df: DG.DataFrame = await readDf();
208
218
 
209
219
  const col: DG.Column = df.col(colName)!;
@@ -0,0 +1,252 @@
1
+ import * as OCL from 'openchemlib/full.js';
2
+
3
/**
 * Builds a single V3000 molblock for a polymer from the molfiles of its monomers.
 *
 * Each monomer is first re-oriented with `rotateBackboneV3000`, then the oriented
 * blocks are concatenated with `linkV3000`.
 *
 * @param molBlocks molfiles of the monomers, in sequence order; the array is not modified
 * @returns the linked V3000 molblock
 */
export function getMacroMol(molBlocks: string[]): string {
  // Work on a fresh array: the caller's molfiles must stay usable for further calls.
  const rotated = molBlocks.map((block) => rotateBackboneV3000(block));

  return linkV3000(rotated);
}
9
+
10
/**
 * Re-orients one monomer so that the line between its two end atoms lies on the x axis.
 *
 * The molfile is converted to V3000 through OpenChemLib, then the atoms are
 * (1) translated so that the midpoint between atom number 1 (called the 5' end here)
 * and atom number `natom` (the 3' end) becomes the origin, (2) rotated so that the
 * 5' end lands on the negative x axis, and (3) shifted along x so that the 5' end
 * sits at x = 0. Only the x and y fields of the atom lines are rewritten.
 *
 * @param molecule molfile of one monomer, as accepted by `OCL.Molecule.fromMolfile`
 * @returns the V3000 molblock with rewritten x/y coordinates
 */
function rotateBackboneV3000(molecule: string): string {
  let molBlock = OCL.Molecule.fromMolfile(molecule).toMolfileV3();
  const coordinates = extractAtomDataV3000(molBlock);
  const natom = coordinates.atomIndex.length;

  const indexFivePrime = coordinates.atomIndex.indexOf(1);
  const indexThreePrime = coordinates.atomIndex.indexOf(natom);
  // Without both end atoms (e.g. an empty molecule) there is nothing to align.
  if (indexFivePrime === -1 || indexThreePrime === -1)
    return molBlock;

  const xCenter = (coordinates.x[indexThreePrime] + coordinates.x[indexFivePrime]) / 2;
  const yCenter = (coordinates.y[indexThreePrime] + coordinates.y[indexFivePrime]) / 2;

  //place to center
  for (let i = 0; i < natom; i++) {
    coordinates.x[i] -= xCenter;
    coordinates.y[i] -= yCenter;
  }

  // After centring, the two ends are mirror images about the origin. A counter-clockwise
  // turn by (pi - polar angle of the 5' end) puts the 5' end on the negative x axis and
  // the 3' end on the positive one. atan2 gives the polar angle in every quadrant and on
  // the axes; atan(y/x) alone cannot tell opposite quadrants apart.
  const angle = Math.PI - Math.atan2(coordinates.y[indexFivePrime], coordinates.x[indexFivePrime]);
  const cos = Math.cos(angle);
  const sin = Math.sin(angle);

  for (let i = 0; i < natom; i++) {
    const xOld = coordinates.x[i];
    coordinates.x[i] = xOld * cos - coordinates.y[i] * sin;
    coordinates.y[i] = xOld * sin + coordinates.y[i] * cos;
  }

  //place to right
  const xShift = coordinates.x[indexFivePrime];
  for (let i = 0; i < natom; i++)
    coordinates.x[i] -= xShift;

  //rewrite molBlock
  // The atom block header is matched with any spacing after 'M' so that both the
  // standard 'M  V30' prefix and a single-space variant are found.
  let index = molBlock.search(/M[ \t]+V30 BEGIN ATOM/);
  index = molBlock.indexOf('\n', index);
  let indexEnd = index;
  for (let i = 0; i < natom; i++) {
    index = molBlock.indexOf('V30', index) + 4; // start of the atom number
    index = molBlock.indexOf(' ', index) + 1; // start of the atom type
    index = molBlock.indexOf(' ', index) + 1; // start of x
    indexEnd = molBlock.indexOf(' ', index) + 1; // start of y
    indexEnd = molBlock.indexOf(' ', indexEnd); // space that follows y

    molBlock = molBlock.slice(0, index) +
      coordinates.x[i] + ' ' + coordinates.y[i] +
      molBlock.slice(indexEnd);

    index = molBlock.indexOf('\n', index) + 1;
  }

  return molBlock;
}
71
+
72
/**
 * Reads number, element type and x/y coordinates of every atom of a V3000 molblock.
 *
 * The atom count comes from the COUNTS line; that many lines following the
 * `BEGIN ATOM` header are parsed as whitespace-separated fields
 * (`M V30 <number> <type> <x> <y> ...`). Continuation lines are not joined here.
 *
 * @param molBlock molfile text in V3000 format
 * @returns parallel arrays, one entry per atom line in file order
 * @throws RangeError when the COUNTS line yields no valid atom count
 * @throws Error when the block has no `BEGIN ATOM` header
 */
function extractAtomDataV3000(
  molBlock: string): {atomIndex: number[], atomType: string[], x: number[], y: number[]} {
  const natom = extractAtomsBondsNumbersV3000(molBlock).natom;
  if (!Number.isInteger(natom) || natom < 0)
    throw new RangeError('Invalid atom count in V3000 COUNTS line');

  // Any spacing after 'M' is accepted so that the standard 'M  V30' prefix matches.
  const header = /M[ \t]+V30 BEGIN ATOM[^\n]*\n/.exec(molBlock);
  if (header === null)
    throw new Error('V3000 atom block (BEGIN ATOM) not found');

  const atomLines = molBlock.slice(header.index + header[0].length).split('\n');

  const indexes: number[] = new Array<number>(natom);
  const types: string[] = new Array<string>(natom);
  const x: number[] = new Array<number>(natom);
  const y: number[] = new Array<number>(natom);

  for (let i = 0; i < natom; i++) {
    // fields: 'M', 'V30', atom number, atom type, x, y, z, ...
    const fields = (atomLines[i] ?? '').trim().split(/\s+/);
    indexes[i] = parseInt(fields[2], 10);
    types[i] = fields[3];
    x[i] = parseFloat(fields[4]);
    y[i] = parseFloat(fields[5]);
  }

  return {atomIndex: indexes, atomType: types, x: x, y: y};
}
105
+
106
/**
 * Concatenates V3000 molblocks of consecutive monomers into one V3000 molblock.
 *
 * For every monomer the atom and bond numbers are offset by the totals accumulated so
 * far and the x coordinates are shifted so that the monomer starts where the previous
 * one ended. In every monomer but the first, the atom numbered 1 is dropped: after the
 * offset its number equals the last atom of the preceding monomer, so bonds that
 * referenced it attach to that atom instead. `MDLV30/STEABS` atom lists of all
 * monomers are merged into a single collection.
 *
 * NOTE(review): the x shift uses the first and the last atom line of each monomer as
 * its two ends - presumably these are the atoms aligned by `rotateBackboneV3000`; confirm.
 *
 * @param molBlocks V3000 molblocks in sequence order; the array is not modified
 * @returns the linked V3000 molblock
 * @throws Error when a monomer has no `BEGIN ATOM` section
 */
function linkV3000(molBlocks: string[]): string {
  // V3000 lines carry the 'M  V30 ' prefix (two spaces); when reading, any spacing
  // after 'M' is accepted.
  const prefix = 'M  V30 ';
  const atomsBegin = /M[ \t]+V30 BEGIN ATOM/;
  const atomsEnd = /M[ \t]+V30 END ATOM/;
  const bondsBegin = /M[ \t]+V30 BEGIN BOND/;
  const bondsEnd = /M[ \t]+V30 END BOND/;
  const steabs = /M[ \t]+V30 MDLV30\/STEABS ATOMS=\(([^)]*)\)/g;
  const continuationAfterParen = /\(-\nM[ \t]+V30 /g;
  const continuation = /-\nM[ \t]+V30 /g;
  const spaceBeforeParen = / \)/g;

  let atomBlock = '';
  let bondBlock = '';
  const collection: number[] = [];
  let natom = 0; // offset added to the atom numbers of the current monomer
  let nbond = 0; // offset added to the bond numbers of the current monomer
  let xShift = 0; // x position at which the current monomer starts

  for (let i = 0; i < molBlocks.length; i++) {
    // Join continuation lines ('-' at the end of a line) so every entry is on one line.
    let block = molBlocks[i]
      .replace(continuationAfterParen, '(')
      .replace(continuation, '')
      .replace(spaceBeforeParen, ')');
    const numbers = extractAtomsBondsNumbersV3000(block);
    const coordinates = extractAtomDataV3000(block);

    const atomsHeader = block.search(atomsBegin);
    if (atomsHeader === -1)
      throw new Error('V3000 atom block (BEGIN ATOM) not found in monomer ' + (i + 1));
    const indexAtoms = block.indexOf('\n', atomsHeader);
    let index = indexAtoms;
    let indexEnd = indexAtoms;
    const totalShift = xShift - coordinates.x[0];

    for (let j = 0; j < numbers.natom; j++) {
      if (coordinates.atomIndex[j] != 1 || i == 0) {
        //rewrite atom number
        index = block.indexOf('V30', index) + 4;
        indexEnd = block.indexOf(' ', index);
        const atomNumber = parseInt(block.substring(index, indexEnd), 10) + natom;
        block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

        //rewrite coordinates
        index = block.indexOf(' ', index) + 1; // start of the atom type
        index = block.indexOf(' ', index) + 1; // start of x
        indexEnd = block.indexOf(' ', index);
        let coordinate = Math.round(10000 * (parseFloat(block.substring(index, indexEnd)) + totalShift)) / 10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf(' ', index) + 1; // start of y
        indexEnd = block.indexOf(' ', index);
        coordinate = Math.round(10000 * parseFloat(block.substring(index, indexEnd))) / 10000;
        block = block.slice(0, index) + coordinate + block.slice(indexEnd);

        index = block.indexOf('\n', index) + 1;
      } else {
        // Drop the whole line of the merged atom, from the newline that precedes it.
        index = block.lastIndexOf('\n', block.indexOf('V30', index));
        indexEnd = block.indexOf('\n', index + 1);
        block = block.slice(0, index) + block.slice(indexEnd);
      }
    }

    atomBlock += block.substring(indexAtoms + 1, block.search(atomsEnd));

    // A monomer without bonds may have no BOND section at all.
    const bondsHeader = block.search(bondsBegin);
    if (bondsHeader !== -1) {
      const indexBonds = block.indexOf('\n', bondsHeader);
      index = indexBonds;

      for (let j = 0; j < numbers.nbond; j++) {
        //rewrite bond number
        index = block.indexOf('V30', index) + 4;
        indexEnd = block.indexOf(' ', index);
        const bondNumber = parseInt(block.substring(index, indexEnd), 10) + nbond;
        block = block.slice(0, index) + bondNumber + block.slice(indexEnd);

        //rewrite atom pair in bond
        index = block.indexOf(' ', index) + 1; // start of the bond type
        index = block.indexOf(' ', index) + 1; // start of the first atom number
        indexEnd = block.indexOf(' ', index);
        let atomNumber = parseInt(block.substring(index, indexEnd), 10) + natom;
        block = block.slice(0, index) + atomNumber + block.slice(indexEnd);
        index = block.indexOf(' ', index) + 1; // start of the second atom number
        indexEnd = Math.min(block.indexOf('\n', index), block.indexOf(' ', index));
        atomNumber = parseInt(block.substring(index, indexEnd), 10) + natom;
        block = block.slice(0, index) + atomNumber + block.slice(indexEnd);

        index = block.indexOf('\n', index) + 1;
      }

      bondBlock += block.substring(indexBonds + 1, block.search(bondsEnd));
    }

    // Each list reads '<count> <atom> <atom> ...'; the leading count is skipped.
    steabs.lastIndex = 0;
    for (let match = steabs.exec(block); match !== null; match = steabs.exec(block)) {
      for (const entry of match[1].trim().split(/\s+/).slice(1))
        collection.push(parseInt(entry, 10) + natom);
    }

    natom += numbers.natom - 1; // atom 1 of the next monomer reuses the last number
    nbond += numbers.nbond;
    xShift += coordinates.x[numbers.natom - 1] - coordinates.x[0];
  }

  // The collection is written four entries per line, each line but the last ending
  // with the continuation mark. With no entries the whole section is left out.
  let collectionSection = '';
  if (collection.length > 0) {
    const entries = 4;
    const collectionLines: string[] = [];
    for (let k = 0; k < collection.length; k += entries)
      collectionLines.push(prefix + collection.slice(k, k + entries).join(' '));

    collectionSection = prefix + 'BEGIN COLLECTION\n' +
      prefix + 'MDLV30/STEABS ATOMS=(' + collection.length + ' -\n' +
      collectionLines.join(' -\n') + ')\n' +
      prefix + 'END COLLECTION\n';
  }

  //generate file
  // Every monomer added (its atom count - 1) to the offset, so the total is offset + 1.
  const totalAtoms = molBlocks.length > 0 ? natom + 1 : 0;
  let macroMolBlock = '\nDatagrok macromolecule handler\n\n';
  // Fixed-width counts line: the version tag has to sit in columns 34-39.
  macroMolBlock += '  0  0  0  0  0  0            999 V3000\n';
  macroMolBlock += prefix + 'BEGIN CTAB\n';
  macroMolBlock += prefix + 'COUNTS ' + totalAtoms + ' ' + nbond + ' 0 0 0\n';
  macroMolBlock += prefix + 'BEGIN ATOM\n';
  macroMolBlock += atomBlock;
  macroMolBlock += prefix + 'END ATOM\n';
  macroMolBlock += prefix + 'BEGIN BOND\n';
  macroMolBlock += bondBlock;
  macroMolBlock += prefix + 'END BOND\n';
  macroMolBlock += collectionSection;
  macroMolBlock += prefix + 'END CTAB\n';
  macroMolBlock += 'M  END\n';

  return macroMolBlock;
}
239
+
240
/**
 * Reads the atom and bond counts from the COUNTS line of a V3000 molblock.
 *
 * Any run of spaces or tabs between `COUNTS`, the atom count and the bond count is
 * accepted, and the bond count may be the last token on its line. Carriage returns
 * need no special treatment because only the COUNTS line itself is inspected.
 *
 * @param molBlock molfile text in V3000 format
 * @returns the two counts; both are `NaN` when no COUNTS line with two integers exists
 */
function extractAtomsBondsNumbersV3000(molBlock: string): {natom: number, nbond: number} {
  const counts = /COUNTS[ \t]+(\d+)[ \t]+(\d+)/.exec(molBlock);
  if (counts === null)
    return {natom: NaN, nbond: NaN};

  return {natom: parseInt(counts[1], 10), nbond: parseInt(counts[2], 10)};
}
252
+
@@ -0,0 +1,40 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import { WebLogo, SplitterFunc } from '@datagrok-libraries/bio/src/viewers/web-logo';
3
+ import * as grok from 'datagrok-api/grok';
4
+
5
+ export const HELM_CORE_LIB_MONOMER_COL = 'symbol';
6
+ export const HELM_CORE_LIB_MOLFILE_COL = 'molfile';
7
+ export const HELM_CORE_LIB_FILENAME = '/samples/HELMCoreLibrary.json';
8
+
9
/**
 * Looks up the molfile of every monomer of every sequence in a column.
 *
 * Sequences are split with the splitter that `WebLogo.getSplitter` returns for the
 * column's units tag and 'separator' tag. Empty monomers produced by the splitter are
 * skipped. As soon as a monomer has no molfile in the library, a warning is shown and
 * the whole conversion is abandoned.
 *
 * @param col column with the sequences
 * @param monomersLib monomer library table, see `createMomomersMolDict`
 * @returns one array of monomer molfiles per row, or `null` when a monomer is missing
 */
export function getMolfilesFromSeq(col: DG.Column, monomersLib: DG.DataFrame): string[][] | null {
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag('separator');
  const splitterFunc: SplitterFunc = WebLogo.getSplitter(units, sep);
  const monomersDict = createMomomersMolDict(monomersLib);
  const molFiles: string[][] = [];
  for (let i = 0; i < col.length; ++i) {
    const monomers = splitterFunc(col.get(i));
    const molFilesForSeq: string[] = [];
    for (let j = 0; j < monomers.length; ++j) {
      const monomer = monomers[j];
      if (!monomer)
        continue;

      // Own-property check: a symbol such as 'constructor' must not resolve to a
      // member inherited from Object.prototype.
      const molfile = Object.prototype.hasOwnProperty.call(monomersDict, monomer) ?
        monomersDict[monomer] : undefined;
      if (!molfile) {
        grok.shell.warning(`Monomer ${monomer} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      molFilesForSeq.push(molfile);
    }
    molFiles.push(molFilesForSeq);
  }
  return molFiles;
}
31
+
32
/**
 * Builds a lookup from monomer symbol to molfile out of a monomer library table.
 *
 * Symbols are read from the `HELM_CORE_LIB_MONOMER_COL` column and molfiles from the
 * `HELM_CORE_LIB_MOLFILE_COL` column; when a symbol occurs more than once, the last
 * row wins.
 *
 * @param lib monomer library table
 * @returns dictionary keyed by monomer symbol; empty for a table without rows
 * @throws Error when the table has rows but lacks one of the two columns
 */
export function createMomomersMolDict(lib: DG.DataFrame): {[key: string]: string} {
  const dict: {[key: string]: string} = {};
  const monomersCol = lib.col(HELM_CORE_LIB_MONOMER_COL);
  const molCol = lib.col(HELM_CORE_LIB_MOLFILE_COL);
  if (monomersCol == null || molCol == null) {
    // An empty table yields an empty dictionary; otherwise the library is unusable.
    if (lib.rowCount === 0)
      return dict;
    throw new Error(`Monomer library must contain '${HELM_CORE_LIB_MONOMER_COL}' and ` +
      `'${HELM_CORE_LIB_MOLFILE_COL}' columns`);
  }

  for (let i = 0; i < lib.rowCount; ++i)
    dict[monomersCol.get(i)] = molCol.get(i);

  return dict;
}
package/tsconfig.json CHANGED
@@ -6,7 +6,7 @@
6
6
  // "incremental": true, /* Enable incremental compilation */
7
7
  "target": "es6", /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */
8
8
  "module": "es2020", /* Specify module code generation: 'none', 'commonjs', 'amd', 'system', 'umd', 'es2015', 'es2020', or 'ESNext'. */
9
- "lib": ["es2020", "dom"], /* Specify library files to be included in the compilation. */
9
+ "lib": ["es2020", "dom", "ES2021.String"], /* Specify library files to be included in the compilation. */
10
10
  // "allowJs": true, /* Allow javascript files to be compiled. */
11
11
  // "checkJs": true, /* Report errors in .js files. */
12
12
  // "jsx": "preserve", /* Specify JSX code generation: 'preserve', 'react-native', 'react', 'react-jsx' or 'react-jsxdev'. */