@datagrok/bio 2.0.25 → 2.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,7 @@ import {Subject, Subscription} from 'rxjs';
14
14
  import * as C from '../utils/constants';
15
15
  import {updateDivInnerHTML} from '../utils/ui-utils';
16
16
  import {NOTATION} from '@datagrok-libraries/bio';
17
+ import { delay } from '@datagrok-libraries/utils/src/test';
17
18
 
18
19
  export class BioSubstructureFilter extends DG.Filter {
19
20
  bioFilter: FastaFilter | SeparatorFilter | HelmFilter | null = null;
@@ -102,9 +103,7 @@ export class BioSubstructureFilter extends DG.Filter {
102
103
  } else {
103
104
  this.calculating = true;
104
105
  try {
105
- this.bitset = this.notation === NOTATION.HELM ?
106
- await helmSubstructureSearch(this.bioFilter!.substructure, this.column!) :
107
- linearSubstructureSearch(this.bioFilter!.substructure, this.column!);
106
+ this.bitset = await this.bioFilter?.substrucrureSearch(this.column!)!;
108
107
  this.calculating = false;
109
108
  this.dataFrame?.rows.requestFilter();
110
109
  } finally {
@@ -127,6 +126,10 @@ abstract class BioFilterBase {
127
126
 
128
127
  set substructure(s: string) {
129
128
  }
129
+
130
/**
 * Default substructure search of the base filter: performs no search and resolves to null.
 * @param {DG.Column} column column to search in (not used by this default implementation)
 * @return {Promise<DG.BitSet | null>} always null
 */
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
  const noMatches: DG.BitSet | null = null;
  return noMatches;
}
130
133
  }
131
134
 
132
135
  class FastaFilter extends BioFilterBase {
@@ -149,9 +152,13 @@ class FastaFilter extends BioFilterBase {
149
152
  set substructure(s: string) {
150
153
  this.substructureInput.value = s;
151
154
  }
155
+
156
/**
 * Searches the column for the substructure currently held by this filter,
 * delegating to linearSubstructureSearch.
 * @param {DG.Column} column column to search in
 * @return {Promise<DG.BitSet | null>} result of linearSubstructureSearch
 */
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
  const pattern = this.substructure;
  const matches = await linearSubstructureSearch(pattern, column);
  return matches;
}
152
159
  }
153
160
 
154
- class SeparatorFilter extends FastaFilter {
161
+ export class SeparatorFilter extends FastaFilter {
155
162
  separatorInput: DG.InputBase<string> = ui.stringInput('', '', () => {
156
163
  this.onChanged.next();
157
164
  }, {placeholder: 'Separator'});
@@ -179,9 +186,13 @@ class SeparatorFilter extends FastaFilter {
179
186
  set substructure(s: string) {
180
187
  this.substructureInput.value = s;
181
188
  }
189
+
190
/**
 * Searches the column for the substructure currently held by this filter,
 * delegating to linearSubstructureSearch and passing this filter's colSeparator.
 * @param {DG.Column} column column to search in
 * @return {Promise<DG.BitSet | null>} result of linearSubstructureSearch
 */
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
  const pattern = this.substructure;
  const separator = this.colSeparator;
  const matches = await linearSubstructureSearch(pattern, column, separator);
  return matches;
}
182
193
  }
183
194
 
184
- class HelmFilter extends BioFilterBase {
195
+ export class HelmFilter extends BioFilterBase {
185
196
  helmEditor: any;
186
197
  _filterPanel = ui.div('', {style: {cursor: 'pointer'}});
187
198
  helmSubstructure = '';
@@ -202,9 +213,9 @@ class HelmFilter extends BioFilterBase {
202
213
  .onOK(() => {
203
214
  const helmString = this.helmEditor
204
215
  .webEditor.canvas.getHelm(true).replace(/<\/span>/g, '').replace(/<span style='background:#bbf;'>/g, '');
205
- this.updateFilterPanel(helmString);
206
216
  this.helmSubstructure = helmString;
207
- this.onChanged.next();
217
+ this.updateFilterPanel(this.substructure);
218
+ setTimeout(() => { this.onChanged.next(); }, 10);
208
219
  }).show({modal: true, fullScreen: true});
209
220
  });
210
221
  ui.onSizeChanged(this._filterPanel).subscribe((_) => {
@@ -247,4 +258,12 @@ class HelmFilter extends BioFilterBase {
247
258
  this.helmEditor.resizeEditor(width, height);
248
259
  }
249
260
  }
261
+
262
/**
 * Runs the HELM substructure search for the substructure currently held by this filter,
 * showing the update indicator on the filter panel while the search is in progress.
 * The indicator is cleared in a finally block so that it does not stay on when the
 * delay or the search rejects.
 * @param {DG.Column} column column to search in
 * @return {Promise<DG.BitSet | null>} result of helmSubstructureSearch
 */
async substrucrureSearch(column: DG.Column): Promise<DG.BitSet | null> {
  ui.setUpdateIndicator(this._filterPanel, true);
  try {
    // NOTE(review): presumably this short pause lets the indicator render before the
    // search starts - confirm before removing.
    await delay(10);
    return await helmSubstructureSearch(this.substructure, column);
  } finally {
    ui.setUpdateIndicator(this._filterPanel, false);
  }
}
250
269
  }
@@ -1,8 +1,9 @@
1
1
  import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
- import {getMolfilesFromSingleSeq, HELM_CORE_LIB_FILENAME} from '../utils/utils';
5
- import {getMacroMol} from '../utils/atomic-works';
4
+ import {getMolfilesFromSingleSeq} from '@datagrok-libraries/bio/src/utils/monomer-utils';
5
+ import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
6
+ import {getMacroMol} from '@datagrok-libraries/bio/src/utils/atomic-works';
6
7
 
7
8
  /**
8
9
  * @export
package/src/const.ts DELETED
@@ -1,30 +0,0 @@
1
- import * as ui from 'datagrok-api/ui';
2
- import * as grok from 'datagrok-api/grok';
3
- import * as DG from 'datagrok-api/dg';
4
-
5
/**
 * Maps each field of a JSON monomer library entry to the name of the SDF field
 * it is read from; null means no SDF field is mapped to that JSON field.
 */
export const jsonSdfMonomerLibDict = {
  monomerType: null,
  smiles: null,
  name: 'MonomerName',
  author: null,
  molfile: 'molecule',
  naturalAnalog: 'MonomerNaturalAnalogCode',
  rgroups: 'MonomerCaps',
  createDate: null,
  id: null,
  polymerType: 'MonomerType',
  symbol: 'MonomerCode',
};

/** Field names used in JSON monomer library entries and their R-group records. */
export const RGROUP_FIELD = 'rgroups';
export const CAP_GROUP_SMILES = 'capGroupSmiles';
export const RGROUP_ALTER_ID = 'alternateId';
export const CAP_GROUP_NAME = 'capGroupName';
export const RGROUP_LABEL = 'label';
export const MONOMER_SYMBOL = 'symbol';

/** Name of the SDF field holding the monomer name. */
export const SDF_MONOMER_NAME = 'MonomerName';

// Range of hex numbers used in the PepSea library to encode monomers.
export const MONOMER_ENCODE_MIN = 0x100;
export const MONOMER_ENCODE_MAX = 0x40A;
30
-
@@ -1,199 +0,0 @@
1
/** Linkage points of a monomer, keyed by link name; each holds an atom number and a type string. */
export type LinkData = { [link: string]: { atomNumber: number, type: string } };

/** A single monomer record: molfile text, monomer type, natural analogue code and its linkages. */
export type MonomerEntry = {
  mol: string,
  type: string,
  analogueCode: string,
  linkages: LinkData
};

/** Monomer records keyed by monomer name. */
export type MonomerEntries = { [name: string]: MonomerEntry };
9
-
10
/**
 * Monomer library built from the text of an SDF file (HELM-associated monomer processing).
 *
 * NOTE(review): the `M RGP` marker literals below are reproduced exactly as they appear in
 * this file; the CTfile format writes such property tags with two spaces after `M` -
 * confirm the literals against the data being parsed.
 */
export class MonomerLibrary {
  static libName = 'monomerLibrary';

  /** SDF fields that must be present, with one value per molecule. */
  private monomerFields: string[] = [
    'molecule', 'MonomerType', 'MonomerNaturalAnalogCode', 'MonomerName', 'MonomerCode', 'MonomerCaps', 'BranchMonomer',
  ];

  /** Entries keyed by monomer code, or by monomer name when the code is '.'. */
  private library: MonomerEntries = {};

  /** Keys of `library` in insertion order. */
  private monomers: string[] = [];

  /**
   * Parses the SDF text and fills the library.
   * @param {string} sdf content of the SDF file
   * Throws an Error when a required field is absent or does not have one value per molecule.
   */
  constructor(sdf: string) {
    const columns = new SDFReader().getColls(sdf);

    for (const field of this.monomerFields) {
      if (!(field in columns))
        throw new Error(`Monomer library was not compiled: ${field} field is absent in provided file`);

      if (columns[field].length !== columns.molecule.length)
        throw new Error(`Monomer library was not compiled: ${field} field is not presented for each monomer`);
    }

    columns.molecule.forEach((mol, idx) => {
      const monomerCode = columns.MonomerCode[idx];
      const monomerName = columns.MonomerName[idx];
      const entry = {
        mol: mol,
        type: 'Peptide',
        code: monomerCode,
        analogueCode: columns.MonomerNaturalAnalogCode[idx],
        linkages: this.getLinkData(mol, columns.MonomerCaps[idx], monomerName),
      };

      // A code of '.' means the monomer is registered under its name instead.
      const key = monomerCode !== '.' ? monomerCode : monomerName;
      this.library[key] = entry;
      this.monomers.push(key);
    });
  }

  /** Returns the full library entry of a monomer.
   * @param {string} name
   * @return {MonomerEntry}
   */
  public getMonomerEntry(name: string): MonomerEntry {
    const known = this.monomers.includes(name);
    if (!known)
      throw new Error(`Monomer library do not contain ${name} monomer`);

    return this.library[name];
  }

  /** Returns the molfile text of a monomer with the RGP line removed and each `R#`
   * placeholder replaced, in linkage order, by the linkage type followed by a space.
   * @param {string} name
   * @return {string}
   */
  public getMonomerMol(name: string): string {
    const known = this.monomers.includes(name);
    if (!known)
      throw new Error(`Monomer library do not contain ${name} monomer`);

    const {mol, linkages} = this.library[name];
    const withoutRgp = mol.replace(/M RGP .+\n/, '');

    // Order matters: each replace consumes the first remaining 'R#'.
    return Object.keys(linkages).reduce(
      (acc, link) => acc.replace('R#', linkages[link].type + ' '),
      withoutRgp);
  }

  /** Names of the monomers available in the library. */
  get monomerNames(): string[] {
    return this.monomers;
  }

  static get id(): string {
    return MonomerLibrary.libName;
  }

  /**
   * Builds linkage data for one monomer from the RGP line of its molfile and its caps text.
   * @param {string} mol molfile text of the monomer
   * @param {string} caps newline-separated cap descriptions
   * @param {string} name monomer name, used in the error message
   * @return {LinkData}
   */
  private getLinkData(mol: string, caps: string, name: string): LinkData {
    const rgpLine = mol.match(/M RGP .+/);
    if (rgpLine === null)
      throw new Error(`Monomer library was not compiled: ${name} entry has no RGP`);

    // Cap type keyed by the first number found in each caps line.
    const capTypes: { [code: string]: string } = {};
    for (const cap of caps.split('\n'))
      capTypes[cap.match(/\d+/)![0]] = cap.match(/(?<=\])\w+/)![0];

    const linkNames: { [code: number]: string } = {1: 'N-terminal', 2: 'C-terminal', 3: 'branch'};
    const tokens = rgpLine[0].replace('M RGP ', '').split(/\s+/);
    const pairCount = parseInt(tokens[0]);

    const result: LinkData = {};
    for (let pair = 0; pair < pairCount; pair++) {
      // Tokens after the count come in (atom number, R-group code) pairs.
      const code = parseInt(tokens[2 * pair + 2]);
      const linkName = linkNames[code] ?? '';
      result[linkName] = {atomNumber: parseInt(tokens[2 * pair + 1]), type: capTypes[code]};
    }

    return result;
  }
}
124
-
125
//TODO: merge with Chem version
/**
 * Minimal SDF reader: splits the text into records at '$$$$' and collects, per record,
 * the molecule block and the data items into string columns.
 *
 * NOTE(review): the marker literals ('M END\n', '> <') are kept exactly as they appear in
 * this file; the CTfile format writes these tags with two spaces - confirm the literals
 * against the data being parsed.
 */
class SDFReader {
  /** Collected columns; 'molecule' always exists, other columns are named after data items. */
  dataColls: { [_: string]: string [] };

  constructor() {
    this.dataColls = {'molecule': []};
  }

  /**
   * Reads the content and returns the collected columns.
   * @param {string} content SDF text
   * @return {Object} columns keyed by 'molecule' and by data item name
   */
  getColls(content: string): { [_: string]: string[] } {
    this.read(content);
    return this.dataColls;
  }

  /**
   * Parses every record of the content into `dataColls`.
   * @param {string} content SDF text
   */
  read(content: string): void {
    // Equalize old and new sdf standards. A regex replace is used instead of
    // String.prototype.replaceAll so that the code also type-checks for targets below ES2021.
    content = content.replace(/\r/g, '');

    const firstEnd = content.indexOf('$$$$', 0);
    // The first record (re)creates the column of every data item it contains.
    this.parse(content, 0, firstEnd, (name: string, val: string): void => { // TODO: type
      this.dataColls[name] = [];
      this.dataColls[name].push(val);
    });
    if (firstEnd === -1)
      return; // no record delimiter at all: nothing further to read

    let startIndex = firstEnd + 5; // skip '$$$$' and the line break after it
    while (startIndex > -1 && startIndex < content.length)
      startIndex = this.readNext(content, startIndex);
  }

  /**
   * Parses the record starting at `startIndex`.
   * @param {string} content SDF text
   * @param {number} startIndex index of the first character of the record
   * @return {number} index of the next record, or -1 when no further '$$$$' exists
   */
  readNext(content: string, startIndex: number): number {
    const nextStartIndex = content.indexOf('$$$$', startIndex);
    if (nextStartIndex === -1)
      return -1;

    this.parse(content, startIndex, nextStartIndex,
      (name: string, val: string): void => {
        // A data item that was absent from the first record has no column yet:
        // create it instead of failing on push to undefined.
        const column = this.dataColls[name] ?? (this.dataColls[name] = []);
        column.push(val);
      });

    return nextStartIndex + 5;
  }

  /**
   * Parses one record: pushes its molecule block to the 'molecule' column and reports
   * every data item found inside the record to `handler`.
   * @param {string} content SDF text
   * @param {number} start index of the first character of the record
   * @param {number} end index of the record's '$$$$' delimiter (-1 when there is none)
   * @param {Function} handler called with (data item name, value) for each data item
   */
  parse(content: string, start: number, end: number, handler: (name: string, val: string) => void): void {
    const molEndMarker = 'M END\n';
    const recordLimit = end < 0 ? content.length : end;
    const markerAt = content.indexOf(molEndMarker, start);
    // The molecule block ends right after the marker (offset derived from the marker
    // itself); when this record has no marker, the block is the whole record.
    const molEnd = markerAt === -1 || markerAt >= recordLimit ? recordLimit : markerAt + molEndMarker.length;
    this.dataColls['molecule'].push(content.substring(start, molEnd));

    let localEnd = start;
    while (localEnd < end) {
      let headerStart = content.indexOf('> <', localEnd);
      // Stop at the record boundary so a record never consumes the next record's items.
      if (headerStart === -1 || headerStart >= end)
        return;

      headerStart += 3;
      localEnd = content.indexOf('>\n', headerStart);
      if (localEnd === -1)
        return;

      const propertyName = content.substring(headerStart, localEnd);
      const valueStart = localEnd + 2;

      localEnd = content.indexOf('\n', valueStart);
      if (localEnd === -1) {
        localEnd = end;
      } else if (content[localEnd + 1] != '\n') {
        // The value continues on a second line; without a further line break fall back
        // to the record end rather than to -1, which would restart the scan from the top.
        const secondLineEnd = content.indexOf('\n', localEnd + 1);
        localEnd = secondLineEnd === -1 ? end : secondLineEnd;
      }

      handler(propertyName, content.substring(valueStart, localEnd));
      localEnd += 2; // skip the line break and the blank line after the value
    }
  }
}
@@ -1,135 +0,0 @@
1
- import * as grok from 'datagrok-api/grok';
2
- import * as ui from 'datagrok-api/ui';
3
- import * as DG from 'datagrok-api/dg';
4
- import * as bio from '@datagrok-libraries/bio';
5
-
6
- import {
7
- CAP_GROUP_NAME, CAP_GROUP_SMILES, jsonSdfMonomerLibDict, MONOMER_ENCODE_MAX, MONOMER_ENCODE_MIN, MONOMER_SYMBOL,
8
- RGROUP_ALTER_ID, RGROUP_FIELD, RGROUP_LABEL, SDF_MONOMER_NAME
9
- } from '../const';
10
- import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
11
-
12
/** Path of the HELM core monomer library JSON file. */
export const HELM_CORE_LIB_FILENAME = '/data/HELMCoreLibrary.json';

/** Key of a HELM core library entry that holds the monomer symbol. */
export const HELM_CORE_LIB_MONOMER_SYMBOL = 'symbol';

/** Key of a HELM core library entry that holds the molfile. */
export const HELM_CORE_LIB_MOLFILE = 'molfile';

/** Keys copied from a HELM core library entry when a monomer dictionary is built. */
export const HELM_CORE_FIELDS = [
  'symbol',
  'molfile',
  'rgroups',
  'name',
];
16
-
17
-
18
/**
 * Encodes every sequence of the column as a string with one code point per monomer.
 * Code points are handed out from MONOMER_ENCODE_MIN upwards in order of first appearance;
 * the same monomer always receives the same code point.
 *
 * @param {DG.Column} col column with sequences; its units tag and separator tag select the splitter
 * @return {DG.Column | null} a string column named 'encodedMolecules', or null (after
 * reporting an error through grok.shell.error) when the column contains more distinct
 * monomers than the MONOMER_ENCODE_MIN..MONOMER_ENCODE_MAX range can encode
 */
export function encodeMonomers(col: DG.Column): DG.Column | null {
  let encodeSymbol = MONOMER_ENCODE_MIN;
  // A Map is used so that monomer names colliding with Object.prototype members
  // (e.g. 'constructor') are treated like any other monomer.
  const monomerSymbolDict = new Map<string, number>();
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag(bio.TAGS.separator);
  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
  const encodedStringArray: string[] = [];
  for (let i = 0; i < col.length; ++i) {
    let encodedMonomerStr = '';
    const monomers = splitterFunc(col.get(i));
    // A plain loop is required here: returning from a forEach callback would only leave
    // the callback, not this function, and encoding would go on past the error.
    for (let j = 0; j < monomers.length; ++j) {
      const m = monomers[j];
      let code = monomerSymbolDict.get(m);
      if (code === undefined) {
        if (encodeSymbol > MONOMER_ENCODE_MAX) {
          grok.shell.error(`Not enough symbols to encode monomers`);
          return null;
        }
        code = encodeSymbol;
        monomerSymbolDict.set(m, code);
        encodeSymbol++;
      }
      encodedMonomerStr += String.fromCodePoint(code);
    }
    encodedStringArray.push(encodedMonomerStr);
  }
  return DG.Column.fromStrings('encodedMolecules', encodedStringArray);
}
43
-
44
/**
 * Collects, for every sequence of the column, deep copies of the library records of its monomers.
 *
 * @param {DG.Column} col column with sequences; its units tag and 'separator' tag select the splitter
 * @param {any[]} monomersLibObject monomer library entries passed to createMomomersMolDict
 * @return {any[][] | null} one array of monomer records per row; null (after a warning through
 * grok.shell.warning) as soon as a monomer is not found in the library
 */
export function getMolfilesFromSeq(col: DG.Column, monomersLibObject: any[]): any[][] | null {
  const units = col.tags[DG.TAGS.UNITS];
  const sep = col.getTag('separator');
  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
  const monomersDict = createMomomersMolDict(monomersLibObject);

  const molFiles: any[][] = [];
  for (let rowIdx = 0; rowIdx < col.length; ++rowIdx) {
    const monomers = splitterFunc(col.get(rowIdx));
    const molFilesForSeq: any[] = [];
    for (let pos = 0; pos < monomers.length; ++pos) {
      const symbol = monomers[pos];
      if (!symbol)
        continue; // empty positions are skipped

      const record = monomersDict[symbol];
      if (!record) {
        grok.shell.warning(`Monomer ${symbol} is missing in HELM library. Structure cannot be created`);
        return null;
      }
      // Deep copy, so callers never share the dictionary's own record.
      molFilesForSeq.push(JSON.parse(JSON.stringify(record)));
    }
    molFiles.push(molFilesForSeq);
  }
  return molFiles;
}
67
-
68
/**
 * Collects deep copies of the library records of the monomers of a single sequence.
 *
 * @param {DG.Cell} cell cell holding the sequence; the units tag and 'separator' tag of its
 * column select the splitter
 * @param {any[]} monomersLibObject monomer library entries passed to createMomomersMolDict
 * @return {any[][] | null} an array with a single element, the array of monomer records of the
 * sequence; null (after a warning through grok.shell.warning) when a monomer is not found
 */
export function getMolfilesFromSingleSeq(cell: DG.Cell, monomersLibObject: any[]): any[][] | null {
  const units = cell.column.tags[DG.TAGS.UNITS];
  const sep = cell.column.getTag('separator');
  const splitterFunc: bio.SplitterFunc = bio.getSplitter(units, sep);
  const monomersDict = createMomomersMolDict(monomersLibObject);

  const monomers = splitterFunc(cell.value);
  const molFilesForSeq: any[] = [];
  for (let pos = 0; pos < monomers.length; ++pos) {
    const symbol = monomers[pos];
    if (!symbol)
      continue; // empty positions are skipped

    const record = monomersDict[symbol];
    if (!record) {
      grok.shell.warning(`Monomer ${symbol} is missing in HELM library. Structure cannot be created`);
      return null;
    }
    // Deep copy, so callers never share the dictionary's own record.
    molFilesForSeq.push(JSON.parse(JSON.stringify(record)));
  }
  return [molFilesForSeq];
}
89
-
90
/**
 * Builds a dictionary of the 'PEPTIDE' entries of a monomer library, keyed by monomer symbol.
 * Each value holds only the HELM_CORE_FIELDS of the entry; a later entry with the same
 * symbol replaces an earlier one.
 *
 * @param {any[]} lib monomer library entries
 * @return {Object} monomer records keyed by symbol
 */
export function createMomomersMolDict(lib: any[]): { [key: string]: string | any } {
  const dict: { [key: string]: string | any } = {};
  lib
    .filter((entry) => entry['polymerType'] === 'PEPTIDE')
    .forEach((entry) => {
      const monomerObject: { [key: string]: any } =
        Object.fromEntries(HELM_CORE_FIELDS.map((field) => [field, entry[field]]));
      dict[entry[HELM_CORE_LIB_MONOMER_SYMBOL]] = monomerObject;
    });
  return dict;
}
103
-
104
-
105
/**
 * Converts a table read from an SDF monomer library into an array of JSON monomer records.
 * For every row, each key of jsonSdfMonomerLibDict is filled as follows:
 *  - the symbol key takes the value of its mapped column, or the monomer name when that value is '.';
 *  - the R-groups key takes one record per line of its mapped column;
 *  - any other key with a mapped column takes that column's value; unmapped keys are left out.
 *
 * @param {DG.DataFrame} table table with the SDF fields as columns
 * @return {any} array of monomer records
 */
export function createJsonMonomerLibFromSdf(table: DG.DataFrame): any {
  const fieldMap = jsonSdfMonomerLibDict as { [key: string]: string | any };

  // Builds the R-group record for one line of the caps field.
  const toRgroup = (line: string): { [key: string]: string | any } => {
    const altAtom = line.substring(line.lastIndexOf(']') + 1);
    const radicalNum = line.match(/\[R(\d+)\]/)![1];
    const isHydrogen = altAtom === 'H';
    return {
      [CAP_GROUP_SMILES]: isHydrogen ? `[*:${radicalNum}][H]` : `O[*:${radicalNum}]`,
      [RGROUP_ALTER_ID]: isHydrogen ? `R${radicalNum}-H` : `R${radicalNum}-OH`,
      [CAP_GROUP_NAME]: isHydrogen ? `H` : `OH`,
      [RGROUP_LABEL]: `R${radicalNum}`,
    };
  };

  const resultLib = [];
  for (let row = 0; row < table.rowCount; row++) {
    const monomer: { [key: string]: string | any } = {};
    for (const key of Object.keys(jsonSdfMonomerLibDict)) {
      if (key === MONOMER_SYMBOL) {
        const monomerSymbol = table.get(fieldMap[key], row);
        monomer[key] = monomerSymbol === '.' ? table.get(SDF_MONOMER_NAME, row) : monomerSymbol;
        continue;
      }
      if (key === RGROUP_FIELD) {
        const lines: string[] = table.get(fieldMap[key], row).split('\n');
        const jsonRgroups: any[] = lines.map((line) => toRgroup(line));
        monomer[key] = jsonRgroups;
        continue;
      }
      if (fieldMap[key])
        monomer[key] = table.get(fieldMap[key], row);
    }
    resultLib.push(monomer);
  }
  return resultLib;
}