@datagrok/sequence-translator 1.4.7 → 1.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+
3
+ import wu from 'wu';
4
+ import {PolymerTypes} from '@datagrok-libraries/bio/src/helm/consts';
5
+ import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
6
+ import {IMonomerLib, IMonomerLibBase, Monomer, MonomerLibData, RGroup} from '@datagrok-libraries/bio/src/types';
7
+ import {RDModule, RDMol, RDReaction, MolList, RDReactionResult} from '@datagrok-libraries/chem-meta/src/rdkit-api';
8
+ import {HELM_REQUIRED_FIELD as REQ,
9
+ HELM_OPTIONAL_FIELDS as OPT, HELM_RGROUP_FIELDS} from '@datagrok-libraries/bio/src/utils/const';
10
+ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
11
+ import {Rules, RuleReaction} from '../pt-rules';
12
+ import {InvalidReactionError, MonomerNotFoundError} from '../types';
13
+ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
14
+
15
/**
 * Converts a continuous 0-based monomer index into a position within a simple polymer.
 *
 * Simple polymers are walked in order; the length of every polymer that lies entirely
 * before {@link outIdx} is subtracted from it.
 *
 * @param outIdx 0-based index counted continuously across all simple polymers
 * @param monomers monomers of every simple polymer, one inner array per polymer
 * @returns `[inIdx, spIdx]` - index inside the simple polymer and index of that polymer.
 *   When {@link outIdx} lies beyond the last monomer, `spIdx` equals `monomers.length`
 *   and `inIdx` holds the remaining offset.
 */
export function getInnerIdx(outIdx: number, monomers: string[][]): [number, number] {
  let remaining = outIdx;
  let polymerIdx = 0;
  while (polymerIdx < monomers.length) {
    const polymerLen = monomers[polymerIdx].length;
    // Stop at the first simple polymer the remaining offset fits into
    if (!(remaining >= polymerLen)) break;
    remaining -= polymerLen;
    polymerIdx++;
  }
  return [remaining, polymerIdx];
}
27
+
28
/**
 * Converts a position within a simple polymer into a continuous 0-based monomer index.
 *
 * @param inIdx 0-based index of the monomer inside its simple polymer
 * @param spIdx 0-based index of the simple polymer
 * @param monomers monomers of every simple polymer, one inner array per polymer
 * @returns {@link inIdx} increased by the total length of all simple polymers preceding {@link spIdx}
 */
export function getOuterIdx(inIdx: number, spIdx: number, monomers: string[][]): number {
  let precedingCount = 0;
  let polymerIdx = 0;
  while (polymerIdx < spIdx) {
    precedingCount += monomers[polymerIdx].length;
    polymerIdx++;
  }
  return precedingCount + inIdx;
}
35
+
36
/**
 * Returns the molblocks of two monomers, with the R-group numbers of the second one shifted.
 *
 * The molblock of {@link monomer1} is returned unchanged. In the molblock of {@link monomer2}
 * every R-group number listed on a V2000 `M  RGP` property line is increased by the number of
 * R-groups of {@link monomer1}, so the numbers of the two monomers do not overlap.
 *
 * V2000 line layout: `M  RGP` (6 chars), entry count (3 chars), then one 8-char entry
 * ` aaa rrr` (atom number, R-group number) per listed R-group.
 *
 * @param monomer1 monomer whose molblock is kept as is
 * @param monomer2 monomer whose R-group numbers are shifted
 * @returns `[molblock of monomer1, renumbered molblock of monomer2]`
 */
function getMonomersMolBlocks(monomer1: Monomer, monomer2: Monomer): [string, string] {
  const tagLen = 6; // 'M  RGP' - the CTfile tag has TWO spaces after 'M'
  const countLen = 3;
  const fieldLen = 4;
  const entryLen = 2 * fieldLen;

  const mb1 = monomer1.molfile;
  const addGroups = monomer1.rgroups.length;

  //mol v2000 monomer: every 'M  RGP' line is handled (a single line holds at most 8 entries)
  const mb2 = monomer2.molfile.replace(/^M {2}RGP.*$/gm, (line: string): string => {
    const groupsCount = Number(line.substring(tagLen, tagLen + countLen));
    if (!Number.isInteger(groupsCount) || groupsCount <= 0) return line;

    let res = line;
    for (let i = 0; i < groupsCount; i++) {
      const start = tagLen + countLen + fieldLen + i * entryLen;
      const end = start + fieldLen;
      // A line shorter than its declared count is left as is beyond its end,
      // never touching the lines that follow it in the molblock
      if (end > res.length) break;

      const rGroupSpecifier = res.substring(start, end);
      const rGroupNumber = Number(rGroupSpecifier);
      // Blank or malformed field: nothing to renumber
      if (rGroupSpecifier.trim() === '' || !Number.isInteger(rGroupNumber)) continue;

      // padStart keeps the fixed-width right-aligned field without computing the digit count
      const newSpecifier = String(rGroupNumber + addGroups).padStart(fieldLen, ' ');
      res = res.substring(0, start) + newSpecifier + res.substring(end);
    }
    return res;
  });

  //TODO: same for v3000 monomer

  return [mb1, mb2];
}
62
+
63
/**
 * Runs {@link reaction} on two reactant molblocks and returns the molblock of the first product.
 *
 * Every RDKit object created on the way is released in `finally`, including the product list
 * obtained from the reaction result.
 *
 * @param rdkit RDKit module used to parse the reaction and the reactants
 * @param reaction reaction definition passed to `rdkit.get_rxn`
 * @param mb1 molblock of the first reactant
 * @param mb2 molblock of the second reactant
 * @param monomerName name of the monomer being assembled, used in the error message only
 * @returns molblock of the first product of the first product set,
 *   or an empty string when that product set yields no molecule
 * @throws InvalidReactionError when {@link reaction} can not be parsed
 * @throws Error when a reactant can not be parsed or the reaction gives no product set;
 *   any error is reported with `grok.shell.error` and rethrown
 */
function getSyntheticMolBlock(rdkit: RDModule, reaction: string,
  mb1: string, mb2: string, monomerName: string): string {
  let rxn: RDReaction | null = null;
  let mols: MolList | null = null;
  let mol1: RDMol | null = null;
  let mol2: RDMol | null = null;
  let rctns: RDReactionResult | null = null;
  let products: ReturnType<RDReactionResult['get']> | null = null;
  let molP: RDMol | null = null;
  let molBlock = '';

  try {
    rxn = rdkit.get_rxn(reaction);
    if (!rxn) throw new InvalidReactionError(reaction);
    mols = new rdkit.MolList();
    mol1 = rdkit.get_mol(mb1);
    if (!mol1) throw new Error('molfile of the first reactant can not be parsed');
    mol2 = rdkit.get_mol(mb2);
    if (!mol2) throw new Error('molfile of the second reactant can not be parsed');
    mols.append(mol1);
    mols.append(mol2);

    rctns = rxn.run_reactants(mols, 1);
    //const size = rctns.size();
    products = rctns.get(0);
    if (!products) throw new Error('the reaction gives no products');

    molP = products.next();
    // Keep the declared string contract even when the product set is empty
    molBlock = molP?.get_molblock() ?? '';//molP?.get_v3Kmolblock();//
  } catch (err: any) {
    const [errMsg, _errStack] = errInfo(err);
    grok.shell.error(`Can not assemble monomer '${monomerName}': ${errMsg}.`);
    throw err;
  } finally {
    molP?.delete();
    // The product list is a separate object handed out by the result, it is released on its own
    products?.delete();
    rctns?.delete();
    mol2?.delete();
    mol1?.delete();
    mols?.delete();
    rxn?.delete();
  }

  return molBlock;
}
103
+
104
/**
 * Merges the R-groups of two monomers into one list for the monomer produced from them.
 *
 * R-groups of {@link monomer1} are placed first, as the very same objects. R-groups of
 * {@link monomer2} follow as new objects: the number taken from the label (label without 'R')
 * is increased by the R-group count of {@link monomer1}, and its first occurrence is replaced
 * in the cap group SMILES, the alternate id and the label. The cap group name is copied as is.
 *
 * @returns R-groups of {@link monomer1} followed by the renumbered R-groups of {@link monomer2}
 */
function getNewGroups(monomer1: Monomer, monomer2: Monomer): RGroup[] {
  const firstGroups = monomer1.rgroups;
  const secondGroups = monomer2.rgroups;
  const shift = firstGroups.length;
  const merged = new Array<RGroup>(shift + secondGroups.length);

  for (const [idx, rGroup] of firstGroups.entries())
    merged[idx] = rGroup;

  for (const [idx, rGroup] of secondGroups.entries()) {
    const oldNumber = rGroup.label.replace('R', '');
    const newNumber = String(Number(oldNumber) + shift);
    const renumber = (text: string): string => text.replace(oldNumber, newNumber);

    merged[shift + idx] = {
      //@ts-ignore
      [HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE]: renumber(rGroup.capGroupSMILES),
      [HELM_RGROUP_FIELDS.ALTERNATE_ID]: renumber(rGroup.alternateId),
      [HELM_RGROUP_FIELDS.CAP_GROUP_NAME]: rGroup.capGroupName,
      [HELM_RGROUP_FIELDS.LABEL]: renumber(rGroup.label),
    };
  }

  return merged;
}
130
+
131
/**
 * Assembles a new PEPTIDE monomer out of the two library monomers named by {@link rule}.
 *
 * The molfile is produced by running the rule's reaction on the molblocks of both monomers;
 * the R-groups are the merged R-groups of both monomers. Symbol and name are both set to the
 * rule name, and fixed display colors are stored in the monomer meta.
 *
 * @param rdkit RDKit module used to run the reaction
 * @param mLib monomer library the two source monomers are looked up in
 * @param rule reaction rule providing the reaction, the new monomer name and both monomer symbols
 * @returns `[monomer name, monomer]`
 * @throws MonomerNotFoundError when either source monomer is missing in {@link mLib}
 */
export function getNewMonomer(rdkit: RDModule, mLib: IMonomerLib, rule: RuleReaction): [string, Monomer] {
  const reaction = rule.reaction;
  const newSymbol = rule.name;

  // Looks a PEPTIDE monomer up, failing when the library does not have it
  const fetchPeptide = (symbol: string): Monomer => {
    const found = mLib.getMonomer('PEPTIDE', symbol);
    if (!found) throw new MonomerNotFoundError('PEPTIDE', symbol);
    return found;
  };
  const first = fetchPeptide(rule.firstMonomer);
  const second = fetchPeptide(rule.secondMonomer);

  const [firstBlock, secondBlock] = getMonomersMolBlocks(first, second);
  const productBlock = getSyntheticMolBlock(rdkit, reaction, firstBlock, secondBlock, newSymbol);
  const mergedGroups: RGroup[] = getNewGroups(first, second);

  const resMonomer: Monomer = {
    [REQ.SYMBOL]: newSymbol,
    [REQ.NAME]: newSymbol,
    [REQ.MOLFILE]: productBlock,
    [REQ.AUTHOR]: '',
    [REQ.ID]: 0,
    [REQ.RGROUPS]: mergedGroups,
    [REQ.SMILES]: '',
    [REQ.POLYMER_TYPE]: 'PEPTIDE',
    [REQ.MONOMER_TYPE]: 'Backbone',
    [REQ.CREATE_DATE]: null,

    // // @ts-ignore
    // lib: {source: 'Reaction'},
  };

  const meta = resMonomer[OPT.META] ?? {};
  Object.assign(meta, {'colors': {'default': {line: '#2083D5', text: '#2083D5', background: '#F2F2F5'}}});
  resMonomer[OPT.META] = meta;

  return [newSymbol, resMonomer];
}
165
+
166
/**
 * Creates a monomer library overriding the system one with monomers built from reaction rules.
 *
 * One monomer is assembled per entry of `rules.reactionRules` and registered under the
 * PEPTIDE polymer type. The override source name is 'ST-PT-reactions.' followed by
 * four random base-36 characters.
 *
 * @param rules rules whose reaction rules define the monomers to add
 * @returns the system monomer library overridden with the assembled monomers
 */
export async function getOverriddenLibrary(rules: Rules): Promise<IMonomerLibBase> {
  const libHelper = await getMonomerLibHelper();
  const baseLib = libHelper.getMonomerLib();
  const rdkit = await getRdKitModule();

  const reactionMonomers: { [symbol: string]: Monomer } = {};
  for (const rule of rules.reactionRules) {
    const [symbol, monomer] = getNewMonomer(rdkit, baseLib, rule);
    reactionMonomers[symbol] = monomer;
  }

  const overrideData: MonomerLibData = {[PolymerTypes.PEPTIDE]: reactionMonomers};
  const randomSuffix = Array.from({length: 4}, () => Math.floor((Math.random() * 36)).toString(36)).join('');
  return baseLib.override(overrideData, 'ST-PT-reactions.' + randomSuffix);
}
@@ -7,25 +7,38 @@ import {Unsubscribable} from 'rxjs';
7
7
 
8
8
  import {getHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
9
9
  import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
10
- import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
11
  import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
12
12
  import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
13
- import {buildMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
13
+ import {addMonomerHoverLink, buildMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
14
14
  import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
15
15
  import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
16
16
 
17
- import {getRules, RuleInputs, Rules, RULES_PATH, RULES_STORAGE_NAME} from './pt-rules';
18
- import {doPolyToolConvert, getOverriddenLibrary} from './pt-conversion';
17
+ import {getRules, RuleInputs, RULES_PATH, RULES_STORAGE_NAME} from './pt-rules';
18
+ import {doPolyToolConvert} from './conversion/pt-conversion';
19
+ import {getOverriddenLibrary} from './conversion/pt-misc';
19
20
  import {defaultErrorHandler} from '../utils/err-info';
20
21
  import {getLibrariesList} from './utils';
21
22
  import {getEnumerationChem, PT_CHEM_EXAMPLE} from './pt-enumeration-chem';
22
23
 
23
24
  import {
24
25
  PT_ERROR_DATAFRAME, PT_UI_ADD_HELM, PT_UI_DIALOG_CONVERSION, PT_UI_DIALOG_ENUMERATION,
25
- PT_UI_GET_HELM, PT_UI_RULES_USED, PT_UI_USE_CHIRALITY, PT_WARNING_COLUMN
26
+ PT_UI_GET_HELM, PT_UI_HIGHLIGHT_MONOMERS, PT_UI_RULES_USED, PT_UI_USE_CHIRALITY
26
27
  } from './const';
27
28
 
28
29
  import {_package} from '../package';
30
+ import {IMonomerLibBase} from '@datagrok-libraries/bio/src/types/index';
31
+ import {MonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/utils';
32
+ import {MonomerMap} from '@datagrok-libraries/bio/src/monomer-works/types';
33
+ import {ISeqMonomer} from '@datagrok-libraries/bio/src/helm/types';
34
+ import wu from 'wu';
35
+ import {PolymerTypes} from '@datagrok-libraries/js-draw-lite/src/types/org';
36
+ import {getMonomersDictFromLib} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
37
+ import {monomerSeqToMolfile} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level-utils';
38
+ import {LRUCache} from 'lru-cache';
39
+ import {getMonomerHover, ISubstruct, setMonomerHover} from '@datagrok-libraries/chem-meta/src/types';
40
+ import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
41
+ import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
29
42
 
30
43
  type PolyToolConvertSerialized = {
31
44
  generateHelm: boolean;
@@ -87,6 +100,7 @@ export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.D
87
100
  ui.tooltip.bind(generateHelmInput.root, PT_UI_ADD_HELM);
88
101
 
89
102
  const chiralityEngineInput = ui.input.bool(PT_UI_USE_CHIRALITY, {value: false});
103
+ const highlightMonomersInput = ui.input.bool(PT_UI_HIGHLIGHT_MONOMERS, {value: false});
90
104
  let ruleFileList: string[];
91
105
  const ruleInputs = new RuleInputs(RULES_PATH, RULES_STORAGE_NAME, '.json', {
92
106
  onValueChanged: (value: string[]) => { ruleFileList = value; }
@@ -99,6 +113,7 @@ export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.D
99
113
  srcColInput,
100
114
  generateHelmInput,
101
115
  chiralityEngineInput,
116
+ highlightMonomersInput,
102
117
  rulesHeader,
103
118
  rulesForm
104
119
  ]);
@@ -129,7 +144,7 @@ export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.D
129
144
  /* applyInput */ (x: PolyToolConvertSerialized): void => {
130
145
  generateHelmInput.value = x.generateHelm;
131
146
  chiralityEngineInput.value = x.chiralityEngine;
132
- ruleInputs.setActive(ruleFileList);
147
+ ruleInputs.setActive(x.rules);
133
148
  });
134
149
  return dialog;
135
150
  } catch (err: any) {
@@ -240,6 +255,8 @@ function dealGroups(col: DG.Column<string>): void {
240
255
  col.set(i, col.get(i)!.replaceAll('undefined', 'H'));
241
256
  col.set(i, col.get(i)!.replaceAll('Oh', 'O'));
242
257
  col.set(i, col.get(i)!.replaceAll('0.000000 3', '0.000000 0'));
258
+ col.set(i, col.get(i)!.replaceAll('?', 'O'));
259
+ col.set(i, col.get(i)!.replaceAll('0 3\n', '0 0\n'));
243
260
  }
244
261
  }
245
262
 
@@ -285,9 +302,121 @@ export async function polyToolConvert(
285
302
  }
286
303
 
287
304
  buildMonomerHoverLink(resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
305
+ buildCyclizedMonomerHoverLink(seqCol, resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
288
306
 
289
307
  return [resHelmCol, resMolCol];
290
308
  } finally {
291
309
  pi.close();
292
310
  }
293
311
  }
312
+
313
/**
 * Creates a monomer hover link targeting {@link molCol} and adds it to `cyclizedCol.temp`.
 *
 * `polyToolConvert` calls this with the source sequence column, the resulting HELM column and
 * the resulting molecule column, in that order. Monomer maps are built from {@link seqCol} and
 * kept in an LRU cache (up to 100 entries) keyed by the sequence string.
 *
 * @param cyclizedCol column the link is registered on
 * @param seqCol column the monomer maps are built from and the hover state is keyed by
 * @param molCol target column of the link
 * @param monomerLib monomer library used to build monomer maps and highlights
 * @param seqHelper provides the sequence handler of {@link seqCol}
 * @param rdKitModule RDKit module passed to `getMonomersDictFromLib`
 * @returns the created link
 */
function buildCyclizedMonomerHoverLink(
  cyclizedCol: DG.Column<string>, seqCol: DG.Column<string>, molCol: DG.Column<string>,
  monomerLib: IMonomerLibBase, seqHelper: ISeqHelper, rdKitModule: RDModule
): MonomerHoverLink {
  /** Builds the monomer map of the sequence in row {@link tableRowIdx} with `monomerSeqToMolfile`. */
  function buildMonomerMap(seqCol: DG.Column<string>, tableRowIdx: number): MonomerMap {
    const seqSH = seqHelper.getSeqHandler(seqCol);
    const seqSS = seqSH.getSplitted(tableRowIdx);
    const biotype = seqSH.defaultBiotype;
    const seqMList: ISeqMonomer[] = wu.count(0).take(seqSS.length)
      .map((posIdx) => {
        return {position: posIdx, symbol: seqSS.getCanonical(posIdx), biotype: biotype} as ISeqMonomer;
      })
      .toArray();

    const alphabet = seqSH.alphabet as ALPHABET;
    const polymerType = alphabet == ALPHABET.RNA || alphabet == ALPHABET.DNA ? PolymerTypes.RNA : PolymerTypes.PEPTIDE;
    const monomersDict = getMonomersDictFromLib([seqMList], polymerType, alphabet, monomerLib, rdKitModule);
    // Call seq-to-molfile worker core directly
    const molWM = monomerSeqToMolfile(seqMList, monomersDict, alphabet, polymerType);
    return molWM.monomers;
  }

  const monomerMapLruCache = new LRUCache<string, MonomerMap>({max: 100});

  /** Gets the monomer map of a row from the cache, building it on a miss; null for an empty cell. */
  function getMonomerMap(seqCol: DG.Column<string>, tableRowIdx: number): MonomerMap | null {
    const seq = seqCol.get(tableRowIdx);
    if (seq == null) return null;

    let resMonomerMap = monomerMapLruCache.get(seq);
    if (!resMonomerMap)
      monomerMapLruCache.set(seq, resMonomerMap = buildMonomerMap(seqCol, tableRowIdx));

    return resMonomerMap;
  }

  const resLink: MonomerHoverLink = {
    targetCol: molCol,
    handler: (seqGridCell: DG.GridCell, cyclizedMonomer: ISeqMonomer | null, targetGridCol: DG.GridColumn): boolean => {
      const grid = targetGridCol.grid;
      const tableRowIdx = seqGridCell.tableRowIndex!;
      const gridRowIdx = seqGridCell.gridRow;
      const targetGridCell = grid.cell(targetGridCol.name, gridRowIdx);

      const prev = getMonomerHover();
      // Nothing changes while the stored hover already describes this monomer of this row
      if (prev && prev.dataFrameId == seqCol.dataFrame.id && prev.gridRowIdx == gridRowIdx &&
        prev.seqColName == seqCol.name && prev.seqPosition == cyclizedMonomer?.position)
        return true;

      if (prev) {
        setMonomerHover(null);
        prev.gridCell.grid?.invalidate();
        // prev.gridCell.render();
      }
      if (!cyclizedMonomer) {
        setMonomerHover(null);
        return true;
      }

      // Const alias keeps the non-null narrowing inside the closure below
      const hovered: ISeqMonomer = cyclizedMonomer;
      setMonomerHover({
        gridCell: targetGridCell,
        dataFrameId: seqCol.dataFrame.id,
        gridRowIdx: gridRowIdx,
        seqColName: seqCol.name,
        seqPosition: hovered.position,
        getSubstruct: (): ISubstruct | undefined => { // Gets monomer highlight
          if (hovered.symbol === '*')
            return undefined;

          const molMonomerMap = getMonomerMap(seqCol, tableRowIdx);
          if (!molMonomerMap)
            return undefined;

          const resSubstructList: ISubstruct[] = [];
          const seqMonomerList: number[] = [hovered.position]; // TODO: Map position of harmonized sequence
          for (const seqMonomer of seqMonomerList) {
            // Look up the position being iterated, so the loop stays correct once
            // the list holds more than the hovered position
            const monomerMap = molMonomerMap.get(seqMonomer); // single monomer
            if (!monomerMap) return {atoms: [], bonds: [], highlightAtomColors: [], highlightBondColors: []};
            resSubstructList.push(getMolHighlight([monomerMap], monomerLib));
          }
          //TODO: refine merge substract
          //const res: ISubstruct = mergeSubstructs(resSubstructList);
          // The collected highlights are not returned until merging is implemented
          return undefined;
        }
      });

      // TODO: Invalidate targetGridCell
      grid.invalidate();
      // targetGridCell.render();

      return true;
    },
    /* ISubstructProvider.*/getSubstruct: (tableRowIdx: number | null): ISubstruct | undefined => {
      // Gets whole molecule highlight
      if (molCol.getTag(ChemTags.SEQUENCE_SRC_HL_MONOMERS) != 'true') return undefined;
      if (tableRowIdx == null) return undefined;
      const seq = seqCol.get(tableRowIdx);
      if (!seq) return undefined;

      const molMonomerMap = getMonomerMap(seqCol, tableRowIdx);
      if (!molMonomerMap) return undefined;
      const res: ISubstruct = getMolHighlight(molMonomerMap.values(), monomerLib);
      return res;
    }
  };

  addMonomerHoverLink(cyclizedCol.temp, resLink);
  // addSubstructProvider(molCol.temp, resLink); //

  return resLink;
}
+ }