@datagrok/sequence-translator 1.4.3 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27,9 +27,9 @@
27
27
  "type": "reaction",
28
28
  "code": "3",
29
29
  "monomericSubstitution": {
30
- "firstMonomer": "aG",
31
- "secondMonomer": "azG",
32
- "reaction": "[H:1]NC(C([OH:2])=O)c1cn(C(N[H:3])C([OH:4])=O)nn1",
30
+ "firstMonomer": "azG",
31
+ "secondMonomer": "aG",
32
+ "reaction": "[C:1]N=[N+]=[N-].[C:2]C#C>>[C:1]N1-N=NC=C1[C:2]",
33
33
  "name": "GGaz"
34
34
  }
35
35
  },
@@ -4,4 +4,5 @@ n,seqs
4
4
  3,R-F-C(1)-T-G-H-F-Y-P-C(1)
5
5
  4,C(1)-T-G-H-F-H-P-C(1)
6
6
  5,R-F-D(2)-T-G-H-F-Y-P-NH2(2)
7
- 6,R-F-aG(3)-T-G-H-F-Y-P-azG(3)-meI
7
+ 6,R-F-azG(3)-T-G-H-F-Y-P-aG(3)-meI
8
+ 7,R-F-aG(3)-T-G-H-F-Y-P-azG(3)-meI
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "@datagrok/sequence-translator",
3
3
  "friendlyName": "Sequence Translator",
4
- "version": "1.4.3",
4
+ "version": "1.4.4",
5
5
  "author": {
6
- "name": "Alexey Choposky",
6
+ "name": "Alexey Chopovsky",
7
7
  "email": "achopovsky@datagrok.ai"
8
8
  },
9
9
  "description": "SequenceTranslator translates [oligonucleotide](https://en.wikipedia.org/wiki/Oligonucleotide) sequences between [different representations](https://github.com/datagrok-ai/public/tree/master/packages/SequenceTranslator#sequence-representations).",
@@ -1,10 +1,18 @@
1
- // import * as grok from 'datagrok-api/grok';
2
- // import * as DG from 'datagrok-api/dg';
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
3
4
 
4
-
5
- //import {ALIGNMENT, ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import {PolymerTypes} from '@datagrok-libraries/bio/src/helm/consts';
6
+ import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
7
+ import {IMonomerLib, IMonomerLibBase, Monomer, MonomerLibData, RGroup} from '@datagrok-libraries/bio/src/types';
8
+ import {RDModule, RDMol, RDReaction, MolList, RDReactionResult} from '@datagrok-libraries/chem-meta/src/rdkit-api';
9
+ import {HELM_REQUIRED_FIELD, HELM_RGROUP_FIELDS} from '@datagrok-libraries/bio/src/utils/const';
10
+ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
6
11
 
7
12
  import {Rules, RuleLink, RuleReaction} from './pt-rules';
13
+ import {InvalidReactionError, MonomerNotFoundError} from './types';
14
+ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
15
+ import {_package} from '../package';
8
16
 
9
17
  export const RULES_DIMER = '(#2)';
10
18
  export const RULES_HETERODIMER = '($2)';
@@ -32,7 +40,14 @@ export class Chain {
32
40
  const rawLinkages = fragmentation[1].split('|');
33
41
 
34
42
  const monomers = new Array<Array<string>>(rawFragments.length);
35
- const linkages: { fChain: number, sChain: number, fMonomer: number, sMonomer: number, fR: number, sR: number }[] = [];
43
+ const linkages: {
44
+ fChain: number,
45
+ sChain: number,
46
+ fMonomer: number,
47
+ sMonomer: number,
48
+ fR: number,
49
+ sR: number
50
+ }[] = [];
36
51
 
37
52
  //HELM parsing
38
53
  for (let i = 0; i < rawFragments.length; i++) {
@@ -71,7 +86,14 @@ export class Chain {
71
86
  const homodimerCode = rules.homodimerCode;
72
87
  const mainFragments: string[] = [];
73
88
 
74
- const linkages: { fChain: number, sChain: number, fMonomer: number, sMonomer: number, fR: number, sR: number }[] = [];
89
+ const linkages: {
90
+ fChain: number,
91
+ sChain: number,
92
+ fMonomer: number,
93
+ sMonomer: number,
94
+ fR: number,
95
+ sR: number
96
+ }[] = [];
75
97
 
76
98
  //NOTICE: this works only with simple single heterodimers
77
99
  const heterodimeric = heterodimerCode !== null ? sequence.split(`(${rules.heterodimerCode!})`) : '';
@@ -374,10 +396,155 @@ export class Chain {
374
396
  export function doPolyToolConvert(sequences: string[], rules: Rules): string[] {
375
397
  const helms = new Array<string>(sequences.length);
376
398
  for (let i = 0; i < sequences.length; i++) {
377
- if (sequences[i] == null) { helms[i] = ''; } else {
378
- const chain = Chain.fromNotation(sequences[i], rules);
379
- helms[i] = chain.getHelm();
399
+ try {
400
+ if (sequences[i] == null) { helms[i] = ''; } else {
401
+ const chain = Chain.fromNotation(sequences[i], rules);
402
+ helms[i] = chain.getHelm();
403
+ }
404
+ } catch (err: any) {
405
+ const [errMsg, errStack] = errInfo(err);
406
+ _package.logger.error(errMsg, undefined, errStack);
407
+ helms[i] = '';
380
408
  }
381
409
  }
382
410
  return helms;
383
411
  }
412
+
413
/**
 * Returns the molblocks of two monomers prepared for merging into one molecule.
 *
 * The first molblock is returned untouched. In the second one every R-group number
 * listed on a V2000 R-group property line (`M  RGP`) is increased by the number of
 * R-groups of the first monomer, so that the numbers of both monomers do not clash.
 *
 * The property line is located by a whitespace-tolerant pattern and every such line
 * is processed (a molfile may carry more than one). Numbers are right-aligned in a
 * 4-character field with `padStart`, which never throws for unexpected values.
 *
 * @param monomer1 monomer whose R-groups keep their numbers
 * @param monomer2 monomer whose R-group numbers are shifted
 * @returns `[molblock of monomer1, renumbered molblock of monomer2]`
 */
function getMonomersMolBlocks(monomer1: Monomer, monomer2: Monomer): [string, string] {
  const mb1 = monomer1.molfile;
  const addGroups = monomer1.rgroups.length;

  // Shifts the R-group number of a single "<atom index> <R-group number>" pair.
  const shiftPair = (_pair: string, atomField: string, rGroupNumber: string): string =>
    atomField + String(Number(rGroupNumber) + addGroups).padStart(4);

  // Keeps the line head ("M  RGP" + entries count) and rewrites the pairs after it.
  const shiftLine = (_line: string, head: string, pairs: string): string =>
    head + pairs.replace(/([ \t]+\d+)[ \t]+(\d+)/g, shiftPair);

  //mol v2000 monomer
  const mb2 = monomer2.molfile.replace(/^(M[ \t]+RGP[ \t]*\d+)((?:[ \t]+\d+[ \t]+\d+)*)/gm, shiftLine);

  //TODO: same for v3000 monomer

  return [mb1, mb2];
}
439
+
440
/**
 * Runs a reaction on two monomer molblocks and returns the molblock of the first product.
 *
 * Any failure is shown to the user via `grok.shell.error` and then rethrown. All RDKit
 * objects created here are deleted in `finally`, whether or not the reaction succeeded.
 *
 * @param rdkit       RDKit module used to build the reaction and the molecules
 * @param reaction    reaction string passed to `rdkit.get_rxn`
 * @param mb1         molblock of the first reactant
 * @param mb2         molblock of the second reactant
 * @param monomerName name of the monomer being assembled, used in the error message only
 * @returns molblock of the first product of the first product set
 * @throws InvalidReactionError when the reaction can not be created
 * @throws Error when a reactant can not be parsed or the reaction gives no product
 */
function getSyntheticMolBlock(rdkit: RDModule, reaction: string,
  mb1: string, mb2: string, monomerName: string): string {
  let rxn: RDReaction | null = null;
  let mols: MolList | null = null;
  let mol1: RDMol | null = null;
  let mol2: RDMol | null = null;
  let rctns: RDReactionResult | null = null;
  let molP: RDMol | null = null;

  try {
    rxn = rdkit.get_rxn(reaction);
    if (!rxn) throw new InvalidReactionError(reaction);

    // Fail with a clear message instead of appending a missing molecule to the list.
    mol1 = rdkit.get_mol(mb1);
    if (!mol1) throw new Error('molfile of the first monomer can not be parsed');
    mol2 = rdkit.get_mol(mb2);
    if (!mol2) throw new Error('molfile of the second monomer can not be parsed');

    mols = new rdkit.MolList();
    mols.append(mol1);
    mols.append(mol2);

    rctns = rxn.run_reactants(mols, 1);
    // NOTE(review): the product list handle is not deleted here, as in the original code;
    // confirm whether it is owned by the caller and has to be released explicitly.
    const element = rctns?.get(0);
    if (!element) throw new Error('the reaction yielded no products for these monomers');

    molP = element.next();
    if (!molP) throw new Error('the reaction yielded an empty product set');

    return molP.get_molblock();//molP.get_v3Kmolblock();//
  } catch (err: any) {
    const [errMsg, _errStack] = errInfo(err);
    grok.shell.error(`Can not assemble monomer '${monomerName}': ${errMsg}.`);
    throw err;
  } finally {
    rxn?.delete();
    mols?.delete();
    mol1?.delete();
    mol2?.delete();
    rctns?.delete();
    molP?.delete();
  }
}
480
+
481
/**
 * Replaces an R-group number inside `text` at the place where it is surrounded by
 * `before` and `after`, so that equal digits elsewhere in the text are left alone.
 * Falls back to replacing the first plain occurrence when the anchored form is absent.
 */
function renumberAnchored(text: string, before: string, oldNum: string, newNum: string, after: string): string {
  const anchorPos = oldNum === '' ? -1 : text.indexOf(before + oldNum + after);
  if (anchorPos === -1) return text.replace(oldNum, newNum);
  const numPos = anchorPos + before.length;
  return text.substring(0, numPos) + newNum + text.substring(numPos + oldNum.length);
}

/**
 * Builds the R-group list of a monomer assembled from two monomers.
 *
 * R-groups of `monomer1` are reused as is. R-groups of `monomer2` are copied with
 * their number (taken from the label with `R` removed) increased by the number of
 * R-groups of `monomer1`; the number is updated in the cap group SMILES, the
 * alternate id and the label, the cap group name is copied unchanged.
 *
 * @param monomer1 monomer whose R-groups come first and keep their numbers
 * @param monomer2 monomer whose R-groups are appended with shifted numbers
 * @returns R-groups of `monomer1` followed by the renumbered R-groups of `monomer2`
 */
function getNewGroups(monomer1: Monomer, monomer2: Monomer): RGroup[] {
  const length1 = monomer1.rgroups.length;
  const length2 = monomer2.rgroups.length;
  const groups = new Array<RGroup>(length1 + length2);

  for (let i = 0; i < length1; i++)
    groups[i] = monomer1.rgroups[i];

  for (let i = 0; i < length2; i++) {
    const src = monomer2.rgroups[i];
    const rGroupSpecifier = src.label.replace('R', '');
    const newSpecifier = String(Number(rGroupSpecifier) + length1);
    const group: RGroup = {
      //@ts-ignore
      [HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE]: renumberAnchored(src.capGroupSMILES, ':', rGroupSpecifier, newSpecifier, ']'),
      [HELM_RGROUP_FIELDS.ALTERNATE_ID]: renumberAnchored(src.alternateId, 'R', rGroupSpecifier, newSpecifier, ''),
      [HELM_RGROUP_FIELDS.CAP_GROUP_NAME]: src.capGroupName,
      [HELM_RGROUP_FIELDS.LABEL]: renumberAnchored(src.label, 'R', rGroupSpecifier, newSpecifier, ''),
    };

    groups[i + length1] = group;
  }

  return groups;
}
505
+
506
/**
 * Assembles a synthetic monomer described by a reaction rule.
 *
 * Both monomers named by the rule are looked up in `mLib` under the 'PEPTIDE'
 * polymer type, their molblocks are combined by the rule's reaction, and their
 * R-groups are merged into one list.
 *
 * @param rdkit RDKit module used to run the reaction
 * @param mLib  monomer library to take the source monomers from
 * @param rule  reaction rule: names of the two monomers, the reaction and the new name
 * @returns the name of the new monomer together with the monomer itself
 * @throws MonomerNotFoundError when either source monomer is missing in `mLib`
 */
export function getNewMonomer(rdkit: RDModule, mLib: IMonomerLib, rule: RuleReaction): [string, Monomer] {
  const {reaction, name: symbol} = rule;

  const first = mLib.getMonomer('PEPTIDE', rule.firstMonomer);
  if (!first)
    throw new MonomerNotFoundError('PEPTIDE', rule.firstMonomer);

  const second = mLib.getMonomer('PEPTIDE', rule.secondMonomer);
  if (!second)
    throw new MonomerNotFoundError('PEPTIDE', rule.secondMonomer);

  const [firstMolBlock, secondMolBlock] = getMonomersMolBlocks(first, second);
  const productMolBlock = getSyntheticMolBlock(rdkit, reaction, firstMolBlock, secondMolBlock, symbol);
  const mergedGroups: RGroup[] = getNewGroups(first, second);

  const assembled: Monomer = {
    [HELM_REQUIRED_FIELD.SYMBOL]: symbol,
    [HELM_REQUIRED_FIELD.NAME]: symbol,
    [HELM_REQUIRED_FIELD.MOLFILE]: productMolBlock,
    [HELM_REQUIRED_FIELD.AUTHOR]: '',
    [HELM_REQUIRED_FIELD.ID]: 0,
    [HELM_REQUIRED_FIELD.RGROUPS]: mergedGroups,
    [HELM_REQUIRED_FIELD.SMILES]: '',
    [HELM_REQUIRED_FIELD.POLYMER_TYPE]: 'PEPTIDE',
    [HELM_REQUIRED_FIELD.MONOMER_TYPE]: 'Backbone',
    [HELM_REQUIRED_FIELD.CREATE_DATE]: null,
  };

  return [symbol, assembled];
}
534
+
535
/**
 * Builds a monomer library in which the monomers produced by the reaction rules
 * override the system monomer library for the peptide polymer type.
 *
 * @param rules rules whose `reactionRules` describe the monomers to assemble
 * @returns the result of overriding the system monomer library with the new monomers
 */
export async function getOverriddenLibrary(rules: Rules): Promise<IMonomerLibBase> {
  const libHelper = await getMonomerLibHelper();
  const baseLib = libHelper.getMonomerLib();
  const rdkitModule = await getRdKitModule();

  // One synthetic monomer per reaction rule, keyed by its name.
  const synthetic: { [symbol: string]: Monomer } = {};
  for (const reactionRule of rules.reactionRules) {
    const [symbol, monomer] = getNewMonomer(rdkitModule, baseLib, reactionRule);
    synthetic[symbol] = monomer;
  }

  const overrideData: MonomerLibData = {[PolymerTypes.PEPTIDE]: synthetic};
  return baseLib.override(overrideData);
}
@@ -11,8 +11,8 @@ import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
11
  import {getSeqHelper, ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
12
12
  import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
13
13
 
14
- import {getRules, RuleInputs, RULES_PATH, RULES_STORAGE_NAME} from './pt-rules';
15
- import {doPolyToolConvert} from './pt-conversion';
14
+ import {getRules, RuleInputs, Rules, RULES_PATH, RULES_STORAGE_NAME} from './pt-rules';
15
+ import {doPolyToolConvert, getOverriddenLibrary} from './pt-conversion';
16
16
  import {defaultErrorHandler} from '../utils/err-info';
17
17
  import {getLibrariesList} from './utils';
18
18
  import {getEnumerationChem, PT_CHEM_EXAMPLE} from './pt-enumeration-chem';
@@ -85,7 +85,7 @@ export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.D
85
85
  const chiralityEngineInput = ui.input.bool(PT_UI_USE_CHIRALITY, {value: false});
86
86
  let ruleFileList: string[];
87
87
  const ruleInputs = new RuleInputs(RULES_PATH, RULES_STORAGE_NAME, '.json', {
88
- onValueChanged: (value: string[]) => { ruleFileList = value;}
88
+ onValueChanged: (value: string[]) => { ruleFileList = value; }
89
89
  });
90
90
  const rulesHeader = ui.inlineText([PT_UI_RULES_USED]);
91
91
  ui.tooltip.bind(rulesHeader, 'Add or specify rules to use');
@@ -256,28 +256,11 @@ export async function polyToolConvert(
256
256
  if (generateHelm && table) table.columns.add(resHelmCol, true);
257
257
 
258
258
  const seqHelper: ISeqHelper = await getSeqHelper();
259
- const toAtomicLevelRes = await seqHelper.helmToAtomicLevel(resHelmCol, chiralityEngine, /* highlight */ generateHelm);
259
+ const lib = await getOverriddenLibrary(rules);
260
+ const toAtomicLevelRes =
261
+ await seqHelper.helmToAtomicLevel(resHelmCol, chiralityEngine, /* highlight */ generateHelm, lib);
260
262
  const resMolCol = toAtomicLevelRes.molCol!;
261
263
 
262
- // const rdkit = await grok.functions.call('Chem:getRdKitModule');
263
- // for (let i = 0; i < rules.reactionRules.length; i++) {
264
- // const reacSmarts = rules.reactionRules[i].reaction;
265
- // const rxn = rdkit.get_rxn(reacSmarts);
266
-
267
- // for (let j = 0; j < resMolCol.length; j++) {
268
- // const mols = new rdkit.MolList();
269
- // const mol = rdkit.get_mol(resMolCol.get(j));
270
- // mols.append(mol!);
271
- // const rctns = rxn.run_reactants(mols, 1);
272
- // const size = rctns.size();
273
- // const element = rctns.get(0);
274
- // let molP: RDMol | null = null;
275
- // molP = element.next();
276
- // const molBlock = molP?.get_v3Kmolblock();
277
- // resMolCol.set(j, molBlock!);
278
- // }
279
- // }
280
-
281
264
  resMolCol.name = getUnusedName(table, `molfile(${seqCol.name})`);
282
265
  resMolCol.semType = DG.SEMTYPE.MOLECULE;
283
266
  if (table) {
@@ -2,6 +2,8 @@ import * as ui from 'datagrok-api/ui';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
+ import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
6
+
5
7
  export enum PolyToolEnumeratorTypes {
6
8
  Single = 'single',
7
9
  Matrix = 'matrix',
@@ -21,3 +23,19 @@ export type PolyToolEnumeratorParams = {
21
23
  keepOriginal?: boolean;
22
24
  trivialName?: boolean;
23
25
  }
26
+
27
/** Error thrown when a monomer with the given symbol is absent for the given polymer type. */
export class MonomerNotFoundError extends Error {
  /** Discriminator for telling this error apart from other errors. */
  public type: string = 'MonomerNotFoundError';

  /**
   * @param polymerType polymer type the monomer was searched in
   * @param symbol      symbol of the missing monomer
   * @param options     standard error options, passed to `Error` unchanged
   */
  constructor(polymerType: PolymerType, symbol: string, options?: ErrorOptions) {
    super('Monomer \'' + symbol + '\' of polymer type \'' + polymerType + '\' not found', options);
  }
}
34
+
35
/** Error thrown when a reaction string can not be turned into a reaction. */
export class InvalidReactionError extends Error {
  /** Discriminator for telling this error apart from other errors. */
  public type = 'InvalidReactionError';

  /**
   * @param reaction the reaction string that was rejected
   * @param options  standard error options (e.g. `cause`), passed to `Error` unchanged
   */
  constructor(reaction: string, options?: ErrorOptions) {
    // Forward the options so that a supplied `cause` is not silently dropped.
    super(`Invalid reaction '${reaction}'.`, options);
  }
}
@@ -0,0 +1,45 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {before, after, category, expect, test, expectArray, testEvent, delay} from '@datagrok-libraries/utils/src/test';
6
+ import {Chain} from '../polytool/pt-conversion';
7
+ import {getRules} from '../polytool/pt-rules';
8
+
9
// Table-driven tests of Chain.fromNotation: every case parses a sequence with the
// rules from 'rules_example.json' and checks the number of monomers per chain,
// the number of linkages and the resulting HELM string.
category('PolyTool: Chain: fromNotation', () => {
  type CaseData = {
    src: { seq: string },
    tgt: { monomerCount: number[], linkageCount: number, helm: string },
  };

  const cases: [string, CaseData][] = [
    ['cyclized', {
      src: {seq: 'R-F-C(1)-T-G-H-F-Y-P-C(1)-meI'},
      tgt: {
        monomerCount: [11], linkageCount: 1,
        helm: 'PEPTIDE1{R.F.C.T.G.H.F.Y.P.C.[meI]}$PEPTIDE1,PEPTIDE1,3:R3-10:R3$$$',
      },
    }],
    ['reaction1', {
      src: {seq: 'R-F-azG(3)-T-G-H-F-Y-P-aG(3)-meI'},
      tgt: {
        monomerCount: [9, 1], linkageCount: 2,
        helm: 'PEPTIDE1{R.F.[GGaz].T.G.H.F.Y.P}|PEPTIDE2{[meI]}$PEPTIDE1,PEPTIDE1,3:R3-9:R2|PEPTIDE1,PEPTIDE2,3:R4-1:R1$$$',
      },
    }],
    ['reaction2', {
      src: {seq: 'R-F-aG(3)-T-G-H-F-Y-P-azG(3)-meI'},
      tgt: {
        // TODO: Target test data requires clarification
        monomerCount: [2, 8], linkageCount: 0,
        helm: 'PEPTIDE1{R.F}|PEPTIDE2{T.G.H.F.Y.P.[GGaz].[meI]}$PEPTIDE1,PEPTIDE2,2:R2-7:R3|PEPTIDE2,PEPTIDE2,1:R1-7:R4,$$$',
      },
    }],
  ];

  cases.forEach(([caseName, caseData]) => {
    // The reverse reaction case is registered but skipped.
    const options = caseName == 'reaction2' ? {skipReason: 'reverse reaction'} : undefined;

    test(`${caseName}`, async () => {
      const rules = await getRules(['rules_example.json']);
      const chain = Chain.fromNotation(caseData.src.seq, rules);
      const monomerCounts = chain.monomers.map((chainMonomers) => chainMonomers.length);

      expectArray(monomerCounts, caseData.tgt.monomerCount);
      expect(chain.linkages.length, caseData.tgt.linkageCount);
      expect(chain.getHelm(), caseData.tgt.helm);
    }, options);
  });
});
@@ -15,6 +15,8 @@ import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
15
15
  import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
16
16
 
17
17
  import {_package} from '../package-test';
18
+ import {getNewMonomer} from '../polytool/pt-conversion';
19
+ import {getRules, RuleReaction} from '../polytool/pt-rules';
18
20
 
19
21
  category('toAtomicLevel', () => {
20
22
  let userLibSettings: UserLibSettings;
@@ -69,4 +71,27 @@ category('toAtomicLevel', () => {
69
71
  mol.delete();
70
72
  }
71
73
  });
74
+
75
// Assembles the 'GGaz' monomer from the example rules and checks the returned
// symbol and the atom/bond counts of the resulting molfile.
test('getNewMonomer', async () => {
  const rdkit = await getRdKitModule();
  const monomerLib = monomerLibHelper.getMonomerLib();

  const exampleRules = await getRules(['rules_example.json']);
  const ggazRule = exampleRules.reactionRules.find((rule) => rule.name == 'GGaz')!;

  const [symbol, monomer] = getNewMonomer(rdkit, monomerLib, ggazRule);
  expect(symbol, ggazRule.name);

  const productMol = rdkit.get_mol(monomer.molfile);
  try {
    // The InChI key is computed but not asserted yet, see the TODO below.
    const inchi = productMol.get_inchi();
    const inchiKey = rdkit.get_inchikey_for_inchi(inchi);
    expect(productMol.get_num_bonds(), 18);
    expect(productMol.get_num_atoms(), 18);
    // TODO: Check inchi key for the new monomer molfile
    // expect(molInchiKey, 'V2H10N2O3S-UHFFFAOYSA-N');
  } finally {
    productMol.delete();
  }
});
72
97
  });