@datagrok/bio 2.24.0 → 2.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/CHANGELOG.md +4 -0
  2. package/dist/455.js.map +1 -1
  3. package/dist/package-test.js +2 -2
  4. package/dist/package-test.js.map +1 -1
  5. package/dist/package.js +2 -2
  6. package/dist/package.js.map +1 -1
  7. package/files/samples/HELM_CHEMS.csv +11 -0
  8. package/package.json +2 -2
  9. package/src/analysis/sequence-space.ts +1 -1
  10. package/src/demo/bio03-atomic-level.ts +1 -1
  11. package/src/package-types.ts +1 -1
  12. package/src/package.ts +1 -1
  13. package/src/tests/monomer-libraries-tests.ts +1 -1
  14. package/src/utils/get-region.ts +2 -2
  15. package/src/utils/helm-to-molfile/converter/const.ts +0 -1
  16. package/src/utils/helm-to-molfile/converter/converter.ts +3 -3
  17. package/src/utils/helm-to-molfile/converter/helm.ts +14 -6
  18. package/src/utils/helm-to-molfile/converter/mol-bonds.ts +1 -1
  19. package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +2 -2
  20. package/src/utils/helm-to-molfile/converter/r-group-handler.ts +2 -2
  21. package/src/utils/monomer-lib/library-file-manager/file-validator.ts +1 -1
  22. package/src/utils/monomer-lib/library-file-manager/ui.ts +1 -1
  23. package/src/utils/monomer-lib/monomer-lib-base.ts +31 -3
  24. package/src/utils/monomer-lib/monomer-lib.ts +0 -26
  25. package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
  26. package/src/utils/monomer-lib/smiles2Monomer.ts +128 -0
  27. package/src/utils/monomer-lib/web-editor-monomer-dummy.ts +15 -1
  28. package/src/utils/multiple-sequence-alignment-ui.ts +1 -1
  29. package/src/utils/multiple-sequence-alignment.ts +1 -1
  30. package/src/utils/seq-helper/seq-handler.ts +10 -10
  31. package/src/utils/ui-utils.ts +1 -1
  32. package/src/viewers/web-logo-viewer.ts +19 -8
  33. package/test-console-output-1.log +338 -341
  34. package/test-record-1.mp4 +0 -0
@@ -0,0 +1,11 @@
1
+ HELM,Activity
2
+ "PEPTIDE1{[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE1,PEPTIDE1,8:R2-1:R1$$$",5.3075109739681280
3
+ "PEPTIDE1{[ac].D.A.D.E.[*N[C@H](C(=O)*)Cc1ccc(cc1)OC(C(=O)O)C(=O)O |$_R1;;;;;_R2;;;;;;;;;;;;;;;$|].L.[am]}$$$$ CHEMBL8284
4
+ PEPTIDE1{[N[C@](C)(C(=O)O)Cc1ccc(c(c1)O)O]}$$$$",5.7238768534315438
5
+ "PEPTIDE1{[meL].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[meL].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}|PEPTIDE2{E.[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].E.[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE2,PEPTIDE2,8:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R1-3:R3|PEPTIDE2,PEPTIDE1,1:R3-8:R2$$$V2.0",5.1858112460224372
6
+ "PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].F.I.Q.N.[dC].S.R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$V2.0",6.2235023908043692
7
+ "PEPTIDE1{A.[dP].D.[dW].F.[dF].N.[dY].Y.[dW].G.[dN].W.[dH].G.[*N[C@@H](C(=O)*)[C@@H](C)O |$_R1;;;;;_R2;;;$|]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1$$$",3.8459123763832412
8
+ PEPTIDE1{N.L.E.R.E.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].L.E.E.P.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].S.R.E.E.A.F}$$$$,3.2792043882465700
9
+ "PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].Y.[dF].G.[dN].[dC].[dP].R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$",2.1058521529925680
10
+ PEPTIDE1{A.[[*]C(=O)[C@H](C)N([*])C |$_R2;;;;;;_R1;;;$|].A}$$$$V2.0,1.8036950016492720
11
+ "CHEM1{[*N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|]}|PEPTIDE1{[C[C@H](N[*])C(=O)C[*] |$;;;_R1;;;;_R2$|].G.G.G.C.C.K.K.K.K}$PEPTIDE1,CHEM1,10:R3-1:R1$$$V2.0",6.3880602836120888
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.24.0",
8
+ "version": "2.25.0",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.60.0",
47
+ "@datagrok-libraries/bio": "^5.61.0",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.6",
@@ -33,7 +33,7 @@ export async function getEncodedSeqSpaceCol(
33
33
  for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
34
34
  const catI = seqColRawData[rowIdx];
35
35
  const seq = seqColCats[catI];
36
- if (seq === null || seqCol.isNone(rowIdx)) {
36
+ if (seq == null || seqCol.isNone(rowIdx)) {
37
37
  //@ts-ignore
38
38
  encList[rowIdx] = null;
39
39
  continue;
@@ -18,7 +18,7 @@ export async function demoToAtomicLevel(): Promise<void> {
18
18
  adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
19
19
  adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
20
20
  grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
21
- grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#get-atomic-level-structure');
21
+ grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#convert-to-atomic-level');
22
22
  }
23
23
 
24
24
  export async function demoBio03UI(): Promise<void> {
@@ -41,7 +41,7 @@ export class BioPackageProperties extends Map<string, any> {
41
41
  }
42
42
 
43
43
  public set maxMonomerLength(value: number | null) {
44
- const vs = value === null ? 'long' : value.toString();
44
+ const vs = value == null ? 'long' : value.toString();
45
45
  super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
46
46
  this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
47
47
  }
package/src/package.ts CHANGED
@@ -123,7 +123,7 @@ export class PackageFunctions {
123
123
 
124
124
  @grok.decorators.init({})
125
125
  static async initBio(): Promise<void> {
126
- if (initBioPromise === null)
126
+ if (initBioPromise == null)
127
127
  initBioPromise = initBioInt();
128
128
 
129
129
  await initBioPromise;
@@ -83,7 +83,7 @@ category('monomerLibraries', () => {
83
83
  };
84
84
  const monomerLib = monomerLibHelper.getMonomerLib();
85
85
  const absentOverrideMonomer = monomerLib.getMonomer(overMon.polymerType, overMon.symbol);
86
- expect(absentOverrideMonomer === null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
86
+ expect(absentOverrideMonomer == null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
87
87
 
88
88
  const overriddenMonomerLib = monomerLib.override({[overMon.polymerType]: {[overMon.symbol]: overMon}}, 'test');
89
89
  const resOverMon = overriddenMonomerLib.getMonomer(overMon.polymerType, overMon.symbol);
@@ -47,9 +47,9 @@ export function getRegionDo(
47
47
  if (sh.posList[posJ] == startPosName) startPosIdx = posJ;
48
48
  if (sh.posList[posJ] == endPosName) endPosIdx = posJ;
49
49
  }
50
- if (startPosIdx === null && startPosName !== null)
50
+ if (startPosIdx == null && startPosName !== null)
51
51
  throw new Error(`Start position ${startPosName} not found.`);
52
- if (endPosIdx === null && endPosName !== null)
52
+ if (endPosIdx == null && endPosName !== null)
53
53
  throw new Error(`End position ${endPosName} not found.`);
54
54
 
55
55
  if (sh.posList.length < endPosIdx!)
@@ -1,4 +1,3 @@
1
1
  export const HELM_ITEM_SEPARATOR = '|';
2
- export const HELM_SECTION_SEPARATOR = '$';
3
2
 
4
3
  export const HYDROGEN_SYMBOL = 'H';
@@ -29,7 +29,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
29
29
  const smilesColName = `smiles(${helmCol.name})`;
30
30
  const smilesColNameU = df ? df.columns.getUnusedName(smilesColName) : smilesColName;
31
31
  return DG.Column.fromStrings(smilesColNameU, smiles.map((molecule) => {
32
- if (molecule === null)
32
+ if (molecule == null)
33
33
  return '';
34
34
  return molecule;
35
35
  }));
@@ -57,7 +57,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
57
57
 
58
58
  public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
59
59
  const beautifiedMolV2000 = beautifiedMols.map((mol) => {
60
- if (mol === null)
60
+ if (mol == null)
61
61
  return '';
62
62
  const molBlock = mol.get_v3Kmolblock();
63
63
  mol!.delete();
@@ -97,7 +97,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
97
97
  if (chiralityEngine)
98
98
  return this.getMolV3000ViaOCL(beautifiedMols, molColNameU);
99
99
  return DG.Column.fromStrings(molColNameU, beautifiedMols.map((mol) => {
100
- if (mol === null)
100
+ if (mol == null)
101
101
  return '';
102
102
  const molBlock = mol.get_v3Kmolblock();
103
103
  mol!.delete();
@@ -1,16 +1,24 @@
1
1
  import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
2
  import {ConnectionList} from './connection-list';
3
- import {HELM_ITEM_SEPARATOR, HELM_SECTION_SEPARATOR} from './const';
3
+ import {HELM_ITEM_SEPARATOR} from './const';
4
4
  import {SimplePolymer} from './simple-polymer';
5
5
  import {Bond} from './types';
6
6
 
7
7
  export class Helm {
8
8
  constructor(private helmString: string) {
9
- const helmSections = this.helmString.split(HELM_SECTION_SEPARATOR);
10
- const simplePolymers = helmSections[0].split(HELM_ITEM_SEPARATOR);
11
- this.simplePolymers = simplePolymers
9
+ const indexOfSequenceEnd = helmString.indexOf('}$');
10
+ const sequencePart = helmString.substring(0, indexOfSequenceEnd + 1);
11
+ const connectionsEndPart = helmString.indexOf('$', indexOfSequenceEnd + 2);
12
+ const connectionsPart = helmString.substring(indexOfSequenceEnd + 2, connectionsEndPart);
13
+ // const helmParts = seq.split('$');
14
+ const spList = sequencePart.split('}|');
15
+ // since we removed }|, need to add } to last part
16
+ for (let i = 0; i < spList.length - 1; i++)
17
+ spList[i] = spList[i] + '}';
18
+
19
+ this.simplePolymers = spList
12
20
  .map((item) => new SimplePolymer(item));
13
- this.connectionList = new ConnectionList(helmSections[1]);
21
+ this.connectionList = new ConnectionList(connectionsPart);
14
22
  this.bondData = this.getBondData();
15
23
 
16
24
  this.bondedRGroupsMap = this.getBondedRGroupsMap();
@@ -58,7 +66,7 @@ export class Helm {
58
66
  const lowerBound = shiftValues.sort((a, b) => b - a).find( // find the largest shift not exceeding monomerGlobalIdx
59
67
  (shift) => monomerGlobalIdx >= shift
60
68
  );
61
- if (lowerBound === undefined)
69
+ if (lowerBound == undefined)
62
70
  throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
63
71
  const simplePolymerId = Object.keys(shifts).find((simplePolymerId) => shifts[simplePolymerId] === lowerBound)!;
64
72
  const simplePolymer = this.simplePolymers.find((simplePolymer) => simplePolymer.id === simplePolymerId)!;
@@ -31,7 +31,7 @@ export abstract class MolfileBonds {
31
31
  }
32
32
 
33
33
  replacePositionsInBondsByDummy(positions: PositionInBonds[], dummy?: number): void {
34
- if (dummy === undefined)
34
+ if (dummy == undefined)
35
35
  dummy = -1;
36
36
  positions.forEach((position) => {
37
37
  const {bondLineIdx, nodeIdx} = position;
@@ -15,7 +15,7 @@ export abstract class MolfileWrapper {
15
15
 
16
16
  protected shiftR1GroupToOrigin(): void {
17
17
  const r1Idx = this.rGroups.getAtomicIdx(1);
18
- if (r1Idx === null)
18
+ if (r1Idx == null)
19
19
  return; // R1 group is not present, nothing to shift
20
20
  const {x, y} = this.atoms.atomCoordinates[r1Idx];
21
21
  this.atoms.shift({x: -x, y: -y});
@@ -23,7 +23,7 @@ export abstract class MolfileWrapper {
23
23
 
24
24
  protected alignR2AlongX(): void {
25
25
  const r2Idx = this.rGroups.getAtomicIdx(2);
26
- if (r2Idx === null)
26
+ if (r2Idx == null)
27
27
  throw new Error(`Cannot find R2 group for monomer ${this.monomerSymbol}`);
28
28
  const r2Coordinates = this.atoms.atomCoordinates[r2Idx];
29
29
  const tan = r2Coordinates.y / r2Coordinates.x;
@@ -19,13 +19,13 @@ export class RGroupHandler {
19
19
 
20
20
  getAtomicIdx(rGroupId: number): number | null {
21
21
  const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
22
- return atomicIdx === undefined ? null : atomicIdx;
22
+ return atomicIdx == undefined ? null : atomicIdx;
23
23
  }
24
24
 
25
25
  private removeRGroupsFromAtomBlock(rGroupIds: number[]): void {
26
26
  rGroupIds.forEach((rgroupId) => {
27
27
  const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rgroupId);
28
- if (atomicIdx === undefined)
28
+ if (atomicIdx == undefined)
29
29
  throw new Error(`Cannot find atomic index for R group ${rgroupId}`);
30
30
  });
31
31
 
@@ -22,7 +22,7 @@ export class MonomerLibFileValidator {
22
22
 
23
23
  validateFile(fileContent: string, fileName: string): boolean {
24
24
  const jsonContent = this.parseJson(fileContent, fileName);
25
- if (jsonContent === null)
25
+ if (jsonContent == null)
26
26
  return false;
27
27
 
28
28
  if (!Array.isArray(jsonContent)) {
@@ -50,7 +50,7 @@ class MonomerLibraryManagerWidget {
50
50
  private libHelper: IMonomerLibHelper;
51
51
 
52
52
  static async getInstance(): Promise<MonomerLibraryManagerWidget> {
53
- if (MonomerLibraryManagerWidget.instancePromise === undefined) {
53
+ if (MonomerLibraryManagerWidget.instancePromise == undefined) {
54
54
  MonomerLibraryManagerWidget.instancePromise = (async () => {
55
55
  const instance = new MonomerLibraryManagerWidget();
56
56
  const libHelper = await getMonomerLibHelper();
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  /* eslint-disable max-lines */
2
3
  import * as grok from 'datagrok-api/grok';
3
4
  import * as ui from 'datagrok-api/ui';
@@ -18,12 +19,14 @@ import {GAP_SYMBOL, GapOriginals, NOTATION} from '@datagrok-libraries/bio/src/ut
18
19
  import {Vector} from '@datagrok-libraries/utils/src/type-declarations';
19
20
  import {vectorAdd, vectorDotProduct, vectorLength} from '@datagrok-libraries/utils/src/vector-operations';
20
21
 
21
- import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer} from './web-editor-monomer-dummy';
22
+ import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer, SmilesWebEditorMonomer} from './web-editor-monomer-dummy';
22
23
  import {LibraryWebEditorMonomer} from './web-editor-monomer-of-library';
23
24
  import {naturalMonomerColors} from './monomer-colors';
24
25
 
25
26
  import {_package} from '../../package';
26
27
  import {MonomerLibData} from '@datagrok-libraries/bio/src/types/monomer-library';
28
+ import {smiles2Monomer} from './smiles2Monomer';
29
+ import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
27
30
 
28
31
  const monomerRe = /[\w()]+/;
29
32
  //** Do not mess with monomer symbol with parenthesis enclosed in square brackets */
@@ -61,9 +64,15 @@ export class MonomerLibBase implements IMonomerLibBase {
61
64
  }
62
65
 
63
66
  getMonomerSymbolsByType(polymerType: PolymerType): string[] {
64
- return Object.keys(this._monomers[polymerType]);
67
+ const res = Object.keys(this._monomers[polymerType]);
68
+ if (this._smilesMonomerCache[polymerType])
69
+ res.push(...Object.keys(this._smilesMonomerCache[polymerType]));
70
+ return res;
65
71
  }
66
72
 
73
+ // smiles to symbol Mapping cache
74
+ private _smilesMonomerCache: {[polymerType: string]: {[smiles: string]: string}} = {};
75
+
67
76
  /** Creates missing {@link Monomer} */
68
77
  addMissingMonomer(polymerType: PolymerType, monomerSymbol: string): Monomer {
69
78
  let mSet = this._monomers[polymerType];
@@ -78,6 +87,22 @@ export class MonomerLibBase implements IMonomerLibBase {
78
87
  else if (polymerType === PolymerTypes.RNA && monomerSymbol === 'N')
79
88
  monomerName = 'Any';
80
89
 
90
+ // test if it is smiles
91
+ // check if the missing monomer symbol is a valid SMILES string
92
+ const smilesMonomer = smiles2Monomer(monomerSymbol, polymerType);
93
+ if (smilesMonomer) {
94
+ this._smilesMonomerCache[polymerType] = this._smilesMonomerCache[polymerType] ?? {};
95
+ const smSet = this._smilesMonomerCache[polymerType];
96
+ const symbol = Object.keys(smSet).length + 1;
97
+ smSet[monomerSymbol] = `#${polymerType[0]}${symbol}`; // e.g. #P1, #R2, #C3, #B4
98
+ const m: Monomer = {...smilesMonomer, symbol: smSet[monomerSymbol]};
99
+ // note, the ID becomes #<index> for smiles based monomers, and as the smiles, original smiles is passed (which is in monomerSymbol), to avoid key duplication
100
+ const wem = new SmilesWebEditorMonomer(polymerTypeToHelmType(polymerType), m.symbol, monomerSymbol, `SMILES Monomer ${m.symbol}`, m.rgroups.map((rg) => rg[RGP.LABEL]));
101
+ m.wem = wem;
102
+ mSet[m.symbol] = m;
103
+ return m;
104
+ }
105
+
81
106
  const m = mSet[monomerSymbol] = {
82
107
  [REQ.SYMBOL]: monomerSymbol,
83
108
  [REQ.NAME]: monomerName,
@@ -124,8 +149,11 @@ export class MonomerLibBase implements IMonomerLibBase {
124
149
  if (res) break;
125
150
  }
126
151
  } else {
152
+ // Check smiles cache, modify with mapped symbol
153
+ if (this._smilesMonomerCache[polymerType]?.[monomerSymbol])
154
+ monomerSymbol = this._smilesMonomerCache[polymerType][monomerSymbol];
127
155
  const dict = this._monomers[polymerType];
128
- res = dict ? dict[monomerSymbol] : null;
156
+ res = dict?.[monomerSymbol] ?? null;
129
157
  }
130
158
  return res;
131
159
  }
@@ -55,32 +55,6 @@ export class MonomerLib extends MonomerLibBase implements IMonomerLib {
55
55
  return resJSON;
56
56
  }
57
57
 
58
- getMonomer(polymerType: PolymerType | null, argMonomerSymbol: string): Monomer | null {
59
- const logPrefix = `Bio: MonomerLib.getMonomer()`;
60
- // Adjust RNA's 'R' for ribose to 'r' and 'P' for phosphate to 'p' for case-sensitive monomer names.
61
- // There are uppercase 'R' and 'P' at RNA samples in test data 'helm2.csv' but lowercase in HELMCoreLibrary.json
62
- let monomerSymbol = argMonomerSymbol;
63
- if (polymerType == 'RNA' && monomerSymbol == 'R')
64
- monomerSymbol = 'r';
65
- if (polymerType == 'RNA' && monomerSymbol == 'P')
66
- monomerSymbol = 'p';
67
-
68
- let res: Monomer | null = null;
69
-
70
- if (!polymerType) {
71
- _package.logger.warning(`${logPrefix} symbol '${argMonomerSymbol}', polymerType not specified.`);
72
- // Assume any polymer type
73
- for (const [_polymerType, dict] of Object.entries(this._monomers)) {
74
- res = dict[monomerSymbol];
75
- if (res) break;
76
- }
77
- } else {
78
- const dict = this._monomers[polymerType];
79
- res = dict?.[monomerSymbol] ?? null;
80
- }
81
- return res;
82
- }
83
-
84
58
  private _monomerSets: { [biotype: string /*HelmType*/]: MonomerSetType } | null = null;
85
59
 
86
60
  getMonomerSet(biotype: HelmType): MonomerSetType | null {
@@ -1178,7 +1178,7 @@ function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: strin
1178
1178
  return isSmilesMalformed ? canonical : grok.chem.convert(canonical, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
1179
1179
  }
1180
1180
 
1181
- function getCorrectedMolBlock(molBlock: string) {
1181
+ export function getCorrectedMolBlock(molBlock: string) {
1182
1182
  // to correct molblock, we should make sure that
1183
1183
  // 1. RGP field is present at the end, before the M END line
1184
1184
  // 2. RGP field is present in the correct format
@@ -0,0 +1,128 @@
1
+ /* eslint-disable camelcase */
2
+ /* eslint-disable max-len */
3
+ import {Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
4
+ import {_package} from '../../package';
5
+ import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
6
+ import {HELM_RGROUP_FIELDS as RGP} from '@datagrok-libraries/bio/src/utils/const';
7
+ import * as grok from 'datagrok-api/grok';
8
+ import {getCorrectedMolBlock} from './monomer-manager/monomer-manager';
9
+
10
+ /**
11
+ * Example r groups
12
+ * {
13
+ "capGroupSMILES": "[*:1][H]",
14
+ "alternateId": "R1-H",
15
+ "capGroupName": "H",
16
+ "label": "R1"
17
+ },
18
+ {
19
+ "capGroupSMILES": "O[*:2]",
20
+ "alternateId": "R2-OH",
21
+ "capGroupName": "OH",
22
+ "label": "R2"
23
+ }
24
+
25
+ */
26
+
27
+ const cx_smiles_regexp = /.*\|\$.*R.*\$\|/;
28
+ const rgroup_regexp = /\[R(\d+)\]/;
29
+ const rgroup_regexpg = /\[R(\d+)\]/g;
30
+ const ambig_regexp = /\[\*:(\d+)\]/g;
31
+
32
+ export type MonomerWithoutSymbol = Omit<Monomer, 'symbol'>;
33
+
34
+ export function getMonomerFromRSmiles(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
35
+ const rgroupNumbers = Array.from(smiles.matchAll(rgroup_regexpg)).map((m) => m[1]);
36
+ const res: MonomerWithoutSymbol = {
37
+ name: 'Explicit SMILES Monomer',
38
+ smiles: smiles,
39
+ polymerType: polymerType ?? 'CHEM',
40
+ molfile: '',
41
+ rgroups: rgroupNumbers.map((numString) => ({
42
+ [RGP.LABEL]: `R${numString}`,
43
+ [RGP.CAP_GROUP_NAME]: `H`,
44
+ [RGP.CAP_GROUP_SMILES]: `[*:${numString}][H]`,
45
+ [RGP.ALTERNATE_ID]: `R${numString}-H`,
46
+ })),
47
+ author: 'Datagrok auto-generated',
48
+ id: 0,
49
+ createDate: null,
50
+ monomerType: 'Backbone',
51
+ };
52
+
53
+ try {
54
+ //try to generate corrected molfile and smiles
55
+ let corSmiles = smiles;
56
+ res.rgroups.forEach((rg) => {
57
+ const labelNum = rg[RGP.LABEL].substring(1); // R1 -> 1
58
+ corSmiles = corSmiles.replace(`[R${labelNum}]`, `[*:${labelNum}]`);
59
+ });
60
+ const molFile = getCorrectedMolBlock(grok.chem.convert(corSmiles, grok.chem.Notation.Smiles, grok.chem.Notation.MolBlock));
61
+ res.molfile = molFile;
62
+ res.smiles = corSmiles;
63
+ } catch (e) {
64
+ _package.logger.error(`getMonomerFromRSmiles: cannot convert SMILES to Molfile: ${smiles}\n${e}`);
65
+ }
66
+
67
+ return res;
68
+ }
69
+
70
+ /** Generate Monomer Object directly from inline smiles
71
+ * Parsing is purely string based; molfile generation is delegated to getMonomerFromRSmiles
72
+ *
73
+ * Currently accepts (to be extended):
74
+ *
75
+ * cxsmiles written as *N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$| where * are connection points
76
+ *
77
+ * or * in square brackets like [*]N[C@H](C(=O)[*])Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|
78
+ *
79
+ * simple smiles with R notations like CCC[R1] or CCC(=O)[R2]
80
+ *
81
+ * simple smiles with ambiguity defined as [*:1] like CCC[*:1] or CCC(=O)[*:2]
82
+ */
83
+ export function smiles2Monomer(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
84
+ try {
85
+ const isCxSmiles = cx_smiles_regexp.test(smiles);
86
+ if (isCxSmiles) {
87
+ // CXSMILES parsing
88
+ const parts = smiles.split('|$');
89
+ const molPart = parts[0].trim();
90
+ const rGroupPart = parts[1];
91
+ // make sure all R groups are captured
92
+ const rGroupMatches = Array.from(rGroupPart.matchAll(/R(\d+)/g));
93
+ const starsInMolecule = Array.from(molPart.matchAll(/(\*)/g));
94
+ if (rGroupMatches.length !== starsInMolecule.length)
95
+ return null; // make sure that number of R groups and stars are the same
96
+ // remove brackets from stars if any
97
+ let cleanMol = molPart.replaceAll(/\[\*\]/g, '*');
98
+ // speaking in terms of consecutiveness, R groups in definition and stars in Smiles will be in the same order
99
+ // so we can just iterate through them
100
+ const rGroupNumbers = rGroupMatches.map((m) => m[1]); // numbers as strings
101
+ for (let i = 0; i < rGroupNumbers.length; i++) {
102
+ const rNum = rGroupNumbers[i];
103
+ // replace first matched star with R group
104
+ cleanMol = cleanMol.replace('*', `[R${rNum}]`);
105
+ }
106
+ return getMonomerFromRSmiles(cleanMol, polymerType);
107
+ }
108
+
109
+ // simple smiles parsing
110
+ // to simplify, replace all ambiguous [*:1] with R1, etc
111
+ let cleanSmiles = smiles;
112
+ const ambigMatches = Array.from(smiles.matchAll(ambig_regexp));
113
+ for (const match of ambigMatches) {
114
+ const fullMatch = match[0];
115
+ const rNum = match[1];
116
+ cleanSmiles = cleanSmiles.replace(fullMatch, `[R${rNum}]`);
117
+ }
118
+
119
+ // make sure monomer has at least one R group
120
+ if (rgroup_regexp.test(cleanSmiles))
121
+ return getMonomerFromRSmiles(cleanSmiles, polymerType);
122
+ } catch (e) {
123
+ _package.logger.error(`smiles2Monomer: cannot parse SMILES: ${smiles}\n${e}`);
124
+ }
125
+
126
+
127
+ return null;
128
+ }
@@ -18,7 +18,7 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
18
18
  get issmiles(): boolean { return !!this.smiles; }
19
19
 
20
20
  /** R-Group index of a single digit only is allowed in Pistoia code */
21
- public readonly at: WebEditorRGroups = {
21
+ at: WebEditorRGroups = {
22
22
  R1: 'H', R2: 'H', R3: 'H', R4: 'H', R5: 'H', R6: 'H', R7: 'H', R8: 'H', R9: 'H'
23
23
  };
24
24
 
@@ -80,6 +80,20 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
80
80
  }
81
81
  }
82
82
 
83
+ export class SmilesWebEditorMonomer extends WebEditorMonomerDummy {
84
+ public readonly backgroundcolor: string = '#808080';
85
+ public readonly linecolor: string = '#000000';
86
+ public readonly textcolor: string = '#000000';
87
+
88
+ constructor(biotype: string, id: string, smiles: string, name: string, rgpLabels: string[]) {
89
+ super(biotype, id, name, undefined, undefined, undefined, smiles);
90
+ this.at = {};
91
+ rgpLabels.forEach((label) => {
92
+ this.at[label] = 'H';
93
+ });
94
+ }
95
+ }
96
+
83
97
  export class GapWebEditorMonomer extends WebEditorMonomerDummy {
84
98
  public readonly backgroundcolor: string = '#FFFFFF';
85
99
  public readonly linecolor: string = '#808080';
@@ -138,7 +138,7 @@ async function onDialogOk(
138
138
  colInput.fireChanged();
139
139
  if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
140
140
  throw new Error('Chosen column has to be of Macromolecule semantic type');
141
- if (performAlignment === undefined) // value can only be undefined when column can't be processed with either method
141
+ if (performAlignment == undefined) // value can only be undefined when column can't be processed with either method
142
142
  throw new Error('Invalid column format');
143
143
  msaCol = await performAlignment(); // progress
144
144
  if (msaCol == null)
@@ -149,7 +149,7 @@ function parseKalignError(out: string, limit?: number): string {
149
149
  const errLineList: string[] = [];
150
150
  const errLineRe = /^.+ERROR : (.+)$/gm;
151
151
  let ma: RegExpExecArray | null;
152
- while ((ma = errLineRe.exec(out)) != null && (limit === undefined || errLineList.length < limit)) {
152
+ while ((ma = errLineRe.exec(out)) != null && (limit == undefined || errLineList.length < limit)) {
153
153
  //
154
154
  errLineList.push(ma[1]);
155
155
  }
@@ -132,7 +132,7 @@ export class SeqHandler implements ISeqHandler {
132
132
  for (const seq of values) {
133
133
  const mSeq = !!seq ? splitter(seq) : [];
134
134
 
135
- if (firstLength === null)
135
+ if (firstLength == null)
136
136
  firstLength = mSeq.length;
137
137
  else if (mSeq.length !== firstLength)
138
138
  sameLength = false;
@@ -182,13 +182,13 @@ export class SeqHandler implements ISeqHandler {
182
182
  throw new Error('Alphabet is empty and not annotated.');
183
183
 
184
184
  let aligned = uh.column.getTag(TAGS.aligned);
185
- if (aligned === null) {
185
+ if (aligned == null) {
186
186
  aligned = uh.stats.sameLength ? ALIGNMENT.SEQ_MSA : ALIGNMENT.SEQ;
187
187
  uh.column.setTag(TAGS.aligned, aligned);
188
188
  }
189
189
 
190
190
  let alphabet = uh.column.getTag(TAGS.alphabet);
191
- if (alphabet === null) {
191
+ if (alphabet == null) {
192
192
  alphabet = detectAlphabet(uh.stats.freq, candidateAlphabets);
193
193
  uh.column.setTag(TAGS.alphabet, alphabet);
194
194
  }
@@ -200,7 +200,7 @@ export class SeqHandler implements ISeqHandler {
200
200
  }
201
201
  } else if (units === NOTATION.HELM) {
202
202
  let alphabet = uh.column.getTag(TAGS.alphabet);
203
- if (alphabet === null) {
203
+ if (alphabet == null) {
204
204
  // const cats = uh.column.categories;
205
205
  // const splitter = uh.getSplitter();
206
206
  // const samples = Array.from(new Set(
@@ -232,7 +232,7 @@ export class SeqHandler implements ISeqHandler {
232
232
 
233
233
  public get separator(): string | undefined {
234
234
  const separator: string | undefined = this.column.getTag(TAGS.separator) ?? undefined;
235
- if (this.notation === NOTATION.SEPARATOR && separator === undefined)
235
+ if (this.notation === NOTATION.SEPARATOR && separator == undefined)
236
236
  throw new Error(`Separator is mandatory for column '${this.column.name}' of notation '${this.notation}'.`);
237
237
  return separator;
238
238
  }
@@ -327,7 +327,7 @@ export class SeqHandler implements ISeqHandler {
327
327
  const seq = this.column.get(rowIdx);
328
328
  return this.getSplitter(limit)(seq);
329
329
  } else {
330
- if (this.column.version !== this.columnVersion || this._splitted === null) {
330
+ if (this.column.version !== this.columnVersion || this._splitted == null) {
331
331
  this.columnVersion = this.column.version;
332
332
  this._splitted = new Array<WeakRef<ISeqSplitted>>(this.column.length);
333
333
  }
@@ -408,7 +408,7 @@ export class SeqHandler implements ISeqHandler {
408
408
  }
409
409
 
410
410
  public get stats(): SeqColStats {
411
- if (this._stats === null) {
411
+ if (this._stats == null) {
412
412
  const freq: { [m: string]: number } = {};
413
413
  let sameLength = true;
414
414
  let firstLength = null;
@@ -435,7 +435,7 @@ export class SeqHandler implements ISeqHandler {
435
435
 
436
436
  private _maxLength: number | null = null;
437
437
  public get maxLength(): number {
438
- if (this._maxLength === null) {
438
+ if (this._maxLength == null) {
439
439
  this._maxLength = this.column.length === 0 ? 0 :
440
440
  wu.count(0).take(this.column.length).map((rowIdx) => this.getSplitted(rowIdx).length).reduce((a, b) => a > b ? a : b, 0);
441
441
  }
@@ -444,7 +444,7 @@ export class SeqHandler implements ISeqHandler {
444
444
 
445
445
  private _posList: string[] | null = null;
446
446
  public get posList(): string[] {
447
- if (this._posList === null) {
447
+ if (this._posList == null) {
448
448
  const posListTxt = this.column.getTag(TAGS.positionNames);
449
449
  this._posList = posListTxt ? posListTxt.split(positionSeparator).map((p) => p.trim()) :
450
450
  wu.count(1).take(this.maxLength).map((pos) => pos.toString()).toArray();
@@ -619,7 +619,7 @@ export class SeqHandler implements ISeqHandler {
619
619
  }
620
620
 
621
621
  get splitter(): SplitterFunc {
622
- if (this._splitter === null)
622
+ if (this._splitter == null)
623
623
  this._splitter = this.getSplitter();
624
624
  return this._splitter;
625
625
  }
@@ -3,7 +3,7 @@ import * as DG from 'datagrok-api/dg';
3
3
 
4
4
  export function getMacromoleculeColumns(): DG.Column<string>[] {
5
5
  const columns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
6
- if (columns === null) {
6
+ if (columns == null) {
7
7
  grok.shell.error('Current table does not contain macromolecules');
8
8
  return [];
9
9
  }