@datagrok/bio 2.24.0 → 2.25.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/detectors.js +26 -12
  3. package/dist/455.js.map +1 -1
  4. package/dist/package-test.js +2 -2
  5. package/dist/package-test.js.map +1 -1
  6. package/dist/package.js +2 -2
  7. package/dist/package.js.map +1 -1
  8. package/files/samples/HELM_CHEMS.csv +11 -0
  9. package/package.json +2 -2
  10. package/src/analysis/sequence-space.ts +1 -1
  11. package/src/demo/bio03-atomic-level.ts +1 -1
  12. package/src/package-types.ts +1 -1
  13. package/src/package.ts +1 -1
  14. package/src/tests/monomer-libraries-tests.ts +1 -1
  15. package/src/utils/get-region.ts +2 -2
  16. package/src/utils/helm-to-molfile/converter/const.ts +0 -1
  17. package/src/utils/helm-to-molfile/converter/converter.ts +3 -3
  18. package/src/utils/helm-to-molfile/converter/helm.ts +14 -6
  19. package/src/utils/helm-to-molfile/converter/mol-bonds.ts +1 -1
  20. package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +2 -2
  21. package/src/utils/helm-to-molfile/converter/r-group-handler.ts +2 -2
  22. package/src/utils/monomer-lib/library-file-manager/file-validator.ts +1 -1
  23. package/src/utils/monomer-lib/library-file-manager/ui.ts +22 -5
  24. package/src/utils/monomer-lib/monomer-lib-base.ts +31 -3
  25. package/src/utils/monomer-lib/monomer-lib.ts +0 -26
  26. package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
  27. package/src/utils/monomer-lib/smiles2Monomer.ts +128 -0
  28. package/src/utils/monomer-lib/web-editor-monomer-dummy.ts +15 -1
  29. package/src/utils/multiple-sequence-alignment-ui.ts +1 -1
  30. package/src/utils/multiple-sequence-alignment.ts +1 -1
  31. package/src/utils/seq-helper/seq-handler.ts +25 -16
  32. package/src/utils/ui-utils.ts +1 -1
  33. package/src/viewers/web-logo-viewer.ts +19 -8
  34. package/test-console-output-1.log +784 -775
  35. package/test-record-1.mp4 +0 -0
@@ -0,0 +1,11 @@
1
+ HELM,Activity
2
+ "PEPTIDE1{[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE1,PEPTIDE1,8:R2-1:R1$$$",5.3075109739681280
3
+ "PEPTIDE1{[ac].D.A.D.E.[*N[C@H](C(=O)*)Cc1ccc(cc1)OC(C(=O)O)C(=O)O |$_R1;;;;;_R2;;;;;;;;;;;;;;;$|].L.[am]}$$$$ CHEMBL8284
4
+ PEPTIDE1{[N[C@](C)(C(=O)O)Cc1ccc(c(c1)O)O]}$$$$",5.7238768534315438
5
+ "PEPTIDE1{[meL].[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].[meL].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}|PEPTIDE2{E.[*O[C@@H](C(=O)*)C |$_R1;;;;;_R2;$|].E.[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@H](C(=O)*)C |$_R1;;;;;_R2;$|].[*N(C)[C@H](C(=O)*)CCC |$_R1;;;;;;_R2;;;$|].[*O[C@@H](C(=O)*)Cc1ccccc1 |$_R1;;;;;_R2;;;;;;;$|]}$PEPTIDE2,PEPTIDE2,8:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R1-3:R3|PEPTIDE2,PEPTIDE1,1:R3-8:R2$$$V2.0",5.1858112460224372
6
+ "PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].F.I.Q.N.[dC].S.R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$V2.0",6.2235023908043692
7
+ "PEPTIDE1{A.[dP].D.[dW].F.[dF].N.[dY].Y.[dW].G.[dN].W.[dH].G.[*N[C@@H](C(=O)*)[C@@H](C)O |$_R1;;;;;_R2;;;$|]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1$$$",3.8459123763832412
8
+ PEPTIDE1{N.L.E.R.E.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].L.E.E.P.[*N[C@H](C(=O)*)CSCNC(=O)C |$_R1;;;;;_R2;;;;;;;$|].S.R.E.E.A.F}$$$$,3.2792043882465700
9
+ "PEPTIDE1{[[C@H](C(=O)*)CS* |$;;;_R2;;;_R3$|].Y.[dF].G.[dN].[dC].[dP].R.G.[am]}$PEPTIDE1,PEPTIDE1,1:R3-6:R3$$$",2.1058521529925680
10
+ PEPTIDE1{A.[[*]C(=O)[C@H](C)N([*])C |$_R2;;;;;;_R1;;;$|].A}$$$$V2.0,1.8036950016492720
11
+ "CHEM1{[*N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|]}|PEPTIDE1{[C[C@H](N[*])C(=O)C[*] |$;;;_R1;;;;_R2$|].G.G.G.C.C.K.K.K.K}$PEPTIDE1,CHEM1,10:R3-1:R1$$$V2.0",6.3880602836120888
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.24.0",
8
+ "version": "2.25.1",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.60.0",
47
+ "@datagrok-libraries/bio": "^5.61.1",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
50
  "@datagrok-libraries/ml": "^6.10.6",
@@ -33,7 +33,7 @@ export async function getEncodedSeqSpaceCol(
33
33
  for (let rowIdx = 0; rowIdx < rowCount; rowIdx++) {
34
34
  const catI = seqColRawData[rowIdx];
35
35
  const seq = seqColCats[catI];
36
- if (seq === null || seqCol.isNone(rowIdx)) {
36
+ if (seq == null || seqCol.isNone(rowIdx)) {
37
37
  //@ts-ignore
38
38
  encList[rowIdx] = null;
39
39
  continue;
@@ -18,7 +18,7 @@ export async function demoToAtomicLevel(): Promise<void> {
18
18
  adjustGridcolAfterRender(view.grid, 'molfile(HELM)', 500, 300, true);
19
19
  adjustGridcolAfterRender(view.grid, 'HELM', 500, undefined, true);
20
20
  grok.shell.info('Hover over monomers in HELM column to highlight them in molecular structure.', {timeout: 10});
21
- grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#get-atomic-level-structure');
21
+ grok.shell.windows.help.showHelp('/help/datagrok/solutions/domains/bio/bio.md#convert-to-atomic-level');
22
22
  }
23
23
 
24
24
  export async function demoBio03UI(): Promise<void> {
@@ -41,7 +41,7 @@ export class BioPackageProperties extends Map<string, any> {
41
41
  }
42
42
 
43
43
  public set maxMonomerLength(value: number | null) {
44
- const vs = value === null ? 'long' : value.toString();
44
+ const vs = value == null ? 'long' : value.toString();
45
45
  super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
46
46
  this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
47
47
  }
package/src/package.ts CHANGED
@@ -123,7 +123,7 @@ export class PackageFunctions {
123
123
 
124
124
  @grok.decorators.init({})
125
125
  static async initBio(): Promise<void> {
126
- if (initBioPromise === null)
126
+ if (initBioPromise == null)
127
127
  initBioPromise = initBioInt();
128
128
 
129
129
  await initBioPromise;
@@ -83,7 +83,7 @@ category('monomerLibraries', () => {
83
83
  };
84
84
  const monomerLib = monomerLibHelper.getMonomerLib();
85
85
  const absentOverrideMonomer = monomerLib.getMonomer(overMon.polymerType, overMon.symbol);
86
- expect(absentOverrideMonomer === null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
86
+ expect(absentOverrideMonomer == null, true, `Unexpectedly found monomer '${overMon.symbol}' `);
87
87
 
88
88
  const overriddenMonomerLib = monomerLib.override({[overMon.polymerType]: {[overMon.symbol]: overMon}}, 'test');
89
89
  const resOverMon = overriddenMonomerLib.getMonomer(overMon.polymerType, overMon.symbol);
@@ -47,9 +47,9 @@ export function getRegionDo(
47
47
  if (sh.posList[posJ] == startPosName) startPosIdx = posJ;
48
48
  if (sh.posList[posJ] == endPosName) endPosIdx = posJ;
49
49
  }
50
- if (startPosIdx === null && startPosName !== null)
50
+ if (startPosIdx == null && startPosName !== null)
51
51
  throw new Error(`Start position ${startPosName} not found.`);
52
- if (endPosIdx === null && endPosName !== null)
52
+ if (endPosIdx == null && endPosName !== null)
53
53
  throw new Error(`End position ${endPosName} not found.`);
54
54
 
55
55
  if (sh.posList.length < endPosIdx!)
@@ -1,4 +1,3 @@
1
1
  export const HELM_ITEM_SEPARATOR = '|';
2
- export const HELM_SECTION_SEPARATOR = '$';
3
2
 
4
3
  export const HYDROGEN_SYMBOL = 'H';
@@ -29,7 +29,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
29
29
  const smilesColName = `smiles(${helmCol.name})`;
30
30
  const smilesColNameU = df ? df.columns.getUnusedName(smilesColName) : smilesColName;
31
31
  return DG.Column.fromStrings(smilesColNameU, smiles.map((molecule) => {
32
- if (molecule === null)
32
+ if (molecule == null)
33
33
  return '';
34
34
  return molecule;
35
35
  }));
@@ -57,7 +57,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
57
57
 
58
58
  public getMolV3000ViaOCL(beautifiedMols: (RDMol | null)[], columnName: string): DG.Column<string> {
59
59
  const beautifiedMolV2000 = beautifiedMols.map((mol) => {
60
- if (mol === null)
60
+ if (mol == null)
61
61
  return '';
62
62
  const molBlock = mol.get_v3Kmolblock();
63
63
  mol!.delete();
@@ -97,7 +97,7 @@ export class HelmToMolfileConverter implements IHelmToMolfileConverter {
97
97
  if (chiralityEngine)
98
98
  return this.getMolV3000ViaOCL(beautifiedMols, molColNameU);
99
99
  return DG.Column.fromStrings(molColNameU, beautifiedMols.map((mol) => {
100
- if (mol === null)
100
+ if (mol == null)
101
101
  return '';
102
102
  const molBlock = mol.get_v3Kmolblock();
103
103
  mol!.delete();
@@ -1,16 +1,24 @@
1
1
  import {HELM_POLYMER_TYPE} from '@datagrok-libraries/bio/src/utils/const';
2
2
  import {ConnectionList} from './connection-list';
3
- import {HELM_ITEM_SEPARATOR, HELM_SECTION_SEPARATOR} from './const';
3
+ import {HELM_ITEM_SEPARATOR} from './const';
4
4
  import {SimplePolymer} from './simple-polymer';
5
5
  import {Bond} from './types';
6
6
 
7
7
  export class Helm {
8
8
  constructor(private helmString: string) {
9
- const helmSections = this.helmString.split(HELM_SECTION_SEPARATOR);
10
- const simplePolymers = helmSections[0].split(HELM_ITEM_SEPARATOR);
11
- this.simplePolymers = simplePolymers
9
+ const indexOfSequenceEnd = helmString.indexOf('}$');
10
+ const sequencePart = helmString.substring(0, indexOfSequenceEnd + 1);
11
+ const connectionsEndPart = helmString.indexOf('$', indexOfSequenceEnd + 2);
12
+ const connectionsPart = helmString.substring(indexOfSequenceEnd + 2, connectionsEndPart);
13
+ // const helmParts = seq.split('$');
14
+ const spList = sequencePart.split('}|');
15
+ // since we removed }|, need to add } to last part
16
+ for (let i = 0; i < spList.length - 1; i++)
17
+ spList[i] = spList[i] + '}';
18
+
19
+ this.simplePolymers = spList
12
20
  .map((item) => new SimplePolymer(item));
13
- this.connectionList = new ConnectionList(helmSections[1]);
21
+ this.connectionList = new ConnectionList(connectionsPart);
14
22
  this.bondData = this.getBondData();
15
23
 
16
24
  this.bondedRGroupsMap = this.getBondedRGroupsMap();
@@ -58,7 +66,7 @@ export class Helm {
58
66
  const lowerBound = shiftValues.sort((a, b) => b - a).find( // find the largest shift not exceeding monomerGlobalIdx
59
67
  (shift) => monomerGlobalIdx >= shift
60
68
  );
61
- if (lowerBound === undefined)
69
+ if (lowerBound == undefined)
62
70
  throw new Error(`Cannot find simple polymer for monomer ${monomerGlobalIdx}`);
63
71
  const simplePolymerId = Object.keys(shifts).find((simplePolymerId) => shifts[simplePolymerId] === lowerBound)!;
64
72
  const simplePolymer = this.simplePolymers.find((simplePolymer) => simplePolymer.id === simplePolymerId)!;
@@ -31,7 +31,7 @@ export abstract class MolfileBonds {
31
31
  }
32
32
 
33
33
  replacePositionsInBondsByDummy(positions: PositionInBonds[], dummy?: number): void {
34
- if (dummy === undefined)
34
+ if (dummy == undefined)
35
35
  dummy = -1;
36
36
  positions.forEach((position) => {
37
37
  const {bondLineIdx, nodeIdx} = position;
@@ -15,7 +15,7 @@ export abstract class MolfileWrapper {
15
15
 
16
16
  protected shiftR1GroupToOrigin(): void {
17
17
  const r1Idx = this.rGroups.getAtomicIdx(1);
18
- if (r1Idx === null)
18
+ if (r1Idx == null)
19
19
  return; // R1 group is not present, nothing to shift
20
20
  const {x, y} = this.atoms.atomCoordinates[r1Idx];
21
21
  this.atoms.shift({x: -x, y: -y});
@@ -23,7 +23,7 @@ export abstract class MolfileWrapper {
23
23
 
24
24
  protected alignR2AlongX(): void {
25
25
  const r2Idx = this.rGroups.getAtomicIdx(2);
26
- if (r2Idx === null)
26
+ if (r2Idx == null)
27
27
  throw new Error(`Cannot find R2 group for monomer ${this.monomerSymbol}`);
28
28
  const r2Coordinates = this.atoms.atomCoordinates[r2Idx];
29
29
  const tan = r2Coordinates.y / r2Coordinates.x;
@@ -19,13 +19,13 @@ export class RGroupHandler {
19
19
 
20
20
  getAtomicIdx(rGroupId: number): number | null {
21
21
  const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rGroupId);
22
- return atomicIdx === undefined ? null : atomicIdx;
22
+ return atomicIdx == undefined ? null : atomicIdx;
23
23
  }
24
24
 
25
25
  private removeRGroupsFromAtomBlock(rGroupIds: number[]): void {
26
26
  rGroupIds.forEach((rgroupId) => {
27
27
  const atomicIdx = this.rGroupIdToAtomicIndexMap.get(rgroupId);
28
- if (atomicIdx === undefined)
28
+ if (atomicIdx == undefined)
29
29
  throw new Error(`Cannot find atomic index for R group ${rgroupId}`);
30
30
  });
31
31
 
@@ -22,7 +22,7 @@ export class MonomerLibFileValidator {
22
22
 
23
23
  validateFile(fileContent: string, fileName: string): boolean {
24
24
  const jsonContent = this.parseJson(fileContent, fileName);
25
- if (jsonContent === null)
25
+ if (jsonContent == null)
26
26
  return false;
27
27
 
28
28
  if (!Array.isArray(jsonContent)) {
@@ -5,7 +5,7 @@ import * as ui from 'datagrok-api/ui';
5
5
  import * as DG from 'datagrok-api/dg';
6
6
 
7
7
  import $ from 'cash-dom';
8
- import {Subject} from 'rxjs';
8
+ import {Subject, Subscription} from 'rxjs';
9
9
 
10
10
  import {
11
11
  getUserLibSettings, setUserLibSettings
@@ -50,7 +50,7 @@ class MonomerLibraryManagerWidget {
50
50
  private libHelper: IMonomerLibHelper;
51
51
 
52
52
  static async getInstance(): Promise<MonomerLibraryManagerWidget> {
53
- if (MonomerLibraryManagerWidget.instancePromise === undefined) {
53
+ if (MonomerLibraryManagerWidget.instancePromise == undefined) {
54
54
  MonomerLibraryManagerWidget.instancePromise = (async () => {
55
55
  const instance = new MonomerLibraryManagerWidget();
56
56
  const libHelper = await getMonomerLibHelper();
@@ -67,10 +67,12 @@ class MonomerLibraryManagerWidget {
67
67
  instance._widget = await instance.createWidget();
68
68
  }
69
69
 
70
+ private _fileUploadSubscription: Subscription | null = null;
70
71
  private async createWidget() {
71
72
  const content = await this.getWidgetContent();
72
73
  const monomerLibHelper = await getMonomerLibHelper();
73
- // eslint-disable-next-line rxjs/no-ignored-subscription
74
+ this._fileUploadSubscription?.unsubscribe();
75
+ this._fileUploadSubscription =
74
76
  monomerLibHelper.fileUploadRequested.subscribe(
75
77
  () => this.promptToAddLibraryFiles()
76
78
  );
@@ -92,14 +94,29 @@ class MonomerLibraryManagerWidget {
92
94
  accept: '.json',
93
95
  open: async (selectedFile) => {
94
96
  const doAdd = async (provider: IMonomerLibProvider) => {
95
- const content = await selectedFile.text();
96
97
  const name = selectedFile.name;
98
+ const existingLibs = await provider.listLibraries();
99
+ // check if library already exists
100
+ if (existingLibs.includes(name)) {
101
+ const confirm = await new Promise<boolean>((resolve) => {
102
+ ui.dialog('Confirm Library Update')
103
+ .add(ui.divText(`Library '${name}' already exists. Do you want to overwrite it?`))
104
+ .onOK(() => resolve(true))
105
+ .onCancel(() => resolve(false))
106
+ .show();
107
+ });
108
+ if (!confirm)
109
+ return;
110
+ }
111
+
112
+ const content = await selectedFile.text();
97
113
  const progressIndicator = DG.TaskBarProgressIndicator.create(`Adding ${name} as a monomer library`);
98
114
  try {
99
115
  await provider.addOrUpdateLibraryString(name, content);
100
116
  // this.eventManager.updateLibrarySelectionStatus(name, true);
101
117
  } catch (e) {
102
118
  grok.shell.error(`File ${name} is not a valid monomer library, verify it is aligned to HELM JSON schema.`);
119
+ console.error(e);
103
120
  } finally {
104
121
  progressIndicator.close();
105
122
  }
@@ -122,7 +139,7 @@ class MonomerLibraryManagerWidget {
122
139
  .onOK(async () => {
123
140
  const provider = providers.find((p) => p.name === providersInput.value)!; // should not be null
124
141
  await doAdd(provider);
125
- });
142
+ }).show();
126
143
  },
127
144
  });
128
145
  }
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  /* eslint-disable max-lines */
2
3
  import * as grok from 'datagrok-api/grok';
3
4
  import * as ui from 'datagrok-api/ui';
@@ -18,12 +19,14 @@ import {GAP_SYMBOL, GapOriginals, NOTATION} from '@datagrok-libraries/bio/src/ut
18
19
  import {Vector} from '@datagrok-libraries/utils/src/type-declarations';
19
20
  import {vectorAdd, vectorDotProduct, vectorLength} from '@datagrok-libraries/utils/src/vector-operations';
20
21
 
21
- import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer} from './web-editor-monomer-dummy';
22
+ import {AmbiguousWebEditorMonomer, GapWebEditorMonomer, MissingWebEditorMonomer, SmilesWebEditorMonomer} from './web-editor-monomer-dummy';
22
23
  import {LibraryWebEditorMonomer} from './web-editor-monomer-of-library';
23
24
  import {naturalMonomerColors} from './monomer-colors';
24
25
 
25
26
  import {_package} from '../../package';
26
27
  import {MonomerLibData} from '@datagrok-libraries/bio/src/types/monomer-library';
28
+ import {smiles2Monomer} from './smiles2Monomer';
29
+ import {polymerTypeToHelmType} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
27
30
 
28
31
  const monomerRe = /[\w()]+/;
29
32
  //** Do not mess with monomer symbol with parenthesis enclosed in square brackets */
@@ -61,9 +64,15 @@ export class MonomerLibBase implements IMonomerLibBase {
61
64
  }
62
65
 
63
66
  getMonomerSymbolsByType(polymerType: PolymerType): string[] {
64
- return Object.keys(this._monomers[polymerType]);
67
+ const res = Object.keys(this._monomers[polymerType]);
68
+ if (this._smilesMonomerCache[polymerType])
69
+ res.push(...Object.keys(this._smilesMonomerCache[polymerType]));
70
+ return res;
65
71
  }
66
72
 
73
+ // smiles to symbol Mapping cache
74
+ private _smilesMonomerCache: {[polymerType: string]: {[smiles: string]: string}} = {};
75
+
67
76
  /** Creates missing {@link Monomer} */
68
77
  addMissingMonomer(polymerType: PolymerType, monomerSymbol: string): Monomer {
69
78
  let mSet = this._monomers[polymerType];
@@ -78,6 +87,22 @@ export class MonomerLibBase implements IMonomerLibBase {
78
87
  else if (polymerType === PolymerTypes.RNA && monomerSymbol === 'N')
79
88
  monomerName = 'Any';
80
89
 
90
+ // test if it is smiles
91
+ // check if the missing monomer symbol is a valid SMILES string
92
+ const smilesMonomer = smiles2Monomer(monomerSymbol, polymerType);
93
+ if (smilesMonomer) {
94
+ this._smilesMonomerCache[polymerType] = this._smilesMonomerCache[polymerType] ?? {};
95
+ const smSet = this._smilesMonomerCache[polymerType];
96
+ const symbol = Object.keys(smSet).length + 1;
97
+ smSet[monomerSymbol] = `#${polymerType[0]}${symbol}`; // e.g. #P1, #R2, #C3, #B4
98
+ const m: Monomer = {...smilesMonomer, symbol: smSet[monomerSymbol]};
99
+ // note, the ID becomes #<index> for smiles based monomers, and as the smiles, original smiles is passed (which is in monomerSymbol), to avoid key duplication
100
+ const wem = new SmilesWebEditorMonomer(polymerTypeToHelmType(polymerType), m.symbol, monomerSymbol, `SMILES Monomer ${m.symbol}`, m.rgroups.map((rg) => rg[RGP.LABEL]));
101
+ m.wem = wem;
102
+ mSet[m.symbol] = m;
103
+ return m;
104
+ }
105
+
81
106
  const m = mSet[monomerSymbol] = {
82
107
  [REQ.SYMBOL]: monomerSymbol,
83
108
  [REQ.NAME]: monomerName,
@@ -124,8 +149,11 @@ export class MonomerLibBase implements IMonomerLibBase {
124
149
  if (res) break;
125
150
  }
126
151
  } else {
152
+ // Check smiles cache, modify with mapped symbol
153
+ if (this._smilesMonomerCache[polymerType]?.[monomerSymbol])
154
+ monomerSymbol = this._smilesMonomerCache[polymerType][monomerSymbol];
127
155
  const dict = this._monomers[polymerType];
128
- res = dict ? dict[monomerSymbol] : null;
156
+ res = dict?.[monomerSymbol] ?? null;
129
157
  }
130
158
  return res;
131
159
  }
@@ -55,32 +55,6 @@ export class MonomerLib extends MonomerLibBase implements IMonomerLib {
55
55
  return resJSON;
56
56
  }
57
57
 
58
- getMonomer(polymerType: PolymerType | null, argMonomerSymbol: string): Monomer | null {
59
- const logPrefix = `Bio: MonomerLib.getMonomer()`;
60
- // Adjust RNA's 'R' for ribose to 'r' and 'P' for phosphate to 'p' for case-sensitive monomer names.
61
- // There are uppercase 'R' and 'P' at RNA samples in test data 'helm2.csv' but lowercase in HELMCoreLibrary.json
62
- let monomerSymbol = argMonomerSymbol;
63
- if (polymerType == 'RNA' && monomerSymbol == 'R')
64
- monomerSymbol = 'r';
65
- if (polymerType == 'RNA' && monomerSymbol == 'P')
66
- monomerSymbol = 'p';
67
-
68
- let res: Monomer | null = null;
69
-
70
- if (!polymerType) {
71
- _package.logger.warning(`${logPrefix} symbol '${argMonomerSymbol}', polymerType not specified.`);
72
- // Assume any polymer type
73
- for (const [_polymerType, dict] of Object.entries(this._monomers)) {
74
- res = dict[monomerSymbol];
75
- if (res) break;
76
- }
77
- } else {
78
- const dict = this._monomers[polymerType];
79
- res = dict?.[monomerSymbol] ?? null;
80
- }
81
- return res;
82
- }
83
-
84
58
  private _monomerSets: { [biotype: string /*HelmType*/]: MonomerSetType } | null = null;
85
59
 
86
60
  getMonomerSet(biotype: HelmType): MonomerSetType | null {
@@ -1178,7 +1178,7 @@ function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: strin
1178
1178
  return isSmilesMalformed ? canonical : grok.chem.convert(canonical, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
1179
1179
  }
1180
1180
 
1181
- function getCorrectedMolBlock(molBlock: string) {
1181
+ export function getCorrectedMolBlock(molBlock: string) {
1182
1182
  // to correct molblock, we should make sure that
1183
1183
  // 1. RGP field is present at the end, before the M END line
1184
1184
  // 2. RGP field is present in the correct format
@@ -0,0 +1,128 @@
1
+ /* eslint-disable camelcase */
2
+ /* eslint-disable max-len */
3
+ import {Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
4
+ import {_package} from '../../package';
5
+ import {PolymerType} from '@datagrok-libraries/bio/src/helm/types';
6
+ import {HELM_RGROUP_FIELDS as RGP} from '@datagrok-libraries/bio/src/utils/const';
7
+ import * as grok from 'datagrok-api/grok';
8
+ import {getCorrectedMolBlock} from './monomer-manager/monomer-manager';
9
+
10
+ /**
11
+ * Example R groups
12
+ * {
13
+ "capGroupSMILES": "[*:1][H]",
14
+ "alternateId": "R1-H",
15
+ "capGroupName": "H",
16
+ "label": "R1"
17
+ },
18
+ {
19
+ "capGroupSMILES": "O[*:2]",
20
+ "alternateId": "R2-OH",
21
+ "capGroupName": "OH",
22
+ "label": "R2"
23
+ }
24
+
25
+ */
26
+
27
+ const cx_smiles_regexp = /.*\|\$.*R.*\$\|/;
28
+ const rgroup_regexp = /\[R(\d+)\]/;
29
+ const rgroup_regexpg = /\[R(\d+)\]/g;
30
+ const ambig_regexp = /\[\*:(\d+)\]/g;
31
+
32
+ export type MonomerWithoutSymbol = Omit<Monomer, 'symbol'>;
33
+
34
+ export function getMonomerFromRSmiles(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
35
+ const rgroupNumbers = Array.from(smiles.matchAll(rgroup_regexpg)).map((m) => m[1]);
36
+ const res: MonomerWithoutSymbol = {
37
+ name: 'Explicit SMILES Monomer',
38
+ smiles: smiles,
39
+ polymerType: polymerType ?? 'CHEM',
40
+ molfile: '',
41
+ rgroups: rgroupNumbers.map((numString) => ({
42
+ [RGP.LABEL]: `R${numString}`,
43
+ [RGP.CAP_GROUP_NAME]: `H`,
44
+ [RGP.CAP_GROUP_SMILES]: `[*:${numString}][H]`,
45
+ [RGP.ALTERNATE_ID]: `R${numString}-H`,
46
+ })),
47
+ author: 'Datagrok auto-generated',
48
+ id: 0,
49
+ createDate: null,
50
+ monomerType: 'Backbone',
51
+ };
52
+
53
+ try {
54
+ //try to generate corrected molfile and smiles
55
+ let corSmiles = smiles;
56
+ res.rgroups.forEach((rg) => {
57
+ const labelNum = rg[RGP.LABEL].substring(1); // R1 -> 1
58
+ corSmiles = corSmiles.replace(`[R${labelNum}]`, `[*:${labelNum}]`);
59
+ });
60
+ const molFile = getCorrectedMolBlock(grok.chem.convert(corSmiles, grok.chem.Notation.Smiles, grok.chem.Notation.MolBlock));
61
+ res.molfile = molFile;
62
+ res.smiles = corSmiles;
63
+ } catch (e) {
64
+ _package.logger.error(`getMonomerFromRSmiles: cannot convert SMILES to Molfile: ${smiles}\n${e}`);
65
+ }
66
+
67
+ return res;
68
+ }
69
+
70
+ /** Generate Monomer Object directly from inline smiles
71
+ * Purely string based, no external calls
72
+ *
73
+ * Currently accepts (to be extended):
74
+ *
75
+ * cxsmiles written as *N[C@H](C(=O)*)Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$| where * are connection points
76
+ *
77
+ * or * in square brackets like [*]N[C@H](C(=O)[*])Cc1ccc(cc1)OP(=O)(O)O |$_R1;;;;;_R2;;;;;;;;;;;;$|
78
+ *
79
+ * simple smiles with R notations like CCC[R1] or CCC(=O)[R2]
80
+ *
81
+ * simple smiles with ambiguity defined as [*:1] like CCC[*:1] or CCC(=O)[*:2]
82
+ */
83
+ export function smiles2Monomer(smiles: string, polymerType?: PolymerType): MonomerWithoutSymbol | null {
84
+ try {
85
+ const isCxSmiles = cx_smiles_regexp.test(smiles);
86
+ if (isCxSmiles) {
87
+ // CXSMILES parsing
88
+ const parts = smiles.split('|$');
89
+ const molPart = parts[0].trim();
90
+ const rGroupPart = parts[1];
91
+ // make sure all R groups are captured
92
+ const rGroupMatches = Array.from(rGroupPart.matchAll(/R(\d+)/g));
93
+ const starsInMolecule = Array.from(molPart.matchAll(/(\*)/g));
94
+ if (rGroupMatches.length !== starsInMolecule.length)
95
+ return null; // make sure that number of R groups and stars are the same
96
+ // remove brackets from stars if any
97
+ let cleanMol = molPart.replaceAll(/\[\*\]/g, '*');
98
+ // speaking in terms of consecutiveness, R groups in definition and stars in Smiles will be in the same order
99
+ // so we can just iterate through them
100
+ const rGroupNumbers = rGroupMatches.map((m) => m[1]); // numbers as strings
101
+ for (let i = 0; i < rGroupNumbers.length; i++) {
102
+ const rNum = rGroupNumbers[i];
103
+ // replace first matched star with R group
104
+ cleanMol = cleanMol.replace('*', `[R${rNum}]`);
105
+ }
106
+ return getMonomerFromRSmiles(cleanMol, polymerType);
107
+ }
108
+
109
+ // simple smiles parsing
110
+ // to simplify, replace all ambiguous [*:1] with R1, etc
111
+ let cleanSmiles = smiles;
112
+ const ambigMatches = Array.from(smiles.matchAll(ambig_regexp));
113
+ for (const match of ambigMatches) {
114
+ const fullMatch = match[0];
115
+ const rNum = match[1];
116
+ cleanSmiles = cleanSmiles.replace(fullMatch, `[R${rNum}]`);
117
+ }
118
+
119
+ // make sure monomer has at least one R group
120
+ if (rgroup_regexp.test(cleanSmiles))
121
+ return getMonomerFromRSmiles(cleanSmiles, polymerType);
122
+ } catch (e) {
123
+ _package.logger.error(`smiles2Monomer: cannot parse SMILES: ${smiles}\n${e}`);
124
+ }
125
+
126
+
127
+ return null;
128
+ }
@@ -18,7 +18,7 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
18
18
  get issmiles(): boolean { return !!this.smiles; }
19
19
 
20
20
  /** Only single-digit R-Group indices are allowed in Pistoia code */
21
- public readonly at: WebEditorRGroups = {
21
+ at: WebEditorRGroups = {
22
22
  R1: 'H', R2: 'H', R3: 'H', R4: 'H', R5: 'H', R6: 'H', R7: 'H', R8: 'H', R9: 'H'
23
23
  };
24
24
 
@@ -80,6 +80,20 @@ export abstract class WebEditorMonomerDummy implements IWebEditorMonomer {
80
80
  }
81
81
  }
82
82
 
83
+ export class SmilesWebEditorMonomer extends WebEditorMonomerDummy {
84
+ public readonly backgroundcolor: string = '#808080';
85
+ public readonly linecolor: string = '#000000';
86
+ public readonly textcolor: string = '#000000';
87
+
88
+ constructor(biotype: string, id: string, smiles: string, name: string, rgpLabels: string[]) {
89
+ super(biotype, id, name, undefined, undefined, undefined, smiles);
90
+ this.at = {};
91
+ rgpLabels.forEach((label) => {
92
+ this.at[label] = 'H';
93
+ });
94
+ }
95
+ }
96
+
83
97
  export class GapWebEditorMonomer extends WebEditorMonomerDummy {
84
98
  public readonly backgroundcolor: string = '#FFFFFF';
85
99
  public readonly linecolor: string = '#808080';
@@ -138,7 +138,7 @@ async function onDialogOk(
138
138
  colInput.fireChanged();
139
139
  if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
140
140
  throw new Error('Chosen column has to be of Macromolecule semantic type');
141
- if (performAlignment === undefined) // value can only be undefined when column can't be processed with either method
141
+ if (performAlignment == undefined) // value can only be undefined when column can't be processed with either method
142
142
  throw new Error('Invalid column format');
143
143
  msaCol = await performAlignment(); // progress
144
144
  if (msaCol == null)
@@ -149,7 +149,7 @@ function parseKalignError(out: string, limit?: number): string {
149
149
  const errLineList: string[] = [];
150
150
  const errLineRe = /^.+ERROR : (.+)$/gm;
151
151
  let ma: RegExpExecArray | null;
152
- while ((ma = errLineRe.exec(out)) != null && (limit === undefined || errLineList.length < limit)) {
152
+ while ((ma = errLineRe.exec(out)) != null && (limit == undefined || errLineList.length < limit)) {
153
153
  //
154
154
  errLineList.push(ma[1]);
155
155
  }