@datagrok/bio 2.14.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/css/monomer-manager.css +66 -0
  3. package/detectors.js +7 -2
  4. package/dist/111.js +1 -1
  5. package/dist/111.js.map +1 -1
  6. package/dist/234.js +1 -1
  7. package/dist/234.js.map +1 -1
  8. package/dist/242.js.map +1 -1
  9. package/dist/603.js +1 -1
  10. package/dist/603.js.map +1 -1
  11. package/dist/682.js +1 -1
  12. package/dist/682.js.map +1 -1
  13. package/dist/705.js +1 -1
  14. package/dist/705.js.map +1 -1
  15. package/dist/778.js +1 -1
  16. package/dist/778.js.map +1 -1
  17. package/dist/793.js +1 -1
  18. package/dist/793.js.map +1 -1
  19. package/dist/801.js +2 -0
  20. package/dist/801.js.map +1 -0
  21. package/dist/950.js +1 -1
  22. package/dist/950.js.map +1 -1
  23. package/dist/980.js +2 -0
  24. package/dist/980.js.map +1 -0
  25. package/dist/package-test.js +6 -6
  26. package/dist/package-test.js.map +1 -1
  27. package/dist/package.js +5 -5
  28. package/dist/package.js.map +1 -1
  29. package/files/monomer-libraries/polytool-lib.json +48 -0
  30. package/files/monomer-libraries/sample-lib-Aca-colored.json +2 -2
  31. package/package.json +20 -12
  32. package/src/analysis/sequence-space.ts +2 -1
  33. package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
  34. package/src/package-test.ts +3 -1
  35. package/src/package-types.ts +9 -1
  36. package/src/package.ts +77 -33
  37. package/src/seq_align.ts +1 -1
  38. package/src/substructure-search/substructure-search.ts +2 -2
  39. package/src/tests/WebLogo-project-tests.ts +3 -4
  40. package/src/tests/activity-cliffs-tests.ts +5 -18
  41. package/src/tests/detectors-benchmark-tests.ts +24 -9
  42. package/src/tests/mm-distance-tests.ts +4 -3
  43. package/src/tests/monomer-libraries-tests.ts +3 -3
  44. package/src/tests/seq-handler-get-helm-tests.ts +88 -0
  45. package/src/tests/sequence-space-test.ts +4 -3
  46. package/src/tests/to-atomic-level-tests.ts +2 -0
  47. package/src/tests/to-atomic-level-ui-tests.ts +74 -0
  48. package/src/utils/cell-renderer.ts +3 -0
  49. package/src/utils/convert.ts +2 -2
  50. package/src/utils/cyclized.ts +20 -1
  51. package/src/utils/dimerized.ts +12 -0
  52. package/src/utils/get-region-func-editor.ts +1 -1
  53. package/src/utils/helm-to-molfile/converter/converter.ts +58 -30
  54. package/src/utils/helm-to-molfile/converter/mol-atoms.ts +2 -0
  55. package/src/utils/helm-to-molfile/converter/mol-bonds.ts +2 -0
  56. package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +5 -1
  57. package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +7 -3
  58. package/src/utils/helm-to-molfile/converter/polymer.ts +21 -6
  59. package/src/utils/helm-to-molfile/converter/types.ts +11 -0
  60. package/src/utils/helm-to-molfile/utils.ts +11 -15
  61. package/src/utils/monomer-lib/lib-manager.ts +15 -1
  62. package/src/utils/monomer-lib/library-file-manager/file-manager.ts +1 -1
  63. package/src/utils/monomer-lib/library-file-manager/file-validator.ts +8 -0
  64. package/src/utils/monomer-lib/library-file-manager/ui.ts +150 -3
  65. package/src/utils/monomer-lib/monomer-lib.ts +59 -21
  66. package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +155 -0
  67. package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +924 -0
  68. package/src/utils/multiple-sequence-alignment-ui.ts +3 -3
  69. package/src/utils/seq-helper/index.ts +1 -0
  70. package/src/utils/seq-helper/seq-helper.ts +131 -0
  71. package/src/utils/sequence-to-mol.ts +47 -18
  72. package/src/widgets/bio-substructure-filter.ts +9 -7
  73. package/src/widgets/package-settings-editor-widget.ts +6 -6
  74. package/src/widgets/representations.ts +12 -12
  75. package/dist/449.js +0 -2
  76. package/dist/449.js.map +0 -1
  77. /package/src/tests/{seq-handler-get-region.ts → seq-handler-get-region-tests.ts} +0 -0
@@ -0,0 +1,924 @@
1
+ /* eslint-disable max-lines-per-function */
2
+ /* eslint-disable max-lines */
3
+ /* eslint-disable max-len */
4
+ import * as grok from 'datagrok-api/grok';
5
+ import * as ui from 'datagrok-api/ui';
6
+ import * as DG from 'datagrok-api/dg';
7
+
8
+ import {IMonomerManager, INewMonomerForm} from '@datagrok-libraries/bio/src/utils/monomer-ui';
9
+ import {IMonomerLib, Monomer, RGroup} from '@datagrok-libraries/bio/src/types';
10
+ import {DUMMY_MONOMER, HELM_RGROUP_FIELDS} from '@datagrok-libraries/bio/src/utils/const';
11
+ import {LIB_PATH} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
12
+ import {ItemsGrid} from '@datagrok-libraries/utils/src/items-grid';
13
+ import {mostSimilarNaturalAnalog} from '@datagrok-libraries/bio/src/utils/macromolecule/monomers';
14
+ import {PolymerType, MonomerType} from '@datagrok-libraries/bio/src/helm/types';
15
+
16
+ import {MonomerLibManager} from '../lib-manager';
17
+
18
+ import '../../../../css/monomer-manager.css';
19
+
20
/**
 * Names of the fixed columns of the monomers dataframe built by `MonomerManager.getMonomersDf`.
 *
 * Note that the R-groups column is hidden; R-groups are displayed as separate columns instead
 * (`getMonomersDf` adds one extra column per distinct R-group label right after `R_GROUPS`).
 */
export enum MONOMER_DF_COLUMN_NAMES {
  MONOMER = 'Monomer',
  SYMBOL = 'Symbol',
  NAME = 'Name',
  R_GROUPS = '~R-Groups',
  MONOMER_TYPE = 'Monomer Type',
  POLYMER_TYPE = 'Polymer Type',
  NATURAL_ANALOG = 'Natural Analog',
  AUTHOR = 'Author',
  CREATE_DATE = 'Create Date',
  ID = 'ID',
  META = 'Meta',
  SOURCE = 'Source',
}
35
+
36
/**
 * Column name -> Datagrok column type for every fixed column of the monomers dataframe.
 *
 * `MonomerManager.getMonomersDf` iterates the entries of this object to create the columns and then
 * fills each row positionally, so the declaration order here has to stay in sync with the order of
 * the values passed to `df.rows.setValues` there.
 */
export const MONOMER_DF_COLUMNS = {
  [MONOMER_DF_COLUMN_NAMES.MONOMER]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.SYMBOL]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.NAME]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.R_GROUPS]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.MONOMER_TYPE]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.NATURAL_ANALOG]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.AUTHOR]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.CREATE_DATE]: DG.COLUMN_TYPE.DATE_TIME,
  [MONOMER_DF_COLUMN_NAMES.ID]: DG.COLUMN_TYPE.INT,
  [MONOMER_DF_COLUMN_NAMES.META]: DG.COLUMN_TYPE.STRING,
  [MONOMER_DF_COLUMN_NAMES.SOURCE]: DG.COLUMN_TYPE.STRING,
} as const;
50
+
51
+
52
/**
 * Singleton that backs the "Manage Monomers" view: a table view listing the monomers of one
 * library file, with a docked form for adding/editing monomers and ribbon/context-menu actions
 * for editing, removing and exporting them.
 *
 * Obtain the instance through {@link MonomerManager.getInstance}; the constructor is protected.
 */
export class MonomerManager implements IMonomerManager {
  public static readonly VIEW_NAME = 'Manage Monomers';
  private _newMonomer: Monomer = DUMMY_MONOMER;
  private _newMonomerForm: MonomerForm;
  private monomerLib: IMonomerLib;
  private tv: DG.TableView | null = null;
  private libInput!: DG.InputBase<string | null>;
  private static instance: MonomerManager;
  // In-flight creation shared by concurrent getInstance() callers.
  private static instancePromise: Promise<MonomerManager> | null = null;
  private activeMonomerLib: IMonomerLib | null = null;

  protected constructor(public monomerLibManamger: MonomerLibManager) {
    this.monomerLib = monomerLibManamger.getBioLib();
    this._newMonomerForm = new MonomerForm(monomerLibManamger, () => this.activeMonomerLib, async () => {
      // refresh callback: reload the currently selected library into the existing table view
      const df = await this.getMonomersDf(this.libInput.value!);
      this.tv?.dataFrame && (this.tv.dataFrame = df);
    }, () => this.tv?.dataFrame);
  }

  /**
   * Returns the shared manager, creating it on first use after the library manager has loaded.
   *
   * Creation awaits several promises, so callers arriving while it is still in progress share the
   * same pending promise instead of each constructing their own instance. A failed creation is
   * not cached: the next call retries.
   */
  public static async getInstance(): Promise<MonomerManager> {
    if (this.instance)
      return this.instance;

    let pending = this.instancePromise;
    if (!pending) {
      pending = (async () => {
        const monManager = await MonomerLibManager.getInstance();
        await monManager.awaitLoaded();
        await monManager.loadLibrariesPromise;
        this.instance = new MonomerManager(monManager);
        return this.instance;
      })();
      this.instancePromise = pending;
      const created = pending;
      // Reset only on failure so a later call can retry; the rejection itself reaches the caller.
      created.catch(() => {
        if (this.instancePromise === created)
          this.instancePromise = null;
      });
    }
    return pending;
  }

  /**
   * Writes `_monomers` as a new library file, reloads the libraries and switches the shell to the
   * manager view showing the new library. Failures are reported through `grok.shell.error` and
   * logged; they are not rethrown.
   * @param libName library file name; `.json` is appended when missing
   * @param _monomers monomers to store (the `lib` and `wem` fields are dropped from the JSON)
   */
  async createNewMonomerLib(libName: string, _monomers: Monomer[]): Promise<void> {
    this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, true);
    try {
      const monomersString = JSON.stringify(_monomers.map((m) => ({...m, lib: undefined, wem: undefined})));
      if (!libName.endsWith('.json'))
        libName += '.json';
      await (await this.monomerLibManamger.getFileManager()).addLibraryFile(monomersString, libName);
      await grok.dapi.files.writeAsText(LIB_PATH + libName, monomersString);
      await this.monomerLibManamger.loadLibraries(false);

      //await this.monomerLibManamger.loadLibraries(true);
      grok.shell.v = await this.getViewRoot(libName);
    } catch (e) {
      grok.shell.error('Error creating library');
      console.error(e);
    } finally {
      this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, false);
    }
  }

  /**
   * Shows the "Create New Library" dialog. The 'Create' button stays disabled until the entered
   * name is non-empty and does not clash with an existing library file.
   * @param monomers optional monomers to put into the new library (empty library when omitted)
   */
  async createNewLibDialog(monomers?: Monomer[]) {
    const monomerLibs = (await this.monomerLibManamger.getFileManager()).getValidLibraryPaths();
    const libNameInput = ui.input.string('Library Name', {
      placeholder: 'Enter library name',
      nullable: false,
      onValueChanged: () => {
        // `d` is declared below; this callback only runs after the dialog exists
        const res = validateInput(libNameInput.value);
        d.getButton('Create')?.classList?.toggle('d4-disabled', !!res);
      }
    });
    function getFileNameInputValue() {
      let fileName = libNameInput.value;
      if (!fileName.endsWith('.json'))
        fileName += '.json';
      return fileName;
    }
    function validateInput(v: string) {
      if (!v || !v.trim()) return 'Library name cannot be empty';
      if ((v.endsWith('.json') && monomerLibs.includes(v)) || monomerLibs.includes(v + '.json'))
        return 'Library with this name already exists';
      return null;
    }
    libNameInput.addValidator(validateInput);
    const d = ui.dialog('Create New Library')
      .add(libNameInput)
      .addButton('Create', async () => {
        const vr = validateInput(libNameInput.value);
        if (vr) {
          grok.shell.warning(vr);
          return;
        }
        try {
          await this.createNewMonomerLib(getFileNameInputValue(), monomers ?? []);
        } catch (e) {
          grok.shell.error('Error creating library');
          console.error(e);
        }
        d.close();
      })
      .show();
    // nothing has been typed yet, so start with the button disabled
    d.getButton('Create')?.classList?.toggle('d4-disabled', true);
  }

  /** Monomer most recently loaded into the form via {@link cloneMonomer}. */
  get newMonomer() { return this._newMonomer; }

  /** Returns the add/edit monomer form owned by this manager. */
  getNewMonomerForm(): INewMonomerForm {
    return this._newMonomerForm;
  }

  /**
   * Creates a fresh table view for the given library and wires the grid context menu
   * (edit / remove / selection-to-new-library) to it.
   */
  private async getMonomersTableView(fileName?: string): Promise<DG.TableView> {
    const df = await this.getMonomersDf(fileName);
    this.tv = DG.TableView.create(df, true);
    //const f = tv.filters();
    this.tv.grid.col(MONOMER_DF_COLUMN_NAMES.NAME)!.width = 100;
    this.tv.subs.push(
      grok.events.onContextMenu.subscribe(({args}) => {
        // react only to table-cell context menus raised by this manager's own grid
        if (!args || !args.menu || !args.context || args.context.type !== DG.VIEWER.GRID || !args.context.tableView ||
          args.context.tableView.id !== (this.tv!.id ?? '') || !args.item || !args.item.isTableCell || (args.item.tableRowIndex ?? -1) < 0)
          return;
        const rowIdx = args.item.tableRowIndex;
        args.menu.item('Edit Monomer', () => {
          this.cloneMonomer(this.tv!.dataFrame.rows.get(rowIdx));
        });
        if (this.tv!.dataFrame.selection.trueCount > 0) {
          args.menu.item('Remove Selected Monomers', () => {
            const monomers = Array.from(this.tv!.dataFrame.selection.getSelectedIndexes())
              .map((r) => monomerFromDfRow(this.tv!.dataFrame.rows.get(r)));
            this._newMonomerForm.removeMonomers(monomers, this.libInput.value!);
          });
          args.menu.item('Selection To New Library', () => {
            const monomers = Array.from(this.tv!.dataFrame.selection.getSelectedIndexes())
              .map((r) => monomerFromDfRow(this.tv!.dataFrame.rows.get(r)));
            this.createNewLibDialog(monomers);
          });
        } else {
          args.menu.item('Remove Monomer', () => {
            const monomer = monomerFromDfRow(this.tv!.dataFrame.rows.get(rowIdx));
            this._newMonomerForm.removeMonomers([monomer], this.libInput.value!);
          });
        }
      })
    );
    return this.tv;
  }

  /**
   * Looks for this manager's table view among the open shell table views; when found it is made
   * the current view and returned, otherwise `null` is returned.
   */
  private findActiveManagerView() {
    if (!this.tv)
      return null;
    const tv = Array.from(grok.shell.tableViews ?? []).find((tv) => tv.id === this.tv!.id);
    if (tv)
      grok.shell.v = tv;
    return tv ?? null;
  }

  /**
   * Returns the manager table view, reusing the one already open in the shell when possible and
   * building a new one (ribbon buttons, library selector, docked monomer form) otherwise.
   * @param libName library to show; when omitted, an already open view keeps the library selected
   *   in the library input, and a new view shows the first valid library
   */
  async getViewRoot(libName?: string) {
    const availableMonLibs = (await this.monomerLibManamger.getFileManager()).getValidLibraryPaths();
    this._newMonomerForm.molSketcher.resize();

    this.tv = this.findActiveManagerView();
    // libInput is only consulted once a view exists; it is not created before the first view is built
    const libToShow = this.tv ? (libName ?? this.libInput?.value ?? undefined) : undefined;
    if (this.tv && libToShow) {
      // get monomer library list
      this.libInput && ((this.libInput as DG.ChoiceInput<string>).items = availableMonLibs);
      libName && (this.libInput.value = libName);
      // load the library the selector shows, not the first one, when no name was passed
      const df = await this.getMonomersDf(libToShow);
      this.tv.dataFrame = df;
      this.tv.grid.col(MONOMER_DF_COLUMN_NAMES.NAME)!.width = 100;
      return this.tv;
    }

    libName ??= availableMonLibs[0];
    this.tv = await this.getMonomersTableView(libName);

    // remove project save button and download from ribbons
    let ribbons = this.tv.getRibbonPanels();
    ribbons.forEach((ribbonAr, i) => {
      ribbons[i] = ribbonAr
        .filter((r) => r.getElementsByClassName('grok-icon-filter').length !== 0); // remove everything except filter
    });
    ribbons = ribbons.filter((r) => r.length > 0);

    const editButton = ui.icons.edit(() => {
      if ((this.tv?.dataFrame?.currentRowIdx ?? -1) < 0) return;
      this.cloneMonomer(this.tv!.dataFrame.rows.get(this.tv!.dataFrame.currentRowIdx));
    }, 'Edit Monomer');

    const deleteButton = ui.icons.delete(async () => {
      const currentRowIdx = this.tv?.dataFrame?.currentRowIdx ?? -1;
      const selectedRows = Array.from(this.tv?.dataFrame?.selection?.getSelectedIndexes() ?? []);
      if (currentRowIdx < 0 && selectedRows.length === 0) return;

      // a selection takes precedence over the current row
      if (selectedRows.length > 0) {
        const monomers = selectedRows.map((r) => monomerFromDfRow(this.tv!.dataFrame.rows.get(r)));
        await this._newMonomerForm.removeMonomers(monomers, this.libInput.value!);
        return;
      }
      const monomer = monomerFromDfRow(this.tv!.dataFrame.rows.get(currentRowIdx));
      await this._newMonomerForm.removeMonomers([monomer], this.libInput.value!);
    });

    ui.tooltip.bind(deleteButton, () =>
      `${(this.tv?.dataFrame?.selection?.getSelectedIndexes() ?? []).length > 0 ? 'Delete selected monomers' : 'Delete monomer'}`);

    const downloadButton = ui.iconFA('arrow-to-bottom', async () => {
      const libName = this.libInput.value;
      if (!libName)
        return grok.shell.error('No library selected');
      let lib: string | null = null;
      try {
        lib = await grok.dapi.files.readAsText(LIB_PATH + libName);
      } catch (e) {
        grok.shell.error(`Error reading library ${libName}`);
        return console.error(e);
      }
      if (!lib)
        return grok.shell.error(`Library ${libName} is empty`);
      DG.Utils.download(libName!, lib!, 'text/plain');
    }, 'Download Monomer Library');

    ribbons.push([editButton, deleteButton, downloadButton]);
    this.tv.setRibbonPanels(ribbons);


    this.tv.name = MonomerManager.VIEW_NAME;
    this.libInput = ui.input.choice('Monomer Library', {value: libName, items: availableMonLibs, nullable: false, onValueChanged: async () => {
      try {
        const df = await this.getMonomersDf(this.libInput.value!);
        this.tv!.dataFrame = df;
      } catch (e) {
        console.error(e);
      }
    }});
    this.libInput.addOptions(ui.icons.add(() => { this.createNewLibDialog(); }, 'Create new monomer library...'));
    const monForm = this._newMonomerForm.form;
    monForm.prepend(this.libInput.root);
    this.tv.dockManager.dock(monForm, DG.DOCK_TYPE.LEFT, null, undefined, 0.4);
    return this.tv;
  }

  /**
   * Converts a monomers-dataframe row into a monomer, loads it into the form and remembers it
   * as {@link newMonomer}.
   */
  cloneMonomer(dfRow: DG.Row): Monomer {
    this._newMonomer = monomerFromDfRow(dfRow);
    this._newMonomerForm.setMonomer(this._newMonomer);
    return this._newMonomer;
  }

  /**
   * Reads a library file and builds the monomers dataframe: the fixed columns from
   * `MONOMER_DF_COLUMNS` plus one molecule column per distinct R-group label, inserted right
   * after the R-groups column. The library read becomes the active library of the manager.
   * @param fileName library file name; defaults to the first valid library
   * @returns the dataframe, or an empty dataframe when the library cannot be read
   * @throws rethrows any error raised while building the dataframe (after reporting it)
   */
  async getMonomersDf(fileName?: string) {
    this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, true);
    try {
      fileName ??= (await this.monomerLibManamger.getFileManager()).getValidLibraryPaths()[0];
      this.activeMonomerLib = await this.monomerLibManamger.readLibrary(LIB_PATH, fileName);
      if (!this.activeMonomerLib) {
        grok.shell.error(`Library ${fileName} not found`);
        return DG.DataFrame.create();
      }
      const polymerTypes = this.activeMonomerLib.getPolymerTypes();
      const monomers = polymerTypes.flatMap((polymerType) => {
        return this.activeMonomerLib!.getMonomerSymbolsByType(polymerType).map((symbol) => {
          return this.activeMonomerLib!.getMonomer(polymerType, symbol)!;
        });
      });
      const df = DG.DataFrame.create(monomers.length);

      const uniqueRgroupNamesSet = new Set<string>();
      for (const monomer of monomers) {
        monomer.rgroups.forEach((rg) => {
          rg.label && uniqueRgroupNamesSet.add(rg.label);
        });
      }
      const uniqueRgroupNames = Array.from(uniqueRgroupNamesSet);
      uniqueRgroupNames.sort();
      for (const [k, v] of Object.entries(MONOMER_DF_COLUMNS)) {
        df.columns.addNew(k, v);
        // per-label R-group columns go right after the (hidden) R-groups column
        if (k === MONOMER_DF_COLUMN_NAMES.R_GROUPS) {
          for (const rgroupName of uniqueRgroupNames)
            df.columns.addNew(rgroupName, DG.COLUMN_TYPE.STRING);
        }
      }


      for (let i = 0; i < monomers.length; i++) {
        let molSmiles = getCorrectedSmiles(monomers[i].rgroups, monomers[i].smiles, monomers[i].molfile);
        molSmiles = fixRGroupsAsElementsSmiles(molSmiles);
        // r-groups here might be broken, so need to make sure they are correct
        monomers[i].rgroups = resolveRGroupInfo(monomers[i].rgroups);
        const rgroupSmiles = uniqueRgroupNames.map((rgName) => {
          const rgroup = monomers[i].rgroups.find((rg) => rg.label === rgName);
          return rgroup ? getCaseInvariantValue(rgroup, HELM_RGROUP_FIELDS.CAP_GROUP_SMILES) : '';
        });
        // values are positional: same order as the columns created above
        df.rows.setValues(i, [
          molSmiles,
          monomers[i].symbol,
          monomers[i].name,
          JSON.stringify(monomers[i].rgroups ?? []),
          ...rgroupSmiles,
          monomers[i].monomerType,
          monomers[i].polymerType,
          monomers[i].naturalAnalog,
          monomers[i].author,
          monomers[i].createDate,
          monomers[i].id,
          JSON.stringify(monomers[i].meta ?? {}),
          monomers[i].lib?.source ?? '',
        ]);
      }
      df.col(MONOMER_DF_COLUMN_NAMES.MONOMER)!.semType = DG.SEMTYPE.MOLECULE;
      uniqueRgroupNames.forEach((rgName) => {
        df.col(rgName)!.semType = DG.SEMTYPE.MOLECULE;
      });
      df.currentRowIdx = -1;
      df.onCurrentRowChanged.subscribe((_) => {
        try {
          // do not overwrite a molecule the user has edited in the sketcher
          if (df.currentRowIdx === -1 || this._newMonomerForm.molChanged)
            return;
          this.cloneMonomer(df.rows.get(df.currentRowIdx));
        } catch (e) {
          console.error(e);
        }
      });
      return df;
    } catch (e) {
      grok.shell.error('Error creating monomers dataframe');
      console.error(e);
      throw e;
    } finally {
      this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, false);
    }
  }
}
365
+
366
/**
 * Some monomers come with cap groups written in place of R-groups (with the R-group info
 * supplied separately); this converts those caps back to R-group atoms.
 *
 * Two rewrites are applied, in order:
 *  1. for every R-group, `[<capGroupName>:<n>]` becomes `[*:<n>]`, where `<n>` is the second
 *     character of the R-group label (`'1'` when the label has none);
 *  2. isotope-like atoms such as `[2O]` or `[2OH]` become `[*:2]`.
 *
 * @param smiles SMILES string that may contain capped R-group positions
 * @param rGroups R-group descriptors; entries without a cap group name are skipped
 * @returns the SMILES with caps replaced by R-group atoms
 */
function substituteCapsWithRGroupsSmiles(smiles: string, rGroups: RGroup[]) {
  let newSmiles = smiles;
  // first substitute all caps with R-groups with corresponding numbers
  rGroups.forEach((rGroup) => {
    const capName = rGroup.capGroupName;
    if (!capName)
      return; // nothing to look for
    const RNum = rGroup.label?.[1] ?? '1';
    // The cap name must match literally. Escaping only its first character (as `\Br` or `\SH`)
    // turns it into a regex assertion / character class instead of plain text.
    const literalCap = capName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const capRegex = new RegExp(`\\[${literalCap}:${RNum}\\]`, 'g');
    newSmiles = newSmiles.replace(capRegex, `[*:${RNum}]`);
  });

  // during some conversions atoms can end up as isotopes in the smiles string, like [2O];
  // replace all [2O] with [*:2], there can be also two atoms like [2OH] -> [*:2]
  const isotopeRegex = /\[\d[A-Z]{1,2}\]/g;
  newSmiles = newSmiles.replace(isotopeRegex, (match) => `[*:${match[1]}]`);

  return newSmiles;
}
386
+
387
/**
 * Normalizes R-groups that are written as if they were elements into the `[*:n]` form.
 *
 * Handled notations, applied one after another:
 *  - `[R1]`   -> `[*:1]`
 *  - `[2*]`   -> `[*:2]`
 *  - `[1*:1]` / `[1*:0]` -> `[*:1]` (the leading digit is the R-group number)
 *
 * @param smiles SMILES string to normalize
 * @returns the SMILES with all recognized R-group notations rewritten
 */
function fixRGroupsAsElementsSmiles(smiles: string) {
  // each entry: the notation to find and the position of the R-group digit inside the match
  const notations: Array<[RegExp, number]> = [
    [/\[R[1-9]\]/g, 2],
    [/\[\d\*\]/g, 1],
    [/\[\d\*\:\d\]/g, 1],
  ];
  return notations.reduce(
    (current, [pattern, digitIdx]) => current.replace(pattern, (token) => `[*:${token[digitIdx]}]`),
    smiles,
  );
}
411
+
412
/**
 * R-group fields that `resolveRGroupInfo` copies (case-insensitively) from a source R-group
 * into its normalized copy.
 */
export const RGROUP_FIELDS = [
  HELM_RGROUP_FIELDS.ALTERNATE_ID, HELM_RGROUP_FIELDS.CAP_GROUP_NAME, HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE, HELM_RGROUP_FIELDS.LABEL
];
415
+
416
/**
 * Builds an object keyed by `targetKeys`, taking each value from the `source` entry whose key
 * equals the target key ignoring case (so `smiles` and `SMILES` are treated as the same field).
 * Target keys with no counterpart in `source` are left unset; when several source keys match,
 * the first one in `Object.keys` order is used.
 */
function assignObjectCaseInvariant<T extends string>(targetKeys: T[], source: { [key: string]: string }): { [key in T]: string } {
  const availableKeys = Object.keys(source);
  const result = {} as { [key in T]: string };
  for (const wanted of targetKeys) {
    const wantedLower = wanted.toLowerCase();
    const matched = availableKeys.find((candidate) => candidate.toLowerCase() === wantedLower);
    if (matched)
      result[wanted] = source[matched];
  }
  return result;
}
425
+
426
/**
 * Reads `obj[key]` ignoring the case of the key (so `smiles` and `SMILES` are treated as the
 * same field). Returns `undefined` when no key of `obj` matches.
 */
function getCaseInvariantValue<T>(obj: { [key: string]: T }, key: string): T | undefined {
  const wanted = key.toLowerCase();
  for (const candidate of Object.keys(obj)) {
    if (candidate.toLowerCase() === wanted)
      return candidate ? obj[candidate] : undefined; // an empty key never counts as a hit
  }
  return undefined;
}
432
+
433
/**
 * R-groups of some monomers lack the cap SMILES, the alternate id or the cap group name.
 * For each R-group this makes a copy restricted to `RGROUP_FIELDS` and tries to derive the
 * missing fields from the ones that are present:
 *  - from an alternate id of the form `<label>-<cap>`: cap group name and cap SMILES;
 *  - otherwise from the cap group name: cap SMILES and alternate id.
 * Copies that are already complete, or whose label is shorter than two characters, are
 * returned as they are.
 */
function resolveRGroupInfo(rgps: RGroup[]): RGroup[] {
  return rgps.map((rg) => {
    const cp = assignObjectCaseInvariant(RGROUP_FIELDS, rg);
    const smi = getCaseInvariantValue(cp, HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE);
    const altId = getCaseInvariantValue(cp, HELM_RGROUP_FIELDS.ALTERNATE_ID);
    const capName = getCaseInvariantValue(cp, HELM_RGROUP_FIELDS.CAP_GROUP_NAME);
    // a missing label falls back to 'R1' for the derivations below
    const label = getCaseInvariantValue(cp, HELM_RGROUP_FIELDS.LABEL) ?? 'R1';

    // nothing to repair, or the label is too short to carry an R-group number
    const isComplete = Boolean(smi && altId && capName);
    if (isComplete || label.length < 2)
      return cp;

    const rNumber = label.substring(1);
    const labelPrefix = `${label}-`;

    // preferred source: alternate id such as "R1-H"
    if (altId && altId.includes(labelPrefix)) {
      const capAtoms = altId.replace(labelPrefix, '');
      if (!capName)
        cp[HELM_RGROUP_FIELDS.CAP_GROUP_NAME] = capAtoms;
      if (!smi)
        cp[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE] = `[*:${rNumber}][${capAtoms}]`;
      return cp;
    }

    // fallback source: the cap group name
    if (capName) {
      if (!smi)
        cp[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE] = `[*:${rNumber}][${capName}]`;
      if (!altId)
        cp[HELM_RGROUP_FIELDS.ALTERNATE_ID] = `${label}-${capName}`;
    }
    return cp;
  }) as RGroup[];
}
461
+
462
+
463
+ class MonomerForm implements INewMonomerForm {
464
+ molSketcher: grok.chem.Sketcher;
465
+ monomerTypeInput: DG.ChoiceInput<string | null>;
466
+ polymerTypeInput: DG.ChoiceInput<string | null>;
467
+ monomerSymbolInput: DG.InputBase<string>;
468
+ monomerNameInput: DG.InputBase<string>;
469
+ monomerIdInput: DG.InputBase<number | null>;
470
+ monomerNaturalAnalogInput: DG.InputBase<string | null>;
471
+ rgroupsGrid: ItemsGrid;
472
+ metaGrid: ItemsGrid;
473
+ saveButton: HTMLButtonElement;
474
+ rgroupsGridRoot: HTMLElement;
475
+ private _molChanged: boolean = false;
476
+ get molChanged() { return this._molChanged; }
477
+ private saveValidationResult?: string | null = null;
478
+ private triggerMolChange: boolean = true; // makes sure that change is not triggered by copying the molecule from grid
479
+ constructor(public monomerLibManager: MonomerLibManager,
480
+ private getMonomerLib: () => IMonomerLib | null, private refreshTable: () => Promise<void>,
481
+ private getMonomersDataFrame: () => DG.DataFrame | undefined) {
482
+ const monomerTypes = ['PEPTIDE', 'RNA', 'CHEM', 'BLOB', 'G'];
483
+ this.molSketcher = new DG.chem.Sketcher();
484
+ this.molSketcher.root.classList.add('monomer-manager-sketcher');
485
+ this.polymerTypeInput = ui.input.choice('Polymer Type', {value: 'PEPTIDE', items: monomerTypes,
486
+ onValueChanged: () => this.onMonomerInputChanged(), nullable: false});
487
+
488
+ this.monomerTypeInput = ui.input.choice('Monomer Type', {value: 'Backbone', items: ['Backbone', 'Branch', 'Terminal'],
489
+ onValueChanged: () => this.onMonomerInputChanged(), nullable: false});
490
+ this.monomerSymbolInput = ui.input.string('Monomer Symbol', {nullable: false, onValueChanged: () => this.onMonomerInputChanged()});
491
+ this.monomerNameInput = ui.input.string('Monomer Name', {nullable: false, onValueChanged: () => this.onMonomerInputChanged()});
492
+ this.monomerNameInput.nullable = false;
493
+ this.monomerIdInput = ui.input.int('Monomer ID', {nullable: true, value: 0, onValueChanged: () => this.onMonomerInputChanged()});
494
+ this.monomerNaturalAnalogInput = ui.input.string('Natural Analog', {nullable: true, onValueChanged: () => this.onMonomerInputChanged()});
495
+ this.saveButton = ui.bigButton('Save', async () => {
496
+ const validatorRes = this.validateInputs();
497
+ if (validatorRes) {
498
+ grok.shell.warning(validatorRes);
499
+ return;
500
+ }
501
+ await this.saveMonomer();
502
+ });
503
+ // this.saveButton.style.pointerEvents = 'revert';
504
+ this.molSketcher.subs.push(this.molSketcher.onChanged.subscribe(async () => {
505
+ if (!this.triggerMolChange) {
506
+ this.triggerMolChange = true;
507
+ return;
508
+ }
509
+ try {
510
+ this.rgroupsGridRoot.style.display = 'none';
511
+ let smiles = this.molSketcher.getSmiles();
512
+ if (!smiles) {
513
+ this.rgroupsGrid.items = [];
514
+ return;
515
+ }
516
+ smiles = getCorrectedSmiles([], smiles);
517
+
518
+ const rGroupMatches = this.findRgroupsInSmiles(smiles);
519
+ if (rGroupMatches.length === 0) {
520
+ this.rgroupsGrid.items = [];
521
+ this.rgroupsGrid.render();
522
+ return;
523
+ }
524
+ const rGroupNums = rGroupMatches.map((match) => Number.parseInt(match[0].match(/[1-9]/g)![0]));
525
+ const rGroupItems: RGroup[] = rGroupNums.map((num) => {
526
+ const existingRGroup = this.rgroupsGrid.items.find((rg) => rg[HELM_RGROUP_FIELDS.LABEL] === `R${num}`) as RGroup | undefined;
527
+ return existingRGroup ?? {
528
+ [HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE]: `[*:${num}][H]`,
529
+ [HELM_RGROUP_FIELDS.ALTERNATE_ID]: `R${num}-H`,
530
+ [HELM_RGROUP_FIELDS.CAP_GROUP_NAME]: 'H',
531
+ [HELM_RGROUP_FIELDS.LABEL]: `R${num}`,
532
+ } as unknown as RGroup;
533
+ });
534
+ if (this.rgroupsGrid.items.length !== rGroupItems.length)
535
+ this.rgroupsGrid.items = rGroupItems;
536
+ this.rgroupsGrid.render();
537
+ this.rgroupsGridRoot.style.display = 'flex';
538
+ const mostSimilar = await mostSimilarNaturalAnalog(capSmiles(smiles, rGroupItems), this.polymerTypeInput.value ?? '');
539
+ if (mostSimilar)
540
+ this.monomerNaturalAnalogInput.value = mostSimilar;
541
+ } catch (e) {
542
+ console.error(e);
543
+ }
544
+ this.onMonomerInputChanged();
545
+ this._molChanged = true;
546
+ }));
547
+
548
+
549
+ const rgropProps = [
550
+ DG.Property.js(HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE, DG.TYPE.STRING, {caption: 'R-group SMILES', nullable: false}),
551
+ DG.Property.js(HELM_RGROUP_FIELDS.ALTERNATE_ID, DG.TYPE.STRING, {caption: 'Alternate ID', nullable: false}),
552
+ DG.Property.js(HELM_RGROUP_FIELDS.CAP_GROUP_NAME, DG.TYPE.STRING, {caption: 'R-group name', nullable: false}),
553
+ DG.Property.js(HELM_RGROUP_FIELDS.LABEL, DG.TYPE.STRING, {fieldName: 'R-group Label', nullable: false, userEditable: false}),
554
+ ];
555
+ this.rgroupsGrid = new ItemsGrid(rgropProps, [], {allowAdd: false, allowRemove: false,
556
+ validators: {
557
+ [HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE]: (smi) => !smi ? 'Cap group smiles is required' : !grok.chem.checkSmiles(smi) ? 'Invalid SMILES' : null,
558
+ [HELM_RGROUP_FIELDS.ALTERNATE_ID]: (id) => !id ? 'Alternate ID is required' : null,
559
+ [HELM_RGROUP_FIELDS.CAP_GROUP_NAME]: (name) => !name ? 'Cap group name is required' : null,
560
+ [HELM_RGROUP_FIELDS.LABEL]: (label) => !label ? 'R-group label is required' : null,
561
+ },
562
+ customLabels: {
563
+ [HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE]: 'R-group SMILES',
564
+ [HELM_RGROUP_FIELDS.ALTERNATE_ID]: 'Alternate ID',
565
+ [HELM_RGROUP_FIELDS.CAP_GROUP_NAME]: 'Cap Group Name',
566
+ [HELM_RGROUP_FIELDS.LABEL]: 'Label',
567
+ },
568
+ });
569
+ this.rgroupsGrid.onItemChanged.subscribe(() => this.onMonomerInputChanged());
570
+
571
+ this.rgroupsGridRoot = ui.divV([this.rgroupsGrid.root]);
572
+ this.rgroupsGridRoot.style.display = 'none';
573
+ const metaProps = [
574
+ DG.Property.js('Property', DG.TYPE.STRING, {caption: 'Property', nullable: true}),
575
+ DG.Property.js('Value', DG.TYPE.STRING, {caption: 'Value', nullable: true}),
576
+ ];
577
+ this.metaGrid = new ItemsGrid(metaProps, []);
578
+ this.onMonomerInputChanged();
579
+ }
580
+
581
+ onMonomerInputChanged() {
582
+ setTimeout(() => {
583
+ this.saveValidationResult = this.validateInputs();
584
+ if (this.saveValidationResult)
585
+ this.saveButton.classList.add('d4-disabled');
586
+ else
587
+ this.saveButton.classList.remove('d4-disabled');
588
+
589
+ const monomerExists = this.polymerTypeInput.value && this.polymerTypeInput.value &&
590
+ !!this.getMonomerLib()?.getMonomer(this.polymerTypeInput.value as PolymerType, this.monomerSymbolInput.value);
591
+
592
+ this.saveButton.textContent = monomerExists ? 'Save Monomer' : 'Add Monomer';
593
+ }, 200);
594
+ }
595
+
596
+ setMonomer(monomer: Monomer) {
597
+ this.triggerMolChange = false;
598
+ this.molSketcher.setSmiles(monomer.smiles);
599
+ this.polymerTypeInput.value = monomer.polymerType;
600
+ this.monomerTypeInput.value = monomer.monomerType;
601
+ this.monomerSymbolInput.value = monomer.symbol;
602
+ this.monomerNameInput.value = monomer.name;
603
+ this.monomerIdInput.value = monomer.id;
604
+ this.monomerNaturalAnalogInput.value = monomer.naturalAnalog ?? null;
605
+ this.rgroupsGrid.items = resolveRGroupInfo(monomer.rgroups);
606
+ this.metaGrid.items = Object.entries(monomer.meta ?? {}).map(([k, v]) => {
607
+ return {Property: k, Value: v};
608
+ });
609
+ this.rgroupsGrid.render();
610
+ this.metaGrid.render();
611
+ this.rgroupsGridRoot.style.display = 'flex';
612
+
613
+ this.onMonomerInputChanged();
614
+ if (!monomer.naturalAnalog) {
615
+ mostSimilarNaturalAnalog(capSmiles(monomer.smiles, this.rgroupsGrid.items as RGroup[]), monomer.polymerType).then((mostSimilar) => {
616
+ if (mostSimilar)
617
+ this.monomerNaturalAnalogInput.value = mostSimilar;
618
+ });
619
+ }
620
+ }
621
+
622
/**
 * Checks that the form is complete enough to be saved.
 *
 * Checks run in this order: sketcher structure, the six main inputs,
 * presence of at least one R-group, every field of every R-group row,
 * and finally the R-group grid's own error state.
 *
 * @returns the message of the first failed check, or `null` when all checks pass
 */
validateInputs(): string | null | undefined {
  if (!this.molSketcher.getSmiles())
    return 'Monomer Molecule field is required';

  const requiredInputs = [
    this.polymerTypeInput,
    this.monomerTypeInput,
    this.monomerSymbolInput,
    this.monomerNameInput,
    this.monomerIdInput,
    this.monomerNaturalAnalogInput,
  ];
  for (const input of requiredInputs) {
    const isEmpty = input.value == null || input.value === '';
    if (isEmpty)
      return `${input.caption} field is required`;
  }

  if (this.rgroupsGrid.items.length < 1)
    return 'At least 1 R-group is required';

  for (const row of this.rgroupsGrid.items) {
    for (const [fieldName, fieldValue] of Object.entries(row)) {
      // any falsy cell counts as missing
      if (!fieldValue)
        return `R-group ${fieldName} field is required for ${row[HELM_RGROUP_FIELDS.LABEL]}`;
    }
  }

  return this.rgroupsGrid.hasErrors() ? 'R-group fields contain errors' : null;
}
636
+
637
/**
 * Finds R-group placeholders in a SMILES string.
 *
 * Two notations are recognised, each with a single digit 1-9: `[N*]` and `[*:N]`.
 * All `[N*]` matches come first in the result, followed by all `[*:N]` matches.
 *
 * @param smiles SMILES string to scan
 * @returns match objects for every placeholder found
 */
findRgroupsInSmiles(smiles: string): RegExpMatchArray[] {
  const placeholderPatterns = [/\[[1-9]\*\]/g, /\[\*\:[1-9]\]/g];
  return placeholderPatterns.flatMap((pattern) => [...smiles.matchAll(pattern)]);
}
644
+
645
/**
 * Builds the editor layout: the molecule sketcher on top, a tab control with
 * 'Monomer', 'R-groups' and 'Meta' tabs below it, and the save button row at the bottom.
 * The save button row shows the current validation message as its tooltip,
 * or 'Save monomer to library' when there is none.
 */
get form() {
  const tabs = ui.tabControl({
    'Monomer': ui.divV([
      this.polymerTypeInput,
      this.monomerTypeInput,
      this.monomerSymbolInput,
      this.monomerNameInput,
      this.monomerIdInput,
      this.monomerNaturalAnalogInput,
    ]),
    'R-groups': this.rgroupsGridRoot,
    'Meta': ui.divV([this.metaGrid.root]),
  }, false);
  tabs.header.style.marginBottom = '10px';

  const saveRow = ui.buttonsInput([this.saveButton]);
  ui.tooltip.bind(saveRow, () => this.saveValidationResult ?? 'Save monomer to library');

  const children = [this.molSketcher.root, tabs.root, saveRow];
  return ui.divV(children, {classes: 'ui-form', style: {paddingLeft: '10px', overflow: 'scroll'}});
}
669
+
670
/**
 * Exposes the sketcher and the main inputs keyed by the monomer field each one edits:
 * molecule, polymerType, monomerType, symbol, name, id and naturalAnalog.
 */
get fieldInputs() {
  const {
    molSketcher: molecule,
    polymerTypeInput: polymerType,
    monomerTypeInput: monomerType,
    monomerSymbolInput: symbol,
    monomerNameInput: name,
    monomerIdInput: id,
    monomerNaturalAnalogInput: naturalAnalog,
  } = this;
  return {molecule, polymerType, monomerType, symbol, name, id, naturalAnalog};
}
681
+
682
/** Placeholder: currently always yields an empty array. TODO: add meta inputs */
get metaInputs() {
  return [] as any;
}
683
/** Placeholder: currently always yields an empty array. TODO: add rGroup inputs */
get rgroupInputs() {
  return [] as any;
}
684
+
685
+
686
/**
 * Builds a horizontal summary of a monomer: a 150x150 structure drawing next to
 * a table with its name, author and creation date.
 *
 * `getCorrectedSmiles` throws when neither the SMILES nor the molfile is usable.
 * That must not abort the confirmation dialogs that embed this summary, so the
 * failure is logged and a text placeholder is shown instead of the drawing.
 *
 * @param monomer monomer to summarise
 * @returns container element holding the drawing (or placeholder) and the info table
 */
private getMonomerInfoTable(monomer: Monomer) {
  let molImage: HTMLElement;
  try {
    const molSmiles = getCorrectedSmiles(monomer.rgroups, monomer.smiles, monomer.molfile);
    molImage = grok.chem.drawMolecule(molSmiles, 150, 150);
  } catch (e) {
    console.error(e);
    molImage = ui.divText('Structure is not available');
  }
  const infoTable = ui.tableFromMap({name: monomer.name, author: monomer.author, createDate: monomer.createDate});
  return ui.divH([molImage, infoTable], {style: {alignItems: 'center'}});
}
692
+
693
/**
 * Asks the user to confirm and then removes the given monomers from a library file.
 *
 * Monomers are matched in the library by symbol and polymer type; when several entries
 * match, the last one is taken. If any monomer cannot be found, an error is shown and
 * nothing is removed. After confirmation the library file is rewritten, libraries are
 * reloaded and the table is refreshed.
 *
 * @param monomers monomers to remove; an empty list is a no-op
 * @param libName  library file name, appended to `LIB_PATH`
 * @param notify   whether to show a success message after removal
 */
async removeMonomers(monomers: Monomer[], libName: string, notify = true) {
  // nothing to confirm or remove; also avoids reading `[0]` of an empty list below
  if (monomers.length === 0) return;

  let libJSON: Monomer[] = [];
  try {
    const libTXT = await grok.dapi.files.readAsText(LIB_PATH + libName);
    libJSON = JSON.parse(libTXT);
  } catch (e) {
    grok.shell.error(`Error reading library ${libName}`);
    console.error(e);
    return;
  }

  const monomerIdxs = monomers.map((monomer) =>
    findLastIndex(libJSON, (m) => m.symbol === monomer.symbol && m.polymerType === monomer.polymerType));
  const firstMissing = monomerIdxs.indexOf(-1);
  if (firstMissing !== -1) {
    grok.shell.error(`Monomer ${monomers[firstMissing].symbol} not found in library ${libName}`);
    return;
  }

  // the same library entry may be requested more than once; list and remove it once
  const removingMonomers = [...new Set(monomerIdxs)].map((idx) => libJSON[idx]);
  const infoTables = ui.divV(removingMonomers.map((m) => this.getMonomerInfoTable(m)),
    {style: {maxHeight: '500px', overflow: 'scroll'}});
  const isPlural = removingMonomers.length > 1;
  const symbolList = removingMonomers.map((m) => m.symbol).join(', ');
  const promptText = isPlural ?
    `Are you sure you want to remove monomers ${symbolList} from ${libName} library?` :
    `Are you sure you want to remove monomer with symbol ${removingMonomers[0].symbol} from ${libName} library?`;

  const dlg = ui.dialog('Remove Monomer' + (isPlural ? 's' : ''))
    .add(ui.h1(promptText))
    .add(infoTables)
    .addButton('Remove', async () => {
      try {
        const remaining = libJSON.filter((m) => !removingMonomers.includes(m));
        await grok.dapi.files.writeAsText(LIB_PATH + libName, JSON.stringify(remaining));
        await (await MonomerLibManager.getInstance()).loadLibraries(true);
        await this.refreshTable();

        if (notify)
          grok.shell.info(`Monomer${isPlural ? 's' : ''} ${symbolList} ${isPlural ? 'were' : 'was'} successfully removed from ${libName} library`);
        dlg.close();
      } catch (e) {
        // report the failure the same way saving does; the dialog stays open so the user can retry
        grok.shell.error(`Error removing monomer${isPlural ? 's' : ''} from library ${libName}`);
        console.error(e);
      }
    })
    .show();
}
734
+
735
/**
 * Writes a monomer into a library file, replacing any entry with the same symbol and polymer type.
 *
 * Before saving, the user is asked to confirm when either
 *  - the library already has a monomer with this symbol and polymer type (overwrite), or
 *  - the monomers data frame already contains the same SMILES string (duplicate structure).
 * Without such a conflict the monomer is saved immediately.
 *
 * @param monomer monomer to store
 * @param libName library file name, appended to `LIB_PATH`
 */
private async addMonomerToLib(monomer: Monomer, libName: string) {
  // TODO: permissions logic;
  let libJSON: Monomer[] = [];
  try {
    libJSON = JSON.parse(await grok.dapi.files.readAsText(LIB_PATH + libName));
  } catch (e) {
    grok.shell.error(`Error reading library ${libName}`);
    return console.error(e);
  }

  const isSameMonomer = (m: Monomer) => m.symbol === monomer.symbol && m.polymerType === monomer.polymerType;
  // duplicates by symbol are possible, so the last matching entry is the one of interest
  const existingMonomerIdx = findLastIndex(libJSON, isSameMonomer);
  // structures are compared as plain strings, relying on the SMILES being canonical
  const existingStructureIdx = this.getMonomersDataFrame()
    ?.col(MONOMER_DF_COLUMN_NAMES.MONOMER)
    ?.toList()
    ?.findIndex((smi) => smi === monomer.smiles);

  const saveLib = async () => {
    try {
      // drop every existing entry for this symbol / polymer type, then append the new one
      libJSON = libJSON.filter((m) => !isSameMonomer(m));
      libJSON.push({...monomer, lib: undefined, wem: undefined});
      await grok.dapi.files.writeAsText(LIB_PATH + libName, JSON.stringify(libJSON));
      const libManager = await MonomerLibManager.getInstance();
      await libManager.loadLibraries(true);
      await this.refreshTable();
      grok.shell.info(`Monomer ${monomer.symbol} was successfully saved in library ${libName}`);
    } catch (e) {
      grok.shell.error('Error saving monomer');
      console.error(e);
    }
  };

  let infoTable: HTMLDivElement | null = null;
  let promptMessage = '';
  if (existingMonomerIdx >= 0) {
    infoTable = this.getMonomerInfoTable(libJSON[existingMonomerIdx]);
    promptMessage = `Monomer with symbol '${monomer.symbol}' already exists in library ${libName}.\nAre you sure you want to overwrite it?`;
  } else if (existingStructureIdx != null && existingStructureIdx >= 0) {
    const sameStructure = monomerFromDfRow(this.getMonomersDataFrame()!.rows.get(existingStructureIdx));
    infoTable = this.getMonomerInfoTable(sameStructure);
    promptMessage = `Monomer with the same structure already exists in library ${libName} with different symbol (${sameStructure.symbol}).\nAre you sure you want to duplicate it?`;
  }

  if (!infoTable) {
    await saveLib();
    return;
  }

  const dlg = ui.dialog('Save Monomer')
    .add(ui.divText(promptMessage))
    .add(infoTable)
    .addButton('Save', () => {
      dlg.close();
      saveLib();
    })
    .show();
}
787
+
788
/**
 * Collects the form contents into a `Monomer` and saves it to the current library.
 *
 * The sketcher SMILES is validated, rewritten with `getCorrectedSmiles`, converted to
 * a molblock and fixed up with `getCorrectedMolBlock`. Meta rows with both a property
 * and a value become the `meta` object. The author is the current user and the creation
 * date is the current time. Saving is delegated to `addMonomerToLib` using the source
 * of the current monomer library.
 *
 * Structure processing can throw; such failures are reported to the user and logged
 * instead of surfacing as an unhandled rejection.
 */
private async saveMonomer() {
  // TODO: handle some r group logic here
  const sketchedSmiles = this.molSketcher.getSmiles();
  if (!sketchedSmiles || !grok.chem.checkSmiles(sketchedSmiles)) {
    grok.shell.warning('Invalid SMILES');
    return;
  }

  let smiles: string;
  let molFile: string;
  try {
    // correct smiles with correct r-group notation
    smiles = getCorrectedSmiles([], sketchedSmiles);
    molFile = getCorrectedMolBlock(grok.chem.convert(smiles, DG.chem.Notation.Smiles, DG.chem.Notation.MolBlock));
  } catch (e) {
    grok.shell.error('Error processing monomer structure');
    console.error(e);
    return;
  }

  const meta: any = {};
  for (const item of this.metaGrid.items) {
    // rows with an empty property name or empty value are ignored
    if ((!!item['Property']) && (!!item['Value']))
      meta[item['Property']] = item['Value'];
  }

  const monomer: Monomer = {
    symbol: this.monomerSymbolInput.value,
    name: this.monomerNameInput.value,
    molfile: molFile,
    smiles: smiles,
    polymerType: this.polymerTypeInput.value as PolymerType,
    monomerType: this.monomerTypeInput.value as MonomerType,
    naturalAnalog: this.monomerNaturalAnalogInput.value ? this.monomerNaturalAnalogInput.value : undefined,
    id: this.monomerIdInput.value ?? 0,
    rgroups: this.rgroupsGrid.items as RGroup[], // TODO
    author: DG.User.current().friendlyName,
    createDate: new Date().toISOString(),
    meta
  };

  const source = this.getMonomerLib()?.source;
  if (!source) {
    grok.shell.warning('Monomer library source is not specified');
    return;
  }
  await this.addMonomerToLib(monomer, source);
}
826
+ }
827
+
828
/**
 * Returns the index of the last element satisfying the predicate.
 *
 * Elements are probed from the end towards the start and probing stops at the first hit.
 *
 * @param ar   array-like collection to search
 * @param pred predicate applied to each probed element
 * @returns index of the last matching element, or -1 when none matches
 */
function findLastIndex<T>(ar: ArrayLike<T>, pred: (el: T) => boolean): number {
  let idx = ar.length;
  while (idx-- > 0) {
    if (pred(ar[idx]))
      return idx;
  }
  return -1;
}
838
+
839
/**
 * Produces a SMILES string for a monomer structure after the R-group rewrites.
 *
 * The given SMILES is used when it is non-empty and passes `grok.chem.checkSmiles`;
 * otherwise the molblock is converted to SMILES. The string is then passed through
 * `substituteCapsWithRGroupsSmiles` and `fixRGroupsAsElementsSmiles`. When the input
 * SMILES was the source, the result is converted to SMILES once more before returning.
 *
 * NB! Can throw error
 *
 * @param rgroups  R-group definitions passed to `substituteCapsWithRGroupsSmiles`
 * @param smiles   candidate SMILES
 * @param molBlock fallback molblock, used only when the SMILES is empty or malformed
 * @throws Error when the SMILES is empty or malformed and no molblock is given
 */
function getCorrectedSmiles(rgroups: RGroup[], smiles?: string, molBlock?: string): string {
  const isSmilesMalformed = !smiles || !grok.chem.checkSmiles(smiles);
  if (isSmilesMalformed && !molBlock)
    throw new Error('Both SMILES and MOL block are empty or malformed');

  const toSmiles = (structure: string): string =>
    grok.chem.convert(structure, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);

  const startingSmiles = isSmilesMalformed ? toSmiles(molBlock!) : smiles;
  const withRGroups = fixRGroupsAsElementsSmiles(substituteCapsWithRGroupsSmiles(startingSmiles, rgroups));

  // if the source was smiles, canonicalize it before returning
  if (isSmilesMalformed)
    return withRGroups;
  return toSmiles(withRGroups);
}
851
+
852
/**
 * Normalises the R-group bookkeeping of a molblock.
 *
 * Steps:
 *  1. An `M ... ISO` property line, if present, is renamed to `RGP` in place.
 *  2. Within the atom block, atoms written as `R` are rewritten as `R#`.
 *  3. When no `M ... RGP` line exists and R-group atoms were found, `M  RGP` lines are
 *     inserted before `M  END` (or appended when there is no `M  END`). Each entry pairs
 *     the atom's 1-based position in the atom block with its R-group number. The number
 *     is taken from whitespace-separated field 13 of the atom line when that is a
 *     positive integer, and defaults to 1 otherwise.
 *
 * The generated lines use the fixed-width CTfile layout (`M  RGP`, 3-character entry
 * count, 4-character atom and R-group fields, at most 8 entries per line).
 *
 * A block without a `V2000`/`V3000` counts line is returned with only step 1 applied.
 * For a `V3000` counts line the leading count is normally 0, so no atoms are scanned.
 *
 * @param molBlock molblock text with `\n` line separators
 * @returns the corrected molblock
 */
function getCorrectedMolBlock(molBlock: string) {
  const lines = molBlock.split('\n');

  const isoLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('ISO'));
  if (isoLineIdx !== -1) {
    const isoIndex = lines[isoLineIdx].indexOf('ISO');
    lines[isoLineIdx] = lines[isoLineIdx].substring(0, isoIndex) + 'RGP' + lines[isoLineIdx].substring(isoIndex + 3);
  }

  const countsLineIdx = lines.findIndex((line) => line.includes('V2000') || line.includes('V3000'));
  // not a recognisable molblock: there is no atom block to inspect
  if (countsLineIdx === -1) return lines.join('\n');

  // The atom count occupies the first three columns of the counts line. Reading it by
  // column keeps counts such as "100105" (100 atoms, 105 bonds) from being misread;
  // the first whitespace-separated token is the fallback for non-aligned input.
  const countsLine = lines[countsLineIdx];
  const fixedWidthCount = Number.parseInt(countsLine.substring(0, 3));
  const parsedCount = Number.isNaN(fixedWidthCount) ?
    Number.parseInt(countsLine.trim().split(' ')[0]) : fixedWidthCount;
  const atomCount = Number.isNaN(parsedCount) ? 0 : parsedCount;
  // never walk past the end of a truncated block
  const lastAtomLineIdx = Math.min(countsLineIdx + atomCount, lines.length - 1);

  // [1-based atom number, R-group number] in atom-block order
  const rgroupEntries: Array<[number, number]> = [];
  for (let atomI = countsLineIdx + 1; atomI <= lastAtomLineIdx; atomI++) {
    const hasBareLabel = lines[atomI].includes('R ');
    const hasHashLabel = lines[atomI].includes('R#');
    if (!hasBareLabel && !hasHashLabel) continue;
    // 'R ' -> 'R#' keeps the line width unchanged
    if (hasBareLabel)
      lines[atomI] = lines[atomI].replace('R ', 'R#');

    const fields = lines[atomI].trim().split(/\s+/);
    // rgroup number can be at 13th index as well
    const mappedNum = fields.length >= 14 ? Number.parseInt(fields[13]) : NaN;
    const rgroupNum = !Number.isNaN(mappedNum) && mappedNum > 0 ? mappedNum : 1;
    rgroupEntries.push([atomI - countsLineIdx, rgroupNum]);
  }

  const hasRgpLine = lines.some((line) => line.startsWith('M') && line.includes('RGP'));
  if (!hasRgpLine && rgroupEntries.length > 0) {
    const pad = (value: number, width: number) => String(value).padStart(width, ' ');
    const rgpLines: string[] = [];
    for (let start = 0; start < rgroupEntries.length; start += 8) {
      const chunk = rgroupEntries.slice(start, start + 8);
      const entries = chunk.map(([atomNum, rNum]) => pad(atomNum, 4) + pad(rNum, 4)).join('');
      rgpLines.push(`M  RGP${pad(chunk.length, 3)}${entries}`);
    }
    const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
    lines.splice(mEndIdx === -1 ? lines.length : mEndIdx, 0, ...rgpLines);
  }
  return lines.join('\n');
}
897
+
898
/**
 * Reverse of r-group substitution: replaces R-group placeholders with their cap groups.
 *
 * For every R-group the first occurrence of `[*:N]` is replaced with `[<cap group name>]`,
 * where N is the second character of the R-group label ('1' when the label has none).
 * R-groups without a cap group name are left as placeholders, so that an incomplete
 * definition does not inject a literal `[undefined]` atom into the SMILES.
 *
 * @param smiles  SMILES with `[*:N]` placeholders
 * @param rgroups R-group definitions providing labels and cap group names
 * @returns SMILES with the resolvable placeholders replaced
 */
function capSmiles(smiles: string, rgroups: RGroup[]) {
  let newSmiles = smiles;
  for (const rg of rgroups) {
    const capGroupName = getCaseInvariantValue(rg, HELM_RGROUP_FIELDS.CAP_GROUP_NAME);
    if (!capGroupName) continue;
    // tolerate a missing label the same way a one-character label is tolerated
    const rgroupNum = rg.label?.[1] ?? '1';
    newSmiles = newSmiles.replace(`[*:${rgroupNum}]`, `[${capGroupName}]`);
  }
  return newSmiles;
}
908
+
909
/**
 * Rebuilds a `Monomer` from a row of the monomers data frame.
 *
 * Scalar fields are read from the columns named in `MONOMER_DF_COLUMN_NAMES`.
 * The R-groups and meta cells hold JSON text. A missing OR empty cell is treated as
 * `[]` / `{}` respectively, because `JSON.parse('')` throws. The molfile is not stored
 * in the data frame and is returned as an empty string.
 *
 * @param dfRow data frame row to read
 * @returns monomer assembled from the row values
 */
function monomerFromDfRow(dfRow: DG.Row): Monomer {
  // `||` rather than `??`: an empty string must fall back too
  const rgroupsJson = dfRow.get(MONOMER_DF_COLUMN_NAMES.R_GROUPS) || '[]';
  const metaJson = dfRow.get(MONOMER_DF_COLUMN_NAMES.META) || '{}';
  return {
    symbol: dfRow.get(MONOMER_DF_COLUMN_NAMES.SYMBOL),
    name: dfRow.get(MONOMER_DF_COLUMN_NAMES.NAME),
    molfile: '',
    smiles: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER),
    polymerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE),
    monomerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER_TYPE),
    naturalAnalog: dfRow.get(MONOMER_DF_COLUMN_NAMES.NATURAL_ANALOG),
    id: dfRow.get(MONOMER_DF_COLUMN_NAMES.ID),
    rgroups: JSON.parse(rgroupsJson),
    meta: JSON.parse(metaJson),
    author: dfRow.get(MONOMER_DF_COLUMN_NAMES.AUTHOR),
    createDate: dfRow.get(MONOMER_DF_COLUMN_NAMES.CREATE_DATE),
  };
}