@datagrok/bio 2.25.0 → 2.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,13 @@ export namespace scripts {
12
12
  return await grok.functions.call('Bio:Embed', { molecule });
13
13
  }
14
14
 
15
+ /**
16
+ Converts molecules to HELM notation based on monomer library
17
+ */
18
+ export async function molToHelmConverterPy(moleculesDataframe: DG.DataFrame , moleculesColumn: DG.Column , libraryJSON: string ): Promise<DG.DataFrame> {
19
+ return await grok.functions.call('Bio:MolToHelmConverterPy', { moleculesDataframe, moleculesColumn, libraryJSON });
20
+ }
21
+
15
22
  /**
16
23
  Create the model peptides/DNA sequences with peptides data
17
24
  */
@@ -172,6 +179,13 @@ export namespace funcs {
172
179
  return await grok.functions.call('Bio:SequenceSpaceTopMenu', { table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo });
173
180
  }
174
181
 
182
+ /**
183
+ Converts Peptide molecules to HELM notation by matching with monomer library
184
+ */
185
+ export async function moleculesToHelmTopMenu(table: DG.DataFrame , molecules: DG.Column ): Promise<void> {
186
+ return await grok.functions.call('Bio:MoleculesToHelmTopMenu', { table, molecules });
187
+ }
188
+
175
189
  /**
176
190
  Converts sequences to molblocks
177
191
  */
package/src/package.g.ts CHANGED
@@ -264,6 +264,15 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
264
264
  return await PackageFunctions.sequenceSpaceTopMenu(table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo);
265
265
  }
266
266
 
267
+ //name: Molecules to HELM
268
+ //description: Converts Peptide molecules to HELM notation by matching with monomer library
269
+ //input: dataframe table { description: Input data table }
270
+ //input: column molecules { semType: Molecule; description: Molecule column }
271
+ //top-menu: Bio | Transform | Molecules to HELM...
272
+ export async function moleculesToHelmTopMenu(table: DG.DataFrame, molecules: DG.Column) : Promise<void> {
273
+ await PackageFunctions.moleculesToHelmTopMenu(table, molecules);
274
+ }
275
+
267
276
  //name: To Atomic Level
268
277
  //description: Converts sequences to molblocks
269
278
  //input: dataframe table { description: Input data table }
package/src/package.ts CHANGED
@@ -78,10 +78,12 @@ import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atom
78
78
  import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
79
79
  import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
80
80
  import {BilnNotationProvider} from './utils/biln';
81
- import {MonomerLibFromFilesProvider} from './utils/monomer-lib/library-file-manager/monomers-lib-provider';
81
+
82
+ import * as api from './package-api';
82
83
  export const _package = new BioPackage(/*{debug: true}/**/);
83
84
  export * from './package.g';
84
85
 
86
+
85
87
  // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
86
88
  // let monomerLib: MonomerLib | null = null;
87
89
  let initBioPromise: Promise<void> | null = null;
@@ -617,6 +619,30 @@ export class PackageFunctions {
617
619
  return res;
618
620
  }
619
621
 
622
+ @grok.decorators.func({
623
+ name: 'Molecules to HELM',
624
+ 'top-menu': 'Bio | Transform | Molecules to HELM...',
625
+ description: 'Converts Peptide molecules to HELM notation by matching with monomer library',
626
+ })
627
+ static async moleculesToHelmTopMenu(
628
+ @grok.decorators.param({name: 'table', options: {description: 'Input data table'}})table: DG.DataFrame,
629
+ @grok.decorators.param({name: 'molecules', options: {semType: 'Molecule', description: 'Molecule column'}})molecules: DG.Column,
630
+ ) {
631
+ // collect current monomer library
632
+ const monomerLib = _package.monomerLib;
633
+ const libJSON = JSON.stringify(monomerLib.toJSON());
634
+ await api.scripts.molToHelmConverterPy(table, molecules, libJSON);
635
+
636
+ // semtype is not automatically set, so we set it manually
637
+ const newCol = table.columns.toList().find((c) => c.name.toLowerCase().includes('regenerated sequence') && c.semType !== DG.SEMTYPE.MACROMOLECULE);
638
+ if (newCol) {
639
+ newCol.meta.units = NOTATION.HELM;
640
+ newCol.semType = DG.SEMTYPE.MACROMOLECULE;
641
+ newCol.setTag('cell.renderer', 'helm');
642
+ }
643
+ }
644
+
645
+
620
646
  @grok.decorators.func({
621
647
  name: 'To Atomic Level',
622
648
  description: 'Converts sequences to molblocks',
@@ -1,3 +1,5 @@
1
+ /* eslint-disable rxjs/no-async-subscribe */
2
+ /* eslint-disable rxjs/no-ignored-subscription */
1
3
  /* eslint-disable max-lines */
2
4
  /* Do not change these import lines to match external modules in webpack configuration */
3
5
  import * as grok from 'datagrok-api/grok';
@@ -5,7 +7,7 @@ import * as ui from 'datagrok-api/ui';
5
7
  import * as DG from 'datagrok-api/dg';
6
8
 
7
9
  import $ from 'cash-dom';
8
- import {Subject} from 'rxjs';
10
+ import {Subject, Subscription} from 'rxjs';
9
11
 
10
12
  import {
11
13
  getUserLibSettings, setUserLibSettings
@@ -67,10 +69,12 @@ class MonomerLibraryManagerWidget {
67
69
  instance._widget = await instance.createWidget();
68
70
  }
69
71
 
72
+ private _fileUploadSubscription: Subscription | null = null;
70
73
  private async createWidget() {
71
74
  const content = await this.getWidgetContent();
72
75
  const monomerLibHelper = await getMonomerLibHelper();
73
- // eslint-disable-next-line rxjs/no-ignored-subscription
76
+ this._fileUploadSubscription?.unsubscribe();
77
+ this._fileUploadSubscription =
74
78
  monomerLibHelper.fileUploadRequested.subscribe(
75
79
  () => this.promptToAddLibraryFiles()
76
80
  );
@@ -92,14 +96,29 @@ class MonomerLibraryManagerWidget {
92
96
  accept: '.json',
93
97
  open: async (selectedFile) => {
94
98
  const doAdd = async (provider: IMonomerLibProvider) => {
95
- const content = await selectedFile.text();
96
99
  const name = selectedFile.name;
100
+ const existingLibs = await provider.listLibraries();
101
+ // check if library already exists
102
+ if (existingLibs.includes(name)) {
103
+ const confirm = await new Promise<boolean>((resolve) => {
104
+ ui.dialog('Confirm Library Update')
105
+ .add(ui.divText(`Library '${name}' already exists. Do you want to overwrite it?`))
106
+ .onOK(() => resolve(true))
107
+ .onCancel(() => resolve(false))
108
+ .show();
109
+ });
110
+ if (!confirm)
111
+ return;
112
+ }
113
+
114
+ const content = await selectedFile.text();
97
115
  const progressIndicator = DG.TaskBarProgressIndicator.create(`Adding ${name} as a monomer library`);
98
116
  try {
99
117
  await provider.addOrUpdateLibraryString(name, content);
100
118
  // this.eventManager.updateLibrarySelectionStatus(name, true);
101
119
  } catch (e) {
102
120
  grok.shell.error(`File ${name} is not a valid monomer library, verify it is aligned to HELM JSON schema.`);
121
+ console.error(e);
103
122
  } finally {
104
123
  progressIndicator.close();
105
124
  }
@@ -122,7 +141,7 @@ class MonomerLibraryManagerWidget {
122
141
  .onOK(async () => {
123
142
  const provider = providers.find((p) => p.name === providersInput.value)!; // should not be null
124
143
  await doAdd(provider);
125
- });
144
+ }).show();
126
145
  },
127
146
  });
128
147
  }
@@ -62,24 +62,44 @@ export async function standardiseMonomers(monomers: Monomer[]) {
62
62
 
63
63
  /// matches molecules in the dataframe with monomers in the library by canonical smiles
64
64
  export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
65
+ const duplicates = monomerLib.duplicateMonomers?.[polymerType] ?? {};
65
66
  const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
66
67
  if (!converterFunc)
67
68
  throw new Error('Function convertMoleculeNotation not found, please install Chem package');
68
69
  // first: standardize monomers
69
70
  const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
70
71
  const fixedMonomers = await standardiseMonomers(monomers);
71
- const cappedSmilse = fixedMonomers.map((m, i) => ({sym: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source})).filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
72
+ const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
73
+ acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
74
+ }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
75
+ const cappedMonomerSmiles = fixedMonomers.map((m, i) => ({symbol: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source}))
76
+ .filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
77
+
72
78
  // canonicalize all monomer smiles
73
- const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilse.map((m) => m.smiles!));
79
+ const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedMonomerSmiles.map((m) => m.smiles!));
74
80
  monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
75
81
  const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
76
82
  if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
77
83
  throw new Error('Error canonicalizing monomer smiles');
78
- canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedSmilse[i].smiles = s);
84
+ canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedMonomerSmiles[i].smiles = s);
85
+ const cappedMonomerSmilesMap = cappedMonomerSmiles.reduce((acc, m) => { acc[m.smiles] = m; return acc; }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
79
86
 
80
- const molecules = molDf.col(molColName)!;
81
- const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
82
- if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
87
+ const moleculesOriginalCol = molDf.col(molColName)!;
88
+ const correctedOriginalList = moleculesOriginalCol.toList().map((s) => {
89
+ if (!s) return s;
90
+ try {
91
+ const isMolBlock = s.includes('\n');
92
+ return getCorrectedSmiles([], isMolBlock ? undefined : s, isMolBlock ? s : undefined);
93
+ } catch (_e) {
94
+ return s;
95
+ }
96
+ });
97
+ const moleculesOriginalColCorrected = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MoleculesOriginalCorrected', correctedOriginalList);
98
+ // create dummy df
99
+ moleculesOriginalColCorrected.semType = DG.SEMTYPE.MOLECULE;
100
+ const _ddf = DG.DataFrame.fromColumns([moleculesOriginalColCorrected]);
101
+ const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: moleculesOriginalColCorrected, targetNotation: DG.chem.Notation.Smiles});
102
+ if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== moleculesOriginalColCorrected.length)
83
103
  throw new Error('Error canonicalizing molecules');
84
104
 
85
105
  const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
@@ -95,13 +115,14 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
95
115
  for (let i = 0; i < canonicalizedMolecules.length; i++) {
96
116
  const mol = canonicalizedMolecules[i];
97
117
  if (!mol) continue;
98
- for (let j = 0; j < cappedSmilse.length; j++) {
99
- if (cappedSmilse[j].smiles === mol) {
100
- matchingMonomerSmilesCol.set(i, cappedSmilse[j].original!, false);
101
- matchingMonomerSymbolCol.set(i, cappedSmilse[j].sym, false);
102
- sourceLibCol.set(i, cappedSmilse[j].source ?? '', false);
103
- break;
104
- }
118
+ const match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
119
+ if (match) {
120
+ const matchSymbol = match.symbol;
121
+ const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');
122
+ const originalSmiles = match.original ?? match.smiles;
123
+ matchingMonomerSmilesCol.set(i, originalSmiles, false);
124
+ matchingMonomerSymbolCol.set(i, matchSymbol, false);
125
+ sourceLibCol.set(i, sources, false);
105
126
  }
106
127
  }
107
128
  return resultDf;
@@ -1049,21 +1049,30 @@ function joinToHelm(srcSS: ISeqSplitted, wrappers: string[], isDnaOrRna: boolean
1049
1049
  }
1050
1050
 
1051
1051
  function joinToBiln(srcSS: ISeqSplitted): string {
1052
+ const needsSquareBrackets = (cm: string | null) => {
1053
+ return cm && (cm.includes('-') || cm.includes('*') || cm.includes('[R'));
1054
+ };
1055
+
1052
1056
  if (!srcSS.graphInfo || !((srcSS.graphInfo.connections?.length ?? 0) > 0)) {
1053
1057
  const resOMList: string[] = new Array<string>(srcSS.length);
1054
1058
  for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
1055
- resOMList[posIdx] = srcSS.getCanonical(posIdx);
1056
- if (resOMList[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
1057
- resOMList[posIdx] = `[${resOMList[posIdx]}]`;
1059
+ const canonical = srcSS.getCanonical(posIdx);
1060
+ if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []. also there might be smiles in there, where Rs are represented as '*' or R
1061
+ resOMList[posIdx] = `[${canonical}]`;
1062
+ else
1063
+ resOMList[posIdx] = canonical;
1058
1064
  }
1059
1065
  return resOMList.join('-'); // Biln uses '-' as a separator
1060
1066
  } else { // conversion happens only if there is a graph info
1061
1067
  const disjointSequenceIdxs = srcSS.graphInfo.disjointSeqStarts;
1062
1068
  const allSeqParts = new Array<string>(srcSS.length);
1063
1069
  for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
1064
- allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
1065
- if (allSeqParts[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
1066
- allSeqParts[posIdx] = `[${allSeqParts[posIdx]}]`;
1070
+ const canonical = srcSS.getCanonical(posIdx);
1071
+ // allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
1072
+ if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []
1073
+ allSeqParts[posIdx] = `[${canonical}]`;
1074
+ else
1075
+ allSeqParts[posIdx] = canonical;
1067
1076
  }
1068
1077
  for (let i = 0; i < srcSS.graphInfo.connections.length; i++) {
1069
1078
  const conn: ISeqConnection = srcSS.graphInfo.connections[i];