@datagrok/bio 2.25.0 → 2.25.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/detectors.js +26 -12
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/scripts/mol-to-helm.py +1279 -0
- package/src/package-api.ts +14 -0
- package/src/package.g.ts +9 -0
- package/src/package.ts +27 -1
- package/src/utils/monomer-lib/library-file-manager/ui.ts +23 -4
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +34 -13
- package/src/utils/seq-helper/seq-handler.ts +15 -6
- package/src/widgets/sequence-scrolling-widget.ts +195 -183
- package/test-console-output-1.log +774 -766
- package/test-record-1.mp4 +0 -0
package/src/package-api.ts
CHANGED
|
@@ -12,6 +12,13 @@ export namespace scripts {
|
|
|
12
12
|
return await grok.functions.call('Bio:Embed', { molecule });
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
/**
|
|
16
|
+
Converts molecules to HELM notation based on monomer library
|
|
17
|
+
*/
|
|
18
|
+
export async function molToHelmConverterPy(moleculesDataframe: DG.DataFrame , moleculesColumn: DG.Column , libraryJSON: string ): Promise<DG.DataFrame> {
|
|
19
|
+
return await grok.functions.call('Bio:MolToHelmConverterPy', { moleculesDataframe, moleculesColumn, libraryJSON });
|
|
20
|
+
}
|
|
21
|
+
|
|
15
22
|
/**
|
|
16
23
|
Create the model peptides/DNA sequences with peptides data
|
|
17
24
|
*/
|
|
@@ -172,6 +179,13 @@ export namespace funcs {
|
|
|
172
179
|
return await grok.functions.call('Bio:SequenceSpaceTopMenu', { table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo });
|
|
173
180
|
}
|
|
174
181
|
|
|
182
|
+
/**
|
|
183
|
+
Converts Peptide molecules to HELM notation by matching with monomer library
|
|
184
|
+
*/
|
|
185
|
+
export async function moleculesToHelmTopMenu(table: DG.DataFrame , molecules: DG.Column ): Promise<void> {
|
|
186
|
+
return await grok.functions.call('Bio:MoleculesToHelmTopMenu', { table, molecules });
|
|
187
|
+
}
|
|
188
|
+
|
|
175
189
|
/**
|
|
176
190
|
Converts sequences to molblocks
|
|
177
191
|
*/
|
package/src/package.g.ts
CHANGED
|
@@ -264,6 +264,15 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
264
264
|
return await PackageFunctions.sequenceSpaceTopMenu(table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo);
|
|
265
265
|
}
|
|
266
266
|
|
|
267
|
+
//name: Molecules to HELM
|
|
268
|
+
//description: Converts Peptide molecules to HELM notation by matching with monomer library
|
|
269
|
+
//input: dataframe table { description: Input data table }
|
|
270
|
+
//input: column molecules { semType: Molecule; description: Molecule column }
|
|
271
|
+
//top-menu: Bio | Transform | Molecules to HELM...
|
|
272
|
+
export async function moleculesToHelmTopMenu(table: DG.DataFrame, molecules: DG.Column) : Promise<void> {
|
|
273
|
+
await PackageFunctions.moleculesToHelmTopMenu(table, molecules);
|
|
274
|
+
}
|
|
275
|
+
|
|
267
276
|
//name: To Atomic Level
|
|
268
277
|
//description: Converts sequences to molblocks
|
|
269
278
|
//input: dataframe table { description: Input data table }
|
package/src/package.ts
CHANGED
|
@@ -78,10 +78,12 @@ import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atom
|
|
|
78
78
|
import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
|
|
79
79
|
import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
80
80
|
import {BilnNotationProvider} from './utils/biln';
|
|
81
|
-
|
|
81
|
+
|
|
82
|
+
import * as api from './package-api';
|
|
82
83
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
83
84
|
export * from './package.g';
|
|
84
85
|
|
|
86
|
+
|
|
85
87
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
86
88
|
// let monomerLib: MonomerLib | null = null;
|
|
87
89
|
let initBioPromise: Promise<void> | null = null;
|
|
@@ -617,6 +619,30 @@ export class PackageFunctions {
|
|
|
617
619
|
return res;
|
|
618
620
|
}
|
|
619
621
|
|
|
622
|
+
@grok.decorators.func({
|
|
623
|
+
name: 'Molecules to HELM',
|
|
624
|
+
'top-menu': 'Bio | Transform | Molecules to HELM...',
|
|
625
|
+
description: 'Converts Peptide molecules to HELM notation by matching with monomer library',
|
|
626
|
+
})
|
|
627
|
+
static async moleculesToHelmTopMenu(
|
|
628
|
+
@grok.decorators.param({name: 'table', options: {description: 'Input data table'}})table: DG.DataFrame,
|
|
629
|
+
@grok.decorators.param({name: 'molecules', options: {semType: 'Molecule', description: 'Molecule column'}})molecules: DG.Column,
|
|
630
|
+
) {
|
|
631
|
+
// collect current monomer library
|
|
632
|
+
const monomerLib = _package.monomerLib;
|
|
633
|
+
const libJSON = JSON.stringify(monomerLib.toJSON());
|
|
634
|
+
await api.scripts.molToHelmConverterPy(table, molecules, libJSON);
|
|
635
|
+
|
|
636
|
+
// semtype is not automatically set, so we set it manually
|
|
637
|
+
const newCol = table.columns.toList().find((c) => c.name.toLowerCase().includes('regenerated sequence') && c.semType !== DG.SEMTYPE.MACROMOLECULE);
|
|
638
|
+
if (newCol) {
|
|
639
|
+
newCol.meta.units = NOTATION.HELM;
|
|
640
|
+
newCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
641
|
+
newCol.setTag('cell.renderer', 'helm');
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
|
|
620
646
|
@grok.decorators.func({
|
|
621
647
|
name: 'To Atomic Level',
|
|
622
648
|
description: 'Converts sequences to molblocks',
|
package/src/utils/monomer-lib/library-file-manager/ui.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
/* eslint-disable rxjs/no-async-subscribe */
|
|
2
|
+
/* eslint-disable rxjs/no-ignored-subscription */
|
|
1
3
|
/* eslint-disable max-lines */
|
|
2
4
|
/* Do not change these import lines to match external modules in webpack configuration */
|
|
3
5
|
import * as grok from 'datagrok-api/grok';
|
|
@@ -5,7 +7,7 @@ import * as ui from 'datagrok-api/ui';
|
|
|
5
7
|
import * as DG from 'datagrok-api/dg';
|
|
6
8
|
|
|
7
9
|
import $ from 'cash-dom';
|
|
8
|
-
import {Subject} from 'rxjs';
|
|
10
|
+
import {Subject, Subscription} from 'rxjs';
|
|
9
11
|
|
|
10
12
|
import {
|
|
11
13
|
getUserLibSettings, setUserLibSettings
|
|
@@ -67,10 +69,12 @@ class MonomerLibraryManagerWidget {
|
|
|
67
69
|
instance._widget = await instance.createWidget();
|
|
68
70
|
}
|
|
69
71
|
|
|
72
|
+
private _fileUploadSubscription: Subscription | null = null;
|
|
70
73
|
private async createWidget() {
|
|
71
74
|
const content = await this.getWidgetContent();
|
|
72
75
|
const monomerLibHelper = await getMonomerLibHelper();
|
|
73
|
-
|
|
76
|
+
this._fileUploadSubscription?.unsubscribe();
|
|
77
|
+
this._fileUploadSubscription =
|
|
74
78
|
monomerLibHelper.fileUploadRequested.subscribe(
|
|
75
79
|
() => this.promptToAddLibraryFiles()
|
|
76
80
|
);
|
|
@@ -92,14 +96,29 @@ class MonomerLibraryManagerWidget {
|
|
|
92
96
|
accept: '.json',
|
|
93
97
|
open: async (selectedFile) => {
|
|
94
98
|
const doAdd = async (provider: IMonomerLibProvider) => {
|
|
95
|
-
const content = await selectedFile.text();
|
|
96
99
|
const name = selectedFile.name;
|
|
100
|
+
const existingLibs = await provider.listLibraries();
|
|
101
|
+
// check if library already exists
|
|
102
|
+
if (existingLibs.includes(name)) {
|
|
103
|
+
const confirm = await new Promise<boolean>((resolve) => {
|
|
104
|
+
ui.dialog('Confirm Library Update')
|
|
105
|
+
.add(ui.divText(`Library '${name}' already exists. Do you want to overwrite it?`))
|
|
106
|
+
.onOK(() => resolve(true))
|
|
107
|
+
.onCancel(() => resolve(false))
|
|
108
|
+
.show();
|
|
109
|
+
});
|
|
110
|
+
if (!confirm)
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
const content = await selectedFile.text();
|
|
97
115
|
const progressIndicator = DG.TaskBarProgressIndicator.create(`Adding ${name} as a monomer library`);
|
|
98
116
|
try {
|
|
99
117
|
await provider.addOrUpdateLibraryString(name, content);
|
|
100
118
|
// this.eventManager.updateLibrarySelectionStatus(name, true);
|
|
101
119
|
} catch (e) {
|
|
102
120
|
grok.shell.error(`File ${name} is not a valid monomer library, verify it is aligned to HELM JSON schema.`);
|
|
121
|
+
console.error(e);
|
|
103
122
|
} finally {
|
|
104
123
|
progressIndicator.close();
|
|
105
124
|
}
|
|
@@ -122,7 +141,7 @@ class MonomerLibraryManagerWidget {
|
|
|
122
141
|
.onOK(async () => {
|
|
123
142
|
const provider = providers.find((p) => p.name === providersInput.value)!; // should not be null
|
|
124
143
|
await doAdd(provider);
|
|
125
|
-
});
|
|
144
|
+
}).show();
|
|
126
145
|
},
|
|
127
146
|
});
|
|
128
147
|
}
|
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts
CHANGED
|
@@ -62,24 +62,44 @@ export async function standardiseMonomers(monomers: Monomer[]) {
|
|
|
62
62
|
|
|
63
63
|
/// matches molecules in the dataframe with monomers in the library by canonical smiles
|
|
64
64
|
export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
|
|
65
|
+
const duplicates = monomerLib.duplicateMonomers?.[polymerType] ?? {};
|
|
65
66
|
const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
|
|
66
67
|
if (!converterFunc)
|
|
67
68
|
throw new Error('Function convertMoleculeNotation not found, please install Chem package');
|
|
68
69
|
// first: standardize monomers
|
|
69
70
|
const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
|
|
70
71
|
const fixedMonomers = await standardiseMonomers(monomers);
|
|
71
|
-
const
|
|
72
|
+
const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
|
|
73
|
+
acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
|
|
74
|
+
}, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
|
|
75
|
+
const cappedMonomerSmiles = fixedMonomers.map((m, i) => ({symbol: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source}))
|
|
76
|
+
.filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
|
|
77
|
+
|
|
72
78
|
// canonicalize all monomer smiles
|
|
73
|
-
const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles',
|
|
79
|
+
const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedMonomerSmiles.map((m) => m.smiles!));
|
|
74
80
|
monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
|
|
75
81
|
const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
|
|
76
82
|
if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
|
|
77
83
|
throw new Error('Error canonicalizing monomer smiles');
|
|
78
|
-
canonicalizedMonomersSmilesCol.toList().forEach((s, i) =>
|
|
84
|
+
canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedMonomerSmiles[i].smiles = s);
|
|
85
|
+
const cappedMonomerSmilesMap = cappedMonomerSmiles.reduce((acc, m) => { acc[m.smiles] = m; return acc; }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
|
|
79
86
|
|
|
80
|
-
const
|
|
81
|
-
const
|
|
82
|
-
|
|
87
|
+
const moleculesOriginalCol = molDf.col(molColName)!;
|
|
88
|
+
const correctedOriginalList = moleculesOriginalCol.toList().map((s) => {
|
|
89
|
+
if (!s) return s;
|
|
90
|
+
try {
|
|
91
|
+
const isMolBlock = s.includes('\n');
|
|
92
|
+
return getCorrectedSmiles([], isMolBlock ? undefined : s, isMolBlock ? s : undefined);
|
|
93
|
+
} catch (_e) {
|
|
94
|
+
return s;
|
|
95
|
+
}
|
|
96
|
+
});
|
|
97
|
+
const moleculesOriginalColCorrected = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MoleculesOriginalCorrected', correctedOriginalList);
|
|
98
|
+
// create dummy df
|
|
99
|
+
moleculesOriginalColCorrected.semType = DG.SEMTYPE.MOLECULE;
|
|
100
|
+
const _ddf = DG.DataFrame.fromColumns([moleculesOriginalColCorrected]);
|
|
101
|
+
const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: moleculesOriginalColCorrected, targetNotation: DG.chem.Notation.Smiles});
|
|
102
|
+
if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== moleculesOriginalColCorrected.length)
|
|
83
103
|
throw new Error('Error canonicalizing molecules');
|
|
84
104
|
|
|
85
105
|
const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
|
|
@@ -95,13 +115,14 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
|
|
|
95
115
|
for (let i = 0; i < canonicalizedMolecules.length; i++) {
|
|
96
116
|
const mol = canonicalizedMolecules[i];
|
|
97
117
|
if (!mol) continue;
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
118
|
+
const match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
|
|
119
|
+
if (match) {
|
|
120
|
+
const matchSymbol = match.symbol;
|
|
121
|
+
const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');
|
|
122
|
+
const originalSmiles = match.original ?? match.smiles;
|
|
123
|
+
matchingMonomerSmilesCol.set(i, originalSmiles, false);
|
|
124
|
+
matchingMonomerSymbolCol.set(i, matchSymbol, false);
|
|
125
|
+
sourceLibCol.set(i, sources, false);
|
|
105
126
|
}
|
|
106
127
|
}
|
|
107
128
|
return resultDf;
|
package/src/utils/seq-helper/seq-handler.ts
CHANGED
|
@@ -1049,21 +1049,30 @@ function joinToHelm(srcSS: ISeqSplitted, wrappers: string[], isDnaOrRna: boolean
|
|
|
1049
1049
|
}
|
|
1050
1050
|
|
|
1051
1051
|
function joinToBiln(srcSS: ISeqSplitted): string {
|
|
1052
|
+
const needsSquareBrackets = (cm: string | null) => {
|
|
1053
|
+
return cm && (cm.includes('-') || cm.includes('*') || cm.includes('[R'));
|
|
1054
|
+
};
|
|
1055
|
+
|
|
1052
1056
|
if (!srcSS.graphInfo || !((srcSS.graphInfo.connections?.length ?? 0) > 0)) {
|
|
1053
1057
|
const resOMList: string[] = new Array<string>(srcSS.length);
|
|
1054
1058
|
for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
|
|
1055
|
-
|
|
1056
|
-
if (
|
|
1057
|
-
resOMList[posIdx] = `[${
|
|
1059
|
+
const canonical = srcSS.getCanonical(posIdx);
|
|
1060
|
+
if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []. also there might be smiles in there, where Rs are represented as '*' or R
|
|
1061
|
+
resOMList[posIdx] = `[${canonical}]`;
|
|
1062
|
+
else
|
|
1063
|
+
resOMList[posIdx] = canonical;
|
|
1058
1064
|
}
|
|
1059
1065
|
return resOMList.join('-'); // Biln uses '-' as a separator
|
|
1060
1066
|
} else { // conversion happens only if there is a graph info
|
|
1061
1067
|
const disjointSequenceIdxs = srcSS.graphInfo.disjointSeqStarts;
|
|
1062
1068
|
const allSeqParts = new Array<string>(srcSS.length);
|
|
1063
1069
|
for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1070
|
+
const canonical = srcSS.getCanonical(posIdx);
|
|
1071
|
+
// allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
|
|
1072
|
+
if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []
|
|
1073
|
+
allSeqParts[posIdx] = `[${canonical}]`;
|
|
1074
|
+
else
|
|
1075
|
+
allSeqParts[posIdx] = canonical;
|
|
1067
1076
|
}
|
|
1068
1077
|
for (let i = 0; i < srcSS.graphInfo.connections.length; i++) {
|
|
1069
1078
|
const conn: ISeqConnection = srcSS.graphInfo.connections[i];
|