@datagrok/bio 2.22.6 → 2.22.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -0
- package/detectors.js +3 -2
- package/dist/package-test.js +5 -5
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +4 -4
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +3 -3
- package/src/demo/bio03-atomic-level.ts +3 -3
- package/src/demo/bio05-helm-msa-sequence-space.ts +2 -2
- package/src/demo/utils.ts +2 -2
- package/src/package-api.ts +5 -8
- package/src/package.g.ts +628 -0
- package/src/package.ts +1120 -1051
- package/src/tests/renderers-test.ts +1 -1
- package/src/utils/cell-renderer.ts +2 -0
- package/src/utils/monomer-lib/monomer-lib-base.ts +17 -4
- package/src/utils/monomer-lib/monomer-lib.ts +7 -7
- package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +48 -1
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +84 -85
- package/src/viewers/web-logo-viewer.ts +2 -2
- package/src/widgets/to-atomic-level-widget.ts +3 -3
- package/test-console-output-1.log +650 -652
- package/test-record-1.mp4 +0 -0
package/src/package.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
/* eslint-disable rxjs/no-nested-subscribe */
|
|
3
3
|
/* eslint-disable max-params */
|
|
4
4
|
/* eslint-disable max-len */
|
|
5
|
-
/* eslint max-lines:
|
|
5
|
+
/* eslint max-lines: 'off' */
|
|
6
6
|
import * as grok from 'datagrok-api/grok';
|
|
7
7
|
import * as ui from 'datagrok-api/ui';
|
|
8
8
|
import * as DG from 'datagrok-api/dg';
|
|
@@ -77,437 +77,339 @@ import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomi
|
|
|
77
77
|
import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atomic-level-widget';
|
|
78
78
|
import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
|
|
79
79
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
80
|
+
export * from './package.g';
|
|
80
81
|
|
|
81
82
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
82
83
|
// let monomerLib: MonomerLib | null = null;
|
|
84
|
+
let initBioPromise: Promise<void> | null = null;
|
|
83
85
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
}
|
|
86
|
+
export class PackageFunctions {
|
|
87
|
+
@grok.decorators.func({description: 'Returns an instance of the monomer library helper', outputs: [{type: 'object', name: 'result'}]})
|
|
88
|
+
static async getMonomerLibHelper(): Promise<IMonomerLibHelper> {
|
|
89
|
+
return await MonomerLibManager.getInstance();
|
|
90
|
+
}
|
|
90
91
|
|
|
91
|
-
|
|
92
|
+
@grok.decorators.init({})
|
|
93
|
+
static async initBio(): Promise<void> {
|
|
94
|
+
if (initBioPromise === null)
|
|
95
|
+
initBioPromise = initBioInt();
|
|
92
96
|
|
|
93
|
-
|
|
94
|
-
private readonly _palette: { [m: string]: string };
|
|
95
|
-
|
|
96
|
-
constructor(palette: { [m: string]: string }) {
|
|
97
|
-
this._palette = palette;
|
|
97
|
+
await initBioPromise;
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
@grok.decorators.func({tags: ['tooltip']})
|
|
101
|
+
static sequenceTooltip(
|
|
102
|
+
@grok.decorators.param({options: {semType: 'Macromolecule'}}) col: DG.Column): DG.Widget<any> {
|
|
103
|
+
const resWidget = new MacromoleculeColumnWidget(col, _package.seqHelper);
|
|
104
|
+
const _resPromise = resWidget.init().then(() => { })
|
|
105
|
+
.catch((err: any) => {
|
|
106
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
107
|
+
grok.shell.error(errMsg);
|
|
108
|
+
});
|
|
109
|
+
return resWidget;
|
|
102
110
|
}
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
let initBioPromise: Promise<void> | null = null;
|
|
106
|
-
|
|
107
|
-
//tags: init
|
|
108
|
-
export async function initBio(): Promise<void> {
|
|
109
|
-
if (initBioPromise === null)
|
|
110
|
-
initBioPromise = initBioInt();
|
|
111
|
-
|
|
112
|
-
await initBioPromise;
|
|
113
|
-
}
|
|
114
111
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
// very important that loading should happen in correct order!
|
|
120
|
-
// first make sure chem and rdkit module are loaded
|
|
121
|
-
const rdKitModule = await getRdKitModule();
|
|
122
|
-
// then load package settings
|
|
123
|
-
const pkgProps = await _package.getProperties();
|
|
124
|
-
const bioPkgProps = new BioPackageProperties(pkgProps);
|
|
125
|
-
_package.properties = bioPkgProps;
|
|
126
|
-
// then load monomer lib
|
|
127
|
-
const libHelper = await MonomerLibManager.getInstance();
|
|
128
|
-
// Fix user lib settings for explicit stuck from a terminated test
|
|
129
|
-
const libSettings = await getUserLibSettings();
|
|
130
|
-
if (libSettings.explicit) {
|
|
131
|
-
libSettings.explicit = [];
|
|
132
|
-
await setUserLibSettings(libSettings);
|
|
112
|
+
// Keep for backward compatibility
|
|
113
|
+
@grok.decorators.func({outputs: [{type: 'object', name: 'monomerLib'}]})
|
|
114
|
+
static getBioLib(): IMonomerLib {
|
|
115
|
+
return _package.monomerLib;
|
|
133
116
|
}
|
|
134
|
-
await libHelper.awaitLoaded(Infinity);
|
|
135
|
-
if (!libHelper.initialLoadCompleted)
|
|
136
|
-
await libHelper.loadMonomerLib();
|
|
137
|
-
// Do not wait for monomers and sets loaded
|
|
138
|
-
libHelper.loadMonomerSets();
|
|
139
|
-
const monomerLib = libHelper.getMonomerLib();
|
|
140
|
-
const monomerSets = libHelper.getMonomerSets();
|
|
141
|
-
// finally log
|
|
142
|
-
const t2: number = window.performance.now();
|
|
143
|
-
_package.logger.debug(`${logPrefix}, loading ET: ${t2 - t1} ms`);
|
|
144
|
-
|
|
145
|
-
// const monomers: string[] = [];
|
|
146
|
-
// const logPs: number[] = [];
|
|
147
117
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
// Object.keys(series).forEach((symbol) => {
|
|
154
|
-
// monomers.push(symbol);
|
|
155
|
-
// const block = series[symbol].replaceAll('#R', 'O ');
|
|
156
|
-
// const mol = rdKitModule.get_mol(block);
|
|
157
|
-
// const logP = JSON.parse(mol.get_descriptors()).CrippenClogP;
|
|
158
|
-
// logPs.push(logP);
|
|
159
|
-
// mol?.delete();
|
|
160
|
-
// });
|
|
161
|
-
|
|
162
|
-
// const sum = logPs.reduce((a, b) => a + b, 0);
|
|
163
|
-
// const avg = (sum / logPs.length) || 0;
|
|
118
|
+
@grok.decorators.func({outputs: [{type: 'object', name: 'result'}]})
|
|
119
|
+
static getSeqHandler(
|
|
120
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequence: DG.Column<string>): ISeqHandler {
|
|
121
|
+
return _package.seqHelper.getSeqHandler(sequence);
|
|
122
|
+
}
|
|
164
123
|
|
|
165
|
-
//
|
|
166
|
-
|
|
167
|
-
|
|
124
|
+
// -- Panels --
|
|
125
|
+
|
|
126
|
+
@grok.decorators.panel({name: 'Bioinformatics | Get Region', description: 'Creates a new column with sequences of the region between start and end'})
|
|
127
|
+
static getRegionPanel(
|
|
128
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) seqCol: DG.Column<string>): DG.Widget {
|
|
129
|
+
const funcName: string = 'getRegionTopMenu';
|
|
130
|
+
const funcList = DG.Func.find({package: _package.name, name: funcName});
|
|
131
|
+
if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
|
|
132
|
+
const func = funcList[0];
|
|
133
|
+
const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
|
|
134
|
+
const funcEditor = new GetRegionFuncEditor(funcCall, _package.seqHelper);
|
|
135
|
+
return funcEditor.widget();
|
|
136
|
+
}
|
|
168
137
|
|
|
169
|
-
|
|
138
|
+
@grok.decorators.panel({name: 'Bioinformatics | Manage Monomer Libraries', tags: ['exclude-actions-panel']})
|
|
139
|
+
static async libraryPanel(
|
|
140
|
+
@grok.decorators.param({name: 'seqColumn', options: {semType: 'Macromolecule'}}) _seqColumn: DG.Column): Promise<DG.Widget> {
|
|
141
|
+
// return getLibraryPanelUI();
|
|
142
|
+
return getMonomerLibraryManagerLink();
|
|
143
|
+
}
|
|
170
144
|
|
|
171
|
-
|
|
172
|
-
handleSequenceHeaderRendering();
|
|
173
|
-
}
|
|
145
|
+
// -- Func Editors --
|
|
174
146
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
const _resPromise = resWidget.init().then(() => { })
|
|
182
|
-
.catch((err: any) => {
|
|
147
|
+
@grok.decorators.editor({})
|
|
148
|
+
static GetRegionEditor(call: DG.FuncCall): void {
|
|
149
|
+
try {
|
|
150
|
+
const funcEditor = new GetRegionFuncEditor(call, _package.seqHelper);
|
|
151
|
+
funcEditor.dialog();
|
|
152
|
+
} catch (err: any) {
|
|
183
153
|
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
}
|
|
188
|
-
|
|
189
|
-
// Keep for backward compatibility
|
|
190
|
-
//name: getBioLib
|
|
191
|
-
//output: object monomerLib
|
|
192
|
-
export function getBioLib(): IMonomerLib {
|
|
193
|
-
return _package.monomerLib;
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
//name: getSeqHandler
|
|
197
|
-
//input: column sequence { semType: Macromolecule }
|
|
198
|
-
//output: object result
|
|
199
|
-
export function getSeqHandler(sequence: DG.Column<string>): ISeqHandler {
|
|
200
|
-
return _package.seqHelper.getSeqHandler(sequence);
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
// -- Panels --
|
|
204
|
-
|
|
205
|
-
//name: Bioinformatics | Get Region
|
|
206
|
-
//description: Creates a new column with sequences of the region between start and end
|
|
207
|
-
//tags: panel
|
|
208
|
-
//input: column seqCol {semType: Macromolecule}
|
|
209
|
-
//output: widget result
|
|
210
|
-
export function getRegionPanel(seqCol: DG.Column<string>): DG.Widget {
|
|
211
|
-
const funcName: string = 'getRegionTopMenu';
|
|
212
|
-
const funcList = DG.Func.find({package: _package.name, name: funcName});
|
|
213
|
-
if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
|
|
214
|
-
const func = funcList[0];
|
|
215
|
-
const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
|
|
216
|
-
const funcEditor = new GetRegionFuncEditor(funcCall, _package.seqHelper);
|
|
217
|
-
return funcEditor.widget();
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
//name: Bioinformatics | Manage Monomer Libraries
|
|
221
|
-
//description:
|
|
222
|
-
//tags: panel, exclude-actions-panel
|
|
223
|
-
//input: column seqColumn {semType: Macromolecule}
|
|
224
|
-
//output: widget result
|
|
225
|
-
export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
226
|
-
// return getLibraryPanelUI();
|
|
227
|
-
return getMonomerLibraryManagerLink();
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// -- Func Editors --
|
|
231
|
-
|
|
232
|
-
//name: GetRegionEditor
|
|
233
|
-
//tags: editor
|
|
234
|
-
//input: funccall call
|
|
235
|
-
export function GetRegionEditor(call: DG.FuncCall): void {
|
|
236
|
-
try {
|
|
237
|
-
const funcEditor = new GetRegionFuncEditor(call, _package.seqHelper);
|
|
238
|
-
funcEditor.dialog();
|
|
239
|
-
} catch (err: any) {
|
|
240
|
-
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
241
|
-
const errStack = err instanceof Error ? err.stack : undefined;
|
|
242
|
-
grok.shell.error(`Get region editor error: ${errMsg}`);
|
|
243
|
-
_package.logger.error(errMsg, undefined, errStack);
|
|
154
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
155
|
+
grok.shell.error(`Get region editor error: ${errMsg}`);
|
|
156
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
157
|
+
}
|
|
244
158
|
}
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
//name: SplitToMonomersEditor
|
|
248
|
-
//tags: editor
|
|
249
|
-
//input: funccall call
|
|
250
|
-
export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
251
|
-
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
252
|
-
ui.dialog({title: 'Split to Monomers'})
|
|
253
|
-
.add(funcEditor.paramsUI)
|
|
254
|
-
.onOK(async () => {
|
|
255
|
-
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
256
|
-
})
|
|
257
|
-
.show();
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
//name: SequenceSpaceEditor
|
|
261
|
-
//tags: editor
|
|
262
|
-
//input: funccall call
|
|
263
|
-
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
264
|
-
const funcEditor = new DimReductionBaseEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
265
|
-
const dialog = ui.dialog({title: 'Sequence Space'})
|
|
266
|
-
.add(funcEditor.getEditor())
|
|
267
|
-
.onOK(async () => {
|
|
268
|
-
const params = funcEditor.getParams();
|
|
269
|
-
return call.func.prepare({
|
|
270
|
-
molecules: params.col,
|
|
271
|
-
table: params.table,
|
|
272
|
-
methodName: params.methodName,
|
|
273
|
-
similarityMetric: params.similarityMetric,
|
|
274
|
-
plotEmbeddings: params.plotEmbeddings,
|
|
275
|
-
options: params.options,
|
|
276
|
-
preprocessingFunction: params.preprocessingFunction,
|
|
277
|
-
clusterEmbeddings: params.clusterEmbeddings,
|
|
278
|
-
}).call();
|
|
279
|
-
});
|
|
280
|
-
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
281
|
-
dialog.show();
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
//name: SeqActivityCliffsEditor
|
|
285
|
-
//tags: editor
|
|
286
|
-
//input: funccall call
|
|
287
|
-
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
288
|
-
const funcEditor = new ActivityCliffsEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
289
|
-
const dialog = ui.dialog({title: 'Activity Cliffs'})
|
|
290
|
-
.add(funcEditor.getEditor())
|
|
291
|
-
.onOK(async () => {
|
|
292
|
-
const params = funcEditor.getParams();
|
|
293
|
-
return call.func.prepare({
|
|
294
|
-
table: params.table,
|
|
295
|
-
molecules: params.col,
|
|
296
|
-
activities: params.activities,
|
|
297
|
-
similarity: params.similarityThreshold,
|
|
298
|
-
methodName: params.methodName,
|
|
299
|
-
similarityMetric: params.similarityMetric,
|
|
300
|
-
preprocessingFunction: params.preprocessingFunction,
|
|
301
|
-
options: params.options,
|
|
302
|
-
}).call();
|
|
303
|
-
});
|
|
304
|
-
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
305
|
-
dialog.show();
|
|
306
|
-
}
|
|
307
159
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
// return widget as DG.Widget;
|
|
319
|
-
// }
|
|
160
|
+
@grok.decorators.editor({})
|
|
161
|
+
static SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
162
|
+
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
163
|
+
ui.dialog({title: 'Split to Monomers'})
|
|
164
|
+
.add(funcEditor.paramsUI)
|
|
165
|
+
.onOK(async () => {
|
|
166
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
167
|
+
})
|
|
168
|
+
.show();
|
|
169
|
+
}
|
|
320
170
|
|
|
321
|
-
|
|
171
|
+
@grok.decorators.editor({})
|
|
172
|
+
static SequenceSpaceEditor(call: DG.FuncCall) {
|
|
173
|
+
const funcEditor = new DimReductionBaseEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
174
|
+
const dialog = ui.dialog({title: 'Sequence Space'})
|
|
175
|
+
.add(funcEditor.getEditor())
|
|
176
|
+
.onOK(async () => {
|
|
177
|
+
const params = funcEditor.getParams();
|
|
178
|
+
return call.func.prepare({
|
|
179
|
+
molecules: params.col,
|
|
180
|
+
table: params.table,
|
|
181
|
+
methodName: params.methodName,
|
|
182
|
+
similarityMetric: params.similarityMetric,
|
|
183
|
+
plotEmbeddings: params.plotEmbeddings,
|
|
184
|
+
options: params.options,
|
|
185
|
+
preprocessingFunction: params.preprocessingFunction,
|
|
186
|
+
clusterEmbeddings: params.clusterEmbeddings,
|
|
187
|
+
}).call();
|
|
188
|
+
});
|
|
189
|
+
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
190
|
+
dialog.show();
|
|
191
|
+
}
|
|
322
192
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
193
|
+
@grok.decorators.editor({})
|
|
194
|
+
static SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
195
|
+
const funcEditor = new ActivityCliffsEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
196
|
+
const dialog = ui.dialog({title: 'Activity Cliffs'})
|
|
197
|
+
.add(funcEditor.getEditor())
|
|
198
|
+
.onOK(async () => {
|
|
199
|
+
const params = funcEditor.getParams();
|
|
200
|
+
return call.func.prepare({
|
|
201
|
+
table: params.table,
|
|
202
|
+
molecules: params.col,
|
|
203
|
+
activities: params.activities,
|
|
204
|
+
similarity: params.similarityThreshold,
|
|
205
|
+
methodName: params.methodName,
|
|
206
|
+
similarityMetric: params.similarityMetric,
|
|
207
|
+
preprocessingFunction: params.preprocessingFunction,
|
|
208
|
+
options: params.options,
|
|
209
|
+
}).call();
|
|
210
|
+
});
|
|
211
|
+
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
212
|
+
dialog.show();
|
|
213
|
+
}
|
|
331
214
|
|
|
332
|
-
//name: fastaSequenceCellRenderer
|
|
333
|
-
//tags: cellRenderer
|
|
334
|
-
//meta.cellType: sequence
|
|
335
|
-
//meta.columnTags: quality=Macromolecule, units=fasta
|
|
336
|
-
//output: grid_cell_renderer result
|
|
337
|
-
export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
338
|
-
return new MacromoleculeSequenceCellRenderer();
|
|
339
|
-
}
|
|
340
215
|
|
|
341
|
-
//
|
|
342
|
-
//tags: cellRenderer
|
|
343
|
-
//meta.cellType: sequence
|
|
344
|
-
//meta.columnTags: quality=Macromolecule, units=separator
|
|
345
|
-
//output: grid_cell_renderer result
|
|
346
|
-
export function separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
347
|
-
return new MacromoleculeSequenceCellRenderer();
|
|
348
|
-
}
|
|
216
|
+
// -- Cell renderers --
|
|
349
217
|
|
|
350
|
-
|
|
218
|
+
@grok.decorators.func({
|
|
219
|
+
name: 'customSequenceCellRenderer',
|
|
220
|
+
tags: ['cellRenderer'],
|
|
221
|
+
meta: {
|
|
222
|
+
cellType: 'sequence',
|
|
223
|
+
columnTags: 'quality=Macromolecule, units=custom'
|
|
224
|
+
},
|
|
225
|
+
outputs: [{type: 'grid_cell_renderer', name: 'result'}]
|
|
226
|
+
})
|
|
227
|
+
static customSequenceCellRenderer(): DG.GridCellRenderer {
|
|
228
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
229
|
+
}
|
|
351
230
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
}
|
|
231
|
+
@grok.decorators.func({
|
|
232
|
+
name: 'fastaSequenceCellRenderer',
|
|
233
|
+
tags: ['cellRenderer'],
|
|
234
|
+
meta: {
|
|
235
|
+
cellType: 'sequence',
|
|
236
|
+
columnTags: 'quality=Macromolecule, units=fasta'
|
|
237
|
+
},
|
|
238
|
+
outputs: [{type: 'grid_cell_renderer', name: 'result'}]
|
|
239
|
+
})
|
|
240
|
+
static fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
241
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
242
|
+
}
|
|
359
243
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
}
|
|
244
|
+
@grok.decorators.func({
|
|
245
|
+
name: 'separatorSequenceCellRenderer',
|
|
246
|
+
tags: ['cellRenderer'],
|
|
247
|
+
meta: {
|
|
248
|
+
cellType: 'sequence',
|
|
249
|
+
columnTags: 'quality=Macromolecule, units=separator'
|
|
250
|
+
},
|
|
251
|
+
outputs: [{type: 'grid_cell_renderer', name: 'result'}]
|
|
252
|
+
})
|
|
253
|
+
static separatorSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
254
|
+
return new MacromoleculeSequenceCellRenderer();
|
|
255
|
+
}
|
|
367
256
|
|
|
368
|
-
//
|
|
369
|
-
//tags: cellRenderer
|
|
370
|
-
//meta.cellType: MacromoleculeDifference
|
|
371
|
-
//meta.columnTags: quality=MacromoleculeDifference
|
|
372
|
-
//output: grid_cell_renderer result
|
|
373
|
-
export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
374
|
-
return new MacromoleculeDifferenceCellRenderer();
|
|
375
|
-
}
|
|
257
|
+
// // -- Property panels --
|
|
376
258
|
|
|
259
|
+
@grok.decorators.panel({name: 'Bioinformatics | Sequence Renderer'})
|
|
260
|
+
static macroMolColumnPropertyPanel(
|
|
261
|
+
@grok.decorators.param({options: {semType: 'Macromolecule'}}) molColumn: DG.Column): DG.Widget {
|
|
262
|
+
return getMacromoleculeColumnPropertyPanel(molColumn);
|
|
263
|
+
}
|
|
377
264
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
//input: string seq2
|
|
384
|
-
//output: object res
|
|
385
|
-
export function sequenceAlignment(alignType: string, alignTable: string, gap: number, seq1: string, seq2: string) {
|
|
386
|
-
const toAlign = new SequenceAlignment(seq1, seq2, gap, alignTable);
|
|
387
|
-
const res = alignType == 'Local alignment' ? toAlign.smithWaterman() : toAlign.needlemanWunsch();
|
|
388
|
-
return res;
|
|
389
|
-
}
|
|
265
|
+
@grok.decorators.panel({name: 'Composition analysis', tags: ['bio', 'widgets']})
|
|
266
|
+
static compositionAnalysisWidget(
|
|
267
|
+
@grok.decorators.param({options: {semType: 'Macromolecule'}}) sequence: DG.SemanticValue): DG.Widget {
|
|
268
|
+
return getCompositionAnalysisWidget(sequence, _package.monomerLib, _package.seqHelper);
|
|
269
|
+
}
|
|
390
270
|
|
|
391
|
-
|
|
271
|
+
@grok.decorators.func({
|
|
272
|
+
name: 'MacromoleculeDifferenceCellRenderer',
|
|
273
|
+
tags: ['cellRenderer'],
|
|
274
|
+
meta: {
|
|
275
|
+
cellType: 'MacromoleculeDifference',
|
|
276
|
+
columnTags: 'quality=MacromoleculeDifference'
|
|
277
|
+
},
|
|
278
|
+
outputs: [{type: 'grid_cell_renderer', name: 'result'}]
|
|
279
|
+
})
|
|
280
|
+
static macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCellRenderer {
|
|
281
|
+
return new MacromoleculeDifferenceCellRenderer();
|
|
282
|
+
}
|
|
392
283
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
284
|
+
@grok.decorators.func({outputs: [{type: 'object', name: 'result'}]})
|
|
285
|
+
static sequenceAlignment(
|
|
286
|
+
@grok.decorators.param({options: {choices: ['Local alignment', 'Global alignment']}}) alignType: string,
|
|
287
|
+
@grok.decorators.param({options: {choices: ['AUTO', 'NUCLEOTIDES', 'BLOSUM45', 'BLOSUM50', 'BLOSUM62', 'BLOSUM80', 'BLOSUM90', 'PAM30', 'PAM70', 'PAM250', 'SCHNEIDER', 'TRANS']}}) alignTable: string,
|
|
288
|
+
gap: number,
|
|
289
|
+
seq1: string,
|
|
290
|
+
seq2: string) {
|
|
291
|
+
const toAlign = new SequenceAlignment(seq1, seq2, gap, alignTable);
|
|
292
|
+
const res = alignType == 'Local alignment' ? toAlign.smithWaterman() : toAlign.needlemanWunsch();
|
|
293
|
+
return res;
|
|
294
|
+
}
|
|
401
295
|
|
|
402
|
-
//
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
}
|
|
296
|
+
// -- Viewers --
|
|
297
|
+
|
|
298
|
+
@grok.decorators.panel({
|
|
299
|
+
name: 'WebLogo',
|
|
300
|
+
description: 'WebLogo',
|
|
301
|
+
tags: ['viewer'],
|
|
302
|
+
meta: {icon: 'files/icons/weblogo-viewer.svg'},
|
|
303
|
+
outputs: [{type: 'viewer', name: 'result'}]
|
|
304
|
+
})
|
|
305
|
+
static webLogoViewer() {
|
|
306
|
+
return new WebLogoViewer();
|
|
307
|
+
}
|
|
410
308
|
|
|
309
|
+
@grok.decorators.panel({
|
|
310
|
+
name: 'VdRegions',
|
|
311
|
+
description: 'V-Domain regions viewer',
|
|
312
|
+
tags: ['viewer'],
|
|
313
|
+
meta: {icon: 'files/icons/vdregions-viewer.svg'},
|
|
314
|
+
outputs: [{type: 'viewer', name: 'result'}],
|
|
315
|
+
})
|
|
316
|
+
static vdRegionsViewer() {
|
|
317
|
+
return new VdRegionsViewer();
|
|
318
|
+
}
|
|
411
319
|
|
|
412
|
-
// -- Top menu --
|
|
413
|
-
|
|
414
|
-
//name: getRegion
|
|
415
|
-
//description: Gets a new column with sequences of the region between start and end
|
|
416
|
-
//input: column sequence
|
|
417
|
-
//input: string start {optional: true}
|
|
418
|
-
//input: string end {optional: true}
|
|
419
|
-
//input: string name {optional: true} [Name of the column to be created]
|
|
420
|
-
//output: column result
|
|
421
|
-
export function getRegion(
|
|
422
|
-
sequence: DG.Column<string>, start: string | undefined, end: string | undefined, name: string | undefined
|
|
423
|
-
): DG.Column<string> {
|
|
424
|
-
return getRegionDo(sequence,
|
|
425
|
-
start ?? null, end ?? null, name ?? null);
|
|
426
|
-
}
|
|
320
|
+
// -- Top menu --
|
|
427
321
|
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
export async function getRegionTopMenu(
|
|
438
|
-
table: DG.DataFrame, sequence: DG.Column,
|
|
439
|
-
start: string | undefined, end: string | undefined, name: string | undefined
|
|
440
|
-
): Promise<void> {
|
|
441
|
-
const regCol = getRegionDo(sequence, start ?? null, end ?? null, name ?? null);
|
|
442
|
-
sequence.dataFrame.columns.add(regCol);
|
|
443
|
-
await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
|
|
444
|
-
}
|
|
322
|
+
@grok.decorators.func({name: 'getRegion', description: 'Gets a new column with sequences of the region between start and end'})
|
|
323
|
+
static getRegion(
|
|
324
|
+
@grok.decorators.param({type: 'column'})sequence: DG.Column<string>,
|
|
325
|
+
@grok.decorators.param({type: 'string', options: {optional: true}}) start: string | undefined,
|
|
326
|
+
@grok.decorators.param({type: 'string', options: {optional: true}}) end: string | undefined,
|
|
327
|
+
@grok.decorators.param({type: 'string', options: {optional: true, description: 'Name of the column to be created'}}) name: string | undefined): DG.Column<string> {
|
|
328
|
+
return getRegionDo(sequence,
|
|
329
|
+
start ?? null, end ?? null, name ?? null);
|
|
330
|
+
}
|
|
445
331
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics, preprocessingFunction: DG.Func,
|
|
462
|
-
options?: (IUMAPOptions | ITSNEOptions) & Options, demo?: boolean): Promise<DG.Viewer | undefined> {
|
|
463
|
-
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
464
|
-
if (table !== grok.shell.tv.dataFrame) {
|
|
465
|
-
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
466
|
-
return;
|
|
467
|
-
}
|
|
468
|
-
const tv = grok.shell.tv;
|
|
469
|
-
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
470
|
-
return;
|
|
471
|
-
const axesNames = getEmbeddingColsNames(table);
|
|
472
|
-
const tags = {
|
|
473
|
-
'units': molecules.meta.units!,
|
|
474
|
-
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
475
|
-
'separator': molecules.getTag(bioTAGS.separator),
|
|
476
|
-
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
477
|
-
};
|
|
478
|
-
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
479
|
-
const seqCol = molecules;
|
|
480
|
-
|
|
481
|
-
const runCliffs = async () => {
|
|
482
|
-
const sp = await getActivityCliffs(
|
|
483
|
-
table,
|
|
484
|
-
seqCol,
|
|
485
|
-
axesNames,
|
|
486
|
-
'Activity cliffs', //scatterTitle
|
|
487
|
-
activities,
|
|
488
|
-
similarity,
|
|
489
|
-
columnDistanceMetric, //similarityMetric
|
|
490
|
-
methodName,
|
|
491
|
-
{...(options ?? {})},
|
|
492
|
-
DG.SEMTYPE.MACROMOLECULE,
|
|
493
|
-
tags,
|
|
494
|
-
preprocessingFunction,
|
|
495
|
-
createTooltipElement,
|
|
496
|
-
createPropPanelElement,
|
|
497
|
-
createLinesGrid,
|
|
498
|
-
undefined,
|
|
499
|
-
demo
|
|
500
|
-
);
|
|
501
|
-
return sp;
|
|
502
|
-
};
|
|
503
|
-
|
|
504
|
-
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
505
|
-
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
506
|
-
if (table.rowCount > allowedRowCount) {
|
|
507
|
-
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
508
|
-
return;
|
|
332
|
+
@grok.decorators.func({
|
|
333
|
+
name: 'Get Region Top Menu',
|
|
334
|
+
description: 'Get sequences for a region specified from a Macromolecule',
|
|
335
|
+
'top-menu': 'Bio | Calculate | Get Region...',
|
|
336
|
+
editor: 'Bio:GetRegionEditor'})
|
|
337
|
+
static async getRegionTopMenu(
|
|
338
|
+
@grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
|
|
339
|
+
@grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequence column'}}) sequence: DG.Column,
|
|
340
|
+
@grok.decorators.param({type: 'string', options: {optional: true, description: 'Region start position name'}}) start: string | undefined,
|
|
341
|
+
@grok.decorators.param({type: 'string', options: {optional: true, description: 'Region end position name'}}) end: string | undefined,
|
|
342
|
+
@grok.decorators.param({type: 'string', options: {optional: true, description: 'Region column name'}}) name: string | undefined
|
|
343
|
+
): Promise<void> {
|
|
344
|
+
const regCol = getRegionDo(sequence, start ?? null, end ?? null, name ?? null);
|
|
345
|
+
sequence.dataFrame.columns.add(regCol);
|
|
346
|
+
await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
|
|
509
347
|
}
|
|
510
348
|
|
|
349
|
+
@grok.decorators.func({
|
|
350
|
+
name: 'Sequence Activity Cliffs',
|
|
351
|
+
description: 'Detects pairs of molecules with similar structure and significant difference in any given property',
|
|
352
|
+
'top-menu': 'Bio | Analyze | Activity Cliffs...',
|
|
353
|
+
editor: 'Bio:SeqActivityCliffsEditor',
|
|
354
|
+
outputs: []
|
|
355
|
+
})
|
|
356
|
+
static async activityCliffs(
|
|
357
|
+
@grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
|
|
358
|
+
@grok.decorators.param({type: 'string', options: {semType: 'Macromolecule', description: 'Input data table'}}) molecules: DG.Column<string>,
|
|
359
|
+
activities: DG.Column,
|
|
360
|
+
@grok.decorators.param({options: {initialValue: '80', description: 'Similarity cutoff'}}) similarity: number,
|
|
361
|
+
@grok.decorators.param({type: 'string', options: {choices: ['UMAP', 't-SNE']}}) methodName: DimReductionMethods,
|
|
362
|
+
@grok.decorators.param({type: 'string', options: {choices: ['Hamming', 'Levenshtein', 'Monomer chemical distance']}}) similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
363
|
+
@grok.decorators.param({type: 'func'}) preprocessingFunction: DG.Func,
|
|
364
|
+
@grok.decorators.param({type: 'object', options: {optional: true}}) options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
365
|
+
@grok.decorators.param({options: {optional: true}}) demo?: boolean): Promise<DG.Viewer | undefined> {
|
|
366
|
+
//workaround for functions which add viewers to tableView (can be run only on active table view)
|
|
367
|
+
if (table.name !== grok.shell.tv.dataFrame.name) {
|
|
368
|
+
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
369
|
+
return;
|
|
370
|
+
}
|
|
371
|
+
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
372
|
+
return;
|
|
373
|
+
const axesNames = getEmbeddingColsNames(table);
|
|
374
|
+
const tags = {
|
|
375
|
+
'units': molecules.meta.units!,
|
|
376
|
+
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
377
|
+
'separator': molecules.getTag(bioTAGS.separator),
|
|
378
|
+
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
379
|
+
};
|
|
380
|
+
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
381
|
+
const seqCol = molecules;
|
|
382
|
+
|
|
383
|
+
const runCliffs = async () => {
|
|
384
|
+
const sp = await getActivityCliffs(
|
|
385
|
+
table,
|
|
386
|
+
seqCol,
|
|
387
|
+
axesNames,
|
|
388
|
+
'Activity cliffs', //scatterTitle
|
|
389
|
+
activities,
|
|
390
|
+
similarity,
|
|
391
|
+
columnDistanceMetric, //similarityMetric
|
|
392
|
+
methodName,
|
|
393
|
+
{...(options ?? {})},
|
|
394
|
+
DG.SEMTYPE.MACROMOLECULE,
|
|
395
|
+
tags,
|
|
396
|
+
preprocessingFunction,
|
|
397
|
+
createTooltipElement,
|
|
398
|
+
createPropPanelElement,
|
|
399
|
+
createLinesGrid,
|
|
400
|
+
undefined,
|
|
401
|
+
demo
|
|
402
|
+
);
|
|
403
|
+
return sp;
|
|
404
|
+
};
|
|
405
|
+
|
|
406
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
407
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
408
|
+
if (table.rowCount > allowedRowCount) {
|
|
409
|
+
grok.shell.warning(`Too many rows, maximum for sequence activity cliffs is ${allowedRowCount}`);
|
|
410
|
+
return;
|
|
411
|
+
}
|
|
412
|
+
|
|
511
413
|
const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
512
414
|
const scRes = (await new Promise<DG.Viewer | undefined>((resolve, reject) => {
|
|
513
415
|
if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
@@ -533,745 +435,912 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
|
|
|
533
435
|
return scRes;
|
|
534
436
|
}
|
|
535
437
|
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
}
|
|
438
|
+
/**
 * Dimensionality-reduction preprocessing function for Macromolecule columns.
 *
 * Columns whose semantic type is not Macromolecule are passed through as a plain
 * list of values with empty options. Otherwise the encoding is delegated to
 * `getEncodedSeqSpaceCol`.
 *
 * @param col - column to encode
 * @param metric - distance function name forwarded to the encoder
 * @param gapOpen - gap open penalty forwarded to the encoder (defaults to 1)
 * @param gapExtend - gap extension penalty forwarded to the encoder (defaults to 0.6)
 * @param fingerprintType - fingerprint type forwarded to the encoder (defaults to 'Morgan')
 * @returns the entries to embed together with the options reported by the encoder
 */
@grok.decorators.func({
  name: 'Encode Sequences',
  tags: ['dim-red-preprocessing-function'],
  meta: {
    supportedSemTypes: 'Macromolecule',
    supportedTypes: 'string',
    supportedDistanceFunctions: 'Hamming,Levenshtein,Monomer chemical distance,Needlemann-Wunsch'
  },
  outputs: [{type: 'object', name: 'result'}],
})
static async macromoleculePreprocessingFunction(
  @grok.decorators.param({options: {semType: 'Macromolecule'}})col: DG.Column,
  @grok.decorators.param({type: 'string'}) metric: MmDistanceFunctionsNames,
  @grok.decorators.param({options: {initialValue: '1', caption: 'Gap open penalty', optional: true}}) gapOpen: number = 1,
  @grok.decorators.param({options: {initialValue: '0.6', caption: 'Gap extension penalty', optional: true}}) gapExtend: number = 0.6,
  @grok.decorators.param({options: {caption: 'Fingerprint type', initialValue: 'Morgan', choices: ['Morgan', 'RDKit', 'Pattern', 'AtomPair', 'MACCS', 'TopologicalTorsion'], optional: true}}) fingerprintType : string = 'Morgan'): Promise<PreprocessFunctionReturnType> {
  const isMacromolecule = col.semType === DG.SEMTYPE.MACROMOLECULE;
  // Nothing to encode for other semantic types: hand the raw values back untouched.
  if (!isMacromolecule)
    return {entries: col.toList(), options: {}};

  const encoded = await getEncodedSeqSpaceCol(col, metric, fingerprintType, gapOpen, gapExtend);
  return {entries: encoded.seqList, options: encoded.options};
}
|
|
557
460
|
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
}
|
|
461
|
+
/**
 * Dimensionality-reduction preprocessing function that turns a Macromolecule column
 * into one fingerprint bit array per row.
 *
 * The molecule column cached in `col.temp` is refreshed through `invalidateMols` when
 * the column version differs from the last invalidated version. Fingerprints are then
 * requested from `Chem:getMorganFingerprints`.
 *
 * @param col - sequence column
 * @param _metric - not used by this function
 * @returns one `BitArray` per row (`null` where no fingerprint is available) and empty options
 */
@grok.decorators.func({name: 'Helm Fingerprints',
  meta: {
    supportedSemTypes: 'Macromolecule',
    supportedTypes: 'string',
    supportedUnits: 'helm',
    supportedDistanceFunctions: 'Tanimoto,Asymmetric,Cosine,Sokal'
  },
  outputs: [{type: 'object', name: 'result'}],
})
static async helmPreprocessingFunction(
  @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) col: DG.Column<string>,
  @grok.decorators.param({type: 'string'})_metric: BitArrayMetrics): Promise<PreprocessFunctionReturnType> {
  const cachedVersion = col.temp[MONOMERIC_COL_TAGS.LAST_INVALIDATED_VERSION];
  if (col.version !== cachedVersion)
    await invalidateMols(col, _package.seqHelper, false);

  const fingerPrints: DG.Column<DG.BitSet | null> = await grok.functions.call(
    'Chem:getMorganFingerprints', {molColumn: col.temp[MONOMERIC_COL_TAGS.MONOMERIC_MOLS]});

  // Rows without a fingerprint stay null so that entry indices line up with row indices.
  const entries: Array<BitArray | null> = Array.from({length: fingerPrints.length}, (_, rowIdx) => {
    if (fingerPrints.isNone(rowIdx))
      return null;
    const bits = fingerPrints.get(rowIdx);
    if (!bits)
      return null;
    return BitArray.fromUint32Array(bits.length, new Uint32Array(bits.getBuffer().buffer));
  });
  return {entries, options: {}};
}
|
|
583
488
|
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
}
|
|
489
|
+
/**
 * Top-menu entry point for "Sequence Space": embeds a Macromolecule column via
 * `multiColReduceDimensionality`.
 *
 * Returns `undefined` (after reporting an error) when `table` is not the data frame of
 * the current table view, or when `checkInputColumnUI` rejects the column.
 *
 * @param table - table that owns the column
 * @param molecules - sequence column
 * @param methodName - dimensionality-reduction method
 * @param similarityMetric - distance/similarity metric for the column
 * @param plotEmbeddings - forwarded to `multiColReduceDimensionality`
 * @param preprocessingFunction - optional preprocessing function; the Bio package's
 *   `macromoleculePreprocessingFunction` is looked up when omitted
 * @param options - method options; may also carry `preprocessingFuncArgs` and the
 *   large-data-warning bypass flag
 * @param clusterEmbeddings - forwarded to `multiColReduceDimensionality`; treated as `false` when omitted
 * @param isDemo - accepted for interface compatibility; not read in this method
 */
@grok.decorators.func({
  name: 'Sequence Space',
  description: 'Creates 2D sequence space with projected sequences by pairwise distance',
  'top-menu': 'Bio | Analyze | Sequence Space...',
  editor: 'Bio:SequenceSpaceEditor',
  outputs: [],
})
static async sequenceSpaceTopMenu(
  table: DG.DataFrame,
  @grok.decorators.param({options: {semType: 'Macromolecule'}}) molecules: DG.Column,
  @grok.decorators.param({type: 'string', options: {choices: ['UMAP', 't-SNE']}}) methodName: DimReductionMethods,
  @grok.decorators.param({type: 'string', options: {choices: ['Hamming', 'Levenshtein', 'Monomer chemical distance']}}) similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
  @grok.decorators.param({options: {initialValue: 'true'}}) plotEmbeddings: boolean,
  @grok.decorators.param({type: 'func', options: {optional: true}}) preprocessingFunction?: DG.Func,
  @grok.decorators.param({type: 'object', options: {optional: true}}) options?: (IUMAPOptions | ITSNEOptions) & Options,
  @grok.decorators.param({options: {optional: true, initialValue: 'true'}}) clusterEmbeddings?: boolean,
  @grok.decorators.param({options: {optional: true}}) isDemo?: boolean
): Promise<DG.ScatterPlotViewer | undefined> {
  // Workaround for functions which add viewers to a table view: they can only run on the active one.
  const activeTableName = grok.shell.tv.dataFrame.name;
  if (table.name !== activeTableName) {
    grok.shell.error(`Table ${table.name} is not a current table view`);
    return;
  }
  const tableView = grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;

  if (!checkInputColumnUI(molecules, 'Sequence Space'))
    return;

  const preprocessor = preprocessingFunction ??
    DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
  const dimRedOptions: (IUMAPOptions | ITSNEOptions) & Options = options ?? {};

  return await multiColReduceDimensionality(
    table, [molecules], methodName,
    [similarityMetric as KnownMetrics], [1], [preprocessor], 'MANHATTAN',
    plotEmbeddings, clusterEmbeddings ?? false,
    /* dimRedOptions */ {...dimRedOptions, preprocessingFuncArgs: [dimRedOptions.preprocessingFuncArgs ?? {}]},
    /* uiOptions */ {
      fastRowCount: 10000,
      scatterPlotName: 'Sequence space',
      bypassLargeDataWarning: dimRedOptions[BYPASS_LARGE_DATA_WARNING],
      tableView: tableView,
    });
}
|
|
626
531
|
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
):
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
532
|
+
/**
 * Top-menu entry point for "To Atomic Level": waits for package initialisation and
 * delegates to `sequenceToMolfile`.
 *
 * A task-bar progress indicator is shown for the duration of the call and is closed
 * even when the conversion throws.
 *
 * @param table - table that owns the sequence column
 * @param seqCol - sequence column; a library stored in its `temp` under
 *   `MmcrTemps.overriddenLibrary` takes precedence over the package monomer library
 * @param nonlinear - forwarded to `sequenceToMolfile`
 * @param highlight - forwarded to `sequenceToMolfile` (defaults to `false`)
 */
@grok.decorators.func({
  name: 'To Atomic Level',
  description: 'Converts sequences to molblocks',
  'top-menu': 'Bio | Transform | To Atomic Level...',
})
static async toAtomicLevel(
  @grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
  @grok.decorators.param({options: {semType: 'Macromolecule', caption: 'Sequence'}})seqCol: DG.Column,
  @grok.decorators.param({options: {initialValue: 'false', caption: 'Non-linear', description: 'Slower mode for cycling/branching HELM structures'}}) nonlinear: boolean,
  @grok.decorators.param({options: {initialValue: 'false', caption: 'Highlight monomers', description: 'Highlight monomers\' substructures of the molecule'}}) highlight: boolean = false
): Promise<void> {
  const progress = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');
  try {
    await initBioPromise;
    await sequenceToMolfile(
      table, seqCol, nonlinear, highlight,
      seqCol.temp[MmcrTemps.overriddenLibrary] ?? _package.monomerLib,
      _package.seqHelper,
      _package.rdKitModule);
  } finally {
    progress.close();
  }
}
|
|
647
|
-
}
|
|
648
554
|
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
555
|
+
/**
 * Column action that opens the editor of the Bio `toAtomicLevel` function,
 * pre-filled with the given column and the data frame it belongs to.
 *
 * @param seqCol - sequence column the action was invoked on
 * @throws Error when `seqCol` is missing or has no data frame
 * @throws Error when the `toAtomicLevel` function cannot be found in the Bio package
 */
@grok.decorators.func({
  name: 'To Atomic Level...',
  meta: {action: 'to atomic level'}
})
static async toAtomicLevelAction(
  @grok.decorators.param({options: {semType: 'Macromolecule'}}) seqCol: DG.Column) {
  // The guard fires when the column or its data frame is absent, so the message must
  // say so (it previously claimed the data frame was "not empty").
  if (!seqCol?.dataFrame)
    throw new Error('Sequence column is not found or its data frame is not set');
  const func = DG.Func.find({name: 'toAtomicLevel', package: 'Bio'})[0];
  if (!func) throw new Error('To Atomic level Function not found');
  func.prepare({table: seqCol.dataFrame, seqCol: seqCol}).edit();
}
|
|
659
567
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
568
|
+
/**
 * "Molecular Structure" panel for a Macromolecule value; the widget is built by
 * `toAtomicLevelWidget`.
 *
 * @param sequence - semantic value the panel is shown for
 * @returns the widget produced by `toAtomicLevelWidget`
 */
@grok.decorators.panel({
  name: 'Molecular Structure',
  tags: ['bio', 'widgets']
})
static async toAtomicLevelPanel(
  @grok.decorators.param({name: 'sequence', type: 'semantic_value', options: {semType: 'Macromolecule'}})
  sequence: DG.SemanticValue
) : Promise<DG.Widget> {
  const widget = await toAtomicLevelWidget(sequence);
  return widget;
}
|
|
667
578
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
579
|
+
/**
 * "Molecular 3D Structure" panel for a Macromolecule value; the widget is built by
 * `molecular3DStructureWidget`.
 *
 * @param sequence - semantic value the panel is shown for
 * @returns the widget produced by `molecular3DStructureWidget`
 */
@grok.decorators.panel({
  name: 'Molecular 3D Structure',
  tags: ['bio', 'widgets']
})
static async sequence3dStructureWidget(
  @grok.decorators.param({
    type: 'semantic_value',
    options: {semType: 'Macromolecule'}
  })
  sequence: DG.SemanticValue
): Promise<DG.Widget> {
  const widget = await molecular3DStructureWidget(sequence);
  return widget;
}
|
|
675
592
|
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
593
|
+
/**
 * Top-menu entry point that starts `multipleSequenceAlignmentUI` with default options.
 *
 * The returned promise is not awaited; a rejection is reported to the shell and the
 * package logger instead of being rethrown. `MsaWarning` rejections are shown as
 * warnings, everything else as an error.
 */
@grok.decorators.panel({
  name: 'MSA',
  description: 'Performs multiple sequence alignment',
  tags: ['bio'],
  'top-menu': 'Bio | Analyze | MSA...'
})
static multipleSequenceAlignmentDialog(): void {
  const reportFailure = (err: any): void => {
    const [errMsg, errStack] = errInfo(err);
    if (!(err instanceof MsaWarning)) {
      grok.shell.error(errMsg);
      _package.logger.error(errMsg, undefined, errStack);
      // throw err; // This error throw is not handled
    } else {
      grok.shell.warning((err as MsaWarning).element);
      _package.logger.warning(errMsg);
    }
  };

  multipleSequenceAlignmentUI({}, _package.seqHelper).catch(reportFailure);
}
|
|
694
613
|
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
): Promise<DG.Column<string>> {
|
|
705
|
-
|
|
706
|
-
}
|
|
614
|
+
/**
 * Programmatic entry point for multiple sequence alignment; delegates to
 * `multipleSequenceAlignmentUI`.
 *
 * @param sequenceCol - sequence column, passed as `col` (defaults to `null`)
 * @param clustersCol - clusters column (defaults to `null`)
 * @param options - extra options spread over the two columns, so keys named `col` or
 *   `clustersCol` in here override the explicit arguments
 * @returns whatever `multipleSequenceAlignmentUI` resolves to
 */
@grok.decorators.func({
  name: 'Multiple Sequence Alignment',
  description: 'Multiple sequence alignment',
  tags: ['bio']
})
static async alignSequences(
  @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) sequenceCol: DG.Column<string> | null = null,
  @grok.decorators.param({type: 'column'}) clustersCol: DG.Column | null = null,
  @grok.decorators.param({type: 'object', options: {optional: true}}) options?: any
): Promise<DG.Column<string>> {
  const msaOptions = {col: sequenceCol, clustersCol, ...options};
  return multipleSequenceAlignmentUI(msaOptions, _package.seqHelper);
}
|
|
707
626
|
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
const
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
627
|
+
/**
 * Top-menu entry point for "Composition Analysis": adds a WebLogo viewer for a
 * Macromolecule column of the current table view and docks it below.
 *
 * - No Macromolecule columns: an error is shown and nothing else happens.
 * - Exactly one: the viewer is added for it immediately.
 * - Several: a dialog asks which column to use, preselecting the first column whose
 *   sequence handler reports MSA data (or the first column otherwise). The viewer is
 *   added when the dialog is confirmed.
 */
@grok.decorators.func({
  name: 'Composition Analysis',
  description: 'Visualizes sequence composition on a WebLogo plot',
  'top-menu': 'Bio | Analyze | Composition',
  meta: {
    icon: 'files/icons/composition-analysis.svg'
  },
  outputs: [{name: 'result', type: 'viewer'}]
})
static async compositionAnalysis(): Promise<void> {
  const tv = grok.shell.tv;
  const df = tv.dataFrame;
  //@ts-ignore
  const seqCols: DG.Column[] = df.columns.toList().filter((candidate) => {
    if (candidate.semType != DG.SEMTYPE.MACROMOLECULE)
      return false;

    // NOTE(review): the handler is not used here; the call is kept as-is in case
    // getSeqHandler has effects beyond returning a value - confirm.
    const _colSh = _package.seqHelper.getSeqHandler(candidate);
    // TODO: prevent for cyclic, branched or multiple chains in Helm
    return true;
  });

  // Adds the WebLogo viewer for one column, unless the column fails the input check.
  const showWebLogo = async (seqCol: DG.Column): Promise<void> => {
    if (!checkInputColumnUI(seqCol, 'Composition'))
      return;

    const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: seqCol.name});
    grok.shell.tv.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.25);
  };

  if (seqCols.length == 0) {
    grok.shell.error('Current table does not contain sequences');
    return;
  }

  if (seqCols.length == 1) {
    await showWebLogo(seqCols[0]);
    return;
  }

  // Several candidates: let the user choose, giving priority to a column with MSA data.
  const seqColNames: string[] = seqCols.map((c) => c.name);
  const msaCol = seqCols.find((c) => _package.seqHelper.getSeqHandler(c).isMsa());
  const colInput: DG.InputBase = ui.input.choice(
    'Column', {value: msaCol ? msaCol.name : seqColNames[0], items: seqColNames});
  ui.dialog({
    title: 'Composition Analysis',
    helpUrl: 'https://datagrok.ai/help/datagrok/solutions/domains/bio/#sequence-composition',
  })
    .add(ui.div([
      colInput,
    ]))
    .onOK(async () => {
      const chosen = seqCols.find((c) => c.name == colInput.value);
      if (chosen)
        await showWebLogo(chosen);
    })
    .show();
}
|
|
777
689
|
|
|
778
|
-
//
|
|
779
|
-
//description: Opens Bam file
|
|
780
|
-
//tags: file-handler
|
|
781
|
-
//meta.ext: bam, bai
|
|
782
|
-
//input: string fileContent
|
|
783
|
-
//output: list tables
|
|
784
|
-
export function importBam(fileContent: string): DG.DataFrame [] {
|
|
785
|
-
console.log(fileContent);
|
|
786
|
-
return [];
|
|
787
|
-
}
|
|
690
|
+
// -- Package settings editor --
|
|
788
691
|
|
|
789
|
-
//top-menu: Bio | Transform | Convert Notation...
|
|
790
|
-
//name: convertDialog
|
|
791
|
-
export function convertDialog() {
|
|
792
|
-
const col: DG.Column<string> | undefined = getMacromoleculeColumns()[0];
|
|
793
|
-
convert(col, _package.seqHelper);
|
|
794
|
-
}
|
|
795
692
|
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
693
|
+
/**
 * File handler for FASTA files: wraps the content in a `FastaFileHandler` and returns
 * what its `importFasta()` produces.
 *
 * @param fileContent - text content of the file
 * @returns the tables produced by `FastaFileHandler.importFasta`
 */
@grok.decorators.fileHandler({
  name: 'importFasta',
  description: 'Opens FASTA file',
  ext: 'fasta, fna, ffn, faa, frn, fa, fst',
})
static importFasta(
  fileContent: string
): DG.DataFrame[] {
  return new FastaFileHandler(fileContent).importFasta();
}
|
|
802
704
|
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
705
|
+
/**
 * File handler registered for `bam` / `bai` files.
 *
 * Currently a stub: the content is written to the console and no tables are produced.
 *
 * @param fileContent - content of the file
 * @returns always an empty list
 */
@grok.decorators.fileHandler({
  name: 'importBam',
  description: 'Opens Bam file',
  ext: 'bam, bai',
})
static importBam(fileContent: string): DG.DataFrame[] {
  const tables: DG.DataFrame[] = [];
  console.log(fileContent);
  return tables;
}
|
|
811
714
|
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
715
|
+
/**
 * Top-menu entry point for "Convert Notation...": calls `convert` with the first
 * column returned by `getMacromoleculeColumns()`, which is `undefined` when that list
 * is empty.
 */
@grok.decorators.func({
  name: 'convertDialog',
  'top-menu': 'Bio | Transform | Convert Notation...'
})
static convertDialog() {
  const [firstSeqCol]: Array<DG.Column<string> | undefined> = getMacromoleculeColumns();
  convert(firstSeqCol, _package.seqHelper);
}
|
|
817
723
|
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
724
|
+
/**
 * Column action "Convert Notation...": calls `convert` for the given column.
 *
 * @param col - column the action was invoked on
 */
@grok.decorators.func({
  name: 'Convert Notation...',
  meta: {
    action: 'Convert Notation...'
  }
})
static convertColumnAction(
  @grok.decorators.param({
    options: {semType: 'Macromolecule'}
  })
  col: DG.Column
) {
  const seqHelper = _package.seqHelper;
  convert(col, seqHelper);
}
|
|
821
738
|
|
|
822
|
-
|
|
823
|
-
|
|
739
|
+
/**
 * Cell-renderer factory registered for cell type `Monomer` (column tag `quality=Monomer`).
 *
 * @returns a new `MonomerCellRenderer` on every call
 */
@grok.decorators.func({
  name: 'monomerCellRenderer',
  tags: ['cellRenderer'],
  meta: {
    cellType: 'Monomer',
    columnTags: 'quality=Monomer'
  },
  outputs: [{type: 'grid_cell_renderer', name: 'result'}]
})
static monomerCellRenderer(): MonomerCellRenderer {
  const renderer = new MonomerCellRenderer();
  return renderer;
}
|
|
824
751
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
752
|
+
/**
 * Diagnostic function: runs `Bio:detectMacromolecule` on every column of every `.csv`
 * file listed under `path` and collects the outcome in a table.
 *
 * One row is recorded per column detected as Macromolecule
 * (`result: 'detected'`, with the column's units in `message`) and one row per file
 * that failed to load or process (`result: 'error'`, with the error text in `message`).
 * A failure in one file does not stop the remaining files.
 *
 * The task-bar progress indicator is closed on every exit path, including when
 * listing the files or building the result table throws.
 *
 * @param path - root to scan; files are listed recursively
 * @returns a data frame named `datasets_detectMacromolecule_<path>` with the collected rows
 */
@grok.decorators.func({
  name: 'testDetectMacromolecule',
})
static async testDetectMacromolecule(
  @grok.decorators.param({
    options: {choices: ['Demo:Files/', 'System:AppData/']}
  })
  path: string
): Promise<DG.DataFrame> {
  const pi = DG.TaskBarProgressIndicator.create('Test detectMacromolecule...');
  try {
    const fileList = await grok.dapi.files.list(path, true, '');
    //@ts-ignore
    const fileListToTest = fileList.filter((fi) => fi.fileName.endsWith('.csv'));

    let readyCount = 0;
    const res = [];

    for (const fileInfo of fileListToTest) {
      try {
        const csv = await grok.dapi.files.readAsText(path + fileInfo.fullPath);
        const df = DG.DataFrame.fromCsv(csv);

        for (const col of df.columns) {
          const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
          if (semType === DG.SEMTYPE.MACROMOLECULE) {
            res.push({
              file: fileInfo.path, result: 'detected', column: col.name,
              message: `units: ${col.meta.units}`,
            });
          }
        }
      } catch (err: unknown) {
        // String(err) also copes with a thrown null/undefined, where calling
        // .toString() would itself throw inside this handler.
        res.push({
          file: fileInfo.path, result: 'error', column: null,
          message: err instanceof Error ? err.message : String(err),
        });
      } finally {
        readyCount += 1;
        pi.update(100 * readyCount / fileListToTest.length, `Test ${fileInfo.fileName}`);
      }
    }

    grok.shell.info('Test Demo:Files for detectMacromolecule finished.');
    const resDf = DG.DataFrame.fromObjects(res)!;
    resDf.name = `datasets_detectMacromolecule_${path}`;
    return resDf;
  } finally {
    pi.close();
  }
}
|
|
855
807
|
|
|
856
|
-
grok.
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
808
|
+
/**
 * Top-menu entry point for "Split to Monomers"; delegates to `splitToMonomersUI`.
 *
 * @param table - table that owns the column
 * @param sequence - sequence column to split
 * @returns the data frame resolved by `splitToMonomersUI`
 */
@grok.decorators.func({
  name: 'Split to Monomers',
  'top-menu': 'Bio | Transform | Split to Monomers...',
  editor: 'Bio:SplitToMonomersEditor',
})
static async splitToMonomersTopMenu(
  table: DG.DataFrame,
  @grok.decorators.param({
    options: {semType: 'Macromolecule'}
  })
  sequence: DG.Column
): Promise<DG.DataFrame> {
  const result = await splitToMonomersUI(table, sequence);
  return result;
}
|
|
862
822
|
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
823
|
+
/**
 * Returns the monomers of a sequence column as reported by the package sequence
 * helper's `getSeqMonomers`.
 *
 * @param sequence - sequence column
 * @returns the list returned by `seqHelper.getSeqMonomers`
 */
@grok.decorators.func({
  name: 'Bio: getHelmMonomers',
  outputs: [{name: 'result', type: 'object'}]
})
static getHelmMonomers(
  @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}})
  sequence: DG.Column<string>
): string[] {
  const seqHelper = _package.seqHelper;
  return seqHelper.getSeqMonomers(sequence);
}
|
|
872
833
|
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
}
|
|
834
|
+
/**
 * Viewer factory for "Sequence Similarity Search".
 *
 * @returns a new `SequenceSimilarityViewer` bound to the package sequence helper
 */
@grok.decorators.func({
  name: 'Sequence Similarity Search',
  tags: ['viewer'],
  meta: {
    icon: 'files/icons/sequence-similarity-viewer.svg'
  },
  outputs: [{name: 'result', type: 'viewer'}]
})
static similaritySearchViewer(): SequenceSimilarityViewer {
  const viewer = new SequenceSimilarityViewer(_package.seqHelper);
  return viewer;
}
|
|
879
845
|
|
|
846
|
+
/**
 * Top-menu entry point that adds a "Sequence Similarity Search" viewer to the current
 * view and docks it at the bottom.
 *
 * The current view is cast to `DG.TableView` without a runtime check.
 */
@grok.decorators.func({
  name: 'similaritySearch',
  description: 'Finds similar sequences',
  'top-menu': 'Bio | Search | Similarity Search',
  outputs: [{name: 'result', type: 'viewer'}]
})
static similaritySearchTopMenu(): void {
  const tableView: DG.TableView = grok.shell.v as DG.TableView;
  const similarityViewer = tableView.addViewer('Sequence Similarity Search');
  tableView.dockManager.dock(similarityViewer, 'down');
}
|
|
880
857
|
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
}
|
|
858
|
+
/**
 * Viewer factory for "Sequence Diversity Search".
 *
 * @returns a new `SequenceDiversityViewer` bound to the package sequence helper
 */
@grok.decorators.func({
  name: 'Sequence Diversity Search',
  tags: ['viewer'],
  meta: {
    icon: 'files/icons/sequence-diversity-viewer.svg'
  },
  outputs: [{name: 'result', type: 'viewer'}]
})
static diversitySearchViewer(): SequenceDiversityViewer {
  const viewer = new SequenceDiversityViewer(_package.seqHelper);
  return viewer;
}
|
|
888
869
|
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
870
|
+
/**
 * Top-menu entry point that adds a "Sequence Diversity Search" viewer to the current
 * view and docks it at the bottom.
 *
 * The current view is cast to `DG.TableView` without a runtime check.
 */
@grok.decorators.func({
  name: 'diversitySearch',
  description: 'Finds the most diverse sequences',
  'top-menu': 'Bio | Search | Diversity Search',
  outputs: [{name: 'result', type: 'viewer'}]
})
static diversitySearchTopMenu() {
  const tableView: DG.TableView = grok.shell.v as DG.TableView;
  const diversityViewer = tableView.addViewer('Sequence Diversity Search');
  tableView.dockManager.dock(diversityViewer, 'down');
}
|
|
898
881
|
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
}
|
|
882
|
+
/**
 * Editor for the subsequence search function.
 *
 * When `getMacromoleculeColumns()` returns exactly one column, the function is run
 * right away with that column. Otherwise a `SubstructureSearchDialog` is constructed
 * with the returned columns.
 *
 * @param call - function call being edited; its `func` is re-prepared with the chosen column
 */
@grok.decorators.editor({
  name: 'SearchSubsequenceEditor'
})
static searchSubsequenceEditor(call: DG.FuncCall) {
  const seqCols = getMacromoleculeColumns();
  if (seqCols.length !== 1) {
    new SubstructureSearchDialog(seqCols, _package.seqHelper);
    return;
  }
  // The call is started without being awaited.
  call.func.prepare({macromolecules: seqCols[0]}).call(true);
}
|
|
906
892
|
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
893
|
+
/**
 * Top-menu entry point for "Subsequence Search": adds or updates a
 * `Bio:bioSubstructureFilter` for the column in the current table view's filter group
 * (without creating default filters), then scrolls the grid to the column's first row.
 *
 * @param macromolecules - sequence column to filter on
 */
@grok.decorators.func({
  name: 'Subsequence Search',
  'top-menu': 'Bio | Search | Subsequence Search ...',
  editor: 'Bio:SearchSubsequenceEditor'
})
static SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
  const seqColName = macromolecules.name;
  const filters = grok.shell.tv.getFiltersGroup({createDefaultFilters: false});
  filters.updateOrAdd({
    type: 'Bio:bioSubstructureFilter',
    column: seqColName,
    columnName: seqColName,
  });
  grok.shell.tv.grid.scrollToCell(macromolecules, 0);
}
|
|
916
906
|
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
907
|
+
/**
 * Top-menu entry point for "Identity Scoring"; delegates to
 * `calculateScoresWithEmptyValues` with `SCORE.IDENTITY`.
 *
 * @param table - table containing the sequence column
 * @param macromolecule - sequences to score
 * @param reference - reference sequence the column is scored against
 * @returns the score column produced by `calculateScoresWithEmptyValues`
 */
@grok.decorators.func({
  name: 'Identity Scoring',
  description: 'Adds a column with fraction of matching monomers',
  'top-menu': 'Bio | Calculate | Identity...',
})
static async sequenceIdentityScoring(
  @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
  @grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
  @grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
): Promise<DG.Column<number>> {
  return calculateScoresWithEmptyValues(
    table, macromolecule, reference, SCORE.IDENTITY, _package.seqHelper);
}
|
|
927
921
|
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
922
|
+
/**
 * Top-menu entry point for "Similarity Scoring"; delegates to
 * `calculateScoresWithEmptyValues` with `SCORE.SIMILARITY`.
 *
 * @param table - table containing the sequence column
 * @param macromolecule - sequences to score
 * @param reference - reference sequence the column is scored against
 * @returns the score column produced by `calculateScoresWithEmptyValues`
 */
@grok.decorators.func({
  name: 'Similarity Scoring',
  description: 'Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities',
  'top-menu': 'Bio | Calculate | Similarity...',
})
static async sequenceSimilarityScoring(
  @grok.decorators.param({options: {description: 'Table containing Macromolecule column'}})table: DG.DataFrame,
  @grok.decorators.param({options: {semType: 'Macromolecule', description: 'Sequences to score'}}) macromolecule: DG.Column,
  @grok.decorators.param({options: {description: 'Sequence,matching column format'}})reference: string
): Promise<DG.Column<number>> {
  return calculateScoresWithEmptyValues(
    table, macromolecule, reference, SCORE.SIMILARITY, _package.seqHelper);
}
|
|
940
936
|
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
export async function sequenceIdentityScoring(
|
|
949
|
-
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
950
|
-
): Promise<DG.Column<number>> {
|
|
951
|
-
const seqHelper = _package.seqHelper;
|
|
952
|
-
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.IDENTITY, seqHelper);
|
|
953
|
-
return scores;
|
|
954
|
-
}
|
|
937
|
+
/**
 * Opens the dialog for managing HELM monomer libraries via `showManageLibrariesDialog`.
 *
 * The dialog call is not awaited, so this function resolves without waiting for it.
 */
@grok.decorators.func({
  name: 'Manage Monomer Libraries',
  description: 'Manage HELM monomer libraries'
})
static async manageMonomerLibraries(): Promise<void> {
  // `void` makes the fire-and-forget call explicit; the result is discarded either way.
  void showManageLibrariesDialog();
}
|
|
955
944
|
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
export async function sequenceSimilarityScoring(
|
|
964
|
-
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
965
|
-
): Promise<DG.Column<number>> {
|
|
966
|
-
const seqHelper = _package.seqHelper;
|
|
967
|
-
const scores = calculateScoresWithEmptyValues(table, macromolecule, reference, SCORE.SIMILARITY, seqHelper);
|
|
968
|
-
return scores;
|
|
969
|
-
}
|
|
945
|
+
/** Top-menu entry point ('Bio | Manage | Monomer Libraries'): shows the manage-libraries view and waits for it. */
@grok.decorators.func({
  name: 'Manage Monomer Libraries View',
  'top-menu': 'Bio | Manage | Monomer Libraries'
})
static async manageLibrariesView(): Promise<void> {
  // The value produced by showManageLibrariesView is intentionally not returned.
  await showManageLibrariesView();
}
|
|
970
952
|
|
|
953
|
+
/** Top-menu entry point ('Bio | Manage | Monomers'): obtains the MonomerManager instance and requests its view root. */
@grok.decorators.func({
  name: 'manageMonomersView',
  description: 'Edit and create monomers',
  'top-menu': 'Bio | Manage | Monomers'
})
static async manageMonomersView() {
  const manager = await MonomerManager.getInstance();
  // Called with no arguments; the returned value is not used here.
  await manager.getViewRoot();
}
|
|
971
962
|
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
}
|
|
963
|
+
/**
 * Application entry point registered under the 'Peptides' browse path.
 * @returns the view produced by `showManageLibrariesView(false)`
 */
@grok.decorators.app({
  name: 'Manage Monomer Libraries',
  browsePath: 'Peptides',
  icon: 'files/icons/monomers.png',
})
static async manageMonomerLibrariesView(): Promise<DG.View> {
  const view = await showManageLibrariesView(false);
  return view;
}
|
|
977
971
|
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
972
|
+
/**
 * Populates a browse-tree group with one item per valid monomer library path.
 * Selecting an item asks the MonomerManager for the view root of that library.
 *
 * @param treeNode tree group that receives the library items
 * @param browsePanel not used by this implementation
 */
@grok.decorators.func({name: 'Monomer Manager Tree Browser'})
static async manageMonomerLibrariesViewTreeBrowser(treeNode: DG.TreeViewGroup, browsePanel: DG.BrowsePanel) {
  const libManager = await MonomerLibManager.getInstance();
  const fileManager = await libManager.getFileManager();
  for (const libName of fileManager.getValidLibraryPaths()) {
    // The node caption drops a trailing '.json'; the full name is still used to open the library.
    const nodeName = libName.endsWith('.json') ? libName.slice(0, -'.json'.length) : libName;
    const libNode = treeNode.item(nodeName);
    // eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
    libNode.onSelected.subscribe(async () => {
      const manager = await MonomerManager.getInstance();
      await manager.getViewRoot(libName, true);
      manager.resetCurrentRowFollowing();
    });
  }
}
|
|
983
986
|
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
const monomerManager = await MonomerManager.getInstance();
|
|
989
|
-
await monomerManager.getViewRoot();
|
|
990
|
-
}
|
|
987
|
+
/** File exporter registered as 'As FASTA...'; delegates entirely to `saveAsFastaUI`. */
@grok.decorators.fileExporter({description: 'As FASTA...'})
static saveAsFasta() {
  // Nothing is returned, matching the original exporter.
  saveAsFastaUI();
}
|
|
991
991
|
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
992
|
+
/**
 * Factory for the macromolecule substructure filter.
 * @returns a new BioSubstructureFilter built from the package's sequence helper and logger
 */
@grok.decorators.func({
  name: 'Bio Substructure Filter',
  description: 'Substructure filter for macromolecules',
  tags: ['filter'],
  meta: {semType: 'Macromolecule'},
  outputs: [{type: 'filter', name: 'result'}],
})
static bioSubstructureFilter(): BioSubstructureFilter {
  const seqHelper = _package.seqHelper;
  const logger = _package.logger;
  return new BioSubstructureFilter(seqHelper, logger);
}
|
|
1000
1002
|
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
// eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
|
|
1010
|
-
libNode.onSelected.subscribe(async () => {
|
|
1011
|
-
const monomerManager = await MonomerManager.getInstance();
|
|
1012
|
-
await monomerManager.getViewRoot(libName, true);
|
|
1013
|
-
monomerManager.resetCurrentRowFollowing();
|
|
1014
|
-
});
|
|
1015
|
-
});
|
|
1016
|
-
}
|
|
1003
|
+
/**
 * Same factory as `bioSubstructureFilter`, but registered with an 'object' output.
 * @returns a new BioSubstructureFilter built from the package's sequence helper and logger
 */
@grok.decorators.func({
  name: 'Bio Substructure Filter Test',
  description: 'Substructure filter for Helm package tests',
  outputs: [{name: 'result', type: 'object'}]
})
static bioSubstructureFilterTest(): BioSubstructureFilter {
  const seqHelper = _package.seqHelper;
  const logger = _package.logger;
  return new BioSubstructureFilter(seqHelper, logger);
}
|
|
1017
1011
|
|
|
1018
|
-
//
|
|
1019
|
-
//description: As FASTA...
|
|
1020
|
-
//tags: fileExporter
|
|
1021
|
-
export function saveAsFasta() {
|
|
1022
|
-
saveAsFastaUI();
|
|
1023
|
-
}
|
|
1012
|
+
// -- Test apps --
|
|
1024
1013
|
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1014
|
+
/**
 * Test app: starts WebLogoApp on the package file 'data/sample_PT_100000x5.csv'.
 * A task-bar progress indicator is shown for the duration and closed even on failure.
 */
@grok.decorators.func()
static async webLogoLargeApp(): Promise<void> {
  const progress = DG.TaskBarProgressIndicator.create('WebLogo');
  try {
    const query = new URLSearchParams(window.location.search);
    const webLogoApp = new WebLogoApp(query, 'webLogoLargeApp');
    const table: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
    // Semantic types are detected before the table is handed to the app.
    await grok.data.detectSemanticTypes(table);
    await webLogoApp.init(table);
  } finally {
    progress.close();
  }
}
|
|
1033
1027
|
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1028
|
+
/**
 * Test app: starts WebLogoApp on the package file 'samples/FASTA_PT_activity.csv'.
 * A task-bar progress indicator is shown for the duration and closed even on failure.
 */
@grok.decorators.func()
static async webLogoAggApp(): Promise<void> {
  const progress = DG.TaskBarProgressIndicator.create('WebLogo ...');
  try {
    const query = new URLSearchParams(window.location.search);
    const webLogoApp = new WebLogoApp(query, 'webLogoAggApp');
    const table: DG.DataFrame = await _package.files.readCsv('samples/FASTA_PT_activity.csv');
    // Semantic types are detected before the table is handed to the app.
    await grok.data.detectSemanticTypes(table);
    await webLogoApp.init(table);
  } finally {
    progress.close();
  }
}
|
|
1040
1041
|
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
await app.init(df);
|
|
1052
|
-
} finally {
|
|
1053
|
-
pi.close();
|
|
1042
|
+
/**
 * Test app: starts GetRegionApp with no explicit data (`init()` is called without arguments).
 * A task-bar progress indicator is shown for the duration and closed even on failure.
 */
@grok.decorators.func()
static async getRegionApp(): Promise<void> {
  const progress = DG.TaskBarProgressIndicator.create('getRegion ...');
  try {
    const query = new URLSearchParams(window.location.search);
    const regionApp = new GetRegionApp(query, 'getRegionApp');
    await regionApp.init();
  } finally {
    progress.close();
  }
}
|
|
1055
|
-
}
|
|
1056
1053
|
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1054
|
+
/**
 * Test app: starts GetRegionApp on column 'HELM' of the package file 'samples/HELM_empty_vals.csv'.
 * A task-bar progress indicator is shown for the duration and closed even on failure.
 */
@grok.decorators.func()
static async getRegionHelmApp(): Promise<void> {
  const progress = DG.TaskBarProgressIndicator.create('getRegion ...');
  try {
    // URL parameters are captured before the file read, as in the original sequence.
    const query = new URLSearchParams(window.location.search);
    const table = await _package.files.readCsv('samples/HELM_empty_vals.csv');
    const regionApp = new GetRegionApp(query, 'getRegionHelmApp');
    await regionApp.init({df: table, colName: 'HELM'});
  } finally {
    progress.close();
  }
}
|
|
1069
|
-
}
|
|
1070
1066
|
|
|
1071
|
-
//
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
const
|
|
1077
|
-
|
|
1078
|
-
} finally {
|
|
1079
|
-
pi.close();
|
|
1067
|
+
// -- Tests long seq --
|
|
1068
|
+
|
|
1069
|
+
//name: longSeqTableSeparator
|
|
1070
|
+
/** Builds a table from the columns returned by `generateLongSequence()` and opens it as a table view. */
@grok.decorators.func()
static longSeqTableSeparator(): void {
  const columns = generateLongSequence();
  const table = DG.DataFrame.fromColumns(columns);
  grok.shell.addTableView(table);
}
|
|
1081
|
-
}
|
|
1082
1075
|
|
|
1083
|
-
//name:
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
const df = await _package.files.readCsv('samples/HELM_empty_vals.csv');
|
|
1089
|
-
const app = new GetRegionApp(urlParams, 'getRegionHelmApp');
|
|
1090
|
-
await app.init({df: df, colName: 'HELM'});
|
|
1091
|
-
} finally {
|
|
1092
|
-
pi.close();
|
|
1076
|
+
//name: longSeqTableFasta
|
|
1077
|
+
/** Builds a single-column table from `generateLongSequence2` in FASTA notation and opens it as a table view. */
@grok.decorators.func()
static longSeqTableFasta(): void {
  const seqColumn = generateLongSequence2(_package.seqHelper, NOTATION.FASTA);
  const table = DG.DataFrame.fromColumns([seqColumn]);
  grok.shell.addTableView(table);
}
|
|
1094
|
-
}
|
|
1095
1082
|
|
|
1096
|
-
//
|
|
1083
|
+
//name: longSeqTableHelm
|
|
1084
|
+
/** Builds a single-column table from `generateLongSequence2` in HELM notation and opens it as a table view. */
@grok.decorators.func()
static longSeqTableHelm(): void {
  const seqColumn = generateLongSequence2(_package.seqHelper, NOTATION.HELM);
  const table = DG.DataFrame.fromColumns([seqColumn]);
  grok.shell.addTableView(table);
}
|
|
1097
1089
|
|
|
1098
|
-
//
|
|
1099
|
-
export function longSeqTableSeparator(): void {
|
|
1100
|
-
const df = DG.DataFrame.fromColumns(generateLongSequence());
|
|
1101
|
-
grok.shell.addTableView(df);
|
|
1102
|
-
}
|
|
1090
|
+
// -- Handle context menu --
|
|
1103
1091
|
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1092
|
+
/**
 * Context-menu hook: forwards the cell and menu to `addCopyMenuUI` together with the package's sequence helper.
 * @param cell cell the context menu was opened for
 * @param menu menu passed on to `addCopyMenuUI`
 */
@grok.decorators.func()
static addCopyMenu(
  @grok.decorators.param({type: 'object'}) cell: DG.Cell,
  @grok.decorators.param({type: 'object'}) menu: DG.Menu
): void {
  const seqHelper = _package.seqHelper;
  addCopyMenuUI(cell, menu, seqHelper);
}
|
|
1109
1098
|
|
|
1110
|
-
//
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
grok.
|
|
1114
|
-
|
|
1099
|
+
// -- Demo --
|
|
1100
|
+
// demoBio01
|
|
1101
|
+
|
|
1102
|
+
/** Demo entry point 'Bioinformatics | Similarity, Diversity'; runs `demoBioSimDiv` and waits for it to finish. */
@grok.decorators.demo({
  description: 'Sequence similarity tracking and evaluation dataset diversity',
  demoPath: 'Bioinformatics | Similarity, Diversity',
  path: '/apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity',
  demoSkip: 'GROK-14320'
})
static async demoBioSimilarityDiversity(): Promise<void> {
  // Awaited rather than returned, so any value produced by the demo is discarded.
  await demoBioSimDiv();
}
|
|
1115
1111
|
|
|
1116
|
-
|
|
1112
|
+
/** Demo entry point 'Bioinformatics | Sequence Space'; runs `demoSeqSpace` and waits for it to finish. */
@grok.decorators.demo({
  description: 'Exploring sequence space of Macromolecules, comparison with hierarchical clustering results',
  demoPath: 'Bioinformatics | Sequence Space',
  path: '/apps/Tutorials/Demo/Bioinformatics/Sequence%20Space',
  demoSkip: 'GROK-14320'
})
static async demoBioSequenceSpace(): Promise<void> {
  // Awaited rather than returned, so any value produced by the demo is discarded.
  await demoSeqSpace();
}
|
|
1117
1121
|
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
}
|
|
1122
|
+
/** Demo entry point 'Bioinformatics | Activity Cliffs'; runs `demoActivityCliffsCyclic` and waits for it to finish. */
@grok.decorators.demo({
  description: 'Activity Cliffs analysis on Macromolecules data',
  demoPath: 'Bioinformatics | Activity Cliffs',
  path: '/apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs',
  demoSkip: 'GROK-14320'
})
static async demoBioActivityCliffs(): Promise<void> {
  // Awaited rather than returned, so any value produced by the demo is discarded.
  await demoActivityCliffsCyclic();
}
|
|
1124
1131
|
|
|
1125
|
-
|
|
1132
|
+
/** Demo entry point 'Bioinformatics | Atomic Level'; runs `demoToAtomicLevel` and waits for it to finish. */
@grok.decorators.demo({
  description: 'Atomic level structure of Macromolecules',
  demoPath: 'Bioinformatics | Atomic Level',
  path: '/apps/Tutorials/Demo/Bioinformatics/Atomic%20Level',
  demoSkip: 'GROK-14320'
})
static async demoBioAtomicLevel(): Promise<void> {
  // Awaited rather than returned, so any value produced by the demo is discarded.
  await demoToAtomicLevel();
}
|
|
1126
1141
|
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
1134
|
-
await demoBioSimDiv();
|
|
1135
|
-
}
|
|
1142
|
+
/**
 * Converts a table to a JSON monomer library via `createJsonMonomerLibFromSdf`
 * and triggers a download named `<table name>.json`.
 * @param table source table; its `name` is used for the downloaded file name
 */
@grok.decorators.func({name: 'SDF to JSON Library'})
static async sdfToJsonLib(table: DG.DataFrame) {
  const serializedLib = JSON.stringify(createJsonMonomerLibFromSdf(table));
  DG.Utils.download(`${table.name}.json`, serializedLib);
}
|
|
1136
1148
|
|
|
1137
|
-
//
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
}
|
|
1149
|
+
// -- Utils --
|
|
1150
|
+
|
|
1151
|
+
/**
 * Converts one macromolecule sequence to a molfile by wrapping it in a one-row
 * table and running `sequenceToMolfile` on it.
 *
 * @param seq sequence text; a blank (whitespace-only) value yields an empty string
 * @param nonlinear forwarded unchanged to `sequenceToMolfile`
 * @returns the first value of the resulting molecule column, or `undefined` when there is none
 * @throws rethrows any error after logging it through the package logger
 */
@grok.decorators.func({
  friendlyName: 'seq2atomic',
  description: 'Converts a `Macromolecule` sequence to its atomic level `Molecule` representation',
  outputs: [{name: 'molfile', type: 'string', options: {semType: 'Molecule'}}]
})
static async seq2atomic(
  @grok.decorators.param({options: {semType: 'Macromolecule'}})
  seq: string,
  nonlinear: boolean
): Promise<string | undefined> {
  if (seq.trim().length === 0) return '';

  try {
    const column = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `helm`, [seq]);
    const table = DG.DataFrame.fromColumns([column]);
    // The detector is invoked as a function call; its result is applied to the column only when truthy.
    const detected = await grok.functions.call('Bio:detectMacromolecule', {col: column});
    if (detected) column.semType = detected;

    const libHelper = await PackageFunctions.getMonomerLibHelper();
    const monomerLib = libHelper.getMonomerLib();
    const seqHelper = _package.seqHelper;
    const rdKitModule = await getRdKitModule();
    const converted = await sequenceToMolfile(table, column, nonlinear, false, monomerLib, seqHelper, rdKitModule);
    return converted?.molCol?.get(0) ?? undefined;
  } catch (err: any) {
    const [errMsg, errStack] = errInfo(err);
    _package.logger.error(errMsg, undefined, errStack);
    throw err;
  }
}
|
|
1146
1179
|
|
|
1147
|
-
//
|
|
1148
|
-
//
|
|
1149
|
-
//
|
|
1150
|
-
//
|
|
1151
|
-
//
|
|
1152
|
-
//
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1180
|
+
// //description: Gets similarity to a reference sequence
|
|
1181
|
+
// //input: string seq { semType: Macromolecule }
|
|
1182
|
+
// //input: string ref { semType: Macromolecule }
|
|
1183
|
+
// //output: double result
|
|
1184
|
+
// export async function seqSimilarity(seq: string, ref: string): Promise<number> {
|
|
1185
|
+
// // if (!(seq.trim())) return null;
|
|
1186
|
+
// try {
|
|
1187
|
+
// const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `seq`, [seq]);
|
|
1188
|
+
// const df = DG.DataFrame.fromColumns([seqCol]);
|
|
1189
|
+
// const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
1190
|
+
// if (semType) seqCol.semType = semType;
|
|
1191
|
+
//
|
|
1192
|
+
// const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.SIMILARITY);
|
|
1193
|
+
// return resCol.get(0)!;
|
|
1194
|
+
// } catch (err: any) {
|
|
1195
|
+
// const [errMsg, errStack] = errInfo(err);
|
|
1196
|
+
// _package.logger.error(errMsg, undefined, errStack);
|
|
1197
|
+
// throw err;
|
|
1198
|
+
// }
|
|
1199
|
+
// }
|
|
1200
|
+
|
|
1201
|
+
/**
 * Computes the identity score of one sequence against a reference sequence by
 * wrapping the sequence in a one-row table and scoring it with
 * `calculateScoresWithEmptyValues(..., SCORE.IDENTITY, ...)`.
 *
 * @param seq sequence to score; a blank (whitespace-only) value yields `null`
 * @param ref reference sequence passed to the scorer unchanged
 * @returns the first value of the score column
 * @throws Error('Macromolecule required') when the detector returns no semantic type;
 *   any error is logged through the package logger and rethrown
 */
@grok.decorators.func({
  name: 'seqIdentity',
  friendlyName: 'seqIdentity',
  description: 'Gets identity to a reference sequence',
})
static async seqIdentity(
  @grok.decorators.param({options: {semType: 'Macromolecule'}})
  seq: string,
  @grok.decorators.param({options: {semType: 'Macromolecule'}})
  ref: string
): Promise<number | null> {
  if (!(seq.trim())) return null;
  try {
    const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `seq`, [seq]);
    const df = DG.DataFrame.fromColumns([seqCol]);
    const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
    if (!semType) throw new Error('Macromolecule required');
    // Apply the detected semantic type to the temporary column, as seq2atomic does.
    // NOTE(review): presumably the scorer expects a column marked as Macromolecule - confirm.
    seqCol.semType = semType;

    const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.IDENTITY, _package.seqHelper);
    return resCol.get(0);
  } catch (err: any) {
    const [errMsg, errStack] = errInfo(err);
    _package.logger.error(errMsg, undefined, errStack);
    throw err;
  }
}
|
|
1156
1227
|
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1228
|
+
/**
 * Reads a file as text and passes it to `detectMacromoleculeProbeDo`.
 *
 * @param file file whose content is read as a string
 * @param colName forwarded unchanged; defaults to an empty string
 * @param probeCount forwarded unchanged; defaults to 100
 */
@grok.decorators.func()
static async detectMacromoleculeProbe(
  file: DG.FileInfo,
  colName: string = '',
  @grok.decorators.param({options: {initialValue: '100'}})
  probeCount: number = 100
): Promise<void> {
  const content: string = await file.readAsString();
  await detectMacromoleculeProbeDo(content, colName, probeCount);
}
|
|
1166
1238
|
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
DG.Utils.download(`${table.name}.json`, jsonMonomerLibrary);
|
|
1173
|
-
}
|
|
1239
|
+
/**
 * Returns the package's sequence helper after `PackageFunctions.initBio()` has completed.
 * @returns the value of `_package.seqHelper`, read after initialization
 */
@grok.decorators.func({outputs: [{type: 'object', name: 'result'}]})
static async getSeqHelper(): Promise<ISeqHelper> {
  await PackageFunctions.initBio();
  const seqHelper = _package.seqHelper;
  return seqHelper;
}
|
|
1174
1244
|
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
export async function seq2atomic(seq: string, nonlinear: boolean): Promise<string | undefined> {
|
|
1184
|
-
if (!(seq.trim())) return '';
|
|
1185
|
-
try {
|
|
1186
|
-
const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `helm`, [seq]);
|
|
1187
|
-
const df = DG.DataFrame.fromColumns([seqCol]);
|
|
1188
|
-
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
1189
|
-
if (semType) seqCol.semType = semType;
|
|
1190
|
-
|
|
1191
|
-
const monomerLib = (await getMonomerLibHelper()).getMonomerLib();
|
|
1192
|
-
const seqHelper = _package.seqHelper;
|
|
1193
|
-
const rdKitModule = await getRdKitModule();
|
|
1194
|
-
const res = (await sequenceToMolfile(df, seqCol, nonlinear, false, monomerLib, seqHelper, rdKitModule))?.molCol?.get(0);
|
|
1195
|
-
return res ?? undefined;
|
|
1196
|
-
} catch (err: any) {
|
|
1197
|
-
const [errMsg, errStack] = errInfo(err);
|
|
1198
|
-
_package.logger.error(errMsg, undefined, errStack);
|
|
1199
|
-
throw err;
|
|
1245
|
+
/**
 * Produces a molecule column from a HELM column by delegating to
 * `getMolColumnFromHelm` with the package's monomer library.
 *
 * @param df table forwarded to `getMolColumnFromHelm`
 * @param helmCol column forwarded to `getMolColumnFromHelm`
 * @param chiralityEngine forwarded unchanged; defaults to `true`
 * @returns whatever `getMolColumnFromHelm` returns
 */
@grok.decorators.func()
static async getMolFromHelm(
  df: DG.DataFrame,
  @grok.decorators.param({type: 'column'}) helmCol: DG.Column<string>,
  @grok.decorators.param({options: {initialValue: 'true'}})
  chiralityEngine: boolean = true
): Promise<DG.Column<string>> {
  const monomerLib = _package.monomerLib;
  return getMolColumnFromHelm(df, helmCol, chiralityEngine, monomerLib);
}
|
|
1201
1254
|
}
|
|
1202
1255
|
|
|
1203
|
-
// //description: Gets similarity to a reference sequence
|
|
1204
|
-
// //input: string seq { semType: Macromolecule }
|
|
1205
|
-
// //input: string ref { semType: Macromolecule }
|
|
1206
|
-
// //output: double result
|
|
1207
|
-
// export async function seqSimilarity(seq: string, ref: string): Promise<number> {
|
|
1208
|
-
// // if (!(seq.trim())) return null;
|
|
1209
|
-
// try {
|
|
1210
|
-
// const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, `seq`, [seq]);
|
|
1211
|
-
// const df = DG.DataFrame.fromColumns([seqCol]);
|
|
1212
|
-
// const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
1213
|
-
// if (semType) seqCol.semType = semType;
|
|
1214
|
-
//
|
|
1215
|
-
// const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.SIMILARITY);
|
|
1216
|
-
// return resCol.get(0)!;
|
|
1217
|
-
// } catch (err: any) {
|
|
1218
|
-
// const [errMsg, errStack] = errInfo(err);
|
|
1219
|
-
// _package.logger.error(errMsg, undefined, errStack);
|
|
1220
|
-
// throw err;
|
|
1221
|
-
// }
|
|
1222
|
-
// }
|
|
1223
1256
|
|
|
1224
|
-
//
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
1236
|
-
if (!semType) throw new Error('Macromolecule required');
|
|
1237
|
-
|
|
1238
|
-
const resCol = await calculateScoresWithEmptyValues(df, seqCol, ref, SCORE.IDENTITY, _package.seqHelper);
|
|
1239
|
-
return resCol.get(0);
|
|
1240
|
-
} catch (err: any) {
|
|
1241
|
-
const [errMsg, errStack] = errInfo(err);
|
|
1242
|
-
_package.logger.error(errMsg, undefined, errStack);
|
|
1243
|
-
throw err;
|
|
1257
|
+
//export let hydrophobPalette: SeqPaletteCustom | null = null;
|
|
1258
|
+
|
|
1259
|
+
/** SeqPalette implementation backed by a plain monomer-symbol to color lookup table. */
export class SeqPaletteCustom implements SeqPalette {
  /**
   * @param _palette lookup table keyed by monomer symbol; it is kept by reference, not copied
   */
  constructor(private readonly _palette: { [m: string]: string }) {}

  /**
   * Looks up the color for a monomer symbol.
   * No fallback is applied: the result of the plain property lookup is returned as is.
   * @param m monomer symbol used as the lookup key
   */
  public get(m: string): string {
    const color = this._palette[m];
    return color;
  }
}
|
|
1246
1270
|
|
|
1271
|
+
/**
 * One-time package initialization. The steps run strictly in this order:
 * RDKit module, package properties, monomer library manager and user library
 * settings, monomer library load, then `_package.completeInit(...)`.
 * Monomer sets loading is started but deliberately not awaited.
 */
async function initBioInt() {
  const logPrefix = 'Bio: _package.initBio()';
  _package.logger.debug(`${logPrefix}, start`);
  const startedAt: number = window.performance.now();

  // very important that loading should happen in correct order!
  // first make sure chem and rdkit module are loaded
  const rdKitModule = await getRdKitModule();

  // then load package settings
  const pkgProps = await _package.getProperties();
  _package.properties = new BioPackageProperties(pkgProps);

  // then load monomer lib
  const libHelper = await MonomerLibManager.getInstance();

  // Reset `explicit` in the user lib settings: it can be left set by a terminated test
  const libSettings = await getUserLibSettings();
  if (libSettings.explicit) {
    libSettings.explicit = [];
    await setUserLibSettings(libSettings);
  }

  await libHelper.awaitLoaded(Infinity);
  if (!libHelper.initialLoadCompleted) {
    await libHelper.loadMonomerLib();
  }

  // Do not wait for monomers and sets loaded
  void libHelper.loadMonomerSets();
  const monomerLib = libHelper.getMonomerLib();
  const monomerSets = libHelper.getMonomerSets();

  // finally log
  const loadedAt: number = window.performance.now();
  _package.logger.debug(`${logPrefix}, loading ET: ${loadedAt - startedAt} ms`);

  // const monomers: string[] = [];
  // const logPs: number[] = [];

  const seqHelper = new SeqHelper(libHelper, rdKitModule);
  _package.completeInit(seqHelper, monomerLib, monomerSets, rdKitModule);

  // NB! do not delete the code below. not used now but in future we might use hydrophobicity palette
  // const series = monomerLib!.getMonomerMolsByPolymerType('PEPTIDE')!;
  // Object.keys(series).forEach((symbol) => {
  //   monomers.push(symbol);
  //   const block = series[symbol].replaceAll('#R', 'O ');
  //   const mol = rdKitModule.get_mol(block);
  //   const logP = JSON.parse(mol.get_descriptors()).CrippenClogP;
  //   logPs.push(logP);
  //   mol?.delete();
  // });

  // const sum = logPs.reduce((a, b) => a + b, 0);
  // const avg = (sum / logPs.length) || 0;

  // const palette: { [monomer: string]: string } = {};
  // for (let i = 0; i < monomers.length; i++)
  //   palette[monomers[i]] = logPs[i] < avg ? '#4682B4' : '#DC143C';

  // hydrophobPalette = new SeqPaletteCustom(palette);

  _package.logger.debug(`${logPrefix}, end`);
  handleSequenceHeaderRendering();
}
|
|
1274
1330
|
|
|
1331
|
+
// -- Package settings editor --
|
|
1332
|
+
|
|
1333
|
+
// //name: packageSettingsEditor
|
|
1334
|
+
// //description: The database connection
|
|
1335
|
+
// //tags: packageSettingsEditor
|
|
1336
|
+
// //input: object propList
|
|
1337
|
+
// //output: widget result
|
|
1338
|
+
// export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
1339
|
+
// const widget = new PackageSettingsEditorWidget(propList);
|
|
1340
|
+
// widget.init().then(); // Ignore promise returned
|
|
1341
|
+
// return widget as DG.Widget;
|
|
1342
|
+
// }
|
|
1343
|
+
|
|
1275
1344
|
//name: test1
|
|
1276
1345
|
//output: object result
|
|
1277
1346
|
export function test1(): any {
|