@datagrok/bio 2.9.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -1
- package/dist/452.js +1 -1
- package/dist/452.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +4 -4
- package/src/analysis/sequence-space.ts +5 -6
- package/src/apps/get-region-app.ts +56 -0
- package/src/apps/web-logo-app.ts +3 -6
- package/src/package-test.ts +1 -0
- package/src/package-types.ts +13 -0
- package/src/package.ts +152 -52
- package/src/tests/converters-test.ts +2 -3
- package/src/tests/scoring.ts +8 -4
- package/src/tests/units-handler-get-region.ts +116 -0
- package/src/utils/cell-renderer.ts +5 -32
- package/src/utils/context-menu.ts +2 -3
- package/src/utils/convert.ts +6 -7
- package/src/utils/get-region-func-editor.ts +241 -0
- package/src/utils/get-region.ts +65 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +21 -17
- package/src/viewers/web-logo-viewer.ts +8 -6
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.10.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.38.0",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
@@ -62,8 +62,8 @@
|
|
|
62
62
|
"webpack": "^5.76.3",
|
|
63
63
|
"webpack-bundle-analyzer": "latest",
|
|
64
64
|
"webpack-cli": "^4.9.1",
|
|
65
|
-
"@datagrok/chem": "1.
|
|
66
|
-
"@datagrok/helm": "2.1.
|
|
65
|
+
"@datagrok/chem": "1.7.2",
|
|
66
|
+
"@datagrok/helm": "2.1.16"
|
|
67
67
|
},
|
|
68
68
|
"scripts": {
|
|
69
69
|
"link-api": "npm link datagrok-api",
|
|
@@ -6,7 +6,6 @@ import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-
|
|
|
6
6
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
7
7
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
8
|
import * as grok from 'datagrok-api/grok';
|
|
9
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
10
9
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
10
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
11
|
|
|
@@ -57,17 +56,17 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
57
56
|
}
|
|
58
57
|
|
|
59
58
|
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
60
|
-
const
|
|
61
|
-
if (
|
|
59
|
+
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
60
|
+
if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
|
|
62
61
|
let distanceFName = MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
63
62
|
let seqList = spaceParams.seqCol.toList();
|
|
64
|
-
if (
|
|
65
|
-
const fastaCol =
|
|
63
|
+
if (ncUH.isSeparator()) {
|
|
64
|
+
const fastaCol = ncUH.convert(NOTATION.FASTA);
|
|
66
65
|
seqList = fastaCol.toList();
|
|
67
66
|
const uh = UnitsHandler.getOrCreate(fastaCol);
|
|
68
67
|
distanceFName = uh.getDistanceFunctionName();
|
|
69
68
|
} else {
|
|
70
|
-
distanceFName =
|
|
69
|
+
distanceFName = ncUH.getDistanceFunctionName();
|
|
71
70
|
}
|
|
72
71
|
for (let i = 0; i < seqList.length; i++) {
|
|
73
72
|
// toList puts empty values in array and it causes downstream errors. replace with null
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {TAGS, positionSeparator} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
|
|
8
|
+
import {_package} from '../package';
|
|
9
|
+
|
|
10
|
+
const csv = `seq,value
|
|
11
|
+
ATCCGTCGT,0.5
|
|
12
|
+
TGTTCGTCA,0.4
|
|
13
|
+
ATGGTCGTA,0.7
|
|
14
|
+
ATCCGTGCA,0.1`;
|
|
15
|
+
|
|
16
|
+
const positionNames = ['1', '1A', '1C', '2', '4', '4A', '4B', '5', '6'].join(positionSeparator);
|
|
17
|
+
|
|
18
|
+
const regions = [
|
|
19
|
+
{name: 'first region', start: '1', end: '2'},
|
|
20
|
+
{name: 'second region', start: '1C', end: '4'},
|
|
21
|
+
{name: 'overlapping second', start: '1C', end: '4A'},
|
|
22
|
+
{name: 'whole sequence', start: '1', end: '6'},
|
|
23
|
+
{name: 'bad start', start: '0', end: '6'},
|
|
24
|
+
{name: 'bad end', start: '1', end: '4C'},
|
|
25
|
+
{name: 'bad start & end', start: '0', end: '4C'},
|
|
26
|
+
];
|
|
27
|
+
|
|
28
|
+
export class GetRegionApp {
|
|
29
|
+
df: DG.DataFrame;
|
|
30
|
+
view: DG.TableView;
|
|
31
|
+
|
|
32
|
+
constructor(
|
|
33
|
+
private readonly urlParams: URLSearchParams,
|
|
34
|
+
private readonly funcName: string
|
|
35
|
+
) {}
|
|
36
|
+
|
|
37
|
+
async init(): Promise<void> {
|
|
38
|
+
this.df = DG.DataFrame.fromCsv(csv);
|
|
39
|
+
const seqCol = this.df.getCol('seq');
|
|
40
|
+
seqCol.setTag(TAGS.positionNames, positionNames);
|
|
41
|
+
seqCol.setTag(TAGS.regions, JSON.stringify(regions));
|
|
42
|
+
|
|
43
|
+
await this.buildView();
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
// -- View --
|
|
47
|
+
|
|
48
|
+
async buildView(): Promise<void> {
|
|
49
|
+
this.view = grok.shell.addTableView(this.df);
|
|
50
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}`;
|
|
51
|
+
|
|
52
|
+
const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
|
|
53
|
+
.fromType('WebLogo')) as DG.Viewer & IWebLogoViewer;
|
|
54
|
+
this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
|
|
55
|
+
}
|
|
56
|
+
}
|
package/src/apps/web-logo-app.ts
CHANGED
|
@@ -11,15 +11,12 @@ import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
|
|
|
11
11
|
import {_package} from '../package';
|
|
12
12
|
|
|
13
13
|
export class WebLogoApp {
|
|
14
|
-
private _funcName: string = '';
|
|
15
|
-
|
|
16
14
|
df: DG.DataFrame;
|
|
17
15
|
view: DG.TableView;
|
|
18
16
|
|
|
19
|
-
constructor(private readonly urlParams: URLSearchParams) {}
|
|
17
|
+
constructor(private readonly urlParams: URLSearchParams, private readonly funcName: string) {}
|
|
20
18
|
|
|
21
|
-
async init(df: DG.DataFrame
|
|
22
|
-
this._funcName = funcName;
|
|
19
|
+
async init(df: DG.DataFrame): Promise<void> {
|
|
23
20
|
this.df = df;
|
|
24
21
|
|
|
25
22
|
await this.buildView();
|
|
@@ -33,7 +30,7 @@ export class WebLogoApp {
|
|
|
33
30
|
.toArray().join('&');
|
|
34
31
|
|
|
35
32
|
this.view = grok.shell.addTableView(this.df);
|
|
36
|
-
this.view.path = this.view.basePath = `func/${_package.name}.${this.
|
|
33
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}?${urlParamsTxt}`;
|
|
37
34
|
|
|
38
35
|
const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
|
|
39
36
|
for (const [optName, optValue] of this.urlParams.entries()) {
|
package/src/package-test.ts
CHANGED
|
@@ -24,6 +24,7 @@ import './tests/pepsea-tests';
|
|
|
24
24
|
import './tests/viewers';
|
|
25
25
|
import './tests/units-handler-tests';
|
|
26
26
|
import './tests/units-handler-splitted-tests';
|
|
27
|
+
import './tests/units-handler-get-region';
|
|
27
28
|
import './tests/to-atomic-level-tests';
|
|
28
29
|
import './tests/mm-distance-tests';
|
|
29
30
|
import './tests/activity-cliffs-tests';
|
package/src/package-types.ts
CHANGED
|
@@ -57,4 +57,17 @@ export class BioPackage extends DG.Package {
|
|
|
57
57
|
public get properties(): BioPackageProperties { return this._properties; };
|
|
58
58
|
|
|
59
59
|
public set properties(value: BioPackageProperties) { this._properties = value; }
|
|
60
|
+
|
|
61
|
+
private _initialized: boolean = false;
|
|
62
|
+
|
|
63
|
+
public get initialized(): boolean { return this._initialized;}
|
|
64
|
+
|
|
65
|
+
public completeInit(): void { this._initialized = true;}
|
|
66
|
+
|
|
67
|
+
handleErrorUI(err: any) {
|
|
68
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
69
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
70
|
+
grok.shell.error(errMsg);
|
|
71
|
+
this.logger.error(errMsg, undefined, errStack);
|
|
72
|
+
}
|
|
60
73
|
}
|
package/src/package.ts
CHANGED
|
@@ -55,7 +55,6 @@ import {checkInputColumnUI} from './utils/check-input-column';
|
|
|
55
55
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
56
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
57
57
|
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
58
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
59
58
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
60
59
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
61
60
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -66,10 +65,13 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
|
|
|
66
65
|
import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
|
|
67
66
|
import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
|
|
68
67
|
import {addCopyMenuUI} from './utils/context-menu';
|
|
68
|
+
import {getRegionDo} from './utils/get-region';
|
|
69
|
+
import {GetRegionApp} from './apps/get-region-app';
|
|
70
|
+
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
69
71
|
|
|
70
72
|
export const _package = new BioPackage();
|
|
71
73
|
|
|
72
|
-
// /** Avoid
|
|
74
|
+
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
73
75
|
// let monomerLib: MonomerLib | null = null;
|
|
74
76
|
|
|
75
77
|
//name: getMonomerLibHelper
|
|
@@ -104,7 +106,9 @@ export async function initBio() {
|
|
|
104
106
|
const bioPkgProps = new BioPackageProperties(pkgProps);
|
|
105
107
|
_package.properties = bioPkgProps;
|
|
106
108
|
})(),
|
|
107
|
-
])
|
|
109
|
+
]).finally(() => {
|
|
110
|
+
_package.completeInit();
|
|
111
|
+
});
|
|
108
112
|
|
|
109
113
|
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
110
114
|
const monomers: string[] = [];
|
|
@@ -150,14 +154,96 @@ export function getBioLib(): IMonomerLib {
|
|
|
150
154
|
return MonomerLibHelper.instance.getBioLib();
|
|
151
155
|
}
|
|
152
156
|
|
|
157
|
+
// -- Panels --
|
|
158
|
+
|
|
159
|
+
//name: Get Region
|
|
160
|
+
//description: Creates a new column with sequences of the region between start and end
|
|
161
|
+
//tags: panel
|
|
162
|
+
//input: column seqCol {semType: Macromolecule}
|
|
163
|
+
//output: widget result
|
|
164
|
+
export function getRegionPanel(seqCol: DG.Column<string>): DG.Widget {
|
|
165
|
+
// const host = ui.divV([
|
|
166
|
+
// ui.inputs([
|
|
167
|
+
// ui.stringInput('Region', ''),
|
|
168
|
+
// ]),
|
|
169
|
+
// ui.button('Ok', () => {})
|
|
170
|
+
// ]);
|
|
171
|
+
// return DG.Widget.fromRoot(host);
|
|
172
|
+
const funcName: string = 'getRegionTopMenu';
|
|
173
|
+
const funcList = DG.Func.find({package: _package.name, name: funcName});
|
|
174
|
+
if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
|
|
175
|
+
const func = funcList[0];
|
|
176
|
+
const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
|
|
177
|
+
const funcEditor = new GetRegionFuncEditor(funcCall);
|
|
178
|
+
return funcEditor.widget();
|
|
179
|
+
}
|
|
180
|
+
|
|
153
181
|
//name: Manage Libraries
|
|
154
|
-
//
|
|
182
|
+
//description:
|
|
155
183
|
//tags: panel, exclude-actions-panel
|
|
184
|
+
//input: column seqColumn {semType: Macromolecule}
|
|
156
185
|
//output: widget result
|
|
157
186
|
export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
158
187
|
return getLibraryPanelUI();
|
|
159
188
|
}
|
|
160
189
|
|
|
190
|
+
// -- Func Editors --
|
|
191
|
+
|
|
192
|
+
//name: GetRegionEditor
|
|
193
|
+
//tags: editor
|
|
194
|
+
//input: funccall call
|
|
195
|
+
export function GetRegionEditor(call: DG.FuncCall): void {
|
|
196
|
+
try {
|
|
197
|
+
const funcEditor = new GetRegionFuncEditor(call);
|
|
198
|
+
funcEditor.dialog();
|
|
199
|
+
} catch (err: any) {
|
|
200
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
201
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
202
|
+
grok.shell.error(`Get region editor error: ${errMsg}`);
|
|
203
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
//name: SplitToMonomersEditor
|
|
208
|
+
//tags: editor
|
|
209
|
+
//input: funccall call
|
|
210
|
+
export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
211
|
+
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
212
|
+
ui.dialog({title: 'Split to Monomers'})
|
|
213
|
+
.add(funcEditor.paramsUI)
|
|
214
|
+
.onOK(async () => {
|
|
215
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
216
|
+
})
|
|
217
|
+
.show();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
//name: SequenceSpaceEditor
|
|
221
|
+
//tags: editor
|
|
222
|
+
//input: funccall call
|
|
223
|
+
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
224
|
+
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
225
|
+
ui.dialog({title: 'Sequence Space'})
|
|
226
|
+
.add(funcEditor.paramsUI)
|
|
227
|
+
.onOK(async () => {
|
|
228
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
229
|
+
})
|
|
230
|
+
.show();
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
//name: SeqActivityCliffsEditor
|
|
234
|
+
//tags: editor
|
|
235
|
+
//input: funccall call
|
|
236
|
+
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
237
|
+
const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
238
|
+
ui.dialog({title: 'Activity Cliffs'})
|
|
239
|
+
.add(funcEditor.paramsUI)
|
|
240
|
+
.onOK(async () => {
|
|
241
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
242
|
+
})
|
|
243
|
+
.show();
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
|
|
161
247
|
// -- Package settings editor --
|
|
162
248
|
|
|
163
249
|
//name: packageSettingsEditor
|
|
@@ -182,6 +268,8 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
|
182
268
|
return new MacromoleculeSequenceCellRenderer();
|
|
183
269
|
}
|
|
184
270
|
|
|
271
|
+
// -- Property panels --
|
|
272
|
+
|
|
185
273
|
//name: Sequence Renderer
|
|
186
274
|
//input: column molColumn {semType: Macromolecule}
|
|
187
275
|
//tags: panel
|
|
@@ -251,17 +339,39 @@ export function vdRegionsViewer() {
|
|
|
251
339
|
return new VdRegionsViewer();
|
|
252
340
|
}
|
|
253
341
|
|
|
254
|
-
|
|
255
|
-
//
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
342
|
+
|
|
343
|
+
// -- Top menu --
|
|
344
|
+
|
|
345
|
+
//name: getRegion
|
|
346
|
+
//description: Gets a new column with sequences of the region between start and end
|
|
347
|
+
//input: column sequence
|
|
348
|
+
//input: string start {optional: true}
|
|
349
|
+
//input: string end {optional: true}
|
|
350
|
+
//input: string name {optional: true} [Name of the column to be created]
|
|
351
|
+
//output: column result
|
|
352
|
+
export function getRegion(
|
|
353
|
+
sequence: DG.Column<string>, start: string | undefined, end: string | undefined, name: string | undefined
|
|
354
|
+
): DG.Column<string> {
|
|
355
|
+
return getRegionDo(sequence,
|
|
356
|
+
start ?? null, end ?? null, name ?? null);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
//top-menu: Bio | Convert | Get Region...
|
|
360
|
+
//name: Get Region
|
|
361
|
+
//description: Get sequences for a region specified from a Macromolecule
|
|
362
|
+
//input: dataframe table [Input data table]
|
|
363
|
+
//input: column sequence {semType: Macromolecule} [Sequence column]
|
|
364
|
+
//input: string start {optional: true} [Region start position name]
|
|
365
|
+
//input: string end {optional: true} [Region end position name]
|
|
366
|
+
//input: string name {optional: true} [Region column name]
|
|
367
|
+
//editor: Bio:GetRegionEditor
|
|
368
|
+
export function getRegionTopMenu(
|
|
369
|
+
table: DG.DataFrame, sequence: DG.Column,
|
|
370
|
+
start: string | undefined, end: string | undefined, name: string | undefined
|
|
371
|
+
): void {
|
|
372
|
+
const regCol = getRegionDo(sequence, start ?? null, end ?? null, name ?? null);
|
|
373
|
+
sequence.dataFrame.columns.add(regCol);
|
|
374
|
+
regCol.setTag(DG.TAGS.CELL_RENDERER, 'sequence');
|
|
265
375
|
}
|
|
266
376
|
|
|
267
377
|
//top-menu: Bio | Analyze | Activity Cliffs...
|
|
@@ -287,14 +397,14 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
287
397
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
288
398
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
289
399
|
};
|
|
290
|
-
const
|
|
400
|
+
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
291
401
|
let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
292
402
|
let seqCol = macroMolecule;
|
|
293
|
-
if (
|
|
294
|
-
if (
|
|
295
|
-
columnDistanceMetric =
|
|
403
|
+
if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
|
|
404
|
+
if (ncUH.isFasta()) {
|
|
405
|
+
columnDistanceMetric = ncUH.getDistanceFunctionName();
|
|
296
406
|
} else {
|
|
297
|
-
seqCol =
|
|
407
|
+
seqCol = ncUH.convert(NOTATION.FASTA);
|
|
298
408
|
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
299
409
|
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
300
410
|
tags.units = NOTATION.FASTA;
|
|
@@ -345,19 +455,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
345
455
|
}
|
|
346
456
|
}
|
|
347
457
|
|
|
348
|
-
//name: SequenceSpaceEditor
|
|
349
|
-
//tags: editor
|
|
350
|
-
//input: funccall call
|
|
351
|
-
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
352
|
-
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
353
|
-
ui.dialog({title: 'Sequence Space'})
|
|
354
|
-
.add(funcEditor.paramsUI)
|
|
355
|
-
.onOK(async () => {
|
|
356
|
-
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
357
|
-
})
|
|
358
|
-
.show();
|
|
359
|
-
}
|
|
360
|
-
|
|
361
458
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
362
459
|
//name: Sequence Space
|
|
363
460
|
//description: Creates 2D sequence space with projected sequences by pairwise distance
|
|
@@ -667,19 +764,6 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
667
764
|
return resDf;
|
|
668
765
|
}
|
|
669
766
|
|
|
670
|
-
//name: SplitToMonomersEditor
|
|
671
|
-
//tags: editor
|
|
672
|
-
//input: funccall call
|
|
673
|
-
export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
674
|
-
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
675
|
-
ui.dialog({title: 'Split to Monomers'})
|
|
676
|
-
.add(funcEditor.paramsUI)
|
|
677
|
-
.onOK(async () => {
|
|
678
|
-
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
679
|
-
})
|
|
680
|
-
.show();
|
|
681
|
-
}
|
|
682
|
-
|
|
683
767
|
//top-menu: Bio | Convert | Split to Monomers...
|
|
684
768
|
//name: Split to Monomers
|
|
685
769
|
//input: dataframe table
|
|
@@ -759,26 +843,30 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
|
|
|
759
843
|
grok.shell.tv.grid.scrollToCell(macromolecules, 0);
|
|
760
844
|
}
|
|
761
845
|
|
|
762
|
-
//top-menu: Bio |
|
|
846
|
+
//top-menu: Bio | Calculate | Identity...
|
|
763
847
|
//name: Identity Scoring
|
|
764
848
|
//description: Adds a column with fraction of matching monomers
|
|
765
849
|
//input: dataframe table [Table containing Macromolecule column]
|
|
766
850
|
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
767
851
|
//input: string reference [Sequence, matching column format]
|
|
768
852
|
//output: column scores
|
|
769
|
-
export async function sequenceIdentityScoring(
|
|
853
|
+
export async function sequenceIdentityScoring(
|
|
854
|
+
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
855
|
+
): Promise<DG.Column<number>> {
|
|
770
856
|
const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
|
|
771
857
|
return scores;
|
|
772
858
|
}
|
|
773
859
|
|
|
774
|
-
//top-menu: Bio |
|
|
860
|
+
//top-menu: Bio | Calculate | Similarity...
|
|
775
861
|
//name: Similarity Scoring
|
|
776
862
|
//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
|
|
777
863
|
//input: dataframe table [Table containing Macromolecule column]
|
|
778
864
|
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
779
865
|
//input: string reference [Sequence, matching column format]
|
|
780
866
|
//output: column scores
|
|
781
|
-
export async function sequenceSimilarityScoring(
|
|
867
|
+
export async function sequenceSimilarityScoring(
|
|
868
|
+
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
869
|
+
): Promise<DG.Column<number>> {
|
|
782
870
|
const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
|
|
783
871
|
return scores;
|
|
784
872
|
}
|
|
@@ -806,10 +894,22 @@ export async function webLogoLargeApp(): Promise<void> {
|
|
|
806
894
|
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
807
895
|
try {
|
|
808
896
|
const urlParams = new URLSearchParams(window.location.search);
|
|
809
|
-
const app = new WebLogoApp(urlParams);
|
|
897
|
+
const app = new WebLogoApp(urlParams, 'webLogoLargeApp');
|
|
810
898
|
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
|
|
811
899
|
await grok.data.detectSemanticTypes(df);
|
|
812
|
-
await app.init(df
|
|
900
|
+
await app.init(df);
|
|
901
|
+
} finally {
|
|
902
|
+
pi.close();
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
//name: getRegionApp
|
|
907
|
+
export async function getRegionApp(): Promise<void> {
|
|
908
|
+
const pi = DG.TaskBarProgressIndicator.create('getRegion ...');
|
|
909
|
+
try {
|
|
910
|
+
const urlParams = new URLSearchParams(window.location.search);
|
|
911
|
+
const app = new GetRegionApp(urlParams, 'getRegionApp');
|
|
912
|
+
await app.init();
|
|
813
913
|
} finally {
|
|
814
914
|
pi.close();
|
|
815
915
|
}
|
|
@@ -5,7 +5,6 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
9
8
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
9
|
|
|
11
10
|
// import {mmSemType} from '../const';
|
|
@@ -134,8 +133,8 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
|
|
|
134
133
|
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
135
134
|
|
|
136
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
137
|
-
const
|
|
138
|
-
const resCol =
|
|
136
|
+
const converterUH = UnitsHandler.getOrCreate(srcCol);
|
|
137
|
+
const resCol = converterUH.convert(tgtNotation, tgtSeparator);
|
|
139
138
|
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
140
139
|
return resCol;
|
|
141
140
|
};
|
package/src/tests/scoring.ts
CHANGED
|
@@ -26,13 +26,17 @@ category('Scoring', () => {
|
|
|
26
26
|
|
|
27
27
|
test('Identity', async () => {
|
|
28
28
|
const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
|
|
29
|
-
for (let i = 0; i < scoresCol.length; i++)
|
|
30
|
-
expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01,
|
|
29
|
+
for (let i = 0; i < scoresCol.length; i++) {
|
|
30
|
+
expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01,
|
|
31
|
+
`Wrong identity score for sequence at position ${i}`);
|
|
32
|
+
}
|
|
31
33
|
});
|
|
32
34
|
|
|
33
35
|
test('Similarity', async () => {
|
|
34
36
|
const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
|
|
35
|
-
for (let i = 0; i < scoresCol.length; i++)
|
|
36
|
-
expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01,
|
|
37
|
+
for (let i = 0; i < scoresCol.length; i++) {
|
|
38
|
+
expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01,
|
|
39
|
+
`Wrong similarity score for sequence at position ${i}`);
|
|
40
|
+
}
|
|
37
41
|
});
|
|
38
42
|
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
|
+
import {ALPHABET, NOTATION, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
|
+
|
|
8
|
+
category('UnitsHandler: getRegion', () => {
|
|
9
|
+
const data: {
|
|
10
|
+
[testName: string]: {
|
|
11
|
+
srcCsv: string,
|
|
12
|
+
startIdx: number | null,
|
|
13
|
+
endIdx: number | null,
|
|
14
|
+
tgtCsv: string,
|
|
15
|
+
units: NOTATION,
|
|
16
|
+
alphabet: ALPHABET | null, /* alphabet is not applicable for units 'helm' */
|
|
17
|
+
|
|
18
|
+
positionNames?: { tag: string | null, start: string | null, end: string | null }
|
|
19
|
+
}
|
|
20
|
+
} = {
|
|
21
|
+
'fastaDna': {
|
|
22
|
+
srcCsv: `seq
|
|
23
|
+
ATTCGT
|
|
24
|
+
ACTGCTC
|
|
25
|
+
ATTCCGTA`,
|
|
26
|
+
startIdx: 2,
|
|
27
|
+
endIdx: 4,
|
|
28
|
+
tgtCsv: `seq
|
|
29
|
+
TCG
|
|
30
|
+
TGC
|
|
31
|
+
TCC`,
|
|
32
|
+
units: NOTATION.FASTA,
|
|
33
|
+
alphabet: ALPHABET.DNA,
|
|
34
|
+
|
|
35
|
+
positionNames: {tag: 'a, b, c, d, e, f, g, h', start: 'c', end: 'e'},
|
|
36
|
+
},
|
|
37
|
+
'separatorPt': {
|
|
38
|
+
srcCsv: `seq
|
|
39
|
+
M-D-Y-K-E-T-L
|
|
40
|
+
M-I-E-V-F-L-F-G-I
|
|
41
|
+
M-M-`,
|
|
42
|
+
startIdx: 5,
|
|
43
|
+
endIdx: null,
|
|
44
|
+
tgtCsv: `seq
|
|
45
|
+
T-L--
|
|
46
|
+
L-F-G-I
|
|
47
|
+
---`,
|
|
48
|
+
units: NOTATION.SEPARATOR,
|
|
49
|
+
alphabet: ALPHABET.PT,
|
|
50
|
+
|
|
51
|
+
positionNames: {tag: '1, 1A, 1B, 2, 3, 4, 4A, 4A, 4C', start: '4', end: null},
|
|
52
|
+
},
|
|
53
|
+
'helm': {
|
|
54
|
+
srcCsv: `seq
|
|
55
|
+
PEPTIDE1{[meI].[hHis].[Aca].N.T.[dE].[Thr_PO3H2].[Aca].[D-Tyr_Et].[Tyr_ab-dehydroMe].[dV].E.N.[D-Orn]}$$$$
|
|
56
|
+
PEPTIDE1{[meI].[hHis].[Aca].[Cys_SEt].T.[dK].[Thr_PO3H2].[Aca].[Tyr_PO3H2].[D-Chg].[dV].[Phe_ab-dehydro]}$$$$
|
|
57
|
+
PEPTIDE1{[Lys_Boc].[hHis].[Aca].[Cys_SEt].T}$$$$`,
|
|
58
|
+
startIdx: 3,
|
|
59
|
+
endIdx: 6,
|
|
60
|
+
tgtCsv: `seq
|
|
61
|
+
PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
|
|
62
|
+
PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
|
|
63
|
+
PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
|
|
64
|
+
units: NOTATION.HELM,
|
|
65
|
+
alphabet: null,
|
|
66
|
+
|
|
67
|
+
positionNames: {tag: null, start: '4', end: '7'}
|
|
68
|
+
}
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
for (const [testName, testData] of Object.entries(data)) {
|
|
72
|
+
test(`${testName}-idx`, async () => {
|
|
73
|
+
const srcDf = DG.DataFrame.fromCsv(testData.srcCsv);
|
|
74
|
+
const srcSeqCol = srcDf.getCol('seq');
|
|
75
|
+
|
|
76
|
+
const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
77
|
+
if (semType) srcSeqCol.semType = semType;
|
|
78
|
+
|
|
79
|
+
const srcUh = UnitsHandler.getOrCreate(srcSeqCol);
|
|
80
|
+
const resSeqCol = srcUh.getRegion(testData.startIdx, testData.endIdx, 'regSeq');
|
|
81
|
+
|
|
82
|
+
const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
|
|
83
|
+
const tgtSeqCol = tgtDf.getCol('seq');
|
|
84
|
+
|
|
85
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), testData.units);
|
|
86
|
+
expect(resSeqCol.getTag(DG.TAGS.UNITS), testData.units);
|
|
87
|
+
expect(srcSeqCol.getTag(TAGS.alphabet), testData.alphabet);
|
|
88
|
+
expect(resSeqCol.getTag(TAGS.alphabet), testData.alphabet);
|
|
89
|
+
expectArray(resSeqCol.toList(), tgtSeqCol.toList());
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
if (testData.positionNames) {
|
|
93
|
+
test(`${testName}-positionNames`, async () => {
|
|
94
|
+
const srcDf = DG.DataFrame.fromCsv(testData.srcCsv);
|
|
95
|
+
const srcSeqCol = srcDf.getCol('seq');
|
|
96
|
+
if (testData.positionNames!.tag)
|
|
97
|
+
srcSeqCol.setTag(TAGS.positionNames, testData.positionNames!.tag);
|
|
98
|
+
|
|
99
|
+
const semType: string | null = await grok.functions.call('Bio:detectMacromolecule', {col: srcSeqCol});
|
|
100
|
+
if (semType) srcSeqCol.semType = semType;
|
|
101
|
+
|
|
102
|
+
const resSeqCol = await grok.functions.call('Bio:getRegion',
|
|
103
|
+
{sequence: srcSeqCol, start: testData.positionNames!.start, end: testData.positionNames!.end});
|
|
104
|
+
|
|
105
|
+
const tgtDf = DG.DataFrame.fromCsv(testData.tgtCsv);
|
|
106
|
+
const tgtSeqCol = tgtDf.getCol('seq');
|
|
107
|
+
|
|
108
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), testData.units);
|
|
109
|
+
expect(resSeqCol.getTag(DG.TAGS.UNITS), testData.units);
|
|
110
|
+
expect(srcSeqCol.getTag(TAGS.alphabet), testData.alphabet);
|
|
111
|
+
expect(resSeqCol.getTag(TAGS.alphabet), testData.alphabet);
|
|
112
|
+
expectArray(resSeqCol.toList(), tgtSeqCol.toList());
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
});
|