@datagrok/bio 2.9.0 → 2.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -1
- package/detectors.js +5 -3
- package/dist/452.js +1 -1
- package/dist/452.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/files/data/sample_HELM_empty_unkn.csv +11 -0
- package/package.json +4 -4
- package/src/analysis/sequence-space.ts +5 -6
- package/src/apps/get-region-app.ts +65 -0
- package/src/apps/web-logo-app.ts +3 -6
- package/src/package-test.ts +1 -0
- package/src/package-types.ts +13 -0
- package/src/package.ts +165 -59
- package/src/tests/converters-test.ts +2 -3
- package/src/tests/detectors-tests.ts +13 -2
- package/src/tests/renderers-monomer-placer.ts +25 -12
- package/src/tests/scoring.ts +8 -4
- package/src/tests/units-handler-get-region.ts +116 -0
- package/src/utils/cell-renderer.ts +5 -32
- package/src/utils/context-menu.ts +2 -3
- package/src/utils/convert.ts +6 -7
- package/src/utils/get-region-func-editor.ts +261 -0
- package/src/utils/get-region.ts +65 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +21 -17
- package/src/viewers/web-logo-viewer.ts +73 -44
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
HELM,Activity
|
|
2
|
+
,5.30751
|
|
3
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$,5.72388
|
|
4
|
+
,5.18581
|
|
5
|
+
PEPTIDE1{meI.hHis.unkn1.Cys_SEt.unkn2.dK.Thr_PO3H2.Aca.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.22350
|
|
6
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_ab-dehydroMe.dV.Chg.N.D-Orn.D-aThr.Phe_4Me}$$$$,3.84591
|
|
7
|
+
PEPTIDE1{meI.hHis.Aca.N.unkn3.dK.Thr_PO3H2.Aca.D-Tyr_Et.Tyr_Bn.dV.E.N.dV.Phe_4Me}$$$$,3.27920
|
|
8
|
+
,2.10585
|
|
9
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.D-Tyr_Et.meQ.dV.E.N.dV.Phe_4Me}$$$$,1.80369
|
|
10
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.Oic_3aS-7aS.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,6.38806
|
|
11
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dK.Thr_PO3H2.Aca.meM.D-Chg.dV.E.N.D-Orn.D-aThr.Phe_4Me}$$$$,4.44165
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.10.2",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.38.2",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
39
|
"@datagrok-libraries/ml": "^6.3.39",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
@@ -62,8 +62,8 @@
|
|
|
62
62
|
"webpack": "^5.76.3",
|
|
63
63
|
"webpack-bundle-analyzer": "latest",
|
|
64
64
|
"webpack-cli": "^4.9.1",
|
|
65
|
-
"@datagrok/chem": "1.
|
|
66
|
-
"@datagrok/helm": "2.1.
|
|
65
|
+
"@datagrok/chem": "1.7.2",
|
|
66
|
+
"@datagrok/helm": "2.1.16"
|
|
67
67
|
},
|
|
68
68
|
"scripts": {
|
|
69
69
|
"link-api": "npm link datagrok-api",
|
|
@@ -6,7 +6,6 @@ import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-
|
|
|
6
6
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
7
7
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
8
|
import * as grok from 'datagrok-api/grok';
|
|
9
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
10
9
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
10
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
12
11
|
|
|
@@ -57,17 +56,17 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
57
56
|
}
|
|
58
57
|
|
|
59
58
|
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
|
|
60
|
-
const
|
|
61
|
-
if (
|
|
59
|
+
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
60
|
+
if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
|
|
62
61
|
let distanceFName = MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
63
62
|
let seqList = spaceParams.seqCol.toList();
|
|
64
|
-
if (
|
|
65
|
-
const fastaCol =
|
|
63
|
+
if (ncUH.isSeparator()) {
|
|
64
|
+
const fastaCol = ncUH.convert(NOTATION.FASTA);
|
|
66
65
|
seqList = fastaCol.toList();
|
|
67
66
|
const uh = UnitsHandler.getOrCreate(fastaCol);
|
|
68
67
|
distanceFName = uh.getDistanceFunctionName();
|
|
69
68
|
} else {
|
|
70
|
-
distanceFName =
|
|
69
|
+
distanceFName = ncUH.getDistanceFunctionName();
|
|
71
70
|
}
|
|
72
71
|
for (let i = 0; i < seqList.length; i++) {
|
|
73
72
|
// toList puts empty values in array and it causes downstream errors. replace with null
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
import {TAGS, positionSeparator} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
|
|
7
|
+
|
|
8
|
+
import {_package} from '../package';
|
|
9
|
+
|
|
10
|
+
const defaultData: GetRegionAppData = {
|
|
11
|
+
df: DG.DataFrame.fromCsv(`seq,value
|
|
12
|
+
ATCCGTCGT,0.5
|
|
13
|
+
TGTTCGTCA,0.4
|
|
14
|
+
ATGGTCGTA,0.7
|
|
15
|
+
ATCCGTGCA,0.1`),
|
|
16
|
+
colName: 'seq',
|
|
17
|
+
positionNames: ['1', '1A', '1C', '2', '4', '4A', '4B', '5', '6'].join(positionSeparator),
|
|
18
|
+
regions: [
|
|
19
|
+
{name: 'first region', start: '1', end: '2'},
|
|
20
|
+
{name: 'second region', start: '1C', end: '4'},
|
|
21
|
+
{name: 'overlapping second', start: '1C', end: '4A'},
|
|
22
|
+
{name: 'whole sequence', start: '1', end: '6'},
|
|
23
|
+
{name: 'bad start', start: '0', end: '6'},
|
|
24
|
+
{name: 'bad end', start: '1', end: '4C'},
|
|
25
|
+
{name: 'bad start & end', start: '0', end: '4C'},
|
|
26
|
+
]
|
|
27
|
+
};
|
|
28
|
+
|
|
29
|
+
export type GetRegionAppData = {
|
|
30
|
+
df: DG.DataFrame, colName: string,
|
|
31
|
+
positionNames?: string, regions?: { name: string, start: string, end: string }[]
|
|
32
|
+
};
|
|
33
|
+
|
|
34
|
+
export class GetRegionApp {
|
|
35
|
+
view: DG.TableView;
|
|
36
|
+
data!: GetRegionAppData;
|
|
37
|
+
|
|
38
|
+
constructor(
|
|
39
|
+
private readonly urlParams: URLSearchParams,
|
|
40
|
+
private readonly funcName: string
|
|
41
|
+
) {}
|
|
42
|
+
|
|
43
|
+
async init(data?: GetRegionAppData): Promise<void> {
|
|
44
|
+
this.data = data ?? defaultData;
|
|
45
|
+
const seqCol = this.data.df.getCol(this.data.colName);
|
|
46
|
+
if (!!this.data.positionNames) seqCol.setTag(TAGS.positionNames, this.data.positionNames);
|
|
47
|
+
if (!!this.data.regions) seqCol.setTag(TAGS.regions, JSON.stringify(this.data.regions));
|
|
48
|
+
|
|
49
|
+
await this.buildView();
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// -- View --
|
|
53
|
+
|
|
54
|
+
async buildView(): Promise<void> {
|
|
55
|
+
// To allow showing a WebLogoViewer
|
|
56
|
+
await grok.data.detectSemanticTypes(this.data.df);
|
|
57
|
+
|
|
58
|
+
this.view = grok.shell.addTableView(this.data.df);
|
|
59
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}`;
|
|
60
|
+
|
|
61
|
+
const viewer: DG.Viewer & IWebLogoViewer = (await this.view.dataFrame.plot
|
|
62
|
+
.fromType('WebLogo', {sequenceColumnName: this.data.colName})) as DG.Viewer & IWebLogoViewer;
|
|
63
|
+
this.view.dockManager.dock(viewer, DG.DOCK_TYPE.DOWN, null, 'WebLogo', 0.35);
|
|
64
|
+
}
|
|
65
|
+
}
|
package/src/apps/web-logo-app.ts
CHANGED
|
@@ -11,15 +11,12 @@ import {PROPS as wlPROPS} from '../viewers/web-logo-viewer';
|
|
|
11
11
|
import {_package} from '../package';
|
|
12
12
|
|
|
13
13
|
export class WebLogoApp {
|
|
14
|
-
private _funcName: string = '';
|
|
15
|
-
|
|
16
14
|
df: DG.DataFrame;
|
|
17
15
|
view: DG.TableView;
|
|
18
16
|
|
|
19
|
-
constructor(private readonly urlParams: URLSearchParams) {}
|
|
17
|
+
constructor(private readonly urlParams: URLSearchParams, private readonly funcName: string) {}
|
|
20
18
|
|
|
21
|
-
async init(df: DG.DataFrame
|
|
22
|
-
this._funcName = funcName;
|
|
19
|
+
async init(df: DG.DataFrame): Promise<void> {
|
|
23
20
|
this.df = df;
|
|
24
21
|
|
|
25
22
|
await this.buildView();
|
|
@@ -33,7 +30,7 @@ export class WebLogoApp {
|
|
|
33
30
|
.toArray().join('&');
|
|
34
31
|
|
|
35
32
|
this.view = grok.shell.addTableView(this.df);
|
|
36
|
-
this.view.path = this.view.basePath = `func/${_package.name}.${this.
|
|
33
|
+
this.view.path = this.view.basePath = `func/${_package.name}.${this.funcName}?${urlParamsTxt}`;
|
|
37
34
|
|
|
38
35
|
const options: { [p: string]: any } = {sequenceColumnName: 'sequence'};
|
|
39
36
|
for (const [optName, optValue] of this.urlParams.entries()) {
|
package/src/package-test.ts
CHANGED
|
@@ -24,6 +24,7 @@ import './tests/pepsea-tests';
|
|
|
24
24
|
import './tests/viewers';
|
|
25
25
|
import './tests/units-handler-tests';
|
|
26
26
|
import './tests/units-handler-splitted-tests';
|
|
27
|
+
import './tests/units-handler-get-region';
|
|
27
28
|
import './tests/to-atomic-level-tests';
|
|
28
29
|
import './tests/mm-distance-tests';
|
|
29
30
|
import './tests/activity-cliffs-tests';
|
package/src/package-types.ts
CHANGED
|
@@ -57,4 +57,17 @@ export class BioPackage extends DG.Package {
|
|
|
57
57
|
public get properties(): BioPackageProperties { return this._properties; };
|
|
58
58
|
|
|
59
59
|
public set properties(value: BioPackageProperties) { this._properties = value; }
|
|
60
|
+
|
|
61
|
+
private _initialized: boolean = false;
|
|
62
|
+
|
|
63
|
+
public get initialized(): boolean { return this._initialized;}
|
|
64
|
+
|
|
65
|
+
public completeInit(): void { this._initialized = true;}
|
|
66
|
+
|
|
67
|
+
handleErrorUI(err: any) {
|
|
68
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
69
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
70
|
+
grok.shell.error(errMsg);
|
|
71
|
+
this.logger.error(errMsg, undefined, errStack);
|
|
72
|
+
}
|
|
60
73
|
}
|
package/src/package.ts
CHANGED
|
@@ -55,7 +55,6 @@ import {checkInputColumnUI} from './utils/check-input-column';
|
|
|
55
55
|
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
56
56
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
57
57
|
import {BitArrayMetrics, BitArrayMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
58
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
59
58
|
import {WebLogoApp} from './apps/web-logo-app';
|
|
60
59
|
import {SplitToMonomersFunctionEditor} from './function-edtiors/split-to-monomers-editor';
|
|
61
60
|
import {splitToMonomersUI} from './utils/split-to-monomers';
|
|
@@ -66,10 +65,13 @@ import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-wid
|
|
|
66
65
|
import {getCompositionAnalysisWidget} from './widgets/composition-analysis-widget';
|
|
67
66
|
import {MacromoleculeColumnWidget} from './utils/macromolecule-column-widget';
|
|
68
67
|
import {addCopyMenuUI} from './utils/context-menu';
|
|
68
|
+
import {getRegionDo} from './utils/get-region';
|
|
69
|
+
import {GetRegionApp} from './apps/get-region-app';
|
|
70
|
+
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
69
71
|
|
|
70
72
|
export const _package = new BioPackage();
|
|
71
73
|
|
|
72
|
-
// /** Avoid
|
|
74
|
+
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
73
75
|
// let monomerLib: MonomerLib | null = null;
|
|
74
76
|
|
|
75
77
|
//name: getMonomerLibHelper
|
|
@@ -104,7 +106,9 @@ export async function initBio() {
|
|
|
104
106
|
const bioPkgProps = new BioPackageProperties(pkgProps);
|
|
105
107
|
_package.properties = bioPkgProps;
|
|
106
108
|
})(),
|
|
107
|
-
])
|
|
109
|
+
]).finally(() => {
|
|
110
|
+
_package.completeInit();
|
|
111
|
+
});
|
|
108
112
|
|
|
109
113
|
const monomerLib = MonomerLibHelper.instance.getBioLib();
|
|
110
114
|
const monomers: string[] = [];
|
|
@@ -150,14 +154,96 @@ export function getBioLib(): IMonomerLib {
|
|
|
150
154
|
return MonomerLibHelper.instance.getBioLib();
|
|
151
155
|
}
|
|
152
156
|
|
|
157
|
+
// -- Panels --
|
|
158
|
+
|
|
159
|
+
//name: Get Region
|
|
160
|
+
//description: Creates a new column with sequences of the region between start and end
|
|
161
|
+
//tags: panel
|
|
162
|
+
//input: column seqCol {semType: Macromolecule}
|
|
163
|
+
//output: widget result
|
|
164
|
+
export function getRegionPanel(seqCol: DG.Column<string>): DG.Widget {
|
|
165
|
+
// const host = ui.divV([
|
|
166
|
+
// ui.inputs([
|
|
167
|
+
// ui.stringInput('Region', ''),
|
|
168
|
+
// ]),
|
|
169
|
+
// ui.button('Ok', () => {})
|
|
170
|
+
// ]);
|
|
171
|
+
// return DG.Widget.fromRoot(host);
|
|
172
|
+
const funcName: string = 'getRegionTopMenu';
|
|
173
|
+
const funcList = DG.Func.find({package: _package.name, name: funcName});
|
|
174
|
+
if (funcList.length !== 1) throw new Error(`Package '${_package.name}' func '${funcName}' not found`);
|
|
175
|
+
const func = funcList[0];
|
|
176
|
+
const funcCall = func.prepare({table: seqCol.dataFrame, sequence: seqCol});
|
|
177
|
+
const funcEditor = new GetRegionFuncEditor(funcCall);
|
|
178
|
+
return funcEditor.widget();
|
|
179
|
+
}
|
|
180
|
+
|
|
153
181
|
//name: Manage Libraries
|
|
154
|
-
//
|
|
182
|
+
//description:
|
|
155
183
|
//tags: panel, exclude-actions-panel
|
|
184
|
+
//input: column seqColumn {semType: Macromolecule}
|
|
156
185
|
//output: widget result
|
|
157
186
|
export async function libraryPanel(_seqColumn: DG.Column): Promise<DG.Widget> {
|
|
158
187
|
return getLibraryPanelUI();
|
|
159
188
|
}
|
|
160
189
|
|
|
190
|
+
// -- Func Editors --
|
|
191
|
+
|
|
192
|
+
//name: GetRegionEditor
|
|
193
|
+
//tags: editor
|
|
194
|
+
//input: funccall call
|
|
195
|
+
export function GetRegionEditor(call: DG.FuncCall): void {
|
|
196
|
+
try {
|
|
197
|
+
const funcEditor = new GetRegionFuncEditor(call);
|
|
198
|
+
funcEditor.dialog();
|
|
199
|
+
} catch (err: any) {
|
|
200
|
+
const errMsg = err instanceof Error ? err.message : err.toString();
|
|
201
|
+
const errStack = err instanceof Error ? err.stack : undefined;
|
|
202
|
+
grok.shell.error(`Get region editor error: ${errMsg}`);
|
|
203
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
//name: SplitToMonomersEditor
|
|
208
|
+
//tags: editor
|
|
209
|
+
//input: funccall call
|
|
210
|
+
export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
211
|
+
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
212
|
+
ui.dialog({title: 'Split to Monomers'})
|
|
213
|
+
.add(funcEditor.paramsUI)
|
|
214
|
+
.onOK(async () => {
|
|
215
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
216
|
+
})
|
|
217
|
+
.show();
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
//name: SequenceSpaceEditor
|
|
221
|
+
//tags: editor
|
|
222
|
+
//input: funccall call
|
|
223
|
+
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
224
|
+
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
225
|
+
ui.dialog({title: 'Sequence Space'})
|
|
226
|
+
.add(funcEditor.paramsUI)
|
|
227
|
+
.onOK(async () => {
|
|
228
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
229
|
+
})
|
|
230
|
+
.show();
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
//name: SeqActivityCliffsEditor
|
|
234
|
+
//tags: editor
|
|
235
|
+
//input: funccall call
|
|
236
|
+
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
237
|
+
const funcEditor = new ActivityCliffsFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
238
|
+
ui.dialog({title: 'Activity Cliffs'})
|
|
239
|
+
.add(funcEditor.paramsUI)
|
|
240
|
+
.onOK(async () => {
|
|
241
|
+
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
242
|
+
})
|
|
243
|
+
.show();
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
|
|
161
247
|
// -- Package settings editor --
|
|
162
248
|
|
|
163
249
|
//name: packageSettingsEditor
|
|
@@ -182,6 +268,8 @@ export function fastaSequenceCellRenderer(): MacromoleculeSequenceCellRenderer {
|
|
|
182
268
|
return new MacromoleculeSequenceCellRenderer();
|
|
183
269
|
}
|
|
184
270
|
|
|
271
|
+
// -- Property panels --
|
|
272
|
+
|
|
185
273
|
//name: Sequence Renderer
|
|
186
274
|
//input: column molColumn {semType: Macromolecule}
|
|
187
275
|
//tags: panel
|
|
@@ -251,17 +339,39 @@ export function vdRegionsViewer() {
|
|
|
251
339
|
return new VdRegionsViewer();
|
|
252
340
|
}
|
|
253
341
|
|
|
254
|
-
|
|
255
|
-
//
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
342
|
+
|
|
343
|
+
// -- Top menu --
|
|
344
|
+
|
|
345
|
+
//name: getRegion
|
|
346
|
+
//description: Gets a new column with sequences of the region between start and end
|
|
347
|
+
//input: column sequence
|
|
348
|
+
//input: string start {optional: true}
|
|
349
|
+
//input: string end {optional: true}
|
|
350
|
+
//input: string name {optional: true} [Name of the column to be created]
|
|
351
|
+
//output: column result
|
|
352
|
+
export function getRegion(
|
|
353
|
+
sequence: DG.Column<string>, start: string | undefined, end: string | undefined, name: string | undefined
|
|
354
|
+
): DG.Column<string> {
|
|
355
|
+
return getRegionDo(sequence,
|
|
356
|
+
start ?? null, end ?? null, name ?? null);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
//top-menu: Bio | Convert | Get Region...
|
|
360
|
+
//name: Get Region
|
|
361
|
+
//description: Get sequences for a region specified from a Macromolecule
|
|
362
|
+
//input: dataframe table [Input data table]
|
|
363
|
+
//input: column sequence {semType: Macromolecule} [Sequence column]
|
|
364
|
+
//input: string start {optional: true} [Region start position name]
|
|
365
|
+
//input: string end {optional: true} [Region end position name]
|
|
366
|
+
//input: string name {optional: true} [Region column name]
|
|
367
|
+
//editor: Bio:GetRegionEditor
|
|
368
|
+
export async function getRegionTopMenu(
|
|
369
|
+
table: DG.DataFrame, sequence: DG.Column,
|
|
370
|
+
start: string | undefined, end: string | undefined, name: string | undefined
|
|
371
|
+
): Promise<void> {
|
|
372
|
+
const regCol = getRegionDo(sequence, start ?? null, end ?? null, name ?? null);
|
|
373
|
+
sequence.dataFrame.columns.add(regCol);
|
|
374
|
+
await grok.data.detectSemanticTypes(sequence.dataFrame); // to set renderer
|
|
265
375
|
}
|
|
266
376
|
|
|
267
377
|
//top-menu: Bio | Analyze | Activity Cliffs...
|
|
@@ -287,14 +397,14 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
287
397
|
'separator': macroMolecule.getTag(bioTAGS.separator),
|
|
288
398
|
'alphabet': macroMolecule.getTag(bioTAGS.alphabet),
|
|
289
399
|
};
|
|
290
|
-
const
|
|
400
|
+
const ncUH = UnitsHandler.getOrCreate(macroMolecule);
|
|
291
401
|
let columnDistanceMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto;
|
|
292
402
|
let seqCol = macroMolecule;
|
|
293
|
-
if (
|
|
294
|
-
if (
|
|
295
|
-
columnDistanceMetric =
|
|
403
|
+
if (ncUH.isFasta() || (ncUH.isSeparator() && ncUH.alphabet && ncUH.alphabet !== ALPHABET.UN)) {
|
|
404
|
+
if (ncUH.isFasta()) {
|
|
405
|
+
columnDistanceMetric = ncUH.getDistanceFunctionName();
|
|
296
406
|
} else {
|
|
297
|
-
seqCol =
|
|
407
|
+
seqCol = ncUH.convert(NOTATION.FASTA);
|
|
298
408
|
const uh = UnitsHandler.getOrCreate(seqCol);
|
|
299
409
|
columnDistanceMetric = uh.getDistanceFunctionName();
|
|
300
410
|
tags.units = NOTATION.FASTA;
|
|
@@ -345,19 +455,6 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
345
455
|
}
|
|
346
456
|
}
|
|
347
457
|
|
|
348
|
-
//name: SequenceSpaceEditor
|
|
349
|
-
//tags: editor
|
|
350
|
-
//input: funccall call
|
|
351
|
-
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
352
|
-
const funcEditor = new SequenceSpaceFunctionEditor(DG.SEMTYPE.MACROMOLECULE);
|
|
353
|
-
ui.dialog({title: 'Sequence Space'})
|
|
354
|
-
.add(funcEditor.paramsUI)
|
|
355
|
-
.onOK(async () => {
|
|
356
|
-
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
357
|
-
})
|
|
358
|
-
.show();
|
|
359
|
-
}
|
|
360
|
-
|
|
361
458
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
362
459
|
//name: Sequence Space
|
|
363
460
|
//description: Creates 2D sequence space with projected sequences by pairwise distance
|
|
@@ -559,13 +656,6 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
559
656
|
await handler(col);
|
|
560
657
|
}
|
|
561
658
|
|
|
562
|
-
//top-menu: Bio | Convert | SDF to JSON Library...
|
|
563
|
-
//name: SDF to JSON Library
|
|
564
|
-
//input: dataframe table
|
|
565
|
-
export async function sdfToJsonLib(table: DG.DataFrame) {
|
|
566
|
-
const _jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
|
|
567
|
-
}
|
|
568
|
-
|
|
569
659
|
// 2023-05-17 Representations does not work at BioIT
|
|
570
660
|
// //name: Representations
|
|
571
661
|
// //tags: panel, widgets
|
|
@@ -667,19 +757,6 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
667
757
|
return resDf;
|
|
668
758
|
}
|
|
669
759
|
|
|
670
|
-
//name: SplitToMonomersEditor
|
|
671
|
-
//tags: editor
|
|
672
|
-
//input: funccall call
|
|
673
|
-
export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
674
|
-
const funcEditor = new SplitToMonomersFunctionEditor();
|
|
675
|
-
ui.dialog({title: 'Split to Monomers'})
|
|
676
|
-
.add(funcEditor.paramsUI)
|
|
677
|
-
.onOK(async () => {
|
|
678
|
-
return call.func.prepare(funcEditor.funcParams).call(true);
|
|
679
|
-
})
|
|
680
|
-
.show();
|
|
681
|
-
}
|
|
682
|
-
|
|
683
760
|
//top-menu: Bio | Convert | Split to Monomers...
|
|
684
761
|
//name: Split to Monomers
|
|
685
762
|
//input: dataframe table
|
|
@@ -759,26 +836,30 @@ export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
|
|
|
759
836
|
grok.shell.tv.grid.scrollToCell(macromolecules, 0);
|
|
760
837
|
}
|
|
761
838
|
|
|
762
|
-
//top-menu: Bio |
|
|
839
|
+
//top-menu: Bio | Calculate | Identity...
|
|
763
840
|
//name: Identity Scoring
|
|
764
841
|
//description: Adds a column with fraction of matching monomers
|
|
765
842
|
//input: dataframe table [Table containing Macromolecule column]
|
|
766
843
|
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
767
844
|
//input: string reference [Sequence, matching column format]
|
|
768
845
|
//output: column scores
|
|
769
|
-
export async function sequenceIdentityScoring(
|
|
846
|
+
export async function sequenceIdentityScoring(
|
|
847
|
+
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
848
|
+
): Promise<DG.Column<number>> {
|
|
770
849
|
const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
|
|
771
850
|
return scores;
|
|
772
851
|
}
|
|
773
852
|
|
|
774
|
-
//top-menu: Bio |
|
|
853
|
+
//top-menu: Bio | Calculate | Similarity...
|
|
775
854
|
//name: Similarity Scoring
|
|
776
855
|
//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
|
|
777
856
|
//input: dataframe table [Table containing Macromolecule column]
|
|
778
857
|
//input: column macromolecules {semType: Macromolecule} [Sequences to score]
|
|
779
858
|
//input: string reference [Sequence, matching column format]
|
|
780
859
|
//output: column scores
|
|
781
|
-
export async function sequenceSimilarityScoring(
|
|
860
|
+
export async function sequenceSimilarityScoring(
|
|
861
|
+
table: DG.DataFrame, macromolecule: DG.Column, reference: string
|
|
862
|
+
): Promise<DG.Column<number>> {
|
|
782
863
|
const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
|
|
783
864
|
return scores;
|
|
784
865
|
}
|
|
@@ -806,10 +887,35 @@ export async function webLogoLargeApp(): Promise<void> {
|
|
|
806
887
|
const pi = DG.TaskBarProgressIndicator.create('WebLogo');
|
|
807
888
|
try {
|
|
808
889
|
const urlParams = new URLSearchParams(window.location.search);
|
|
809
|
-
const app = new WebLogoApp(urlParams);
|
|
890
|
+
const app = new WebLogoApp(urlParams, 'webLogoLargeApp');
|
|
810
891
|
const df: DG.DataFrame = await _package.files.readCsv('data/sample_PT_100000x5.csv');
|
|
811
892
|
await grok.data.detectSemanticTypes(df);
|
|
812
|
-
await app.init(df
|
|
893
|
+
await app.init(df);
|
|
894
|
+
} finally {
|
|
895
|
+
pi.close();
|
|
896
|
+
}
|
|
897
|
+
}
|
|
898
|
+
|
|
899
|
+
//name: getRegionApp
|
|
900
|
+
export async function getRegionApp(): Promise<void> {
|
|
901
|
+
const pi = DG.TaskBarProgressIndicator.create('getRegion ...');
|
|
902
|
+
try {
|
|
903
|
+
const urlParams = new URLSearchParams(window.location.search);
|
|
904
|
+
const app = new GetRegionApp(urlParams, 'getRegionApp');
|
|
905
|
+
await app.init();
|
|
906
|
+
} finally {
|
|
907
|
+
pi.close();
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
|
|
911
|
+
//name: getRegionHelmApp
|
|
912
|
+
export async function getRegionHelmApp(): Promise<void> {
|
|
913
|
+
const pi = DG.TaskBarProgressIndicator.create('getRegion ...');
|
|
914
|
+
try {
|
|
915
|
+
const urlParams = new URLSearchParams(window.location.search);
|
|
916
|
+
const df = await _package.files.readCsv('data/sample_HELM_empty_vals.csv');
|
|
917
|
+
const app = new GetRegionApp(urlParams, 'getRegionHelmApp');
|
|
918
|
+
await app.init({df: df, colName: 'HELM'});
|
|
813
919
|
} finally {
|
|
814
920
|
pi.close();
|
|
815
921
|
}
|
|
@@ -5,7 +5,6 @@ import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src
|
|
|
5
5
|
|
|
6
6
|
import {ConverterFunc} from './types';
|
|
7
7
|
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
-
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
9
8
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
10
9
|
|
|
11
10
|
// import {mmSemType} from '../const';
|
|
@@ -134,8 +133,8 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
|
|
|
134
133
|
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
135
134
|
|
|
136
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
137
|
-
const
|
|
138
|
-
const resCol =
|
|
136
|
+
const converterUH = UnitsHandler.getOrCreate(srcCol);
|
|
137
|
+
const resCol = converterUH.convert(tgtNotation, tgtSeparator);
|
|
139
138
|
expect(resCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
140
139
|
return resCol;
|
|
141
140
|
};
|
|
@@ -38,6 +38,7 @@ category('detectors', () => {
|
|
|
38
38
|
sepUn1 = 'sepUn1',
|
|
39
39
|
sepUn2 = 'sepUn2',
|
|
40
40
|
sepMsaDna1 = 'sepMsaDna1',
|
|
41
|
+
sepMsaUnWEmpty = 'sepMsaUnWEmpty',
|
|
41
42
|
fastaMsaDna1 = 'fastaMsaDna1',
|
|
42
43
|
fastaMsaPt1 = 'fastaMsaPt1',
|
|
43
44
|
}
|
|
@@ -109,6 +110,11 @@ rut12/rty/her2/abc/cfr3//wert/rut12`;
|
|
|
109
110
|
A-C--G-T--C-T
|
|
110
111
|
C-A-C--T--G-T
|
|
111
112
|
A-C-C-G-T-A-C-T`;
|
|
113
|
+
[csvTests.sepMsaUnWEmpty]: string = `seq
|
|
114
|
+
m1-M-m3-mon4-mon5-N-T-MON8-N9
|
|
115
|
+
m1-mon2-m3-mon4-mon5-Num--MON8-N9
|
|
116
|
+
|
|
117
|
+
mon1-M-mon3-mon4-mon5---MON8-N9`;
|
|
112
118
|
[csvTests.fastaMsaDna1]: string = `seq
|
|
113
119
|
AC-GT-CT
|
|
114
120
|
CAC-T-GT
|
|
@@ -234,6 +240,11 @@ MWRSWY-CKHP`;
|
|
|
234
240
|
await _testPos(readCsv(csvTests.fastaUn), 'seq',
|
|
235
241
|
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 12, true);
|
|
236
242
|
});
|
|
243
|
+
|
|
244
|
+
test('SepMsaUnWEmpty', async () => {
|
|
245
|
+
await _testPos(readCsv(csvTests.sepMsaUnWEmpty), 'seq',
|
|
246
|
+
NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true);
|
|
247
|
+
});
|
|
237
248
|
test('FastaMsaDna1', async () => {
|
|
238
249
|
await _testPos(readCsv(csvTests.fastaMsaDna1), 'seq',
|
|
239
250
|
NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.DNA, 4, false);
|
|
@@ -274,13 +285,13 @@ MWRSWY-CKHP`;
|
|
|
274
285
|
await _testDf(readSamples(Samples.fastaCsv), {
|
|
275
286
|
'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
276
287
|
});
|
|
277
|
-
}
|
|
288
|
+
});
|
|
278
289
|
|
|
279
290
|
test('samplesFastaFasta', async () => {
|
|
280
291
|
await _testDf(readSamples(Samples.fastaFasta), {
|
|
281
292
|
'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
|
|
282
293
|
});
|
|
283
|
-
}
|
|
294
|
+
});
|
|
284
295
|
|
|
285
296
|
// peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
|
|
286
297
|
// test('samplesPeptidesComplexPositiveAlignedSequence', async () => {
|