@datagrok/bio 2.13.2 → 2.13.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +1 -1
- package/CHANGELOG.md +26 -0
- package/detectors.js +52 -38
- package/dist/111.js +2 -0
- package/dist/111.js.map +1 -0
- package/dist/234.js +2 -0
- package/dist/234.js.map +1 -0
- package/dist/242.js +2 -0
- package/dist/242.js.map +1 -0
- package/dist/{286.js → 248.js} +1 -1
- package/dist/248.js.map +1 -0
- package/dist/284.js +3 -0
- package/dist/284.js.map +1 -0
- package/dist/317.js +2 -0
- package/dist/317.js.map +1 -0
- package/dist/589.js +2 -0
- package/dist/589.js.map +1 -0
- package/dist/603.js +2 -0
- package/dist/603.js.map +1 -0
- package/dist/682.js +2 -0
- package/dist/682.js.map +1 -0
- package/dist/705.js +2 -0
- package/dist/705.js.map +1 -0
- package/dist/{590.js → 731.js} +2 -2
- package/dist/731.js.map +1 -0
- package/dist/778.js +2 -0
- package/dist/778.js.map +1 -0
- package/dist/793.js +2 -0
- package/dist/793.js.map +1 -0
- package/dist/950.js +2 -0
- package/dist/950.js.map +1 -0
- package/dist/package-test.js +6 -7
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +6 -7
- package/dist/package.js.map +1 -1
- package/files/cache_config.json +7 -0
- package/package.json +17 -23
- package/src/analysis/sequence-activity-cliffs.ts +1 -1
- package/src/function-edtiors/split-to-monomers-editor.ts +6 -7
- package/src/package-types.ts +19 -19
- package/src/package.ts +23 -16
- package/src/substructure-search/substructure-search.ts +9 -10
- package/src/tests/WebLogo-positions-test.ts +6 -6
- package/src/tests/activity-cliffs-tests.ts +5 -2
- package/src/tests/bio-tests.ts +6 -6
- package/src/tests/checkInputColumn-tests.ts +3 -3
- package/src/tests/converters-test.ts +1 -1
- package/src/tests/detectors-tests.ts +25 -13
- package/src/tests/fasta-export-tests.ts +2 -2
- package/src/tests/mm-distance-tests.ts +1 -1
- package/src/tests/msa-tests.ts +2 -2
- package/src/tests/renderers-test.ts +5 -5
- package/src/tests/scoring.ts +26 -5
- package/src/tests/seq-handler-get-region.ts +4 -4
- package/src/tests/sequence-space-test.ts +1 -1
- package/src/tests/substructure-filters-tests.ts +4 -1
- package/src/tests/to-atomic-level-tests.ts +1 -1
- package/src/utils/cell-renderer-consts.ts +3 -11
- package/src/utils/cell-renderer.ts +15 -17
- package/src/utils/context-menu.ts +1 -1
- package/src/utils/convert.ts +7 -4
- package/src/utils/get-region-func-editor.ts +11 -16
- package/src/utils/get-region.ts +5 -5
- package/src/utils/macromolecule-column-widget.ts +1 -1
- package/src/utils/monomer-lib/lib-manager.ts +20 -8
- package/src/utils/monomer-lib/library-file-manager/file-manager.ts +28 -24
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +2 -1
- package/src/utils/monomer-lib/library-file-manager/ui.ts +3 -6
- package/src/utils/multiple-sequence-alignment-ui.ts +10 -11
- package/src/utils/multiple-sequence-alignment.ts +2 -2
- package/src/utils/pepsea.ts +1 -1
- package/src/utils/save-as-fasta.ts +5 -5
- package/src/viewers/vd-regions-viewer.ts +2 -2
- package/src/widgets/bio-substructure-filter.ts +7 -7
- package/src/widgets/package-settings-editor-widget.ts +27 -27
- package/src/widgets/representations.ts +57 -61
- package/tsconfig.json +4 -4
- package/webpack.config.js +1 -1
- package/dist/23.js +0 -2
- package/dist/23.js.map +0 -1
- package/dist/231.js +0 -2
- package/dist/231.js.map +0 -1
- package/dist/282.js +0 -2
- package/dist/282.js.map +0 -1
- package/dist/286.js.map +0 -1
- package/dist/356.js +0 -2
- package/dist/356.js.map +0 -1
- package/dist/36.js +0 -2
- package/dist/36.js.map +0 -1
- package/dist/40.js +0 -2
- package/dist/40.js.map +0 -1
- package/dist/413.js +0 -2
- package/dist/413.js.map +0 -1
- package/dist/42.js +0 -2
- package/dist/42.js.map +0 -1
- package/dist/427.js +0 -2
- package/dist/427.js.map +0 -1
- package/dist/545.js +0 -3
- package/dist/545.js.map +0 -1
- package/dist/590.js.map +0 -1
- package/dist/65.js +0 -2
- package/dist/65.js.map +0 -1
- package/dist/796.js +0 -2
- package/dist/796.js.map +0 -1
- package/dist/package-test.js.LICENSE.txt +0 -1
- package/dist/package.js.LICENSE.txt +0 -1
- /package/dist/{545.js.LICENSE.txt → 284.js.LICENSE.txt} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.13.
|
|
8
|
+
"version": "2.13.5",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -13,30 +13,23 @@
|
|
|
13
13
|
"directory": "packages/Bio"
|
|
14
14
|
},
|
|
15
15
|
"properties": [
|
|
16
|
-
{
|
|
17
|
-
"name": "MonomerWidthMode",
|
|
18
|
-
"propertyType": "string",
|
|
19
|
-
"choices": [
|
|
20
|
-
"short",
|
|
21
|
-
"long"
|
|
22
|
-
],
|
|
23
|
-
"defaultValue": "short",
|
|
24
|
-
"nullable": false
|
|
25
|
-
},
|
|
26
16
|
{
|
|
27
17
|
"name": "MaxMonomerLength",
|
|
28
|
-
"
|
|
29
|
-
"
|
|
18
|
+
"description": "The max length of monomer symbol displayed without shortening, 'long' to no limit",
|
|
19
|
+
"propertyType": "string",
|
|
20
|
+
"defaultValue": "4",
|
|
30
21
|
"nullable": false
|
|
31
22
|
},
|
|
32
23
|
{
|
|
33
24
|
"name": "TooltipWebLogo",
|
|
25
|
+
"description": "Display WebLogo in a Macromolecule column header tooltip",
|
|
34
26
|
"propertyType": "bool",
|
|
35
27
|
"defaultValue": "true",
|
|
36
28
|
"nullable": false
|
|
37
29
|
},
|
|
38
30
|
{
|
|
39
31
|
"name": "DefaultSeparator",
|
|
32
|
+
"description": "Default separator using to convert sequences into separator notation",
|
|
40
33
|
"propertyType": "string",
|
|
41
34
|
"defaultValue": ".",
|
|
42
35
|
"nullable": false
|
|
@@ -44,18 +37,18 @@
|
|
|
44
37
|
],
|
|
45
38
|
"dependencies": {
|
|
46
39
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.42.
|
|
40
|
+
"@datagrok-libraries/bio": "^5.42.5",
|
|
48
41
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
49
42
|
"@datagrok-libraries/math": "^1.1.5",
|
|
50
43
|
"@datagrok-libraries/ml": "^6.6.12",
|
|
51
44
|
"@datagrok-libraries/tutorials": "^1.3.12",
|
|
52
|
-
"@datagrok-libraries/utils": "^4.2.
|
|
45
|
+
"@datagrok-libraries/utils": "^4.2.12",
|
|
53
46
|
"@webgpu/types": "^0.1.40",
|
|
54
47
|
"ajv": "^8.12.0",
|
|
55
48
|
"ajv-errors": "^3.0.0",
|
|
56
49
|
"cash-dom": "^8.0.0",
|
|
57
50
|
"css-loader": "^6.7.3",
|
|
58
|
-
"datagrok-api": "^1.
|
|
51
|
+
"datagrok-api": "^1.20.0",
|
|
59
52
|
"dayjs": "^1.11.4",
|
|
60
53
|
"fastest-levenshtein": "^1.0.16",
|
|
61
54
|
"openchemlib": "^7.2.3",
|
|
@@ -65,8 +58,8 @@
|
|
|
65
58
|
"wu": "latest"
|
|
66
59
|
},
|
|
67
60
|
"devDependencies": {
|
|
68
|
-
"@datagrok-libraries/helm-web-editor": "^1.1.
|
|
69
|
-
"@datagrok-libraries/js-draw-lite": "^0.0.
|
|
61
|
+
"@datagrok-libraries/helm-web-editor": "^1.1.6",
|
|
62
|
+
"@datagrok-libraries/js-draw-lite": "^0.0.4",
|
|
70
63
|
"@datagrok/chem": "^1.9.2",
|
|
71
64
|
"@datagrok/dendrogram": "^1.2.29",
|
|
72
65
|
"@datagrok/helm": "^2.2.1",
|
|
@@ -77,12 +70,13 @@
|
|
|
77
70
|
"datagrok-tools": "latest",
|
|
78
71
|
"eslint": "latest",
|
|
79
72
|
"eslint-config-google": "latest",
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
83
|
-
"
|
|
73
|
+
"eslint-plugin-rxjs": "latest",
|
|
74
|
+
"source-map-loader": "latest",
|
|
75
|
+
"ts-loader": "^9.5.1",
|
|
76
|
+
"typescript": "^5.5.3",
|
|
77
|
+
"webpack": "^5.92.1",
|
|
84
78
|
"webpack-bundle-analyzer": "latest",
|
|
85
|
-
"webpack-cli": "^
|
|
79
|
+
"webpack-cli": "^5.1.4"
|
|
86
80
|
},
|
|
87
81
|
"scripts": {
|
|
88
82
|
"link-api": "npm link datagrok-api",
|
|
@@ -164,7 +164,7 @@ export function createLinesGrid(df: DG.DataFrame, colNames: string[]): DG.Grid {
|
|
|
164
164
|
const seqDiffCol = DG.Column.string('seq_diff', df.rowCount)
|
|
165
165
|
.init((i) => `${df.get(colNames[0], i)}#${df.get(colNames[1], i)}`);
|
|
166
166
|
seqDiffCol.semType = 'MacromoleculeDifference';
|
|
167
|
-
seqDiffCol.
|
|
167
|
+
seqDiffCol.meta.units = df.col(colNames[0])!.meta.units;
|
|
168
168
|
seqDiffCol.setTag(bioTAGS.separator, df.col(colNames[0])!.getTag(bioTAGS.separator));
|
|
169
169
|
df.columns.add(seqDiffCol);
|
|
170
170
|
const grid = df.plot.grid();
|
|
@@ -20,14 +20,13 @@ export class SplitToMonomersFunctionEditor {
|
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
constructor() {
|
|
23
|
-
this.tableInput = ui.
|
|
23
|
+
this.tableInput = ui.input.table('Table', {value: grok.shell.tv.dataFrame, onValueChanged: () => {
|
|
24
24
|
this.onTableInputChanged();
|
|
25
|
-
});
|
|
25
|
+
}});
|
|
26
26
|
//TODO: remove when the new version of datagrok-api is available
|
|
27
27
|
const seqColValue = this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
this.seqColInput = ui.columnInput('Sequence', this.tableInput.value!, seqColValue, null, seqColOptions);
|
|
28
|
+
this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!, value: seqColValue,
|
|
29
|
+
filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
31
30
|
|
|
32
31
|
this.funcParamsDiv = ui.inputs([
|
|
33
32
|
this.tableInput,
|
|
@@ -36,7 +35,7 @@ export class SplitToMonomersFunctionEditor {
|
|
|
36
35
|
}
|
|
37
36
|
|
|
38
37
|
onTableInputChanged(): void {
|
|
39
|
-
this.seqColInput = ui.
|
|
40
|
-
this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE));
|
|
38
|
+
this.seqColInput = ui.input.column('Sequence', {table: this.tableInput.value!,
|
|
39
|
+
value: this.tableInput.value!.columns.bySemType(DG.SEMTYPE.MACROMOLECULE)});
|
|
41
40
|
}
|
|
42
41
|
}
|
package/src/package-types.ts
CHANGED
|
@@ -3,8 +3,9 @@ import * as DG from 'datagrok-api/dg';
|
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
|
|
5
5
|
import {Observable, Subject} from 'rxjs';
|
|
6
|
+
|
|
6
7
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
7
|
-
import {
|
|
8
|
+
import {LoggerWrapper} from '@datagrok-libraries/bio/src/utils/logger';
|
|
8
9
|
|
|
9
10
|
/** Names of package properties/settings declared in properties section of {@link './package.json'} */
|
|
10
11
|
export const enum BioPackagePropertiesNames {
|
|
@@ -19,39 +20,32 @@ export class BioPackageProperties extends Map<string, any> {
|
|
|
19
20
|
private _onPropertyChanged: Subject<string> = new Subject<string>();
|
|
20
21
|
public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }
|
|
21
22
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
public set MonomerWidthMode(value: MonomerWidthMode) {
|
|
27
|
-
super.set(BioPackagePropertiesNames.MonomerWidthMode, value);
|
|
28
|
-
this._onPropertyChanged.next(BioPackagePropertiesNames.MonomerWidthMode);
|
|
23
|
+
/** Monomer symbol maximum length displayed, null for unlimited. */
|
|
24
|
+
public get maxMonomerLength(): number | null {
|
|
25
|
+
const vs = super.get(BioPackagePropertiesNames.MaxMonomerLength);
|
|
26
|
+
return vs === 'long' ? null : parseInt(vs);
|
|
29
27
|
}
|
|
30
28
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
public set MaxMonomerLength(value: number) {
|
|
37
|
-
super.set(BioPackagePropertiesNames.MaxMonomerLength, value);
|
|
29
|
+
public set maxMonomerLength(value: number | null) {
|
|
30
|
+
const vs = value === null ? 'long' : value.toString();
|
|
31
|
+
super.set(BioPackagePropertiesNames.MaxMonomerLength, vs);
|
|
38
32
|
this._onPropertyChanged.next(BioPackagePropertiesNames.MaxMonomerLength);
|
|
39
33
|
}
|
|
40
34
|
|
|
41
|
-
public get
|
|
35
|
+
public get tooltipWebLogo(): boolean {
|
|
42
36
|
return super.get(BioPackagePropertiesNames.TooltipWebLogo) as boolean;
|
|
43
37
|
}
|
|
44
38
|
|
|
45
|
-
public set
|
|
39
|
+
public set tooltipWebLogo(value: boolean) {
|
|
46
40
|
super.set(BioPackagePropertiesNames.TooltipWebLogo, value);
|
|
47
41
|
this._onPropertyChanged.next(BioPackagePropertiesNames.TooltipWebLogo);
|
|
48
42
|
}
|
|
49
43
|
|
|
50
|
-
public get
|
|
44
|
+
public get defaultSeparator(): string {
|
|
51
45
|
return super.get(BioPackagePropertiesNames.DefaultSeparator) as string;
|
|
52
46
|
}
|
|
53
47
|
|
|
54
|
-
public set
|
|
48
|
+
public set defaultSeparator(value: string) {
|
|
55
49
|
if (value.length !== 1) throw new Error('The separator must be of length one.');
|
|
56
50
|
super.set(BioPackagePropertiesNames.DefaultSeparator, value);
|
|
57
51
|
this._onPropertyChanged.next(BioPackagePropertiesNames.DefaultSeparator);
|
|
@@ -73,6 +67,12 @@ export class BioPackage extends DG.Package {
|
|
|
73
67
|
|
|
74
68
|
public get initialized(): boolean { return this._initialized; }
|
|
75
69
|
|
|
70
|
+
constructor(opts: { debug: boolean } = {debug: false}) {
|
|
71
|
+
super();
|
|
72
|
+
// @ts-ignore
|
|
73
|
+
super._logger = new LoggerWrapper(super.logger, opts.debug);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
76
|
public completeInit(): void { this._initialized = true; }
|
|
77
77
|
|
|
78
78
|
handleErrorUI(err: any) {
|
package/src/package.ts
CHANGED
|
@@ -77,6 +77,7 @@ import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/b
|
|
|
77
77
|
import {CyclizedNotationProvider} from './utils/cyclized';
|
|
78
78
|
import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
|
|
79
79
|
import {PackageSettingsEditorWidget} from './widgets/package-settings-editor-widget';
|
|
80
|
+
import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
80
81
|
|
|
81
82
|
export const _package = new BioPackage();
|
|
82
83
|
|
|
@@ -115,6 +116,12 @@ export async function initBio() {
|
|
|
115
116
|
await Promise.all([
|
|
116
117
|
(async () => {
|
|
117
118
|
const monomerLibManager = await MonomerLibManager.getInstance();
|
|
119
|
+
// Fix user lib settings for explicit stuck from a terminated test
|
|
120
|
+
const libSettings = await getUserLibSettings();
|
|
121
|
+
if (libSettings.explicit) {
|
|
122
|
+
libSettings.explicit = [];
|
|
123
|
+
await setUserLibSettings(libSettings);
|
|
124
|
+
}
|
|
118
125
|
await monomerLibManager.loadLibraries();
|
|
119
126
|
monomerLib = monomerLibManager.getBioLib();
|
|
120
127
|
})(),
|
|
@@ -291,16 +298,16 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
291
298
|
|
|
292
299
|
// -- Package settings editor --
|
|
293
300
|
|
|
294
|
-
//name: packageSettingsEditor
|
|
295
|
-
//description: The database connection
|
|
296
|
-
//tags: packageSettingsEditor
|
|
297
|
-
//input: object propList
|
|
298
|
-
//output: widget result
|
|
299
|
-
export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
}
|
|
301
|
+
// //name: packageSettingsEditor
|
|
302
|
+
// //description: The database connection
|
|
303
|
+
// //tags: packageSettingsEditor
|
|
304
|
+
// //input: object propList
|
|
305
|
+
// //output: widget result
|
|
306
|
+
// export function packageSettingsEditor(propList: DG.Property[]): DG.Widget {
|
|
307
|
+
// const widget = new PackageSettingsEditorWidget(propList);
|
|
308
|
+
// widget.init().then(); // Ignore promise returned
|
|
309
|
+
// return widget as DG.Widget;
|
|
310
|
+
// }
|
|
304
311
|
|
|
305
312
|
// -- Cell renderers --
|
|
306
313
|
|
|
@@ -440,7 +447,7 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
|
|
|
440
447
|
return;
|
|
441
448
|
const axesNames = getEmbeddingColsNames(table);
|
|
442
449
|
const tags = {
|
|
443
|
-
'units': molecules.
|
|
450
|
+
'units': molecules.meta.units!,
|
|
444
451
|
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
445
452
|
'separator': molecules.getTag(bioTAGS.separator),
|
|
446
453
|
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
@@ -658,8 +665,8 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
658
665
|
} else if (colList.length > 1) {
|
|
659
666
|
const colListNames: string [] = colList.map((col) => col.name);
|
|
660
667
|
const selectedCol = colList.find((c) => { return SeqHandler.forColumn(c).isMsa(); });
|
|
661
|
-
const colInput: DG.InputBase = ui.
|
|
662
|
-
'Column', selectedCol ? selectedCol.name : colListNames[0], colListNames);
|
|
668
|
+
const colInput: DG.InputBase = ui.input.choice(
|
|
669
|
+
'Column', {value: selectedCol ? selectedCol.name : colListNames[0], items: colListNames});
|
|
663
670
|
ui.dialog({
|
|
664
671
|
title: 'Composition Analysis',
|
|
665
672
|
helpUrl: 'https://datagrok.ai/help/datagrok/solutions/domains/bio/#sequence-composition',
|
|
@@ -743,13 +750,13 @@ export async function testDetectMacromolecule(path: string): Promise<DG.DataFram
|
|
|
743
750
|
const semType = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
744
751
|
if (semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
745
752
|
//console.warn(`file: ${fileInfo.path}, column: ${col.name}, ` +
|
|
746
|
-
// `semType: ${semType}, units: ${col.
|
|
753
|
+
// `semType: ${semType}, units: ${col.meta.units}`);
|
|
747
754
|
// console.warn('file: "' + fileInfo.path + '", semType: "' + semType + '", ' +
|
|
748
|
-
// 'units: "' + col.
|
|
755
|
+
// 'units: "' + col.meta.units + '"');
|
|
749
756
|
|
|
750
757
|
res.push({
|
|
751
758
|
file: fileInfo.path, result: 'detected', column: col.name,
|
|
752
|
-
message: `units: ${col.
|
|
759
|
+
message: `units: ${col.meta.units}`,
|
|
753
760
|
});
|
|
754
761
|
}
|
|
755
762
|
}
|
|
@@ -54,7 +54,7 @@ export class SubstructureSearchDialog {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
updateNotationDiv(): void {
|
|
57
|
-
this.units = this.col.
|
|
57
|
+
this.units = this.col.meta.units!;
|
|
58
58
|
this.separator = this.col.getTag(bioTAGS.separator);
|
|
59
59
|
const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
|
|
60
60
|
if (notationDiv)
|
|
@@ -63,26 +63,25 @@ export class SubstructureSearchDialog {
|
|
|
63
63
|
|
|
64
64
|
createUI(): void {
|
|
65
65
|
const dataframe = grok.shell.tv.dataFrame;
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
this.col = column;
|
|
66
|
+
this.columnsInput = ui.input.column('Column', {table: dataframe, value: this.col, onValueChanged: (input) => {
|
|
67
|
+
this.col = input.value;
|
|
69
68
|
this.updateNotationDiv();
|
|
70
69
|
this.updateInputs();
|
|
71
|
-
},
|
|
70
|
+
}, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
72
71
|
|
|
73
|
-
this.substructureInput = ui.
|
|
72
|
+
this.substructureInput = ui.input.string('Substructure', {value: ''});
|
|
74
73
|
|
|
75
74
|
this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
|
|
76
75
|
|
|
77
76
|
const df = DG.DataFrame.create(1);
|
|
78
77
|
df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
|
|
79
78
|
df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
|
|
80
|
-
df.col(SUBSTR_HELM_COL_NAME)!.
|
|
79
|
+
df.col(SUBSTR_HELM_COL_NAME)!.meta.units = NOTATION.HELM;
|
|
81
80
|
this.grid = df.plot.grid();
|
|
82
|
-
this.separatorInput = ui.
|
|
81
|
+
this.separatorInput = ui.input.string('Separator', {value: this.separator});
|
|
83
82
|
|
|
84
83
|
this.inputsDiv = ui.div();
|
|
85
|
-
this.units = this.col.
|
|
84
|
+
this.units = this.col.meta.units!;
|
|
86
85
|
this.separator = this.col.getTag(bioTAGS.separator);
|
|
87
86
|
this.updateInputs();
|
|
88
87
|
|
|
@@ -135,7 +134,7 @@ export async function helmSubstructureSearch(substructure: string, col: DG.Colum
|
|
|
135
134
|
await invalidateMols(col, true);
|
|
136
135
|
const substructureCol: DG.Column<string> = DG.Column.string('helm', 1).init((_i) => substructure);
|
|
137
136
|
substructureCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
138
|
-
substructureCol.
|
|
137
|
+
substructureCol.meta.units = NOTATION.HELM;
|
|
139
138
|
const substructureMolsCol =
|
|
140
139
|
await getMonomericMols(substructureCol, true, col.temp[MONOMERIC_COL_TAGS.MONOMERS_DICT]);
|
|
141
140
|
const matchesCol = await grok.functions.call('Chem:searchSubstructure', {
|
|
@@ -29,7 +29,7 @@ ATC-G-TTGC--
|
|
|
29
29
|
|
|
30
30
|
const seqCol: DG.Column = df.getCol('seq');
|
|
31
31
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
32
|
-
seqCol.
|
|
32
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
33
33
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
34
34
|
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
35
35
|
|
|
@@ -77,7 +77,7 @@ ATC-G-TTGC--
|
|
|
77
77
|
|
|
78
78
|
const seqCol: DG.Column = df.getCol('seq');
|
|
79
79
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
80
|
-
seqCol.
|
|
80
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
81
81
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
82
82
|
seqCol.setTag(bioTAGS.aligned, 'SEQ');
|
|
83
83
|
|
|
@@ -120,7 +120,7 @@ ATC-G-TTGC--
|
|
|
120
120
|
|
|
121
121
|
const seqCol: DG.Column = df.getCol('seq');
|
|
122
122
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
123
|
-
seqCol.
|
|
123
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
124
124
|
seqCol.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
125
125
|
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
126
126
|
|
|
@@ -191,8 +191,8 @@ ATC-G-TTGC--
|
|
|
191
191
|
test('empty', async () => {
|
|
192
192
|
const df: DG.DataFrame = DG.DataFrame.fromColumns([(() => {
|
|
193
193
|
const col = DG.Column.fromStrings('seq', []);
|
|
194
|
-
col.
|
|
195
|
-
col.
|
|
194
|
+
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
195
|
+
col.meta.units = NOTATION.FASTA;
|
|
196
196
|
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
197
197
|
return col;
|
|
198
198
|
})()]);
|
|
@@ -222,7 +222,7 @@ function buildDfWithSeqCol(csv: string, notation: NOTATION, alphabet: ALPHABET,
|
|
|
222
222
|
|
|
223
223
|
const seqCol: DG.Column = df.getCol('seq');
|
|
224
224
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
225
|
-
seqCol.
|
|
225
|
+
seqCol.meta.units = notation;
|
|
226
226
|
seqCol.setTag(bioTAGS.alphabet, alphabet);
|
|
227
227
|
seqCol.setTag(bioTAGS.aligned, aligned);
|
|
228
228
|
|
|
@@ -13,21 +13,24 @@ import {
|
|
|
13
13
|
getUserLibSettings, setUserLibSettings, setUserLibSettingsForTests
|
|
14
14
|
} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
15
15
|
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
16
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
17
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
16
18
|
|
|
17
19
|
import {_package} from '../package-test';
|
|
18
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
category('activityCliffs', async () => {
|
|
22
23
|
let viewList: DG.ViewBase[] = [];
|
|
23
24
|
let dfList: DG.DataFrame[] = [];
|
|
24
25
|
|
|
26
|
+
let helmHelper: IHelmHelper;
|
|
25
27
|
let monomerLibHelper: IMonomerLibHelper;
|
|
26
28
|
/** Backup actual user's monomer libraries settings */
|
|
27
29
|
let userLibSettings: UserLibSettings;
|
|
28
30
|
const seqEncodingFunc = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
29
31
|
const helmEncodingFunc = DG.Func.find({name: 'helmPreprocessingFunction', package: 'Bio'})[0];
|
|
30
32
|
before(async () => {
|
|
33
|
+
helmHelper = await getHelmHelper(); // init Helm package
|
|
31
34
|
monomerLibHelper = await getMonomerLibHelper();
|
|
32
35
|
userLibSettings = await getUserLibSettings();
|
|
33
36
|
|
|
@@ -59,7 +62,7 @@ category('activityCliffs', async () => {
|
|
|
59
62
|
|
|
60
63
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
61
64
|
'sequence', 'Activity', 90, cliffsNum, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
62
|
-
});
|
|
65
|
+
}, {benchmark: true});
|
|
63
66
|
|
|
64
67
|
test('activityCliffsWithEmptyRows', async () => {
|
|
65
68
|
const actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
package/src/tests/bio-tests.ts
CHANGED
|
@@ -58,7 +58,7 @@ PEPTIDE1{meI}$$$$`;
|
|
|
58
58
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
59
59
|
const seqCol: DG.Column = df.getCol('seq')!;
|
|
60
60
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
61
|
-
seqCol.
|
|
61
|
+
seqCol.meta.units = NOTATION.HELM;
|
|
62
62
|
const stats = getStatsForCol(seqCol, 1, splitterAsHelm);
|
|
63
63
|
|
|
64
64
|
expectObject(stats.freq, {
|
|
@@ -129,7 +129,7 @@ export async function _testGetStats(csvDfN1: string) {
|
|
|
129
129
|
const dfN1: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
130
130
|
const seqCol: DG.Column = dfN1.col('seq')!;
|
|
131
131
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
132
|
-
seqCol.
|
|
132
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
133
133
|
const stats = getStatsForCol(seqCol, 5, splitterAsFasta);
|
|
134
134
|
|
|
135
135
|
expectObject(stats.freq, {
|
|
@@ -159,7 +159,7 @@ export async function _testPickupPaletteN1(csvDfN1: string) {
|
|
|
159
159
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1);
|
|
160
160
|
const col: DG.Column = df.col('seq')!;
|
|
161
161
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
162
|
-
col.
|
|
162
|
+
col.meta.units = NOTATION.FASTA;
|
|
163
163
|
const cp = pickUpPalette(col);
|
|
164
164
|
|
|
165
165
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -169,7 +169,7 @@ export async function _testPickupPaletteN1e(csvDfN1e: string) {
|
|
|
169
169
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfN1e);
|
|
170
170
|
const col: DG.Column = df.col('seq')!;
|
|
171
171
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
172
|
-
col.
|
|
172
|
+
col.meta.units = NOTATION.FASTA;
|
|
173
173
|
const cp = pickUpPalette(col);
|
|
174
174
|
|
|
175
175
|
expect(cp instanceof NucleotidesPalettes, true);
|
|
@@ -179,7 +179,7 @@ export async function _testPickupPaletteAA1(csvDfAA1: string) {
|
|
|
179
179
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfAA1);
|
|
180
180
|
const col: DG.Column = df.col('seq')!;
|
|
181
181
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
182
|
-
col.
|
|
182
|
+
col.meta.units = NOTATION.FASTA;
|
|
183
183
|
const cp = pickUpPalette(col);
|
|
184
184
|
|
|
185
185
|
expect(cp instanceof AminoacidsPalettes, true);
|
|
@@ -189,7 +189,7 @@ export async function _testPickupPaletteX(csvDfX: string) {
|
|
|
189
189
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csvDfX);
|
|
190
190
|
const col: DG.Column = df.col('seq')!;
|
|
191
191
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
192
|
-
col.
|
|
192
|
+
col.meta.units = NOTATION.FASTA;
|
|
193
193
|
const cp = pickUpPalette(col);
|
|
194
194
|
|
|
195
195
|
expect(cp instanceof UnknownSeqPalette, true);
|
|
@@ -18,7 +18,7 @@ seq4`;
|
|
|
18
18
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
19
19
|
const col: DG.Column = df.getCol('seq');
|
|
20
20
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
21
|
-
col.
|
|
21
|
+
col.meta.units = NOTATION.FASTA;
|
|
22
22
|
col.setTag(bioTAGS.alphabet, ALPHABET.DNA);
|
|
23
23
|
col.setTag(bioTAGS.aligned, 'SEQ');
|
|
24
24
|
|
|
@@ -33,7 +33,7 @@ seq4`;
|
|
|
33
33
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
34
34
|
const col: DG.Column = df.getCol('seq');
|
|
35
35
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
36
|
-
col.
|
|
36
|
+
col.meta.units = NOTATION.HELM;
|
|
37
37
|
// col.setTag(bio.TAGS.alphabetSize, '11');
|
|
38
38
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
39
39
|
|
|
@@ -48,7 +48,7 @@ seq4`;
|
|
|
48
48
|
const df: DG.DataFrame = DG.DataFrame.fromCsv(csv);
|
|
49
49
|
const col: DG.Column = df.getCol('seq');
|
|
50
50
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
51
|
-
col.
|
|
51
|
+
col.meta.units = NOTATION.FASTA;
|
|
52
52
|
col.setTag(bioTAGS.alphabet, 'UN');
|
|
53
53
|
col.setTag(bioTAGS.alphabetSize, '11');
|
|
54
54
|
col.setTag(bioTAGS.alphabetIsMultichar, 'true');
|
|
@@ -135,7 +135,7 @@ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p
|
|
|
135
135
|
return function(srcCol: DG.Column): DG.Column {
|
|
136
136
|
const converterSh = SeqHandler.forColumn(srcCol);
|
|
137
137
|
const resCol = converterSh.convert(tgtNotation, tgtSeparator);
|
|
138
|
-
expect(resCol.
|
|
138
|
+
expect(resCol.meta.units, tgtNotation);
|
|
139
139
|
return resCol;
|
|
140
140
|
};
|
|
141
141
|
}
|
|
@@ -99,6 +99,15 @@ C1CCCCC1
|
|
|
99
99
|
CCCCCC`,
|
|
100
100
|
neg: ['col1'],
|
|
101
101
|
},
|
|
102
|
+
'negFastaUnSingleChar': {
|
|
103
|
+
csv: `col1
|
|
104
|
+
Alanine
|
|
105
|
+
Cysteine
|
|
106
|
+
Aspartic acid
|
|
107
|
+
Glutamic acid
|
|
108
|
+
Phenylalanine`,
|
|
109
|
+
neg: ['col1']
|
|
110
|
+
},
|
|
102
111
|
|
|
103
112
|
// Same length
|
|
104
113
|
'fastaMsaSameLength': {
|
|
@@ -122,30 +131,33 @@ YN[Re]VYNR[Ac]WYV
|
|
|
122
131
|
[Me]EYVMPSFW[Me]H`,
|
|
123
132
|
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 14, true, undefined)},
|
|
124
133
|
},
|
|
134
|
+
'fastaMsaExtManyMinus': {
|
|
135
|
+
csv: `seq
|
|
136
|
+
[D-Tic]-------[D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]N[D-Orn][D-aThr]-[Phe_4Me]
|
|
137
|
+
[Phe_2F]--------[Tyr_ab-dehydroMe][dV][Aca]N[D-Orn][D-aThr]-[Phe_4Me]
|
|
138
|
+
[D-Tic]-[Hcy]QTWQ[Phe_4NH2][D-Tyr_Et][Tyr_ab-dehydroMe][dV][Cys_SEt]----[Phe_4Me]`,
|
|
139
|
+
pos: {'seq': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 17, true, undefined)}
|
|
140
|
+
},
|
|
125
141
|
'sepSameLength': {
|
|
126
142
|
csv: `seq
|
|
127
143
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
128
144
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
129
|
-
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
130
|
-
|
|
131
|
-
}
|
|
145
|
+
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
146
|
+
pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 5, true, '-')}
|
|
132
147
|
},
|
|
133
148
|
'sepMsaSameLength': {
|
|
134
149
|
csv: `seq
|
|
135
150
|
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2
|
|
136
151
|
Ac(1)-A-A(2)-A-A-A-C(2)-A-A-A-A-C(1)-G-NH2
|
|
137
|
-
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
138
|
-
|
|
139
|
-
}
|
|
152
|
+
Ac(1)-A-A-A-A-A-A-A-A-A-A-A-A-A-C(1)-G-NH2`,
|
|
153
|
+
pos: {'seq': new PosCol(NOTATION.SEPARATOR, ALIGNMENT.SEQ, ALPHABET.UN, 5, true, '-')}
|
|
140
154
|
},
|
|
141
155
|
'helmSameLength': {
|
|
142
156
|
csv: `seq
|
|
143
157
|
PEPTIDE1{Ac(1).A.A.A.A.A.A.A.A.A.A.A.A.A.C(1).G.NH2}$$$$
|
|
144
158
|
PEPTIDE1{Ab(1).Y.V.K.H.P.F.W.R.W.Y.A.A.A.C(1).G.NH2}$$$$
|
|
145
159
|
PEPTIDE1{Ad(1).S.W.Y.C.K.H.P.M.W.A.A.A.A.C(1)-G-NH2}$$$$`,
|
|
146
|
-
pos: {
|
|
147
|
-
'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)
|
|
148
|
-
}
|
|
160
|
+
pos: {'seq': new PosCol(NOTATION.HELM, null, null, 19, undefined, undefined)}
|
|
149
161
|
},
|
|
150
162
|
};
|
|
151
163
|
|
|
@@ -493,7 +505,7 @@ export async function _testNegList(list: string[]): Promise<void> {
|
|
|
493
505
|
const col: DG.Column = DG.Column.fromList(DG.TYPE.STRING, 'col1', list);
|
|
494
506
|
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: col});
|
|
495
507
|
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
496
|
-
const msg = `Negative test detected semType='${col.semType}', units='${col.
|
|
508
|
+
const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
|
|
497
509
|
throw new Error(msg);
|
|
498
510
|
}
|
|
499
511
|
}
|
|
@@ -507,7 +519,7 @@ export async function _testNeg(readDf: DfReaderFunc, colName: string) {
|
|
|
507
519
|
col.semType = semType;
|
|
508
520
|
|
|
509
521
|
if (col.semType === DG.SEMTYPE.MACROMOLECULE) {
|
|
510
|
-
const msg = `Negative test detected semType='${col.semType}', units='${col.
|
|
522
|
+
const msg = `Negative test detected semType='${col.semType}', units='${col.meta.units}'.`;
|
|
511
523
|
throw new Error(msg);
|
|
512
524
|
}
|
|
513
525
|
}
|
|
@@ -522,7 +534,7 @@ export async function _testPosList(list: string[], units: NOTATION,
|
|
|
522
534
|
col.semType = semType;
|
|
523
535
|
|
|
524
536
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
525
|
-
expect(col.
|
|
537
|
+
expect(col.meta.units, units);
|
|
526
538
|
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
527
539
|
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
528
540
|
if (separator)
|
|
@@ -550,7 +562,7 @@ export async function _testPos(
|
|
|
550
562
|
col.semType = semType;
|
|
551
563
|
|
|
552
564
|
expect(col.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
553
|
-
expect(col.
|
|
565
|
+
expect(col.meta.units, units);
|
|
554
566
|
expect(col.getTag(bioTAGS.aligned), aligned);
|
|
555
567
|
expect(col.getTag(bioTAGS.alphabet), alphabet);
|
|
556
568
|
if (separator)
|
|
@@ -91,7 +91,7 @@ MRGGL
|
|
|
91
91
|
const srcSeq: string = wrapData[testKey].src;
|
|
92
92
|
const col = DG.Column.fromStrings('src', [srcSeq]);
|
|
93
93
|
col.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
94
|
-
col.
|
|
94
|
+
col.meta.units = NOTATION.FASTA;
|
|
95
95
|
const sh = SeqHandler.forColumn(col);
|
|
96
96
|
const srcSS = sh.getSplitted(0);
|
|
97
97
|
const wrapRes: string[] = wrapSequence(srcSS, lineWidth);
|
|
@@ -105,7 +105,7 @@ MRGGL
|
|
|
105
105
|
|
|
106
106
|
const seqCol: DG.Column = df.getCol(args.seqCol);
|
|
107
107
|
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
108
|
-
seqCol.
|
|
108
|
+
seqCol.meta.units = NOTATION.FASTA;
|
|
109
109
|
const idCols: DG.Column[] = args.idCols.map((colName) => df.getCol(colName));
|
|
110
110
|
|
|
111
111
|
const fastaRes: string = saveAsFastaDo(idCols, seqCol, args.lineWidth);
|
|
@@ -122,7 +122,7 @@ ATCGAATCGA`;
|
|
|
122
122
|
const seq2 = Array(10000).fill('FYWRRY').join('');
|
|
123
123
|
_testDistance(seq1, seq2, df, 0.667);
|
|
124
124
|
} else { _testDistance(prot5, prot6, df, 1.143); }
|
|
125
|
-
});
|
|
125
|
+
}, {benchmark: true});
|
|
126
126
|
});
|
|
127
127
|
|
|
128
128
|
async function _initMacromoleculeColumn(csv: string): Promise<SeqHandler> {
|