@datagrok/bio 2.14.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/css/monomer-manager.css +66 -0
- package/detectors.js +7 -2
- package/dist/111.js +1 -1
- package/dist/111.js.map +1 -1
- package/dist/234.js +1 -1
- package/dist/234.js.map +1 -1
- package/dist/242.js.map +1 -1
- package/dist/603.js +1 -1
- package/dist/603.js.map +1 -1
- package/dist/682.js +1 -1
- package/dist/682.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/778.js +1 -1
- package/dist/778.js.map +1 -1
- package/dist/793.js +1 -1
- package/dist/793.js.map +1 -1
- package/dist/801.js +2 -0
- package/dist/801.js.map +1 -0
- package/dist/950.js +1 -1
- package/dist/950.js.map +1 -1
- package/dist/980.js +2 -0
- package/dist/980.js.map +1 -0
- package/dist/package-test.js +6 -6
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +5 -5
- package/dist/package.js.map +1 -1
- package/files/monomer-libraries/polytool-lib.json +48 -0
- package/files/monomer-libraries/sample-lib-Aca-colored.json +2 -2
- package/package.json +20 -12
- package/src/analysis/sequence-space.ts +2 -1
- package/src/demo/bio05-helm-msa-sequence-space.ts +1 -1
- package/src/package-test.ts +3 -1
- package/src/package-types.ts +9 -1
- package/src/package.ts +77 -33
- package/src/seq_align.ts +1 -1
- package/src/substructure-search/substructure-search.ts +2 -2
- package/src/tests/WebLogo-project-tests.ts +3 -4
- package/src/tests/activity-cliffs-tests.ts +5 -18
- package/src/tests/detectors-benchmark-tests.ts +24 -9
- package/src/tests/mm-distance-tests.ts +4 -3
- package/src/tests/monomer-libraries-tests.ts +3 -3
- package/src/tests/seq-handler-get-helm-tests.ts +88 -0
- package/src/tests/sequence-space-test.ts +4 -3
- package/src/tests/to-atomic-level-tests.ts +2 -0
- package/src/tests/to-atomic-level-ui-tests.ts +74 -0
- package/src/utils/cell-renderer.ts +3 -0
- package/src/utils/convert.ts +2 -2
- package/src/utils/cyclized.ts +20 -1
- package/src/utils/dimerized.ts +12 -0
- package/src/utils/get-region-func-editor.ts +1 -1
- package/src/utils/helm-to-molfile/converter/converter.ts +58 -30
- package/src/utils/helm-to-molfile/converter/mol-atoms.ts +2 -0
- package/src/utils/helm-to-molfile/converter/mol-bonds.ts +2 -0
- package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +5 -1
- package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +7 -3
- package/src/utils/helm-to-molfile/converter/polymer.ts +21 -6
- package/src/utils/helm-to-molfile/converter/types.ts +11 -0
- package/src/utils/helm-to-molfile/utils.ts +11 -15
- package/src/utils/monomer-lib/lib-manager.ts +15 -1
- package/src/utils/monomer-lib/library-file-manager/file-manager.ts +1 -1
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +8 -0
- package/src/utils/monomer-lib/library-file-manager/ui.ts +150 -3
- package/src/utils/monomer-lib/monomer-lib.ts +59 -21
- package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +155 -0
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +924 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +3 -3
- package/src/utils/seq-helper/index.ts +1 -0
- package/src/utils/seq-helper/seq-helper.ts +131 -0
- package/src/utils/sequence-to-mol.ts +47 -18
- package/src/widgets/bio-substructure-filter.ts +9 -7
- package/src/widgets/package-settings-editor-widget.ts +6 -6
- package/src/widgets/representations.ts +12 -12
- package/dist/449.js +0 -2
- package/dist/449.js.map +0 -1
- /package/src/tests/{seq-handler-get-region.ts → seq-handler-get-region-tests.ts} +0 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"monomerType": "Backbone",
|
|
4
|
+
"smiles": "[H:2]N[H:1]",
|
|
5
|
+
"name": "NH2",
|
|
6
|
+
"author": "Datagrok",
|
|
7
|
+
"molfile": "\n MJ201900 \n\n 3 2 0 0 0 0 0 0 0 0999 V2000\n -1.0496 -1.2500 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -0.3572 -0.8250 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 0.3572 -1.2375 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2 3 1 0 0 0 0\n 1 2 1 0 0 0 0\nM RGP 2 1 2 3 1\nM END\n",
|
|
8
|
+
"naturalAnalog": "",
|
|
9
|
+
"rgroups": [
|
|
10
|
+
{
|
|
11
|
+
"capGroupSMILES": "[*:1][H]",
|
|
12
|
+
"alternateId": "R1-H",
|
|
13
|
+
"capGroupName": "H",
|
|
14
|
+
"label": "R1"
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"capGroupSMILES": "[*:2][H]",
|
|
18
|
+
"alternateId": "R2-H",
|
|
19
|
+
"capGroupName": "H",
|
|
20
|
+
"label": "R2"
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"createDate": null,
|
|
24
|
+
"id": 0,
|
|
25
|
+
"polymerType": "PEPTIDE",
|
|
26
|
+
"symbol": "NH2"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"monomerType": "Backbone",
|
|
30
|
+
"smiles": "[H:1]C(=O)O",
|
|
31
|
+
"name": "COOH",
|
|
32
|
+
"author": "Datagrok",
|
|
33
|
+
"molfile": "\n RDKit 2D\n\n 4 3 0 0 0 0 0 0 0 0999 V2000\n -1.2990 -0.7500 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0\n 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.0000 1.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.2990 -0.7500 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 2 0\n 2 4 1 0\nM RGP 1 1 1\nM END\n",
|
|
34
|
+
"naturalAnalog": "",
|
|
35
|
+
"rgroups": [
|
|
36
|
+
{
|
|
37
|
+
"capGroupSMILES": "[*:1][H]",
|
|
38
|
+
"alternateId": "R1-H",
|
|
39
|
+
"capGroupName": "H",
|
|
40
|
+
"label": "R1"
|
|
41
|
+
}
|
|
42
|
+
],
|
|
43
|
+
"createDate": null,
|
|
44
|
+
"id": 0,
|
|
45
|
+
"polymerType": "PEPTIDE",
|
|
46
|
+
"symbol": "COOH"
|
|
47
|
+
}
|
|
48
|
+
]
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[
|
|
2
2
|
{
|
|
3
3
|
"monomerType": "Backbone",
|
|
4
|
-
"smiles": "
|
|
4
|
+
"smiles": "",
|
|
5
5
|
"name": "2-Aminocapric acid",
|
|
6
6
|
"author": "Pistoia Alliance HELM project",
|
|
7
|
-
"molfile": "
|
|
7
|
+
"molfile": "\n RDKit 2D\n\n 13 13 0 0 0 0 0 0 0 0999 V2000\n 2.6980 -1.9038 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0\n 1.3990 -1.1538 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.3990 0.3462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.0999 1.0962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1991 0.3462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1991 -1.1538 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.4982 -1.9038 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -3.7972 -1.1538 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n -3.7972 0.3462 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.4982 1.0962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.6980 1.0962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 2.6980 2.5962 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 3.9970 0.3462 0.0000 R# 0 0 0 0 0 1 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 3 4 1 1\n 4 5 1 0\n 5 6 2 0\n 6 7 1 0\n 7 8 2 0\n 8 9 1 0\n 9 10 2 0\n 3 11 1 0\n 11 12 2 0\n 11 13 1 0\n 10 5 1 0\nM RGP 2 1 1 13 2\nM END\n",
|
|
8
8
|
"naturalAnalog": "X",
|
|
9
9
|
"rgroups": [
|
|
10
10
|
{
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Aleksandr Tanas",
|
|
6
6
|
"email": "atanas@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.
|
|
8
|
+
"version": "2.15.0",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -37,18 +37,18 @@
|
|
|
37
37
|
],
|
|
38
38
|
"dependencies": {
|
|
39
39
|
"@biowasm/aioli": "^3.1.0",
|
|
40
|
-
"@datagrok-libraries/bio": "^5.42.
|
|
40
|
+
"@datagrok-libraries/bio": "^5.42.14",
|
|
41
41
|
"@datagrok-libraries/chem-meta": "^1.2.5",
|
|
42
|
-
"@datagrok-libraries/math": "^1.
|
|
43
|
-
"@datagrok-libraries/ml": "^6.
|
|
44
|
-
"@datagrok-libraries/tutorials": "^1.
|
|
45
|
-
"@datagrok-libraries/utils": "^4.
|
|
42
|
+
"@datagrok-libraries/math": "^1.2.0",
|
|
43
|
+
"@datagrok-libraries/ml": "^6.7.0",
|
|
44
|
+
"@datagrok-libraries/tutorials": "^1.4.0",
|
|
45
|
+
"@datagrok-libraries/utils": "^4.3.0",
|
|
46
46
|
"@webgpu/types": "^0.1.40",
|
|
47
47
|
"ajv": "^8.12.0",
|
|
48
48
|
"ajv-errors": "^3.0.0",
|
|
49
49
|
"cash-dom": "^8.0.0",
|
|
50
50
|
"css-loader": "^6.7.3",
|
|
51
|
-
"datagrok-api": "^1.
|
|
51
|
+
"datagrok-api": "^1.21.1",
|
|
52
52
|
"dayjs": "^1.11.4",
|
|
53
53
|
"fastest-levenshtein": "^1.0.16",
|
|
54
54
|
"openchemlib": "^7.2.3",
|
|
@@ -58,11 +58,11 @@
|
|
|
58
58
|
"wu": "latest"
|
|
59
59
|
},
|
|
60
60
|
"devDependencies": {
|
|
61
|
-
"@datagrok-libraries/helm-web-editor": "^1.1.
|
|
62
|
-
"@datagrok-libraries/js-draw-lite": "^0.0.
|
|
63
|
-
"@datagrok/chem": "^1.
|
|
64
|
-
"@datagrok/dendrogram": "^1.2.
|
|
65
|
-
"@datagrok/helm": "
|
|
61
|
+
"@datagrok-libraries/helm-web-editor": "^1.1.11",
|
|
62
|
+
"@datagrok-libraries/js-draw-lite": "^0.0.8",
|
|
63
|
+
"@datagrok/chem": "^1.12.0",
|
|
64
|
+
"@datagrok/dendrogram": "^1.2.33",
|
|
65
|
+
"@datagrok/helm": "latest",
|
|
66
66
|
"@types/node": "^17.0.24",
|
|
67
67
|
"@types/wu": "latest",
|
|
68
68
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
@@ -141,6 +141,14 @@
|
|
|
141
141
|
"Notation...": null,
|
|
142
142
|
"SDF to JSON Library...": null,
|
|
143
143
|
"To Atomic Level...": null
|
|
144
|
+
},
|
|
145
|
+
"Manage": {
|
|
146
|
+
"Monomer Libraries": null
|
|
147
|
+
},
|
|
148
|
+
"PolyTool": {
|
|
149
|
+
"Convert...": null,
|
|
150
|
+
"Enumerate Chem...": null,
|
|
151
|
+
"Enumerate HELM...": null
|
|
144
152
|
}
|
|
145
153
|
}
|
|
146
154
|
}
|
|
@@ -56,7 +56,8 @@ export async function getEncodedSeqSpaceCol(
|
|
|
56
56
|
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
57
57
|
});
|
|
58
58
|
// sets distance function args in place.
|
|
59
|
-
|
|
59
|
+
const maxLength = encList.reduce((acc, val) => Math.max(acc, val.length), 0);
|
|
60
|
+
options = {scoringMatrix: monomerRes.scoringMatrix, alphabetIndexes: monomerHashToMatrixMap, maxLength};
|
|
60
61
|
}
|
|
61
62
|
return {seqList: encList, options};
|
|
62
63
|
}
|
|
@@ -93,7 +93,7 @@ export async function demoBio05UI(): Promise<void> {
|
|
|
93
93
|
.step('Build sequence space', async () => {
|
|
94
94
|
const preprocessingFunc = DG.Func.find({package: 'Bio', name: 'macromoleculePreprocessingFunction'})[0];
|
|
95
95
|
ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
|
|
96
|
-
dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true, preprocessingFunc)) as DG.ScatterPlotViewer;
|
|
96
|
+
dimRedMethod, MmDistanceFunctionsNames.LEVENSHTEIN, true, preprocessingFunc, undefined, undefined, true)) as DG.ScatterPlotViewer;
|
|
97
97
|
view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
|
|
98
98
|
}, {
|
|
99
99
|
description: 'Reduce sequence space dimensionality to display on 2D representation.',
|
package/src/package-test.ts
CHANGED
|
@@ -28,8 +28,10 @@ import './tests/pepsea-tests';
|
|
|
28
28
|
import './tests/viewers';
|
|
29
29
|
import './tests/seq-handler-tests';
|
|
30
30
|
import './tests/seq-handler-splitted-tests';
|
|
31
|
-
import './tests/seq-handler-get-region';
|
|
31
|
+
import './tests/seq-handler-get-region-tests';
|
|
32
|
+
import './tests/seq-handler-get-helm-tests';
|
|
32
33
|
import './tests/to-atomic-level-tests';
|
|
34
|
+
import './tests/to-atomic-level-ui-tests';
|
|
33
35
|
import './tests/mm-distance-tests';
|
|
34
36
|
import './tests/activity-cliffs-tests';
|
|
35
37
|
import './tests/sequence-space-test';
|
package/src/package-types.ts
CHANGED
|
@@ -6,6 +6,7 @@ import {Observable, Subject} from 'rxjs';
|
|
|
6
6
|
|
|
7
7
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
8
8
|
import {LoggerWrapper} from '@datagrok-libraries/bio/src/utils/logger';
|
|
9
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
9
10
|
|
|
10
11
|
/** Names of package properties/settings declared in properties section of {@link './package.json'} */
|
|
11
12
|
export const enum BioPackagePropertiesNames {
|
|
@@ -58,6 +59,10 @@ export class BioPackageProperties extends Map<string, any> {
|
|
|
58
59
|
|
|
59
60
|
export class BioPackage extends DG.Package {
|
|
60
61
|
private _properties: BioPackageProperties;
|
|
62
|
+
|
|
63
|
+
private _rdKitModule: RDModule;
|
|
64
|
+
public get rdKitModule(): RDModule { return this._rdKitModule;};
|
|
65
|
+
|
|
61
66
|
/** Package properties/settings declared in properties section of {@link './package.json'} */
|
|
62
67
|
public get properties(): BioPackageProperties { return this._properties; };
|
|
63
68
|
|
|
@@ -73,7 +78,10 @@ export class BioPackage extends DG.Package {
|
|
|
73
78
|
super._logger = new LoggerWrapper(super.logger, opts.debug);
|
|
74
79
|
}
|
|
75
80
|
|
|
76
|
-
public completeInit(): void {
|
|
81
|
+
public completeInit(rdKitModule: RDModule): void {
|
|
82
|
+
this._rdKitModule = rdKitModule;
|
|
83
|
+
this._initialized = true;
|
|
84
|
+
}
|
|
77
85
|
|
|
78
86
|
handleErrorUI(err: any) {
|
|
79
87
|
const [errMsg, errStack] = errInfo(err);
|
package/src/package.ts
CHANGED
|
@@ -20,6 +20,22 @@ import {
|
|
|
20
20
|
createJsonMonomerLibFromSdf, IMonomerLibHelper
|
|
21
21
|
} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
22
22
|
import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
23
|
+
import {ActivityCliffsEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-function-editor';
|
|
24
|
+
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
25
|
+
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
26
|
+
import {
|
|
27
|
+
getEmbeddingColsNames, multiColReduceDimensionality
|
|
28
|
+
} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
|
|
29
|
+
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
30
|
+
import {
|
|
31
|
+
ITSNEOptions, IUMAPOptions
|
|
32
|
+
} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
|
|
33
|
+
import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/bio/src/utils/generator';
|
|
34
|
+
import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
35
|
+
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
36
|
+
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
37
|
+
import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
38
|
+
import {getHelmHelper, IHelmHelper} from '@datagrok-libraries/bio/src/helm/helm-helper';
|
|
23
39
|
|
|
24
40
|
import {getMacromoleculeColumns} from './utils/ui-utils';
|
|
25
41
|
import {
|
|
@@ -40,7 +56,8 @@ import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
|
40
56
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
41
57
|
import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
42
58
|
import {MonomerLibManager} from './utils/monomer-lib/lib-manager';
|
|
43
|
-
import {getMonomerLibraryManagerLink, showManageLibrariesDialog
|
|
59
|
+
import {getMonomerLibraryManagerLink, showManageLibrariesDialog,
|
|
60
|
+
showManageLibrariesView} from './utils/monomer-lib/library-file-manager/ui';
|
|
44
61
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
45
62
|
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
46
63
|
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
@@ -62,23 +79,12 @@ import {GetRegionApp} from './apps/get-region-app';
|
|
|
62
79
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
63
80
|
import {sequenceToMolfile} from './utils/sequence-to-mol';
|
|
64
81
|
import {detectMacromoleculeProbeDo} from './utils/detect-macromolecule-probe';
|
|
65
|
-
import {ActivityCliffsEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-function-editor';
|
|
66
|
-
import BitArray from '@datagrok-libraries/utils/src/bit-array';
|
|
67
|
-
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
68
|
-
import {
|
|
69
|
-
getEmbeddingColsNames, multiColReduceDimensionality
|
|
70
|
-
} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/reduce-dimensionality';
|
|
71
|
-
import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
|
|
72
|
-
import {
|
|
73
|
-
ITSNEOptions, IUMAPOptions
|
|
74
|
-
} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/multi-column-dim-reducer';
|
|
75
|
-
import {generateLongSequence, generateLongSequence2} from '@datagrok-libraries/bio/src/utils/generator';
|
|
76
|
-
|
|
77
82
|
import {CyclizedNotationProvider} from './utils/cyclized';
|
|
83
|
+
import {DimerizedNotationProvider} from './utils/dimerized';
|
|
78
84
|
import {getMolColumnFromHelm} from './utils/helm-to-molfile/utils';
|
|
79
|
-
import {
|
|
80
|
-
import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
85
|
+
import {MonomerManager} from './utils/monomer-lib/monomer-manager/monomer-manager';
|
|
81
86
|
import {calculateScoresWithEmptyValues} from './utils/calculate-scores';
|
|
87
|
+
import {SeqHelper} from './utils/seq-helper/seq-helper';
|
|
82
88
|
|
|
83
89
|
export const _package = new BioPackage(/*{debug: true}/**/);
|
|
84
90
|
|
|
@@ -113,11 +119,12 @@ let monomerSets: IMonomerSet | null = null;
|
|
|
113
119
|
export async function initBio() {
|
|
114
120
|
const logPrefix = 'Bio: _package.initBio()';
|
|
115
121
|
_package.logger.debug(`${logPrefix}, start`);
|
|
116
|
-
|
|
122
|
+
let rdKitModule!: RDModule;
|
|
123
|
+
let monomerLibManager: MonomerLibManager;
|
|
117
124
|
const t1: number = window.performance.now();
|
|
118
125
|
await Promise.all([
|
|
119
126
|
(async () => {
|
|
120
|
-
|
|
127
|
+
monomerLibManager = await MonomerLibManager.getInstance();
|
|
121
128
|
// Fix user lib settings for explicit stuck from a terminated test
|
|
122
129
|
const libSettings = await getUserLibSettings();
|
|
123
130
|
if (libSettings.explicit) {
|
|
@@ -133,12 +140,14 @@ export async function initBio() {
|
|
|
133
140
|
const bioPkgProps = new BioPackageProperties(pkgProps);
|
|
134
141
|
_package.properties = bioPkgProps;
|
|
135
142
|
})(),
|
|
143
|
+
(async () => { rdKitModule = await getRdKitModule(); })(),
|
|
136
144
|
]).finally(() => {
|
|
137
|
-
_package.completeInit();
|
|
138
145
|
const t2: number = window.performance.now();
|
|
139
146
|
_package.logger.debug(`${logPrefix}, loading ET: ${t2 - t1} ms`);
|
|
140
147
|
});
|
|
141
148
|
|
|
149
|
+
_package.completeInit(rdKitModule);
|
|
150
|
+
|
|
142
151
|
const monomers: string[] = [];
|
|
143
152
|
const logPs: number[] = [];
|
|
144
153
|
|
|
@@ -146,7 +155,7 @@ export async function initBio() {
|
|
|
146
155
|
Object.keys(series).forEach((symbol) => {
|
|
147
156
|
monomers.push(symbol);
|
|
148
157
|
const block = series[symbol].replaceAll('#R', 'O ');
|
|
149
|
-
const mol =
|
|
158
|
+
const mol = rdKitModule.get_mol(block);
|
|
150
159
|
const logP = JSON.parse(mol.get_descriptors()).CrippenClogP;
|
|
151
160
|
logPs.push(logP);
|
|
152
161
|
mol?.delete();
|
|
@@ -259,7 +268,7 @@ export function SplitToMonomersEditor(call: DG.FuncCall): void {
|
|
|
259
268
|
//input: funccall call
|
|
260
269
|
export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
261
270
|
const funcEditor = new DimReductionBaseEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
262
|
-
ui.dialog({title: 'Sequence Space'})
|
|
271
|
+
const dialog = ui.dialog({title: 'Sequence Space'})
|
|
263
272
|
.add(funcEditor.getEditor())
|
|
264
273
|
.onOK(async () => {
|
|
265
274
|
const params = funcEditor.getParams();
|
|
@@ -273,8 +282,9 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
273
282
|
preprocessingFunction: params.preprocessingFunction,
|
|
274
283
|
clusterEmbeddings: params.clusterEmbeddings,
|
|
275
284
|
}).call();
|
|
276
|
-
})
|
|
277
|
-
|
|
285
|
+
});
|
|
286
|
+
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
287
|
+
dialog.show();
|
|
278
288
|
}
|
|
279
289
|
|
|
280
290
|
//name: SeqActivityCliffsEditor
|
|
@@ -282,7 +292,7 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
282
292
|
//input: funccall call
|
|
283
293
|
export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
284
294
|
const funcEditor = new ActivityCliffsEditor({semtype: DG.SEMTYPE.MACROMOLECULE});
|
|
285
|
-
ui.dialog({title: 'Activity Cliffs'})
|
|
295
|
+
const dialog = ui.dialog({title: 'Activity Cliffs'})
|
|
286
296
|
.add(funcEditor.getEditor())
|
|
287
297
|
.onOK(async () => {
|
|
288
298
|
const params = funcEditor.getParams();
|
|
@@ -296,7 +306,9 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
296
306
|
preprocessingFunction: params.preprocessingFunction,
|
|
297
307
|
options: params.options,
|
|
298
308
|
}).call();
|
|
299
|
-
})
|
|
309
|
+
});
|
|
310
|
+
dialog.history(() => ({editorSettings: funcEditor.getStringInput()}), (x: any) => funcEditor.applyStringInput(x['editorSettings']));
|
|
311
|
+
dialog.show();
|
|
300
312
|
}
|
|
301
313
|
|
|
302
314
|
// -- Package settings editor --
|
|
@@ -370,7 +382,7 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
370
382
|
//output: object res
|
|
371
383
|
export function sequenceAlignment(alignType: string, alignTable: string, gap: number, seq1: string, seq2: string) {
|
|
372
384
|
const toAlign = new SequenceAlignment(seq1, seq2, gap, alignTable);
|
|
373
|
-
const res = alignType == 'Local alignment' ? toAlign.smithWaterman() : toAlign.
|
|
385
|
+
const res = alignType == 'Local alignment' ? toAlign.smithWaterman() : toAlign.needlemanWunsch();
|
|
374
386
|
return res;
|
|
375
387
|
}
|
|
376
388
|
|
|
@@ -411,6 +423,14 @@ export function getRegion(
|
|
|
411
423
|
start ?? null, end ?? null, name ?? null);
|
|
412
424
|
}
|
|
413
425
|
|
|
426
|
+
//top-menu: Bio | Manage | Monomers
|
|
427
|
+
//name: manageMonomersView
|
|
428
|
+
//description: Edit and create monomers
|
|
429
|
+
export async function manageMonomersView() {
|
|
430
|
+
const monomerManager = await MonomerManager.getInstance();
|
|
431
|
+
await monomerManager.getViewRoot();
|
|
432
|
+
}
|
|
433
|
+
|
|
414
434
|
//top-menu: Bio | Convert | Get Region...
|
|
415
435
|
//name: Get Region Top Menu
|
|
416
436
|
//description: Get sequences for a region specified from a Macromolecule
|
|
@@ -568,13 +588,16 @@ export async function helmPreprocessingFunction(
|
|
|
568
588
|
//input: func preprocessingFunction {optional: true}
|
|
569
589
|
//input: object options {optional: true}
|
|
570
590
|
//input: bool clusterEmbeddings = true { optional: true }
|
|
591
|
+
//input: bool isDemo {optional: true}
|
|
571
592
|
//output: viewer result
|
|
572
593
|
//editor: Bio:SequenceSpaceEditor
|
|
573
594
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column,
|
|
574
595
|
methodName: DimReductionMethods, similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
|
|
575
596
|
plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
576
|
-
clusterEmbeddings?: boolean
|
|
597
|
+
clusterEmbeddings?: boolean, isDemo?: boolean
|
|
577
598
|
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
599
|
+
const tableView = isDemo ? (grok.shell.view('Browse')! as DG.BrowseView)!.preview! as DG.TableView :
|
|
600
|
+
grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
|
|
578
601
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
579
602
|
return;
|
|
580
603
|
if (!preprocessingFunction)
|
|
@@ -583,10 +606,12 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
583
606
|
const res = await multiColReduceDimensionality(table, [molecules], methodName,
|
|
584
607
|
[similarityMetric as KnownMetrics], [1], [preprocessingFunction], 'MANHATTAN',
|
|
585
608
|
plotEmbeddings, clusterEmbeddings ?? false,
|
|
586
|
-
{...options, preprocessingFuncArgs: [options.preprocessingFuncArgs ?? {}]},
|
|
609
|
+
/* dimRedOptions */ {...options, preprocessingFuncArgs: [options.preprocessingFuncArgs ?? {}]},
|
|
610
|
+
/* uiOptions */{
|
|
587
611
|
fastRowCount: 10000,
|
|
588
612
|
scatterPlotName: 'Sequence space',
|
|
589
613
|
bypassLargeDataWarning: options?.[BYPASS_LARGE_DATA_WARNING],
|
|
614
|
+
tableView: tableView,
|
|
590
615
|
});
|
|
591
616
|
return res;
|
|
592
617
|
}
|
|
@@ -595,8 +620,9 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
|
|
|
595
620
|
//name: To Atomic Level
|
|
596
621
|
//description: Converts sequences to molblocks
|
|
597
622
|
//input: dataframe table [Input data table]
|
|
598
|
-
//input: column
|
|
599
|
-
//input: bool nonlinear =false {description: Slower mode for cycling/branching HELM structures}
|
|
623
|
+
//input: column seqCol {caption: Sequence; semType: Macromolecule}
|
|
624
|
+
//input: bool nonlinear =false {caption: Non linear; description: Slower mode for cycling/branching HELM structures}
|
|
625
|
+
//output:
|
|
600
626
|
export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean): Promise<void> {
|
|
601
627
|
const pi = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');
|
|
602
628
|
try {
|
|
@@ -893,13 +919,18 @@ export async function sequenceSimilarityScoring(
|
|
|
893
919
|
}
|
|
894
920
|
|
|
895
921
|
|
|
896
|
-
//top-menu: Bio | Manage | Monomer Libraries
|
|
897
922
|
//name: Manage Monomer Libraries
|
|
898
923
|
//description: Manage HELM monomer libraries
|
|
899
924
|
export async function manageMonomerLibraries(): Promise<void> {
|
|
900
925
|
showManageLibrariesDialog();
|
|
901
926
|
}
|
|
902
927
|
|
|
928
|
+
//top-menu: Bio | Manage | Monomer Libraries
|
|
929
|
+
//name: Manage Monomer Libraries View
|
|
930
|
+
export async function manageLibrariesView(): Promise<void> {
|
|
931
|
+
await showManageLibrariesView();
|
|
932
|
+
}
|
|
933
|
+
|
|
903
934
|
//name: saveAsFasta
|
|
904
935
|
//description: As FASTA...
|
|
905
936
|
//tags: fileExporter
|
|
@@ -1076,13 +1107,19 @@ export async function detectMacromoleculeProbe(file: DG.FileInfo, colName: strin
|
|
|
1076
1107
|
await detectMacromoleculeProbeDo(csv, colName, probeCount);
|
|
1077
1108
|
}
|
|
1078
1109
|
|
|
1110
|
+
//name: getSeqHelper
|
|
1111
|
+
//output: object result
|
|
1112
|
+
export async function getSeqHelper(): Promise<ISeqHelper> {
|
|
1113
|
+
return SeqHelper.getInstance();
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1079
1116
|
//name: getMolFromHelm
|
|
1080
1117
|
//input: dataframe df
|
|
1081
1118
|
//input: column helmCol
|
|
1082
|
-
//input: bool chiralityEngine
|
|
1119
|
+
//input: bool chiralityEngine = true
|
|
1083
1120
|
//output: column result
|
|
1084
|
-
export
|
|
1085
|
-
df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine
|
|
1121
|
+
export function getMolFromHelm(
|
|
1122
|
+
df: DG.DataFrame, helmCol: DG.Column<string>, chiralityEngine: boolean
|
|
1086
1123
|
): Promise<DG.Column<string>> {
|
|
1087
1124
|
return getMolColumnFromHelm(df, helmCol, chiralityEngine);
|
|
1088
1125
|
}
|
|
@@ -1095,3 +1132,10 @@ export async function getMolFromHelm(
|
|
|
1095
1132
|
export function applyNotationProviderForCyclized(col: DG.Column<string>, separator: string) {
|
|
1096
1133
|
col.temp[SeqTemps.notationProvider] = new CyclizedNotationProvider(separator);
|
|
1097
1134
|
}
|
|
1135
|
+
|
|
1136
|
+
//name: applyNotationProviderForDimerized
|
|
1137
|
+
//input: column col
|
|
1138
|
+
//input: string separator
|
|
1139
|
+
export function applyNotationProviderForDimerized(col: DG.Column<string>, separator: string) {
|
|
1140
|
+
col.temp[SeqTemps.notationProvider] = new DimerizedNotationProvider(separator);
|
|
1141
|
+
}
|
package/src/seq_align.ts
CHANGED
|
@@ -63,8 +63,8 @@ export class SubstructureSearchDialog {
|
|
|
63
63
|
|
|
64
64
|
createUI(): void {
|
|
65
65
|
const dataframe = grok.shell.tv.dataFrame;
|
|
66
|
-
this.columnsInput = ui.input.column('Column', {table: dataframe, value: this.col, onValueChanged: (
|
|
67
|
-
this.col =
|
|
66
|
+
this.columnsInput = ui.input.column('Column', {table: dataframe, value: this.col, onValueChanged: (value) => {
|
|
67
|
+
this.col = value;
|
|
68
68
|
this.updateNotationDiv();
|
|
69
69
|
this.updateInputs();
|
|
70
70
|
}, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE});
|
|
@@ -51,13 +51,12 @@ export async function uploadProject(projectName: string, tableInfo: DG.TableInfo
|
|
|
51
51
|
const project = DG.Project.create();
|
|
52
52
|
const viewLayout = view.saveLayout();
|
|
53
53
|
|
|
54
|
-
await grok.dapi.layouts.save(view.saveLayout());
|
|
55
|
-
await grok.dapi.tables.uploadDataFrame(df);
|
|
56
|
-
await grok.dapi.tables.save(tableInfo);
|
|
57
|
-
|
|
58
54
|
project.name = projectName;
|
|
59
55
|
project.addChild(tableInfo);
|
|
60
56
|
project.addChild(viewLayout); // cause error
|
|
61
57
|
|
|
58
|
+
await grok.dapi.layouts.save(view.saveLayout());
|
|
59
|
+
await grok.dapi.tables.uploadDataFrame(df);
|
|
60
|
+
await grok.dapi.tables.save(tableInfo);
|
|
62
61
|
await grok.dapi.projects.save(project);
|
|
63
62
|
}
|
|
@@ -20,9 +20,6 @@ import {_package} from '../package-test';
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
category('activityCliffs', async () => {
|
|
23
|
-
let viewList: DG.ViewBase[] = [];
|
|
24
|
-
let dfList: DG.DataFrame[] = [];
|
|
25
|
-
|
|
26
23
|
let helmHelper: IHelmHelper;
|
|
27
24
|
let monomerLibHelper: IMonomerLibHelper;
|
|
28
25
|
/** Backup actual user's monomer libraries settings */
|
|
@@ -37,38 +34,28 @@ category('activityCliffs', async () => {
|
|
|
37
34
|
// Test 'helm' requires default monomer library loaded
|
|
38
35
|
await setUserLibSettingsForTests();
|
|
39
36
|
await monomerLibHelper.loadMonomerLib(true); // load default libraries
|
|
40
|
-
|
|
41
|
-
viewList = [];
|
|
42
|
-
dfList = [];
|
|
43
37
|
});
|
|
44
38
|
|
|
45
39
|
after(async () => {
|
|
46
|
-
// for (const df of dfList) grok.shell.closeTable(df);
|
|
47
|
-
// for (const view of viewList) view.close();
|
|
48
|
-
|
|
49
40
|
// UserDataStorage.put() replaces existing data
|
|
50
41
|
await setUserLibSettings(userLibSettings);
|
|
51
42
|
await monomerLibHelper.loadMonomerLib(true); // load user settings libraries
|
|
52
43
|
});
|
|
53
44
|
|
|
54
45
|
test('activityCliffsOpens', async () => {
|
|
55
|
-
const
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
46
|
+
const testData = !DG.Test.isInBenchmark ?
|
|
47
|
+
{fileName: 'tests/100_3_clustests.csv', tgt: {cliffCount: 3}} :
|
|
48
|
+
{fileName: 'tests/peptides_motif-with-random_10000.csv', tgt: {cliffCount: 53}};
|
|
49
|
+
const actCliffsDf = await readDataframe(testData.fileName);
|
|
59
50
|
const actCliffsTableView = grok.shell.addTableView(actCliffsDf);
|
|
60
|
-
viewList.push(actCliffsTableView);
|
|
61
|
-
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
62
51
|
|
|
63
52
|
await _testActivityCliffsOpen(actCliffsDf, DimReductionMethods.UMAP,
|
|
64
|
-
'sequence', 'Activity', 90,
|
|
53
|
+
'sequence', 'Activity', 90, testData.tgt.cliffCount, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
65
54
|
}, {benchmark: true});
|
|
66
55
|
|
|
67
56
|
test('activityCliffsWithEmptyRows', async () => {
|
|
68
57
|
const actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
|
69
|
-
dfList.push(actCliffsDfWithEmptyRows);
|
|
70
58
|
const actCliffsTableViewWithEmptyRows = grok.shell.addTableView(actCliffsDfWithEmptyRows);
|
|
71
|
-
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
72
59
|
|
|
73
60
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, DimReductionMethods.UMAP,
|
|
74
61
|
'sequence', 'Activity', 90, 3, MmDistanceFunctionsNames.LEVENSHTEIN, seqEncodingFunc);
|
|
@@ -6,6 +6,8 @@ import {before, category, test, expect} from '@datagrok-libraries/utils/src/test
|
|
|
6
6
|
import {ALPHABET, getAlphabet, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {SeqHandler} from '@datagrok-libraries/bio/src/utils/seq-handler';
|
|
8
8
|
|
|
9
|
+
import {_package} from '../package-test';
|
|
10
|
+
|
|
9
11
|
category('detectorsBenchmark', () => {
|
|
10
12
|
let detectFunc: DG.Func;
|
|
11
13
|
|
|
@@ -139,17 +141,30 @@ async function benchmark<TData, TRes>(
|
|
|
139
141
|
): Promise<number> {
|
|
140
142
|
const data: TData = await prepare();
|
|
141
143
|
|
|
142
|
-
const
|
|
143
|
-
//
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
const
|
|
144
|
+
const tryCount = 60;
|
|
145
|
+
const maxOutCount = 3; // 95%
|
|
146
|
+
let outCount: number = 0;
|
|
147
|
+
let outResET: number = 0;
|
|
148
|
+
const resEtList: number[] = new Array<number>(tryCount);
|
|
149
|
+
let resET: number = 0;
|
|
150
|
+
for (let tryI = 0; tryI < 20; ++tryI) {
|
|
151
|
+
const t1: number = Date.now();
|
|
152
|
+
// console.profile();
|
|
153
|
+
const res: TRes = await test(data);
|
|
154
|
+
//console.profileEnd();
|
|
155
|
+
const t2: number = Date.now();
|
|
156
|
+
|
|
157
|
+
resET = resEtList[tryI] = t2 - t1;
|
|
158
|
+
if (resET > maxET) {
|
|
159
|
+
outCount++;
|
|
160
|
+
outResET = Math.max(outResET, resET);
|
|
161
|
+
}
|
|
147
162
|
|
|
148
|
-
|
|
163
|
+
check(res);
|
|
164
|
+
}
|
|
149
165
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
const errMsg = `ET ${resET} ms is more than max allowed ${maxET} ms.`;
|
|
166
|
+
if (outCount > maxOutCount) {
|
|
167
|
+
const errMsg = `ET ${outResET} ms is more than max allowed ${maxET} ms. ET: ${JSON.stringify(resEtList)}`;
|
|
153
168
|
console.error(errMsg);
|
|
154
169
|
throw new Error(errMsg);
|
|
155
170
|
} else
|
|
@@ -118,9 +118,10 @@ ATCGAATCGA`;
|
|
|
118
118
|
{scoringMatrix, alphabetIndexes, gapOpen: 1, gapExtend: 1},
|
|
119
119
|
);
|
|
120
120
|
if (DG.Test.isInBenchmark) {
|
|
121
|
-
|
|
122
|
-
const
|
|
123
|
-
|
|
121
|
+
// max length 10000 for Needleman-Wunsch
|
|
122
|
+
const seq1 = /* length 3000 */ Array(1000).fill(prot5).join('');
|
|
123
|
+
const seq2 = /* length 7000 */Array(1000).fill(prot6).join('');
|
|
124
|
+
_testDistance(seq1, seq2, df, 1.333);
|
|
124
125
|
} else
|
|
125
126
|
_testDistance(prot5, prot6, df, 1.333);
|
|
126
127
|
}, {benchmark: true});
|
|
@@ -26,8 +26,8 @@ category('monomerLibraries', () => {
|
|
|
26
26
|
|
|
27
27
|
test('default', async () => {
|
|
28
28
|
// Clear settings to test default
|
|
29
|
-
await setUserLibSettings({exclude: [], explicit: []});
|
|
30
|
-
await monomerLibHelper.
|
|
29
|
+
await setUserLibSettings({exclude: [], explicit: [], duplicateMonomerPreferences: {}});
|
|
30
|
+
await monomerLibHelper.loadLibraries(true); // test defaultLib
|
|
31
31
|
|
|
32
32
|
// Currently default monomer lib set is of all files at LIB_PATH (at least HELMCoreLibrary.json)
|
|
33
33
|
const currentMonomerLib = monomerLibHelper.getMonomerLib();
|
|
@@ -42,7 +42,7 @@ category('monomerLibraries', () => {
|
|
|
42
42
|
const currentMonomerLib = monomerLibHelper.getMonomerLib();
|
|
43
43
|
// HELMCoreLibrary.json checks
|
|
44
44
|
expect(currentMonomerLib.getPolymerTypes().length, 2);
|
|
45
|
-
expect(currentMonomerLib.getMonomerSymbolsByType('PEPTIDE').length,
|
|
45
|
+
expect(currentMonomerLib.getMonomerSymbolsByType('PEPTIDE').length, 324);
|
|
46
46
|
expect(currentMonomerLib.getMonomerSymbolsByType('RNA').length, 383);
|
|
47
47
|
});
|
|
48
48
|
|