@datagrok/bio 2.11.30 → 2.11.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/dist/36.js +1 -1
- package/dist/36.js.map +1 -1
- package/dist/42.js +1 -1
- package/dist/42.js.map +1 -1
- package/dist/590.js +2 -0
- package/dist/590.js.map +1 -0
- package/dist/709.js +1 -2
- package/dist/709.js.map +1 -1
- package/dist/79.js.map +1 -1
- package/dist/895.js +3 -0
- package/dist/895.js.map +1 -0
- package/dist/package-test.js +8 -1
- package/dist/package-test.js.LICENSE.txt +1 -0
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +8 -1
- package/dist/package.js.LICENSE.txt +1 -0
- package/dist/package.js.map +1 -1
- package/files/{data → monomer-libraries}/HELMCoreLibrary.json +594 -594
- package/files/tests/libraries/HELMmonomerSchema.json +96 -0
- package/package.json +12 -10
- package/scripts/sequence_generator.md +48 -0
- package/scripts/sequence_generator.py +515 -256
- package/src/package-test.ts +4 -0
- package/src/package.ts +26 -24
- package/src/tests/WebLogo-layout-tests.ts +37 -0
- package/src/tests/WebLogo-positions-test.ts +5 -0
- package/src/tests/WebLogo-project-tests.ts +63 -0
- package/src/tests/activity-cliffs-tests.ts +3 -2
- package/src/tests/monomer-libraries-tests.ts +7 -4
- package/src/tests/scoring.ts +3 -2
- package/src/tests/substructure-filters-tests.ts +3 -2
- package/src/tests/to-atomic-level-tests.ts +3 -2
- package/src/utils/helm-to-molfile.ts +3 -3
- package/src/utils/monomer-lib/lib-manager.ts +116 -0
- package/src/utils/monomer-lib/library-file-manager/consts.ts +1 -0
- package/src/utils/monomer-lib/library-file-manager/custom-monomer-lib-handlers.ts +80 -0
- package/src/utils/monomer-lib/library-file-manager/event-manager.ts +58 -0
- package/src/utils/monomer-lib/library-file-manager/file-manager.ts +187 -0
- package/src/utils/monomer-lib/library-file-manager/file-validator.ts +56 -0
- package/src/utils/monomer-lib/library-file-manager/style.css +8 -0
- package/src/utils/monomer-lib/library-file-manager/ui.ts +224 -0
- package/src/utils/monomer-lib/monomer-lib.ts +114 -0
- package/src/utils/poly-tool/const.ts +28 -0
- package/src/utils/poly-tool/monomer-lib-handler.ts +115 -0
- package/src/utils/poly-tool/types.ts +6 -0
- package/src/utils/poly-tool/ui.ts +2 -2
- package/src/viewers/vd-regions-viewer.ts +5 -4
- package/src/viewers/web-logo-viewer.ts +6 -5
- package/src/widgets/bio-substructure-filter.ts +4 -1
- package/files/libraries/HELMCoreLibrary.json +0 -18218
- package/src/utils/monomer-lib.ts +0 -305
- /package/dist/{709.js.LICENSE.txt → 895.js.LICENSE.txt} +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"description": "HELM monomer JSON Schema draft v1",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"properties": {
|
|
6
|
+
"symbol": {
|
|
7
|
+
"description": "Short name for the monomer. This will be displayed on the canvas.",
|
|
8
|
+
"type": "string"
|
|
9
|
+
},
|
|
10
|
+
"name": {
|
|
11
|
+
"description": "Long name for the monomer. Generally an IUPAC or commonly used trivial chemical name.",
|
|
12
|
+
"type": "string"
|
|
13
|
+
},
|
|
14
|
+
"molfile": {
|
|
15
|
+
"description": "Molfile with carriage returns escaped.",
|
|
16
|
+
"type": "string"
|
|
17
|
+
},
|
|
18
|
+
"author": {
|
|
19
|
+
"description": "Name of the person who entered the monomer information.",
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
22
|
+
"id": {
|
|
23
|
+
"description": "Unique ID for the monomer. There is no meaning associated with this ID value.",
|
|
24
|
+
"type": "integer"
|
|
25
|
+
},
|
|
26
|
+
"rgroups": {
|
|
27
|
+
"description": "An array of the monomer R groups and required information.",
|
|
28
|
+
"type": "array",
|
|
29
|
+
"items": {
|
|
30
|
+
"type": "object",
|
|
31
|
+
"properties": {
|
|
32
|
+
"id": {
|
|
33
|
+
"description": "meaningless identifier for the r group",
|
|
34
|
+
"type": "integer"
|
|
35
|
+
},
|
|
36
|
+
"alternateId": {
|
|
37
|
+
"description": "Descriptive ID for the R group consisting of the label and R group plus number",
|
|
38
|
+
"type": "string"
|
|
39
|
+
},
|
|
40
|
+
"label": {
|
|
41
|
+
"description": "R followed by an integer denoting the R group number",
|
|
42
|
+
"type": "string"
|
|
43
|
+
},
|
|
44
|
+
"capGroupName": {
|
|
45
|
+
"description": "A list of the atoms in the leaving group. This is descriptive, and exists to show the user the leaving group atoms",
|
|
46
|
+
"type": "string"
|
|
47
|
+
},
|
|
48
|
+
"capGroupSMILES": {
|
|
49
|
+
"description": "SMILES of the R group, uses atom mapping notation",
|
|
50
|
+
"type": "string"
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"required": [
|
|
54
|
+
"alternateId",
|
|
55
|
+
"label",
|
|
56
|
+
"capGroupName",
|
|
57
|
+
"capGroupSMILES"
|
|
58
|
+
]
|
|
59
|
+
}
|
|
60
|
+
},
|
|
61
|
+
"smiles": {
|
|
62
|
+
"description": "Canonical SMILES of the monomer including connection points.",
|
|
63
|
+
"type": "string"
|
|
64
|
+
},
|
|
65
|
+
"polymerType": {
|
|
66
|
+
"description": "Type must be one of a fixed list of agreed polymer types.",
|
|
67
|
+
"type": "string",
|
|
68
|
+
"enum": ["RNA", "PEPTIDE", "CHEM"]
|
|
69
|
+
},
|
|
70
|
+
"naturalAnalog": {
|
|
71
|
+
"description": "Natural analogue of the monomer where available.",
|
|
72
|
+
"type": "string"
|
|
73
|
+
},
|
|
74
|
+
"monomerType": {
|
|
75
|
+
"description": "Type of monomer from a fixed list of agreed monomer types.",
|
|
76
|
+
"type": "string",
|
|
77
|
+
"enum": ["Backbone", "Branch", "Terminal", "Undefined"]
|
|
78
|
+
},
|
|
79
|
+
"createDate": {
|
|
80
|
+
"description": "Date created.",
|
|
81
|
+
"type": ["string", "null"]
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
"required": [
|
|
85
|
+
"symbol",
|
|
86
|
+
"name",
|
|
87
|
+
"molfile",
|
|
88
|
+
"author",
|
|
89
|
+
"id",
|
|
90
|
+
"rgroups",
|
|
91
|
+
"smiles",
|
|
92
|
+
"polymerType",
|
|
93
|
+
"monomerType",
|
|
94
|
+
"createDate"
|
|
95
|
+
]
|
|
96
|
+
}
|
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.11.30",
|
|
8
|
+
"version": "2.11.33",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,11 +34,13 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.39.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.39.25",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.2.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.4.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.4.10",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.11",
|
|
41
|
-
"
|
|
41
|
+
"ajv": "^8.12.0",
|
|
42
|
+
"ajv-errors": "^3.0.0",
|
|
43
|
+
"@datagrok-libraries/utils": "^4.1.44",
|
|
42
44
|
"@datagrok-libraries/math": "^1.0.7",
|
|
43
45
|
"cash-dom": "^8.0.0",
|
|
44
46
|
"css-loader": "^6.7.3",
|
|
@@ -48,10 +50,13 @@
|
|
|
48
50
|
"openchemlib": "6.0.1",
|
|
49
51
|
"rxjs": "^6.5.5",
|
|
50
52
|
"style-loader": "^3.3.1",
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
+
"umap-js": "^1.3.3",
|
|
54
|
+
"wu": "latest"
|
|
53
55
|
},
|
|
54
56
|
"devDependencies": {
|
|
57
|
+
"@datagrok/chem": "^1.8.11",
|
|
58
|
+
"@datagrok/dendrogram": "^1.2.22",
|
|
59
|
+
"@datagrok/helm": "^2.1.27",
|
|
55
60
|
"@types/node": "^17.0.24",
|
|
56
61
|
"@types/wu": "latest",
|
|
57
62
|
"@typescript-eslint/eslint-plugin": "latest",
|
|
@@ -64,10 +69,7 @@
|
|
|
64
69
|
"typescript": "^4.8.4",
|
|
65
70
|
"webpack": "^5.76.3",
|
|
66
71
|
"webpack-bundle-analyzer": "latest",
|
|
67
|
-
"webpack-cli": "^4.9.1"
|
|
68
|
-
"@datagrok/chem": "^1.8.11",
|
|
69
|
-
"@datagrok/helm": "^2.1.27",
|
|
70
|
-
"@datagrok/dendrogram": "^1.2.22"
|
|
72
|
+
"webpack-cli": "^4.9.1"
|
|
71
73
|
},
|
|
72
74
|
"scripts": {
|
|
73
75
|
"link-api": "npm link datagrok-api",
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Sequence generator
|
|
2
|
+
|
|
3
|
+
The utility generates clusters of macromolecule sequences to test SAR functionality.
|
|
4
|
+
Each cluster contains a randomly generated sequence motif.
|
|
5
|
+
Each sequence has activity - a Gauss-distributed random value.
|
|
6
|
+
The utility can simulate activity cliffs - random changes in the conservative motif letters,
|
|
7
|
+
leading to a significant change in the activity.
|
|
8
|
+
The utility can simulate multiple experimental assays measuring activity, with different scales and noise levels.
|
|
9
|
+
|
|
10
|
+
### Run options
|
|
11
|
+
The utility can work in two modes:
|
|
12
|
+
* Standalone command-line tool. Run the utility with the `--help` option to get detailed help.
|
|
13
|
+
* Datagrok script. In this mode, Datagrok automatically generates utility UI.
|
|
14
|
+
|
|
15
|
+
## Utility algorithm
|
|
16
|
+
|
|
17
|
+
### Motif generation
|
|
18
|
+
* Specify the sequence alphabet: DNA/RNA/Peptides/HELM file
|
|
19
|
+
* Generate motif template
|
|
20
|
+
* Iterate through all positions in the motif
|
|
21
|
+
* With the probability `prob_any` it will be a non-conservative letter.
|
|
22
|
+
In this case, place the `?` code in this position.
|
|
23
|
+
The non-conservative letters can't reside on motif ends.
|
|
24
|
+
* Otherwise, generate a conservative letter
|
|
25
|
+
* Generate a random integer in range `[1-max_variants_cluster]` - the number of possible letters in this position.
|
|
26
|
+
* Randomly choose the corresponding number of letters from the alphabet.
|
|
27
|
+
* Generate cluster sequences by template
|
|
28
|
+
* Iterate through all motif template positions
|
|
29
|
+
* Take the letter from the list of allowed letters for the position
|
|
30
|
+
* With the probability `cliff_probability` generate sequence cliff:
|
|
31
|
+
* Choose a conservative letter
|
|
32
|
+
* Mutate it to a random letter not present in the allowed template letters for this position.
|
|
33
|
+
|
|
34
|
+
## Activity generation
|
|
35
|
+
|
|
36
|
+
The activity generation simulates experimental data from some real assay with noise and bias.
|
|
37
|
+
* Generate random mean value for Gauss distribution for "ideal" activity generation.
|
|
38
|
+
The `activity_range` parameter defines the range of ideal activities
|
|
39
|
+
* Generate "ideal" activities from a Gauss distribution with sigma=1
|
|
40
|
+
* Make activity cliffs for the pairs of sequences:
|
|
41
|
+
* Calculate difference between generated activities
|
|
42
|
+
* Calculate `cliff_size` - random Gauss-distributed value
|
|
43
|
+
* "Push apart" these values to ensure that the difference between them is at least `cliff_size`
|
|
44
|
+
* Calculate "assay activities" for each assay
|
|
45
|
+
* Generate "noise" - Gauss-distributed value with a randomly chosen center
|
|
46
|
+
* Calculate `real_activity = ideal_activity + noise*noise_level`
|
|
47
|
+
The `noise_level` is the ratio of signal/noise for the assay
|
|
48
|
+
* Rescale the activity to fit in the requested assay scale
|