@datagrok/bio 2.11.30 → 2.11.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/dist/36.js +1 -1
  3. package/dist/36.js.map +1 -1
  4. package/dist/42.js +1 -1
  5. package/dist/42.js.map +1 -1
  6. package/dist/590.js +2 -0
  7. package/dist/590.js.map +1 -0
  8. package/dist/709.js +1 -2
  9. package/dist/709.js.map +1 -1
  10. package/dist/79.js.map +1 -1
  11. package/dist/895.js +3 -0
  12. package/dist/895.js.map +1 -0
  13. package/dist/package-test.js +8 -1
  14. package/dist/package-test.js.LICENSE.txt +1 -0
  15. package/dist/package-test.js.map +1 -1
  16. package/dist/package.js +8 -1
  17. package/dist/package.js.LICENSE.txt +1 -0
  18. package/dist/package.js.map +1 -1
  19. package/files/{data → monomer-libraries}/HELMCoreLibrary.json +594 -594
  20. package/files/tests/libraries/HELMmonomerSchema.json +96 -0
  21. package/package.json +12 -10
  22. package/scripts/sequence_generator.md +48 -0
  23. package/scripts/sequence_generator.py +515 -256
  24. package/src/package-test.ts +4 -0
  25. package/src/package.ts +26 -24
  26. package/src/tests/WebLogo-layout-tests.ts +37 -0
  27. package/src/tests/WebLogo-positions-test.ts +5 -0
  28. package/src/tests/WebLogo-project-tests.ts +63 -0
  29. package/src/tests/activity-cliffs-tests.ts +3 -2
  30. package/src/tests/monomer-libraries-tests.ts +7 -4
  31. package/src/tests/scoring.ts +3 -2
  32. package/src/tests/substructure-filters-tests.ts +3 -2
  33. package/src/tests/to-atomic-level-tests.ts +3 -2
  34. package/src/utils/helm-to-molfile.ts +3 -3
  35. package/src/utils/monomer-lib/lib-manager.ts +116 -0
  36. package/src/utils/monomer-lib/library-file-manager/consts.ts +1 -0
  37. package/src/utils/monomer-lib/library-file-manager/custom-monomer-lib-handlers.ts +80 -0
  38. package/src/utils/monomer-lib/library-file-manager/event-manager.ts +58 -0
  39. package/src/utils/monomer-lib/library-file-manager/file-manager.ts +187 -0
  40. package/src/utils/monomer-lib/library-file-manager/file-validator.ts +56 -0
  41. package/src/utils/monomer-lib/library-file-manager/style.css +8 -0
  42. package/src/utils/monomer-lib/library-file-manager/ui.ts +224 -0
  43. package/src/utils/monomer-lib/monomer-lib.ts +114 -0
  44. package/src/utils/poly-tool/const.ts +28 -0
  45. package/src/utils/poly-tool/monomer-lib-handler.ts +115 -0
  46. package/src/utils/poly-tool/types.ts +6 -0
  47. package/src/utils/poly-tool/ui.ts +2 -2
  48. package/src/viewers/vd-regions-viewer.ts +5 -4
  49. package/src/viewers/web-logo-viewer.ts +6 -5
  50. package/src/widgets/bio-substructure-filter.ts +4 -1
  51. package/files/libraries/HELMCoreLibrary.json +0 -18218
  52. package/src/utils/monomer-lib.ts +0 -305
  53. /package/dist/{709.js.LICENSE.txt → 895.js.LICENSE.txt} +0 -0
@@ -0,0 +1,96 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "description": "HELM monomer JSON Schema draft v1",
4
+ "type": "object",
5
+ "properties": {
6
+ "symbol": {
7
+ "description": "Short name for the monomer. This will be displayed on the canvas.",
8
+ "type": "string"
9
+ },
10
+ "name": {
11
+ "description": "Long name for the monomer. Generally an IUPAC or commonly used trivial chemical name.",
12
+ "type": "string"
13
+ },
14
+ "molfile": {
15
+ "description": "Molfile with carriage returns escaped.",
16
+ "type": "string"
17
+ },
18
+ "author": {
19
+ "description": "Name of the person who entered the monomer information.",
20
+ "type": "string"
21
+ },
22
+ "id": {
23
+ "description": "Unique ID for the monomer. There is no meaning associated with this ID value.",
24
+ "type": "integer"
25
+ },
26
+ "rgroups": {
27
+ "description": "An array of the monomer R groups and required information.",
28
+ "type": "array",
29
+ "items": {
30
+ "type": "object",
31
+ "properties": {
32
+ "id": {
33
+ "description": "Meaningless identifier for the R group.",
34
+ "type": "integer"
35
+ },
36
+ "alternateId": {
37
+ "description": "Descriptive ID for the R group consisting of the label and R group plus number",
38
+ "type": "string"
39
+ },
40
+ "label": {
41
+ "description": "R followed by an integer denoting the R group number",
42
+ "type": "string"
43
+ },
44
+ "capGroupName": {
45
+ "description": "A list of the atoms in the leaving group. This is descriptive, and exists to show the user the leaving group atoms",
46
+ "type": "string"
47
+ },
48
+ "capGroupSMILES": {
49
+ "description": "SMILES of the R group, uses atom mapping notation",
50
+ "type": "string"
51
+ }
52
+ },
53
+ "required": [
54
+ "alternateId",
55
+ "label",
56
+ "capGroupName",
57
+ "capGroupSMILES"
58
+ ]
59
+ }
60
+ },
61
+ "smiles": {
62
+ "description": "Canonical SMILES of the monomer including connection points.",
63
+ "type": "string"
64
+ },
65
+ "polymerType": {
66
+ "description": "Type must be one of a fixed list of agreed polymer types.",
67
+ "type": "string",
68
+ "enum": ["RNA", "PEPTIDE", "CHEM"]
69
+ },
70
+ "naturalAnalog": {
71
+ "description": "Natural analogue of the monomer where available.",
72
+ "type": "string"
73
+ },
74
+ "monomerType": {
75
+ "description": "Type of monomer from a fixed list of agreed monomer types.",
76
+ "type": "string",
77
+ "enum": ["Backbone", "Branch", "Terminal", "Undefined"]
78
+ },
79
+ "createDate": {
80
+ "description": "Date created.",
81
+ "type": ["string", "null"]
82
+ }
83
+ },
84
+ "required": [
85
+ "symbol",
86
+ "name",
87
+ "molfile",
88
+ "author",
89
+ "id",
90
+ "rgroups",
91
+ "smiles",
92
+ "polymerType",
93
+ "monomerType",
94
+ "createDate"
95
+ ]
96
+ }
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.11.30",
8
+ "version": "2.11.33",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,11 +34,13 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.39.24",
37
+ "@datagrok-libraries/bio": "^5.39.25",
38
38
  "@datagrok-libraries/chem-meta": "^1.2.1",
39
- "@datagrok-libraries/ml": "^6.4.7",
39
+ "@datagrok-libraries/ml": "^6.4.10",
40
40
  "@datagrok-libraries/tutorials": "^1.3.11",
41
- "@datagrok-libraries/utils": "^4.1.43",
41
+ "ajv": "^8.12.0",
42
+ "ajv-errors": "^3.0.0",
43
+ "@datagrok-libraries/utils": "^4.1.44",
42
44
  "@datagrok-libraries/math": "^1.0.7",
43
45
  "cash-dom": "^8.0.0",
44
46
  "css-loader": "^6.7.3",
@@ -48,10 +50,13 @@
48
50
  "openchemlib": "6.0.1",
49
51
  "rxjs": "^6.5.5",
50
52
  "style-loader": "^3.3.1",
51
- "wu": "latest",
52
- "umap-js": "^1.3.3"
53
+ "umap-js": "^1.3.3",
54
+ "wu": "latest"
53
55
  },
54
56
  "devDependencies": {
57
+ "@datagrok/chem": "^1.8.11",
58
+ "@datagrok/dendrogram": "^1.2.22",
59
+ "@datagrok/helm": "^2.1.27",
55
60
  "@types/node": "^17.0.24",
56
61
  "@types/wu": "latest",
57
62
  "@typescript-eslint/eslint-plugin": "latest",
@@ -64,10 +69,7 @@
64
69
  "typescript": "^4.8.4",
65
70
  "webpack": "^5.76.3",
66
71
  "webpack-bundle-analyzer": "latest",
67
- "webpack-cli": "^4.9.1",
68
- "@datagrok/chem": "^1.8.11",
69
- "@datagrok/helm": "^2.1.27",
70
- "@datagrok/dendrogram": "^1.2.22"
72
+ "webpack-cli": "^4.9.1"
71
73
  },
72
74
  "scripts": {
73
75
  "link-api": "npm link datagrok-api",
@@ -0,0 +1,48 @@
1
+ # Sequence generator
2
+
3
+ The utility generates clusters of macromolecule sequences to test SAR functionality.
4
+ Each cluster contains a randomly generated sequence motif.
5
+ Each sequence has activity - a Gauss-distributed random value.
6
+ The utility can simulate activity cliffs - random changes in the conservative motif letters,
7
+ leading to a significant change in the activity.
8
+ The utility can simulate multiple experimental assays measuring activity, with different scales and noise levels.
9
+
10
+ ### Run options
11
+ The utility can work in two modes:
12
+ * Standalone command-line tool. Run the utility with the `--help` option to get detailed help.
13
+ * Datagrok script. In this mode, Datagrok automatically generates utility UI.
14
+
15
+ ## Utility algorithm
16
+
17
+ ### Motif generation
18
+ * Specify the sequence alphabet: DNA/RNA/Peptides/HELM file
19
+ * Generate motif template
20
+ * Iterate through all positions in the motif
21
+ * With the probability `prob_any` it will be a non-conservative letter.
22
+ In this case, place the `?` code in this position.
23
+ The non-conservative letters can't reside on motif ends.
24
+ * Otherwise, generate a conservative letter
25
+ * Generate a random integer in range `[1-max_variants_cluster]` - the number of possible letters in this position.
26
+ * Randomly choose the corresponding number of letters from the alphabet.
27
+ * Generate cluster sequences by template
28
+ * Iterate through all motif template positions
29
+ * Take the letter from the list of allowed letters for the position
30
+ * With the probability `cliff_probability` generate sequence cliff:
31
+ * Choose a conservative letter
32
+ * Mutate it to a random letter not present in the allowed template letters for this position.
33
+
34
+ ## Generation activities
35
+
36
+ The activity generation simulates experimental data from some real assay with noise and bias.
37
+ * Generate random mean value for Gauss distribution for "ideal" activity generation.
38
+ The `activity_range` parameter defines the range of ideal activities
39
+ * Generate "ideal" activities from gauss distribution with sigma=1
40
+ * Make activity cliffs for the pairs of sequences:
41
+ * Calculate difference between generated activities
42
+ * Calculate `cliff_size` - random Gauss-distributed value
43
+ * "Push apart" these values to ensure that the difference between them is at least `cliff_size`
44
+ * Calculate "assay activities" for each assay
45
+ * Generate "noise" - Gauss-distributed value with a randomly chosen center
46
+ * Calculate `real_activity = ideal_activity + noise*noise_level`
47
+ The `noise_level` is the ratio of signal/noise for the assay
48
+ * Rescale the activity to fit in the requested assay scale