myopic-mces 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,35 @@
1
+ name: Publish Python distributions to PyPI
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths:
8
+ - 'pyproject.toml'
9
+
10
+ jobs:
11
+ build-and-publish:
12
+ name: Build and publish Python distributions to PyPI
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
16
+ steps:
17
+ - name: Check out repository code
18
+ uses: actions/checkout@v3
19
+
20
+ - name: Setup Python
21
+ uses: actions/setup-python@v4
22
+ with:
23
+ python-version: "3.10"
24
+ cache: "pip"
25
+
26
+ - name: Install build/upload packages
27
+ run: |
28
+ python -m pip install --upgrade build twine
29
+
30
+ - name: Build
31
+ run: |
32
+ python -m build
33
+
34
+ - name: Publish package distributions to PyPI
35
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,129 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 AlBi-HHU
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.1
2
+ Name: myopic_mces
3
+ Version: 1.0.0
4
+ Summary: A package for computation of the myopic MCES distance
5
+ Project-URL: Homepage, https://github.com/AlBi-HHU/myopic-mces
6
+ Project-URL: Bug Tracker, https://github.com/AlBi-HHU/myopic-mces/issues
7
+ Project-URL: Publication, https://doi.org/10.1101/2023.03.27.534311
8
+ License: MIT License
9
+
10
+ Copyright (c) 2020 AlBi-HHU
11
+
12
+ Permission is hereby granted, free of charge, to any person obtaining a copy
13
+ of this software and associated documentation files (the "Software"), to deal
14
+ in the Software without restriction, including without limitation the rights
15
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16
+ copies of the Software, and to permit persons to whom the Software is
17
+ furnished to do so, subject to the following conditions:
18
+
19
+ The above copyright notice and this permission notice shall be included in all
20
+ copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28
+ SOFTWARE.
29
+ License-File: LICENSE
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Operating System :: OS Independent
32
+ Classifier: Programming Language :: Python :: 3
33
+ Requires-Python: >=3.6
34
+ Requires-Dist: joblib>=1.2.0
35
+ Requires-Dist: networkx>=3.0
36
+ Requires-Dist: pulp>=2.7.0
37
+ Requires-Dist: rdkit>=2022.09.5
38
+ Requires-Dist: scipy>=1.10.1
39
+ Description-Content-Type: text/markdown
40
+
41
+ # Computation of myopic MCES distances
42
+
43
+ Implementation of myopic MCES distance computation, see the preprint at [doi:10.1101/2023.03.27.534311](https://doi.org/10.1101/2023.03.27.534311) for details.
44
+
45
+ ## Usage
46
+
47
+ Input and output files are in CSV format. Every line in the input file is one comparison:
48
+
49
+ input-file: `index,SMILES1,SMILES2`
50
+
51
+ output-file: `index,time taken,myopic MCES distance,status (1 if exact distance, 2/4 if lower bound)`
52
+
53
+ Run from the command line:
54
+ ```bash
55
+ python myopic_mces.py input-file output-file
56
+ ```
57
+
58
+ See [the PuLP documentation](https://coin-or.github.io/pulp/guides/how_to_configure_solvers.html) on how to configure ILP solvers. By default, the PuLP-provided COIN-OR solver will be used.
59
+
60
+ ## Optional Arguments
61
+
62
+ General options
63
+ ```
64
+ --threshold int Threshold for the comparison.
65
+ Exact distance is only calculated if the distance is lower than the threshold.
66
+ If set to -1 the exact distance is always calculated.
67
+
68
+ --solver string Solver used for solving the ILP. Examples:'CPLEX_CMD', 'GUROBI_CMD', 'GLPK_CMD'
69
+
70
+ --num_jobs int Number of jobs; instances to run in parallel.
71
+ By default this is set to the number of (logical) CPU cores.
72
+ ```
73
+
74
+ Options for the ILP solver
75
+ ```
76
+ --solver_onethreaded Limit ILP solver to one thread, likely resulting in faster
77
+ performance with parallel computations (not available for all solvers).
78
+
79
+ --solver_no_msg Prevent solver from logging (not available for all solvers)
80
+
81
+ ```
82
+
83
+ Experimental options for myopic MCES distance computation
84
+ ```
85
+ --no_ilp_threshold If set, do not add threshold as constraint to ILP,
86
+ resulting in longer runtimes and potential violations of the triangle inequality.
87
+
88
+ --choose_bound_dynamically If set, a potentially weaker but faster lower bound will be computed and used
89
+ when this bound is already greater than the threshold. By default (without
90
+ this option), always the strongest lower bound will be computed and used.
91
+ ```
92
+
93
+
94
+ ## Dependencies and installation
95
+
96
+ Python packages required are:
97
+ ```
98
+ rdkit(==2022.09.5)
99
+ networkx(==3.0)
100
+ pulp(==2.7.0)
101
+ scipy(==1.10.1)
102
+ joblib(==1.2.0)
103
+ ```
104
+ Version numbers in parentheses correspond to an exemplary tested configuration (under Python version 3.11.0).
105
+ The program can be run on any standard operating system, tested on Windows 10 64 bit and Arch-Linux@linux-6.2.7 64 bit.
106
+
107
+ The recommended method of installation is via [conda](https://docs.conda.io/en/latest/miniconda.html) or [mamba](https://github.com/mamba-org/mamba):
108
+ Download this repository, navigate to the download location and execute the following commands (replacing `conda` with `mamba` when using mamba):
109
+ ```bash
110
+ conda env create -f conda_env.yml
111
+ # to activate the created environment:
112
+ conda activate myopic_mces
113
+ ```
114
+
115
+ A PyPI-package is also available, install via:
116
+ ```bash
117
+ pip install myopic_mces
118
+ ```
119
+
120
+ A typical installation time should not exceed 5 minutes, mostly depending on the internet connection speed to download all required packages.
121
+
122
+ ## Example data
123
+
124
+ The example provided in [example/example_data.csv](example/example_data.csv) can be run with:
125
+
126
+ ```bash
127
+ python myopic_mces.py example/example_data.csv example/example_data_out.csv
128
+ ```
129
+
130
+ Alternatively, if the package was installed via pip:
131
+ ```bash
132
+ myopic_mces example/example_data.csv example/example_data_out.csv
133
+ ```
134
+
135
+ Typical runtime is about 10s on Windows 10 with all default options, running on 4 cores with 8GB RAM. Exemplary output is provided in [example/example_data_out.csv](example/example_data_out.csv).
@@ -0,0 +1,95 @@
1
+ # Computation of myopic MCES distances
2
+
3
+ Implementation of myopic MCES distance computation, see the preprint at [doi:10.1101/2023.03.27.534311](https://doi.org/10.1101/2023.03.27.534311) for details.
4
+
5
+ ## Usage
6
+
7
+ Input and output files are in CSV format. Every line in the input file is one comparison:
8
+
9
+ input-file: `index,SMILES1,SMILES2`
10
+
11
+ output-file: `index,time taken,myopic MCES distance,status (1 if exact distance, 2/4 if lower bound)`
12
+
13
+ Run from the command line:
14
+ ```bash
15
+ python myopic_mces.py input-file output-file
16
+ ```
17
+
18
+ See [the PuLP documentation](https://coin-or.github.io/pulp/guides/how_to_configure_solvers.html) on how to configure ILP solvers. By default, the PuLP-provided COIN-OR solver will be used.
19
+
20
+ ## Optional Arguments
21
+
22
+ General options
23
+ ```
24
+ --threshold int Threshold for the comparison.
25
+ Exact distance is only calculated if the distance is lower than the threshold.
26
+ If set to -1 the exact distance is always calculated.
27
+
28
+ --solver string Solver used for solving the ILP. Examples:'CPLEX_CMD', 'GUROBI_CMD', 'GLPK_CMD'
29
+
30
+ --num_jobs int Number of jobs; instances to run in parallel.
31
+ By default this is set to the number of (logical) CPU cores.
32
+ ```
33
+
34
+ Options for the ILP solver
35
+ ```
36
+ --solver_onethreaded Limit ILP solver to one thread, likely resulting in faster
37
+ performance with parallel computations (not available for all solvers).
38
+
39
+ --solver_no_msg Prevent solver from logging (not available for all solvers)
40
+
41
+ ```
42
+
43
+ Experimental options for myopic MCES distance computation
44
+ ```
45
+ --no_ilp_threshold If set, do not add threshold as constraint to ILP,
46
+ resulting in longer runtimes and potential violations of the triangle inequality.
47
+
48
+ --choose_bound_dynamically If set, a potentially weaker but faster lower bound will be computed and used
49
+ when this bound is already greater than the threshold. By default (without
50
+ this option), always the strongest lower bound will be computed and used.
51
+ ```
52
+
53
+
54
+ ## Dependencies and installation
55
+
56
+ Python packages required are:
57
+ ```
58
+ rdkit(==2022.09.5)
59
+ networkx(==3.0)
60
+ pulp(==2.7.0)
61
+ scipy(==1.10.1)
62
+ joblib(==1.2.0)
63
+ ```
64
+ Version numbers in parentheses correspond to an exemplary tested configuration (under Python version 3.11.0).
65
+ The program can be run on any standard operating system, tested on Windows 10 64 bit and Arch-Linux@linux-6.2.7 64 bit.
66
+
67
+ The recommended method of installation is via [conda](https://docs.conda.io/en/latest/miniconda.html) or [mamba](https://github.com/mamba-org/mamba):
68
+ Download this repository, navigate to the download location and execute the following commands (replacing `conda` with `mamba` when using mamba):
69
+ ```bash
70
+ conda env create -f conda_env.yml
71
+ # to activate the created environment:
72
+ conda activate myopic_mces
73
+ ```
74
+
75
+ A PyPI-package is also available, install via:
76
+ ```bash
77
+ pip install myopic_mces
78
+ ```
79
+
80
+ A typical installation time should not exceed 5 minutes, mostly depending on the internet connection speed to download all required packages.
81
+
82
+ ## Example data
83
+
84
+ The example provided in [example/example_data.csv](example/example_data.csv) can be run with:
85
+
86
+ ```bash
87
+ python myopic_mces.py example/example_data.csv example/example_data_out.csv
88
+ ```
89
+
90
+ Alternatively, if the package was installed via pip:
91
+ ```bash
92
+ myopic_mces example/example_data.csv example/example_data_out.csv
93
+ ```
94
+
95
+ Typical runtime is about 10s on Windows 10 with all default options, running on 4 cores with 8GB RAM. Exemplary output is provided in [example/example_data_out.csv](example/example_data_out.csv).
@@ -0,0 +1,11 @@
1
+ name: myopic_mces
2
+ channels:
3
+ - conda-forge
4
+ dependencies:
5
+ - rdkit
6
+ - networkx
7
+ - pulp
8
+ - scipy
9
+ - pip
10
+ - pip:
11
+ - joblib
@@ -0,0 +1,101 @@
1
+ 0,CCSC1=C(C=C(C(=C1)OC)CCN)OC,CS/C=C/C(=O)OC[C@H]1CCN2[C@@H]1CCC2
2
+ 1,CC(CCCN1C=C(C2=CC=CC=C21)C(=O)C3=CC=CC4=CC=CC=C43)O,COC1=C(C=C2C(=C1)CCN=C2CC3=CC=CC=C3)OCC4=CC=CC=C4
3
+ 2,CC1=C(C(=CC=C1)O)N,CC1=C(C=CC(=C1)O)N
4
+ 3,CCC(CCN1C=C(C2=CC=CC=C21)C(=O)C3=CC=CC4=CC=CC=C43)O,COC1=C(C=C2C(=C1)CCN=C2CC3=CC=CC=C3)OCC4=CC=CC=C4
5
+ 4,CC(C)C(C(=O)N)NC(=O)C1=NN(C2=CC=CC=C21)CC3CCCCC3,CC(C)CC1COCN2C(=CN=N2)CCC(C(=O)N1)CC3=CC=CC=C3
6
+ 5,CC(CCCN1C=C(C2=CC=CC=C21)C(=O)C3C(C3(C)C)(C)C)O,CC1CC(=CCC12COC(OC2)C3=CN=CC=C3)CCC=C(C)C
7
+ 6,C(C(C(=O)NCC(=O)NC(CC(=O)O)C(=O)O)N)C(=O)O,C(C1C(C(C(C(O1)OC2=C(NC(=O)NC2=O)N)O)O)O)O
8
+ 7,CC(CC1=CC2=C(C=C1)OC=C2)NC,CN(C)CC1CC2=CC=CC=C2C1=O
9
+ 8,C1=CC=C2C(=C1)C=CC=C2C(=O)C3=CN(C4=CC=CC=C43)CCCCCO,COC1=C(C=C2C(=C1)CCN=C2CC3=CC=CC=C3)OCC4=CC=CC=C4
10
+ 9,C1CN(CCC1N)C(=O)OCC2=CC=CC=C2,C1=CC(=CC=C1C=CC(=O)NCCCCN)O
11
+ 10,CC1=CC=CC=C1C(C2=CC=CC=C2)O,C1=CC=C(C=C1)COCC2=CC=CC=C2
12
+ 11,CN1C2=C(C=C(C=C2)Cl)C(=NC(C1=O)OC3C(C(C(C(O3)C(=O)O)O)O)O)C4=CC=CC=C4,CN(C)C1C2C(C3C(=C)C4=C(C=CC(=C4C(=C3C(=O)C2(C(=C(C1=O)C(=O)N)O)O)O)O)Cl)O
13
+ 12,CCCCCN1C=C(C2=CC=CC=C21)C(=O)OC3=CC=CC4=C3N=CC=C4,CN1C2=C3C(=CC1=O)C4=CC=CC=C4C(=O)C3=C(C=C2)NC5CCCCC5
14
+ 13,CC1=C(C=C[N+](=C1CS(=O)(=O)C2=NC3=CC=CC=C3N2)[O-])OCCCOC,CSCCC(C(=O)O)NC(=O)COC1=CC2=C(C=C1)N=C3CCCN3C2=O
15
+ 14,CC(C)C(C(=O)N)NC(=O)C1=NN(C2=CC=CC=C21)CC3=CC=C(C=C3)F,COC1=NC=CC(=N1)C2=C(N=CN2C3CCC(CC3)O)C4=CC=C(C=C4)F
16
+ 15,C1C(O1)CCCN2C=C(C3=CC=CC=C32)C(=O)C4=CC=CC5=CC=CC=C54,CC1=C(C(=C2C(=N1)C3=CC=CC=C3C2=O)C4=CC=C(C=C4)C(C)C)C(=O)C
17
+ 16,CC(C(=O)O)NC(=O)C(CC1=CC=C(C=C1)O)N,CC(C(=O)NC(CC1=CC=C(C=C1)O)C(=O)O)N
18
+ 17,C1=CC=C2C(=C1)C=CC=C2C(=O)C3=CN(C4=CC=CC=C43)CCCCC(=O)O,CCOC(=O)CC1=CC(=NC2=C1C3=CC=CC=C3C=C2)C4=CC=C(C=C4)OC
19
+ 18,CC(=NO)CC1=CC2=C(C=C1)OCO2,COC1=C(C=C2CCNC(=O)C2=C1)O
20
+ 19,CCC=CCC=CCC=CCC=CCC=CCC=CCCC(=O)NCC(=O)O,CC=C(C)C1C(C2C(CC(CC2C=C1C)C)C)C(=O)C3=CC(NC3=O)(C)OC
21
+ 20,CCCCCN1C=C(C2=CC=CC=C21)C(=O)NC3=CC=CC4=CC=CC=C43,C1CC(CN(C1)CC2=CC=CC=N2)C(=O)C3=C4C=CC=C5C4=C(CC5)C=C3
22
+ 21,CC1=CC=C(C2=CC=CC=C12)C(=O)C3=CN(C4=CC=CC=C43)CCCCC(=O)O,CC1=CC=C(O1)C(C2=CC=CC=C2C(=O)NCC3=CC=CC=C3)C4=CC=C(O4)C
23
+ 22,CNCCCC(C1=CN=CC=C1)O,C1CC1C(C2CC2)NC3=NCCO3
24
+ 23,CC(=C)C1=CC2=CC(=C(C=C2O1)O)C(=O)C,CC1=C(C(=C2C(=C1)C=CC=C2O)O)C(=O)C
25
+ 24,CC(C)N1C(=O)C2=CC=CC=C2N(S1(=O)=O)C,CC(=CC(=O)NS(=O)(=O)C1=CC=C(C=C1)N)C
26
+ 25,CC1=CC(=C(C=C1OC)CCNCC2=CC=CC=C2OC)OC,CN(CCC1=CC=C(C=C1)OC)CC2=CC(=C(C=C2)OC)OC
27
+ 26,C1C(C(C(C1=O)CC=CCCCC(=O)NC(CO)CO)C=CC(CCC2=CC=CC=C2)O)O,CC1C(CC=CCCC=CC(=O)OC1C(=CC(C)C(=O)CCCC2CC(=O)NC(=O)C2)C)O
28
+ 27,CCCCCC=CCC=CCC=CCC=CCCCC(=O)NC1=C(C=C(C=C1)O)C,CC1=CC=C(C=C1)C2(CCOC(C2)C(C)C)CCNCC3=CC=C(C=C3)OC(C)C
29
+ 28,CS(=O)(=O)N(CC1=CC=CO1)CC(CN2C3=CC=CC=C3C4=CC=CC=C42)O,COC1=C(C=C(C=C1)CCNC(=O)C2CSC3N2C(=O)C4=CC=CC=C34)OC
30
+ 29,CCCCCC(C)OC(=O)COC1=C2C(=C(C=C1)Cl)C=CC=N2,C[N+]1(CCC23C=CC(CC2OC4=C(C=CC(=C34)C1)OC)O)CCl
31
+ 30,C1CC(C(=O)C=C1)(C2=CC=CC=C2Cl)N,C1CC(=CC(=O)C1)NC2=CC=C(C=C2)Cl
32
+ 31,CC(CC1=C2CCOC2=CC=C1)N,CC(C(C=CC1=CC=CC=C1)O)N
33
+ 32,CCCCCN1C=C(C2=CC=CC=C21)C(=O)NC3=CC4=CC=CC=C4C=C3,C1CC(CN(C1)CC2=CC=CC=N2)C(=O)C3=C4C=CC=C5C4=C(CC5)C=C3
34
+ 33,CC1(CC2=C(O1)C(=CC=C2)O)C,CCOC(=O)CC1=CC=CC=C1
35
+ 34,CCC=CCC=CCC=CCC=CCC=CCCCCCC(=O)NCCO,CCCCCCCCCCCCCCCC(=O)N/C=C/C1=CC=C(C=C1)O
36
+ 35,CC(=O)N1CCCC(C1)N,C[N+](C)(C)CC(CC#N)O
37
+ 36,COC1=CC=C(C=C1)C=CC(=O)C2=C(C=CC(=C2)OC)OC,COC1=CC=C(C=C1)C=CC(=O)C2=C(C=C(C=C2)OC)OC
38
+ 37,CCCCCC=CC=C1C(CCC1=O)CC=CCCCC(=O)O,CCCCCC=CCC=CC=CC(=O)CC=CCCCC(=O)O
39
+ 38,CCCCCC(C=CC1C2CC(C1CC=CCCCC(=O)O)OO2)OO,CCCCCC(C=CC=CC=CC=CC(C(CCCC(=O)O)O)O)OO
40
+ 39,CCC(C(=O)C1=CC2=C(CCC2)C=C1)NCC,CCC(=O)C1CCC(C2=C(C1)N=C(C=C2)C)C
41
+ 40,CCCCCC=CCC(=O)C=CC=CCC=CCCCC(=O)O,CC(C=CCC=CCC=CCC=CCC=CCCCC(=O)O)O
42
+ 41,CC(C(=O)NC(CC1=CC=CC=C1)C(=O)N)NC(=O)C(CC2=CC=C(C=C2)O)N,CC(C)(CC(=O)N1CC2=C(CC1C(=O)NCC3=CC=CC=C3)N=CN2)CC(=O)O
43
+ 42,CC12CCC3C(=O)OC(CC3(C1C(=O)C(CC2C(=O)OC)O)C)C4=COC=C4,CC12CC(OC(=O)C1CC(C3(C2CC(C=C3C(=O)OC)O)C)O)C4=COC=C4
44
+ 43,CC1CCC(C2=C(CCC12)C)C=C(C)C(=O)O,CC1=C2CC(CCC2(CCC1)C)C(=C)C(=O)O
45
+ 44,C1=CC=C2C(=C1)C=CC=C2NC(=O)C3=NN(C4=CC=CC=C43)CCCCCF,C1C2CN(CC1C3=CC=C(C(=O)N3C2)C4=CN=CC=C4)CC5=CC=C(C=C5)F
46
+ 45,COC1=C(C2=C3C(CC4=CC(=C(C=C42)OC)O)NCCC3=C1)O,COC1=C(C=C2C(=C1)CC3C4=C2C(=C(C=C4CCN3)O)OC)O
47
+ 46,C1CC(=O)N(C1=O)OC(=O)CCCCCCC(=O)ON2C(=O)CCC2=O,CN1C(CC(C1=O)OC2C(C(C(C(O2)C(=O)O)O)O)O)C3=CN=CC=C3
48
+ 47,COC(=O)CCCC=CCC1C(CC(C1C=CC(CCC2=CC=CC=C2)O)O)O,CCC1CCCC(C(C(=O)C=CC2C3CC(CC3C=CC2C=CC(=O)O1)O)C)O
49
+ 48,CC(CCCN1C=C(C2=CC=CC=C21)C(=O)C3=CC=C(C=C3)O)O,CCOC1=CC=CC(=O)C2=C(N(C(=C12)C)C3=CC=C(C=C3)OC)C
50
+ 49,CON=C(CC1=CN=CC=C1)C2=C(C=C(C=C2)Cl)Cl,C1=CC=C(C=C1)CNC(=O)NC2=CC(=C(C=C2)Cl)Cl
51
+ 50,C1CN(CCC1N)CC2=CC=CC=C2,CC1=CC(=C(C=C1)N2CCCCC2)N
52
+ 51,CC1C=C(C(=O)C2(C1CC3C4(C2C(=O)C(=C(C4CC(=O)O3)C)OC)C)C)OC,CC12CCC(C(C1CCC=C2C(=O)OC)(C)CC(=O)C3=COC=C3)C(=O)OC
53
+ 52,CN(C)C1=CC=C(C=C1)C=CC(=O)C2=C(C=C(C=C2OC)OC)O,COC1=C(C=C2CN3CCC4=CC(=C(C=C4[C@@H]3CC2=C1)OC)O)O
54
+ 53,CC(C)N(CCC1=CNC2=C1C(=CC=C2)OC(=O)C)C(C)C,CCC1CN2CCC1CC2CNC(=O)COC3=CC=CC=C3
55
+ 54,CCCCCCCCC(=O)NC1CCOC1=O,CC(C)NC(=O)C1CCC(C1(C)C)(C)C(=O)O
56
+ 55,C1CC1N2C=C(C(=O)C3=CC(=C(C=C32)NCCN)F)C(=O)O,CC1C(=O)N2CCN(CC2C(=O)N1)C(=O)C3=CC(=CC=C3)F
57
+ 56,CCCCCCCCCCCCCCC(=O)NC1CCOC1=O,C1CCCCCC2(CCCCC1)OCC(O2)CN3CCOCC3
58
+ 57,CCC1=CC=CC2=C1NC3=C2C(=O)COC3(CC)CC(=O)OC,CC(=C)C(CC1=C(C2=CC=CC=C2N(C1=O)C)OC)OC(=O)C
59
+ 58,CCCCCC(C=CC1C(CC(C1CC=CCCCCO)O)O)O,CCC(C)C(CC1CC1CC2CC2CC(C(CC)C(=O)O)O)OC
60
+ 59,CC(C)C(C(=O)N)NC(=O)C1=NN(C2=CC=CC=C21)CCCC(C)O,CN1C2CCN(C2C(=O)NCC1CCC(=O)OC)CC3=CC=CC=N3
61
+ 60,C1=CC=C2C(=C1)C3=C(N2)C(=NC=C3)CCC(=O)O,CCC1=C2C(=CC(=N1)C(=O)O)C3=CC=CC=C3N2
62
+ 61,CCC1=CC=C(C2=CC=CC=C12)C(=O)C3=CN(C4=CC=CC=C43)CCCCC(=O)O,CC1=C(C=CC(=C1)C2=CC3=CC(=C(C=C3C(=N2)CC4=CC=CC=C4)OC)OC)OC
63
+ 62,CCC(C(=O)C1=CC=C(C=C1)C)NC,CCN(CC)C(=O)C1=CC(=CC=C1)C
64
+ 63,CCC=CCC=CCC=CCC=CCC=CCC#CCCC(=O)O,CCC(=O)C1(CCC2C1(CCC3=C4CCC(=O)C=C4CCC23)C)C
65
+ 64,CC(=O)CC(C1=CC=CO1)C2=C(C3=CC=CC=C3OC2=O)O,COC1=CC2=C(C=C1)C(=O)C(=CC3=CC(=C(C=C3)O)O)CO2
66
+ 65,COC1=CC(=CC(=C1O)OC)C=CCO,COC1=C(C=C(C=C1)CCC(=O)OC)O
67
+ 66,CC(CCCCN1C(=O)C2=C(N=CN2C)N(C1=O)C)OC(=O)C,C1CCC(CC1)C(=O)N2CCN3C(C2)C(=O)N(C3=O)CC(=O)N
68
+ 67,CC12CCC3C4(CCCC(C4CCC3(C1)CC2=O)(C)C(=O)O)C,CC12CCCC(C1CCC34C2CCC(C3)(C(=C)C4)O)(C)C(=O)O
69
+ 68,CCCCCN1C2=CC=CC=C2C(=N1)C(=O)NC(C(C)C)C(=O)N,CN1C2CCNC2C(=O)NCC1CCC(=O)NCC3=CC=CC=C3
70
+ 69,CC(C)(C)C(C(=O)OC)NC(=O)C1=NN(C2=CC=CC=C21)CCCCCF,COC1=C(C=C2C3CC(C(CN3CCC2=C1)N4CC(CC4=O)CF)N)OC
71
+ 70,CCCCCN1C2=CC=CC=C2C(=N1)C(=O)NC(C(=O)N)C(C)(C)C,CC1C2C(N(C(=O)NC2N(C(=O)N1)C(C)C)C(C)C)C3=CC=CC=C3
72
+ 71,CCCCCCCCCCCCCCCC(=O)OCC1(CC23CCC4C5=C(C=CC4(C2CCC1C3)C)OC=C5)O,CCCCC1CCCCC(CC2=CC(=C(C(CCCCC(CC3=CC(=C1C(=C3)O)O)C)CCCC)C(=C2)O)O)C
73
+ 72,CC(C)(C)OC(=O)NC(CC1=CN=CN1)C(=O)O,CCNC1=NC(=O)N(C=C1)C2CC(C(O2)CO)O
74
+ 73,C1CC(CCC1N2C(=NN=N2)CCCCOC3=CC4=C(C=C3)NC(=O)CC4)O,CCN(CCN1C(=NC2=C1C(=O)N(C(=O)N2C)C)CC3=CC=CC=C3)CCO
75
+ 74,CCCCCCCCCC(=O)NC1=CC(=CC=C1)[N+](=O)[O-],CCC1CN(CCC1CC(=O)NCC2=CC=CO2)C(=O)C
76
+ 75,CC(=O)N1CCC(CC1)N,C[N+](C)(C)CC(CC#N)O
77
+ 76,CCCCCC=CCC=CCC=CCC=CCCCC(=O)NCC(CC(=O)O)O,CC(=CC(=C)C(CC(CC(CC(CC=C)OC)OC)OC)OC)CC=C(C)[N+]#[C-]
78
+ 77,CCCCCC(C)(C=CC1C(CC(C1CC=CCCCC(=O)NCC)O)O)O,CCCC=CCC=CCCCCCCCC(=O)OC(CCC(=O)O)[N+](C)(C)C
79
+ 78,C1CC2C(C(CC1N2)OC(=O)C3=CC=CC=C3)C(=O)O,CN(CC1=CC=CC=C1)CC2=C(C(=O)C=C(O2)CO)O
80
+ 79,CNC1(CCCCC1=O)C2=CC=CC=C2OC,CCC(=O)NC1CCC2=C(C1)C(=CC=C2)OC
81
+ 80,CCOC(=O)C(C1CCCCN1)C2=CC=CC=C2,CCC(=O)C1(CCN(CC1)C)C2=CC(=CC=C2)O
82
+ 81,CN(CC1=CC=CC=C1)C2CCCNC2,CN1CCC2=CC=CC=C2C1CN(C)C
83
+ 82,C1=CC=C2C(=C1)C=C(C(=C2CC3=C(C(=CC4=CC=CC=C43)C(=O)O)O)O)C(=O)O,CC1=C(C(=O)C2=C(C1=O)C=CC=C2O)CC3=C(C(=O)C4=C(C3=O)C(=CC=C4)O)C
84
+ 83,CC1=CC=CC(=C1)C(=O)C(C)N2CCCC2,CC(C)CNC=C(C)C(=O)C1=CC=CC=C1
85
+ 84,CN1CCN(CC1)C2=NC3=C(C=CC(=C3)OC)OC4=C2C=C(C=C4)Cl,CC1=CC(=C(N1C2=C(N(N(C2=O)C3=CC=CC=C3)C)C)C)C(=O)CCl
86
+ 85,CCC1=C(C=C(C(=C1)C(=O)C)O)OCCCCCC(C)(C)C2=NNN=N2,CCC1CN(CCC1CC(=O)NCCNC(=O)C)C(=O)C2=CN=CC=C2
87
+ 86,CCCCCCC=CCCCCCC(=O)CC(=O)NC1CCOC1=O,CC12CCCC(=C)C1CC3C(C2)OC(=O)C3CN(C)CC(OC)OC
88
+ 87,COC1=CC=CC=C1CNCCC2=CC(=C(C=C2OC)Cl)OC,C[N+]1(CCC23C=CC(CC2OC4=C(C=CC(=C34)C1)OC)O)CCl
89
+ 88,C1CC1CNC(=O)CCCC=CCC2C(CC(C2C=CC(COC3=CC=CC=C3)O)O)O,CC(C)C1=CC23CCC4C(C2CC1C5C3C(=O)N(C5=O)CCO)(CCCC4(C)C(=O)O)C
90
+ 89,C1=CC=C(C=C1)CCCCCCNC(=O)OC2=CC=CC(=C2)C3=CC(=CC=C3)C(=O)N,C1CCN2CCCC(C2C1)CNC(=O)C3=CC=CC(=C3)C4=CC5=CC=CC=C5OC4=O
91
+ 90,C1=CC2=C(C(=C1)O)NC(=CC2=O)C(=O)O,C1=CC2=C(C=C1O)C(=O)C=C(N2)C(=O)O
92
+ 91,CC1(C(C1(C)C)C(=O)C2=CN(C3=CC=CC=C32)CCCCCO)C,CC1=NC(=C(C=C1)OC(=O)C23CC4CC(C2)CC(C4)C3)C(C)(C)C
93
+ 92,CCCCCC=CCC=CCC=CC=CC(=O)CCCC(=O)O,CCCCCC=CCC(=O)C=CC=CCC=CCCCC(=O)O
94
+ 93,C1C(C(C(C1O)C=CC(CCC2=CC=CC=C2)O)CC=CCCCC(=O)NC(CO)CO)O,CC12CCC3C(C1CCC(C2CC(=O)NCC4=CC=CC=C4OC)O)(COC(O3)COC)C
95
+ 94,CC1(C(=O)C2=C(O1)C(=CC=C2)O)C,COC1=C(C=C(C=C1)C=CC=O)O
96
+ 95,C#CCOC1=CC=CC=C1CCCCCC(=O)O,CC1=C2CC(CCC2(C=CC1=O)C)C(=C)C(=O)O
97
+ 96,CN(CCC(=NO)C(=O)C1=CN=CC=C1)N=O,CCC(C1=CN=C2C(=N1)C(=O)N(C(=O)N2)C)O
98
+ 97,CC1CN(CCN1C)C(=O)C2=CN(C3=C2C=CC=C3OC)CC4CCCCC4,CC(C)C(C(=O)NCC1CCCN2C1CCCC2)N3CC4=CC=CC=C4C3=O
99
+ 98,CCCCCC(C=CC=CCCCCCCCCCC(=O)O)OO,CCCCCCC(CC=CCCCCCCCC(=O)O)OC(=O)C
100
+ 99,COC1=CC=CC(=C1)C(=S)N,C1=CC=C(C=C1)CC(=S)NO
101
+ 100,CC1=CC2C(CC1)(C3(C(C(C(C34CO4)O2)O)OC(=O)C)C)COC(=O)C,C[C@@H]1C[C@H]2[C@@H]([C@@H](C[C@@](C(=O)C[C@@H]1O)(C)O)OC(=O)C(=C)C)C(=C)C(=O)O2
@@ -0,0 +1,101 @@
1
+ 0,0.8935220241546631,10,2
2
+ 1,1.7779769897460938,10,2
3
+ 2,0.7666854858398438,2.0,1
4
+ 3,1.886087417602539,10,2
5
+ 4,1.1720013618469238,10,2
6
+ 5,1.3709580898284912,10,2
7
+ 6,0.6033799648284912,13.0,4
8
+ 7,0.8473553657531738,10.0,1
9
+ 8,1.1307294368743896,10,2
10
+ 9,0.2676382064819336,10.0,1
11
+ 10,0.3690464496612549,2.0,1
12
+ 11,0.02732396125793457,15.0,4
13
+ 12,0.8388707637786865,10,2
14
+ 13,0.017572879791259766,18.0,4
15
+ 14,0.027521848678588867,17.0,4
16
+ 15,1.2031121253967285,10,2
17
+ 16,0.24315237998962402,2.0,1
18
+ 17,1.3124303817749023,10,2
19
+ 18,0.24808931350708008,8.0,1
20
+ 19,0.026569604873657227,14.0,4
21
+ 20,1.1287765502929688,10,2
22
+ 21,0.03397989273071289,11.5,4
23
+ 22,0.2266993522644043,10,2
24
+ 23,0.2823958396911621,8.5,1
25
+ 24,0.23221635818481445,8.0,1
26
+ 25,0.5202853679656982,6.0,1
27
+ 26,1.5058989524841309,10,2
28
+ 27,0.03613901138305664,13.5,4
29
+ 28,0.0243985652923584,18.5,4
30
+ 29,0.021234989166259766,12.0,4
31
+ 30,0.19841241836547852,4.0,1
32
+ 31,0.259920597076416,6.0,1
33
+ 32,1.565441608428955,10,2
34
+ 33,0.17687010765075684,8.0,1
35
+ 34,1.1181747913360596,10,2
36
+ 35,0.12376904487609863,8.0,1
37
+ 36,0.41043567657470703,2.0,1
38
+ 37,0.7481114864349365,6.0,1
39
+ 38,1.110314130783081,10.0,1
40
+ 39,0.5891437530517578,10,2
41
+ 40,0.6584610939025879,6.0,1
42
+ 41,0.029075145721435547,12.5,4
43
+ 42,0.8252356052398682,4.0,1
44
+ 43,0.4747192859649658,6.0,1
45
+ 44,0.03524971008300781,12.0,4
46
+ 45,0.46724486351013184,4.0,1
47
+ 46,0.0179443359375,16.0,4
48
+ 47,1.5226995944976807,10,2
49
+ 48,0.5487077236175537,10,2
50
+ 49,0.29987573623657227,10,2
51
+ 50,0.23734402656555176,4.0,1
52
+ 51,0.9015848636627197,10,2
53
+ 52,0.4507875442504883,10,2
54
+ 53,0.4521918296813965,10,2
55
+ 54,0.3387339115142822,8.0,1
56
+ 55,0.32390570640563965,10,2
57
+ 56,0.46780920028686523,10,2
58
+ 57,0.3915855884552002,10,2
59
+ 58,0.6075198650360107,10,2
60
+ 59,0.018615245819091797,12.5,4
61
+ 60,0.28983640670776367,2.0,1
62
+ 61,0.03423500061035156,12.0,4
63
+ 62,0.21294283866882324,6.0,1
64
+ 63,0.025656938552856445,13.0,4
65
+ 64,0.4521300792694092,10,2
66
+ 65,0.18150854110717773,6.0,1
67
+ 66,0.0157473087310791,13.0,4
68
+ 67,0.7337052822113037,4.0,1
69
+ 68,0.5009119510650635,10,2
70
+ 69,0.03911733627319336,11.0,4
71
+ 70,0.44536709785461426,10,2
72
+ 71,0.06927871704101562,12.5,4
73
+ 72,0.26279354095458984,10,2
74
+ 73,0.02523326873779297,25.5,4
75
+ 74,0.015688657760620117,15.5,4
76
+ 75,0.1666100025177002,10.0,1
77
+ 76,0.0304563045501709,16.0,4
78
+ 77,0.7685189247131348,10,2
79
+ 78,0.3359997272491455,10,2
80
+ 79,0.2607276439666748,8.0,1
81
+ 80,0.37820887565612793,10.0,1
82
+ 81,0.28598618507385254,6.0,1
83
+ 82,0.03360486030578613,11.0,4
84
+ 83,0.2871870994567871,6.0,1
85
+ 84,0.02286696434020996,15.5,4
86
+ 85,0.02148747444152832,21.0,4
87
+ 86,0.02533125877380371,13.0,4
88
+ 87,0.41715145111083984,10,2
89
+ 88,0.040938615798950195,17.0,4
90
+ 89,1.3361961841583252,10,2
91
+ 90,0.2315044403076172,2.0,1
92
+ 91,0.5691916942596436,10,2
93
+ 92,0.5627284049987793,4.0,1
94
+ 93,0.03430485725402832,12.0,4
95
+ 94,0.1643238067626953,6.0,1
96
+ 95,0.31859779357910156,10,2
97
+ 96,0.00754857063293457,17.5,4
98
+ 97,0.6105747222900391,10,2
99
+ 98,0.40708208084106445,8.0,1
100
+ 99,0.14671039581298828,6.0,1
101
+ 100,0.49617958068847656,10,2
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+ [project]
5
+ name = "myopic_mces"
6
+ version = "1.0.0"
7
+ description = "A package for computation of the myopic MCES distance"
8
+ readme = "README.md"
9
+ license = {file = "LICENSE"}
10
+ requires-python = ">=3.6"
11
+ classifiers = [
12
+ "Programming Language :: Python :: 3",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Operating System :: OS Independent",
15
+ ]
16
+ dependencies = [
17
+ "rdkit >= 2022.09.5",
18
+ "networkx >= 3.0",
19
+ "pulp >= 2.7.0",
20
+ "scipy >= 1.10.1",
21
+ "joblib >= 1.2.0",
22
+ ]
23
+ [project.scripts]
24
+ myopic_mces = "myopic_mces.myopic_mces:main"
25
+ [project.urls]
26
+ "Homepage" = "https://github.com/AlBi-HHU/myopic-mces"
27
+ "Bug Tracker" = "https://github.com/AlBi-HHU/myopic-mces/issues"
28
+ "Publication" = "https://doi.org/10.1101/2023.03.27.534311"
@@ -0,0 +1,161 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Oct 5 17:17:41 2020
4
+
5
+ @author: seipp
6
+ """
7
+ import pulp
8
+ import networkx as nx
9
+
10
def MCES_ILP(G1,G2,threshold,solver,solver_options=None,no_ilp_threshold=False):
    """
    Calculates the exact distance between two molecules using an ILP

    Parameters
    ----------
    G1 : networkx.classes.graph.Graph
        Graph representing the first molecule. Nodes carry an "atom"
        attribute and edges carry a "weight" attribute.
    G2 : networkx.classes.graph.Graph
        Graph representing the second molecule (same attributes as G1).
    threshold : int
        Threshold for the comparison. Unless it is -1 or no_ilp_threshold
        is set, the objective is constrained to be at most this value.
    solver: string
        ILP-solver used for solving MCES. Example:GUROBI_CMD
        The value "default" uses the solver PuLP selects on its own.
    solver_options: dict, optional
        additional options to pass to solvers. Example: threads=1, msg=False
        for better multi-threaded performance. Only used when solver is
        not "default". None (the default) means no additional options.
    no_ilp_threshold: bool
        if true, the threshold is not added as a constraint to the ILP, so
        the exact distance is computed even if it is above the threshold
        (slower)

    Returns:
    -------
    float
        Distance between the molecules
    int
        Type of Distance:
            1 : Exact Distance (the solver reported an optimal solution)
            2 : Lower bound; the given threshold is returned unchanged as
                the distance (no optimal solution was found, e.g. because
                the exact distance is above the threshold)

    """
    # None instead of a mutable {} default, so no dict is shared between calls
    if solver_options is None:
        solver_options={}

    def atoms_match(u,v):
        # node u of G1 and node v of G2 carry the same atom label
        return G1.nodes[u]["atom"]==G2.nodes[v]["atom"]

    def edges_match(e1,e2):
        # edge e1 of G1 can be mapped onto edge e2 of G2 in at least one
        # of the two possible orientations
        return ((atoms_match(e1[0],e2[0]) and atoms_match(e1[1],e2[1]))
                or (atoms_match(e1[1],e2[0]) and atoms_match(e1[0],e2[1])))

    ILP=pulp.LpProblem("MCES", pulp.LpMinimize)

    #Variables for nodepairs
    nodepairs=[(i,j) for i in G1.nodes for j in G2.nodes if atoms_match(i,j)]
    y=pulp.LpVariable.dicts('nodepairs', nodepairs,
                            lowBound = 0,
                            upBound = 1,
                            cat = pulp.LpInteger)

    #variables for edgepairs and weight
    edgepairs=[]
    w={}
    for i in G1.edges:
        for j in G2.edges:
            if edges_match(i,j):
                edgepairs.append((i,j))
                #cost of mapping i onto j: absolute difference of the edge weights
                w[(i,j)]=abs(G1[i[0]][i[1]]["weight"]-G2[j[0]][j[1]]["weight"])

    #variables for not mapping an edge (-1 stands for "no partner")
    for i in G1.edges:
        edgepairs.append((i,-1))
        w[(i,-1)]=G1[i[0]][i[1]]["weight"]
    for j in G2.edges:
        edgepairs.append((-1,j))
        w[(-1,j)]=G2[j[0]][j[1]]["weight"]
    c=pulp.LpVariable.dicts('edgepairs', edgepairs,
                            lowBound = 0,
                            upBound = 1,
                            cat = pulp.LpInteger)

    #objective function
    ILP += pulp.lpSum([w[i]*c[i] for i in edgepairs])

    #Every node in G1 can only be mapped to at most one in G2
    for i in G1.nodes:
        h=[(i,j) for j in G2.nodes if atoms_match(i,j)]
        ILP+=pulp.lpSum([y[k] for k in h])<=1

    #Every node in G2 can only be mapped to at most one in G1
    for i in G2.nodes:
        h=[(j,i) for j in G1.nodes if atoms_match(j,i)]
        ILP+=pulp.lpSum([y[k] for k in h])<=1

    #Every edge in G1 has to be mapped to an edge in G2 or the variable for not mapping has to be 1
    for i in G1.edges:
        ls=[(i,j) for j in G2.edges if edges_match(i,j)]
        ILP+=pulp.lpSum([c[k] for k in ls])+c[(i,-1)]==1

    #Every edge in G2 has to be mapped to an edge in G1 or the variable for not mapping has to be 1
    for i in G2.edges:
        ls=[(j,i) for j in G1.edges if edges_match(j,i)]
        ILP+=pulp.lpSum([c[k] for k in ls])+c[(-1,i)]==1

    #The mapping of the edges has to match the mapping of the nodes
    for i in G1.nodes:
        for j in G2.edges:
            ls=[]
            for k in G1.neighbors(i):
                #an edge is stored in c under only one orientation; try both
                if ((i,k),j) in c:
                    ls.append(((i,k),j))
                elif ((k,i),j) in c:
                    ls.append(((k,i),j))
            rs=[]
            if atoms_match(i,j[0]):
                rs.append((i,j[0]))
            if atoms_match(i,j[1]):
                rs.append((i,j[1]))
            ILP+=pulp.lpSum([c[k] for k in ls])<=pulp.lpSum([y[k] for k in rs])

    for i in G2.nodes:
        for j in G1.edges:
            ls=[]
            for k in G2.neighbors(i):
                if (j,(i,k)) in c:
                    ls.append((j,(i,k)))
                elif (j,(k,i)) in c:
                    ls.append((j,(k,i)))
            rs=[]
            if atoms_match(j[0],i):
                rs.append((j[0],i))
            if atoms_match(j[1],i):
                rs.append((j[1],i))
            ILP+=pulp.lpSum([c[k] for k in ls])<=pulp.lpSum([y[k] for k in rs])

    #constraint for the threshold
    if threshold!=-1 and not no_ilp_threshold:
        ILP +=pulp.lpSum([w[i]*c[i] for i in edgepairs])<=threshold

    #solve the ILP
    if solver=="default":
        ILP.solve()
    else:
        sol=pulp.getSolver(solver, **solver_options)
        ILP.solve(sol)

    if ILP.status==pulp.LpStatusOptimal:
        return float(ILP.objective.value()),1
    #no optimal solution: report the threshold as a lower bound
    return threshold,2
File without changes
@@ -0,0 +1,254 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Sat Oct 17 17:59:05 2020
4
+
5
+ @author: seipp
6
+ """
7
+
8
+ import networkx as nx
9
+
10
def filter1(G1,G2):
    """
    Computes a lower bound for the distance from weighted node degrees

    Nodes are grouped by atom type. Within each type the nodes of both
    graphs are paired in order of decreasing weighted degree, and the
    absolute degree differences are accumulated. Nodes without a partner
    contribute their whole weighted degree.

    Parameters
    ----------
    G1 : networkx.classes.graph.Graph
        Graph representing the first molecule.
    G2 : networkx.classes.graph.Graph
        Graph representing the second molecule.

    Returns:
    -------
    float
        Lower bound for the distance between the molecules

    """
    def weighted_degree(graph, node):
        # sum of the weights of all edges incident to the node
        return sum([graph[node][nbr]["weight"] for nbr in graph.neighbors(node)])

    def degrees_by_atom(graph):
        # atom type -> weighted degrees of its nodes, kept in node order
        grouped = {}
        for node in graph.nodes:
            grouped.setdefault(graph.nodes[node]["atom"], []).append(weighted_degree(graph, node))
        return grouped

    degrees1 = degrees_by_atom(G1)
    degrees2 = degrees_by_atom(G2)

    total = 0
    for atom, degs1 in degrees1.items():
        if atom not in degrees2:
            # atom type only occurs in G1: none of its nodes can be mapped
            for deg in degs1:
                total += deg
            continue
        ranked1 = sorted(degs1, reverse=True)
        ranked2 = sorted(degrees2[atom], reverse=True)
        # number of nodes of this type that can be mapped
        shared = min(len(ranked1), len(ranked2))
        # pair the nodes in order of decreasing degree
        for deg1, deg2 in zip(ranked1, ranked2):
            total += abs(deg1 - deg2)
        # nodes left without a partner (at most one of the two tails is non-empty)
        for deg in ranked1[shared:]:
            total += deg
        for deg in ranked2[shared:]:
            total += deg
    # atom types that only occur in G2
    for atom, degs2 in degrees2.items():
        if atom not in degrees1:
            for deg in degs2:
                total += deg
    # every edge was counted from both of its end nodes
    return total/2
75
+
76
def get_cost(G1,G2,i,j):
    """
    Computes the cost of mapping node i to node j from their neighborhoods

    The edges incident to i and to j are grouped by the atom type of the
    neighbor. Within each type the edges are paired in order of decreasing
    weight and half of the weight difference is charged. Edges without a
    partner are charged half of their weight.

    Parameters
    ----------
    G1 : networkx.classes.graph.Graph
        Graph representing the first molecule.
    G2 : networkx.classes.graph.Graph
        Graph representing the second molecule.
    i : int
        Node of G1
    j : int
        Node of G2

    Returns:
    -------
    float
        Cost of mapping i to j

    """
    def incident_weights(graph, centre):
        # atom type of the neighbor -> weights of the connecting edges, in neighbor order
        grouped = {}
        for nbr in graph.neighbors(centre):
            grouped.setdefault(graph.nodes[nbr]["atom"], []).append(graph[centre][nbr]["weight"])
        return grouped

    around_i = incident_weights(G1, i)
    around_j = incident_weights(G2, j)

    cost = 0.
    for atom, weights_i in around_i.items():
        if atom not in around_j:
            # neighbor type only present around i: none of these edges can be mapped
            for weight in weights_i:
                cost += weight/2
            continue
        ranked_i = sorted(weights_i, reverse=True)
        ranked_j = sorted(around_j[atom], reverse=True)
        shared = min(len(ranked_i), len(ranked_j))
        # pair the edges in order of decreasing weight
        for weight_i, weight_j in zip(ranked_i, ranked_j):
            cost += abs(weight_i - weight_j)/2
        # edges left without a partner (at most one of the two tails is non-empty)
        for weight in ranked_i[shared:]:
            cost += weight/2
        for weight in ranked_j[shared:]:
            cost += weight/2
    # neighbor types only present around j
    for atom, weights_j in around_j.items():
        if atom not in around_i:
            for weight in weights_j:
                cost += weight/2
    return cost
142
+
143
def filter2(G1,G2):
    """
    Computes a lower bound for the distance from node neighborhoods

    For every atom type a bipartite graph is built between the nodes of that
    type in G1 and in G2, with get_cost as edge weight. The smaller side is
    padded with dummy nodes whose edges cost half of the weighted degree of
    the node they are matched to. The weights of a minimum weight full
    matching are summed over all atom types.

    Parameters
    ----------
    G1 : networkx.classes.graph.Graph
        Graph representing the first molecule.
    G2 : networkx.classes.graph.Graph
        Graph representing the second molecule.

    Returns:
    -------
    float
        Lower bound for the distance between the molecules

    """
    def unmapped_cost(graph, node):
        # half of the summed weights of all edges incident to the node
        return sum([graph[nbr][node]["weight"] for nbr in graph.neighbors(node)])/2

    # all occurring atom types: those of G1 first, then the ones only found in G2
    atom_types=[]
    for graph in (G1,G2):
        for node in graph.nodes:
            symbol=graph.nodes[node]["atom"]
            if symbol not in atom_types:
                atom_types.append(symbol)

    bound=0
    # every atom type is handled separately
    for symbol in atom_types:
        members1=[node for node in G1.nodes if G1.nodes[node]["atom"]==symbol]
        members2=[node for node in G2.nodes if G2.nodes[node]["atom"]==symbol]
        # bipartite graph: side 1 holds the nodes of G1, side 2 the nodes of G2
        B=nx.Graph()
        for node in members1:
            B.add_node((1,node))
        for node in members2:
            B.add_node((2,node))
        # both lists only hold nodes of the current type, so every pair is compatible
        for node1 in members1:
            for node2 in members2:
                B.add_edge((1,node1),(2,node2),weight=get_cost(G1,G2,node1,node2))
        # pad the smaller side with dummy nodes (negative ids) so a full matching exists
        missing=len(members2)-len(members1)
        if missing>0:
            for dummy in range(1,missing+1):
                B.add_node((1,-dummy))
                for node2 in members2:
                    B.add_edge((1,-dummy),(2,node2),weight=unmapped_cost(G2,node2))
        if missing<0:
            for dummy in range(1,-missing+1):
                B.add_node((2,-dummy))
                for node1 in members1:
                    B.add_edge((1,node1),(2,-dummy),weight=unmapped_cost(G1,node1))
        matching=nx.bipartite.minimum_weight_full_matching(B)
        # the matching lists every pair in both directions; count each pair once
        for left,right in matching.items():
            if left[0]==1:
                bound=bound+B[left][right]["weight"]

    return bound
216
+
217
def apply_filter(G1,G2,threshold,always_stronger_bound=True):
    """
    Computes a lower bound for the distance

    Parameters
    ----------
    G1 : networkx.classes.graph.Graph
        Graph representing the first molecule.
    G2 : networkx.classes.graph.Graph
        Graph representing the second molecule.
    threshold : int
        Threshold for the comparison. We want to find a lower bound that is higher than the threshold
    always_stronger_bound : bool
        if true, always compute and use the second stronger bound

    Returns:
    -------
    float
        Lower bound for the distance between the molecules
    int
        Which lower bound was chosen: 2 - depending on threshold, 4 - second lower bound

    """
    if always_stronger_bound:
        return filter2(G1,G2), 4
    # the first bound is tried first; the second bound is only computed
    # when the first one does not exceed the threshold
    bound=filter1(G1,G2)
    if bound<=threshold:
        bound=filter2(G1,G2)
    return bound, 2
@@ -0,0 +1,35 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Oct 5 17:18:56 2020
4
+
5
+ @author: seipp
6
+ """
7
+ from rdkit import Chem
8
+ import networkx as nx
9
+
10
def construct_graph(s):
    """
    Converts a SMILES string into a graph

    Parameters
    ----------
    s : str
        SMILES of the molecule

    Returns:
    -------
    networkx.classes.graph.Graph
        Graph that represents the molecule.
        The bond types are represented as edge weights.
        The atom types are represented as atom attributes of the nodes.

    Raises:
    ------
    ValueError
        If the SMILES could not be parsed.
    """
    # read the SMILES
    m = Chem.MolFromSmiles(s)
    # the parser signals failure by returning None instead of raising;
    # fail here with a clear message rather than with an AttributeError below
    if m is None:
        raise ValueError(f"could not parse SMILES: {s!r}")
    # convert the molecule into a graph
    # The bond and atom types are converted to node/edge attributes
    G=nx.Graph()
    for atom in m.GetAtoms():
        G.add_node(atom.GetIdx(),atom=atom.GetSymbol())
    for bond in m.GetBonds():
        G.add_edge(bond.GetBeginAtom().GetIdx(),bond.GetEndAtom().GetIdx(),weight=bond.GetBondTypeAsDouble())
    return G
@@ -0,0 +1,133 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Oct 5 17:16:05 2020
4
+ @author: seipp
5
+ """
6
+ import time
7
+ import pulp
8
+ import networkx as nx
9
+ from joblib import Parallel, delayed
10
+ import multiprocessing
11
+ import argparse
12
+ from myopic_mces.graph import construct_graph
13
+ from myopic_mces.MCES_ILP import MCES_ILP
14
+ from myopic_mces.filter_MCES import apply_filter
15
+
16
def MCES(ind, s1, s2, threshold, solver, solver_options=None, no_ilp_threshold=False, always_stronger_bound=True):
    """
    Calculates the distance between two molecules

    Parameters
    ----------
    ind : int
        index
    s1 : str
        SMILES of the first molecule
    s2 : str
        SMILES of the second molecule
    threshold : int
        Threshold for the comparison. Exact distance is only calculated if the lower bound
        does not exceed the threshold.
        If set to -1 the exact distance is always calculated.
    solver: string
        ILP-solver used for solving MCES. Example:CPLEX_CMD
    solver_options: dict
        additional options to pass to solvers. Example: threads=1 for better multi-threaded performance.
        Defaults to no additional options.
    no_ilp_threshold: bool
        if true, the threshold is not added as a constraint to the ILP (slower)
    always_stronger_bound: bool
        if true, always compute and use the second stronger bound

    Returns:
    -------
    int
        index
    float
        Distance between the molecules
    str
        Time taken for the calculation in seconds, formatted as a string
    int
        Type of Distance:
            1 : Exact Distance
            2 : Lower bound (if the exact distance is above the threshold; bound chosen dynamically)
            4 : Lower bound (second lower bound was used)

    """
    # a fresh dict per call avoids sharing one mutable default between calls
    if solver_options is None:
        solver_options = {}
    start = time.time()
    # construct graph for both smiles.
    G1 = construct_graph(s1)
    G2 = construct_graph(s2)
    # filter out if distance is above the threshold;
    # a threshold of -1 disables the filter
    if threshold != -1:
        d, filter_id = apply_filter(G1, G2, threshold, always_stronger_bound=always_stronger_bound)
        if d > threshold:
            total_time = str(time.time()-start)
            return ind, d, total_time, filter_id
    # calculate MCES
    res = MCES_ILP(G1, G2, threshold, solver, solver_options=solver_options, no_ilp_threshold=no_ilp_threshold)
    total_time = str(time.time()-start)
    return ind, res[0], total_time, res[1]
75
+
76
def main():
    """
    Command line entry point

    Reads molecule pairs from the input file (one pair per line in the format
    id,smiles1,smiles2), computes the distance for every pair with MCES and
    writes one line per pair to the output file in the format
    id,time,distance,type.

    Raises:
    ------
    ValueError
        If a non-blank input line has fewer than three comma separated fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input", help="input file in the format: id,smiles1,smiles2")
    parser.add_argument("output", help="output file")
    parser.add_argument("--threshold", type=int, default=10,
                        action="store", help="threshold for the distance")
    parser.add_argument("--no_ilp_threshold", action="store_true",
                        help="(experimental) if set, do not add threshold as constraint to ILP, "
                        "resulting in longer runtimes and potential violations of the triangle equation")
    parser.add_argument("--choose_bound_dynamically", action="store_true",
                        help="if this is set, compute and use potentially weaker but faster lower bound if "
                        "already greater than the threshold. Otherwise (default), the strongest lower bound "
                        "is always computed and used")
    parser.add_argument("--solver", type=str, default="default",
                        action="store", help="Solver for the ILP. example:CPLEX_CMD")
    parser.add_argument("--solver_onethreaded", action="store_true",
                        help="limit ILP solver to one thread, resulting in faster "
                        "performance with parallel computations (not available for all solvers)")
    parser.add_argument("--solver_no_msg", action="store_true",
                        help="prevent solver from logging (not available for all solvers)")
    parser.add_argument("--num_jobs", type=int, help="Number of jobs; instances to run in parallel. "
                        "By default this is set to the number of (logical) CPU cores.")
    args = parser.parse_args()

    threshold = args.threshold
    solver = args.solver
    num_jobs = multiprocessing.cpu_count() if args.num_jobs is None else args.num_jobs
    additional_mces_options = dict(no_ilp_threshold=args.no_ilp_threshold, solver_options=dict(),
                                   always_stronger_bound=not args.choose_bound_dynamically)
    if args.solver_onethreaded:
        additional_mces_options['solver_options']['threads'] = 1
    if args.solver_no_msg:
        additional_mces_options['solver_options']['msg'] = False

    inputs = []
    # the context manager closes the file even if parsing fails
    with open(args.input, "r") as infile:
        for line_number, line in enumerate(infile, start=1):
            # drop the line terminator so it does not end up in the second SMILES
            line = line.rstrip("\r\n")
            # tolerate blank lines, e.g. at the end of the file
            if not line.strip():
                continue
            fields = line.split(",")
            if len(fields) < 3:
                raise ValueError(
                    f"{args.input}, line {line_number}: expected id,smiles1,smiles2 but got {line!r}")
            inputs.append((fields[0], fields[1], fields[2]))

    if num_jobs > 1:
        results = Parallel(n_jobs=num_jobs, verbose=5)(
            delayed(MCES)(i[0], i[1], i[2], threshold, solver, **additional_mces_options) for i in inputs)
    else:
        results = [MCES(i[0], i[1], i[2], threshold, solver, **additional_mces_options) for i in inputs]

    # one line per pair: id,time,distance,type
    with open(args.output, "w") as out:
        for ind, distance, total_time, distance_type in results:
            out.write(ind+","+total_time+","+str(distance)+","+str(distance_type)+"\n")
131
+
132
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()