chemparseplot 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,166 @@
1
+ ### Generated by gibo (https://github.com/simonwhitaker/gibo)
2
+ ### https://raw.github.com/github/gitignore/4488915eec0b3a45b5c63ead28f286819c0917de/Python.gitignore
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+ /_version.py
165
+ /.pdm-python
166
+ *.ipynb
@@ -0,0 +1,19 @@
1
+ MIT License Copyright (c) 2023 Rohit Goswami (HaoZeke) <rog32[at]hi.is>
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is furnished
8
+ to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice (including the next
11
+ paragraph) shall be included in all copies or substantial portions of the
12
+ Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
17
+ OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19
+ OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,111 @@
1
+ Metadata-Version: 2.1
2
+ Name: chemparseplot
3
+ Version: 0.0.2
4
+ Summary: Parsers and plotting tools for computational chemistry
5
+ Project-URL: Documentation, https://github.com/HaoZeke/chemparseplot#readme
6
+ Project-URL: Issues, https://github.com/HaoZeke/chemparseplot/issues
7
+ Project-URL: Source, https://github.com/HaoZeke/chemparseplot
8
+ Author-email: Rohit Goswami <rog32@hi.is>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: compchem,parser,plot
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Programming Language :: Python
14
+ Classifier: Programming Language :: Python :: 3 :: Only
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: Implementation :: CPython
20
+ Requires-Python: >=3.9
21
+ Requires-Dist: numpy>=1.26.2
22
+ Requires-Dist: pint>=0.22
23
+ Provides-Extra: doc
24
+ Requires-Dist: mdit-py-plugins>=0.3.4; extra == 'doc'
25
+ Requires-Dist: myst-nb>=1.0.0; extra == 'doc'
26
+ Requires-Dist: myst-parser>=2.0.0; extra == 'doc'
27
+ Requires-Dist: sphinx-autodoc2>=0.5.0; extra == 'doc'
28
+ Requires-Dist: sphinx-copybutton>=0.5.2; extra == 'doc'
29
+ Requires-Dist: sphinx-library>=1.1.2; extra == 'doc'
30
+ Requires-Dist: sphinx-sitemap>=2.5.1; extra == 'doc'
31
+ Requires-Dist: sphinx-togglebutton>=0.3.2; extra == 'doc'
32
+ Requires-Dist: sphinx>=7.2.6; extra == 'doc'
33
+ Requires-Dist: sphinxcontrib-apidoc>=0.4.0; extra == 'doc'
34
+ Provides-Extra: plot
35
+ Requires-Dist: matplotlib>=3.8.2; extra == 'plot'
36
+ Description-Content-Type: text/markdown
37
+
38
+
39
+ # Table of Contents
40
+
41
+ 1. [About](#orgea43256)
42
+ 1. [Features](#org6a31408)
43
+ 1. [Supported Engines [WIP]](#orgbfa09d8)
44
+ 2. [Rationale](#org4c00d67)
45
+ 2. [License](#orge2168a3)
46
+
47
+
48
+ <a id="orgea43256"></a>
49
+
50
+ # About
51
+
52
+ ![img](branding/logo/chemparseplot_logo.png)
53
+
54
+ [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
55
+
56
+ A **pure-python**<sup><a id="fnr.1" class="footref" href="#fn.1" role="doc-backlink">1</a></sup> project to provide unit-aware uniform visualizations
57
+ of common computational chemistry tasks. Essentially this means we provide:
58
+
59
+ - Plotting scripts for specific workflows
60
+ - Parsers for various software outputs
61
+
62
+ This is a spin-off from `wailord` ([here](https://wailord.xyz)) which is meant to handle aggregated
63
+ runs in a specific workflow, while here the goal is to do no input handling and
64
+ very pragmatic output parsing, with the goal of generating uniform plots.
65
+
66
+
67
+ <a id="org6a31408"></a>
68
+
69
+ ## Features
70
+
71
+ - [Scientific color maps](https://www.fabiocrameri.ch/colourmaps/) for the plots
72
+ - Camera ready
73
+ - Unit preserving
74
+ - Via `pint`
75
+
76
+
77
+ <a id="orgbfa09d8"></a>
78
+
79
+ ### Supported Engines [WIP]
80
+
81
+ - ORCA (**5.x**)
82
+ - Scanning energies over a degree of freedom (`OPT` scans)
83
+ - Nudged elastic band (`NEB`) visualizations (over the "linearized" reaction
84
+ coordinate)
85
+
86
+
87
+ <a id="org4c00d67"></a>
88
+
89
+ ## Rationale
90
+
91
+ `wailord` is for production runs, however often there is a need to collect
92
+ "spot" calculation visualizations, which should nevertheless be uniform, i.e.
93
+ either Bohr/Hartree or Angstrom/eV or whatever.
94
+
95
+ Also I couldn't find (m)any scripts using the scientific colorschemes.
96
+
97
+
98
+ <a id="orge2168a3"></a>
99
+
100
+ # License
101
+
102
+ MIT. However, this is an academic resource, so **please cite** as much as possible
103
+ via:
104
+
105
+ - The Zenodo DOI for general use.
106
+ - The `wailord` paper for ORCA usage
107
+
108
+
109
+ # Footnotes
110
+
111
+ <sup><a id="fn.1" href="#fnr.1">1</a></sup> To distinguish it from my other thin-python wrapper projects
@@ -0,0 +1,16 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
# Always False at runtime, so the `typing` import below never executes here.
# NOTE(review): presumably static type checkers treat `TYPE_CHECKING` as true
# and therefore see the precise alias - confirm against the generator's docs.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple, Union
    VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
    # Runtime stand-in so the annotations below still resolve to a real object
    VERSION_TUPLE = object

# Annotation-only declarations of the names assigned at the bottom of the file
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE

# The same version is exposed under both the dunder and the plain names
__version__ = version = '0.0.2'
__version_tuple__ = version_tuple = (0, 0, 2)
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from chemparseplot import basetypes, parse, units
@@ -0,0 +1,47 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ from collections import namedtuple
5
+
6
+ # namedtuple for storing NEB iteration data
7
nebiter = namedtuple("nebiter", "iteration nebpath")
"""
Record for one iteration of a Nudged Elastic Band (NEB) calculation.

Parameters
----------
iteration : int
    Number of the NEB iteration this record belongs to.
nebpath : nebpath namedtuple
    The NEB path data captured at this iteration.

See Also
--------
nebpath : Holds the normalized arclength, actual arclength, and energy data
    of the NEB path.
"""
23
+
24
+ # namedtuple for storing the NEB path data
25
nebpath = namedtuple("nebpath", "norm_dist arc_dist energy")
"""
Record holding the data of one NEB path.

Parameters
----------
norm_dist : float
    Normalized arclength (0 to 1), i.e. the progress along the reaction path.
    Calculated as xcoord2 = arcS[img] / arcS[nim-1].
arc_dist : float
    Actual arclength at each point along the reaction path. Calculated as
    xcoord = arcS[img] + dx(ii).
energy : float
    Interpolated energy at each point, obtained from a cubic polynomial
    interpolation of the form:
    p = a*pow(dx(ii), 3.0) + b*pow(dx(ii), 2.0) + c*dx(ii) + d,
    where a, b, c, and d are the coefficients of the cubic polynomial.

Notes
-----
A `nebpath` is stored inside each `nebiter` to carry the detailed path
information of that NEB iteration.
"""
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from chemparseplot.parse import orca, patterns
@@ -0,0 +1,11 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ from io import StringIO
5
+
6
+ import numpy as np
7
+
8
+
9
def np_txt(matched_data):
    """
    Parse whitespace-separated numeric text into a NumPy array.

    Parameters
    ----------
    matched_data : str
        Text holding rows of numbers, for example the contents of a regex
        capture group.

    Returns
    -------
    numpy.ndarray
        Whatever ``numpy.loadtxt`` reads from an in-memory buffer wrapping
        `matched_data`.
    """
    return np.loadtxt(StringIO(matched_data))
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ from chemparseplot.parse.orca import geomscan
@@ -0,0 +1,63 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ """
5
+ For parsing outputs from input files like this:
6
+ !OPT UHF def2-SVP
7
+ %geom Scan
8
+ B 0 1 = 7.5589039543, 0.2116708996, 33
9
+ end
10
+ end
11
+ *xyzfile 0 1 h2_base.xyz
12
+ """
13
+ import re
14
+
15
+ import chemparseplot.parse.converter as conv
16
+ import chemparseplot.parse.patterns as pat
17
+ from chemparseplot.units import Q_
18
+
19
+
20
def extract_energy_data(data: str, energy_type: str) -> tuple[Q_, Q_]:
    """
    Extracts and converts the energy data for a specified energy type.

    This function assumes the input data is a blob of text. It searches for
    'Calculated Surface' followed by the specified energy type ('Actual' or 'SCF')
    and extracts the two-column data (distance and energy values) following it.
    Energies are returned in Hartree and distances in Bohr, as these are the default
    units used in ORCA.

    Parameters
    ----------
    data : str
        The blob of text containing energy data.
    energy_type : str
        The type of energy to search for ('Actual' or 'SCF'). The value is
        inserted into the regular expression as-is, so any regex
        metacharacters in it are interpreted as such.

    Returns
    -------
    tuple[Q_, Q_]
        A tuple containing two `Quantity` objects from the `pint` library.
        The first element is an array of distances in Bohr, and the second
        element is an array of energies in Hartree. Both are empty when no
        matching table is found.

    """
    # Regular expression to find the energy type and the two-column data following it
    # https://regex101.com/r/RF6b4V/2
    # fmt: off
    pattern = (
        r".*? Calculated Surface.*?"
        rf"{energy_type}.*?"
    ) + pat.TWO_COL_NUM
    matchr = re.search(pattern, data, re.MULTILINE)
    # fmt: on
    if not matchr:
        return Q_([], "bohr"), Q_([], "hartree")

    # A table with a single row is parsed into a 1-D array; force the
    # (rows, 2) layout so the column slicing below works for any row count.
    xydat = conv.np_txt(matchr.group("twocolnum")).reshape(-1, 2)
    xdu = Q_(xydat[:, 0], "bohr")
    ydu = Q_(xydat[:, 1], "hartree")
    return xdu, ydu
@@ -0,0 +1,40 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ """
5
+ For parsing .interp files from inputs like:
6
+ !B3LYP def2-SVP NEB-CI
7
+ %neb
8
+ nimages = 7
9
+ Product "prod.xyz"
10
+ end
11
+ *xyzfile 0 1 react.xyz
12
+ """
13
+ import re
14
+
15
+ import chemparseplot.parse.converter as conv
16
+ import chemparseplot.parse.patterns as pat
17
+ from chemparseplot.basetypes import nebiter, nebpath
18
+ from chemparseplot.units import Q_
19
+
20
+ # fmt: off
21
# Header of one iteration block followed by its three-column numeric table
INTERP_PAT = "".join(
    (
        r"Iteration:\s*(?P<iteration>\d+)\s*\n",  # Capture iteration number
        r"Images: Distance\s+\(Bohr\), Energy \(Eh\)\s*\n",  # Match 'Images:' line
        pat.THREE_COL_NUM,
    )
)
26
+ # fmt: on
27
+
28
+
29
def extract_interp_points(text: str) -> list[nebiter]:
    """
    Extracts the per-iteration NEB path tables from `.interp` file contents.

    Every block matching `INTERP_PAT` (an 'Iteration:' line, the 'Images:'
    header line, then a three-column numeric table) becomes one record.

    Parameters
    ----------
    text : str
        The text to scan for iteration blocks.

    Returns
    -------
    list[nebiter]
        One `nebiter` per matched block, in order of appearance. Each holds
        the iteration number and a `nebpath` whose columns are wrapped as
        `Quantity` objects: the first column as dimensionless, the second in
        Bohr and the third in Hartree. The list is empty when nothing matches.
    """
    data = []
    for match in re.finditer(INTERP_PAT, text, re.DOTALL):
        iteration = int(match.group("iteration"))
        # A table with a single row is parsed into a 1-D array; force the
        # (rows, 3) layout so the column slicing below works for any row count.
        ixydat = conv.np_txt(match.group("threecolnum")).reshape(-1, 3)
        tnp = nebpath(
            norm_dist=Q_(ixydat[:, 0], "dimensionless"),
            arc_dist=Q_(ixydat[:, 1], "bohr"),
            energy=Q_(ixydat[:, 2], "hartree"),
        )
        data.append(nebiter(iteration=iteration, nebpath=tnp))
    return data
@@ -0,0 +1,32 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ # https://regex101.com/r/jHAG2T/1
6
+ # DIGIT pattern for a floating-point number, possibly negative
7
DIGIT = r"-?\d+\.\d+"


def create_multicol_pattern(num_cols, pname="multicolnum"):
    """
    Build a regex matching one or more rows of `num_cols` decimal numbers.

    Parameters
    ----------
    num_cols : int
        How many numbers make up a row; must be at least 1.
    pname : str, optional
        Name of the group that captures the whole table.

    Returns
    -------
    str
        The pattern text: optional leading whitespace, then a named group
        holding a repeated row of `num_cols` `DIGIT` matches, each of which
        may be preceded by whitespace.

    Raises
    ------
    ValueError
        If `num_cols` is smaller than 1.
    """
    if num_cols < 1:
        error_message = "Number of columns must be at least 1"
        raise ValueError(error_message)

    # A row is the same "optional whitespace + number" unit repeated per column
    row = (r"\s*" + DIGIT) * num_cols
    return rf"\s*(?P<{pname}>(?:{row})+)"


TWO_COL_NUM = create_multicol_pattern(2, "twocolnum")
THREE_COL_NUM = create_multicol_pattern(3, "threecolnum")
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
@@ -0,0 +1,15 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import warnings
5
+
6
+ import pint
7
+
8
# Package-wide unit registry.
# NOTE(review): per the pint documentation, cache_folder=":auto:" enables the
# on-disk cache in pint's default location - confirm for the pinned version.
ureg = pint.UnitRegistry(cache_folder=":auto:")
# Register `kcal_mol` (alias `kcm`): one kcal divided by 6.02214076e+23,
# the numerical value of the Avogadro constant.
ureg.define("kcal_mol = kcal / 6.02214076e+23 = kcm")
# Quantity constructor bound to the registry above
Q_ = ureg.Quantity

# Silence NEP 18 warning
# Build one throwaway array quantity while all warnings are ignored.
# NOTE(review): presumably the warning is only raised on first array use, so
# later calls stay quiet - verify against pint's behaviour.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    Q_([])
@@ -0,0 +1,216 @@
1
+ [build-system]
2
+ build-backend = "hatchling.build"
3
+ requires = [
4
+ "hatch-vcs",
5
+ "hatchling",
6
+ ]
7
+
8
+ [tool.hatch.build.hooks.vcs]
9
+ version-file = "_version.py"
10
+
11
+ [tool.hatch.build]
12
+ include = [
13
+ "chemparseplot/**/*.py",
14
+ "chemparseplot/*.py",
15
+ "/tests",
16
+ ]
17
+
18
+
19
+
20
+ [tool.pdm.dev-dependencies]
21
+ lint = [
22
+ "ruff>=0.1.6",
23
+ ]
24
+ test = [
25
+ "pytest>=7.4.3",
26
+ "pytest-cov>=4.1.0",
27
+ ]
28
+ nbdoc = [
29
+ "jupyterlab>=4.0.9",
30
+ "jupytext>=1.15.2",
31
+ "ase>=3.22.1",
32
+ ]
33
+ release_aid = [
34
+ "tbump>=6.11.0",
35
+ "towncrier>=23.11.0",
36
+ ]
37
+
38
+ [tool.pdm.scripts]
39
+ check_ruff = "ruff ."
40
+ ruff_fix = "ruff --fix ."
41
+ ruff_format = "ruff format ."
42
+ lint = {composite = ["ruff_fix", "ruff_format"]}
43
+ test = "pytest --cov=chemparseplot tests"
44
+
45
+ [project]
46
+ name = "chemparseplot"
47
+ description = "Parsers and plotting tools for computational chemistry"
48
+ readme = "readme.md"
49
+ keywords = [
50
+ "compchem",
51
+ "parser",
52
+ "plot",
53
+ ]
54
+ license = {text = "MIT"}
55
+ authors = [
56
+ { name = "Rohit Goswami", email = "rog32@hi.is" },
57
+ ]
58
+ requires-python = ">=3.9"
59
+ classifiers = [
60
+ "Development Status :: 4 - Beta",
61
+ "Programming Language :: Python",
62
+ "Programming Language :: Python :: 3 :: Only",
63
+ "Programming Language :: Python :: 3.9",
64
+ "Programming Language :: Python :: 3.10",
65
+ "Programming Language :: Python :: 3.11",
66
+ "Programming Language :: Python :: 3.12",
67
+ "Programming Language :: Python :: Implementation :: CPython",
68
+ ]
69
+ dynamic = [
70
+ "version",
71
+ ]
72
+ dependencies = [
73
+ "numpy>=1.26.2",
74
+ "pint>=0.22",
75
+ ]
76
+ [project.optional-dependencies]
77
+ plot = [
78
+ "matplotlib>=3.8.2",
79
+ ]
80
+ doc = [
81
+ "sphinx>=7.2.6",
82
+ "myst-parser>=2.0.0",
83
+ "sphinxcontrib-apidoc>=0.4.0",
84
+ "sphinx-copybutton>=0.5.2",
85
+ "sphinx-sitemap>=2.5.1",
86
+ "sphinx-togglebutton>=0.3.2",
87
+ "sphinx-library>=1.1.2",
88
+ "sphinx-autodoc2>=0.5.0",
89
+ "mdit-py-plugins>=0.3.4",
90
+ "myst-nb>=1.0.0",
91
+ ]
92
+ [project.urls]
93
+ Documentation = "https://github.com/HaoZeke/chemparseplot#readme"
94
+ Issues = "https://github.com/HaoZeke/chemparseplot/issues"
95
+ Source = "https://github.com/HaoZeke/chemparseplot"
96
+
97
+ [tool.hatch.version]
98
+ source = "vcs"
99
+
100
+ [tool.ruff]
101
+ target-version = "py312"
102
+ line-length = 90
103
+ select = [
104
+ "A",
105
+ "ARG",
106
+ "B",
107
+ "C",
108
+ "DTZ",
109
+ "E",
110
+ "EM",
111
+ "F",
112
+ "FBT",
113
+ "I",
114
+ "ICN",
115
+ "ISC",
116
+ "N",
117
+ "PLC",
118
+ "PLE",
119
+ "PLR",
120
+ "PLW",
121
+ "Q",
122
+ "RUF",
123
+ "S",
124
+ "T",
125
+ "TID",
126
+ "UP",
127
+ "W",
128
+ "YTT",
129
+ ]
130
+ ignore = [
131
+ # Implicitly concatenated string literals on one line
132
+ "ISC001",
133
+ # Allow non-abstract empty methods in abstract base classes
134
+ "B027",
135
+ # Allow boolean positional values in function calls, like `dict.get(... True)`
136
+ "FBT003",
137
+ # Ignore checks for possible passwords
138
+ "S105", "S106", "S107",
139
+ # Ignore complexity
140
+ "C901", "PLR0911", "PLR0912", "PLR0913", "PLR0915",
141
+ ]
142
+ unfixable = [
143
+ # Don't touch unused imports
144
+ "F401",
145
+ ]
146
+
147
+ [tool.ruff.isort]
148
+ known-first-party = ["chemparseplot"]
149
+
150
+ [tool.ruff.flake8-tidy-imports]
151
+ ban-relative-imports = "all"
152
+
153
+ [tool.ruff.per-file-ignores]
154
+ # Tests can use magic values, assertions, and relative imports
155
+ "tests/**/*" = ["PLR2004", "S101", "TID252"]
156
+ # __init__.py can import without use
157
+ "__init__.py" = ["F401"]
158
+
159
+ [tool.coverage.run]
160
+ source_pkgs = ["chemparseplot"]
161
+ branch = true
162
+ parallel = true
163
+ omit = [
164
+ "_version.py",
165
+ ]
166
+
167
+ [tool.coverage.paths]
168
+ chemparseplot = ["src/chemparseplot", "*/chemparseplot/src/chemparseplot"]
169
+ tests = ["tests"]
170
+
171
+ [tool.coverage.report]
172
+ exclude_lines = [
173
+ "no cov",
174
+ "if __name__ == .__main__.:",
175
+ "if TYPE_CHECKING:",
176
+ ]
177
+
178
+ [tool.towncrier]
179
+ start_string = "<!-- towncrier release notes start -->\n"
180
+ underlines = ["", "", ""]
181
+ single_file = true
182
+ filename = "CHANGELOG.md"
183
+ directory = "doc/release/upcoming_changes/"
184
+ issue_format = "[#{issue}](https://github.com/HaoZeke/chemparseplot/issues/{issue})"
185
+ title_format = "## [{version}](https://github.com/HaoZeke/chemparseplot/tree/{version}) - {project_date}"
186
+ all_bullets = false
187
+
188
+ [[tool.towncrier.type]]
189
+ directory = "removed"
190
+ name = "Removed"
191
+ showcontent = true
192
+
193
+ [[tool.towncrier.type]]
194
+ directory = "deprecated"
195
+ name = "Deprecated"
196
+ showcontent = true
197
+
198
+ [[tool.towncrier.type]]
199
+ directory = "added"
200
+ name = "Added"
201
+ showcontent = true
202
+
203
+ [[tool.towncrier.type]]
204
+ directory = "changed"
205
+ name = "Changed"
206
+ showcontent = true
207
+
208
+ [[tool.towncrier.type]]
209
+ directory = "fixed"
210
+ name = "Fixed"
211
+ showcontent = true
212
+
213
+ [[tool.towncrier.type]]
214
+ directory = "misc"
215
+ name = "Miscellaneous"
216
+ showcontent = true
@@ -0,0 +1,74 @@
1
+
2
+ # Table of Contents
3
+
4
+ 1. [About](#orgea43256)
5
+ 1. [Features](#org6a31408)
6
+ 1. [Supported Engines [WIP]](#orgbfa09d8)
7
+ 2. [Rationale](#org4c00d67)
8
+ 2. [License](#orge2168a3)
9
+
10
+
11
+ <a id="orgea43256"></a>
12
+
13
+ # About
14
+
15
+ ![img](branding/logo/chemparseplot_logo.png)
16
+
17
+ [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
18
+
19
+ A **pure-python**<sup><a id="fnr.1" class="footref" href="#fn.1" role="doc-backlink">1</a></sup> project to provide unit-aware uniform visualizations
20
+ of common computational chemistry tasks. Essentially this means we provide:
21
+
22
+ - Plotting scripts for specific workflows
23
+ - Parsers for various software outputs
24
+
25
+ This is a spin-off from `wailord` ([here](https://wailord.xyz)) which is meant to handle aggregated
26
+ runs in a specific workflow, while here the goal is to do no input handling and
27
+ very pragmatic output parsing, with the goal of generating uniform plots.
28
+
29
+
30
+ <a id="org6a31408"></a>
31
+
32
+ ## Features
33
+
34
+ - [Scientific color maps](https://www.fabiocrameri.ch/colourmaps/) for the plots
35
+ - Camera ready
36
+ - Unit preserving
37
+ - Via `pint`
38
+
39
+
40
+ <a id="orgbfa09d8"></a>
41
+
42
+ ### Supported Engines [WIP]
43
+
44
+ - ORCA (**5.x**)
45
+ - Scanning energies over a degree of freedom (`OPT` scans)
46
+ - Nudged elastic band (`NEB`) visualizations (over the "linearized" reaction
47
+ coordinate)
48
+
49
+
50
+ <a id="org4c00d67"></a>
51
+
52
+ ## Rationale
53
+
54
+ `wailord` is for production runs, however often there is a need to collect
55
+ "spot" calculation visualizations, which should nevertheless be uniform, i.e.
56
+ either Bohr/Hartree or Angstrom/eV or whatever.
57
+
58
+ Also I couldn't find (m)any scripts using the scientific colorschemes.
59
+
60
+
61
+ <a id="orge2168a3"></a>
62
+
63
+ # License
64
+
65
+ MIT. However, this is an academic resource, so **please cite** as much as possible
66
+ via:
67
+
68
+ - The Zenodo DOI for general use.
69
+ - The `wailord` paper for ORCA usage
70
+
71
+
72
+ # Footnotes
73
+
74
+ <sup><a id="fn.1" href="#fnr.1">1</a></sup> To distinguish it from my other thin-python wrapper projects
@@ -0,0 +1,55 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import numpy as np
5
+
6
+ from chemparseplot.parse.orca.geomscan import extract_energy_data
7
+ from chemparseplot.units import Q_
8
+
9
+ # Sample data snippet
10
+ sample_data = """
11
+ The Calculated Surface using the 'Actual Energy'
12
+ 7.55890395 -0.74239862
13
+ 7.32930292 -0.74349939
14
+ 7.09970189 -0.74467446
15
+
16
+ The Calculated Surface using the SCF energy
17
+ 7.55890395 -0.74239862
18
+ 7.32930292 -0.74349939
19
+ 7.09970189 -0.74467446
20
+ """
21
+
22
+
23
def test_extract_actual_energy():
    """The 'Actual Energy' table is returned as Bohr distances and Hartree energies."""
    distances, energies = extract_energy_data(sample_data, "Actual Energy")

    want_x = Q_([7.55890395, 7.32930292, 7.09970189], "bohr")
    want_y = Q_([-0.74239862, -0.74349939, -0.74467446], "hartree")

    # Values first, then the units attached to them
    assert np.allclose(distances.magnitude, want_x.magnitude)
    assert np.allclose(energies.magnitude, want_y.magnitude)
    assert distances.units == want_x.units
    assert energies.units == want_y.units
33
+
34
+
35
def test_extract_scf_energy():
    """The 'SCF energy' table is returned as Bohr distances and Hartree energies."""
    distances, energies = extract_energy_data(sample_data, "SCF energy")

    want_x = Q_([7.55890395, 7.32930292, 7.09970189], "bohr")
    want_y = Q_([-0.74239862, -0.74349939, -0.74467446], "hartree")

    # Values first, then the units attached to them
    assert np.allclose(distances.magnitude, want_x.magnitude)
    assert np.allclose(energies.magnitude, want_y.magnitude)
    assert distances.units == want_x.units
    assert energies.units == want_y.units
45
+
46
+
47
def test_empty_data():
    """An empty input string produces two empty quantities."""
    distances, energies = extract_energy_data("", "Actual Energy")
    assert distances.size == 0
    assert energies.size == 0
50
+
51
+
52
def test_malformed_data():
    """Text without any energy table produces two empty quantities."""
    distances, energies = extract_energy_data("Some random text", "Actual Energy")
    assert distances.size == 0
    assert energies.size == 0
@@ -0,0 +1,44 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import pytest
5
+ from chemparseplot.parse.orca.neb.interp import extract_interp_points
6
+ from chemparseplot.basetypes import nebiter, nebpath
7
+ from chemparseplot.units import Q_, ureg
8
+
9
def test_extract_interp_points_valid_input():
    """Two well-formed iteration blocks are parsed into two `nebiter` records."""
    # Example valid text input
    text_input = """Iteration: 1
Images: Distance (Bohr), Energy (Eh)
13.0 0.0 0.0
1.0 10.0 -0.5
Iteration: 2
Images: Distance (Bohr), Energy (Eh)
0.0 0.2 0.3
1.0 20.0 -1.0
"""
    parsed = extract_interp_points(text_input)

    # The parser hands back a plain list made up of nebiter records only
    assert isinstance(parsed, list)
    assert all(isinstance(entry, nebiter) for entry in parsed)

    # First block: iteration number, first-row values and the attached units
    first_path = parsed[0].nebpath
    assert parsed[0].iteration == 1
    assert first_path.norm_dist.magnitude[0] == 13.0
    assert first_path.arc_dist.magnitude[0] == 0.0
    assert first_path.energy.magnitude[0] == 0.0
    assert first_path.norm_dist.units == ureg.Unit('dimensionless')
    assert first_path.arc_dist.units == 'bohr'
    assert first_path.energy.units == 'hartree'

    # Second block: first-row values only
    second_path = parsed[1].nebpath
    assert second_path.norm_dist.magnitude[0] == 0.0
    assert second_path.arc_dist.magnitude[0] == 0.2
    assert second_path.energy.magnitude[0] == 0.3
38
+
39
def test_extract_interp_points_invalid_input():
    """Text that contains no iteration block yields an empty list."""
    parsed = extract_interp_points("""This is not a valid input for the function.""")
    assert parsed == []
@@ -0,0 +1,26 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import re
5
+
6
+ import numpy as np
7
+
8
+ from chemparseplot.parse import converter as conv
9
+ from chemparseplot.parse import patterns as pat
10
+
11
+
12
def test_numeric_from_match():
    """Text captured by the two-column pattern converts into a 2x2 array."""
    # Two rows of two numbers each, as the two-column pattern expects
    sample_data = " 1.23 4.56\n7.89 10.11"

    two_col = re.compile(pat.create_multicol_pattern(2, "twocolnum"))
    match = two_col.search(sample_data)
    assert match is not None

    result_array = conv.np_txt(match.group("twocolnum"))
    expected = np.array([[1.23, 4.56], [7.89, 10.11]])
    assert isinstance(result_array, np.ndarray)
    assert np.array_equal(result_array, expected)
@@ -0,0 +1,32 @@
1
+ # SPDX-FileCopyrightText: 2023-present Rohit Goswami <rog32@hi.is>
2
+ #
3
+ # SPDX-License-Identifier: MIT
4
+ import re
5
+
6
+ import pytest
7
+
8
+ from chemparseplot.parse import patterns as pat
9
+
10
+
11
def test_num_cols_less_than_one():
    """Asking for zero columns is rejected with a ValueError."""
    pytest.raises(ValueError, pat.create_multicol_pattern, 0)
14
+
15
+
16
def test_two_col_pattern():
    """The two-column pattern needs two numbers to match."""
    two_col = pat.create_multicol_pattern(2)
    assert re.search(two_col, " 1.23 -4.56")
    assert not re.search(two_col, "1.23")
21
+
22
+
23
def test_three_col_pattern():
    """The three-column pattern needs three numbers to match."""
    three_col = pat.create_multicol_pattern(3)
    assert re.search(three_col, " 1.23 -4.56 7.89")
    assert not re.search(three_col, "1.23 -4.56")
28
+
29
+
30
def test_custom_pattern_name():
    """A custom group name shows up in the generated pattern text."""
    named = pat.create_multicol_pattern(2, "customname")
    assert "(?P<customname>" in named