hashmol3d 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ MIT License
2
+
3
+ Copyright.
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: hashmol3d
3
+ Version: 0.5.0
4
+ Summary: Deterministic 3D molecular geometry hashing standard
5
+ Author: Murat Keçeli
6
+ Requires-Python: >=3.8
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: numpy
10
+ Provides-Extra: test
11
+ Requires-Dist: pytest; extra == "test"
12
+ Dynamic: license-file
13
+
14
+ # HashMol3D
15
+
16
+ **HashMol3D** is a standard, deterministic 3D molecular geometry identifier
17
+ for computational chemistry, machine learning, and HPC workflows.
18
+
19
+ It produces a **readable** identifier of the form
20
+
21
+ <Hill formula><state tag>-<geometry hash>
22
+
23
+ e.g. `H2Oq0m1-68936c504bf5fa3b` for neutral singlet water. The trailing
24
+ geometry hash is **rotation-, translation-, permutation-, and
25
+ parity-invariant** (matching the invariances of the eigenvalues of the
26
+ non-relativistic molecular Hamiltonian), and depends on:
27
+
28
+ - atomic numbers
29
+ - pairwise distances rounded to a user-specified precision
30
+ - a descriptor version tag
31
+
32
+ Charge and spin multiplicity live in the readable prefix, **not** in
33
+ the hash, so two states of the same geometry share the same hex tail
34
+ and can be grouped by suffix matching:
35
+
36
+ ```text
37
+ H2Oq0m1-68936c504bf5fa3b # neutral singlet water
38
+ H2Oq1m2-68936c504bf5fa3b # water cation, same geometry → same hex tail
39
+ ```
40
+
41
+ The hash length auto-scales with the number of atoms (`clip(N, 16, 64)`
42
+ hex chars) so collision risk stays roughly constant as molecules grow;
43
+ pass `length=` to pin a fixed value.
44
+
45
+ It deliberately does **not** distinguish enantiomers (which share their
46
+ Hamiltonian eigenvalues). The reference implementation depends only on
47
+ NumPy.
48
+
49
+ HashMol3D IDs are **stable across machines**, **reproducible**, and ideal for:
50
+ - workflow deduplication
51
+ - caching
52
+ - large QC datasets
53
+ - MD conformer tracking
54
+ - ML potential datasets
55
+ - LLM scientific agents
56
+
57
+ ## Install
58
+
59
+ ### Using pip
60
+
61
+ ```bash
62
+ pip install hashmol3d
63
+ ```
64
+
65
+ ### Using uv
66
+
67
+ ```bash
68
+ # Install from PyPI
69
+ uv pip install hashmol3d
70
+ ```
71
+
72
+ ### Install from source
73
+
74
+ ```bash
75
+ # Clone the repository
76
+ git clone https://github.com/yourusername/HashMol3D.git
77
+ cd HashMol3D
78
+
79
+ # Create and activate a virtual environment (recommended)
80
+ # Using uv:
81
+ uv venv
82
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
83
+
84
+ # Or using standard Python:
85
+ python -m venv .venv
86
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
87
+
88
+ # Install the package in editable mode
89
+ uv pip install -e . # Or: pip install -e .
90
+ ```
91
+
92
+ ## Usage (CLI)
93
+
94
+ ```bash
95
+ $ hashmol3d water.xyz
96
+ H2Oq0m1-68936c504bf5fa3b
97
+
98
+ # Cation with explicit multiplicity — only the prefix changes.
99
+ $ hashmol3d -c 1 -m 2 water.xyz
100
+ H2Oq1m2-68936c504bf5fa3b
101
+
102
+ # Pin a fixed hash length and a coarser precision.
103
+ $ hashmol3d -p 1e-3 -l 32 benzene.xyz
104
+
105
+ # Verbose: also print formula, geometry hash, descriptor, and metadata.
106
+ $ hashmol3d -v water.xyz
107
+
108
+ # Show the package version.
109
+ $ hashmol3d --version
110
+ ```
111
+
112
+ Short flags: `-p/--precision`, `-c/--charge`, `-m/--multiplicity`,
113
+ `-l/--length`, `-v/--verbose`. Errors on missing or malformed input go
114
+ to stderr with exit code 1 (no Python traceback).
115
+
116
+ ## Usage (Python)
117
+
118
+ ```python
119
+ import numpy as np
120
+ from hashmol3d import hash_molecule
121
+
122
+ atomic_nums = np.array([8, 1, 1])
123
+ coords = np.array([
124
+ [ 0.0000, 0.0000, 0.0],
125
+ [ 0.7572, 0.5860, 0.0],
126
+ [-0.7572, 0.5860, 0.0],
127
+ ])
128
+ res = hash_molecule(atomic_nums, coords)
129
+ print(res.hash_str) # H2Oq0m1-68936c504bf5fa3b
130
+ print(res.formula) # H2O
131
+ print(res.geometry_hash) # 68936c504bf5fa3b
132
+ print(res.charge, res.multiplicity) # 0 1
133
+ ```
134
+
135
+ All optional arguments are keyword-only: `precision`, `charge`,
136
+ `multiplicity`, `length`.
137
+
138
+ Or read straight from a file:
139
+
140
+ ```python
141
+ from hashmol3d import hash_xyz
142
+
143
+ print(hash_xyz("water.xyz").hash_str) # H2Oq0m1-68936c504bf5fa3b
144
+ print(hash_xyz("water.xyz", charge=1, multiplicity=2).hash_str)
145
+ # H2Oq1m2-68936c504bf5fa3b
146
+ ```
147
+
148
+ See [`docs/`](docs/) for the full
149
+ [specification](docs/specification.md),
150
+ [API reference](docs/api_reference.md), and
151
+ [CLI guide](docs/cli_usage.md).
152
+
@@ -0,0 +1,139 @@
1
+ # HashMol3D
2
+
3
+ **HashMol3D** is a standard, deterministic 3D molecular geometry identifier
4
+ for computational chemistry, machine learning, and HPC workflows.
5
+
6
+ It produces a **readable** identifier of the form
7
+
8
+ <Hill formula><state tag>-<geometry hash>
9
+
10
+ e.g. `H2Oq0m1-68936c504bf5fa3b` for neutral singlet water. The trailing
11
+ geometry hash is **rotation-, translation-, permutation-, and
12
+ parity-invariant** (matching the invariances of the eigenvalues of the
13
+ non-relativistic molecular Hamiltonian), and depends on:
14
+
15
+ - atomic numbers
16
+ - pairwise distances rounded to a user-specified precision
17
+ - a descriptor version tag
18
+
19
+ Charge and spin multiplicity live in the readable prefix, **not** in
20
+ the hash, so two states of the same geometry share the same hex tail
21
+ and can be grouped by suffix matching:
22
+
23
+ ```text
24
+ H2Oq0m1-68936c504bf5fa3b # neutral singlet water
25
+ H2Oq1m2-68936c504bf5fa3b # water cation, same geometry → same hex tail
26
+ ```
27
+
28
+ The hash length auto-scales with the number of atoms (`clip(N, 16, 64)`
29
+ hex chars) so collision risk stays roughly constant as molecules grow;
30
+ pass `length=` to pin a fixed value.
31
+
32
+ It deliberately does **not** distinguish enantiomers (which share their
33
+ Hamiltonian eigenvalues). The reference implementation depends only on
34
+ NumPy.
35
+
36
+ HashMol3D IDs are **stable across machines**, **reproducible**, and ideal for:
37
+ - workflow deduplication
38
+ - caching
39
+ - large QC datasets
40
+ - MD conformer tracking
41
+ - ML potential datasets
42
+ - LLM scientific agents
43
+
44
+ ## Install
45
+
46
+ ### Using pip
47
+
48
+ ```bash
49
+ pip install hashmol3d
50
+ ```
51
+
52
+ ### Using uv
53
+
54
+ ```bash
55
+ # Install from PyPI
56
+ uv pip install hashmol3d
57
+ ```
58
+
59
+ ### Install from source
60
+
61
+ ```bash
62
+ # Clone the repository
63
+ git clone https://github.com/yourusername/HashMol3D.git
64
+ cd HashMol3D
65
+
66
+ # Create and activate a virtual environment (recommended)
67
+ # Using uv:
68
+ uv venv
69
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
70
+
71
+ # Or using standard Python:
72
+ python -m venv .venv
73
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
74
+
75
+ # Install the package in editable mode
76
+ uv pip install -e . # Or: pip install -e .
77
+ ```
78
+
79
+ ## Usage (CLI)
80
+
81
+ ```bash
82
+ $ hashmol3d water.xyz
83
+ H2Oq0m1-68936c504bf5fa3b
84
+
85
+ # Cation with explicit multiplicity — only the prefix changes.
86
+ $ hashmol3d -c 1 -m 2 water.xyz
87
+ H2Oq1m2-68936c504bf5fa3b
88
+
89
+ # Pin a fixed hash length and a coarser precision.
90
+ $ hashmol3d -p 1e-3 -l 32 benzene.xyz
91
+
92
+ # Verbose: also print formula, geometry hash, descriptor, and metadata.
93
+ $ hashmol3d -v water.xyz
94
+
95
+ # Show the package version.
96
+ $ hashmol3d --version
97
+ ```
98
+
99
+ Short flags: `-p/--precision`, `-c/--charge`, `-m/--multiplicity`,
100
+ `-l/--length`, `-v/--verbose`. Errors on missing or malformed input go
101
+ to stderr with exit code 1 (no Python traceback).
102
+
103
+ ## Usage (Python)
104
+
105
+ ```python
106
+ import numpy as np
107
+ from hashmol3d import hash_molecule
108
+
109
+ atomic_nums = np.array([8, 1, 1])
110
+ coords = np.array([
111
+ [ 0.0000, 0.0000, 0.0],
112
+ [ 0.7572, 0.5860, 0.0],
113
+ [-0.7572, 0.5860, 0.0],
114
+ ])
115
+ res = hash_molecule(atomic_nums, coords)
116
+ print(res.hash_str) # H2Oq0m1-68936c504bf5fa3b
117
+ print(res.formula) # H2O
118
+ print(res.geometry_hash) # 68936c504bf5fa3b
119
+ print(res.charge, res.multiplicity) # 0 1
120
+ ```
121
+
122
+ All optional arguments are keyword-only: `precision`, `charge`,
123
+ `multiplicity`, `length`.
124
+
125
+ Or read straight from a file:
126
+
127
+ ```python
128
+ from hashmol3d import hash_xyz
129
+
130
+ print(hash_xyz("water.xyz").hash_str) # H2Oq0m1-68936c504bf5fa3b
131
+ print(hash_xyz("water.xyz", charge=1, multiplicity=2).hash_str)
132
+ # H2Oq1m2-68936c504bf5fa3b
133
+ ```
134
+
135
+ See [`docs/`](docs/) for the full
136
+ [specification](docs/specification.md),
137
+ [API reference](docs/api_reference.md), and
138
+ [CLI guide](docs/cli_usage.md).
139
+
@@ -0,0 +1,42 @@
1
+ [project]
2
+ name = "hashmol3d"
3
+ version = "0.5.0"
4
+ description = "Deterministic 3D molecular geometry hashing standard"
5
+ authors = [{name = "Murat Keçeli"}]
6
+ readme = "README.md"
7
+ requires-python = ">=3.8"
8
+ dependencies = ["numpy"]
9
+
10
+ [project.optional-dependencies]
11
+ test = ["pytest"]
12
+
13
+ [project.scripts]
14
+ hashmol3d = "hashmol3d.cli:main"
15
+
16
+ [build-system]
17
+ requires = ["setuptools>=61", "wheel"]
18
+ build-backend = "setuptools.build_meta"
19
+
20
+ [tool.setuptools.packages.find]
21
+ where = ["src"]
22
+
23
+ [tool.pytest.ini_options]
24
+ testpaths = ["tests"]
25
+ pythonpath = ["src"]
26
+ norecursedirs = ["gemini", "xyz", ".venv", ".git", "build", "dist"]
27
+
28
+ [tool.mypy]
29
+ ignore_missing_imports = true
30
+
31
+ [tool.ruff]
32
+ line-length = 100
33
+ target-version = "py38"
34
+ extend-exclude = ["gemini", "xyz", "build", "dist"]
35
+
36
+ [tool.ruff.lint]
37
+ # Pyflakes (F), pycodestyle errors+warnings (E, W), isort (I),
38
+ # pyupgrade (UP), bugbear (B).
39
+ select = ["E", "W", "F", "I", "UP", "B"]
40
+
41
+ [tool.ruff.lint.per-file-ignores]
42
+ "tests/*" = ["E501"] # tolerate long lines in test fixtures
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,32 @@
1
+ """HashMol3D: deterministic 3D molecular geometry hashing."""
2
+
3
+ from .core import (
4
+ DESCRIPTOR_VERSION,
5
+ HashMol3DResult,
6
+ generate_hashmol3d,
7
+ hash_molecule,
8
+ )
9
+ from .io import read_xyz
10
+ from .version import __version__
11
+
12
+
13
def hash_xyz(path, **kwargs) -> HashMol3DResult:
    """Hash the molecular geometry stored in an XYZ file.

    The file at ``path`` is parsed with :func:`read_xyz`; the atomic
    numbers and coordinates it returns are then handed to
    :func:`hash_molecule`, together with every keyword argument that was
    supplied to this function.
    """
    numbers, positions = read_xyz(path)
    return hash_molecule(numbers, positions, **kwargs)
22
+
23
+
24
# Names re-exported as the package's public API.
__all__ = [
    # constants and result type
    "DESCRIPTOR_VERSION",
    "HashMol3DResult",
    # package metadata
    "__version__",
    # hashing and I/O entry points
    "generate_hashmol3d",
    "hash_molecule",
    "hash_xyz",
    "read_xyz",
]
@@ -0,0 +1,109 @@
1
+ """Command-line interface for HashMol3D."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from typing import Sequence
8
+
9
+ from .core import hash_molecule
10
+ from .io import read_xyz
11
+ from .version import __version__
12
+
13
+
14
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``hashmol3d`` command.

    The parser accepts ``--version``, one positional ``file`` argument,
    and the optional flags ``-p/--precision``, ``-c/--charge``,
    ``-m/--multiplicity``, ``-l/--length`` and ``-v/--verbose``.
    """
    cli_parser = argparse.ArgumentParser(
        prog="hashmol3d",
        description=(
            "Deterministic 3D molecular geometry hash. "
            "Reads an XYZ file and prints the HashMol3D identifier."
        ),
    )
    register = cli_parser.add_argument

    register("--version", action="version", version=f"hashmol3d {__version__}")
    register("file", help="Path to a molecular geometry file (.xyz)")

    # Optional flags, registered in the order they should appear in --help.
    flag_table = (
        (
            ("-p", "--precision"),
            {
                "type": float,
                "default": 1e-4,
                "metavar": "Å",
                "help": "Distance precision in angstroms (default: 1e-4)",
            },
        ),
        (
            ("-c", "--charge"),
            {
                "type": int,
                "default": 0,
                "help": "Total formal charge (default: 0)",
            },
        ),
        (
            ("-m", "--multiplicity"),
            {
                "type": int,
                "default": None,
                "help": "Spin multiplicity (default: inferred from electron count)",
            },
        ),
        (
            ("-l", "--length"),
            {
                "type": int,
                "default": None,
                "help": "Number of hex characters in the geometry hash, 1-64 "
                "(default: auto-scaled as clip(N, 16, 64))",
            },
        ),
        (
            ("-v", "--verbose"),
            {
                "action": "store_true",
                "help": "Also print the canonical descriptor and metadata",
            },
        ),
    )
    for names, settings in flag_table:
        register(*names, **settings)

    return cli_parser
65
+
66
+
67
def cli(argv: Sequence[str] | None = None) -> int:
    """Execute the HashMol3D command-line interface.

    ``argv`` is forwarded to the argument parser, which makes the function
    callable from tests; when it is ``None`` the parser falls back to
    ``sys.argv[1:]``.

    Returns the process exit code: ``0`` after the identifier has been
    printed, ``1`` when reading or hashing the file raised ``OSError`` or
    ``ValueError`` (the message is written to stderr).
    """
    options = _build_parser().parse_args(argv)

    try:
        numbers, positions = read_xyz(options.file)
        outcome = hash_molecule(
            numbers,
            positions,
            precision=options.precision,
            charge=options.charge,
            multiplicity=options.multiplicity,
            length=options.length,
        )
    except (OSError, ValueError) as exc:
        # FileNotFoundError is a subclass of OSError, so this single handler
        # also covers the missing-file case.
        print(f"hashmol3d: {exc}", file=sys.stderr)
        return 1

    if not options.verbose:
        print(outcome.hash_str)
        return 0

    # Verbose report: one "label: value" line per result attribute, in order.
    report_fields = (
        ("identifier", "hash_str"),
        ("formula", "formula"),
        ("geometry_hash", "geometry_hash"),
        ("descriptor", "descriptor"),
        ("version", "version"),
        ("precision", "precision"),
        ("charge", "charge"),
        ("multiplicity", "multiplicity"),
    )
    for label, attribute in report_fields:
        print(f"{label}: {getattr(outcome, attribute)}")
    return 0
105
+
106
+
107
def main() -> None:
    """Console-script entry point: run :func:`cli` and exit with its return code."""
    exit_code = cli()
    sys.exit(exit_code)