hea-bench 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. hea_bench-0.1.0/.github/workflows/ci.yml +39 -0
  2. hea_bench-0.1.0/.gitignore +64 -0
  3. hea_bench-0.1.0/AGENTS.md +205 -0
  4. hea_bench-0.1.0/CITATION.cff +36 -0
  5. hea_bench-0.1.0/CODE_OF_CONDUCT.md +56 -0
  6. hea_bench-0.1.0/CONTRIBUTING.md +70 -0
  7. hea_bench-0.1.0/LICENSE +21 -0
  8. hea_bench-0.1.0/PKG-INFO +307 -0
  9. hea_bench-0.1.0/README.md +275 -0
  10. hea_bench-0.1.0/data/.gitkeep +0 -0
  11. hea_bench-0.1.0/data/consolidated/v0.1.0/README.md +108 -0
  12. hea_bench-0.1.0/data/consolidated/v0.1.0/consolidated.csv +7785 -0
  13. hea_bench-0.1.0/data/consolidated/v0.1.0/coverage_report.json +317 -0
  14. hea_bench-0.1.0/data/consolidated/v0.1.0/manifest.json +54 -0
  15. hea_bench-0.1.0/data/consolidated/v0.1.0/rule_baselines.json +81 -0
  16. hea_bench-0.1.0/data/raw/README.md +30 -0
  17. hea_bench-0.1.0/data/raw/borg2020/MPEA_dataset.csv +1546 -0
  18. hea_bench-0.1.0/data/raw/borg2020/README.md +118 -0
  19. hea_bench-0.1.0/data/raw/couzinie2018/README.md +64 -0
  20. hea_bench-0.1.0/data/raw/pei2020/README.md +86 -0
  21. hea_bench-0.1.0/data/raw/pei2020/pei2020_alloys_phases.csv +1253 -0
  22. hea_bench-0.1.0/data/raw/peivaste/README.md +132 -0
  23. hea_bench-0.1.0/data/raw/peivaste/fetch.py +53 -0
  24. hea_bench-0.1.0/docs/.gitkeep +0 -0
  25. hea_bench-0.1.0/examples/01_cantor_walkthrough.ipynb +280 -0
  26. hea_bench-0.1.0/examples/01_cantor_walkthrough.py +117 -0
  27. hea_bench-0.1.0/examples/02_benchmark_evaluation.ipynb +277 -0
  28. hea_bench-0.1.0/examples/02_benchmark_evaluation.py +126 -0
  29. hea_bench-0.1.0/examples/README.md +49 -0
  30. hea_bench-0.1.0/examples/_build_notebooks.py +145 -0
  31. hea_bench-0.1.0/pyproject.toml +79 -0
  32. hea_bench-0.1.0/src/hea_bench/__init__.py +37 -0
  33. hea_bench-0.1.0/src/hea_bench/benchmark/__init__.py +4 -0
  34. hea_bench-0.1.0/src/hea_bench/benchmark/composition.py +16 -0
  35. hea_bench-0.1.0/src/hea_bench/benchmark/consolidate.py +296 -0
  36. hea_bench-0.1.0/src/hea_bench/benchmark/coverage.py +169 -0
  37. hea_bench-0.1.0/src/hea_bench/benchmark/loaders/__init__.py +54 -0
  38. hea_bench-0.1.0/src/hea_bench/benchmark/loaders/borg2020.py +85 -0
  39. hea_bench-0.1.0/src/hea_bench/benchmark/loaders/pei2020.py +82 -0
  40. hea_bench-0.1.0/src/hea_bench/benchmark/loaders/peivaste.py +104 -0
  41. hea_bench-0.1.0/src/hea_bench/benchmark/taxonomy.py +86 -0
  42. hea_bench-0.1.0/src/hea_bench/classifiers/__init__.py +4 -0
  43. hea_bench-0.1.0/src/hea_bench/classifiers/diagnostic_stats.py +208 -0
  44. hea_bench-0.1.0/src/hea_bench/cli.py +36 -0
  45. hea_bench-0.1.0/src/hea_bench/composition.py +138 -0
  46. hea_bench-0.1.0/src/hea_bench/constants.py +3 -0
  47. hea_bench-0.1.0/src/hea_bench/descriptors/__init__.py +6 -0
  48. hea_bench-0.1.0/src/hea_bench/descriptors/data/LICENSE.matminer.txt +46 -0
  49. hea_bench-0.1.0/src/hea_bench/descriptors/data/README.md +109 -0
  50. hea_bench-0.1.0/src/hea_bench/descriptors/data/__init__.py +7 -0
  51. hea_bench-0.1.0/src/hea_bench/descriptors/data/elemental.py +98 -0
  52. hea_bench-0.1.0/src/hea_bench/descriptors/data/miedema_parameters.csv +74 -0
  53. hea_bench-0.1.0/src/hea_bench/descriptors/data/pair_enthalpies.py +99 -0
  54. hea_bench-0.1.0/src/hea_bench/descriptors/data/pair_enthalpies.tsv +2629 -0
  55. hea_bench-0.1.0/src/hea_bench/descriptors/entropy.py +55 -0
  56. hea_bench-0.1.0/src/hea_bench/descriptors/melting.py +47 -0
  57. hea_bench-0.1.0/src/hea_bench/descriptors/miedema.py +84 -0
  58. hea_bench-0.1.0/src/hea_bench/descriptors/omega.py +77 -0
  59. hea_bench-0.1.0/src/hea_bench/descriptors/size.py +80 -0
  60. hea_bench-0.1.0/src/hea_bench/descriptors/vec.py +56 -0
  61. hea_bench-0.1.0/src/hea_bench/evaluate.py +256 -0
  62. hea_bench-0.1.0/src/hea_bench/rules/__init__.py +15 -0
  63. hea_bench-0.1.0/src/hea_bench/rules/guo_vec.py +48 -0
  64. hea_bench-0.1.0/src/hea_bench/rules/yang_omega.py +38 -0
  65. hea_bench-0.1.0/src/hea_bench/rules/yeh_smix.py +37 -0
  66. hea_bench-0.1.0/src/hea_bench/rules/zhang_delta.py +32 -0
  67. hea_bench-0.1.0/tests/__init__.py +0 -0
  68. hea_bench-0.1.0/tests/test_borg_loader.py +76 -0
  69. hea_bench-0.1.0/tests/test_composition.py +78 -0
  70. hea_bench-0.1.0/tests/test_consolidate.py +170 -0
  71. hea_bench-0.1.0/tests/test_coverage.py +96 -0
  72. hea_bench-0.1.0/tests/test_descriptors_simple.py +122 -0
  73. hea_bench-0.1.0/tests/test_diagnostic_stats.py +139 -0
  74. hea_bench-0.1.0/tests/test_entropy.py +45 -0
  75. hea_bench-0.1.0/tests/test_evaluate.py +171 -0
  76. hea_bench-0.1.0/tests/test_miedema_and_omega.py +124 -0
  77. hea_bench-0.1.0/tests/test_pair_enthalpies.py +95 -0
  78. hea_bench-0.1.0/tests/test_pei_loader.py +89 -0
  79. hea_bench-0.1.0/tests/test_peivaste_loader.py +113 -0
  80. hea_bench-0.1.0/tests/test_release_artifacts.py +58 -0
  81. hea_bench-0.1.0/tests/test_rules.py +91 -0
  82. hea_bench-0.1.0/tests/test_smoke.py +17 -0
  83. hea_bench-0.1.0/web/README.md +61 -0
  84. hea_bench-0.1.0/web/dist/hea_bench-0.1.0-py3-none-any.whl +0 -0
  85. hea_bench-0.1.0/web/index.html +230 -0
@@ -0,0 +1,39 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ fail-fast: false
14
+ matrix:
15
+ python-version: ["3.10", "3.11", "3.12"]
16
+
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python ${{ matrix.python-version }}
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: ${{ matrix.python-version }}
24
+
25
+ - name: Install package with dev and data extras
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install -e ".[dev,data]"
29
+
30
+ - name: Lint with ruff
31
+ run: ruff check src tests
32
+
33
+ - name: Run test suite
34
+ run: pytest tests/ -q
35
+ # Tests that depend on the pointer-only Peivaste dataset skip
36
+ # automatically when that file is absent (it is not committed
37
+ # because the upstream repository declares no license). The
38
+ # Borg and Pei datasets are committed, so their loader tests
39
+ # and all benchmark-level tests run in CI.
@@ -0,0 +1,64 @@
1
+ # Python build artifacts
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ build/
8
+ /dist/
9
+ .eggs/
10
+ pip-wheel-metadata/
11
+
12
+ # But DO commit the wheel served by the Pyodide frontend so the demo
13
+ # "just works" after a clone. Rebuild this with:
14
+ # python -m pip wheel . --no-deps -w web/dist
15
+ !web/dist/
16
+ !web/dist/*.whl
17
+
18
+ # Virtual environments
19
+ .venv/
20
+ venv/
21
+ env/
22
+ ENV/
23
+
24
+ # Test / coverage
25
+ .pytest_cache/
26
+ .coverage
27
+ .coverage.*
28
+ htmlcov/
29
+ .tox/
30
+ .nox/
31
+ coverage.xml
32
+
33
+ # Linting / type-checking caches
34
+ .ruff_cache/
35
+ .mypy_cache/
36
+
37
+ # IDE / editor cruft
38
+ .vscode/
39
+ .idea/
40
+ *.swp
41
+ .DS_Store
42
+ Thumbs.db
43
+ desktop.ini
44
+
45
+ # Claude Code local settings
46
+ .claude/
47
+
48
+ # Working scratch
49
+ .data-scratch/
50
+ _scratch/
51
+ data/raw/_scratch/
52
+
53
+ # Per-source gitignores (only block sources whose upstream license forbids
54
+ # or does not grant redistribution — see data/raw/README.md). Sources released under CC-BY,
55
+ # MIT, Apache, etc. are mirrored directly; not listed here.
56
+ #
57
+ # Peivaste (Iman-Peivaste/ML_HEAs_Phase_Dataset) — no upstream license declared
58
+ data/raw/peivaste/*.csv
59
+ data/raw/peivaste/*.xlsx
60
+ data/raw/peivaste/*.ipynb
61
+
62
+ # Built docs
63
+ docs/_build/
64
+ site/
@@ -0,0 +1,205 @@
1
+ # AGENTS.md
2
+
3
+ Machine-oriented usage guide for `hea-bench`. If you are an AI coding
4
+ agent integrating this library into another project, read this first.
5
+ It tells you the public API, exact return types and units, the
6
+ fastest path to each common task, and the mistakes to avoid. Every
7
+ snippet here is copy-pasteable and was checked against the shipped
8
+ code.
9
+
10
+ ## What this library is
11
+
12
+ `hea-bench` does two separate things:
13
+
14
+ 1. **Descriptors + rules** — pure functions that compute the six
15
+ canonical high-entropy-alloy (HEA) phase descriptors for a
16
+ composition, and the four canonical empirical phase-prediction
17
+ rules wrapped as classifiers.
18
+ 2. **A benchmark** — a versioned, deduplicated dataset of 7,784
19
+ experimentally characterized compositions with per-row provenance,
20
+ plus the machinery to score any rule or model against it with
21
+ diagnostic statistics.
22
+
23
+ It is composition-only and dependency-free in its core. Use it to
24
+ screen candidate alloys, to compute descriptors as features, or as a
25
+ fixed reference benchmark to evaluate a new phase-prediction method.
26
+
27
+ ## Install and import
28
+
29
+ ```bash
30
+ pip install hea-bench
31
+ ```
32
+
33
+ ```python
34
+ import hea_bench as hb
35
+ ```
36
+
37
+ Python >= 3.10. No required runtime dependencies for the core. The
38
+ `[data]` and `[dev]` extras add only optional tooling (pandas/matplotlib for data work; pytest, pytest-cov, and ruff for development).
39
+
40
+ ## The one mental model you need
41
+
42
+ A **composition is a plain `dict`** mapping element symbol to amount:
43
+ `{"Co": 0.2, "Cr": 0.2, "Fe": 0.2, "Mn": 0.2, "Ni": 0.2}`. Amounts do
44
+ **not** need to sum to 1; they are normalized internally, so
45
+ `{"Al": 1, "Co": 1, "Cr": 1}` is equiatomic AlCoCr. Every descriptor
46
+ and rule accepts this dict directly. You rarely need anything else.
47
+
48
+ ## Descriptors (pure functions, exact units)
49
+
50
+ All take a composition dict (or the result of `parse_formula`) and
51
+ return a float.
52
+
53
+ | Call | Returns | Unit | Notes |
54
+ |---|---|---|---|
55
+ | `hb.smix(comp)` | mixing entropy | J/(mol·K) | `-R Σ cᵢ ln cᵢ` |
56
+ | `hb.delta(comp)` | atomic-size mismatch | **percent** (e.g. 3.164, not 0.03) | |
57
+ | `hb.vec(comp)` | valence electron concentration | electrons | linear mean |
58
+ | `hb.melting_temperature(comp)` | average melting point | K | rule-of-mixtures |
59
+ | `hb.mixing_enthalpy(comp)` | Miedema mixing enthalpy | **kJ/mol** | semi-empirical estimate |
60
+ | `hb.omega(comp)` | Yang-Zhang Ω | dimensionless | `Tm·ΔS / \|ΔH\|` |
61
+
62
+ ```python
63
+ cantor = {"Co": 0.2, "Cr": 0.2, "Fe": 0.2, "Mn": 0.2, "Ni": 0.2}
64
+ hb.smix(cantor) # 13.381 (= R·ln 5)
65
+ hb.delta(cantor) # 3.164 (percent)
66
+ hb.vec(cantor) # 8.0
67
+ hb.melting_temperature(cantor) # 1801.2 (K)
68
+ hb.mixing_enthalpy(cantor) # -4.16 (kJ/mol)
69
+ hb.omega(cantor) # 5.794
70
+ ```
71
+
72
+ These six values for the Cantor alloy are pinned in the regression
73
+ suite; treat them as the canonical sanity check.
74
+
75
+ ## Parsing formula strings
76
+
77
+ If you have a string rather than a dict, parse it first. The parser
78
+ accepts compact formulas (`"CoCrFeMnNi"`), subscripted formulas
79
+ (`"CuCoMn1.75NiFe0.25"`), and space-separated amounts.
80
+
81
+ ```python
82
+ comp = hb.parse_formula("CoCrFeMnNi") # dict-like Composition
83
+ hb.normalize(comp) # explicit mole-fraction dict
84
+ hb.smix(comp) # descriptors accept it directly
85
+ ```
86
+
87
+ ## Rules (classifiers)
88
+
89
+ ```python
90
+ from hea_bench.rules import yeh_smix, zhang_delta, guo_vec, yang_omega
91
+ ```
92
+
93
+ Each module exposes `predict(composition, ...)`, a `DESCRIPTION`
94
+ string, and (for the tunable ones) a `DEFAULT_THRESHOLD`. Return
95
+ values are strings:
96
+
97
+ | Rule | Call | Returns one of | Threshold arg |
98
+ |---|---|---|---|
99
+ | Yeh ΔS_mix | `yeh_smix.predict(comp)` | `"HEA"` / `"MEA"` / `"dilute"` | fixed (descriptive) |
100
+ | Zhang δ | `zhang_delta.predict(comp, threshold=6.5)` | `"single-phase"` / `"multi-phase"` | percent, default 6.5 |
101
+ | Yang Ω | `yang_omega.predict(comp, threshold=1.1)` | `"single-phase"` / `"multi-phase"` | default 1.1 |
102
+ | Guo-Liu VEC | `guo_vec.predict(comp)` | `"FCC"` / `"BCC"` / `"mixed"` | fixed bounds 8.0 / 6.87 |
103
+
104
+ ```python
105
+ zhang_delta.predict(cantor) # 'single-phase'
106
+ guo_vec.predict(cantor) # 'FCC'
107
+ yeh_smix.predict(cantor) # 'HEA'
108
+ ```
109
+
110
+ **Important:** these rules are weak classifiers. On the consolidated
111
+ benchmark both binary rules collapse to "predict single-phase almost
112
+ always" (Youden's J of 0.075 and 0.032). Do not treat a rule's output
113
+ as ground truth. If you need a confidence-aware answer, evaluate
114
+ against the benchmark (below) rather than trusting a single predict().
115
+
116
+ ## Scoring against the benchmark
117
+
118
+ ```python
119
+ from hea_bench.evaluate import build_report
120
+ report = build_report()
121
+ ```
122
+
123
+ `report` is a dict with keys `csv_path`, `n_rows_loaded`, and `rules`.
124
+ `report["rules"]` has four entries: `zhang_delta_6_5`,
125
+ `yang_omega_1_1`, `guo_vec_stratified`, `yeh_smix_descriptive`. Each
126
+ entry is a dict of statistics:
127
+
128
+ ```python
129
+ r = report["rules"]["zhang_delta_6_5"]
130
+ r["accuracy"] # 0.5670
131
+ r["sensitivity"] # 0.990
132
+ r["specificity"] # 0.085
133
+ r["youden_j"] # 0.075
134
+ r["accuracy_ci95"] # (low, high) Wilson 95% interval
135
+ r["confusion"] # confusion matrix
136
+ # also: n, n_positive_observed, n_negative_observed,
137
+ # true_positive, false_positive, true_negative, false_negative,
138
+ # positive_label
139
+ ```
140
+
141
+ ## Threshold sweeps / ROC
142
+
143
+ ```python
144
+ from hea_bench.classifiers.diagnostic_stats import roc_sweep
145
+ ```
146
+
147
+ Use this to find the accuracy-optimal or Youden-J-optimal threshold
148
+ for a tunable rule. The shipped recalibration finding is that the Zhang
149
+ rule's Youden-J-optimal threshold is δ < 2.5% (vs the canonical 6.5%);
150
+ reproduce that result with a sweep rather than hard-coding it.
151
+
152
+ ## Command line
153
+
154
+ ```bash
155
+ hea-bench --version
156
+ python -m hea_bench.evaluate # all four rules vs v0.1.0 benchmark
157
+ python -m hea_bench.benchmark.coverage # element-coverage analysis
158
+ ```
159
+
160
+ On Windows set `PYTHONIOENCODING=utf-8` before these if the output
161
+ contains Greek/math symbols, to avoid a cp1252 encode error.
162
+
163
+ ## Data layout
164
+
165
+ - `data/consolidated/v0.1.0/consolidated.csv` — the benchmark, 7,784
166
+ rows. Join key is `composition_key`; `canonical_phase` is one of
167
+ BCC/FCC/HCP/multi-phase (blank when sources conflict); `sources` is
168
+ semicolon-separated provenance; `has_conflict` flags the 100
169
+ cross-source disagreements.
170
+ - `data/consolidated/v0.1.0/{rule_baselines,coverage_report,manifest}.json`
171
+ — committed outputs, regenerated by the evaluate/coverage modules.
172
+ - `data/raw/<source>/` — per-source provenance, licenses, SHA-256s.
173
+ - `src/hea_bench/descriptors/data/` — vendored elemental table (24
174
+ elements) and the matminer Miedema pair table (75 elements).
175
+
176
+ ## Coverage limit (read before scaling up)
177
+
178
+ The elemental data table covers **24 elements**, so 86.7% of the
179
+ benchmark is scorable by every descriptor. Compositions containing
180
+ elements outside the table (Mg, C, Zn, B, Sn, Re, and others) will
181
+ not be fully scorable. Check coverage with
182
+ `python -m hea_bench.benchmark.coverage` before assuming a dataset is
183
+ fully evaluable.
184
+
185
+ ## Things not to do
186
+
187
+ - **Do not edit the pinned numbers** in `tests/` to make a test pass.
188
+ Those values are derived from the data and code; a changed number
189
+ means real drift you should explain, not silence.
190
+ - **Do not add elements to the elemental table** without a citable
191
+ source for the atomic radius, VEC, and melting point. Unsourced
192
+ values corrupt every descriptor that uses them.
193
+ - **Do not treat `mixing_enthalpy` as a measured quantity.** It is a
194
+ semi-empirical Miedema estimate with known systematic error for
195
+ some pairs.
196
+ - **Do not assume composition fixes phase.** The benchmark is
197
+ composition-only; the same composition can form different phases
198
+ depending on processing history.
199
+
200
+ ## Verifying your integration
201
+
202
+ After wiring this in, confirm the Cantor sanity values
203
+ (`smix=13.381`, `delta=3.164`, `vec=8.0`, `melting_temperature=1801.2`, `mixing_enthalpy=-4.16`, `omega=5.794`) and run the
204
+ test suite (`python -m pytest -q`, 155 tests). If those match, your
205
+ environment is using the canonical implementation correctly.
@@ -0,0 +1,36 @@
1
+ cff-version: 1.2.0
2
+ title: "hea-bench: An open benchmark suite for high-entropy alloy phase prediction"
3
+ message: >-
4
+ If you use this software, please cite it using the metadata from this file.
5
+ type: software
6
+ authors:
7
+ - given-names: David
8
+ family-names: Fieser
9
+ orcid: 'https://orcid.org/0009-0007-5754-4331'
10
+ abstract: >-
11
+ hea-bench is a standardized, reproducible benchmark suite for high-entropy
12
+ alloy phase prediction. It consolidates published open HEA phase-label
13
+ datasets into a single curated benchmark, provides reference baseline
14
+ implementations of the canonical empirical descriptors and phase-prediction
15
+ rules treated as diagnostic classifiers, and ships a standardized evaluation
16
+ harness so any user-supplied model can be scored against the same dataset
17
+ and metrics.
18
+ keywords:
19
+ - high-entropy alloys
20
+ - phase prediction
21
+ - materials informatics
22
+ - benchmark
23
+ - Miedema model
24
+ - thermodynamic descriptors
25
+ license: MIT
26
+ repository-code: 'https://github.com/dfieser/hea-bench'
27
+ version: 0.1.0
28
+ date-released: '2026-05-22'
29
+ doi: 10.5281/zenodo.20346288
30
+ identifiers:
31
+ - type: doi
32
+ value: 10.5281/zenodo.20346287
33
+ description: Concept DOI (resolves to all versions of hea-bench)
34
+ - type: doi
35
+ value: 10.5281/zenodo.20346288
36
+ description: Version DOI (this release, v0.1.0)
@@ -0,0 +1,56 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation
6
+ in our community a harassment-free experience for everyone, regardless
7
+ of age, body size, visible or invisible disability, ethnicity, sex
8
+ characteristics, gender identity and expression, level of experience,
9
+ education, socio-economic status, nationality, personal appearance,
10
+ race, religion, or sexual identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open,
13
+ welcoming, diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment
18
+ include demonstrating empathy and kindness toward other people, being
19
+ respectful of differing opinions and experiences, giving and
20
+ gracefully accepting constructive feedback, accepting responsibility
21
+ and apologizing to those affected by our mistakes, and focusing on
22
+ what is best for the overall community.
23
+
24
+ Examples of unacceptable behavior include the use of sexualized
25
+ language or imagery and unwelcome sexual attention, trolling,
26
+ insulting or derogatory comments, public or private harassment,
27
+ publishing others' private information without explicit permission,
28
+ and other conduct which could reasonably be considered inappropriate
29
+ in a professional setting.
30
+
31
+ ## Enforcement Responsibilities
32
+
33
+ Community leaders are responsible for clarifying and enforcing
34
+ standards of acceptable behavior and will take appropriate and fair
35
+ corrective action in response to any behavior that they deem
36
+ inappropriate, threatening, offensive, or harmful.
37
+
38
+ ## Scope
39
+
40
+ This Code of Conduct applies within all community spaces and also
41
+ applies when an individual is officially representing the community
42
+ in public spaces.
43
+
44
+ ## Enforcement
45
+
46
+ Instances of abusive, harassing, or otherwise unacceptable behavior
47
+ may be reported to the maintainer responsible for enforcement at
48
+ `davjfies@gmail.com`. All complaints will be reviewed and
49
+ investigated promptly and fairly.
50
+
51
+ ## Attribution
52
+
53
+ This Code of Conduct is adapted from the
54
+ [Contributor Covenant](https://www.contributor-covenant.org),
55
+ version 2.1, available at
56
+ https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
@@ -0,0 +1,70 @@
1
+ # Contributing to hea-bench
2
+
3
+ Contributions are welcome, whether bug reports, new datasets, new
4
+ descriptors or rules, additional elemental data, or documentation
5
+ improvements.
6
+
7
+ ## Reporting issues and seeking support
8
+
9
+ - **Bugs and feature requests:** open an issue on the GitHub issue
10
+ tracker. For a bug, please include the `hea-bench` version, your
11
+ Python version and operating system, a minimal composition or
12
+ command that reproduces the problem, and the full traceback.
13
+ - **Questions and support:** open a GitHub issue with the
14
+ `question` label, or email the maintainer at `davjfies@gmail.com`.
15
+
16
+ ## Development setup
17
+
18
+ ```bash
19
+ git clone https://github.com/dfieser/hea-bench
20
+ cd hea-bench
21
+ pip install -e ".[dev,data]"
22
+ python -m pytest tests/ -q
23
+ ```
24
+
25
+ The core package is dependency-free. The `dev` extra adds `pytest`,
26
+ `pytest-cov`, and `ruff`. The `data` extra adds `pandas` for tabular
27
+ work.
28
+
29
+ ## Tests
30
+
31
+ Every numerical result reported in the documentation and in the
32
+ paper is pinned in the test suite. When you change descriptor code,
33
+ the consolidator, or the vendored data, run the suite and update the
34
+ affected pinned values only after confirming the new number by an
35
+ independent measurement. The repository convention is to measure
36
+ first and assert second, never to write an expected value from
37
+ memory.
38
+
39
+ When adding a new descriptor, rule, or loader, add tests that cover
40
+ at least one canonical reference case (the equiatomic Cantor alloy
41
+ CoCrFeMnNi is the standard sanity check) plus the error paths.
42
+
43
+ ## Adding a dataset
44
+
45
+ Per-source data lives under `data/raw/<source>/` with a README that
46
+ records the citation, the license, the acquisition date, and a
47
+ SHA-256 of the file. Datasets under permissive licenses (CC-BY, CC0,
48
+ MIT, Apache, BSD) are mirrored directly. Datasets without a
49
+ redistributable license are pointer-only: commit a `fetch.py` that
50
+ downloads the file on request, and add the file pattern to
51
+ `.gitignore`. See `data/raw/README.md` for the policy.
52
+
53
+ ## Adding elemental coverage
54
+
55
+ To extend the elemental data table, add the element to the table in
56
+ `src/hea_bench/descriptors/data/elemental.py` with a source comment
57
+ for each property, then run the coverage analysis to confirm the
58
+ benchmark coverage improved as expected.
59
+
60
+ ## Pull requests
61
+
62
+ - Open PRs against the default branch.
63
+ - Keep the test suite green. New functionality needs new tests.
64
+ - Run `ruff` for linting before submitting.
65
+ - Describe what changed and why in the PR description.
66
+
67
+ ## Code of conduct
68
+
69
+ Participation in this project is governed by the
70
+ [Code of Conduct](CODE_OF_CONDUCT.md).
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 David Fieser
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to grant persons to whom the Software is furnished
10
+ to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.