ezga-lib 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. ezga_lib-0.0.1/PKG-INFO +393 -0
  2. ezga_lib-0.0.1/README.md +359 -0
  3. ezga_lib-0.0.1/pyproject.toml +74 -0
  4. ezga_lib-0.0.1/src/ezga/DoE/DoE.py +938 -0
  5. ezga_lib-0.0.1/src/ezga/DoE/__init__.py +0 -0
  6. ezga_lib-0.0.1/src/ezga/DoE/constraint.py +326 -0
  7. ezga_lib-0.0.1/src/ezga/__init__.py +24 -0
  8. ezga_lib-0.0.1/src/ezga/bayesian_optimization/__init__.py +2 -0
  9. ezga_lib-0.0.1/src/ezga/bayesian_optimization/bayesian_optimization.py +877 -0
  10. ezga_lib-0.0.1/src/ezga/classification/__init__.py +6 -0
  11. ezga_lib-0.0.1/src/ezga/classification/clustering.py +695 -0
  12. ezga_lib-0.0.1/src/ezga/cli/__init__.py +14 -0
  13. ezga_lib-0.0.1/src/ezga/cli/__main__.py +5 -0
  14. ezga_lib-0.0.1/src/ezga/cli/resume.py +12 -0
  15. ezga_lib-0.0.1/src/ezga/cli/run.py +254 -0
  16. ezga_lib-0.0.1/src/ezga/cli/runners.py +76 -0
  17. ezga_lib-0.0.1/src/ezga/cli/wizard.py +433 -0
  18. ezga_lib-0.0.1/src/ezga/convergence/__init__.py +6 -0
  19. ezga_lib-0.0.1/src/ezga/convergence/convergence.py +220 -0
  20. ezga_lib-0.0.1/src/ezga/core/config.py +207 -0
  21. ezga_lib-0.0.1/src/ezga/core/context.py +265 -0
  22. ezga_lib-0.0.1/src/ezga/core/engine.py +471 -0
  23. ezga_lib-0.0.1/src/ezga/core/interfaces.py +85 -0
  24. ezga_lib-0.0.1/src/ezga/core/population.py +506 -0
  25. ezga_lib-0.0.1/src/ezga/evaluator/evaluator.py +135 -0
  26. ezga_lib-0.0.1/src/ezga/evaluator/features.py +258 -0
  27. ezga_lib-0.0.1/src/ezga/evaluator/objective.py +986 -0
  28. ezga_lib-0.0.1/src/ezga/examples/config.xyz +6 -0
  29. ezga_lib-0.0.1/src/ezga/examples/config.yaml +153 -0
  30. ezga_lib-0.0.1/src/ezga/factory.py +188 -0
  31. ezga_lib-0.0.1/src/ezga/hise/manager.py +457 -0
  32. ezga_lib-0.0.1/src/ezga/io/__init__.py +6 -0
  33. ezga_lib-0.0.1/src/ezga/io/config_loader.py +446 -0
  34. ezga_lib-0.0.1/src/ezga/io/io_manager.py +85 -0
  35. ezga_lib-0.0.1/src/ezga/io/snapshot_recorder.py +188 -0
  36. ezga_lib-0.0.1/src/ezga/metric/__init__.py +0 -0
  37. ezga_lib-0.0.1/src/ezga/metric/information_ensemble_metrics.py +718 -0
  38. ezga_lib-0.0.1/src/ezga/selection/__init__.py +7 -0
  39. ezga_lib-0.0.1/src/ezga/selection/multiobjective_selector.py +785 -0
  40. ezga_lib-0.0.1/src/ezga/selection/ranking.py +287 -0
  41. ezga_lib-0.0.1/src/ezga/selection/selector.py +860 -0
  42. ezga_lib-0.0.1/src/ezga/simulator/ase_calculator.py +271 -0
  43. ezga_lib-0.0.1/src/ezga/simulator/mace_calculator.py +33 -0
  44. ezga_lib-0.0.1/src/ezga/simulator/simulator.py +253 -0
  45. ezga_lib-0.0.1/src/ezga/sync/__init__.py +0 -0
  46. ezga_lib-0.0.1/src/ezga/sync/agenticsync.py +378 -0
  47. ezga_lib-0.0.1/src/ezga/thermostat/__init__.py +0 -0
  48. ezga_lib-0.0.1/src/ezga/thermostat/thermostat.py +398 -0
  49. ezga_lib-0.0.1/src/ezga/utils/__init__.py +0 -0
  50. ezga_lib-0.0.1/src/ezga/utils/generate_initial_population.py +15 -0
  51. ezga_lib-0.0.1/src/ezga/utils/helper_functions.py +267 -0
  52. ezga_lib-0.0.1/src/ezga/utils/lineage.py +57 -0
  53. ezga_lib-0.0.1/src/ezga/utils/logger.py +63 -0
  54. ezga_lib-0.0.1/src/ezga/utils/planes.py +114 -0
  55. ezga_lib-0.0.1/src/ezga/utils/pourbaix_diagram.py +962 -0
  56. ezga_lib-0.0.1/src/ezga/utils/structure_hash_map.py +769 -0
  57. ezga_lib-0.0.1/src/ezga/variation/crossover.py +1333 -0
  58. ezga_lib-0.0.1/src/ezga/variation/mutation.py +941 -0
  59. ezga_lib-0.0.1/src/ezga/variation/variation.py +1465 -0
  60. ezga_lib-0.0.1/src/ezga/visualization/__init__.py +0 -0
  61. ezga_lib-0.0.1/src/ezga/visualization/plot_evolution.py +1224 -0
  62. ezga_lib-0.0.1/src/ezga/visualization/plotter.py +1225 -0
  63. ezga_lib-0.0.1/src/ezga/visualization/snapshot_plotter.py +285 -0
@@ -0,0 +1,393 @@
1
+ Metadata-Version: 2.4
2
+ Name: ezga-lib
3
+ Version: 0.0.1
4
+ Summary: A modular multi-objective genetic algorithm framework for atomistic structure exploration
5
+ Project-URL: Homepage, https://thgitlab.rz-berlin.mpg.de/lombardi/ezga
6
+ Project-URL: Issues, https://thgitlab.rz-berlin.mpg.de/lombardi/ezga/issues
7
+ Author-email: Juan Manuel Lombardi <lombardi@fhi-berlin.mpg.de>
8
+ License: GPL-3.0-only
9
+ Keywords: DFT,MLIP,genetic algorithm,materials,multi-objective
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
16
+ Classifier: Topic :: Scientific/Engineering :: Physics
17
+ Requires-Python: >=3.8
18
+ Requires-Dist: ase>=3.22.1
19
+ Requires-Dist: matplotlib>=3.4
20
+ Requires-Dist: numpy>=1.26
21
+ Requires-Dist: pydantic>=2.10
22
+ Requires-Dist: rich>=13.0
23
+ Requires-Dist: ruamel-yaml>=0.18
24
+ Requires-Dist: sage-lib>=0.1.6.3
25
+ Requires-Dist: scikit-learn>=1.4
26
+ Requires-Dist: scipy>=1.12
27
+ Requires-Dist: spglib
28
+ Requires-Dist: typer>=0.12
29
+ Requires-Dist: typing-extensions>=4.6
30
+ Provides-Extra: tests
31
+ Requires-Dist: pytest-cov>=4; extra == 'tests'
32
+ Requires-Dist: pytest>=7; extra == 'tests'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # EZGA — Evolutionary Structure Explorer (ezga-lib)
36
+
37
+ A modular **multi-objective genetic algorithm** (GA) framework for **atomistic structure exploration**, with first-class YAML configuration, plugin-style extensibility, and a **Hierarchical Supercell Escalation (HiSE)** workflow for coarse-to-fine supercell searches.
38
+
39
+ > PyPI name: `ezga-lib`
40
+ > CLI entry point: `ezga` (via `ezga.cli.run:app`)
41
+ > License: GPL-3.0-only
42
+
43
+ ---
44
+
45
+ ## Features
46
+
47
+ * **Clean YAML → Runtime**: Pydantic-v2 validated configs; dotted imports & factory specs are materialized into live Python callables.
48
+ * **Multi-objective selection**: Boltzmann (default) plus alternative methods; repulsion & diversity control.
49
+ * **Rich variation operators**: Tunable mutation, crossover, and user-defined operators.
50
+ * **ASE integration**: Simple shorthand to wrap ASE calculators.
51
+ * **HiSE manager**: Orchestrates multi-stage, coarse-to-fine supercell exploration. Lifts previous results via:
52
+
53
+ * `tile` (Partition-based `generate_supercell`),
54
+ * `best_compatible` (find largest divisor supercell among previous stages),
55
+ * `ase` (fallback tiling using ASE).
56
+ * **Agentic mailbox**: Stage-scoped shared directory for multi-agent workflows.
57
+ * **Pretty CLI summaries**: Rich panels with compact configuration overviews.
58
+
59
+ ---
60
+
61
+ ## Installation
62
+
63
+ ### From source (recommended during development)
64
+
65
+ ```bash
66
+ git clone <your-repo-url>
67
+ cd ezga
68
+ pip install -U pip
69
+ pip install -e .
70
+ ```
71
+
72
+ This installs the `ezga` command line app.
73
+
74
+ ### From PyPI (when available)
75
+
76
+ ```bash
77
+ pip install ezga-lib
78
+ ```
79
+
80
+ ---
81
+
82
+ ## Quick Start
83
+
84
+ Create a minimal `ezga.yaml`:
85
+
86
+ ```yaml
87
+ max_generations: 100
88
+ output_path: demo/run
89
+
90
+ population:
91
+ dataset_path: config.xyz
92
+ filter_duplicates: true
93
+
94
+ evaluator:
95
+ features_funcs:
96
+ factory: ezga.evaluator.features:feature_composition_vector
97
+ args: [["C","H"]] # features are composition counts
98
+ objectives_funcs:
99
+ - ezga.evaluator.objective:objective_energy
100
+
101
+ multiobjective:
102
+ size: 256
103
+ selection_method: boltzmann
104
+ sampling_temperature: 0.9
105
+ objective_temperature: 0.6
106
+ random_seed: 73
107
+
108
+ variation:
109
+ initial_mutation_rate: 3.0
110
+ crossover_probability: 0.1
111
+
112
+ simulator:
113
+ mode: sampling
114
+ calculator:
115
+ type: ase
116
+ class: ase.calculators.lj:LennardJones
117
+ kwargs: { epsilon: 0.0103, sigma: 3.4 } # ASE params
118
+ ```
119
+
120
+ Run:
121
+
122
+ ```bash
123
+ ezga validate -c ezga.yaml --strict
124
+ ezga once -c ezga.yaml
125
+ ```
126
+
127
+ ---
128
+
129
+ ## CLI
130
+
131
+ ```
132
+ ezga once -c <config.yaml>
133
+ ezga validate -c <config.yaml> [--strict]
134
+ ```
135
+
136
+ * `once`: Runs a single GA run, or delegates to **HiSE** when the YAML contains a `hise` block.
137
+ * `validate`: Validates and prints a rich summary; `--strict` also builds the engine to catch wiring errors.
138
+
139
+ ---
140
+
141
+ ## Configuration
142
+
143
+ ### GAConfig (high level)
144
+
145
+ * `population`: dataset paths, constraints, duplicate filtering, …
146
+ * `evaluator`: `features_funcs`, `objectives_funcs` (dotted, factory, or list)
147
+ * `multiobjective`: selection params (size, method, temperatures, metric, …)
148
+ * `variation`: mutation & crossover knobs
149
+ * `simulator`: mode & calculator (ASE shorthand supported)
150
+ * `convergence`, `hashmap`, `agentic`: execution support
151
+ * `hise` (optional): HiSE manager block (see below)
152
+
153
+ All sections are validated by Pydantic-v2; unknown fields are forbidden.
154
+
155
+ ### Dotted imports & factories
156
+
157
+ Anywhere you need a callable/object, you can write:
158
+
159
+ * **Dotted string**: `"package.module:attr"` or `"package.module.attr"`
160
+ * **Factory spec**:
161
+
162
+ ```yaml
163
+ key:
164
+ factory: "pkg.mod:build_something"
165
+ args: [1, 2]
166
+ kwargs: { flag: true }
167
+ ```
168
+ * **ASE shorthand** (calculator only):
169
+
170
+ ```yaml
171
+ simulator:
172
+ mode: sampling
173
+ calculator:
174
+ type: ase
175
+ class: ase.calculators.lj:LennardJones
176
+ kwargs: { epsilon: 0.0103, sigma: 3.4 }
177
+ ```
178
+
179
+ The loader resolves these into live Python objects before the run.
180
+
181
+ ---
182
+
183
+ ## Constraints (Design of Experiments)
184
+
185
+ You can provide constraint generators as factories. Example using a custom generator:
186
+
187
+ ```yaml
188
+ population:
189
+ constraints:
190
+ - factory: ezga.DoE.DoE:ConstraintGenerator.sum_in_range
191
+ args: [["C", "H"], 100, 100]
192
+ ```
193
+
194
+ > **Tip**
195
+ > Use `ezga.DoE.DoE:ConstraintGenerator.sum_in_range` (colon form).
196
+ > Avoid `ezga.DoE.DoE.ConstraintGenerator:sum_in_range` (that treats `ConstraintGenerator` as a module path).
197
+
198
+ If your constraint generator expects feature **names**, you can register a name→index mapping in your code (e.g., after features are known):
199
+
200
+ ```python
201
+ from ezga.DoE.DoE import ConstraintGenerator
202
+ ConstraintGenerator.set_name_mapping({"C": 0, "H": 1})
203
+ ```
204
+
205
+ ---
206
+
207
+ ## HiSE — Hierarchical Supercell Escalation
208
+
209
+ HiSE runs a sequence of stages over growing supercells and **replaces** the base input at each stage with a lifted dataset derived from previous results.
210
+
211
+ ### Example
212
+
213
+ ```yaml
214
+ hise:
215
+ supercells:
216
+ - [1,1,1]
217
+ - [2,1,1]
218
+ - [2,2,1]
219
+
220
+ input_from: final_dataset # or: latest_generation
221
+ stage_dir_pattern: "supercell_{a}_{b}_{c}"
222
+ restart: false
223
+ carry: all
224
+ reseed_fraction: 1.0
225
+ lift_method: tile # tile | best_compatible | ase
226
+
227
+ overrides:
228
+ multiobjective.size: [10, 20, 30]
229
+ max_generations: [ 2, 3, 5]
230
+ variation.initial_mutation_rate: [ 1, 2, 3]
231
+ population.constraints:
232
+ - factory: ezga.DoE.DoE:ConstraintGenerator.sum_in_range
233
+ args: [['C', 'H'], 100, 100]
234
+ - factory: ezga.DoE.DoE:ConstraintGenerator.sum_in_range
235
+ args: [['C', 'H'], 200, 200]
236
+ - factory: ezga.DoE.DoE:ConstraintGenerator.sum_in_range
237
+ args: [['C', 'H'], 400, 400]
238
+ ```
239
+
240
+ ### Lift methods
241
+
242
+ * **`tile`**: Partition-based lifting using
243
+ `container.AtomPositionManager.generate_supercell(repeat=(ra, rb, rc))`
244
+ (requires `sage_lib.partition.Partition`).
245
+ * **`best_compatible`**: Scans *all* previous stages and picks the largest supercell (by volume) that divides the target coordinate-wise; lifts via Partition.
246
+ * **`ase`**: Simple tiling via `ASE.Atoms.repeat`. No Partition dependency (fallback).
247
+
248
+ ### Input source
249
+
250
+ * `final_dataset`: uses `stage_root/config.xyz`
251
+ * `latest_generation`: concatenates `stage_root/generation/*/config.xyz`
252
+
253
+ ### Stage directories
254
+
255
+ For each supercell `(a,b,c)` the HiSE manager creates:
256
+
257
+ ```
258
+ <output_path>/
259
+ supercell_{a}_{b}_{c}/
260
+ input_lifted.xyz # if lifting writes to disk
261
+ config.xyz # final dataset (engine may write this)
262
+ generation/...
263
+ ```
264
+
265
+ ### Agentic shared dir
266
+
267
+ If `agentic.shared_dir` is set in the base config, each stage receives a **stage-scoped** mailbox:
268
+
269
+ ```
270
+ <base_shared>/<relative_stage_dir>/
271
+ ```
272
+
273
+ All agents of a given stage share this directory.
274
+
275
+ ---
276
+
277
+ ## Directory Layout (source tree)
278
+
279
+ ```
280
+ src/ezga/
281
+ cli/
282
+ run.py # Typer app (ezga entry point)
283
+ runners.py # once / validate / hise dispatchers
284
+ core/
285
+ config.py # GAConfig + submodels (Pydantic v2)
286
+ engine.py # GA main loop
287
+ population.py # population & DoE validation
288
+ evaluator/
289
+ features.py, objective.py # feature/objective factories
290
+ DoE/
291
+ DoE.py # ConstraintGenerator and DoE
292
+ hise/
293
+ manager.py # HiSE orchestrator
294
+ io/
295
+ config_loader.py # YAML loader & materializer
296
+ simulator/
297
+ ase_calculator.py # ASE adapter (shorthand support)
298
+ ```
299
+
300
+ ---
301
+
302
+ ## Logging & Output
303
+
304
+ * Logs and artifacts are written under `output_path` (and per-stage subdirs in HiSE).
305
+ * The CLI prints a rich summary of the configuration before running.
306
+
307
+ ---
308
+
309
+ ## Developing
310
+
311
+ ### Tests
312
+
313
+ We use `pytest`. Example structure:
314
+
315
+ ```
316
+ tests/
317
+ test_loader.py
318
+ test_hise_manager.py
319
+ test_constraints.py
320
+ conftest.py
321
+ ```
322
+
323
+ Run:
324
+
325
+ ```bash
326
+ pip install -e ".[tests]" # installs the "tests" extra (pytest, pytest-cov) declared in pyproject.toml
327
+ pytest -q
328
+ ```
329
+
330
+ Example unit test for loader materialization:
331
+
332
+ ```python
333
+ # tests/test_loader.py
334
+ from ezga.io.config_loader import _materialize_factories
335
+
336
+ def test_factory_resolution():
337
+ spec = {"factory": "math:prod", "args": [[2,3,4]]}
338
+ fn = _materialize_factories(spec)
339
+ assert callable(fn)
340
+ assert fn([2,3,4]) == 24
341
+ ```
342
+
343
+ ### Code style
344
+
345
+ * Type hints everywhere.
346
+ * Docstrings follow **Google style**.
347
+ * Avoid side effects at import time; factories should be cheap to resolve.
348
+
349
+ ---
350
+
351
+ ## Troubleshooting
352
+
353
+ * **`TypeError: 'dict' object is not callable`**
354
+ You likely passed a factory **dict** (not materialized) directly into a runtime component. Ensure your keys live in the YAML under sections that the loader post-processes, or put them under `hise.overrides` if you need stage-specific values. The loader will materialize `population.constraints`, `evaluator.*`, `mutation_funcs`, `crossover_funcs`, and `simulator.calculator`.
355
+
356
+ * **`ModuleNotFoundError` or wrong dotted form**
357
+ Use colon form: `pkg.mod:attr` (preferred). For our DoE example:
358
+ `ezga.DoE.DoE:ConstraintGenerator.sum_in_range`.
359
+
360
+ * **Pydantic model errors**
361
+ Ensure `pydantic>=2.10` is installed (Pydantic v1 is not supported). Unknown fields are rejected (`extra='forbid'`).
362
+
363
+ * **Permission error exporting `input_lifted.xyz`**
364
+ Ensure the path is writable. The exporter writes a new file; if you manage files manually, don’t open the same file elsewhere.
365
+
366
+ ---
367
+
368
+ ## Roadmap
369
+
370
+ * Additional selection methods & visual diagnostics.
371
+ * More HiSE lift strategies (symmetry-aware mapping).
372
+ * Native viewers for generation trajectories.
373
+ * Optional async physics backends.
374
+
375
+ ---
376
+
377
+ ## Citation
378
+
379
+ If this software helps your research, please cite the repository (add DOI when available).
380
+
381
+ ---
382
+
383
+ ## License
384
+
385
+ GPL-3.0-only. See `LICENSE`.
386
+
387
+ ---
388
+
389
+ ## Acknowledgments
390
+
391
+ * **ASE** for atomistic infrastructure.
392
+ * **pydantic**, **typer**, **ruamel.yaml**, **rich** for the developer experience.
393
+ * **sage\_lib** for partition and supercell lifting utilities.