emmet-builders 0.87.0.dev15__tar.gz → 0.87.0.dev16__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (39)
  1. {emmet_builders-0.87.0.dev15/emmet_builders.egg-info → emmet_builders-0.87.0.dev16}/PKG-INFO +1 -1
  2. emmet_builders-0.87.0.dev16/emmet/builders/materials/provenance.py +188 -0
  3. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16/emmet_builders.egg-info}/PKG-INFO +1 -1
  4. emmet_builders-0.87.0.dev15/emmet/builders/materials/provenance.py +0 -151
  5. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/MANIFEST.in +0 -0
  6. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/__init__.py +0 -0
  7. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/base.py +0 -0
  8. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/__init__.py +0 -0
  9. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/absorption_spectrum.py +0 -0
  10. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/bonds.py +0 -0
  11. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/chemenv.py +0 -0
  12. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/corrected_entries.py +0 -0
  13. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/electronic_structure.py +0 -0
  14. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/linear_response.py +0 -0
  15. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/magnetism.py +0 -0
  16. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/oxidation_states.py +0 -0
  17. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/robocrys.py +0 -0
  18. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/similarity.py +0 -0
  19. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/summary.py +0 -0
  20. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/materials/thermo.py +0 -0
  21. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/py.typed +0 -0
  22. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/settings.py +0 -0
  23. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/utils.py +0 -0
  24. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/vasp/materials.py +0 -0
  25. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/vasp/mp_potcar_stats.json.gz +0 -0
  26. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet/builders/vasp/task_validator.py +0 -0
  27. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet_builders.egg-info/SOURCES.txt +0 -0
  28. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet_builders.egg-info/dependency_links.txt +0 -0
  29. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet_builders.egg-info/requires.txt +0 -0
  30. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/emmet_builders.egg-info/top_level.txt +0 -0
  31. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/pyproject.toml +0 -0
  32. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/deployment.txt +0 -0
  33. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.11.txt +0 -0
  34. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.11_extras.txt +0 -0
  35. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.12.txt +0 -0
  36. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.12_extras.txt +0 -0
  37. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.13.txt +0 -0
  38. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/requirements/ubuntu-latest_py3.13_extras.txt +0 -0
  39. {emmet_builders-0.87.0.dev15 → emmet_builders-0.87.0.dev16}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: emmet-builders
3
- Version: 0.87.0.dev15
3
+ Version: 0.87.0.dev16
4
4
  Summary: Builders for the Emmet Library
5
5
  Author-email: The Materials Project <feedback@materialsproject.org>
6
6
  License-Expression: BSD-3-Clause-LBNL
@@ -0,0 +1,188 @@
1
+ """Build provenance collection."""
2
+
3
+ import logging
4
+ from collections import defaultdict
5
+ from itertools import chain, groupby
6
+ from typing import Iterator
7
+
8
+ from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
9
+
10
+ from emmet.builders.base import BaseBuilderInput
11
+ from emmet.builders.settings import EmmetBuildSettings
12
+ from emmet.builders.utils import filter_map
13
+ from emmet.core.connectors.analysis import parse_cif
14
+ from emmet.core.connectors.icsd.client import IcsdClient
15
+ from emmet.core.connectors.icsd.enums import IcsdSubset
16
+ from emmet.core.provenance import DatabaseSNL, ProvenanceDoc
17
+
18
+ SETTINGS = EmmetBuildSettings()
19
+ structure_matcher = StructureMatcher(
20
+ ltol=SETTINGS.LTOL,
21
+ stol=SETTINGS.STOL,
22
+ comparator=ElementComparator(),
23
+ angle_tol=SETTINGS.ANGLE_TOL,
24
+ primitive_cell=True,
25
+ scale=True,
26
+ attempt_supercell=False,
27
+ allow_subset=False,
28
+ )
29
+
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def _get_snl_from_cif(cif_str: str, **kwargs) -> DatabaseSNL | None:
35
+ """Build a database SNL from a CIF plus its metadata.
36
+
37
+ NB: Only takes the first structure from a CIF.
38
+ While a CIF can technically contain many structures,
39
+ the ICSD usually only distributes CIFs with one structure
40
+ per file.
41
+
42
+ Parameters
43
+ -----------
44
+ cif_str : the CIF to parse
45
+ **kwargs to pass to `DatabaseSNL`
46
+ """
47
+ try:
48
+ structures, cif_parsing_remarks = parse_cif(cif_str)
49
+ remarks = kwargs.pop("remarks", None) or cif_parsing_remarks or None
50
+ snl = DatabaseSNL.from_structure(
51
+ meta_structure=structures[0],
52
+ structure=structures[0],
53
+ **kwargs,
54
+ )
55
+
56
+ snl.about.remarks = remarks
57
+
58
+ except Exception as e:
59
+ logger.warning(e)
60
+ snl = None
61
+
62
+ return snl
63
+
64
+
65
+ def update_experimental_icsd_structures(**client_kwargs) -> list[DatabaseSNL]:
66
+ """Update the collection of ICSD SNLs.
67
+
68
+ Parameters
69
+ -----------
70
+ **client_kwargs to pass to `IcsdClient`
71
+
72
+ Returns
73
+ -----------
74
+ List of DatabaseSNL
75
+ """
76
+ data = []
77
+ with IcsdClient(use_document_model=False, **client_kwargs) as client:
78
+ for icsd_subset in (
79
+ IcsdSubset.EXPERIMENTAL_METALORGANIC,
80
+ IcsdSubset.EXPERIMENTAL_INORGANIC,
81
+ ):
82
+ data += client.search(
83
+ subset=icsd_subset,
84
+ space_group_number=(1, 230),
85
+ include_cif=False,
86
+ include_metadata=False,
87
+ )
88
+ return data
89
+
90
+ parsed = [
91
+ _get_snl_from_cif(
92
+ doc["cif"],
93
+ snl_id=f"icsd-{doc['collection_code']}",
94
+ tags=[doc["subset"].value],
95
+ source="icsd",
96
+ )
97
+ for doc in data
98
+ ]
99
+
100
+ return sorted(
101
+ [doc for doc in parsed if doc],
102
+ key=lambda doc: int(doc.snl_id.split("-", 1)[-1]),
103
+ )
104
+
105
+
106
+ class ProvenanceBuilderInput(BaseBuilderInput):
107
+ formula_pretty: str
108
+
109
+
110
+ def _match_against_snls(
111
+ inputs: tuple[list[ProvenanceBuilderInput], list[DatabaseSNL]],
112
+ ) -> list[ProvenanceDoc]:
113
+ """
114
+ Structure match a set of ProvenanceBuilderInputs against a group of DatabaseSNLs
115
+
116
+ Should be used in conjunction with ``build_provenance_docs`` to ensure inputs
117
+ are correctly grouped by 'formula_pretty'.
118
+ """
119
+ input_documents, snls = inputs
120
+
121
+ results = []
122
+ for input_doc in input_documents:
123
+ authors = [[SETTINGS.DEFAULT_AUTHOR]]
124
+ database_ids = defaultdict(list)
125
+ history = [[SETTINGS.DEFAULT_HISTORY]]
126
+ references = [SETTINGS.DEFAULT_REFERENCE]
127
+ theoretical = True
128
+
129
+ if snls:
130
+ for snl in snls:
131
+ if structure_matcher.fit(input_doc.structure, snl.structure):
132
+ if snl.source and snl.source in {"icsd", "pauling"}:
133
+ theoretical = False
134
+ database_ids[snl.source].append(snl.snl_id)
135
+
136
+ if snl.about:
137
+ authors.append(snl.about.authors or [])
138
+ history.append(snl.about.history or [])
139
+ # `SNLAbout` uses string for `references`,
140
+ # `ProvenanceDoc` uses list of str
141
+ if snl.about.references:
142
+ references.append(snl.about.references)
143
+
144
+ results.append(
145
+ ProvenanceDoc.from_structure(
146
+ meta_structure=input_doc.structure,
147
+ material_id=input_doc.material_id,
148
+ database_IDs=database_ids,
149
+ theoretical=theoretical,
150
+ authors=list(chain.from_iterable(authors)),
151
+ history=list(chain.from_iterable(history)),
152
+ references=references,
153
+ )
154
+ )
155
+
156
+ return results
157
+
158
+
159
+ def build_provenance_docs(
160
+ input_documents: list[ProvenanceBuilderInput], snls: list[DatabaseSNL], **kwargs
161
+ ) -> Iterator[ProvenanceDoc]:
162
+ """
163
+ Groups input documents and SNLs by formula_pretty, performs structure matching
164
+ on each formula group, and constructs ProvenanceDocs for each group of
165
+ ProvenanceBuilderInputs with matching structures within each formula group.
166
+
167
+ Args:
168
+ input_documents: List of ProvenanceBuilderInput objects to process.
169
+ snls: List of DatabaseSNL objects for structure matching against.
170
+
171
+ Returns:
172
+ Iterator[ProvenanceDoc]
173
+ """
174
+
175
+ input_documents.sort(key=lambda x: x.formula_pretty)
176
+ snls.sort(key=lambda y: y.formula_pretty)
177
+
178
+ input_docs = dict()
179
+ for form, input_group in groupby(input_documents, key=lambda x: x.formula_pretty):
180
+ input_docs[form] = list(input_group)
181
+
182
+ snl_docs = dict()
183
+ for form, snl_group in groupby(snls, key=lambda y: y.formula_pretty):
184
+ snl_docs[form] = list(snl_group)
185
+
186
+ inputs = [(inp, snl_docs.get(form, [])) for form, inp in input_docs.items()]
187
+
188
+ return chain.from_iterable(filter_map(_match_against_snls, inputs, **kwargs))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: emmet-builders
3
- Version: 0.87.0.dev15
3
+ Version: 0.87.0.dev16
4
4
  Summary: Builders for the Emmet Library
5
5
  Author-email: The Materials Project <feedback@materialsproject.org>
6
6
  License-Expression: BSD-3-Clause-LBNL
@@ -1,151 +0,0 @@
1
- """Build provenance collection."""
2
-
3
- import logging
4
-
5
- from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
6
-
7
- from emmet.builders.base import BaseBuilderInput
8
- from emmet.builders.settings import EmmetBuildSettings
9
- from emmet.builders.utils import filter_map
10
- from emmet.core.connectors.analysis import parse_cif
11
- from emmet.core.connectors.icsd.client import IcsdClient
12
- from emmet.core.connectors.icsd.enums import IcsdSubset
13
- from emmet.core.provenance import DatabaseSNL, ProvenanceDoc
14
-
15
- SETTINGS = EmmetBuildSettings()
16
- structure_matcher = StructureMatcher(
17
- ltol=SETTINGS.LTOL,
18
- stol=SETTINGS.STOL,
19
- comparator=ElementComparator(),
20
- angle_tol=SETTINGS.ANGLE_TOL,
21
- primitive_cell=True,
22
- scale=True,
23
- attempt_supercell=False,
24
- allow_subset=False,
25
- )
26
-
27
-
28
- logger = logging.getLogger(__name__)
29
-
30
-
31
- def _get_snl_from_cif(cif_str: str, **kwargs) -> DatabaseSNL | None:
32
- """Build a database SNL from a CIF plus its metadata.
33
-
34
- NB: Only takes the first structure from a CIF.
35
- While a CIF can technically contain many structures,
36
- the ICSD usually only distributes CIFs with one structure
37
- per file.
38
-
39
- Parameters
40
- -----------
41
- cif_str : the CIF to parse
42
- **kwargs to pass to `DatabaseSNL`
43
- """
44
- try:
45
- structures, cif_parsing_remarks = parse_cif(cif_str)
46
- remarks = kwargs.pop("remarks", None) or cif_parsing_remarks or None
47
- snl = DatabaseSNL.from_structure(
48
- meta_structure=structures[0],
49
- structure=structures[0],
50
- **kwargs,
51
- )
52
-
53
- snl.about.remarks = remarks
54
-
55
- except Exception as e:
56
- logger.warning(e)
57
- snl = None
58
-
59
- return snl
60
-
61
-
62
- def update_experimental_icsd_structures(**client_kwargs) -> list[DatabaseSNL]:
63
- """Update the collection of ICSD SNLs.
64
-
65
- Parameters
66
- -----------
67
- **client_kwargs to pass to `IcsdClient`
68
-
69
- Returns
70
- -----------
71
- List of DatabaseSNL
72
- """
73
- data = []
74
- with IcsdClient(use_document_model=False, **client_kwargs) as client:
75
- for icsd_subset in (
76
- IcsdSubset.EXPERIMENTAL_METALORGANIC,
77
- IcsdSubset.EXPERIMENTAL_INORGANIC,
78
- ):
79
- data += client.search(
80
- subset=IcsdSubset.EXPERIMENTAL_INORGANIC,
81
- space_group_number=(1, 230),
82
- include_cif=True,
83
- include_metadata=False,
84
- )
85
-
86
- parsed = [
87
- _get_snl_from_cif(
88
- doc["cif"],
89
- snl_id=f"icsd-{doc['collection_code']}",
90
- tags=[doc["subset"].value],
91
- source="icsd",
92
- )
93
- for doc in data
94
- ]
95
-
96
- return sorted(
97
- [doc for doc in parsed if doc],
98
- key=lambda doc: int(doc.snl_id.split("-", 1)[-1]),
99
- )
100
-
101
-
102
- def match_against_snls(
103
- input_doc: BaseBuilderInput,
104
- snls: list[DatabaseSNL],
105
- ):
106
- """Match a single document against the SNL collection."""
107
- database_ids = {}
108
- authors = [SETTINGS.DEFAULT_AUTHOR]
109
- history = [SETTINGS.DEFAULT_HISTORY]
110
- references = [SETTINGS.DEFAULT_REFERENCE]
111
- theoretical = True
112
-
113
- for snl in [
114
- doc
115
- for doc in snls
116
- if doc.chemsys
117
- == (
118
- "-".join(sorted(input_doc.structure.composition.chemical_system.split("-")))
119
- )
120
- ]:
121
- if structure_matcher.fit(input_doc.structure, snl.structure):
122
-
123
- if snl.source and snl.source in {"icsd", "pauling"}:
124
- theoretical = False
125
- database_ids[snl.source].append(snl.snl_id)
126
-
127
- if snl.about:
128
- authors.extend(snl.about.authors or [])
129
- history.extend(snl.about.history or [])
130
- # `SNLAbout` uses string for `references`,
131
- # `ProvenanceDoc` uses list of str
132
- if snl.about.references:
133
- references.append(snl.about.references)
134
-
135
- return ProvenanceDoc.from_structure(
136
- meta_structure=input_doc.structure,
137
- material_id=input_doc.material_id,
138
- database_IDs=database_ids,
139
- theoretical=theoretical,
140
- authors=authors,
141
- history=history,
142
- references=references,
143
- )
144
-
145
-
146
- def build_provenance_docs(
147
- input_documents: list[BaseBuilderInput], snls: list[DatabaseSNL], **kwargs
148
- ) -> list[ProvenanceDoc]:
149
- """Build the provenance collection."""
150
-
151
- return list(filter_map(match_against_snls, input_documents, snls=snls, **kwargs))