optimade-maker 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. optimade_maker-0.3.0/LICENSE +21 -0
  2. optimade_maker-0.3.0/PKG-INFO +164 -0
  3. optimade_maker-0.3.0/README.md +126 -0
  4. optimade_maker-0.3.0/pyproject.toml +58 -0
  5. optimade_maker-0.3.0/setup.cfg +4 -0
  6. optimade_maker-0.3.0/src/optimade_maker/__init__.py +4 -0
  7. optimade_maker-0.3.0/src/optimade_maker/archive/__init__.py +0 -0
  8. optimade_maker-0.3.0/src/optimade_maker/archive/archive_record.py +183 -0
  9. optimade_maker-0.3.0/src/optimade_maker/archive/cli.py +13 -0
  10. optimade_maker-0.3.0/src/optimade_maker/archive/scan_records.py +36 -0
  11. optimade_maker-0.3.0/src/optimade_maker/archive/utils.py +84 -0
  12. optimade_maker-0.3.0/src/optimade_maker/cli.py +83 -0
  13. optimade_maker-0.3.0/src/optimade_maker/config.py +162 -0
  14. optimade_maker-0.3.0/src/optimade_maker/convert.py +576 -0
  15. optimade_maker-0.3.0/src/optimade_maker/logger.py +9 -0
  16. optimade_maker-0.3.0/src/optimade_maker/parsers.py +163 -0
  17. optimade_maker-0.3.0/src/optimade_maker/serve.py +135 -0
  18. optimade_maker-0.3.0/src/optimade_maker.egg-info/PKG-INFO +164 -0
  19. optimade_maker-0.3.0/src/optimade_maker.egg-info/SOURCES.txt +25 -0
  20. optimade_maker-0.3.0/src/optimade_maker.egg-info/dependency_links.txt +1 -0
  21. optimade_maker-0.3.0/src/optimade_maker.egg-info/entry_points.txt +2 -0
  22. optimade_maker-0.3.0/src/optimade_maker.egg-info/requires.txt +22 -0
  23. optimade_maker-0.3.0/src/optimade_maker.egg-info/top_level.txt +1 -0
  24. optimade_maker-0.3.0/tests/test_archive.py +36 -0
  25. optimade_maker-0.3.0/tests/test_convert.py +159 -0
  26. optimade_maker-0.3.0/tests/test_serve.py +74 -0
  27. optimade_maker-0.3.0/tests/test_yaml.py +11 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Materials Cloud & Matthew Evans
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.1
2
+ Name: optimade-maker
3
+ Version: 0.3.0
4
+ Summary: Tools for making OPTIMADE APIs from raw structural data.
5
+ License: MIT
6
+ Keywords: optimade,jsonapi,materials
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Programming Language :: Python :: 3 :: Only
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Topic :: Database
14
+ Classifier: Topic :: Scientific/Engineering
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: pydantic~=2.2
19
+ Requires-Dist: optimade[ase,server]~=1.1
20
+ Requires-Dist: pyyaml~=6.0
21
+ Requires-Dist: pymatgen>=2023.9
22
+ Requires-Dist: pandas~=2.1
23
+ Requires-Dist: pybtex~=0.24
24
+ Requires-Dist: tqdm~=4.65
25
+ Requires-Dist: requests~=2.31
26
+ Requires-Dist: numpy~=1.26
27
+ Requires-Dist: click~=8.1
28
+ Provides-Extra: tests
29
+ Requires-Dist: pytest~=7.4; extra == "tests"
30
+ Requires-Dist: pytest-cov~=4.0; extra == "tests"
31
+ Provides-Extra: dev
32
+ Requires-Dist: black; extra == "dev"
33
+ Requires-Dist: ruff; extra == "dev"
34
+ Requires-Dist: pre-commit; extra == "dev"
35
+ Requires-Dist: mypy; extra == "dev"
36
+ Requires-Dist: isort; extra == "dev"
37
+ Requires-Dist: types-all; extra == "dev"
38
+
39
+ <div align="center" style="padding: 2em;">
40
+ <span style="padding: 1em">
41
+ <img height="70px" align="center" src="https://matsci.org/uploads/default/original/2X/b/bd2f59b3bf14fb046b74538750699d7da4c19ac1.svg">
42
+ </span>
43
+ </div>
44
+
45
+ # <div align="center">optimade-maker</div>
46
+
47
+ [![PyPI - Version](https://img.shields.io/pypi/v/optimade-maker?color=4CC61E)](https://pypi.org/project/optimade-maker/)
48
+
49
+ Tools for making [OPTIMADE APIs](https://optimade.org) from various formats of structural data (e.g. an archive of CIF files).
50
+
51
+ This repository contains the `optimade_maker` Python package (under `src/optimade_maker`) and the corresponding CLI tool `optimake`, which work towards this aim. Features include:
52
+
53
+ - definition of a config file format (`optimade.yaml`) for annotating data archives to be used in the OPTIMADE ecosystem;
54
+ - conversion of the raw data into corresponding OPTIMADE types using pre-existing parsers (e.g., ASE for structures);
55
+ - conversion of the annotated data archive into an intermediate JSONLines file format that can be ingested into a database and used to serve a full OPTIMADE API;
56
+ - serving either an annotated data archive or a JSONLines file as an OPTIMADE API (using the [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/)
57
+ reference server implementation).
58
+
59
+ ## Usage
60
+
61
+ See `./examples` for a more complete set of supported formats and corresponding `optimade.yaml` config files.
62
+
63
+ ### Annotating with `optimade.yaml`
64
+
65
+ To annotate your structural data for `optimade-maker`, the data archive needs to be accompanied by an `optimade.yaml` config file. The following is a simple example for a zip archive (`structures.zip`) of cif files together with an optional property file (`data.csv`):
66
+
67
+ ```yaml
68
+ config_version: 0.1.0
69
+ database_description: Simple database
70
+
71
+ entries:
72
+ - entry_type: structures
73
+ entry_paths:
74
+ - file: structures.zip
75
+ matches:
76
+ - cifs/*/*.cif
77
+ # (optional) property file and definitions:
78
+ property_paths:
79
+ - file: data.csv
80
+ property_definitions:
81
+ - name: energy
82
+ title: Total energy per atom
83
+ description: The total energy per atom as computed by DFT
84
+ unit: eV/atom
85
+ type: float
86
+ ```
87
+
88
+ ### Structure `id`s and property files
89
+
90
+ `optimade-maker` will assign an `id` for each structure based on its full path in the archive, following a simple deterministic rule: from the set of all archive paths, the maximum common path prefix and postfix (including file extensions) are removed. E.g.
91
+
92
+ ```
93
+ structures.zip/cifs/set1/101.cif
94
+ structures.zip/cifs/set2/102.cif
95
+ ```
96
+
97
+ produces `["set1/101", "set2/102"]`.
98
+
99
+ The property files need to either refer to these `id`s or the full path in the archive to be associated with a structure. E.g. a possible property `csv` file could be
100
+
101
+ ```csv
102
+ id,energy
103
+ set1/101,2.5
104
+ structures.zip/cifs/set2/102.cif,3.2
105
+ ```
106
+
107
+ ### Installing and running `optimake`
108
+
109
+ Install with
110
+
111
+ ```bash
112
+ pip install optimade-maker
113
+ ```
114
+
115
+ This will also make the `optimake` CLI utility available.
116
+
117
+ For a folder containing the data archive and the `optimade.yaml` file (such as in `/examples`), run
118
+
119
+ - `optimake convert .` to just convert the entry into the JSONL format (see below);
120
+ - `optimake serve .` to start the OPTIMADE API (this also first converts the entry, if needed).
121
+
122
+ For more detailed information see also `optimake --help`.
123
+
124
+ ## `optimade-maker` JSONLines Format
125
+
126
+ As described above, `optimade-maker` works via an intermediate JSONLines file representation of an OPTIMADE API (see also the [corresponding issue in the specification](https://github.com/Materials-Consortia/OPTIMADE/issues/471)).
127
+ This file should provide enough metadata to spin up an OPTIMADE API with many different entry types.
128
+ The format is as follows:
129
+
130
+ - First line must be a dictionary with the key `x-optimade`, containing a sub-dictionary of metadata (such as the OPTIMADE API version).
131
+ - Second line contains the `info/structures` endpoint.
132
+ - Third line contains the `info/references` endpoint, if present.
133
+ - Then each line contains an entry from the corresponding individual structure/reference endpoints.
134
+
135
+ ```json
136
+ {"x-optimade": {"meta": {"api_version": "1.1.0"}}}
137
+ {"type": "info", "id": "structures", "properties": {...}}
138
+ {"type": "info", "id": "references", "properties": {...}}
139
+ {"type": "structures", "id": "1234", "attributes": {...}}
140
+ {"type": "structures", "id": "1235", "attributes": {...}}
141
+ {"type": "references", "id": "sfdas", "attributes": {...}}
142
+ ```
143
+
144
+ NOTE: the `info/` endpoints in [OPTIMADE v1.2.0](https://www.optimade.org/specification/#entry-listing-info-endpoints) will include `type` and `id` as well.
145
+
146
+ ## Relevant links
147
+
148
+ - [Roadmap and meeting notes](https://docs.google.com/document/d/1cIpwuX6Ty5d3ZHKYWktQaBBQcI9fYmgG_hsD1P1UpO4/edit)
149
+ - [OPTIMADE serialization format notes](https://docs.google.com/document/d/1vf8_qxSRP5lCSb0P3M9gTr6nqkERxgOoSDno6YLcCjo/edit)
150
+ - [Flow diagram](https://excalidraw.com/#json=MBNl66sARCQekVrKZXDg8,K35f5FwmiS46vlsYGMJdrw)
151
+
152
+ ## Contributors
153
+
154
+ The initial prototype was created at the Paul Scherrer Institute, Switzerland, in the week of
155
+ 12th-16th June 2023.
156
+
157
+ Authors (alphabetical):
158
+
159
+ - Kristjan Eimre
160
+ - Matthew Evans
161
+ - Giovanni Pizzi
162
+ - Gian-Marco Rignanese
163
+ - Jusong Yu
164
+ - Xing Wang
@@ -0,0 +1,126 @@
1
+ <div align="center" style="padding: 2em;">
2
+ <span style="padding: 1em">
3
+ <img height="70px" align="center" src="https://matsci.org/uploads/default/original/2X/b/bd2f59b3bf14fb046b74538750699d7da4c19ac1.svg">
4
+ </span>
5
+ </div>
6
+
7
+ # <div align="center">optimade-maker</div>
8
+
9
+ [![PyPI - Version](https://img.shields.io/pypi/v/optimade-maker?color=4CC61E)](https://pypi.org/project/optimade-maker/)
10
+
11
+ Tools for making [OPTIMADE APIs](https://optimade.org) from various formats of structural data (e.g. an archive of CIF files).
12
+
13
+ This repository contains the `optimade_maker` Python package (under `src/optimade_maker`) and the corresponding CLI tool `optimake`, which work towards this aim. Features include:
14
+
15
+ - definition of a config file format (`optimade.yaml`) for annotating data archives to be used in the OPTIMADE ecosystem;
16
+ - conversion of the raw data into corresponding OPTIMADE types using pre-existing parsers (e.g., ASE for structures);
17
+ - conversion of the annotated data archive into an intermediate JSONLines file format that can be ingested into a database and used to serve a full OPTIMADE API;
18
+ - serving either an annotated data archive or a JSONLines file as an OPTIMADE API (using the [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/)
19
+ reference server implementation).
20
+
21
+ ## Usage
22
+
23
+ See `./examples` for a more complete set of supported formats and corresponding `optimade.yaml` config files.
24
+
25
+ ### Annotating with `optimade.yaml`
26
+
27
+ To annotate your structural data for `optimade-maker`, the data archive needs to be accompanied by an `optimade.yaml` config file. The following is a simple example for a zip archive (`structures.zip`) of cif files together with an optional property file (`data.csv`):
28
+
29
+ ```yaml
30
+ config_version: 0.1.0
31
+ database_description: Simple database
32
+
33
+ entries:
34
+ - entry_type: structures
35
+ entry_paths:
36
+ - file: structures.zip
37
+ matches:
38
+ - cifs/*/*.cif
39
+ # (optional) property file and definitions:
40
+ property_paths:
41
+ - file: data.csv
42
+ property_definitions:
43
+ - name: energy
44
+ title: Total energy per atom
45
+ description: The total energy per atom as computed by DFT
46
+ unit: eV/atom
47
+ type: float
48
+ ```
49
+
50
+ ### Structure `id`s and property files
51
+
52
+ `optimade-maker` will assign an `id` for each structure based on its full path in the archive, following a simple deterministic rule: from the set of all archive paths, the maximum common path prefix and postfix (including file extensions) are removed. E.g.
53
+
54
+ ```
55
+ structures.zip/cifs/set1/101.cif
56
+ structures.zip/cifs/set2/102.cif
57
+ ```
58
+
59
+ produces `["set1/101", "set2/102"]`.
60
+
61
+ The property files need to either refer to these `id`s or the full path in the archive to be associated with a structure. E.g. a possible property `csv` file could be
62
+
63
+ ```csv
64
+ id,energy
65
+ set1/101,2.5
66
+ structures.zip/cifs/set2/102.cif,3.2
67
+ ```
68
+
69
+ ### Installing and running `optimake`
70
+
71
+ Install with
72
+
73
+ ```bash
74
+ pip install optimade-maker
75
+ ```
76
+
77
+ This will also make the `optimake` CLI utility available.
78
+
79
+ For a folder containing the data archive and the `optimade.yaml` file (such as in `/examples`), run
80
+
81
+ - `optimake convert .` to just convert the entry into the JSONL format (see below);
82
+ - `optimake serve .` to start the OPTIMADE API (this also first converts the entry, if needed).
83
+
84
+ For more detailed information see also `optimake --help`.
85
+
86
+ ## `optimade-maker` JSONLines Format
87
+
88
+ As described above, `optimade-maker` works via an intermediate JSONLines file representation of an OPTIMADE API (see also the [corresponding issue in the specification](https://github.com/Materials-Consortia/OPTIMADE/issues/471)).
89
+ This file should provide enough metadata to spin up an OPTIMADE API with many different entry types.
90
+ The format is as follows:
91
+
92
+ - First line must be a dictionary with the key `x-optimade`, containing a sub-dictionary of metadata (such as the OPTIMADE API version).
93
+ - Second line contains the `info/structures` endpoint.
94
+ - Third line contains the `info/references` endpoint, if present.
95
+ - Then each line contains an entry from the corresponding individual structure/reference endpoints.
96
+
97
+ ```json
98
+ {"x-optimade": {"meta": {"api_version": "1.1.0"}}}
99
+ {"type": "info", "id": "structures", "properties": {...}}
100
+ {"type": "info", "id": "references", "properties": {...}}
101
+ {"type": "structures", "id": "1234", "attributes": {...}}
102
+ {"type": "structures", "id": "1235", "attributes": {...}}
103
+ {"type": "references", "id": "sfdas", "attributes": {...}}
104
+ ```
105
+
106
+ NOTE: the `info/` endpoints in [OPTIMADE v1.2.0](https://www.optimade.org/specification/#entry-listing-info-endpoints) will include `type` and `id` as well.
107
+
108
+ ## Relevant links
109
+
110
+ - [Roadmap and meeting notes](https://docs.google.com/document/d/1cIpwuX6Ty5d3ZHKYWktQaBBQcI9fYmgG_hsD1P1UpO4/edit)
111
+ - [OPTIMADE serialization format notes](https://docs.google.com/document/d/1vf8_qxSRP5lCSb0P3M9gTr6nqkERxgOoSDno6YLcCjo/edit)
112
+ - [Flow diagram](https://excalidraw.com/#json=MBNl66sARCQekVrKZXDg8,K35f5FwmiS46vlsYGMJdrw)
113
+
114
+ ## Contributors
115
+
116
+ The initial prototype was created at the Paul Scherrer Institute, Switzerland, in the week of
117
+ 12th-16th June 2023.
118
+
119
+ Authors (alphabetical):
120
+
121
+ - Kristjan Eimre
122
+ - Matthew Evans
123
+ - Giovanni Pizzi
124
+ - Gian-Marco Rignanese
125
+ - Jusong Yu
126
+ - Xing Wang
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "optimade-maker"
3
+ description = "Tools for making OPTIMADE APIs from raw structural data."
4
+ readme = "README.md"
5
+ version = "0.3.0"
6
+ requires-python = ">=3.10"
7
+ license = { text = "MIT" }
8
+ keywords = ["optimade", "jsonapi", "materials"]
9
+
10
+ classifiers = [
11
+ "Development Status :: 4 - Beta",
12
+ "Programming Language :: Python :: 3 :: Only",
13
+ "Programming Language :: Python :: 3.10",
14
+ "Programming Language :: Python :: 3.11",
15
+ "Programming Language :: Python :: 3.12",
16
+ "Intended Audience :: Science/Research",
17
+ "Topic :: Database",
18
+ "Topic :: Scientific/Engineering",
19
+ ]
20
+
21
+ dependencies = [
22
+ "pydantic~=2.2",
23
+ "optimade[server,ase]~=1.1",
24
+ "pyyaml~=6.0",
25
+ "pymatgen>=2023.9",
26
+ "pandas~=2.1",
27
+ "pybtex~=0.24",
28
+ "tqdm~=4.65",
29
+ "requests~=2.31",
30
+ "numpy~=1.26",
31
+ "click~=8.1"
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ tests = ["pytest~=7.4", "pytest-cov~=4.0"]
36
+ dev = ["black", "ruff", "pre-commit", "mypy", "isort", "types-all"]
37
+
38
+ [tool.ruff]
39
+ select = ["E", "F", "I", "W", "Q"]
40
+ ignore = ["E501", "E402"]
41
+ fixable = ["A", "B", "C", "D", "E", "F", "I"]
42
+ unfixable = []
43
+ target-version = "py311"
44
+ per-file-ignores = {}
45
+ # Allow unused variables when underscore-prefixed.
46
+ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
47
+
48
+ [tool.mypy]
49
+ plugins = "pydantic.mypy"
50
+ ignore_missing_imports = true
51
+ follow_imports = "skip"
52
+
53
+ [tool.isort]
54
+ known_first_party = "optimade_maker"
55
+ profile = "black"
56
+
57
+ [project.scripts]
58
+ optimake = "optimade_maker.cli:cli"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
+ from optimade_maker.config import Config
2
+ from optimade_maker.convert import convert_archive
3
+
4
+ __all__ = ("Config", "convert_archive")
@@ -0,0 +1,183 @@
1
+ import json
2
+ import os
3
+ from urllib.error import HTTPError, URLError
4
+
5
+ import requests
6
+
7
+ from optimade_maker.config import Config
8
+
9
DEFAULT_ARCHIVE_URL = "https://archive.materialscloud.org"


class ArchiveRecord:
    """A single Materials Cloud Archive record.

    Constructing an instance immediately requests the record metadata from
    the archive and derives from it:

    1. the API url of the record,
    2. the record metadata,
    3. the DOI-based identifier of the record,
    4. the mapping of file names to checksums,
    5. the name of the OPTIMADE config file ("optimade.yaml" or
       "optimade.yml"), or ``None`` if the record has none.

    If a config file is present, :meth:`process` parses it and downloads the
    files it refers to. Conversion to the OPTIMADE format is not done here.

    Parameters:

        id: int
            id of the record. In MC archive, on the right panel,
            "Export" --> "JSON", then find the "id" value.
        archive_url: str
            base url of the archive, with or without a trailing slash.
    """

    def __init__(self, id: int, archive_url: str = DEFAULT_ARCHIVE_URL) -> None:
        self.id = id
        self.archive_url = archive_url
        self.url = self.get_record_url(id)

        self.metadata = self.get_record_metadata()
        self.doi_id = self.get_doi_id()
        self.files_w_checksums = self.get_files_w_checksums()

        # Default download location, used when no explicit path is given.
        self.default_path = os.path.join("/tmp/archive", self.doi_id)

        self.optimade_config_name = self.check_optimade_config_name()

    def _base_url(self) -> str:
        """Return the archive url without trailing slashes.

        Callers pass the base url both with and without a trailing slash;
        normalising here keeps every generated url free of missing or
        doubled separators.
        """
        return self.archive_url.rstrip("/")

    def check_optimade_config_name(self):
        """
        Return the name of the optimade config file in the record's file
        list ("optimade.yaml" is preferred over "optimade.yml"), or None if
        neither is present.
        """
        for name_candidate in ("optimade.yaml", "optimade.yml"):
            if name_candidate in self.files_w_checksums:
                return name_candidate
        return None

    def process(self):
        """
        Parse the optimade config file and download the files it lists.
        Does nothing if the record has no optimade config file.
        """
        if not self.is_optimade_record():
            return
        self.load_optimade_config()
        self.download_files()
        # NOTE: conversion to the OPTIMADE format is not triggered from here.

    def download_optimade_files(self, path=None):
        """
        Same as `process`, but downloads into `path` (falls back to
        `self.default_path` when `path` is empty).
        """
        if not self.is_optimade_record():
            return
        self.load_optimade_config()
        self.download_files(path)

    def get_record_url(self, record_id: int) -> str:
        """
        Return the API url of the record with the given id.
        """
        return f"{self._base_url()}/api/records/{record_id}"

    def get_file_url(self, filename: str) -> str:
        """
        Return the download url of a file belonging to this record.
        Spaces in the file name are replaced by "+".
        """
        filename = filename.replace(" ", "+")
        # The checksum-based `/record/file_stats?...` endpoint was tried
        # before and failed for some records, hence the plain `/record/file`.
        return f"{self._base_url()}/record/file?record_id={self.id}&filename={filename}"

    def get_record_metadata(self):
        """
        Get the metadata of a record by requesting its url.

        Returns the "metadata" entry of the decoded JSON response.
        Request failures are reported on stdout and then re-raised as the
        corresponding `requests` exception.
        """
        try:
            # NOTE(review): `verify=False` disables TLS certificate checks;
            # kept as in the original, but confirm this is still required.
            r = requests.get(self.url, allow_redirects=True, verify=False)
            r.raise_for_status()
        except requests.exceptions.HTTPError as e:
            print("The server couldn't fulfill the request.")
            print("Error code: ", e.response.status_code)
            raise
        except requests.exceptions.RequestException as e:
            print("We failed to reach a server.")
            print("Reason: ", e)
            raise
        s = json.loads(r.content.decode("utf-8"))
        return s["metadata"]

    def get_doi_id(self):
        """
        Get the DOI identifier of the record, e.g.
        "10.24435/materialscloud:jq-0s" -> "jq-0s"
        "10.24435/materialscloud:2020.0040/v1" -> "2020.0040/v1"

        NOTE: the slash in the old format is currently unsupported (e.g. can't
        make a folder, or docker container), but these entries don't contain
        optimade.yml anyway, so it should be safe to ignore this for now.
        """
        return self.metadata["doi"].split(":")[-1]

    def get_files_w_checksums(self):
        """
        Get the file list of a record as a {file name: checksum} mapping.
        """
        return {f["key"]: f["checksum"] for f in self.metadata["_files"]}

    def is_optimade_record(self):
        """
        Return whether the record has an optimade config file.
        """
        return self.optimade_config_name is not None

    def download_optimade_config_file(self):
        """
        Try to download the optimade.yaml/yml file.

        Returns the response object; raises RuntimeError if the status code
        is not 200.
        """
        filename = self.optimade_config_name
        url = self.get_file_url(filename)
        response = requests.get(url, allow_redirects=True)
        if response.status_code != 200:
            raise RuntimeError(f"Could not download {filename} file.")
        return response

    def load_optimade_config(self):
        """
        Download and parse the optimade.yaml/yml file into `self.mc_config`.
        """
        response = self.download_optimade_config_file()
        self.mc_config = Config.from_string(response.content.decode("utf-8"))

    def download_files(self, path=None):
        """
        Download all files from the optimade file list.

        `load_optimade_config` must have been called before. Any existing
        directory at `path` (default: `self.default_path`) is deleted and
        recreated first.
        """
        import shutil

        from .utils import download_file

        if not path:
            path = self.default_path

        # start from a clean directory
        if os.path.isdir(path):
            shutil.rmtree(path)
        os.makedirs(path)

        # download optimade.yml/yaml, always stored as "optimade.yaml"
        file_url = self.get_file_url(self.optimade_config_name)
        download_file(file_url, path, rename="optimade.yaml")

        entries = self.mc_config.entries
        if hasattr(entries, "jsonl_path"):
            # case 1: jsonl file specified (either via `file: jsonl.gz` or `jsonl_path:`)
            # download `file:` if specified, otherwise the `jsonl_path:`
            if hasattr(entries, "file"):
                fname = entries.file
            else:
                fname = entries.jsonl_path
            download_file(self.get_file_url(fname), path)
        else:
            # case 2: files specified as entry_paths/property_paths
            for entry in entries:
                list_of_files = [p.file for p in entry.entry_paths]
                # property_paths is optional; presumably it may also be
                # present but unset (None), so treat that like "missing".
                property_paths = getattr(entry, "property_paths", None) or []
                list_of_files += [p.file for p in property_paths]
                for fname in list_of_files:
                    download_file(self.get_file_url(fname), path)
@@ -0,0 +1,13 @@
1
+ import click
2
+
3
+ from .scan_records import scan_records
4
+
5
+
6
@click.group()
def cli():
    """Commands for working with Materials Cloud Archive records."""
    # The group has no behaviour of its own; the docstring above is what
    # click displays as the description in `--help`.
9
+
10
+
11
@cli.command()
def scan():
    """Scan the archive for records with an optimade config file and process them."""
    # Uses the default archive url of `scan_records`.
    scan_records()
@@ -0,0 +1,36 @@
1
+ import tqdm
2
+
3
+ from optimade_maker.archive.archive_record import ArchiveRecord
4
+ from optimade_maker.archive.utils import get_all_records, get_parsed_records
5
+
6
+ DEFAULT_ARCHIVE_URL = "https://archive.materialscloud.org/"
7
+
8
+
9
def process_records(records: list, archive_url: str = DEFAULT_ARCHIVE_URL):
    """
    Go through the given Materials Cloud Archive entries and, for every
    entry not handled before, check whether it ships a file called
    "optimade.y(ml|aml)". If so, trigger the processing step for it.

    Each item of `records` is indexed with the key "id"; `archive_url` is
    forwarded to every `ArchiveRecord` that gets created.
    """
    # ids of records handled in earlier runs (presumably collected from the
    # optimade_id.json files in the output folders -- see get_parsed_records)
    already_parsed = get_parsed_records()
    for raw_record in tqdm.tqdm(records, desc="Processing records"):
        record_id = raw_record["id"]
        if record_id not in already_parsed:
            archive_record = ArchiveRecord(record_id, archive_url=archive_url)
            if archive_record.is_optimade_record():
                print(f"Record {record_id} is a OPTIMADE record.")
                archive_record.process()
25
+
26
+
27
def scan_records(archive_url=DEFAULT_ARCHIVE_URL):
    """Look for new OPTIMADE entries in the Materials Cloud Archive and process them.

    Fetches all records of the archive at `archive_url` and hands them to
    `process_records`. Intended to be runnable as a cron job.
    """
    print("Start scanning the Materials Cloud Archive for new OPTIMADE entries...")
    process_records(get_all_records(archive_url), archive_url)
32
+
33
+
34
if __name__ == "__main__":
    # Running this module directly scans the staging archive rather than
    # the default archive url.
    scan_records("https://staging-archive.materialscloud.org/")