optimade-maker 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimade_maker-0.3.0/LICENSE +21 -0
- optimade_maker-0.3.0/PKG-INFO +164 -0
- optimade_maker-0.3.0/README.md +126 -0
- optimade_maker-0.3.0/pyproject.toml +58 -0
- optimade_maker-0.3.0/setup.cfg +4 -0
- optimade_maker-0.3.0/src/optimade_maker/__init__.py +4 -0
- optimade_maker-0.3.0/src/optimade_maker/archive/__init__.py +0 -0
- optimade_maker-0.3.0/src/optimade_maker/archive/archive_record.py +183 -0
- optimade_maker-0.3.0/src/optimade_maker/archive/cli.py +13 -0
- optimade_maker-0.3.0/src/optimade_maker/archive/scan_records.py +36 -0
- optimade_maker-0.3.0/src/optimade_maker/archive/utils.py +84 -0
- optimade_maker-0.3.0/src/optimade_maker/cli.py +83 -0
- optimade_maker-0.3.0/src/optimade_maker/config.py +162 -0
- optimade_maker-0.3.0/src/optimade_maker/convert.py +576 -0
- optimade_maker-0.3.0/src/optimade_maker/logger.py +9 -0
- optimade_maker-0.3.0/src/optimade_maker/parsers.py +163 -0
- optimade_maker-0.3.0/src/optimade_maker/serve.py +135 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/PKG-INFO +164 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/SOURCES.txt +25 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/dependency_links.txt +1 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/entry_points.txt +2 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/requires.txt +22 -0
- optimade_maker-0.3.0/src/optimade_maker.egg-info/top_level.txt +1 -0
- optimade_maker-0.3.0/tests/test_archive.py +36 -0
- optimade_maker-0.3.0/tests/test_convert.py +159 -0
- optimade_maker-0.3.0/tests/test_serve.py +74 -0
- optimade_maker-0.3.0/tests/test_yaml.py +11 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 Materials Cloud & Matthew Evans
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: optimade-maker
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Tools for making OPTIMADE APIs from raw structural data.
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: optimade,jsonapi,materials
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Database
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: pydantic~=2.2
|
|
19
|
+
Requires-Dist: optimade[ase,server]~=1.1
|
|
20
|
+
Requires-Dist: pyyaml~=6.0
|
|
21
|
+
Requires-Dist: pymatgen>=2023.9
|
|
22
|
+
Requires-Dist: pandas~=2.1
|
|
23
|
+
Requires-Dist: pybtex~=0.24
|
|
24
|
+
Requires-Dist: tqdm~=4.65
|
|
25
|
+
Requires-Dist: requests~=2.31
|
|
26
|
+
Requires-Dist: numpy~=1.26
|
|
27
|
+
Requires-Dist: click~=8.1
|
|
28
|
+
Provides-Extra: tests
|
|
29
|
+
Requires-Dist: pytest~=7.4; extra == "tests"
|
|
30
|
+
Requires-Dist: pytest-cov~=4.0; extra == "tests"
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: black; extra == "dev"
|
|
33
|
+
Requires-Dist: ruff; extra == "dev"
|
|
34
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
35
|
+
Requires-Dist: mypy; extra == "dev"
|
|
36
|
+
Requires-Dist: isort; extra == "dev"
|
|
37
|
+
Requires-Dist: types-all; extra == "dev"
|
|
38
|
+
|
|
39
|
+
<div align="center" style="padding: 2em;">
|
|
40
|
+
<span style="padding: 1em">
|
|
41
|
+
<img height="70px" align="center" src="https://matsci.org/uploads/default/original/2X/b/bd2f59b3bf14fb046b74538750699d7da4c19ac1.svg">
|
|
42
|
+
</span>
|
|
43
|
+
</div>
|
|
44
|
+
|
|
45
|
+
# <div align="center">optimade-maker</div>
|
|
46
|
+
|
|
47
|
+
[optimade-maker on PyPI](https://pypi.org/project/optimade-maker/)
|
|
48
|
+
|
|
49
|
+
Tools for making [OPTIMADE APIs](https://optimade.org) from various formats of structural data (e.g. an archive of CIF files).
|
|
50
|
+
|
|
51
|
+
This repository contains the `src/optimade_maker` Python package and the corresponding CLI tool `optimake` that work towards this aim. Features include:
|
|
52
|
+
|
|
53
|
+
- definition of a config file format (`optimade.yaml`) for annotating data archives to be used in the OPTIMADE ecosystem;
|
|
54
|
+
- conversion of the raw data into corresponding OPTIMADE types using pre-existing parsers (e.g., ASE for structures);
|
|
55
|
+
- conversion of the annotated data archive into an intermediate JSONLines file format that can be ingested into a database and used to serve a full OPTIMADE API.
|
|
56
|
+
- serving either an annotated data archive or a JSONLines file as an OPTIMADE API (using the [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/)
|
|
57
|
+
reference server implementation).
|
|
58
|
+
|
|
59
|
+
## Usage
|
|
60
|
+
|
|
61
|
+
See `./examples` for a more complete set of supported formats and corresponding `optimade.yaml` config files.
|
|
62
|
+
|
|
63
|
+
### Annotating with `optimade.yaml`
|
|
64
|
+
|
|
65
|
+
To annotate your structural data for `optimade-maker`, the data archive needs to be accompanied by an `optimade.yaml` config file. The following is a simple example for a zip archive (`structures.zip`) of cif files together with an optional property file (`data.csv`):
|
|
66
|
+
|
|
67
|
+
```yaml
|
|
68
|
+
config_version: 0.1.0
|
|
69
|
+
database_description: Simple database
|
|
70
|
+
|
|
71
|
+
entries:
|
|
72
|
+
- entry_type: structures
|
|
73
|
+
entry_paths:
|
|
74
|
+
- file: structures.zip
|
|
75
|
+
matches:
|
|
76
|
+
- cifs/*/*.cif
|
|
77
|
+
# (optional) property file and definitions:
|
|
78
|
+
property_paths:
|
|
79
|
+
- file: data.csv
|
|
80
|
+
property_definitions:
|
|
81
|
+
- name: energy
|
|
82
|
+
title: Total energy per atom
|
|
83
|
+
description: The total energy per atom as computed by DFT
|
|
84
|
+
unit: eV/atom
|
|
85
|
+
type: float
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Structure `id`s and property files
|
|
89
|
+
|
|
90
|
+
`optimade-maker` will assign an `id` for each structure based on its full path in the archive, following a simple deterministic rule: from the set of all archive paths, the maximum common path prefix and postfix (including file extensions) are removed. E.g.
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
structures.zip/cifs/set1/101.cif
|
|
94
|
+
structures.zip/cifs/set2/102.cif
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
produces `["set1/101", "set2/102"]`.
|
|
98
|
+
|
|
99
|
+
The property files need to either refer to these `id`s or the full path in the archive to be associated with a structure. E.g. a possible property `csv` file could be
|
|
100
|
+
|
|
101
|
+
```csv
|
|
102
|
+
id,energy
|
|
103
|
+
set1/101,2.5
|
|
104
|
+
structures.zip/cifs/set2/102.cif,3.2
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Installing and running `optimake`
|
|
108
|
+
|
|
109
|
+
Install with
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
pip install optimade-maker
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
this will also make the `optimake` CLI utility available.
|
|
116
|
+
|
|
117
|
+
For a folder containing the data archive and the `optimade.yaml` file (such as those in `./examples`), run
|
|
118
|
+
|
|
119
|
+
- `optimake convert .` to just convert the entry into the JSONL format (see below).
|
|
120
|
+
- `optimake serve .` to start the OPTIMADE API (this also first converts the entry, if needed);
|
|
121
|
+
|
|
122
|
+
For more detailed information see also `optimake --help`.
|
|
123
|
+
|
|
124
|
+
## `optimade-maker` JSONLines Format
|
|
125
|
+
|
|
126
|
+
As described above, `optimade-maker` works via an intermediate JSONLines file representation of an OPTIMADE API (see also the [corresponding issue in the specification](https://github.com/Materials-Consortia/OPTIMADE/issues/471)).
|
|
127
|
+
This file should provide enough metadata to spin up an OPTIMADE API with many different entry types.
|
|
128
|
+
The format is as follows:
|
|
129
|
+
|
|
130
|
+
- First line must be a dictionary with the key `x-optimade`, containing a sub-dictionary of metadata (such as the OPTIMADE API version).
|
|
131
|
+
- Second line contains the `info/structures` endpoint.
|
|
132
|
+
- Third line contains the `info/references` endpoint, if present.
|
|
133
|
+
- Then each line contains an entry from the corresponding individual structure/reference endpoints.
|
|
134
|
+
|
|
135
|
+
```json
|
|
136
|
+
{"x-optimade": {"meta": {"api_version": "1.1.0"}}}
|
|
137
|
+
{"type": "info", "id": "structures", "properties": {...}}
|
|
138
|
+
{"type": "info", "id": "references", "properties": {...}}
|
|
139
|
+
{"type": "structures", "id": "1234", "attributes": {...}}
|
|
140
|
+
{"type": "structures", "id": "1235", "attributes": {...}}
|
|
141
|
+
{"type": "references", "id": "sfdas", "attributes": {...}}
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
NOTE: the `info/` endpoints in [OPTIMADE v1.2.0](https://www.optimade.org/specification/#entry-listing-info-endpoints) will include `type` and `id` as well.
|
|
145
|
+
|
|
146
|
+
## Relevant links
|
|
147
|
+
|
|
148
|
+
- [Roadmap and meeting notes](https://docs.google.com/document/d/1cIpwuX6Ty5d3ZHKYWktQaBBQcI9fYmgG_hsD1P1UpO4/edit)
|
|
149
|
+
- [OPTIMADE serialization format notes](https://docs.google.com/document/d/1vf8_qxSRP5lCSb0P3M9gTr6nqkERxgOoSDno6YLcCjo/edit)
|
|
150
|
+
- [Flow diagram](https://excalidraw.com/#json=MBNl66sARCQekVrKZXDg8,K35f5FwmiS46vlsYGMJdrw)
|
|
151
|
+
|
|
152
|
+
## Contributors
|
|
153
|
+
|
|
154
|
+
Initial prototype was created at the Paul Scherrer Institute, Switzerland in the week of
|
|
155
|
+
12th-16th June 2023.
|
|
156
|
+
|
|
157
|
+
Authors (alphabetical):
|
|
158
|
+
|
|
159
|
+
- Kristjan Eimre
|
|
160
|
+
- Matthew Evans
|
|
161
|
+
- Giovanni Pizzi
|
|
162
|
+
- Gian-Marco Rignanese
|
|
163
|
+
- Jusong Yu
|
|
164
|
+
- Xing Wang
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
<div align="center" style="padding: 2em;">
|
|
2
|
+
<span style="padding: 1em">
|
|
3
|
+
<img height="70px" align="center" src="https://matsci.org/uploads/default/original/2X/b/bd2f59b3bf14fb046b74538750699d7da4c19ac1.svg">
|
|
4
|
+
</span>
|
|
5
|
+
</div>
|
|
6
|
+
|
|
7
|
+
# <div align="center">optimade-maker</div>
|
|
8
|
+
|
|
9
|
+
[optimade-maker on PyPI](https://pypi.org/project/optimade-maker/)
|
|
10
|
+
|
|
11
|
+
Tools for making [OPTIMADE APIs](https://optimade.org) from various formats of structural data (e.g. an archive of CIF files).
|
|
12
|
+
|
|
13
|
+
This repository contains the `src/optimade_maker` Python package and the corresponding CLI tool `optimake` that work towards this aim. Features include:
|
|
14
|
+
|
|
15
|
+
- definition of a config file format (`optimade.yaml`) for annotating data archives to be used in the OPTIMADE ecosystem;
|
|
16
|
+
- conversion of the raw data into corresponding OPTIMADE types using pre-existing parsers (e.g., ASE for structures);
|
|
17
|
+
- conversion of the annotated data archive into an intermediate JSONLines file format that can be ingested into a database and used to serve a full OPTIMADE API.
|
|
18
|
+
- serving either an annotated data archive or a JSONLines file as an OPTIMADE API (using the [`optimade-python-tools`](https://github.com/Materials-Consortia/optimade-python-tools/)
|
|
19
|
+
reference server implementation).
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
See `./examples` for a more complete set of supported formats and corresponding `optimade.yaml` config files.
|
|
24
|
+
|
|
25
|
+
### Annotating with `optimade.yaml`
|
|
26
|
+
|
|
27
|
+
To annotate your structural data for `optimade-maker`, the data archive needs to be accompanied by an `optimade.yaml` config file. The following is a simple example for a zip archive (`structures.zip`) of cif files together with an optional property file (`data.csv`):
|
|
28
|
+
|
|
29
|
+
```yaml
|
|
30
|
+
config_version: 0.1.0
|
|
31
|
+
database_description: Simple database
|
|
32
|
+
|
|
33
|
+
entries:
|
|
34
|
+
- entry_type: structures
|
|
35
|
+
entry_paths:
|
|
36
|
+
- file: structures.zip
|
|
37
|
+
matches:
|
|
38
|
+
- cifs/*/*.cif
|
|
39
|
+
# (optional) property file and definitions:
|
|
40
|
+
property_paths:
|
|
41
|
+
- file: data.csv
|
|
42
|
+
property_definitions:
|
|
43
|
+
- name: energy
|
|
44
|
+
title: Total energy per atom
|
|
45
|
+
description: The total energy per atom as computed by DFT
|
|
46
|
+
unit: eV/atom
|
|
47
|
+
type: float
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Structure `id`s and property files
|
|
51
|
+
|
|
52
|
+
`optimade-maker` will assign an `id` for each structure based on its full path in the archive, following a simple deterministic rule: from the set of all archive paths, the maximum common path prefix and postfix (including file extensions) are removed. E.g.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
structures.zip/cifs/set1/101.cif
|
|
56
|
+
structures.zip/cifs/set2/102.cif
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
produces `["set1/101", "set2/102"]`.
|
|
60
|
+
|
|
61
|
+
The property files need to either refer to these `id`s or the full path in the archive to be associated with a structure. E.g. a possible property `csv` file could be
|
|
62
|
+
|
|
63
|
+
```csv
|
|
64
|
+
id,energy
|
|
65
|
+
set1/101,2.5
|
|
66
|
+
structures.zip/cifs/set2/102.cif,3.2
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Installing and running `optimake`
|
|
70
|
+
|
|
71
|
+
Install with
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
pip install optimade-maker
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
this will also make the `optimake` CLI utility available.
|
|
78
|
+
|
|
79
|
+
For a folder containing the data archive and the `optimade.yaml` file (such as those in `./examples`), run
|
|
80
|
+
|
|
81
|
+
- `optimake convert .` to just convert the entry into the JSONL format (see below).
|
|
82
|
+
- `optimake serve .` to start the OPTIMADE API (this also first converts the entry, if needed);
|
|
83
|
+
|
|
84
|
+
For more detailed information see also `optimake --help`.
|
|
85
|
+
|
|
86
|
+
## `optimade-maker` JSONLines Format
|
|
87
|
+
|
|
88
|
+
As described above, `optimade-maker` works via an intermediate JSONLines file representation of an OPTIMADE API (see also the [corresponding issue in the specification](https://github.com/Materials-Consortia/OPTIMADE/issues/471)).
|
|
89
|
+
This file should provide enough metadata to spin up an OPTIMADE API with many different entry types.
|
|
90
|
+
The format is as follows:
|
|
91
|
+
|
|
92
|
+
- First line must be a dictionary with the key `x-optimade`, containing a sub-dictionary of metadata (such as the OPTIMADE API version).
|
|
93
|
+
- Second line contains the `info/structures` endpoint.
|
|
94
|
+
- Third line contains the `info/references` endpoint, if present.
|
|
95
|
+
- Then each line contains an entry from the corresponding individual structure/reference endpoints.
|
|
96
|
+
|
|
97
|
+
```json
|
|
98
|
+
{"x-optimade": {"meta": {"api_version": "1.1.0"}}}
|
|
99
|
+
{"type": "info", "id": "structures", "properties": {...}}
|
|
100
|
+
{"type": "info", "id": "references", "properties": {...}}
|
|
101
|
+
{"type": "structures", "id": "1234", "attributes": {...}}
|
|
102
|
+
{"type": "structures", "id": "1235", "attributes": {...}}
|
|
103
|
+
{"type": "references", "id": "sfdas", "attributes": {...}}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
NOTE: the `info/` endpoints in [OPTIMADE v1.2.0](https://www.optimade.org/specification/#entry-listing-info-endpoints) will include `type` and `id` as well.
|
|
107
|
+
|
|
108
|
+
## Relevant links
|
|
109
|
+
|
|
110
|
+
- [Roadmap and meeting notes](https://docs.google.com/document/d/1cIpwuX6Ty5d3ZHKYWktQaBBQcI9fYmgG_hsD1P1UpO4/edit)
|
|
111
|
+
- [OPTIMADE serialization format notes](https://docs.google.com/document/d/1vf8_qxSRP5lCSb0P3M9gTr6nqkERxgOoSDno6YLcCjo/edit)
|
|
112
|
+
- [Flow diagram](https://excalidraw.com/#json=MBNl66sARCQekVrKZXDg8,K35f5FwmiS46vlsYGMJdrw)
|
|
113
|
+
|
|
114
|
+
## Contributors
|
|
115
|
+
|
|
116
|
+
Initial prototype was created at the Paul Scherrer Institute, Switzerland in the week of
|
|
117
|
+
12th-16th June 2023.
|
|
118
|
+
|
|
119
|
+
Authors (alphabetical):
|
|
120
|
+
|
|
121
|
+
- Kristjan Eimre
|
|
122
|
+
- Matthew Evans
|
|
123
|
+
- Giovanni Pizzi
|
|
124
|
+
- Gian-Marco Rignanese
|
|
125
|
+
- Jusong Yu
|
|
126
|
+
- Xing Wang
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "optimade-maker"
|
|
3
|
+
description = "Tools for making OPTIMADE APIs from raw structural data."
|
|
4
|
+
readme = "README.md"
|
|
5
|
+
version = "0.3.0"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
keywords = ["optimade", "jsonapi", "materials"]
|
|
9
|
+
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
13
|
+
"Programming Language :: Python :: 3.10",
|
|
14
|
+
"Programming Language :: Python :: 3.11",
|
|
15
|
+
"Programming Language :: Python :: 3.12",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"Topic :: Database",
|
|
18
|
+
"Topic :: Scientific/Engineering",
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
dependencies = [
|
|
22
|
+
"pydantic~=2.2",
|
|
23
|
+
"optimade[server,ase]~=1.1",
|
|
24
|
+
"pyyaml~=6.0",
|
|
25
|
+
"pymatgen>=2023.9",
|
|
26
|
+
"pandas~=2.1",
|
|
27
|
+
"pybtex~=0.24",
|
|
28
|
+
"tqdm~=4.65",
|
|
29
|
+
"requests~=2.31",
|
|
30
|
+
"numpy~=1.26",
|
|
31
|
+
"click~=8.1"
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
tests = ["pytest~=7.4", "pytest-cov~=4.0"]
|
|
36
|
+
dev = ["black", "ruff", "pre-commit", "mypy", "isort", "types-all"]
|
|
37
|
+
|
|
38
|
+
[tool.ruff]
|
|
39
|
+
select = ["E", "F", "I", "W", "Q"]
|
|
40
|
+
ignore = ["E501", "E402"]
|
|
41
|
+
fixable = ["A", "B", "C", "D", "E", "F", "I"]
|
|
42
|
+
unfixable = []
|
|
43
|
+
target-version = "py311"
|
|
44
|
+
per-file-ignores = {}
|
|
45
|
+
# Allow unused variables when underscore-prefixed.
|
|
46
|
+
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
|
|
47
|
+
|
|
48
|
+
[tool.mypy]
|
|
49
|
+
plugins = "pydantic.mypy"
|
|
50
|
+
ignore_missing_imports = true
|
|
51
|
+
follow_imports = "skip"
|
|
52
|
+
|
|
53
|
+
[tool.isort]
|
|
54
|
+
known_first_party = "optimade_maker"
|
|
55
|
+
profile = "black"
|
|
56
|
+
|
|
57
|
+
[project.scripts]
|
|
58
|
+
optimake = "optimade_maker.cli:cli"
|
|
File without changes
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from urllib.error import HTTPError, URLError
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from optimade_maker.config import Config
|
|
8
|
+
|
|
9
|
+
DEFAULT_ARCHIVE_URL = "https://archive.materialscloud.org"


class ArchiveRecord:
    """A single record of the Materials Cloud Archive.

    On construction the record metadata is requested from the archive, and
    the DOI identifier, the file list and the name of the OPTIMADE config
    file (if any) are derived from it. The class can then:

    1. build the url of the record and of its files,
    2. check if the record has a config file called "optimade.yaml"
       (or "optimade.yml"),
    3. if so, parse the config file to get the list of files to download,
    4. download the files in that list.

    Converting the structures to the OPTIMADE format is done in another
    script.

    Parameters:

        id: int
            id of the record. In MC archive, on the right panel,
            "Export" --> "JSON", then find the "id" value.
        archive_url: str
            base url of the archive, with or without a trailing slash.
    """

    def __init__(self, id: int, archive_url: str = DEFAULT_ARCHIVE_URL) -> None:
        self.id = id
        self.archive_url = archive_url
        self.url = self.get_record_url(id)

        # Everything below is derived from the metadata fetched over HTTP.
        self.metadata = self.get_record_metadata()
        self.doi_id = self.get_doi_id()
        self.files_w_checksums = self.get_files_w_checksums()

        self.default_path = os.path.join("/tmp/archive", self.doi_id)

        self.optimade_config_name = self.check_optimade_config_name()

    def _base_url(self) -> str:
        """Return the archive url without trailing slashes.

        Callers pass the base url both with and without a trailing slash;
        normalising it here lets the url builders always join with a
        single "/".
        """
        return self.archive_url.rstrip("/")

    def check_optimade_config_name(self):
        """
        Return the name of the optimade config file listed in the record
        ("optimade.yaml" is preferred over "optimade.yml"), or None if the
        record has neither.
        """
        return next(
            (
                candidate
                for candidate in ("optimade.yaml", "optimade.yml")
                if candidate in self.files_w_checksums
            ),
            None,
        )

    def process(self):
        """
        Download the config file and the data files of an OPTIMADE record
        to the default path. Does nothing if the record has no optimade
        config file.
        """
        self.download_optimade_files()
        # TODO: self.convert_to_optimade()

    def download_optimade_files(self, path=None):
        """
        Load the optimade config of the record and download the files it
        lists into `path` (the default path if `path` is not given).
        Does nothing if the record has no optimade config file.
        """
        if not self.is_optimade_record():
            return
        self.load_optimade_config()
        self.download_files(path)

    def get_record_url(self, record_id: int) -> str:
        """
        Return the API url of the record with the given id.
        """
        return f"{self._base_url()}/api/records/{record_id}"

    def get_file_url(self, filename: str) -> str:
        """
        Return the download url of a file of this record.

        Spaces in the file name are replaced by "+".
        """
        filename = filename.replace(" ", "+")
        # The "/record/file_stats?record_id=...&checksum=...&filename=..."
        # endpoint was used originally, but failed for e.g. record 1412
        # (structures.zip) on the staging archive, hence the plain
        # "/record/file" endpoint without the checksum.
        return f"{self._base_url()}/record/file?record_id={self.id}&filename={filename}"

    def get_record_metadata(self):
        """
        Get the metadata of a record by requesting its url.

        Returns the "metadata" entry of the JSON response. Connection
        problems propagate as the exceptions raised by `requests`; an HTTP
        error status raises `requests.HTTPError` instead of surfacing later
        as an unrelated JSON or key error.
        """
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept for backward compatibility -- confirm whether it is still
        # needed.
        response = requests.get(self.url, allow_redirects=True, verify=False)
        response.raise_for_status()
        payload = json.loads(response.content.decode("utf-8"))
        return payload["metadata"]

    def get_doi_id(self):
        """
        Get the DOI identifier of the record, e.g.
        "10.24435/materialscloud:jq-0s" -> "jq-0s"
        "10.24435/materialscloud:2020.0040/v1" -> "2020.0040/v1"

        NOTE: the slash in the old format is currently unsupported (e.g.
        can't make a folder, or docker container), but these entries don't
        contain an optimade.yml anyway, so it should be safe to ignore this
        for now.
        """
        return self.metadata["doi"].split(":")[-1]

    def get_files_w_checksums(self):
        """
        Get the files of the record as a dict mapping each file key to its
        checksum.
        """
        return {entry["key"]: entry["checksum"] for entry in self.metadata["_files"]}

    def is_optimade_record(self):
        """
        Return whether the record has an optimade config file.
        """
        return self.optimade_config_name is not None

    def download_optimade_config_file(self):
        """
        Download the optimade.yaml/yml file and return the response.

        Raises RuntimeError if the response status is not 200.
        """
        filename = self.optimade_config_name
        response = requests.get(self.get_file_url(filename), allow_redirects=True)
        if response.status_code != 200:
            raise RuntimeError(f"Could not download {filename} file.")
        return response

    def load_optimade_config(self):
        """
        Download and parse the optimade.yaml/yml file into `self.mc_config`.
        """
        response = self.download_optimade_config_file()
        self.mc_config = Config.from_string(response.content.decode("utf-8"))

    def download_files(self, path=None):
        """
        Download all files from the optimade file list into `path`.

        `load_optimade_config` must have been called before. If `path` is
        not given, the default path of the record is used. An existing
        directory at `path` is removed first.
        """
        import shutil

        from .utils import download_file

        if not path:
            path = self.default_path

        # start from an empty directory: remove it if it exists
        if os.path.exists(path) and os.path.isdir(path):
            shutil.rmtree(path)
        os.makedirs(path)

        # download optimade.yml/yaml, always saved as "optimade.yaml"
        config_url = self.get_file_url(self.optimade_config_name)
        download_file(config_url, path, rename="optimade.yaml")

        entries = self.mc_config.entries

        # download files in record
        if hasattr(entries, "jsonl_path"):
            # case 1: jsonl file specified (either via `file: jsonl.gz` or `jsonl_path:`)
            # NOTE(review): `entries` is iterated as a collection in case 2;
            # confirm against Config that it can carry these attributes.
            if hasattr(entries, "file"):
                # download `file:`, if specified
                download_file(self.get_file_url(entries.file), path)
            else:
                # otherwise download the `jsonl_path:`
                download_file(self.get_file_url(entries.jsonl_path), path)
            return

        # case 2: files specified as entry_paths/property_paths
        for entry in entries:
            filenames = [entry_path.file for entry_path in entry.entry_paths]
            # property files are optional: tolerate a missing or empty value
            property_paths = getattr(entry, "property_paths", None) or []
            filenames += [property_path.file for property_path in property_paths]
            for fname in filenames:
                download_file(self.get_file_url(fname), path)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import tqdm
|
|
2
|
+
|
|
3
|
+
from optimade_maker.archive.archive_record import ArchiveRecord
|
|
4
|
+
from optimade_maker.archive.utils import get_all_records, get_parsed_records
|
|
5
|
+
|
|
6
|
+
# Default base URL of the Materials Cloud Archive, used by the functions
# below when no `archive_url` is given (note the trailing slash).
DEFAULT_ARCHIVE_URL = "https://archive.materialscloud.org/"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def process_records(records: list, archive_url: str = DEFAULT_ARCHIVE_URL):
    """
    Go through the given Materials Cloud Archive records and process
    every new record that contains a file called "optimade.y(ml|aml)".

    Records whose id is already returned by `get_parsed_records()` are
    skipped.
    """
    already_parsed_ids = get_parsed_records()
    for item in tqdm.tqdm(records, desc="Processing records"):
        record_id = item["id"]
        if record_id not in already_parsed_ids:
            archive_record = ArchiveRecord(record_id, archive_url=archive_url)
            if not archive_record.is_optimade_record():
                continue
            print(f"Record {record_id} is a OPTIMADE record.")
            archive_record.process()
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def scan_records(archive_url=DEFAULT_ARCHIVE_URL):
    """
    Fetch all records of the archive at `archive_url` and process them.

    This can be run as a cron job to check for new optimade entries in the
    Materials Cloud Archive.
    """
    print("Start scanning the Materials Cloud Archive for new OPTIMADE entries...")
    process_records(get_all_records(archive_url), archive_url)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
if __name__ == "__main__":
    # When run as a script, scan the staging archive rather than the default one.
    scan_records("https://staging-archive.materialscloud.org/")
|