aiondemand 0.1.0b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiondemand-0.1.0b0/LICENSE +22 -0
- aiondemand-0.1.0b0/PKG-INFO +82 -0
- aiondemand-0.1.0b0/docs/README.md +58 -0
- aiondemand-0.1.0b0/pyproject.toml +36 -0
- aiondemand-0.1.0b0/setup.cfg +4 -0
- aiondemand-0.1.0b0/src/aiod/__init__.py +19 -0
- aiondemand-0.1.0b0/src/aiod/__version__.py +1 -0
- aiondemand-0.1.0b0/src/aiod/authentication/__init__.py +1 -0
- aiondemand-0.1.0b0/src/aiod/authentication/authentication.py +118 -0
- aiondemand-0.1.0b0/src/aiod/calls/__init__.py +0 -0
- aiondemand-0.1.0b0/src/aiod/calls/calls.py +299 -0
- aiondemand-0.1.0b0/src/aiod/calls/urls.py +95 -0
- aiondemand-0.1.0b0/src/aiod/calls/utils.py +53 -0
- aiondemand-0.1.0b0/src/aiod/configuration/__init__.py +1 -0
- aiondemand-0.1.0b0/src/aiod/configuration/_config.py +40 -0
- aiondemand-0.1.0b0/src/aiod/default/counts.py +38 -0
- aiondemand-0.1.0b0/src/aiod/resources/__init__.py +0 -0
- aiondemand-0.1.0b0/src/aiod/resources/case_studies.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/computational_assets.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/contacts.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/datasets.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/educational_resources.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/events.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/experiments.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/ml_models.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/news.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/organisations.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/persons.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/platforms.py +11 -0
- aiondemand-0.1.0b0/src/aiod/resources/projects.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/publications.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/services.py +13 -0
- aiondemand-0.1.0b0/src/aiod/resources/teams.py +11 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/PKG-INFO +82 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/SOURCES.txt +40 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/dependency_links.txt +1 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/entry_points.txt +2 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/requires.txt +17 -0
- aiondemand-0.1.0b0/src/aiondemand.egg-info/top_level.txt +1 -0
- aiondemand-0.1.0b0/tests/test_authentication.py +60 -0
- aiondemand-0.1.0b0/tests/test_counts.py +24 -0
- aiondemand-0.1.0b0/tests/test_endpoints.py +259 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023, Jean Matias
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aiondemand
|
|
3
|
+
Version: 0.1.0b0
|
|
4
|
+
Summary: Python SDK for the AIoD metadata catalogue
|
|
5
|
+
Author-email: Jean Matias <smatias.jean@gmail.com>, Pieter Gijsbers <p.gijsbers@tue.nl>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: aiohttp
|
|
10
|
+
Requires-Dist: pandas
|
|
11
|
+
Requires-Dist: python-keycloak
|
|
12
|
+
Requires-Dist: requests
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
15
|
+
Requires-Dist: responses; extra == "dev"
|
|
16
|
+
Requires-Dist: aioresponses; extra == "dev"
|
|
17
|
+
Requires-Dist: pytest; extra == "dev"
|
|
18
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
19
|
+
Provides-Extra: doc
|
|
20
|
+
Requires-Dist: mkdocs-material; extra == "doc"
|
|
21
|
+
Requires-Dist: mkdocstrings-python; extra == "doc"
|
|
22
|
+
Requires-Dist: mkdocs-jupyter; extra == "doc"
|
|
23
|
+
Requires-Dist: griffe<1.0; extra == "doc"
|
|
24
|
+
|
|
25
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
26
|
+
[Python](https://www.python.org)
|
|
27
|
+
|
|
28
|
+
# AI-on-Demand
|
|
29
|
+
|
|
30
|
+
The [AI-on-Demand](https://aiod.eu) (AIOD) platform empowers AI research and innovation for industry and academia.
|
|
31
|
+
At its core is the [metadata catalogue](https://api.aiod.eu) which indexes countless AI resources, such as datasets, papers, and educational material,
|
|
32
|
+
from many different platforms such as [Zenodo](https://www.zenodo.org), [OpenML](https://www.openml.org), and [AIDA](https://www.i-aida.org/ai-educational-resources/).
|
|
33
|
+
This package allows you to explore all resources in the metadata catalogue through Python.
|
|
34
|
+
You can also browse the contents of the AI-on-Demand metadata catalogue through the [MyLibrary](https://mylibrary.aiod.eu) service.
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
The `aiondemand` package is on [PyPI](https://pypi.org/project/aiondemand/):
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
$ pip install aiondemand
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Tip: install your dependencies in a [virtual environment](https://realpython.com/python-virtual-environments-a-primer/).
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
You can directly access endpoints through the Python API, for example to browse datasets:
|
|
47
|
+
```python
|
|
48
|
+
import aiod
|
|
49
|
+
|
|
50
|
+
aiod.datasets.get_list()
|
|
51
|
+
```
|
|
52
|
+
And results will be returned as a [Pandas](https://pandas.pydata.org/docs/getting_started/overview.html) dataframe (though the `data_format` may be used to get JSON instead):
|
|
53
|
+
```bash
|
|
54
|
+
platform platform_resource_identifier name date_published same_as is_accessible_for_free ... relevant_link relevant_resource relevant_to research_area scientific_domain identifier
|
|
55
|
+
0 huggingface acronym_identification acronym_identification 2022-03-02T23:29:22 https://huggingface.co/datasets/acronym_identi... True ... [] [] [] [] [] 1
|
|
56
|
+
...
|
|
57
|
+
9 huggingface allegro_reviews allegro_reviews 2022-03-02T23:29:22 https://huggingface.co/datasets/allegro_reviews True ... [] [] [] [] [] 10
|
|
58
|
+
|
|
59
|
+
[10 rows x 30 columns]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
You can even query the elastic search endpoints:
|
|
63
|
+
```python
|
|
64
|
+
aiod.publications.search(search_query="Robotics")
|
|
65
|
+
```
|
|
66
|
+
```bash
|
|
67
|
+
platform platform_resource_identifier name date_published same_as is_accessible_for_free ... relevant_resource relevant_to research_area scientific_domain type identifier
|
|
68
|
+
0 robotics4eu 1803 Responsible Robotics & non-tech barriers t... None https://www.robotics4eu.eu/publications/respon... None ... [] [] [other materials] [other materials] None 4
|
|
69
|
+
|
|
70
|
+
[1 rows x 36 columns]
|
|
71
|
+
```
|
|
72
|
+
## Contributing
|
|
73
|
+
|
|
74
|
+
Interested in contributing? Check out the [contributing guidelines](contributing.md).
|
|
75
|
+
By contributing to this project, you agree to abide by our [Code of Conduct](conduct.md).
|
|
76
|
+
|
|
77
|
+
## Credits
|
|
78
|
+
|
|
79
|
+
The `aiondemand` package is being developed with funding from EU’s Horizon Europe research and innovation program under grant agreement [No. 101070000 (AI4EUROPE)](https://cordis.europa.eu/project/id/101070000).
|
|
80
|
+
Not all contributors need be affiliated with this funding.
|
|
81
|
+
|
|
82
|
+
[`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter) were used to create the repository structure.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
2
|
+
[Python](https://www.python.org)
|
|
3
|
+
|
|
4
|
+
# AI-on-Demand
|
|
5
|
+
|
|
6
|
+
The [AI-on-Demand](https://aiod.eu) (AIOD) platform empowers AI research and innovation for industry and academia.
|
|
7
|
+
At its core is the [metadata catalogue](https://api.aiod.eu) which indexes countless AI resources, such as datasets, papers, and educational material,
|
|
8
|
+
from many different platforms such as [Zenodo](https://www.zenodo.org), [OpenML](https://www.openml.org), and [AIDA](https://www.i-aida.org/ai-educational-resources/).
|
|
9
|
+
This package allows you to explore all resources in the metadata catalogue through Python.
|
|
10
|
+
You can also browse the contents of the AI-on-Demand metadata catalogue through the [MyLibrary](https://mylibrary.aiod.eu) service.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
The `aiondemand` package is on [PyPI](https://pypi.org/project/aiondemand/):
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
$ pip install aiondemand
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Tip: install your dependencies in a [virtual environment](https://realpython.com/python-virtual-environments-a-primer/).
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
You can directly access endpoints through the Python API, for example to browse datasets:
|
|
23
|
+
```python
|
|
24
|
+
import aiod
|
|
25
|
+
|
|
26
|
+
aiod.datasets.get_list()
|
|
27
|
+
```
|
|
28
|
+
And results will be returned as a [Pandas](https://pandas.pydata.org/docs/getting_started/overview.html) dataframe (though the `data_format` may be used to get JSON instead):
|
|
29
|
+
```bash
|
|
30
|
+
platform platform_resource_identifier name date_published same_as is_accessible_for_free ... relevant_link relevant_resource relevant_to research_area scientific_domain identifier
|
|
31
|
+
0 huggingface acronym_identification acronym_identification 2022-03-02T23:29:22 https://huggingface.co/datasets/acronym_identi... True ... [] [] [] [] [] 1
|
|
32
|
+
...
|
|
33
|
+
9 huggingface allegro_reviews allegro_reviews 2022-03-02T23:29:22 https://huggingface.co/datasets/allegro_reviews True ... [] [] [] [] [] 10
|
|
34
|
+
|
|
35
|
+
[10 rows x 30 columns]
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
You can even query the elastic search endpoints:
|
|
39
|
+
```python
|
|
40
|
+
aiod.publications.search(search_query="Robotics")
|
|
41
|
+
```
|
|
42
|
+
```bash
|
|
43
|
+
platform platform_resource_identifier name date_published same_as is_accessible_for_free ... relevant_resource relevant_to research_area scientific_domain type identifier
|
|
44
|
+
0 robotics4eu 1803 Responsible Robotics & non-tech barriers t... None https://www.robotics4eu.eu/publications/respon... None ... [] [] [other materials] [other materials] None 4
|
|
45
|
+
|
|
46
|
+
[1 rows x 36 columns]
|
|
47
|
+
```
|
|
48
|
+
## Contributing
|
|
49
|
+
|
|
50
|
+
Interested in contributing? Check out the [contributing guidelines](contributing.md).
|
|
51
|
+
By contributing to this project, you agree to abide by our [Code of Conduct](conduct.md).
|
|
52
|
+
|
|
53
|
+
## Credits
|
|
54
|
+
|
|
55
|
+
The `aiondemand` package is being developed with funding from EU’s Horizon Europe research and innovation program under grant agreement [No. 101070000 (AI4EUROPE)](https://cordis.europa.eu/project/id/101070000).
|
|
56
|
+
Not all contributors need be affiliated with this funding.
|
|
57
|
+
|
|
58
|
+
[`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter) were used to create the repository structure.
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "aiondemand"
|
|
3
|
+
version = "0.1.0b0"
|
|
4
|
+
description = "Python SDK for the AIoD metadata catalogue"
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Jean Matias", email = "smatias.jean@gmail.com" },
|
|
7
|
+
{ name = "Pieter Gijsbers", email = "p.gijsbers@tue.nl" },
|
|
8
|
+
]
|
|
9
|
+
readme = "docs/README.md"
|
|
10
|
+
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
|
|
13
|
+
dependencies = ["aiohttp", "pandas", "python-keycloak", "requests"]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
dev = [
|
|
17
|
+
"pre-commit",
|
|
18
|
+
"responses",
|
|
19
|
+
"aioresponses",
|
|
20
|
+
"pytest",
|
|
21
|
+
"pytest-asyncio",
|
|
22
|
+
]
|
|
23
|
+
doc = [
|
|
24
|
+
"mkdocs-material",
|
|
25
|
+
"mkdocstrings-python",
|
|
26
|
+
"mkdocs-jupyter",
|
|
27
|
+
"griffe<1.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[tool.commitizen]
|
|
31
|
+
name = "cz_conventional_commits"
|
|
32
|
+
version = "0.1.0b0"
|
|
33
|
+
version_files = ["pyproject.toml:version", "src/aiod/__version__.py"]
|
|
34
|
+
|
|
35
|
+
[project.entry-points."mkdocs.plugins"]
|
|
36
|
+
asset-type-replacer = "docs.tools.asset_type_replacer:AssetTypeReplacer"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from aiod.authentication import login, logout, get_current_user
|
|
2
|
+
from aiod.configuration import config
|
|
3
|
+
from aiod.resources import case_studies
|
|
4
|
+
from aiod.resources import computational_assets
|
|
5
|
+
from aiod.resources import contacts
|
|
6
|
+
from aiod.default.counts import asset_counts as counts
|
|
7
|
+
from aiod.resources import datasets
|
|
8
|
+
from aiod.resources import educational_resources
|
|
9
|
+
from aiod.resources import events
|
|
10
|
+
from aiod.resources import experiments
|
|
11
|
+
from aiod.resources import ml_models
|
|
12
|
+
from aiod.resources import news
|
|
13
|
+
from aiod.resources import organisations
|
|
14
|
+
from aiod.resources import persons
|
|
15
|
+
from aiod.resources import platforms
|
|
16
|
+
from aiod.resources import projects
|
|
17
|
+
from aiod.resources import publications
|
|
18
|
+
from aiod.resources import services
|
|
19
|
+
from aiod.resources import teams
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = '0.1.0b0'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .authentication import login, logout, get_current_user
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import http.client
|
|
2
|
+
import requests
|
|
3
|
+
from keycloak import KeycloakOpenID, KeycloakAuthenticationError, KeycloakPostError
|
|
4
|
+
from typing import Sequence, NamedTuple
|
|
5
|
+
|
|
6
|
+
from aiod.configuration import config
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _connect_keycloak() -> KeycloakOpenID:
    """Create a fresh Keycloak OpenID client from the current configuration.

    The server URL, client id and realm are read from `config` at call time,
    so each call reflects whatever the configuration holds at that moment.

    Returns:
        A newly constructed `KeycloakOpenID` instance.
    """
    connection_settings = {
        "server_url": config.auth_server_url,
        "client_id": config.client_id,
        "realm_name": config.realm,
    }
    return KeycloakOpenID(**connection_settings)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def keycloak_openid() -> KeycloakOpenID:
    """Return the module-level Keycloak client, creating it on first use.

    Returns:
        The cached `KeycloakOpenID` instance stored in `_keycloak_openid`.
    """
    global _keycloak_openid
    if _keycloak_openid is not None:
        return _keycloak_openid
    # No client cached (first call, or the cache was reset): build one now.
    _keycloak_openid = _connect_keycloak()
    return _keycloak_openid
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def on_keycloak_config_changed(_: str, __: str, ___: str) -> None:
    """Drop the current session and cached client after a config change.

    The three positional arguments are accepted but ignored.
    NOTE(review): they presumably describe the changed setting (name and
    old/new value) - confirm against `config.subscribe`.
    """
    global _keycloak_openid
    # Log out through the *existing* client before discarding it; a failing
    # logout request is tolerated so the reset always completes.
    logout(ignore_post_error=True)
    # Force `keycloak_openid()` to rebuild the client on its next call.
    _keycloak_openid = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Register the reset callback for every setting that `_connect_keycloak`
# reads, so a stale client is never reused.
# NOTE(review): presumably `config.subscribe` invokes `on_change` whenever the
# named attribute is modified - confirm in aiod.configuration.
config.subscribe(
    "auth_server_url",
    on_change=on_keycloak_config_changed,
)
config.subscribe(
    "realm",
    on_change=on_keycloak_config_changed,
)
config.subscribe(
    "client_id",
    on_change=on_keycloak_config_changed,
)

# Lazily created client shared by this module; `None` until first requested.
_keycloak_openid: KeycloakOpenID | None = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class User(NamedTuple):
    """Name and roles of an authenticated user."""

    # Taken from the "name" field of the authorization response.
    name: str
    # Role names granted to the user.
    roles: Sequence[str]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def login(username: str, password: str) -> None:
    """Authenticate against Keycloak and store the resulting tokens.

    On success the access and refresh tokens are written to
    `config.access_token` and `config.refresh_token`.

    Args:
        username (str): The username of the user.
        password (str): The password of the user.

    Raises:
        ValueError: When username or password is empty.
        FailedAuthenticationError: When the username or password is wrong.
    """
    # Validate both credentials up front, username first, before any
    # network traffic happens.
    for label, value in (("username", username), ("password", password)):
        if not value:
            raise ValueError(
                f"{value!r} is not a valid {label}, must be a non-empty string."
            )

    try:
        tokens = keycloak_openid().token(username, password)
    except KeycloakAuthenticationError as auth_error:
        raise FailedAuthenticationError(
            "Authentication failed! Please verify your credentials."
        ) from auth_error

    config.access_token = tokens["access_token"]
    config.refresh_token = tokens["refresh_token"]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def logout(ignore_post_error: bool = False) -> None:
    """Log out the current user and forget the stored tokens.

    Args:
        ignore_post_error:
            If true, do not raise an error if the logout attempt failed.

    Raises:
        KeycloakPostError: When the logout request fails and
            `ignore_post_error` is false. The stored tokens are left
            untouched in that case.
    """
    try:
        keycloak_openid().logout(config.refresh_token)
    except KeycloakPostError as post_error:
        if ignore_post_error:
            pass  # best-effort logout: fall through and clear the tokens
        else:
            raise post_error

    # Access token is cleared first, then the refresh token.
    config.access_token = config.refresh_token = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def get_current_user() -> User:
    """Look up the user that the stored access token belongs to.

    Queries the `authorization_test` endpoint below `config.api_base_url`,
    sending `config.access_token` as a bearer token.

    Returns:
        User: The user information for the currently authenticated user.

    Raises:
        NotAuthenticatedError: When the user is not authenticated.
    """
    endpoint = f"{config.api_base_url}authorization_test"
    auth_header = {"Authorization": f"Bearer {config.access_token}"}
    response = requests.get(endpoint, headers=auth_header)

    # The body is decoded before the status check because it doubles as the
    # error payload for the unauthenticated case.
    payload = response.json()
    if response.status_code == http.client.UNAUTHORIZED:
        raise NotAuthenticatedError(payload)

    return User(name=payload["name"], roles=tuple(payload["roles"]))
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class FailedAuthenticationError(Exception):
    """Signals that a login attempt was rejected.

    `login` raises this when Keycloak reports an authentication error.
    """
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class NotAuthenticatedError(Exception):
    """Signals that an endpoint requiring authentication was called without it."""
|
|
File without changes
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
import aiohttp
|
|
2
|
+
import asyncio
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from typing import Literal
|
|
7
|
+
from functools import partial
|
|
8
|
+
|
|
9
|
+
from aiod.calls.urls import (
|
|
10
|
+
url_to_get_asset,
|
|
11
|
+
url_to_get_content,
|
|
12
|
+
url_to_get_list,
|
|
13
|
+
url_to_get_list_from_platform,
|
|
14
|
+
url_to_get_asset_from_platform,
|
|
15
|
+
url_to_resource_counts,
|
|
16
|
+
url_to_search,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
from aiod.calls.utils import format_response, wrap_calls
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_list(
    *,
    asset_type: str,
    platform: str | None = None,
    offset: int = 0,
    limit: int = 10,
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.DataFrame | list[dict]:
    """
    Fetch one page of ASSET_TYPE metadata from the catalogue.

    Parameters:
        platform: Only return ASSET_TYPE assets that come from this platform (default is None).
        offset: The offset for pagination (default is 0).
        limit: The maximum number of items to retrieve (default is 10).
        version: The version of the endpoint (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, i.e. in this case a list of dicts.

    Returns:
        The retrieved metadata in the specified format.
    """
    # A platform filter selects the platform-specific listing endpoint.
    if platform is None:
        url = url_to_get_list(asset_type, offset, limit, version)
    else:
        url = url_to_get_list_from_platform(
            asset_type, platform, offset, limit, version
        )

    response = requests.get(url)
    return format_response(response.json(), data_format)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def counts(
    *, asset_type: str, version: str | None = None, per_platform: bool = False
) -> int | dict[str, int]:
    """
    Count the ASSET_TYPE assets registered in the metadata catalogue.

    Parameters:
        version: The version of the endpoint (default is None).
        per_platform: Whether to list counts per platform (default is False).

    Returns:
        The number of ASSET_TYPE assets in the metadata catalogue. If the parameter per_platform is True,
        a dictionary with platform names as keys and the number of ASSET_TYPE assets from that
        platform as values is returned instead.
    """
    counts_url = url_to_resource_counts(version, per_platform, asset_type)
    # The decoded JSON body is handed back unchanged.
    return requests.get(counts_url).json()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_asset(
    *,
    asset_type: str,
    identifier: int,
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.Series | dict:
    """
    Fetch the metadata of a single ASSET_TYPE by its catalogue identifier.

    Parameters:
        identifier: The identifier of the ASSET_TYPE to retrieve.
        version: The version of the endpoint (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, in this case a dict.

    Returns:
        The retrieved metadata for the specified ASSET_TYPE.
    """
    asset_url = url_to_get_asset(asset_type, identifier, version)
    response = requests.get(asset_url)
    return format_response(response.json(), data_format)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_asset_from_platform(
    *,
    asset_type: str,
    platform: str,
    platform_identifier: str,
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.Series | dict:
    """
    Fetch the metadata of a single ASSET_TYPE using the identifier of its source platform.

    Parameters:
        platform: The platform where the ASSET_TYPE asset is retrieved from.
        platform_identifier: The identifier under which the ASSET_TYPE is known by the platform.
        version: The version of the endpoint (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, in this case a dict.

    Returns:
        The retrieved metadata for the specified ASSET_TYPE.
    """
    asset_url = url_to_get_asset_from_platform(
        asset_type,
        platform,
        platform_identifier,
        version,
    )
    response = requests.get(asset_url)
    return format_response(response.json(), data_format)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def get_content(
    *,
    asset_type: str,
    identifier: int,
    distribution_idx: int = 0,
    version: str | None = None,
) -> bytes:
    """
    Download the data content of a specific ASSET_TYPE.

    Parameters:
        identifier: The identifier of the ASSET_TYPE asset.
        distribution_idx: The index of a specific distribution from the distribution list (default is 0).
        version: The version of the endpoint (default is None).

    Returns:
        The raw response body for the specified ASSET_TYPE distribution.
    """
    content_url = url_to_get_content(
        asset_type, identifier, distribution_idx, version
    )
    # The body is returned as raw bytes; the status code is not inspected here.
    return requests.get(content_url).content
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def search(
    *,
    asset_type: str,
    search_query: str,
    platforms: list[str] | None = None,
    offset: int = 0,
    limit: int = 10,
    search_field: (
        None | Literal["name", "issn", "description_html", "description_plain"]
    ) = None,
    get_all: bool = True,
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.DataFrame | list[dict]:
    """
    Query the Elasticsearch endpoint of the AIoD metadata catalogue for ASSET_TYPE metadata.

    Parameters:
        search_query: The string to be matched against the search fields.
        platforms: The platforms to filter the search results.
            If None, results from all platforms will be returned (default is None).
        offset: The offset for pagination (default is 0).
        limit: The maximum number of results to retrieve (default is 10).
        search_field:
            The specific fields to search within. If None, the query will be matched against all fields (default is None).
        get_all: If true, a request to the database is made to retrieve all data.
            If false, only the indexed information is returned. (default is True).
        version: The version of the endpoint to use (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, in this case a list of dicts.

    Returns:
        The retrieved metadata in the specified format.
    """
    search_args = (
        asset_type,
        search_query,
        platforms,
        offset,
        limit,
        search_field,
        get_all,
        version,
    )
    response = requests.get(url_to_search(*search_args))
    # Search results are nested under the "resources" key of the payload.
    hits = response.json()["resources"]
    return format_response(hits, data_format)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
async def get_assets_async(
    *,
    asset_type: str,
    identifiers: list[int],
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.DataFrame | list[dict]:
    """
    Concurrently fetch the metadata of several ASSET_TYPE assets by identifier.

    Parameters:
        identifiers: The list of identifiers of the ASSET_TYPE to retrieve.
        version: The version of the endpoint (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, in this case a list of dicts.

    Returns:
        The retrieved metadata for the specified ASSET_TYPE.
    """
    # One request URL per identifier, in the order the identifiers were given.
    asset_urls = []
    for asset_id in identifiers:
        asset_urls.append(url_to_get_asset(asset_type, asset_id, version))

    payloads = await _fetch_resources(asset_urls)
    return format_response(payloads, data_format)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
async def get_list_async(
    *,
    asset_type: str,
    offset: int = 0,
    limit: int = 100,
    batch_size: int = 10,
    version: str | None = None,
    data_format: Literal["pandas", "json"] = "pandas",
) -> pd.DataFrame | list[dict]:
    """
    Asynchronously retrieve a list of ASSET_TYPE from the catalogue in batches.

    The requested window `[offset, offset + limit)` is split into consecutive
    pages of at most `batch_size` items; all pages are requested concurrently
    and their results are concatenated in page order.

    Parameters:
        offset: The offset for pagination (default is 0).
        limit: The maximum number of items to retrieve (default is 100).
        batch_size: The number of items in a batch (default is 10).
        version: The version of the endpoint (default is None).
        data_format: The desired format for the response (default is "pandas").
            For "json" formats, the returned type is a json decoded type, in this case a list of dicts.

    Returns:
        The retrieved metadata in the specified format.

    Raises:
        ValueError: Batch size must be larger than 0.
    """
    if batch_size <= 0:
        raise ValueError(
            "batch_size must be larger than 0, otherwise you can use the synchronous get_list function!"
        )

    # Every page is `batch_size` long except possibly the last one, which is
    # trimmed so that no more than `limit` items are requested in total.
    end = offset + limit
    urls = [
        url_to_get_list(
            asset_type,
            batch_offset,
            min(batch_size, end - batch_offset),
            version,
        )
        for batch_offset in range(offset, end, batch_size)
    ]

    batches = await _fetch_resources(urls)
    # Each response is one page (a list of items); merge them into one list.
    flattened_response_data = [item for batch in batches for item in batch]
    return format_response(flattened_response_data, data_format)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
async def _fetch_resources(urls: list[str]) -> list:
    """Fetch all `urls` concurrently and return their decoded JSON bodies.

    A single `aiohttp.ClientSession` is shared by all requests.

    Parameters:
        urls: The URLs to request.

    Returns:
        A list with one decoded JSON payload per URL, in the same order as
        `urls` (`asyncio.gather` preserves the order of its awaitables).
    """

    async def _fetch_data(session: aiohttp.ClientSession, url: str) -> dict | list:
        # Decode inside the `async with` so the body is read before the
        # response is released.
        async with session.get(url) as response:
            return await response.json()

    async with aiohttp.ClientSession() as session:
        tasks = [_fetch_data(session, url) for url in urls]
        response_data = await asyncio.gather(*tasks)
    return response_data
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# Pre-bound `wrap_calls` covering the calls listed below; `search` is
# deliberately kept in its own wrapper.
# NOTE(review): presumably each resource module uses these to expose the calls
# with `asset_type` filled in - confirm in aiod.calls.utils.
wrap_common_calls = partial(
    wrap_calls,
    calls=[
        get_list,
        counts,
        get_asset,
        get_asset_from_platform,
        get_content,
        get_assets_async,
        get_list_async,
    ],
)

# Pre-bound `wrap_calls` that covers only the search call.
wrap_search_call = partial(
    wrap_calls,
    calls=[search],
)
|