ahorn-loader 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


ahorn_loader/api.py CHANGED
@@ -1,20 +1,46 @@
  """Module to interact with the Ahorn dataset API."""
 
+ import contextlib
+ import gzip
  import json
+ from collections.abc import Generator, Iterable
  from datetime import UTC, datetime
  from pathlib import Path
- from typing import Any
+ from typing import TypedDict
  from urllib.parse import ParseResult, urlparse
 
  import requests
 
- __all__ = ["download_dataset", "load_dataset_data", "load_datasets_data"]
+ from .utils import get_cache_dir
+
+ __all__ = [
+     "download_dataset",
+     "load_dataset_data",
+     "load_datasets_data",
+     "read_dataset",
+ ]
 
  DATASET_API_URL = "https://ahorn.rwth-aachen.de/api/datasets.json"
- CACHE_PATH = Path(__file__).parent.parent.parent / "cache" / "datasets.json"
 
 
- def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, Any]:
+ class AttachmentDict(TypedDict):
+     url: str
+     size: int
+
+
+ class DatasetDict(TypedDict):
+     slug: str
+     title: str
+     tags: list[str]
+     attachments: dict[str, AttachmentDict]
+
+
+ class DatasetsDataDict(TypedDict):
+     datasets: dict[str, DatasetDict]
+     time: str
+
+
+ def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, DatasetDict]:
      """Load dataset data from the Ahorn API.
 
      Parameters
@@ -30,31 +56,28 @@ def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, Any]:
          and the values are dictionaries with dataset details such as title, tags, and
          attachments.
      """
-     if CACHE_PATH.exists() and cache_lifetime is not None:
-         with CACHE_PATH.open("r", encoding="utf-8") as cache_file:
-             cache = json.load(cache_file)
-         if (
-             cache.get("time")
-             and (
-                 datetime.now(tz=UTC) - datetime.fromisoformat(cache["time"])
-             ).total_seconds()
-             < cache_lifetime
-         ):
-             return cache["datasets"]
+     datasets_data_cache = get_cache_dir() / "datasets.json"
+     if datasets_data_cache.exists() and cache_lifetime is not None:
+         cache_mtime = datetime.fromtimestamp(
+             datasets_data_cache.stat().st_mtime, tz=UTC
+         )
+         if (datetime.now(tz=UTC) - cache_mtime).total_seconds() < cache_lifetime:
+             with datasets_data_cache.open("r", encoding="utf-8") as cache_file:
+                 cache: DatasetsDataDict = json.load(cache_file)
+             return cache["datasets"]
 
      response = requests.get(DATASET_API_URL, timeout=10)
      response.raise_for_status()
 
-     CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
-     with CACHE_PATH.open("w", encoding="utf-8") as cache_file:
+     datasets_data_cache.parent.mkdir(parents=True, exist_ok=True)
+     with datasets_data_cache.open("w", encoding="utf-8") as cache_file:
          cache_file.write(response.text)
 
-     return response.json()["datasets"]
+     response_json: DatasetsDataDict = response.json()
+     return response_json["datasets"]
 
 
- def load_dataset_data(
-     slug: str, *, cache_lifetime: int | None = None
- ) -> dict[str, Any]:
+ def load_dataset_data(slug: str, *, cache_lifetime: int | None = None) -> DatasetDict:
      """Load data for a specific dataset by its slug.
 
      Parameters
@@ -67,19 +90,25 @@ def load_dataset_data(
 
      Returns
      -------
-     dict[str, Any]
+     DatasetDict
          Dictionary containing the dataset details.
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
      """
      datasets = load_datasets_data(cache_lifetime=cache_lifetime)
-     if "error" in datasets:
-         return {"error": datasets["error"]}
 
-     return datasets.get(slug, {"error": f"Dataset '{slug}' not found."})
+     if slug not in datasets:
+         raise KeyError(f"Dataset with slug '{slug}' does not exist in AHORN.")
+
+     return datasets[slug]
 
 
  def download_dataset(
      slug: str, folder: Path | str, *, cache_lifetime: int | None = None
- ) -> None:
+ ) -> Path:
      """Download a dataset by its slug to the specified folder.
 
      Parameters
@@ -91,15 +120,27 @@ def download_dataset(
      cache_lifetime : int, optional
          How long to reuse cached data in seconds. If not provided, the cache will not
          be used.
+
+     Returns
+     -------
+     Path
+         The path to the downloaded dataset file.
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
+     RuntimeError
+         If the dataset file could not be downloaded due to some error.
      """
      if isinstance(folder, str):
          folder = Path(folder)
 
      data = load_dataset_data(slug, cache_lifetime=cache_lifetime)
-     if "error" in data:
-         raise ValueError(f"Error loading dataset '{slug}': {data['error']}")
-     if "attachments" not in data or "dataset" not in data["attachments"]:
-         raise KeyError(f"Dataset '{slug}' does not contain required 'attachments/dataset' keys.")
+     if "dataset" not in data["attachments"]:
+         raise RuntimeError(
+             f"Dataset '{slug}' does not contain required 'attachments/dataset' keys."
+         )
      dataset_attachment = data["attachments"]["dataset"]
 
      url: ParseResult = urlparse(dataset_attachment["url"])
@@ -113,3 +154,46 @@ def download_dataset(
          for chunk in response.iter_content(chunk_size=8192):
              if chunk:
                  f.write(chunk)
+
+     return filepath
+
+
+ @contextlib.contextmanager
+ def read_dataset(slug: str) -> Generator[Iterable[str], None, None]:
+     """Download and yield a context-managed file object for the dataset lines by slug.
+
+     The dataset file will be stored in your system cache and can be deleted according
+     to your system's cache policy. To ensure that costly re-downloads do not occur, use
+     the `download_dataset` function to store the dataset file at a more permanent
+     location.
+
+     Parameters
+     ----------
+     slug : str
+         The slug of the dataset to download.
+
+     Returns
+     -------
+     Context manager yielding an open file object (iterator over lines).
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
+     RuntimeError
+         If the dataset file could not be downloaded due to other errors.
+
+     Examples
+     --------
+     >>> import ahorn_loader
+     >>> with ahorn_loader.read_dataset("contact-high-school") as f:
+     >>>     for line in f:
+     >>>         ...
+     """
+     filepath = download_dataset(slug, get_cache_dir())
+     if filepath.suffix == ".gz":
+         with gzip.open(filepath, mode="rt", encoding="utf-8") as f:
+             yield f
+     else:
+         with filepath.open("r", encoding="utf-8") as f:
+             yield f
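
The new `read_dataset` context manager ties the additions above together: it downloads the attachment into the platform cache directory via `download_dataset(slug, get_cache_dir())` and yields the open, transparently gunzipped file object. A minimal usage sketch; the slug comes from the docstring example above, and the line-counting loop is purely illustrative:

```python
import ahorn_loader

# Stream the dataset without keeping a permanent copy; the file is written to
# the platform cache directory and decompressed on the fly if it ends in .gz.
with ahorn_loader.read_dataset("contact-high-school") as lines:
    n_lines = sum(1 for _ in lines)

print(f"contact-high-school has {n_lines} lines")

# For a permanent copy, download into a folder of your choice instead:
# path = ahorn_loader.download_dataset("contact-high-school", "data/")
```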
ahorn_loader/cli.py CHANGED
@@ -58,6 +58,7 @@ def download(
          typer.echo(f"Failed to download dataset: {e}")
          raise typer.Exit(code=1) from e
 
+
  @app.command()
  def validate(
      path: Annotated[
ahorn_loader/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """Utility functions used internally in `ahorn-loader`."""
+
+ from .cache import *
ahorn_loader/utils/cache.py ADDED
@@ -0,0 +1,29 @@
+ """Module with cache-related utility functions."""
+
+ import os
+ import sys
+ from pathlib import Path
+
+ __all__ = ["get_cache_dir"]
+
+
+ def get_cache_dir() -> Path:
+     """Return an appropriate cache location for the current platform.
+
+     Returns
+     -------
+     pathlib.Path
+         Platform-dependent cache directory.
+     """
+     match sys.platform:
+         case "win32":
+             base = os.getenv("LOCALAPPDATA") or Path("~\\AppData\\Local").expanduser()
+             return Path(base) / "ahorn-loader" / "Cache"
+         case "darwin":
+             return Path.home() / "Library" / "Caches" / "ahorn-loader"
+         case _:
+             # Linux and other Unix
+             xdg = os.getenv("XDG_CACHE_HOME")
+             if xdg:
+                 return Path(xdg) / "ahorn-loader"
+             return Path.home() / ".cache" / "ahorn-loader"
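
Note that `load_datasets_data` (diffed above) now decides cache freshness purely from the modification time of the file in this directory, rather than from a timestamp stored inside the cached JSON. A standalone sketch of that check, using a hypothetical helper name and an illustrative one-hour lifetime and Linux-style path:

```python
from datetime import UTC, datetime
from pathlib import Path


def is_cache_fresh(path: Path, lifetime_seconds: int) -> bool:
    """Return True if `path` exists and was modified less than `lifetime_seconds` ago."""
    if not path.exists():
        return False
    mtime = datetime.fromtimestamp(path.stat().st_mtime, tz=UTC)
    return (datetime.now(tz=UTC) - mtime).total_seconds() < lifetime_seconds


# Example: reuse cached dataset metadata for up to one hour.
cache_file = Path.home() / ".cache" / "ahorn-loader" / "datasets.json"
print(is_cache_fresh(cache_file, lifetime_seconds=3600))
```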
ahorn_loader-0.2.0.dist-info/METADATA ADDED
@@ -0,0 +1,85 @@
+ Metadata-Version: 2.3
+ Name: ahorn-loader
+ Version: 0.2.0
+ Summary: Library and command-line application to interact with datasets in the Aachen Higher-Order Repository of Networks.
+ Author: Florian Frantzen
+ Author-email: Florian Frantzen <frantzen@netsci.rwth-aachen.de>
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Scientific/Engineering
+ Requires-Dist: requests>=2.32.4
+ Requires-Dist: typer>=0.16.0
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+
+ # `ahorn-loader`
+
+ Library and command-line application to interact with datasets in [AHORN](https://ahorn.rwth-aachen.de/).
+
+ <div align="center">
+
+ [![Python](https://img.shields.io/badge/python-3.11+-blue)](https://www.python.org/)
+ [![license](https://badgen.net/github/license/netsci-rwth/ahorn-loader)](https://github.com/pyt-team/TopoNetX/blob/main/LICENSE)
+
+ </div>
+
+ ## Usage
+
+ `ahorn-loader` is both a command-line application and a Python package to interact with the AHORN repository for higher-order datasets.
+
+ ### Command-Line Usage
+
+ To install and use `ahorn-loader` from the command line, you can run the following command:
+
+ ```bash
+ uvx ahorn-loader [command] [args]
+ ```
+
+ Commands include:
+ - `ls`: List available datasets in AHORN.
+ - `download`: Download a dataset from AHORN.
+ - `validate`: Validate a specific dataset file (e.g., before adding it to AHORN).
+
+ To get a full help of available commands and options, run `ahorn-loader --help`.
+
+ ### Python Package Usage
+
+ To use `ahorn-loader` as a Python package, you can install it via `pip` (or some other package manager of your choice):
+
+ ```bash
+ pip install ahorn-loader
+ ```
+
+ Then, you can use it in your Python scripts:
+
+ ```python
+ import ahorn_loader
+
+ # Download a dataset:
+ ahorn_loader.download_dataset("dataset_name", "target_path")
+
+ # Download and read a dataset:
+ # The dataset will be stored in your system's cache. For a more permanent storage
+ # location, use `ahorn_loader.download_dataset` instead.
+ with ahorn_loader.read_dataset("dataset_name") as dataset:
+     for line in dataset:
+         ...
+
+ # Validate a specific dataset (e.g., before adding it to AHORN):
+ ahorn_loader.validate("path_to_dataset_file")
+ ```
+
+ ## Funding
+
+ <img align="right" width="200" src="https://raw.githubusercontent.com/netsci-rwth/ahorn/main/public/images/erc_logo.png">
+
+ Funded by the European Union (ERC, HIGH-HOPeS, 101039827).
+ Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency.
+ Neither the European Union nor the granting authority can be held responsible for them.
ahorn_loader-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ ahorn_loader/__init__.py,sha256=kEDhV6uY5P7i2ceFDSPi7CCR9GekRszv7EzvYx4RDEw,83
+ ahorn_loader/api.py,sha256=_alXpuc0UfWLQxi-uS6QFLHpr_xa6cIoL32ff6z_kxA,5779
+ ahorn_loader/cli.py,sha256=4fFIQVhE-Zzvq47JMghKoMFAzhZXJ8lXRdtyAjvYzBY,2272
+ ahorn_loader/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ahorn_loader/utils/__init__.py,sha256=kIYHc-9ExuESHM2TIXlh9-YF7r7hFiRfjAKYTQG4gGg,81
+ ahorn_loader/utils/cache.py,sha256=rRsn5z6LM1aFLufZGM4uppHVP553iR8cP3JTNxZiEKY,832
+ ahorn_loader/validator/__init__.py,sha256=tyGbqMMzzkGPI3pEb9uBAJoNMGUds_WdU_5575vGBM8,84
+ ahorn_loader/validator/rules.py,sha256=djiWi4_Y-UlC2XhwPGrZywyr56AoPfAcNpOnNMZ6w8I,3155
+ ahorn_loader/validator/validator.py,sha256=qfooTPfjZ2ieqraJ3CqdqADfDFlODHm-OU_LRPK0gmM,1437
+ ahorn_loader-0.2.0.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
+ ahorn_loader-0.2.0.dist-info/entry_points.txt,sha256=oyQAA_k5r0sAD_lBKgQLPhpxqk0-UTagDJlsU97AJ4s,55
+ ahorn_loader-0.2.0.dist-info/METADATA,sha256=WrQAi5YC7DO58MgTNtxv7IlJ3qqs8H-mkX-JcXu9D2s,3020
+ ahorn_loader-0.2.0.dist-info/RECORD,,
ahorn_loader-0.2.0.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: uv 0.8.11
+ Generator: uv 0.8.22
  Root-Is-Purelib: true
  Tag: py3-none-any
ahorn_loader-0.1.1.dist-info/METADATA DELETED
@@ -1,53 +0,0 @@
- Metadata-Version: 2.3
- Name: ahorn-loader
- Version: 0.1.1
- Summary: Library and command-line application to interact with datasets in the Aachen Higher-Order Repository of Networks.
- Author: Florian Frantzen
- Author-email: Florian Frantzen <florian.frantzen@cs.rwth-aachen.de>
- Requires-Dist: requests>=2.32.4
- Requires-Dist: typer>=0.16.0
- Requires-Python: >=3.12
- Description-Content-Type: text/markdown
-
- # `ahorn-loader`
-
- Library and command-line application to interact with datasets in [AHORN](https://ahorn.rwth-aachen.de/).
-
- ## Usage
-
- `ahorn-loader` is both a command-line application and a Python package to interact with the AHORN repository for higher-order datasets.
-
- ### Command-Line Usage
-
- To install and use `ahorn-loader` from the command line, you can run the following command:
-
- ```bash
- uvx ahorn-loader [command] [args]
- ```
-
- Commands include:
- - `ls`: List available datasets in AHORN.
- - `download`: Download a dataset from AHORN.
- - `validate`: Validate a specific dataset file (e.g., before adding it to AHORN).
-
- To get a full help of available commands and options, run `ahorn-loader --help`.
-
- ### Python Package Usage
-
- To use `ahorn-loader` as a Python package, you can install it via `pip` (or some other package manager of your choice):
-
- ```bash
- pip install ahorn-loader
- ```
-
- Then, you can use it in your Python scripts:
-
- ```python
- import ahorn_loader
-
- # download a dataset
- ahorn_loader.download('dataset_name', 'target_path')
-
- # validate a specific dataset (e.g., before adding it to AHORN)
- ahorn_loader.validate('path_to_dataset_file')
- ```
ahorn_loader-0.1.1.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- ahorn_loader/__init__.py,sha256=9040e157ab98e4fee2d9c7850d23e2ec2091f467a446ccefec4cef631e110c4c,83
- ahorn_loader/api.py,sha256=cc79aa4a691fb2e8d751fab2705794e952689814da4496259f7709cf677688ea,3726
- ahorn_loader/cli.py,sha256=d8b7bedffd43ce1cecff26b994652621343d60d4f2269d0cca6ed3cf1edef01e,2271
- ahorn_loader/py.typed,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
- ahorn_loader/validator/__init__.py,sha256=b7219ba8c333ce418f237a446fdb81009a0d30651db3f59d53fe79ef9bc604cf,84
- ahorn_loader/validator/rules.py,sha256=7638968b8fd8f94942d978703c6ad9cb0cabe7a0283df01c3693a734c67ac3c2,3155
- ahorn_loader/validator/validator.py,sha256=a9fa284cf7e367689eaab689dc2a9da800df0c594e0c79be394fcb44f2b48263,1437
- ahorn_loader-0.1.1.dist-info/WHEEL,sha256=0f7d664a881437bddec71c703c3c2f01fd13581519f95130abcc96e296ef0426,79
- ahorn_loader-0.1.1.dist-info/entry_points.txt,sha256=a3240003f939af4b000ff9412a040b3e1a71aa4d3e5136a00c996c53dec0278b,55
- ahorn_loader-0.1.1.dist-info/METADATA,sha256=b48c03747dadce166fd95f3c948ba621a4a7fdad109968bf032b96cc44d73f6b,1555
- ahorn_loader-0.1.1.dist-info/RECORD,,