ahorn-loader 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


ahorn_loader/api.py CHANGED
@@ -1,20 +1,46 @@
  """Module to interact with the Ahorn dataset API."""
 
+ import contextlib
+ import gzip
  import json
+ from collections.abc import Generator, Iterable
  from datetime import UTC, datetime
  from pathlib import Path
- from typing import Any
+ from typing import TypedDict
  from urllib.parse import ParseResult, urlparse
 
  import requests
 
- __all__ = ["download_dataset", "load_dataset_data", "load_datasets_data"]
+ from .utils import get_cache_dir
+
+ __all__ = [
+     "download_dataset",
+     "load_dataset_data",
+     "load_datasets_data",
+     "read_dataset",
+ ]
 
  DATASET_API_URL = "https://ahorn.rwth-aachen.de/api/datasets.json"
- CACHE_PATH = Path(__file__).parent.parent.parent / "cache" / "datasets.json"
 
 
- def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, Any]:
+ class AttachmentDict(TypedDict):
+     url: str
+     size: int
+
+
+ class DatasetDict(TypedDict):
+     slug: str
+     title: str
+     tags: list[str]
+     attachments: dict[str, AttachmentDict]
+
+
+ class DatasetsDataDict(TypedDict):
+     datasets: dict[str, DatasetDict]
+     time: str
+
+
+ def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, DatasetDict]:
      """Load dataset data from the Ahorn API.
 
      Parameters
@@ -30,31 +56,28 @@ def load_datasets_data(*, cache_lifetime: int | None = None) -> dict[str, Any]:
          and the values are dictionaries with dataset details such as title, tags, and
          attachments.
      """
-     if CACHE_PATH.exists() and cache_lifetime is not None:
-         with CACHE_PATH.open("r", encoding="utf-8") as cache_file:
-             cache = json.load(cache_file)
-         if (
-             cache.get("time")
-             and (
-                 datetime.now(tz=UTC) - datetime.fromisoformat(cache["time"])
-             ).total_seconds()
-             < cache_lifetime
-         ):
-             return cache["datasets"]
+     datasets_data_cache = get_cache_dir() / "datasets.json"
+     if datasets_data_cache.exists() and cache_lifetime is not None:
+         cache_mtime = datetime.fromtimestamp(
+             datasets_data_cache.stat().st_mtime, tz=UTC
+         )
+         if (datetime.now(tz=UTC) - cache_mtime).total_seconds() < cache_lifetime:
+             with datasets_data_cache.open("r", encoding="utf-8") as cache_file:
+                 cache: DatasetsDataDict = json.load(cache_file)
+             return cache["datasets"]
 
      response = requests.get(DATASET_API_URL, timeout=10)
      response.raise_for_status()
 
-     CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
-     with CACHE_PATH.open("w", encoding="utf-8") as cache_file:
+     datasets_data_cache.parent.mkdir(parents=True, exist_ok=True)
+     with datasets_data_cache.open("w", encoding="utf-8") as cache_file:
          cache_file.write(response.text)
 
-     return response.json()["datasets"]
+     response_json: DatasetsDataDict = response.json()
+     return response_json["datasets"]
 
 
- def load_dataset_data(
-     slug: str, *, cache_lifetime: int | None = None
- ) -> dict[str, Any]:
+ def load_dataset_data(slug: str, *, cache_lifetime: int | None = None) -> DatasetDict:
      """Load data for a specific dataset by its slug.
 
      Parameters
@@ -67,19 +90,25 @@ def load_dataset_data(
 
      Returns
      -------
-     dict[str, Any]
+     DatasetDict
          Dictionary containing the dataset details.
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
      """
      datasets = load_datasets_data(cache_lifetime=cache_lifetime)
-     if "error" in datasets:
-         return {"error": datasets["error"]}
 
-     return datasets.get(slug, {"error": f"Dataset '{slug}' not found."})
+     if slug not in datasets:
+         raise KeyError(f"Dataset with slug '{slug}' does not exist in AHORN.")
+
+     return datasets[slug]
 
 
  def download_dataset(
      slug: str, folder: Path | str, *, cache_lifetime: int | None = None
- ) -> None:
+ ) -> Path:
      """Download a dataset by its slug to the specified folder.
 
      Parameters
@@ -91,15 +120,27 @@ def download_dataset(
      cache_lifetime : int, optional
          How long to reuse cached data in seconds. If not provided, the cache will not
          be used.
+
+     Returns
+     -------
+     Path
+         The path to the downloaded dataset file.
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
+     RuntimeError
+         If the dataset file could not be downloaded due to some error.
      """
      if isinstance(folder, str):
          folder = Path(folder)
 
      data = load_dataset_data(slug, cache_lifetime=cache_lifetime)
-     if "error" in data:
-         raise ValueError(f"Error loading dataset '{slug}': {data['error']}")
-     if "attachments" not in data or "dataset" not in data["attachments"]:
-         raise KeyError(f"Dataset '{slug}' does not contain required 'attachments/dataset' keys.")
+     if "dataset" not in data["attachments"]:
+         raise RuntimeError(
+             f"Dataset '{slug}' does not contain required 'attachments/dataset' keys."
+         )
      dataset_attachment = data["attachments"]["dataset"]
 
      url: ParseResult = urlparse(dataset_attachment["url"])
@@ -113,3 +154,46 @@ def download_dataset(
          for chunk in response.iter_content(chunk_size=8192):
              if chunk:
                  f.write(chunk)
+
+     return filepath
+
+
+ @contextlib.contextmanager
+ def read_dataset(slug: str) -> Generator[Iterable[str], None, None]:
+     """Download and yield a context-managed file object for the dataset lines by slug.
+
+     The dataset file will be stored in your system cache and can be deleted according
+     to your system's cache policy. To ensure that costly re-downloads do not occur, use
+     the `download_dataset` function to store the dataset file at a more permanent
+     location.
+
+     Parameters
+     ----------
+     slug : str
+         The slug of the dataset to download.
+
+     Returns
+     -------
+     Context manager yielding an open file object (iterator over lines).
+
+     Raises
+     ------
+     KeyError
+         If the dataset with the given `slug` does not exist.
+     RuntimeError
+         If the dataset file could not be downloaded due to other errors.
+
+     Examples
+     --------
+     >>> import ahorn_loader
+     >>> with ahorn_loader.read_dataset("contact-high-school") as f:
+     >>>     for line in f:
+     >>>         ...
+     """
+     filepath = download_dataset(slug, get_cache_dir())
+     if filepath.suffix == ".gz":
+         with gzip.open(filepath, mode="rt", encoding="utf-8") as f:
+             yield f
+     else:
+         with filepath.open("r", encoding="utf-8") as f:
+             yield f
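
The new `read_dataset` context manager ties the additions above together: it downloads the attachment into the platform cache directory via `download_dataset(slug, get_cache_dir())` and yields the open, transparently gunzipped file object. A minimal usage sketch; the slug comes from the docstring example above, and the line-counting loop is purely illustrative:

```python
import ahorn_loader

# Stream the dataset without keeping a permanent copy; the file is written to
# the platform cache directory and decompressed on the fly if it ends in .gz.
with ahorn_loader.read_dataset("contact-high-school") as lines:
    n_lines = sum(1 for _ in lines)

print(f"contact-high-school has {n_lines} lines")

# For a permanent copy, download into a folder of your choice instead:
# path = ahorn_loader.download_dataset("contact-high-school", "data/")
```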
ahorn_loader/cli.py CHANGED
@@ -58,6 +58,7 @@ def download(
          typer.echo(f"Failed to download dataset: {e}")
          raise typer.Exit(code=1) from e
 
+
  @app.command()
  def validate(
      path: Annotated[
ahorn_loader/utils/__init__.py ADDED
@@ -0,0 +1,3 @@
+ """Utility functions used internally in `ahorn-loader`."""
+
+ from .cache import *
ahorn_loader/utils/cache.py ADDED
@@ -0,0 +1,29 @@
+ """Module with cache-related utility functions."""
+
+ import os
+ import sys
+ from pathlib import Path
+
+ __all__ = ["get_cache_dir"]
+
+
+ def get_cache_dir() -> Path:
+     """Return an appropriate cache location for the current platform.
+
+     Returns
+     -------
+     pathlib.Path
+         Platform-dependent cache directory.
+     """
+     match sys.platform:
+         case "win32":
+             base = os.getenv("LOCALAPPDATA") or Path("~\\AppData\\Local").expanduser()
+             return Path(base) / "ahorn-loader" / "Cache"
+         case "darwin":
+             return Path.home() / "Library" / "Caches" / "ahorn-loader"
+         case _:
+             # Linux and other Unix
+             xdg = os.getenv("XDG_CACHE_HOME")
+             if xdg:
+                 return Path(xdg) / "ahorn-loader"
+             return Path.home() / ".cache" / "ahorn-loader"
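
Note that `load_datasets_data` (diffed above) now decides cache freshness purely from the modification time of the file in this directory, rather than from a timestamp stored inside the cached JSON. A standalone sketch of that check, using a hypothetical helper name and an illustrative one-hour lifetime and Linux-style path:

```python
from datetime import UTC, datetime
from pathlib import Path


def is_cache_fresh(path: Path, lifetime_seconds: int) -> bool:
    """Return True if `path` exists and was modified less than `lifetime_seconds` ago."""
    if not path.exists():
        return False
    mtime = datetime.fromtimestamp(path.stat().st_mtime, tz=UTC)
    return (datetime.now(tz=UTC) - mtime).total_seconds() < lifetime_seconds


# Example: reuse cached dataset metadata for up to one hour.
cache_file = Path.home() / ".cache" / "ahorn-loader" / "datasets.json"
print(is_cache_fresh(cache_file, lifetime_seconds=3600))
```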
ahorn_loader-0.2.0.dist-info/METADATA ADDED
@@ -0,0 +1,85 @@
+ Metadata-Version: 2.3
+ Name: ahorn-loader
+ Version: 0.2.0
+ Summary: Library and command-line application to interact with datasets in the Aachen Higher-Order Repository of Networks.
+ Author: Florian Frantzen
+ Author-email: Florian Frantzen <frantzen@netsci.rwth-aachen.de>
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3 :: Only
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Scientific/Engineering
+ Requires-Dist: requests>=2.32.4
+ Requires-Dist: typer>=0.16.0
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+
+ # `ahorn-loader`
+
+ Library and command-line application to interact with datasets in [AHORN](https://ahorn.rwth-aachen.de/).
+
+ <div align="center">
+
+ [![Python](https://img.shields.io/badge/python-3.11+-blue)](https://www.python.org/)
+ [![license](https://badgen.net/github/license/netsci-rwth/ahorn-loader)](https://github.com/pyt-team/TopoNetX/blob/main/LICENSE)
+
+ </div>
+
+ ## Usage
+
+ `ahorn-loader` is both a command-line application and a Python package to interact with the AHORN repository for higher-order datasets.
+
+ ### Command-Line Usage
+
+ To install and use `ahorn-loader` from the command line, you can run the following command:
+
+ ```bash
+ uvx ahorn-loader [command] [args]
+ ```
+
+ Commands include:
+ - `ls`: List available datasets in AHORN.
+ - `download`: Download a dataset from AHORN.
+ - `validate`: Validate a specific dataset file (e.g., before adding it to AHORN).
+
+ To get a full help of available commands and options, run `ahorn-loader --help`.
+
+ ### Python Package Usage
+
+ To use `ahorn-loader` as a Python package, you can install it via `pip` (or some other package manager of your choice):
+
+ ```bash
+ pip install ahorn-loader
+ ```
+
+ Then, you can use it in your Python scripts:
+
+ ```python
+ import ahorn_loader
+
+ # Download a dataset:
+ ahorn_loader.download_dataset("dataset_name", "target_path")
+
+ # Download and read a dataset:
+ # The dataset will be stored in your system's cache. For a more permanent storage
+ # location, use `ahorn_loader.download_dataset` instead.
+ with ahorn_loader.read_dataset("dataset_name") as dataset:
+     for line in dataset:
+         ...
+
+ # Validate a specific dataset (e.g., before adding it to AHORN):
+ ahorn_loader.validate("path_to_dataset_file")
+ ```
+
+ ## Funding
+
+ <img align="right" width="200" src="https://raw.githubusercontent.com/netsci-rwth/ahorn/main/public/images/erc_logo.png">
+
+ Funded by the European Union (ERC, HIGH-HOPeS, 101039827).
+ Views and opinions expressed are however those of the author(s) only and do not necessarily reflect those of the European Union or the European Research Council Executive Agency.
+ Neither the European Union nor the granting authority can be held responsible for them.
ahorn_loader-0.2.0.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ ahorn_loader/__init__.py,sha256=kEDhV6uY5P7i2ceFDSPi7CCR9GekRszv7EzvYx4RDEw,83
+ ahorn_loader/api.py,sha256=_alXpuc0UfWLQxi-uS6QFLHpr_xa6cIoL32ff6z_kxA,5779
+ ahorn_loader/cli.py,sha256=4fFIQVhE-Zzvq47JMghKoMFAzhZXJ8lXRdtyAjvYzBY,2272
+ ahorn_loader/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ahorn_loader/utils/__init__.py,sha256=kIYHc-9ExuESHM2TIXlh9-YF7r7hFiRfjAKYTQG4gGg,81
+ ahorn_loader/utils/cache.py,sha256=rRsn5z6LM1aFLufZGM4uppHVP553iR8cP3JTNxZiEKY,832
+ ahorn_loader/validator/__init__.py,sha256=tyGbqMMzzkGPI3pEb9uBAJoNMGUds_WdU_5575vGBM8,84
+ ahorn_loader/validator/rules.py,sha256=djiWi4_Y-UlC2XhwPGrZywyr56AoPfAcNpOnNMZ6w8I,3155
+ ahorn_loader/validator/validator.py,sha256=qfooTPfjZ2ieqraJ3CqdqADfDFlODHm-OU_LRPK0gmM,1437
+ ahorn_loader-0.2.0.dist-info/WHEEL,sha256=-neZj6nU9KAMg2CnCY6T3w8J53nx1kFGw_9HfoSzM60,79
+ ahorn_loader-0.2.0.dist-info/entry_points.txt,sha256=oyQAA_k5r0sAD_lBKgQLPhpxqk0-UTagDJlsU97AJ4s,55
+ ahorn_loader-0.2.0.dist-info/METADATA,sha256=WrQAi5YC7DO58MgTNtxv7IlJ3qqs8H-mkX-JcXu9D2s,3020
+ ahorn_loader-0.2.0.dist-info/RECORD,,
ahorn_loader-0.2.0.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: uv 0.8.11
+ Generator: uv 0.8.22
  Root-Is-Purelib: true
  Tag: py3-none-any
ahorn_loader-0.1.1.dist-info/METADATA DELETED
@@ -1,53 +0,0 @@
- Metadata-Version: 2.3
- Name: ahorn-loader
- Version: 0.1.1
- Summary: Library and command-line application to interact with datasets in the Aachen Higher-Order Repository of Networks.
- Author: Florian Frantzen
- Author-email: Florian Frantzen <florian.frantzen@cs.rwth-aachen.de>
- Requires-Dist: requests>=2.32.4
- Requires-Dist: typer>=0.16.0
- Requires-Python: >=3.12
- Description-Content-Type: text/markdown
-
- # `ahorn-loader`
-
- Library and command-line application to interact with datasets in [AHORN](https://ahorn.rwth-aachen.de/).
-
- ## Usage
-
- `ahorn-loader` is both a command-line application and a Python package to interact with the AHORN repository for higher-order datasets.
-
- ### Command-Line Usage
-
- To install and use `ahorn-loader` from the command line, you can run the following command:
-
- ```bash
- uvx ahorn-loader [command] [args]
- ```
-
- Commands include:
- - `ls`: List available datasets in AHORN.
- - `download`: Download a dataset from AHORN.
- - `validate`: Validate a specific dataset file (e.g., before adding it to AHORN).
-
- To get a full help of available commands and options, run `ahorn-loader --help`.
-
- ### Python Package Usage
-
- To use `ahorn-loader` as a Python package, you can install it via `pip` (or some other package manager of your choice):
-
- ```bash
- pip install ahorn-loader
- ```
-
- Then, you can use it in your Python scripts:
-
- ```python
- import ahorn_loader
-
- # download a dataset
- ahorn_loader.download('dataset_name', 'target_path')
-
- # validate a specific dataset (e.g., before adding it to AHORN)
- ahorn_loader.validate('path_to_dataset_file')
- ```
ahorn_loader-0.1.1.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- ahorn_loader/__init__.py,sha256=9040e157ab98e4fee2d9c7850d23e2ec2091f467a446ccefec4cef631e110c4c,83
- ahorn_loader/api.py,sha256=cc79aa4a691fb2e8d751fab2705794e952689814da4496259f7709cf677688ea,3726
- ahorn_loader/cli.py,sha256=d8b7bedffd43ce1cecff26b994652621343d60d4f2269d0cca6ed3cf1edef01e,2271
- ahorn_loader/py.typed,sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855,0
- ahorn_loader/validator/__init__.py,sha256=b7219ba8c333ce418f237a446fdb81009a0d30651db3f59d53fe79ef9bc604cf,84
- ahorn_loader/validator/rules.py,sha256=7638968b8fd8f94942d978703c6ad9cb0cabe7a0283df01c3693a734c67ac3c2,3155
- ahorn_loader/validator/validator.py,sha256=a9fa284cf7e367689eaab689dc2a9da800df0c594e0c79be394fcb44f2b48263,1437
- ahorn_loader-0.1.1.dist-info/WHEEL,sha256=0f7d664a881437bddec71c703c3c2f01fd13581519f95130abcc96e296ef0426,79
- ahorn_loader-0.1.1.dist-info/entry_points.txt,sha256=a3240003f939af4b000ff9412a040b3e1a71aa4d3e5136a00c996c53dec0278b,55
- ahorn_loader-0.1.1.dist-info/METADATA,sha256=b48c03747dadce166fd95f3c948ba621a4a7fdad109968bf032b96cc44d73f6b,1555
- ahorn_loader-0.1.1.dist-info/RECORD,,