apifetch 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,24 @@
1
# Continuous integration: install the package with its "dev" extra and run the
# test suite on each Python version in the matrix.
name: CI

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

# Least privilege: the job only checks out the code and runs tests, so the
# workflow token needs nothing beyond read access to the repository.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining matrix entries finish when one version fails.
      fail-fast: false
      matrix:
        # Quoted so the versions stay strings instead of being read as floats.
        python-version: ["3.9", "3.11", "3.13"]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install
        run: pip install -e ".[dev]"
      - name: Test
        run: pytest -q
@@ -0,0 +1,22 @@
1
# Build the documentation site and deploy it with `mkdocs gh-deploy`.
name: docs

on:
  push:
    branches:
      - main
      - master
  # Also allow a manual run from the Actions tab.
  workflow_dispatch:

# Write access to repository contents; presumably required so that
# `mkdocs gh-deploy` can push the built site - confirm against the Pages setup.
permissions:
  contents: write

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install docs deps
        run: pip install -e ".[docs]"
      - name: Build & deploy to GitHub Pages
        run: mkdocs gh-deploy --force
@@ -0,0 +1,37 @@
1
# Build the sdist and wheel, then publish them to PyPI. Runs when a GitHub
# release is published or when the workflow is started manually.
name: release

on:
  release:
    types: [published]
  workflow_dispatch:

# Least privilege by default: the build job only needs to read the repository.
# The publish job declares its own permissions block below.
permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Build sdist and wheel
        run: |
          pip install build
          python -m build
      # Hand the built files to the publish job.
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  publish:
    needs: build
    runs-on: ubuntu-latest
    environment: pypi
    permissions:
      id-token: write  # required for PyPI Trusted Publishing (OIDC)
    steps:
      - uses: actions/download-artifact@v4
        with:
          name: dist
          path: dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,13 @@
1
# Python bytecode
__pycache__/
*.py[cod]

# Packaging and build output
*.egg-info/
.eggs/
build/
dist/

# Virtual environments
.venv/
venv/

# Tool caches (pytest, mypy, ruff)
.pytest_cache/
.mypy_cache/
.ruff_cache/

# Presumably the built MkDocs site - confirm against the docs build
site/

# macOS Finder metadata
.DS_Store
apifetch-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 André Leite
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: apifetch
3
+ Version: 0.1.0
4
+ Summary: A generic toolkit for token-authenticated REST API retrieval.
5
+ Project-URL: Homepage, https://github.com/StrategicProjects/apifetch-py
6
+ Project-URL: Repository, https://github.com/StrategicProjects/apifetch-py
7
+ Project-URL: R sibling, https://github.com/StrategicProjects/apifetch
8
+ Author-email: André Leite <leite@de.ufpe.br>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 André Leite
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: api,client,http,pagination,rest,token
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Topic :: Internet :: WWW/HTTP
37
+ Requires-Python: >=3.9
38
+ Requires-Dist: httpx>=0.24
39
+ Provides-Extra: dev
40
+ Requires-Dist: pandas>=1.3; extra == 'dev'
41
+ Requires-Dist: pytest>=7; extra == 'dev'
42
+ Requires-Dist: respx>=0.20; extra == 'dev'
43
+ Provides-Extra: docs
44
+ Requires-Dist: mkdocs-material>=9; extra == 'docs'
45
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
46
+ Provides-Extra: pandas
47
+ Requires-Dist: pandas>=1.3; extra == 'pandas'
48
+ Description-Content-Type: text/markdown
49
+
50
+ <p align="center">
51
+ <img src="docs/assets/logo.png" width="200" alt="apifetch">
52
+ </p>
53
+
54
+ # apifetch (Python)
55
+
56
+ `apifetch` is a small, dependency-light toolkit for talking to
57
+ token-authenticated REST APIs. It handles three recurring chores:
58
+
59
+ 1. **Token management** — store/get/remove/list tokens in process environment
60
+ variables (never written to disk), namespaced per service.
61
+ 2. **Request building** — pluggable **authentication** and **pagination**
62
+ strategies, bundled into a reusable `Api` profile.
63
+ 3. **Data retrieval** — fetch one page, or fetch everything in chunks.
64
+
65
+ This is the Python sibling of the R package
66
+ [apifetch](https://github.com/StrategicProjects/apifetch). Both were extracted
67
+ from the [BigDataPE](https://github.com/StrategicProjects/BigDataPE) package,
68
+ which is now one *use case* (see `examples/bigdatape.py`).
69
+
70
+ ## Installation
71
+
72
+ ```bash
73
+ pip install apifetch
74
+ ```
75
+
76
+ ## Usage
77
+
78
+ ```python
79
+ import apifetch as af
80
+
81
+ # 1. Describe the API once: where, how to authenticate, how to paginate.
82
+ api = af.Api(
83
+ endpoint="https://api.example.com/v1/search",
84
+ service="Example",
85
+ auth=af.AuthBearer(), # "Authorization: Bearer <token>"
86
+ pagination=af.PaginateOffset(where="query"),
87
+ )
88
+
89
+ # 2. Store a token (kept only in this process's environment).
90
+ af.store_token("reports", "my-secret-token", service="Example")
91
+
92
+ # 3. Fetch.
93
+ one_page = af.fetch(api, "reports", limit=50)
94
+ everything = af.fetch_all(api, "reports", chunk_size=1000)
95
+
96
+ # Optional: turn it into a DataFrame.
97
+ # import pandas as pd; df = pd.DataFrame(everything)
98
+ ```
99
+
100
+ ### Strategies
101
+
102
+ **Authentication:** `AuthBearer`, `AuthRaw`, `AuthHeader`, `AuthQuery`.
103
+
104
+ **Pagination:** `PaginateOffset(where="header" | "query")`, `PaginateNone`.
105
+
106
+ ## License
107
+
108
+ MIT © André Leite
@@ -0,0 +1,59 @@
1
+ <p align="center">
2
+ <img src="docs/assets/logo.png" width="200" alt="apifetch">
3
+ </p>
4
+
5
+ # apifetch (Python)
6
+
7
+ `apifetch` is a small, dependency-light toolkit for talking to
8
+ token-authenticated REST APIs. It handles three recurring chores:
9
+
10
+ 1. **Token management** — store/get/remove/list tokens in process environment
11
+ variables (never written to disk), namespaced per service.
12
+ 2. **Request building** — pluggable **authentication** and **pagination**
13
+ strategies, bundled into a reusable `Api` profile.
14
+ 3. **Data retrieval** — fetch one page, or fetch everything in chunks.
15
+
16
+ This is the Python sibling of the R package
17
+ [apifetch](https://github.com/StrategicProjects/apifetch). Both were extracted
18
+ from the [BigDataPE](https://github.com/StrategicProjects/BigDataPE) package,
19
+ which is now one *use case* (see `examples/bigdatape.py`).
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install apifetch
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```python
30
+ import apifetch as af
31
+
32
+ # 1. Describe the API once: where, how to authenticate, how to paginate.
33
+ api = af.Api(
34
+ endpoint="https://api.example.com/v1/search",
35
+ service="Example",
36
+ auth=af.AuthBearer(), # "Authorization: Bearer <token>"
37
+ pagination=af.PaginateOffset(where="query"),
38
+ )
39
+
40
+ # 2. Store a token (kept only in this process's environment).
41
+ af.store_token("reports", "my-secret-token", service="Example")
42
+
43
+ # 3. Fetch.
44
+ one_page = af.fetch(api, "reports", limit=50)
45
+ everything = af.fetch_all(api, "reports", chunk_size=1000)
46
+
47
+ # Optional: turn it into a DataFrame.
48
+ # import pandas as pd; df = pd.DataFrame(everything)
49
+ ```
50
+
51
+ ### Strategies
52
+
53
+ **Authentication:** `AuthBearer`, `AuthRaw`, `AuthHeader`, `AuthQuery`.
54
+
55
+ **Pagination:** `PaginateOffset(where="header" | "query")`, `PaginateNone`.
56
+
57
+ ## License
58
+
59
+ MIT © André Leite
@@ -0,0 +1,15 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg"
2
+ viewBox="6 -6 188 212" preserveAspectRatio="xMidYMid meet"
3
+ role="img" aria-label="apifetch for Python">
4
+ <title>apifetch (Python)</title>
5
+ <defs>
6
+ <linearGradient id="hexpy" x1="0" y1="0" x2="0" y2="1">
7
+ <stop offset="0" stop-color="#22357d"/>
8
+ <stop offset="1" stop-color="#1a2a65"/>
9
+ </linearGradient>
10
+ </defs>
11
+ <path d="M100,0 L186.6,50 L186.6,150 L100,200 L13.4,150 L13.4,50 Z"
12
+ fill="url(#hexpy)" stroke="#ffd43b" stroke-width="7" stroke-linejoin="round"/>
13
+ <path d="M84,46 L116,46 L116,104 L142,104 L100,150 L58,104 L84,104 Z"
14
+ fill="#4b8bbe" stroke="#1a2a65" stroke-width="3" stroke-linejoin="round"/>
15
+ </svg>
Binary file
Binary file
Binary file
@@ -0,0 +1,33 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg"
2
+ width="246.5" height="280" viewBox="1.4 -12 197.2 224"
3
+ preserveAspectRatio="xMidYMid meet"
4
+ role="img" aria-label="apifetch for Python">
5
+ <title>apifetch (Python)</title>
6
+ <defs>
7
+ <style type="text/css">
8
+ text { font-family: 'Jost', 'Helvetica Neue', Arial, sans-serif; }
9
+ </style>
10
+ <linearGradient id="hexpy" x1="0" y1="0" x2="0" y2="1">
11
+ <stop offset="0" stop-color="#22357d"/>
12
+ <stop offset="1" stop-color="#1a2a65"/>
13
+ </linearGradient>
14
+ </defs>
15
+
16
+ <!-- hexagon (regular: every edge length 100, uniform 5px stroke) -->
17
+ <path d="M100,0 L186.6,50 L186.6,150 L100,200 L13.4,150 L13.4,50 Z"
18
+ fill="url(#hexpy)" stroke="#ffd43b" stroke-width="5" stroke-linejoin="round"/>
19
+
20
+ <!-- fetch arrow (download = retrieve) in Python blue -->
21
+ <path d="M90,22 L110,22 L110,62 L126,62 L100,90 L74,62 L90,62 Z"
22
+ fill="#4b8bbe" stroke="#1a2a65" stroke-width="2.5" stroke-linejoin="round"/>
23
+
24
+ <!-- wordmark: textLength locks the width so font substitution can't overflow -->
25
+ <text x="100" y="112" text-anchor="middle" font-weight="bold"
26
+ fill="#ececec" font-size="28" textLength="130"
27
+ lengthAdjust="spacingAndGlyphs" dominant-baseline="central">apifetch</text>
28
+
29
+ <!-- "python" badge, kept in the full-width band -->
30
+ <text x="100" y="136" text-anchor="middle" font-weight="bold"
31
+ fill="#ffd43b" font-size="14" textLength="92"
32
+ lengthAdjust="spacingAndGlyphs" dominant-baseline="central">PYTHON</text>
33
+ </svg>
@@ -0,0 +1,48 @@
1
+ # Use case: the Big Data PE API
2
+
3
+ `apifetch` was extracted from the
4
+ [BigDataPE](https://github.com/StrategicProjects/BigDataPE) package, which wraps
5
+ the **Big Data PE** platform — a public-data REST API run by the Government of
6
+ the State of Pernambuco, Brazil. This page is the canonical worked example of
7
+ configuring the package for a real API.
8
+
9
+ ## What is specific about Big Data PE
10
+
11
+ | Convention | Strategy |
12
+ |---|---|
13
+ | Token sent *verbatim* in the `Authorization` header (no `Bearer`) | `AuthRaw()` |
14
+ | `limit`/`offset` sent as **HTTP headers**, not query params | `PaginateOffset(where="header")` |
15
+ | Responses carry a status column `"Mensagem"` to drop | `drop_cols=("Mensagem",)` |
16
+ | Reachable only from the PE Conectado network / VPN | `connect_hint=...` |
17
+
18
+ ## Defining the profile
19
+
20
+ ```python
21
+ import apifetch as af
22
+
23
+ bigdatape = af.Api(
24
+ endpoint="https://www.bigdata.pe.gov.br/api/buscar",
25
+ service="BigDataPE",
26
+ auth=af.AuthRaw(),
27
+ pagination=af.PaginateOffset(where="header"),
28
+ drop_cols=("Mensagem",),
29
+ connect_hint="Ensure you are on the PE Conectado network or VPN.",
30
+ )
31
+ ```
32
+
33
+ ## Storing a token and fetching
34
+
35
+ ```python
36
+ af.store_token("dengue", "your-token-here", service="BigDataPE")
37
+
38
+ # A single page of 50 records
39
+ dengue = af.fetch(bigdatape, "dengue", limit=50)
40
+
41
+ # Everything, in chunks, with a progress message per chunk
42
+ dengue_all = af.fetch_all(bigdatape, "dengue", chunk_size=50_000, verbosity=1)
43
+ ```
44
+
45
+ !!! note "On language"
46
+ Function and parameter names are English, but the API's response keys and
47
+ some values are Portuguese (e.g. `nu_notificacao`, `"BOA VIAGEM"`). That is
48
+ intentional — they come straight from the upstream service.
@@ -0,0 +1,50 @@
1
+ <p align="center">
2
+ <img src="assets/logo.svg" width="200" alt="apifetch">
3
+ </p>
4
+
5
+ # apifetch
6
+
7
+ A small, dependency-light toolkit for talking to **token-authenticated REST
8
+ APIs**. It handles three recurring chores:
9
+
10
+ 1. **Token management** — store/get/remove/list tokens in process environment
11
+ variables (never written to disk), namespaced per service.
12
+ 2. **Request building** — pluggable **authentication** and **pagination**
13
+ strategies, bundled into a reusable [`Api`](reference.md) profile.
14
+ 3. **Data retrieval** — fetch one page, or fetch everything in chunks.
15
+
16
+ This is the Python sibling of the R package
17
+ [apifetch](https://github.com/StrategicProjects/apifetch).
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ pip install apifetch
23
+ ```
24
+
25
+ ## Quick start
26
+
27
+ ```python
28
+ import apifetch as af
29
+
30
+ api = af.Api(
31
+ endpoint="https://api.example.com/v1/search",
32
+ service="Example",
33
+ auth=af.AuthBearer(), # "Authorization: Bearer <token>"
34
+ pagination=af.PaginateOffset(where="query"),
35
+ )
36
+
37
+ af.store_token("reports", "my-secret-token", service="Example")
38
+
39
+ one_page = af.fetch(api, "reports", limit=50)
40
+ everything = af.fetch_all(api, "reports", chunk_size=1000)
41
+ ```
42
+
43
+ ### Strategies
44
+
45
+ **Authentication:** `AuthBearer`, `AuthRaw`, `AuthHeader`, `AuthQuery`.
46
+
47
+ **Pagination:** `PaginateOffset(where="header" | "query")`, `PaginateNone`.
48
+
49
+ See the [BigDataPE use case](bigdatape.md) for a real-world configuration, or the
50
+ full [API reference](reference.md).
@@ -0,0 +1,41 @@
1
+ # API reference
2
+
3
+ ## Fetching
4
+
5
+ ::: apifetch.fetch
6
+
7
+ ::: apifetch.fetch_all
8
+
9
+ ## API profile
10
+
11
+ ::: apifetch.Api
12
+
13
+ ## Authentication strategies
14
+
15
+ ::: apifetch.AuthRaw
16
+
17
+ ::: apifetch.AuthBearer
18
+
19
+ ::: apifetch.AuthHeader
20
+
21
+ ::: apifetch.AuthQuery
22
+
23
+ ## Pagination strategies
24
+
25
+ ::: apifetch.PaginateOffset
26
+
27
+ ::: apifetch.PaginateNone
28
+
29
+ ## Token management
30
+
31
+ ::: apifetch.store_token
32
+
33
+ ::: apifetch.get_token
34
+
35
+ ::: apifetch.remove_token
36
+
37
+ ::: apifetch.list_tokens
38
+
39
+ ## Errors
40
+
41
+ ::: apifetch.ApiError
@@ -0,0 +1,43 @@
1
+ """Use case: the Big Data PE API.
2
+
3
+ `apifetch` was extracted from the BigDataPE package, which wraps the Big Data PE
4
+ platform — a public-data REST API run by the Government of the State of
5
+ Pernambuco, Brazil. This is the canonical worked example of configuring the
6
+ package for a real API.
7
+
8
+ What is specific about Big Data PE:
9
+
10
+ - Authentication sends the token *verbatim* in the ``Authorization`` header
11
+ (no ``Bearer`` prefix) -> ``AuthRaw()``
12
+ - Pagination sends ``limit``/``offset`` as HTTP *headers*, not query params
13
+ -> ``PaginateOffset(where="header")``
14
+ - Responses carry a status column ``"Mensagem"`` we drop -> ``drop_cols``
15
+ - The API is reachable only from the PE Conectado network or VPN -> ``connect_hint``
16
+ """
17
+
18
+ import apifetch as af
19
+
20
# Reusable profile for the Big Data PE search endpoint: raw token in the
# Authorization header, limit/offset sent as headers, "Mensagem" dropped.
bigdatape = af.Api(
    endpoint="https://www.bigdata.pe.gov.br/api/buscar",
    service="BigDataPE",
    connect_hint="Ensure you are on the PE Conectado network or VPN.",
    drop_cols=("Mensagem",),
    pagination=af.PaginateOffset(where="header"),
    auth=af.AuthRaw(),
)

if __name__ == "__main__":
    # Register the token for this process only (environment variable).
    af.store_token("dengue", "your-token-here", service="BigDataPE")

    # One page, capped at 50 records.
    first_page = af.fetch(bigdatape, "dengue", limit=50)
    print(f"got {len(first_page)} records")

    # The full dataset, requested chunk by chunk with progress messages.
    all_rows = af.fetch_all(bigdatape, "dengue", chunk_size=50_000, verbosity=1)
    print(f"got {len(all_rows)} records total")

    # Optional: a DataFrame.
    # import pandas as pd
    # df = pd.DataFrame(all_rows)
@@ -0,0 +1,45 @@
1
# MkDocs configuration for the apifetch documentation site.
site_name: apifetch (Python)
site_description: A generic toolkit for token-authenticated REST API retrieval.
site_url: https://strategicprojects.github.io/apifetch-py/
repo_url: https://github.com/StrategicProjects/apifetch-py
repo_name: StrategicProjects/apifetch-py

theme:
  name: material
  logo: assets/logo.svg
  favicon: assets/favicon.png
  # Two palettes, each with a toggle that switches to the other one.
  palette:
    - scheme: default
      primary: indigo
      accent: amber
      toggle:
        icon: material/brightness-7
        name: Switch to dark mode
    - scheme: slate
      primary: indigo
      accent: amber
      toggle:
        icon: material/brightness-4
        name: Switch to light mode
  features:
    - navigation.sections
    - content.code.copy

plugins:
  - search
  # Renders the `::: apifetch.*` directives used in reference.md.
  - mkdocstrings:
      handlers:
        python:
          options:
            show_source: false
            # The package docstrings use Google-style "Args:" sections.
            docstring_style: google

nav:
  - Home: index.md
  - "Use case — BigDataPE": bigdatape.md
  - API reference: reference.md

markdown_extensions:
  # bigdatape.md uses a `!!! note` admonition.
  - admonition
  - pymdownx.highlight
  - pymdownx.superfences
@@ -0,0 +1,37 @@
1
# Build backend: hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Core package metadata.
[project]
name = "apifetch"
# The same string is set as __version__ in the package's __init__.py.
version = "0.1.0"
description = "A generic toolkit for token-authenticated REST API retrieval."
readme = "README.md"
requires-python = ">=3.9"
license = { file = "LICENSE" }
authors = [{ name = "André Leite", email = "leite@de.ufpe.br" }]
keywords = ["api", "rest", "http", "token", "pagination", "client"]
classifiers = [
  "Development Status :: 3 - Alpha",
  "Intended Audience :: Developers",
  "License :: OSI Approved :: MIT License",
  "Programming Language :: Python :: 3",
  "Topic :: Internet :: WWW/HTTP",
]
# httpx is the only runtime dependency.
dependencies = ["httpx>=0.24"]

[project.optional-dependencies]
pandas = ["pandas>=1.3"]
# Installed by the CI workflow via `pip install -e ".[dev]"`.
dev = ["pytest>=7", "respx>=0.20", "pandas>=1.3"]
# Installed by the docs workflow via `pip install -e ".[docs]"`.
docs = ["mkdocs-material>=9", "mkdocstrings[python]>=0.24"]

[project.urls]
Homepage = "https://github.com/StrategicProjects/apifetch-py"
Repository = "https://github.com/StrategicProjects/apifetch-py"
"R sibling" = "https://github.com/StrategicProjects/apifetch"

# The importable package lives under src/.
[tool.hatch.build.targets.wheel]
packages = ["src/apifetch"]

[tool.pytest.ini_options]
testpaths = ["tests"]
@@ -0,0 +1,55 @@
1
+ """apifetch — a generic toolkit for token-authenticated REST API retrieval.
2
+
3
+ Quick start::
4
+
5
+ import apifetch as af
6
+
7
+ api = af.Api(
8
+ endpoint="https://api.example.com/v1/search",
9
+ service="Example",
10
+ auth=af.AuthBearer(),
11
+ pagination=af.PaginateOffset(where="query"),
12
+ )
13
+ af.store_token("reports", "my-secret-token", service="Example")
14
+ rows = af.fetch_all(api, "reports", chunk_size=1000)
15
+
16
+ This is the Python sibling of the R package ``apifetch``.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from .api import (
22
+ Api,
23
+ Auth,
24
+ AuthBearer,
25
+ AuthHeader,
26
+ AuthQuery,
27
+ AuthRaw,
28
+ PaginateNone,
29
+ PaginateOffset,
30
+ Pagination,
31
+ )
32
+ from .fetch import ApiError, fetch, fetch_all
33
+ from .tokens import get_token, list_tokens, remove_token, store_token
34
+
35
+ __version__ = "0.1.0"
36
+
37
+ __all__ = [
38
+ "Api",
39
+ "Auth",
40
+ "AuthRaw",
41
+ "AuthBearer",
42
+ "AuthHeader",
43
+ "AuthQuery",
44
+ "Pagination",
45
+ "PaginateOffset",
46
+ "PaginateNone",
47
+ "fetch",
48
+ "fetch_all",
49
+ "ApiError",
50
+ "store_token",
51
+ "get_token",
52
+ "remove_token",
53
+ "list_tokens",
54
+ "__version__",
55
+ ]
@@ -0,0 +1,31 @@
1
+ """Internal helpers shared across the package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import unicodedata
7
+
8
+
9
def sanitize_name(value: str) -> str:
    """Reduce ``value`` to ASCII and replace every space with an underscore.

    The text is NFKD-normalized first, which splits an accented letter into
    its base letter plus combining marks. Encoding to ASCII with ``"ignore"``
    then discards everything that has no ASCII form, leaving the base letters.
    ``token_var`` applies this to both parts of an environment-variable name.
    """
    decomposed = unicodedata.normalize("NFKD", value)
    ascii_bytes = decomposed.encode("ascii", errors="ignore")
    return "_".join(ascii_bytes.decode("ascii").split(" "))
18
+
19
+
20
def token_var(name: str, service: str) -> str:
    """Return the environment-variable name that holds a token.

    Both parts go through :func:`sanitize_name` and are joined as
    ``<service>_<name>``.
    """
    prefix = sanitize_name(service)
    suffix = sanitize_name(name)
    return "_".join((prefix, suffix))
23
+
24
+
25
def is_unset(value) -> bool:
    """Tell whether ``value`` means "no pagination bound".

    Args:
        value: A limit or offset: ``None`` or a number.

    Returns:
        ``True`` for ``None``, for any non-finite float (``inf``, ``-inf``,
        ``nan``) and for values ``<= 0``; ``False`` otherwise.
    """
    if value is None:
        return True
    # NaN must count as unset too: ``nan <= 0`` is False, so it used to be
    # reported as a usable bound and then failed in the caller's ``int(...)``.
    if isinstance(value, float) and not math.isfinite(value):
        return True
    return value <= 0
@@ -0,0 +1,168 @@
1
+ """API profiles: authentication and pagination strategies.
2
+
3
+ An :class:`Api` describes *where* to call, *how* to authenticate, and *how* to
4
+ paginate. Auth and pagination are pluggable strategy objects, so the same fetch
5
+ functions work against APIs with different conventions.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+
12
+ from ._utils import is_unset
13
+
14
+ __all__ = [
15
+ "Auth",
16
+ "AuthRaw",
17
+ "AuthBearer",
18
+ "AuthHeader",
19
+ "AuthQuery",
20
+ "Pagination",
21
+ "PaginateOffset",
22
+ "PaginateNone",
23
+ "Api",
24
+ ]
25
+
26
+
27
+ # ---- Authentication strategies -------------------------------------------
28
+
29
+
30
class Auth:
    """Root of the authentication strategies.

    The base class adds nothing to a request; a subclass overrides
    :meth:`headers` and/or :meth:`params` to place the token.
    """

    def headers(self, token: str) -> dict:
        """Return the HTTP headers to add for ``token`` (none by default)."""
        return dict()

    def params(self, token: str) -> dict:
        """Return the query parameters to add for ``token`` (none by default)."""
        return dict()
41
+
42
+
43
@dataclass
class AuthRaw(Auth):
    """Put the token, unmodified, into one header.

    The header name defaults to ``Authorization``. This is the scheme the
    Big Data PE API expects.
    """

    header: str = "Authorization"

    def headers(self, token: str) -> dict:
        """Return ``{header: token}``."""
        contribution = {}
        contribution[self.header] = token
        return contribution
54
+
55
+
56
@dataclass
class AuthBearer(Auth):
    """Put ``"<prefix><token>"`` into one header.

    With the defaults this produces ``Authorization: Bearer <token>``.
    """

    header: str = "Authorization"
    prefix: str = "Bearer "

    def headers(self, token: str) -> dict:
        """Return ``{header: prefix + token}``."""
        credential = f"{self.prefix}{token}"
        return {self.header: credential}
65
+
66
+
67
@dataclass
class AuthHeader(Auth):
    """Put the token into a caller-chosen header (``X-API-Key`` by default)."""

    header: str = "X-API-Key"

    def headers(self, token: str) -> dict:
        """Return ``{header: token}``."""
        contribution = {}
        contribution[self.header] = token
        return contribution
75
+
76
+
77
@dataclass
class AuthQuery(Auth):
    """Put the token into the URL query string (``api_key`` by default)."""

    param: str = "api_key"

    def params(self, token: str) -> dict:
        """Return ``{param: token}``."""
        contribution = {}
        contribution[self.param] = token
        return contribution
85
+
86
+
87
+ # ---- Pagination strategies -----------------------------------------------
88
+
89
+
90
class Pagination:
    """Root of the pagination strategies.

    The base class adds nothing to a request; a subclass overrides
    :meth:`headers` and/or :meth:`params` to send the page bounds.
    """

    def headers(self, limit, offset) -> dict:
        """Return the HTTP headers carrying ``limit``/``offset`` (none by default)."""
        return dict()

    def params(self, limit, offset) -> dict:
        """Return the query parameters carrying ``limit``/``offset`` (none by default)."""
        return dict()
98
+
99
+
100
@dataclass
class PaginateOffset(Pagination):
    """Send ``limit``/``offset`` as HTTP headers (default) or query parameters.

    Values that :func:`is_unset` reports as unset (``None``, non-positive,
    infinite) are left out of the request.

    Args:
        where: ``"header"`` or ``"query"``: where the two values are sent.
        limit_param: Header/parameter name used for the limit.
        offset_param: Header/parameter name used for the offset.

    Raises:
        ValueError: If ``where`` is neither ``"header"`` nor ``"query"``.
    """

    where: str = "header"  # "header" or "query"
    limit_param: str = "limit"
    offset_param: str = "offset"

    def __post_init__(self):
        # Any other value would make both headers() and params() return {},
        # silently turning pagination off; fail loudly instead.
        if self.where not in ("header", "query"):
            raise ValueError(
                f"where must be 'header' or 'query', got {self.where!r}."
            )

    def _values(self, limit, offset) -> dict:
        """Return the set bounds as integers, keyed by their parameter names."""
        vals = {}
        if not is_unset(limit):
            vals[self.limit_param] = int(limit)
        if not is_unset(offset):
            vals[self.offset_param] = int(offset)
        return vals

    def headers(self, limit, offset) -> dict:
        """Return the bounds as header strings, or ``{}`` in query mode."""
        if self.where != "header":
            return {}
        # HTTP header values must be strings.
        return {k: str(v) for k, v in self._values(limit, offset).items()}

    def params(self, limit, offset) -> dict:
        """Return the bounds as query parameters, or ``{}`` in header mode."""
        return self._values(limit, offset) if self.where == "query" else {}
127
+
128
+
129
@dataclass
class PaginateNone(Pagination):
    """Pagination strategy that adds no headers and no query parameters.

    It inherits the empty ``headers``/``params`` of :class:`Pagination`.
    """
132
+
133
+
134
+ # ---- API profile ----------------------------------------------------------
135
+
136
+
137
@dataclass
class Api:
    """Describe an API endpoint together with its auth and pagination strategies.

    Args:
        endpoint: The base API URL.
        service: Namespace used to look up the token (see :func:`get_token`).
        auth: An :class:`Auth` strategy. Defaults to bearer-token auth.
        pagination: A :class:`Pagination` strategy. Defaults to offset paging
            sent as HTTP headers.
        drop_cols: Keys to drop from each record after parsing (e.g. a status
            column). A single string is accepted and treated as one key.
        connect_hint: Optional extra line shown on a connection error (e.g. a
            VPN requirement).

    Raises:
        ValueError: If ``endpoint`` is empty.
        TypeError: If ``auth`` or ``pagination`` is not an instance of the
            corresponding base class.
    """

    endpoint: str
    service: str = "apifetch"
    auth: Auth = field(default_factory=AuthBearer)
    pagination: Pagination = field(default_factory=PaginateOffset)
    drop_cols: tuple[str, ...] = ()
    connect_hint: str | None = None

    def __post_init__(self):
        if not self.endpoint:
            raise ValueError("endpoint must be a non-empty string.")
        if not isinstance(self.auth, Auth):
            raise TypeError("auth must be an Auth instance (e.g. AuthBearer()).")
        if not isinstance(self.pagination, Pagination):
            raise TypeError(
                "pagination must be a Pagination instance (e.g. PaginateOffset())."
            )
        # ``drop_cols=("Mensagem")`` (missing trailing comma) is a plain string;
        # membership tests against a string match substrings, so wrap it.
        if isinstance(self.drop_cols, str):
            self.drop_cols = (self.drop_cols,)
@@ -0,0 +1,152 @@
1
+ """Data fetching: single page and chunked retrieval."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import sys
7
+ from typing import Any, Optional
8
+
9
+ import httpx
10
+
11
+ from .api import Api
12
+ from .tokens import get_token
13
+
14
+ __all__ = ["fetch", "fetch_all", "ApiError"]
15
+
16
+
17
class ApiError(RuntimeError):
    """Error signalling that the API could not be reached or answered with an HTTP error."""
19
+
20
+
21
def _log(verbosity: int, message: str) -> None:
    """Write ``message`` to standard error when ``verbosity`` is above zero."""
    if not verbosity > 0:
        return
    print(message, file=sys.stderr)
24
+
25
+
26
def fetch(
    api: Api,
    name: str,
    limit: Optional[float] = None,
    offset: int = 0,
    query: Optional[dict] = None,
    verbosity: int = 0,
    client: Optional[httpx.Client] = None,
) -> list[dict[str, Any]]:
    """Request one page from ``api`` and return its records.

    Args:
        api: The :class:`Api` profile to call.
        name: Name of the token to use; it is looked up under ``api.service``.
        limit: Largest number of records to ask for. ``None``/``inf`` means
            no limit.
        offset: First record to ask for (left out of the request when ``0``).
        query: Additional query-string filters.
        verbosity: ``0`` is silent; ``>=1`` prints progress messages.
        client: An existing ``httpx.Client`` to send the request with. When
            omitted, a client is created for this call and closed afterwards.

    Returns:
        The decoded JSON body as a list; a body that is a single JSON object
        is wrapped in a one-element list.

    Raises:
        TypeError: If ``api`` is not an :class:`Api`.
        ApiError: If no token is stored, the request cannot be sent, or the
            response status is 400 or above.
    """
    if not isinstance(api, Api):
        raise TypeError("api must be an Api instance (see apifetch.Api).")

    token = get_token(name, service=api.service)
    if token is None:
        raise ApiError(
            f"No token available for {name!r}; store one with apifetch.store_token()."
        )

    # Later updates win, so pagination values override auth values, and both
    # override caller-supplied query filters with the same key.
    request_headers = {}
    request_headers.update(api.auth.headers(token))
    request_headers.update(api.pagination.headers(limit, offset))

    request_params = {**(query or {})}
    request_params.update(api.auth.params(token))
    request_params.update(api.pagination.params(limit, offset))

    owns_client = client is None
    session = client or httpx.Client()
    try:
        response = session.get(
            api.endpoint, headers=request_headers, params=request_params
        )
    except httpx.RequestError as exc:
        if api.connect_hint:
            hint = f" {api.connect_hint}"
        else:
            hint = ""
        raise ApiError(
            f"Unable to connect to the API at {api.endpoint}. "
            f"Check your network connection.{hint} ({exc})"
        ) from exc
    finally:
        # Only close a client this call created; a caller's client stays open.
        if owns_client:
            session.close()

    if response.status_code >= 400:
        raise ApiError(
            f"The API returned an error (HTTP {response.status_code} - {response.reason_phrase}). "
            "Try again later, and check that the endpoint and token are valid."
        )

    payload = response.json()
    records = [payload] if isinstance(payload, dict) else payload
    _log(verbosity, f"i Fetched {len(records)} records.")
    return records
89
+
90
+
91
def fetch_all(
    api: Api,
    name: str,
    total_limit: Optional[float] = None,
    chunk_size: int = 50_000,
    query: Optional[dict] = None,
    verbosity: int = 0,
    client: Optional[httpx.Client] = None,
) -> list[dict[str, Any]]:
    """Fetch every record by paging through ``api`` in chunks.

    Iteratively calls :func:`fetch` with an advancing ``offset`` until a chunk
    comes back empty or ``total_limit`` is reached. Columns listed in
    ``api.drop_cols`` are removed from each record. When the pagination
    strategy sends neither a limit nor an offset (e.g. ``PaginateNone``), a
    single request is made, because repeating it would only return the same
    records again.

    Args:
        api: An :class:`Api` profile.
        name: Token name to authenticate with (looked up via ``api.service``).
        total_limit: Stop once at least this many records were fetched.
            ``None`` means no limit.
        chunk_size: Number of records requested per call; must be positive.
        query: Extra query-string filters, passed to every request.
        verbosity: ``0`` silent, ``>=1`` progress messages.
        client: Optional pre-built ``httpx.Client`` reused for every request.
            One is created and closed by this call otherwise.

    Returns:
        All fetched records, in request order.

    Raises:
        TypeError: If ``api`` is not an :class:`Api`.
        ValueError: If ``chunk_size`` is not positive.
        ApiError: Propagated from :func:`fetch`.
    """
    if not isinstance(api, Api):
        raise TypeError("api must be an Api instance (see apifetch.Api).")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer.")

    total = math.inf if total_limit is None else total_limit
    offset = 0
    fetched = 0
    records: list[dict[str, Any]] = []

    # Probe the strategy with non-trivial bounds. If it contributes nothing,
    # the server never sees the advancing offset, so every request would
    # return the same page and the loop below would never see an empty chunk.
    paginates = bool(
        api.pagination.headers(1, 1) or api.pagination.params(1, 1)
    )

    owns_client = client is None
    client = client or httpx.Client()
    try:
        while True:
            current_limit = int(min(chunk_size, total - fetched))
            if current_limit <= 0:
                break

            chunk = fetch(
                api, name,
                limit=current_limit,
                offset=offset,
                query=query,
                verbosity=verbosity,
                client=client,
            )
            if api.drop_cols:
                chunk = [
                    {k: v for k, v in row.items() if k not in api.drop_cols}
                    for row in chunk
                ]
            if not chunk:
                break

            records.extend(chunk)
            fetched += len(chunk)
            offset += len(chunk)
            _log(verbosity, f"i Fetched {len(chunk)} records (total: {fetched}).")

            if fetched >= total or not paginates:
                break
    finally:
        # Only close a client this call created; a caller's client stays open.
        if owns_client:
            client.close()

    _log(verbosity, f"✓ Fetching complete: {len(records)} records retrieved.")
    return records
@@ -0,0 +1,67 @@
1
+ """Token management via environment variables.
2
+
3
+ Tokens are never written to disk; they live only in process environment
4
+ variables named ``<service>_<name>``. ``service`` acts as a namespace so a single
5
+ process can hold tokens for several different APIs without clashing.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+
12
+ from ._utils import sanitize_name, token_var
13
+
14
+ __all__ = ["store_token", "get_token", "remove_token", "list_tokens"]
15
+
16
+
17
def store_token(name: str, token: str, service: str = "apifetch") -> None:
    """Save ``token`` in the environment variable derived from ``name``/``service``.

    If that variable already holds a non-empty value it is left untouched and
    a notice is printed instead.

    Raises:
        ValueError: If ``name`` or ``token`` is empty.
    """
    problem = None
    if not name:
        problem = "name must be a non-empty string."
    elif not token:
        problem = "token must be a non-empty string."
    if problem is not None:
        raise ValueError(problem)

    env_key = token_var(name, service)
    already_set = bool(os.environ.get(env_key))
    if not already_set:
        os.environ[env_key] = token
        print(f"✓ Token stored in environment variable: {env_key}")
        return

    print(f"! {env_key} is already defined; not overwriting to avoid data loss.")
34
+
35
+
36
def get_token(name: str, service: str = "apifetch") -> str | None:
    """Look up the token stored for ``name``/``service``.

    Returns the stored value, or ``None`` (after printing a notice) when the
    environment variable is unset or empty.

    Raises:
        ValueError: If ``name`` is empty.
    """
    if not name:
        raise ValueError("name must be a non-empty string.")

    value = os.environ.get(token_var(name, service))
    if value:
        return value

    print(f"! No token found for {name!r} (service {service!r}).")
    return None
46
+
47
+
48
def remove_token(name: str, service: str = "apifetch") -> None:
    """Delete the token stored for ``name``/``service``, if there is one.

    Prints a confirmation when a non-empty variable was deleted and a notice
    when no token was found.

    Raises:
        ValueError: If ``name`` is empty.
    """
    if not name:
        raise ValueError("name must be a non-empty string.")

    env_key = token_var(name, service)
    if not os.environ.get(env_key):
        print(f"! No token found for {name!r} (service {service!r}).")
        return

    del os.environ[env_key]
    print(f"✓ Token removed for {name!r} (service {service!r}).")
59
+
60
+
61
def list_tokens(service: str = "apifetch") -> list[str]:
    """Return the names (without the ``service`` prefix) of stored tokens.

    Names are taken from the environment variable names, i.e. in the form
    they are stored under, and are returned sorted. Variables whose value is
    empty are skipped, matching ``get_token``, which reports an empty
    variable as "no token".

    Prints a notice when no token is found for ``service``.
    """
    prefix = f"{sanitize_name(service)}_"
    names = sorted(
        key[len(prefix):]
        for key, value in os.environ.items()
        # An empty value counts as "no token" everywhere else in this module.
        if key.startswith(prefix) and value
    )
    if not names:
        print(f"i No tokens found for service {service!r}.")
    return names
@@ -0,0 +1,43 @@
1
+ import math
2
+
3
+ import pytest
4
+
5
+ import apifetch as af
6
+
7
+
8
def test_auth_strategies():
    """Each auth strategy puts the token in the expected header or parameter."""
    token = "tok"
    header_cases = [
        (af.AuthRaw(), {"Authorization": "tok"}),
        (af.AuthBearer(), {"Authorization": "Bearer tok"}),
        (af.AuthHeader("X-API-Key"), {"X-API-Key": "tok"}),
    ]
    for strategy, expected in header_cases:
        assert strategy.headers(token) == expected

    # AuthQuery exposes the token through params().
    assert af.AuthQuery("api_key").params(token) == {"api_key": "tok"}
13
+
14
+
15
def test_pagination_offset_header():
    """Header pagination emits string-valued headers and no query params."""
    paginator = af.PaginateOffset(where="header")

    sent_headers = paginator.headers(10, 5)
    assert sent_headers == {"limit": "10", "offset": "5"}  # headers are strings
    assert paginator.params(10, 5) == {}

    # An infinite or missing limit and a zero or missing offset are omitted.
    for limit, offset in ((math.inf, 0), (None, None)):
        assert paginator.headers(limit, offset) == {}
22
+
23
+
24
def test_pagination_offset_query():
    """Query pagination emits params only; a zero offset is left out."""
    paginator = af.PaginateOffset(where="query")
    query_params = paginator.params(10, 0)
    header_values = paginator.headers(10, 0)
    assert query_params == {"limit": 10}
    assert header_values == {}
28
+
29
+
30
def test_api_validation():
    """Api rejects an empty URL and non-strategy auth/pagination arguments."""
    invalid_calls = (
        (ValueError, ("",), {}),
        (TypeError, ("https://x.test",), {"auth": "nope"}),
        (TypeError, ("https://x.test",), {"pagination": "nope"}),
    )
    for expected_error, args, kwargs in invalid_calls:
        with pytest.raises(expected_error):
            af.Api(*args, **kwargs)
37
+
38
+
39
def test_api_defaults():
    """Api defaults to bearer auth and offset pagination and keeps drop_cols."""
    dropped = ("Mensagem",)
    api = af.Api("https://x.test", service="S", drop_cols=dropped)

    expected_types = (
        (api.auth, af.AuthBearer),
        (api.pagination, af.PaginateOffset),
    )
    for value, expected_type in expected_types:
        assert isinstance(value, expected_type)
    assert api.drop_cols == ("Mensagem",)
@@ -0,0 +1,78 @@
1
+ import httpx
2
+ import pytest
3
+
4
+ import apifetch as af
5
+
6
+
7
def _client(handler):
    """Build an ``httpx.Client`` whose requests are answered by ``handler``."""
    mock_transport = httpx.MockTransport(handler)
    return httpx.Client(transport=mock_transport)
9
+
10
+
11
def test_fetch_sends_auth_and_pagination(monkeypatch):
    """fetch() sends the stored token and the requested limit as headers."""
    monkeypatch.delenv("T_ds", raising=False)
    af.store_token("ds", "secret", service="T")
    captured = {}

    def handler(request):
        # Record what the client actually put on the wire.
        captured["auth"] = request.headers.get("authorization")
        captured["limit"] = request.headers.get("limit")
        return httpx.Response(200, json=[{"a": 1}, {"a": 2}])

    api = af.Api(
        "https://x.test/api",
        service="T",
        auth=af.AuthRaw(),
        pagination=af.PaginateOffset(where="header"),
    )
    try:
        # Close the mock client deterministically instead of leaking it.
        with _client(handler) as client:
            rows = af.fetch(api, "ds", limit=2, client=client)
    finally:
        # store_token writes to os.environ directly, so clean up even when
        # fetch raises; otherwise the token leaks into later tests.
        af.remove_token("ds", service="T")

    assert rows == [{"a": 1}, {"a": 2}]
    assert captured["auth"] == "secret"
    assert captured["limit"] == "2"
33
+
34
+
35
def test_fetch_all_pages_until_empty(monkeypatch):
    """fetch_all() keeps paging until an empty page and applies drop_cols."""
    monkeypatch.delenv("T_ds", raising=False)
    af.store_token("ds", "secret", service="T")
    pages = [
        [{"x": 1, "Mensagem": "ok"}, {"x": 2, "Mensagem": "ok"}],
        [{"x": 3, "Mensagem": "ok"}],
        [],
    ]
    calls = {"n": 0}

    def handler(request):
        # Serve the canned pages in order, one per request.
        page = pages[calls["n"]]
        calls["n"] += 1
        return httpx.Response(200, json=page)

    api = af.Api(
        "https://x.test/api",
        service="T",
        auth=af.AuthRaw(),
        drop_cols=("Mensagem",),
    )
    try:
        # Close the mock client deterministically instead of leaking it.
        with _client(handler) as client:
            rows = af.fetch_all(api, "ds", chunk_size=2, client=client)
    finally:
        # store_token writes to os.environ directly, so clean up even when
        # fetch_all raises; otherwise the token leaks into later tests.
        af.remove_token("ds", service="T")

    assert [r["x"] for r in rows] == [1, 2, 3]
    assert all("Mensagem" not in r for r in rows)  # drop_cols applied
61
+
62
+
63
def test_fetch_http_error_raises():
    """A 503 response from the server surfaces as ApiError."""
    af.store_token("ds", "secret", service="T")

    def handler(request):
        return httpx.Response(503, text="down")

    api = af.Api("https://x.test/api", service="T", auth=af.AuthRaw())
    try:
        # Close the mock client deterministically instead of leaking it.
        with _client(handler) as client:
            with pytest.raises(af.ApiError):
                af.fetch(api, "ds", client=client)
    finally:
        # Clean up even when the expectation fails, so the token does not
        # leak into later tests.
        af.remove_token("ds", service="T")
73
+
74
+
75
def test_fetch_missing_token_raises():
    """fetch() raises ApiError when no token is stored for the name/service."""
    tokenless_api = af.Api(
        "https://x.test/api",
        service="Empty",
        auth=af.AuthRaw(),
    )
    expect_api_error = pytest.raises(af.ApiError)
    with expect_api_error:
        af.fetch(tokenless_api, "absent")
@@ -0,0 +1,37 @@
1
+ import apifetch as af
2
+
3
+
4
def test_round_trip(monkeypatch):
    """A stored token can be read back, listed, and then removed."""
    name, service, secret = "alpha", "svc", "tok-123"
    monkeypatch.delenv("svc_alpha", raising=False)

    af.store_token(name, secret, service=service)
    stored = af.get_token(name, service=service)
    assert stored == secret
    assert name in af.list_tokens(service=service)

    af.remove_token(name, service=service)
    after_removal = af.get_token(name, service=service)
    assert after_removal is None
11
+
12
+
13
def test_no_overwrite(monkeypatch):
    """A second store_token call for the same name keeps the first value."""
    monkeypatch.delenv("svc_beta", raising=False)
    af.store_token("beta", "first", service="svc")
    try:
        af.store_token("beta", "second", service="svc")  # refused
        assert af.get_token("beta", service="svc") == "first"
    finally:
        # store_token writes to os.environ directly; remove the token so it
        # does not leak into later tests.
        af.remove_token("beta", service="svc")
18
+
19
+
20
def test_accents_and_spaces(monkeypatch):
    """Names with accents and spaces map to a sanitized variable name."""
    import os

    # Start from a clean slate: a pre-existing variable would make
    # store_token a silent no-op and the test would check stale data.
    monkeypatch.delenv("svc_Sao_Paulo", raising=False)
    af.store_token("São Paulo", "tok", service="svc")
    try:
        assert af.get_token("São Paulo", service="svc") == "tok"
        # env var name is sanitized
        assert "svc_Sao_Paulo" in os.environ
    finally:
        # store_token writes to os.environ directly; remove the token so it
        # does not leak into later tests.
        af.remove_token("São Paulo", service="svc")
26
+
27
+
28
def test_missing_returns_none():
    """get_token() returns None when nothing is stored for the name/service."""
    looked_up = af.get_token("nope", service="absent")
    assert looked_up is None
30
+
31
+
32
def test_invalid_inputs():
    """store_token() rejects an empty name and an empty token."""
    import pytest

    bad_arguments = (("", "tok"), ("x", ""))
    for bad_name, bad_token in bad_arguments:
        with pytest.raises(ValueError):
            af.store_token(bad_name, bad_token)