open-data-mexico 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. open_data_mexico-1.0.0/.github/workflows/ci.yml +59 -0
  2. open_data_mexico-1.0.0/.github/workflows/docs.yml +24 -0
  3. open_data_mexico-1.0.0/.github/workflows/publish.yml +47 -0
  4. open_data_mexico-1.0.0/.gitignore +13 -0
  5. open_data_mexico-1.0.0/.pre-commit-config.yaml +14 -0
  6. open_data_mexico-1.0.0/CHANGELOG.md +63 -0
  7. open_data_mexico-1.0.0/CONTRIBUTING.md +115 -0
  8. open_data_mexico-1.0.0/LICENSE +21 -0
  9. open_data_mexico-1.0.0/PKG-INFO +577 -0
  10. open_data_mexico-1.0.0/README.md +514 -0
  11. open_data_mexico-1.0.0/SECURITY.md +34 -0
  12. open_data_mexico-1.0.0/docs/.gitkeep +0 -0
  13. open_data_mexico-1.0.0/docs/categories/agricultura.md +43 -0
  14. open_data_mexico-1.0.0/docs/categories/catalogo_datos.md +43 -0
  15. open_data_mexico-1.0.0/docs/categories/ciencia_tecnologia.md +43 -0
  16. open_data_mexico-1.0.0/docs/categories/cultura.md +43 -0
  17. open_data_mexico-1.0.0/docs/categories/deporte.md +43 -0
  18. open_data_mexico-1.0.0/docs/categories/derechos_humanos.md +43 -0
  19. open_data_mexico-1.0.0/docs/categories/economia.md +43 -0
  20. open_data_mexico-1.0.0/docs/categories/educacion.md +43 -0
  21. open_data_mexico-1.0.0/docs/categories/energia.md +43 -0
  22. open_data_mexico-1.0.0/docs/categories/gobierno.md +43 -0
  23. open_data_mexico-1.0.0/docs/categories/infraestructura.md +43 -0
  24. open_data_mexico-1.0.0/docs/categories/mar_costa.md +43 -0
  25. open_data_mexico-1.0.0/docs/categories/medio_ambiente.md +43 -0
  26. open_data_mexico-1.0.0/docs/categories/migracion.md +43 -0
  27. open_data_mexico-1.0.0/docs/categories/movilidad.md +43 -0
  28. open_data_mexico-1.0.0/docs/categories/mujeres.md +43 -0
  29. open_data_mexico-1.0.0/docs/categories/multiculturalidad.md +43 -0
  30. open_data_mexico-1.0.0/docs/categories/plan_apertura_datos.md +43 -0
  31. open_data_mexico-1.0.0/docs/categories/poblacion.md +43 -0
  32. open_data_mexico-1.0.0/docs/categories/presupuesto.md +43 -0
  33. open_data_mexico-1.0.0/docs/categories/programas_sociales.md +43 -0
  34. open_data_mexico-1.0.0/docs/categories/salud.md +43 -0
  35. open_data_mexico-1.0.0/docs/categories/seguridad.md +175 -0
  36. open_data_mexico-1.0.0/docs/categories/servicios.md +43 -0
  37. open_data_mexico-1.0.0/docs/categories/telecomunicaciones.md +43 -0
  38. open_data_mexico-1.0.0/docs/categories/territorio.md +43 -0
  39. open_data_mexico-1.0.0/docs/categories/trabajo.md +43 -0
  40. open_data_mexico-1.0.0/docs/categories/turismo.md +43 -0
  41. open_data_mexico-1.0.0/docs/index.md +228 -0
  42. open_data_mexico-1.0.0/mkdocs.yml +76 -0
  43. open_data_mexico-1.0.0/open_data_mexico/__init__.py +73 -0
  44. open_data_mexico-1.0.0/open_data_mexico/_config.py +19 -0
  45. open_data_mexico-1.0.0/open_data_mexico/_http.py +101 -0
  46. open_data_mexico-1.0.0/open_data_mexico/_scrapers/__init__.py +0 -0
  47. open_data_mexico-1.0.0/open_data_mexico/_scrapers/categories.py +169 -0
  48. open_data_mexico-1.0.0/open_data_mexico/_scrapers/dataset_detail.py +195 -0
  49. open_data_mexico-1.0.0/open_data_mexico/_scrapers/datasets.py +183 -0
  50. open_data_mexico-1.0.0/open_data_mexico/_scrapers/organizations.py +101 -0
  51. open_data_mexico-1.0.0/open_data_mexico/_scrapers/search.py +102 -0
  52. open_data_mexico-1.0.0/open_data_mexico/_utils.py +71 -0
  53. open_data_mexico-1.0.0/open_data_mexico/client.py +427 -0
  54. open_data_mexico-1.0.0/open_data_mexico/models.py +205 -0
  55. open_data_mexico-1.0.0/open_data_mexico/py.typed +0 -0
  56. open_data_mexico-1.0.0/pyproject.toml +94 -0
  57. open_data_mexico-1.0.0/server/__init__.py +0 -0
  58. open_data_mexico-1.0.0/server/app.py +109 -0
  59. open_data_mexico-1.0.0/tests/__init__.py +0 -0
  60. open_data_mexico-1.0.0/tests/conftest.py +654 -0
  61. open_data_mexico-1.0.0/tests/test_categories.py +173 -0
  62. open_data_mexico-1.0.0/tests/test_dataset_detail.py +245 -0
  63. open_data_mexico-1.0.0/tests/test_datasets.py +154 -0
  64. open_data_mexico-1.0.0/tests/test_organizations.py +164 -0
  65. open_data_mexico-1.0.0/tests/test_search.py +196 -0
@@ -0,0 +1,59 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, feature/**]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ name: Lint & type check
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.13"
19
+
20
+ - name: Install dev dependencies
21
+ run: pip install -e ".[dev]"
22
+
23
+ - name: Ruff lint
24
+ run: ruff check .
25
+
26
+ - name: Ruff format check
27
+ run: ruff format --check .
28
+
29
+ - name: Mypy
30
+ run: mypy open_data_mexico/ --ignore-missing-imports
31
+
32
+ test:
33
+ name: Test (Python ${{ matrix.python-version }})
34
+ runs-on: ubuntu-latest
35
+ needs: lint
36
+ strategy:
37
+ fail-fast: false
38
+ matrix:
39
+ python-version: ["3.11", "3.12", "3.13"]
40
+
41
+ steps:
42
+ - uses: actions/checkout@v4
43
+
44
+ - uses: actions/setup-python@v5
45
+ with:
46
+ python-version: ${{ matrix.python-version }}
47
+
48
+ - name: Install dev dependencies
49
+ run: pip install -e ".[dev]"
50
+
51
+ - name: Run tests with coverage
52
+ run: pytest
53
+
54
+ - name: Upload coverage report
55
+ if: matrix.python-version == '3.13'
56
+ uses: actions/upload-artifact@v4
57
+ with:
58
+ name: coverage-report
59
+ path: .coverage
@@ -0,0 +1,24 @@
1
+ name: Deploy docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: write
9
+
10
+ jobs:
11
+ deploy:
12
+ runs-on: ubuntu-latest
13
+ steps:
14
+ - uses: actions/checkout@v4
15
+
16
+ - uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.13"
19
+
20
+ - name: Install docs dependencies
21
+ run: pip install mkdocs-material
22
+
23
+ - name: Deploy to GitHub Pages
24
+ run: mkdocs gh-deploy --force
@@ -0,0 +1,47 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write # Required for Trusted Publisher (OIDC)
9
+
10
+ jobs:
11
+ build:
12
+ name: Build distribution
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - uses: actions/setup-python@v5
18
+ with:
19
+ python-version: "3.13"
20
+
21
+ - name: Install build tools
22
+ run: pip install build
23
+
24
+ - name: Build wheel and sdist
25
+ run: python -m build
26
+
27
+ - name: Upload dist artifacts
28
+ uses: actions/upload-artifact@v4
29
+ with:
30
+ name: dist
31
+ path: dist/
32
+
33
+ publish:
34
+ name: Publish to PyPI
35
+ runs-on: ubuntu-latest
36
+ needs: build
37
+ environment: pypi # Requires a GitHub environment named "pypi"
38
+ steps:
39
+ - name: Download dist artifacts
40
+ uses: actions/download-artifact@v4
41
+ with:
42
+ name: dist
43
+ path: dist/
44
+
45
+ - name: Publish to PyPI
46
+ uses: pypa/gh-action-pypi-publish@release/v1
47
+ # No token needed — uses OIDC Trusted Publisher configured on pypi.org
@@ -0,0 +1,13 @@
1
+ .claude
2
+ .venv
3
+ .env
4
+ __pycache__
5
+ .pytest_cache
6
+ .ruff_cache
7
+ .mypy_cache
8
+ .pytest_cache
9
+ .pytest_cache
10
+ .pytest_cache
11
+ PROJECT.md
12
+ dist/
13
+ .coverage
@@ -0,0 +1,14 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.15.8
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+
9
+ - repo: https://github.com/pre-commit/mirrors-mypy
10
+ rev: v1.19.1
11
+ hooks:
12
+ - id: mypy
13
+ args: [--ignore-missing-imports]
14
+ additional_dependencies: [pydantic>=2.0.0]
@@ -0,0 +1,63 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ---
9
+
10
+ ## [Unreleased]
11
+
12
+ ## [1.0.0] — 2026-03-27
13
+
14
+ ### Added
15
+ - `request_delay` parameter on `DatosGobMX` for configurable rate limiting between requests.
16
+ - `max_retries` parameter on `DatosGobMX` for automatic retry with exponential backoff on 5xx/429 and network errors.
17
+ - `cache_ttl` parameter on `DatosGobMX` for in-memory TTL caching of responses (default 300 s).
18
+ - `open_data_mexico/_http.py` — `robust_get()` helper used by all scrapers.
19
+ - FastAPI server uses a single shared `DatosGobMX` client via lifespan (connection pool + shared cache).
20
+ - `py.typed` marker for PEP 561 compliance (mypy/pyright support).
21
+ - GitHub Actions CI workflow — lint, type-check and test on Python 3.11, 3.12, 3.13.
22
+ - GitHub Actions publish workflow — build and publish to PyPI on GitHub Release via OIDC Trusted Publisher.
23
+ - `.pre-commit-config.yaml` with ruff and mypy hooks.
24
+ - `pytest-cov` with 80 % minimum coverage enforced.
25
+ - MkDocs site configuration.
26
+ - `client.search(query)` — full-text dataset search via CKAN `package_search` JSON API; supports `category`, `limit`, `offset` parameters; returns `SearchResponse`.
27
+ - `client.get_organizations()` — lists all 184+ publishing institutions via CKAN `organization_list`; returns `list[Organization]`.
28
+ - `client.get_organization(slug)` — fetches a single organization by slug via CKAN `organization_show`; returns `None` on 404.
29
+ - `Organization` and `OrganizationsResponse` Pydantic models.
30
+ - `SearchResponse` Pydantic model.
31
+ - `GET /organizations` and `GET /organizations/{slug}` FastAPI endpoints.
32
+ - `GET /search?q=...` FastAPI endpoint with optional `category`, `limit`, `offset` query params.
33
+ - `open_data_mexico/_utils.py` — `parse_spanish_date()` and `parse_iso_dt()` helpers for robust datetime parsing.
34
+
35
+ ### Changed
36
+ - `_get_total_pages()` now scans all `<li>` elements in `ul.pagination` instead of only `<a>` tags, making pagination detection more robust against disabled/active page items rendered as `<span>`.
37
+ - `Dataset.last_updated`, `DatasetDetail.created`, and `DatasetDetail.last_updated` changed from `str | None` to `datetime | None`; values are now timezone-aware UTC datetimes.
38
+ - `pyproject.toml`: corrected project URLs to the real GitHub repository.
39
+ - `pyproject.toml`: `license` field now points to the `LICENSE` file.
40
+ - `pyproject.toml`: added Python 3.13 classifier.
41
+ - README updated to document the new client parameters and to correct the test count.
42
+
43
+ ### Fixed
44
+ - Unused imports removed across `client.py`, `server/app.py`, and test files (ruff F401).
45
+ - `Optional[str]` annotations modernised to `str | None` (ruff UP045).
46
+
47
+ ---
48
+
49
+ ## [0.1.0] — 2026-03-23
50
+
51
+ ### Added
52
+ - Initial implementation of `DatosGobMX` async client.
53
+ - Scraping of categories (`/group/`), dataset listings (`/group/{slug}`), and dataset detail pages (`/dataset/{slug}`).
54
+ - Pydantic models: `Category`, `Dataset`, `DatasetDetail`, `Resource`, `CategoriesResponse`, `DatasetsResponse`.
55
+ - Auto-pagination for categories and dataset listings.
56
+ - `get_resource_data()` for in-memory CSV download with UTF-8 / latin-1 fallback.
57
+ - Optional FastAPI REST server (`pip install open-data-mexico[server]`).
58
+ - 44 tests with mock HTML fixtures (pytest-asyncio + pytest-httpx).
59
+ - Documentation for all 28 available categories.
60
+
61
+ [Unreleased]: https://github.com/lehcimhdz/open-data-mexico-api/compare/v1.0.0...HEAD
62
+ [1.0.0]: https://github.com/lehcimhdz/open-data-mexico-api/releases/tag/v1.0.0
63
+ [0.1.0]: https://github.com/lehcimhdz/open-data-mexico-api/releases/tag/v0.1.0
@@ -0,0 +1,115 @@
1
+ # Contributing to open-data-mexico
2
+
3
+ Thank you for your interest in contributing! This document covers everything you need to get started.
4
+
5
+ ---
6
+
7
+ ## Setup
8
+
9
+ **Requirements:** Python 3.11+ and git.
10
+
11
+ ```bash
12
+ git clone https://github.com/lehcimhdz/open-data-mexico-api.git
13
+ cd open-data-mexico-api
14
+ pip install -e ".[dev]"
15
+ ```
16
+
17
+ Install pre-commit hooks so linting runs automatically before every commit:
18
+
19
+ ```bash
20
+ pip install pre-commit
21
+ pre-commit install
22
+ ```
23
+
24
+ ---
25
+
26
+ ## Running tests
27
+
28
+ ```bash
29
+ # Full suite with coverage (must stay at or above 80 %)
30
+ pytest
31
+
32
+ # Skip coverage for a faster feedback loop
33
+ pytest --no-cov -v
34
+
35
+ # Single file
36
+ pytest tests/test_datasets.py -v
37
+ ```
38
+
39
+ ---
40
+
41
+ ## Code quality
42
+
43
+ All three checks must pass before opening a PR — they also run in CI:
44
+
45
+ ```bash
46
+ ruff check . # lint
47
+ ruff format --check . # formatting
48
+ mypy open_data_mexico/ --ignore-missing-imports # type checking
49
+ ```
50
+
51
+ Auto-fix lint and formatting issues:
52
+
53
+ ```bash
54
+ ruff check . --fix
55
+ ruff format .
56
+ ```
57
+
58
+ ---
59
+
60
+ ## Project structure
61
+
62
+ ```
63
+ open_data_mexico/ # installable library (published to PyPI)
64
+ ├── client.py # DatosGobMX — public entry point
65
+ ├── models.py # Pydantic models
66
+ ├── _config.py # BASE_URL, headers, defaults (private)
67
+ ├── _http.py # robust_get() with retry/backoff (private)
68
+ └── _scrapers/ # HTML scraping internals (private)
69
+ ├── categories.py
70
+ ├── datasets.py
71
+ └── dataset_detail.py
72
+ server/ # optional FastAPI REST server
73
+ tests/ # pytest suite with mock HTML fixtures
74
+ docs/ # MkDocs source
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Making changes
80
+
81
+ ### Scraper changes
82
+
83
+ The scrapers parse HTML from `datos.gob.mx`. If the site changes its structure, update the CSS selectors and add/update the corresponding HTML fixture in `tests/conftest.py` to reflect the new markup.
84
+
85
+ ### Adding a new public method
86
+
87
+ 1. Implement it in `client.py`.
88
+ 2. Export it from `open_data_mexico/__init__.py` and add it to `__all__`.
89
+ 3. Add a Pydantic model in `models.py` if the method returns a new type.
90
+ 4. Write tests in `tests/`.
91
+ 5. Document it in `README.md` (Client Reference section).
92
+
93
+ ### Updating the CHANGELOG
94
+
95
+ Add a bullet under `## [Unreleased]` in `CHANGELOG.md` following the [Keep a Changelog](https://keepachangelog.com) convention (`Added`, `Changed`, `Fixed`, `Removed`).
96
+
97
+ ---
98
+
99
+ ## Pull request checklist
100
+
101
+ - [ ] `pytest` passes with ≥ 80 % coverage
102
+ - [ ] `ruff check .` reports no errors
103
+ - [ ] `ruff format --check .` reports no changes needed
104
+ - [ ] `mypy open_data_mexico/ --ignore-missing-imports` reports no errors
105
+ - [ ] `CHANGELOG.md` updated under `[Unreleased]`
106
+ - [ ] New public API documented in `README.md`
107
+
108
+ ---
109
+
110
+ ## Reporting bugs
111
+
112
+ Open an issue at [github.com/lehcimhdz/open-data-mexico-api/issues](https://github.com/lehcimhdz/open-data-mexico-api/issues) with:
113
+ - Python version and OS
114
+ - Minimal code that reproduces the problem
115
+ - Full traceback if applicable
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 lehcimhdz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.