open-data-mexico 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_data_mexico-1.0.0/.github/workflows/ci.yml +59 -0
- open_data_mexico-1.0.0/.github/workflows/docs.yml +24 -0
- open_data_mexico-1.0.0/.github/workflows/publish.yml +47 -0
- open_data_mexico-1.0.0/.gitignore +13 -0
- open_data_mexico-1.0.0/.pre-commit-config.yaml +14 -0
- open_data_mexico-1.0.0/CHANGELOG.md +63 -0
- open_data_mexico-1.0.0/CONTRIBUTING.md +115 -0
- open_data_mexico-1.0.0/LICENSE +21 -0
- open_data_mexico-1.0.0/PKG-INFO +577 -0
- open_data_mexico-1.0.0/README.md +514 -0
- open_data_mexico-1.0.0/SECURITY.md +34 -0
- open_data_mexico-1.0.0/docs/.gitkeep +0 -0
- open_data_mexico-1.0.0/docs/categories/agricultura.md +43 -0
- open_data_mexico-1.0.0/docs/categories/catalogo_datos.md +43 -0
- open_data_mexico-1.0.0/docs/categories/ciencia_tecnologia.md +43 -0
- open_data_mexico-1.0.0/docs/categories/cultura.md +43 -0
- open_data_mexico-1.0.0/docs/categories/deporte.md +43 -0
- open_data_mexico-1.0.0/docs/categories/derechos_humanos.md +43 -0
- open_data_mexico-1.0.0/docs/categories/economia.md +43 -0
- open_data_mexico-1.0.0/docs/categories/educacion.md +43 -0
- open_data_mexico-1.0.0/docs/categories/energia.md +43 -0
- open_data_mexico-1.0.0/docs/categories/gobierno.md +43 -0
- open_data_mexico-1.0.0/docs/categories/infraestructura.md +43 -0
- open_data_mexico-1.0.0/docs/categories/mar_costa.md +43 -0
- open_data_mexico-1.0.0/docs/categories/medio_ambiente.md +43 -0
- open_data_mexico-1.0.0/docs/categories/migracion.md +43 -0
- open_data_mexico-1.0.0/docs/categories/movilidad.md +43 -0
- open_data_mexico-1.0.0/docs/categories/mujeres.md +43 -0
- open_data_mexico-1.0.0/docs/categories/multiculturalidad.md +43 -0
- open_data_mexico-1.0.0/docs/categories/plan_apertura_datos.md +43 -0
- open_data_mexico-1.0.0/docs/categories/poblacion.md +43 -0
- open_data_mexico-1.0.0/docs/categories/presupuesto.md +43 -0
- open_data_mexico-1.0.0/docs/categories/programas_sociales.md +43 -0
- open_data_mexico-1.0.0/docs/categories/salud.md +43 -0
- open_data_mexico-1.0.0/docs/categories/seguridad.md +175 -0
- open_data_mexico-1.0.0/docs/categories/servicios.md +43 -0
- open_data_mexico-1.0.0/docs/categories/telecomunicaciones.md +43 -0
- open_data_mexico-1.0.0/docs/categories/territorio.md +43 -0
- open_data_mexico-1.0.0/docs/categories/trabajo.md +43 -0
- open_data_mexico-1.0.0/docs/categories/turismo.md +43 -0
- open_data_mexico-1.0.0/docs/index.md +228 -0
- open_data_mexico-1.0.0/mkdocs.yml +76 -0
- open_data_mexico-1.0.0/open_data_mexico/__init__.py +73 -0
- open_data_mexico-1.0.0/open_data_mexico/_config.py +19 -0
- open_data_mexico-1.0.0/open_data_mexico/_http.py +101 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/__init__.py +0 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/categories.py +169 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/dataset_detail.py +195 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/datasets.py +183 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/organizations.py +101 -0
- open_data_mexico-1.0.0/open_data_mexico/_scrapers/search.py +102 -0
- open_data_mexico-1.0.0/open_data_mexico/_utils.py +71 -0
- open_data_mexico-1.0.0/open_data_mexico/client.py +427 -0
- open_data_mexico-1.0.0/open_data_mexico/models.py +205 -0
- open_data_mexico-1.0.0/open_data_mexico/py.typed +0 -0
- open_data_mexico-1.0.0/pyproject.toml +94 -0
- open_data_mexico-1.0.0/server/__init__.py +0 -0
- open_data_mexico-1.0.0/server/app.py +109 -0
- open_data_mexico-1.0.0/tests/__init__.py +0 -0
- open_data_mexico-1.0.0/tests/conftest.py +654 -0
- open_data_mexico-1.0.0/tests/test_categories.py +173 -0
- open_data_mexico-1.0.0/tests/test_dataset_detail.py +245 -0
- open_data_mexico-1.0.0/tests/test_datasets.py +154 -0
- open_data_mexico-1.0.0/tests/test_organizations.py +164 -0
- open_data_mexico-1.0.0/tests/test_search.py +196 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, feature/**]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
name: Lint & type check
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.13"
|
|
19
|
+
|
|
20
|
+
- name: Install dev dependencies
|
|
21
|
+
run: pip install -e ".[dev]"
|
|
22
|
+
|
|
23
|
+
- name: Ruff lint
|
|
24
|
+
run: ruff check .
|
|
25
|
+
|
|
26
|
+
- name: Ruff format check
|
|
27
|
+
run: ruff format --check .
|
|
28
|
+
|
|
29
|
+
- name: Mypy
|
|
30
|
+
run: mypy open_data_mexico/ --ignore-missing-imports
|
|
31
|
+
|
|
32
|
+
test:
|
|
33
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
needs: lint
|
|
36
|
+
strategy:
|
|
37
|
+
fail-fast: false
|
|
38
|
+
matrix:
|
|
39
|
+
python-version: ["3.11", "3.12", "3.13"]
|
|
40
|
+
|
|
41
|
+
steps:
|
|
42
|
+
- uses: actions/checkout@v4
|
|
43
|
+
|
|
44
|
+
- uses: actions/setup-python@v5
|
|
45
|
+
with:
|
|
46
|
+
python-version: ${{ matrix.python-version }}
|
|
47
|
+
|
|
48
|
+
- name: Install dev dependencies
|
|
49
|
+
run: pip install -e ".[dev]"
|
|
50
|
+
|
|
51
|
+
- name: Run tests with coverage
|
|
52
|
+
run: pytest
|
|
53
|
+
|
|
54
|
+
- name: Upload coverage report
|
|
55
|
+
if: matrix.python-version == '3.13'
|
|
56
|
+
uses: actions/upload-artifact@v4
|
|
57
|
+
with:
|
|
58
|
+
name: coverage-report
|
|
59
|
+
path: .coverage
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: Deploy docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
deploy:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.13"
|
|
19
|
+
|
|
20
|
+
- name: Install docs dependencies
|
|
21
|
+
run: pip install mkdocs-material
|
|
22
|
+
|
|
23
|
+
- name: Deploy to GitHub Pages
|
|
24
|
+
run: mkdocs gh-deploy --force
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write # Required for Trusted Publisher (OIDC)
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
name: Build distribution
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.13"
|
|
20
|
+
|
|
21
|
+
- name: Install build tools
|
|
22
|
+
run: pip install build
|
|
23
|
+
|
|
24
|
+
- name: Build wheel and sdist
|
|
25
|
+
run: python -m build
|
|
26
|
+
|
|
27
|
+
- name: Upload dist artifacts
|
|
28
|
+
uses: actions/upload-artifact@v4
|
|
29
|
+
with:
|
|
30
|
+
name: dist
|
|
31
|
+
path: dist/
|
|
32
|
+
|
|
33
|
+
publish:
|
|
34
|
+
name: Publish to PyPI
|
|
35
|
+
runs-on: ubuntu-latest
|
|
36
|
+
needs: build
|
|
37
|
+
environment: pypi # Requires a GitHub environment named "pypi"
|
|
38
|
+
steps:
|
|
39
|
+
- name: Download dist artifacts
|
|
40
|
+
uses: actions/download-artifact@v4
|
|
41
|
+
with:
|
|
42
|
+
name: dist
|
|
43
|
+
path: dist/
|
|
44
|
+
|
|
45
|
+
- name: Publish to PyPI
|
|
46
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
47
|
+
# No token needed — uses OIDC Trusted Publisher configured on pypi.org
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
3
|
+
rev: v0.15.8
|
|
4
|
+
hooks:
|
|
5
|
+
- id: ruff
|
|
6
|
+
args: [--fix]
|
|
7
|
+
- id: ruff-format
|
|
8
|
+
|
|
9
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
10
|
+
rev: v1.19.1
|
|
11
|
+
hooks:
|
|
12
|
+
- id: mypy
|
|
13
|
+
args: [--ignore-missing-imports]
|
|
14
|
+
additional_dependencies: [pydantic>=2.0.0]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## [Unreleased]
|
|
11
|
+
|
|
12
|
+
## [1.0.0] — 2026-03-27
|
|
13
|
+
|
|
14
|
+
### Added
|
|
15
|
+
- `request_delay` parameter on `DatosGobMX` for configurable rate limiting between requests.
|
|
16
|
+
- `max_retries` parameter on `DatosGobMX` for automatic retry with exponential backoff on 5xx/429 and network errors.
|
|
17
|
+
- `cache_ttl` parameter on `DatosGobMX` for in-memory TTL caching of responses (default 300 s).
|
|
18
|
+
- `open_data_mexico/_http.py` — `robust_get()` helper used by all scrapers.
|
|
19
|
+
- FastAPI server uses a single shared `DatosGobMX` client via lifespan (connection pool + shared cache).
|
|
20
|
+
- `py.typed` marker for PEP 561 compliance (mypy/pyright support).
|
|
21
|
+
- GitHub Actions CI workflow — lint, type-check and test on Python 3.11, 3.12, 3.13.
|
|
22
|
+
- GitHub Actions publish workflow — build and publish to PyPI on GitHub Release via OIDC Trusted Publisher.
|
|
23
|
+
- `.pre-commit-config.yaml` with ruff and mypy hooks.
|
|
24
|
+
- `pytest-cov` with 80 % minimum coverage enforced.
|
|
25
|
+
- MkDocs site configuration.
|
|
26
|
+
- `client.search(query)` — full-text dataset search via CKAN `package_search` JSON API; supports `category`, `limit`, `offset` parameters; returns `SearchResponse`.
|
|
27
|
+
- `client.get_organizations()` — lists all 184+ publishing institutions via CKAN `organization_list`; returns `list[Organization]`.
|
|
28
|
+
- `client.get_organization(slug)` — fetches a single organization by slug via CKAN `organization_show`; returns `None` on 404.
|
|
29
|
+
- `Organization` and `OrganizationsResponse` Pydantic models.
|
|
30
|
+
- `SearchResponse` Pydantic model.
|
|
31
|
+
- `GET /organizations` and `GET /organizations/{slug}` FastAPI endpoints.
|
|
32
|
+
- `GET /search?q=...` FastAPI endpoint with optional `category`, `limit`, `offset` query params.
|
|
33
|
+
- `open_data_mexico/_utils.py` — `parse_spanish_date()` and `parse_iso_dt()` helpers for robust datetime parsing.
|
|
34
|
+
|
|
35
|
+
### Changed
|
|
36
|
+
- `_get_total_pages()` now scans all `<li>` elements in `ul.pagination` instead of only `<a>` tags, making pagination detection more robust against disabled/active page items rendered as `<span>`.
|
|
37
|
+
- `Dataset.last_updated`, `DatasetDetail.created`, and `DatasetDetail.last_updated` changed from `str | None` to `datetime | None`; values are now timezone-aware UTC datetimes.
|
|
38
|
+
- `pyproject.toml`: corrected project URLs to the real GitHub repository.
|
|
39
|
+
- `pyproject.toml`: `license` field now points to the `LICENSE` file.
|
|
40
|
+
- `pyproject.toml`: added Python 3.13 classifier.
|
|
41
|
+
- README updated to document the new client parameters and to correct the test count.
|
|
42
|
+
|
|
43
|
+
### Fixed
|
|
44
|
+
- Unused imports removed across `client.py`, `server/app.py`, and test files (ruff F401).
|
|
45
|
+
- `Optional[str]` annotations modernised to `str | None` (ruff UP045).
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## [0.1.0] — 2026-03-23
|
|
50
|
+
|
|
51
|
+
### Added
|
|
52
|
+
- Initial implementation of `DatosGobMX` async client.
|
|
53
|
+
- Scraping of categories (`/group/`), dataset listings (`/group/{slug}`), and dataset detail pages (`/dataset/{slug}`).
|
|
54
|
+
- Pydantic models: `Category`, `Dataset`, `DatasetDetail`, `Resource`, `CategoriesResponse`, `DatasetsResponse`.
|
|
55
|
+
- Auto-pagination for categories and dataset listings.
|
|
56
|
+
- `get_resource_data()` for in-memory CSV download with UTF-8 / latin-1 fallback.
|
|
57
|
+
- Optional FastAPI REST server (`pip install open-data-mexico[server]`).
|
|
58
|
+
- 44 tests with mock HTML fixtures (pytest-asyncio + pytest-httpx).
|
|
59
|
+
- Documentation for all 28 available categories.
|
|
60
|
+
|
|
61
|
+
[Unreleased]: https://github.com/lehcimhdz/open-data-mexico-api/compare/v1.0.0...HEAD
|
|
62
|
+
[1.0.0]: https://github.com/lehcimhdz/open-data-mexico-api/releases/tag/v1.0.0
|
|
63
|
+
[0.1.0]: https://github.com/lehcimhdz/open-data-mexico-api/releases/tag/v0.1.0
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# Contributing to open-data-mexico
|
|
2
|
+
|
|
3
|
+
Thank you for your interest in contributing! This document covers everything you need to get started.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Setup
|
|
8
|
+
|
|
9
|
+
**Requirements:** Python 3.11+ and git.
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/lehcimhdz/open-data-mexico-api.git
|
|
13
|
+
cd open-data-mexico-api
|
|
14
|
+
pip install -e ".[dev]"
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Install pre-commit hooks so linting, formatting, and type checking (ruff and mypy) run automatically before every commit:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install pre-commit
|
|
21
|
+
pre-commit install
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Running tests
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Full suite with coverage (must stay at or above 80 %)
|
|
30
|
+
pytest
|
|
31
|
+
|
|
32
|
+
# Skip coverage for a faster feedback loop
|
|
33
|
+
pytest --no-cov -v
|
|
34
|
+
|
|
35
|
+
# Single file
|
|
36
|
+
pytest tests/test_datasets.py -v
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Code quality
|
|
42
|
+
|
|
43
|
+
All three checks must pass before opening a PR — they also run in CI:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
ruff check . # lint
|
|
47
|
+
ruff format --check . # formatting
|
|
48
|
+
mypy open_data_mexico/ --ignore-missing-imports # type checking
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Auto-fix lint and formatting issues:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
ruff check . --fix
|
|
55
|
+
ruff format .
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Project structure
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
open_data_mexico/ # installable library (published to PyPI)
|
|
64
|
+
├── client.py # DatosGobMX — public entry point
|
|
65
|
+
├── models.py # Pydantic models
|
|
66
|
+
├── _config.py # BASE_URL, headers, defaults (private)
|
|
67
|
+
├── _http.py # robust_get() with retry/backoff (private)
|
|
68
|
+
└── _scrapers/ # HTML scraping internals (private)
|
|
69
|
+
├── categories.py
|
|
70
|
+
├── datasets.py
|
|
71
|
+
└── dataset_detail.py
|
|
72
|
+
server/ # optional FastAPI REST server
|
|
73
|
+
tests/ # pytest suite with mock HTML fixtures
|
|
74
|
+
docs/ # MkDocs source
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## Making changes
|
|
80
|
+
|
|
81
|
+
### Scraper changes
|
|
82
|
+
|
|
83
|
+
The scrapers parse HTML from `datos.gob.mx`. If the site changes its structure, update the CSS selectors and add/update the corresponding HTML fixture in `tests/conftest.py` to reflect the new markup.
|
|
84
|
+
|
|
85
|
+
### Adding a new public method
|
|
86
|
+
|
|
87
|
+
1. Implement it in `client.py`.
|
|
88
|
+
2. Export it from `open_data_mexico/__init__.py` and add it to `__all__`.
|
|
89
|
+
3. Add a Pydantic model in `models.py` if the method returns a new type.
|
|
90
|
+
4. Write tests in `tests/`.
|
|
91
|
+
5. Document it in `README.md` (Client Reference section).
|
|
92
|
+
|
|
93
|
+
### Updating the CHANGELOG
|
|
94
|
+
|
|
95
|
+
Add a bullet under `## [Unreleased]` in `CHANGELOG.md` following the [Keep a Changelog](https://keepachangelog.com) convention (`Added`, `Changed`, `Fixed`, `Removed`).
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Pull request checklist
|
|
100
|
+
|
|
101
|
+
- [ ] `pytest` passes with ≥ 80 % coverage
|
|
102
|
+
- [ ] `ruff check .` reports no errors
|
|
103
|
+
- [ ] `ruff format --check .` reports no changes needed
|
|
104
|
+
- [ ] `mypy open_data_mexico/ --ignore-missing-imports` reports no errors
|
|
105
|
+
- [ ] `CHANGELOG.md` updated under `[Unreleased]`
|
|
106
|
+
- [ ] New public API documented in `README.md`
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Reporting bugs
|
|
111
|
+
|
|
112
|
+
Open an issue at [github.com/lehcimhdz/open-data-mexico-api/issues](https://github.com/lehcimhdz/open-data-mexico-api/issues) with:
|
|
113
|
+
- Python version and OS
|
|
114
|
+
- Minimal code that reproduces the problem
|
|
115
|
+
- Full traceback if applicable
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 lehcimhdz
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|