openquery 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. openquery-0.1.0/.github/ISSUE_TEMPLATE/bug_report.md +32 -0
  2. openquery-0.1.0/.github/ISSUE_TEMPLATE/feature_request.md +25 -0
  3. openquery-0.1.0/.github/PULL_REQUEST_TEMPLATE.md +17 -0
  4. openquery-0.1.0/.github/workflows/ci.yml +56 -0
  5. openquery-0.1.0/.github/workflows/publish.yml +43 -0
  6. openquery-0.1.0/.gitignore +13 -0
  7. openquery-0.1.0/.pre-commit-config.yaml +17 -0
  8. openquery-0.1.0/.python-version +1 -0
  9. openquery-0.1.0/CHANGELOG.md +31 -0
  10. openquery-0.1.0/CONTRIBUTING.md +81 -0
  11. openquery-0.1.0/Dockerfile +26 -0
  12. openquery-0.1.0/LICENSE +21 -0
  13. openquery-0.1.0/PKG-INFO +285 -0
  14. openquery-0.1.0/README.md +238 -0
  15. openquery-0.1.0/SECURITY.md +37 -0
  16. openquery-0.1.0/docker-compose.yml +24 -0
  17. openquery-0.1.0/pyproject.toml +70 -0
  18. openquery-0.1.0/src/openquery/__init__.py +3 -0
  19. openquery-0.1.0/src/openquery/__main__.py +5 -0
  20. openquery-0.1.0/src/openquery/app.py +44 -0
  21. openquery-0.1.0/src/openquery/commands/__init__.py +1 -0
  22. openquery-0.1.0/src/openquery/commands/query.py +82 -0
  23. openquery-0.1.0/src/openquery/commands/serve.py +36 -0
  24. openquery-0.1.0/src/openquery/commands/sources.py +30 -0
  25. openquery-0.1.0/src/openquery/config.py +50 -0
  26. openquery-0.1.0/src/openquery/core/__init__.py +1 -0
  27. openquery-0.1.0/src/openquery/core/browser.py +120 -0
  28. openquery-0.1.0/src/openquery/core/cache.py +158 -0
  29. openquery-0.1.0/src/openquery/core/captcha.py +130 -0
  30. openquery-0.1.0/src/openquery/core/rate_limit.py +79 -0
  31. openquery-0.1.0/src/openquery/core/retry.py +54 -0
  32. openquery-0.1.0/src/openquery/exceptions.py +38 -0
  33. openquery-0.1.0/src/openquery/models/__init__.py +1 -0
  34. openquery-0.1.0/src/openquery/models/co/__init__.py +1 -0
  35. openquery-0.1.0/src/openquery/models/co/runt.py +108 -0
  36. openquery-0.1.0/src/openquery/models/co/simit.py +24 -0
  37. openquery-0.1.0/src/openquery/models/common.py +22 -0
  38. openquery-0.1.0/src/openquery/server/__init__.py +1 -0
  39. openquery-0.1.0/src/openquery/server/app.py +31 -0
  40. openquery-0.1.0/src/openquery/server/auth.py +33 -0
  41. openquery-0.1.0/src/openquery/server/deps.py +28 -0
  42. openquery-0.1.0/src/openquery/server/routes/__init__.py +1 -0
  43. openquery-0.1.0/src/openquery/server/routes/health.py +21 -0
  44. openquery-0.1.0/src/openquery/server/routes/query.py +108 -0
  45. openquery-0.1.0/src/openquery/server/routes/sources.py +29 -0
  46. openquery-0.1.0/src/openquery/sources/__init__.py +51 -0
  47. openquery-0.1.0/src/openquery/sources/base.py +73 -0
  48. openquery-0.1.0/src/openquery/sources/co/__init__.py +1 -0
  49. openquery-0.1.0/src/openquery/sources/co/runt.py +298 -0
  50. openquery-0.1.0/src/openquery/sources/co/simit.py +188 -0
  51. openquery-0.1.0/src/openquery/sources/us/__init__.py +1 -0
  52. openquery-0.1.0/tests/__init__.py +0 -0
  53. openquery-0.1.0/tests/conftest.py +29 -0
  54. openquery-0.1.0/tests/test_api.py +47 -0
  55. openquery-0.1.0/tests/test_cache.py +40 -0
  56. openquery-0.1.0/tests/test_captcha.py +40 -0
  57. openquery-0.1.0/tests/test_runt.py +137 -0
  58. openquery-0.1.0/tests/test_simit.py +177 -0
  59. openquery-0.1.0/uv.lock +1091 -0
@@ -0,0 +1,32 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Report a bug or unexpected behavior
4
+ title: ""
5
+ labels: bug
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Description
10
+
11
+ A clear description of the bug.
12
+
13
+ ## Steps to Reproduce
14
+
15
+ 1. Run `openquery ...`
16
+ 2. ...
17
+ 3. See error
18
+
19
+ ## Expected Behavior
20
+
21
+ What you expected to happen.
22
+
23
+ ## Actual Behavior
24
+
25
+ What actually happened. Include error messages or output.
26
+
27
+ ## Environment
28
+
29
+ - OpenQuery version: (`openquery --version`)
30
+ - Python version:
31
+ - OS:
32
+ - Source queried (if applicable):
@@ -0,0 +1,25 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature or data source
4
+ title: ""
5
+ labels: enhancement
6
+ assignees: ""
7
+ ---
8
+
9
+ ## Description
10
+
11
+ What feature or data source would you like?
12
+
13
+ ## Use Case
14
+
15
+ Why is this useful? What problem does it solve?
16
+
17
+ ## Proposed Solution
18
+
19
+ How should it work? (CLI usage, API response, etc.)
20
+
21
+ ## Additional Context
22
+
23
+ - Source URL (if requesting a new data source):
24
+ - Country/region:
25
+ - Does it require CAPTCHA or authentication?
@@ -0,0 +1,17 @@
1
+ ## Summary
2
+
3
+ Brief description of changes.
4
+
5
+ ## Changes
6
+
7
+ - ...
8
+
9
+ ## Testing
10
+
11
+ - [ ] Tests pass (`uv run pytest`)
12
+ - [ ] Linting passes (`uv run ruff check src/ tests/`)
13
+ - [ ] New source includes both unit and integration tests (if applicable)
14
+
15
+ ## Related Issues
16
+
17
+ Closes #
@@ -0,0 +1,56 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ permissions:
10
+ contents: read
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest, macos-latest]
19
+ python-version: ["3.12", "3.13"]
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v5
26
+
27
+ - name: Set up Python ${{ matrix.python-version }}
28
+ run: uv python install ${{ matrix.python-version }}
29
+
30
+ - name: Install dependencies
31
+ run: uv sync --all-extras
32
+
33
+ - name: Install system dependencies (Ubuntu)
34
+ if: runner.os == 'Linux'
35
+ run: sudo apt-get update && sudo apt-get install -y tesseract-ocr
36
+
37
+ - name: Install system dependencies (macOS)
38
+ if: runner.os == 'macOS'
39
+ run: brew install tesseract
40
+
41
+ - name: Install Playwright browsers
42
+ run: uv run playwright install --with-deps chromium
43
+
44
+ - name: Lint
45
+ run: uv run ruff check src/ tests/
46
+
47
+ - name: Test
48
+ run: uv run pytest --tb=short -q
49
+
50
+ docker:
51
+ runs-on: ubuntu-latest
52
+ steps:
53
+ - uses: actions/checkout@v4
54
+
55
+ - name: Build Docker image
56
+ run: docker build -t openquery:test .
@@ -0,0 +1,43 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+ id-token: write
10
+
11
+ jobs:
12
+ build:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Install uv
18
+ uses: astral-sh/setup-uv@v5
19
+
20
+ - name: Build package
21
+ run: uv build
22
+
23
+ - name: Upload artifacts
24
+ uses: actions/upload-artifact@v4
25
+ with:
26
+ name: dist
27
+ path: dist/
28
+
29
+ publish:
30
+ needs: build
31
+ runs-on: ubuntu-latest
32
+ environment:
33
+ name: pypi
34
+ url: https://pypi.org/project/openquery/
35
+ steps:
36
+ - name: Download artifacts
37
+ uses: actions/download-artifact@v4
38
+ with:
39
+ name: dist
40
+ path: dist/
41
+
42
+ - name: Publish to PyPI
43
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ .env
11
+ .pytest_cache/
12
+ .ruff_cache/
13
+ *.db
@@ -0,0 +1,17 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+ args: ["--maxkb=500"]
10
+ - id: check-merge-conflict
11
+
12
+ - repo: https://github.com/astral-sh/ruff-pre-commit
13
+ rev: v0.9.10
14
+ hooks:
15
+ - id: ruff
16
+ args: [--fix]
17
+ - id: ruff-format
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,31 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.1.0] - 2026-03-31
11
+
12
+ ### Added
13
+
14
+ - Core framework with `BaseSource` plugin architecture
15
+ - `BrowserManager` for Playwright-based scraping with WAF bypass
16
+ - CAPTCHA solving: `OCRSolver` (pytesseract), `TwoCaptchaSolver`, `ChainedSolver`
17
+ - Cache backends: in-memory (cachetools), Redis, SQLite
18
+ - Per-source token-bucket rate limiter
19
+ - Retry with exponential backoff
20
+ - **co.simit** — Colombian traffic fines (SIMIT) via Playwright DOM scraping
21
+ - **co.runt** — Colombian vehicle registry (RUNT) with CAPTCHA and Imperva WAF bypass
22
+ - FastAPI REST API with `/api/v1/query`, `/api/v1/sources`, `/api/v1/health`
23
+ - API key authentication middleware
24
+ - Typer CLI: `openquery query`, `openquery sources`, `openquery serve`
25
+ - Pydantic models for all response types
26
+ - Configuration via environment variables (`OPENQUERY_*`)
27
+ - Docker and docker-compose support with Redis
28
+ - 29 unit tests
29
+
30
+ [Unreleased]: https://github.com/dacrypt/openquery/compare/v0.1.0...HEAD
31
+ [0.1.0]: https://github.com/dacrypt/openquery/releases/tag/v0.1.0
@@ -0,0 +1,81 @@
1
+ # Contributing to OpenQuery
2
+
3
+ Thanks for your interest in contributing! This document provides guidelines to make the process smooth.
4
+
5
+ ## Development Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/dacrypt/openquery.git
9
+ cd openquery
10
+ uv sync --all-extras
11
+ uv run playwright install chromium
12
+ ```
13
+
14
+ ## Running Tests
15
+
16
+ ```bash
17
+ # Unit tests
18
+ uv run pytest
19
+
20
+ # With coverage
21
+ uv run pytest --cov=openquery
22
+
23
+ # Integration tests (hit real external services)
24
+ uv run pytest -m integration
25
+ ```
26
+
27
+ ## Code Quality
28
+
29
+ ```bash
30
+ # Lint
31
+ uv run ruff check src/ tests/
32
+
33
+ # Auto-fix
34
+ uv run ruff check --fix src/ tests/
35
+ ```
36
+
37
+ ## Adding a New Data Source
38
+
39
+ This is the most common type of contribution. To add a new source:
40
+
41
+ 1. **Create the model** in `src/openquery/models/<country>/` with a Pydantic `BaseModel`
42
+ 2. **Create the source** in `src/openquery/sources/<country>/` implementing `BaseSource`
43
+ 3. **Register it** with the `@register` decorator
44
+ 4. **Add tests** in `tests/test_<source>.py`
45
+ 5. **Update the README** source table
46
+
47
+ See [README.md](README.md#adding-a-new-source) for a complete example.
48
+
49
+ ### Source Guidelines
50
+
51
+ - Use `BrowserManager` for browser automation — don't manage Playwright directly
52
+ - Use `CaptchaSolver` for CAPTCHA handling — don't implement solving inline
53
+ - Include a `SourceMeta` with accurate `rate_limit_rpm` to be respectful to servers
54
+ - Return typed Pydantic models, not raw dicts
55
+ - Add both unit tests (mocked) and integration tests (marked with `@pytest.mark.integration`)
56
+
57
+ ## Pull Requests
58
+
59
+ 1. Fork the repo and create a branch from `main`
60
+ 2. Make your changes
61
+ 3. Ensure tests pass: `uv run pytest`
62
+ 4. Ensure linting passes: `uv run ruff check src/ tests/`
63
+ 5. Write a clear PR description explaining what and why
64
+
65
+ ## Reporting Bugs
66
+
67
+ Open an issue with:
68
+
69
+ - Steps to reproduce
70
+ - Expected vs actual behavior
71
+ - OpenQuery version (`openquery --version`)
72
+ - Python version and OS
73
+
74
+ ## Suggesting Sources
75
+
76
+ If you know of a useful public data source, open an issue with:
77
+
78
+ - Source name and URL
79
+ - What data it provides
80
+ - Whether it requires CAPTCHA or authentication
81
+ - Country/region it covers
@@ -0,0 +1,26 @@
1
+ FROM python:3.12-slim
2
+
3
+ # System dependencies for Tesseract OCR (Playwright's own system dependencies are installed later via `playwright install --with-deps`)
4
+ RUN apt-get update && apt-get install -y --no-install-recommends \
5
+ tesseract-ocr \
6
+ libtesseract-dev \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Install uv
10
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
11
+
12
+ WORKDIR /app
13
+
14
+ # Copy project files
15
+ COPY pyproject.toml .
16
+ COPY src/ src/
17
+
18
+ # Install dependencies
19
+ RUN uv pip install --system ".[serve]"
20
+
21
+ # Install Playwright browsers
22
+ RUN playwright install --with-deps chromium
23
+
24
+ EXPOSE 8000
25
+
26
+ CMD ["openquery", "serve", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 dacrypt
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,285 @@
1
+ Metadata-Version: 2.4
2
+ Name: openquery
3
+ Version: 0.1.0
4
+ Summary: Query public data sources worldwide via scraping and APIs
5
+ Project-URL: Homepage, https://github.com/dacrypt/openquery
6
+ Project-URL: Repository, https://github.com/dacrypt/openquery
7
+ Project-URL: Issues, https://github.com/dacrypt/openquery/issues
8
+ Project-URL: Changelog, https://github.com/dacrypt/openquery/blob/main/CHANGELOG.md
9
+ Author-email: dacrypt <dev@dacrypt.dev>
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: api,captcha,government,playwright,public-data,scraping,web-scraping
13
+ Classifier: Development Status :: 3 - Alpha
14
+ Classifier: Environment :: Console
15
+ Classifier: Framework :: FastAPI
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.12
25
+ Requires-Dist: cachetools>=5.5
26
+ Requires-Dist: httpx>=0.28
27
+ Requires-Dist: pillow>=11.0
28
+ Requires-Dist: playwright>=1.49
29
+ Requires-Dist: pydantic-settings>=2.7
30
+ Requires-Dist: pydantic>=2.10
31
+ Requires-Dist: pytesseract>=0.3.13
32
+ Requires-Dist: rich>=13.9
33
+ Requires-Dist: typer>=0.15
34
+ Provides-Extra: captcha
35
+ Requires-Dist: 2captcha-python>=1.5; extra == 'captcha'
36
+ Provides-Extra: dev
37
+ Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
38
+ Requires-Dist: pytest-httpx>=0.35; extra == 'dev'
39
+ Requires-Dist: pytest>=8; extra == 'dev'
40
+ Requires-Dist: ruff>=0.9; extra == 'dev'
41
+ Provides-Extra: redis
42
+ Requires-Dist: redis[hiredis]>=5.2; extra == 'redis'
43
+ Provides-Extra: serve
44
+ Requires-Dist: fastapi>=0.115; extra == 'serve'
45
+ Requires-Dist: uvicorn[standard]>=0.34; extra == 'serve'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # OpenQuery
49
+
50
+ [![CI](https://github.com/dacrypt/openquery/actions/workflows/ci.yml/badge.svg)](https://github.com/dacrypt/openquery/actions/workflows/ci.yml)
51
+ [![PyPI](https://img.shields.io/pypi/v/openquery)](https://pypi.org/project/openquery/)
52
+ [![Python](https://img.shields.io/pypi/pyversions/openquery)](https://pypi.org/project/openquery/)
53
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
54
+
55
+ Query public data sources worldwide through a unified CLI and REST API.
56
+
57
+ OpenQuery provides a plugin-based framework for scraping government websites, public registries, and open data APIs. It handles the hard parts — browser automation, CAPTCHA solving, WAF bypass, caching, and rate limiting — so you can focus on the data.
58
+
59
+ ## Features
60
+
61
+ - **Unified interface** — one CLI and one API endpoint for all data sources
62
+ - **Browser automation** — Playwright-based scraping for JavaScript-heavy sites
63
+ - **CAPTCHA solving** — local OCR (pytesseract) with optional paid service fallback
64
+ - **WAF bypass** — browser-context API calls preserve session cookies
65
+ - **Caching** — in-memory, Redis, or SQLite backends with configurable TTL
66
+ - **Rate limiting** — per-source token-bucket to respect server limits
67
+ - **REST API** — FastAPI server with auto-generated OpenAPI docs
68
+ - **Extensible** — add new data sources by implementing a single class
69
+ - **Country-organized** — sources grouped by country code (`co`, `us`, etc.)
70
+
71
+ ## Built-in Sources
72
+
73
+ | Source | Country | Description | Inputs | CAPTCHA |
74
+ |--------|---------|-------------|--------|---------|
75
+ | `co.simit` | CO | Traffic fines and violations | cedula, placa | No |
76
+ | `co.runt` | CO | National vehicle registry (SOAT, RTM, ownership) | vin, placa | Yes (OCR) |
77
+
78
+ ## Installation
79
+
80
+ ```bash
81
+ pip install openquery
82
+ ```
83
+
84
+ Or with [uv](https://docs.astral.sh/uv/):
85
+
86
+ ```bash
87
+ uv add openquery
88
+ ```
89
+
90
+ ### System Dependencies
91
+
92
+ OpenQuery requires [Tesseract OCR](https://github.com/tesseract-ocr/tesseract) for CAPTCHA solving and Playwright browsers for web scraping:
93
+
94
+ ```bash
95
+ # macOS
96
+ brew install tesseract
97
+ playwright install chromium
98
+
99
+ # Ubuntu/Debian
100
+ sudo apt-get install tesseract-ocr
101
+ playwright install --with-deps chromium
102
+ ```
103
+
104
+ ### Optional Extras
105
+
106
+ ```bash
107
+ pip install "openquery[serve]" # FastAPI server (fastapi, uvicorn)
108
+ pip install "openquery[redis]" # Redis cache backend
109
+ pip install "openquery[captcha]" # 2captcha paid CAPTCHA solving
110
+ ```
111
+
112
+ ## Quick Start
113
+
114
+ ### CLI
115
+
116
+ ```bash
117
+ # List available data sources
118
+ openquery sources
119
+
120
+ # Query Colombian traffic fines by cedula
121
+ openquery query co.simit --cedula 12345678
122
+
123
+ # Query Colombian vehicle registry by plate
124
+ openquery query co.runt --placa ABC123
125
+
126
+ # Query by VIN
127
+ openquery query co.runt --vin 5YJ3E1EA1PF000001
128
+
129
+ # Output raw JSON
130
+ openquery query co.simit --cedula 12345678 --json
131
+ ```
132
+
133
+ ### REST API
134
+
135
+ ```bash
136
+ # Start the API server
137
+ openquery serve
138
+
139
+ # Or with custom host/port
140
+ openquery serve --host 127.0.0.1 --port 3000
141
+ ```
142
+
143
+ Then query via HTTP:
144
+
145
+ ```bash
146
+ curl -X POST http://localhost:8000/api/v1/query \
147
+ -H "Content-Type: application/json" \
148
+ -d '{
149
+ "source": "co.simit",
150
+ "document_type": "cedula",
151
+ "document_number": "12345678"
152
+ }'
153
+ ```
154
+
155
+ **Response:**
156
+
157
+ ```json
158
+ {
159
+ "ok": true,
160
+ "source": "co.simit",
161
+ "queried_at": "2026-03-31T10:30:00Z",
162
+ "cached": false,
163
+ "latency_ms": 4523,
164
+ "data": {
165
+ "comparendos": 0,
166
+ "multas": 0,
167
+ "total_deuda": 0.0,
168
+ "paz_y_salvo": true
169
+ }
170
+ }
171
+ ```
172
+
173
+ **API Endpoints:**
174
+
175
+ | Method | Path | Description |
176
+ |--------|------|-------------|
177
+ | `POST` | `/api/v1/query` | Query a data source |
178
+ | `GET` | `/api/v1/sources` | List available sources |
179
+ | `GET` | `/api/v1/health` | Health check and cache stats |
180
+ | `GET` | `/docs` | Interactive API documentation |
181
+
182
+ ### Docker
183
+
184
+ ```bash
185
+ docker compose up
186
+ ```
187
+
188
+ This starts the API server with Redis caching on port 8000.
189
+
190
+ ## Configuration
191
+
192
+ All settings use environment variables with the `OPENQUERY_` prefix:
193
+
194
+ | Variable | Default | Description |
195
+ |----------|---------|-------------|
196
+ | `OPENQUERY_API_KEY` | _(none)_ | API key for server authentication |
197
+ | `OPENQUERY_CACHE_BACKEND` | `memory` | Cache backend: `memory`, `redis`, `sqlite` |
198
+ | `OPENQUERY_CACHE_TTL_DEFAULT` | `3600` | Default cache TTL in seconds |
199
+ | `OPENQUERY_REDIS_URL` | `redis://localhost:6379/0` | Redis connection URL |
200
+ | `OPENQUERY_BROWSER_HEADLESS` | `true` | Run browser in headless mode |
201
+ | `OPENQUERY_BROWSER_TIMEOUT` | `30.0` | Browser operation timeout in seconds |
202
+ | `OPENQUERY_RATE_LIMIT_DEFAULT_RPM` | `10` | Default requests per minute per source |
203
+ | `OPENQUERY_CAPTCHA_SOLVER` | `ocr` | CAPTCHA solver: `ocr`, `2captcha`, `chained` |
204
+ | `OPENQUERY_TWO_CAPTCHA_API_KEY` | _(none)_ | 2captcha.com API key |
205
+ | `OPENQUERY_LOG_LEVEL` | `INFO` | Logging level |
206
+
207
+ ## Adding a New Source
208
+
209
+ Create a new source by implementing the `BaseSource` class:
210
+
211
+ ```python
212
+ # src/openquery/sources/us/nhtsa.py
213
+ from pydantic import BaseModel
214
+ from openquery.sources import register
215
+ from openquery.sources.base import BaseSource, DocumentType, QueryInput, SourceMeta
216
+
217
+
218
+ class NhtsaResult(BaseModel):
219
+ manufacturer: str = ""
220
+ model: str = ""
221
+ year: int = 0
222
+ recalls: list[dict] = []
223
+
224
+
225
+ @register
226
+ class NhtsaSource(BaseSource):
227
+ def meta(self) -> SourceMeta:
228
+ return SourceMeta(
229
+ name="us.nhtsa",
230
+ display_name="NHTSA Vehicle Safety",
231
+ description="US vehicle safety recalls and VIN decoding",
232
+ country="US",
233
+ url="https://vpic.nhtsa.dot.gov/api/",
234
+ supported_inputs=[DocumentType.VIN],
235
+ requires_captcha=False,
236
+ requires_browser=False,
237
+ rate_limit_rpm=30,
238
+ )
239
+
240
+ def query(self, input: QueryInput) -> NhtsaResult:
241
+ import httpx
242
+ resp = httpx.get(
243
+ f"https://vpic.nhtsa.dot.gov/api/vehicles/decodevin/{input.document_number}",
244
+ params={"format": "json"},
245
+ )
246
+ data = resp.json()
247
+ # Parse and return NhtsaResult...
248
+ ```
249
+
250
+ The `@register` decorator automatically makes the source available in the CLI, API, and source listing.
251
+
252
+ ## Architecture
253
+
254
+ ```
255
+ openquery/
256
+ ├── core/ # Infrastructure (browser, captcha, cache, rate limiting)
257
+ ├── sources/ # Data source plugins, organized by country
258
+ │ ├── base.py # BaseSource ABC — implement this to add sources
259
+ │ ├── co/ # Colombia (SIMIT, RUNT)
260
+ │ └── us/ # United States (future)
261
+ ├── models/ # Pydantic response models, organized by country
262
+ ├── server/ # FastAPI REST API
263
+ └── commands/ # Typer CLI commands
264
+ ```
265
+
266
+ ## Development
267
+
268
+ ```bash
269
+ git clone https://github.com/dacrypt/openquery.git
270
+ cd openquery
271
+ uv sync --all-extras
272
+ uv run playwright install chromium
273
+
274
+ # Run tests
275
+ uv run pytest
276
+
277
+ # Lint
278
+ uv run ruff check src/ tests/
279
+ ```
280
+
281
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed guidelines.
282
+
283
+ ## License
284
+
285
+ [MIT](LICENSE)