PyPI - nl-voting-data-scraper - Versions diffs - 0.1.0__tar.gz - Mend

nl-voting-data-scraper 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

nl_voting_data_scraper-0.1.0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,37 @@
+name: Publish to PyPI
+on:
+  release:
+    types: [published]
+jobs:
+  build:
+    name: Build distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install build tools
+        run: pip install build
+      - name: Build package
+        run: python -m build
+      - uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  publish:
+    name: Publish to PyPI
+    needs: build
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - uses: pypa/gh-action-pypi-publish@release/v1

nl_voting_data_scraper-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,19 @@
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+.eggs/
+*.egg
+.venv/
+venv/
+env/
+.env
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+output/
+.cache/
+*.log
+.DS_Store

nl_voting_data_scraper-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Rehan Fazal
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

nl_voting_data_scraper-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,178 @@
+Metadata-Version: 2.4
+Name: nl-voting-data-scraper
+Version: 0.1.0
+Summary: Scrape Dutch voting advice (StemWijzer) data for any election
+Author: Rehan Fazal
+License: MIT
+License-File: LICENSE
+Keywords: dutch,elections,scraper,stemwijzer,votematch
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
+Requires-Python: >=3.11
+Requires-Dist: click>=8.3
+Requires-Dist: httpx>=0.28
+Requires-Dist: pycryptodome>=3.23
+Requires-Dist: pydantic>=2.12
+Requires-Dist: rich>=14.0
+Requires-Dist: tenacity>=9.1
+Provides-Extra: browser
+Requires-Dist: playwright>=1.58; extra == 'browser'
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=1.3; extra == 'dev'
+Requires-Dist: pytest-httpx>=0.36; extra == 'dev'
+Requires-Dist: pytest>=9.0; extra == 'dev'
+Requires-Dist: ruff>=0.15; extra == 'dev'
+Description-Content-Type: text/markdown
+# nl-voting-data-scraper
+Scrape Dutch voting advice ([StemWijzer](https://stemwijzer.nl)) data for any election — municipal, national, European, or provincial.
+Outputs structured JSON with party positions, policy statements, and metadata. Reusable across election cycles.
+## Installation
+```bash
+pip install nl-voting-data-scraper
+```
+For browser automation fallback (optional):
+```bash
+pip install "nl-voting-data-scraper[browser]"
+playwright install chromium
+```
+## Quick Start
+### CLI
+```bash
+# List known elections
+nl-voting-data-scraper list-elections
+# Scrape all municipalities for 2026 municipal elections
+nl-voting-data-scraper scrape gr2026 -o ./output
+# Scrape a specific municipality
+nl-voting-data-scraper scrape gr2026 -m GM0014 -o ./output
+# Scrape national election
+nl-voting-data-scraper scrape tk2025 -o ./output
+# List municipalities for an election
+nl-voting-data-scraper list-municipalities gr2026
+# Discover API endpoints
+nl-voting-data-scraper discover gr2026
+```
+### Python Library
+```python
+import asyncio
+from nl_voting_data_scraper import StemwijzerScraper
+async def main():
+    async with StemwijzerScraper("gr2026") as scraper:
+        # Scrape a single municipality
+        data = await scraper.scrape_one("GM0014")
+        print(f"{data.votematch.name}: {len(data.parties)} parties, {len(data.statements)} statements")
+        # Scrape all
+        results = await scraper.scrape()
+        print(f"Scraped {len(results)} entries")
+asyncio.run(main())
+```
+## Supported Elections
+| Slug | Type | Year | Description |
+|------|------|------|-------------|
+| `gr2026` | Municipal | 2026 | Gemeenteraadsverkiezingen 2026 |
+| `tk2025` | National | 2025 | Tweede Kamerverkiezingen 2025 |
+| `tk2023` | National | 2023 | Tweede Kamerverkiezingen 2023 |
+| `eu2024` | European | 2024 | Europees Parlement 2024 |
+| `ps2023` | Provincial | 2023 | Provinciale Staten 2023 |
+New elections are auto-detected from URL patterns. You can also pass custom election slugs.
+## How It Works
+**Hybrid approach:**
+1. **API-first (fast):** Tries to fetch data from StemWijzer data endpoints via HTTP. Handles base64-encoded responses and optional AES decryption.
+2. **Browser fallback:** If the API fails, uses Playwright to load the frontend, intercept network requests, and capture the data. Falls back to DOM extraction as a last resort.
+## Output Format
+Each municipality/election produces a JSON file:
+```json
+{
+  "parties": [
+    {
+      "id": 206919,
+      "name": "Party Name",
+      "fullName": "Full Party Name",
+      "website": "https://...",
+      "hasSeats": true,
+      "statements": [
+        { "id": 206987, "position": "agree", "explanation": "..." }
+      ]
+    }
+  ],
+  "statements": [
+    {
+      "id": 206987,
+      "theme": "Housing",
+      "title": "The municipality should build more affordable housing.",
+      "index": 1
+    }
+  ],
+  "shootoutStatements": [...],
+  "votematch": {
+    "id": 206918,
+    "name": "Municipality Name",
+    "context": "2026GR",
+    "remote_id": "GM0014",
+    "langcode": "nl"
+  }
+}
+```
+## CLI Options
+```
+nl-voting-data-scraper scrape ELECTION [OPTIONS]
+Options:
+  -m, --municipality TEXT   Specific GM codes (repeatable)
+  -l, --language TEXT       Languages to scrape (default: nl)
+  -o, --output TEXT         Output directory (default: ./output)
+  --combined                Also write combined.json
+  --rate-limit FLOAT        Requests per second (default: 2.0)
+  --no-cache                Disable caching
+  --resume                  Resume interrupted scrape
+  --browser-only            Only use browser scraping
+  --api-only                Only use API scraping
+  -v, --verbose             Verbose output
+```
+## Development
+```bash
+git clone https://github.com/rhnfzl/nl-voting-data-scraper.git
+cd nl-voting-data-scraper
+pip install -e ".[dev,browser]"
+playwright install chromium
+pytest
+```
+## License
+MIT

nl_voting_data_scraper-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,149 @@
+# nl-voting-data-scraper
+Scrape Dutch voting advice ([StemWijzer](https://stemwijzer.nl)) data for any election — municipal, national, European, or provincial.
+Outputs structured JSON with party positions, policy statements, and metadata. Reusable across election cycles.
+## Installation
+```bash
+pip install nl-voting-data-scraper
+```
+For browser automation fallback (optional):
+```bash
+pip install "nl-voting-data-scraper[browser]"
+playwright install chromium
+```
+## Quick Start
+### CLI
+```bash
+# List known elections
+nl-voting-data-scraper list-elections
+# Scrape all municipalities for 2026 municipal elections
+nl-voting-data-scraper scrape gr2026 -o ./output
+# Scrape a specific municipality
+nl-voting-data-scraper scrape gr2026 -m GM0014 -o ./output
+# Scrape national election
+nl-voting-data-scraper scrape tk2025 -o ./output
+# List municipalities for an election
+nl-voting-data-scraper list-municipalities gr2026
+# Discover API endpoints
+nl-voting-data-scraper discover gr2026
+```
+### Python Library
+```python
+import asyncio
+from nl_voting_data_scraper import StemwijzerScraper
+async def main():
+    async with StemwijzerScraper("gr2026") as scraper:
+        # Scrape a single municipality
+        data = await scraper.scrape_one("GM0014")
+        print(f"{data.votematch.name}: {len(data.parties)} parties, {len(data.statements)} statements")
+        # Scrape all
+        results = await scraper.scrape()
+        print(f"Scraped {len(results)} entries")
+asyncio.run(main())
+```
+## Supported Elections
+| Slug | Type | Year | Description |
+|------|------|------|-------------|
+| `gr2026` | Municipal | 2026 | Gemeenteraadsverkiezingen 2026 |
+| `tk2025` | National | 2025 | Tweede Kamerverkiezingen 2025 |
+| `tk2023` | National | 2023 | Tweede Kamerverkiezingen 2023 |
+| `eu2024` | European | 2024 | Europees Parlement 2024 |
+| `ps2023` | Provincial | 2023 | Provinciale Staten 2023 |
+New elections are auto-detected from URL patterns. You can also pass custom election slugs.
+## How It Works
+**Hybrid approach:**
+1. **API-first (fast):** Tries to fetch data from StemWijzer data endpoints via HTTP. Handles base64-encoded responses and optional AES decryption.
+2. **Browser fallback:** If the API fails, uses Playwright to load the frontend, intercept network requests, and capture the data. Falls back to DOM extraction as a last resort.
+## Output Format
+Each municipality/election produces a JSON file:
+```json
+{
+  "parties": [
+    {
+      "id": 206919,
+      "name": "Party Name",
+      "fullName": "Full Party Name",
+      "website": "https://...",
+      "hasSeats": true,
+      "statements": [
+        { "id": 206987, "position": "agree", "explanation": "..." }
+      ]
+    }
+  ],
+  "statements": [
+    {
+      "id": 206987,
+      "theme": "Housing",
+      "title": "The municipality should build more affordable housing.",
+      "index": 1
+    }
+  ],
+  "shootoutStatements": [...],
+  "votematch": {
+    "id": 206918,
+    "name": "Municipality Name",
+    "context": "2026GR",
+    "remote_id": "GM0014",
+    "langcode": "nl"
+  }
+}
+```
+## CLI Options
+```
+nl-voting-data-scraper scrape ELECTION [OPTIONS]
+Options:
+  -m, --municipality TEXT   Specific GM codes (repeatable)
+  -l, --language TEXT       Languages to scrape (default: nl)
+  -o, --output TEXT         Output directory (default: ./output)
+  --combined                Also write combined.json
+  --rate-limit FLOAT        Requests per second (default: 2.0)
+  --no-cache                Disable caching
+  --resume                  Resume interrupted scrape
+  --browser-only            Only use browser scraping
+  --api-only                Only use API scraping
+  -v, --verbose             Verbose output
+```
+## Development
+```bash
+git clone https://github.com/rhnfzl/nl-voting-data-scraper.git
+cd nl-voting-data-scraper
+pip install -e ".[dev,browser]"
+playwright install chromium
+pytest
+```
+## License
+MIT

nl_voting_data_scraper-0.1.0/examples/scrape_all_municipal.py ADDED Viewed

@@ -0,0 +1,29 @@
+"""Example: Scrape all municipalities for the 2026 municipal elections."""
+import asyncio
+from pathlib import Path
+from nl_voting_data_scraper import StemwijzerScraper
+from nl_voting_data_scraper.output import write_all
+async def main():
+    async with StemwijzerScraper("gr2026", rate_limit=2.0) as scraper:
+        # Fetch index first to see how many municipalities
+        index = await scraper.fetch_index()
+        print(f"Found {len(index)} entries")
+        # Scrape all
+        results = await scraper.scrape()
+        print(f"Scraped {len(results)} entries")
+        # Write to output directory
+        output_dir = Path("output") / "gr2026"
+        paths = write_all(results, output_dir, write_combined=True)
+        print(f"\nWritten to {output_dir}/")
+        for name, path in paths.items():
+            print(f"  {name}: {path}")
+if __name__ == "__main__":
+    asyncio.run(main())

nl_voting_data_scraper-0.1.0/examples/scrape_national.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""Example: Scrape national (Tweede Kamer) election data."""
+import asyncio
+import json
+from nl_voting_data_scraper import StemwijzerScraper
+async def main():
+    async with StemwijzerScraper("tk2025") as scraper:
+        results = await scraper.scrape()
+        for data in results:
+            print(f"Election: {data.votematch.name}")
+            print(f"Context: {data.votematch.context}")
+            print(f"Parties: {len(data.parties)}")
+            print(f"Statements: {len(data.statements)}")
+            # Save to file
+            filename = f"{data.votematch.context}.json"
+            with open(filename, "w") as f:
+                json.dump(data.model_dump(by_alias=True), f, ensure_ascii=False, indent=2)
+            print(f"Saved to {filename}")
+if __name__ == "__main__":
+    asyncio.run(main())

nl_voting_data_scraper-0.1.0/examples/scrape_single.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Example: Scrape a single municipality."""
+import asyncio
+from nl_voting_data_scraper import StemwijzerScraper
+async def main():
+    async with StemwijzerScraper("gr2026") as scraper:
+        # Scrape Groningen
+        data = await scraper.scrape_one("GM0014")
+        print(f"Municipality: {data.votematch.name}")
+        print(f"Parties: {len(data.parties)}")
+        print(f"Statements: {len(data.statements)}")
+        print(f"Shootout statements: {len(data.shootoutStatements)}")
+        # Print first party and first statement
+        party = data.parties[0]
+        print(f"\nFirst party: {party.name}")
+        print(f"  Positions: {len(party.statements)}")
+        stmt = data.statements[0]
+        print(f"\nFirst statement: {stmt.title}")
+        print(f"  Theme: {stmt.theme}")
+if __name__ == "__main__":
+    asyncio.run(main())

nl_voting_data_scraper-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,51 @@
+[project]
+name = "nl-voting-data-scraper"
+version = "0.1.0"
+description = "Scrape Dutch voting advice (StemWijzer) data for any election"
+readme = "README.md"
+license = { text = "MIT" }
+requires-python = ">=3.11"
+authors = [{ name = "Rehan Fazal" }]
+keywords = ["stemwijzer", "dutch", "elections", "scraper", "votematch"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
+]
+dependencies = [
+    "httpx>=0.28",
+    "pydantic>=2.12",
+    "click>=8.3",
+    "rich>=14.0",
+    "pycryptodome>=3.23",
+    "tenacity>=9.1",
+]
+[project.optional-dependencies]
+browser = ["playwright>=1.58"]
+dev = [
+    "pytest>=9.0",
+    "pytest-asyncio>=1.3",
+    "pytest-httpx>=0.36",
+    "ruff>=0.15",
+]
+[project.scripts]
+nl-voting-data-scraper = "nl_voting_data_scraper.cli:cli"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/nl_voting_data_scraper"]
+[tool.ruff]
+target-version = "py311"
+line-length = 100
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+testpaths = ["tests"]

nl_voting_data_scraper-0.1.0/src/nl_voting_data_scraper/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""nl-voting-data-scraper: Scrape Dutch voting advice (StemWijzer) data for any election."""
+__version__ = "0.1.0"
+from nl_voting_data_scraper.models import (
+    ElectionData,
+    ElectionIndexEntry,
+    Party,
+    PartyPosition,
+    Statement,
+    VotematchMeta,
+)
+from nl_voting_data_scraper.scraper import StemwijzerScraper
+# Public API of the package: the scraper class plus the model classes imported above.
+__all__ = [
+    "StemwijzerScraper",
+    "ElectionData",
+    "ElectionIndexEntry",
+    "Party",
+    "PartyPosition",
+    "Statement",
+    "VotematchMeta",
+]