bnetza-bk6-scraper 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. bnetza_bk6_scraper-0.0.1/.gitattributes +1 -0
  2. bnetza_bk6_scraper-0.0.1/.github/dependabot.yml +18 -0
  3. bnetza_bk6_scraper-0.0.1/.github/workflows/codeql-analysis.yml +70 -0
  4. bnetza_bk6_scraper-0.0.1/.github/workflows/coverage.yml +22 -0
  5. bnetza_bk6_scraper-0.0.1/.github/workflows/dependabot_automerge.yml +18 -0
  6. bnetza_bk6_scraper-0.0.1/.github/workflows/dev_test.yml +28 -0
  7. bnetza_bk6_scraper-0.0.1/.github/workflows/formatting.yml +25 -0
  8. bnetza_bk6_scraper-0.0.1/.github/workflows/no_byte_order_mark.yml +15 -0
  9. bnetza_bk6_scraper-0.0.1/.github/workflows/packaging_test.yml +22 -0
  10. bnetza_bk6_scraper-0.0.1/.github/workflows/python-publish.yml +65 -0
  11. bnetza_bk6_scraper-0.0.1/.github/workflows/pythonlint.yml +26 -0
  12. bnetza_bk6_scraper-0.0.1/.github/workflows/unittests.yml +26 -0
  13. bnetza_bk6_scraper-0.0.1/.gitignore +140 -0
  14. bnetza_bk6_scraper-0.0.1/.pre-commit-config.yaml +24 -0
  15. bnetza_bk6_scraper-0.0.1/PKG-INFO +160 -0
  16. bnetza_bk6_scraper-0.0.1/README.md +112 -0
  17. bnetza_bk6_scraper-0.0.1/docs/superpowers/plans/2026-07-03-bnetza-bk6-scraper.md +1197 -0
  18. bnetza_bk6_scraper-0.0.1/docs/superpowers/specs/2026-07-03-bnetza-bk6-scraper-design.md +189 -0
  19. bnetza_bk6_scraper-0.0.1/domain-specific-terms.txt +2 -0
  20. bnetza_bk6_scraper-0.0.1/pyproject.toml +117 -0
  21. bnetza_bk6_scraper-0.0.1/requirements.txt +67 -0
  22. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/__init__.py +6 -0
  23. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/_version.py +1 -0
  24. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/cli.py +36 -0
  25. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/discovery.py +19 -0
  26. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/fetch.py +83 -0
  27. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/models.py +69 -0
  28. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/normalize.py +22 -0
  29. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/parse.py +153 -0
  30. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/py.typed +2 -0
  31. bnetza_bk6_scraper-0.0.1/src/bnetza_bk6_scraper/scraper.py +142 -0
  32. bnetza_bk6_scraper-0.0.1/tox.ini +95 -0
@@ -0,0 +1 @@
1
+ unittests/fixtures/** -text
@@ -0,0 +1,18 @@
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "pip" # See documentation for possible values
9
+ directory: "/" # Location of package manifests
10
+ schedule:
11
+ interval: "weekly"
12
+ reviewers:
13
+ - "@Hochfrequenz/python-developers-review-team"
14
+ # Maintain dependencies for GitHub Actions
15
+ - package-ecosystem: "github-actions"
16
+ directory: "/"
17
+ schedule:
18
+ interval: "weekly"
@@ -0,0 +1,70 @@
1
+ # For most projects, this workflow file will not need changing; you simply need
2
+ # to commit it to your repository.
3
+ #
4
+ # You may wish to alter this file to override the set of languages analyzed,
5
+ # or to provide custom queries or build logic.
6
+ #
7
+ # ******** NOTE ********
8
+ # We have attempted to detect the languages in your repository. Please check
9
+ # the `language` matrix defined below to confirm you have the correct set of
10
+ # supported CodeQL languages.
11
+ #
12
+ name: "CodeQL"
13
+
14
+ on:
15
+ push:
16
+ branches: [main]
17
+ pull_request:
18
+ # The branches below must be a subset of the branches above
19
+ branches: [main]
20
+ schedule:
21
+ - cron: "29 14 * * 6"
22
+
23
+ jobs:
24
+ analyze:
25
+ name: Analyze
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ actions: read
29
+ contents: read
30
+ security-events: write
31
+
32
+ strategy:
33
+ fail-fast: false
34
+ matrix:
35
+ language: ["python"]
36
+ # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37
+ # Learn more about CodeQL language support at https://git.io/codeql-language-support
38
+
39
+ steps:
40
+ - name: Checkout repository
41
+ uses: actions/checkout@v7
42
+
43
+ # Initializes the CodeQL tools for scanning.
44
+ - name: Initialize CodeQL
45
+ uses: github/codeql-action/init@v4
46
+ with:
47
+ languages: ${{ matrix.language }}
48
+ # If you wish to specify custom queries, you can do so here or in a config file.
49
+ # By default, queries listed here will override any specified in a config file.
50
+ # Prefix the list here with "+" to use these queries and those in the config file.
51
+ # queries: ./path/to/local/query, your-org/your-repo/queries@main
52
+
53
+ # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
54
+ # If this step fails, then you should remove it and run the build manually (see below)
55
+ - name: Autobuild
56
+ uses: github/codeql-action/autobuild@v4
57
+
58
+ # ℹ️ Command-line programs to run using the OS shell.
59
+ # 📚 https://git.io/JvXDl
60
+
61
+ # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
62
+ # and modify them (or add more) to build your code if your project
63
+ # uses a compiled language
64
+
65
+ #- run: |
66
+ # make bootstrap
67
+ # make release
68
+
69
+ - name: Perform CodeQL Analysis
70
+ uses: github/codeql-action/analyze@v4
@@ -0,0 +1,22 @@
1
+ name: "Coverage"
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request: {}
7
+ jobs:
8
+ coverage:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v7
12
+ - name: Set up Python
13
+ uses: actions/setup-python@v6
14
+ with:
15
+ python-version: 3.14
16
+ - name: Install dependencies
17
+ run: |
18
+ python -m pip install --upgrade pip
19
+ pip install tox
20
+ - name: Run Tests and Record Coverage
21
+ run: |
22
+ tox -e coverage
@@ -0,0 +1,18 @@
1
+ name: Dependabot auto-approve / -merge
2
+ on: pull_request
3
+
4
+ jobs:
5
+ dependabot:
6
+ permissions:
7
+ contents: write
8
+ pull-requests: write
9
+ runs-on: ubuntu-latest
10
+ env:
11
+ PR_URL: ${{github.event.pull_request.html_url}}
12
+ GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
13
+ if: ${{ github.actor == 'dependabot[bot]' }}
14
+ steps:
15
+ - name: Approve a PR
16
+ run: gh pr review --approve "$PR_URL"
17
+ - name: Enable auto-merge for Dependabot PRs
18
+ run: gh pr merge --auto --squash "$PR_URL"
@@ -0,0 +1,28 @@
1
+ name: "Test Dev Environment"
2
+ # Checks that the dev environment (tox -e dev) can be set up.
3
+ # This might not work, if different linting/testing envs refer to different versions of the same lib (e.g. typing-extensions).
4
+ # Different versions of the same package might work for isolated specific envs (only linting, only testing...) but the dev environment inherits from all of them.
5
+ on:
6
+ push:
7
+ branches: [main]
8
+ pull_request: {}
9
+ jobs:
10
+ check:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.11", "3.12", "3.13", "3.14"]
15
+ os: [ubuntu-latest]
16
+ steps:
17
+ - uses: actions/checkout@v7
18
+ - name: Set up Python ${{ matrix.python-version }}
19
+ uses: actions/setup-python@v6
20
+ with:
21
+ python-version: ${{ matrix.python-version }}
22
+ - name: Install Dependencies
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ pip install tox
26
+ - name: Create a Dev Environment
27
+ run: |
28
+ tox -e dev
@@ -0,0 +1,25 @@
1
+ name: "Formatting"
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request: {}
7
+ jobs:
8
+ format:
9
+ runs-on: ubuntu-latest
10
+ strategy:
11
+ matrix:
12
+ tool: ["black", "isort"]
13
+ steps:
14
+ - uses: actions/checkout@v7
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v6
17
+ with:
18
+ python-version: 3.14
19
+ - name: Install dependencies
20
+ run: |
21
+ python -m pip install --upgrade pip
22
+ pip install .[formatting]
23
+ - name: ${{ matrix.tool }} Code Formatter
24
+ run: |
25
+ ${{ matrix.tool }} . --check
@@ -0,0 +1,15 @@
1
+ name: Prevent ByteOrderMarks
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request: {}
8
+
9
+ jobs:
10
+ bom-check:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v7
14
+ - uses: arma-actions/bom-check@v1
15
+ name: Check for BOM
@@ -0,0 +1,22 @@
1
+ name: "Packaging Test"
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request: {}
7
+ jobs:
8
+ check_packaging:
9
+ runs-on: ubuntu-latest
10
+ steps:
11
+ - uses: actions/checkout@v7
12
+ - name: Set up Python
13
+ uses: actions/setup-python@v6
14
+ with:
15
+ python-version: 3.14
16
+ - name: Install dependencies
17
+ run: |
18
+ python -m pip install --upgrade pip
19
+ pip install tox
20
+ - name: Run Packaging Test
21
+ run: |
22
+ tox -e test_packaging
@@ -0,0 +1,65 @@
1
+ # This GitHub workflow is only needed for python package releases which are supposed to be published on pypi.
2
+ # It requires the GitHub "environments" feature (see instructions below), which might not be available for private free accounts (but works for public or organization repos).
3
+ # After creating the "release" environment in the GitHub repo settings, you need to enter your GitHub organization/user name + repo name + "python-publish.yml" workflow file name in the PyPI UI to make this work.
4
+
5
+ # This workflow uploads a Python Package using Twine when a release is created.
6
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
7
+
8
+ name: Upload Python Package
9
+
10
+ on:
11
+ release:
12
+ types: [created, edited]
13
+
14
+ jobs:
15
+ tests:
16
+ if: startsWith(github.ref, 'refs/tags/v')
17
+ runs-on: ${{ matrix.os }}
18
+ strategy:
19
+ matrix:
20
+ python-version: ["3.14"]
21
+ os: [ubuntu-latest]
22
+ steps:
23
+ - uses: actions/checkout@v4
24
+ - name: Set up Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ - name: Install tox
29
+ run: |
30
+ python -m pip install --upgrade pip
31
+ pip install tox
32
+ - name: Run tox
33
+ run: |
34
+ tox
35
+
36
+ build-n-publish:
37
+ name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI
38
+ runs-on: ${{ matrix.os }}
39
+ strategy:
40
+ matrix:
41
+ python-version: [ "3.12" ]
42
+ os: [ ubuntu-latest ]
43
+ # Specifying a GitHub environment, which is strongly recommended by PyPI: https://docs.pypi.org/trusted-publishers/adding-a-publisher/
44
+ # you have to create an environment in your repository settings and add the environment name here
45
+ environment: release
46
+ permissions:
47
+ # IMPORTANT: this permission is mandatory for trusted publishing
48
+ id-token: write
49
+ needs: tests
50
+ steps:
51
+ - uses: actions/checkout@v4
52
+ - name: Set up Python ${{ matrix.python-version }}
53
+ uses: actions/setup-python@v5
54
+ with:
55
+ python-version: ${{ matrix.python-version }}
56
+ - name: Install dependencies
57
+ run: |
58
+ python -m pip install --upgrade pip
59
+ pip install .[packaging]
60
+ - name: Build wheel and source distributions
61
+ run: |
62
+ python -m build
63
+ - name: Publish distribution 📦 to PyPI
64
+ if: startsWith(github.ref, 'refs/tags/v')
65
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,26 @@
1
+ name: "Linting"
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request: {}
7
+ jobs:
8
+ pylint:
9
+ name: Python Code Quality and Lint
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ matrix:
13
+ linter-env: ["linting", "type_check", "spell_check"]
14
+ steps:
15
+ - uses: actions/checkout@v7
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v6
18
+ with:
19
+ python-version: 3.14
20
+ - name: Install Dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install tox
24
+ - name: Run ${{ matrix.linter-env }} via Tox
25
+ run: |
26
+ tox -e ${{ matrix.linter-env }}
@@ -0,0 +1,26 @@
1
+ name: "Unittests"
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request: {}
7
+ jobs:
8
+ pytest:
9
+ runs-on: ${{ matrix.os }}
10
+ strategy:
11
+ matrix:
12
+ python-version: ["3.11", "3.12", "3.13", "3.14"]
13
+ os: [ubuntu-latest]
14
+ steps:
15
+ - uses: actions/checkout@v7
16
+ - name: Set up Python ${{ matrix.python-version }}
17
+ uses: actions/setup-python@v6
18
+ with:
19
+ python-version: ${{ matrix.python-version }}
20
+ - name: Install Dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install tox
24
+ - name: Run the Unit Tests via Tox
25
+ run: |
26
+ tox -e tests
@@ -0,0 +1,140 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ .idea/
132
+
133
+ # vscode settings
134
+ .vscode/
135
+
136
+ src/bnetza_bk6_scraper/_version.py
137
+
138
+ # local smoke-test output
139
+ _smoke/
140
+ _smoke*/
@@ -0,0 +1,24 @@
1
+ # to update all repo revisions just run: pre-commit autoupdate
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: v4.4.0
5
+ hooks:
6
+ - id: check-yaml
7
+ - id: end-of-file-fixer
8
+ - id: trailing-whitespace
9
+ - repo: https://github.com/psf/black
10
+ rev: 23.9.1
11
+ hooks:
12
+ - id: black
13
+ language_version: python3
14
+ - repo: https://github.com/pycqa/isort
15
+ rev: 5.12.0
16
+ hooks:
17
+ - id: isort
18
+ name: isort (python)
19
+ - id: isort
20
+ name: isort (cython)
21
+ types: [cython]
22
+ - id: isort
23
+ name: isort (pyi)
24
+ types: [pyi]
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: bnetza_bk6_scraper
3
+ Version: 0.0.1
4
+ Summary: Scrapes documents of Bundesnetzagentur Beschlusskammer 6 into a structured, git-diffable mirror
5
+ Project-URL: Changelog, https://github.com/Hochfrequenz/bnetza_bk6_scraper/releases
6
+ Project-URL: Homepage, https://github.com/Hochfrequenz/bnetza_bk6_scraper
7
+ Author-email: Hochfrequenz Unternehmensberatung GmbH <info@hochfrequenz.de>
8
+ License: MIT
9
+ Keywords: beschlusskammer,bnetza,bundesnetzagentur,energy,scraper
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: aiohttp<3.14
23
+ Requires-Dist: beautifulsoup4
24
+ Requires-Dist: lxml
25
+ Requires-Dist: pydantic>=2
26
+ Requires-Dist: typer
27
+ Provides-Extra: coverage
28
+ Requires-Dist: coverage==7.15.0; extra == 'coverage'
29
+ Provides-Extra: dev
30
+ Requires-Dist: pip-tools; extra == 'dev'
31
+ Provides-Extra: formatting
32
+ Requires-Dist: black==26.5.1; extra == 'formatting'
33
+ Requires-Dist: isort==8.0.1; extra == 'formatting'
34
+ Provides-Extra: linting
35
+ Requires-Dist: pylint==4.0.6; extra == 'linting'
36
+ Provides-Extra: packaging
37
+ Requires-Dist: build==1.5.0; extra == 'packaging'
38
+ Requires-Dist: twine==6.2.0; extra == 'packaging'
39
+ Provides-Extra: spell-check
40
+ Requires-Dist: codespell==2.4.2; extra == 'spell-check'
41
+ Provides-Extra: tests
42
+ Requires-Dist: aioresponses==0.7.8; extra == 'tests'
43
+ Requires-Dist: pytest-asyncio==1.3.0; extra == 'tests'
44
+ Requires-Dist: pytest==9.1.1; extra == 'tests'
45
+ Provides-Extra: type-check
46
+ Requires-Dist: mypy==2.1.0; extra == 'type-check'
47
+ Description-Content-Type: text/markdown
48
+
49
+ # bnetza_bk6_scraper
50
+
51
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
52
+ ![Python Versions (officially) supported](https://img.shields.io/pypi/pyversions/bnetza_bk6_scraper.svg)
53
+ ![PyPI Status Badge](https://img.shields.io/pypi/v/bnetza_bk6_scraper)
54
+ ![Unittests status badge](https://github.com/Hochfrequenz/bnetza_bk6_scraper/workflows/Unittests/badge.svg)
55
+ ![Coverage status badge](https://github.com/Hochfrequenz/bnetza_bk6_scraper/workflows/Coverage/badge.svg)
56
+ ![Linting status badge](https://github.com/Hochfrequenz/bnetza_bk6_scraper/workflows/Linting/badge.svg)
57
+ ![Formatting status badge](https://github.com/Hochfrequenz/bnetza_bk6_scraper/workflows/Formatting/badge.svg)
58
+
59
+ `bnetza_bk6_scraper` mirrors the documents published by the German
60
+ Bundesnetzagentur (BNetzA) **Beschlusskammer 6** (BK6) into a structured,
61
+ git-diffable directory tree. BK6 regulates electricity network access and is a
62
+ constant source of consultations, rulings (Festlegungen) and their attachments.
63
+ Because the agency publishes these as loose PDFs on HTML pages with no changelog,
64
+ tracking *what* changed and *when* is painful. This tool discovers every BK6
65
+ proceeding, downloads its PDFs and a normalized HTML snapshot of each phase page,
66
+ and records structured metadata. Committing the output to git turns every
67
+ regulatory update into a reviewable diff.
68
+
69
+ ## Installation
70
+
71
+ ```bash
72
+ pip install bnetza_bk6_scraper
73
+ ```
74
+
75
+ ## Usage
76
+
77
+ The package installs a single console command, `bnetza-bk6-scraper`, with a
78
+ `mirror` subcommand:
79
+
80
+ ```bash
81
+ bnetza-bk6-scraper mirror --target <dir> [--concurrency N] [--year YYYY] [-v]
82
+ ```
83
+
84
+ | Option | Default | Description |
85
+ | --------------- | ------- | -------------------------------------------------- |
86
+ | `--target` | *(required)* | Output directory (the mirror repository root). |
87
+ | `--concurrency` | `4` | Number of parallel HTTP fetches. |
88
+ | `--year` | *(all)* | Restrict the run to a single year, e.g. `2023`. |
89
+ | `-v`, `--verbose` | off | Enable debug logging. |
90
+
91
+ Example — mirror only the 2023 proceedings into `./mirror`:
92
+
93
+ ```bash
94
+ bnetza-bk6-scraper mirror --target ./mirror --year 2023 -v
95
+ ```
96
+
97
+ Each run logs a summary such as
98
+ `run summary: 7 proceedings, 16 documents written, 0 failures`.
99
+
100
+ ## Output layout
101
+
102
+ Proceedings are written under `/{year}/{aktenzeichen}/`, with a top-level
103
+ `index.json` listing every mirrored proceeding:
104
+
105
+ ```text
106
+ <target>/
107
+ ├── index.json # summary of all proceedings
107
+ └── 2023/
108
+     └── BK6-23-241/
109
+         ├── metadata.json # structured proceeding metadata
110
+         ├── BK6-23-241_beschluss.html # normalized HTML snapshot of a phase page
111
+         ├── BK6-23-241_beschluss_vom_07.05.26.pdf
112
+         ├── BK6-23-241_bilarem.pdf
113
+         └── BK6-23-241_anlage_bilarem.pdf
115
+ ```
116
+
117
+ - `metadata.json` captures the Aktenzeichen, year, title, status, `Stand`
118
+ (last-modified date), any submission deadline (Frist), the phase pages, and
119
+ one entry per document (title, type, source URL, filename).
120
+ - The normalized `*.html` files are trimmed, stable snapshots of the source
121
+ phase pages so that content changes surface as small diffs.
122
+ - The PDFs are the proceeding's documents, downloaded verbatim.
123
+
124
+ Change detection is intentionally "dumb": the tool always writes the current
125
+ state, and `git diff` in the mirror repository reveals what changed.
126
+
127
+ ## Mirror repository
128
+
129
+ The scraper is designed to feed a separate mirror repository,
130
+ `Hochfrequenz/bnetza_bk6_mirror`. A scheduled GitHub Action there will
131
+ periodically:
132
+
133
+ ```bash
134
+ pip install bnetza_bk6_scraper
135
+ bnetza-bk6-scraper mirror --target .
136
+ git add -A && git commit -m "update BK6 mirror"
137
+ ```
138
+
139
+ so that regulatory changes at BK6 become visible as reviewable git diffs and
140
+ commit history. That Action is future work and does not live in this repository.
141
+
142
+ ## WAF / browser User-Agent
143
+
144
+ The BNetzA website sits behind a Web Application Firewall that rejects
145
+ non-browser clients by serving a `200 OK` "The requested URL was rejected" page
146
+ instead of the real content. To get through, the scraper sends browser-like
147
+ `User-Agent` and `Accept` headers and treats the rejection page as a retryable
148
+ error. No credentials or API keys are required.
149
+
150
+ ## Contribute
151
+
152
+ This project uses [tox](https://tox.wiki) for all quality gates. Create a
153
+ one-shot development environment with everything installed:
154
+
155
+ ```bash
156
+ tox -e dev
157
+ ```
158
+
159
+ Individual gates: `tox -e tests`, `tox -e linting`, `tox -e type_check`,
160
+ `tox -e coverage`, and `tox -e spell_check`. Run the full suite with `tox`.