pyGAEB 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pygaeb-1.0.0/.github/workflows/publish.yml +95 -0
- pygaeb-1.0.0/.github/workflows/test.yml +51 -0
- pygaeb-1.0.0/.gitignore +30 -0
- pygaeb-1.0.0/.readthedocs.yaml +16 -0
- pygaeb-1.0.0/CHANGELOG.md +33 -0
- pygaeb-1.0.0/CONTRIBUTING.md +50 -0
- pygaeb-1.0.0/LICENSE +21 -0
- pygaeb-1.0.0/PKG-INFO +260 -0
- pygaeb-1.0.0/README.md +213 -0
- pygaeb-1.0.0/docs/changelog.md +31 -0
- pygaeb-1.0.0/docs/getting-started/index.md +6 -0
- pygaeb-1.0.0/docs/getting-started/installation.md +81 -0
- pygaeb-1.0.0/docs/getting-started/quickstart.md +146 -0
- pygaeb-1.0.0/docs/guides/caching.md +131 -0
- pygaeb-1.0.0/docs/guides/classification.md +156 -0
- pygaeb-1.0.0/docs/guides/extraction.md +147 -0
- pygaeb-1.0.0/docs/guides/index.md +10 -0
- pygaeb-1.0.0/docs/guides/parsing.md +119 -0
- pygaeb-1.0.0/docs/guides/validation.md +128 -0
- pygaeb-1.0.0/docs/guides/writing.md +92 -0
- pygaeb-1.0.0/docs/index.md +52 -0
- pygaeb-1.0.0/docs/reference/cache.md +24 -0
- pygaeb-1.0.0/docs/reference/classifier.md +35 -0
- pygaeb-1.0.0/docs/reference/config.md +28 -0
- pygaeb-1.0.0/docs/reference/exceptions.md +36 -0
- pygaeb-1.0.0/docs/reference/extractor.md +46 -0
- pygaeb-1.0.0/docs/reference/index.md +21 -0
- pygaeb-1.0.0/docs/reference/models.md +138 -0
- pygaeb-1.0.0/docs/reference/parser.md +9 -0
- pygaeb-1.0.0/docs/reference/validation.md +10 -0
- pygaeb-1.0.0/docs/reference/writer.md +26 -0
- pygaeb-1.0.0/mkdocs.yml +97 -0
- pygaeb-1.0.0/pygaeb/__init__.py +135 -0
- pygaeb-1.0.0/pygaeb/api/__init__.py +5 -0
- pygaeb-1.0.0/pygaeb/api/document_api.py +119 -0
- pygaeb-1.0.0/pygaeb/cache.py +159 -0
- pygaeb-1.0.0/pygaeb/classifier/__init__.py +5 -0
- pygaeb-1.0.0/pygaeb/classifier/batch_classifier.py +210 -0
- pygaeb-1.0.0/pygaeb/classifier/cache.py +104 -0
- pygaeb-1.0.0/pygaeb/classifier/confidence.py +31 -0
- pygaeb-1.0.0/pygaeb/classifier/llm_backend.py +95 -0
- pygaeb-1.0.0/pygaeb/classifier/prompt_templates.py +50 -0
- pygaeb-1.0.0/pygaeb/classifier/result_model.py +5 -0
- pygaeb-1.0.0/pygaeb/classifier/taxonomy.py +78 -0
- pygaeb-1.0.0/pygaeb/config.py +65 -0
- pygaeb-1.0.0/pygaeb/convert/__init__.py +6 -0
- pygaeb-1.0.0/pygaeb/convert/to_csv.py +78 -0
- pygaeb-1.0.0/pygaeb/convert/to_json.py +59 -0
- pygaeb-1.0.0/pygaeb/detector/__init__.py +7 -0
- pygaeb-1.0.0/pygaeb/detector/encoding_repair.py +84 -0
- pygaeb-1.0.0/pygaeb/detector/format_detector.py +64 -0
- pygaeb-1.0.0/pygaeb/detector/version_detector.py +223 -0
- pygaeb-1.0.0/pygaeb/exceptions.py +23 -0
- pygaeb-1.0.0/pygaeb/extractor/__init__.py +5 -0
- pygaeb-1.0.0/pygaeb/extractor/builtin_schemas.py +100 -0
- pygaeb-1.0.0/pygaeb/extractor/extraction_cache.py +91 -0
- pygaeb-1.0.0/pygaeb/extractor/extraction_prompt.py +73 -0
- pygaeb-1.0.0/pygaeb/extractor/schema_utils.py +89 -0
- pygaeb-1.0.0/pygaeb/extractor/structured_extractor.py +386 -0
- pygaeb-1.0.0/pygaeb/models/__init__.py +47 -0
- pygaeb-1.0.0/pygaeb/models/boq.py +127 -0
- pygaeb-1.0.0/pygaeb/models/document.py +118 -0
- pygaeb-1.0.0/pygaeb/models/enums.py +126 -0
- pygaeb-1.0.0/pygaeb/models/item.py +164 -0
- pygaeb-1.0.0/pygaeb/parser/__init__.py +5 -0
- pygaeb-1.0.0/pygaeb/parser/gaeb_parser.py +198 -0
- pygaeb-1.0.0/pygaeb/parser/recovery.py +75 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v2/__init__.py +1 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v2/german_element_map.py +75 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v2/v2_parser.py +44 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/__init__.py +1 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/base_v3_parser.py +392 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/oz_resolver.py +66 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/richtext_parser.py +99 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/v30_compat.py +16 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/v31_compat.py +24 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/v32_compat.py +23 -0
- pygaeb-1.0.0/pygaeb/parser/xml_v3/v33_compat.py +68 -0
- pygaeb-1.0.0/pygaeb/py.typed +0 -0
- pygaeb-1.0.0/pygaeb/schemas/README.md +31 -0
- pygaeb-1.0.0/pygaeb/validation/__init__.py +35 -0
- pygaeb-1.0.0/pygaeb/validation/cross_phase_validator.py +75 -0
- pygaeb-1.0.0/pygaeb/validation/item_validator.py +50 -0
- pygaeb-1.0.0/pygaeb/validation/numeric_validator.py +33 -0
- pygaeb-1.0.0/pygaeb/validation/phase_validator.py +64 -0
- pygaeb-1.0.0/pygaeb/validation/structural_validator.py +71 -0
- pygaeb-1.0.0/pygaeb/writer/__init__.py +5 -0
- pygaeb-1.0.0/pygaeb/writer/gaeb_writer.py +197 -0
- pygaeb-1.0.0/pyproject.toml +93 -0
- pygaeb-1.0.0/tests/__init__.py +0 -0
- pygaeb-1.0.0/tests/conftest.py +268 -0
- pygaeb-1.0.0/tests/test_api.py +231 -0
- pygaeb-1.0.0/tests/test_cache.py +133 -0
- pygaeb-1.0.0/tests/test_classifier.py +145 -0
- pygaeb-1.0.0/tests/test_detector.py +65 -0
- pygaeb-1.0.0/tests/test_extractor.py +395 -0
- pygaeb-1.0.0/tests/test_models.py +157 -0
- pygaeb-1.0.0/tests/test_oz_resolver.py +75 -0
- pygaeb-1.0.0/tests/test_parser.py +118 -0
- pygaeb-1.0.0/tests/test_validation.py +129 -0
- pygaeb-1.0.0/tests/test_writer.py +75 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ["v*"]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
test:
|
|
9
|
+
name: Test before publish
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
|
|
18
|
+
- name: Install dependencies
|
|
19
|
+
run: pip install -e ".[dev]"
|
|
20
|
+
|
|
21
|
+
- name: Run tests
|
|
22
|
+
run: pytest -v
|
|
23
|
+
|
|
24
|
+
build:
|
|
25
|
+
name: Build distribution
|
|
26
|
+
needs: test
|
|
27
|
+
runs-on: ubuntu-latest
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v4
|
|
30
|
+
|
|
31
|
+
- uses: actions/setup-python@v5
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.12"
|
|
34
|
+
|
|
35
|
+
- name: Install build tools
|
|
36
|
+
run: pip install build
|
|
37
|
+
|
|
38
|
+
- name: Build sdist and wheel
|
|
39
|
+
run: python -m build
|
|
40
|
+
|
|
41
|
+
- name: Upload distribution artifacts
|
|
42
|
+
uses: actions/upload-artifact@v4
|
|
43
|
+
with:
|
|
44
|
+
name: dist
|
|
45
|
+
path: dist/
|
|
46
|
+
|
|
47
|
+
publish-testpypi:
|
|
48
|
+
name: Publish to TestPyPI
|
|
49
|
+
needs: build
|
|
50
|
+
runs-on: ubuntu-latest
|
|
51
|
+
environment: testpypi
|
|
52
|
+
permissions:
|
|
53
|
+
id-token: write
|
|
54
|
+
steps:
|
|
55
|
+
- name: Download distribution artifacts
|
|
56
|
+
uses: actions/download-artifact@v4
|
|
57
|
+
with:
|
|
58
|
+
name: dist
|
|
59
|
+
path: dist/
|
|
60
|
+
|
|
61
|
+
- name: Publish to TestPyPI
|
|
62
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
63
|
+
with:
|
|
64
|
+
repository-url: https://test.pypi.org/legacy/
|
|
65
|
+
|
|
66
|
+
publish-pypi:
|
|
67
|
+
name: Publish to PyPI
|
|
68
|
+
needs: publish-testpypi
|
|
69
|
+
runs-on: ubuntu-latest
|
|
70
|
+
environment: pypi
|
|
71
|
+
permissions:
|
|
72
|
+
id-token: write
|
|
73
|
+
steps:
|
|
74
|
+
- name: Download distribution artifacts
|
|
75
|
+
uses: actions/download-artifact@v4
|
|
76
|
+
with:
|
|
77
|
+
name: dist
|
|
78
|
+
path: dist/
|
|
79
|
+
|
|
80
|
+
- name: Publish to PyPI
|
|
81
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
82
|
+
|
|
83
|
+
github-release:
|
|
84
|
+
name: Create GitHub Release
|
|
85
|
+
needs: publish-pypi
|
|
86
|
+
runs-on: ubuntu-latest
|
|
87
|
+
permissions:
|
|
88
|
+
contents: write
|
|
89
|
+
steps:
|
|
90
|
+
- uses: actions/checkout@v4
|
|
91
|
+
|
|
92
|
+
- name: Create GitHub Release
|
|
93
|
+
env:
|
|
94
|
+
GH_TOKEN: ${{ github.token }}
|
|
95
|
+
run: gh release create "${{ github.ref_name }}" --generate-notes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
concurrency:
|
|
9
|
+
group: ci-${{ github.ref }}
|
|
10
|
+
cancel-in-progress: true
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
lint:
|
|
14
|
+
name: Lint
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Install dependencies
|
|
24
|
+
run: pip install -e ".[dev]"
|
|
25
|
+
|
|
26
|
+
- name: Ruff
|
|
27
|
+
run: ruff check pygaeb/ tests/
|
|
28
|
+
|
|
29
|
+
- name: Mypy
|
|
30
|
+
run: mypy pygaeb/
|
|
31
|
+
|
|
32
|
+
test:
|
|
33
|
+
name: Test (Python ${{ matrix.python-version }})
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
strategy:
|
|
36
|
+
fail-fast: false
|
|
37
|
+
matrix:
|
|
38
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
|
39
|
+
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v4
|
|
42
|
+
|
|
43
|
+
- uses: actions/setup-python@v5
|
|
44
|
+
with:
|
|
45
|
+
python-version: ${{ matrix.python-version }}
|
|
46
|
+
|
|
47
|
+
- name: Install dependencies
|
|
48
|
+
run: pip install -e ".[dev]"
|
|
49
|
+
|
|
50
|
+
- name: Run tests
|
|
51
|
+
run: pytest -v
|
pygaeb-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
*.egg
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
|
|
13
|
+
# IDE
|
|
14
|
+
.idea/
|
|
15
|
+
.vscode/
|
|
16
|
+
*.swp
|
|
17
|
+
*.swo
|
|
18
|
+
|
|
19
|
+
# Testing
|
|
20
|
+
.pytest_cache/
|
|
21
|
+
.hypothesis/
|
|
22
|
+
htmlcov/
|
|
23
|
+
.coverage
|
|
24
|
+
|
|
25
|
+
# MkDocs build output
|
|
26
|
+
site/
|
|
27
|
+
|
|
28
|
+
# OS
|
|
29
|
+
.DS_Store
|
|
30
|
+
Thumbs.db
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.0.0] - 2026-03-14
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Unified domain model (GAEBDocument, Item, BoQ, AwardInfo) with Pydantic v2
|
|
13
|
+
- Format & version detection for DA XML 2.0–3.3
|
|
14
|
+
- Pre-parse encoding repair via ftfy + charset-normalizer
|
|
15
|
+
- Malformed XML recovery with two-pass strategy
|
|
16
|
+
- DA XML 3.x parser (3.0, 3.1, 3.2, 3.3)
|
|
17
|
+
- DA XML 2.x parser (2.0, 2.1) via German element mapping
|
|
18
|
+
- OZ resolver with BoQBkdn hierarchy breakdown
|
|
19
|
+
- Rich text parser for tgBoQText long texts (BeautifulSoup4 + lxml)
|
|
20
|
+
- Structural, item, numeric, and phase validation
|
|
21
|
+
- Cross-phase validation (source ↔ response compatibility)
|
|
22
|
+
- LLM classification via LiteLLM (100+ providers) + instructor (structured output)
|
|
23
|
+
- Async batch classifier with SQLite cache, deduplication, cost preview
|
|
24
|
+
- Sync convenience wrapper for classification
|
|
25
|
+
- Model fallback chains
|
|
26
|
+
- Progress reporting callbacks
|
|
27
|
+
- Manual override support with cache persistence
|
|
28
|
+
- Prompt versioning (v1)
|
|
29
|
+
- GAEB XML writer with round-trip support
|
|
30
|
+
- JSON and CSV export
|
|
31
|
+
- Multi-lot document navigation
|
|
32
|
+
- Configuration via pydantic-settings (env vars / .env)
|
|
33
|
+
- Comprehensive validation with lenient (default) and strict modes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Contributing to pyGAEB
|
|
2
|
+
|
|
3
|
+
Thank you for considering contributing to pyGAEB! This guide will help you get started.
|
|
4
|
+
|
|
5
|
+
## License Agreement
|
|
6
|
+
|
|
7
|
+
By submitting a pull request, you agree that your contributions are licensed under the MIT License and that the project maintainers retain the right to relicense the project under alternative terms.
|
|
8
|
+
|
|
9
|
+
## Development Setup
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
git clone https://github.com/frameiq/pygaeb.git
|
|
13
|
+
cd pygaeb
|
|
14
|
+
python -m venv .venv
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
pip install -e ".[dev,llm]"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Running Tests
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pytest -v
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Code Quality
|
|
26
|
+
|
|
27
|
+
We enforce the following in CI — please run these locally before pushing:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
ruff check pygaeb/ tests/
|
|
31
|
+
mypy pygaeb/
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
All code must pass ruff with the rules configured in `pyproject.toml` and mypy in strict mode.
|
|
35
|
+
|
|
36
|
+
## Pull Request Guidelines
|
|
37
|
+
|
|
38
|
+
1. **Create a branch** from `main` for your changes
|
|
39
|
+
2. **Write tests** for any new functionality
|
|
40
|
+
3. **Run the full suite** (`pytest -v`, `ruff check`, `mypy`) before opening a PR
|
|
41
|
+
4. **Keep PRs focused** — one feature or fix per PR
|
|
42
|
+
5. **Update documentation** if you change public API surface
|
|
43
|
+
|
|
44
|
+
## Reporting Issues
|
|
45
|
+
|
|
46
|
+
Open an issue on GitHub with:
|
|
47
|
+
|
|
48
|
+
- A clear title and description
|
|
49
|
+
- Minimal reproduction steps (ideally a sample GAEB file or XML snippet)
|
|
50
|
+
- Python version and pyGAEB version (`python -c "import pygaeb; print(pygaeb.__version__)"`)
|
pygaeb-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 FrameIQ
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
pygaeb-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyGAEB
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Python parser for GAEB DA XML construction data exchange files, with LLM-powered item classification
|
|
5
|
+
Project-URL: Homepage, https://github.com/frameiq/pygaeb
|
|
6
|
+
Project-URL: Docs, https://pygaeb.readthedocs.io
|
|
7
|
+
Project-URL: Changelog, https://github.com/frameiq/pygaeb/blob/main/CHANGELOG.md
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: ava,bill-of-quantities,bim,boq,construction,gaeb,leistungsverzeichnis
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.9
|
|
24
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
25
|
+
Requires-Dist: charset-normalizer>=3.0
|
|
26
|
+
Requires-Dist: eval-type-backport>=0.2.0; python_version < '3.10'
|
|
27
|
+
Requires-Dist: ftfy>=6.1
|
|
28
|
+
Requires-Dist: lxml>=4.9
|
|
29
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
30
|
+
Requires-Dist: pydantic>=2.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: hypothesis; extra == 'dev'
|
|
33
|
+
Requires-Dist: lxml-stubs; extra == 'dev'
|
|
34
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
36
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
37
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
38
|
+
Requires-Dist: xmldiff>=2.6; extra == 'dev'
|
|
39
|
+
Provides-Extra: docs
|
|
40
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
41
|
+
Requires-Dist: mkdocs-section-index>=0.3; extra == 'docs'
|
|
42
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
|
|
43
|
+
Provides-Extra: llm
|
|
44
|
+
Requires-Dist: instructor>=1.0; extra == 'llm'
|
|
45
|
+
Requires-Dist: litellm>=1.40; extra == 'llm'
|
|
46
|
+
Description-Content-Type: text/markdown
|
|
47
|
+
|
|
48
|
+
# pyGAEB
|
|
49
|
+
|
|
50
|
+
**Python parser for GAEB DA XML construction data exchange files, with LLM-powered item classification.**
|
|
51
|
+
|
|
52
|
+
[Python 3.9+](https://www.python.org/downloads/)
|
|
53
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
54
|
+
|
|
55
|
+
pyGAEB parses, validates, classifies, and writes GAEB DA XML files (versions 2.0 through 3.3), producing a unified Pydantic v2 domain model from all inputs. An optional LLM classification layer enriches each item with a semantic construction element type via [LiteLLM](https://github.com/BerriAI/litellm) (100+ providers).
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Core parser + writer + export (zero LLM dependencies)
|
|
61
|
+
pip install pyGAEB
|
|
62
|
+
|
|
63
|
+
# With LLM classification (supports 100+ providers via LiteLLM)
|
|
64
|
+
pip install pyGAEB[llm]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Quick Start
|
|
68
|
+
|
|
69
|
+
### Parse any GAEB file
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from pygaeb import GAEBParser
|
|
73
|
+
|
|
74
|
+
doc = GAEBParser.parse("tender.X83") # DA XML 3.x
|
|
75
|
+
doc = GAEBParser.parse("old.D83") # DA XML 2.x — same call
|
|
76
|
+
|
|
77
|
+
print(doc.source_version) # SourceVersion.DA_XML_33
|
|
78
|
+
print(doc.exchange_phase) # ExchangePhase.X83
|
|
79
|
+
print(doc.grand_total) # Decimal("1234567.89")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Iterate items
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
for item in doc.award.boq.iter_items():
|
|
86
|
+
print(item.oz) # "01.02.0030"
|
|
87
|
+
print(item.short_text) # "Mauerwerk der Innenwand…"
|
|
88
|
+
print(item.qty) # Decimal("1170.000")
|
|
89
|
+
print(item.unit) # "m2"
|
|
90
|
+
print(item.unit_price) # Decimal("45.50")
|
|
91
|
+
print(item.total_price) # Decimal("53235.00")
|
|
92
|
+
print(item.item_type) # ItemType.NORMAL
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Validation
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from pygaeb import GAEBParser, ValidationMode
|
|
99
|
+
|
|
100
|
+
# Lenient (default) — collect warnings, keep parsing
|
|
101
|
+
doc = GAEBParser.parse("tender.X83")
|
|
102
|
+
for issue in doc.validation_results:
|
|
103
|
+
print(issue.severity, issue.message)
|
|
104
|
+
|
|
105
|
+
# Strict — raise on first ERROR
|
|
106
|
+
doc = GAEBParser.parse("tender.X83", validation=ValidationMode.STRICT)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Write / Round-trip
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from pygaeb import GAEBWriter, ExchangePhase
|
|
113
|
+
from decimal import Decimal
|
|
114
|
+
|
|
115
|
+
doc = GAEBParser.parse("tender.X83")
|
|
116
|
+
item = doc.award.boq.get_item("01.02.0030")
|
|
117
|
+
item.unit_price = Decimal("48.00")
|
|
118
|
+
|
|
119
|
+
GAEBWriter.write(doc, "bid.X84", phase=ExchangePhase.X84)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Export to JSON / CSV
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from pygaeb.convert import to_json, to_csv
|
|
126
|
+
|
|
127
|
+
to_json(doc, "boq.json") # full nested BoQ tree
|
|
128
|
+
to_csv(doc, "items.csv") # flat item table with classification columns
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### LLM Classification
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from pygaeb import LLMClassifier
|
|
135
|
+
|
|
136
|
+
# Default: in-memory cache (no disk I/O, session-scoped)
|
|
137
|
+
classifier = LLMClassifier(model="anthropic/claude-sonnet-4-6")
|
|
138
|
+
# classifier = LLMClassifier(model="gpt-4o")
|
|
139
|
+
# classifier = LLMClassifier(model="ollama/llama3") # local, free, private
|
|
140
|
+
|
|
141
|
+
# Opt-in: persistent SQLite cache (survives across runs)
|
|
142
|
+
from pygaeb import SQLiteCache
|
|
143
|
+
classifier = LLMClassifier(model="anthropic/claude-sonnet-4-6", cache=SQLiteCache("~/.pygaeb/cache"))
|
|
144
|
+
|
|
145
|
+
# Check cost before running
|
|
146
|
+
estimate = await classifier.estimate_cost(doc)
|
|
147
|
+
print(f"Will classify {estimate.items_to_classify} items for ~${estimate.estimated_cost_usd:.2f}")
|
|
148
|
+
|
|
149
|
+
# Classify all items
|
|
150
|
+
await classifier.enrich(doc)
|
|
151
|
+
|
|
152
|
+
# Or synchronous
|
|
153
|
+
classifier.enrich_sync(doc)
|
|
154
|
+
|
|
155
|
+
for item in doc.award.boq.iter_items():
|
|
156
|
+
if item.classification:
|
|
157
|
+
print(item.oz, item.classification.element_type, item.classification.confidence)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Structured Extraction — Custom Schemas
|
|
161
|
+
|
|
162
|
+
After classification, extract typed attributes into your own Pydantic schema:
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from pydantic import BaseModel, Field
|
|
166
|
+
from typing import Optional
|
|
167
|
+
from pygaeb import StructuredExtractor
|
|
168
|
+
|
|
169
|
+
class DoorSpec(BaseModel):
|
|
170
|
+
door_type: str = Field("", description="single, double, sliding")
|
|
171
|
+
width_mm: Optional[int] = Field(None, description="Width in mm")
|
|
172
|
+
fire_rating: Optional[str] = Field(None, description="T30, T60, T90")
|
|
173
|
+
glazing: bool = Field(False, description="Has glass panels")
|
|
174
|
+
material: str = Field("", description="wood, steel, aluminium")
|
|
175
|
+
|
|
176
|
+
extractor = StructuredExtractor(model="anthropic/claude-sonnet-4-6")
|
|
177
|
+
|
|
178
|
+
# Extract from all items classified as "Door"
|
|
179
|
+
doors = await extractor.extract(doc, schema=DoorSpec, element_type="Door")
|
|
180
|
+
for item, spec in doors:
|
|
181
|
+
print(item.oz, spec.door_type, spec.fire_rating, spec.width_mm)
|
|
182
|
+
|
|
183
|
+
# Filter by trade (broad) or sub_type (narrow)
|
|
184
|
+
pipes = await extractor.extract(doc, schema=PipeSpec, trade="MEP-Plumbing")
|
|
185
|
+
fire_doors = await extractor.extract(doc, schema=DoorSpec, sub_type="Fire Door")
|
|
186
|
+
|
|
187
|
+
# Or synchronous
|
|
188
|
+
doors = extractor.extract_sync(doc, schema=DoorSpec, element_type="Door")
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Built-in starter schemas: `DoorSpec`, `WindowSpec`, `WallSpec`, `PipeSpec` — or define your own.
|
|
192
|
+
|
|
193
|
+
### Custom Cache Backend
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from pygaeb import CacheBackend, InMemoryCache, SQLiteCache
|
|
197
|
+
|
|
198
|
+
# Default: in-memory (no disk, session-scoped)
|
|
199
|
+
classifier = LLMClassifier()
|
|
200
|
+
|
|
201
|
+
# Persistent: SQLite
|
|
202
|
+
classifier = LLMClassifier(cache=SQLiteCache("~/.pygaeb/cache"))
|
|
203
|
+
|
|
204
|
+
# Share one backend between classifier and extractor
|
|
205
|
+
shared = SQLiteCache("/tmp/project-cache")
|
|
206
|
+
classifier = LLMClassifier(cache=shared)
|
|
207
|
+
extractor = StructuredExtractor(cache=shared)
|
|
208
|
+
|
|
209
|
+
# Bring your own: implement CacheBackend protocol
|
|
210
|
+
class RedisCache:
|
|
211
|
+
def get(self, key: str) -> str | None: ...
|
|
212
|
+
def put(self, key: str, value: str) -> None: ...
|
|
213
|
+
def delete(self, key: str) -> None: ...
|
|
214
|
+
def keys(self) -> list[str]: ...
|
|
215
|
+
def clear(self) -> None: ...
|
|
216
|
+
def close(self) -> None: ...
|
|
217
|
+
|
|
218
|
+
classifier = LLMClassifier(cache=RedisCache())
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### Cross-Phase Validation
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from pygaeb import GAEBParser, CrossPhaseValidator
|
|
225
|
+
|
|
226
|
+
tender = GAEBParser.parse("tender.X83")
|
|
227
|
+
bid = GAEBParser.parse("bid.X84")
|
|
228
|
+
|
|
229
|
+
issues = CrossPhaseValidator.check(source=tender, response=bid)
|
|
230
|
+
for issue in issues:
|
|
231
|
+
print(issue.severity, issue.message)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
## Supported Versions
|
|
235
|
+
|
|
236
|
+
| Version | Parser Track | Status |
|
|
237
|
+
|---------|-------------|--------|
|
|
238
|
+
| DA XML 2.0 | Track A (German elements) | ✅ v1.0 |
|
|
239
|
+
| DA XML 2.1 | Track A (German elements) | ✅ v1.0 |
|
|
240
|
+
| DA XML 3.0 | Track B (English elements) | ✅ v1.0 |
|
|
241
|
+
| DA XML 3.1 | Track B (English elements) | ✅ v1.0 |
|
|
242
|
+
| DA XML 3.2 | Track B (English elements) | ✅ v1.0 |
|
|
243
|
+
| DA XML 3.3 | Track B (English elements) | ✅ v1.0 |
|
|
244
|
+
| GAEB 90 | Track C (fixed-width) | 🔜 v1.1 |
|
|
245
|
+
|
|
246
|
+
## Configuration
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
# Environment variables
|
|
250
|
+
export PYGAEB_DEFAULT_MODEL=ollama/llama3
|
|
251
|
+
export PYGAEB_XSD_DIR=/opt/gaeb-schemas
|
|
252
|
+
|
|
253
|
+
# Or programmatically (Python, not shell)
|
|
254
|
+
from pygaeb import PyGAEBSettings
|
|
255
|
+
settings = PyGAEBSettings(default_model="gpt-4o", classifier_concurrency=10)
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## License
|
|
259
|
+
|
|
260
|
+
MIT — see [LICENSE](LICENSE) for details.
|