pyGAEB 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. pygaeb-1.0.0/.github/workflows/publish.yml +95 -0
  2. pygaeb-1.0.0/.github/workflows/test.yml +51 -0
  3. pygaeb-1.0.0/.gitignore +30 -0
  4. pygaeb-1.0.0/.readthedocs.yaml +16 -0
  5. pygaeb-1.0.0/CHANGELOG.md +33 -0
  6. pygaeb-1.0.0/CONTRIBUTING.md +50 -0
  7. pygaeb-1.0.0/LICENSE +21 -0
  8. pygaeb-1.0.0/PKG-INFO +260 -0
  9. pygaeb-1.0.0/README.md +213 -0
  10. pygaeb-1.0.0/docs/changelog.md +31 -0
  11. pygaeb-1.0.0/docs/getting-started/index.md +6 -0
  12. pygaeb-1.0.0/docs/getting-started/installation.md +81 -0
  13. pygaeb-1.0.0/docs/getting-started/quickstart.md +146 -0
  14. pygaeb-1.0.0/docs/guides/caching.md +131 -0
  15. pygaeb-1.0.0/docs/guides/classification.md +156 -0
  16. pygaeb-1.0.0/docs/guides/extraction.md +147 -0
  17. pygaeb-1.0.0/docs/guides/index.md +10 -0
  18. pygaeb-1.0.0/docs/guides/parsing.md +119 -0
  19. pygaeb-1.0.0/docs/guides/validation.md +128 -0
  20. pygaeb-1.0.0/docs/guides/writing.md +92 -0
  21. pygaeb-1.0.0/docs/index.md +52 -0
  22. pygaeb-1.0.0/docs/reference/cache.md +24 -0
  23. pygaeb-1.0.0/docs/reference/classifier.md +35 -0
  24. pygaeb-1.0.0/docs/reference/config.md +28 -0
  25. pygaeb-1.0.0/docs/reference/exceptions.md +36 -0
  26. pygaeb-1.0.0/docs/reference/extractor.md +46 -0
  27. pygaeb-1.0.0/docs/reference/index.md +21 -0
  28. pygaeb-1.0.0/docs/reference/models.md +138 -0
  29. pygaeb-1.0.0/docs/reference/parser.md +9 -0
  30. pygaeb-1.0.0/docs/reference/validation.md +10 -0
  31. pygaeb-1.0.0/docs/reference/writer.md +26 -0
  32. pygaeb-1.0.0/mkdocs.yml +97 -0
  33. pygaeb-1.0.0/pygaeb/__init__.py +135 -0
  34. pygaeb-1.0.0/pygaeb/api/__init__.py +5 -0
  35. pygaeb-1.0.0/pygaeb/api/document_api.py +119 -0
  36. pygaeb-1.0.0/pygaeb/cache.py +159 -0
  37. pygaeb-1.0.0/pygaeb/classifier/__init__.py +5 -0
  38. pygaeb-1.0.0/pygaeb/classifier/batch_classifier.py +210 -0
  39. pygaeb-1.0.0/pygaeb/classifier/cache.py +104 -0
  40. pygaeb-1.0.0/pygaeb/classifier/confidence.py +31 -0
  41. pygaeb-1.0.0/pygaeb/classifier/llm_backend.py +95 -0
  42. pygaeb-1.0.0/pygaeb/classifier/prompt_templates.py +50 -0
  43. pygaeb-1.0.0/pygaeb/classifier/result_model.py +5 -0
  44. pygaeb-1.0.0/pygaeb/classifier/taxonomy.py +78 -0
  45. pygaeb-1.0.0/pygaeb/config.py +65 -0
  46. pygaeb-1.0.0/pygaeb/convert/__init__.py +6 -0
  47. pygaeb-1.0.0/pygaeb/convert/to_csv.py +78 -0
  48. pygaeb-1.0.0/pygaeb/convert/to_json.py +59 -0
  49. pygaeb-1.0.0/pygaeb/detector/__init__.py +7 -0
  50. pygaeb-1.0.0/pygaeb/detector/encoding_repair.py +84 -0
  51. pygaeb-1.0.0/pygaeb/detector/format_detector.py +64 -0
  52. pygaeb-1.0.0/pygaeb/detector/version_detector.py +223 -0
  53. pygaeb-1.0.0/pygaeb/exceptions.py +23 -0
  54. pygaeb-1.0.0/pygaeb/extractor/__init__.py +5 -0
  55. pygaeb-1.0.0/pygaeb/extractor/builtin_schemas.py +100 -0
  56. pygaeb-1.0.0/pygaeb/extractor/extraction_cache.py +91 -0
  57. pygaeb-1.0.0/pygaeb/extractor/extraction_prompt.py +73 -0
  58. pygaeb-1.0.0/pygaeb/extractor/schema_utils.py +89 -0
  59. pygaeb-1.0.0/pygaeb/extractor/structured_extractor.py +386 -0
  60. pygaeb-1.0.0/pygaeb/models/__init__.py +47 -0
  61. pygaeb-1.0.0/pygaeb/models/boq.py +127 -0
  62. pygaeb-1.0.0/pygaeb/models/document.py +118 -0
  63. pygaeb-1.0.0/pygaeb/models/enums.py +126 -0
  64. pygaeb-1.0.0/pygaeb/models/item.py +164 -0
  65. pygaeb-1.0.0/pygaeb/parser/__init__.py +5 -0
  66. pygaeb-1.0.0/pygaeb/parser/gaeb_parser.py +198 -0
  67. pygaeb-1.0.0/pygaeb/parser/recovery.py +75 -0
  68. pygaeb-1.0.0/pygaeb/parser/xml_v2/__init__.py +1 -0
  69. pygaeb-1.0.0/pygaeb/parser/xml_v2/german_element_map.py +75 -0
  70. pygaeb-1.0.0/pygaeb/parser/xml_v2/v2_parser.py +44 -0
  71. pygaeb-1.0.0/pygaeb/parser/xml_v3/__init__.py +1 -0
  72. pygaeb-1.0.0/pygaeb/parser/xml_v3/base_v3_parser.py +392 -0
  73. pygaeb-1.0.0/pygaeb/parser/xml_v3/oz_resolver.py +66 -0
  74. pygaeb-1.0.0/pygaeb/parser/xml_v3/richtext_parser.py +99 -0
  75. pygaeb-1.0.0/pygaeb/parser/xml_v3/v30_compat.py +16 -0
  76. pygaeb-1.0.0/pygaeb/parser/xml_v3/v31_compat.py +24 -0
  77. pygaeb-1.0.0/pygaeb/parser/xml_v3/v32_compat.py +23 -0
  78. pygaeb-1.0.0/pygaeb/parser/xml_v3/v33_compat.py +68 -0
  79. pygaeb-1.0.0/pygaeb/py.typed +0 -0
  80. pygaeb-1.0.0/pygaeb/schemas/README.md +31 -0
  81. pygaeb-1.0.0/pygaeb/validation/__init__.py +35 -0
  82. pygaeb-1.0.0/pygaeb/validation/cross_phase_validator.py +75 -0
  83. pygaeb-1.0.0/pygaeb/validation/item_validator.py +50 -0
  84. pygaeb-1.0.0/pygaeb/validation/numeric_validator.py +33 -0
  85. pygaeb-1.0.0/pygaeb/validation/phase_validator.py +64 -0
  86. pygaeb-1.0.0/pygaeb/validation/structural_validator.py +71 -0
  87. pygaeb-1.0.0/pygaeb/writer/__init__.py +5 -0
  88. pygaeb-1.0.0/pygaeb/writer/gaeb_writer.py +197 -0
  89. pygaeb-1.0.0/pyproject.toml +93 -0
  90. pygaeb-1.0.0/tests/__init__.py +0 -0
  91. pygaeb-1.0.0/tests/conftest.py +268 -0
  92. pygaeb-1.0.0/tests/test_api.py +231 -0
  93. pygaeb-1.0.0/tests/test_cache.py +133 -0
  94. pygaeb-1.0.0/tests/test_classifier.py +145 -0
  95. pygaeb-1.0.0/tests/test_detector.py +65 -0
  96. pygaeb-1.0.0/tests/test_extractor.py +395 -0
  97. pygaeb-1.0.0/tests/test_models.py +157 -0
  98. pygaeb-1.0.0/tests/test_oz_resolver.py +75 -0
  99. pygaeb-1.0.0/tests/test_parser.py +118 -0
  100. pygaeb-1.0.0/tests/test_validation.py +129 -0
  101. pygaeb-1.0.0/tests/test_writer.py +75 -0
@@ -0,0 +1,95 @@
1
+ name: Publish
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+
7
+ jobs:
8
+ test:
9
+ name: Test before publish
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.12"
17
+
18
+ - name: Install dependencies
19
+ run: pip install -e ".[dev]"
20
+
21
+ - name: Run tests
22
+ run: pytest -v
23
+
24
+ build:
25
+ name: Build distribution
26
+ needs: test
27
+ runs-on: ubuntu-latest
28
+ steps:
29
+ - uses: actions/checkout@v4
30
+
31
+ - uses: actions/setup-python@v5
32
+ with:
33
+ python-version: "3.12"
34
+
35
+ - name: Install build tools
36
+ run: pip install build
37
+
38
+ - name: Build sdist and wheel
39
+ run: python -m build
40
+
41
+ - name: Upload distribution artifacts
42
+ uses: actions/upload-artifact@v4
43
+ with:
44
+ name: dist
45
+ path: dist/
46
+
47
+ publish-testpypi:
48
+ name: Publish to TestPyPI
49
+ needs: build
50
+ runs-on: ubuntu-latest
51
+ environment: testpypi
52
+ permissions:
53
+ id-token: write
54
+ steps:
55
+ - name: Download distribution artifacts
56
+ uses: actions/download-artifact@v4
57
+ with:
58
+ name: dist
59
+ path: dist/
60
+
61
+ - name: Publish to TestPyPI
62
+ uses: pypa/gh-action-pypi-publish@release/v1
63
+ with:
64
+ repository-url: https://test.pypi.org/legacy/
65
+
66
+ publish-pypi:
67
+ name: Publish to PyPI
68
+ needs: publish-testpypi
69
+ runs-on: ubuntu-latest
70
+ environment: pypi
71
+ permissions:
72
+ id-token: write
73
+ steps:
74
+ - name: Download distribution artifacts
75
+ uses: actions/download-artifact@v4
76
+ with:
77
+ name: dist
78
+ path: dist/
79
+
80
+ - name: Publish to PyPI
81
+ uses: pypa/gh-action-pypi-publish@release/v1
82
+
83
+ github-release:
84
+ name: Create GitHub Release
85
+ needs: publish-pypi
86
+ runs-on: ubuntu-latest
87
+ permissions:
88
+ contents: write
89
+ steps:
90
+ - uses: actions/checkout@v4
91
+
92
+ - name: Create GitHub Release
93
+ env:
94
+ GH_TOKEN: ${{ github.token }}
95
+ run: gh release create "${{ github.ref_name }}" --generate-notes
@@ -0,0 +1,51 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+
8
+ concurrency:
9
+ group: ci-${{ github.ref }}
10
+ cancel-in-progress: true
11
+
12
+ jobs:
13
+ lint:
14
+ name: Lint
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Install dependencies
24
+ run: pip install -e ".[dev]"
25
+
26
+ - name: Ruff
27
+ run: ruff check pygaeb/ tests/
28
+
29
+ - name: Mypy
30
+ run: mypy pygaeb/
31
+
32
+ test:
33
+ name: Test (Python ${{ matrix.python-version }})
34
+ runs-on: ubuntu-latest
35
+ strategy:
36
+ fail-fast: false
37
+ matrix:
38
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
39
+
40
+ steps:
41
+ - uses: actions/checkout@v4
42
+
43
+ - uses: actions/setup-python@v5
44
+ with:
45
+ python-version: ${{ matrix.python-version }}
46
+
47
+ - name: Install dependencies
48
+ run: pip install -e ".[dev]"
49
+
50
+ - name: Run tests
51
+ run: pytest -v
@@ -0,0 +1,30 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ venv/
12
+
13
+ # IDE
14
+ .idea/
15
+ .vscode/
16
+ *.swp
17
+ *.swo
18
+
19
+ # Testing
20
+ .pytest_cache/
21
+ .hypothesis/
22
+ htmlcov/
23
+ .coverage
24
+
25
+ # MkDocs build output
26
+ site/
27
+
28
+ # OS
29
+ .DS_Store
30
+ Thumbs.db
@@ -0,0 +1,16 @@
1
+ version: 2
2
+
3
+ build:
4
+ os: ubuntu-22.04
5
+ tools:
6
+ python: "3.11"
7
+
8
+ mkdocs:
9
+ configuration: mkdocs.yml
10
+
11
+ python:
12
+ install:
13
+ - method: pip
14
+ path: .
15
+ extra_requirements:
16
+ - docs
@@ -0,0 +1,33 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.0.0] - 2026-03-14
9
+
10
+ ### Added
11
+
12
+ - Unified domain model (GAEBDocument, Item, BoQ, AwardInfo) with Pydantic v2
13
+ - Format & version detection for DA XML 2.0–3.3
14
+ - Pre-parse encoding repair via ftfy + charset-normalizer
15
+ - Malformed XML recovery with two-pass strategy
16
+ - DA XML 3.x parser (3.0, 3.1, 3.2, 3.3)
17
+ - DA XML 2.x parser (2.0, 2.1) via German element mapping
18
+ - OZ resolver with BoQBkdn hierarchy breakdown
19
+ - Rich text parser for tgBoQText long texts (BeautifulSoup4 + lxml)
20
+ - Structural, item, numeric, and phase validation
21
+ - Cross-phase validation (source ↔ response compatibility)
22
+ - LLM classification via LiteLLM (100+ providers) + instructor (structured output)
23
+ - Async batch classifier with SQLite cache, deduplication, cost preview
24
+ - Sync convenience wrapper for classification
25
+ - Model fallback chains
26
+ - Progress reporting callbacks
27
+ - Manual override support with cache persistence
28
+ - Prompt versioning (v1)
29
+ - GAEB XML writer with round-trip support
30
+ - JSON and CSV export
31
+ - Multi-lot document navigation
32
+ - Configuration via pydantic-settings (env vars / .env)
33
+ - Comprehensive validation with lenient (default) and strict modes
@@ -0,0 +1,50 @@
1
+ # Contributing to pyGAEB
2
+
3
+ Thank you for considering contributing to pyGAEB! This guide will help you get started.
4
+
5
+ ## Licence Agreement
6
+
7
+ By submitting a pull request you agree that your contributions are licensed under the MIT License and that the project maintainers retain the right to relicense the project under alternative terms.
8
+
9
+ ## Development Setup
10
+
11
+ ```bash
12
+ git clone https://github.com/frameiq/pygaeb.git
13
+ cd pygaeb
14
+ python -m venv .venv
15
+ source .venv/bin/activate
16
+ pip install -e ".[dev,llm]"
17
+ ```
18
+
19
+ ## Running Tests
20
+
21
+ ```bash
22
+ pytest -v
23
+ ```
24
+
25
+ ## Code Quality
26
+
27
+ We enforce the following in CI — please run these locally before pushing:
28
+
29
+ ```bash
30
+ ruff check pygaeb/ tests/
31
+ mypy pygaeb/
32
+ ```
33
+
34
+ All code must pass ruff with the rules configured in `pyproject.toml` and mypy in strict mode.
35
+
36
+ ## Pull Request Guidelines
37
+
38
+ 1. **Create a branch** from `main` for your changes
39
+ 2. **Write tests** for any new functionality
40
+ 3. **Run the full suite** (`pytest -v`, `ruff check`, `mypy`) before opening a PR
41
+ 4. **Keep PRs focused** — one feature or fix per PR
42
+ 5. **Update documentation** if you change public API surface
43
+
44
+ ## Reporting Issues
45
+
46
+ Open an issue on GitHub with:
47
+
48
+ - A clear title and description
49
+ - Minimal reproduction steps (ideally a sample GAEB file or XML snippet)
50
+ - Python version and pyGAEB version (`python -c "import pygaeb; print(pygaeb.__version__)"`)
pygaeb-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 FrameIQ
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pygaeb-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,260 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyGAEB
3
+ Version: 1.0.0
4
+ Summary: Python parser for GAEB DA XML construction data exchange files, with LLM-powered item classification
5
+ Project-URL: Homepage, https://github.com/frameiq/pygaeb
6
+ Project-URL: Docs, https://pygaeb.readthedocs.io
7
+ Project-URL: Changelog, https://github.com/frameiq/pygaeb/blob/main/CHANGELOG.md
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Keywords: ava,bill-of-quantities,bim,boq,construction,gaeb,leistungsverzeichnis
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.9
24
+ Requires-Dist: beautifulsoup4>=4.12
25
+ Requires-Dist: charset-normalizer>=3.0
26
+ Requires-Dist: eval-type-backport>=0.2.0; python_version < '3.10'
27
+ Requires-Dist: ftfy>=6.1
28
+ Requires-Dist: lxml>=4.9
29
+ Requires-Dist: pydantic-settings>=2.0
30
+ Requires-Dist: pydantic>=2.0
31
+ Provides-Extra: dev
32
+ Requires-Dist: hypothesis; extra == 'dev'
33
+ Requires-Dist: lxml-stubs; extra == 'dev'
34
+ Requires-Dist: mypy; extra == 'dev'
35
+ Requires-Dist: pytest; extra == 'dev'
36
+ Requires-Dist: pytest-asyncio; extra == 'dev'
37
+ Requires-Dist: ruff; extra == 'dev'
38
+ Requires-Dist: xmldiff>=2.6; extra == 'dev'
39
+ Provides-Extra: docs
40
+ Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
41
+ Requires-Dist: mkdocs-section-index>=0.3; extra == 'docs'
42
+ Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
43
+ Provides-Extra: llm
44
+ Requires-Dist: instructor>=1.0; extra == 'llm'
45
+ Requires-Dist: litellm>=1.40; extra == 'llm'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # pyGAEB
49
+
50
+ **Python parser for GAEB DA XML construction data exchange files, with LLM-powered item classification.**
51
+
52
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
53
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
54
+
55
+ pyGAEB parses, validates, classifies, and writes GAEB DA XML files (versions 2.0 through 3.3), producing a unified Pydantic v2 domain model from all inputs. An optional LLM classification layer enriches each item with a semantic construction element type via [LiteLLM](https://github.com/BerriAI/litellm) (100+ providers).
56
+
57
+ ## Installation
58
+
59
+ ```bash
60
+ # Core parser + writer + export (zero LLM dependencies)
61
+ pip install pyGAEB
62
+
63
+ # With LLM classification (supports 100+ providers via LiteLLM)
64
+ pip install "pyGAEB[llm]"
65
+ ```
66
+
67
+ ## Quick Start
68
+
69
+ ### Parse any GAEB file
70
+
71
+ ```python
72
+ from pygaeb import GAEBParser
73
+
74
+ doc = GAEBParser.parse("tender.X83") # DA XML 3.x
75
+ doc = GAEBParser.parse("old.D83") # DA XML 2.x — same call
76
+
77
+ print(doc.source_version) # SourceVersion.DA_XML_33
78
+ print(doc.exchange_phase) # ExchangePhase.X83
79
+ print(doc.grand_total) # Decimal("1234567.89")
80
+ ```
81
+
82
+ ### Iterate items
83
+
84
+ ```python
85
+ for item in doc.award.boq.iter_items():
86
+ print(item.oz) # "01.02.0030"
87
+ print(item.short_text) # "Mauerwerk der Innenwand…"
88
+ print(item.qty) # Decimal("1170.000")
89
+ print(item.unit) # "m2"
90
+ print(item.unit_price) # Decimal("45.50")
91
+ print(item.total_price) # Decimal("53235.00")
92
+ print(item.item_type) # ItemType.NORMAL
93
+ ```
94
+
95
+ ### Validation
96
+
97
+ ```python
98
+ from pygaeb import GAEBParser, ValidationMode
99
+
100
+ # Lenient (default) — collect warnings, keep parsing
101
+ doc = GAEBParser.parse("tender.X83")
102
+ for issue in doc.validation_results:
103
+ print(issue.severity, issue.message)
104
+
105
+ # Strict — raise on first ERROR
106
+ doc = GAEBParser.parse("tender.X83", validation=ValidationMode.STRICT)
107
+ ```
108
+
109
+ ### Write / Round-trip
110
+
111
+ ```python
112
+ from pygaeb import GAEBParser, GAEBWriter, ExchangePhase
113
+ from decimal import Decimal
114
+
115
+ doc = GAEBParser.parse("tender.X83")
116
+ item = doc.award.boq.get_item("01.02.0030")
117
+ item.unit_price = Decimal("48.00")
118
+
119
+ GAEBWriter.write(doc, "bid.X84", phase=ExchangePhase.X84)
120
+ ```
121
+
122
+ ### Export to JSON / CSV
123
+
124
+ ```python
125
+ from pygaeb.convert import to_json, to_csv
126
+
127
+ to_json(doc, "boq.json") # full nested BoQ tree
128
+ to_csv(doc, "items.csv") # flat item table with classification columns
129
+ ```
130
+
131
+ ### LLM Classification
132
+
133
+ ```python
134
+ from pygaeb import LLMClassifier
135
+
136
+ # Default: in-memory cache (no disk I/O, session-scoped)
137
+ classifier = LLMClassifier(model="anthropic/claude-sonnet-4-6")
138
+ # classifier = LLMClassifier(model="gpt-4o")
139
+ # classifier = LLMClassifier(model="ollama/llama3") # local, free, private
140
+
141
+ # Opt-in: persistent SQLite cache (survives across runs)
142
+ from pygaeb import SQLiteCache
143
+ classifier = LLMClassifier(model="anthropic/claude-sonnet-4-6", cache=SQLiteCache("~/.pygaeb/cache"))
144
+
145
+ # Check cost before running (async API: call with await inside a coroutine, or use the *_sync variant below)
146
+ estimate = await classifier.estimate_cost(doc)
147
+ print(f"Will classify {estimate.items_to_classify} items for ~${estimate.estimated_cost_usd:.2f}")
148
+
149
+ # Classify all items
150
+ await classifier.enrich(doc)
151
+
152
+ # Or synchronous
153
+ classifier.enrich_sync(doc)
154
+
155
+ for item in doc.award.boq.iter_items():
156
+ if item.classification:
157
+ print(item.oz, item.classification.element_type, item.classification.confidence)
158
+ ```
159
+
160
+ ### Structured Extraction — Custom Schemas
161
+
162
+ After classification, extract typed attributes into your own Pydantic schema:
163
+
164
+ ```python
165
+ from pydantic import BaseModel, Field
166
+ from typing import Optional
167
+ from pygaeb import StructuredExtractor
168
+
169
+ class DoorSpec(BaseModel):
170
+ door_type: str = Field("", description="single, double, sliding")
171
+ width_mm: Optional[int] = Field(None, description="Width in mm")
172
+ fire_rating: Optional[str] = Field(None, description="T30, T60, T90")
173
+ glazing: bool = Field(False, description="Has glass panels")
174
+ material: str = Field("", description="wood, steel, aluminium")
175
+
176
+ extractor = StructuredExtractor(model="anthropic/claude-sonnet-4-6")
177
+
178
+ # Extract from all items classified as "Door"
179
+ doors = await extractor.extract(doc, schema=DoorSpec, element_type="Door")
180
+ for item, spec in doors:
181
+ print(item.oz, spec.door_type, spec.fire_rating, spec.width_mm)
182
+
183
+ # Filter by trade (broad) or sub_type (narrow)
184
+ pipes = await extractor.extract(doc, schema=PipeSpec, trade="MEP-Plumbing")
185
+ fire_doors = await extractor.extract(doc, schema=DoorSpec, sub_type="Fire Door")
186
+
187
+ # Or synchronous
188
+ doors = extractor.extract_sync(doc, schema=DoorSpec, element_type="Door")
189
+ ```
190
+
191
+ Built-in starter schemas: `DoorSpec`, `WindowSpec`, `WallSpec`, `PipeSpec` — or define your own.
192
+
193
+ ### Custom Cache Backend
194
+
195
+ ```python
196
+ from pygaeb import CacheBackend, InMemoryCache, LLMClassifier, SQLiteCache, StructuredExtractor
197
+
198
+ # Default: in-memory (no disk, session-scoped)
199
+ classifier = LLMClassifier()
200
+
201
+ # Persistent: SQLite
202
+ classifier = LLMClassifier(cache=SQLiteCache("~/.pygaeb/cache"))
203
+
204
+ # Share one backend between classifier and extractor
205
+ shared = SQLiteCache("/tmp/project-cache")
206
+ classifier = LLMClassifier(cache=shared)
207
+ extractor = StructuredExtractor(cache=shared)
208
+
209
+ # Bring your own: implement CacheBackend protocol
210
+ class RedisCache:
211
+ def get(self, key: str) -> str | None: ...
212
+ def put(self, key: str, value: str) -> None: ...
213
+ def delete(self, key: str) -> None: ...
214
+ def keys(self) -> list[str]: ...
215
+ def clear(self) -> None: ...
216
+ def close(self) -> None: ...
217
+
218
+ classifier = LLMClassifier(cache=RedisCache())
219
+ ```
220
+
221
+ ### Cross-Phase Validation
222
+
223
+ ```python
224
+ from pygaeb import GAEBParser, CrossPhaseValidator
225
+
226
+ tender = GAEBParser.parse("tender.X83")
227
+ bid = GAEBParser.parse("bid.X84")
228
+
229
+ issues = CrossPhaseValidator.check(source=tender, response=bid)
230
+ for issue in issues:
231
+ print(issue.severity, issue.message)
232
+ ```
233
+
234
+ ## Supported Versions
235
+
236
+ | Version | Parser Track | Status |
237
+ |---------|-------------|--------|
238
+ | DA XML 2.0 | Track A (German elements) | ✅ v1.0 |
239
+ | DA XML 2.1 | Track A (German elements) | ✅ v1.0 |
240
+ | DA XML 3.0 | Track B (English elements) | ✅ v1.0 |
241
+ | DA XML 3.1 | Track B (English elements) | ✅ v1.0 |
242
+ | DA XML 3.2 | Track B (English elements) | ✅ v1.0 |
243
+ | DA XML 3.3 | Track B (English elements) | ✅ v1.0 |
244
+ | GAEB 90 | Track C (fixed-width) | 🔜 v1.1 |
245
+
246
+ ## Configuration
247
+
248
+ ```bash
249
+ # Environment variables
250
+ export PYGAEB_DEFAULT_MODEL=ollama/llama3
251
+ export PYGAEB_XSD_DIR=/opt/gaeb-schemas
252
+
253
+ # Or programmatically (the following lines are Python, not shell)
254
+ from pygaeb import PyGAEBSettings
255
+ settings = PyGAEBSettings(default_model="gpt-4o", classifier_concurrency=10)
256
+ ```
257
+
258
+ ## License
259
+
260
+ MIT — see [LICENSE](LICENSE) for details.