bluebeam-set-manager 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. bluebeam_set_manager-0.1.0/.github/workflows/ci.yml +56 -0
  2. bluebeam_set_manager-0.1.0/.github/workflows/release.yml +105 -0
  3. bluebeam_set_manager-0.1.0/.gitignore +21 -0
  4. bluebeam_set_manager-0.1.0/PKG-INFO +6 -0
  5. bluebeam_set_manager-0.1.0/README.md +0 -0
  6. bluebeam_set_manager-0.1.0/docs/superpowers/specs/2026-05-29-bluebeam-set-manager-design.md +188 -0
  7. bluebeam_set_manager-0.1.0/pyproject.toml +29 -0
  8. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/__init__.py +3 -0
  9. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/__main__.py +21 -0
  10. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/io/__init__.py +15 -0
  11. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/io/parser.py +146 -0
  12. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/io/writer.py +141 -0
  13. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/model/__init__.py +7 -0
  14. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/model/document.py +29 -0
  15. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/model/page.py +67 -0
  16. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/model/source.py +37 -0
  17. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/ops/__init__.py +20 -0
  18. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/ops/buffer.py +154 -0
  19. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/ops/renumber.py +125 -0
  20. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/tui/__init__.py +5 -0
  21. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/tui/app.py +416 -0
  22. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/tui/dialogs.py +119 -0
  23. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/tui/renumber_screen.py +109 -0
  24. bluebeam_set_manager-0.1.0/src/bluebeam_set_manager/tui/util.py +30 -0
  25. bluebeam_set_manager-0.1.0/tests/conftest.py +42 -0
  26. bluebeam_set_manager-0.1.0/tests/test_buffer.py +37 -0
  27. bluebeam_set_manager-0.1.0/tests/test_parser.py +39 -0
  28. bluebeam_set_manager-0.1.0/tests/test_renumber.py +57 -0
  29. bluebeam_set_manager-0.1.0/tests/test_save_integration.py +39 -0
  30. bluebeam_set_manager-0.1.0/tests/test_tui.py +119 -0
  31. bluebeam_set_manager-0.1.0/tests/test_writer.py +61 -0
  32. bluebeam_set_manager-0.1.0/uv.lock +207 -0
@@ -0,0 +1,56 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: ["main"]
6
+ pull_request:
7
+ workflow_dispatch:
8
+
9
+ # Cancel superseded runs on the same ref.
10
+ concurrency:
11
+ group: ci-${{ github.ref }}
12
+ cancel-in-progress: true
13
+
14
+ jobs:
15
+ lint:
16
+ name: Lint (ruff)
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: Install uv
22
+ uses: astral-sh/setup-uv@v5
23
+ with:
24
+ enable-cache: true
25
+
26
+ - name: Ruff lint
27
+ run: uvx ruff check --output-format=github .
28
+
29
+ - name: Ruff format check
30
+ run: uvx ruff format --check .
31
+
32
+ build:
33
+ name: Build distribution
34
+ runs-on: ubuntu-latest
35
+ steps:
36
+ - uses: actions/checkout@v4
37
+ with:
38
+ # Full history + tags so hatch-vcs can resolve a version.
39
+ fetch-depth: 0
40
+
41
+ - name: Install uv
42
+ uses: astral-sh/setup-uv@v5
43
+ with:
44
+ enable-cache: true
45
+
46
+ - name: Build sdist and wheel
47
+ run: uv build
48
+
49
+ - name: Check metadata
50
+ run: uvx twine check dist/*
51
+
52
+ - name: Upload build artifacts
53
+ uses: actions/upload-artifact@v4
54
+ with:
55
+ name: dist
56
+ path: dist/
@@ -0,0 +1,105 @@
1
+ name: Release
2
+
3
+ # Publish to PyPI when a version tag is pushed (e.g. `v1.2.3`).
4
+ # The version is derived from the tag by hatch-vcs, so the tag is the single
5
+ # source of truth — no version bump commit is needed.
6
+ on:
7
+ push:
8
+ tags:
9
+ - "v*"
10
+
11
+ jobs:
12
+ lint:
13
+ name: Lint (ruff)
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Install uv
19
+ uses: astral-sh/setup-uv@v5
20
+ with:
21
+ enable-cache: true
22
+
23
+ - name: Ruff lint
24
+ run: uvx ruff check --output-format=github .
25
+
26
+ - name: Ruff format check
27
+ run: uvx ruff format --check .
28
+
29
+ build:
30
+ name: Build distribution
31
+ needs: lint
32
+ runs-on: ubuntu-latest
33
+ steps:
34
+ - uses: actions/checkout@v4
35
+ with:
36
+ # Full history + tags so hatch-vcs resolves the version from the tag.
37
+ fetch-depth: 0
38
+
39
+ - name: Install uv
40
+ uses: astral-sh/setup-uv@v5
41
+ with:
42
+ enable-cache: true
43
+
44
+ - name: Build sdist and wheel
45
+ run: uv build
46
+
47
+ - name: Verify the built version matches the tag
48
+ run: |
49
+ tag="${GITHUB_REF_NAME#v}"
50
+ built=$(ls dist/*.whl | sed -E 's/.*bluebeam_set_manager-([^-]+)-.*/\1/')
51
+ echo "tag=$tag built=$built"
52
+ if [ "$tag" != "$built" ]; then
53
+ echo "::error::Built version '$built' does not match tag '$tag'." \
54
+ "Is the tag on a clean commit?"
55
+ exit 1
56
+ fi
57
+
58
+ - name: Check metadata
59
+ run: uvx twine check dist/*
60
+
61
+ - name: Upload build artifacts
62
+ uses: actions/upload-artifact@v4
63
+ with:
64
+ name: dist
65
+ path: dist/
66
+
67
+ publish:
68
+ name: Publish to PyPI
69
+ needs: build
70
+ runs-on: ubuntu-latest
71
+ # Must match the trusted publisher configured on PyPI.
72
+ environment:
73
+ name: pypi
74
+ url: https://pypi.org/p/bluebeam-set-manager
75
+ permissions:
76
+ # Required for Trusted Publishing (OIDC) — no API token needed.
77
+ id-token: write
78
+ steps:
79
+ - name: Download build artifacts
80
+ uses: actions/download-artifact@v4
81
+ with:
82
+ name: dist
83
+ path: dist/
84
+
85
+ - name: Publish to PyPI
86
+ uses: pypa/gh-action-pypi-publish@release/v1
87
+
88
+ github-release:
89
+ name: Create GitHub release
90
+ needs: publish
91
+ runs-on: ubuntu-latest
92
+ permissions:
93
+ contents: write
94
+ steps:
95
+ - name: Download build artifacts
96
+ uses: actions/download-artifact@v4
97
+ with:
98
+ name: dist
99
+ path: dist/
100
+
101
+ - name: Create release
102
+ uses: softprops/action-gh-release@v2
103
+ with:
104
+ generate_release_notes: true
105
+ files: dist/*
@@ -0,0 +1,21 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ *.egg-info/
7
+
8
+ # Virtual environments
9
+ .venv/
10
+
11
+ # Backups written next to the original .bex on Save
12
+ *.bak-*
13
+
14
+ # Snapshots
15
+ *.svg
16
+
17
+ # Stray uv binary if copied into the project root
18
+ uv.exe
19
+
20
+ # Example Files
21
+ examples/
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: bluebeam-set-manager
3
+ Version: 0.1.0
4
+ Summary: TUI manager for Bluebeam Revu page-set (.bex) files
5
+ Requires-Python: >=3.12
6
+ Requires-Dist: textual>=0.80
File without changes
@@ -0,0 +1,188 @@
1
+ # Bluebeam Set Manager — Design Spec
2
+
3
+ **Date:** 2026-05-29
4
+ **Status:** Approved for implementation
5
+
6
+ ## Purpose
7
+
8
+ A Python TUI for managing Bluebeam Revu page-set files (`.bex`, XML). It presents
9
+ every sheet in the set as a tree (Category → Sheet Number → Revision), lets the user
10
+ view and edit a selected page's tags/metadata in a properties pane, and supports
11
+ batch sheet renumbering via an Excel-pasteable two-column table. All edits are
12
+ buffered (auto-saved to an in-memory buffer) and only written to disk on an explicit
13
+ Save / Save-As command.
14
+
15
+ ## File format (observed from `examples/DP-P WBMS - STR Markups.bex`)
16
+
17
+ - UTF-8 **with BOM**, **CRLF** line endings, root `<BluebeamRevuPageSet Version="1">`.
18
+ - Top-level set settings: `PageSort`, `PageShowSetting`, `PageSortType`, `AddendumKey`
19
+ (CDATA), `WildcardFilter`, `CategoryType`, `CategorizeBy`, etc. (display-only).
20
+ - A sequence of `<File>` elements. Each `<File>`:
21
+ - Contains the source-PDF path as **raw CDATA text** (`SP:` SharePoint URL),
22
+ written **both before and after** the page list — interleaved with child elements.
23
+ - `<Category>` (e.g. "Structural").
24
+ - One or more `<Page>` elements, each with:
25
+ - `<Label>` (page label within source PDF)
26
+ - `<Tags>` containing `<Tag ID="SheetName">`, `<Tag ID="SheetNumber">`,
27
+ `<Tag ID="RevisionNumber">`, and named tags like
28
+ `<Tag Name="Plot Date" Type="Date" DateFormat="M/d/yyyy">3/12/2025</Tag>`
29
+ - `<Width>`, `<Height>`, `<Index>`
30
+ - A "sheet instance" = one `<Page>`. The same SheetNumber can recur across pages/files
31
+ with different revision numbers.
32
+
33
+ **Quirk:** Bluebeam writes the CDATA path interleaved with child elements inside
34
+ `<File>`. Standard XML libraries will not round-trip this byte-for-byte, so saving uses
35
+ surgical byte-span edits (see below) rather than tree re-serialization.
36
+
37
+ ## Architecture
38
+
39
+ ```
40
+ bluebeam_set_manager/
41
+ ├── model/
42
+ │ ├── document.py # SetDocument: in-memory model of the .bex
43
+ │ ├── page.py # Page, Tag, FileEntry dataclasses
44
+ │ └── source.py # SourceText: original bytes + BOM/line-ending + Span
45
+ ├── io/
46
+ │ ├── parser.py # bytes -> SetDocument, recording byte spans per editable tag
47
+ │ └── writer.py # SetDocument + buffer -> bytes via surgical span splicing
48
+ ├── ops/
49
+ │ ├── buffer.py # EditBuffer: pending changes, dirty flag, apply/revert
50
+ │ └── renumber.py # renumber planning, validation, diff generation (headless)
51
+ └── tui/
52
+ ├── app.py # Textual App, key bindings, save/save-as/quit
53
+ ├── tree_pane.py # Category > SheetNumber > Revision tree
54
+ ├── props_pane.py # properties editor for the selected page
55
+ └── renumber_screen.py # modal: existing/new table + preview diff
56
+ ```
57
+
58
+ **Data flow:** load → `parser` builds `SetDocument` + span map → tree/props render
59
+ *effective* values (buffer overlaid on model) → edits recorded in `EditBuffer` (model
60
+ not mutated) → on Save, `writer` splices buffered changes into original bytes via the
61
+ span map → timestamped `.bak` written, then original overwritten.
62
+
63
+ ## Data model
64
+
65
+ ```python
66
+ Span = tuple[int, int] # (start, end) byte offsets into original file
67
+
68
+ @dataclass
69
+ class Tag:
70
+ key: str # "SheetName" | "SheetNumber" | "RevisionNumber" | named e.g. "Plot Date"
71
+ value: str
72
+ kind: Literal["id", "named"]
73
+ attrs: dict[str, str] # named-tag attrs: Type, DateFormat, ...
74
+ value_span: Span | None # byte span of value text; None for buffer-new tags
75
+
76
+ @dataclass
77
+ class Page:
78
+ label: str # read-only
79
+ tags: list[Tag]
80
+ width: int; height: int; index: int # read-only
81
+ file_ref: FileEntry
82
+ page_id: str # synthetic stable id: f"{file_idx}:{page_idx}"
83
+ tags_insert_offset: int # byte offset just before </Tags>, for new-tag insertion
84
+ # accessors: sheet_number, sheet_name, revision
85
+
86
+ @dataclass
87
+ class FileEntry:
88
+ category: str # read-only (v1)
89
+ path: str # SP: path, read-only
90
+ pages: list[Page]
91
+
92
+ @dataclass
93
+ class SetDocument:
94
+ settings: dict[str, str] # display-only
95
+ files: list[FileEntry]
96
+ source: SourceText
97
+ def pages(self) -> Iterator[Page]: ...
98
+ ```
99
+
100
+ **Tree projection** from `document.pages()`:
101
+ - L1 Category (distinct, sorted)
102
+ - L2 SheetNumber (distinct within category, natural-sorted: S-100 < S-110 < S-301)
103
+ - L3 one node per distinct RevisionNumber sharing that number; payload = the Page.
104
+
105
+ Selecting an L3 node loads that Page into the properties pane.
106
+
107
+ ## Editable vs read-only
108
+
109
+ - **Editable:** core sheet tags (SheetNumber, SheetName, RevisionNumber) and named/custom
110
+ tags (value + attrs; add/remove custom tags).
111
+ - **Read-only (displayed):** Category, Label, Width, Height, Index, source path,
112
+ set-level settings.
113
+
114
+ ## Edit buffer & save semantics
115
+
116
+ ```python
117
+ @dataclass
118
+ class TagEdit:
119
+ page_id: str
120
+ tag_key: str
121
+ new_value: str | None # None => delete tag
122
+ new_attrs: dict | None
123
+
124
+ class EditBuffer:
125
+ edits: dict[tuple[str, str], TagEdit] # keyed (page_id, tag_key); last-write-wins
126
+ def set_tag(page_id, key, value): ...
127
+ def add_tag(page_id, key, value, attrs): ...
128
+ def remove_tag(page_id, key): ...
129
+ def is_dirty() -> bool
130
+ def effective_value(page, key): ...
131
+ def revert_all(): ...
132
+ ```
133
+
134
+ - **Auto-save to buffer:** props-pane field edits commit on blur/Enter via `set_tag`.
135
+ Tree and props render effective values; a `●` dirty marker shows in the title bar.
136
+ - **Save (`Ctrl+S`):** writer resolves edits to spans/insertions, splices into original
137
+ bytes **back-to-front** (offsets stay valid), writes
138
+ `filename.bex.bak-YYYYMMDD-HHMMSS`, overwrites `filename.bex`, clears buffer.
139
+ - **Save-As (`Ctrl+Shift+S`):** same splice to a new path (no backup; original untouched);
140
+ active file switches to the new path.
141
+ - **Quit dirty:** modal confirm (Save / Discard / Cancel).
142
+ - Renumbering writes through the **same buffer** (batch `set_tag` on SheetNumber), so it
143
+ is buffered and revertable until explicit Save.
144
+
145
+ ## Batch renumbering
146
+
147
+ Modal (`R`) with an editable two-column `DataTable`: **Existing** | **New**.
148
+
149
+ - **Input:** type into cells, or **paste** a two-column block from Excel (split on
150
+ tab/newline; rows auto-added). Empty rows ignored; whitespace trimmed.
151
+ - **Plan & validate (Preview):**
152
+ 1. Build `{existing → new}`. Reject duplicate `existing` keys and duplicate `new`
153
+ values inline.
154
+ 2. Match **all pages** whose SheetNumber == existing, across all files/revisions.
155
+ 3. **Warn on no-match:** existing value matching zero pages.
156
+ 4. **Warn on collision:** new value already present in the set and not itself being
157
+ renumbered away.
158
+ 5. **Preview diff:** one row per affected page —
159
+ `Category · old# · rev · SheetName → new#`; warnings banner above.
160
+ - **Apply:** push changes into `EditBuffer` as SheetNumber edits (**SheetNumber only**;
161
+ Name/Label untouched). Tree rebuilds, dirty marker on. File untouched until Save.
162
+ - Engine `ops/renumber.py` is **headless/pure**: `(SetDocument, mapping) -> (plan, warnings)`.
163
+
164
+ ## Error handling
165
+
166
+ - **Parse:** non-`BluebeamRevuPageSet` files fail with a clear message. Pages with
167
+ missing expected tags are still listed; missing fields show `(none)`.
168
+ - **Save:** writer **round-trip self-check** — spliced output must re-parse to the same
169
+ logical model before overwrite; on mismatch, abort and keep original. Backup-write
170
+ failure aborts the save.
171
+ - **Encoding:** preserve detected BOM + CRLF; XML-escape edited values (`&`, `<`, `>`)
172
+ — the example contains `B&P`.
173
+
174
+ ## Testing
175
+
176
+ pytest. Coverage:
177
+ - Parser span accuracy (spans map to correct value text).
178
+ - Writer round-trip: load → save with no edits → byte-identical output.
179
+ - Single-tag edit splice correctness.
180
+ - Renumber planning: match / no-match / collision / duplicate-key.
181
+ - Buffer revert.
182
+ Fixtures: bundled `examples/*.bex` plus small synthetic snippets.
183
+
184
+ ## Stack & tooling
185
+
186
+ - Python ≥ 3.12, **uv** for venv + dependency management (`pyproject.toml`).
187
+ - Runtime dep: `textual`. Dev dep: `pytest`.
188
+ - Entry point: `bsm` console script and `python -m bluebeam_set_manager <file.bex>`.
@@ -0,0 +1,29 @@
1
+ [project]
2
+ name = "bluebeam-set-manager"
3
+ description = "TUI manager for Bluebeam Revu page-set (.bex) files"
4
+ readme = "README.md"
5
+ requires-python = ">=3.12"
6
+ version = "0.1.0"
7
+ dependencies = [
8
+ "textual>=0.80",
9
+ ]
10
+
11
+ [project.scripts]
12
+ bsm = "bluebeam_set_manager.__main__:main"
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "pytest>=8.0",
17
+ "pytest-asyncio>=0.23",
18
+ ]
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
23
+
24
+ [tool.hatch.build.targets.wheel]
25
+ packages = ["src/bluebeam_set_manager"]
26
+
27
+ [tool.pytest.ini_options]
28
+ asyncio_mode = "auto"
29
+ testpaths = ["tests"]
@@ -0,0 +1,3 @@
1
+ """Bluebeam Revu page-set (.bex) manager — TUI."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,21 @@
1
+ """CLI entry point: bsm [path-to-file.bex]"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+
7
+ from .tui import run
8
+
9
+
10
def main() -> None:
    """Read the command line and start the TUI.

    One optional positional argument, ``file``, is accepted. Its value (``None``
    when omitted) is passed straight to ``run``.
    """
    parser = argparse.ArgumentParser(
        prog="bsm",
        description="TUI manager for Bluebeam Revu page-set (.bex) files.",
    )
    parser.add_argument(
        "file",
        nargs="?",
        help="Path to a .bex page-set file to open.",
    )
    namespace = parser.parse_args()
    run(namespace.file)
18
+
19
+
20
+ if __name__ == "__main__":
21
+ main()
@@ -0,0 +1,15 @@
1
+ """Parsing and writing of .bex files."""
2
+
3
+ from .parser import parse, parse_bytes, ParseError
4
+ from .writer import render, save, save_as, BackupError, RoundTripError
5
+
6
+ __all__ = [
7
+ "parse",
8
+ "parse_bytes",
9
+ "ParseError",
10
+ "render",
11
+ "save",
12
+ "save_as",
13
+ "BackupError",
14
+ "RoundTripError",
15
+ ]
@@ -0,0 +1,146 @@
1
+ """Parse a Bluebeam .bex page set into a SetDocument, recording char spans.
2
+
3
+ The parser is deliberately regex/offset based rather than DOM based: Bluebeam writes
4
+ the source-PDF path as raw CDATA interleaved with child elements inside ``<File>``,
5
+ which a standard XML library will not round-trip. We only need a navigable model plus
6
+ the precise character spans of editable tag values, so a tolerant scan is the right
7
+ tool — and it lets the writer leave every untouched character exactly as it was.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ from xml.sax.saxutils import unescape
14
+
15
+ from ..model import FileEntry, Page, SetDocument, SourceText, Tag
16
+
17
# Document skeleton: root element, each <File>, and the <Category> / CDATA
# source path found inside a <File>.
_ROOT_RE = re.compile(r"<BluebeamRevuPageSet\b([^>]*)>")
_FILE_RE = re.compile(r"<File>(.*?)</File>", flags=re.S)
_CATEGORY_RE = re.compile(r"<Category>(.*?)</Category>", flags=re.S)
_CDATA_RE = re.compile(r"<!\[CDATA\[(.*?)\]\]>", flags=re.S)

# Per-page fields. The numeric ones capture digits only.
_PAGE_RE = re.compile(r"<Page>(.*?)</Page>", flags=re.S)
_LABEL_RE = re.compile(r"<Label>(.*?)</Label>", flags=re.S)
_WIDTH_RE = re.compile(r"<Width>(\d+)</Width>")
_HEIGHT_RE = re.compile(r"<Height>(\d+)</Height>")
_INDEX_RE = re.compile(r"<Index>(\d+)</Index>")

# Tag container, individual tags (group 1 = raw attribute text, group 2 = value)
# and double-quoted name="value" attribute pairs.
_TAGS_RE = re.compile(r"<Tags>(.*?)</Tags>", flags=re.S)
_TAG_RE = re.compile(r"<Tag\b([^>]*)>(.*?)</Tag>", flags=re.S)
_ATTR_RE = re.compile(r'(\w+)\s*=\s*"([^"]*)"')

# Simple set-level settings: <Name>value</Name> or <Name><![CDATA[..]]></Name>.
# Group 2 holds a CDATA payload, group 3 holds plain text; only one is set.
_SETTING_RE = re.compile(
    r"<(\w+)>(?:<!\[CDATA\[(.*?)\]\]>|([^<]*))</\1>",
    flags=re.S,
)
31
+
32
+
33
class ParseError(Exception):
    """Signals that the input could not be recognized as a Bluebeam page set.

    ``parse`` raises it when the ``<BluebeamRevuPageSet>`` root tag is absent.
    """
35
+
36
+
37
def parse_bytes(raw: bytes, path: str | None = None) -> SetDocument:
    """Parse a page set directly from its raw file bytes.

    The bytes are wrapped with ``SourceText.from_bytes`` and the result is handed
    to ``parse`` together with ``path``.
    """
    source = SourceText.from_bytes(raw)
    return parse(source, path=path)
39
+
40
+
41
def parse(source: SourceText, path: str | None = None) -> SetDocument:
    """Scan decoded page-set text into a ``SetDocument``.

    Args:
        source: Decoded file content. Only ``source.text`` is read here; the
            object itself is stored on the returned document.
        path: Optional file location, stored on the document unchanged.

    Returns:
        A document holding the set-level settings, the root element's
        attributes, and one ``FileEntry`` per ``<File>`` element in text order.

    Raises:
        ParseError: If the text contains no ``<BluebeamRevuPageSet ...>`` tag.
    """
    text = source.text
    root_match = _ROOT_RE.search(text)
    if root_match is None:
        raise ParseError("Not a BluebeamRevuPageSet file (missing root element).")

    file_matches = list(_FILE_RE.finditer(text))

    # Set-level settings sit between the root tag and the first <File>; with no
    # <File> at all, the region runs to the end of the text.
    settings_end = file_matches[0].start() if file_matches else len(text)
    settings = _parse_settings(text, root_match.end(), settings_end)

    files: list[FileEntry] = []
    for file_idx, file_match in enumerate(file_matches):
        body = file_match.group(1)
        # Absolute offset of the <File> body, so page offsets stay absolute too.
        body_offset = file_match.start(1)

        category_match = _CATEGORY_RE.search(body)
        if category_match:
            category = unescape(category_match.group(1).strip())
        else:
            category = ""

        # The first CDATA section inside <File> is taken as the source path.
        cdata_match = _CDATA_RE.search(body)
        src_path = cdata_match.group(1) if cdata_match else ""

        entry = FileEntry(category=category, path=src_path, pages=[])
        for page_idx, page_match in enumerate(_PAGE_RE.finditer(body)):
            page = _parse_page(
                body_offset + page_match.start(1),
                page_match.group(1),
                f"{file_idx}:{page_idx}",
            )
            page.file_ref = entry
            entry.pages.append(page)
        files.append(entry)

    return SetDocument(
        settings=settings,
        files=files,
        source=source,
        path=path,
        root_attrs=dict(_ATTR_RE.findall(root_match.group(1))),
    )
78
+
79
+
80
def _parse_settings(text: str, start: int, end: int) -> dict[str, str]:
    """Collect simple ``<Name>value</Name>`` settings from ``text[start:end]``.

    Args:
        text: The full decoded document text.
        start: Offset where the settings region begins.
        end: Offset where the settings region ends (exclusive).

    Returns:
        A mapping of element name to value. When a name occurs more than once,
        the last occurrence wins. Elements named ``File``, ``Page``, ``Tags`` or
        ``Tag`` are skipped.

    Plain-text values are XML-unescaped so ``&amp;`` reads as ``&``, matching how
    Category, Label and Tag values are decoded elsewhere in this module. CDATA
    values are stored verbatim because CDATA content is already literal.
    """
    structural = ("File", "Page", "Tags", "Tag")
    settings: dict[str, str] = {}
    for m in _SETTING_RE.finditer(text[start:end]):
        name = m.group(1)
        if name in structural:
            continue
        cdata_value = m.group(2)
        if cdata_value is not None:
            settings[name] = cdata_value
        else:
            settings[name] = unescape(m.group(3) or "")
    return settings
90
+
91
+
92
def _parse_page(page_base: int, inner: str, page_id: str) -> Page:
    """Build a ``Page`` from the text between ``<Page>`` and ``</Page>``.

    Args:
        page_base: Absolute offset (in the full text) of ``inner``'s first char.
        inner: The page element's content.
        page_id: Identifier stored on the resulting page.

    Returns:
        The parsed page. A missing ``<Label>`` yields ``""``; a missing
        ``<Width>``, ``<Height>`` or ``<Index>`` yields ``0``.
    """

    def _int_field(pattern: "re.Pattern") -> int:
        found = pattern.search(inner)
        return int(found.group(1)) if found else 0

    label_match = _LABEL_RE.search(inner)
    tags_match = _TAGS_RE.search(inner)

    if tags_match is None:
        parsed_tags: list[Tag] = []
        # No <Tags> element: fall back to the end of the page content.
        insert_at = page_base + len(inner)
    else:
        tags_base = page_base + tags_match.start(1)
        # End of the <Tags> content, i.e. where "</Tags>" starts.
        insert_at = page_base + tags_match.end(1)
        parsed_tags = [
            _parse_tag(tags_base, tag_match)
            for tag_match in _TAG_RE.finditer(tags_match.group(1))
        ]

    return Page(
        label=unescape(label_match.group(1)) if label_match else "",
        tags=parsed_tags,
        width=_int_field(_WIDTH_RE),
        height=_int_field(_HEIGHT_RE),
        index=_int_field(_INDEX_RE),
        page_id=page_id,
        tags_insert_offset=insert_at,
    )
121
+
122
+
123
def _parse_tag(tags_inner_base: int, tm: "re.Match") -> Tag:
    """Turn one ``_TAG_RE`` match into a ``Tag`` with absolute spans.

    Args:
        tags_inner_base: Absolute offset of the ``<Tags>`` content the match was
            found in; added to the match's relative offsets.
        tm: Match whose group 1 is the raw attribute text and group 2 the value.

    Returns:
        An ``"id"`` tag keyed by its ``ID`` attribute (with empty ``attrs``) when
        that attribute is present. Otherwise a ``"named"`` tag keyed by ``Name``
        (``""`` if absent) whose ``attrs`` hold the remaining attributes.
    """

    def _absolute(group: int = 0) -> tuple[int, int]:
        return (tags_inner_base + tm.start(group), tags_inner_base + tm.end(group))

    attributes = dict(_ATTR_RE.findall(tm.group(1)))

    if "ID" in attributes:
        key, kind, extra = attributes["ID"], "id", {}
    else:
        key = attributes.pop("Name", "")
        kind, extra = "named", attributes

    return Tag(
        key=key,
        value=unescape(tm.group(2)),
        kind=kind,
        attrs=extra,
        value_span=_absolute(2),
        full_span=_absolute(),
    )