changex-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. changex_core-0.1.0/.gitignore +38 -0
  2. changex_core-0.1.0/LICENSE +21 -0
  3. changex_core-0.1.0/PKG-INFO +56 -0
  4. changex_core-0.1.0/README.md +33 -0
  5. changex_core-0.1.0/pyproject.toml +53 -0
  6. changex_core-0.1.0/src/changex_core/__init__.py +177 -0
  7. changex_core-0.1.0/src/changex_core/adapters/__init__.py +156 -0
  8. changex_core-0.1.0/src/changex_core/adapters/base.py +109 -0
  9. changex_core-0.1.0/src/changex_core/adapters/csv_adapter.py +336 -0
  10. changex_core-0.1.0/src/changex_core/adapters/docx_adapter.py +418 -0
  11. changex_core-0.1.0/src/changex_core/adapters/docx_revisions.py +174 -0
  12. changex_core-0.1.0/src/changex_core/adapters/pptx_adapter.py +499 -0
  13. changex_core-0.1.0/src/changex_core/adapters/xlsx_adapter.py +429 -0
  14. changex_core-0.1.0/src/changex_core/baseline.py +68 -0
  15. changex_core-0.1.0/src/changex_core/cli.py +290 -0
  16. changex_core-0.1.0/src/changex_core/diff/__init__.py +29 -0
  17. changex_core-0.1.0/src/changex_core/diff/text_diff.py +217 -0
  18. changex_core-0.1.0/src/changex_core/journal/__init__.py +31 -0
  19. changex_core-0.1.0/src/changex_core/journal/canonical.py +118 -0
  20. changex_core-0.1.0/src/changex_core/journal/events.py +203 -0
  21. changex_core-0.1.0/src/changex_core/journal/journal.py +297 -0
  22. changex_core-0.1.0/src/changex_core/model/__init__.py +23 -0
  23. changex_core-0.1.0/src/changex_core/model/addressing.py +209 -0
  24. changex_core-0.1.0/src/changex_core/model/nodes.py +121 -0
  25. changex_core-0.1.0/src/changex_core/ops/__init__.py +49 -0
  26. changex_core-0.1.0/src/changex_core/ops/schema.json +246 -0
  27. changex_core-0.1.0/src/changex_core/ops/validation.py +181 -0
  28. changex_core-0.1.0/src/changex_core/ops/vocabulary.py +491 -0
  29. changex_core-0.1.0/src/changex_core/passive.py +217 -0
  30. changex_core-0.1.0/src/changex_core/paths.py +70 -0
  31. changex_core-0.1.0/src/changex_core/render/__init__.py +45 -0
  32. changex_core-0.1.0/src/changex_core/render/csv.py +142 -0
  33. changex_core-0.1.0/src/changex_core/render/document.py +157 -0
  34. changex_core-0.1.0/src/changex_core/render/html.py +119 -0
  35. changex_core-0.1.0/src/changex_core/render/pptx.py +224 -0
  36. changex_core-0.1.0/src/changex_core/render/save.py +62 -0
  37. changex_core-0.1.0/src/changex_core/render/server.py +391 -0
  38. changex_core-0.1.0/src/changex_core/render/xlsx.py +166 -0
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ .venv/
5
+ venv/
6
+ *.egg-info/
7
+ dist/
8
+ build/
9
+ .pytest_cache/
10
+ .mypy_cache/
11
+
12
+ # Node / Tauri
13
+ node_modules/
14
+ packages/viewer/dist/
15
+ packages/viewer/src-tauri/target/
16
+
17
+ # OS
18
+ .DS_Store
19
+
20
+ # Local editor/agent settings
21
+ .claude/settings.local.json
22
+
23
+ # ChangeX working artifacts
24
+ *.changex.tmp
25
+ examples/out/
26
+
27
+ # stray sample journals (regenerate via make demo)
28
+ examples/*.changex
29
+
30
+ # Tauri generated icons (regenerated by `tauri icon` / the desktop workflow)
31
+ packages/viewer/src-tauri/icons/32x32.png
32
+ packages/viewer/src-tauri/icons/128x128.png
33
+ packages/viewer/src-tauri/icons/128x128@2x.png
34
+ packages/viewer/src-tauri/icons/icon.icns
35
+ packages/viewer/src-tauri/icons/icon.ico
36
+ packages/viewer/src-tauri/icons/Square*.png
37
+ packages/viewer/src-tauri/icons/StoreLogo.png
38
+ packages/viewer/src-tauri/icons/_source.png
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ario Moniri
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: changex-core
3
+ Version: 0.1.0
4
+ Summary: ChangeX core: canonical document model, append-only provenance journal, and the docx native-revisions adapter (M0 spine).
5
+ Author: ChangeX
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Keywords: csv,docx,event-sourcing,ooxml,pptx,provenance,track-changes,xlsx
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Topic :: Office/Business :: Office Suites
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: lxml>=4.9.0
15
+ Requires-Dist: openpyxl>=3.1.0
16
+ Requires-Dist: python-docx>=1.1.0
17
+ Requires-Dist: python-pptx>=0.6.21
18
+ Provides-Extra: dev
19
+ Requires-Dist: mypy>=1.5; extra == 'dev'
20
+ Requires-Dist: pytest>=7.0; extra == 'dev'
21
+ Requires-Dist: ruff>=0.1; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # changex-core
25
+
26
+ Core of **ChangeX** — the provenance-first edit-tracking spine (roadmap M0).
27
+
28
+ This package contains, with **no network access** and **no MCP dependency**:
29
+
30
+ - **`changex_core.model`** — a canonical, addressable document-node tree whose
31
+ `node_id` is an *opaque, edit-invariant* identifier (NOT a content hash). For
32
+ docx paragraphs it reuses Word's native `w14:paraId`; for nodes lacking a native
33
+ id it mints a monotonic per-session counter. A content+position fingerprint is
34
+ demoted to a *fallback rebind anchor* used only for fuzzy re-resolution.
35
+ - **`changex_core.ops`** — the frozen **v0.1 op vocabulary** (docx-only):
36
+ `text.insert`, `text.delete`, `text.replace`, `node.insert`, `node.delete`,
37
+ `style.change`. Offsets are *node-relative* and *seq-ordered*; `before` substrings
38
+ are validated against current node content.
39
+ - **`changex_core.journal`** — the append-only JSONL `.changex` journal with an
40
+ RFC 8785 (JCS) canonicalized **sha256 hash chain**, plus `append`, `read`,
41
+ `replay`, `verify`, and `revert`.
42
+ - **`changex_core.adapters`** — the `DocumentAdapter` contract and the **docx
43
+ adapter** that loads a `.docx`, applies the v0.1 ops, and renders **native Word
44
+ revisions** (`<w:ins>` / `<w:del>` / `<w:delText>` / `<w:pPrChange>`) with
45
+ centrally-allocated unique `w:id`, `w:author = <model name>`, and `w:date`.
46
+ - **`changex_core.render`** — an HTML/markdown redline projection of the journal.
47
+ - **`changex_core.baseline`** — a baseline snapshot + out-of-band mismatch warning.
48
+ - **`changex_core.cli`** — a thin CLI (`changex track / review / verify`) that
49
+ exercises the spine for the M0 script-based acceptance test.
50
+
51
+ ## Threat model (hash chain)
52
+
53
+ The hash chain gives **tamper-evidence** for accidental corruption and naive
54
+ tampering only. An attacker who controls the `.changex` can recompute the whole
55
+ chain. Adversarial integrity requires out-of-band storage or signing (deferred to
56
+ M6).
@@ -0,0 +1,33 @@
1
+ # changex-core
2
+
3
+ Core of **ChangeX** — the provenance-first edit-tracking spine (roadmap M0).
4
+
5
+ This package contains, with **no network access** and **no MCP dependency**:
6
+
7
+ - **`changex_core.model`** — a canonical, addressable document-node tree whose
8
+ `node_id` is an *opaque, edit-invariant* identifier (NOT a content hash). For
9
+ docx paragraphs it reuses Word's native `w14:paraId`; for nodes lacking a native
10
+ id it mints a monotonic per-session counter. A content+position fingerprint is
11
+ demoted to a *fallback rebind anchor* used only for fuzzy re-resolution.
12
+ - **`changex_core.ops`** — the frozen **v0.1 op vocabulary** (docx-only):
13
+ `text.insert`, `text.delete`, `text.replace`, `node.insert`, `node.delete`,
14
+ `style.change`. Offsets are *node-relative* and *seq-ordered*; `before` substrings
15
+ are validated against current node content.
16
+ - **`changex_core.journal`** — the append-only JSONL `.changex` journal with an
17
+ RFC 8785 (JCS) canonicalized **sha256 hash chain**, plus `append`, `read`,
18
+ `replay`, `verify`, and `revert`.
19
+ - **`changex_core.adapters`** — the `DocumentAdapter` contract and the **docx
20
+ adapter** that loads a `.docx`, applies the v0.1 ops, and renders **native Word
21
+ revisions** (`<w:ins>` / `<w:del>` / `<w:delText>` / `<w:pPrChange>`) with
22
+ centrally-allocated unique `w:id`, `w:author = <model name>`, and `w:date`.
23
+ - **`changex_core.render`** — an HTML/markdown redline projection of the journal.
24
+ - **`changex_core.baseline`** — a baseline snapshot + out-of-band mismatch warning.
25
+ - **`changex_core.cli`** — a thin CLI (`changex track / review / verify`) that
26
+ exercises the spine for the M0 script-based acceptance test.
27
+
28
+ ## Threat model (hash chain)
29
+
30
+ The hash chain gives **tamper-evidence** for accidental corruption and naive
31
+ tampering only. An attacker who controls the `.changex` can recompute the whole
32
+ chain. Adversarial integrity requires out-of-band storage or signing (deferred to
33
+ M6).
@@ -0,0 +1,53 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "changex-core"
7
+ version = "0.1.0"
8
+ description = "ChangeX core: canonical document model, append-only provenance journal, and the docx native-revisions adapter (M0 spine)."
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [{ name = "ChangeX" }]
13
+ keywords = ["docx", "xlsx", "pptx", "csv", "track-changes", "provenance", "ooxml", "event-sourcing"]
14
+ classifiers = [
15
+ "Programming Language :: Python :: 3.11",
16
+ "Programming Language :: Python :: 3.12",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Topic :: Office/Business :: Office Suites",
19
+ ]
20
+ dependencies = [
21
+ "python-docx>=1.1.0",
22
+ "lxml>=4.9.0",
23
+ # v0.2 format adapters (phase-2): xlsx/csv use openpyxl, pptx uses python-pptx.
24
+ # Declared here so the phase-2 adapter agents never need to touch pyproject.
25
+ "openpyxl>=3.1.0",
26
+ "python-pptx>=0.6.21",
27
+ ]
28
+
29
+ [project.optional-dependencies]
30
+ dev = [
31
+ "pytest>=7.0",
32
+ "ruff>=0.1",
33
+ "mypy>=1.5",
34
+ ]
35
+
36
+ [project.scripts]
37
+ changex = "changex_core.cli:main"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/changex_core"]
41
+
42
+ [tool.hatch.build.targets.sdist]
43
+ include = ["src/changex_core", "README.md"]
44
+
45
+ [tool.ruff]
46
+ line-length = 100
47
+ target-version = "py311"
48
+
49
+ [tool.mypy]
50
+ python_version = "3.11"
51
+ strict = true
52
+ warn_unused_ignores = true
53
+ ignore_missing_imports = true
@@ -0,0 +1,177 @@
1
+ """ChangeX core (M0 spine): canonical model, provenance journal, docx adapter.
2
+
3
+ Public API (the surface other packages — ``changex-mcp``, the CLI — code against):
4
+
5
+ Model
6
+ ``Node``, ``NodeKind``, ``NodeIdAllocator``, ``AnchorFingerprint``,
7
+ ``rebind`` — addressable nodes with opaque, edit-invariant ids.
8
+
9
+ Ops (frozen v0.2)
10
+ docx: ``TextInsert``, ``TextDelete``, ``TextReplace``, ``NodeInsert``,
11
+ ``NodeDelete``, ``StyleChange``. xlsx/csv: ``CellSet``, ``FormulaSet``,
12
+ ``RowInsert``, ``RowDelete``. pptx: ``SlideInsert``, ``SlideDelete``,
13
+ ``ShapeEdit``. Plus ``op_from_dict`` / ``op_to_dict`` and ``validate_op``.
14
+ (``format.run`` / ``node.move`` remain reserved.)
15
+
16
+ Journal
17
+ ``Journal`` (append / read / replay / verify / revert), ``Header``,
18
+ ``Event``, ``Provenance``, ``Target``, ``VerifyResult``, and the
19
+ canonicalization primitives ``canonicalize`` / ``chain_hash``.
20
+
21
+ Adapters
22
+ ``DocumentAdapter`` (the contract), ``DocxAdapter`` (native Word
23
+ revisions), and ``load_adapter`` (the extension-keyed factory that lazily
24
+ imports the right adapter for ``.docx`` / ``.xlsx`` / ``.csv`` / ``.pptx``),
25
+ plus the boundary errors ``BeforeMismatchError`` / ``OversizedOpError``.
26
+
27
+ Render / baseline
28
+ ``render_html`` / ``render_markdown`` and ``snapshot`` /
29
+ ``check_out_of_band``.
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ from changex_core.adapters import load_adapter
35
+ from changex_core.adapters.base import (
36
+ AdapterError,
37
+ BeforeMismatchError,
38
+ DocumentAdapter,
39
+ NodeNotFoundError,
40
+ OversizedOpError,
41
+ UnsupportedFormatError,
42
+ )
43
+ from changex_core.adapters.docx_adapter import DocxAdapter
44
+ from changex_core.baseline import (
45
+ Baseline,
46
+ OutOfBandWarning,
47
+ check_out_of_band,
48
+ snapshot,
49
+ )
50
+ from changex_core.journal.canonical import canonicalize, chain_hash, sha256_hex
51
+ from changex_core.journal.events import Event, Header, Provenance, Target
52
+ from changex_core.journal.journal import Journal, JournalError, VerifyResult
53
+ from changex_core.model.addressing import (
54
+ AnchorFingerprint,
55
+ NodeIdAllocator,
56
+ RebindResult,
57
+ rebind,
58
+ )
59
+ from changex_core.model.nodes import Node, NodeKind
60
+ from changex_core.ops.vocabulary import (
61
+ OP_SCHEMA_VERSION,
62
+ CellSet,
63
+ FormulaSet,
64
+ NodeDelete,
65
+ NodeInsert,
66
+ Op,
67
+ ReservedOpError,
68
+ RowDelete,
69
+ RowInsert,
70
+ ShapeEdit,
71
+ SlideDelete,
72
+ SlideInsert,
73
+ StyleChange,
74
+ TextDelete,
75
+ TextInsert,
76
+ TextReplace,
77
+ UnknownOpError,
78
+ op_from_dict,
79
+ op_to_dict,
80
+ target_node_id,
81
+ )
82
+ from changex_core.diff.text_diff import (
83
+ ParagraphSpec,
84
+ ReconstructedOp,
85
+ diff_paragraphs,
86
+ reconstruct_ops,
87
+ )
88
+ from changex_core.ops.validation import SchemaValidationError, validate_op
89
+ from changex_core.passive import (
90
+ OpenResult,
91
+ SealResult,
92
+ open_passive,
93
+ seal_passive,
94
+ )
95
+ from changex_core.render.document import render_document_html
96
+ from changex_core.render.html import render_html, render_markdown
97
+ from changex_core.render.save import save_active, save_active_from_path
98
+ from changex_core.render.server import build_server, serve
99
+
100
+ __version__ = "0.1.0"
101
+
102
+ __all__ = [
103
+ "__version__",
104
+ # model
105
+ "Node",
106
+ "NodeKind",
107
+ "NodeIdAllocator",
108
+ "AnchorFingerprint",
109
+ "RebindResult",
110
+ "rebind",
111
+ # ops (docx, v0.1)
112
+ "Op",
113
+ "TextInsert",
114
+ "TextDelete",
115
+ "TextReplace",
116
+ "NodeInsert",
117
+ "NodeDelete",
118
+ "StyleChange",
119
+ # ops (xlsx/csv + pptx, v0.2)
120
+ "CellSet",
121
+ "FormulaSet",
122
+ "RowInsert",
123
+ "RowDelete",
124
+ "SlideInsert",
125
+ "SlideDelete",
126
+ "ShapeEdit",
127
+ "OP_SCHEMA_VERSION",
128
+ "op_from_dict",
129
+ "op_to_dict",
130
+ "target_node_id",
131
+ "validate_op",
132
+ "SchemaValidationError",
133
+ "ReservedOpError",
134
+ "UnknownOpError",
135
+ # journal
136
+ "Journal",
137
+ "JournalError",
138
+ "VerifyResult",
139
+ "Header",
140
+ "Event",
141
+ "Provenance",
142
+ "Target",
143
+ "canonicalize",
144
+ "chain_hash",
145
+ "sha256_hex",
146
+ # adapters
147
+ "DocumentAdapter",
148
+ "DocxAdapter",
149
+ "load_adapter",
150
+ "AdapterError",
151
+ "BeforeMismatchError",
152
+ "OversizedOpError",
153
+ "NodeNotFoundError",
154
+ "UnsupportedFormatError",
155
+ # render / baseline
156
+ "render_html",
157
+ "render_markdown",
158
+ "render_document_html",
159
+ "Baseline",
160
+ "OutOfBandWarning",
161
+ "snapshot",
162
+ "check_out_of_band",
163
+ # render: journal-aware save + interactive review server
164
+ "save_active",
165
+ "save_active_from_path",
166
+ "build_server",
167
+ "serve",
168
+ # passive ("native to any model") capture + diff reconstruction
169
+ "open_passive",
170
+ "seal_passive",
171
+ "OpenResult",
172
+ "SealResult",
173
+ "ParagraphSpec",
174
+ "ReconstructedOp",
175
+ "diff_paragraphs",
176
+ "reconstruct_ops",
177
+ ]
@@ -0,0 +1,156 @@
1
+ """Document adapters: the contract, the docx implementation, and the registry.
2
+
3
+ The :class:`~changex_core.adapters.base.DocumentAdapter` ABC is the anti-drift
4
+ contract every format adapter implements. This package also owns the **format
5
+ registry + factory** (:func:`load_adapter`) that resolves a file path to the
6
+ adapter that handles its extension.
7
+
8
+ Lazy import is load-bearing
9
+ ---------------------------
10
+ The registry maps an extension to a ``"module:ClassName"`` target string and
11
+ imports the module **only when that format is actually loaded**. Concretely the
12
+ xlsx/csv/pptx adapter modules are created by phase-2 agents and may not exist (or
13
+ may pull heavy third-party deps like ``openpyxl`` / ``python-pptx``) when this
14
+ package is imported. Importing them eagerly here would (a) crash ``import
15
+ changex_core`` before phase-2 lands and (b) force every consumer to install every
16
+ format's dependencies. So this module **must not** import the adapter
17
+ implementations at top level — only ``base`` and the already-shipping
18
+ ``docx_adapter`` are imported eagerly.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from typing import Callable
24
+
25
+ from changex_core.adapters.base import (
26
+ AdapterError,
27
+ BeforeMismatchError,
28
+ DocumentAdapter,
29
+ NodeNotFoundError,
30
+ OversizedOpError,
31
+ UnsupportedFormatError,
32
+ )
33
+ from changex_core.adapters.docx_adapter import DocxAdapter
34
+ from changex_core.paths import safe_path
35
+
36
+ # --------------------------------------------------------------------------- #
37
+ # Format registry.
38
+ #
39
+ # Maps a lowercased file extension to a ``"<module>:<ClassName>"`` target. The
40
+ # class is resolved by a LAZY import (see ``_resolve``) so the adapter module is
41
+ # only imported when a file of that format is loaded — phase-2 modules
42
+ # (xlsx_adapter, csv_adapter, pptx_adapter) therefore need not exist at import
43
+ # time, and their third-party deps stay opt-in.
44
+ # --------------------------------------------------------------------------- #
45
# Extension -> ``"<module>:<ClassName>"`` target. Values are plain strings so
# that nothing is imported until a file of that format is actually loaded.
_REGISTRY: dict[str, str] = dict(
    [
        (".docx", "changex_core.adapters.docx_adapter:DocxAdapter"),
        (".xlsx", "changex_core.adapters.xlsx_adapter:XlsxAdapter"),
        (".csv", "changex_core.adapters.csv_adapter:CsvAdapter"),
        (".pptx", "changex_core.adapters.pptx_adapter:PptxAdapter"),
    ]
)

# Registered extensions in registration order; passed to ``safe_path`` as the
# ``allow_suffixes`` guard so unknown extensions are rejected before any import.
SUPPORTED_SUFFIXES: tuple[str, ...] = tuple(_REGISTRY.keys())
55
+
56
+
57
def supported_suffixes() -> tuple[str, ...]:
    """Return the registered file extensions as an immutable tuple.

    This is a function-call accessor for the module-level
    ``SUPPORTED_SUFFIXES`` constant; the same tuple object is returned on
    every call.
    """
    suffixes: tuple[str, ...] = SUPPORTED_SUFFIXES
    return suffixes
60
+
61
+
62
def _resolve(target: str) -> type[DocumentAdapter]:
    """Import a ``"module:ClassName"`` target on demand and return the class.

    Args:
        target: Registry value of the form ``"<module>:<ClassName>"``. The
            text before the first ``:`` is the module to import; the text
            after it is the attribute looked up on that module.

    Returns:
        The resolved class, guaranteed to be a ``DocumentAdapter`` subclass.

    Raises:
        UnsupportedFormatError: if importing the module or looking up the
            attribute fails (the original exception is chained as the cause),
            or if the attribute is not a ``DocumentAdapter`` subclass.
    """
    # Imported here rather than at module top so the lookup machinery is only
    # touched when an adapter is actually requested.
    from importlib import import_module

    module_name, _sep, class_name = target.partition(":")
    try:
        candidate = getattr(import_module(module_name), class_name)
    except (ImportError, AttributeError) as exc:  # adapter not yet implemented
        raise UnsupportedFormatError(
            f"adapter {target!r} is not available: {exc}"
        ) from exc

    # ``isinstance(..., type)`` first: ``issubclass`` raises on non-classes.
    if isinstance(candidate, type) and issubclass(candidate, DocumentAdapter):
        return candidate
    raise UnsupportedFormatError(
        f"{target!r} does not resolve to a DocumentAdapter subclass"
    )
85
+
86
+
87
def adapter_class_for(path: str) -> type[DocumentAdapter]:
    """Look up the adapter **class** registered for the extension of ``path``.

    The path goes through ``safe_path`` with :data:`SUPPORTED_SUFFIXES` as the
    suffix allow-list, but without ``must_exist``, so this is a type lookup
    only and nothing is loaded. Use :func:`load_adapter` to also load the
    document.

    Args:
        path: A file path whose (lowercased) suffix selects the adapter.

    Returns:
        The lazily imported adapter class for that suffix.

    Raises:
        UnsupportedFormatError: if the suffix is not registered or the adapter
            module/class cannot be imported.
        UnsafePathError: propagated from ``safe_path`` when sanitization fails.
    """
    suffix = safe_path(path, allow_suffixes=SUPPORTED_SUFFIXES).suffix.lower()
    if suffix not in _REGISTRY:  # pragma: no cover - safe_path already guards the suffix
        raise UnsupportedFormatError(
            f"no adapter registered for {suffix!r}; supported: {SUPPORTED_SUFFIXES}"
        )
    return _resolve(_REGISTRY[suffix])
108
+
109
+
110
def load_adapter(path: str, **kwargs: object) -> DocumentAdapter:
    """Load ``path`` using the adapter registered for its file extension.

    This is the format-aware entry point intended for callers that should not
    hard-code a particular adapter. Dispatch is by **extension** via the
    registry, and the adapter module is imported lazily, so formats that are
    never loaded are never imported.

    ``path`` is sanitized first with ``must_exist=True`` and the registry's
    suffix allow-list. The sanitized path is then used both to pick the
    adapter class and as the argument to its ``load`` classmethod.

    Args:
        path: The document to load. Its extension selects the adapter.
        **kwargs: Forwarded unchanged to ``<Adapter>.load`` (adapter-specific,
            e.g. ``author=`` / ``date=`` for docx).

    Returns:
        The :class:`DocumentAdapter` instance returned by the adapter's
        ``load``.

    Raises:
        UnsupportedFormatError: if the extension is not registered or the
            adapter is not available.
        UnsafePathError: propagated from ``safe_path`` when the path fails
            sanitization or does not exist.
    """
    checked = str(
        safe_path(path, must_exist=True, allow_suffixes=SUPPORTED_SUFFIXES)
    )
    adapter_cls = adapter_class_for(checked)
    # The abstract ``load`` signature takes only ``path``; concrete adapters
    # may accept extra keyword arguments, hence the loose callable type.
    load: Callable[..., DocumentAdapter] = adapter_cls.load  # type: ignore[assignment]
    return load(checked, **kwargs)
139
+
140
+
141
+ __all__ = [
142
+ # contract + errors
143
+ "DocumentAdapter",
144
+ "AdapterError",
145
+ "BeforeMismatchError",
146
+ "OversizedOpError",
147
+ "NodeNotFoundError",
148
+ "UnsupportedFormatError",
149
+ # docx implementation (shipping)
150
+ "DocxAdapter",
151
+ # registry / factory
152
+ "load_adapter",
153
+ "adapter_class_for",
154
+ "supported_suffixes",
155
+ "SUPPORTED_SUFFIXES",
156
+ ]
@@ -0,0 +1,109 @@
1
+ """The ``DocumentAdapter`` contract every format adapter implements.
2
+
3
+ This abstract interface is the anti-drift contract between the journal/render
4
+ layer and each format. The journal calls only these methods, so a new format
5
+ (xlsx, pptx) plugs in without touching journal or render code.
6
+
7
+ Key invariants an implementation must uphold:
8
+
9
+ * ``to_model()`` returns a tree whose ``node_id``s are **opaque and
10
+ edit-invariant** (never content hashes).
11
+ * ``apply(op)`` mutates the in-memory model and **raises on a before-mismatch or
12
+ an oversized op** (the validation the MCP boundary also performs, enforced here
13
+ so direct/core callers get the same guarantee).
14
+ * ``render_tracked()`` returns native tracked-changes bytes (for docx: real
15
+ ``w:ins``/``w:del`` revisions a Word/LibreOffice user can accept/reject).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from abc import ABC, abstractmethod
21
+
22
+ from changex_core.model.nodes import Node
23
+ from changex_core.ops.vocabulary import Op
24
+
25
+
26
class AdapterError(RuntimeError):
    """Root of the adapter exception hierarchy.

    Catch this to handle any failure raised by a format adapter or by the
    adapter registry.
    """
28
+
29
+
30
class BeforeMismatchError(AdapterError):
    """An op's ``before`` substring was not found in the target node.

    This is the boundary guard against blind full-node overwrites. The caller
    must supply the exact text it intends to change, and the adapter refuses
    the op when the node's current content does not contain that text.
    """
37
+
38
+
39
class OversizedOpError(AdapterError):
    """An op rewrites too much of a node, or spans more than one node.

    The message is structured so that it tells the caller to split the change
    into smaller ops. The error text is itself the prompt the model is
    expected to act on.
    """
45
+
46
+
47
class NodeNotFoundError(AdapterError):
    """An op referenced a ``node_id`` that does not exist in the model."""
49
+
50
+
51
class UnsupportedFormatError(AdapterError):
    """No usable adapter is available for a path's file extension.

    Raised by the adapter registry (:func:`changex_core.adapters.load_adapter`
    and its helpers) in two situations:

    * the sanitized path's suffix is not one of the registered formats
      (``.docx`` / ``.xlsx`` / ``.csv`` / ``.pptx``), in which case the message
      lists the supported suffixes so the caller can correct the input; or
    * the suffix is registered but its adapter module/class cannot be imported
      or is not a ``DocumentAdapter`` subclass.
    """
59
+
60
+
61
class DocumentAdapter(ABC):
    """Abstract contract that every format adapter implements.

    Every method below is abstract, so a subclass can only be instantiated
    once it overrides all of them. Each base implementation raises
    ``NotImplementedError`` if it is reached via ``super()``.
    """

    @classmethod
    @abstractmethod
    def load(cls, path: str) -> "DocumentAdapter":
        """Construct an adapter from the document at a (sanitized) ``path``.

        Implementations are expected to build the canonical model as part of
        loading.
        """
        raise NotImplementedError()

    @abstractmethod
    def baseline_sha256(self) -> str:
        """Return the sha256 of the original document bytes captured at load time."""
        raise NotImplementedError()

    @abstractmethod
    def to_model(self) -> Node:
        """Return the root node of the current canonical model tree."""
        raise NotImplementedError()

    @abstractmethod
    def set_model(self, root: Node) -> None:
        """Replace the adapter's working state with ``root`` (used by replay)."""
        raise NotImplementedError()

    @abstractmethod
    def resolve(self, node_id: str) -> Node | None:
        """Look up a model node by ``node_id``; return ``None`` when it is absent."""
        raise NotImplementedError()

    @abstractmethod
    def apply(self, op: Op) -> None:
        """Apply a single op to the in-memory model.

        Raises:
            BeforeMismatchError: if the op's ``before`` text is absent.
            OversizedOpError: if the op is too large or spans several nodes.
            NodeNotFoundError: if the op's target node does not exist.
        """
        raise NotImplementedError()

    @abstractmethod
    def render_tracked(self) -> bytes:
        """Return the applied ops rendered as native tracked-changes bytes."""
        raise NotImplementedError()

    @abstractmethod
    def save(self, out_path: str) -> None:
        """Write the tracked document to a (sanitized) ``out_path``."""
        raise NotImplementedError()