changex-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changex_core-0.1.0/.gitignore +38 -0
- changex_core-0.1.0/LICENSE +21 -0
- changex_core-0.1.0/PKG-INFO +56 -0
- changex_core-0.1.0/README.md +33 -0
- changex_core-0.1.0/pyproject.toml +53 -0
- changex_core-0.1.0/src/changex_core/__init__.py +177 -0
- changex_core-0.1.0/src/changex_core/adapters/__init__.py +156 -0
- changex_core-0.1.0/src/changex_core/adapters/base.py +109 -0
- changex_core-0.1.0/src/changex_core/adapters/csv_adapter.py +336 -0
- changex_core-0.1.0/src/changex_core/adapters/docx_adapter.py +418 -0
- changex_core-0.1.0/src/changex_core/adapters/docx_revisions.py +174 -0
- changex_core-0.1.0/src/changex_core/adapters/pptx_adapter.py +499 -0
- changex_core-0.1.0/src/changex_core/adapters/xlsx_adapter.py +429 -0
- changex_core-0.1.0/src/changex_core/baseline.py +68 -0
- changex_core-0.1.0/src/changex_core/cli.py +290 -0
- changex_core-0.1.0/src/changex_core/diff/__init__.py +29 -0
- changex_core-0.1.0/src/changex_core/diff/text_diff.py +217 -0
- changex_core-0.1.0/src/changex_core/journal/__init__.py +31 -0
- changex_core-0.1.0/src/changex_core/journal/canonical.py +118 -0
- changex_core-0.1.0/src/changex_core/journal/events.py +203 -0
- changex_core-0.1.0/src/changex_core/journal/journal.py +297 -0
- changex_core-0.1.0/src/changex_core/model/__init__.py +23 -0
- changex_core-0.1.0/src/changex_core/model/addressing.py +209 -0
- changex_core-0.1.0/src/changex_core/model/nodes.py +121 -0
- changex_core-0.1.0/src/changex_core/ops/__init__.py +49 -0
- changex_core-0.1.0/src/changex_core/ops/schema.json +246 -0
- changex_core-0.1.0/src/changex_core/ops/validation.py +181 -0
- changex_core-0.1.0/src/changex_core/ops/vocabulary.py +491 -0
- changex_core-0.1.0/src/changex_core/passive.py +217 -0
- changex_core-0.1.0/src/changex_core/paths.py +70 -0
- changex_core-0.1.0/src/changex_core/render/__init__.py +45 -0
- changex_core-0.1.0/src/changex_core/render/csv.py +142 -0
- changex_core-0.1.0/src/changex_core/render/document.py +157 -0
- changex_core-0.1.0/src/changex_core/render/html.py +119 -0
- changex_core-0.1.0/src/changex_core/render/pptx.py +224 -0
- changex_core-0.1.0/src/changex_core/render/save.py +62 -0
- changex_core-0.1.0/src/changex_core/render/server.py +391 -0
- changex_core-0.1.0/src/changex_core/render/xlsx.py +166 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
.venv/
|
|
5
|
+
venv/
|
|
6
|
+
*.egg-info/
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
.pytest_cache/
|
|
10
|
+
.mypy_cache/
|
|
11
|
+
|
|
12
|
+
# Node / Tauri
|
|
13
|
+
node_modules/
|
|
14
|
+
packages/viewer/dist/
|
|
15
|
+
packages/viewer/src-tauri/target/
|
|
16
|
+
|
|
17
|
+
# OS
|
|
18
|
+
.DS_Store
|
|
19
|
+
|
|
20
|
+
# Local editor/agent settings
|
|
21
|
+
.claude/settings.local.json
|
|
22
|
+
|
|
23
|
+
# ChangeX working artifacts
|
|
24
|
+
*.changex.tmp
|
|
25
|
+
examples/out/
|
|
26
|
+
|
|
27
|
+
# stray sample journals (regenerate via make demo)
|
|
28
|
+
examples/*.changex
|
|
29
|
+
|
|
30
|
+
# Tauri generated icons (regenerated by `tauri icon` / the desktop workflow)
|
|
31
|
+
packages/viewer/src-tauri/icons/32x32.png
|
|
32
|
+
packages/viewer/src-tauri/icons/128x128.png
|
|
33
|
+
packages/viewer/src-tauri/icons/128x128@2x.png
|
|
34
|
+
packages/viewer/src-tauri/icons/icon.icns
|
|
35
|
+
packages/viewer/src-tauri/icons/icon.ico
|
|
36
|
+
packages/viewer/src-tauri/icons/Square*.png
|
|
37
|
+
packages/viewer/src-tauri/icons/StoreLogo.png
|
|
38
|
+
packages/viewer/src-tauri/icons/_source.png
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ario Moniri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: changex-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: ChangeX core: canonical document model, append-only provenance journal, and the docx native-revisions adapter (M0 spine).
|
|
5
|
+
Author: ChangeX
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: csv,docx,event-sourcing,ooxml,pptx,provenance,track-changes,xlsx
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: lxml>=4.9.0
|
|
15
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
16
|
+
Requires-Dist: python-docx>=1.1.0
|
|
17
|
+
Requires-Dist: python-pptx>=0.6.21
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: mypy>=1.5; extra == 'dev'
|
|
20
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: ruff>=0.1; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# changex-core
|
|
25
|
+
|
|
26
|
+
Core of **ChangeX** — the provenance-first edit-tracking spine (roadmap M0).
|
|
27
|
+
|
|
28
|
+
This package contains, with **no network access** and **no MCP dependency**:
|
|
29
|
+
|
|
30
|
+
- **`changex_core.model`** — a canonical, addressable document-node tree whose
|
|
31
|
+
`node_id` is an *opaque, edit-invariant* identifier (NOT a content hash). For
|
|
32
|
+
docx paragraphs it reuses Word's native `w14:paraId`; for nodes lacking a native
|
|
33
|
+
id it mints a monotonic per-session counter. A content+position fingerprint is
|
|
34
|
+
demoted to a *fallback rebind anchor* used only for fuzzy re-resolution.
|
|
35
|
+
- **`changex_core.ops`** — the frozen **v0.1 op vocabulary** (docx-only):
|
|
36
|
+
`text.insert`, `text.delete`, `text.replace`, `node.insert`, `node.delete`,
|
|
37
|
+
`style.change`. Offsets are *node-relative* and *seq-ordered*; `before` substrings
|
|
38
|
+
are validated against current node content.
|
|
39
|
+
- **`changex_core.journal`** — the append-only JSONL `.changex` journal with an
|
|
40
|
+
RFC 8785 (JCS) canonicalized **sha256 hash chain**, plus `append`, `read`,
|
|
41
|
+
`replay`, `verify`, and `revert`.
|
|
42
|
+
- **`changex_core.adapters`** — the `DocumentAdapter` contract and the **docx
|
|
43
|
+
adapter** that loads a `.docx`, applies the v0.1 ops, and renders **native Word
|
|
44
|
+
revisions** (`<w:ins>` / `<w:del>` / `<w:delText>` / `<w:pPrChange>`) with
|
|
45
|
+
centrally-allocated unique `w:id`, `w:author = <model name>`, and `w:date`.
|
|
46
|
+
- **`changex_core.render`** — an HTML/markdown redline projection of the journal.
|
|
47
|
+
- **`changex_core.baseline`** — a baseline snapshot + out-of-band mismatch warning.
|
|
48
|
+
- **`changex_core.cli`** — a thin CLI (`changex track / review / verify`) that
|
|
49
|
+
exercises the spine for the M0 script-based acceptance test.
|
|
50
|
+
|
|
51
|
+
## Threat model (hash chain)
|
|
52
|
+
|
|
53
|
+
The hash chain gives **tamper-evidence** for accidental corruption and naive
|
|
54
|
+
tampering only. An attacker who controls the `.changex` can recompute the whole
|
|
55
|
+
chain. Adversarial integrity requires out-of-band storage or signing (deferred to
|
|
56
|
+
M6).
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# changex-core
|
|
2
|
+
|
|
3
|
+
Core of **ChangeX** — the provenance-first edit-tracking spine (roadmap M0).
|
|
4
|
+
|
|
5
|
+
This package contains, with **no network access** and **no MCP dependency**:
|
|
6
|
+
|
|
7
|
+
- **`changex_core.model`** — a canonical, addressable document-node tree whose
|
|
8
|
+
`node_id` is an *opaque, edit-invariant* identifier (NOT a content hash). For
|
|
9
|
+
docx paragraphs it reuses Word's native `w14:paraId`; for nodes lacking a native
|
|
10
|
+
id it mints a monotonic per-session counter. A content+position fingerprint is
|
|
11
|
+
demoted to a *fallback rebind anchor* used only for fuzzy re-resolution.
|
|
12
|
+
- **`changex_core.ops`** — the frozen **v0.1 op vocabulary** (docx-only):
|
|
13
|
+
`text.insert`, `text.delete`, `text.replace`, `node.insert`, `node.delete`,
|
|
14
|
+
`style.change`. Offsets are *node-relative* and *seq-ordered*; `before` substrings
|
|
15
|
+
are validated against current node content.
|
|
16
|
+
- **`changex_core.journal`** — the append-only JSONL `.changex` journal with an
|
|
17
|
+
RFC 8785 (JCS) canonicalized **sha256 hash chain**, plus `append`, `read`,
|
|
18
|
+
`replay`, `verify`, and `revert`.
|
|
19
|
+
- **`changex_core.adapters`** — the `DocumentAdapter` contract and the **docx
|
|
20
|
+
adapter** that loads a `.docx`, applies the v0.1 ops, and renders **native Word
|
|
21
|
+
revisions** (`<w:ins>` / `<w:del>` / `<w:delText>` / `<w:pPrChange>`) with
|
|
22
|
+
centrally-allocated unique `w:id`, `w:author = <model name>`, and `w:date`.
|
|
23
|
+
- **`changex_core.render`** — an HTML/markdown redline projection of the journal.
|
|
24
|
+
- **`changex_core.baseline`** — a baseline snapshot + out-of-band mismatch warning.
|
|
25
|
+
- **`changex_core.cli`** — a thin CLI (`changex track / review / verify`) that
|
|
26
|
+
exercises the spine for the M0 script-based acceptance test.
|
|
27
|
+
|
|
28
|
+
## Threat model (hash chain)
|
|
29
|
+
|
|
30
|
+
The hash chain gives **tamper-evidence** for accidental corruption and naive
|
|
31
|
+
tampering only. An attacker who controls the `.changex` can recompute the whole
|
|
32
|
+
chain. Adversarial integrity requires out-of-band storage or signing (deferred to
|
|
33
|
+
M6).
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "changex-core"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "ChangeX core: canonical document model, append-only provenance journal, and the docx native-revisions adapter (M0 spine)."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "ChangeX" }]
|
|
13
|
+
keywords = ["docx", "xlsx", "pptx", "csv", "track-changes", "provenance", "ooxml", "event-sourcing"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Programming Language :: Python :: 3.11",
|
|
16
|
+
"Programming Language :: Python :: 3.12",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Topic :: Office/Business :: Office Suites",
|
|
19
|
+
]
|
|
20
|
+
dependencies = [
|
|
21
|
+
"python-docx>=1.1.0",
|
|
22
|
+
"lxml>=4.9.0",
|
|
23
|
+
# v0.2 format adapters (phase-2): xlsx/csv use openpyxl, pptx uses python-pptx.
|
|
24
|
+
# Declared here so the phase-2 adapter agents never need to touch pyproject.
|
|
25
|
+
"openpyxl>=3.1.0",
|
|
26
|
+
"python-pptx>=0.6.21",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
dev = [
|
|
31
|
+
"pytest>=7.0",
|
|
32
|
+
"ruff>=0.1",
|
|
33
|
+
"mypy>=1.5",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
changex = "changex_core.cli:main"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/changex_core"]
|
|
41
|
+
|
|
42
|
+
[tool.hatch.build.targets.sdist]
|
|
43
|
+
include = ["src/changex_core", "README.md"]
|
|
44
|
+
|
|
45
|
+
[tool.ruff]
|
|
46
|
+
line-length = 100
|
|
47
|
+
target-version = "py311"
|
|
48
|
+
|
|
49
|
+
[tool.mypy]
|
|
50
|
+
python_version = "3.11"
|
|
51
|
+
strict = true
|
|
52
|
+
warn_unused_ignores = true
|
|
53
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
"""ChangeX core (M0 spine): canonical model, provenance journal, docx adapter.
|
|
2
|
+
|
|
3
|
+
Public API (the surface other packages — ``changex-mcp``, the CLI — code against):
|
|
4
|
+
|
|
5
|
+
Model
|
|
6
|
+
``Node``, ``NodeKind``, ``NodeIdAllocator``, ``AnchorFingerprint``,
|
|
7
|
+
``rebind`` — addressable nodes with opaque, edit-invariant ids.
|
|
8
|
+
|
|
9
|
+
Ops (frozen v0.2)
|
|
10
|
+
docx: ``TextInsert``, ``TextDelete``, ``TextReplace``, ``NodeInsert``,
|
|
11
|
+
``NodeDelete``, ``StyleChange``. xlsx/csv: ``CellSet``, ``FormulaSet``,
|
|
12
|
+
``RowInsert``, ``RowDelete``. pptx: ``SlideInsert``, ``SlideDelete``,
|
|
13
|
+
``ShapeEdit``. Plus ``op_from_dict`` / ``op_to_dict`` and ``validate_op``.
|
|
14
|
+
(``format.run`` / ``node.move`` remain reserved.)
|
|
15
|
+
|
|
16
|
+
Journal
|
|
17
|
+
``Journal`` (append / read / replay / verify / revert), ``Header``,
|
|
18
|
+
``Event``, ``Provenance``, ``Target``, ``VerifyResult``, and the
|
|
19
|
+
canonicalization primitives ``canonicalize`` / ``chain_hash``.
|
|
20
|
+
|
|
21
|
+
Adapters
|
|
22
|
+
``DocumentAdapter`` (the contract), ``DocxAdapter`` (native Word
|
|
23
|
+
revisions), and ``load_adapter`` (the extension-keyed factory that lazily
|
|
24
|
+
imports the right adapter for ``.docx`` / ``.xlsx`` / ``.csv`` / ``.pptx``),
|
|
25
|
+
plus the boundary errors ``BeforeMismatchError`` / ``OversizedOpError``.
|
|
26
|
+
|
|
27
|
+
Render / baseline
|
|
28
|
+
``render_html`` / ``render_markdown`` and ``snapshot`` /
|
|
29
|
+
``check_out_of_band``.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
from changex_core.adapters import load_adapter
|
|
35
|
+
from changex_core.adapters.base import (
|
|
36
|
+
AdapterError,
|
|
37
|
+
BeforeMismatchError,
|
|
38
|
+
DocumentAdapter,
|
|
39
|
+
NodeNotFoundError,
|
|
40
|
+
OversizedOpError,
|
|
41
|
+
UnsupportedFormatError,
|
|
42
|
+
)
|
|
43
|
+
from changex_core.adapters.docx_adapter import DocxAdapter
|
|
44
|
+
from changex_core.baseline import (
|
|
45
|
+
Baseline,
|
|
46
|
+
OutOfBandWarning,
|
|
47
|
+
check_out_of_band,
|
|
48
|
+
snapshot,
|
|
49
|
+
)
|
|
50
|
+
from changex_core.journal.canonical import canonicalize, chain_hash, sha256_hex
|
|
51
|
+
from changex_core.journal.events import Event, Header, Provenance, Target
|
|
52
|
+
from changex_core.journal.journal import Journal, JournalError, VerifyResult
|
|
53
|
+
from changex_core.model.addressing import (
|
|
54
|
+
AnchorFingerprint,
|
|
55
|
+
NodeIdAllocator,
|
|
56
|
+
RebindResult,
|
|
57
|
+
rebind,
|
|
58
|
+
)
|
|
59
|
+
from changex_core.model.nodes import Node, NodeKind
|
|
60
|
+
from changex_core.ops.vocabulary import (
|
|
61
|
+
OP_SCHEMA_VERSION,
|
|
62
|
+
CellSet,
|
|
63
|
+
FormulaSet,
|
|
64
|
+
NodeDelete,
|
|
65
|
+
NodeInsert,
|
|
66
|
+
Op,
|
|
67
|
+
ReservedOpError,
|
|
68
|
+
RowDelete,
|
|
69
|
+
RowInsert,
|
|
70
|
+
ShapeEdit,
|
|
71
|
+
SlideDelete,
|
|
72
|
+
SlideInsert,
|
|
73
|
+
StyleChange,
|
|
74
|
+
TextDelete,
|
|
75
|
+
TextInsert,
|
|
76
|
+
TextReplace,
|
|
77
|
+
UnknownOpError,
|
|
78
|
+
op_from_dict,
|
|
79
|
+
op_to_dict,
|
|
80
|
+
target_node_id,
|
|
81
|
+
)
|
|
82
|
+
from changex_core.diff.text_diff import (
|
|
83
|
+
ParagraphSpec,
|
|
84
|
+
ReconstructedOp,
|
|
85
|
+
diff_paragraphs,
|
|
86
|
+
reconstruct_ops,
|
|
87
|
+
)
|
|
88
|
+
from changex_core.ops.validation import SchemaValidationError, validate_op
|
|
89
|
+
from changex_core.passive import (
|
|
90
|
+
OpenResult,
|
|
91
|
+
SealResult,
|
|
92
|
+
open_passive,
|
|
93
|
+
seal_passive,
|
|
94
|
+
)
|
|
95
|
+
from changex_core.render.document import render_document_html
|
|
96
|
+
from changex_core.render.html import render_html, render_markdown
|
|
97
|
+
from changex_core.render.save import save_active, save_active_from_path
|
|
98
|
+
from changex_core.render.server import build_server, serve
|
|
99
|
+
|
|
100
|
+
__version__ = "0.1.0"
|
|
101
|
+
|
|
102
|
+
__all__ = [
|
|
103
|
+
"__version__",
|
|
104
|
+
# model
|
|
105
|
+
"Node",
|
|
106
|
+
"NodeKind",
|
|
107
|
+
"NodeIdAllocator",
|
|
108
|
+
"AnchorFingerprint",
|
|
109
|
+
"RebindResult",
|
|
110
|
+
"rebind",
|
|
111
|
+
# ops (docx, v0.1)
|
|
112
|
+
"Op",
|
|
113
|
+
"TextInsert",
|
|
114
|
+
"TextDelete",
|
|
115
|
+
"TextReplace",
|
|
116
|
+
"NodeInsert",
|
|
117
|
+
"NodeDelete",
|
|
118
|
+
"StyleChange",
|
|
119
|
+
# ops (xlsx/csv + pptx, v0.2)
|
|
120
|
+
"CellSet",
|
|
121
|
+
"FormulaSet",
|
|
122
|
+
"RowInsert",
|
|
123
|
+
"RowDelete",
|
|
124
|
+
"SlideInsert",
|
|
125
|
+
"SlideDelete",
|
|
126
|
+
"ShapeEdit",
|
|
127
|
+
"OP_SCHEMA_VERSION",
|
|
128
|
+
"op_from_dict",
|
|
129
|
+
"op_to_dict",
|
|
130
|
+
"target_node_id",
|
|
131
|
+
"validate_op",
|
|
132
|
+
"SchemaValidationError",
|
|
133
|
+
"ReservedOpError",
|
|
134
|
+
"UnknownOpError",
|
|
135
|
+
# journal
|
|
136
|
+
"Journal",
|
|
137
|
+
"JournalError",
|
|
138
|
+
"VerifyResult",
|
|
139
|
+
"Header",
|
|
140
|
+
"Event",
|
|
141
|
+
"Provenance",
|
|
142
|
+
"Target",
|
|
143
|
+
"canonicalize",
|
|
144
|
+
"chain_hash",
|
|
145
|
+
"sha256_hex",
|
|
146
|
+
# adapters
|
|
147
|
+
"DocumentAdapter",
|
|
148
|
+
"DocxAdapter",
|
|
149
|
+
"load_adapter",
|
|
150
|
+
"AdapterError",
|
|
151
|
+
"BeforeMismatchError",
|
|
152
|
+
"OversizedOpError",
|
|
153
|
+
"NodeNotFoundError",
|
|
154
|
+
"UnsupportedFormatError",
|
|
155
|
+
# render / baseline
|
|
156
|
+
"render_html",
|
|
157
|
+
"render_markdown",
|
|
158
|
+
"render_document_html",
|
|
159
|
+
"Baseline",
|
|
160
|
+
"OutOfBandWarning",
|
|
161
|
+
"snapshot",
|
|
162
|
+
"check_out_of_band",
|
|
163
|
+
# render: journal-aware save + interactive review server
|
|
164
|
+
"save_active",
|
|
165
|
+
"save_active_from_path",
|
|
166
|
+
"build_server",
|
|
167
|
+
"serve",
|
|
168
|
+
# passive ("native to any model") capture + diff reconstruction
|
|
169
|
+
"open_passive",
|
|
170
|
+
"seal_passive",
|
|
171
|
+
"OpenResult",
|
|
172
|
+
"SealResult",
|
|
173
|
+
"ParagraphSpec",
|
|
174
|
+
"ReconstructedOp",
|
|
175
|
+
"diff_paragraphs",
|
|
176
|
+
"reconstruct_ops",
|
|
177
|
+
]
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Document adapters: the contract, the docx implementation, and the registry.
|
|
2
|
+
|
|
3
|
+
The :class:`~changex_core.adapters.base.DocumentAdapter` ABC is the anti-drift
|
|
4
|
+
contract every format adapter implements. This package also owns the **format
|
|
5
|
+
registry + factory** (:func:`load_adapter`) that resolves a file path to the
|
|
6
|
+
adapter that handles its extension.
|
|
7
|
+
|
|
8
|
+
Lazy import is load-bearing
|
|
9
|
+
---------------------------
|
|
10
|
+
The registry maps an extension to a ``"module:ClassName"`` target string and
|
|
11
|
+
imports the module **only when that format is actually loaded**. Concretely the
|
|
12
|
+
xlsx/csv/pptx adapter modules are created by phase-2 agents and may not exist (or
|
|
13
|
+
may pull heavy third-party deps like ``openpyxl`` / ``python-pptx``) when this
|
|
14
|
+
package is imported. Importing them eagerly here would (a) crash ``import
|
|
15
|
+
changex_core`` before phase-2 lands and (b) force every consumer to install every
|
|
16
|
+
format's dependencies. So this module **must not** import the adapter
|
|
17
|
+
implementations at top level — only ``base`` and the already-shipping
|
|
18
|
+
``docx_adapter`` are imported eagerly.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
from typing import Callable
|
|
24
|
+
|
|
25
|
+
from changex_core.adapters.base import (
|
|
26
|
+
AdapterError,
|
|
27
|
+
BeforeMismatchError,
|
|
28
|
+
DocumentAdapter,
|
|
29
|
+
NodeNotFoundError,
|
|
30
|
+
OversizedOpError,
|
|
31
|
+
UnsupportedFormatError,
|
|
32
|
+
)
|
|
33
|
+
from changex_core.adapters.docx_adapter import DocxAdapter
|
|
34
|
+
from changex_core.paths import safe_path
|
|
35
|
+
|
|
36
|
+
# --------------------------------------------------------------------------- #
|
|
37
|
+
# Format registry.
|
|
38
|
+
#
|
|
39
|
+
# Maps a lowercased file extension to a ``"<module>:<ClassName>"`` target. The
|
|
40
|
+
# class is resolved by a LAZY import (see ``_resolve``) so the adapter module is
|
|
41
|
+
# only imported when a file of that format is loaded — phase-2 modules
|
|
42
|
+
# (xlsx_adapter, csv_adapter, pptx_adapter) therefore need not exist at import
|
|
43
|
+
# time, and their third-party deps stay opt-in.
|
|
44
|
+
# --------------------------------------------------------------------------- #
|
|
45
|
+
_REGISTRY: dict[str, str] = {
    ".docx": "changex_core.adapters.docx_adapter:DocxAdapter",
    ".xlsx": "changex_core.adapters.xlsx_adapter:XlsxAdapter",
    ".csv": "changex_core.adapters.csv_adapter:CsvAdapter",
    ".pptx": "changex_core.adapters.pptx_adapter:PptxAdapter",
}

# Every extension the registry can dispatch on, in registration order. It is
# handed to ``safe_path`` as ``allow_suffixes`` so that an unregistered extension
# is turned away at the path boundary, before any adapter import is attempted.
SUPPORTED_SUFFIXES: tuple[str, ...] = tuple(_REGISTRY.keys())
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def supported_suffixes() -> tuple[str, ...]:
    """Return the registered file extensions.

    Function-form accessor for the module-level :data:`SUPPORTED_SUFFIXES`
    tuple; the same tuple object is returned on every call.
    """
    return SUPPORTED_SUFFIXES
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _resolve(target: str) -> type[DocumentAdapter]:
    """Import the adapter class named by a ``"module:ClassName"`` target.

    The import happens here, at call time, rather than at package import, so a
    format's adapter module is only loaded once that format is requested.

    Args:
        target: A registry value of the form ``"<module>:<ClassName>"``.

    Returns:
        The class object named by ``target``.

    Raises:
        UnsupportedFormatError: if importing the module or looking up the class
            fails (the original ``ImportError`` / ``AttributeError`` is chained
            as the cause), or if the resolved attribute is not a
            :class:`DocumentAdapter` subclass.
    """
    from importlib import import_module

    # Split on the first ":" only: module path on the left, class name on the right.
    module_name, _sep, class_name = target.partition(":")
    try:
        candidate = getattr(import_module(module_name), class_name)
    except (ImportError, AttributeError) as exc:  # adapter not yet implemented
        raise UnsupportedFormatError(
            f"adapter {target!r} is not available: {exc}"
        ) from exc

    # ``isinstance(..., type)`` must come first: ``issubclass`` raises TypeError
    # when handed something that is not a class.
    if isinstance(candidate, type) and issubclass(candidate, DocumentAdapter):
        return candidate
    raise UnsupportedFormatError(
        f"{target!r} does not resolve to a DocumentAdapter subclass"
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def adapter_class_for(path: str) -> type[DocumentAdapter]:
    """Look up the adapter **class** registered for the extension of ``path``.

    ``path`` is passed through :func:`safe_path` with
    :data:`SUPPORTED_SUFFIXES` as the suffix guard; the lowercased suffix of the
    result then selects a registry target, which is imported lazily.
    ``must_exist`` is not requested here, so this is a type lookup for callers
    that construct the adapter themselves. :func:`load_adapter` additionally
    loads the document.

    Args:
        path: A file path whose extension selects the adapter.

    Returns:
        The adapter class for that extension (not an instance).

    Raises:
        UnsupportedFormatError: if the extension is not registered or the adapter
            module/class cannot be imported.
        UnsafePathError: if the path fails sanitization.
    """
    suffix = safe_path(path, allow_suffixes=SUPPORTED_SUFFIXES).suffix.lower()
    if suffix in _REGISTRY:
        return _resolve(_REGISTRY[suffix])
    raise UnsupportedFormatError(  # pragma: no cover - safe_path already guards the suffix
        f"no adapter registered for {suffix!r}; supported: {SUPPORTED_SUFFIXES}"
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def load_adapter(path: str, **kwargs: object) -> DocumentAdapter:
    """Open ``path`` with whichever adapter is registered for its extension.

    Format-aware entry point intended for the CLI and MCP server, so they do not
    hard-code :class:`DocxAdapter`. Dispatch is by **extension** (``.docx`` ->
    :class:`DocxAdapter`, ``.xlsx`` -> ``XlsxAdapter``, ``.csv`` -> ``CsvAdapter``,
    ``.pptx`` -> ``PptxAdapter``). The adapter module is imported lazily, so
    formats that are never loaded never pull in their third-party dependencies.

    The path is sanitized first (it must exist and carry a supported suffix).
    The sanitized path, as a string, is what the adapter's ``load`` classmethod
    receives, together with any extra ``kwargs`` (e.g. ``author=`` / ``date=``
    for docx). Adapters should accept the kwargs they understand.

    Args:
        path: The document to load. Extension selects the adapter.
        **kwargs: Forwarded unchanged to ``<Adapter>.load`` (adapter-specific).

    Returns:
        A loaded :class:`DocumentAdapter` for the file.

    Raises:
        UnsupportedFormatError: if the extension is not registered or the adapter
            is not yet available.
        UnsafePathError: if the path fails sanitization or does not exist.
    """
    location = str(
        safe_path(path, must_exist=True, allow_suffixes=SUPPORTED_SUFFIXES)
    )
    adapter_cls = adapter_class_for(location)
    # The abstract ``load`` only declares ``path``; widen the callable type so the
    # adapter-specific keyword arguments can be forwarded.
    open_document: Callable[..., DocumentAdapter] = adapter_cls.load  # type: ignore[assignment]
    return open_document(location, **kwargs)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
__all__ = [
|
|
142
|
+
# contract + errors
|
|
143
|
+
"DocumentAdapter",
|
|
144
|
+
"AdapterError",
|
|
145
|
+
"BeforeMismatchError",
|
|
146
|
+
"OversizedOpError",
|
|
147
|
+
"NodeNotFoundError",
|
|
148
|
+
"UnsupportedFormatError",
|
|
149
|
+
# docx implementation (shipping)
|
|
150
|
+
"DocxAdapter",
|
|
151
|
+
# registry / factory
|
|
152
|
+
"load_adapter",
|
|
153
|
+
"adapter_class_for",
|
|
154
|
+
"supported_suffixes",
|
|
155
|
+
"SUPPORTED_SUFFIXES",
|
|
156
|
+
]
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""The ``DocumentAdapter`` contract every format adapter implements.
|
|
2
|
+
|
|
3
|
+
This abstract interface is the anti-drift contract between the journal/render
|
|
4
|
+
layer and each format. The journal calls only these methods, so a new format
|
|
5
|
+
(xlsx, pptx) plugs in without touching journal or render code.
|
|
6
|
+
|
|
7
|
+
Key invariants an implementation must uphold:
|
|
8
|
+
|
|
9
|
+
* ``to_model()`` returns a tree whose ``node_id``s are **opaque and
|
|
10
|
+
edit-invariant** (never content hashes).
|
|
11
|
+
* ``apply(op)`` mutates the in-memory model and **raises on a before-mismatch or
|
|
12
|
+
an oversized op** (the validation the MCP boundary also performs, enforced here
|
|
13
|
+
so direct/core callers get the same guarantee).
|
|
14
|
+
* ``render_tracked()`` returns native tracked-changes bytes (for docx: real
|
|
15
|
+
``w:ins``/``w:del`` revisions a Word/LibreOffice user can accept/reject).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from abc import ABC, abstractmethod
|
|
21
|
+
|
|
22
|
+
from changex_core.model.nodes import Node
|
|
23
|
+
from changex_core.ops.vocabulary import Op
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AdapterError(RuntimeError):
    """Root of the adapter exception hierarchy.

    The other adapter errors in this module derive from it, so
    ``except AdapterError`` catches any of them.
    """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class BeforeMismatchError(AdapterError):
    """Signals that an op's ``before`` substring does not occur in the node.

    Boundary guard against blind full-node overwrites: the agent has to supply
    the exact text it means to change, and the adapter refuses the op when the
    node's current content does not contain that text.
    """
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class OversizedOpError(AdapterError):
    """Signals an op that rewrites too much of a node or spans several nodes.

    The structured message tells the caller to split the change into smaller
    ops. It is written to double as the prompt the model should act on.
    """
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class NodeNotFoundError(AdapterError):
    """Signals that an op's target ``node_id`` does not exist in the model."""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class UnsupportedFormatError(AdapterError):
    """Signals that no usable adapter exists for a path's file extension.

    Raised on the :func:`changex_core.adapters.load_adapter` path in two
    situations:

    * the (sanitized) path's suffix is not one of the registered formats
      (``.docx`` / ``.xlsx`` / ``.csv`` / ``.pptx``) -- the message lists the
      supported suffixes so the caller can correct the input;
    * the suffix is registered, but the adapter module or class cannot be
      imported, or the resolved object is not a ``DocumentAdapter`` subclass.
    """
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class DocumentAdapter(ABC):
    """Abstract interface that every format adapter implements.

    All eight methods are abstract, so a concrete adapter must override each of
    them before it can be instantiated.
    """

    @classmethod
    @abstractmethod
    def load(cls, path: str) -> "DocumentAdapter":
        """Build an adapter, and its model, from the document at ``path``.

        Args:
            path: A (sanitized) path to the document.

        Returns:
            The loaded adapter instance.
        """
        raise NotImplementedError

    @abstractmethod
    def baseline_sha256(self) -> str:
        """Return the sha256 of the original document bytes captured on load."""
        raise NotImplementedError

    @abstractmethod
    def to_model(self) -> Node:
        """Return the root node of the current canonical model tree."""
        raise NotImplementedError

    @abstractmethod
    def set_model(self, root: Node) -> None:
        """Replace the adapter's working state with ``root`` (used by replay)."""
        raise NotImplementedError

    @abstractmethod
    def resolve(self, node_id: str) -> Node | None:
        """Find the model node whose id is ``node_id``.

        Returns:
            The matching node, or ``None`` when the model has no such node.
        """
        raise NotImplementedError

    @abstractmethod
    def apply(self, op: Op) -> None:
        """Apply a single op to the model.

        Raises:
            BeforeMismatchError: if the op's ``before`` is absent.
            OversizedOpError: if the op is too large / multi-node.
            NodeNotFoundError: if the target node is missing.
        """
        raise NotImplementedError

    @abstractmethod
    def render_tracked(self) -> bytes:
        """Return the applied ops rendered as native tracked-changes bytes."""
        raise NotImplementedError

    @abstractmethod
    def save(self, out_path: str) -> None:
        """Write the tracked document to a (sanitized) ``out_path``."""
        raise NotImplementedError
|