langchain-adeu 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langchain_adeu-1.0.0/.gitignore +23 -0
- langchain_adeu-1.0.0/LICENSE +21 -0
- langchain_adeu-1.0.0/Makefile +34 -0
- langchain_adeu-1.0.0/PKG-INFO +221 -0
- langchain_adeu-1.0.0/README.md +175 -0
- langchain_adeu-1.0.0/langchain_adeu/__init__.py +56 -0
- langchain_adeu-1.0.0/langchain_adeu/_shared.py +125 -0
- langchain_adeu-1.0.0/langchain_adeu/accept_all_changes.py +128 -0
- langchain_adeu-1.0.0/langchain_adeu/apply_changes.py +283 -0
- langchain_adeu-1.0.0/langchain_adeu/diff_docx.py +133 -0
- langchain_adeu-1.0.0/langchain_adeu/read_docx.py +201 -0
- langchain_adeu-1.0.0/langchain_adeu/sanitize_docx.py +198 -0
- langchain_adeu-1.0.0/langchain_adeu/toolkit.py +81 -0
- langchain_adeu-1.0.0/pyproject.toml +80 -0
- langchain_adeu-1.0.0/tests/__init__.py +0 -0
- langchain_adeu-1.0.0/tests/conftest.py +36 -0
- langchain_adeu-1.0.0/tests/integration_tests/__init__.py +0 -0
- langchain_adeu-1.0.0/tests/integration_tests/conftest.py +44 -0
- langchain_adeu-1.0.0/tests/integration_tests/test_accept_all_changes.py +119 -0
- langchain_adeu-1.0.0/tests/integration_tests/test_apply_changes.py +202 -0
- langchain_adeu-1.0.0/tests/integration_tests/test_diff_docx.py +132 -0
- langchain_adeu-1.0.0/tests/integration_tests/test_read_docx.py +123 -0
- langchain_adeu-1.0.0/tests/integration_tests/test_sanitize_docx.py +100 -0
- langchain_adeu-1.0.0/tests/unit_tests/__init__.py +0 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_accept_all_changes.py +91 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_apply_changes.py +206 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_diff_docx.py +94 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_read_docx.py +89 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_sanitize_docx.py +101 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_shared.py +128 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_standard.py +126 -0
- langchain_adeu-1.0.0/tests/unit_tests/test_toolkit.py +108 -0
- langchain_adeu-1.0.0/uv.lock +2586 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# FILE: langchain/.gitignore
|
|
2
|
+
# Python build artifacts
|
|
3
|
+
dist/
|
|
4
|
+
build/
|
|
5
|
+
*.egg-info/
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
*$py.class
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
.python-version
|
|
13
|
+
|
|
14
|
+
# Caches
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.mypy_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
|
|
19
|
+
# Coverage
|
|
20
|
+
.coverage
|
|
21
|
+
htmlcov/
|
|
22
|
+
|
|
23
|
+
# uv lockfile lives in repo; nothing to ignore here for it.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dealfluence Oy
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
.PHONY: help install test integration_test lint format typecheck clean
|
|
2
|
+
|
|
3
|
+
help:
|
|
4
|
+
@echo "Targets:"
|
|
5
|
+
@echo " install Sync dev + test dependencies"
|
|
6
|
+
@echo " test Run unit tests (no network)"
|
|
7
|
+
@echo " integration_test Run integration tests (requires real DOCX fixtures)"
|
|
8
|
+
@echo " lint Run ruff in check mode"
|
|
9
|
+
@echo " format Apply ruff formatter and import sorter"
|
|
10
|
+
@echo " typecheck Run mypy"
|
|
11
|
+
@echo " clean Remove build artifacts and caches"
|
|
12
|
+
|
|
13
|
+
install:
|
|
14
|
+
uv sync --group dev --group test
|
|
15
|
+
|
|
16
|
+
test:
|
|
17
|
+
uv run --group test pytest --disable-socket --allow-unix-socket tests/unit_tests/ -v
|
|
18
|
+
|
|
19
|
+
integration_test:
|
|
20
|
+
uv run --group test pytest tests/integration_tests/ -v
|
|
21
|
+
|
|
22
|
+
lint:
|
|
23
|
+
uv run --group dev ruff check langchain_adeu tests
|
|
24
|
+
|
|
25
|
+
format:
|
|
26
|
+
uv run --group dev ruff format langchain_adeu tests
|
|
27
|
+
uv run --group dev ruff check --fix langchain_adeu tests
|
|
28
|
+
|
|
29
|
+
typecheck:
|
|
30
|
+
uv run --group dev mypy langchain_adeu
|
|
31
|
+
|
|
32
|
+
clean:
|
|
33
|
+
rm -rf dist build *.egg-info .pytest_cache .mypy_cache .ruff_cache
|
|
34
|
+
find . -type d -name __pycache__ -exec rm -rf {} +
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: langchain-adeu
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: LangChain integration for Adeu — track-changes for DOCX in the LLM era
|
|
5
|
+
Project-URL: Homepage, https://adeu.ai
|
|
6
|
+
Project-URL: Repository, https://github.com/dealfluence/adeu
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/dealfluence/adeu/issues
|
|
8
|
+
Project-URL: Documentation, https://github.com/dealfluence/adeu/tree/main/langchain
|
|
9
|
+
Author: Mikko Korpela, Uzair Ahmed
|
|
10
|
+
License: MIT License
|
|
11
|
+
|
|
12
|
+
Copyright (c) 2026 Dealfluence Oy
|
|
13
|
+
|
|
14
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
15
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
16
|
+
in the Software without restriction, including without limitation the rights
|
|
17
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
18
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
19
|
+
furnished to do so, subject to the following conditions:
|
|
20
|
+
|
|
21
|
+
The above copyright notice and this permission notice shall be included in all
|
|
22
|
+
copies or substantial portions of the Software.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
31
|
+
License-File: LICENSE
|
|
32
|
+
Keywords: agent,docx,langchain,redlining,tools,tracked-changes,word
|
|
33
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
34
|
+
Classifier: Intended Audience :: Developers
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
39
|
+
Classifier: Topic :: Office/Business :: Office Suites
|
|
40
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
41
|
+
Requires-Python: >=3.12
|
|
42
|
+
Requires-Dist: adeu>=1.7.5
|
|
43
|
+
Requires-Dist: langchain-core<2.0.0,>=0.3.0
|
|
44
|
+
Requires-Dist: pydantic>=2.0.0
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# langchain-adeu
|
|
48
|
+
|
|
49
|
+
[![PyPI version](https://img.shields.io/pypi/v/langchain-adeu.svg)](https://pypi.org/project/langchain-adeu/)
|
|
50
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
51
|
+
|
|
52
|
+
**LangChain integration for [Adeu](https://adeu.ai) — Track Changes for Microsoft Word (.docx) in the LLM Era.**
|
|
53
|
+
|
|
54
|
+
This package wraps the local, cross-platform, and offline-capable subset of Adeu's document-editing engine as native LangChain tools. It enables LangChain and LangGraph agents to read, edit, diff, sanitize, and finalize Microsoft Word documents while preserving the underlying formatting, layout, custom styles, and XML structures.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
Install the package via `pip` or `uv`:
|
|
61
|
+
|
|
62
|
+
### Using pip
|
|
63
|
+
```bash
|
|
64
|
+
pip install langchain-adeu
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Using uv
|
|
68
|
+
```bash
|
|
69
|
+
uv add langchain-adeu
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
Instantiate the `AdeuToolkit` and register its tools with a tool-calling chat model. This short example initializes an agent capable of managing docx operations.
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from langchain.agents import create_agent
|
|
80
|
+
from langchain_openai import ChatOpenAI
|
|
81
|
+
from langchain_adeu import AdeuToolkit
|
|
82
|
+
|
|
83
|
+
# Load a tool-calling model
|
|
84
|
+
model = ChatOpenAI(model="gpt-4o")
|
|
85
|
+
|
|
86
|
+
# Initialize the toolkit
|
|
87
|
+
toolkit = AdeuToolkit()
|
|
88
|
+
tools = toolkit.get_tools()
|
|
89
|
+
|
|
90
|
+
# Create the agent
|
|
91
|
+
agent = create_agent(model=model, tools=tools)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Worked Example: Multi-Tool Review & Redline Flow
|
|
97
|
+
|
|
98
|
+
Below is a complete, runnable workflow illustrating how an agent can read an existing draft, apply tracked changes, generate a visual diff, and sanitize metadata before sending it to a counterparty.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from langchain_adeu import AdeuToolkit
|
|
102
|
+
|
|
103
|
+
# 1. Instantiate the toolkit
|
|
104
|
+
tools_map = {t.name: t for t in AdeuToolkit().get_tools()}
|
|
105
|
+
|
|
106
|
+
read_tool = tools_map["adeu_read_docx"]
|
|
107
|
+
apply_tool = tools_map["adeu_apply_changes"]
|
|
108
|
+
diff_tool = tools_map["adeu_diff_docx"]
|
|
109
|
+
sanitize_tool = tools_map["adeu_sanitize_docx"]
|
|
110
|
+
|
|
111
|
+
input_path = "MSA_draft.docx"
|
|
112
|
+
redline_path = "MSA_redlined.docx"
|
|
113
|
+
clean_path = "MSA_final.docx"
|
|
114
|
+
|
|
115
|
+
# 2. Read the document to extract text with active tracked changes & comments
|
|
116
|
+
# clean_view=False ensures the LLM sees inline CriticMarkup (e.g. {++inserted++})
|
|
117
|
+
read_result = read_tool.invoke({
|
|
118
|
+
"file_path": input_path,
|
|
119
|
+
"clean_view": False,
|
|
120
|
+
"mode": "full",
|
|
121
|
+
"page": 1
|
|
122
|
+
})
|
|
123
|
+
print("--- Document Contents ---\n", read_result)
|
|
124
|
+
|
|
125
|
+
# 3. Apply a batch of edits (tracked modifications + a comment reply)
|
|
126
|
+
apply_result = apply_tool.invoke({
|
|
127
|
+
"file_path": input_path,
|
|
128
|
+
"author_name": "AI Reviewer",
|
|
129
|
+
"output_path": redline_path,
|
|
130
|
+
"changes": [
|
|
131
|
+
{
|
|
132
|
+
"type": "modify",
|
|
133
|
+
"target_text": "Governing Law shall be the State of New York.",
|
|
134
|
+
"new_text": "Governing Law shall be the State of Delaware.",
|
|
135
|
+
"comment": "Updating jurisdiction to corporate standard."
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
"type": "reply",
|
|
139
|
+
"target_id": "Com:1",
|
|
140
|
+
"text": "Agreed. Applied jurisdiction change."
|
|
141
|
+
}
|
|
142
|
+
]
|
|
143
|
+
})
|
|
144
|
+
print("\n--- Changes Applied ---\n", apply_result)
|
|
145
|
+
|
|
146
|
+
# 4. Generate a word-level diff to verify edits
|
|
147
|
+
diff_result = diff_tool.invoke({
|
|
148
|
+
"original_path": input_path,
|
|
149
|
+
"modified_path": redline_path,
|
|
150
|
+
"compare_clean": True
|
|
151
|
+
})
|
|
152
|
+
print("\n--- Word-Level Diff ---\n", diff_result)
|
|
153
|
+
|
|
154
|
+
# 5. Sanitize document properties and remove author history for final delivery
|
|
155
|
+
# keep_markup=True preserves unresolved track changes while stripping metadata
|
|
156
|
+
sanitize_result = sanitize_tool.invoke({
|
|
157
|
+
"file_path": redline_path,
|
|
158
|
+
"output_path": clean_path,
|
|
159
|
+
"keep_markup": True,
|
|
160
|
+
"author": "Anonymous Advisor"
|
|
161
|
+
})
|
|
162
|
+
print("\n--- Sanitization Report ---\n", sanitize_result)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
---
|
|
166
|
+
|
|
167
|
+
## Per-Tool Reference
|
|
168
|
+
|
|
169
|
+
| Tool Name | Purpose / When to Use | Key Input Parameters | Response Format / Output Shape |
|
|
170
|
+
| :--- | :--- | :--- | :--- |
|
|
171
|
+
| `adeu_read_docx` | Reads a `.docx` file into Markdown. Use `clean_view=False` to audit active track-changes. | `file_path` (str), `clean_view` (bool), `mode` (Literal), `page` (int) | `content_and_artifact` (Returns projected Markdown text + structured metadata artifact) |
|
|
172
|
+
| `adeu_apply_changes` | Commits a transactional batch of edits as native track-changes and comment threads. | `file_path` (str), `author_name` (str), `changes` (list[dict]), `output_path` (str) | `content_and_artifact` (Returns completion text + structured change stats) |
|
|
173
|
+
| `adeu_diff_docx` | Generates a word-level patch showing insertions and deletions between two files. | `original_path` (str), `modified_path` (str), `compare_clean` (bool) | `content` (Returns free-form `@@ Word Patch @@` visual text) |
|
|
174
|
+
| `adeu_accept_all_changes` | Resolves and bakes all tracked changes and format modifications into plain text. | `file_path` (str), `output_path` (str) | `content_and_artifact` (Returns completion text + artifact mapping paths) |
|
|
175
|
+
| `adeu_sanitize_docx` | Cleans document properties (author names, RSIDs, Custom XML, DMS traces). | `file_path` (str), `output_path` (str), `keep_markup` (bool), `accept_all` (bool) | `content_and_artifact` (Returns human-readable report text + structured cleanup stats) |
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## What's NOT Included
|
|
180
|
+
|
|
181
|
+
This package intentionally focuses on **local, cross-platform, offline-capable** workflows. For the following, use the [Adeu MCP server](https://github.com/dealfluence/adeu) directly:
|
|
182
|
+
|
|
183
|
+
- **Live MS Word Interop** (Windows COM) — real-time edits on an active Microsoft Word canvas.
|
|
184
|
+
- **Adeu Cloud Features** — email fetching, multi-document asynchronous semantic validation.
|
|
185
|
+
- **MCP Apps UI** — interactive Markdown preview rendering inside custom client interfaces.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## Development & Testing
|
|
190
|
+
|
|
191
|
+
We use `uv` for dependency management and workspace isolation.
|
|
192
|
+
|
|
193
|
+
### Installation
|
|
194
|
+
Sync development and testing dependencies locally:
|
|
195
|
+
```bash
|
|
196
|
+
make install
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Running Tests
|
|
200
|
+
To run unit tests (isolated, socket-disabled):
|
|
201
|
+
```bash
|
|
202
|
+
make test
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
To run integration tests (requires real fixture `.docx` documents):
|
|
206
|
+
```bash
|
|
207
|
+
make integration_test
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Code Formatting & Linting
|
|
211
|
+
We enforce Ruff for formatting and linting:
|
|
212
|
+
```bash
|
|
213
|
+
make format
|
|
214
|
+
make lint
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## License
|
|
220
|
+
|
|
221
|
+
MIT. See [LICENSE](../LICENSE).
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# langchain-adeu
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://img.shields.io/pypi/v/langchain-adeu.svg)](https://pypi.org/project/langchain-adeu/)
|
|
4
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
**LangChain integration for [Adeu](https://adeu.ai) — Track Changes for Microsoft Word (.docx) in the LLM Era.**
|
|
7
|
+
|
|
8
|
+
This package wraps the local, cross-platform, and offline-capable subset of Adeu's document-editing engine as native LangChain tools. It enables LangChain and LangGraph agents to read, edit, diff, sanitize, and finalize Microsoft Word documents while preserving the underlying formatting, layout, custom styles, and XML structures.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
Install the package via `pip` or `uv`:
|
|
15
|
+
|
|
16
|
+
### Using pip
|
|
17
|
+
```bash
|
|
18
|
+
pip install langchain-adeu
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Using uv
|
|
22
|
+
```bash
|
|
23
|
+
uv add langchain-adeu
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick Start
|
|
29
|
+
|
|
30
|
+
Instantiate the `AdeuToolkit` and register its tools with a tool-calling chat model. This short example initializes an agent capable of managing docx operations.
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from langchain.agents import create_agent
|
|
34
|
+
from langchain_openai import ChatOpenAI
|
|
35
|
+
from langchain_adeu import AdeuToolkit
|
|
36
|
+
|
|
37
|
+
# Load a tool-calling model
|
|
38
|
+
model = ChatOpenAI(model="gpt-4o")
|
|
39
|
+
|
|
40
|
+
# Initialize the toolkit
|
|
41
|
+
toolkit = AdeuToolkit()
|
|
42
|
+
tools = toolkit.get_tools()
|
|
43
|
+
|
|
44
|
+
# Create the agent
|
|
45
|
+
agent = create_agent(model=model, tools=tools)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Worked Example: Multi-Tool Review & Redline Flow
|
|
51
|
+
|
|
52
|
+
Below is a complete, runnable workflow illustrating how an agent can read an existing draft, apply tracked changes, generate a visual diff, and sanitize metadata before sending it to a counterparty.
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from langchain_adeu import AdeuToolkit
|
|
56
|
+
|
|
57
|
+
# 1. Instantiate the toolkit
|
|
58
|
+
tools_map = {t.name: t for t in AdeuToolkit().get_tools()}
|
|
59
|
+
|
|
60
|
+
read_tool = tools_map["adeu_read_docx"]
|
|
61
|
+
apply_tool = tools_map["adeu_apply_changes"]
|
|
62
|
+
diff_tool = tools_map["adeu_diff_docx"]
|
|
63
|
+
sanitize_tool = tools_map["adeu_sanitize_docx"]
|
|
64
|
+
|
|
65
|
+
input_path = "MSA_draft.docx"
|
|
66
|
+
redline_path = "MSA_redlined.docx"
|
|
67
|
+
clean_path = "MSA_final.docx"
|
|
68
|
+
|
|
69
|
+
# 2. Read the document to extract text with active tracked changes & comments
|
|
70
|
+
# clean_view=False ensures the LLM sees inline CriticMarkup (e.g. {++inserted++})
|
|
71
|
+
read_result = read_tool.invoke({
|
|
72
|
+
"file_path": input_path,
|
|
73
|
+
"clean_view": False,
|
|
74
|
+
"mode": "full",
|
|
75
|
+
"page": 1
|
|
76
|
+
})
|
|
77
|
+
print("--- Document Contents ---\n", read_result)
|
|
78
|
+
|
|
79
|
+
# 3. Apply a batch of edits (tracked modifications + a comment reply)
|
|
80
|
+
apply_result = apply_tool.invoke({
|
|
81
|
+
"file_path": input_path,
|
|
82
|
+
"author_name": "AI Reviewer",
|
|
83
|
+
"output_path": redline_path,
|
|
84
|
+
"changes": [
|
|
85
|
+
{
|
|
86
|
+
"type": "modify",
|
|
87
|
+
"target_text": "Governing Law shall be the State of New York.",
|
|
88
|
+
"new_text": "Governing Law shall be the State of Delaware.",
|
|
89
|
+
"comment": "Updating jurisdiction to corporate standard."
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"type": "reply",
|
|
93
|
+
"target_id": "Com:1",
|
|
94
|
+
"text": "Agreed. Applied jurisdiction change."
|
|
95
|
+
}
|
|
96
|
+
]
|
|
97
|
+
})
|
|
98
|
+
print("\n--- Changes Applied ---\n", apply_result)
|
|
99
|
+
|
|
100
|
+
# 4. Generate a word-level diff to verify edits
|
|
101
|
+
diff_result = diff_tool.invoke({
|
|
102
|
+
"original_path": input_path,
|
|
103
|
+
"modified_path": redline_path,
|
|
104
|
+
"compare_clean": True
|
|
105
|
+
})
|
|
106
|
+
print("\n--- Word-Level Diff ---\n", diff_result)
|
|
107
|
+
|
|
108
|
+
# 5. Sanitize document properties and remove author history for final delivery
|
|
109
|
+
# keep_markup=True preserves unresolved track changes while stripping metadata
|
|
110
|
+
sanitize_result = sanitize_tool.invoke({
|
|
111
|
+
"file_path": redline_path,
|
|
112
|
+
"output_path": clean_path,
|
|
113
|
+
"keep_markup": True,
|
|
114
|
+
"author": "Anonymous Advisor"
|
|
115
|
+
})
|
|
116
|
+
print("\n--- Sanitization Report ---\n", sanitize_result)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Per-Tool Reference
|
|
122
|
+
|
|
123
|
+
| Tool Name | Purpose / When to Use | Key Input Parameters | Response Format / Output Shape |
|
|
124
|
+
| :--- | :--- | :--- | :--- |
|
|
125
|
+
| `adeu_read_docx` | Reads a `.docx` file into Markdown. Use `clean_view=False` to audit active track-changes. | `file_path` (str), `clean_view` (bool), `mode` (Literal), `page` (int) | `content_and_artifact` (Returns projected Markdown text + structured metadata artifact) |
|
|
126
|
+
| `adeu_apply_changes` | Commits a transactional batch of edits as native track-changes and comment threads. | `file_path` (str), `author_name` (str), `changes` (list[dict]), `output_path` (str) | `content_and_artifact` (Returns completion text + structured change stats) |
|
|
127
|
+
| `adeu_diff_docx` | Generates a word-level patch showing insertions and deletions between two files. | `original_path` (str), `modified_path` (str), `compare_clean` (bool) | `content` (Returns free-form `@@ Word Patch @@` visual text) |
|
|
128
|
+
| `adeu_accept_all_changes` | Resolves and bakes all tracked changes and format modifications into plain text. | `file_path` (str), `output_path` (str) | `content_and_artifact` (Returns completion text + artifact mapping paths) |
|
|
129
|
+
| `adeu_sanitize_docx` | Cleans document properties (author names, RSIDs, Custom XML, DMS traces). | `file_path` (str), `output_path` (str), `keep_markup` (bool), `accept_all` (bool) | `content_and_artifact` (Returns human-readable report text + structured cleanup stats) |
|
|
130
|
+
|
|
131
|
+
---
|
|
132
|
+
|
|
133
|
+
## What's NOT Included
|
|
134
|
+
|
|
135
|
+
This package intentionally focuses on **local, cross-platform, offline-capable** workflows. For the following, use the [Adeu MCP server](https://github.com/dealfluence/adeu) directly:
|
|
136
|
+
|
|
137
|
+
- **Live MS Word Interop** (Windows COM) — real-time edits on an active Microsoft Word canvas.
|
|
138
|
+
- **Adeu Cloud Features** — email fetching, multi-document asynchronous semantic validation.
|
|
139
|
+
- **MCP Apps UI** — interactive Markdown preview rendering inside custom client interfaces.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Development & Testing
|
|
144
|
+
|
|
145
|
+
We use `uv` for dependency management and workspace isolation.
|
|
146
|
+
|
|
147
|
+
### Installation
|
|
148
|
+
Sync development and testing dependencies locally:
|
|
149
|
+
```bash
|
|
150
|
+
make install
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Running Tests
|
|
154
|
+
To run unit tests (isolated, socket-disabled):
|
|
155
|
+
```bash
|
|
156
|
+
make test
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
To run integration tests (requires real fixture `.docx` documents):
|
|
160
|
+
```bash
|
|
161
|
+
make integration_test
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Code Formatting & Linting
|
|
165
|
+
We enforce Ruff for formatting and linting:
|
|
166
|
+
```bash
|
|
167
|
+
make format
|
|
168
|
+
make lint
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
---
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT. See [LICENSE](../LICENSE).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# FILE: langchain/langchain_adeu/__init__.py
|
|
2
|
+
"""LangChain integration for Adeu — track-changes for DOCX in the LLM era.
|
|
3
|
+
|
|
4
|
+
This package exposes the local-only, offline-capable subset of Adeu's
|
|
5
|
+
document-editing capabilities as native LangChain tools. Use these tools
|
|
6
|
+
with `create_agent` or any LangGraph workflow to build agents that can
|
|
7
|
+
read, edit, diff, and sanitize Microsoft Word documents while preserving
|
|
8
|
+
the underlying OOXML structure.
|
|
9
|
+
|
|
10
|
+
Live MS Word integration (Windows-only COM) and Adeu Cloud features
|
|
11
|
+
(email, validation) are intentionally excluded from this package. Use the
|
|
12
|
+
Adeu MCP server directly for those workflows.
|
|
13
|
+
|
|
14
|
+
Quick start:
|
|
15
|
+
|
|
16
|
+
from langchain.agents import create_agent
|
|
17
|
+
from langchain_adeu import AdeuToolkit
|
|
18
|
+
|
|
19
|
+
agent = create_agent(
|
|
20
|
+
model="anthropic:claude-sonnet-4-6",
|
|
21
|
+
tools=AdeuToolkit().get_tools(),
|
|
22
|
+
)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
26
|
+
|
|
27
|
+
from langchain_adeu.accept_all_changes import (
|
|
28
|
+
AdeuAcceptAllChanges,
|
|
29
|
+
AdeuAcceptAllChangesInput,
|
|
30
|
+
)
|
|
31
|
+
from langchain_adeu.apply_changes import AdeuApplyChanges, AdeuApplyChangesInput
|
|
32
|
+
from langchain_adeu.diff_docx import AdeuDiffDocx, AdeuDiffDocxInput
|
|
33
|
+
from langchain_adeu.read_docx import AdeuReadDocx, AdeuReadDocxInput
|
|
34
|
+
from langchain_adeu.sanitize_docx import AdeuSanitizeDocx, AdeuSanitizeDocxInput
|
|
35
|
+
from langchain_adeu.toolkit import AdeuToolkit, get_tools
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
__version__ = version("langchain-adeu")
|
|
39
|
+
except PackageNotFoundError:
|
|
40
|
+
__version__ = "0.0.0-dev"
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
"AdeuAcceptAllChanges",
|
|
44
|
+
"AdeuAcceptAllChangesInput",
|
|
45
|
+
"AdeuApplyChanges",
|
|
46
|
+
"AdeuApplyChangesInput",
|
|
47
|
+
"AdeuDiffDocx",
|
|
48
|
+
"AdeuDiffDocxInput",
|
|
49
|
+
"AdeuReadDocx",
|
|
50
|
+
"AdeuReadDocxInput",
|
|
51
|
+
"AdeuSanitizeDocx",
|
|
52
|
+
"AdeuSanitizeDocxInput",
|
|
53
|
+
"AdeuToolkit",
|
|
54
|
+
"__version__",
|
|
55
|
+
"get_tools",
|
|
56
|
+
]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# FILE: langchain/langchain_adeu/_shared.py
|
|
2
|
+
"""Internal helpers shared across langchain-adeu tools.
|
|
3
|
+
|
|
4
|
+
This module deliberately keeps zero LangChain-specific logic in the
|
|
5
|
+
business layer — the tool classes are thin orchestrators that:
|
|
6
|
+
1. Validate input paths via `validate_docx_path` / `validate_path`
|
|
7
|
+
2. Call directly into the `adeu.*` SDK
|
|
8
|
+
3. Convert Adeu's domain errors into `ToolException` so LangChain's
|
|
9
|
+
agent middleware can present them cleanly to the model.
|
|
10
|
+
|
|
11
|
+
All path validation happens at the tool boundary so engine code never
|
|
12
|
+
sees a missing/wrong-extension file (those errors would bubble up as
|
|
13
|
+
opaque `python-docx` exceptions otherwise).
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import functools
|
|
19
|
+
from collections.abc import Callable
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from langchain_core.tools import ToolException
|
|
24
|
+
|
|
25
|
+
_DOCX_SUFFIX = ".docx"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def validate_path(path_str: str, *, must_exist: bool = True, label: str = "path") -> Path:
    """Validate a filesystem path string and return a resolved `Path`.

    The string is user-expanded (`~`) and resolved to an absolute path.
    Any failure is reported as a `ToolException` whose message includes
    `label`, so the caller gets a readable, attributable error.

    Args:
        path_str: The path as provided by the LLM.
        must_exist: When True (default), raise if the path doesn't exist on disk.
        label: Human-readable label for the path used in error messages
            (e.g. "input file", "baseline document").

    Raises:
        ToolException: If the path is empty or whitespace-only, cannot be
            expanded/resolved (malformed), or (when `must_exist=True`)
            does not exist.

    Returns:
        A resolved `Path` object.
    """
    if not path_str or not path_str.strip():
        raise ToolException(f"The {label} cannot be empty.")

    try:
        p = Path(path_str).expanduser().resolve()
    except (OSError, RuntimeError, ValueError) as e:
        # ValueError is included alongside OSError/RuntimeError because the
        # path layer can reject malformed strings (e.g. an embedded NUL
        # byte) with ValueError; without it the "malformed" case promised
        # in the docstring would escape as an unlabelled exception.
        raise ToolException(f"The {label} '{path_str}' is not a valid filesystem path: {e}") from e

    if must_exist and not p.exists():
        raise ToolException(f"The {label} does not exist: {p}")

    return p
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def validate_docx_path(path_str: str, *, must_exist: bool = True, label: str = "DOCX file") -> Path:
    """Validate a path that is required to name a `.docx` file.

    Delegates the generic checks to `validate_path`, then enforces the
    `.docx` suffix (case-insensitively) and, when the file is required to
    exist, that it is a regular file.

    Args:
        path_str: The path as provided by the LLM.
        must_exist: When True (default), raise if the file doesn't exist.
        label: Human-readable label used in error messages.

    Raises:
        ToolException: On the same conditions as `validate_path`, plus
            when the suffix is not `.docx`, or when `must_exist=True` and
            the path is not a regular file.

    Returns:
        A resolved `Path` to the DOCX file.
    """
    resolved = validate_path(path_str, must_exist=must_exist, label=label)
    suffix = resolved.suffix

    # Suffix comparison is case-insensitive, so "Report.DOCX" is accepted.
    if suffix.lower() != _DOCX_SUFFIX:
        raise ToolException(
            f"The {label} must be a .docx file, got '{suffix}': {resolved}. "
            "Adeu only supports modern Word (.docx) format; .doc and other "
            "formats are not supported."
        )

    # Only meaningful for inputs: a path that need not exist yet is not
    # checked for being a regular file.
    if must_exist and not resolved.is_file():
        raise ToolException(f"The {label} exists but is not a regular file: {resolved}")

    return resolved
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def wrap_tool_errors[F: Callable[..., Any]](func: F) -> F:
|
|
91
|
+
"""Decorator that converts Adeu/python-docx exceptions to `ToolException`.
|
|
92
|
+
|
|
93
|
+
Why: agents are far more useful when tool failures arrive as readable
|
|
94
|
+
`ToolMessage` content than when they crash the run loop. By raising
|
|
95
|
+
`ToolException` (rather than the original exception type), LangChain's
|
|
96
|
+
default `handle_tool_errors` middleware will format the message and
|
|
97
|
+
feed it back to the model, which can then correct its input and retry.
|
|
98
|
+
|
|
99
|
+
`ToolException` and `KeyboardInterrupt`/`SystemExit` are re-raised
|
|
100
|
+
untouched. Everything else is wrapped.
|
|
101
|
+
|
|
102
|
+
Use sparingly: only wrap entry-point functions called directly from
|
|
103
|
+
`_run` / `_arun`. Wrapping internal helpers hides stack traces during
|
|
104
|
+
development.
|
|
105
|
+
"""
|
|
106
|
+
|
|
107
|
+
@functools.wraps(func)
|
|
108
|
+
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
109
|
+
try:
|
|
110
|
+
return func(*args, **kwargs)
|
|
111
|
+
except ToolException:
|
|
112
|
+
raise
|
|
113
|
+
except (KeyboardInterrupt, SystemExit):
|
|
114
|
+
raise
|
|
115
|
+
except FileNotFoundError as e:
|
|
116
|
+
raise ToolException(f"File not found: {e}") from e
|
|
117
|
+
except (ValueError, OSError) as e:
|
|
118
|
+
raise ToolException(str(e)) from e
|
|
119
|
+
except Exception as e:
|
|
120
|
+
# Catch-all for python-docx, lxml, and other deep-stack failures.
|
|
121
|
+
# We deliberately surface the type name so debugging is possible
|
|
122
|
+
# from the agent's transcript alone.
|
|
123
|
+
raise ToolException(f"{type(e).__name__}: {e}") from e
|
|
124
|
+
|
|
125
|
+
return wrapper # type: ignore[return-value]
|