@heylemon/lemonade 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/canvas-host/a2ui/.bundle.hash +1 -1
- package/dist/slack/monitor/allow-list.js +2 -0
- package/dist/slack/monitor/context.js +2 -1
- package/dist/slack/monitor/message-handler/prepare.js +6 -3
- package/package.json +1 -1
- package/skills/docx/SKILL.md +25 -30
- package/skills/docx/scripts/accept_changes.py +0 -17
- package/skills/docx/scripts/comment.py +10 -39
- package/skills/docx/scripts/office/helpers/merge_runs.py +1 -33
- package/skills/docx/scripts/office/helpers/simplify_redlines.py +0 -43
- package/skills/docx/scripts/office/pack.py +0 -30
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
- package/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
- package/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
- package/skills/docx/scripts/office/soffice.py +0 -55
- package/skills/docx/scripts/office/unpack.py +5 -27
- package/skills/docx/scripts/office/validate.py +19 -14
- package/skills/docx/scripts/office/validators/base.py +48 -224
- package/skills/docx/scripts/office/validators/docx.py +44 -117
- package/skills/docx/scripts/office/validators/pptx.py +2 -42
- package/skills/docx/scripts/office/validators/redlining.py +3 -40
- package/skills/pdf/SKILL.md +22 -15
- package/skills/pdf/{FORMS.md → forms.md} +0 -14
- package/skills/pdf/scripts/check_bounding_boxes.py +0 -5
- package/skills/pdf/scripts/check_fillable_fields.py +0 -1
- package/skills/pdf/scripts/convert_pdf_to_images.py +0 -2
- package/skills/pdf/scripts/create_validation_image.py +0 -4
- package/skills/pdf/scripts/extract_form_field_info.py +1 -31
- package/skills/pdf/scripts/extract_form_structure.py +0 -9
- package/skills/pdf/scripts/fill_fillable_fields.py +0 -23
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +3 -38
- package/skills/pptx/SKILL.md +2 -29
- package/skills/pptx/editing.md +2 -2
- package/skills/pptx/pptxgenjs.md +53 -8
- package/skills/pptx/scripts/add_slide.py +0 -30
- package/skills/pptx/scripts/clean.py +0 -23
- package/skills/pptx/scripts/office/helpers/merge_runs.py +1 -33
- package/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -43
- package/skills/pptx/scripts/office/pack.py +0 -30
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
- package/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
- package/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
- package/skills/pptx/scripts/office/soffice.py +0 -55
- package/skills/pptx/scripts/office/unpack.py +5 -27
- package/skills/pptx/scripts/office/validate.py +19 -14
- package/skills/pptx/scripts/office/validators/base.py +48 -224
- package/skills/pptx/scripts/office/validators/docx.py +44 -117
- package/skills/pptx/scripts/office/validators/pptx.py +2 -42
- package/skills/pptx/scripts/office/validators/redlining.py +3 -40
- package/skills/pptx/scripts/thumbnail.py +0 -31
- package/skills/xlsx/SKILL.md +3 -26
- package/skills/xlsx/scripts/office/helpers/merge_runs.py +1 -33
- package/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -43
- package/skills/xlsx/scripts/office/pack.py +0 -30
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -1499
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -1085
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -3081
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -287
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -1676
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -174
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -582
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -4439
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -570
- package/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -116
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -42
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -50
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -49
- package/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -33
- package/skills/xlsx/scripts/office/soffice.py +0 -55
- package/skills/xlsx/scripts/office/unpack.py +5 -27
- package/skills/xlsx/scripts/office/validate.py +19 -14
- package/skills/xlsx/scripts/office/validators/base.py +48 -224
- package/skills/xlsx/scripts/office/validators/docx.py +44 -117
- package/skills/xlsx/scripts/office/validators/pptx.py +2 -42
- package/skills/xlsx/scripts/office/validators/redlining.py +3 -40
- package/skills/xlsx/scripts/recalc.py +2 -26
- package/skills/docx/scripts/__init__.py +0 -1
- package/skills/docx/scripts/office/helpers/__init__.py +0 -0
- package/skills/docx/scripts/office/validators/__init__.py +0 -15
- package/skills/pptx/scripts/__init__.py +0 -0
- package/skills/pptx/scripts/office/helpers/__init__.py +0 -0
- package/skills/pptx/scripts/office/validators/__init__.py +0 -15
- package/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
- package/skills/xlsx/scripts/office/validators/__init__.py +0 -15
- package/skills/pdf/{REFERENCE.md → reference.md} +0 -0
package/dist/build-info.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
0084b5bec47a901a88b379363f5f31530feeb541ab85d15e5f944c012ebbef96
|
|
@@ -21,6 +21,7 @@ export function resolveSlackAllowListMatch(params) {
|
|
|
21
21
|
}
|
|
22
22
|
const id = params.id?.toLowerCase();
|
|
23
23
|
const name = params.name?.toLowerCase();
|
|
24
|
+
const email = params.email?.toLowerCase();
|
|
24
25
|
const slug = normalizeSlackSlug(name);
|
|
25
26
|
const candidates = [
|
|
26
27
|
{ value: id, source: "id" },
|
|
@@ -29,6 +30,7 @@ export function resolveSlackAllowListMatch(params) {
|
|
|
29
30
|
{ value: name, source: "name" },
|
|
30
31
|
{ value: name ? `slack:${name}` : undefined, source: "prefixed-name" },
|
|
31
32
|
{ value: slug, source: "slug" },
|
|
33
|
+
{ value: email, source: "email" },
|
|
32
34
|
];
|
|
33
35
|
for (const candidate of candidates) {
|
|
34
36
|
if (!candidate.value)
|
|
@@ -98,7 +98,8 @@ export function createSlackMonitorContext(params) {
|
|
|
98
98
|
});
|
|
99
99
|
const profile = info.user?.profile;
|
|
100
100
|
const name = profile?.display_name || profile?.real_name || info.user?.name || undefined;
|
|
101
|
-
const
|
|
101
|
+
const email = profile?.email || undefined;
|
|
102
|
+
const entry = { name, email };
|
|
102
103
|
userCache.set(userId, entry);
|
|
103
104
|
return entry;
|
|
104
105
|
}
|
|
@@ -96,24 +96,26 @@ export async function prepareSlackMessage(params) {
|
|
|
96
96
|
return null;
|
|
97
97
|
}
|
|
98
98
|
if (ctx.dmPolicy !== "open") {
|
|
99
|
+
const dmSender = await ctx.resolveUserName(directUserId);
|
|
100
|
+
const dmSenderEmail = dmSender?.email ?? undefined;
|
|
99
101
|
const allowMatch = resolveSlackAllowListMatch({
|
|
100
102
|
allowList: allowFromLower,
|
|
101
103
|
id: directUserId,
|
|
104
|
+
email: dmSenderEmail,
|
|
102
105
|
});
|
|
103
106
|
const allowMatchMeta = formatAllowlistMatchMeta(allowMatch);
|
|
104
107
|
if (!allowMatch.allowed) {
|
|
105
|
-
// Check if sender is an approved guest (responses only, no tools)
|
|
106
108
|
const guestMatch = resolveSlackAllowListMatch({
|
|
107
109
|
allowList: guestFromLower,
|
|
108
110
|
id: directUserId,
|
|
111
|
+
email: dmSenderEmail,
|
|
109
112
|
});
|
|
110
113
|
if (guestMatch.allowed) {
|
|
111
114
|
isGuestSender = true;
|
|
112
115
|
logVerbose(`slack: guest sender ${directUserId} allowed (responses only, no tools)`);
|
|
113
116
|
}
|
|
114
117
|
else if (ctx.dmPolicy === "pairing" || ctx.dmPolicy === "allowlist") {
|
|
115
|
-
const
|
|
116
|
-
const senderName = sender?.name ?? undefined;
|
|
118
|
+
const senderName = dmSender?.name ?? undefined;
|
|
117
119
|
const { created } = await upsertChannelPairingRequest({
|
|
118
120
|
channel: "slack",
|
|
119
121
|
id: directUserId,
|
|
@@ -226,6 +228,7 @@ export async function prepareSlackMessage(params) {
|
|
|
226
228
|
allowList: allowFromLower,
|
|
227
229
|
id: senderId,
|
|
228
230
|
name: senderName,
|
|
231
|
+
email: sender?.email,
|
|
229
232
|
}).allowed;
|
|
230
233
|
const channelUsersAllowlistConfigured = isRoom && Array.isArray(channelConfig?.users) && channelConfig.users.length > 0;
|
|
231
234
|
const channelCommandAuthorized = isRoom && channelUsersAllowlistConfigured
|
package/package.json
CHANGED
package/skills/docx/SKILL.md
CHANGED
|
@@ -1,30 +1,11 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: docx
|
|
3
|
-
description: "
|
|
3
|
+
description: "Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). Triggers include: any mention of \"Word doc\", \"word document\", \".docx\", or requests to produce professional documents with formatting like tables of contents, headings, page numbers, or letterheads. Also use when extracting or reorganizing content from .docx files, inserting or replacing images in documents, performing find-and-replace in Word files, working with tracked changes or comments, or converting content into a polished Word document. If the user asks for a \"report\", \"memo\", \"letter\", \"template\", or similar deliverable as a Word or .docx file, use this skill. Do NOT use for PDFs, spreadsheets, Google Docs, or general coding tasks unrelated to document generation."
|
|
4
4
|
license: Proprietary. LICENSE.txt has complete terms
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# DOCX creation, editing, and analysis
|
|
8
8
|
|
|
9
|
-
## IMPORTANT: Save to Desktop
|
|
10
|
-
|
|
11
|
-
**Always save created `.docx` files to `~/Desktop/`** (e.g. `~/Desktop/document.docx`). Never save to the agent workspace or hidden directories — the user needs easy access to the file.
|
|
12
|
-
|
|
13
|
-
## CRITICAL: Integration Priority
|
|
14
|
-
|
|
15
|
-
### 1. `lemon-docs` CLI (For Google Docs)
|
|
16
|
-
If user wants a Google Doc (shareable, collaborative), use `lemon-docs`:
|
|
17
|
-
- `lemon-docs create "Title"` - Create a new Google Doc
|
|
18
|
-
- `lemon-docs read <id>` - Read a document
|
|
19
|
-
|
|
20
|
-
### 2. Local DOCX (For Files)
|
|
21
|
-
If user wants a local `.docx` file, or `lemon-docs` is not connected, use the local creation methods below.
|
|
22
|
-
|
|
23
|
-
### 3. Browser (LAST RESORT)
|
|
24
|
-
Only if `lemon-docs` CLI fails AND user explicitly requests Google Docs in browser.
|
|
25
|
-
|
|
26
|
-
---
|
|
27
|
-
|
|
28
9
|
## Overview
|
|
29
10
|
|
|
30
11
|
A .docx file is a ZIP archive containing XML files.
|
|
@@ -67,14 +48,14 @@ pdftoppm -jpeg -r 150 document.pdf page
|
|
|
67
48
|
To produce a clean document with all tracked changes accepted (requires LibreOffice):
|
|
68
49
|
|
|
69
50
|
```bash
|
|
70
|
-
python scripts/accept_changes.py input.docx
|
|
51
|
+
python scripts/accept_changes.py input.docx output.docx
|
|
71
52
|
```
|
|
72
53
|
|
|
73
54
|
---
|
|
74
55
|
|
|
75
56
|
## Creating New Documents
|
|
76
57
|
|
|
77
|
-
Generate .docx files with JavaScript. Install: `npm install -g docx`
|
|
58
|
+
Generate .docx files with JavaScript, then validate. Install: `npm install -g docx`
|
|
78
59
|
|
|
79
60
|
### Setup
|
|
80
61
|
```javascript
|
|
@@ -87,6 +68,12 @@ const doc = new Document({ sections: [{ children: [/* content */] }] });
|
|
|
87
68
|
Packer.toBuffer(doc).then(buffer => fs.writeFileSync("doc.docx", buffer));
|
|
88
69
|
```
|
|
89
70
|
|
|
71
|
+
### Validation
|
|
72
|
+
After creating the file, validate it. If validation fails, unpack, fix the XML, and repack.
|
|
73
|
+
```bash
|
|
74
|
+
python scripts/office/validate.py doc.docx
|
|
75
|
+
```
|
|
76
|
+
|
|
90
77
|
### Page Size
|
|
91
78
|
|
|
92
79
|
```javascript
|
|
@@ -113,6 +100,16 @@ sections: [{
|
|
|
113
100
|
| US Letter | 12,240 | 15,840 | 9,360 |
|
|
114
101
|
| A4 (default) | 11,906 | 16,838 | 9,026 |
|
|
115
102
|
|
|
103
|
+
**Landscape orientation:** docx-js swaps width/height internally, so pass portrait dimensions and let it handle the swap:
|
|
104
|
+
```javascript
|
|
105
|
+
size: {
|
|
106
|
+
width: 12240, // Pass SHORT edge as width
|
|
107
|
+
height: 15840, // Pass LONG edge as height
|
|
108
|
+
orientation: PageOrientation.LANDSCAPE // docx-js swaps them in the XML
|
|
109
|
+
},
|
|
110
|
+
// Content width = 15840 - left margin - right margin (uses the long edge)
|
|
111
|
+
```
|
|
112
|
+
|
|
116
113
|
### Styles (Override Built-in Headings)
|
|
117
114
|
|
|
118
115
|
Use Arial as the default font (universally supported). Keep titles black for readability.
|
|
@@ -184,8 +181,8 @@ const border = { style: BorderStyle.SINGLE, size: 1, color: "CCCCCC" };
|
|
|
184
181
|
const borders = { top: border, bottom: border, left: border, right: border };
|
|
185
182
|
|
|
186
183
|
new Table({
|
|
187
|
-
width: { size:
|
|
188
|
-
columnWidths: [4680, 4680], //
|
|
184
|
+
width: { size: 9360, type: WidthType.DXA }, // Always use DXA (percentages break in Google Docs)
|
|
185
|
+
columnWidths: [4680, 4680], // Must sum to table width (DXA: 1440 = 1 inch)
|
|
189
186
|
rows: [
|
|
190
187
|
new TableRow({
|
|
191
188
|
children: [
|
|
@@ -204,13 +201,9 @@ new Table({
|
|
|
204
201
|
|
|
205
202
|
**Table width calculation:**
|
|
206
203
|
|
|
207
|
-
|
|
204
|
+
Always use `WidthType.DXA` — `WidthType.PERCENTAGE` breaks in Google Docs.
|
|
208
205
|
|
|
209
206
|
```javascript
|
|
210
|
-
// Option 1: Percentage (recommended - automatically fits content area)
|
|
211
|
-
width: { size: 100, type: WidthType.PERCENTAGE }
|
|
212
|
-
|
|
213
|
-
// Option 2: DXA (precise control)
|
|
214
207
|
// Table width = sum of columnWidths = content width
|
|
215
208
|
// US Letter with 1" margins: 12240 - 2880 = 9360 DXA
|
|
216
209
|
width: { size: 9360, type: WidthType.DXA },
|
|
@@ -218,6 +211,7 @@ columnWidths: [7000, 2360] // Must sum to table width
|
|
|
218
211
|
```
|
|
219
212
|
|
|
220
213
|
**Width rules:**
|
|
214
|
+
- **Always use `WidthType.DXA`** — never `WidthType.PERCENTAGE` (incompatible with Google Docs)
|
|
221
215
|
- Table width must equal the sum of `columnWidths`
|
|
222
216
|
- Cell `width` must match corresponding `columnWidth`
|
|
223
217
|
- Cell `margins` are internal padding - they reduce content area, not add to cell width
|
|
@@ -276,11 +270,12 @@ sections: [{
|
|
|
276
270
|
### Critical Rules for docx-js
|
|
277
271
|
|
|
278
272
|
- **Set page size explicitly** - docx-js defaults to A4; use US Letter (12240 x 15840 DXA) for US documents
|
|
273
|
+
- **Landscape: pass portrait dimensions** - docx-js swaps width/height internally; pass short edge as `width`, long edge as `height`, and set `orientation: PageOrientation.LANDSCAPE`
|
|
279
274
|
- **Never use `\n`** - use separate Paragraph elements
|
|
280
275
|
- **Never use unicode bullets** - use `LevelFormat.BULLET` with numbering config
|
|
281
276
|
- **PageBreak must be in Paragraph** - standalone creates invalid XML
|
|
282
277
|
- **ImageRun requires `type`** - always specify png/jpg/etc
|
|
283
|
-
- **Always set table `width
|
|
278
|
+
- **Always set table `width` with DXA** - never use `WidthType.PERCENTAGE` (breaks in Google Docs)
|
|
284
279
|
- **Tables need dual widths** - `columnWidths` array AND cell `width`, both must match
|
|
285
280
|
- **Table width = sum of columnWidths** - for DXA, ensure they add up exactly
|
|
286
281
|
- **Always add cell margins** - use `margins: { top: 80, bottom: 80, left: 120, right: 120 }` for readable padding
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
1
|
"""Accept all tracked changes in a DOCX file using LibreOffice.
|
|
3
2
|
|
|
4
3
|
Requires LibreOffice (soffice) to be installed.
|
|
@@ -14,7 +13,6 @@ from office.soffice import get_soffice_env
|
|
|
14
13
|
|
|
15
14
|
logger = logging.getLogger(__name__)
|
|
16
15
|
|
|
17
|
-
# LibreOffice profile directory for macro storage
|
|
18
16
|
LIBREOFFICE_PROFILE = "/tmp/libreoffice_docx_profile"
|
|
19
17
|
MACRO_DIR = f"{LIBREOFFICE_PROFILE}/user/basic/Standard"
|
|
20
18
|
|
|
@@ -39,15 +37,6 @@ def accept_changes(
|
|
|
39
37
|
input_file: str,
|
|
40
38
|
output_file: str,
|
|
41
39
|
) -> tuple[None, str]:
|
|
42
|
-
"""Accept all tracked changes in a DOCX file and save to output file.
|
|
43
|
-
|
|
44
|
-
Args:
|
|
45
|
-
input_file: Path to input DOCX file with tracked changes
|
|
46
|
-
output_file: Path to output DOCX file (will be created/overwritten)
|
|
47
|
-
|
|
48
|
-
Returns:
|
|
49
|
-
(None, message) - message indicates success or failure
|
|
50
|
-
"""
|
|
51
40
|
input_path = Path(input_file)
|
|
52
41
|
output_path = Path(output_file)
|
|
53
42
|
|
|
@@ -57,18 +46,15 @@ def accept_changes(
|
|
|
57
46
|
if not input_path.suffix.lower() == ".docx":
|
|
58
47
|
return None, f"Error: Input file is not a DOCX file: {input_file}"
|
|
59
48
|
|
|
60
|
-
# Copy input file to output file location
|
|
61
49
|
try:
|
|
62
50
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
63
51
|
shutil.copy2(input_path, output_path)
|
|
64
52
|
except Exception as e:
|
|
65
53
|
return None, f"Error: Failed to copy input file to output location: {e}"
|
|
66
54
|
|
|
67
|
-
# Setup LibreOffice macro
|
|
68
55
|
if not _setup_libreoffice_macro():
|
|
69
56
|
return None, "Error: Failed to setup LibreOffice macro"
|
|
70
57
|
|
|
71
|
-
# Run LibreOffice with macro to accept changes
|
|
72
58
|
cmd = [
|
|
73
59
|
"soffice",
|
|
74
60
|
"--headless",
|
|
@@ -88,7 +74,6 @@ def accept_changes(
|
|
|
88
74
|
env=get_soffice_env(),
|
|
89
75
|
)
|
|
90
76
|
except subprocess.TimeoutExpired:
|
|
91
|
-
# Timeout is expected - LibreOffice may hang after completing
|
|
92
77
|
return (
|
|
93
78
|
None,
|
|
94
79
|
f"Successfully accepted all tracked changes: {input_file} -> {output_file}",
|
|
@@ -104,14 +89,12 @@ def accept_changes(
|
|
|
104
89
|
|
|
105
90
|
|
|
106
91
|
def _setup_libreoffice_macro() -> bool:
|
|
107
|
-
"""Setup LibreOffice macro for accepting tracked changes."""
|
|
108
92
|
macro_dir = Path(MACRO_DIR)
|
|
109
93
|
macro_file = macro_dir / "Module1.xba"
|
|
110
94
|
|
|
111
95
|
if macro_file.exists() and "AcceptAllTrackedChanges" in macro_file.read_text():
|
|
112
96
|
return True
|
|
113
97
|
|
|
114
|
-
# Initialize LibreOffice if needed (use custom profile)
|
|
115
98
|
if not macro_dir.exists():
|
|
116
99
|
subprocess.run(
|
|
117
100
|
[
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
1
|
"""Add comments to DOCX documents.
|
|
3
2
|
|
|
4
3
|
Usage:
|
|
@@ -32,7 +31,6 @@ NS = {
|
|
|
32
31
|
"w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
|
|
33
32
|
}
|
|
34
33
|
|
|
35
|
-
# XML template for comment content in comments.xml
|
|
36
34
|
COMMENT_XML = """\
|
|
37
35
|
<w:comment w:id="{id}" w:author="{author}" w:date="{date}" w:initials="{initials}">
|
|
38
36
|
<w:p w14:paraId="{para_id}" w14:textId="77777777">
|
|
@@ -51,7 +49,6 @@ COMMENT_XML = """\
|
|
|
51
49
|
</w:p>
|
|
52
50
|
</w:comment>"""
|
|
53
51
|
|
|
54
|
-
# Output templates for marker placement instructions
|
|
55
52
|
COMMENT_MARKER_TEMPLATE = """
|
|
56
53
|
Add to document.xml (markers must be direct children of w:p, never inside w:r):
|
|
57
54
|
<w:commentRangeStart w:id="{cid}"/>
|
|
@@ -69,42 +66,36 @@ Nest markers inside parent {pid}'s markers (markers must be direct children of w
|
|
|
69
66
|
|
|
70
67
|
|
|
71
68
|
def _generate_hex_id() -> str:
|
|
72
|
-
"""Random 8-char hex ID (satisfies paraId < 0x80000000, durableId < 0x7FFFFFFF)."""
|
|
73
69
|
return f"{random.randint(0, 0x7FFFFFFE):08X}"
|
|
74
70
|
|
|
75
71
|
|
|
76
|
-
# Smart quotes to re-encode after DOM serialization (DOM decodes entities to Unicode)
|
|
77
72
|
SMART_QUOTE_ENTITIES = {
|
|
78
|
-
"\u201c": "“",
|
|
79
|
-
"\u201d": "”",
|
|
80
|
-
"\u2018": "‘",
|
|
81
|
-
"\u2019": "’",
|
|
73
|
+
"\u201c": "“",
|
|
74
|
+
"\u201d": "”",
|
|
75
|
+
"\u2018": "‘",
|
|
76
|
+
"\u2019": "’",
|
|
82
77
|
}
|
|
83
78
|
|
|
84
79
|
|
|
85
80
|
def _encode_smart_quotes(text: str) -> str:
|
|
86
|
-
"""Re-encode smart quotes as XML entities after DOM serialization."""
|
|
87
81
|
for char, entity in SMART_QUOTE_ENTITIES.items():
|
|
88
82
|
text = text.replace(char, entity)
|
|
89
83
|
return text
|
|
90
84
|
|
|
91
85
|
|
|
92
86
|
def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
|
|
93
|
-
"""Append content as child of root element."""
|
|
94
87
|
dom = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
|
|
95
88
|
root = dom.getElementsByTagName(root_tag)[0]
|
|
96
89
|
ns_attrs = " ".join(f'xmlns:{k}="{v}"' for k, v in NS.items())
|
|
97
90
|
wrapper_dom = defusedxml.minidom.parseString(f"<root {ns_attrs}>{content}</root>")
|
|
98
|
-
for child in wrapper_dom.documentElement.childNodes:
|
|
91
|
+
for child in wrapper_dom.documentElement.childNodes:
|
|
99
92
|
if child.nodeType == child.ELEMENT_NODE:
|
|
100
93
|
root.appendChild(dom.importNode(child, True))
|
|
101
|
-
# Re-encode smart quotes that DOM decoded to Unicode
|
|
102
94
|
output = _encode_smart_quotes(dom.toxml(encoding="UTF-8").decode("utf-8"))
|
|
103
95
|
xml_path.write_text(output, encoding="utf-8")
|
|
104
96
|
|
|
105
97
|
|
|
106
98
|
def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
|
|
107
|
-
"""Find para_id for a comment ID."""
|
|
108
99
|
dom = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8"))
|
|
109
100
|
for c in dom.getElementsByTagName("w:comment"):
|
|
110
101
|
if c.getAttribute("w:id") == str(comment_id):
|
|
@@ -115,7 +106,6 @@ def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
|
|
|
115
106
|
|
|
116
107
|
|
|
117
108
|
def _get_next_rid(rels_path: Path) -> int:
|
|
118
|
-
"""Get the next available rId number from document.xml.rels."""
|
|
119
109
|
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
|
120
110
|
max_rid = 0
|
|
121
111
|
for rel in dom.getElementsByTagName("Relationship"):
|
|
@@ -129,7 +119,6 @@ def _get_next_rid(rels_path: Path) -> int:
|
|
|
129
119
|
|
|
130
120
|
|
|
131
121
|
def _has_relationship(rels_path: Path, target: str) -> bool:
|
|
132
|
-
"""Check if a relationship with given target exists."""
|
|
133
122
|
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
|
134
123
|
for rel in dom.getElementsByTagName("Relationship"):
|
|
135
124
|
if rel.getAttribute("Target") == target:
|
|
@@ -138,7 +127,6 @@ def _has_relationship(rels_path: Path, target: str) -> bool:
|
|
|
138
127
|
|
|
139
128
|
|
|
140
129
|
def _has_content_type(ct_path: Path, part_name: str) -> bool:
|
|
141
|
-
"""Check if a content type override with given part name exists."""
|
|
142
130
|
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
|
|
143
131
|
for override in dom.getElementsByTagName("Override"):
|
|
144
132
|
if override.getAttribute("PartName") == part_name:
|
|
@@ -147,19 +135,17 @@ def _has_content_type(ct_path: Path, part_name: str) -> bool:
|
|
|
147
135
|
|
|
148
136
|
|
|
149
137
|
def _ensure_comment_relationships(unpacked_dir: Path) -> None:
|
|
150
|
-
"""Ensure word/_rels/document.xml.rels has comment relationships."""
|
|
151
138
|
rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels"
|
|
152
139
|
if not rels_path.exists():
|
|
153
140
|
return
|
|
154
141
|
|
|
155
142
|
if _has_relationship(rels_path, "comments.xml"):
|
|
156
|
-
return
|
|
143
|
+
return
|
|
157
144
|
|
|
158
145
|
dom = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
|
|
159
146
|
root = dom.documentElement
|
|
160
147
|
next_rid = _get_next_rid(rels_path)
|
|
161
148
|
|
|
162
|
-
# Add relationship elements
|
|
163
149
|
rels = [
|
|
164
150
|
(
|
|
165
151
|
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
|
|
@@ -184,25 +170,23 @@ def _ensure_comment_relationships(unpacked_dir: Path) -> None:
|
|
|
184
170
|
rel.setAttribute("Id", f"rId{next_rid}")
|
|
185
171
|
rel.setAttribute("Type", rel_type)
|
|
186
172
|
rel.setAttribute("Target", target)
|
|
187
|
-
root.appendChild(rel)
|
|
173
|
+
root.appendChild(rel)
|
|
188
174
|
next_rid += 1
|
|
189
175
|
|
|
190
176
|
rels_path.write_bytes(dom.toxml(encoding="UTF-8"))
|
|
191
177
|
|
|
192
178
|
|
|
193
179
|
def _ensure_comment_content_types(unpacked_dir: Path) -> None:
|
|
194
|
-
"""Ensure [Content_Types].xml has comment content types."""
|
|
195
180
|
ct_path = unpacked_dir / "[Content_Types].xml"
|
|
196
181
|
if not ct_path.exists():
|
|
197
182
|
return
|
|
198
183
|
|
|
199
184
|
if _has_content_type(ct_path, "/word/comments.xml"):
|
|
200
|
-
return
|
|
185
|
+
return
|
|
201
186
|
|
|
202
187
|
dom = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
|
|
203
188
|
root = dom.documentElement
|
|
204
189
|
|
|
205
|
-
# Add Override elements
|
|
206
190
|
overrides = [
|
|
207
191
|
(
|
|
208
192
|
"/word/comments.xml",
|
|
@@ -226,7 +210,7 @@ def _ensure_comment_content_types(unpacked_dir: Path) -> None:
|
|
|
226
210
|
override = dom.createElement("Override")
|
|
227
211
|
override.setAttribute("PartName", part_name)
|
|
228
212
|
override.setAttribute("ContentType", content_type)
|
|
229
|
-
root.appendChild(override)
|
|
213
|
+
root.appendChild(override)
|
|
230
214
|
|
|
231
215
|
ct_path.write_bytes(dom.toxml(encoding="UTF-8"))
|
|
232
216
|
|
|
@@ -239,14 +223,6 @@ def add_comment(
|
|
|
239
223
|
initials: str = "C",
|
|
240
224
|
parent_id: int | None = None,
|
|
241
225
|
) -> tuple[str, str]:
|
|
242
|
-
"""Add comment to unpacked DOCX.
|
|
243
|
-
|
|
244
|
-
Args:
|
|
245
|
-
text: Comment text, pre-escaped for XML (e.g., &amp; &#x2019;).
|
|
246
|
-
|
|
247
|
-
Returns:
|
|
248
|
-
(para_id, message) tuple.
|
|
249
|
-
"""
|
|
250
226
|
word = Path(unpacked_dir) / "word"
|
|
251
227
|
if not word.exists():
|
|
252
228
|
return "", f"Error: {word} not found"
|
|
@@ -254,12 +230,10 @@ def add_comment(
|
|
|
254
230
|
para_id, durable_id = _generate_hex_id(), _generate_hex_id()
|
|
255
231
|
ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
256
232
|
|
|
257
|
-
# comments.xml
|
|
258
233
|
comments = word / "comments.xml"
|
|
259
234
|
first_comment = not comments.exists()
|
|
260
235
|
if first_comment:
|
|
261
236
|
shutil.copy(TEMPLATE_DIR / "comments.xml", comments)
|
|
262
|
-
# Add relationships and content types for comment files
|
|
263
237
|
_ensure_comment_relationships(Path(unpacked_dir))
|
|
264
238
|
_ensure_comment_content_types(Path(unpacked_dir))
|
|
265
239
|
_append_xml(
|
|
@@ -271,11 +245,10 @@ def add_comment(
|
|
|
271
245
|
date=ts,
|
|
272
246
|
initials=initials,
|
|
273
247
|
para_id=para_id,
|
|
274
|
-
text=text,
|
|
248
|
+
text=text,
|
|
275
249
|
),
|
|
276
250
|
)
|
|
277
251
|
|
|
278
|
-
# commentsExtended.xml
|
|
279
252
|
ext = word / "commentsExtended.xml"
|
|
280
253
|
if not ext.exists():
|
|
281
254
|
shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext)
|
|
@@ -295,7 +268,6 @@ def add_comment(
|
|
|
295
268
|
f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>',
|
|
296
269
|
)
|
|
297
270
|
|
|
298
|
-
# commentsIds.xml
|
|
299
271
|
ids = word / "commentsIds.xml"
|
|
300
272
|
if not ids.exists():
|
|
301
273
|
shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids)
|
|
@@ -305,7 +277,6 @@ def add_comment(
|
|
|
305
277
|
f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
|
|
306
278
|
)
|
|
307
279
|
|
|
308
|
-
# commentsExtensible.xml
|
|
309
280
|
extensible = word / "commentsExtensible.xml"
|
|
310
281
|
if not extensible.exists():
|
|
311
282
|
shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible)
|
|
@@ -14,14 +14,6 @@ import defusedxml.minidom
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def merge_runs(input_dir: str) -> tuple[int, str]:
|
|
17
|
-
"""Merge adjacent runs in document.xml.
|
|
18
|
-
|
|
19
|
-
Args:
|
|
20
|
-
input_dir: Path to unpacked DOCX directory
|
|
21
|
-
|
|
22
|
-
Returns:
|
|
23
|
-
(merge_count, message)
|
|
24
|
-
"""
|
|
25
17
|
doc_xml = Path(input_dir) / "word" / "document.xml"
|
|
26
18
|
|
|
27
19
|
if not doc_xml.exists():
|
|
@@ -31,14 +23,11 @@ def merge_runs(input_dir: str) -> tuple[int, str]:
|
|
|
31
23
|
dom = defusedxml.minidom.parseString(doc_xml.read_text(encoding="utf-8"))
|
|
32
24
|
root = dom.documentElement
|
|
33
25
|
|
|
34
|
-
# Clean up elements that block merging
|
|
35
26
|
_remove_elements(root, "proofErr")
|
|
36
27
|
_strip_run_rsid_attrs(root)
|
|
37
28
|
|
|
38
|
-
# Find all containers that have runs
|
|
39
29
|
containers = {run.parentNode for run in _find_elements(root, "r")}
|
|
40
30
|
|
|
41
|
-
# Merge runs in each container
|
|
42
31
|
merge_count = 0
|
|
43
32
|
for container in containers:
|
|
44
33
|
merge_count += _merge_runs_in(container)
|
|
@@ -50,11 +39,9 @@ def merge_runs(input_dir: str) -> tuple[int, str]:
|
|
|
50
39
|
return 0, f"Error: {e}"
|
|
51
40
|
|
|
52
41
|
|
|
53
|
-
# --- Element helpers ---
|
|
54
42
|
|
|
55
43
|
|
|
56
44
|
def _find_elements(root, tag: str) -> list:
|
|
57
|
-
"""Find all elements matching tag name (with or without namespace)."""
|
|
58
45
|
results = []
|
|
59
46
|
|
|
60
47
|
def traverse(node):
|
|
@@ -70,7 +57,6 @@ def _find_elements(root, tag: str) -> list:
|
|
|
70
57
|
|
|
71
58
|
|
|
72
59
|
def _get_child(parent, tag: str):
|
|
73
|
-
"""Get first child element matching tag name."""
|
|
74
60
|
for child in parent.childNodes:
|
|
75
61
|
if child.nodeType == child.ELEMENT_NODE:
|
|
76
62
|
name = child.localName or child.tagName
|
|
@@ -80,7 +66,6 @@ def _get_child(parent, tag: str):
|
|
|
80
66
|
|
|
81
67
|
|
|
82
68
|
def _get_children(parent, tag: str) -> list:
|
|
83
|
-
"""Get all direct child elements matching tag name."""
|
|
84
69
|
results = []
|
|
85
70
|
for child in parent.childNodes:
|
|
86
71
|
if child.nodeType == child.ELEMENT_NODE:
|
|
@@ -91,7 +76,6 @@ def _get_children(parent, tag: str) -> list:
|
|
|
91
76
|
|
|
92
77
|
|
|
93
78
|
def _is_adjacent(elem1, elem2) -> bool:
|
|
94
|
-
"""Check if two elements are adjacent (only whitespace between them)."""
|
|
95
79
|
node = elem1.nextSibling
|
|
96
80
|
while node:
|
|
97
81
|
if node == elem2:
|
|
@@ -104,34 +88,28 @@ def _is_adjacent(elem1, elem2) -> bool:
|
|
|
104
88
|
return False
|
|
105
89
|
|
|
106
90
|
|
|
107
|
-
# --- Cleanup functions ---
|
|
108
91
|
|
|
109
92
|
|
|
110
93
|
def _remove_elements(root, tag: str):
|
|
111
|
-
"""Remove all elements matching tag name."""
|
|
112
94
|
for elem in _find_elements(root, tag):
|
|
113
95
|
if elem.parentNode:
|
|
114
96
|
elem.parentNode.removeChild(elem)
|
|
115
97
|
|
|
116
98
|
|
|
117
99
|
def _strip_run_rsid_attrs(root):
|
|
118
|
-
"""Remove rsid attributes from all run elements."""
|
|
119
100
|
for run in _find_elements(root, "r"):
|
|
120
101
|
for attr in list(run.attributes.values()):
|
|
121
102
|
if "rsid" in attr.name.lower():
|
|
122
103
|
run.removeAttribute(attr.name)
|
|
123
104
|
|
|
124
105
|
|
|
125
|
-
# --- Merge functions ---
|
|
126
106
|
|
|
127
107
|
|
|
128
108
|
def _merge_runs_in(container) -> int:
|
|
129
|
-
"""Merge adjacent runs with identical formatting in a container element."""
|
|
130
109
|
merge_count = 0
|
|
131
110
|
run = _first_child_run(container)
|
|
132
111
|
|
|
133
112
|
while run:
|
|
134
|
-
# Absorb adjacent runs with same formatting
|
|
135
113
|
while True:
|
|
136
114
|
next_elem = _next_element_sibling(run)
|
|
137
115
|
if next_elem and _is_run(next_elem) and _can_merge(run, next_elem):
|
|
@@ -148,7 +126,6 @@ def _merge_runs_in(container) -> int:
|
|
|
148
126
|
|
|
149
127
|
|
|
150
128
|
def _first_child_run(container):
|
|
151
|
-
"""Get the first run child of a container."""
|
|
152
129
|
for child in container.childNodes:
|
|
153
130
|
if child.nodeType == child.ELEMENT_NODE and _is_run(child):
|
|
154
131
|
return child
|
|
@@ -156,7 +133,6 @@ def _first_child_run(container):
|
|
|
156
133
|
|
|
157
134
|
|
|
158
135
|
def _next_element_sibling(node):
|
|
159
|
-
"""Get the next element sibling, skipping text/whitespace nodes."""
|
|
160
136
|
sibling = node.nextSibling
|
|
161
137
|
while sibling:
|
|
162
138
|
if sibling.nodeType == sibling.ELEMENT_NODE:
|
|
@@ -166,25 +142,21 @@ def _next_element_sibling(node):
|
|
|
166
142
|
|
|
167
143
|
|
|
168
144
|
def _next_sibling_run(node):
|
|
169
|
-
"""Get the next sibling that is a run element."""
|
|
170
145
|
sibling = node.nextSibling
|
|
171
146
|
while sibling:
|
|
172
147
|
if sibling.nodeType == sibling.ELEMENT_NODE:
|
|
173
148
|
if _is_run(sibling):
|
|
174
149
|
return sibling
|
|
175
|
-
# Skip non-run elements (bookmarks, etc.) but keep looking
|
|
176
150
|
sibling = sibling.nextSibling
|
|
177
151
|
return None
|
|
178
152
|
|
|
179
153
|
|
|
180
154
|
def _is_run(node) -> bool:
|
|
181
|
-
"""Check if node is a run element."""
|
|
182
155
|
name = node.localName or node.tagName
|
|
183
156
|
return name == "r" or name.endswith(":r")
|
|
184
157
|
|
|
185
158
|
|
|
186
159
|
def _can_merge(run1, run2) -> bool:
|
|
187
|
-
"""Check if two runs have identical formatting."""
|
|
188
160
|
rpr1 = _get_child(run1, "rPr")
|
|
189
161
|
rpr2 = _get_child(run2, "rPr")
|
|
190
162
|
|
|
@@ -192,11 +164,10 @@ def _can_merge(run1, run2) -> bool:
|
|
|
192
164
|
return False
|
|
193
165
|
if rpr1 is None:
|
|
194
166
|
return True
|
|
195
|
-
return rpr1.toxml() == rpr2.toxml()
|
|
167
|
+
return rpr1.toxml() == rpr2.toxml()
|
|
196
168
|
|
|
197
169
|
|
|
198
170
|
def _merge_run_content(target, source):
|
|
199
|
-
"""Move content from source run to target run (excluding rPr)."""
|
|
200
171
|
for child in list(source.childNodes):
|
|
201
172
|
if child.nodeType == child.ELEMENT_NODE:
|
|
202
173
|
name = child.localName or child.tagName
|
|
@@ -205,10 +176,8 @@ def _merge_run_content(target, source):
|
|
|
205
176
|
|
|
206
177
|
|
|
207
178
|
def _consolidate_text(run):
|
|
208
|
-
"""Merge adjacent <w:t> elements within a run."""
|
|
209
179
|
t_elements = _get_children(run, "t")
|
|
210
180
|
|
|
211
|
-
# Work backwards to safely remove elements
|
|
212
181
|
for i in range(len(t_elements) - 1, 0, -1):
|
|
213
182
|
curr, prev = t_elements[i], t_elements[i - 1]
|
|
214
183
|
|
|
@@ -222,7 +191,6 @@ def _consolidate_text(run):
|
|
|
222
191
|
else:
|
|
223
192
|
prev.appendChild(run.ownerDocument.createTextNode(merged))
|
|
224
193
|
|
|
225
|
-
# Preserve whitespace if needed
|
|
226
194
|
if merged.startswith(" ") or merged.endswith(" "):
|
|
227
195
|
prev.setAttribute("xml:space", "preserve")
|
|
228
196
|
elif prev.hasAttribute("xml:space"):
|