@drbaher/draft-cli 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +40 -0
- package/PARAM_SCHEMA.md +14 -1
- package/draft-cli.mjs +153 -7
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,46 @@ All notable changes to this project will be documented in this file. The
|
|
|
4
4
|
format is loosely based on [Keep a Changelog](https://keepachangelog.com/),
|
|
5
5
|
and the project adheres to semantic versioning once it leaves 0.x.
|
|
6
6
|
|
|
7
|
+
## 0.2.0 — 2026-05-16
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **`.docx` output round-trip.** Templates read from `.docx` (tier 3
|
|
12
|
+
highlight detection) now write back as `.docx`, preserving runs,
|
|
13
|
+
styles, paragraph breaks, and every non-document part of the package
|
|
14
|
+
(`[Content_Types].xml`, relationships, images, headers, etc.).
|
|
15
|
+
Default output filename is `<basename>-filled.docx` next to the
|
|
16
|
+
input; override with `--output PATH.docx`. Schema-rescue, T1/T2
|
|
17
|
+
bracket/mustache detection, and T4/T5 substitution all benefit
|
|
18
|
+
too — any tier that detects a placeholder in a `.docx` template
|
|
19
|
+
now substitutes back into the same runs.
|
|
20
|
+
- **`--output -` writes plain text to stdout** (Unix `-` convention).
|
|
21
|
+
Use this on a `.docx` input to get the substituted body as text
|
|
22
|
+
instead of a `.docx` file: `draft contract.docx --output -`.
|
|
23
|
+
- **`writeDocxBuffer(originalPath, newDocumentXml)`**, **`makeDocxOutputPath(inputPath)`**,
|
|
24
|
+
**`substituteDocxXml(xml, placeholders, values, tier)`**, **`decideDocxOutput(opts, input)`**,
|
|
25
|
+
and **`encodeXml(s)`** added to the public API for programmatic
|
|
26
|
+
drivers. Same import surface as `substitute` and `extractDocxText`.
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
|
|
30
|
+
- **Default output for `.docx` input is now `<basename>-filled.docx`,
|
|
31
|
+
not stdout text.** Previously, `draft contract.docx` (no
|
|
32
|
+
`--output`) extracted text and wrote substituted plain text to
|
|
33
|
+
stdout. v0.2.0 writes `contract-filled.docx` next to the input.
|
|
34
|
+
Pipelines that depended on the stdout-text behavior should pass
|
|
35
|
+
`--output -` to opt back in.
|
|
36
|
+
|
|
37
|
+
### Split-run handling
|
|
38
|
+
|
|
39
|
+
When a placeholder's text spans multiple `<w:t>` runs in the source
|
|
40
|
+
`.docx` (Word sometimes splits runs at punctuation, auto-correct
|
|
41
|
+
boundaries, or comment anchors), v0.2.0 emits a warning and skips
|
|
42
|
+
that substitution rather than merging the runs and losing run-level
|
|
43
|
+
styling. The warning explains how to fix the source: open the
|
|
44
|
+
document, retype the placeholder so it lives in one run, save, and
|
|
45
|
+
retry. This decision is logged in `PARAM_SCHEMA.md` §2.
|
|
46
|
+
|
|
7
47
|
## 0.1.1 — 2026-05-16
|
|
8
48
|
|
|
9
49
|
### Fixed
|
package/PARAM_SCHEMA.md
CHANGED
|
@@ -22,7 +22,20 @@ draft <cat>/<name>[@ver] ... # pulls via `template-vault get`
|
|
|
22
22
|
```
|
|
23
23
|
|
|
24
24
|
- **Input forms accepted:** `path/to/file.md`, `path/to/file.txt`, `path/to/file.docx`, stdin (`-`), or a `template-vault` ref shaped `<category>/<name>[@version]`. Vault refs shell out to `template-vault get` — no library import.
|
|
25
|
-
- **Output:**
|
|
25
|
+
- **Output:** depends on input kind and `--output` target.
|
|
26
|
+
|
|
27
|
+
| Input | `--output` | Output |
|
|
28
|
+
| ------------ | ------------------- | ------------------------------------- |
|
|
29
|
+
| text (any) | absent | plain text on stdout |
|
|
30
|
+
| text (any) | `-` | plain text on stdout |
|
|
31
|
+
| text (any) | `PATH` (any ext) | plain text written to `PATH` |
|
|
32
|
+
| `.docx` | absent | `.docx` to `<basename>-filled.docx` |
|
|
33
|
+
| `.docx` | `PATH.docx` | `.docx` to `PATH.docx` |
|
|
34
|
+
| `.docx` | `-` | plain text (substituted body) on stdout |
|
|
35
|
+
| `.docx` | `PATH` (non-`.docx`)| plain text written to `PATH` |
|
|
36
|
+
|
|
37
|
+
`.docx` output is a round-trip: the original `.docx` package is reopened, the substituted text is written back into the same `<w:t>` runs that detection found, and all other parts of the package (relationships, images, headers, `[Content_Types].xml`, etc.) pass through unchanged. Run-level styling is preserved. If a placeholder's text spans multiple `<w:t>` runs in the source (Word sometimes splits runs at punctuation or auto-correct boundaries), that placeholder is **skipped**, not substituted, and a warning is emitted explaining how to fix the source — locked decision Q1.1.
|
|
38
|
+
- `--json`, `--diff`, `--validate`, and `--list-placeholders` all override the `.docx` round-trip and produce text/JSON to stdout (or to `--output PATH`, when provided).
|
|
26
39
|
- **Encoding:** UTF-8 in, UTF-8 out. No BOM written; BOM tolerated on read.
|
|
27
40
|
|
|
28
41
|
## 3. Detection cascade (sequential-with-stop)
|
package/draft-cli.mjs
CHANGED
|
@@ -70,7 +70,7 @@ import { fileURLToPath } from "node:url";
|
|
|
70
70
|
*/
|
|
71
71
|
|
|
72
72
|
/** @type {string} */
|
|
73
|
-
export const VERSION = "0.
|
|
73
|
+
export const VERSION = "0.2.0";
|
|
74
74
|
|
|
75
75
|
// ─── EXIT CODES ─────────────────────────────────────────────────────────────
|
|
76
76
|
/**
|
|
@@ -489,6 +489,43 @@ export async function extractDocxText(path) {
|
|
|
489
489
|
return { body: docxXmlToText(xml), xml };
|
|
490
490
|
}
|
|
491
491
|
|
|
492
|
+
/**
 * Re-read the original `.docx`, swap in a new `word/document.xml`, and
 * return the resulting `.docx` as a `Buffer`. Only the `word/document.xml`
 * entry is replaced; every other entry loaded from the source archive is
 * left as it was read.
 *
 * @param {string} originalPath — filesystem path to the source `.docx`.
 * @param {string} newDocumentXml — replacement content for `word/document.xml`.
 * @returns {Promise<Buffer>}
 * @throws {Error} with `.exitCode = EXIT.IO` when the source cannot be read
 *   or parsed as a zip archive; the underlying error is attached as `.cause`.
 *   Errors raised by `loadJSZip()` propagate unchanged.
 */
export async function writeDocxBuffer(originalPath, newDocumentXml) {
  const JSZip = await loadJSZip();
  let zip;
  try {
    zip = await JSZip.loadAsync(readFileSync(originalPath));
  } catch (err) {
    const e = new Error(`could not re-open source .docx (${err.message})`);
    e.exitCode = EXIT.IO;
    // Keep the original failure reachable for debugging instead of
    // flattening it to a message string only.
    e.cause = err;
    throw e;
  }
  zip.file("word/document.xml", newDocumentXml);
  // JSZip defaults to STORE (no compression). Ask for DEFLATE so the
  // generated package is compressed like an ordinary .docx rather than
  // growing to the uncompressed size of all its parts.
  return await zip.generateAsync({ type: "nodebuffer", compression: "DEFLATE" });
}
|
|
515
|
+
|
|
516
|
+
/**
 * Build the default output filename for a filled document by inserting
 * `-filled` in front of the input's extension, e.g.
 * `contract.docx` → `contract-filled.docx`. The input's own extension is
 * kept verbatim; when the input has no extension at all, `.docx` is used.
 *
 * @param {string} inputPath
 * @returns {string}
 */
export function makeDocxOutputPath(inputPath) {
  const suffix = extname(inputPath);
  // Everything up to (but excluding) the extension; the whole path when
  // there is no extension.
  const stem = inputPath.slice(0, inputPath.length - suffix.length);
  const outExt = suffix === "" ? ".docx" : suffix;
  return `${stem}-filled${outExt}`;
}
|
|
528
|
+
|
|
492
529
|
// Walk the XML in document order. For each <w:p> emit a line; concatenate
|
|
493
530
|
// <w:t> contents within. Decode XML entities. Used for both output body and
|
|
494
531
|
// T1/T2 detection on docx input.
|
|
@@ -524,6 +561,18 @@ export function decodeXml(s) {
|
|
|
524
561
|
.replace(/"/g, '"').replace(/'/g, "'");
|
|
525
562
|
}
|
|
526
563
|
|
|
564
|
+
/**
 * Inverse of {@link decodeXml}. Used when writing substituted text back into
 * a Word document's `<w:t>` runs. Only encodes the three structural
 * characters (`&`, `<`, `>`); double- and single-quotes don't need encoding
 * inside element text content.
 *
 * @param {string} s
 * @returns {string}
 */
export function encodeXml(s) {
  // Single pass over the string, so an ampersand produced by one
  // replacement can never be encoded a second time.
  const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return s.replace(/[&<>]/g, (ch) => entities[ch]);
}
|
|
575
|
+
|
|
527
576
|
const RECOGNIZED_HIGHLIGHTS = new Set(["yellow", "green", "cyan", "magenta"]);
|
|
528
577
|
|
|
529
578
|
// Scan the XML for highlighted runs. Returns an array of { text, color }.
|
|
@@ -1180,6 +1229,83 @@ function replaceAll(s, find, repl) {
|
|
|
1180
1229
|
return s.split(find).join(repl);
|
|
1181
1230
|
}
|
|
1182
1231
|
|
|
1232
|
+
/**
 * Substitute placeholder values *inside the Word XML*, preserving runs
 * and styling. Returns a new XML string plus warnings for any placeholder
 * whose text spans multiple `<w:t>` runs in the source — these are
 * skipped rather than substituted (merging the runs would lose styling
 * information; leaving them is reversible).
 *
 * For T1 (bracket) / T2 (mustache) the search text is the literal match
 * (e.g. `[Party A]` or `{{party_a}}`). For T3 (docx-highlight), T4
 * (heuristic), T5 (llm) the search text is the run's inner content with
 * whole-word boundaries — same semantics as {@link substitute}.
 *
 * Each distinct search text is processed once: the replacement is global
 * across all `<w:t>` runs, so repeating it for further hits with the same
 * text could only re-substitute inside an already inserted value or raise
 * a spurious "spans multiple runs" warning.
 *
 * NOTE(review): when the same text occurs both inside a single run and
 * split across runs, the single-run occurrence is substituted and the
 * split one is left untouched without a warning.
 *
 * @param {string} xml — original `word/document.xml`.
 * @param {Placeholder[]} placeholders
 * @param {Object<string,string>} values — `{ key: resolvedValue }`.
 * @param {Tier} tier
 * @returns {{ xml: string, warnings: string[] }}
 */
export function substituteDocxXml(xml, placeholders, values, tier) {
  const literal = tier === "bracket" || tier === "mustache";
  const buildRe = (find, flags) => {
    const body = escapeRegex(find);
    return new RegExp(literal ? body : `(?<![A-Za-z0-9])${body}(?![A-Za-z0-9])`, flags);
  };

  const warnings = [];
  const processed = new Set();
  // Plain-text view of the *original* document; only needed to tell
  // "placeholder is split across runs" apart from "placeholder absent",
  // so it is computed on first use.
  let originalText = null;
  let out = xml;

  for (const p of placeholders) {
    const v = values[p.key];
    if (v === undefined) continue;
    for (const h of p.hits) {
      const find = literal ? h.match : h.inner;
      // An empty search text would match at every position.
      if (!find || processed.has(find)) continue;
      processed.add(find);

      const replaceRe = buildRe(find, "g");
      let madeSubstitution = false;
      out = out.replace(/<w:t(\s[^>]*)?>([\s\S]*?)<\/w:t>/g, (match, attrs, content) => {
        let matched = false;
        // Function replacer: the value is inserted verbatim, so `$&`, `$$`
        // etc. in user-supplied values are not treated as patterns. It
        // also records a match even when the value equals the placeholder.
        const replaced = decodeXml(content).replace(replaceRe, () => {
          matched = true;
          return v;
        });
        if (!matched) return match;
        madeSubstitution = true;
        return `<w:t${attrs || ""}>${encodeXml(replaced)}</w:t>`;
      });

      if (madeSubstitution) continue;
      if (originalText === null) originalText = docxXmlToText(xml);
      if (buildRe(find, "").test(originalText)) {
        warnings.push(
          `docx substitution skipped for "${find}" (→ "${v}"): the placeholder spans ` +
          `multiple text runs in the source, which would lose run-level styling if merged. ` +
          `Open the document, retype the placeholder so it lives in a single run, and retry.`
        );
      }
    }
  }
  return { xml: out, warnings };
}
|
|
1284
|
+
|
|
1285
|
+
/**
 * Decide whether to write `.docx` output (round-trip) versus plain text.
 * Returns `{ path }` for `.docx`, or `null` for text. Rules, in order:
 *   - input must be `.docx`;
 *   - `--json`, `--diff`, `--validate`, `--list-placeholders` force text;
 *   - `--output -` writes plain text to stdout (Unix `-` convention);
 *   - `--output PATH.docx` writes `.docx` to PATH (the extension is matched
 *     case-insensitively, so `OUT.DOCX` also selects the round-trip);
 *   - `--output PATH` with any other extension writes plain text;
 *   - no `--output` defaults to `<basename>-filled.docx`.
 *
 * @param {Object} opts — parsed CLI args.
 * @param {{kind: "text"|"docx", path: string|null}} input
 * @returns {{ path: string } | null}
 */
export function decideDocxOutput(opts, input) {
  if (input.kind !== "docx") return null;
  if (opts.json || opts.diff || opts.listPlaceholders || opts.validate) return null;
  if (opts.output === "-") return null;
  if (opts.output) {
    // Compare case-insensitively: writing plain text into a file named
    // `*.DOCX` would produce something Word cannot open.
    const wantsDocx = extname(opts.output).toLowerCase() === ".docx";
    return wantsDocx ? { path: opts.output } : null;
  }
  return { path: makeDocxOutputPath(input.path || "out.docx") };
}
|
|
1308
|
+
|
|
1183
1309
|
// ─── --why BUILDER ──────────────────────────────────────────────────────────
|
|
1184
1310
|
/**
|
|
1185
1311
|
* Format the `--why` stderr block. Stable shape across minor versions; see
|
|
@@ -1367,9 +1493,29 @@ export async function cmdDraft(opts, input, schema, paramsObj, envObj, { fetcher
|
|
|
1367
1493
|
|
|
1368
1494
|
const output = substitute(input.body, result.placeholders, resolved, result.tier);
|
|
1369
1495
|
|
|
1370
|
-
// Write output.
|
|
1371
|
-
|
|
1372
|
-
|
|
1496
|
+
// Write output. Three paths:
|
|
1497
|
+
// (a) docx round-trip: input is .docx and target is .docx (default for .docx
|
|
1498
|
+
// inputs, unless --output is set to a non-.docx extension or `-`).
|
|
1499
|
+
// (b) write text to a file (--output PATH, where PATH ≠ "-").
|
|
1500
|
+
// (c) write text to stdout (no --output, or --output "-").
|
|
1501
|
+
// --json suppresses (c) so it doesn't collide with the JSON payload.
|
|
1502
|
+
const docxOut = decideDocxOutput(opts, input);
|
|
1503
|
+
let writtenPath = null;
|
|
1504
|
+
if (docxOut) {
|
|
1505
|
+
try {
|
|
1506
|
+
const { xml: newXml, warnings: docxWarnings } = substituteDocxXml(
|
|
1507
|
+
input.docxXml, result.placeholders, resolved, result.tier
|
|
1508
|
+
);
|
|
1509
|
+
if (docxWarnings.length) result.warnings.push(...docxWarnings);
|
|
1510
|
+
const buf = await writeDocxBuffer(input.path, newXml);
|
|
1511
|
+
writeFileSync(docxOut.path, buf);
|
|
1512
|
+
writtenPath = docxOut.path;
|
|
1513
|
+
} catch (e) {
|
|
1514
|
+
err.write(paint(`error: could not write ${docxOut.path}: ${e.message}\n`, "red", err));
|
|
1515
|
+
return EXIT.IO;
|
|
1516
|
+
}
|
|
1517
|
+
} else if (opts.output && opts.output !== "-") {
|
|
1518
|
+
try { writeFileSync(opts.output, output, "utf8"); writtenPath = opts.output; }
|
|
1373
1519
|
catch (e) {
|
|
1374
1520
|
err.write(paint(`error: could not write ${opts.output}: ${e.message}\n`, "red", err));
|
|
1375
1521
|
return EXIT.IO;
|
|
@@ -1382,8 +1528,8 @@ export async function cmdDraft(opts, input, schema, paramsObj, envObj, { fetcher
|
|
|
1382
1528
|
out.write(JSON.stringify({
|
|
1383
1529
|
ok: true,
|
|
1384
1530
|
tier: result.tier,
|
|
1385
|
-
output_path:
|
|
1386
|
-
output:
|
|
1531
|
+
output_path: writtenPath,
|
|
1532
|
+
output: writtenPath ? null : output,
|
|
1387
1533
|
placeholders: publicPlaceholders(result.placeholders),
|
|
1388
1534
|
sources,
|
|
1389
1535
|
warnings: result.warnings,
|
|
@@ -1401,7 +1547,7 @@ export async function cmdDraft(opts, input, schema, paramsObj, envObj, { fetcher
|
|
|
1401
1547
|
missing,
|
|
1402
1548
|
unmapped: result.unmapped,
|
|
1403
1549
|
warnings: result.warnings,
|
|
1404
|
-
outputPath:
|
|
1550
|
+
outputPath: writtenPath,
|
|
1405
1551
|
}) + "\n");
|
|
1406
1552
|
}
|
|
1407
1553
|
for (const w of result.warnings) err.write(paint(`warning: ${w}\n`, "yellow", err));
|