docx-to-builder 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +177 -0
- package/cli.js +38 -0
- package/docx-to-builder.py +1268 -0
- package/examples/sample-builder.js +943 -0
- package/examples/sample-template.docx +0 -0
- package/package.json +45 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jeremy Morrison
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# docx-to-builder
|
|
2
|
+
|
|
3
|
+
> Parse any `.docx` template and generate a ready-to-run JavaScript builder that reproduces it exactly.
|
|
4
|
+
|
|
5
|
+
You have a branded Word template. You want to generate documents from it programmatically — with AI-written content, dynamic data, or automation pipelines. But Pandoc can't faithfully reproduce your layout, and docxtemplater requires manually adding `{placeholders}` to every field.
|
|
6
|
+
|
|
7
|
+
**docx-to-builder** takes a different approach: it reads your `.docx` template's raw XML, understands every formatting decision, and writes a JavaScript file that recreates that exact layout using the [`docx`](https://docx.js.org) npm package.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## How it works
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
your-template.docx → [docx-to-builder] → your-template-builder.js
|
|
15
|
+
↓
|
|
16
|
+
node your-template-builder.js
|
|
17
|
+
↓
|
|
18
|
+
branded-output.docx ✓
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
1. **Parse** — reads the `.docx` XML directly (no third-party Python libraries needed)
|
|
22
|
+
2. **Extract** — captures colors, fonts, spacing, borders, tables, images, headers, footers
|
|
23
|
+
3. **Infer** — detects `[bracketed placeholders]` and maps them to `data.fieldName` references
|
|
24
|
+
4. **Generate** — writes a complete ES module with `buildDocument(data, outputPath)`
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
**Requirements:** Python 3.8+ · Node.js · [`docx`](https://www.npmjs.com/package/docx) npm package
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# 1. Generate a builder from your template
|
|
34
|
+
python3 docx-to-builder.py my-template.docx
|
|
35
|
+
|
|
36
|
+
# 2. Install docx in your project (if you haven't already)
|
|
37
|
+
npm install docx
|
|
38
|
+
|
|
39
|
+
# 3. Run the builder
|
|
40
|
+
node my-template-builder.js
|
|
41
|
+
|
|
42
|
+
# Output: my-template-output.docx
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Or install via npm and run with npx:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
npx docx-to-builder my-template.docx
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Template conventions
|
|
54
|
+
|
|
55
|
+
Your template can use any Word formatting. To make fields dynamic, use bracketed placeholders:
|
|
56
|
+
|
|
57
|
+
| In your template | Generated JS |
|
|
58
|
+
|---|---|
|
|
59
|
+
| `[Client Name]` | `data.clientName` |
|
|
60
|
+
| `[Proposal Title]` | `data.title` |
|
|
61
|
+
| `[Month Day, Year]` | `data.date` |
|
|
62
|
+
| `[Draft / Final]` | `data.status` |
|
|
63
|
+
| `[Your custom field]` | `data.yourCustomField` |
|
|
64
|
+
|
|
65
|
+
Any text **not** in brackets is treated as a static label and kept as-is.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## What gets extracted
|
|
70
|
+
|
|
71
|
+
| Template element | Extracted |
|
|
72
|
+
|---|---|
|
|
73
|
+
| Body paragraphs | Text, font, size, color, bold, italic, all-caps, spacing, alignment |
|
|
74
|
+
| Section headings | With red rule lines, borders, spacing |
|
|
75
|
+
| Bullet lists | Level, indent, font |
|
|
76
|
+
| Tables | Cell widths, shading, borders, margins, content |
|
|
77
|
+
| Inline images | Size (EMU → pt), relationship to file, auto-copied to `assets/` |
|
|
78
|
+
| Headers & footers | Text, tab stops, border rules, page number fields |
|
|
79
|
+
| Page margins | Per-section |
|
|
80
|
+
| Brand colors | Auto-named constants (`COLOR.accent`, `COLOR.body`, etc.) |
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## Generated file structure
|
|
85
|
+
|
|
86
|
+
```js
|
|
87
|
+
// Brand colors extracted from template
|
|
88
|
+
const COLOR = { accent: '2C4A6E', body: '444444', ... };
|
|
89
|
+
const FONT = 'Calibri';
|
|
90
|
+
const LOGO_PATH = join(__dirname, 'assets/logo.png');
|
|
91
|
+
|
|
92
|
+
// Header and footer — exact replica of template
|
|
93
|
+
function buildHeader(data) { ... }
|
|
94
|
+
function buildFooter() { ... }
|
|
95
|
+
|
|
96
|
+
// Document body — all layout hardcoded, bracketed fields wired to data object
|
|
97
|
+
function buildContent(data) { ... }
|
|
98
|
+
|
|
99
|
+
// Main entry point — call this from your code or pipeline
|
|
100
|
+
export async function buildDocument(data, outputPath) { ... }
|
|
101
|
+
|
|
102
|
+
// Data object — auto-populated from inferred placeholders
|
|
103
|
+
const data = {
|
|
104
|
+
title: 'Proposal Title',
|
|
105
|
+
clientName: 'Client Name',
|
|
106
|
+
date: '...',
|
|
107
|
+
};
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Wiring in your own content
|
|
113
|
+
|
|
114
|
+
After generation, the `data` object at the bottom of the file maps directly to the bracketed fields the generator found. Replace the example values with your real data — or import `buildDocument` and pass a data object from your own code:
|
|
115
|
+
|
|
116
|
+
```js
|
|
117
|
+
import { buildDocument } from './my-template-builder.js';
|
|
118
|
+
|
|
119
|
+
await buildDocument({
|
|
120
|
+
title: 'Cloud Migration Proposal',
|
|
121
|
+
clientName: 'Acme Corp',
|
|
122
|
+
date: 'April 1, 2026',
|
|
123
|
+
status: 'Final',
|
|
124
|
+
}, 'output/acme-proposal.docx');
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Example
|
|
130
|
+
|
|
131
|
+
The `examples/` folder contains:
|
|
132
|
+
|
|
133
|
+
- `sample-template.docx` — a generic proposal template (Meridian Consulting)
|
|
134
|
+
- `sample-builder.js` — the builder generated from that template
|
|
135
|
+
|
|
136
|
+
Run the example:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
cd examples
|
|
140
|
+
npm install docx
|
|
141
|
+
node sample-builder.js
|
|
142
|
+
# → output/sample-output.docx
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## Limitations
|
|
148
|
+
|
|
149
|
+
- **Inline images** — fully extracted. The image file is automatically copied to `assets/` next to the generated builder — no manual step needed
|
|
150
|
+
- **Complex graphics** — SmartArt, shapes, charts, and WordArt use a different XML format and are skipped; the surrounding paragraph text is preserved
|
|
151
|
+
- **Multi-section documents** — section breaks are noted as comments; the generated builder uses a single section (the last one's margins). Multi-section support is on the roadmap
|
|
152
|
+
- **Dynamic content** — the generator wires up bracketed placeholders automatically, but long static template paragraphs become literal strings. Replace them with `data.fieldName` references for full dynamic control
|
|
153
|
+
- **Table of contents** — TOC fields are not regenerated
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## CLI options
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
python3 docx-to-builder.py <template.docx> [--output <builder.js>]
|
|
161
|
+
|
|
162
|
+
# Examples:
|
|
163
|
+
python3 docx-to-builder.py proposal.docx
|
|
164
|
+
python3 docx-to-builder.py proposal.docx --output src/builders/proposal-builder.js
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Contributing
|
|
170
|
+
|
|
171
|
+
Issues and PRs welcome. If your template produces unexpected output, open an issue and attach the template (or a sanitized version of it).
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## License
|
|
176
|
+
|
|
177
|
+
MIT — see [LICENSE](LICENSE)
|
package/cli.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* docx-to-builder CLI wrapper
|
|
4
|
+
* Calls the Python script with all arguments passed through.
|
|
5
|
+
*/
|
|
6
|
+
import { spawn } from 'child_process';
|
|
7
|
+
import { fileURLToPath } from 'url';
|
|
8
|
+
import { dirname, join } from 'path';
|
|
9
|
+
|
|
10
|
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const wrapperFile = fileURLToPath(import.meta.url);
const __dirname = dirname(wrapperFile);

// The Python script is expected to sit next to this wrapper.
const script = join(__dirname, 'docx-to-builder.py');

// Everything after `node <this file>` is forwarded to the Python script.
const [, , ...args] = process.argv;
|
|
13
|
+
|
|
14
|
+
// With no arguments there is nothing to forward: print the usage text to
// stdout and exit with status 0 without launching Python.
if (args.length < 1) {
  const usageLines = [
    'Usage: npx docx-to-builder <template.docx> [--output <builder.js>]',
    '',
    'Examples:',
    ' npx docx-to-builder my-template.docx',
    ' npx docx-to-builder my-template.docx --output src/my-builder.js',
  ];
  for (const line of usageLines) {
    console.log(line);
  }
  process.exit(0);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Handle a failure to launch the interpreter.
 *
 * Prints an install hint when the executable could not be found (ENOENT),
 * otherwise prints the error's own message; exits with status 1 either way.
 *
 * @param {Error & { code?: string }} err - Error emitted by the child process.
 */
function reportSpawnFailure(err) {
  if (err.code !== 'ENOENT') {
    console.error('Error:', err.message);
  } else {
    console.error(`Error: Python not found. Please install Python 3.8+ and ensure it is in your PATH.`);
    console.error(` Download: https://www.python.org/downloads/`);
  }
  process.exit(1);
}

// The interpreter is invoked as `python` on Windows and `python3` elsewhere.
const isWindows = process.platform === 'win32';
const python = isWindows ? 'python' : 'python3';

// Run the Python script with the forwarded arguments; the child shares this
// process's stdin/stdout/stderr.
const child = spawn(python, [script, ...args], { stdio: 'inherit' });

child.on('error', reportSpawnFailure);
|
|
35
|
+
|
|
36
|
+
// Propagate the Python script's exit status as this wrapper's own status.
// Node reports `code === null` when the child was terminated by a signal
// rather than exiting on its own; that must not be reported as success, so
// fall back to a failing status (1) instead of 0.
child.on('close', (code) => {
  process.exit(code ?? 1);
});
|