@origints/mammoth 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -0
- package/package.json +11 -11
package/README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# @origints/mammoth
|
|
2
|
+
|
|
3
|
+
> DOCX to HTML/text conversion for Origins using mammoth.js.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Why
|
|
8
|
+
|
|
9
|
+
Word documents are everywhere in enterprise workflows, but extracting their content programmatically is challenging. You need to convert them to a usable format while preserving semantic structure.
|
|
10
|
+
|
|
11
|
+
This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX files to clean HTML or plain text, with full control over style mapping and conversion options.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- Convert DOCX to semantic HTML
|
|
18
|
+
- Convert DOCX to plain text
|
|
19
|
+
- Custom style mapping for headings, lists, and more
|
|
20
|
+
- Configurable image handling
|
|
21
|
+
- Conversion warnings and messages
|
|
22
|
+
- Integrates with Origins transform registry
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Quick Start
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
npm install @origints/mammoth @origints/core
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
```ts
|
|
33
|
+
import { Planner, loadFile, run, globalRegistry } from "@origints/core";
|
|
34
|
+
import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
|
|
35
|
+
|
|
36
|
+
registerMammothTransforms(globalRegistry);
|
|
37
|
+
|
|
38
|
+
const plan = Planner.in(loadFile("document.docx"))
|
|
39
|
+
.mapIn(docxToHtml())
|
|
40
|
+
.emit((out, $) => out.add("html", $.get("html").asString()))
|
|
41
|
+
.compile();
|
|
42
|
+
|
|
43
|
+
const result = await run(plan, {}, globalRegistry);
|
|
44
|
+
|
|
45
|
+
if (result.ok) {
|
|
46
|
+
console.log(result.value.html);
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Expected output:
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
<h1>Document Title</h1><p>Content here...</p>
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
- Supported platforms:
|
|
61
|
+
- macOS / Linux / Windows
|
|
62
|
+
- Runtime requirements:
|
|
63
|
+
- Node.js >= 18
|
|
64
|
+
- Package managers:
|
|
65
|
+
- npm, pnpm, yarn
|
|
66
|
+
- Peer dependencies:
|
|
67
|
+
- @origints/core ^0.1.0
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
npm install @origints/mammoth @origints/core
|
|
71
|
+
# or
|
|
72
|
+
pnpm add @origints/mammoth @origints/core
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Usage
|
|
78
|
+
|
|
79
|
+
### Basic HTML conversion
|
|
80
|
+
|
|
81
|
+
```ts
|
|
82
|
+
import { Planner, loadFile, globalRegistry } from "@origints/core";
|
|
83
|
+
import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
|
|
84
|
+
|
|
85
|
+
registerMammothTransforms(globalRegistry);
|
|
86
|
+
|
|
87
|
+
const plan = Planner.in(loadFile("report.docx"))
|
|
88
|
+
.mapIn(docxToHtml())
|
|
89
|
+
.emit((out, $) => {
|
|
90
|
+
out.add("html", $.get("html").asString());
|
|
91
|
+
out.add("messages", $.get("messages").asArray());
|
|
92
|
+
})
|
|
93
|
+
.compile();
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Custom style mapping
|
|
97
|
+
|
|
98
|
+
```ts
|
|
99
|
+
const plan = Planner.in(loadFile("document.docx"))
|
|
100
|
+
.mapIn(
|
|
101
|
+
docxToHtml({
|
|
102
|
+
styleMap: [
|
|
103
|
+
"p[style-name='Title'] => h1.document-title",
|
|
104
|
+
"p[style-name='Heading 1'] => h1",
|
|
105
|
+
"p[style-name='Heading 2'] => h2",
|
|
106
|
+
"p[style-name='Quote'] => blockquote",
|
|
107
|
+
],
|
|
108
|
+
})
|
|
109
|
+
)
|
|
110
|
+
.emit((out, $) => out.add("html", $.get("html").asString()))
|
|
111
|
+
.compile();
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Convert to plain text
|
|
115
|
+
|
|
116
|
+
```ts
|
|
117
|
+
import { docxToText } from "@origints/mammoth";
|
|
118
|
+
|
|
119
|
+
const plan = Planner.in(loadFile("document.docx"))
|
|
120
|
+
.mapIn(docxToText())
|
|
121
|
+
.emit((out, $) => out.add("text", $.get("text").asString()))
|
|
122
|
+
.compile();
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Image handling options
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
const plan = Planner.in(loadFile("document.docx"))
|
|
129
|
+
.mapIn(
|
|
130
|
+
docxToHtml({
|
|
131
|
+
imageHandling: "omit", // or 'base64'
|
|
132
|
+
})
|
|
133
|
+
)
|
|
134
|
+
.emit((out, $) => out.add("html", $.get("html").asString()))
|
|
135
|
+
.compile();
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
---
|
|
139
|
+
|
|
140
|
+
## Project Status
|
|
141
|
+
|
|
142
|
+
- **Experimental** - APIs may change
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Non-Goals
|
|
147
|
+
|
|
148
|
+
- Not a DOCX writer/generator
|
|
149
|
+
- Not a full Word document parser (no styles, comments, etc.)
|
|
150
|
+
- Not a PDF converter
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Documentation
|
|
155
|
+
|
|
156
|
+
- See `@origints/core` for Origins concepts
|
|
157
|
+
- See [mammoth.js](https://www.npmjs.com/package/mammoth) for conversion details
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Contributing
|
|
162
|
+
|
|
163
|
+
- Open an issue before large changes
|
|
164
|
+
- Keep PRs focused
|
|
165
|
+
- Tests required for new features
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## License
|
|
170
|
+
|
|
171
|
+
MIT
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@origints/mammoth",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "DOCX to HTML conversion for Origins using mammoth.js",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -20,6 +20,13 @@
|
|
|
20
20
|
"publishConfig": {
|
|
21
21
|
"access": "public"
|
|
22
22
|
},
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "vite build",
|
|
25
|
+
"test": "vitest run",
|
|
26
|
+
"test:coverage": "vitest run --coverage",
|
|
27
|
+
"lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
|
|
28
|
+
"typecheck": "tsc -p tsconfig.json --noEmit"
|
|
29
|
+
},
|
|
23
30
|
"dependencies": {
|
|
24
31
|
"mammoth": "^1.11.0"
|
|
25
32
|
},
|
|
@@ -27,6 +34,7 @@
|
|
|
27
34
|
"@origints/core": "^0.1.0"
|
|
28
35
|
},
|
|
29
36
|
"devDependencies": {
|
|
37
|
+
"@origints/core": "workspace:*",
|
|
30
38
|
"@types/node": "25.0.6",
|
|
31
39
|
"@vitest/coverage-v8": "^4.0.16",
|
|
32
40
|
"eslint": "9.39.2",
|
|
@@ -34,14 +42,6 @@
|
|
|
34
42
|
"typescript": "5.9.3",
|
|
35
43
|
"vite": "7.3.1",
|
|
36
44
|
"vite-plugin-dts": "4.5.4",
|
|
37
|
-
"vitest": "4.0.16",
|
|
38
|
-
"@origints/core": "0.1.0"
|
|
39
|
-
},
|
|
40
|
-
"scripts": {
|
|
41
|
-
"build": "vite build",
|
|
42
|
-
"test": "vitest run",
|
|
43
|
-
"test:coverage": "vitest run --coverage",
|
|
44
|
-
"lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
|
|
45
|
-
"typecheck": "tsc -p tsconfig.json --noEmit"
|
|
45
|
+
"vitest": "4.0.16"
|
|
46
46
|
}
|
|
47
|
-
}
|
|
47
|
+
}
|