@origints/mammoth 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +171 -0
  2. package/package.json +11 -11
package/README.md ADDED
@@ -0,0 +1,171 @@
1
+ # @origints/mammoth
2
+
3
+ > DOCX to HTML/text conversion for Origins using mammoth.js.
4
+
5
+ ---
6
+
7
+ ## Why
8
+
9
+ Word documents are everywhere in enterprise workflows, but extracting their content programmatically is challenging. You need to convert them to a usable format while preserving semantic structure.
10
+
11
+ This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX files to clean HTML or plain text, with full control over style mapping and conversion options.
12
+
13
+ ---
14
+
15
+ ## Features
16
+
17
+ - Convert DOCX to semantic HTML
18
+ - Convert DOCX to plain text
19
+ - Custom style mapping for headings, lists, and more
20
+ - Configurable image handling
21
+ - Conversion warnings and messages
22
+ - Integrates with Origins transform registry
23
+
24
+ ---
25
+
26
+ ## Quick Start
27
+
28
+ ```bash
29
+ npm install @origints/mammoth @origints/core
30
+ ```
31
+
32
+ ```ts
33
+ import { Planner, loadFile, run, globalRegistry } from "@origints/core";
34
+ import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
35
+
36
+ registerMammothTransforms(globalRegistry);
37
+
38
+ const plan = Planner.in(loadFile("document.docx"))
39
+ .mapIn(docxToHtml())
40
+ .emit((out, $) => out.add("html", $.get("html").asString()))
41
+ .compile();
42
+
43
+ const result = await run(plan, {}, globalRegistry);
44
+
45
+ if (result.ok) {
46
+ console.log(result.value.html);
47
+ }
48
+ ```
49
+
50
+ Expected output:
51
+
52
+ ```html
53
+ <h1>Document Title</h1><p>Content here...</p>
54
+ ```
55
+
56
+ ---
57
+
58
+ ## Installation
59
+
60
+ - Supported platforms:
61
+ - macOS / Linux / Windows
62
+ - Runtime requirements:
63
+ - Node.js >= 18
64
+ - Package managers:
65
+ - npm, pnpm, yarn
66
+ - Peer dependencies:
67
+ - @origints/core ^0.1.0
68
+
69
+ ```bash
70
+ npm install @origints/mammoth @origints/core
71
+ # or
72
+ pnpm add @origints/mammoth @origints/core
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Usage
78
+
79
+ ### Basic HTML conversion
80
+
81
+ ```ts
82
+ import { Planner, loadFile, globalRegistry } from "@origints/core";
83
+ import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
84
+
85
+ registerMammothTransforms(globalRegistry);
86
+
87
+ const plan = Planner.in(loadFile("report.docx"))
88
+ .mapIn(docxToHtml())
89
+ .emit((out, $) => {
90
+ out.add("html", $.get("html").asString());
91
+ out.add("messages", $.get("messages").asArray());
92
+ })
93
+ .compile();
94
+ ```
95
+
96
+ ### Custom style mapping
97
+
98
+ ```ts
99
+ const plan = Planner.in(loadFile("document.docx"))
100
+ .mapIn(
101
+ docxToHtml({
102
+ styleMap: [
103
+ "p[style-name='Title'] => h1.document-title",
104
+ "p[style-name='Heading 1'] => h1",
105
+ "p[style-name='Heading 2'] => h2",
106
+ "p[style-name='Quote'] => blockquote",
107
+ ],
108
+ })
109
+ )
110
+ .emit((out, $) => out.add("html", $.get("html").asString()))
111
+ .compile();
112
+ ```
113
+
114
+ ### Convert to plain text
115
+
116
+ ```ts
117
+ import { docxToText } from "@origints/mammoth";
118
+
119
+ const plan = Planner.in(loadFile("document.docx"))
120
+ .mapIn(docxToText())
121
+ .emit((out, $) => out.add("text", $.get("text").asString()))
122
+ .compile();
123
+ ```
124
+
125
+ ### Image handling options
126
+
127
+ ```ts
128
+ const plan = Planner.in(loadFile("document.docx"))
129
+ .mapIn(
130
+ docxToHtml({
131
+ imageHandling: "omit", // or "base64"
132
+ })
133
+ )
134
+ .emit((out, $) => out.add("html", $.get("html").asString()))
135
+ .compile();
136
+ ```
137
+
138
+ ---
139
+
140
+ ## Project Status
141
+
142
+ - **Experimental** - APIs may change
143
+
144
+ ---
145
+
146
+ ## Non-Goals
147
+
148
+ - Not a DOCX writer/generator
149
+ - Not a full Word document parser (raw styles, comments, etc. are not exposed as data)
150
+ - Not a PDF converter
151
+
152
+ ---
153
+
154
+ ## Documentation
155
+
156
+ - See `@origints/core` for Origins concepts
157
+ - See [mammoth.js](https://www.npmjs.com/package/mammoth) for conversion details
158
+
159
+ ---
160
+
161
+ ## Contributing
162
+
163
+ - Open an issue before large changes
164
+ - Keep PRs focused
165
+ - Tests required for new features
166
+
167
+ ---
168
+
169
+ ## License
170
+
171
+ MIT
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@origints/mammoth",
3
- "version": "0.1.0",
3
+ "version": "0.1.1",
4
4
  "description": "DOCX to HTML conversion for Origins using mammoth.js",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -20,6 +20,13 @@
20
20
  "publishConfig": {
21
21
  "access": "public"
22
22
  },
23
+ "scripts": {
24
+ "build": "vite build",
25
+ "test": "vitest run",
26
+ "test:coverage": "vitest run --coverage",
27
+ "lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
28
+ "typecheck": "tsc -p tsconfig.json --noEmit"
29
+ },
23
30
  "dependencies": {
24
31
  "mammoth": "^1.11.0"
25
32
  },
@@ -27,6 +34,7 @@
27
34
  "@origints/core": "^0.1.0"
28
35
  },
29
36
  "devDependencies": {
37
+ "@origints/core": "workspace:*",
30
38
  "@types/node": "25.0.6",
31
39
  "@vitest/coverage-v8": "^4.0.16",
32
40
  "eslint": "9.39.2",
@@ -34,14 +42,6 @@
34
42
  "typescript": "5.9.3",
35
43
  "vite": "7.3.1",
36
44
  "vite-plugin-dts": "4.5.4",
37
- "vitest": "4.0.16",
38
- "@origints/core": "0.1.0"
39
- },
40
- "scripts": {
41
- "build": "vite build",
42
- "test": "vitest run",
43
- "test:coverage": "vitest run --coverage",
44
- "lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
45
- "typecheck": "tsc -p tsconfig.json --noEmit"
45
+ "vitest": "4.0.16"
46
46
  }
47
- }
47
+ }