@origints/mammoth 0.1.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -97
- package/dist/convert.d.ts +6 -6
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.es.js +21 -19
- package/dist/index.es.js.map +1 -1
- package/package.json +21 -2
package/README.md
CHANGED
|
@@ -4,14 +4,6 @@
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
## Why
|
|
8
|
-
|
|
9
|
-
Word documents are everywhere in enterprise workflows, but extracting their content programmatically is challenging. You need to convert them to a usable format while preserving semantic structure.
|
|
10
|
-
|
|
11
|
-
This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX files to clean HTML or plain text, with full control over style mapping and conversion options.
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
7
|
## Features
|
|
16
8
|
|
|
17
9
|
- Convert DOCX to semantic HTML
|
|
@@ -23,80 +15,39 @@ This package wraps mammoth.js and exposes it as Origins transforms. Convert DOCX
|
|
|
23
15
|
|
|
24
16
|
---
|
|
25
17
|
|
|
26
|
-
## Quick Start
|
|
27
|
-
|
|
28
|
-
```bash
|
|
29
|
-
npm install @origints/mammoth @origints/core
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
```ts
|
|
33
|
-
import { Planner, loadFile, run, globalRegistry } from "@origints/core";
|
|
34
|
-
import { docxToHtml, registerMammothTransforms } from "@origints/mammoth";
|
|
35
|
-
|
|
36
|
-
registerMammothTransforms(globalRegistry);
|
|
37
|
-
|
|
38
|
-
const plan = Planner.in(loadFile("document.docx"))
|
|
39
|
-
.mapIn(docxToHtml())
|
|
40
|
-
.emit((out, $) => out.add("html", $.get("html").asString()))
|
|
41
|
-
.compile();
|
|
42
|
-
|
|
43
|
-
const result = await run(plan, {}, globalRegistry);
|
|
44
|
-
|
|
45
|
-
if (result.ok) {
|
|
46
|
-
console.log(result.value.html);
|
|
47
|
-
}
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
Expected output:
|
|
51
|
-
|
|
52
|
-
```
|
|
53
|
-
<h1>Document Title</h1><p>Content here...</p>
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
---
|
|
57
|
-
|
|
58
18
|
## Installation
|
|
59
19
|
|
|
60
|
-
- Supported platforms:
|
|
61
|
-
- macOS / Linux / Windows
|
|
62
|
-
- Runtime requirements:
|
|
63
|
-
- Node.js >= 18
|
|
64
|
-
- Package managers:
|
|
65
|
-
- npm, pnpm, yarn
|
|
66
|
-
- Peer dependencies:
|
|
67
|
-
- @origints/core ^0.1.0
|
|
68
|
-
|
|
69
20
|
```bash
|
|
70
21
|
npm install @origints/mammoth @origints/core
|
|
71
|
-
# or
|
|
72
|
-
pnpm add @origints/mammoth @origints/core
|
|
73
22
|
```
|
|
74
23
|
|
|
75
24
|
---
|
|
76
25
|
|
|
77
|
-
## Usage
|
|
26
|
+
## Usage with Planner
|
|
78
27
|
|
|
79
|
-
###
|
|
28
|
+
### Convert a DOCX file and extract the HTML
|
|
80
29
|
|
|
81
30
|
```ts
|
|
82
|
-
import { Planner, loadFile,
|
|
83
|
-
import { docxToHtml
|
|
84
|
-
|
|
85
|
-
registerMammothTransforms(globalRegistry);
|
|
31
|
+
import { Planner, loadFile, run } from '@origints/core'
|
|
32
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
86
33
|
|
|
87
|
-
const plan = Planner
|
|
34
|
+
const plan = new Planner()
|
|
35
|
+
.in(loadFile('document.docx'))
|
|
88
36
|
.mapIn(docxToHtml())
|
|
89
|
-
.emit((out, $) =>
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
37
|
+
.emit((out, $) => out.add('html', $.get('html').string()))
|
|
38
|
+
.compile()
|
|
39
|
+
|
|
40
|
+
const result = await run(plan, { readFile, registry })
|
|
41
|
+
// result.value: { html: '<h1>Title</h1><p>Content...</p>' }
|
|
94
42
|
```
|
|
95
43
|
|
|
96
|
-
###
|
|
44
|
+
### Convert with custom style mapping
|
|
97
45
|
|
|
98
46
|
```ts
|
|
99
|
-
|
|
47
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
48
|
+
|
|
49
|
+
const plan = new Planner()
|
|
50
|
+
.in(loadFile('report.docx'))
|
|
100
51
|
.mapIn(
|
|
101
52
|
docxToHtml({
|
|
102
53
|
styleMap: [
|
|
@@ -105,64 +56,91 @@ const plan = Planner.in(loadFile("document.docx"))
|
|
|
105
56
|
"p[style-name='Heading 2'] => h2",
|
|
106
57
|
"p[style-name='Quote'] => blockquote",
|
|
107
58
|
],
|
|
59
|
+
idPrefix: 'doc-',
|
|
108
60
|
})
|
|
109
61
|
)
|
|
110
|
-
.emit((out, $) => out.add(
|
|
111
|
-
.compile()
|
|
62
|
+
.emit((out, $) => out.add('content', $.get('html').string()))
|
|
63
|
+
.compile()
|
|
112
64
|
```
|
|
113
65
|
|
|
114
|
-
###
|
|
66
|
+
### Extract plain text from a DOCX file
|
|
115
67
|
|
|
116
68
|
```ts
|
|
117
|
-
import { docxToText } from
|
|
69
|
+
import { docxToText } from '@origints/mammoth'
|
|
118
70
|
|
|
119
|
-
const plan = Planner
|
|
71
|
+
const plan = new Planner()
|
|
72
|
+
.in(loadFile('document.docx'))
|
|
120
73
|
.mapIn(docxToText())
|
|
121
|
-
.emit((out, $) => out.add(
|
|
122
|
-
.compile()
|
|
74
|
+
.emit((out, $) => out.add('text', $.get('text').string()))
|
|
75
|
+
.compile()
|
|
76
|
+
|
|
77
|
+
const result = await run(plan, { readFile, registry })
|
|
78
|
+
// result.value: { text: 'Document Title\nContent here...' }
|
|
123
79
|
```
|
|
124
80
|
|
|
125
|
-
###
|
|
81
|
+
### Combine DOCX with other sources
|
|
126
82
|
|
|
127
83
|
```ts
|
|
128
|
-
const plan = Planner
|
|
129
|
-
.
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
84
|
+
const plan = new Planner()
|
|
85
|
+
.in(loadFile('report.docx'))
|
|
86
|
+
.mapIn(docxToHtml())
|
|
87
|
+
.emit((out, $) => out.add('reportHtml', $.get('html').string()))
|
|
88
|
+
.in(loadFile('metadata.json'))
|
|
89
|
+
.mapIn(parseJson())
|
|
90
|
+
.emit((out, $) =>
|
|
91
|
+
out
|
|
92
|
+
.add('author', $.get('author').string())
|
|
93
|
+
.add('date', $.get('date').string())
|
|
133
94
|
)
|
|
134
|
-
.
|
|
135
|
-
.compile();
|
|
95
|
+
.compile()
|
|
136
96
|
```
|
|
137
97
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
## Project Status
|
|
98
|
+
### Standalone usage (without Planner)
|
|
141
99
|
|
|
142
|
-
|
|
100
|
+
```ts
|
|
101
|
+
import * as fs from 'fs'
|
|
102
|
+
import { docxToHtmlImpl, docxToTextImpl } from '@origints/mammoth'
|
|
143
103
|
|
|
144
|
-
|
|
104
|
+
const buffer = fs.readFileSync('document.docx')
|
|
145
105
|
|
|
146
|
-
|
|
106
|
+
// Convert to HTML
|
|
107
|
+
const htmlResult = await docxToHtmlImpl.execute(buffer)
|
|
108
|
+
console.log(htmlResult.html)
|
|
147
109
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
110
|
+
// Log conversion warnings
|
|
111
|
+
for (const msg of htmlResult.messages) {
|
|
112
|
+
console.warn(msg.message)
|
|
113
|
+
}
|
|
151
114
|
|
|
152
|
-
|
|
115
|
+
// Convert to plain text
|
|
116
|
+
const textResult = await docxToTextImpl.execute(buffer)
|
|
117
|
+
console.log(textResult.text)
|
|
118
|
+
```
|
|
153
119
|
|
|
154
|
-
|
|
120
|
+
### Image handling
|
|
155
121
|
|
|
156
|
-
|
|
157
|
-
|
|
122
|
+
```ts
|
|
123
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
124
|
+
|
|
125
|
+
// Omit images
|
|
126
|
+
const plan = new Planner()
|
|
127
|
+
.in(loadFile('document.docx'))
|
|
128
|
+
.mapIn(docxToHtml({ imageHandling: 'omit' }))
|
|
129
|
+
.emit((out, $) => out.add('html', $.get('html').string()))
|
|
130
|
+
.compile()
|
|
131
|
+
```
|
|
158
132
|
|
|
159
133
|
---
|
|
160
134
|
|
|
161
|
-
##
|
|
135
|
+
## API
|
|
162
136
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
137
|
+
| Export | Description |
|
|
138
|
+
| ------------------------------------- | -------------------------------------------------- |
|
|
139
|
+
| `docxToHtml(options?)` | Create a transform AST for HTML conversion |
|
|
140
|
+
| `docxToText(options?)` | Create a transform AST for text conversion |
|
|
141
|
+
| `docxToHtmlImpl` | Async transform implementation for HTML conversion |
|
|
142
|
+
| `docxToTextImpl` | Async transform implementation for text conversion |
|
|
143
|
+
| `registerMammothTransforms(registry)` | Register all mammoth transforms with a registry |
|
|
166
144
|
|
|
167
145
|
---
|
|
168
146
|
|
package/dist/convert.d.ts
CHANGED
|
@@ -5,15 +5,15 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
|
|
|
5
5
|
*
|
|
6
6
|
* @example
|
|
7
7
|
* ```ts
|
|
8
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
8
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
9
9
|
* .mapIn(docxToHtml())
|
|
10
|
-
* .emit((out, $) => out.add('html', $.get('html').asString()))
|
|
10
|
+
* .emit((out, $) => out.add('html', $.get('html').string()))
|
|
11
11
|
* .compile()
|
|
12
12
|
* ```
|
|
13
13
|
*
|
|
14
14
|
* @example With custom style mapping
|
|
15
15
|
* ```ts
|
|
16
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
16
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
17
17
|
* .mapIn(docxToHtml({
|
|
18
18
|
* styleMap: [
|
|
19
19
|
* "p[style-name='Title'] => h1.document-title",
|
|
@@ -21,7 +21,7 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
|
|
|
21
21
|
* ],
|
|
22
22
|
* idPrefix: 'doc-',
|
|
23
23
|
* }))
|
|
24
|
-
* .emit((out, $) => out.add('content', $.get('html').asString()))
|
|
24
|
+
* .emit((out, $) => out.add('content', $.get('html').string()))
|
|
25
25
|
* .compile()
|
|
26
26
|
* ```
|
|
27
27
|
*/
|
|
@@ -31,9 +31,9 @@ export declare function docxToHtml(options?: DocxToHtmlOptions): TransformAst;
|
|
|
31
31
|
*
|
|
32
32
|
* @example
|
|
33
33
|
* ```ts
|
|
34
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
34
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
35
35
|
* .mapIn(docxToText())
|
|
36
|
-
* .emit((out, $) => out.add('text', $.get('text').asString()))
|
|
36
|
+
* .emit((out, $) => out.add('text', $.get('text').string()))
|
|
37
37
|
* .compile()
|
|
38
38
|
* ```
|
|
39
39
|
*/
|
package/dist/index.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function
|
|
1
|
+
/*
 * Minified CommonJS bundle for @origints/mammoth (build output; the code below
 * is a single statement stream that happens to be wrapped inside a `void 0`
 * expression, which is still valid JavaScript).
 *
 * Dependencies loaded at require time:
 *   m = require("mammoth")          - DOCX conversion library
 *   o = require("@origints/core")   - only `o.globalRegistry` is used here
 *
 * Identifier map (minified name -> exported name / role):
 *   s  -> exports.streamToBuffer
 *         Drains a stream via getReader()/read() until `done`, collecting the
 *         chunks and returning Buffer.concat(chunks). The reader lock is
 *         released in `finally`, so it is released on errors as well.
 *   g  -> exports.docxToHtml
 *         Returns a plain descriptor { kind: "transform", namespace, name,
 *         args } whose args are the options filtered through `p`.
 *   y  -> exports.docxToText
 *         Same descriptor shape; the options object is passed through as-is.
 *   c  -> exports.docxToHtmlImpl
 *         execute(input, args): args default to {} (via `x`), input is turned
 *         into a Buffer (via `d`), options are translated (via `T`), then
 *         m.convertToHtml({ buffer }, options) is awaited.
 *         Resolves to { html, messages }.
 *   l  -> exports.docxToTextImpl
 *         execute(input): converts input with `d`, awaits
 *         m.extractRawText({ buffer }). Resolves to { text, messages }.
 *         Takes no second parameter, so transform args are not consulted.
 *   p  -> option filter used by `g`
 *         Copies only styleMap, includeEmbeddedStyleMap,
 *         includeDefaultStyleMap, idPrefix, preserveEmptyParagraphs, and
 *         imageHandling when it is exactly "inline" or "omit". Anything else
 *         (e.g. a function-valued imageHandling, transformDocument) is not
 *         copied. Returns undefined when no key was kept.
 *   x  -> returns its argument, or {} when the argument is falsy.
 *   d  -> input normaliser shared by `c` and `l`
 *         Returns Buffers unchanged, drains ReadableStream instances with `s`,
 *         and throws Error for every other input.
 *         NOTE(review): the error text always says "docxToHtml expects ...",
 *         including when the call came from the docxToText implementation.
 *   T  -> translates package options into mammoth options
 *         preserveEmptyParagraphs === false -> ignoreEmptyParagraphs = true,
 *         preserveEmptyParagraphs === true  -> ignoreEmptyParagraphs = false,
 *         imageHandling -> convertImage (built by `M`); styleMap, the two
 *         include*StyleMap flags, idPrefix and transformDocument are copied.
 *   M  -> builds the mammoth image converter with m.images.imgElement
 *         "omit"   -> every image resolves to { src: "" }
 *         "inline" -> src is a `data:<contentType>;base64,<data>` URI
 *         otherwise the value is called as a handler with
 *         { contentType, read(encoding) }, where read("base64") uses
 *         readAsBase64String() and any other encoding uses
 *         readAsArrayBuffer(); a null handler result becomes { src: "" }.
 *   f  -> normalises a message to { type, message }; any type other than
 *         "error" is reported as "warning".
 *   u  -> exports.registerMammothTransforms
 *         Calls registry.register() for `c` and then for `l`.
 *
 * Side effect at load: `u(o.globalRegistry)` runs as soon as this module is
 * required, i.e. both implementations are registered with the core package's
 * globalRegistry without the caller doing anything.
 */
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth"),o=require("@origints/core");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function g(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToHtml",args:p(e)}}function y(e){return{kind:"transform",namespace:"@origints/mammoth",name:"docxToText",args:e}}const c={namespace:"@origints/mammoth",name:"docxToHtml",async execute(e,r){const t=x(r),a=await d(e),n=T(t),i=await m.convertToHtml({buffer:a},n);return{html:i.value,messages:i.messages.map(f)}}},l={namespace:"@origints/mammoth",name:"docxToText",async execute(e){const r=await d(e),t=await m.extractRawText({buffer:r});return{text:t.value,messages:t.messages.map(f)}}};function p(e){if(!e)return;const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),(e.imageHandling==="inline"||e.imageHandling==="omit")&&(r.imageHandling=e.imageHandling),e.preserveEmptyParagraphs!==void 0&&(r.preserveEmptyParagraphs=e.preserveEmptyParagraphs),Object.keys(r).length>0?r:void 0}function x(e){return e||{}}async function d(e){if(Buffer.isBuffer(e))return e;if(e instanceof ReadableStream)return s(e);throw new Error(`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`)}function T(e){const r={};return e.styleMap&&(r.styleMap=e.styleMap),e.includeEmbeddedStyleMap!==void 0&&(r.includeEmbeddedStyleMap=e.includeEmbeddedStyleMap),e.includeDefaultStyleMap!==void 
0&&(r.includeDefaultStyleMap=e.includeDefaultStyleMap),e.idPrefix&&(r.idPrefix=e.idPrefix),e.preserveEmptyParagraphs===!1?r.ignoreEmptyParagraphs=!0:e.preserveEmptyParagraphs===!0&&(r.ignoreEmptyParagraphs=!1),e.imageHandling&&(r.convertImage=M(e.imageHandling)),e.transformDocument&&(r.transformDocument=e.transformDocument),r}function M(e){return e==="omit"?m.images.imgElement(()=>Promise.resolve({src:""})):e==="inline"?m.images.imgElement(r=>r.readAsBase64String().then(t=>({src:`data:${r.contentType};base64,${t}`}))):m.images.imgElement(async r=>{const t={contentType:r.contentType,read:(n=>n==="base64"?r.readAsBase64String():r.readAsArrayBuffer())},a=await e(t);return a===null?{src:""}:a})}function f(e){return{type:e.type==="error"?"error":"warning",message:e.message}}function u(e){e.register(c),e.register(l)}u(o.globalRegistry);exports.docxToHtml=g;exports.docxToHtmlImpl=c;exports.docxToText=y;exports.docxToTextImpl=l;exports.registerMammothTransforms=u;exports.streamToBuffer=s;
|
|
2
2
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":"2GASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QACJC,EACAC,EAC+B,CAC/B,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAG9BA,EAAQ,gBAAkB,UAC1BA,EAAQ,gBAAkB,UAE1Be,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,
CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAAwD,CAChF,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC7OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC"}
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":"uIASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQC,EAAgBC,EAA+C,CAC3E,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAE5BA,EAAQ,gBAAkB,UAAYA,EAAQ,gBAAkB,UAClEe,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,
EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAGP,CACjB,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC5OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC,CAGAU,EAA0BE,gBAAc"}
|
package/dist/index.d.ts
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
* @example Basic usage
|
|
10
10
|
* ```ts
|
|
11
11
|
* import { Planner, loadFile } from '@origints/core'
|
|
12
|
-
* import { docxToHtml
|
|
13
|
-
*
|
|
14
|
-
* // Register transforms
|
|
15
|
-
* registerMammothTransforms(globalRegistry)
|
|
12
|
+
* import { docxToHtml } from '@origints/mammoth'
|
|
16
13
|
*
|
|
17
14
|
* // Create a plan
|
|
18
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
15
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
19
16
|
* .mapIn(docxToHtml())
|
|
20
|
-
* .emit((out, $) => out.add('html', $.get('html').
|
|
17
|
+
* .emit((out, $) => out.add('html', $.get('html').string()))
|
|
21
18
|
* .compile()
|
|
22
19
|
* ```
|
|
23
20
|
*
|
|
24
21
|
* @example With custom style mapping
|
|
25
22
|
* ```ts
|
|
26
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
23
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
27
24
|
* .mapIn(docxToHtml({
|
|
28
25
|
* styleMap: [
|
|
29
26
|
* "p[style-name='Title'] => h1.document-title",
|
|
@@ -33,7 +30,7 @@
|
|
|
33
30
|
* idPrefix: 'doc-',
|
|
34
31
|
* imageHandling: 'omit',
|
|
35
32
|
* }))
|
|
36
|
-
* .emit((out, $) => out.add('content', $.get('html').
|
|
33
|
+
* .emit((out, $) => out.add('content', $.get('html').string()))
|
|
37
34
|
* .compile()
|
|
38
35
|
* ```
|
|
39
36
|
*/
|
package/dist/index.es.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import m from "mammoth";
|
|
2
|
+
import { globalRegistry as l } from "@origints/core";
|
|
2
3
|
async function f(e) {
|
|
3
4
|
const r = e.getReader(), a = [];
|
|
4
5
|
try {
|
|
@@ -12,15 +13,15 @@ async function f(e) {
|
|
|
12
13
|
r.releaseLock();
|
|
13
14
|
}
|
|
14
15
|
}
|
|
15
|
-
function
|
|
16
|
+
function T(e) {
|
|
16
17
|
return {
|
|
17
18
|
kind: "transform",
|
|
18
19
|
namespace: "@origints/mammoth",
|
|
19
20
|
name: "docxToHtml",
|
|
20
|
-
args:
|
|
21
|
+
args: o(e)
|
|
21
22
|
};
|
|
22
23
|
}
|
|
23
|
-
function
|
|
24
|
+
function v(e) {
|
|
24
25
|
return {
|
|
25
26
|
kind: "transform",
|
|
26
27
|
namespace: "@origints/mammoth",
|
|
@@ -28,19 +29,19 @@ function h(e) {
|
|
|
28
29
|
args: e
|
|
29
30
|
};
|
|
30
31
|
}
|
|
31
|
-
const
|
|
32
|
+
const d = {
|
|
32
33
|
namespace: "@origints/mammoth",
|
|
33
34
|
name: "docxToHtml",
|
|
34
35
|
async execute(e, r) {
|
|
35
|
-
const a =
|
|
36
|
+
const a = g(
|
|
36
37
|
r
|
|
37
|
-
), t = await s(e), n =
|
|
38
|
+
), t = await s(e), n = y(a), i = await m.convertToHtml({ buffer: t }, n);
|
|
38
39
|
return {
|
|
39
40
|
html: i.value,
|
|
40
41
|
messages: i.messages.map(c)
|
|
41
42
|
};
|
|
42
43
|
}
|
|
43
|
-
},
|
|
44
|
+
}, u = {
|
|
44
45
|
namespace: "@origints/mammoth",
|
|
45
46
|
name: "docxToText",
|
|
46
47
|
async execute(e) {
|
|
@@ -51,12 +52,12 @@ const l = {
|
|
|
51
52
|
};
|
|
52
53
|
}
|
|
53
54
|
};
|
|
54
|
-
function
|
|
55
|
+
function o(e) {
|
|
55
56
|
if (!e) return;
|
|
56
57
|
const r = {};
|
|
57
58
|
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), (e.imageHandling === "inline" || e.imageHandling === "omit") && (r.imageHandling = e.imageHandling), e.preserveEmptyParagraphs !== void 0 && (r.preserveEmptyParagraphs = e.preserveEmptyParagraphs), Object.keys(r).length > 0 ? r : void 0;
|
|
58
59
|
}
|
|
59
|
-
function
|
|
60
|
+
function g(e) {
|
|
60
61
|
return e || {};
|
|
61
62
|
}
|
|
62
63
|
async function s(e) {
|
|
@@ -68,11 +69,11 @@ async function s(e) {
|
|
|
68
69
|
`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`
|
|
69
70
|
);
|
|
70
71
|
}
|
|
71
|
-
function
|
|
72
|
+
function y(e) {
|
|
72
73
|
const r = {};
|
|
73
|
-
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage =
|
|
74
|
+
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = p(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
|
|
74
75
|
}
|
|
75
|
-
function
|
|
76
|
+
function p(e) {
|
|
76
77
|
return e === "omit" ? m.images.imgElement(() => Promise.resolve({ src: "" })) : e === "inline" ? m.images.imgElement(
|
|
77
78
|
(r) => r.readAsBase64String().then((a) => ({
|
|
78
79
|
src: `data:${r.contentType};base64,${a}`
|
|
@@ -91,15 +92,16 @@ function c(e) {
|
|
|
91
92
|
message: e.message
|
|
92
93
|
};
|
|
93
94
|
}
|
|
94
|
-
function
|
|
95
|
-
e.register(
|
|
95
|
+
function x(e) {
|
|
96
|
+
e.register(d), e.register(u);
|
|
96
97
|
}
|
|
98
|
+
x(l);
|
|
97
99
|
export {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
T as docxToHtml,
|
|
101
|
+
d as docxToHtmlImpl,
|
|
102
|
+
v as docxToText,
|
|
103
|
+
u as docxToTextImpl,
|
|
104
|
+
x as registerMammothTransforms,
|
|
103
105
|
f as streamToBuffer
|
|
104
106
|
};
|
|
105
107
|
//# sourceMappingURL=index.es.js.map
|
package/dist/index.es.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":";AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QACJC,GACAC,GAC+B;AAC/B,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAG9BA,EAAQ,kBAAkB,YAC1BA,EAAQ,kBAAkB,YAE1Be,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;AAKA,SAAST,EACPS,GACmB;AACnB,SAAKA,K
AAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAAwD;AAChF,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC7OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;"}
|
|
1
|
+
{"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":";;AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQC,GAAgBC,GAA+C;AAC3E,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAE5BA,EAAQ,kBAAkB,YAAYA,EAAQ,kBAAkB,YAClEe,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;A
AKA,SAAST,EACPS,GACmB;AACnB,SAAKA,KAAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAGP;AACjB,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC5OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;AAGAU,EAA0BE,CAAc;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@origints/mammoth",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.3.2",
|
|
4
4
|
"description": "DOCX to HTML conversion for Origins using mammoth.js",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -27,11 +27,30 @@
|
|
|
27
27
|
"lint": "eslint \"{src,tests}/**/*.{ts,tsx}\" --max-warnings 0",
|
|
28
28
|
"typecheck": "tsc -p tsconfig.json --noEmit"
|
|
29
29
|
},
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/fponticelli/origints.git",
|
|
33
|
+
"directory": "packages/mammoth"
|
|
34
|
+
},
|
|
35
|
+
"homepage": "https://origints.dev",
|
|
36
|
+
"bugs": "https://github.com/fponticelli/origints/issues",
|
|
37
|
+
"keywords": [
|
|
38
|
+
"origints",
|
|
39
|
+
"data-extraction",
|
|
40
|
+
"lineage",
|
|
41
|
+
"provenance",
|
|
42
|
+
"docx",
|
|
43
|
+
"word",
|
|
44
|
+
"document-conversion"
|
|
45
|
+
],
|
|
46
|
+
"engines": {
|
|
47
|
+
"node": ">=18"
|
|
48
|
+
},
|
|
30
49
|
"dependencies": {
|
|
31
50
|
"mammoth": "^1.11.0"
|
|
32
51
|
},
|
|
33
52
|
"peerDependencies": {
|
|
34
|
-
"@origints/core": "^0.
|
|
53
|
+
"@origints/core": "^0.3.0"
|
|
35
54
|
},
|
|
36
55
|
"devDependencies": {
|
|
37
56
|
"@origints/core": "workspace:*",
|