@origints/mammoth 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +149 -0
- package/dist/convert.d.ts +6 -6
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +5 -8
- package/dist/index.es.js +21 -19
- package/dist/index.es.js.map +1 -1
- package/package.json +22 -3
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Franco Ponticelli
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# @origints/mammoth
|
|
2
|
+
|
|
3
|
+
> DOCX to HTML/text conversion for Origins using mammoth.js.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- Convert DOCX to semantic HTML
|
|
10
|
+
- Convert DOCX to plain text
|
|
11
|
+
- Custom style mapping for headings, lists, and more
|
|
12
|
+
- Configurable image handling
|
|
13
|
+
- Conversion warnings and messages
|
|
14
|
+
- Integrates with Origins transform registry
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install @origints/mammoth @origints/core
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Usage with Planner
|
|
27
|
+
|
|
28
|
+
### Convert a DOCX file and extract the HTML
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
import { Planner, loadFile, run } from '@origints/core'
|
|
32
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
33
|
+
|
|
34
|
+
const plan = new Planner()
|
|
35
|
+
.in(loadFile('document.docx'))
|
|
36
|
+
.mapIn(docxToHtml())
|
|
37
|
+
.emit((out, $) => out.add('html', $.get('html').string()))
|
|
38
|
+
.compile()
|
|
39
|
+
|
|
40
|
+
const result = await run(plan, { readFile, registry })
|
|
41
|
+
// result.value: { html: '<h1>Title</h1><p>Content...</p>' }
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Convert with custom style mapping
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
48
|
+
|
|
49
|
+
const plan = new Planner()
|
|
50
|
+
.in(loadFile('report.docx'))
|
|
51
|
+
.mapIn(
|
|
52
|
+
docxToHtml({
|
|
53
|
+
styleMap: [
|
|
54
|
+
"p[style-name='Title'] => h1.document-title",
|
|
55
|
+
"p[style-name='Heading 1'] => h1",
|
|
56
|
+
"p[style-name='Heading 2'] => h2",
|
|
57
|
+
"p[style-name='Quote'] => blockquote",
|
|
58
|
+
],
|
|
59
|
+
idPrefix: 'doc-',
|
|
60
|
+
})
|
|
61
|
+
)
|
|
62
|
+
.emit((out, $) => out.add('content', $.get('html').string()))
|
|
63
|
+
.compile()
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Extract plain text from a DOCX file
|
|
67
|
+
|
|
68
|
+
```ts
|
|
69
|
+
import { docxToText } from '@origints/mammoth'
|
|
70
|
+
|
|
71
|
+
const plan = new Planner()
|
|
72
|
+
.in(loadFile('document.docx'))
|
|
73
|
+
.mapIn(docxToText())
|
|
74
|
+
.emit((out, $) => out.add('text', $.get('text').string()))
|
|
75
|
+
.compile()
|
|
76
|
+
|
|
77
|
+
const result = await run(plan, { readFile, registry })
|
|
78
|
+
// result.value: { text: 'Document Title\nContent here...' }
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Combine DOCX with other sources
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
const plan = new Planner()
|
|
85
|
+
.in(loadFile('report.docx'))
|
|
86
|
+
.mapIn(docxToHtml())
|
|
87
|
+
.emit((out, $) => out.add('reportHtml', $.get('html').string()))
|
|
88
|
+
.in(loadFile('metadata.json'))
|
|
89
|
+
.mapIn(parseJson())
|
|
90
|
+
.emit((out, $) =>
|
|
91
|
+
out
|
|
92
|
+
.add('author', $.get('author').string())
|
|
93
|
+
.add('date', $.get('date').string())
|
|
94
|
+
)
|
|
95
|
+
.compile()
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Standalone usage (without Planner)
|
|
99
|
+
|
|
100
|
+
```ts
|
|
101
|
+
import * as fs from 'fs'
|
|
102
|
+
import { docxToHtmlImpl, docxToTextImpl } from '@origints/mammoth'
|
|
103
|
+
|
|
104
|
+
const buffer = fs.readFileSync('document.docx')
|
|
105
|
+
|
|
106
|
+
// Convert to HTML
|
|
107
|
+
const htmlResult = await docxToHtmlImpl.execute(buffer)
|
|
108
|
+
console.log(htmlResult.html)
|
|
109
|
+
|
|
110
|
+
// Log conversion warnings
|
|
111
|
+
for (const msg of htmlResult.messages) {
|
|
112
|
+
console.warn(msg.message)
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Convert to plain text
|
|
116
|
+
const textResult = await docxToTextImpl.execute(buffer)
|
|
117
|
+
console.log(textResult.text)
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Image handling
|
|
121
|
+
|
|
122
|
+
```ts
|
|
123
|
+
import { docxToHtml } from '@origints/mammoth'
|
|
124
|
+
|
|
125
|
+
// Omit images
|
|
126
|
+
const plan = new Planner()
|
|
127
|
+
.in(loadFile('document.docx'))
|
|
128
|
+
.mapIn(docxToHtml({ imageHandling: 'omit' }))
|
|
129
|
+
.emit((out, $) => out.add('html', $.get('html').string()))
|
|
130
|
+
.compile()
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## API
|
|
136
|
+
|
|
137
|
+
| Export | Description |
|
|
138
|
+
| ------------------------------------- | -------------------------------------------------- |
|
|
139
|
+
| `docxToHtml(options?)` | Create a transform AST for HTML conversion |
|
|
140
|
+
| `docxToText(options?)` | Create a transform AST for text conversion |
|
|
141
|
+
| `docxToHtmlImpl` | Async transform implementation for HTML conversion |
|
|
142
|
+
| `docxToTextImpl` | Async transform implementation for text conversion |
|
|
143
|
+
| `registerMammothTransforms(registry)` | Register all mammoth transforms with a registry |
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
## License
|
|
148
|
+
|
|
149
|
+
MIT
|
package/dist/convert.d.ts
CHANGED
|
@@ -5,15 +5,15 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
|
|
|
5
5
|
*
|
|
6
6
|
* @example
|
|
7
7
|
* ```ts
|
|
8
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
8
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
9
9
|
* .mapIn(docxToHtml())
|
|
10
|
-
* .emit((out, $) => out.add('html', $.get('html').asString()))
|
|
10
|
+
* .emit((out, $) => out.add('html', $.get('html').string()))
|
|
11
11
|
* .compile()
|
|
12
12
|
* ```
|
|
13
13
|
*
|
|
14
14
|
* @example With custom style mapping
|
|
15
15
|
* ```ts
|
|
16
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
16
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
17
17
|
* .mapIn(docxToHtml({
|
|
18
18
|
* styleMap: [
|
|
19
19
|
* "p[style-name='Title'] => h1.document-title",
|
|
@@ -21,7 +21,7 @@ import { DocxToHtmlOptions, DocxToTextOptions } from './options';
|
|
|
21
21
|
* ],
|
|
22
22
|
* idPrefix: 'doc-',
|
|
23
23
|
* }))
|
|
24
|
-
* .emit((out, $) => out.add('content', $.get('html').asString()))
|
|
24
|
+
* .emit((out, $) => out.add('content', $.get('html').string()))
|
|
25
25
|
* .compile()
|
|
26
26
|
* ```
|
|
27
27
|
*/
|
|
@@ -31,9 +31,9 @@ export declare function docxToHtml(options?: DocxToHtmlOptions): TransformAst;
|
|
|
31
31
|
*
|
|
32
32
|
* @example
|
|
33
33
|
* ```ts
|
|
34
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
34
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
35
35
|
* .mapIn(docxToText())
|
|
36
|
-
* .emit((out, $) => out.add('text', $.get('text').asString()))
|
|
36
|
+
* .emit((out, $) => out.add('text', $.get('text').string()))
|
|
37
37
|
* .compile()
|
|
38
38
|
* ```
|
|
39
39
|
*/
|
package/dist/index.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const m=require("mammoth");async function s(e){const r=e.getReader(),t=[];try{for(;;){const{done:a,value:n}=await r.read();if(a)break;t.push(n)}return Buffer.concat(t)}finally{r.releaseLock()}}function
|
|
1
|
+
"use strict";
/*
 * @origints/mammoth — CommonJS entry point.
 *
 * Exposes two transforms, `docxToHtml` and `docxToText`, each as a factory
 * that builds a transform descriptor and as an implementation object whose
 * `execute` does the conversion through the `mammoth` module. Loading this
 * module also registers both implementations with `globalRegistry` from
 * `@origints/core` (see the end of the file).
 *
 * NOTE(review): this is a hand-formatted version of the minified bundle; any
 * source map generated for the minified layout no longer lines up with it.
 */
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });

const mammoth = require("mammoth");
const core = require("@origints/core");

const NAMESPACE = "@origints/mammoth";

/**
 * Drain a stream into a single Buffer.
 *
 * @param {ReadableStream} stream - Stream whose reader yields chunks accepted by `Buffer.concat`.
 * @returns {Promise<Buffer>} All chunks concatenated in read order.
 */
async function streamToBuffer(stream) {
  const reader = stream.getReader();
  const chunks = [];
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
    }
    return Buffer.concat(chunks);
  } finally {
    // Release the lock on both the success and the failure path.
    reader.releaseLock();
  }
}

/**
 * Build the transform descriptor for DOCX → HTML conversion.
 *
 * @param {object} [options] - Conversion options; only the serializable subset is kept (see `serializeOptions`).
 * @returns {{kind: string, namespace: string, name: string, args: (object|undefined)}}
 */
function docxToHtml(options) {
  return {
    kind: "transform",
    namespace: NAMESPACE,
    name: "docxToHtml",
    args: serializeOptions(options),
  };
}

/**
 * Build the transform descriptor for DOCX → plain text extraction.
 *
 * @param {object} [options] - Stored on the descriptor as-is; `docxToTextImpl.execute` does not read it.
 * @returns {{kind: string, namespace: string, name: string, args: (object|undefined)}}
 */
function docxToText(options) {
  return {
    kind: "transform",
    namespace: NAMESPACE,
    name: "docxToText",
    args: options,
  };
}

/**
 * Implementation of the `docxToHtml` transform.
 *
 * `execute(input, args)` accepts a Buffer or ReadableStream and resolves to
 * `{ html, messages }`; it rejects with an Error for any other input type.
 */
const docxToHtmlImpl = {
  namespace: NAMESPACE,
  name: "docxToHtml",
  async execute(input, args) {
    const options = deserializeOptions(args);
    const buffer = await toBuffer(input, "docxToHtml");
    const mammothOptions = toMammothOptions(options);
    const result = await mammoth.convertToHtml({ buffer }, mammothOptions);
    return {
      html: result.value,
      messages: result.messages.map(toMammothMessage),
    };
  },
};

/**
 * Implementation of the `docxToText` transform.
 *
 * `execute(input)` accepts a Buffer or ReadableStream and resolves to
 * `{ text, messages }`; it rejects with an Error for any other input type.
 */
const docxToTextImpl = {
  namespace: NAMESPACE,
  name: "docxToText",
  async execute(input) {
    // Pass the transform name so a bad-input error names docxToText
    // instead of blaming docxToHtml.
    const buffer = await toBuffer(input, "docxToText");
    const result = await mammoth.extractRawText({ buffer });
    return {
      text: result.value,
      messages: result.messages.map(toMammothMessage),
    };
  },
};

/**
 * Reduce user options to the subset stored on the transform descriptor.
 *
 * `transformDocument` and function-valued `imageHandling` are not copied.
 *
 * @param {object} [options]
 * @returns {object|undefined} The kept options, or `undefined` when none were kept.
 */
function serializeOptions(options) {
  if (!options) return undefined;

  const serialized = {};
  if (options.styleMap) {
    serialized.styleMap = options.styleMap;
  }
  if (options.includeEmbeddedStyleMap !== undefined) {
    serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap;
  }
  if (options.includeDefaultStyleMap !== undefined) {
    serialized.includeDefaultStyleMap = options.includeDefaultStyleMap;
  }
  if (options.idPrefix) {
    serialized.idPrefix = options.idPrefix;
  }
  if (options.imageHandling === "inline" || options.imageHandling === "omit") {
    serialized.imageHandling = options.imageHandling;
  }
  if (options.preserveEmptyParagraphs !== undefined) {
    serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs;
  }

  return Object.keys(serialized).length > 0 ? serialized : undefined;
}

/**
 * Turn descriptor args back into an options object.
 *
 * @param {object} [serialized]
 * @returns {object} `serialized` itself, or `{}` when it is falsy.
 */
function deserializeOptions(serialized) {
  return serialized || {};
}

/**
 * Coerce transform input to a Buffer.
 *
 * @param {unknown} input - A Buffer (returned unchanged) or a ReadableStream (drained).
 * @param {string} [transformName="docxToHtml"] - Transform named in the error message.
 * @returns {Promise<Buffer>}
 * @throws {Error} When `input` is neither a Buffer nor a ReadableStream.
 */
async function toBuffer(input, transformName = "docxToHtml") {
  if (Buffer.isBuffer(input)) return input;
  if (input instanceof ReadableStream) return streamToBuffer(input);
  throw new Error(
    `${transformName} expects Buffer or ReadableStream input, got ${typeof input}`
  );
}

/**
 * Translate package options into the options object handed to
 * `mammoth.convertToHtml`.
 *
 * @param {object} options
 * @returns {object}
 */
function toMammothOptions(options) {
  const mammothOpts = {};

  if (options.styleMap) {
    mammothOpts.styleMap = options.styleMap;
  }
  if (options.includeEmbeddedStyleMap !== undefined) {
    mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap;
  }
  if (options.includeDefaultStyleMap !== undefined) {
    mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap;
  }
  if (options.idPrefix) {
    mammothOpts.idPrefix = options.idPrefix;
  }

  // The package flag is the inverse of mammoth's; leave mammoth's option
  // unset unless the caller passed an explicit boolean.
  if (options.preserveEmptyParagraphs === false) {
    mammothOpts.ignoreEmptyParagraphs = true;
  } else if (options.preserveEmptyParagraphs === true) {
    mammothOpts.ignoreEmptyParagraphs = false;
  }

  if (options.imageHandling) {
    mammothOpts.convertImage = createImageConverter(options.imageHandling);
  }
  if (options.transformDocument) {
    mammothOpts.transformDocument = options.transformDocument;
  }

  return mammothOpts;
}

/**
 * Build the `convertImage` value for mammoth from the `imageHandling` option.
 *
 * @param {"omit"|"inline"|Function} handling - A mode name or a custom async handler.
 * @returns {*} Whatever `mammoth.images.imgElement` returns.
 */
function createImageConverter(handling) {
  if (handling === "omit") {
    // NOTE(review): this resolves to an empty `src`; whether mammoth then
    // drops the image or emits an element with an empty src should be
    // confirmed against mammoth's documentation.
    return mammoth.images.imgElement(() => Promise.resolve({ src: "" }));
  }

  if (handling === "inline") {
    return mammoth.images.imgElement((image) =>
      image.readAsBase64String().then((data) => ({
        src: `data:${image.contentType};base64,${data}`,
      }))
    );
  }

  // Custom handler: give it a small adapter instead of mammoth's image object.
  return mammoth.images.imgElement(async (image) => {
    const adaptedImage = {
      contentType: image.contentType,
      read: (encoding) =>
        encoding === "base64"
          ? image.readAsBase64String()
          : image.readAsArrayBuffer(),
    };
    const result = await handling(adaptedImage);
    return result === null ? { src: "" } : result;
  });
}

/**
 * Normalize a mammoth message: any type other than "error" becomes "warning".
 *
 * @param {{type: string, message: string}} msg
 * @returns {{type: ("error"|"warning"), message: string}}
 */
function toMammothMessage(msg) {
  return {
    type: msg.type === "error" ? "error" : "warning",
    message: msg.message,
  };
}

/**
 * Register both transform implementations with a registry.
 *
 * @param {{register: Function}} registry - Object exposing `register(impl)`.
 * @returns {void}
 */
function registerMammothTransforms(registry) {
  registry.register(docxToHtmlImpl);
  registry.register(docxToTextImpl);
}

// Module-load side effect: register with the core global registry.
registerMammothTransforms(core.globalRegistry);

exports.docxToHtml = docxToHtml;
exports.docxToHtmlImpl = docxToHtmlImpl;
exports.docxToText = docxToText;
exports.docxToTextImpl = docxToTextImpl;
exports.registerMammothTransforms = registerMammothTransforms;
exports.streamToBuffer = streamToBuffer;
|
|
2
2
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":"2GASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QACJC,EACAC,EAC+B,CAC/B,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAG9BA,EAAQ,gBAAkB,UAC1BA,EAAQ,gBAAkB,UAE1Be,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,
CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAAwD,CAChF,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC7OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC"}
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":"uIASA,eAAsBA,EACpBC,EACiB,CACjB,MAAMC,EAASD,EAAO,UAAA,EAChBE,EAAuB,CAAA,EAE7B,GAAI,CACF,OAAa,CACX,KAAM,CAAE,KAAAC,EAAM,MAAAC,CAAA,EAAU,MAAMH,EAAO,KAAA,EACrC,GAAIE,EAAM,MACVD,EAAO,KAAKE,CAAK,CACnB,CACA,OAAO,OAAO,OAAOF,CAAM,CAC7B,QAAA,CACED,EAAO,YAAA,CACT,CACF,CCkDO,SAASI,EAAWC,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMC,EAAiBD,CAAO,CAAA,CAElC,CAaO,SAASE,EAAWF,EAA2C,CACpE,MAAO,CACL,KAAM,YACN,UAAW,oBACX,KAAM,aACN,KAAMA,CAAA,CAEV,CAOO,MAAMG,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQC,EAAgBC,EAA+C,CAC3E,MAAML,EAAUM,EACdD,CAAA,EAEIE,EAAS,MAAMC,EAASJ,CAAK,EAC7BK,EAAiBC,EAAiBV,CAAO,EAEzCW,EAAS,MAAMC,EAAQ,cAAc,CAAE,OAAAL,CAAA,EAAUE,CAAc,EAErE,MAAO,CACL,KAAME,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAOaC,EAAgC,CAC3C,UAAW,oBACX,KAAM,aAEN,MAAM,QAAQV,EAAyC,CACrD,MAAMG,EAAS,MAAMC,EAASJ,CAAK,EAG7BO,EAAS,MAAMC,EAAQ,eAAe,CAAE,OAAAL,EAAQ,EAEtD,MAAO,CACL,KAAMI,EAAO,MACb,SAAUA,EAAO,SAAS,IAAIE,CAAgB,CAAA,CAElD,CACF,EAwBA,SAASZ,EACPD,EACmC,CACnC,GAAI,CAACA,EAAS,OAEd,MAAMe,EAAoC,CAAA,EAE1C,OAAIf,EAAQ,WACVe,EAAW,SAAWf,EAAQ,UAE5BA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAE3CA,EAAQ,yBAA2B,SACrCe,EAAW,uBAAyBf,EAAQ,wBAE1CA,EAAQ,WACVe,EAAW,SAAWf,EAAQ,WAE5BA,EAAQ,gBAAkB,UAAYA,EAAQ,gBAAkB,UAClEe,EAAW,cAAgBf,EAAQ,eAEjCA,EAAQ,0BAA4B,SACtCe,EAAW,wBAA0Bf,EAAQ,yBAGxC,OAAO,KAAKe,CAAU,EAAE,OAAS,EAAIA,EAAa,MAC3D,CAKA,SAAST,EACPS,EACmB,CACnB,OAAKA,GAAmB,CAAA,CAE1B,CAKA,eAAeP,EAASJ,EAAiC,CACvD,GAAI,OAAO,SAASA,CAAK,EACvB,OAAOA,EAET,GAAIA,aAAiB,eACnB,OAAOX,EAAeW,CAAmC,
EAE3D,MAAM,IAAI,MACR,0DAA0D,OAAOA,CAAK,EAAA,CAE1E,CAKA,SAASM,EAAiBV,EAA4C,CACpE,MAAMgB,EAA8B,CAAA,EAEpC,OAAIhB,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,SACtCgB,EAAY,wBAA0BhB,EAAQ,yBAG5CA,EAAQ,yBAA2B,SACrCgB,EAAY,uBAAyBhB,EAAQ,wBAG3CA,EAAQ,WACVgB,EAAY,SAAWhB,EAAQ,UAG7BA,EAAQ,0BAA4B,GACtCgB,EAAY,sBAAwB,GAC3BhB,EAAQ,0BAA4B,KAC7CgB,EAAY,sBAAwB,IAGlChB,EAAQ,gBACVgB,EAAY,aAAeC,EAAqBjB,EAAQ,aAAa,GAGnEA,EAAQ,oBACVgB,EAAY,kBAAoBhB,EAAQ,mBAGnCgB,CACT,CAKA,SAASC,EACPC,EACuB,CACvB,OAAIA,IAAa,OACRN,EAAQ,OAAO,WAAW,IAAM,QAAQ,QAAQ,CAAE,IAAK,EAAA,CAAI,CAAC,EAGjEM,IAAa,SAERN,EAAQ,OAAO,WAAYO,GAChCA,EAAM,mBAAA,EAAqB,KAAKC,IAAS,CACvC,IAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI,EAAA,EAC7C,CAAA,EAKCR,EAAQ,OAAO,WAAW,MAAOO,GAAwB,CAE9D,MAAME,EAAe,CACnB,YAAaF,EAAM,YACnB,MAAQG,GACFA,IAAa,SACRH,EAAM,mBAAA,EAERA,EAAM,kBAAA,EACf,EAMIR,EAAS,MAAMO,EAASG,CAAY,EAC1C,OAAIV,IAAW,KACN,CAAE,IAAK,EAAA,EAETA,CACT,CAAC,CACH,CAKA,SAASE,EAAiBU,EAGP,CACjB,MAAO,CACL,KAAMA,EAAI,OAAS,QAAU,QAAU,UACvC,QAASA,EAAI,OAAA,CAEjB,CC5OO,SAASC,EAA0BC,EAMjC,CACPA,EAAS,SAAStB,CAAc,EAChCsB,EAAS,SAASX,CAAc,CAClC,CAGAU,EAA0BE,gBAAc"}
|
package/dist/index.d.ts
CHANGED
|
@@ -9,21 +9,18 @@
|
|
|
9
9
|
* @example Basic usage
|
|
10
10
|
* ```ts
|
|
11
11
|
* import { Planner, loadFile } from '@origints/core'
|
|
12
|
-
* import { docxToHtml
|
|
13
|
-
*
|
|
14
|
-
* // Register transforms
|
|
15
|
-
* registerMammothTransforms(globalRegistry)
|
|
12
|
+
* import { docxToHtml } from '@origints/mammoth'
|
|
16
13
|
*
|
|
17
14
|
* // Create a plan
|
|
18
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
15
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
19
16
|
* .mapIn(docxToHtml())
|
|
20
|
-
* .emit((out, $) => out.add('html', $.get('html').
|
|
17
|
+
* .emit((out, $) => out.add('html', $.get('html').string()))
|
|
21
18
|
* .compile()
|
|
22
19
|
* ```
|
|
23
20
|
*
|
|
24
21
|
* @example With custom style mapping
|
|
25
22
|
* ```ts
|
|
26
|
-
* const plan = Planner.in(loadFile('document.docx'))
|
|
23
|
+
* const plan = new Planner().in(loadFile('document.docx'))
|
|
27
24
|
* .mapIn(docxToHtml({
|
|
28
25
|
* styleMap: [
|
|
29
26
|
* "p[style-name='Title'] => h1.document-title",
|
|
@@ -33,7 +30,7 @@
|
|
|
33
30
|
* idPrefix: 'doc-',
|
|
34
31
|
* imageHandling: 'omit',
|
|
35
32
|
* }))
|
|
36
|
-
* .emit((out, $) => out.add('content', $.get('html').
|
|
33
|
+
* .emit((out, $) => out.add('content', $.get('html').string()))
|
|
37
34
|
* .compile()
|
|
38
35
|
* ```
|
|
39
36
|
*/
|
package/dist/index.es.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import m from "mammoth";
|
|
2
|
+
import { globalRegistry as l } from "@origints/core";
|
|
2
3
|
async function f(e) {
|
|
3
4
|
const r = e.getReader(), a = [];
|
|
4
5
|
try {
|
|
@@ -12,15 +13,15 @@ async function f(e) {
|
|
|
12
13
|
r.releaseLock();
|
|
13
14
|
}
|
|
14
15
|
}
|
|
15
|
-
function
|
|
16
|
+
function T(e) {
|
|
16
17
|
return {
|
|
17
18
|
kind: "transform",
|
|
18
19
|
namespace: "@origints/mammoth",
|
|
19
20
|
name: "docxToHtml",
|
|
20
|
-
args:
|
|
21
|
+
args: o(e)
|
|
21
22
|
};
|
|
22
23
|
}
|
|
23
|
-
function
|
|
24
|
+
function v(e) {
|
|
24
25
|
return {
|
|
25
26
|
kind: "transform",
|
|
26
27
|
namespace: "@origints/mammoth",
|
|
@@ -28,19 +29,19 @@ function h(e) {
|
|
|
28
29
|
args: e
|
|
29
30
|
};
|
|
30
31
|
}
|
|
31
|
-
const
|
|
32
|
+
const d = {
|
|
32
33
|
namespace: "@origints/mammoth",
|
|
33
34
|
name: "docxToHtml",
|
|
34
35
|
async execute(e, r) {
|
|
35
|
-
const a =
|
|
36
|
+
const a = g(
|
|
36
37
|
r
|
|
37
|
-
), t = await s(e), n =
|
|
38
|
+
), t = await s(e), n = y(a), i = await m.convertToHtml({ buffer: t }, n);
|
|
38
39
|
return {
|
|
39
40
|
html: i.value,
|
|
40
41
|
messages: i.messages.map(c)
|
|
41
42
|
};
|
|
42
43
|
}
|
|
43
|
-
},
|
|
44
|
+
}, u = {
|
|
44
45
|
namespace: "@origints/mammoth",
|
|
45
46
|
name: "docxToText",
|
|
46
47
|
async execute(e) {
|
|
@@ -51,12 +52,12 @@ const l = {
|
|
|
51
52
|
};
|
|
52
53
|
}
|
|
53
54
|
};
|
|
54
|
-
function
|
|
55
|
+
function o(e) {
|
|
55
56
|
if (!e) return;
|
|
56
57
|
const r = {};
|
|
57
58
|
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), (e.imageHandling === "inline" || e.imageHandling === "omit") && (r.imageHandling = e.imageHandling), e.preserveEmptyParagraphs !== void 0 && (r.preserveEmptyParagraphs = e.preserveEmptyParagraphs), Object.keys(r).length > 0 ? r : void 0;
|
|
58
59
|
}
|
|
59
|
-
function
|
|
60
|
+
function g(e) {
|
|
60
61
|
return e || {};
|
|
61
62
|
}
|
|
62
63
|
async function s(e) {
|
|
@@ -68,11 +69,11 @@ async function s(e) {
|
|
|
68
69
|
`docxToHtml expects Buffer or ReadableStream input, got ${typeof e}`
|
|
69
70
|
);
|
|
70
71
|
}
|
|
71
|
-
function
|
|
72
|
+
function y(e) {
|
|
72
73
|
const r = {};
|
|
73
|
-
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage =
|
|
74
|
+
return e.styleMap && (r.styleMap = e.styleMap), e.includeEmbeddedStyleMap !== void 0 && (r.includeEmbeddedStyleMap = e.includeEmbeddedStyleMap), e.includeDefaultStyleMap !== void 0 && (r.includeDefaultStyleMap = e.includeDefaultStyleMap), e.idPrefix && (r.idPrefix = e.idPrefix), e.preserveEmptyParagraphs === !1 ? r.ignoreEmptyParagraphs = !0 : e.preserveEmptyParagraphs === !0 && (r.ignoreEmptyParagraphs = !1), e.imageHandling && (r.convertImage = p(e.imageHandling)), e.transformDocument && (r.transformDocument = e.transformDocument), r;
|
|
74
75
|
}
|
|
75
|
-
function
|
|
76
|
+
function p(e) {
|
|
76
77
|
return e === "omit" ? m.images.imgElement(() => Promise.resolve({ src: "" })) : e === "inline" ? m.images.imgElement(
|
|
77
78
|
(r) => r.readAsBase64String().then((a) => ({
|
|
78
79
|
src: `data:${r.contentType};base64,${a}`
|
|
@@ -91,15 +92,16 @@ function c(e) {
|
|
|
91
92
|
message: e.message
|
|
92
93
|
};
|
|
93
94
|
}
|
|
94
|
-
function
|
|
95
|
-
e.register(
|
|
95
|
+
function x(e) {
|
|
96
|
+
e.register(d), e.register(u);
|
|
96
97
|
}
|
|
98
|
+
x(l);
|
|
97
99
|
export {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
100
|
+
T as docxToHtml,
|
|
101
|
+
d as docxToHtmlImpl,
|
|
102
|
+
v as docxToText,
|
|
103
|
+
u as docxToTextImpl,
|
|
104
|
+
x as registerMammothTransforms,
|
|
103
105
|
f as streamToBuffer
|
|
104
106
|
};
|
|
105
107
|
//# sourceMappingURL=index.es.js.map
|
package/dist/index.es.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').asString()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(\n input: unknown,\n args?: unknown\n ): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (\n options.imageHandling === 'inline' ||\n options.imageHandling === 'omit'\n ) {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: { type: string; message: string }): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml, registerMammothTransforms } from '@origints/mammoth'\n *\n * // Register transforms\n * registerMammothTransforms(globalRegistry)\n *\n * // Create a plan\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').asString()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = Planner.in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * 
\"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').asString()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n 
registry.register(docxToTextImpl)\n}\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry"],"mappings":";AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QACJC,GACAC,GAC+B;AAC/B,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAG9BA,EAAQ,kBAAkB,YAC1BA,EAAQ,kBAAkB,YAE1Be,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;AAKA,SAAST,EACPS,GACmB;AACnB,SAAKA,K
AAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAAwD;AAChF,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC7OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;"}
|
|
1
|
+
{"version":3,"file":"index.es.js","sources":["../src/util.ts","../src/convert.ts","../src/index.ts"],"sourcesContent":["/**\n * Utility functions for mammoth package.\n *\n * @module mammoth/util\n */\n\n/**\n * Convert a ReadableStream<Uint8Array> to a Buffer.\n */\nexport async function streamToBuffer(\n stream: ReadableStream<Uint8Array>\n): Promise<Buffer> {\n const reader = stream.getReader()\n const chunks: Uint8Array[] = []\n\n try {\n while (true) {\n const { done, value } = await reader.read()\n if (done) break\n chunks.push(value)\n }\n return Buffer.concat(chunks)\n } finally {\n reader.releaseLock()\n }\n}\n","/**\n * DOCX to HTML conversion transform for Origins.\n *\n * @module mammoth/convert\n */\n\nimport mammoth from 'mammoth'\nimport type { TransformAst, TransformImpl } from '@origints/core'\nimport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageHandler,\n} from './options'\nimport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n} from './result'\nimport { streamToBuffer } from './util'\n\n/**\n * Mammoth options type extracted from the library.\n */\ninterface MammothOptions {\n styleMap?: string | string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n convertImage?: MammothImageConverter\n ignoreEmptyParagraphs?: boolean\n idPrefix?: string\n transformDocument?: (element: unknown) => unknown\n}\n\n/**\n * Mammoth image converter (opaque branded type).\n */\ninterface MammothImageConverter {\n __mammothBrand: 'ImageConverter'\n}\n\n/**\n * Mammoth image interface for custom converters.\n */\ninterface MammothImage {\n contentType: string\n readAsBase64String: () => Promise<string>\n readAsBuffer: () => Promise<Buffer>\n readAsArrayBuffer: () => Promise<ArrayBuffer>\n}\n\n/**\n * Creates a TransformAst for converting DOCX to HTML.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', 
$.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Subtitle'] => h2.document-subtitle\",\n * ],\n * idPrefix: 'doc-',\n * }))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\nexport function docxToHtml(options?: DocxToHtmlOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n args: serializeOptions(options),\n }\n}\n\n/**\n * Creates a TransformAst for extracting raw text from DOCX.\n *\n * @example\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToText())\n * .emit((out, $) => out.add('text', $.get('text').string()))\n * .compile()\n * ```\n */\nexport function docxToText(options?: DocxToTextOptions): TransformAst {\n return {\n kind: 'transform',\n namespace: '@origints/mammoth',\n name: 'docxToText',\n args: options,\n }\n}\n\n/**\n * Transform implementation for docxToHtml.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToHtmlImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToHtml',\n\n async execute(input: unknown, args?: unknown): Promise<DocxConversionResult> {\n const options = deserializeOptions(\n args as SerializedDocxOptions | undefined\n )\n const buffer = await toBuffer(input)\n const mammothOptions = toMammothOptions(options)\n\n const result = await mammoth.convertToHtml({ buffer }, mammothOptions)\n\n return {\n html: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n/**\n * Transform implementation for docxToText.\n *\n * Accepts Buffer or ReadableStream<Uint8Array> input.\n */\nexport const docxToTextImpl: TransformImpl = {\n namespace: '@origints/mammoth',\n name: 'docxToText',\n\n async 
execute(input: unknown): Promise<DocxTextResult> {\n const buffer = await toBuffer(input)\n\n // Note: extractRawText doesn't accept options in mammoth's API\n const result = await mammoth.extractRawText({ buffer })\n\n return {\n text: result.value,\n messages: result.messages.map(toMammothMessage),\n }\n },\n}\n\n// ---------------------------------------------------------------------------\n// Internal helpers\n// ---------------------------------------------------------------------------\n\n/**\n * Serialized options that can be stored in TransformAst.args.\n * Function handlers are converted to string identifiers.\n */\ninterface SerializedDocxOptions {\n styleMap?: string[]\n includeEmbeddedStyleMap?: boolean\n includeDefaultStyleMap?: boolean\n idPrefix?: string\n imageHandling?: 'inline' | 'omit'\n preserveEmptyParagraphs?: boolean\n // Note: transformDocument and custom imageHandling functions cannot be serialized\n}\n\n/**\n * Serialize options for storage in TransformAst.\n * Custom functions cannot be serialized and are dropped.\n */\nfunction serializeOptions(\n options?: DocxToHtmlOptions\n): SerializedDocxOptions | undefined {\n if (!options) return undefined\n\n const serialized: SerializedDocxOptions = {}\n\n if (options.styleMap) {\n serialized.styleMap = options.styleMap\n }\n if (options.includeEmbeddedStyleMap !== undefined) {\n serialized.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n if (options.includeDefaultStyleMap !== undefined) {\n serialized.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n if (options.idPrefix) {\n serialized.idPrefix = options.idPrefix\n }\n if (options.imageHandling === 'inline' || options.imageHandling === 'omit') {\n serialized.imageHandling = options.imageHandling\n }\n if (options.preserveEmptyParagraphs !== undefined) {\n serialized.preserveEmptyParagraphs = options.preserveEmptyParagraphs\n }\n\n return Object.keys(serialized).length > 0 ? 
serialized : undefined\n}\n\n/**\n * Deserialize options from TransformAst.args.\n */\nfunction deserializeOptions(\n serialized?: SerializedDocxOptions\n): DocxToHtmlOptions {\n if (!serialized) return {}\n return serialized\n}\n\n/**\n * Convert input to Buffer.\n */\nasync function toBuffer(input: unknown): Promise<Buffer> {\n if (Buffer.isBuffer(input)) {\n return input\n }\n if (input instanceof ReadableStream) {\n return streamToBuffer(input as ReadableStream<Uint8Array>)\n }\n throw new Error(\n `docxToHtml expects Buffer or ReadableStream input, got ${typeof input}`\n )\n}\n\n/**\n * Convert our options to mammoth options.\n */\nfunction toMammothOptions(options: DocxToHtmlOptions): MammothOptions {\n const mammothOpts: MammothOptions = {}\n\n if (options.styleMap) {\n mammothOpts.styleMap = options.styleMap\n }\n\n if (options.includeEmbeddedStyleMap !== undefined) {\n mammothOpts.includeEmbeddedStyleMap = options.includeEmbeddedStyleMap\n }\n\n if (options.includeDefaultStyleMap !== undefined) {\n mammothOpts.includeDefaultStyleMap = options.includeDefaultStyleMap\n }\n\n if (options.idPrefix) {\n mammothOpts.idPrefix = options.idPrefix\n }\n\n if (options.preserveEmptyParagraphs === false) {\n mammothOpts.ignoreEmptyParagraphs = true\n } else if (options.preserveEmptyParagraphs === true) {\n mammothOpts.ignoreEmptyParagraphs = false\n }\n\n if (options.imageHandling) {\n mammothOpts.convertImage = createImageConverter(options.imageHandling)\n }\n\n if (options.transformDocument) {\n mammothOpts.transformDocument = options.transformDocument\n }\n\n return mammothOpts\n}\n\n/**\n * Create a mammoth image converter from our options.\n */\nfunction createImageConverter(\n handling: 'inline' | 'omit' | MammothImageHandler\n): MammothImageConverter {\n if (handling === 'omit') {\n return mammoth.images.imgElement(() => Promise.resolve({ src: '' }))\n }\n\n if (handling === 'inline') {\n // Use default mammoth behavior (base64 inline)\n return 
mammoth.images.imgElement((image: MammothImage) =>\n image.readAsBase64String().then(data => ({\n src: `data:${image.contentType};base64,${data}`,\n }))\n )\n }\n\n // Custom handler - adapt our interface to mammoth's\n return mammoth.images.imgElement(async (image: MammothImage) => {\n // Adapt mammoth's Image to our MammothImageElement interface\n const adaptedImage = {\n contentType: image.contentType,\n read: ((encoding: 'base64' | 'buffer') => {\n if (encoding === 'base64') {\n return image.readAsBase64String()\n }\n return image.readAsArrayBuffer()\n }) as {\n (encoding: 'base64'): Promise<string>\n (encoding: 'buffer'): Promise<ArrayBuffer>\n },\n }\n\n const result = await handling(adaptedImage)\n if (result === null) {\n return { src: '' }\n }\n return result\n })\n}\n\n/**\n * Convert mammoth message to our message type.\n */\nfunction toMammothMessage(msg: {\n type: string\n message: string\n}): MammothMessage {\n return {\n type: msg.type === 'error' ? 'error' : 'warning',\n message: msg.message,\n }\n}\n","/**\n * @origints/mammoth - DOCX to HTML conversion for Origins using mammoth.js\n *\n * This package provides transforms for converting Word documents (.docx) to HTML.\n * It wraps the mammoth.js library and exposes all its conversion options.\n *\n * @packageDocumentation\n *\n * @example Basic usage\n * ```ts\n * import { Planner, loadFile } from '@origints/core'\n * import { docxToHtml } from '@origints/mammoth'\n *\n * // Create a plan\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml())\n * .emit((out, $) => out.add('html', $.get('html').string()))\n * .compile()\n * ```\n *\n * @example With custom style mapping\n * ```ts\n * const plan = new Planner().in(loadFile('document.docx'))\n * .mapIn(docxToHtml({\n * styleMap: [\n * \"p[style-name='Title'] => h1.document-title\",\n * \"p[style-name='Heading 1'] => h1\",\n * \"p[style-name='Heading 2'] => h2\",\n * ],\n * idPrefix: 'doc-',\n * imageHandling: 'omit',\n * 
}))\n * .emit((out, $) => out.add('content', $.get('html').string()))\n * .compile()\n * ```\n */\n\n// Re-export option types\nexport type {\n DocxToHtmlOptions,\n DocxToTextOptions,\n MammothImageElement,\n MammothImageHandler,\n MammothImageResult,\n} from './options'\n\n// Re-export result types\nexport type {\n DocxConversionResult,\n DocxTextResult,\n MammothMessage,\n MammothMessageType,\n} from './result'\n\n// Re-export transform creators and implementations\nexport {\n docxToHtml,\n docxToText,\n docxToHtmlImpl,\n docxToTextImpl,\n} from './convert'\n\n// Re-export utilities\nexport { streamToBuffer } from './util'\n\n// ---------------------------------------------------------------------------\n// Auto-registration of transforms\n// ---------------------------------------------------------------------------\n\nimport { globalRegistry } from '@origints/core'\nimport { docxToHtmlImpl, docxToTextImpl } from './convert'\n\n/**\n * Register the mammoth transforms with a registry.\n * Call this to enable docxToHtml() and docxToText() in your plans.\n *\n * @example\n * ```ts\n * import { globalRegistry } from '@origints/core'\n * import { registerMammothTransforms } from '@origints/mammoth'\n *\n * registerMammothTransforms(globalRegistry)\n * ```\n */\nexport function registerMammothTransforms(registry: {\n register(impl: {\n namespace: string\n name: string\n execute: (...args: unknown[]) => unknown\n }): void\n}): void {\n registry.register(docxToHtmlImpl)\n registry.register(docxToTextImpl)\n}\n\n// Auto-register 
transforms\nregisterMammothTransforms(globalRegistry)\n"],"names":["streamToBuffer","stream","reader","chunks","done","value","docxToHtml","options","serializeOptions","docxToText","docxToHtmlImpl","input","args","deserializeOptions","buffer","toBuffer","mammothOptions","toMammothOptions","result","mammoth","toMammothMessage","docxToTextImpl","serialized","mammothOpts","createImageConverter","handling","image","data","adaptedImage","encoding","msg","registerMammothTransforms","registry","globalRegistry"],"mappings":";;AASA,eAAsBA,EACpBC,GACiB;AACjB,QAAMC,IAASD,EAAO,UAAA,GAChBE,IAAuB,CAAA;AAE7B,MAAI;AACF,eAAa;AACX,YAAM,EAAE,MAAAC,GAAM,OAAAC,EAAA,IAAU,MAAMH,EAAO,KAAA;AACrC,UAAIE,EAAM;AACV,MAAAD,EAAO,KAAKE,CAAK;AAAA,IACnB;AACA,WAAO,OAAO,OAAOF,CAAM;AAAA,EAC7B,UAAA;AACE,IAAAD,EAAO,YAAA;AAAA,EACT;AACF;ACkDO,SAASI,EAAWC,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMC,EAAiBD,CAAO;AAAA,EAAA;AAElC;AAaO,SAASE,EAAWF,GAA2C;AACpE,SAAO;AAAA,IACL,MAAM;AAAA,IACN,WAAW;AAAA,IACX,MAAM;AAAA,IACN,MAAMA;AAAA,EAAA;AAEV;AAOO,MAAMG,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQC,GAAgBC,GAA+C;AAC3E,UAAML,IAAUM;AAAA,MACdD;AAAA,IAAA,GAEIE,IAAS,MAAMC,EAASJ,CAAK,GAC7BK,IAAiBC,EAAiBV,CAAO,GAEzCW,IAAS,MAAMC,EAAQ,cAAc,EAAE,QAAAL,EAAA,GAAUE,CAAc;AAErE,WAAO;AAAA,MACL,MAAME,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF,GAOaC,IAAgC;AAAA,EAC3C,WAAW;AAAA,EACX,MAAM;AAAA,EAEN,MAAM,QAAQV,GAAyC;AACrD,UAAMG,IAAS,MAAMC,EAASJ,CAAK,GAG7BO,IAAS,MAAMC,EAAQ,eAAe,EAAE,QAAAL,GAAQ;AAEtD,WAAO;AAAA,MACL,MAAMI,EAAO;AAAA,MACb,UAAUA,EAAO,SAAS,IAAIE,CAAgB;AAAA,IAAA;AAAA,EAElD;AACF;AAwBA,SAASZ,EACPD,GACmC;AACnC,MAAI,CAACA,EAAS;AAEd,QAAMe,IAAoC,CAAA;AAE1C,SAAIf,EAAQ,aACVe,EAAW,WAAWf,EAAQ,WAE5BA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAE3CA,EAAQ,2BAA2B,WACrCe,EAAW,yBAAyBf,EAAQ,yBAE1CA,EAAQ,aACVe,EAAW,WAAWf,EAAQ,YAE5BA,EAAQ,kBAAkB,YAAYA,EAAQ,kBAAkB,YAClEe,EAAW,gBAAgBf,EAAQ,gBAEjCA,EAAQ,4BAA4B,WACtCe,EAAW,0BAA0Bf,EAAQ,0BAGxC,OAAO,KAAKe,CAAU,EAAE,SAAS,IAAIA,IAAa;AAC3D;A
AKA,SAAST,EACPS,GACmB;AACnB,SAAKA,KAAmB,CAAA;AAE1B;AAKA,eAAeP,EAASJ,GAAiC;AACvD,MAAI,OAAO,SAASA,CAAK;AACvB,WAAOA;AAET,MAAIA,aAAiB;AACnB,WAAOX,EAAeW,CAAmC;AAE3D,QAAM,IAAI;AAAA,IACR,0DAA0D,OAAOA,CAAK;AAAA,EAAA;AAE1E;AAKA,SAASM,EAAiBV,GAA4C;AACpE,QAAMgB,IAA8B,CAAA;AAEpC,SAAIhB,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,WACtCgB,EAAY,0BAA0BhB,EAAQ,0BAG5CA,EAAQ,2BAA2B,WACrCgB,EAAY,yBAAyBhB,EAAQ,yBAG3CA,EAAQ,aACVgB,EAAY,WAAWhB,EAAQ,WAG7BA,EAAQ,4BAA4B,KACtCgB,EAAY,wBAAwB,KAC3BhB,EAAQ,4BAA4B,OAC7CgB,EAAY,wBAAwB,KAGlChB,EAAQ,kBACVgB,EAAY,eAAeC,EAAqBjB,EAAQ,aAAa,IAGnEA,EAAQ,sBACVgB,EAAY,oBAAoBhB,EAAQ,oBAGnCgB;AACT;AAKA,SAASC,EACPC,GACuB;AACvB,SAAIA,MAAa,SACRN,EAAQ,OAAO,WAAW,MAAM,QAAQ,QAAQ,EAAE,KAAK,GAAA,CAAI,CAAC,IAGjEM,MAAa,WAERN,EAAQ,OAAO;AAAA,IAAW,CAACO,MAChCA,EAAM,mBAAA,EAAqB,KAAK,CAAAC,OAAS;AAAA,MACvC,KAAK,QAAQD,EAAM,WAAW,WAAWC,CAAI;AAAA,IAAA,EAC7C;AAAA,EAAA,IAKCR,EAAQ,OAAO,WAAW,OAAOO,MAAwB;AAE9D,UAAME,IAAe;AAAA,MACnB,aAAaF,EAAM;AAAA,MACnB,OAAO,CAACG,MACFA,MAAa,WACRH,EAAM,mBAAA,IAERA,EAAM,kBAAA;AAAA,IACf,GAMIR,IAAS,MAAMO,EAASG,CAAY;AAC1C,WAAIV,MAAW,OACN,EAAE,KAAK,GAAA,IAETA;AAAA,EACT,CAAC;AACH;AAKA,SAASE,EAAiBU,GAGP;AACjB,SAAO;AAAA,IACL,MAAMA,EAAI,SAAS,UAAU,UAAU;AAAA,IACvC,SAASA,EAAI;AAAA,EAAA;AAEjB;AC5OO,SAASC,EAA0BC,GAMjC;AACP,EAAAA,EAAS,SAAStB,CAAc,GAChCsB,EAAS,SAASX,CAAc;AAClC;AAGAU,EAA0BE,CAAc;"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@origints/mammoth",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "DOCX to HTML conversion for Origins using mammoth.js",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -20,11 +20,30 @@
|
|
|
20
20
|
"publishConfig": {
|
|
21
21
|
"access": "public"
|
|
22
22
|
},
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "https://github.com/fponticelli/origints.git",
|
|
26
|
+
"directory": "packages/mammoth"
|
|
27
|
+
},
|
|
28
|
+
"homepage": "https://origints.dev",
|
|
29
|
+
"bugs": "https://github.com/fponticelli/origints/issues",
|
|
30
|
+
"keywords": [
|
|
31
|
+
"origints",
|
|
32
|
+
"data-extraction",
|
|
33
|
+
"lineage",
|
|
34
|
+
"provenance",
|
|
35
|
+
"docx",
|
|
36
|
+
"word",
|
|
37
|
+
"document-conversion"
|
|
38
|
+
],
|
|
39
|
+
"engines": {
|
|
40
|
+
"node": ">=18"
|
|
41
|
+
},
|
|
23
42
|
"dependencies": {
|
|
24
43
|
"mammoth": "^1.11.0"
|
|
25
44
|
},
|
|
26
45
|
"peerDependencies": {
|
|
27
|
-
"@origints/core": "^0.
|
|
46
|
+
"@origints/core": "^0.2.0"
|
|
28
47
|
},
|
|
29
48
|
"devDependencies": {
|
|
30
49
|
"@types/node": "25.0.6",
|
|
@@ -35,7 +54,7 @@
|
|
|
35
54
|
"vite": "7.3.1",
|
|
36
55
|
"vite-plugin-dts": "4.5.4",
|
|
37
56
|
"vitest": "4.0.16",
|
|
38
|
-
"@origints/core": "0.
|
|
57
|
+
"@origints/core": "0.2.0"
|
|
39
58
|
},
|
|
40
59
|
"scripts": {
|
|
41
60
|
"build": "vite build",
|