@docscode/adapter-docx 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +17 -0
  2. package/src/index.ts +69 -0
package/package.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "@docscode/adapter-docx",
3
+ "version": "1.0.0",
4
+ "description": "DOCX Format Adapter for Kairo",
5
+ "main": "./dist/index.cjs",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "type": "module",
9
+ "scripts": {
10
+ "build": "tsup src/index.ts --format cjs,esm --dts --clean"
11
+ },
12
+ "dependencies": {
13
+ "@docscode/core": "*",
14
+ "docx": "^9.1.1",
15
+ "yjs": "^13.6.30"
16
+ }
17
+ }
package/src/index.ts ADDED
@@ -0,0 +1,69 @@
1
+ import * as Y from 'yjs';
2
+ import { Document, Packer, Paragraph, TextRun, InsertedTextRun, DeletedTextRun } from 'docx';
3
+ import { FormatAdapter, CanonicalDoc, DoclingClient, Suggestion } from '@docscode/core';
4
+
5
/**
 * DOCX format adapter.
 *
 * Reading hands the source to a `DoclingClient` and copies the converted
 * title and paragraph blocks into a `CanonicalDoc` backed by a fresh `Y.Doc`.
 * Writing walks the canonical content and builds a `docx` `Document`,
 * optionally adding suggestions as tracked insertions.
 */
export class DocxAdapter implements FormatAdapter {
  readonly format = 'docx';
  private docling = new DoclingClient();

  /**
   * Converts a source into a new Yjs document.
   *
   * @param source A `Buffer`, which is sent to the converter as a
   *   `base64:`-prefixed string, or a string, which is passed through
   *   unchanged (presumably a path or URL the converter understands —
   *   TODO confirm against `DoclingClient.convert`).
   * @returns A new `Y.Doc` holding the converted title and paragraphs.
   */
  async read(source: Buffer | string): Promise<Y.Doc> {
    const yDoc = new Y.Doc();
    const canonical = new CanonicalDoc(yDoc);

    const input = Buffer.isBuffer(source)
      ? `base64:${source.toString('base64')}`
      : source;

    const result = await this.docling.convert(input);

    canonical.metadata.set('title', result.metadata.title);

    // Only paragraph blocks are carried over; every other block type is skipped.
    for (const block of result.content) {
      if (block.type === 'p') {
        canonical.addParagraph(block.text);
      }
    }

    return yDoc;
  }

  /**
   * Serializes the document to DOCX bytes without any tracked changes.
   *
   * @param doc The Yjs document to serialize.
   * @returns The packed DOCX file contents.
   */
  async write(doc: Y.Doc): Promise<Buffer> {
    return this._serialize(doc);
  }

  /**
   * Serializes the document to DOCX bytes with suggestions rendered as
   * tracked insertions.
   *
   * @param doc The Yjs document to serialize.
   * @param suggestions Suggestions to render; only those whose `type` is
   *   `'insert'` produce output.
   * @returns The packed DOCX file contents.
   */
  async applyTrackedChanges(doc: Y.Doc, suggestions: Suggestion[]): Promise<Buffer> {
    return this._serialize(doc, suggestions);
  }

  /**
   * Builds a single-section DOCX document from the canonical content.
   *
   * Each block of type `'p'` becomes a paragraph containing its text followed
   * by one tracked insertion per `'insert'` suggestion. Any other block
   * becomes an empty paragraph.
   *
   * NOTE(review): suggestions are not matched to a paragraph, so every insert
   * suggestion is appended to every paragraph. Other suggestion types are
   * ignored, although `DeletedTextRun` is imported. Scoping suggestions
   * properly needs the `Suggestion` schema, which is not visible here.
   *
   * @param doc The Yjs document to serialize.
   * @param suggestions Suggestions to render as tracked insertions.
   * @returns The packed DOCX file contents.
   */
  private async _serialize(doc: Y.Doc, suggestions: Suggestion[] = []): Promise<Buffer> {
    const canonical = new CanonicalDoc(doc);

    // Revision ids come from one counter per serialization, so no two tracked
    // changes in the document share an id and the ids are deterministic.
    // Random ids could collide, especially as every suggestion is repeated
    // for every paragraph.
    let nextRevisionId = 0;

    // The element type of `canonical.content` is declared outside this file,
    // so the block stays untyped as in the original.
    const paragraphs = canonical.content.toArray().map((block: any) => {
      if (block.get('type') !== 'p') {
        return new Paragraph('');
      }

      const yText = block.get('text') as Y.Text;
      const children: Array<TextRun | InsertedTextRun> = [new TextRun(yText.toString())];

      // Simple implementation: append each insert suggestion as a tracked change.
      for (const s of suggestions) {
        if (s.type === 'insert') {
          children.push(new InsertedTextRun({
            id: nextRevisionId++,
            text: s.text ?? '',
            author: s.author,
            date: new Date(s.timestamp).toISOString()
          }));
        }
      }

      return new Paragraph({ children });
    });

    const docx = new Document({
      sections: [{ children: paragraphs }]
    });

    return await Packer.toBuffer(docx) as Buffer;
  }
}