@nhtio/adk 1.20260609.0 → 1.20260610.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/CHANGELOG.md +132 -9
  2. package/batteries/llm/ollama/helpers.cjs +9 -0
  3. package/batteries/llm/ollama/helpers.cjs.map +1 -1
  4. package/batteries/llm/ollama/helpers.mjs +9 -0
  5. package/batteries/llm/ollama/helpers.mjs.map +1 -1
  6. package/batteries/llm/openai_chat_completions/helpers.cjs +19 -0
  7. package/batteries/llm/openai_chat_completions/helpers.cjs.map +1 -1
  8. package/batteries/llm/openai_chat_completions/helpers.mjs +19 -0
  9. package/batteries/llm/openai_chat_completions/helpers.mjs.map +1 -1
  10. package/batteries/media/builder.d.ts +245 -0
  11. package/batteries/media/contracts.cjs +119 -0
  12. package/batteries/media/contracts.cjs.map +1 -0
  13. package/batteries/media/contracts.d.ts +321 -0
  14. package/batteries/media/contracts.mjs +110 -0
  15. package/batteries/media/contracts.mjs.map +1 -0
  16. package/batteries/media/engines/audio_decode.cjs +92 -0
  17. package/batteries/media/engines/audio_decode.cjs.map +1 -0
  18. package/batteries/media/engines/audio_decode.d.ts +46 -0
  19. package/batteries/media/engines/audio_decode.mjs +90 -0
  20. package/batteries/media/engines/audio_decode.mjs.map +1 -0
  21. package/batteries/media/engines/execa_executor.cjs +64 -0
  22. package/batteries/media/engines/execa_executor.cjs.map +1 -0
  23. package/batteries/media/engines/execa_executor.d.ts +54 -0
  24. package/batteries/media/engines/execa_executor.mjs +62 -0
  25. package/batteries/media/engines/execa_executor.mjs.map +1 -0
  26. package/batteries/media/engines/fs_workspace.cjs +84 -0
  27. package/batteries/media/engines/fs_workspace.cjs.map +1 -0
  28. package/batteries/media/engines/fs_workspace.d.ts +51 -0
  29. package/batteries/media/engines/fs_workspace.mjs +82 -0
  30. package/batteries/media/engines/fs_workspace.mjs.map +1 -0
  31. package/batteries/media/engines/jimp.cjs +116 -0
  32. package/batteries/media/engines/jimp.cjs.map +1 -0
  33. package/batteries/media/engines/jimp.d.ts +32 -0
  34. package/batteries/media/engines/jimp.mjs +114 -0
  35. package/batteries/media/engines/jimp.mjs.map +1 -0
  36. package/batteries/media/engines/sharp.cjs +120 -0
  37. package/batteries/media/engines/sharp.cjs.map +1 -0
  38. package/batteries/media/engines/sharp.d.ts +42 -0
  39. package/batteries/media/engines/sharp.mjs +117 -0
  40. package/batteries/media/engines/sharp.mjs.map +1 -0
  41. package/batteries/media/engines/soffice.cjs +246 -0
  42. package/batteries/media/engines/soffice.cjs.map +1 -0
  43. package/batteries/media/engines/soffice.d.ts +39 -0
  44. package/batteries/media/engines/soffice.mjs +244 -0
  45. package/batteries/media/engines/soffice.mjs.map +1 -0
  46. package/batteries/media/engines/tesseract_js.cjs +87 -0
  47. package/batteries/media/engines/tesseract_js.cjs.map +1 -0
  48. package/batteries/media/engines/tesseract_js.d.ts +41 -0
  49. package/batteries/media/engines/tesseract_js.mjs +85 -0
  50. package/batteries/media/engines/tesseract_js.mjs.map +1 -0
  51. package/batteries/media/engines/transformers_asr.cjs +111 -0
  52. package/batteries/media/engines/transformers_asr.cjs.map +1 -0
  53. package/batteries/media/engines/transformers_asr.d.ts +41 -0
  54. package/batteries/media/engines/transformers_asr.mjs +109 -0
  55. package/batteries/media/engines/transformers_asr.mjs.map +1 -0
  56. package/batteries/media/exceptions.d.ts +103 -0
  57. package/batteries/media/forge.cjs +403 -0
  58. package/batteries/media/forge.cjs.map +1 -0
  59. package/batteries/media/forge.d.ts +90 -0
  60. package/batteries/media/forge.mjs +399 -0
  61. package/batteries/media/forge.mjs.map +1 -0
  62. package/batteries/media/formats.d.ts +72 -0
  63. package/batteries/media/index.d.ts +136 -0
  64. package/batteries/media/lint.cjs +339 -0
  65. package/batteries/media/lint.cjs.map +1 -0
  66. package/batteries/media/lint.d.ts +117 -0
  67. package/batteries/media/lint.mjs +331 -0
  68. package/batteries/media/lint.mjs.map +1 -0
  69. package/batteries/media/pipe.d.ts +66 -0
  70. package/batteries/media/plan.d.ts +133 -0
  71. package/batteries/media/registry.d.ts +92 -0
  72. package/batteries/media/runtime.d.ts +105 -0
  73. package/batteries/media/steps/doc.d.ts +33 -0
  74. package/batteries/media/steps/image_audio.d.ts +24 -0
  75. package/batteries/media/steps/ingest.d.ts +25 -0
  76. package/batteries/media/steps/pages.d.ts +18 -0
  77. package/batteries/media/steps/sheet.d.ts +36 -0
  78. package/batteries/media/steps/slides.d.ts +35 -0
  79. package/batteries/media/steps/text.d.ts +43 -0
  80. package/batteries/media/validate.d.ts +49 -0
  81. package/batteries/media/verbs.d.ts +126 -0
  82. package/batteries/media.cjs +3049 -0
  83. package/batteries/media.cjs.map +1 -0
  84. package/batteries/media.mjs +3009 -0
  85. package/batteries/media.mjs.map +1 -0
  86. package/batteries/tools/_shared/index.d.ts +142 -0
  87. package/batteries/tools/_shared.cjs +173 -0
  88. package/batteries/tools/_shared.cjs.map +1 -0
  89. package/batteries/tools/_shared.mjs +164 -0
  90. package/batteries/tools/_shared.mjs.map +1 -0
  91. package/batteries/tools/index.d.ts +2 -0
  92. package/batteries/tools/scrapper/exceptions.d.ts +21 -0
  93. package/batteries/tools/scrapper/index.d.ts +172 -0
  94. package/batteries/tools/scrapper/shared.d.ts +146 -0
  95. package/batteries/tools/scrapper.cjs +8 -0
  96. package/batteries/tools/scrapper.mjs +2 -0
  97. package/batteries/tools/searxng/index.d.ts +54 -20
  98. package/batteries/tools/searxng.cjs +2 -1
  99. package/batteries/tools/searxng.mjs +2 -2
  100. package/batteries/tools/web_retrieval/index.d.ts +186 -0
  101. package/batteries/tools/web_retrieval.cjs +206 -0
  102. package/batteries/tools/web_retrieval.cjs.map +1 -0
  103. package/batteries/tools/web_retrieval.mjs +201 -0
  104. package/batteries/tools/web_retrieval.mjs.map +1 -0
  105. package/batteries/tools.cjs +13 -1
  106. package/batteries/tools.mjs +4 -2
  107. package/batteries.cjs +13 -1
  108. package/batteries.mjs +4 -2
  109. package/common.d.ts +1 -1
  110. package/eslint.cjs +1 -1
  111. package/eslint.mjs +1 -1
  112. package/exceptions-C7FSHEnV.mjs +87 -0
  113. package/exceptions-C7FSHEnV.mjs.map +1 -0
  114. package/exceptions-CQi_lNs1.js +152 -0
  115. package/exceptions-CQi_lNs1.js.map +1 -0
  116. package/index.cjs +2 -2
  117. package/index.mjs +2 -2
  118. package/mcp/adk-docs-corpus.json +1 -1
  119. package/package.json +301 -178
  120. package/scrapper-BOLWYGbD.js +463 -0
  121. package/scrapper-BOLWYGbD.js.map +1 -0
  122. package/scrapper-hDKlNuCT.mjs +433 -0
  123. package/scrapper-hDKlNuCT.mjs.map +1 -0
  124. package/{searxng-Bkrwhwhw.js → searxng-CJtEpa8p.js} +82 -85
  125. package/searxng-CJtEpa8p.js.map +1 -0
  126. package/{searxng-CyA-nEu5.mjs → searxng-riarj_0u.mjs} +76 -85
  127. package/searxng-riarj_0u.mjs.map +1 -0
  128. package/skills/adk-assembly/SKILL.md +2 -2
  129. package/validate-BFaUYHDN.js +1298 -0
  130. package/validate-BFaUYHDN.js.map +1 -0
  131. package/validate-DSZ3wicB.mjs +1215 -0
  132. package/validate-DSZ3wicB.mjs.map +1 -0
  133. package/searxng-Bkrwhwhw.js.map +0 -1
  134. package/searxng-CyA-nEu5.mjs.map +0 -1
@@ -0,0 +1,246 @@
1
+ Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
2
+ require("../../../chunk-Ble4zEEl.js");
3
+ const require_batteries_media_contracts = require("../contracts.cjs");
4
+ const require_exceptions = require("../../../exceptions-CQi_lNs1.js");
5
+ //#region src/batteries/media/engines/soffice.ts
6
+ /**
7
+ * The LibreOffice-backed {@link @nhtio/adk/batteries/media/contracts!MediaEngine}: document,
8
+ * spreadsheet, and presentation conversion via the `soffice` binary.
9
+ *
10
+ * @module @nhtio/adk/batteries/media/engines/soffice
11
+ *
12
+ * @remarks
13
+ * Document conversion is the one capability in the media battery with no mature
14
+ * cross-environment equivalent, so this engine is binary-backed by design. It composes the
15
+ * two BYO runtime contracts: a {@link @nhtio/adk/batteries/media/contracts!BinaryExecutor}
16
+ * runs the invocation (bundled: `execa_executor`) and a
17
+ * {@link @nhtio/adk/batteries/media/contracts!ScratchWorkspace} exchanges bytes with it
18
+ * (bundled: `fs_workspace`). The executor and workspace must agree on path visibility —
19
+ * that pairing is the consumer's composition decision.
20
+ *
21
+ * One engine, one capability kind: the conversion matrix is declared as convert capability
22
+ * groups (each format silo converts within itself plus to PDF/HTML; PDF converts to
23
+ * html/txt/docx/odt). The spreadsheet group covers ODS/legacy-xls to xlsx — what used to be
24
+ * a separate "normalize" engine is just an edge in the matrix.
25
+ */
26
/**
 * MIME type reported for a conversion output, keyed by the requested target extension.
 *
 * Declared as an ordered pair list and folded into a plain object, so the key order of the
 * resulting table is exactly the order of the pairs below.
 */
var MIME_BY_TARGET = Object.fromEntries([
	// Portable / text targets.
	["pdf", "application/pdf"],
	["html", "text/html"],
	["txt", "text/plain"],
	["md", "text/markdown"],
	["csv", "text/csv"],
	["json", "application/json"],
	// Word-processing formats.
	["docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
	["doc", "application/msword"],
	["rtf", "application/rtf"],
	["odt", "application/vnd.oasis.opendocument.text"],
	// Spreadsheet formats.
	["xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
	["xls", "application/vnd.ms-excel"],
	["ods", "application/vnd.oasis.opendocument.spreadsheet"],
	// Presentation formats.
	["pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"],
	["ppt", "application/vnd.ms-powerpoint"],
	["odp", "application/vnd.oasis.opendocument.presentation"]
]);
44
/**
 * File extension used for an input, keyed by its (lower-cased, parameter-free) MIME type.
 *
 * Written extension-first: each entry names one extension and every MIME type that maps to
 * it, and the list is flattened into the MIME-keyed lookup. The entry order is kept, so the
 * MIME keys of the resulting object enumerate in the same order as the entries below.
 */
var EXT_BY_MIME = Object.fromEntries([
	{ ext: "pdf", mimes: ["application/pdf"] },
	{ ext: "docx", mimes: ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] },
	{ ext: "doc", mimes: ["application/msword"] },
	// RTF is the only extension reachable through two MIME spellings.
	{ ext: "rtf", mimes: ["application/rtf", "text/rtf"] },
	{ ext: "odt", mimes: ["application/vnd.oasis.opendocument.text"] },
	{ ext: "xlsx", mimes: ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] },
	{ ext: "xls", mimes: ["application/vnd.ms-excel"] },
	{ ext: "ods", mimes: ["application/vnd.oasis.opendocument.spreadsheet"] },
	{ ext: "pptx", mimes: ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] },
	{ ext: "ppt", mimes: ["application/vnd.ms-powerpoint"] },
	{ ext: "odp", mimes: ["application/vnd.oasis.opendocument.presentation"] },
	{ ext: "txt", mimes: ["text/plain"] },
	{ ext: "html", mimes: ["text/html"] },
	{ ext: "csv", mimes: ["text/csv"] }
].flatMap(({ ext, mimes }) => mimes.map((mime) => [mime, ext])));
61
/**
 * Server-parity conversion matrix: same-silo targets plus pdf/html.
 *
 * Keyed by input extension; each value is the ordered list of targets that input may be
 * converted to. Rather than spelling out every row, the table is derived from a description
 * of each format silo: every member can reach the silo's shared targets, followed by the
 * other members of the same silo (in silo order, skipping itself). PDF is not part of a
 * silo and gets its own explicit row at the end.
 */
var TARGETS_BY_EXT = (() => {
	const silos = [
		{ shared: ["pdf", "html", "txt", "md"], members: ["docx", "odt", "doc", "rtf"] },
		{ shared: ["pdf", "html", "csv", "json"], members: ["xlsx", "ods", "xls"] },
		{ shared: ["pdf", "html"], members: ["pptx", "odp", "ppt"] }
	];
	const rows = silos.flatMap(({ shared, members }) => members.map((ext) => [ext, [...shared, ...members.filter((sibling) => sibling !== ext)]]));
	rows.push(["pdf", [
		"html",
		"txt",
		"docx",
		"odt"
	]]);
	return Object.fromEntries(rows);
})();
148
/**
 * Overrides for the value handed to soffice's `--convert-to` argument.
 *
 * Only targets that soffice cannot be asked for by their own name are listed; any target
 * missing from this table is passed through unchanged by the caller.
 * - `md` goes via plain text, using the explicit `txt:Text` filter token.
 * - `json` goes via `csv`.
 */
var CONVERT_TOKEN = {
	"md": "txt:Text",
	"json": "csv"
};
153
+ var validateOptions = (options, name) => {
154
+ if (typeof options?.path !== "string" || options.path.length === 0) throw new require_exceptions.E_INVALID_MEDIA_PIPELINE_CONFIG([`${name} requires the soffice binary path`]);
155
+ if (!require_batteries_media_contracts.implementsBinaryExecutor(options.executor)) throw new require_exceptions.E_INVALID_MEDIA_PIPELINE_CONFIG([`${name} requires an executor implementing the BinaryExecutor contract`]);
156
+ if (typeof options.workspace !== "function") throw new require_exceptions.E_INVALID_MEDIA_PIPELINE_CONFIG([`${name} requires a workspace factory (e.g. fsScratchWorkspace({ root }))`]);
157
+ };
158
+ var runSoffice = async (options, bytes, inputExt, convertTo, outputExt, signal) => {
159
+ const workspace = await options.workspace();
160
+ if (!require_batteries_media_contracts.implementsScratchWorkspace(workspace)) throw new require_exceptions.E_INVALID_MEDIA_PIPELINE_CONFIG(["the workspace factory minted a value that does not implement ScratchWorkspace"]);
161
+ try {
162
+ const inputPath = await workspace.materialize(bytes, `input.${inputExt}`);
163
+ const result = await options.executor.exec({
164
+ cmd: options.path,
165
+ args: [
166
+ "--headless",
167
+ "--nologo",
168
+ "--nodefault",
169
+ "--norestore",
170
+ "--nolockcheck",
171
+ "--convert-to",
172
+ convertTo,
173
+ "--outdir",
174
+ workspace.dir(),
175
+ inputPath
176
+ ],
177
+ timeoutMs: options.timeoutMs ?? 12e4,
178
+ signal
179
+ });
180
+ if (result.failed) {
181
+ const detail = result.stderr || result.stdout || `exit code ${result.exitCode}`;
182
+ throw new Error(`LibreOffice conversion failed: ${detail}`);
183
+ }
184
+ const files = await workspace.list();
185
+ const produced = files.find((f) => f.toLowerCase().endsWith(`.${outputExt}`) && f !== `input.${inputExt}`);
186
+ if (!produced) throw new Error(`LibreOffice reported success but produced no .${outputExt} output (files: ${files.join(", ")})`);
187
+ return workspace.read(`${workspace.dir()}/${produced}`);
188
+ } finally {
189
+ await workspace.dispose();
190
+ }
191
+ };
192
+ /** The MIME types of one extension group, for capability `from` declarations. */
193
+ var MIMES_OF = (exts) => Object.entries(EXT_BY_MIME).filter(([, ext]) => exts.includes(ext)).map(([mime]) => mime);
194
+ /**
195
+ * Construct the LibreOffice engine.
196
+ *
197
+ * @param options - Binary path, executor, workspace factory, timeout.
198
+ * @returns The engine, declaring one convert capability group per format silo.
199
+ */
200
+ var sofficeEngine = (options) => {
201
+ validateOptions(options, "sofficeEngine");
202
+ const convert = async (request) => {
203
+ const inputExt = EXT_BY_MIME[request.mimeType.toLowerCase().split(";")[0].trim()];
204
+ if (!inputExt) throw new Error(`unsupported input MIME for conversion: ${request.mimeType}`);
205
+ const targets = TARGETS_BY_EXT[inputExt] ?? [];
206
+ if (!targets.includes(request.to)) throw new Error(`cannot convert ${inputExt} to ${request.to}; supported: ${targets.join(", ")}`);
207
+ const convertTo = CONVERT_TOKEN[request.to] ?? request.to;
208
+ const outputExt = request.to === "md" ? "txt" : request.to === "json" ? "csv" : request.to;
209
+ let bytes = await runSoffice(options, request.bytes, inputExt, convertTo, outputExt, request.signal);
210
+ let mimeType = MIME_BY_TARGET[request.to] ?? "application/octet-stream";
211
+ if (request.to === "json") {
212
+ const rows = new TextDecoder().decode(bytes).split(/\r?\n/).filter((line) => line.length > 0).map((line) => line.split(","));
213
+ bytes = new TextEncoder().encode(JSON.stringify(rows));
214
+ mimeType = MIME_BY_TARGET.json;
215
+ }
216
+ return { outputs: [{
217
+ bytes,
218
+ mimeType
219
+ }] };
220
+ };
221
+ const group = (exts, to) => ({
222
+ from: MIMES_OF(exts),
223
+ to,
224
+ convert
225
+ });
226
+ return {
227
+ id: "soffice",
228
+ converts: [
229
+ group(["docx"], TARGETS_BY_EXT.docx),
230
+ group(["odt"], TARGETS_BY_EXT.odt),
231
+ group(["doc"], TARGETS_BY_EXT.doc),
232
+ group(["rtf"], TARGETS_BY_EXT.rtf),
233
+ group(["xlsx"], TARGETS_BY_EXT.xlsx),
234
+ group(["ods"], TARGETS_BY_EXT.ods),
235
+ group(["xls"], TARGETS_BY_EXT.xls),
236
+ group(["pptx"], TARGETS_BY_EXT.pptx),
237
+ group(["odp"], TARGETS_BY_EXT.odp),
238
+ group(["ppt"], TARGETS_BY_EXT.ppt),
239
+ group(["pdf"], TARGETS_BY_EXT.pdf)
240
+ ]
241
+ };
242
+ };
243
+ //#endregion
244
+ exports.sofficeEngine = sofficeEngine;
245
+
246
+ //# sourceMappingURL=soffice.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"soffice.cjs","names":[],"sources":["../../../../src/batteries/media/engines/soffice.ts"],"sourcesContent":["/**\n * The LibreOffice-backed {@link @nhtio/adk/batteries/media/contracts!MediaEngine}: document,\n * spreadsheet, and presentation conversion via the `soffice` binary.\n *\n * @module @nhtio/adk/batteries/media/engines/soffice\n *\n * @remarks\n * Document conversion is the one capability in the media battery with no mature\n * cross-environment equivalent, so this engine is binary-backed by design. It composes the\n * two BYO runtime contracts: a {@link @nhtio/adk/batteries/media/contracts!BinaryExecutor}\n * runs the invocation (bundled: `execa_executor`) and a\n * {@link @nhtio/adk/batteries/media/contracts!ScratchWorkspace} exchanges bytes with it\n * (bundled: `fs_workspace`). The executor and workspace must agree on path visibility —\n * that pairing is the consumer's composition decision.\n *\n * One engine, one capability kind: the conversion matrix is declared as convert capability\n * groups (each format silo converts within itself plus to PDF/HTML; PDF converts to\n * html/txt/docx/odt). The spreadsheet group covers ODS/legacy-xls to xlsx — what used to be\n * a separate \"normalize\" engine is just an edge in the matrix.\n */\n\nimport { E_INVALID_MEDIA_PIPELINE_CONFIG } from '../exceptions'\nimport { implementsBinaryExecutor, implementsScratchWorkspace } from '../contracts'\nimport type {\n MediaEngine,\n ConvertCapability,\n ConvertRequest,\n ConvertResult,\n BinaryExecutor,\n ScratchWorkspaceFactory,\n} from '../contracts'\n\n/** Options for {@link sofficeEngine}. */\nexport interface SofficeEngineOptions {\n /** Path (or resolvable name) of the soffice binary. */\n path: string\n /** Runs the soffice invocation. Required — no platform default. */\n executor: BinaryExecutor\n /** Mints a scratch dir per invocation whose paths the executor can open. Required. 
*/\n workspace: ScratchWorkspaceFactory\n /** Per-invocation timeout. Default 120000. */\n timeoutMs?: number\n}\n\nconst MIME_BY_TARGET: Record<string, string> = {\n pdf: 'application/pdf',\n html: 'text/html',\n txt: 'text/plain',\n md: 'text/markdown',\n csv: 'text/csv',\n json: 'application/json',\n docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',\n doc: 'application/msword',\n rtf: 'application/rtf',\n odt: 'application/vnd.oasis.opendocument.text',\n xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',\n xls: 'application/vnd.ms-excel',\n ods: 'application/vnd.oasis.opendocument.spreadsheet',\n pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',\n ppt: 'application/vnd.ms-powerpoint',\n odp: 'application/vnd.oasis.opendocument.presentation',\n}\n\nconst EXT_BY_MIME: Record<string, string> = {\n 'application/pdf': 'pdf',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',\n 'application/msword': 'doc',\n 'application/rtf': 'rtf',\n 'text/rtf': 'rtf',\n 'application/vnd.oasis.opendocument.text': 'odt',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',\n 'application/vnd.ms-excel': 'xls',\n 'application/vnd.oasis.opendocument.spreadsheet': 'ods',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',\n 'application/vnd.ms-powerpoint': 'ppt',\n 'application/vnd.oasis.opendocument.presentation': 'odp',\n 'text/plain': 'txt',\n 'text/html': 'html',\n 'text/csv': 'csv',\n}\n\n/** Server-parity conversion matrix: same-silo targets plus pdf/html. 
*/\nconst TARGETS_BY_EXT: Record<string, readonly string[]> = {\n docx: ['pdf', 'html', 'txt', 'md', 'odt', 'doc', 'rtf'],\n odt: ['pdf', 'html', 'txt', 'md', 'docx', 'doc', 'rtf'],\n doc: ['pdf', 'html', 'txt', 'md', 'docx', 'odt', 'rtf'],\n rtf: ['pdf', 'html', 'txt', 'md', 'docx', 'odt', 'doc'],\n xlsx: ['pdf', 'html', 'csv', 'json', 'ods', 'xls'],\n ods: ['pdf', 'html', 'csv', 'json', 'xlsx', 'xls'],\n xls: ['pdf', 'html', 'csv', 'json', 'xlsx', 'ods'],\n pptx: ['pdf', 'html', 'odp', 'ppt'],\n odp: ['pdf', 'html', 'pptx', 'ppt'],\n ppt: ['pdf', 'html', 'pptx', 'odp'],\n pdf: ['html', 'txt', 'docx', 'odt'],\n}\n\n/** soffice convert-to filter token per target (md goes via txt; json via csv). */\nconst CONVERT_TOKEN: Record<string, string> = {\n md: 'txt:Text',\n json: 'csv',\n}\n\nconst validateOptions = (options: SofficeEngineOptions, name: string): void => {\n if (typeof options?.path !== 'string' || options.path.length === 0) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([`${name} requires the soffice binary path`])\n }\n if (!implementsBinaryExecutor(options.executor)) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n `${name} requires an executor implementing the BinaryExecutor contract`,\n ])\n }\n if (typeof options.workspace !== 'function') {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n `${name} requires a workspace factory (e.g. 
fsScratchWorkspace({ root }))`,\n ])\n }\n}\n\nconst runSoffice = async (\n options: SofficeEngineOptions,\n bytes: Uint8Array,\n inputExt: string,\n convertTo: string,\n outputExt: string,\n signal?: AbortSignal\n): Promise<Uint8Array> => {\n const workspace = await options.workspace()\n if (!implementsScratchWorkspace(workspace)) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n 'the workspace factory minted a value that does not implement ScratchWorkspace',\n ])\n }\n try {\n const inputPath = await workspace.materialize(bytes, `input.${inputExt}`)\n const result = await options.executor.exec({\n cmd: options.path,\n args: [\n '--headless',\n '--nologo',\n '--nodefault',\n '--norestore',\n '--nolockcheck',\n '--convert-to',\n convertTo,\n '--outdir',\n workspace.dir(),\n inputPath,\n ],\n timeoutMs: options.timeoutMs ?? 120_000,\n signal,\n })\n if (result.failed) {\n const detail = result.stderr || result.stdout || `exit code ${result.exitCode}`\n throw new Error(`LibreOffice conversion failed: ${detail}`)\n }\n const files = await workspace.list()\n const produced = files.find(\n (f) => f.toLowerCase().endsWith(`.${outputExt}`) && f !== `input.${inputExt}`\n )\n if (!produced) {\n throw new Error(\n `LibreOffice reported success but produced no .${outputExt} output (files: ${files.join(', ')})`\n )\n }\n return workspace.read(`${workspace.dir()}/${produced}`)\n } finally {\n await workspace.dispose()\n }\n}\n\n/** The MIME types of one extension group, for capability `from` declarations. 
*/\nconst MIMES_OF = (exts: readonly string[]): string[] =>\n Object.entries(EXT_BY_MIME)\n .filter(([, ext]) => exts.includes(ext))\n .map(([mime]) => mime)\n\n/**\n * Construct the LibreOffice engine.\n *\n * @param options - Binary path, executor, workspace factory, timeout.\n * @returns The engine, declaring one convert capability group per format silo.\n */\nexport const sofficeEngine = (options: SofficeEngineOptions): MediaEngine => {\n validateOptions(options, 'sofficeEngine')\n\n const convert = async (request: ConvertRequest): Promise<ConvertResult> => {\n const mime = request.mimeType.toLowerCase().split(';')[0].trim()\n const inputExt = EXT_BY_MIME[mime]\n if (!inputExt) throw new Error(`unsupported input MIME for conversion: ${request.mimeType}`)\n const targets = TARGETS_BY_EXT[inputExt] ?? []\n if (!targets.includes(request.to)) {\n throw new Error(\n `cannot convert ${inputExt} to ${request.to}; supported: ${targets.join(', ')}`\n )\n }\n const convertTo = CONVERT_TOKEN[request.to] ?? request.to\n const outputExt = request.to === 'md' ? 'txt' : request.to === 'json' ? 'csv' : request.to\n let bytes = await runSoffice(\n options,\n request.bytes,\n inputExt,\n convertTo,\n outputExt,\n request.signal\n )\n let mimeType = MIME_BY_TARGET[request.to] ?? 
'application/octet-stream'\n if (request.to === 'json') {\n // soffice emits CSV; lower to a JSON array-of-arrays for predictable structure.\n const text = new TextDecoder().decode(bytes)\n const rows = text\n .split(/\\r?\\n/)\n .filter((line) => line.length > 0)\n .map((line) => line.split(','))\n bytes = new TextEncoder().encode(JSON.stringify(rows))\n mimeType = MIME_BY_TARGET.json\n }\n return { outputs: [{ bytes, mimeType }] }\n }\n\n // One capability group per uniform from×to block of the matrix.\n const group = (exts: readonly string[], to: readonly string[]): ConvertCapability => ({\n from: MIMES_OF(exts),\n to,\n convert,\n })\n\n return {\n id: 'soffice',\n converts: [\n group(['docx'], TARGETS_BY_EXT.docx),\n group(['odt'], TARGETS_BY_EXT.odt),\n group(['doc'], TARGETS_BY_EXT.doc),\n group(['rtf'], TARGETS_BY_EXT.rtf),\n group(['xlsx'], TARGETS_BY_EXT.xlsx),\n group(['ods'], TARGETS_BY_EXT.ods),\n group(['xls'], TARGETS_BY_EXT.xls),\n group(['pptx'], TARGETS_BY_EXT.pptx),\n group(['odp'], TARGETS_BY_EXT.odp),\n group(['ppt'], TARGETS_BY_EXT.ppt),\n group(['pdf'], TARGETS_BY_EXT.pdf),\n ],\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AA4CA,IAAM,iBAAyC;CAC7C,KAAK;CACL,MAAM;CACN,KAAK;CACL,IAAI;CACJ,KAAK;CACL,MAAM;CACN,MAAM;CACN,KAAK;CACL,KAAK;CACL,KAAK;CACL,MAAM;CACN,KAAK;CACL,KAAK;CACL,MAAM;CACN,KAAK;CACL,KAAK;AACP;AAEA,IAAM,cAAsC;CAC1C,mBAAmB;CACnB,2EAA2E;CAC3E,sBAAsB;CACtB,mBAAmB;CACnB,YAAY;CACZ,2CAA2C;CAC3C,qEAAqE;CACrE,4BAA4B;CAC5B,kDAAkD;CAClD,6EAA6E;CAC7E,iCAAiC;CACjC,mDAAmD;CACnD,cAAc;CACd,aAAa;CACb,YAAY;AACd;;AAGA,IAAM,iBAAoD;CACxD,MAAM;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAO;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,MAAM;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAO;CAAK;CACjD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAQ;CAAK;CACjD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAQ;CAAK;CACjD,MAAM;EAAC;EAAO;EAAQ;EAAO;CAAK;CAClC,KAAK;EAAC;EAAO;EAAQ;EAAQ;CAAK;CAClC,KAAK;EAAC;EAAO;EAAQ;EAAQ;CAAK;CAClC,KAAK;EAAC;EAAQ;EAAO;EAAQ;CAAK;AACpC;;AAGA,IAAM,gBAAwC;CAC5C,IAAI;CACJ,MAAM;AACR;AAEA,IAAM,mBAAmB,SAA+B,SAAuB;CAC7E,IAAI,OAAO,SAAS,SAAS,YAAY,QAAQ,KAAK,WAAW,GAC/D,MAAM,IAAI,mBAAA,gCAAgC,CAAC,GAAG,KAAK,kCAAkC,CAAC;CAExF,IAAI,CAAC,kCAAA,yBAAyB,QAAQ,QAAQ,GAC5C,MAAM,IAAI,mBAAA,gCAAgC,CACxC,GAAG,KAAK,+DACV,CAAC;CAEH,IAAI,OAAO,QAAQ,cAAc,YAC/B,MAAM,IAAI,mBAAA,gCAAgC,CACxC,GAAG,KAAK,kEACV,CAAC;AAEL;AAEA,IAAM,aAAa,OACjB,SACA,OACA,UACA,WACA,WACA,WACwB;CACxB,MAAM,YAAY,MAAM,QAAQ,UAAU;CAC1C,IAAI,CAAC,kCAAA,2BAA2B,SAAS,GACvC,MAAM,IAAI,mBAAA,gCAAgC,CACxC,+EACF,CAAC;CAEH,IAAI;EACF,MAAM,YAAY,MAAM,UAAU,YAAY,OAAO,SAAS,UAAU;EACxE,MAAM,SAAS,MAAM,QAAQ,SAAS,KAAK;GACzC,KAAK,QAAQ;GACb,MAAM;IACJ;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA,UAAU,IAAI;IACd;GACF;GACA,WAAW,QAAQ,aAAa;GAChC;EACF,CAAC;EACD,IAAI,OAAO,QAAQ;GACjB,MAAM,SAAS,OAAO,UAAU,OAAO,UAAU,aAAa,OAAO;GACrE,MAAM,IAAI,MAAM,kCAAkC,QAAQ;EAC5D;EACA,MAAM,QAAQ,MAAM,UAAU,KAAK;EACnC,MAAM,WAAW,MAAM,MACpB,MAAM,EAAE,YAAY,EAAE,SAAS,IAAI,WAAW,KAAK,MAAM,SAAS,UACrE;EACA,IAAI,CAAC,UACH,MAAM,IAAI,MACR,iDAAiD,UAAU,kBAAkB,MAAM,KAAK,IAAI,EAAE,EAChG;EAEF,OAAO,UAAU,KAAK,
GAAG,UAAU,IAAI,EAAE,GAAG,UAAU;CACxD,UAAU;EACR,MAAM,UAAU,QAAQ;CAC1B;AACF;;AAGA,IAAM,YAAY,SAChB,OAAO,QAAQ,WAAW,EACvB,QAAQ,GAAG,SAAS,KAAK,SAAS,GAAG,CAAC,EACtC,KAAK,CAAC,UAAU,IAAI;;;;;;;AAQzB,IAAa,iBAAiB,YAA+C;CAC3E,gBAAgB,SAAS,eAAe;CAExC,MAAM,UAAU,OAAO,YAAoD;EAEzE,MAAM,WAAW,YADJ,QAAQ,SAAS,YAAY,EAAE,MAAM,GAAG,EAAE,GAAG,KAC7B;EAC7B,IAAI,CAAC,UAAU,MAAM,IAAI,MAAM,0CAA0C,QAAQ,UAAU;EAC3F,MAAM,UAAU,eAAe,aAAa,CAAC;EAC7C,IAAI,CAAC,QAAQ,SAAS,QAAQ,EAAE,GAC9B,MAAM,IAAI,MACR,kBAAkB,SAAS,MAAM,QAAQ,GAAG,eAAe,QAAQ,KAAK,IAAI,GAC9E;EAEF,MAAM,YAAY,cAAc,QAAQ,OAAO,QAAQ;EACvD,MAAM,YAAY,QAAQ,OAAO,OAAO,QAAQ,QAAQ,OAAO,SAAS,QAAQ,QAAQ;EACxF,IAAI,QAAQ,MAAM,WAChB,SACA,QAAQ,OACR,UACA,WACA,WACA,QAAQ,MACV;EACA,IAAI,WAAW,eAAe,QAAQ,OAAO;EAC7C,IAAI,QAAQ,OAAO,QAAQ;GAGzB,MAAM,OADO,IAAI,YAAY,EAAE,OAAO,KACzB,EACV,MAAM,OAAO,EACb,QAAQ,SAAS,KAAK,SAAS,CAAC,EAChC,KAAK,SAAS,KAAK,MAAM,GAAG,CAAC;GAChC,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK,UAAU,IAAI,CAAC;GACrD,WAAW,eAAe;EAC5B;EACA,OAAO,EAAE,SAAS,CAAC;GAAE;GAAO;EAAS,CAAC,EAAE;CAC1C;CAGA,MAAM,SAAS,MAAyB,QAA8C;EACpF,MAAM,SAAS,IAAI;EACnB;EACA;CACF;CAEA,OAAO;EACL,IAAI;EACJ,UAAU;GACR,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;EACnC;CACF;AACF"}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * The LibreOffice-backed {@link @nhtio/adk/batteries/media/contracts!MediaEngine}: document,
3
+ * spreadsheet, and presentation conversion via the `soffice` binary.
4
+ *
5
+ * @module @nhtio/adk/batteries/media/engines/soffice
6
+ *
7
+ * @remarks
8
+ * Document conversion is the one capability in the media battery with no mature
9
+ * cross-environment equivalent, so this engine is binary-backed by design. It composes the
10
+ * two BYO runtime contracts: a {@link @nhtio/adk/batteries/media/contracts!BinaryExecutor}
11
+ * runs the invocation (bundled: `execa_executor`) and a
12
+ * {@link @nhtio/adk/batteries/media/contracts!ScratchWorkspace} exchanges bytes with it
13
+ * (bundled: `fs_workspace`). The executor and workspace must agree on path visibility —
14
+ * that pairing is the consumer's composition decision.
15
+ *
16
+ * One engine, one capability kind: the conversion matrix is declared as convert capability
17
+ * groups (each format silo converts within itself plus to PDF/HTML; PDF converts to
18
+ * html/txt/docx/odt). The spreadsheet group covers ODS/legacy-xls to xlsx — what used to be
19
+ * a separate "normalize" engine is just an edge in the matrix.
20
+ */
21
+ import type { MediaEngine, BinaryExecutor, ScratchWorkspaceFactory } from "../contracts";
22
+ /** Options for {@link sofficeEngine}; checked when the engine is constructed. */
23
+ export interface SofficeEngineOptions {
24
+ /** Path (or resolvable name) of the soffice binary. Must be a non-empty string; used as the command given to the executor. */
25
+ path: string;
26
+ /** Runs the soffice invocation. Required — no platform default. Must satisfy the BinaryExecutor contract check. */
27
+ executor: BinaryExecutor;
28
+ /** Mints a scratch dir per invocation whose paths the executor can open. Required. Called once per conversion; the minted workspace is disposed afterwards. */
29
+ workspace: ScratchWorkspaceFactory;
30
+ /** Per-invocation timeout in milliseconds. Default 120000 (two minutes). */
31
+ timeoutMs?: number;
32
+ }
33
+ /**
34
+ * Construct the LibreOffice engine.
35
+ *
36
+ * @param options - Binary path, executor, workspace factory, and optional timeout; validated immediately, so an invalid value throws at construction.
37
+ * @returns The engine (id `soffice`), declaring one convert capability group per supported input format.
38
+ */
39
+ export declare const sofficeEngine: (options: SofficeEngineOptions) => MediaEngine;
@@ -0,0 +1,244 @@
1
+ import { implementsBinaryExecutor, implementsScratchWorkspace } from "../contracts.mjs";
2
+ import { t as E_INVALID_MEDIA_PIPELINE_CONFIG } from "../../../exceptions-C7FSHEnV.mjs";
3
+ //#region src/batteries/media/engines/soffice.ts
4
+ /**
5
+ * The LibreOffice-backed {@link @nhtio/adk/batteries/media/contracts!MediaEngine}: document,
6
+ * spreadsheet, and presentation conversion via the `soffice` binary.
7
+ *
8
+ * @module @nhtio/adk/batteries/media/engines/soffice
9
+ *
10
+ * @remarks
11
+ * Document conversion is the one capability in the media battery with no mature
12
+ * cross-environment equivalent, so this engine is binary-backed by design. It composes the
13
+ * two BYO runtime contracts: a {@link @nhtio/adk/batteries/media/contracts!BinaryExecutor}
14
+ * runs the invocation (bundled: `execa_executor`) and a
15
+ * {@link @nhtio/adk/batteries/media/contracts!ScratchWorkspace} exchanges bytes with it
16
+ * (bundled: `fs_workspace`). The executor and workspace must agree on path visibility —
17
+ * that pairing is the consumer's composition decision.
18
+ *
19
+ * One engine, one capability kind: the conversion matrix is declared as convert capability
20
+ * groups (each format silo converts within itself plus to PDF/HTML; PDF converts to
21
+ * html/txt/docx/odt). The spreadsheet group covers ODS/legacy-xls to xlsx — what used to be
22
+ * a separate "normalize" engine is just an edge in the matrix.
23
+ */
24
/**
 * MIME type attached to a conversion result, keyed by the requested target name.
 * Targets missing from this table are reported as `application/octet-stream` by the caller.
 */
const MIME_BY_TARGET = {
  // Portable and text-like targets.
  pdf: "application/pdf",
  html: "text/html",
  txt: "text/plain",
  md: "text/markdown",
  csv: "text/csv",
  json: "application/json",

  // Word-processing documents.
  docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  doc: "application/msword",
  rtf: "application/rtf",
  odt: "application/vnd.oasis.opendocument.text",

  // Spreadsheets.
  xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  xls: "application/vnd.ms-excel",
  ods: "application/vnd.oasis.opendocument.spreadsheet",

  // Presentations.
  pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  ppt: "application/vnd.ms-powerpoint",
  odp: "application/vnd.oasis.opendocument.presentation"
};
42
/**
 * Input extension for each recognised (lower-cased, parameter-free) MIME type.
 *
 * Entry order is significant: the `from` lists of the capability groups are produced by
 * walking this table in insertion order, so RTF keeps `application/rtf` ahead of `text/rtf`.
 * The trailing text-like entries resolve to extensions that have no row in the conversion
 * matrix, so a conversion request for them is rejected after this lookup succeeds.
 */
const EXT_BY_MIME = {
  "application/pdf": "pdf",

  // Word-processing documents (RTF is known under two MIME types).
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
  "application/msword": "doc",
  "application/rtf": "rtf",
  "text/rtf": "rtf",
  "application/vnd.oasis.opendocument.text": "odt",

  // Spreadsheets.
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
  "application/vnd.ms-excel": "xls",
  "application/vnd.oasis.opendocument.spreadsheet": "ods",

  // Presentations.
  "application/vnd.openxmlformats-officedocument.presentationml.presentation": "pptx",
  "application/vnd.ms-powerpoint": "ppt",
  "application/vnd.oasis.opendocument.presentation": "odp",

  // Text-like types.
  "text/plain": "txt",
  "text/html": "html",
  "text/csv": "csv"
};
59
/**
 * Server-parity conversion matrix: same-silo targets plus pdf/html.
 *
 * Keys are input extensions; each value lists the targets accepted for that input. Within a
 * silo every member gets the silo's shared targets followed by its siblings (never itself),
 * in the silo's declared member order. PDF is a one-off row appended last.
 */
const TARGETS_BY_EXT = (() => {
  /** Expand one silo into `{ member: [...shared, ...siblings] }`, keeping member order. */
  const silo = (shared, members) =>
    Object.fromEntries(
      members.map((self) => [self, [...shared, ...members.filter((other) => other !== self)]])
    );

  return {
    ...silo(["pdf", "html", "txt", "md"], ["docx", "odt", "doc", "rtf"]),
    ...silo(["pdf", "html", "csv", "json"], ["xlsx", "ods", "xls"]),
    ...silo(["pdf", "html"], ["pptx", "odp", "ppt"]),
    pdf: ["html", "txt", "docx", "odt"]
  };
})();
146
/**
 * Overrides for the value handed to soffice's `--convert-to` flag, keyed by requested target.
 * Targets without an entry are passed through unchanged by the caller. Markdown is produced
 * through the plain-text filter and JSON through CSV.
 */
const CONVERT_TOKEN = {
  json: "csv",
  md: "txt:Text"
};
151
/**
 * Reject malformed engine options up front, checking `path`, then `executor`, then `workspace`.
 *
 * @param options - Candidate options object; a missing object is reported as a missing path.
 * @param name - Name of the calling factory, used as the prefix of every message.
 * @throws E_INVALID_MEDIA_PIPELINE_CONFIG when `path` is not a non-empty string, when
 * `executor` fails the BinaryExecutor contract check, or when `workspace` is not a function.
 */
const validateOptions = (options, name) => {
  // All three failures are reported through the same single-message config error.
  const fail = (message) => {
    throw new E_INVALID_MEDIA_PIPELINE_CONFIG([message]);
  };

  const binaryPath = options?.path;
  if (typeof binaryPath !== "string" || binaryPath.length === 0) {
    fail(`${name} requires the soffice binary path`);
  }
  if (!implementsBinaryExecutor(options.executor)) {
    fail(`${name} requires an executor implementing the BinaryExecutor contract`);
  }
  if (typeof options.workspace !== "function") {
    fail(`${name} requires a workspace factory (e.g. fsScratchWorkspace({ root }))`);
  }
};
156
/**
 * Run a single `soffice --convert-to` invocation inside a freshly minted scratch workspace
 * and return the bytes of the file it produced.
 *
 * @param options - Engine options; `workspace`, `executor`, `path` and `timeoutMs` are read.
 * @param bytes - Input document bytes, materialized in the workspace as `input.<inputExt>`.
 * @param inputExt - Extension given to the materialized input file.
 * @param convertTo - Value passed to `--convert-to`.
 * @param outputExt - Extension the produced file must end with (listed names are lower-cased
 * before the comparison).
 * @param signal - Optional abort signal, forwarded to the executor unchanged.
 * @returns The bytes of the first listed file, other than the input, that ends in `.<outputExt>`.
 * @throws E_INVALID_MEDIA_PIPELINE_CONFIG when the factory result fails the ScratchWorkspace check.
 * @throws Error when the executor reports failure, or when no matching output file is listed.
 */
const runSoffice = async (options, bytes, inputExt, convertTo, outputExt, signal) => {
  const workspace = await options.workspace();
  if (!implementsScratchWorkspace(workspace)) {
    throw new E_INVALID_MEDIA_PIPELINE_CONFIG(["the workspace factory minted a value that does not implement ScratchWorkspace"]);
  }
  try {
    const inputPath = await workspace.materialize(bytes, `input.${inputExt}`);
    const result = await options.executor.exec({
      cmd: options.path,
      args: [
        "--headless",
        "--nologo",
        "--nodefault",
        "--norestore",
        "--nolockcheck",
        "--convert-to",
        convertTo,
        "--outdir",
        workspace.dir(),
        inputPath
      ],
      timeoutMs: options.timeoutMs ?? 12e4,
      signal
    });
    if (result.failed) {
      // Prefer whatever the process printed; fall back to the bare exit code.
      const detail = result.stderr || result.stdout || `exit code ${result.exitCode}`;
      throw new Error(`LibreOffice conversion failed: ${detail}`);
    }
    const files = await workspace.list();
    // The input itself is excluded so it can never be mistaken for the output.
    const produced = files.find((f) => f.toLowerCase().endsWith(`.${outputExt}`) && f !== `input.${inputExt}`);
    if (!produced) throw new Error(`LibreOffice reported success but produced no .${outputExt} output (files: ${files.join(", ")})`);
    // `await` is required here: returning the bare promise would let the `finally` block
    // dispose the workspace while the read is still in flight.
    return await workspace.read(`${workspace.dir()}/${produced}`);
  } finally {
    await workspace.dispose();
  }
};
190
/**
 * Collect the MIME types that map to any of the given extensions, for capability `from`
 * declarations. Results follow the entry order of the MIME-to-extension table.
 *
 * @param exts - Extensions whose MIME types are wanted.
 * @returns The matching MIME types; empty when nothing matches.
 */
const MIMES_OF = (exts) => {
  const mimes = [];
  for (const [mime, ext] of Object.entries(EXT_BY_MIME)) {
    if (exts.includes(ext)) mimes.push(mime);
  }
  return mimes;
};
192
/**
 * Parse CSV text into rows of string cells, honouring RFC 4180 quoting: a field that opens
 * with `"` may contain commas, line breaks and doubled quotes (`""` decodes to `"`).
 *
 * Records end at `\n` or `\r\n`. Completely blank lines are skipped; a record whose only field
 * is an explicitly quoted empty string is kept. A quote that is not at the start of a field is
 * taken literally, and an unterminated quoted field runs to the end of the text.
 *
 * @param text - Decoded CSV text.
 * @returns One array of cell strings per non-blank record.
 */
const parseCsvRows = (text) => {
  const rows = [];
  let row = [];
  let cell = "";
  let inQuotes = false;
  let atFieldStart = true;
  let recordHasQuotedField = false;

  const endField = () => {
    row.push(cell);
    cell = "";
    atFieldStart = true;
  };
  const endRecord = () => {
    endField();
    // A lone, unquoted, empty field is what a blank line parses to.
    const isBlankLine = row.length === 1 && row[0] === "" && !recordHasQuotedField;
    if (!isBlankLine) rows.push(row);
    row = [];
    recordHasQuotedField = false;
  };

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];
    if (inQuotes) {
      if (ch !== "\"") {
        cell += ch;
      } else if (text[i + 1] === "\"") {
        // Doubled quote inside a quoted field is one literal quote.
        cell += "\"";
        i++;
      } else {
        inQuotes = false;
      }
      continue;
    }
    if (ch === "\"" && atFieldStart) {
      inQuotes = true;
      atFieldStart = false;
      recordHasQuotedField = true;
    } else if (ch === ",") {
      endField();
    } else if (ch === "\n") {
      endRecord();
    } else if (ch === "\r" && text[i + 1] === "\n") {
      endRecord();
      i++;
    } else {
      cell += ch;
      atFieldStart = false;
    }
  }
  // Flush a final record that is not terminated by a line break.
  if (cell !== "" || row.length > 0 || recordHasQuotedField) endRecord();
  return rows;
};

/**
 * Construct the LibreOffice engine.
 *
 * @param options - Binary path, executor, workspace factory, timeout.
 * @returns The engine, declaring one convert capability group per format silo.
 * @throws E_INVALID_MEDIA_PIPELINE_CONFIG when the options fail validation.
 */
const sofficeEngine = (options) => {
  validateOptions(options, "sofficeEngine");

  /**
   * Convert one document. The input MIME type is lower-cased and stripped of parameters
   * before it is mapped to an extension; the target must appear in that extension's row of
   * the conversion matrix.
   */
  const convert = async (request) => {
    // A missing or non-string MIME type is reported as unsupported rather than as a TypeError.
    const rawMime = typeof request.mimeType === "string" ? request.mimeType : "";
    const inputExt = EXT_BY_MIME[rawMime.toLowerCase().split(";")[0].trim()];
    if (!inputExt) throw new Error(`unsupported input MIME for conversion: ${request.mimeType}`);
    const targets = TARGETS_BY_EXT[inputExt] ?? [];
    if (!targets.includes(request.to)) throw new Error(`cannot convert ${inputExt} to ${request.to}; supported: ${targets.join(", ")}`);

    const convertTo = CONVERT_TOKEN[request.to] ?? request.to;
    // The token is "<ext>" or "<ext>:<FilterName>"; the produced file carries <ext>.
    const outputExt = convertTo.split(":")[0];
    const produced = await runSoffice(options, request.bytes, inputExt, convertTo, outputExt, request.signal);

    if (request.to !== "json") {
      return { outputs: [{
        bytes: produced,
        mimeType: MIME_BY_TARGET[request.to] ?? "application/octet-stream"
      }] };
    }
    // soffice emitted CSV; lower it to a JSON array-of-arrays with quoting respected.
    const rows = parseCsvRows(new TextDecoder().decode(produced));
    return { outputs: [{
      bytes: new TextEncoder().encode(JSON.stringify(rows)),
      mimeType: MIME_BY_TARGET.json
    }] };
  };

  // One capability group per input extension, all sharing the same convert function.
  const group = (exts, to) => ({
    from: MIMES_OF(exts),
    to,
    convert
  });

  return {
    id: "soffice",
    converts: [
      group(["docx"], TARGETS_BY_EXT.docx),
      group(["odt"], TARGETS_BY_EXT.odt),
      group(["doc"], TARGETS_BY_EXT.doc),
      group(["rtf"], TARGETS_BY_EXT.rtf),
      group(["xlsx"], TARGETS_BY_EXT.xlsx),
      group(["ods"], TARGETS_BY_EXT.ods),
      group(["xls"], TARGETS_BY_EXT.xls),
      group(["pptx"], TARGETS_BY_EXT.pptx),
      group(["odp"], TARGETS_BY_EXT.odp),
      group(["ppt"], TARGETS_BY_EXT.ppt),
      group(["pdf"], TARGETS_BY_EXT.pdf)
    ]
  };
};
241
+ //#endregion
242
+ export { sofficeEngine };
243
+
244
+ //# sourceMappingURL=soffice.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"soffice.mjs","names":[],"sources":["../../../../src/batteries/media/engines/soffice.ts"],"sourcesContent":["/**\n * The LibreOffice-backed {@link @nhtio/adk/batteries/media/contracts!MediaEngine}: document,\n * spreadsheet, and presentation conversion via the `soffice` binary.\n *\n * @module @nhtio/adk/batteries/media/engines/soffice\n *\n * @remarks\n * Document conversion is the one capability in the media battery with no mature\n * cross-environment equivalent, so this engine is binary-backed by design. It composes the\n * two BYO runtime contracts: a {@link @nhtio/adk/batteries/media/contracts!BinaryExecutor}\n * runs the invocation (bundled: `execa_executor`) and a\n * {@link @nhtio/adk/batteries/media/contracts!ScratchWorkspace} exchanges bytes with it\n * (bundled: `fs_workspace`). The executor and workspace must agree on path visibility —\n * that pairing is the consumer's composition decision.\n *\n * One engine, one capability kind: the conversion matrix is declared as convert capability\n * groups (each format silo converts within itself plus to PDF/HTML; PDF converts to\n * html/txt/docx/odt). The spreadsheet group covers ODS/legacy-xls to xlsx — what used to be\n * a separate \"normalize\" engine is just an edge in the matrix.\n */\n\nimport { E_INVALID_MEDIA_PIPELINE_CONFIG } from '../exceptions'\nimport { implementsBinaryExecutor, implementsScratchWorkspace } from '../contracts'\nimport type {\n MediaEngine,\n ConvertCapability,\n ConvertRequest,\n ConvertResult,\n BinaryExecutor,\n ScratchWorkspaceFactory,\n} from '../contracts'\n\n/** Options for {@link sofficeEngine}. */\nexport interface SofficeEngineOptions {\n /** Path (or resolvable name) of the soffice binary. */\n path: string\n /** Runs the soffice invocation. Required — no platform default. */\n executor: BinaryExecutor\n /** Mints a scratch dir per invocation whose paths the executor can open. Required. 
*/\n workspace: ScratchWorkspaceFactory\n /** Per-invocation timeout. Default 120000. */\n timeoutMs?: number\n}\n\nconst MIME_BY_TARGET: Record<string, string> = {\n pdf: 'application/pdf',\n html: 'text/html',\n txt: 'text/plain',\n md: 'text/markdown',\n csv: 'text/csv',\n json: 'application/json',\n docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',\n doc: 'application/msword',\n rtf: 'application/rtf',\n odt: 'application/vnd.oasis.opendocument.text',\n xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',\n xls: 'application/vnd.ms-excel',\n ods: 'application/vnd.oasis.opendocument.spreadsheet',\n pptx: 'application/vnd.openxmlformats-officedocument.presentationml.presentation',\n ppt: 'application/vnd.ms-powerpoint',\n odp: 'application/vnd.oasis.opendocument.presentation',\n}\n\nconst EXT_BY_MIME: Record<string, string> = {\n 'application/pdf': 'pdf',\n 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': 'docx',\n 'application/msword': 'doc',\n 'application/rtf': 'rtf',\n 'text/rtf': 'rtf',\n 'application/vnd.oasis.opendocument.text': 'odt',\n 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': 'xlsx',\n 'application/vnd.ms-excel': 'xls',\n 'application/vnd.oasis.opendocument.spreadsheet': 'ods',\n 'application/vnd.openxmlformats-officedocument.presentationml.presentation': 'pptx',\n 'application/vnd.ms-powerpoint': 'ppt',\n 'application/vnd.oasis.opendocument.presentation': 'odp',\n 'text/plain': 'txt',\n 'text/html': 'html',\n 'text/csv': 'csv',\n}\n\n/** Server-parity conversion matrix: same-silo targets plus pdf/html. 
*/\nconst TARGETS_BY_EXT: Record<string, readonly string[]> = {\n docx: ['pdf', 'html', 'txt', 'md', 'odt', 'doc', 'rtf'],\n odt: ['pdf', 'html', 'txt', 'md', 'docx', 'doc', 'rtf'],\n doc: ['pdf', 'html', 'txt', 'md', 'docx', 'odt', 'rtf'],\n rtf: ['pdf', 'html', 'txt', 'md', 'docx', 'odt', 'doc'],\n xlsx: ['pdf', 'html', 'csv', 'json', 'ods', 'xls'],\n ods: ['pdf', 'html', 'csv', 'json', 'xlsx', 'xls'],\n xls: ['pdf', 'html', 'csv', 'json', 'xlsx', 'ods'],\n pptx: ['pdf', 'html', 'odp', 'ppt'],\n odp: ['pdf', 'html', 'pptx', 'ppt'],\n ppt: ['pdf', 'html', 'pptx', 'odp'],\n pdf: ['html', 'txt', 'docx', 'odt'],\n}\n\n/** soffice convert-to filter token per target (md goes via txt; json via csv). */\nconst CONVERT_TOKEN: Record<string, string> = {\n md: 'txt:Text',\n json: 'csv',\n}\n\nconst validateOptions = (options: SofficeEngineOptions, name: string): void => {\n if (typeof options?.path !== 'string' || options.path.length === 0) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([`${name} requires the soffice binary path`])\n }\n if (!implementsBinaryExecutor(options.executor)) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n `${name} requires an executor implementing the BinaryExecutor contract`,\n ])\n }\n if (typeof options.workspace !== 'function') {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n `${name} requires a workspace factory (e.g. 
fsScratchWorkspace({ root }))`,\n ])\n }\n}\n\nconst runSoffice = async (\n options: SofficeEngineOptions,\n bytes: Uint8Array,\n inputExt: string,\n convertTo: string,\n outputExt: string,\n signal?: AbortSignal\n): Promise<Uint8Array> => {\n const workspace = await options.workspace()\n if (!implementsScratchWorkspace(workspace)) {\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n 'the workspace factory minted a value that does not implement ScratchWorkspace',\n ])\n }\n try {\n const inputPath = await workspace.materialize(bytes, `input.${inputExt}`)\n const result = await options.executor.exec({\n cmd: options.path,\n args: [\n '--headless',\n '--nologo',\n '--nodefault',\n '--norestore',\n '--nolockcheck',\n '--convert-to',\n convertTo,\n '--outdir',\n workspace.dir(),\n inputPath,\n ],\n timeoutMs: options.timeoutMs ?? 120_000,\n signal,\n })\n if (result.failed) {\n const detail = result.stderr || result.stdout || `exit code ${result.exitCode}`\n throw new Error(`LibreOffice conversion failed: ${detail}`)\n }\n const files = await workspace.list()\n const produced = files.find(\n (f) => f.toLowerCase().endsWith(`.${outputExt}`) && f !== `input.${inputExt}`\n )\n if (!produced) {\n throw new Error(\n `LibreOffice reported success but produced no .${outputExt} output (files: ${files.join(', ')})`\n )\n }\n return workspace.read(`${workspace.dir()}/${produced}`)\n } finally {\n await workspace.dispose()\n }\n}\n\n/** The MIME types of one extension group, for capability `from` declarations. 
*/\nconst MIMES_OF = (exts: readonly string[]): string[] =>\n Object.entries(EXT_BY_MIME)\n .filter(([, ext]) => exts.includes(ext))\n .map(([mime]) => mime)\n\n/**\n * Construct the LibreOffice engine.\n *\n * @param options - Binary path, executor, workspace factory, timeout.\n * @returns The engine, declaring one convert capability group per format silo.\n */\nexport const sofficeEngine = (options: SofficeEngineOptions): MediaEngine => {\n validateOptions(options, 'sofficeEngine')\n\n const convert = async (request: ConvertRequest): Promise<ConvertResult> => {\n const mime = request.mimeType.toLowerCase().split(';')[0].trim()\n const inputExt = EXT_BY_MIME[mime]\n if (!inputExt) throw new Error(`unsupported input MIME for conversion: ${request.mimeType}`)\n const targets = TARGETS_BY_EXT[inputExt] ?? []\n if (!targets.includes(request.to)) {\n throw new Error(\n `cannot convert ${inputExt} to ${request.to}; supported: ${targets.join(', ')}`\n )\n }\n const convertTo = CONVERT_TOKEN[request.to] ?? request.to\n const outputExt = request.to === 'md' ? 'txt' : request.to === 'json' ? 'csv' : request.to\n let bytes = await runSoffice(\n options,\n request.bytes,\n inputExt,\n convertTo,\n outputExt,\n request.signal\n )\n let mimeType = MIME_BY_TARGET[request.to] ?? 
'application/octet-stream'\n if (request.to === 'json') {\n // soffice emits CSV; lower to a JSON array-of-arrays for predictable structure.\n const text = new TextDecoder().decode(bytes)\n const rows = text\n .split(/\\r?\\n/)\n .filter((line) => line.length > 0)\n .map((line) => line.split(','))\n bytes = new TextEncoder().encode(JSON.stringify(rows))\n mimeType = MIME_BY_TARGET.json\n }\n return { outputs: [{ bytes, mimeType }] }\n }\n\n // One capability group per uniform from×to block of the matrix.\n const group = (exts: readonly string[], to: readonly string[]): ConvertCapability => ({\n from: MIMES_OF(exts),\n to,\n convert,\n })\n\n return {\n id: 'soffice',\n converts: [\n group(['docx'], TARGETS_BY_EXT.docx),\n group(['odt'], TARGETS_BY_EXT.odt),\n group(['doc'], TARGETS_BY_EXT.doc),\n group(['rtf'], TARGETS_BY_EXT.rtf),\n group(['xlsx'], TARGETS_BY_EXT.xlsx),\n group(['ods'], TARGETS_BY_EXT.ods),\n group(['xls'], TARGETS_BY_EXT.xls),\n group(['pptx'], TARGETS_BY_EXT.pptx),\n group(['odp'], TARGETS_BY_EXT.odp),\n group(['ppt'], TARGETS_BY_EXT.ppt),\n group(['pdf'], TARGETS_BY_EXT.pdf),\n ],\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AA4CA,IAAM,iBAAyC;CAC7C,KAAK;CACL,MAAM;CACN,KAAK;CACL,IAAI;CACJ,KAAK;CACL,MAAM;CACN,MAAM;CACN,KAAK;CACL,KAAK;CACL,KAAK;CACL,MAAM;CACN,KAAK;CACL,KAAK;CACL,MAAM;CACN,KAAK;CACL,KAAK;AACP;AAEA,IAAM,cAAsC;CAC1C,mBAAmB;CACnB,2EAA2E;CAC3E,sBAAsB;CACtB,mBAAmB;CACnB,YAAY;CACZ,2CAA2C;CAC3C,qEAAqE;CACrE,4BAA4B;CAC5B,kDAAkD;CAClD,6EAA6E;CAC7E,iCAAiC;CACjC,mDAAmD;CACnD,cAAc;CACd,aAAa;CACb,YAAY;AACd;;AAGA,IAAM,iBAAoD;CACxD,MAAM;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAO;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAM;EAAQ;EAAO;CAAK;CACtD,MAAM;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAO;CAAK;CACjD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAQ;CAAK;CACjD,KAAK;EAAC;EAAO;EAAQ;EAAO;EAAQ;EAAQ;CAAK;CACjD,MAAM;EAAC;EAAO;EAAQ;EAAO;CAAK;CAClC,KAAK;EAAC;EAAO;EAAQ;EAAQ;CAAK;CAClC,KAAK;EAAC;EAAO;EAAQ;EAAQ;CAAK;CAClC,KAAK;EAAC;EAAQ;EAAO;EAAQ;CAAK;AACpC;;AAGA,IAAM,gBAAwC;CAC5C,IAAI;CACJ,MAAM;AACR;AAEA,IAAM,mBAAmB,SAA+B,SAAuB;CAC7E,IAAI,OAAO,SAAS,SAAS,YAAY,QAAQ,KAAK,WAAW,GAC/D,MAAM,IAAI,gCAAgC,CAAC,GAAG,KAAK,kCAAkC,CAAC;CAExF,IAAI,CAAC,yBAAyB,QAAQ,QAAQ,GAC5C,MAAM,IAAI,gCAAgC,CACxC,GAAG,KAAK,+DACV,CAAC;CAEH,IAAI,OAAO,QAAQ,cAAc,YAC/B,MAAM,IAAI,gCAAgC,CACxC,GAAG,KAAK,kEACV,CAAC;AAEL;AAEA,IAAM,aAAa,OACjB,SACA,OACA,UACA,WACA,WACA,WACwB;CACxB,MAAM,YAAY,MAAM,QAAQ,UAAU;CAC1C,IAAI,CAAC,2BAA2B,SAAS,GACvC,MAAM,IAAI,gCAAgC,CACxC,+EACF,CAAC;CAEH,IAAI;EACF,MAAM,YAAY,MAAM,UAAU,YAAY,OAAO,SAAS,UAAU;EACxE,MAAM,SAAS,MAAM,QAAQ,SAAS,KAAK;GACzC,KAAK,QAAQ;GACb,MAAM;IACJ;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA,UAAU,IAAI;IACd;GACF;GACA,WAAW,QAAQ,aAAa;GAChC;EACF,CAAC;EACD,IAAI,OAAO,QAAQ;GACjB,MAAM,SAAS,OAAO,UAAU,OAAO,UAAU,aAAa,OAAO;GACrE,MAAM,IAAI,MAAM,kCAAkC,QAAQ;EAC5D;EACA,MAAM,QAAQ,MAAM,UAAU,KAAK;EACnC,MAAM,WAAW,MAAM,MACpB,MAAM,EAAE,YAAY,EAAE,SAAS,IAAI,WAAW,KAAK,MAAM,SAAS,UACrE;EACA,IAAI,CAAC,UACH,MAAM,IAAI,MACR,iDAAiD,UAAU,kBAAkB,MAAM,KAAK,IAAI,EAAE,EAChG;EAEF,OAAO,UAAU,KAAK,GAAG,UAAU,IAAI,EAAE,GAAG,UAAU;CACxD,UA
AU;EACR,MAAM,UAAU,QAAQ;CAC1B;AACF;;AAGA,IAAM,YAAY,SAChB,OAAO,QAAQ,WAAW,EACvB,QAAQ,GAAG,SAAS,KAAK,SAAS,GAAG,CAAC,EACtC,KAAK,CAAC,UAAU,IAAI;;;;;;;AAQzB,IAAa,iBAAiB,YAA+C;CAC3E,gBAAgB,SAAS,eAAe;CAExC,MAAM,UAAU,OAAO,YAAoD;EAEzE,MAAM,WAAW,YADJ,QAAQ,SAAS,YAAY,EAAE,MAAM,GAAG,EAAE,GAAG,KAC7B;EAC7B,IAAI,CAAC,UAAU,MAAM,IAAI,MAAM,0CAA0C,QAAQ,UAAU;EAC3F,MAAM,UAAU,eAAe,aAAa,CAAC;EAC7C,IAAI,CAAC,QAAQ,SAAS,QAAQ,EAAE,GAC9B,MAAM,IAAI,MACR,kBAAkB,SAAS,MAAM,QAAQ,GAAG,eAAe,QAAQ,KAAK,IAAI,GAC9E;EAEF,MAAM,YAAY,cAAc,QAAQ,OAAO,QAAQ;EACvD,MAAM,YAAY,QAAQ,OAAO,OAAO,QAAQ,QAAQ,OAAO,SAAS,QAAQ,QAAQ;EACxF,IAAI,QAAQ,MAAM,WAChB,SACA,QAAQ,OACR,UACA,WACA,WACA,QAAQ,MACV;EACA,IAAI,WAAW,eAAe,QAAQ,OAAO;EAC7C,IAAI,QAAQ,OAAO,QAAQ;GAGzB,MAAM,OADO,IAAI,YAAY,EAAE,OAAO,KACzB,EACV,MAAM,OAAO,EACb,QAAQ,SAAS,KAAK,SAAS,CAAC,EAChC,KAAK,SAAS,KAAK,MAAM,GAAG,CAAC;GAChC,QAAQ,IAAI,YAAY,EAAE,OAAO,KAAK,UAAU,IAAI,CAAC;GACrD,WAAW,eAAe;EAC5B;EACA,OAAO,EAAE,SAAS,CAAC;GAAE;GAAO;EAAS,CAAC,EAAE;CAC1C;CAGA,MAAM,SAAS,MAAyB,QAA8C;EACpF,MAAM,SAAS,IAAI;EACnB;EACA;CACF;CAEA,OAAO;EACL,IAAI;EACJ,UAAU;GACR,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,MAAM,GAAG,eAAe,IAAI;GACnC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;GACjC,MAAM,CAAC,KAAK,GAAG,eAAe,GAAG;EACnC;CACF;AACF"}