@nhtio/adk 1.20260609.0 → 1.20260610.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -9
- package/batteries/llm/ollama/helpers.cjs +9 -0
- package/batteries/llm/ollama/helpers.cjs.map +1 -1
- package/batteries/llm/ollama/helpers.mjs +9 -0
- package/batteries/llm/ollama/helpers.mjs.map +1 -1
- package/batteries/llm/openai_chat_completions/helpers.cjs +19 -0
- package/batteries/llm/openai_chat_completions/helpers.cjs.map +1 -1
- package/batteries/llm/openai_chat_completions/helpers.mjs +19 -0
- package/batteries/llm/openai_chat_completions/helpers.mjs.map +1 -1
- package/batteries/media/builder.d.ts +245 -0
- package/batteries/media/contracts.cjs +119 -0
- package/batteries/media/contracts.cjs.map +1 -0
- package/batteries/media/contracts.d.ts +321 -0
- package/batteries/media/contracts.mjs +110 -0
- package/batteries/media/contracts.mjs.map +1 -0
- package/batteries/media/engines/audio_decode.cjs +92 -0
- package/batteries/media/engines/audio_decode.cjs.map +1 -0
- package/batteries/media/engines/audio_decode.d.ts +46 -0
- package/batteries/media/engines/audio_decode.mjs +90 -0
- package/batteries/media/engines/audio_decode.mjs.map +1 -0
- package/batteries/media/engines/execa_executor.cjs +64 -0
- package/batteries/media/engines/execa_executor.cjs.map +1 -0
- package/batteries/media/engines/execa_executor.d.ts +54 -0
- package/batteries/media/engines/execa_executor.mjs +62 -0
- package/batteries/media/engines/execa_executor.mjs.map +1 -0
- package/batteries/media/engines/fs_workspace.cjs +84 -0
- package/batteries/media/engines/fs_workspace.cjs.map +1 -0
- package/batteries/media/engines/fs_workspace.d.ts +51 -0
- package/batteries/media/engines/fs_workspace.mjs +82 -0
- package/batteries/media/engines/fs_workspace.mjs.map +1 -0
- package/batteries/media/engines/jimp.cjs +116 -0
- package/batteries/media/engines/jimp.cjs.map +1 -0
- package/batteries/media/engines/jimp.d.ts +32 -0
- package/batteries/media/engines/jimp.mjs +114 -0
- package/batteries/media/engines/jimp.mjs.map +1 -0
- package/batteries/media/engines/sharp.cjs +120 -0
- package/batteries/media/engines/sharp.cjs.map +1 -0
- package/batteries/media/engines/sharp.d.ts +42 -0
- package/batteries/media/engines/sharp.mjs +117 -0
- package/batteries/media/engines/sharp.mjs.map +1 -0
- package/batteries/media/engines/soffice.cjs +246 -0
- package/batteries/media/engines/soffice.cjs.map +1 -0
- package/batteries/media/engines/soffice.d.ts +39 -0
- package/batteries/media/engines/soffice.mjs +244 -0
- package/batteries/media/engines/soffice.mjs.map +1 -0
- package/batteries/media/engines/tesseract_js.cjs +87 -0
- package/batteries/media/engines/tesseract_js.cjs.map +1 -0
- package/batteries/media/engines/tesseract_js.d.ts +41 -0
- package/batteries/media/engines/tesseract_js.mjs +85 -0
- package/batteries/media/engines/tesseract_js.mjs.map +1 -0
- package/batteries/media/engines/transformers_asr.cjs +111 -0
- package/batteries/media/engines/transformers_asr.cjs.map +1 -0
- package/batteries/media/engines/transformers_asr.d.ts +41 -0
- package/batteries/media/engines/transformers_asr.mjs +109 -0
- package/batteries/media/engines/transformers_asr.mjs.map +1 -0
- package/batteries/media/exceptions.d.ts +103 -0
- package/batteries/media/forge.cjs +403 -0
- package/batteries/media/forge.cjs.map +1 -0
- package/batteries/media/forge.d.ts +90 -0
- package/batteries/media/forge.mjs +399 -0
- package/batteries/media/forge.mjs.map +1 -0
- package/batteries/media/formats.d.ts +72 -0
- package/batteries/media/index.d.ts +136 -0
- package/batteries/media/lint.cjs +339 -0
- package/batteries/media/lint.cjs.map +1 -0
- package/batteries/media/lint.d.ts +117 -0
- package/batteries/media/lint.mjs +331 -0
- package/batteries/media/lint.mjs.map +1 -0
- package/batteries/media/pipe.d.ts +66 -0
- package/batteries/media/plan.d.ts +133 -0
- package/batteries/media/registry.d.ts +92 -0
- package/batteries/media/runtime.d.ts +105 -0
- package/batteries/media/steps/doc.d.ts +33 -0
- package/batteries/media/steps/image_audio.d.ts +24 -0
- package/batteries/media/steps/ingest.d.ts +25 -0
- package/batteries/media/steps/pages.d.ts +18 -0
- package/batteries/media/steps/sheet.d.ts +36 -0
- package/batteries/media/steps/slides.d.ts +35 -0
- package/batteries/media/steps/text.d.ts +43 -0
- package/batteries/media/validate.d.ts +49 -0
- package/batteries/media/verbs.d.ts +126 -0
- package/batteries/media.cjs +3049 -0
- package/batteries/media.cjs.map +1 -0
- package/batteries/media.mjs +3009 -0
- package/batteries/media.mjs.map +1 -0
- package/batteries/tools/_shared/index.d.ts +142 -0
- package/batteries/tools/_shared.cjs +173 -0
- package/batteries/tools/_shared.cjs.map +1 -0
- package/batteries/tools/_shared.mjs +164 -0
- package/batteries/tools/_shared.mjs.map +1 -0
- package/batteries/tools/index.d.ts +2 -0
- package/batteries/tools/scrapper/exceptions.d.ts +21 -0
- package/batteries/tools/scrapper/index.d.ts +172 -0
- package/batteries/tools/scrapper/shared.d.ts +146 -0
- package/batteries/tools/scrapper.cjs +8 -0
- package/batteries/tools/scrapper.mjs +2 -0
- package/batteries/tools/searxng/index.d.ts +54 -20
- package/batteries/tools/searxng.cjs +2 -1
- package/batteries/tools/searxng.mjs +2 -2
- package/batteries/tools/web_retrieval/index.d.ts +186 -0
- package/batteries/tools/web_retrieval.cjs +206 -0
- package/batteries/tools/web_retrieval.cjs.map +1 -0
- package/batteries/tools/web_retrieval.mjs +201 -0
- package/batteries/tools/web_retrieval.mjs.map +1 -0
- package/batteries/tools.cjs +13 -1
- package/batteries/tools.mjs +4 -2
- package/batteries.cjs +13 -1
- package/batteries.mjs +4 -2
- package/common.d.ts +1 -1
- package/eslint.cjs +1 -1
- package/eslint.mjs +1 -1
- package/exceptions-C7FSHEnV.mjs +87 -0
- package/exceptions-C7FSHEnV.mjs.map +1 -0
- package/exceptions-CQi_lNs1.js +152 -0
- package/exceptions-CQi_lNs1.js.map +1 -0
- package/index.cjs +2 -2
- package/index.mjs +2 -2
- package/mcp/adk-docs-corpus.json +1 -1
- package/package.json +301 -178
- package/scrapper-BOLWYGbD.js +463 -0
- package/scrapper-BOLWYGbD.js.map +1 -0
- package/scrapper-hDKlNuCT.mjs +433 -0
- package/scrapper-hDKlNuCT.mjs.map +1 -0
- package/{searxng-Bkrwhwhw.js → searxng-CJtEpa8p.js} +82 -85
- package/searxng-CJtEpa8p.js.map +1 -0
- package/{searxng-CyA-nEu5.mjs → searxng-riarj_0u.mjs} +76 -85
- package/searxng-riarj_0u.mjs.map +1 -0
- package/skills/adk-assembly/SKILL.md +2 -2
- package/validate-BFaUYHDN.js +1298 -0
- package/validate-BFaUYHDN.js.map +1 -0
- package/validate-DSZ3wicB.mjs +1215 -0
- package/validate-DSZ3wicB.mjs.map +1 -0
- package/searxng-Bkrwhwhw.js.map +0 -1
- package/searxng-CyA-nEu5.mjs.map +0 -1
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic engine contracts for the media pipeline battery — the seams every implementation
|
|
3
|
+
* (bundled or BYO) plugs into.
|
|
4
|
+
*
|
|
5
|
+
* @module @nhtio/adk/batteries/media/contracts
|
|
6
|
+
*
|
|
7
|
+
* @remarks
|
|
8
|
+
* Engines are seams, not policies. An engine is a self-declaring capability provider: it
|
|
9
|
+
* states exactly which transforms it supports — {@link ConvertCapability} edges (input MIME
|
|
10
|
+
* patterns to output format tokens) and {@link MutateCapability} groups (same-format content
|
|
11
|
+
* transforms) — and the pipeline dispatches against those declarations. There are only two
|
|
12
|
+
* capability shapes because there are only two things a media engine ever does: change the
|
|
13
|
+
* format, or change the content. OCR is a convert (image to text). Transcription is a convert
|
|
14
|
+
* (PCM to text). Decoding audio is a convert (container to PCM). Resizing is a mutate. A new
|
|
15
|
+
* capability is a new edge in the data, never a new contract.
|
|
16
|
+
*
|
|
17
|
+
* Engines are supplied to `createMediaPipeline` as a flat ordered array and resolved eagerly
|
|
18
|
+
* at construction (declarations drive verb narrowing, so they must be known up front).
|
|
19
|
+
* Bundled engines stay cheap to resolve: their heavy peer dependencies load lazily inside
|
|
20
|
+
* the capability methods, on first actual use.
|
|
21
|
+
*
|
|
22
|
+
* All contracts are duck-typed: validation guards check structure, not class identity, so a
|
|
23
|
+
* consumer can implement an interface from scratch or adapt an existing client. Contracts are
|
|
24
|
+
* enforced at runtime — construction validates every engine and every declared capability and
|
|
25
|
+
* throws `E_INVALID_MEDIA_PIPELINE_CONFIG` naming the offending index when a value fails.
|
|
26
|
+
*
|
|
27
|
+
* Two further contracts exist so that even "run a binary" and "give a binary a file" are
|
|
28
|
+
* movable seams rather than Node assumptions:
|
|
29
|
+
*
|
|
30
|
+
* - {@link BinaryExecutor} — how an invocation runs. The bundled `execa_executor` wraps execa;
|
|
31
|
+
* a browser/remote/sandbox executor satisfies the same contract.
|
|
32
|
+
* - {@link ScratchWorkspace} — bytes ⇄ executor-visible paths. A sibling of `ByteStore`, NOT a
|
|
33
|
+
* `ByteStore`: byte stores promise in-process readers, while binaries are foreign processes
|
|
34
|
+
* that need real paths. The bundled `fs_workspace` uses `node:fs/promises` via an async
|
|
35
|
+
* resolver; any implementation whose paths the chosen executor can see is valid — that
|
|
36
|
+
* compatibility is the consumer's composition decision.
|
|
37
|
+
*/
|
|
38
|
+
/**
 * Value-or-resolver: the canonical way to hand an engine to the pipeline.
 *
 * @remarks
 * Either the value itself, or a zero-argument function that produces it. The function may
 * answer synchronously or through a promise (e.g. a dynamic import), and in both cases may
 * yield the value directly or a `{ default: value }` module namespace. Engine resolvers run
 * eagerly at pipeline construction — the engine module itself is cheap; heavy peer
 * dependencies load lazily inside capability methods.
 *
 * @typeParam T - The resolved value type; defaults to {@link MediaEngine}.
 */
export type EngineResolver<T = MediaEngine> =
  | T
  | (() => T | { default: T } | Promise<T | { default: T }>);
|
|
49
|
+
/**
 * Shared result shape for engines that take bytes in and hand bytes back
 * (the return type of {@link MutateCapability.mutate}).
 */
export interface EngineBytesResult {
  /** The produced content. */
  bytes: Uint8Array;
  /** The MIME type describing `bytes`. */
  mimeType: string;
}
|
|
56
|
+
/** Describes one binary run to be carried out by a {@link BinaryExecutor}. */
export interface BinaryInvocation {
  /** What to run: an absolute path, or a name the executor is able to resolve. */
  cmd: string;
  /** Argument vector, passed exec-style (no shell interpolation). */
  args: Array<string>;
  /** Wall-clock limit for the run, in milliseconds. */
  timeoutMs?: number;
  /** Signal that cancels the invocation when aborted. */
  signal?: AbortSignal;
}
|
|
67
|
+
/** What a {@link BinaryExecutor.exec} call settles with. */
export interface BinaryExecResult {
  /** Exit code of the process; `-1` when the process failed to start. */
  exitCode: number;
  /** Everything captured from standard output. */
  stdout: string;
  /** Everything captured from standard error. */
  stderr: string;
  /** `true` for any failure: non-zero exit, spawn failure, timeout, or abort. */
  failed: boolean;
}
|
|
78
|
+
/**
 * Runs a binary invocation to completion.
 *
 * @remarks
 * Where and how the run happens is entirely up to the implementation: a local child process,
 * a remote runner, a sandbox, a container, or a browser-side WASI shim all qualify.
 */
export interface BinaryExecutor {
  /**
   * Execute a single invocation and report how it settled.
   *
   * @remarks
   * A non-zero exit must NOT be thrown. Implementations report it through `failed` and
   * `exitCode` so that callers can map failures to readable errors.
   *
   * @param invocation - The command, its arguments, and the limits to apply.
   * @returns The settled result.
   */
  exec(invocation: BinaryInvocation): Promise<BinaryExecResult>;
}
|
|
92
|
+
/**
 * Bridges in-memory bytes and executor-visible paths.
 *
 * @remarks
 * This is the seam through which binary-backed engines exchange files with the process
 * (or remote runner) that actually executes them.
 */
export interface ScratchWorkspace {
  /**
   * Store `bytes` in the workspace as `filename`.
   *
   * @param bytes - The content to write out.
   * @param filename - The basename to use; the extension matters to format-sniffing binaries.
   * @returns The absolute path the paired executor can open.
   */
  materialize(bytes: Uint8Array, filename: string): Promise<string>;

  /**
   * Load a file that the executor produced inside the workspace.
   *
   * @param path - The absolute path to read.
   * @returns The bytes of that file.
   */
  read(path: string): Promise<Uint8Array>;

  /** The root directory of the workspace, suitable for `--outdir`-style binary arguments. */
  dir(): string;

  /** Enumerate the files currently sitting in the workspace root, as basenames. */
  list(): Promise<string[]>;

  /** Delete the workspace together with all of its contents. Engines call this in `finally`. */
  dispose(): Promise<void>;
}
|
|
120
|
+
/**
 * Mints a scratch workspace for a single execution, synchronously or asynchronously.
 *
 * @remarks
 * Engines request one workspace per invocation, which guarantees that concurrent executions
 * never end up sharing a directory.
 */
export type ScratchWorkspaceFactory = () => Promise<ScratchWorkspace> | ScratchWorkspace;
|
|
125
|
+
/**
 * A pattern used to match inputs. Three forms are documented:
 *
 * - an exact MIME type, e.g. `application/pdf`;
 * - a family wildcard, e.g. `image/*`;
 * - a virtual MIME such as {@link PCM_MIME}.
 */
export type MimePattern = string;
|
|
130
|
+
/**
 * Virtual MIME type that labels decoded mono PCM audio — the intermediate form sitting
 * between an audio container and a transcription.
 *
 * @remarks
 * The payload is a sequence of little-endian Float32 samples in `[-1, 1]`. Any
 * {@link ConvertOutput} carrying PCM must also set `meta.sampleRate` (Hz).
 */
export declare const PCM_MIME: "audio/x-adk-pcm";
|
|
136
|
+
/**
 * Serialize PCM samples into the transport bytes carried by a {@link ConvertOutput}.
 *
 * @remarks
 * Viewing arbitrary `Uint8Array` bytes as a `Float32Array` needs 4-byte alignment, and sliced
 * buffers give no such guarantee. Both this helper and {@link bytesToPcm} copy-normalize, so
 * neither the producer nor the consumer has to reason about alignment.
 *
 * @param pcm - Mono PCM samples in `[-1, 1]`.
 * @returns The samples encoded as little-endian Float32 bytes.
 */
export declare const pcmToBytes: (pcm: Float32Array) => Uint8Array;
|
|
148
|
+
/**
 * Deserialize transport bytes back into PCM samples.
 *
 * @remarks
 * The input is copied before it is viewed as floats, so it may sit at any byte offset.
 * Only whole 4-byte samples are decoded; trailing bytes that do not fill a sample are ignored.
 *
 * @param bytes - Little-endian Float32 bytes, as produced by {@link pcmToBytes}.
 * @returns The mono PCM samples.
 */
export declare const bytesToPcm: (bytes: Uint8Array) => Float32Array;
|
|
155
|
+
/** Option keys honored by OCR-flavored converts (`image/*` → `txt`/`hocr`/`json`). */
export interface OcrConvertOptions {
  /** Hints for the recognition languages, e.g. `['eng','deu']`. */
  languages?: ReadonlyArray<string>;
}
|
|
160
|
+
/**
 * Option keys honored by transcription-flavored converts
 * ({@link PCM_MIME} → `txt`/`srt`/`vtt`/`json`).
 */
export interface AsrConvertOptions {
  /** Hint for the source language (BCP-47-ish). */
  lang?: string;
  /** When set, translate the transcription to English. */
  translate?: boolean;
}
|
|
167
|
+
/** Option keys honored by embedded-image extraction converts (`application/pdf` → `images`). */
export interface ImagesConvertOptions {
  /**
   * The preferred output encoding token (`jpg`, `png`, …).
   *
   * @remarks
   * An extractor able to emit this encoding natively should do so. Outputs that arrive in a
   * different encoding are re-encoded downstream by the step that requested them.
   */
  format?: string;
}
|
|
175
|
+
/**
 * Options bag attached to a {@link ConvertRequest}: a single typed, augmentable interface
 * that merges every documented option convention.
 *
 * @remarks
 * The body is intentionally empty — it exists as a declaration-merging target. Consumers
 * contribute their own keys by augmenting this module:
 *
 * ```ts
 * declare module '@nhtio/adk/batteries/media/contracts' {
 *   interface ConvertOptions {
 *     watermark?: { text: string }
 *   }
 * }
 * ```
 *
 * At literal call sites a typo'd key surfaces as an excess-property compile error. The runtime
 * stays open: engines must ignore keys they don't understand, because multi-hop conversion
 * forwards the same bag to every hop. The namespace is flat and globally merged, so BYO keys
 * should be named to avoid collisions (prefix by engine where ambiguous).
 */
export interface ConvertOptions
  extends OcrConvertOptions,
    AsrConvertOptions,
    ImagesConvertOptions {}
|
|
197
|
+
/** The format-changing request passed to {@link ConvertCapability.convert}. */
export interface ConvertRequest {
  /** Content bytes of the input. */
  bytes: Uint8Array;
  /** MIME type of the input. */
  mimeType: string;
  /** Filename of the input; its extension informs format sniffing. */
  filename: string;
  /** Format token to convert to (`pdf`, `docx`, `txt`, `pcm`, `images`, …). */
  to: string;
  /** Capability-specific options; the available keys are described by {@link ConvertOptions}. */
  options?: ConvertOptions;
  /** Abort signal threaded through from the pipeline execution. */
  signal?: AbortSignal;
}
|
|
212
|
+
/**
 * A single output of a convert. Most converts yield exactly one of these; an `images`
 * convert yields many.
 */
export interface ConvertOutput {
  /** Content bytes of the output. */
  bytes: Uint8Array;
  /** MIME type of the output — honest: the native encoding, with no silent re-encode. */
  mimeType: string;
  /** Metadata about the output, e.g. `{ sampleRate: 44100 }` on {@link PCM_MIME} outputs. */
  meta?: Record<string, unknown>;
}
|
|
221
|
+
/** What a convert settles with. */
export interface ConvertResult {
  /** Every produced output, kept in source order. */
  outputs: ReadonlyArray<ConvertOutput>;
}
|
|
226
|
+
/**
 * A uniform block of an engine's conversion matrix: each format token listed in `to` can be
 * produced from each input that matches `from`.
 *
 * @remarks
 * Declarations are plain data, so the registry reads them without calling engine code. When
 * the matrix depends on the input (LibreOffice: docx→pdf yes, docx→xlsx no, ods→xlsx yes),
 * express it as several capability groups, each one a uniform from×to block.
 */
export interface ConvertCapability {
  /** The input patterns accepted by this block. */
  from: ReadonlyArray<MimePattern>;
  /** The format tokens that every member of `from` can be converted to. */
  to: ReadonlyArray<string>;
  /**
   * Run the conversion.
   *
   * @param request - Input bytes, the target token, and any options.
   * @returns The outputs of the conversion.
   */
  convert(request: ConvertRequest): Promise<ConvertResult>;
}
|
|
248
|
+
/**
 * The same-format content transform passed to {@link MutateCapability.mutate}.
 *
 * @remarks
 * This is the fused image request: adjacent `image.*` steps fold into ONE request, so a
 * resize→rotate→format chain pays for only a single decode/encode.
 */
export interface MutateRequest {
  /** Content bytes of the input. */
  bytes: Uint8Array;
  /** MIME type of the input. */
  mimeType: string;
  /** Resize parameters; present only when a resize was requested. */
  resize?: {
    width?: number;
    height?: number;
    fit?:
      | 'cover'
      | 'contain'
      | 'fill'
      | 'inside'
      | 'outside';
  };
  /** Rotation to apply, clockwise, in degrees. */
  rotate?: 90 | 180 | 270;
  /** Axes to flip across. */
  flip?: {
    horizontal?: boolean;
    vertical?: boolean;
  };
  /** When set, remove EXIF/ICC metadata. */
  stripMetadata?: boolean;
  /** Re-encode target; present only when requested (it rides the same fused call). */
  format?: {
    to: string;
    quality?: number;
  };
  /** Abort signal threaded through from the pipeline execution. */
  signal?: AbortSignal;
}
|
|
281
|
+
/** A capability group for same-format content transforms. */
export interface MutateCapability {
  /** The input patterns this block is able to mutate. */
  over: ReadonlyArray<MimePattern>;
  /** The supported content operations (`resize`, `rotate`, `flip`, `strip_metadata`). */
  ops: ReadonlyArray<string>;
  /** The format tokens reachable through `request.format` within the same fused call. */
  encodes: ReadonlyArray<string>;
  /**
   * Run the fused transform.
   *
   * @param request - The folded operations together with the input bytes.
   * @returns The transformed bytes.
   */
  mutate(request: MutateRequest): Promise<EngineBytesResult>;
}
|
|
297
|
+
/**
 * A self-declaring media engine: an identifier used in error messages, plus the capabilities
 * the engine provides.
 *
 * @remarks
 * Both capability lists are optional in the type, but at least one capability entry is
 * required overall.
 */
export interface MediaEngine {
  /** Stable identifier surfaced in config and dispatch error messages (`jimp`, `soffice`, …). */
  readonly id: string;
  /** The capability groups that change format. */
  readonly converts?: ReadonlyArray<ConvertCapability>;
  /** The capability groups that transform content while keeping the format. */
  readonly mutates?: ReadonlyArray<MutateCapability>;
}
|
|
309
|
+
/**
 * Structural guard for {@link BinaryExecutor}: `true` when `value` passes the package's
 * object check and exposes a function-valued `exec` property.
 */
export declare const implementsBinaryExecutor: (value: unknown) => value is BinaryExecutor;
|
|
311
|
+
/**
 * Structural guard for {@link ScratchWorkspace}: `true` when `value` passes the package's
 * object check and `materialize`, `read`, `dir`, `list`, and `dispose` are all functions.
 */
export declare const implementsScratchWorkspace: (value: unknown) => value is ScratchWorkspace;
|
|
313
|
+
/**
 * Structural guard for {@link ConvertCapability}: `true` when `value` has a function-valued
 * `convert` and both `from` and `to` are arrays containing only strings.
 */
export declare const implementsConvertCapability: (value: unknown) => value is ConvertCapability;
|
|
315
|
+
/**
 * Structural guard for {@link MutateCapability}: `true` when `value` has a function-valued
 * `mutate` and `over`, `ops`, and `encodes` are all arrays containing only strings.
 */
export declare const implementsMutateCapability: (value: unknown) => value is MutateCapability;
|
|
317
|
+
/**
 * Structural guard for {@link MediaEngine}.
 *
 * @remarks
 * Passes when `value` has a non-empty string `id` and declares at least one capability entry
 * across `converts` and `mutates`. A list that is present must be an array, and every entry
 * in it must pass the matching capability guard.
 */
export declare const implementsMediaEngine: (value: unknown) => value is MediaEngine;
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { c as isObject } from "../../tool_registry-791Vrjtf.mjs";
|
|
2
|
+
import "../../guards.mjs";
|
|
3
|
+
//#region src/batteries/media/contracts.ts
|
|
4
|
+
/**
|
|
5
|
+
* Generic engine contracts for the media pipeline battery — the seams every implementation
|
|
6
|
+
* (bundled or BYO) plugs into.
|
|
7
|
+
*
|
|
8
|
+
* @module @nhtio/adk/batteries/media/contracts
|
|
9
|
+
*
|
|
10
|
+
* @remarks
|
|
11
|
+
* Engines are seams, not policies. An engine is a self-declaring capability provider: it
|
|
12
|
+
* states exactly which transforms it supports — {@link ConvertCapability} edges (input MIME
|
|
13
|
+
* patterns to output format tokens) and {@link MutateCapability} groups (same-format content
|
|
14
|
+
* transforms) — and the pipeline dispatches against those declarations. There are only two
|
|
15
|
+
* capability shapes because there are only two things a media engine ever does: change the
|
|
16
|
+
* format, or change the content. OCR is a convert (image to text). Transcription is a convert
|
|
17
|
+
* (PCM to text). Decoding audio is a convert (container to PCM). Resizing is a mutate. A new
|
|
18
|
+
* capability is a new edge in the data, never a new contract.
|
|
19
|
+
*
|
|
20
|
+
* Engines are supplied to `createMediaPipeline` as a flat ordered array and resolved eagerly
|
|
21
|
+
* at construction (declarations drive verb narrowing, so they must be known up front).
|
|
22
|
+
* Bundled engines stay cheap to resolve: their heavy peer dependencies load lazily inside
|
|
23
|
+
* the capability methods, on first actual use.
|
|
24
|
+
*
|
|
25
|
+
* All contracts are duck-typed: validation guards check structure, not class identity, so a
|
|
26
|
+
* consumer can implement an interface from scratch or adapt an existing client. Contracts are
|
|
27
|
+
* enforced at runtime — construction validates every engine and every declared capability and
|
|
28
|
+
* throws `E_INVALID_MEDIA_PIPELINE_CONFIG` naming the offending index when a value fails.
|
|
29
|
+
*
|
|
30
|
+
* Two further contracts exist so that even "run a binary" and "give a binary a file" are
|
|
31
|
+
* movable seams rather than Node assumptions:
|
|
32
|
+
*
|
|
33
|
+
* - {@link BinaryExecutor} — how an invocation runs. The bundled `execa_executor` wraps execa;
|
|
34
|
+
* a browser/remote/sandbox executor satisfies the same contract.
|
|
35
|
+
* - {@link ScratchWorkspace} — bytes ⇄ executor-visible paths. A sibling of `ByteStore`, NOT a
|
|
36
|
+
* `ByteStore`: byte stores promise in-process readers, while binaries are foreign processes
|
|
37
|
+
* that need real paths. The bundled `fs_workspace` uses `node:fs/promises` via an async
|
|
38
|
+
* resolver; any implementation whose paths the chosen executor can see is valid — that
|
|
39
|
+
* compatibility is the consumer's composition decision.
|
|
40
|
+
*/
|
|
41
|
+
/**
 * Virtual MIME type that labels decoded mono PCM audio — the intermediate form sitting
 * between an audio container and a transcription.
 *
 * @remarks
 * The payload is a sequence of little-endian Float32 samples in `[-1, 1]`. Any
 * {@link ConvertOutput} carrying PCM must also set `meta.sampleRate` (Hz).
 */
var PCM_MIME = "audio/x-adk-pcm";
|
|
47
|
+
/**
 * Pack PCM samples into transport bytes for a {@link ConvertOutput}.
 *
 * @remarks
 * The samples are copied into a fresh buffer, so the returned bytes start at offset 0 and
 * never alias the caller's memory. A `Float32Array` view over arbitrary `Uint8Array` bytes
 * requires 4-byte alignment, which sliced buffers do not guarantee — this helper (and
 * {@link bytesToPcm}) copy-normalize so neither side has to think about alignment.
 *
 * Typed arrays store values in the host's byte order, while the transport format is defined
 * as little-endian. On little-endian hosts the copy already has the right layout and is
 * returned as-is; on big-endian hosts each sample is written explicitly as little-endian.
 *
 * @param pcm - Mono PCM samples in `[-1, 1]`.
 * @returns The samples as little-endian Float32 bytes.
 */
var pcmToBytes = (pcm) => {
	const copy = new Float32Array(pcm);
	// Probe the host byte order: the low byte of 0x0001 comes first only on little-endian hosts.
	const hostIsLittleEndian = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1;
	if (hostIsLittleEndian) return new Uint8Array(copy.buffer, 0, copy.byteLength);
	// Big-endian host: serialize sample by sample so the wire layout stays little-endian.
	const out = new Uint8Array(copy.byteLength);
	const view = new DataView(out.buffer);
	for (let i = 0; i < copy.length; i++) view.setFloat32(i * 4, copy[i], true);
	return out;
};
|
|
62
|
+
/**
 * Read PCM samples back out of transport bytes.
 *
 * @remarks
 * The input is first copied into a fresh buffer, so it may sit at any byte offset (a
 * `Float32Array` view needs 4-byte alignment, which sliced buffers do not guarantee). Only
 * whole 4-byte samples are decoded; trailing bytes that do not fill a sample are ignored.
 *
 * Typed arrays read values in the host's byte order, while the transport format is defined
 * as little-endian. On little-endian hosts the aligned copy is viewed directly; on
 * big-endian hosts each sample is decoded explicitly as little-endian.
 *
 * @param bytes - Little-endian Float32 bytes (as produced by {@link pcmToBytes}).
 * @returns The mono PCM samples.
 */
var bytesToPcm = (bytes) => {
	const aligned = new Uint8Array(bytes.length);
	aligned.set(bytes);
	const sampleCount = Math.floor(bytes.length / 4);
	// Probe the host byte order: the low byte of 0x0001 comes first only on little-endian hosts.
	const hostIsLittleEndian = new Uint8Array(new Uint16Array([1]).buffer)[0] === 1;
	if (hostIsLittleEndian) return new Float32Array(aligned.buffer, 0, sampleCount);
	// Big-endian host: decode sample by sample so the wire layout is honored.
	const view = new DataView(aligned.buffer);
	const pcm = new Float32Array(sampleCount);
	for (let i = 0; i < sampleCount; i++) pcm[i] = view.getFloat32(i * 4, true);
	return pcm;
};
|
|
73
|
+
/** `true` when `value` passes `isObject` and every property named in `names` is a function. */
var hasFns = (value, names) => {
	if (!isObject(value)) return false;
	for (const name of names) {
		if (typeof value[name] !== "function") return false;
	}
	return true;
};
|
|
74
|
+
/** `true` when `value` is an array whose elements are all strings (an empty array qualifies). */
var isStringArray = (value) => {
	if (!Array.isArray(value)) return false;
	return !value.some((item) => typeof item !== "string");
};
|
|
75
|
+
/**
 * Structural guard for {@link BinaryExecutor}: the value must expose a function-valued
 * `exec` property.
 */
var implementsBinaryExecutor = (value) => {
	const requiredMethods = ["exec"];
	return hasFns(value, requiredMethods);
};
|
|
77
|
+
/**
 * Structural guard for {@link ScratchWorkspace}: the value must expose every workspace
 * method (`materialize`, `read`, `dir`, `list`, `dispose`) as a function.
 */
var implementsScratchWorkspace = (value) => {
	const requiredMethods = ["materialize", "read", "dir", "list", "dispose"];
	return hasFns(value, requiredMethods);
};
|
|
85
|
+
/**
 * Structural guard for {@link ConvertCapability}: a function-valued `convert`, plus `from`
 * and `to` declared as arrays of strings.
 */
var implementsConvertCapability = (value) => {
	// The method check runs first, so the declaration fields are only read on object values.
	if (!hasFns(value, ["convert"])) return false;
	if (!isStringArray(value.from)) return false;
	return isStringArray(value.to);
};
|
|
87
|
+
/**
 * Structural guard for {@link MutateCapability}: a function-valued `mutate`, plus `over`,
 * `ops`, and `encodes` declared as arrays of strings.
 */
var implementsMutateCapability = (value) => {
	// The method check runs first, so the declaration fields are only read on object values.
	if (!hasFns(value, ["mutate"])) return false;
	if (!isStringArray(value.over)) return false;
	if (!isStringArray(value.ops)) return false;
	return isStringArray(value.encodes);
};
|
|
89
|
+
/**
 * Structural guard for {@link MediaEngine}.
 *
 * @remarks
 * Passes when `value` has a non-empty string `id` and declares at least one capability entry
 * across `converts` and `mutates`. A list that is present must be an array, and every entry
 * in it must pass the matching capability guard.
 */
var implementsMediaEngine = (value) => {
	if (!isObject(value)) return false;
	const id = value.id;
	if (typeof id !== "string" || id === "") return false;
	const converts = value.converts;
	const mutates = value.mutates;
	// Running total of declared capability entries across both lists.
	let declared = 0;
	if (converts !== void 0) {
		if (!Array.isArray(converts)) return false;
		if (!converts.every(implementsConvertCapability)) return false;
		declared += converts.length;
	}
	if (mutates !== void 0) {
		if (!Array.isArray(mutates)) return false;
		if (!mutates.every(implementsMutateCapability)) return false;
		declared += mutates.length;
	}
	// An engine that declares nothing is rejected.
	return declared > 0;
};
|
|
107
|
+
//#endregion
|
|
108
|
+
export { PCM_MIME, bytesToPcm, implementsBinaryExecutor, implementsConvertCapability, implementsMediaEngine, implementsMutateCapability, implementsScratchWorkspace, pcmToBytes };
|
|
109
|
+
|
|
110
|
+
//# sourceMappingURL=contracts.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contracts.mjs","names":[],"sources":["../../../src/batteries/media/contracts.ts"],"sourcesContent":["/**\n * Generic engine contracts for the media pipeline battery — the seams every implementation\n * (bundled or BYO) plugs into.\n *\n * @module @nhtio/adk/batteries/media/contracts\n *\n * @remarks\n * Engines are seams, not policies. An engine is a self-declaring capability provider: it\n * states exactly which transforms it supports — {@link ConvertCapability} edges (input MIME\n * patterns to output format tokens) and {@link MutateCapability} groups (same-format content\n * transforms) — and the pipeline dispatches against those declarations. There are only two\n * capability shapes because there are only two things a media engine ever does: change the\n * format, or change the content. OCR is a convert (image to text). Transcription is a convert\n * (PCM to text). Decoding audio is a convert (container to PCM). Resizing is a mutate. A new\n * capability is a new edge in the data, never a new contract.\n *\n * Engines are supplied to `createMediaPipeline` as a flat ordered array and resolved eagerly\n * at construction (declarations drive verb narrowing, so they must be known up front).\n * Bundled engines stay cheap to resolve: their heavy peer dependencies load lazily inside\n * the capability methods, on first actual use.\n *\n * All contracts are duck-typed: validation guards check structure, not class identity, so a\n * consumer can implement an interface from scratch or adapt an existing client. Contracts are\n * enforced at runtime — construction validates every engine and every declared capability and\n * throws `E_INVALID_MEDIA_PIPELINE_CONFIG` naming the offending index when a value fails.\n *\n * Two further contracts exist so that even \"run a binary\" and \"give a binary a file\" are\n * movable seams rather than Node assumptions:\n *\n * - {@link BinaryExecutor} — how an invocation runs. 
The bundled `execa_executor` wraps execa;\n * a browser/remote/sandbox executor satisfies the same contract.\n * - {@link ScratchWorkspace} — bytes ⇄ executor-visible paths. A sibling of `ByteStore`, NOT a\n * `ByteStore`: byte stores promise in-process readers, while binaries are foreign processes\n * that need real paths. The bundled `fs_workspace` uses `node:fs/promises` via an async\n * resolver; any implementation whose paths the chosen executor can see is valid — that\n * compatibility is the consumer's composition decision.\n */\n\nimport { isObject } from '@nhtio/adk/guards'\n\n// ── shared helper shapes ─────────────────────────────────────────────────────\n\n/**\n * A value-or-resolver: the canonical way to supply an engine. Resolvers may be sync or async\n * (dynamic import) and may resolve to the value directly or a `{ default: value }` module\n * namespace. Engine resolvers run eagerly at pipeline construction — the engine module itself\n * is cheap; heavy peer dependencies load lazily inside capability methods.\n */\nexport type EngineResolver<T = MediaEngine> =\n | T\n | (() => T | { default: T } | Promise<T | { default: T }>)\n\n/** Common result shape for engines that transform bytes to bytes. */\nexport interface EngineBytesResult {\n /** The output bytes. */\n bytes: Uint8Array\n /** The output MIME type. */\n mimeType: string\n}\n\n// ── process execution + scratch filesystem ──────────────────────────────────\n\n/** A single binary invocation handed to a {@link BinaryExecutor}. */\nexport interface BinaryInvocation {\n /** The command to run (an absolute path or a name the executor can resolve). */\n cmd: string\n /** Arguments, exec-style (no shell interpolation). */\n args: string[]\n /** Wall-clock timeout in milliseconds. */\n timeoutMs?: number\n /** Abort signal to cancel the invocation. */\n signal?: AbortSignal\n}\n\n/** The settled result of a {@link BinaryExecutor.exec} call. 
*/\nexport interface BinaryExecResult {\n /** The process exit code (or -1 when the process failed to start). */\n exitCode: number\n /** Captured standard output. */\n stdout: string\n /** Captured standard error. */\n stderr: string\n /** `true` when the invocation failed (non-zero exit, spawn failure, timeout, abort). */\n failed: boolean\n}\n\n/**\n * Runs a binary invocation to completion. How and where it runs — local child process, remote\n * runner, sandbox, container, a browser-side WASI shim — is the implementation's business.\n */\nexport interface BinaryExecutor {\n /**\n * Run one invocation to completion and report the result. Implementations must not throw on\n * non-zero exits — report via `failed`/`exitCode` so callers map failures to readable errors.\n *\n * @param invocation - The command, args, and limits to run.\n * @returns The settled result.\n */\n exec(invocation: BinaryInvocation): Promise<BinaryExecResult>\n}\n\n/**\n * Bytes ⇄ executor-visible paths. The seam that lets binary-backed engines exchange files\n * with the process (or remote runner) that executes them.\n */\nexport interface ScratchWorkspace {\n /**\n * Write `bytes` into the workspace under `filename` and return the absolute path the\n * paired executor can open.\n *\n * @param bytes - The content to materialize.\n * @param filename - The basename to use (extension matters to format-sniffing binaries).\n * @returns The absolute path.\n */\n materialize(bytes: Uint8Array, filename: string): Promise<string>\n /**\n * Read a file the executor produced inside the workspace.\n *\n * @param path - The absolute path to read.\n * @returns The file bytes.\n */\n read(path: string): Promise<Uint8Array>\n /** The workspace root directory, for `--outdir`-style binary arguments. */\n dir(): string\n /** List the files currently in the workspace root (basenames). */\n list(): Promise<string[]>\n /** Remove the workspace and everything in it. Engines call this in `finally`. 
*/\n dispose(): Promise<void>\n}\n\n/**\n * A factory for per-execution scratch workspaces. Engines mint one workspace per invocation\n * so concurrent executions never share a directory.\n */\nexport type ScratchWorkspaceFactory = () => ScratchWorkspace | Promise<ScratchWorkspace>\n\n// ── the format vocabulary ────────────────────────────────────────────────────\n\n/**\n * An input-matching pattern: an exact MIME type (`application/pdf`), a family wildcard\n * (`image/*`), or a virtual MIME such as {@link PCM_MIME}.\n */\nexport type MimePattern = string\n\n/**\n * The virtual MIME type for decoded mono PCM audio — the intermediate between an audio\n * container and a transcription. Bytes are little-endian Float32 samples in `[-1, 1]`;\n * a {@link ConvertOutput} carrying PCM must set `meta.sampleRate` (Hz).\n */\nexport const PCM_MIME = 'audio/x-adk-pcm'\n\n/**\n * Pack PCM samples into transport bytes for a {@link ConvertOutput}.\n *\n * @remarks\n * A `Float32Array` view over arbitrary `Uint8Array` bytes requires 4-byte alignment, which\n * sliced buffers do not guarantee — this helper (and {@link bytesToPcm}) copy-normalize so\n * neither side has to think about alignment.\n *\n * @param pcm - Mono PCM samples in `[-1, 1]`.\n * @returns The samples as little-endian Float32 bytes.\n */\nexport const pcmToBytes = (pcm: Float32Array): Uint8Array => {\n const copy = new Float32Array(pcm)\n return new Uint8Array(copy.buffer, 0, copy.byteLength)\n}\n\n/**\n * Read PCM samples back out of transport bytes.\n *\n * @param bytes - Little-endian Float32 bytes (as produced by {@link pcmToBytes}).\n * @returns The mono PCM samples.\n */\nexport const bytesToPcm = (bytes: Uint8Array): Float32Array => {\n const aligned = new Uint8Array(bytes.length)\n aligned.set(bytes)\n return new Float32Array(aligned.buffer, 0, Math.floor(bytes.length / 4))\n}\n\n// ── convert options (typed, augmentable) ─────────────────────────────────────\n\n/** Options understood by OCR-flavored 
converts (`image/*` → `txt`/`hocr`/`json`). */\nexport interface OcrConvertOptions {\n /** Recognition language hints (e.g. `['eng','deu']`). */\n languages?: readonly string[]\n}\n\n/** Options understood by transcription-flavored converts ({@link PCM_MIME} → `txt`/`srt`/`vtt`/`json`). */\nexport interface AsrConvertOptions {\n /** Source-language hint (BCP-47-ish). */\n lang?: string\n /** Translate the transcription to English. */\n translate?: boolean\n}\n\n/** Options understood by embedded-image extraction converts (`application/pdf` → `images`). */\nexport interface ImagesConvertOptions {\n /**\n * Preferred output encoding token (`jpg`, `png`, …). An extractor that can emit it natively\n * should; outputs in other encodings are re-encoded downstream by the requesting step.\n */\n format?: string\n}\n\n/**\n * The options bag carried by a {@link ConvertRequest} — one typed, augmentable interface\n * merging every documented convention.\n *\n * @remarks\n * Consumers add their own keys via declaration merging against this module:\n *\n * ```ts\n * declare module '@nhtio/adk/batteries/media/contracts' {\n * interface ConvertOptions {\n * watermark?: { text: string }\n * }\n * }\n * ```\n *\n * Typo'd keys become excess-property compile errors at literal call sites; the runtime stays\n * open — engines must ignore keys they don't understand (multi-hop conversion forwards one\n * bag to every hop). The namespace is flat and globally merged, so BYO keys should be named\n * to avoid collisions (prefix by engine where ambiguous).\n */\nexport interface ConvertOptions\n extends OcrConvertOptions, AsrConvertOptions, ImagesConvertOptions {}\n\n// ── the two capabilities ─────────────────────────────────────────────────────\n\n/** A format-changing request handed to a {@link ConvertCapability}. */\nexport interface ConvertRequest {\n /** The input content bytes. */\n bytes: Uint8Array\n /** The input MIME type. 
*/\n mimeType: string\n /** The input filename (extension informs format sniffing). */\n filename: string\n /** The target format token (`pdf`, `docx`, `txt`, `pcm`, `images`, …). */\n to: string\n /** Capability-specific options — see {@link ConvertOptions}. */\n options?: ConvertOptions\n /** Abort signal threaded from the pipeline execution. */\n signal?: AbortSignal\n}\n\n/** One output of a convert — most converts yield exactly one; `images` yields many. */\nexport interface ConvertOutput {\n /** The output bytes. */\n bytes: Uint8Array\n /** The output MIME type (honest — native encoding, no silent re-encode). */\n mimeType: string\n /** Output metadata (e.g. `{ sampleRate: 44100 }` on {@link PCM_MIME} outputs). */\n meta?: Record<string, unknown>\n}\n\n/** The settled result of a convert. */\nexport interface ConvertResult {\n /** The outputs, in source order. */\n outputs: readonly ConvertOutput[]\n}\n\n/**\n * One uniform block of an engine's conversion matrix: every format token in `to` is\n * producible from every input matching `from`.\n *\n * @remarks\n * Declarations are plain data — the registry reads them without calling engine code. An\n * input-dependent matrix (LibreOffice: docx→pdf yes, docx→xlsx no, ods→xlsx yes) is expressed\n * as several capability groups, each a uniform from×to block.\n */\nexport interface ConvertCapability {\n /** Input patterns this block accepts. */\n from: readonly MimePattern[]\n /** Format tokens producible from every `from` member. 
*/\n to: readonly string[]\n /**\n * Perform the conversion.\n *\n * @param request - The input bytes, target token, and options.\n * @returns The conversion outputs.\n */\n convert(request: ConvertRequest): Promise<ConvertResult>\n}\n\n/**\n * A same-format content transform handed to a {@link MutateCapability} — the fused image\n * request: adjacent `image.*` steps fold into ONE request so a resize→rotate→format chain\n * costs a single decode/encode.\n */\nexport interface MutateRequest {\n /** The input bytes. */\n bytes: Uint8Array\n /** The input MIME type. */\n mimeType: string\n /** Resize, when requested. */\n resize?: {\n width?: number\n height?: number\n fit?: 'cover' | 'contain' | 'fill' | 'inside' | 'outside'\n }\n /** Clockwise rotation in degrees. */\n rotate?: 90 | 180 | 270\n /** Flip axes. */\n flip?: { horizontal?: boolean; vertical?: boolean }\n /** Remove EXIF/ICC metadata. */\n stripMetadata?: boolean\n /** Re-encode target, when requested (rides the same fused call). */\n format?: { to: string; quality?: number }\n /** Abort signal threaded from the pipeline execution. */\n signal?: AbortSignal\n}\n\n/** A same-format content-transform capability group. */\nexport interface MutateCapability {\n /** Input patterns this block mutates. */\n over: readonly MimePattern[]\n /** Content operations supported (`resize`, `rotate`, `flip`, `strip_metadata`). */\n ops: readonly string[]\n /** Format tokens reachable via `request.format` in the same fused call. 
*/\n encodes: readonly string[]\n /**\n * Apply the fused transform.\n *\n * @param request - The folded operations and input bytes.\n * @returns The transformed bytes.\n */\n mutate(request: MutateRequest): Promise<EngineBytesResult>\n}\n\n/**\n * A self-declaring media engine: an id for error messages plus the capabilities it provides.\n * At least one capability entry is required.\n */\nexport interface MediaEngine {\n /** Stable identifier used in config and dispatch error messages (`jimp`, `soffice`, …). */\n readonly id: string\n /** Format-changing capability groups. */\n readonly converts?: readonly ConvertCapability[]\n /** Same-format content-transform capability groups. */\n readonly mutates?: readonly MutateCapability[]\n}\n\n// ── duck-typed guards ────────────────────────────────────────────────────────\n\nconst hasFns = (value: unknown, names: readonly string[]): boolean =>\n isObject(value) && names.every((n) => typeof (value as Record<string, unknown>)[n] === 'function')\n\nconst isStringArray = (value: unknown): boolean =>\n Array.isArray(value) && value.every((v) => typeof v === 'string')\n\n/** `true` when `value` structurally implements {@link BinaryExecutor}. */\nexport const implementsBinaryExecutor = (value: unknown): value is BinaryExecutor =>\n hasFns(value, ['exec'])\n\n/** `true` when `value` structurally implements {@link ScratchWorkspace}. */\nexport const implementsScratchWorkspace = (value: unknown): value is ScratchWorkspace =>\n hasFns(value, ['materialize', 'read', 'dir', 'list', 'dispose'])\n\n/** `true` when `value` structurally implements {@link ConvertCapability}. */\nexport const implementsConvertCapability = (value: unknown): value is ConvertCapability =>\n hasFns(value, ['convert']) &&\n isStringArray((value as ConvertCapability).from) &&\n isStringArray((value as ConvertCapability).to)\n\n/** `true` when `value` structurally implements {@link MutateCapability}. 
*/\nexport const implementsMutateCapability = (value: unknown): value is MutateCapability =>\n hasFns(value, ['mutate']) &&\n isStringArray((value as MutateCapability).over) &&\n isStringArray((value as MutateCapability).ops) &&\n isStringArray((value as MutateCapability).encodes)\n\n/**\n * `true` when `value` structurally implements {@link MediaEngine}: a string id plus at least\n * one well-formed capability entry. Every declared entry must pass its capability guard.\n */\nexport const implementsMediaEngine = (value: unknown): value is MediaEngine => {\n if (!isObject(value)) return false\n const engine = value as unknown as MediaEngine\n if (typeof engine.id !== 'string' || engine.id.length === 0) return false\n const converts = engine.converts\n const mutates = engine.mutates\n if (converts !== undefined) {\n if (!Array.isArray(converts) || !converts.every(implementsConvertCapability)) return false\n }\n if (mutates !== undefined) {\n if (!Array.isArray(mutates) || !mutates.every(implementsMutateCapability)) return false\n }\n const convertCount = Array.isArray(converts) ? converts.length : 0\n const mutateCount = Array.isArray(mutates) ? 
mutates.length : 0\n return convertCount + mutateCount > 0\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAqJA,IAAa,WAAW;;;;;;;;;;;;AAaxB,IAAa,cAAc,QAAkC;CAC3D,MAAM,OAAO,IAAI,aAAa,GAAG;CACjC,OAAO,IAAI,WAAW,KAAK,QAAQ,GAAG,KAAK,UAAU;AACvD;;;;;;;AAQA,IAAa,cAAc,UAAoC;CAC7D,MAAM,UAAU,IAAI,WAAW,MAAM,MAAM;CAC3C,QAAQ,IAAI,KAAK;CACjB,OAAO,IAAI,aAAa,QAAQ,QAAQ,GAAG,KAAK,MAAM,MAAM,SAAS,CAAC,CAAC;AACzE;AAuKA,IAAM,UAAU,OAAgB,UAC9B,SAAS,KAAK,KAAK,MAAM,OAAO,MAAM,OAAQ,MAAkC,OAAO,UAAU;AAEnG,IAAM,iBAAiB,UACrB,MAAM,QAAQ,KAAK,KAAK,MAAM,OAAO,MAAM,OAAO,MAAM,QAAQ;;AAGlE,IAAa,4BAA4B,UACvC,OAAO,OAAO,CAAC,MAAM,CAAC;;AAGxB,IAAa,8BAA8B,UACzC,OAAO,OAAO;CAAC;CAAe;CAAQ;CAAO;CAAQ;AAAS,CAAC;;AAGjE,IAAa,+BAA+B,UAC1C,OAAO,OAAO,CAAC,SAAS,CAAC,KACzB,cAAe,MAA4B,IAAI,KAC/C,cAAe,MAA4B,EAAE;;AAG/C,IAAa,8BAA8B,UACzC,OAAO,OAAO,CAAC,QAAQ,CAAC,KACxB,cAAe,MAA2B,IAAI,KAC9C,cAAe,MAA2B,GAAG,KAC7C,cAAe,MAA2B,OAAO;;;;;AAMnD,IAAa,yBAAyB,UAAyC;CAC7E,IAAI,CAAC,SAAS,KAAK,GAAG,OAAO;CAC7B,MAAM,SAAS;CACf,IAAI,OAAO,OAAO,OAAO,YAAY,OAAO,GAAG,WAAW,GAAG,OAAO;CACpE,MAAM,WAAW,OAAO;CACxB,MAAM,UAAU,OAAO;CACvB,IAAI,aAAa,KAAA;MACX,CAAC,MAAM,QAAQ,QAAQ,KAAK,CAAC,SAAS,MAAM,2BAA2B,GAAG,OAAO;CAAA;CAEvF,IAAI,YAAY,KAAA;MACV,CAAC,MAAM,QAAQ,OAAO,KAAK,CAAC,QAAQ,MAAM,0BAA0B,GAAG,OAAO;CAAA;CAIpF,QAFqB,MAAM,QAAQ,QAAQ,IAAI,SAAS,SAAS,MAC7C,MAAM,QAAQ,OAAO,IAAI,QAAQ,SAAS,KAC1B;AACtC"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
|
|
2
|
+
require("../../../chunk-Ble4zEEl.js");
|
|
3
|
+
const require_tool_registry = require("../../../tool_registry-CKJPze3j.js");
|
|
4
|
+
require("../../../guards.cjs");
|
|
5
|
+
const require_batteries_media_contracts = require("../contracts.cjs");
|
|
6
|
+
const require_exceptions = require("../../../exceptions-CQi_lNs1.js");
|
|
7
|
+
//#region src/batteries/media/engines/audio_decode.ts
|
|
8
|
+
/**
|
|
9
|
+
* A cross-environment audio-decoding {@link @nhtio/adk/batteries/media/contracts!MediaEngine}
|
|
10
|
+
* backed by the `audio-decode` package (pure JS/WASM codecs — no ffmpeg, no native bindings;
|
|
11
|
+
* works in Node and browsers).
|
|
12
|
+
*
|
|
13
|
+
* @module @nhtio/adk/batteries/media/engines/audio_decode
|
|
14
|
+
*
|
|
15
|
+
* @remarks
|
|
16
|
+
* Declares one convert capability: audio containers to the virtual `pcm` token (mp3 /
|
|
17
|
+
* m4a-aac / ogg-vorbis / opus / flac / wav), downmixed to mono. The PCM output reports the
|
|
18
|
+
* SOURCE sample rate in `meta.sampleRate` — the pipeline's transcribe step resamples to the
|
|
19
|
+
* 16 kHz rate that transcription engines expect. For exotic containers, compose an ffmpeg-backed
|
|
20
|
+
* engine instead; the capability declaration is the seam.
|
|
21
|
+
*
|
|
22
|
+
* `audio-decode` is an optional peer dependency, lazily imported on first actual use.
|
|
23
|
+
*/
|
|
24
|
+
var channelsOf = (buffer) => {
|
|
25
|
+
if (Array.isArray(buffer.channelData)) return buffer.channelData;
|
|
26
|
+
if (typeof buffer.getChannelData === "function") {
|
|
27
|
+
const count = buffer.numberOfChannels ?? 1;
|
|
28
|
+
return Array.from({ length: count }, (_, c) => buffer.getChannelData(c));
|
|
29
|
+
}
|
|
30
|
+
throw new Error("audio-decode returned an unrecognized buffer shape");
|
|
31
|
+
};
|
|
32
|
+
/**
 * Construct the audio-decode-backed engine.
 *
 * The engine declares a single convert capability (audio container MIME types to the `pcm`
 * token). The decode function is resolved lazily on the first `convert` call and cached on
 * success; a failed resolution is NOT cached, so a later call retries.
 *
 * @param options - Optional module resolver override (`options.audioDecode`). When omitted the
 *   engine dynamically imports `audio-decode`.
 * @returns The engine.
 */
var audioDecodeEngine = (options = {}) => {
	let fnPromise;
	/**
	 * Resolve (once) the decode function.
	 * Rejects with `E_INVALID_MEDIA_PIPELINE_CONFIG` when the module cannot be loaded or does
	 * not expose a function, whether the resolver fails synchronously or asynchronously.
	 */
	const getDecode = () => {
		// Run the resolver inside the chain so a synchronous throw is wrapped like a rejection.
		fnPromise ??= Promise.resolve().then(() => options.audioDecode ? options.audioDecode() : import("audio-decode")).then((mod) => {
			// Accept either the function itself or a `{ default: fn }` module namespace.
			const fn = typeof mod === "function" ? mod : mod?.default;
			if (typeof fn !== "function") throw new Error("audio-decode did not resolve to a decode function");
			return fn;
		}).catch((err) => {
			// Drop the cached rejection so the next call retries instead of replaying it.
			fnPromise = void 0;
			throw new require_exceptions.E_INVALID_MEDIA_PIPELINE_CONFIG([`the audio-decode engine could not load its peer dependency "audio-decode": ${require_tool_registry.isError(err) ? err.message : String(err)} — install it (pnpm add audio-decode)`]);
		});
		return fnPromise;
	};
	/**
	 * Decode `request.bytes` and return a single mono PCM output tagged with the sample rate
	 * reported by the decoded buffer.
	 */
	const convert = async (request) => {
		const decode = await getDecode();
		const buffer = await decode(request.bytes);
		const channels = channelsOf(buffer);
		let pcm;
		if (channels.length === 0) {
			// No channels decoded: emit an explicit empty signal instead of relying on `undefined`.
			pcm = new Float32Array(0);
		} else if (channels.length === 1) {
			pcm = channels[0];
		} else {
			// Downmix to mono by averaging channels.
			const length = channels[0].length;
			const mono = new Float32Array(length);
			for (const data of channels) {
				// Stop at the end of a shorter channel: reading past it yields `undefined` → NaN.
				const limit = Math.min(length, data.length);
				for (let i = 0; i < limit; i++) mono[i] += data[i] / channels.length;
			}
			pcm = mono;
		}
		return { outputs: [{
			bytes: require_batteries_media_contracts.pcmToBytes(pcm),
			mimeType: require_batteries_media_contracts.PCM_MIME,
			meta: { sampleRate: buffer.sampleRate }
		}] };
	};
	return {
		id: "audio-decode",
		converts: [{
			from: [
				"audio/mpeg",
				"audio/mp3",
				"audio/mp4",
				"audio/aac",
				"audio/x-m4a",
				"audio/ogg",
				"audio/opus",
				"audio/flac",
				"audio/x-flac",
				"audio/wav",
				"audio/x-wav",
				"audio/wave"
			],
			to: ["pcm"],
			convert
		}]
	};
};
|
|
89
|
+
//#endregion
|
|
90
|
+
exports.audioDecodeEngine = audioDecodeEngine;
|
|
91
|
+
|
|
92
|
+
//# sourceMappingURL=audio_decode.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"audio_decode.cjs","names":[],"sources":["../../../../src/batteries/media/engines/audio_decode.ts"],"sourcesContent":["/**\n * A cross-environment audio-decoding {@link @nhtio/adk/batteries/media/contracts!MediaEngine}\n * backed by the `audio-decode` package (pure JS/WASM codecs — no ffmpeg, no native bindings;\n * works in Node and browsers).\n *\n * @module @nhtio/adk/batteries/media/engines/audio_decode\n *\n * @remarks\n * Declares one convert capability: audio containers to the virtual `pcm` token (mp3 /\n * m4a-aac / ogg-vorbis / opus / flac / wav), downmixed to mono. The PCM output reports the\n * SOURCE sample rate in `meta.sampleRate` — the pipeline's transcribe step resamples to the\n * 16 kHz transcription engines expect. For exotic containers, compose an ffmpeg-backed\n * engine instead; the capability declaration is the seam.\n *\n * `audio-decode` is an optional peer dependency, lazily imported on first actual use.\n */\n\nimport { isError } from '@nhtio/adk/guards'\nimport { pcmToBytes, PCM_MIME } from '../contracts'\nimport { E_INVALID_MEDIA_PIPELINE_CONFIG } from '../exceptions'\nimport type { MediaEngine, ConvertRequest, ConvertResult } from '../contracts'\n\n/**\n * The decoded shapes audio-decode resolves to. Some codecs return an AudioBuffer-compatible\n * object (`numberOfChannels` + `getChannelData`); others (e.g. the wav path in Node) return a\n * plain `{ channelData: Float32Array[], sampleRate }` record. The engine normalizes both.\n */\ninterface AudioBufferLike {\n numberOfChannels?: number\n sampleRate: number\n getChannelData?(channel: number): Float32Array\n channelData?: Float32Array[]\n}\n\ntype AudioDecodeFn = (bytes: Uint8Array | ArrayBuffer) => Promise<AudioBufferLike>\n\nconst channelsOf = (buffer: AudioBufferLike): Float32Array[] => {\n if (Array.isArray(buffer.channelData)) return buffer.channelData\n if (typeof buffer.getChannelData === 'function') {\n const count = buffer.numberOfChannels ?? 
1\n return Array.from({ length: count }, (_, c) => buffer.getChannelData!(c))\n }\n throw new Error('audio-decode returned an unrecognized buffer shape')\n}\n\n/** Options for {@link audioDecodeEngine}. */\nexport interface AudioDecodeEngineOptions {\n /** Override the module resolution. Default: `import('audio-decode')`. */\n audioDecode?: () =>\n | AudioDecodeFn\n | { default: AudioDecodeFn }\n | Promise<AudioDecodeFn | { default: AudioDecodeFn }>\n}\n\n/**\n * Construct the audio-decode-backed engine.\n *\n * @param options - Optional module resolver override.\n * @returns The engine.\n */\nexport const audioDecodeEngine = (options: AudioDecodeEngineOptions = {}): MediaEngine => {\n let fnPromise: Promise<AudioDecodeFn> | undefined\n const getDecode = (): Promise<AudioDecodeFn> => {\n fnPromise ??= Promise.resolve(\n options.audioDecode ? options.audioDecode() : import('audio-decode')\n )\n .then((mod) => {\n const fn = typeof mod === 'function' ? mod : (mod as { default: AudioDecodeFn }).default\n if (typeof fn !== 'function') {\n throw new Error('audio-decode did not resolve to a decode function')\n }\n return fn\n })\n .catch((err) => {\n const detail = isError(err) ? 
err.message : String(err)\n throw new E_INVALID_MEDIA_PIPELINE_CONFIG([\n `the audio-decode engine could not load its peer dependency \"audio-decode\": ${detail} — install it (pnpm add audio-decode)`,\n ])\n })\n return fnPromise\n }\n\n const convert = async (request: ConvertRequest): Promise<ConvertResult> => {\n const decode = await getDecode()\n const buffer = await decode(request.bytes)\n const channels = channelsOf(buffer)\n let pcm: Float32Array\n if (channels.length <= 1) {\n pcm = channels[0]\n } else {\n // Downmix to mono by averaging channels.\n const length = channels[0].length\n const mono = new Float32Array(length)\n for (const data of channels) {\n for (let i = 0; i < length; i++) mono[i] += data[i] / channels.length\n }\n pcm = mono\n }\n return {\n outputs: [\n { bytes: pcmToBytes(pcm), mimeType: PCM_MIME, meta: { sampleRate: buffer.sampleRate } },\n ],\n }\n }\n\n return {\n id: 'audio-decode',\n converts: [\n {\n from: [\n 'audio/mpeg',\n 'audio/mp3',\n 'audio/mp4',\n 'audio/aac',\n 'audio/x-m4a',\n 'audio/ogg',\n 'audio/opus',\n 'audio/flac',\n 'audio/x-flac',\n 'audio/wav',\n 'audio/x-wav',\n 'audio/wave',\n ],\n to: ['pcm'],\n convert,\n },\n ],\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAoCA,IAAM,cAAc,WAA4C;CAC9D,IAAI,MAAM,QAAQ,OAAO,WAAW,GAAG,OAAO,OAAO;CACrD,IAAI,OAAO,OAAO,mBAAmB,YAAY;EAC/C,MAAM,QAAQ,OAAO,oBAAoB;EACzC,OAAO,MAAM,KAAK,EAAE,QAAQ,MAAM,IAAI,GAAG,MAAM,OAAO,eAAgB,CAAC,CAAC;CAC1E;CACA,MAAM,IAAI,MAAM,oDAAoD;AACtE;;;;;;;AAiBA,IAAa,qBAAqB,UAAoC,CAAC,MAAmB;CACxF,IAAI;CACJ,MAAM,kBAA0C;EAC9C,cAAc,QAAQ,QACpB,QAAQ,cAAc,QAAQ,YAAY,IAAI,OAAO,eACvD,EACG,MAAM,QAAQ;GACb,MAAM,KAAK,OAAO,QAAQ,aAAa,MAAO,IAAmC;GACjF,IAAI,OAAO,OAAO,YAChB,MAAM,IAAI,MAAM,mDAAmD;GAErE,OAAO;EACT,CAAC,EACA,OAAO,QAAQ;GAEd,MAAM,IAAI,mBAAA,gCAAgC,CACxC,8EAFa,sBAAA,QAAQ,GAAG,IAAI,IAAI,UAAU,OAAO,GAAG,EAEiC,sCACvF,CAAC;EACH,CAAC;EACH,OAAO;CACT;CAEA,MAAM,UAAU,OAAO,YAAoD;EAEzE,MAAM,SAAS,OAAM,MADA,UAAU,GACH,QAAQ,KAAK;EACzC,MAAM,WAAW,WAAW,MAAM;EAClC,IAAI;EACJ,IAAI,SAAS,UAAU,GACrB,MAAM,SAAS;OACV;GAEL,MAAM,SAAS,SAAS,GAAG;GAC3B,MAAM,OAAO,IAAI,aAAa,MAAM;GACpC,KAAK,MAAM,QAAQ,UACjB,KAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,KAAK,KAAK,MAAM,KAAK,KAAK,SAAS;GAEjE,MAAM;EACR;EACA,OAAO,EACL,SAAS,CACP;GAAE,OAAO,kCAAA,WAAW,GAAG;GAAG,UAAU,kCAAA;GAAU,MAAM,EAAE,YAAY,OAAO,WAAW;EAAE,CACxF,EACF;CACF;CAEA,OAAO;EACL,IAAI;EACJ,UAAU,CACR;GACE,MAAM;IACJ;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;IACA;GACF;GACA,IAAI,CAAC,KAAK;GACV;EACF,CACF;CACF;AACF"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A cross-environment audio-decoding {@link @nhtio/adk/batteries/media/contracts!MediaEngine}
|
|
3
|
+
* backed by the `audio-decode` package (pure JS/WASM codecs — no ffmpeg, no native bindings;
|
|
4
|
+
* works in Node and browsers).
|
|
5
|
+
*
|
|
6
|
+
* @module @nhtio/adk/batteries/media/engines/audio_decode
|
|
7
|
+
*
|
|
8
|
+
* @remarks
|
|
9
|
+
* Declares one convert capability: audio containers to the virtual `pcm` token (mp3 /
|
|
10
|
+
* m4a-aac / ogg-vorbis / opus / flac / wav), downmixed to mono. The PCM output reports the
|
|
11
|
+
* SOURCE sample rate in `meta.sampleRate` — the pipeline's transcribe step resamples to the
|
|
12
|
+
* 16 kHz rate that transcription engines expect. For exotic containers, compose an ffmpeg-backed
|
|
13
|
+
* engine instead; the capability declaration is the seam.
|
|
14
|
+
*
|
|
15
|
+
* `audio-decode` is an optional peer dependency, lazily imported on first actual use.
|
|
16
|
+
*/
|
|
17
|
+
import type { MediaEngine } from "../contracts";
|
|
18
|
+
/**
 * Structural description of the values `audio-decode` may resolve to. Two shapes are
 * covered: an AudioBuffer-compatible object (`numberOfChannels` + `getChannelData`) and a
 * plain `{ channelData: Float32Array[], sampleRate }` record (e.g. the wav path in Node).
 * The engine normalizes both.
 */
interface AudioBufferLike {
	/** Sample rate of the decoded audio. */
	sampleRate: number;
	/** Per-channel sample arrays, present on the plain-record shape. */
	channelData?: Float32Array[];
	/** Channel count, present on the AudioBuffer-compatible shape. */
	numberOfChannels?: number;
	/** Per-channel accessor, present on the AudioBuffer-compatible shape. */
	getChannelData?(channel: number): Float32Array;
}
|
|
29
|
+
/** Signature of the decode function: encoded audio bytes in, a promise of the decoded buffer out. */
type AudioDecodeFn = (
	bytes: Uint8Array | ArrayBuffer
) => Promise<AudioBufferLike>;
|
|
30
|
+
/** Options for {@link audioDecodeEngine}. */
export interface AudioDecodeEngineOptions {
	/**
	 * Override the module resolution. Default: `import('audio-decode')`.
	 * The resolver may return its result synchronously or as a promise.
	 */
	audioDecode?: () => AudioDecodeModule | Promise<AudioDecodeModule>;
}
/** What a resolver may produce: the decode function itself or a `{ default }` wrapper around it. */
type AudioDecodeModule = AudioDecodeFn | {
	default: AudioDecodeFn;
};
|
|
39
|
+
/**
 * Build a {@link MediaEngine} that decodes audio through the `audio-decode` package.
 *
 * @param options - Optional settings; currently only a module resolver override.
 * @returns The constructed engine.
 */
export declare const audioDecodeEngine: (options?: AudioDecodeEngineOptions) => MediaEngine;
|
|
46
|
+
export {};
|