@dvai-bridge/core 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +51 -0
- package/README.md +199 -0
- package/bin/dvai-bridge.js +72 -0
- package/dist/dvai-transformers.worker.js +48 -0
- package/dist/dvai-webllm.worker.js +89 -0
- package/dist/index.cjs +69 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +1030 -0
- package/dist/index.d.ts +1030 -0
- package/dist/index.js +69 -0
- package/dist/index.js.map +1 -0
- package/package.json +75 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,1030 @@
|
|
|
1
|
+
export { InitProgressReport } from '@mlc-ai/web-llm';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 3 — peer discovery types.
|
|
5
|
+
*
|
|
6
|
+
* A "peer" is another device running dvai-bridge that this device can
|
|
7
|
+
* (potentially) offload inference requests to. Peers are surfaced by
|
|
8
|
+
* one or more `IDiscovery` impls — LAN mDNS, app-supplied static list,
|
|
9
|
+
* rendezvous-paired (internet), or a host-app-provided custom source.
|
|
10
|
+
*/
|
|
11
|
+
interface Peer {
|
|
12
|
+
/** Stable per-install device ID of the peer. */
|
|
13
|
+
deviceId: string;
|
|
14
|
+
/** Human-readable hint (iOS device name, hostname, etc.). */
|
|
15
|
+
deviceName: string;
|
|
16
|
+
/** Library SemVer the peer is running. */
|
|
17
|
+
dvaiVersion: string;
|
|
18
|
+
/** OpenAI-compatible base URL the peer's local server exposes. */
|
|
19
|
+
baseUrl: string;
|
|
20
|
+
/**
|
|
21
|
+
* v3.1 wire-protocol extension. Identifies which application on the
|
|
22
|
+
* peer device is making the request — used by multi-tenant targets
|
|
23
|
+
* (the Hub) to isolate per-app state. Optional for backwards compat
|
|
24
|
+
* with v3.0 SDKs that don't send this field.
|
|
25
|
+
*/
|
|
26
|
+
appId?: string;
|
|
27
|
+
/**
|
|
28
|
+
* Models the peer claims to have loaded right now. Used to filter
|
|
29
|
+
* peer eligibility — we only offload model X to a peer that already
|
|
30
|
+
* has model X loaded (loading from scratch on the peer is fine but
|
|
31
|
+
* defeats the latency win).
|
|
32
|
+
*/
|
|
33
|
+
loadedModels: string[];
|
|
34
|
+
/**
|
|
35
|
+
* Peer-reported capability map: { modelId → tok/s }. Treat as
|
|
36
|
+
* advisory only; the offload decider re-probes a peer with a small
|
|
37
|
+
* reachability+decode test before its first real offload request.
|
|
38
|
+
*/
|
|
39
|
+
capability: Record<string, number>;
|
|
40
|
+
/** Discovery source — useful for diagnostics and the structured-error response. */
|
|
41
|
+
via: "mdns" | "static" | "rendezvous" | "custom";
|
|
42
|
+
/** Whether the peer's URL uses TLS. */
|
|
43
|
+
secure: boolean;
|
|
44
|
+
/** Last-seen unix ms — discovery sources update this. */
|
|
45
|
+
lastSeenAt: number;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Phase 3 (v3.0) — capability assessment types.
|
|
50
|
+
*
|
|
51
|
+
* A "capability score" is an estimate of decode tok/s for a given
|
|
52
|
+
* (model, device) pair on this device. Used by the offload decider
|
|
53
|
+
* to pick local vs. peer execution per request.
|
|
54
|
+
*/
|
|
55
|
+
interface CapabilityScore {
|
|
56
|
+
/** Model identifier this score applies to. */
|
|
57
|
+
modelId: string;
|
|
58
|
+
/** Stable per-install device identifier. */
|
|
59
|
+
deviceId: string;
|
|
60
|
+
/** Library SemVer at the time the score was measured. */
|
|
61
|
+
libraryVersion: string;
|
|
62
|
+
/** Estimated decode rate, tokens-per-second. */
|
|
63
|
+
tokPerSec: number;
|
|
64
|
+
/** Source of the estimate. */
|
|
65
|
+
source: "probe" | "heuristic";
|
|
66
|
+
/** Unix milliseconds the score was measured / computed. */
|
|
67
|
+
measuredAt: number;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Coarse device-class buckets used by the heuristic fallback when no
|
|
71
|
+
* cold-run probe has run yet. Numbers are intentionally conservative
|
|
72
|
+
* — the probe will refine on first real use.
|
|
73
|
+
*/
|
|
74
|
+
interface DeviceCapabilityHints {
|
|
75
|
+
/** Has a dedicated NPU (Apple Neural Engine, Hexagon, Intel NPU, etc.) */
|
|
76
|
+
hasNpu: boolean;
|
|
77
|
+
/** Approximate system RAM in GB. */
|
|
78
|
+
ramGb: number;
|
|
79
|
+
/** GPU class — best-guess based on platform clues. */
|
|
80
|
+
gpuClass: "none" | "integrated" | "discrete" | "apple-silicon";
|
|
81
|
+
/** Coarse CPU bucket. */
|
|
82
|
+
cpuClass: "low" | "mid" | "high";
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
/**
|
|
86
|
+
* LAN-pairing handshake. The first time Device A wants to offload to
|
|
87
|
+
* Device B over the LAN, A POSTs /v1/dvai/handshake to B with its
|
|
88
|
+
* identity + a nonce. B surfaces a UI prompt to the user; on approve,
|
|
89
|
+
* B generates a 256-bit pairing key and returns it. From then on, A
|
|
90
|
+
* includes `X-DVAI-Pairing: HMAC-SHA256(pairingKey, body)` on every
|
|
91
|
+
* offload request to B.
|
|
92
|
+
*/
|
|
93
|
+
|
|
94
|
+
/** Generate a fresh 256-bit pairing key (base64-url encoded). */
|
|
95
|
+
declare function generatePairingKey(): string;
|
|
96
|
+
/** Generate a fresh nonce for a handshake request. */
|
|
97
|
+
declare function generateNonce(): string;
|
|
98
|
+
/**
|
|
99
|
+
* HMAC-SHA256(key, message). Used to sign offload requests so the
|
|
100
|
+
* peer can verify they came from a paired device.
|
|
101
|
+
*/
|
|
102
|
+
declare function signHmac(pairingKey: string, message: string): Promise<string>;
|
|
103
|
+
/** Verify an HMAC. Returns true on match, false otherwise (constant-time-ish). */
|
|
104
|
+
declare function verifyHmac(pairingKey: string, message: string, signature: string): Promise<boolean>;
|
|
105
|
+
/**
|
|
106
|
+
* Compose the canonical message that gets HMAC-signed for a peer-to-peer
|
|
107
|
+
* offload request. The peer recomputes the same string and verifies.
|
|
108
|
+
*
|
|
109
|
+
* Format: `${nonce}\n${method}\n${path}\n${bodyHash}` — bodyHash is the
|
|
110
|
+
* hex-encoded SHA-256 of the request body bytes.
|
|
111
|
+
*/
|
|
112
|
+
declare function composeSignedMessage(nonce: string, method: string, path: string, body: string | undefined): Promise<string>;
|
|
113
|
+
|
|
114
|
+
/**
|
|
115
|
+
* Phase 3 — `/v1/dvai/*` handlers.
|
|
116
|
+
*
|
|
117
|
+
* Hosted by the same in-process HTTP server (or MSW intercept in
|
|
118
|
+
* browser) that already serves the OpenAI surface. Routes:
|
|
119
|
+
*
|
|
120
|
+
* GET /v1/dvai/health — liveness, capacity, version
|
|
121
|
+
* GET /v1/dvai/capability — this device's capability map
|
|
122
|
+
* GET /v1/dvai/peers — discovered peer list
|
|
123
|
+
* POST /v1/dvai/probe — manually trigger a capability probe
|
|
124
|
+
* POST /v1/dvai/handshake — LAN-pairing handshake
|
|
125
|
+
* POST /v1/dvai/pair-qr — start a rendezvous session, return QR payload
|
|
126
|
+
* POST /v1/dvai/pair-scan — submit a scanned QR payload, complete the join
|
|
127
|
+
*
|
|
128
|
+
* These handlers are pure-ish — they take a context object with the
|
|
129
|
+
* relevant collaborators (capability cache, discovery, pairing policy,
|
|
130
|
+
* rendezvous client, etc.) and return JSON responses. Wired into the
|
|
131
|
+
* core's existing handler pipeline by src/index.ts.
|
|
132
|
+
*/
|
|
133
|
+
|
|
134
|
+
/** Generic handler shape: takes parsed request body, returns JSON-stringifiable. */
|
|
135
|
+
type DvaiHandler = (req: {
|
|
136
|
+
body: unknown;
|
|
137
|
+
}) => Promise<{
|
|
138
|
+
status: number;
|
|
139
|
+
body: unknown;
|
|
140
|
+
}>;
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Duck-typed backend contract consumed by the transport-agnostic handlers.
|
|
144
|
+
* Both existing backends (WebLLMBackend, TransformersBackend) satisfy this
|
|
145
|
+
* structurally without any backend changes.
|
|
146
|
+
*/
|
|
147
|
+
interface BackendInterface {
|
|
148
|
+
chatCompletion(body: any): Promise<any>;
|
|
149
|
+
createStreamingResponse(body: any): ReadableStream<Uint8Array>;
|
|
150
|
+
embedding?(inputs: string | string[]): Promise<number[][]>;
|
|
151
|
+
/** WebLLM sets this on fatal errors; triggers recovery path. */
|
|
152
|
+
lastFatalError?: unknown;
|
|
153
|
+
clearFatalError?(): void;
|
|
154
|
+
}
|
|
155
|
+
/**
|
|
156
|
+
* Context passed to every handler on each request. Built once by DVAI.initialize()
|
|
157
|
+
* and reused for the lifetime of the transport; handler reads the fields on
|
|
158
|
+
* each request so state updates on DVAI (e.g. backendInstance replaced during
|
|
159
|
+
* recovery) are visible through the same reference.
|
|
160
|
+
*/
|
|
161
|
+
interface HandlerContext {
|
|
162
|
+
/** Active backend; null means "not initialized" → 503. */
|
|
163
|
+
backend: BackendInterface | null;
|
|
164
|
+
/**
|
|
165
|
+
* Resolved backend kind. Used only for error messages and the model
|
|
166
|
+
* echo in responses. Union widens as new backends are added in later
|
|
167
|
+
* phases — handlers must NOT dispatch on this value; always duck-type
|
|
168
|
+
* on backend methods instead.
|
|
169
|
+
*/
|
|
170
|
+
resolvedBackend: "webllm" | "transformers" | "native";
|
|
171
|
+
/** Model identifier echoed back in responses. */
|
|
172
|
+
modelId: string;
|
|
173
|
+
/**
|
|
174
|
+
* Optional recovery hook. Handler awaits this before a retry when
|
|
175
|
+
* backend.lastFatalError is set. DVAI owns the retry counter and
|
|
176
|
+
* throws when exhausted; handler only awaits. Undefined → no recovery.
|
|
177
|
+
*/
|
|
178
|
+
onRecovery?: () => Promise<void>;
|
|
179
|
+
/**
|
|
180
|
+
* Phase 3 — `/v1/dvai/*` route map populated when `offload.enabled`.
|
|
181
|
+
* Late-bound (getter) so transports can read it per request even
|
|
182
|
+
* though the routes are built after the transport starts. Undefined
|
|
183
|
+
* when offload isn't enabled — transports return 404 in that case.
|
|
184
|
+
*/
|
|
185
|
+
dvaiRoutes?: Record<string, DvaiHandler>;
|
|
186
|
+
/**
|
|
187
|
+
* Phase 4 — first-chance hook for /v1/chat/completions. The Hub
|
|
188
|
+
* uses this to inject substitution-policy + engine-bridge routing
|
|
189
|
+
* before the default handler dispatches to the local backend.
|
|
190
|
+
*
|
|
191
|
+
* Return a Response → that's what the client gets.
|
|
192
|
+
* Return null → fall through to the default backend path.
|
|
193
|
+
*
|
|
194
|
+
* Receives request headers (lower-cased keys) so the interceptor can
|
|
195
|
+
* read v3.1 identity fields (X-DVAI-Peer-Device-Id, X-DVAI-App-Id,
|
|
196
|
+
* X-DVAI-Nonce, X-DVAI-Signature) for HMAC verification + tenant
|
|
197
|
+
* routing.
|
|
198
|
+
*
|
|
199
|
+
* Errors raised in the interceptor propagate to the standard error
|
|
200
|
+
* response path in handleChatCompletion.
|
|
201
|
+
*/
|
|
202
|
+
chatCompletionInterceptor?: (body: any, ctx: HandlerContext, headers?: Record<string, string>) => Promise<Response | null>;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
/**
|
|
206
|
+
* Phase 3 — offload module types.
|
|
207
|
+
*/
|
|
208
|
+
|
|
209
|
+
interface OffloadConfig {
|
|
210
|
+
/** Master switch. Default false; offload is opt-in at v3.0. */
|
|
211
|
+
enabled: boolean;
|
|
212
|
+
/** Run mDNS to discover LAN peers. */
|
|
213
|
+
discoverLAN: boolean;
|
|
214
|
+
/**
|
|
215
|
+
* v3.2.1 — advertise this DVAI instance on `_dvai-bridge._tcp` so
|
|
216
|
+
* other LAN peers can discover it. Default false to preserve v3.1
|
|
217
|
+
* behaviour where mobile SDKs owned the advertise side and the
|
|
218
|
+
* desktop Hub didn't advertise. Set this to `true` on the Hub so
|
|
219
|
+
* mobile peers can auto-discover it without manual URL entry.
|
|
220
|
+
*/
|
|
221
|
+
advertiseLAN?: boolean;
|
|
222
|
+
/**
|
|
223
|
+
* Optional override of the port we advertise. Default: pulled from
|
|
224
|
+
* the running DVAI server's bound port.
|
|
225
|
+
*/
|
|
226
|
+
advertisePort?: number;
|
|
227
|
+
/** Below this tok/s, look for a peer. Default 10. */
|
|
228
|
+
minLocalCapability: number;
|
|
229
|
+
/**
|
|
230
|
+
* v3.2 — hard floor for any local inference, in tok/s. Below this
|
|
231
|
+
* the device is "too weak"; the SDK aborts initialize() and
|
|
232
|
+
* (optionally) shows a system popup via [onHardwareTooWeak].
|
|
233
|
+
*
|
|
234
|
+
* Default: 3. Apps targeting long-prompt batch use cases where
|
|
235
|
+
* latency is acceptable can lower this; apps targeting interactive
|
|
236
|
+
* chat should leave it.
|
|
237
|
+
*/
|
|
238
|
+
hardwareMinimum?: number;
|
|
239
|
+
/** Optional rendezvous-server URL — enables internet path if set. */
|
|
240
|
+
rendezvousUrl?: string;
|
|
241
|
+
/** Optional pre-known peers (skip discovery). */
|
|
242
|
+
knownPeers?: Peer[];
|
|
243
|
+
/**
|
|
244
|
+
* Hook to surface pairing-request UI to the host app. Default: deny.
|
|
245
|
+
*
|
|
246
|
+
* Return:
|
|
247
|
+
* - `true` / `false` — boolean approve/deny. PairingPolicy generates
|
|
248
|
+
* a fresh pairingKey on approval.
|
|
249
|
+
* - `{ approved: true, pairingKey }` — host has its own pairing
|
|
250
|
+
* state (the v3.1 Hub's MultiTenantPairing) and wants the
|
|
251
|
+
* library to use the host-supplied key. Avoids the library
|
|
252
|
+
* generating a key that diverges from the host's store.
|
|
253
|
+
* - `{ approved: false }` — denied.
|
|
254
|
+
*/
|
|
255
|
+
onPairingRequest?: (peer: Peer) => Promise<boolean | {
|
|
256
|
+
approved: true;
|
|
257
|
+
pairingKey: string;
|
|
258
|
+
} | {
|
|
259
|
+
approved: false;
|
|
260
|
+
}>;
|
|
261
|
+
/** Diagnostic callback when a request is offloaded. */
|
|
262
|
+
onOffload?: (peer: Peer) => void;
|
|
263
|
+
/** Hook to plug a custom discovery source. */
|
|
264
|
+
customDiscovery?: () => Promise<Peer[]>;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* Supported pipeline tasks from Transformers.js.
|
|
269
|
+
* Common tasks include:
|
|
270
|
+
* - "text-generation" (default) — LLM chat/text generation
|
|
271
|
+
* - "text2text-generation" — encoder-decoder text models
|
|
272
|
+
* - "text-to-image" — image generation from text prompts
|
|
273
|
+
* - "image-to-text" — image captioning
|
|
274
|
+
* - "automatic-speech-recognition" — audio/speech to text
|
|
275
|
+
* - "text-to-speech" — text to audio
|
|
276
|
+
* - "zero-shot-classification" — classify without training
|
|
277
|
+
* - "feature-extraction" — embeddings
|
|
278
|
+
* - "translation" — language translation
|
|
279
|
+
* - "summarization" — text summarization
|
|
280
|
+
* - And many more: see https://huggingface.co/docs/transformers.js
|
|
281
|
+
*/
|
|
282
|
+
type PipelineTask = string;
|
|
283
|
+
/**
|
|
284
|
+
* A pipeline-compatible callable function.
|
|
285
|
+
* Accepts messages (chat format) and generation options,
|
|
286
|
+
* returns results in the same shape as a Transformers.js pipeline:
|
|
287
|
+
* [{ generated_text: string }]
|
|
288
|
+
*/
|
|
289
|
+
type PipelineCallable = (messages: any, options?: any) => Promise<any>;
|
|
290
|
+
/**
|
|
291
|
+
* Factory function that the client can supply to customize model loading.
|
|
292
|
+
* Receives the dynamically-imported @huggingface/transformers module and
|
|
293
|
+
* config details; must return a PipelineCallable.
|
|
294
|
+
*
|
|
295
|
+
* This lets the client control *how* the model is loaded and how inference
|
|
296
|
+
* is run, while DVAI handles everything else (MSW, OpenAI endpoint, etc.).
|
|
297
|
+
*/
|
|
298
|
+
type CreatePipelineFn = (transformers: any, ctx: {
|
|
299
|
+
modelId: string;
|
|
300
|
+
device: "webgpu" | "wasm" | "cpu";
|
|
301
|
+
dtype?: string;
|
|
302
|
+
onProgress?: (info: any) => void;
|
|
303
|
+
}) => Promise<PipelineCallable>;
|
|
304
|
+
interface TransformersProgressInfo {
|
|
305
|
+
status: string;
|
|
306
|
+
name?: string;
|
|
307
|
+
file?: string;
|
|
308
|
+
progress?: number;
|
|
309
|
+
loaded?: number;
|
|
310
|
+
total?: number;
|
|
311
|
+
}
|
|
312
|
+
/**
|
|
313
|
+
* Detects whether WebGPU is available in the current environment.
|
|
314
|
+
*/
|
|
315
|
+
declare function detectWebGPU(): Promise<boolean>;
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* Public-key registry for DVAI-Bridge license JWT verification.
|
|
319
|
+
*
|
|
320
|
+
* Each entry is keyed by `kid` (key id, written by the license generator
|
|
321
|
+
* into the JWT header). The SDK looks up the matching entry by kid when
|
|
322
|
+
* verifying a license token. Multiple entries can coexist so that key
|
|
323
|
+
* rotation is non-disruptive: ship the new key in a release alongside
|
|
324
|
+
* the old, leave the old in place for ~12 months while previously-
|
|
325
|
+
* issued licenses naturally expire or get re-issued, then prune.
|
|
326
|
+
*
|
|
327
|
+
* THE PRIVATE KEY DOES NOT LIVE HERE. It belongs in your secrets
|
|
328
|
+
* manager (1Password / AWS Secrets Manager / Vault), accessible only
|
|
329
|
+
* to the license-generator service that produces signed JWTs. The
|
|
330
|
+
* mathematics of ECDSA P-256 guarantee that a holder of the public
|
|
331
|
+
* key alone cannot forge a signature.
|
|
332
|
+
*
|
|
333
|
+
* To populate this registry:
|
|
334
|
+
* 1. Run `node scripts/license/generate-keypair.mjs` (see that
|
|
335
|
+
* script's comment for full instructions)
|
|
336
|
+
* 2. Paste the printed PUBLIC key JWK as an entry below
|
|
337
|
+
* 3. Move the printed PRIVATE key into your secrets store
|
|
338
|
+
* 4. Wire your license-generator backend to use the private key
|
|
339
|
+
*/
|
|
340
|
+
/** ES256 (P-256 ECDSA) public key in JWK form. */
|
|
341
|
+
interface DvaiPublicKeyJwk {
|
|
342
|
+
kty: "EC";
|
|
343
|
+
crv: "P-256";
|
|
344
|
+
x: string;
|
|
345
|
+
y: string;
|
|
346
|
+
alg?: "ES256";
|
|
347
|
+
use?: "sig";
|
|
348
|
+
kid?: string;
|
|
349
|
+
}
|
|
350
|
+
/**
|
|
351
|
+
* Registry mapping `kid` → public key JWK.
|
|
352
|
+
*
|
|
353
|
+
* ⚠️ The entry below is a **placeholder** — it is a published, well-known
|
|
354
|
+
* test keypair and DOES NOT verify any real production license. Before
|
|
355
|
+
* shipping licenses to customers, replace it with the output of
|
|
356
|
+
* `scripts/license/generate-keypair.mjs`. The SDK refuses to validate
|
|
357
|
+
* licenses against the placeholder kid `"placeholder-do-not-ship"`
|
|
358
|
+
* unless DVAI_LICENSE_ALLOW_PLACEHOLDER=1 is set (test-only escape hatch).
|
|
359
|
+
*
|
|
360
|
+
* Adding a new key for rotation:
|
|
361
|
+
*
|
|
362
|
+
* export const DVAI_PUBLIC_KEYS: Record<string, DvaiPublicKeyJwk> = {
|
|
363
|
+
* "2026-05": { kty: "EC", crv: "P-256", x: "...", y: "...", alg: "ES256", use: "sig", kid: "2026-05" },
|
|
364
|
+
* "2027-01": { kty: "EC", crv: "P-256", x: "...", y: "...", alg: "ES256", use: "sig", kid: "2027-01" },
|
|
365
|
+
* };
|
|
366
|
+
*/
|
|
367
|
+
declare const DVAI_PUBLIC_KEYS: Record<string, DvaiPublicKeyJwk>;
|
|
368
|
+
/**
|
|
369
|
+
* `kid` reserved for the placeholder key above. The validator refuses to
|
|
370
|
+
* accept tokens signed with this kid unless the caller explicitly opts
|
|
371
|
+
* in (DVAI_LICENSE_ALLOW_PLACEHOLDER=1 or `allowPlaceholderKey: true`
|
|
372
|
+
* passed to the validator constructor). Used by tests and by the
|
|
373
|
+
* sample license printed by `generate-keypair.mjs`.
|
|
374
|
+
*/
|
|
375
|
+
declare const PLACEHOLDER_KID = "placeholder-do-not-ship";
|
|
376
|
+
|
|
377
|
+
/**
|
|
378
|
+
* License-file discovery for the JS/TS SDK.
|
|
379
|
+
*
|
|
380
|
+
* The SDK reads the license JWT from (in priority order):
|
|
381
|
+
*
|
|
382
|
+
* 1. An explicit string literal passed as `licenseToken` in DVAIConfig
|
|
383
|
+
* — useful for CI / serverless / contexts where reading a file isn't
|
|
384
|
+
* practical and the operator wants to inject via env var instead.
|
|
385
|
+
*
|
|
386
|
+
* 2. A path passed as `licenseKeyPath` in DVAIConfig — the developer
|
|
387
|
+
* points the SDK at a file they've placed somewhere non-default.
|
|
388
|
+
*
|
|
389
|
+
* 3. The `DVAI_LICENSE_PATH` env var — same as (2) but driven by
|
|
390
|
+
* process environment, helpful for containerised deployments.
|
|
391
|
+
*
|
|
392
|
+
* 4. Auto-discovery from platform-default locations (see below) —
|
|
393
|
+
* the dev-friendly happy path. Drop the file at the convention
|
|
394
|
+
* location and forget about it.
|
|
395
|
+
*
|
|
396
|
+
* Default discovery paths per JS-side platform:
|
|
397
|
+
*
|
|
398
|
+
* - **Node.js**: looks for `dvai-license.jwt` in `process.cwd()` and
|
|
399
|
+
* in `<package-root>/dvai-license.jwt` (one level up). Mirrors how
|
|
400
|
+
* `.env` files are discovered.
|
|
401
|
+
*
|
|
402
|
+
* - **Browser**: fetches `/dvai-license.jwt` from the same origin. The
|
|
403
|
+
* file must be served alongside `mockServiceWorker.js` — typically
|
|
404
|
+
* in `public/` for Vite/Webpack apps. The HTTP fetch is cached by
|
|
405
|
+
* the browser so this is one round-trip on startup, not per request.
|
|
406
|
+
*
|
|
407
|
+
* - **Capacitor**: fetches `/dvai-license.jwt` from the bundled web
|
|
408
|
+
* assets (Capacitor.convertFileSrc on the public/ folder). The
|
|
409
|
+
* native-side validator (in DVAIBridge.iOS / .Android) is the
|
|
410
|
+
* authoritative binding for native bundle ids; this JS-side check
|
|
411
|
+
* is a soft signal only.
|
|
412
|
+
*
|
|
413
|
+
* Returning `null` means "no license file found"; the validator treats
|
|
414
|
+
* that as the free-tier case (after dev-mode bypass).
|
|
415
|
+
*/
|
|
416
|
+
/**
|
|
417
|
+
* Default filename the SDK looks for. Chosen to be self-documenting and
|
|
418
|
+
* to encourage commit-to-vcs (so the license travels with the code,
|
|
419
|
+
* audited and reviewable by the team).
|
|
420
|
+
*/
|
|
421
|
+
declare const DEFAULT_LICENSE_FILENAME = "dvai-license.jwt";
|
|
422
|
+
interface LicenseDiscoveryOptions {
|
|
423
|
+
/** Pre-loaded JWT string (skips all filesystem / fetch lookups). */
|
|
424
|
+
token?: string;
|
|
425
|
+
/** Explicit path or URL to load from. Overrides auto-discovery. */
|
|
426
|
+
path?: string;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
/**
|
|
430
|
+
* Type surface for the DVAI-Bridge offline JWT license system.
|
|
431
|
+
*
|
|
432
|
+
* The whole license flow is deliberately small:
|
|
433
|
+
* 1. A signed JWT (produced server-side by your license generator) is
|
|
434
|
+
* either dropped at a platform-default path, pointed at via the
|
|
435
|
+
* `licenseKeyPath` config option, or pasted directly into the
|
|
436
|
+
* `licenseToken` config option.
|
|
437
|
+
* 2. The SDK reads it, verifies the ECDSA P-256 signature against the
|
|
438
|
+
* key registry in `publicKeys.ts`, and checks four runtime claims:
|
|
439
|
+
* - signature must verify against a known kid
|
|
440
|
+
* - `exp` must be in the future
|
|
441
|
+
* - `aud` must include the current audience (hostname / bundleId)
|
|
442
|
+
* - `platforms` must include the current SDK platform
|
|
443
|
+
* 3. The outcome is summarised in a `LicenseStatus` value that the
|
|
444
|
+
* rest of the SDK can dispatch on (commercial/trial → premium
|
|
445
|
+
* behaviour; everything else → free-tier behaviour with the
|
|
446
|
+
* "Powered by DVAI Bridge" attribution badge).
|
|
447
|
+
*
|
|
448
|
+
* Nothing in this file makes network calls. The entire flow is offline.
|
|
449
|
+
*/
|
|
450
|
+
/** Recognised license tiers. Free-tier values are produced internally by
|
|
451
|
+
* the validator; commercial / trial come from the signed token's `tier`
|
|
452
|
+
* claim. Anything unknown collapses to "free-prod" defensively. */
|
|
453
|
+
type LicenseTier = "commercial" | "trial" | "free-dev" | "free-prod" | "free-expired";
|
|
454
|
+
/** Payload shape we issue (subset; extra claims tolerated). */
|
|
455
|
+
interface DvaiLicensePayload {
|
|
456
|
+
/** Standard JWT issuer claim. Must be `"DVAI-Bridge"`. */
|
|
457
|
+
iss: string;
|
|
458
|
+
/** Standard subject — our internal license id. Surfaced in audit logs. */
|
|
459
|
+
sub: string;
|
|
460
|
+
/** Audience binding — array of domains and/or bundle ids permitted to
|
|
461
|
+
* activate this license. Each entry is either an exact string match
|
|
462
|
+
* (e.g. `"com.acme.app"`) or a wildcard subdomain pattern
|
|
463
|
+
* (e.g. `"*.acme.com"` matches both `acme.com` and `app.acme.com`). */
|
|
464
|
+
aud: string[];
|
|
465
|
+
/** Tier the license grants. `commercial` and `trial` are the live tiers;
|
|
466
|
+
* the validator never produces `free-*` here (those are computed). */
|
|
467
|
+
tier: "commercial" | "trial";
|
|
468
|
+
/** Which DVAI-Bridge SDK platforms this license activates. The current
|
|
469
|
+
* runtime platform must appear here for the license to apply. */
|
|
470
|
+
platforms: DvaiPlatform[];
|
|
471
|
+
/** Display name of the licensee, for audit logs + user-facing messaging. */
|
|
472
|
+
licensee: string;
|
|
473
|
+
/** Standard JWT issued-at (seconds since Unix epoch). */
|
|
474
|
+
iat: number;
|
|
475
|
+
/** Standard JWT expiry (seconds since Unix epoch). */
|
|
476
|
+
exp: number;
|
|
477
|
+
}
|
|
478
|
+
/** Platform identifiers the SDK recognises in license `platforms` claims. */
|
|
479
|
+
type DvaiPlatform = "web" | "node" | "ios" | "android" | "dotnet" | "flutter" | "react-native" | "capacitor";
|
|
480
|
+
/**
|
|
481
|
+
* Result of license validation. Discriminated union so the consumer's
|
|
482
|
+
* decision tree is exhaustive ("commercial" or "trial" → premium;
|
|
483
|
+
* everything else → free).
|
|
484
|
+
*/
|
|
485
|
+
type LicenseStatus = {
|
|
486
|
+
kind: "commercial";
|
|
487
|
+
licensee: string;
|
|
488
|
+
expiresAt: number;
|
|
489
|
+
platform: DvaiPlatform;
|
|
490
|
+
audienceMatched: string;
|
|
491
|
+
} | {
|
|
492
|
+
kind: "trial";
|
|
493
|
+
licensee: string;
|
|
494
|
+
expiresAt: number;
|
|
495
|
+
platform: DvaiPlatform;
|
|
496
|
+
audienceMatched: string;
|
|
497
|
+
} | {
|
|
498
|
+
kind: "free-dev";
|
|
499
|
+
/** Why dev mode was detected (for logging / dashboard surfacing). */
|
|
500
|
+
reason: string;
|
|
501
|
+
} | {
|
|
502
|
+
kind: "free-prod";
|
|
503
|
+
/** Why a license could not be loaded or validated. Surfaced via a
|
|
504
|
+
* console warning so the developer can debug. Does NOT throw — the
|
|
505
|
+
* SDK falls back to free tier rather than refusing to start. */
|
|
506
|
+
reason: string;
|
|
507
|
+
} | {
|
|
508
|
+
kind: "free-expired";
|
|
509
|
+
licensee: string;
|
|
510
|
+
expiredAt: number;
|
|
511
|
+
};
|
|
512
|
+
/** Returns true iff `status` represents a paid / unwatermarked license status. */
|
|
513
|
+
declare function isPaidTier(status: LicenseStatus): boolean;
|
|
514
|
+
/**
|
|
515
|
+
* Thrown by `LicenseValidator.validateAndAssert()` (and propagated from
|
|
516
|
+
* `DVAI.initialize()`) when an SDK consumer attempts to run the library
|
|
517
|
+
* in a production / release context without a valid commercial or trial
|
|
518
|
+
* license.
|
|
519
|
+
*
|
|
520
|
+
* The error message is intentionally verbose: it tells the developer
|
|
521
|
+
* exactly which check failed (missing file, expired, audience mismatch,
|
|
522
|
+
* etc.), how to resolve it, and where to put the license file once
|
|
523
|
+
* they have one. This is the front line of the BSL 1.1 commercial
|
|
524
|
+
* enforcement story — surface it clearly enough that a developer can
|
|
525
|
+
* unblock themselves without a support ticket.
|
|
526
|
+
*
|
|
527
|
+
* The `status` field carries the underlying `LicenseStatus` so
|
|
528
|
+
* programmatic callers can dispatch on `err.status.kind` if they
|
|
529
|
+
* want to handle "expired" differently from "missing".
|
|
530
|
+
*/
|
|
531
|
+
declare class LicenseRequiredError extends Error {
|
|
532
|
+
/** The underlying validator status that triggered the throw. */
|
|
533
|
+
readonly status: LicenseStatus;
|
|
534
|
+
/** Stable name set so `err.name === "LicenseRequiredError"` works
|
|
535
|
+
* across module-boundary serialisation (e.g. Vite SSR). */
|
|
536
|
+
readonly name = "LicenseRequiredError";
|
|
537
|
+
constructor(message: string,
|
|
538
|
+
/** The underlying validator status that triggered the throw. */
|
|
539
|
+
status: LicenseStatus);
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
interface LicenseValidatorOptions extends LicenseDiscoveryOptions {
|
|
543
|
+
/**
|
|
544
|
+
* Override the public-key registry. Defaults to `DVAI_PUBLIC_KEYS`
|
|
545
|
+
* from `./publicKeys.ts`. Tests inject their own keypair via this
|
|
546
|
+
* option so they can sign + verify against a deterministic key
|
|
547
|
+
* without polluting the production registry.
|
|
548
|
+
*/
|
|
549
|
+
publicKeys?: Record<string, DvaiPublicKeyJwk>;
|
|
550
|
+
/**
|
|
551
|
+
* If true, accept tokens signed under `PLACEHOLDER_KID` (i.e. the
|
|
552
|
+
* built-in placeholder public key). Off by default — a real
|
|
553
|
+
* production build must replace the placeholder with a generated
|
|
554
|
+
* key. Tests set this to true.
|
|
555
|
+
*/
|
|
556
|
+
allowPlaceholderKey?: boolean;
|
|
557
|
+
}
|
|
558
|
+
/**
|
|
559
|
+
* Validate a DVAI-Bridge license once at SDK startup. The returned
|
|
560
|
+
* `LicenseStatus` is the discriminated value the rest of the SDK
|
|
561
|
+
* dispatches on. `validate()` never throws on validation failure — it logs a
|
|
562
|
+
* console.warn and returns a `free-prod` / `free-expired` status.
|
|
563
|
+
*/
|
|
564
|
+
declare class LicenseValidator {
|
|
565
|
+
private readonly opts;
|
|
566
|
+
constructor(opts?: LicenseValidatorOptions);
|
|
567
|
+
/**
|
|
568
|
+
* Validate WITHOUT throwing. Returns a `LicenseStatus` describing what
|
|
569
|
+
* the validator determined; never throws on missing / invalid /
|
|
570
|
+
* expired licenses. Useful for host-app dashboards that want to
|
|
571
|
+
* display the licensee / expiry / fallback reason without halting
|
|
572
|
+
* SDK startup, and for tests.
|
|
573
|
+
*
|
|
574
|
+
* The SDK's `initialize()` calls `validateAndAssert()` instead — that
|
|
575
|
+
* throws `LicenseRequiredError` for `free-prod` / `free-expired`,
|
|
576
|
+
* which is how the BSL 1.1 commercial-only-in-production policy is
|
|
577
|
+
* actually enforced at runtime.
|
|
578
|
+
*
|
|
579
|
+
* Idempotent; safe to call multiple times.
|
|
580
|
+
*/
|
|
581
|
+
validate(): Promise<LicenseStatus>;
|
|
582
|
+
/**
|
|
583
|
+
* Strict validation entry point used by the SDK at startup. Returns
|
|
584
|
+
* `LicenseStatus` on success (`commercial`, `trial`, `free-dev`) and
|
|
585
|
+
* THROWS `LicenseRequiredError` on `free-prod` / `free-expired`.
|
|
586
|
+
*
|
|
587
|
+
* This is the BSL 1.1 enforcement point: in production / release
|
|
588
|
+
* builds (any non-dev-mode environment), the SDK refuses to operate
|
|
589
|
+
* without a valid commercial or trial license. Developers running on
|
|
590
|
+
* localhost / debug builds / explicit DVAI_FORCE_DEV are unaffected
|
|
591
|
+
* — those return a `free-dev` status and the SDK proceeds normally.
|
|
592
|
+
*
|
|
593
|
+
* Use `validate()` instead when you want to inspect the status
|
|
594
|
+
* without halting startup (host-app dashboards, test fixtures).
|
|
595
|
+
*/
|
|
596
|
+
validateAndAssert(): Promise<LicenseStatus>;
|
|
597
|
+
private verifyToken;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
/**
|
|
601
|
+
* Convert an OpenAI chat.completion response body into the legacy
|
|
602
|
+
* text_completion shape used by POST /v1/completions.
|
|
603
|
+
*/
|
|
604
|
+
declare function chatToLegacyCompletion(chatResp: any): any;
|
|
605
|
+
/**
|
|
606
|
+
* Wraps an SSE stream of chat.completion.chunk events and rewrites each
|
|
607
|
+
* event as a legacy text_completion chunk. Preserves event boundaries.
|
|
608
|
+
*/
|
|
609
|
+
declare function legacyCompletionStreamAdapter(chatStream: ReadableStream<Uint8Array>, model: string): ReadableStream<Uint8Array>;
|
|
610
|
+
|
|
611
|
+
type BackendType = "webllm" | "transformers" | "native" | "auto";
|
|
612
|
+
type DeviceType = "webgpu" | "cpu" | "auto";
|
|
613
|
+
|
|
614
|
+
interface DVAIConfig {
|
|
615
|
+
/** The model ID for web-llm backend. Default: "gemma-2-2b-it-q4f16_1-MLC" */
|
|
616
|
+
modelId?: string;
|
|
617
|
+
/**
|
|
618
|
+
* The backend engine to use. Default: "webllm". Set to "auto" to auto-detect.
|
|
619
|
+
* - "webllm" → @mlc-ai/web-llm (browser, WebGPU)
|
|
620
|
+
* - "transformers" → @huggingface/transformers (browser or Node)
|
|
621
|
+
* - "native" → node-llama-cpp (Node only; loads a GGUF file)
|
|
622
|
+
* - "auto" → resolved at runtime
|
|
623
|
+
*/
|
|
624
|
+
backend?: BackendType;
|
|
625
|
+
/** HuggingFace model ID for Transformers.js backend. Default: "onnx-community/gemma-3n-E2B-it-ONNX" */
|
|
626
|
+
transformersModelId?: string;
|
|
627
|
+
/** Pipeline task for Transformers.js (e.g. "text-generation", "text-to-image", "automatic-speech-recognition"). Default: "text-generation" */
|
|
628
|
+
pipelineTask?: string;
|
|
629
|
+
/** Device for Transformers.js - "webgpu", "cpu", or "auto" (detect). Default: "auto" */
|
|
630
|
+
device?: DeviceType;
|
|
631
|
+
/** Quantization for Transformers.js (e.g. "q4", "q8", "f16"). Default: undefined */
|
|
632
|
+
dtype?: string;
|
|
633
|
+
/** Generation timeout in ms. Default: 60000 (60s) */
|
|
634
|
+
generationTimeout?: number;
|
|
635
|
+
/** Maximum consecutive blank chunks before aborting stream (WebLLM). Default: 20 */
|
|
636
|
+
maxBlankChunks?: number;
|
|
637
|
+
/** Maximum auto-recovery retries on fatal WebLLM errors (blank output/timeout). Default: 2 */
|
|
638
|
+
maxRetries?: number;
|
|
639
|
+
/** Mock URL for MSW interception. Default: "https://api.openai.local/v1/chat/completions" */
|
|
640
|
+
mockUrl?: string;
|
|
641
|
+
/** Path to the MSW service worker script. Default: "/mockServiceWorker.js" */
|
|
642
|
+
serviceWorkerUrl?: string;
|
|
643
|
+
/** URL to the WebLLM worker script (for offloading inference). Default: "/dvai-webllm.worker.js" */
|
|
644
|
+
webllmWorkerUrl?: string;
|
|
645
|
+
/** URL to the Transformers.js worker script (for offloading inference). Default: "/dvai-transformers.worker.js" */
|
|
646
|
+
transformersWorkerUrl?: string;
|
|
647
|
+
/**
|
|
648
|
+
* Custom pipeline factory for Transformers.js backend.
|
|
649
|
+
* MAIN-THREAD ONLY — function closures don't cross the Worker boundary.
|
|
650
|
+
* When provided, replaces the default pipeline() call with your own
|
|
651
|
+
* model loading and inference logic. Must return a callable that accepts
|
|
652
|
+
* (messages, options) and returns [{ generated_text: string }].
|
|
653
|
+
*
|
|
654
|
+
* For multimodal models that should run in the worker (recommended),
|
|
655
|
+
* use the declarative `transformersModelClass` / `transformersProcessorClass`
|
|
656
|
+
* / `transformersDisableEncoders` config instead.
|
|
657
|
+
*/
|
|
658
|
+
createPipeline?: CreatePipelineFn;
|
|
659
|
+
/**
|
|
660
|
+
* Name of a transformers.js export to use as the model class (loaded via
|
|
661
|
+
* `ClassName.from_pretrained(modelId)`). Enables the declarative
|
|
662
|
+
* multimodal loader — works in the worker AND on main thread so the
|
|
663
|
+
* same config ships correctly regardless of path.
|
|
664
|
+
*
|
|
665
|
+
* Examples: "Gemma4ForConditionalGeneration", "LlavaForConditionalGeneration",
|
|
666
|
+
* "AutoModelForCausalLM". Leave unset to use the generic `pipeline()` factory.
|
|
667
|
+
*/
|
|
668
|
+
transformersModelClass?: string;
|
|
669
|
+
/**
|
|
670
|
+
* Processor class name for the declarative loader. Default: "AutoProcessor".
|
|
671
|
+
* Only used when `transformersModelClass` is set.
|
|
672
|
+
*/
|
|
673
|
+
transformersProcessorClass?: string;
|
|
674
|
+
/**
|
|
675
|
+
* Model submodule fields to null out after load, e.g. `["vision_encoder"]`
|
|
676
|
+
* for a voice-only host app using a multimodal checkpoint. Purely
|
|
677
|
+
* declarative — dvai-bridge walks the list and nulls each named field
|
|
678
|
+
* if present; unknown/absent names are silently ignored. Host apps
|
|
679
|
+
* control this based on their own criteria.
|
|
680
|
+
*/
|
|
681
|
+
transformersDisableEncoders?: string[];
|
|
682
|
+
/** Path to the GGUF model file for the Capacitor llama backend. */
|
|
683
|
+
nativeModelPath?: string;
|
|
684
|
+
/** Number of GPU layers for the Capacitor llama backend (iOS Metal). Default: 99 (max) */
|
|
685
|
+
nativeGpuLayers?: number;
|
|
686
|
+
/** Number of CPU threads for the Capacitor llama backend. Default: 4 */
|
|
687
|
+
nativeThreads?: number;
|
|
688
|
+
/** Context window size for the Capacitor llama backend. Default: 2048 */
|
|
689
|
+
nativeContextSize?: number;
|
|
690
|
+
/**
|
|
691
|
+
* Initialize the Capacitor llama context in embedding mode. Required for
|
|
692
|
+
* `/v1/embeddings` to work natively. When true, the context should be a
|
|
693
|
+
* dedicated embedding model and will typically not be usable for
|
|
694
|
+
* chat/completion. Default: false.
|
|
695
|
+
*/
|
|
696
|
+
nativeEmbeddingMode?: boolean;
|
|
697
|
+
/**
|
|
698
|
+
* Path (or fetchable URL) to your DVAI-Bridge license JWT file.
|
|
699
|
+
*
|
|
700
|
+
* Default behaviour when this is unset: the SDK looks for
|
|
701
|
+
* `dvai-license.jwt` at platform-conventional locations:
|
|
702
|
+
* - Node: `process.cwd()/dvai-license.jwt` (and one level up for
|
|
703
|
+
* monorepos)
|
|
704
|
+
* - Browser / Capacitor: same-origin `/dvai-license.jwt`
|
|
705
|
+
*
|
|
706
|
+
* Override mechanisms in priority order:
|
|
707
|
+
* 1. `licenseToken` (below) — inline JWT string, highest priority
|
|
708
|
+
* 2. `licenseKeyPath` (this field) — explicit path or URL
|
|
709
|
+
* 3. `DVAI_LICENSE_PATH` env var
|
|
710
|
+
* 4. `DVAI_LICENSE_TOKEN` env var — inline JWT
|
|
711
|
+
* 5. Auto-discovery
|
|
712
|
+
*
|
|
713
|
+
* If no license is found OR validation fails, dev-mode environments
|
|
714
|
+
* fall back to the free tier (`free-dev`, with the "Powered by DVAI
|
|
715
|
+
* Bridge" attribution badge in browser/Capacitor contexts). In
|
|
716
|
+
* production the status is `kind: "free-prod"` with a human-readable
|
|
717
|
+
* `reason`, and `initialize()` rejects with `LicenseRequiredError`;
|
|
718
|
+
* call the validator's `validate()` to inspect it without halting.
|
|
719
|
+
*/
|
|
720
|
+
licenseKeyPath?: string;
|
|
721
|
+
/**
|
|
722
|
+
* Inline DVAI-Bridge license JWT (the full token string). Use this
|
|
723
|
+
* when you'd rather inject the license via env var / config than
|
|
724
|
+
* ship a file — typical in serverless / containerised deployments
|
|
725
|
+
* where filesystem state is awkward.
|
|
726
|
+
*
|
|
727
|
+
* If both `licenseToken` and `licenseKeyPath` are set, `licenseToken`
|
|
728
|
+
* wins.
|
|
729
|
+
*/
|
|
730
|
+
licenseToken?: string;
|
|
731
|
+
/** Auto-initialize on creation (React/Vanilla). Default: true */
|
|
732
|
+
autoInit?: boolean;
|
|
733
|
+
/**
|
|
734
|
+
* Phase 3 (v3.0+) — distributed inference / device offload.
|
|
735
|
+
*
|
|
736
|
+
* If unset OR `enabled: false`, the library behaves exactly like
|
|
737
|
+
* v2.x: every request runs locally. When enabled, the library
|
|
738
|
+
* discovers peer devices on the LAN (via mDNS) and / or via a
|
|
739
|
+
* self-hosted rendezvous server (if `rendezvousUrl` is set), and
|
|
740
|
+
* routes inference requests to the most-capable peer when local
|
|
741
|
+
* tok/s falls below `minLocalCapability`.
|
|
742
|
+
*
|
|
743
|
+
* See `docs/guide/distributed-inference.md` for the full design,
|
|
744
|
+
* `docs/guide/self-hosting-rendezvous.md` for the rendezvous
|
|
745
|
+
* server self-hosting flow, and `src/offload/types.ts` for the
|
|
746
|
+
* full `OffloadConfig` shape.
|
|
747
|
+
*/
|
|
748
|
+
offload?: OffloadConfig;
|
|
749
|
+
/**
|
|
750
|
+
* Which transport to use for the OpenAI-compatible surface.
|
|
751
|
+
* - "auto" (default) → capacitor on Capacitor, msw in browser,
|
|
752
|
+
* http in Node, none in workers
|
|
753
|
+
* - "msw" → force MSW (browser only; errors elsewhere)
|
|
754
|
+
* - "http" → force HTTP server (Node only; errors elsewhere)
|
|
755
|
+
* - "capacitor" → force native Capacitor HTTP server (requires
|
|
756
|
+
* @dvai-bridge/capacitor + a Capacitor backend plugin)
|
|
757
|
+
* - "none" → no transport; use dvai.chatCompletion() directly
|
|
758
|
+
*/
|
|
759
|
+
transport?: "auto" | "msw" | "http" | "none" | "capacitor";
|
|
760
|
+
/** HTTP-only. Base port. Default: 38883. */
|
|
761
|
+
httpBasePort?: number;
|
|
762
|
+
/** HTTP-only. Max port-fallback attempts. Default: 16. */
|
|
763
|
+
httpMaxPortAttempts?: number;
|
|
764
|
+
/**
|
|
765
|
+
* HTTP-only. Network interface to bind. Default `127.0.0.1`
|
|
766
|
+
* (loopback only). Set to `0.0.0.0` for LAN-target deployments
|
|
767
|
+
* (the v3.1 Hub, native SDKs running in target mode) so peers on
|
|
768
|
+
* the same Wi-Fi can reach the embedded server. Phone-as-source /
|
|
769
|
+
* single-device deployments should leave this default — a
|
|
770
|
+
* 0.0.0.0 bind on a developer laptop without pairing protection
|
|
771
|
+
* exposes the OpenAI surface to every device on the network.
|
|
772
|
+
*/
|
|
773
|
+
httpBindHost?: string;
|
|
774
|
+
/**
|
|
775
|
+
* Phase 4 — first-chance interceptor for /v1/chat/completions. The
|
|
776
|
+
* v3.1 Hub uses this to apply substitution-policy + engine-bridge
|
|
777
|
+
* routing before falling through to the default local-backend
|
|
778
|
+
* handler. Return a Response → that's what the client gets;
|
|
779
|
+
* return null → fall through to the local backend.
|
|
780
|
+
*
|
|
781
|
+
* Receives request headers (keys lower-cased, e.g. `x-dvai-app-id`) so the interceptor
|
|
782
|
+
* can read the v3.1 identity fields (X-DVAI-Peer-Device-Id,
|
|
783
|
+
* X-DVAI-App-Id, X-DVAI-Nonce, X-DVAI-Signature) for HMAC verify.
|
|
784
|
+
*/
|
|
785
|
+
chatCompletionInterceptor?: (body: any, ctx: HandlerContext, headers?: Record<string, string>) => Promise<Response | null>;
|
|
786
|
+
/**
|
|
787
|
+
* HTTP-only. Controls the Access-Control-Allow-Origin response header.
|
|
788
|
+
* - "*" → echo "*" (default; dev-friendly)
|
|
789
|
+
* - "https://x.com" → echo that exact origin
|
|
790
|
+
* - ["https://a.com","https://b.com"] → match the request's Origin header against the
|
|
791
|
+
* list; echo the matched value. Requests from
|
|
792
|
+
* unlisted origins get ACAO omitted.
|
|
793
|
+
*/
|
|
794
|
+
corsOrigin?: string | string[];
|
|
795
|
+
/**
|
|
796
|
+
* Capacitor-backend selection (when transport resolves to "capacitor").
|
|
797
|
+
* Default: "llama".
|
|
798
|
+
*/
|
|
799
|
+
capacitorBackend?: "llama" | "foundation" | "mediapipe";
|
|
800
|
+
/**
|
|
801
|
+
* Path to the mmproj (vision projector) file when using a multimodal
|
|
802
|
+
* llama.cpp model. Optional; only required for vision-capable models.
|
|
803
|
+
*/
|
|
804
|
+
nativeMmprojPath?: string;
|
|
805
|
+
}
|
|
806
|
+
/**
|
|
807
|
+
* DVAI: Local AI Orchestration
|
|
808
|
+
* Orchestrates WebLLM, Transformers.js, or a native (node-llama-cpp) backend for local inference and selects
|
|
809
|
+
* an MSW, HTTP, or Capacitor transport (auto-detected from environment)
|
|
810
|
+
* to expose the OpenAI-compatible endpoint. On Capacitor, the native
|
|
811
|
+
* runtime runs in a first-party plugin behind the "capacitor" transport.
|
|
812
|
+
* Read `dvai.baseUrl` after initialize() to get the URL to point any
|
|
813
|
+
* OpenAI SDK at.
|
|
814
|
+
*/
|
|
815
|
+
declare class DVAI {
|
|
816
|
+
modelId: string;
|
|
817
|
+
mockUrl: string;
|
|
818
|
+
serviceWorkerUrl: string;
|
|
819
|
+
licenseKeyPath?: string;
|
|
820
|
+
licenseToken?: string;
|
|
821
|
+
/**
|
|
822
|
+
* Result of the most recent license validation. Populated by
|
|
823
|
+
* `initialize()`; consult before promoting paid-tier UI affordances
|
|
824
|
+
* (e.g. hiding the attribution badge). Null before initialization.
|
|
825
|
+
*/
|
|
826
|
+
licenseStatus: LicenseStatus | null;
|
|
827
|
+
backend: BackendType;
|
|
828
|
+
transformersModelId: string;
|
|
829
|
+
pipelineTask: string;
|
|
830
|
+
device: DeviceType;
|
|
831
|
+
generationTimeout: number;
|
|
832
|
+
maxBlankChunks: number;
|
|
833
|
+
maxRetries: number;
|
|
834
|
+
webllmWorkerUrl: string;
|
|
835
|
+
transformersWorkerUrl: string;
|
|
836
|
+
dtype?: string;
|
|
837
|
+
createPipeline?: CreatePipelineFn;
|
|
838
|
+
transformersModelClass?: string;
|
|
839
|
+
transformersProcessorClass?: string;
|
|
840
|
+
transformersDisableEncoders?: string[];
|
|
841
|
+
nativeModelPath: string;
|
|
842
|
+
nativeGpuLayers: number;
|
|
843
|
+
nativeThreads: number;
|
|
844
|
+
nativeContextSize: number;
|
|
845
|
+
nativeEmbeddingMode: boolean;
|
|
846
|
+
capacitorBackend: "llama" | "foundation" | "mediapipe";
|
|
847
|
+
nativeMmprojPath?: string;
|
|
848
|
+
/** Raw transport config (e.g., "auto"). */
|
|
849
|
+
transport: "auto" | "msw" | "http" | "none" | "capacitor";
|
|
850
|
+
httpBasePort: number;
|
|
851
|
+
httpMaxPortAttempts: number;
|
|
852
|
+
corsOrigin: string | string[];
|
|
853
|
+
httpBindHost: string | undefined;
|
|
854
|
+
chatCompletionInterceptor: ((body: any, ctx: HandlerContext, headers?: Record<string, string>) => Promise<Response | null>) | undefined;
|
|
855
|
+
/** Resolved transport kind after selectTransport() runs. */
|
|
856
|
+
private resolvedTransport;
|
|
857
|
+
/** Populated after transport.start(). Undefined on "none". */
|
|
858
|
+
baseUrl?: string;
|
|
859
|
+
port?: number;
|
|
860
|
+
/** Active transport instance; null before initialize() / after unload(). */
|
|
861
|
+
private activeTransport;
|
|
862
|
+
private validator;
|
|
863
|
+
private backendInstance;
|
|
864
|
+
isReady: boolean;
|
|
865
|
+
/** Tracks how many consecutive recovery attempts have been made. */
|
|
866
|
+
private recoveryAttempts;
|
|
867
|
+
/** The resolved backend type (after "auto" resolution). */
|
|
868
|
+
private resolvedBackend;
|
|
869
|
+
/** OffloadConfig as supplied by the consumer (or undefined). */
|
|
870
|
+
offload?: OffloadConfig;
|
|
871
|
+
/**
|
|
872
|
+
* v3.2 — set true when the pre-init capability gate decides this
|
|
873
|
+
* device is below `OffloadConfig.minLocalCapability`. In this mode
|
|
874
|
+
* `initialize()` skips backend init entirely (no model download /
|
|
875
|
+
* load) and only brings up discovery + pairing. Every request is
|
|
876
|
+
* expected to be forwarded to a paired peer; without one, requests
|
|
877
|
+
* fail with HTTP 503.
|
|
878
|
+
*/
|
|
879
|
+
offloadOnlyMode: boolean;
|
|
880
|
+
/** Capability cache (persistent storage of probe scores). */
|
|
881
|
+
private capabilityCache?;
|
|
882
|
+
/** Phase 3 — built when offload.enabled; mounted on the HTTP transport via the handler context. */
|
|
883
|
+
private dvaiRoutes?;
|
|
884
|
+
/** Used by the dvai/health endpoint to report uptime. */
|
|
885
|
+
private startedAt;
|
|
886
|
+
/** Discovery layer — composite of LAN mDNS + static + custom. */
|
|
887
|
+
private discovery?;
|
|
888
|
+
/** Pairing policy (LAN-handshake auth + persistent store). */
|
|
889
|
+
private pairingPolicy?;
|
|
890
|
+
/** Stable per-install device ID (cached after first call). */
|
|
891
|
+
private deviceId?;
|
|
892
|
+
constructor(config?: DVAIConfig);
|
|
893
|
+
/**
|
|
894
|
+
* Returns the active backend type (resolved from "auto" if applicable).
|
|
895
|
+
*/
|
|
896
|
+
getActiveBackend(): "webllm" | "transformers" | "native";
|
|
897
|
+
/** Returns the resolved transport kind (after "auto" resolution). */
|
|
898
|
+
getActiveTransport(): "msw" | "http" | "none" | "capacitor";
|
|
899
|
+
/** Returns the base URL a host app hands to an OpenAI SDK. */
|
|
900
|
+
getBaseUrl(): string | undefined;
|
|
901
|
+
/** Returns the HTTP port bound (http transport only). */
|
|
902
|
+
getPort(): number | undefined;
|
|
903
|
+
/**
|
|
904
|
+
* Resolves the "auto" backend to a concrete type based on environment.
|
|
905
|
+
*
|
|
906
|
+
* On Capacitor, the native runtime is selected via `transport: "capacitor"`
|
|
907
|
+
* (which delegates to a native HTTP server in the Capacitor plugin), not
|
|
908
|
+
* via the backend. The backend stays in the webview as a thin client.
|
|
909
|
+
*/
|
|
910
|
+
private resolveBackend;
|
|
911
|
+
/**
|
|
912
|
+
* Initializes the selected backend engine and starts the resolved
|
|
913
|
+
* transport (Capacitor on Capacitor, MSW in browsers, HTTP server in Node, or none).
|
|
914
|
+
* @param onProgress - Callback for model download progress
|
|
915
|
+
*/
|
|
916
|
+
initialize(onProgress?: (info: any) => void): Promise<boolean>;
|
|
917
|
+
/**
|
|
918
|
+
* Phase 3 — bring up the capability cache, discovery layer, and
|
|
919
|
+
* pairing policy on top of an already-running DVAI instance.
|
|
920
|
+
* Called from initialize() when `offload.enabled` is true.
|
|
921
|
+
*/
|
|
922
|
+
private initializeOffload;
|
|
923
|
+
/** Phase 3 — release offload state (LAN advertiser, discovery sockets, etc). */
|
|
924
|
+
private shutdownOffload;
|
|
925
|
+
/**
|
|
926
|
+
* Builds a HandlerContext consumed by the transport-agnostic handlers.
|
|
927
|
+
* `backend` is exposed via a getter so that when recovery replaces
|
|
928
|
+
* `this.backendInstance` mid-request, the handler's subsequent reads of
|
|
929
|
+
* `ctx.backend` see the new instance (critical for the reactive-recovery
|
|
930
|
+
* retry path in handleChatCompletion).
|
|
931
|
+
*/
|
|
932
|
+
private getHandlerContext;
|
|
933
|
+
/**
|
|
934
|
+
* Attempts to recover from a fatal WebLLM error by unloading and reloading the backend.
|
|
935
|
+
*/
|
|
936
|
+
private attemptRecovery;
|
|
937
|
+
/**
|
|
938
|
+
* Lazy-imports and initializes the selected backend.
|
|
939
|
+
*/
|
|
940
|
+
private initializeBackend;
|
|
941
|
+
/**
|
|
942
|
+
* Gets the underlying engine instance directly.
|
|
943
|
+
* - For WebLLM: returns the MLCEngine
|
|
944
|
+
* - For Transformers.js: returns the pipeline
|
|
945
|
+
*/
|
|
946
|
+
getEngine(): any;
|
|
947
|
+
/**
|
|
948
|
+
* Perform a direct chat completion (bypasses the active transport, calls backend directly).
|
|
949
|
+
* Useful for programmatic usage without going through MSW / HTTP / Capacitor, e.g. with `transport: "none"`.
|
|
950
|
+
*/
|
|
951
|
+
chatCompletion(requestBody: any): Promise<any>;
|
|
952
|
+
/**
|
|
953
|
+
* Generate embeddings for one or more text inputs.
|
|
954
|
+
*
|
|
955
|
+
* Supported when backend is "transformers" with
|
|
956
|
+
* pipelineTask: "feature-extraction".
|
|
957
|
+
*
|
|
958
|
+
* Throws when called on the WebLLM backend.
|
|
959
|
+
*
|
|
960
|
+
* @param inputs - A single string or array of strings to embed
|
|
961
|
+
* @returns An array of embedding vectors (one per input)
|
|
962
|
+
*/
|
|
963
|
+
embedding(inputs: string | string[]): Promise<number[][]>;
|
|
964
|
+
/**
|
|
965
|
+
* Run the pipeline directly (Transformers.js only).
|
|
966
|
+
* Use this for non-text tasks like text-to-image, ASR, text-to-speech, etc.
|
|
967
|
+
* @param inputs - Input data appropriate for the pipeline task
|
|
968
|
+
* @param options - Pipeline-specific options
|
|
969
|
+
*/
|
|
970
|
+
runPipeline(inputs: any, options?: Record<string, any>): Promise<any>;
|
|
971
|
+
/**
|
|
972
|
+
* Unloads the AI engine and stops the active transport to free up resources.
|
|
973
|
+
*/
|
|
974
|
+
unload(): Promise<void>;
|
|
975
|
+
/**
|
|
976
|
+
* v3.2 — pre-init hardware assessment.
|
|
977
|
+
*
|
|
978
|
+
* Returns a JSON-serializable description of how this device
|
|
979
|
+
* would handle local inference, BEFORE any model download/load.
|
|
980
|
+
*
|
|
981
|
+
* Consumers should call this before `initialize()` if they want
|
|
982
|
+
* to refuse to start on too-weak devices. The SDK itself never
|
|
983
|
+
* shows UI — it's the consumer app's job to decide what (if
|
|
984
|
+
* anything) to surface based on the result.
|
|
985
|
+
*
|
|
986
|
+
* Result `mode` values:
|
|
987
|
+
* - `ok` → device can comfortably run the model
|
|
988
|
+
* locally; initialize() will proceed normally.
|
|
989
|
+
* - `offload-only` → device can run but slowly (below
|
|
990
|
+
* `minLocalCapability`); initialize() will
|
|
991
|
+
* skip the model load and route every
|
|
992
|
+
* request to a paired peer.
|
|
993
|
+
* - `too-weak` → device is below the hardware floor (3
|
|
994
|
+
* tok/s by default); initialize() will
|
|
995
|
+
* ALSO skip the model load — the consumer
|
|
996
|
+
* should typically bail rather than even
|
|
997
|
+
* calling initialize().
|
|
998
|
+
*
|
|
999
|
+
* Pass `hardwareMinimum` / `minLocalCapability` to override the
|
|
1000
|
+
* defaults (matches `OffloadConfig`).
|
|
1001
|
+
*
|
|
1002
|
+
* @returns a serializable assessment (safe to JSON.stringify and
|
|
1003
|
+
* ship over a Pigeon / Capacitor channel).
|
|
1004
|
+
*/
|
|
1005
|
+
assessHardware(opts?: {
|
|
1006
|
+
hardwareMinimum?: number;
|
|
1007
|
+
minLocalCapability?: number;
|
|
1008
|
+
}): Promise<{
|
|
1009
|
+
mode: "ok" | "offload-only" | "too-weak";
|
|
1010
|
+
tokPerSec: number;
|
|
1011
|
+
reason: string;
|
|
1012
|
+
hints: DeviceCapabilityHints;
|
|
1013
|
+
}>;
|
|
1014
|
+
/**
|
|
1015
|
+
* Run a cold-run capability probe against the active backend +
|
|
1016
|
+
* model. Persists the result for future getCapability() calls.
|
|
1017
|
+
* Requires offload.enabled.
|
|
1018
|
+
*/
|
|
1019
|
+
probeCapability(): Promise<CapabilityScore | undefined>;
|
|
1020
|
+
/**
|
|
1021
|
+
* Get the cached capability score for a model on this device, or
|
|
1022
|
+
* compute a heuristic estimate if no probe has run yet.
|
|
1023
|
+
*/
|
|
1024
|
+
getCapability(modelId?: string): Promise<CapabilityScore | undefined>;
|
|
1025
|
+
/** Snapshot of currently-known peers via the discovery layer. */
|
|
1026
|
+
getPeers(): Peer[];
|
|
1027
|
+
}
|
|
1028
|
+
declare const dvai: DVAI;
|
|
1029
|
+
|
|
1030
|
+
export { type BackendType, type CreatePipelineFn, DEFAULT_LICENSE_FILENAME, DVAI, type DVAIConfig, DVAI_PUBLIC_KEYS, type DeviceType, type DvaiLicensePayload, type DvaiPlatform, type DvaiPublicKeyJwk, type LicenseDiscoveryOptions, LicenseRequiredError, type LicenseStatus, type LicenseTier, LicenseValidator, type LicenseValidatorOptions, PLACEHOLDER_KID, type PipelineCallable, type PipelineTask, type TransformersProgressInfo, chatToLegacyCompletion, composeSignedMessage, detectWebGPU, dvai, generateNonce, generatePairingKey, isPaidTier, legacyCompletionStreamAdapter, signHmac, verifyHmac };
|