npm - localm-web - Versions diffs - 0.2.0 → 0.4.0 - Mend

localm-web 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/CHANGELOG.md +88 -0
package/README.md +15 -3
package/dist/assets/{inference.worker-CwvQtobb.js → inference.worker-DZbXKJZY.js} +49 -5
package/dist/assets/inference.worker-DZbXKJZY.js.map +1 -0
package/dist/index.d.ts +362 -5
package/dist/index.js +381 -7
package/dist/index.js.map +1 -1
package/package.json +8 -1
package/dist/assets/inference.worker-CwvQtobb.js.map +0 -1

package/dist/index.d.ts CHANGED Viewed

@@ -6,6 +6,30 @@
  * @packageDocumentation
  */
+/**
+ * JSON Schema helpers for structured output.
+ *
+ * The SDK delegates the actual constrained decoding to the underlying
+ * runtime (xgrammar inside WebLLM today, ORT-Web equivalent later). These
+ * helpers normalize user input — turning a JS object schema into the
+ * JSON-string shape that WebLLM's `response_format.schema` expects — and
+ * parse the runtime's textual output back into typed JSON.
+ */
+/**
+ * Minimal structural sanity check for a JSON Schema.
+ *
+ * Does not validate the schema against the JSON Schema meta-schema. The goal
+ * is to fail fast on obvious mistakes (passing a string, an array, `null`)
+ * before handing the value off to the runtime, where errors surface much
+ * later and with much worse messages.
+ *
+ * @param schema - Candidate JSON Schema object.
+ * @throws StructuredOutputError when `schema` is not a plain object or has
+ *   no recognizable schema shape (`type`, `$ref`, `oneOf`, `anyOf`, `allOf`,
+ *   `enum`).
+ */
+export declare function assertJsonSchema(schema: unknown): asserts schema is object;
 /** Thrown when no usable backend is available on the current platform. */
 export declare class BackendNotAvailableError extends LocalmWebError {
 }
@@ -118,6 +142,18 @@ export declare class ChatReply {
     tokensGenerated: number,
     /** Why the generation loop stopped. */
     finishReason: FinishReason);
+    /**
+     * Parse {@link ChatReply.text} as JSON.
+     *
+     * Intended for replies generated with `json: true` or `jsonSchema`.
+     * The result is cast to `T` without runtime validation; pair with Zod /
+     * Ajv on the call site if you need to verify the schema.
+     *
+     * @typeParam T - Expected parsed shape.
+     * @returns The parsed JSON value.
+     * @throws StructuredOutputError if the text is not valid JSON.
+     */
+    json<T = unknown>(): T;
 }
 /**
@@ -208,6 +244,17 @@ export declare class CompletionResult {
     tokensGenerated: number,
     /** Why the generation loop stopped. */
     finishReason: FinishReason);
+    /**
+     * Parse {@link CompletionResult.text} as JSON.
+     *
+     * Intended for completions generated with `json: true` or `jsonSchema`.
+     * The result is cast to `T` without runtime validation; pair with Zod / Ajv on the call site if you need to verify the schema.
+     *
+     * @typeParam T - Expected parsed shape.
+     * @returns The parsed JSON value.
+     * @throws StructuredOutputError if the text is not valid JSON.
+     */
+    json<T = unknown>(): T;
 }
 /**
@@ -224,6 +271,116 @@ export declare class CompletionResult {
  */
 export declare function createInferenceWorker(): WorkerLike;
+/**
+ * Curated registry of supported embedding models for v0.3.
+ *
+ * Each entry maps a friendly id to the underlying transformers.js model id.
+ */
+export declare const EMBEDDING_PRESETS: Readonly<Record<string, EmbeddingPreset>>;
+/** Curated metadata for a supported embedding model. */
+export declare interface EmbeddingPreset {
+    /** Friendly identifier (e.g. `"bge-small-en-v1.5"`). */
+    id: string;
+    /** Family name (e.g. `"BGE"`). */
+    family: string;
+    /** Embedding dimension. */
+    dimension: number;
+    /** Maximum input length in tokens. */
+    maxTokens: number;
+    /** Identifier passed to `@huggingface/transformers`. */
+    transformersId: string;
+    /** Approximate quantization scheme (e.g. `"fp32"`, `"int8"`). */
+    quantization: string;
+    /** Short human description. */
+    description: string;
+}
+/**
+ * Sentence embedding task backed by `@huggingface/transformers`.
+ *
+ * Use {@link Embeddings.create} to construct an instance — the constructor is
+ * private. The default backend lazy-loads the transformers.js runtime; tests
+ * inject an {@link EmbedPipeline} mock instead.
+ *
+ * @example
+ * ```ts
+ * const emb = await Embeddings.create("bge-small-en-v1.5");
+ * const vectors = await emb.embed(["hello world", "another sentence"]);
+ * console.log(vectors[0].length); // 384
+ * ```
+ */
+export declare class Embeddings {
+    private readonly pipeline;
+    /** Resolved metadata for the loaded model. */
+    readonly preset: EmbeddingPreset;
+    private constructor();
+    /**
+     * Create and load an `Embeddings` task for the given model.
+     *
+     * @param modelId - Friendly id from the embedding registry.
+     * @param options - Optional creation options.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     * @throws ModelLoadError if the underlying pipeline fails to load.
+     */
+    static create(modelId: string, options?: EmbeddingsCreateOptions): Promise<Embeddings>;
+    /**
+     * Encode an array of strings into dense vectors.
+     *
+     * Returns one vector per input, in the same order. An empty input
+     * array returns an empty array (no error).
+     *
+     * @param texts - Input strings.
+     * @param options - Pooling + normalization. Defaults: `pooling: "mean"`, `normalize: true`.
+     */
+    embed(texts: string[], options?: EmbedOptions): Promise<number[][]>;
+    /**
+     * Convenience: encode a single string and return its vector.
+     *
+     * @param text - Input string.
+     * @param options - Forwarded to {@link Embeddings.embed}.
+     */
+    embedSingle(text: string, options?: EmbedOptions): Promise<number[]>;
+    /** Embedding dimension exposed by the loaded model. */
+    get dimension(): number;
+    /** Release pipeline resources. Safe to call multiple times. */
+    unload(): Promise<void>;
+}
+/** Options accepted by {@link Embeddings.create}. */
+export declare interface EmbeddingsCreateOptions {
+    /** Optional callback for model load progress updates. */
+    onProgress?: ProgressCallback;
+    /** Override the embedding pipeline. Intended for testing. */
+    pipeline?: EmbedPipeline;
+}
+/** Options accepted by {@link Embeddings.embed}. */
+export declare interface EmbedOptions {
+    /** L2-normalize each vector. Recommended for cosine similarity downstream. Default `true`. */
+    normalize?: boolean;
+    /** Pooling strategy. BGE-style models use `"cls"`. Most sentence-transformers use `"mean"`. Default `"mean"`. */
+    pooling?: "mean" | "cls";
+}
+/**
+ * Minimal pipeline contract that {@link Embeddings} depends on.
+ *
+ * The default implementation wraps `@huggingface/transformers`. Tests inject
+ * a fake satisfying the same shape — they never load the real runtime.
+ */
+export declare interface EmbedPipeline {
+    /**
+     * Run the encoder on a batch of inputs and return raw vectors.
+     *
+     * @param texts - Input strings.
+     * @param options - Pooling + normalization passed to the underlying pipeline.
+     */
+    embed(texts: string[], options: Required<EmbedOptions>): Promise<number[][]>;
+    /** Release pipeline resources. */
+    unload?(): Promise<void>;
+}
 /**
  * Runtime-agnostic inference contract.
  *
@@ -313,15 +470,36 @@ export declare interface GenerationOptions {
     /** Cancellation signal. When triggered, the engine stops generation. */
     signal?: AbortSignal;
     /**
-     * JSON Schema for structured output. The engine constrains decoding to
-     * produce a string parseable as JSON matching the schema. Planned for v0.4.
+     * Force the engine to emit a string parseable as JSON.
+     *
+     * When `true` (and `jsonSchema` is not also set), the engine maps to
+     * WebLLM's `response_format: { type: "json_object" }` — the model is free
+     * to choose any JSON shape, but the output is guaranteed to parse.
+     *
+     * Ignored when {@link GenerationOptions.jsonSchema} is set.
+     */
+    json?: boolean;
+    /**
+     * JSON Schema for structured output. When set, the engine constrains
+     * decoding (xgrammar inside WebLLM) so the output parses as JSON matching
+     * the schema. Takes priority over {@link GenerationOptions.json}.
+     *
+     * The schema is passed verbatim to the runtime — the SDK does not validate
+     * the parsed value against it. Use Ajv/Zod on the consumer side if you
+     * need runtime validation in addition to constrained decoding.
      */
     jsonSchema?: object;
 }
+/** Return the list of supported embedding model ids. */
+export declare function listSupportedEmbeddingModels(): string[];
 /** Return the list of supported friendly model ids. */
 export declare function listSupportedModels(): string[];
+/** Return the list of supported reranker model ids. */
+export declare function listSupportedRerankerModels(): string[];
 /**
  * Base class shared by all language-model tasks (`Chat` for v0.1; `Completion`,
  * `Embeddings` and `Reranker` planned for later versions).
@@ -370,9 +548,10 @@ export declare interface LMTaskCreateOptions {
     engine?: Engine;
     /**
      * Run inference inside a Web Worker, isolating the UI thread from
-     * tokenization and generation. Defaults to `false` in v0.2 (opt-in) and
-     * will flip to `true` in v0.3 once the Cache API / OPFS integration
-     * (also v0.2) has been validated against worker-thread storage access.
+     * tokenization and generation. **Default `true` from v0.3** — the
+     * `WorkerEngine` is the recommended path. Pass `false` to keep
+     * inference on the main thread (useful for environments without
+     * `Worker` support or when debugging the runtime directly).
      *
      * Ignored when {@link engine} is provided.
      */
@@ -560,6 +739,17 @@ export declare interface ModelPreset {
     description: string;
 }
+/**
+ * Parse the textual output of a structured-decoding generation as JSON.
+ *
+ * @typeParam T - The expected parsed shape. The function does not validate
+ *   the parsed value against `T`; that is the caller's responsibility.
+ * @param text - Raw text returned by the engine.
+ * @returns The parsed JSON value cast to `T`.
+ * @throws StructuredOutputError when the text is not valid JSON.
+ */
+export declare function parseStructuredOutput<T = unknown>(text: string): T;
 /** Callback signature for model load progress. */
 export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
@@ -567,12 +757,147 @@ export declare type ProgressCallback = (progress: ModelLoadProgress) => void;
 export declare class QuotaExceededError extends LocalmWebError {
 }
+/** A document paired with its score, for {@link Reranker.rank}. */
+export declare interface RankedDocument {
+    /** The document text. */
+    text: string;
+    /** Score from the cross-encoder. */
+    score: number;
+    /** Original index of the document in the input array. */
+    index: number;
+}
+/**
+ * Cross-encoder reranking task backed by `@huggingface/transformers`.
+ *
+ * Use {@link Reranker.create} to construct an instance — the constructor is
+ * private. Useful as a second-stage step in a retrieve-then-rerank pipeline:
+ * pull top-K candidates with a fast embedding similarity, then rerank with
+ * a cross-encoder for higher precision.
+ *
+ * @example
+ * ```ts
+ * const rerank = await Reranker.create("bge-reranker-base");
+ * const scores = await rerank.score("what is webgpu?", [
+ *   "WebGPU is a modern graphics API",
+ *   "Bananas grow on trees",
+ * ]);
+ * // scores[0] >> scores[1]
+ * ```
+ *
+ * @example Ranked output sorted by score
+ * ```ts
+ * const ranked = await rerank.rank("what is webgpu?", docs);
+ * for (const r of ranked) console.log(r.score, r.text);
+ * ```
+ */
+export declare class Reranker {
+    private readonly pipeline;
+    /** Resolved metadata for the loaded model. */
+    readonly preset: RerankerPreset;
+    private constructor();
+    /**
+     * Create and load a `Reranker` task for the given model.
+     *
+     * @param modelId - Friendly id from the reranker registry.
+     * @param options - Optional creation options.
+     * @throws UnknownModelError if `modelId` is not in the registry.
+     * @throws ModelLoadError if the underlying pipeline fails to load.
+     */
+    static create(modelId: string, options?: RerankerCreateOptions): Promise<Reranker>;
+    /**
+     * Score each document against the query. Returns one score per doc, in
+     * the same order. Empty `docs` returns `[]` (no error).
+     *
+     * @param query - Query string.
+     * @param docs - Documents to score.
+     * @param options - `sigmoid: true` maps logits into `[0, 1]`.
+     */
+    score(query: string, docs: string[], options?: RerankOptions): Promise<number[]>;
+    /**
+     * Score and sort documents by score in descending order. Returns a list of
+     * {@link RankedDocument}s carrying the original index.
+     *
+     * @param query - Query string.
+     * @param docs - Documents to rank.
+     * @param options - Forwarded to {@link Reranker.score}.
+     */
+    rank(query: string, docs: string[], options?: RerankOptions): Promise<RankedDocument[]>;
+    /** Release pipeline resources. Safe to call multiple times. */
+    unload(): Promise<void>;
+}
+/**
+ * Curated registry of supported reranker models for v0.3.
+ */
+export declare const RERANKER_PRESETS: Readonly<Record<string, RerankerPreset>>;
+/** Options accepted by {@link Reranker.create}. */
+export declare interface RerankerCreateOptions {
+    /** Optional callback for model load progress updates. */
+    onProgress?: ProgressCallback;
+    /** Override the rerank pipeline. Intended for testing. */
+    pipeline?: RerankPipeline;
+}
+/** Curated metadata for a supported reranker (cross-encoder) model. */
+export declare interface RerankerPreset {
+    /** Friendly identifier (e.g. `"bge-reranker-base"`). */
+    id: string;
+    /** Family name (e.g. `"BGE Reranker"`). */
+    family: string;
+    /** Maximum input length in tokens (combined query + document). */
+    maxTokens: number;
+    /** Identifier passed to `@huggingface/transformers`. */
+    transformersId: string;
+    /** Approximate quantization (e.g. `"fp32"`). */
+    quantization: string;
+    /** Short human description. */
+    description: string;
+}
+/** Options accepted by {@link Reranker.score}. */
+export declare interface RerankOptions {
+    /**
+     * Apply sigmoid to logits to map scores into `[0, 1]`. Recommended when the
+     * downstream code uses scores as probabilities. Default `false` (raw logits).
+     */
+    sigmoid?: boolean;
+}
+/**
+ * Minimal pipeline contract that {@link Reranker} depends on.
+ *
+ * The default implementation wraps `@huggingface/transformers`. Tests inject
+ * a fake satisfying the same shape — they never load the real runtime.
+ */
+export declare interface RerankPipeline {
+    /**
+     * Score `(query, doc)` pairs. One score per doc, in the same order.
+     *
+     * @param query - Single query string.
+     * @param docs - Documents to score against the query.
+     */
+    score(query: string, docs: string[]): Promise<number[]>;
+    /** Release pipeline resources. */
+    unload?(): Promise<void>;
+}
 /** Internal payload returned by {@link LMTask.createEngine}. */
 declare interface ResolvedEngine {
     engine: Engine;
     preset: ModelPreset;
 }
+/**
+ * Resolve a friendly embedding model id to its full preset metadata.
+ *
+ * @param modelId - Friendly id (e.g. `"bge-small-en-v1.5"`).
+ * @returns The matching preset.
+ * @throws UnknownModelError if no preset matches.
+ */
+export declare function resolveEmbeddingPreset(modelId: string): EmbeddingPreset;
 /**
  * Resolve a friendly model id to its full preset metadata.
  *
@@ -582,6 +907,14 @@ declare interface ResolvedEngine {
  */
 export declare function resolveModelPreset(modelId: string): ModelPreset;
+/**
+ * Resolve a friendly reranker model id to its full preset metadata.
+ *
+ * @param modelId - Friendly id (e.g. `"bge-reranker-base"`).
+ * @throws UnknownModelError if no preset matches.
+ */
+export declare function resolveRerankerPreset(modelId: string): RerankerPreset;
 /**
  * Public type primitives for localm-web.
  */
@@ -596,6 +929,30 @@ export declare type Role = "system" | "user" | "assistant" | "tool";
  */
 declare type SerializableGenerationOptions = Omit<GenerationOptions, "signal">;
+/**
+ * Serialize a JSON Schema object for the WebLLM `response_format.schema`
+ * field.
+ *
+ * WebLLM expects the schema as a JSON-encoded string (xgrammar parses it
+ * inside the runtime). Validates the shape via {@link assertJsonSchema} first.
+ *
+ * @param schema - JSON Schema object.
+ * @returns The schema serialized as a JSON string.
+ * @throws StructuredOutputError when `schema` is not a recognizable JSON
+ *   Schema shape.
+ */
+export declare function serializeJsonSchema(schema: unknown): string;
+/**
+ * Thrown when structured output (JSON mode or JSON Schema constrained
+ * decoding) fails: the supplied schema has no recognizable JSON Schema
+ * shape, or the generated text does not parse as valid JSON. Parse failures
+ * wrap the underlying `SyntaxError` from `JSON.parse` so consumers can
+ * distinguish SDK-issued failures from unrelated runtime exceptions.
+ */
+export declare class StructuredOutputError extends LocalmWebError {
+}
 /**
  * Wrap an async iterable so that each `TokenChunk` is also passed to a
  * caller-supplied side-effect callback before being yielded downstream.