npm - @voyantjs/catalog-rag - Versions diffs - 0.19.0 - Mend

@voyantjs/catalog-rag 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/README.md +48 -0
package/dist/embeddings/contract.d.ts +85 -0
package/dist/embeddings/contract.d.ts.map +1 -0
package/dist/embeddings/contract.js +42 -0
package/dist/embeddings/contract.test.d.ts +2 -0
package/dist/embeddings/contract.test.d.ts.map +1 -0
package/dist/embeddings/contract.test.js +30 -0
package/dist/embeddings/gemini.d.ts +110 -0
package/dist/embeddings/gemini.d.ts.map +1 -0
package/dist/embeddings/gemini.js +118 -0
package/dist/embeddings/gemini.test.d.ts +2 -0
package/dist/embeddings/gemini.test.d.ts.map +1 -0
package/dist/embeddings/gemini.test.js +126 -0
package/dist/embeddings/model-registry.d.ts +62 -0
package/dist/embeddings/model-registry.d.ts.map +1 -0
package/dist/embeddings/model-registry.js +78 -0
package/dist/embeddings/model-registry.test.d.ts +2 -0
package/dist/embeddings/model-registry.test.d.ts.map +1 -0
package/dist/embeddings/model-registry.test.js +81 -0
package/dist/embeddings/openai.d.ts +81 -0
package/dist/embeddings/openai.d.ts.map +1 -0
package/dist/embeddings/openai.js +123 -0
package/dist/embeddings/openai.test.d.ts +2 -0
package/dist/embeddings/openai.test.d.ts.map +1 -0
package/dist/embeddings/openai.test.js +157 -0
package/dist/index.d.ts +7 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +11 -0
package/dist/search/federate.d.ts +57 -0
package/dist/search/federate.d.ts.map +1 -0
package/dist/search/federate.js +103 -0
package/dist/search/federate.test.d.ts +2 -0
package/dist/search/federate.test.d.ts.map +1 -0
package/dist/search/federate.test.js +146 -0
package/dist/search/semantic.d.ts +58 -0
package/dist/search/semantic.d.ts.map +1 -0
package/dist/search/semantic.js +71 -0
package/dist/search/semantic.test.d.ts +2 -0
package/dist/search/semantic.test.d.ts.map +1 -0
package/dist/search/semantic.test.js +143 -0
package/package.json +75 -0

package/README.md ADDED Viewed

@@ -0,0 +1,48 @@
+# @voyantjs/catalog-rag
+Phase 2 of the catalog plane: this package adds vector embeddings, AI-agent access patterns, and the MCP server scaffolding on top of the Phase 1 foundation in `@voyantjs/catalog`.
+See [`docs/architecture/catalog-rag-architecture.md`](../../docs/architecture/catalog-rag-architecture.md) for the full design.
+## Install
+```bash
+pnpm add @voyantjs/catalog-rag
+```
+## What's in the box
+- **`./embeddings/contract`** — `EmbeddingProvider` interface plus capability declarations (model id, dimensions, max tokens, max batch size, supported languages).
+- **`./embeddings/openai`** — Default `EmbeddingProvider` implementation backed by OpenAI's embeddings API. Uses native `fetch` (works in Cloudflare Workers + Node).
+- **`./embeddings/model-registry`** — Helpers for tracking embedding model identity per document, validating dimension compatibility at deployment startup, and supporting mixed-model migration windows.
+- **`./search/semantic`** — Search orchestration helpers: build a hybrid `SearchRequest` with `mode: "semantic" | "hybrid" | "keyword"`, attach a `query_embedding` if the caller brought one, and delegate to the underlying `IndexerAdapter`.
+- **`./search/federate`** — Cross-audience federated query helper for staff actors that need to search non-staff audience pools (architecture §7).
+## Phase relationship
+Phase 2 is **additive** to Phase 1. It does not modify the field-policy contract, the overlay store, the snapshot graph, or the source-adapter contract. The `IndexerAdapter` capability flags (`supportsVectorFields`, `supportsHybridSearch`, `vectorDimensions`, `supportsCrossAudienceFederation`) are already declared in Phase 1; Phase 2 deployments fill them in.
+## Architectural rules (enforced by code, not just convention)
+- **AI agents query the API, not the vector database directly.** Visibility filtering, overlay resolution, and audit all happen at the API layer. The vector DB is implementation detail.
+- **Per-audience embedding pools.** Vectors are strictly per-audience — no cross-audience denormalization on the vector side. Customer chatbots' nearest-neighbor search runs against vectors that only ever saw customer-visible content.
+- **Model versioning is explicit.** Each search-index document carries an `embedding_model_id`. Switching models is a deliberate `bulkReindex` migration, not silent.
+## Usage
+```typescript
+import { createOpenAIEmbeddingProvider } from "@voyantjs/catalog-rag/embeddings/openai"
+const embeddings = createOpenAIEmbeddingProvider({
+  apiKey: env.OPENAI_API_KEY,
+  model: "text-embedding-3-small", // 1536 dimensions, multilingual
+})
+// Generate embeddings for a batch of catalog texts
+const vectors = await embeddings.embed([
+  "Bali Wellness Retreat",
+  "Sunset Yacht Cruise",
+])
+```
+See `docs/architecture/catalog-rag-architecture.md` for the full design and integration patterns.

package/dist/embeddings/contract.d.ts ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * EmbeddingProvider contract — engine-agnostic interface for generating
+ * vector embeddings from text.
+ *
+ * Voyant ships native OpenAI as the v1 default (see `./openai.ts`).
+ * Deployments swap in Voyage AI, local sentence-transformers, Cohere,
+ * or any other engine by satisfying this contract.
+ *
+ * See `docs/architecture/catalog-rag-architecture.md` §6 for the design.
+ */
+/**
+ * Capability metadata declared by the provider. Used by the catalog plane
+ * at deployment startup to validate that the configured embedding model is
+ * compatible with the configured `IndexerAdapter`'s `vectorDimensions`.
+ */
+export interface EmbeddingProviderCapabilities {
+    /**
+     * Stable identifier for this provider+model combo. Conventional shape:
+     * `<vendor>/<model-name>/<version>` — e.g. `openai/text-embedding-3-small/v1`.
+     * Stamped onto every search-index document for migration safety.
+     */
+    modelId: string;
+    /**
+     * Fixed dimensionality of vectors produced by `embed()`. Must match the
+     * `IndexerAdapter`'s declared `vectorDimensions` or the catalog plane
+     * fails fast at deployment.
+     */
+    dimensions: number;
+    /**
+     * Maximum input token length per text. Texts longer than this should be
+     * truncated by the caller before passing in (the provider rejects
+     * oversize inputs rather than truncating silently).
+     */
+    maxTokensPerInput: number;
+    /**
+     * Maximum batch size for a single `embed()` call. Larger batches must be
+     * chunked by the caller (the exported `chunkForBatch` helper does this).
+     * Common values: OpenAI 2048, Voyage 128.
+     */
+    maxBatchSize: number;
+    /**
+     * ISO language codes the model handles well. `null` means multilingual
+     * or language-agnostic (e.g. OpenAI's `text-embedding-3-small`).
+     * NOTE(review): the field is optional, but what an omitted value means is
+     * not stated here — presumably "no declaration made". Confirm with
+     * the consumers of this field.
+     */
+    supportedLanguages?: string[] | null;
+}
+/**
+ * The EmbeddingProvider contract. Implementations come from anywhere —
+ * vendor SDKs, local models, custom wrappers. No implementer is privileged.
+ *
+ * The contract is a read-only `capabilities` record plus a single method,
+ * `embed()`, which is asynchronous: it returns a promise of the vectors.
+ */
+export interface EmbeddingProvider {
+    readonly capabilities: EmbeddingProviderCapabilities;
+    /**
+     * Generate one embedding per input text. Returns vectors in the same
+     * order as the input. The result array has `texts.length` entries,
+     * each with `capabilities.dimensions` floats.
+     *
+     * Throws on:
+     *   - Provider/transport errors (rate limits, auth, network)
+     *   - Inputs exceeding `capabilities.maxBatchSize` or `maxTokensPerInput`
+     *
+     * Implementations MUST NOT silently truncate or drop inputs.
+     */
+    embed(texts: string[]): Promise<number[][]>;
+}
+/**
+ * Standard error codes that adapters should throw when a constraint is
+ * violated. The catalog plane translates these into structured error
+ * responses.
+ *
+ * `EmbeddingProviderError` carries exactly one of the three codes in its
+ * read-only `code` property and, optionally, the underlying failure in
+ * `cause` (typed `unknown`, so narrow it before use).
+ */
+export declare const EMBEDDING_BATCH_TOO_LARGE: "EMBEDDING_BATCH_TOO_LARGE";
+export declare const EMBEDDING_INPUT_TOO_LONG: "EMBEDDING_INPUT_TOO_LONG";
+export declare const EMBEDDING_PROVIDER_ERROR: "EMBEDDING_PROVIDER_ERROR";
+export declare class EmbeddingProviderError extends Error {
+    readonly code: typeof EMBEDDING_BATCH_TOO_LARGE | typeof EMBEDDING_INPUT_TOO_LONG | typeof EMBEDDING_PROVIDER_ERROR;
+    readonly cause?: unknown | undefined;
+    constructor(code: typeof EMBEDDING_BATCH_TOO_LARGE | typeof EMBEDDING_INPUT_TOO_LONG | typeof EMBEDDING_PROVIDER_ERROR, message: string, cause?: unknown | undefined);
+}
+/**
+ * Thin helper for chunking a large input array into batches the provider
+ * can handle: each returned batch holds at most `maxBatchSize` items, in
+ * input order. Callers use it to split inputs before calling `embed()`;
+ * provider implementations may also use it internally.
+ *
+ * @param items - Values to split into batches.
+ * @param maxBatchSize - Upper bound on the length of each batch.
+ * @returns The batches, typed as an array of arrays of `T`.
+ */
+export declare function chunkForBatch<T>(items: T[], maxBatchSize: number): T[][];
+//# sourceMappingURL=contract.d.ts.map

package/dist/embeddings/contract.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"contract.d.ts","sourceRoot":"","sources":["../../src/embeddings/contract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH;;;;GAIG;AACH,MAAM,WAAW,6BAA6B;IAC5C;;;;OAIG;IACH,OAAO,EAAE,MAAM,CAAA;IAEf;;;;OAIG;IACH,UAAU,EAAE,MAAM,CAAA;IAElB;;;;OAIG;IACH,iBAAiB,EAAE,MAAM,CAAA;IAEzB;;;OAGG;IACH,YAAY,EAAE,MAAM,CAAA;IAEpB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,EAAE,GAAG,IAAI,CAAA;CACrC;AAED;;;;;GAKG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,YAAY,EAAE,6BAA6B,CAAA;IAEpD;;;;;;;;;;OAUG;IACH,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAA;CAC5C;AAED;;;GAGG;AACH,eAAO,MAAM,yBAAyB,EAAG,2BAAoC,CAAA;AAC7E,eAAO,MAAM,wBAAwB,EAAG,0BAAmC,CAAA;AAC3E,eAAO,MAAM,wBAAwB,EAAG,0BAAmC,CAAA;AAE3E,qBAAa,sBAAuB,SAAQ,KAAK;aAE7B,IAAI,EAChB,OAAO,yBAAyB,GAChC,OAAO,wBAAwB,GAC/B,OAAO,wBAAwB;aAEnB,KAAK,CAAC,EAAE,OAAO;gBALf,IAAI,EAChB,OAAO,yBAAyB,GAChC,OAAO,wBAAwB,GAC/B,OAAO,wBAAwB,EACnC,OAAO,EAAE,MAAM,EACC,KAAK,CAAC,EAAE,OAAO,YAAA;CAKlC;AAED;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,EAAE,YAAY,EAAE,MAAM,GAAG,CAAC,EAAE,EAAE,CASxE"}

package/dist/embeddings/contract.js ADDED Viewed

@@ -0,0 +1,42 @@
+/**
+ * EmbeddingProvider contract — engine-agnostic interface for generating
+ * vector embeddings from text.
+ *
+ * Voyant ships native OpenAI as the v1 default (see `./openai.ts`).
+ * Deployments swap in Voyage AI, local sentence-transformers, Cohere,
+ * or any other engine by satisfying this contract.
+ *
+ * See `docs/architecture/catalog-rag-architecture.md` §6 for the design.
+ */
+/**
+ * Standard error codes that adapters should throw when a constraint is
+ * violated. The catalog plane translates these into structured error
+ * responses.
+ *
+ * `EmbeddingProviderError` is the carrier: it stores one of the codes in
+ * `code`, the human-readable text in the inherited `message`, and the
+ * optional underlying failure in `cause`.
+ */
+export const EMBEDDING_BATCH_TOO_LARGE = "EMBEDDING_BATCH_TOO_LARGE";
+export const EMBEDDING_INPUT_TOO_LONG = "EMBEDDING_INPUT_TOO_LONG";
+export const EMBEDDING_PROVIDER_ERROR = "EMBEDDING_PROVIDER_ERROR";
+export class EmbeddingProviderError extends Error {
+    code;
+    cause;
+    constructor(code, message, cause) {
+        super(message);
+        this.code = code;
+        this.cause = cause; // stays `undefined` when the caller passes no cause
+        this.name = "EmbeddingProviderError"; // report the subclass name instead of the inherited "Error"
+    }
+}
+/**
+ * Thin helper for chunking a large input array into batches the provider
+ * can handle. Provider implementations call this internally; callers can
+ * also use it directly for finer-grained control.
+ */
+export function chunkForBatch(items, maxBatchSize) {
+    if (maxBatchSize <= 0) {
+        throw new Error("maxBatchSize must be positive");
+    }
+    const batches = [];
+    for (let i = 0; i < items.length; i += maxBatchSize) {
+        batches.push(items.slice(i, i + maxBatchSize));
+    }
+    return batches;
+}

package/dist/embeddings/contract.test.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export {};
2	+ //# sourceMappingURL=contract.test.d.ts.map

package/dist/embeddings/contract.test.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"contract.test.d.ts","sourceRoot":"","sources":["../../src/embeddings/contract.test.ts"],"names":[],"mappings":""}

package/dist/embeddings/contract.test.js ADDED Viewed

@@ -0,0 +1,30 @@
+import { describe, expect, it } from "vitest";
+import { chunkForBatch, EMBEDDING_BATCH_TOO_LARGE, EmbeddingProviderError } from "./contract.js";
+describe("chunkForBatch", () => { // covers the fits-in-one, split, empty, and invalid-size cases
+    it("returns a single batch when items fit in maxBatchSize", () => {
+        expect(chunkForBatch([1, 2, 3], 5)).toEqual([[1, 2, 3]]);
+    });
+    it("splits into multiple batches when items exceed maxBatchSize", () => {
+        expect(chunkForBatch([1, 2, 3, 4, 5], 2)).toEqual([[1, 2], [3, 4], [5]]); // the last batch holds the remainder
+    });
+    it("returns an empty array for an empty input", () => {
+        expect(chunkForBatch([], 10)).toEqual([]); // no batches at all, not one empty batch
+    });
+    it("throws on non-positive maxBatchSize", () => {
+        expect(() => chunkForBatch([1], 0)).toThrow(/positive/);
+        expect(() => chunkForBatch([1], -1)).toThrow(/positive/);
+    });
+});
+describe("EmbeddingProviderError", () => { // checks the fields the constructor assigns
+    it("carries the standard error code", () => {
+        const err = new EmbeddingProviderError(EMBEDDING_BATCH_TOO_LARGE, "too big");
+        expect(err.code).toBe(EMBEDDING_BATCH_TOO_LARGE);
+        expect(err.message).toBe("too big");
+        expect(err.name).toBe("EmbeddingProviderError"); // the constructor overrides the inherited "Error" name
+    });
+    it("optionally retains the underlying cause", () => {
+        const cause = new Error("original");
+        const err = new EmbeddingProviderError(EMBEDDING_BATCH_TOO_LARGE, "wrapped", cause);
+        expect(err.cause).toBe(cause); // same object reference, not a copy
+    });
+});

package/dist/embeddings/gemini.d.ts ADDED Viewed

@@ -0,0 +1,110 @@
+/**
+ * EmbeddingProvider implementation backed by Google's Gemini embeddings API.
+ *
+ * Uses native `fetch` so it works in Cloudflare Workers + Node + browsers
+ * without an SDK dependency. Templates pass in the API key (and optionally
+ * a custom `baseUrl` for proxies).
+ *
+ * Models supported by default:
+ *   - `gemini-embedding-001` — 3072d, multilingual, current recommendation.
+ *     Supports Matryoshka representation learning (MRL) — request a smaller
+ *     output dimension via `outputDimensionality` to reduce storage cost.
+ *   - `text-embedding-004` — 768d, multilingual, legacy stable.
+ *
+ * See `docs/architecture/catalog-rag-architecture.md` §6 for the design.
+ */
+import { chunkForBatch, type EmbeddingProvider } from "./contract.js";
+/**
+ * Known Gemini embedding models. `dimensions` is the *native* size; when
+ * `outputDimensionality` is set in the provider options, the effective
+ * vector size is whatever the caller requested (Gemini truncates server-side
+ * via MRL on `gemini-embedding-001`).
+ *
+ * Per-model fields:
+ *   - `maxTokensPerInput` / `maxBatchSize` — limits the provider reports
+ *     through its `capabilities`.
+ *   - `multilingual` — whether the provider reports `supportedLanguages`
+ *     as `null` (multilingual).
+ *   - `supportsOutputDimensionality` — whether the `outputDimensionality`
+ *     option is honored for this model.
+ */
+declare const GEMINI_MODELS: {
+    readonly "gemini-embedding-001": {
+        readonly dimensions: 3072;
+        readonly maxTokensPerInput: 2048;
+        readonly maxBatchSize: 100;
+        readonly multilingual: true;
+        readonly supportsOutputDimensionality: true;
+    };
+    readonly "text-embedding-004": {
+        readonly dimensions: 768;
+        readonly maxTokensPerInput: 2048;
+        readonly maxBatchSize: 100;
+        readonly multilingual: true;
+        readonly supportsOutputDimensionality: false;
+    };
+};
+export type GeminiEmbeddingModel = keyof typeof GEMINI_MODELS; // union of the model names that are keys of GEMINI_MODELS
+/**
+ * Tasks Gemini optimizes embeddings for. `RETRIEVAL_DOCUMENT` is the right
+ * default for indexed catalog docs; switch to `RETRIEVAL_QUERY` when
+ * embedding a search query at read time. The provider keeps a single task
+ * type per instance — wire two providers if you index and query separately.
+ *
+ * The chosen value is sent as `taskType` on every request in a batch.
+ */
+export type GeminiTaskType = "RETRIEVAL_DOCUMENT" | "RETRIEVAL_QUERY" | "SEMANTIC_SIMILARITY" | "CLASSIFICATION" | "CLUSTERING" | "QUESTION_ANSWERING" | "FACT_VERIFICATION" | "CODE_RETRIEVAL_QUERY";
+export interface GeminiEmbeddingProviderOptions {
+    /**
+     * API key to authenticate with. In `auth: "google"` mode, this is the
+     * Google AI Studio key. In `auth: "bearer"` mode (e.g. when routing
+     * through the Voyant Cloud AI gateway), this is the gateway's bearer
+     * token.
+     */
+    apiKey: string;
+    /**
+     * How to attach the API key to outbound requests.
+     *  - `"google"` (default) — `x-goog-api-key: <apiKey>`. Use when
+     *    talking directly to `generativelanguage.googleapis.com`.
+     *  - `"bearer"` — `Authorization: Bearer <apiKey>`. Use when routing
+     *    through the Voyant Cloud `/ai/v1/gemini` gateway, which forwards
+     *    to Google with the org's saved provider key.
+     *
+     * Exactly one of the two headers is sent, never both.
+     */
+    auth?: "google" | "bearer";
+    /**
+     * Embedding model to use. Default: `gemini-embedding-001`.
+     * Switching models is a deliberate `bulkReindex` operation — the catalog
+     * plane scopes vector queries to documents matching the active
+     * `embedding_model_id`, so mid-migration mixes are handled cleanly.
+     */
+    model?: GeminiEmbeddingModel;
+    /**
+     * Output vector size for MRL-capable models. When omitted, the model's
+     * native dimension is used. Only `gemini-embedding-001` supports this.
+     * Smaller dims reduce storage / query cost at some quality loss.
+     *
+     * The value is silently ignored — no error is raised and the native
+     * dimension is reported — when the selected model does not support it
+     * or when the value is `0`.
+     */
+    outputDimensionality?: number;
+    /**
+     * Task type the embedded text will be used for. Default:
+     * `RETRIEVAL_DOCUMENT` (right for ingestion). Use `RETRIEVAL_QUERY` for
+     * read-side query embedding.
+     */
+    taskType?: GeminiTaskType;
+    /**
+     * Override the API base URL — useful for a corporate proxy or a custom
+     * Vertex-compatible deployment. Default:
+     * `https://generativelanguage.googleapis.com/v1beta`.
+     * A single trailing slash is stripped before the request path is
+     * appended.
+     */
+    baseUrl?: string;
+    /**
+     * Optional `fetch` override for testing or custom transport. Default:
+     * the global `fetch`. Must follow the standard Fetch API contract.
+     */
+    fetchImpl?: typeof fetch;
+    /**
+     * Override the model id stamped onto search-index documents. Defaults
+     * to `gemini/<model>/v1` — keep this stable across deployments so
+     * documents stay queryable across instances.
+     */
+    modelId?: string;
+}
+/**
+ * Build a Gemini-backed EmbeddingProvider.
+ *
+ * @param options - API key plus optional auth mode, model, output
+ *   dimensionality, task type, base URL, `fetch` override and model id.
+ * @returns A provider whose `embed()` issues one `batchEmbedContents`
+ *   request per call and rejects with `EmbeddingProviderError` on oversize
+ *   batches, network failures, and non-2xx responses.
+ */
+export declare function createGeminiEmbeddingProvider(options: GeminiEmbeddingProviderOptions): EmbeddingProvider;
+/**
+ * Re-export the chunking helper (and the model table) alongside the Gemini
+ * provider so callers can pre-chunk very large inputs with
+ * `chunkForBatch(texts, provider.capabilities.maxBatchSize)` and call
+ * `embed()` once per batch.
+ */
+export { chunkForBatch, GEMINI_MODELS };
+//# sourceMappingURL=gemini.d.ts.map

package/dist/embeddings/gemini.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"gemini.d.ts","sourceRoot":"","sources":["../../src/embeddings/gemini.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EACL,aAAa,EAIb,KAAK,iBAAiB,EAGvB,MAAM,eAAe,CAAA;AAEtB;;;;;GAKG;AACH,QAAA,MAAM,aAAa;;;;;;;;;;;;;;;CAeT,CAAA;AAEV,MAAM,MAAM,oBAAoB,GAAG,MAAM,OAAO,aAAa,CAAA;AAE7D;;;;;GAKG;AACH,MAAM,MAAM,cAAc,GACtB,oBAAoB,GACpB,iBAAiB,GACjB,qBAAqB,GACrB,gBAAgB,GAChB,YAAY,GACZ,oBAAoB,GACpB,mBAAmB,GACnB,sBAAsB,CAAA;AAE1B,MAAM,WAAW,8BAA8B;IAC7C;;;;;OAKG;IACH,MAAM,EAAE,MAAM,CAAA;IACd;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,QAAQ,GAAG,QAAQ,CAAA;IAC1B;;;;;OAKG;IACH,KAAK,CAAC,EAAE,oBAAoB,CAAA;IAC5B;;;;OAIG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAA;IAC7B;;;;OAIG;IACH,QAAQ,CAAC,EAAE,cAAc,CAAA;IACzB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAA;IACxB;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;CACjB;AAcD;;GAEG;AACH,wBAAgB,6BAA6B,CAC3C,OAAO,EAAE,8BAA8B,GACtC,iBAAiB,CA4FnB;AAED;;;GAGG;AACH,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,CAAA"}

package/dist/embeddings/gemini.js ADDED Viewed

@@ -0,0 +1,118 @@
+/**
+ * EmbeddingProvider implementation backed by Google's Gemini embeddings API.
+ *
+ * Uses native `fetch` so it works in Cloudflare Workers + Node + browsers
+ * without an SDK dependency. Templates pass in the API key (and optionally
+ * a custom `baseUrl` for proxies).
+ *
+ * Models supported by default:
+ *   - `gemini-embedding-001` — 3072d, multilingual, current recommendation.
+ *     Supports Matryoshka representation learning (MRL) — request a smaller
+ *     output dimension via `outputDimensionality` to reduce storage cost.
+ *   - `text-embedding-004` — 768d, multilingual, legacy stable.
+ *
+ * See `docs/architecture/catalog-rag-architecture.md` §6 for the design.
+ */
+import { chunkForBatch, EMBEDDING_BATCH_TOO_LARGE, EMBEDDING_INPUT_TOO_LONG, EMBEDDING_PROVIDER_ERROR, EmbeddingProviderError, } from "./contract.js";
+/**
+ * Known Gemini embedding models. `dimensions` is the *native* size; when
+ * `outputDimensionality` is set in the provider options, the effective
+ * vector size is whatever the caller requested (Gemini truncates server-side
+ * via MRL on `gemini-embedding-001`).
+ *
+ * Per-model fields:
+ *   - `maxTokensPerInput` / `maxBatchSize` — limits copied into the
+ *     provider's `capabilities`.
+ *   - `multilingual` — when true the provider reports `supportedLanguages`
+ *     as `null`.
+ *   - `supportsOutputDimensionality` — when false the `outputDimensionality`
+ *     option is ignored for this model.
+ */
+const GEMINI_MODELS = {
+    "gemini-embedding-001": {
+        dimensions: 3072,
+        maxTokensPerInput: 2048,
+        maxBatchSize: 100,
+        multilingual: true,
+        supportsOutputDimensionality: true,
+    },
+    "text-embedding-004": {
+        dimensions: 768,
+        maxTokensPerInput: 2048,
+        maxBatchSize: 100,
+        multilingual: true,
+        supportsOutputDimensionality: false,
+    },
+};
+/**
+ * Build a Gemini-backed EmbeddingProvider.
+ */
+export function createGeminiEmbeddingProvider(options) {
+    const model = options.model ?? "gemini-embedding-001";
+    const modelInfo = GEMINI_MODELS[model];
+    const baseUrl = (options.baseUrl ?? "https://generativelanguage.googleapis.com/v1beta").replace(/\/$/, "");
+    const fetchImpl = options.fetchImpl ?? globalThis.fetch.bind(globalThis);
+    const taskType = options.taskType ?? "RETRIEVAL_DOCUMENT";
+    const auth = options.auth ?? "google";
+    const outputDimensionality = options.outputDimensionality && modelInfo.supportsOutputDimensionality
+        ? options.outputDimensionality
+        : undefined;
+    const dimensions = outputDimensionality ?? modelInfo.dimensions;
+    const capabilities = {
+        modelId: options.modelId ?? `gemini/${model}/v1`,
+        dimensions,
+        maxTokensPerInput: modelInfo.maxTokensPerInput,
+        maxBatchSize: modelInfo.maxBatchSize,
+        supportedLanguages: modelInfo.multilingual ? null : undefined,
+    };
+    return {
+        capabilities,
+        async embed(texts) {
+            if (texts.length === 0)
+                return [];
+            if (texts.length > capabilities.maxBatchSize) {
+                throw new EmbeddingProviderError(EMBEDDING_BATCH_TOO_LARGE, `Gemini embedding batch size ${texts.length} exceeds max ${capabilities.maxBatchSize}; chunk inputs via chunkForBatch() first`);
+            }
+            const url = `${baseUrl}/models/${model}:batchEmbedContents`;
+            const body = JSON.stringify({
+                requests: texts.map((text) => ({
+                    model: `models/${model}`,
+                    content: { parts: [{ text }] },
+                    taskType,
+                    ...(outputDimensionality ? { outputDimensionality } : {}),
+                })),
+            });
+            const authHeaders = auth === "bearer"
+                ? { Authorization: `Bearer ${options.apiKey}` }
+                : { "x-goog-api-key": options.apiKey };
+            let response;
+            try {
+                response = await fetchImpl(url, {
+                    method: "POST",
+                    headers: {
+                        "Content-Type": "application/json",
+                        ...authHeaders,
+                    },
+                    body,
+                });
+            }
+            catch (cause) {
+                throw new EmbeddingProviderError(EMBEDDING_PROVIDER_ERROR, "Gemini embeddings request failed at the network layer", cause);
+            }
+            if (!response.ok) {
+                const text = await response.text().catch(() => "");
+                let parsed;
+                try {
+                    parsed = JSON.parse(text);
+                }
+                catch {
+                    // ignore parse failure; surface the raw text
+                }
+                const message = parsed?.error?.message ?? text ?? `HTTP ${response.status}`;
+                const status = parsed?.error?.status ?? "";
+                const code = status === "INVALID_ARGUMENT" ? EMBEDDING_INPUT_TOO_LONG : EMBEDDING_PROVIDER_ERROR;
+                throw new EmbeddingProviderError(code, `Gemini embeddings request failed (${response.status}): ${message}`);
+            }
+            const json = (await response.json());
+            // Gemini returns embeddings in input order — no `index` field.
+            return json.embeddings.map((entry) => entry.values);
+        },
+    };
+}
+/**
+ * Re-export the chunking helper (and the model table) alongside the Gemini
+ * provider so callers can pre-chunk very large inputs with
+ * `chunkForBatch(texts, provider.capabilities.maxBatchSize)` and call
+ * `embed()` once per batch.
+ */
+export { chunkForBatch, GEMINI_MODELS };

package/dist/embeddings/gemini.test.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export {};
2	+ //# sourceMappingURL=gemini.test.d.ts.map

package/dist/embeddings/gemini.test.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"gemini.test.d.ts","sourceRoot":"","sources":["../../src/embeddings/gemini.test.ts"],"names":[],"mappings":""}

package/dist/embeddings/gemini.test.js ADDED Viewed

@@ -0,0 +1,126 @@
+import { describe, expect, it, vi } from "vitest";
+import { EMBEDDING_BATCH_TOO_LARGE, EmbeddingProviderError } from "./contract.js";
+import { createGeminiEmbeddingProvider, GEMINI_MODELS } from "./gemini.js";
+function mockFetch(response) { // builds a vi.fn stand-in for `fetch` that always resolves to one canned Response
+    return vi.fn(async () => {
+        return new Response(typeof response.json === "function" ? JSON.stringify(await response.json()) : "", { // body: the serialized `json()` result, or empty when no `json` function is supplied
+            status: response.status ?? (response.ok ? 200 : 400), // an explicit status wins; otherwise 200 or 400 is derived from `ok`
+        });
+    });
+}
+describe("createGeminiEmbeddingProvider", () => { // capabilities, batching limits, error wrapping, auth headers and request body
+    it("declares correct capabilities for the default model", () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            fetchImpl: mockFetch({ ok: true, json: async () => ({ embeddings: [] }) }),
+        });
+        expect(provider.capabilities.modelId).toBe("gemini/gemini-embedding-001/v1");
+        expect(provider.capabilities.dimensions).toBe(3072); // native size of the default model
+        expect(provider.capabilities.maxBatchSize).toBe(100);
+    });
+    it("honors outputDimensionality on MRL-capable models", () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            outputDimensionality: 768,
+            fetchImpl: mockFetch({ ok: true, json: async () => ({ embeddings: [] }) }),
+        });
+        expect(provider.capabilities.dimensions).toBe(768); // the requested size replaces the native 3072
+    });
+    it("ignores outputDimensionality on legacy text-embedding-004", () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            model: "text-embedding-004",
+            outputDimensionality: 1536, // expected to be dropped: this model does not support the option
+            fetchImpl: mockFetch({ ok: true, json: async () => ({ embeddings: [] }) }),
+        });
+        expect(provider.capabilities.dimensions).toBe(GEMINI_MODELS["text-embedding-004"].dimensions);
+    });
+    it("returns vectors in input order from a batch response", async () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            fetchImpl: mockFetch({
+                ok: true,
+                json: async () => ({
+                    embeddings: [{ values: [0.1] }, { values: [0.2] }, { values: [0.3] }],
+                }),
+            }),
+        });
+        const vectors = await provider.embed(["a", "b", "c"]);
+        expect(vectors).toEqual([[0.1], [0.2], [0.3]]);
+    });
+    it("returns an empty array for empty input without hitting the API", async () => {
+        const fetchSpy = vi.fn();
+        const provider = createGeminiEmbeddingProvider({ apiKey: "key", fetchImpl: fetchSpy });
+        expect(await provider.embed([])).toEqual([]);
+        expect(fetchSpy).not.toHaveBeenCalled();
+    });
+    it("throws EMBEDDING_BATCH_TOO_LARGE when input exceeds maxBatchSize", async () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            fetchImpl: mockFetch({ ok: true, json: async () => ({ embeddings: [] }) }),
+        });
+        const tooMany = Array.from({ length: 101 }, (_, i) => `t-${i}`); // one more than the 100-item batch limit
+        await expect(provider.embed(tooMany)).rejects.toMatchObject({
+            code: EMBEDDING_BATCH_TOO_LARGE,
+        });
+    });
+    it("wraps non-2xx responses as EmbeddingProviderError", async () => {
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            fetchImpl: mockFetch({
+                ok: false,
+                status: 401,
+                json: async () => ({ error: { code: 401, message: "API key not valid" } }),
+            }),
+        });
+        await expect(provider.embed(["x"])).rejects.toBeInstanceOf(EmbeddingProviderError);
+    });
+    it("sends the api key as x-goog-api-key header", async () => {
+        const fetchSpy = vi.fn(async () => {
+            return new Response(JSON.stringify({ embeddings: [{ values: [0.1] }] }), { status: 200 });
+        });
+        const provider = createGeminiEmbeddingProvider({ apiKey: "key-xyz", fetchImpl: fetchSpy });
+        await provider.embed(["x"]);
+        // biome-ignore lint/suspicious/noExplicitAny: vi.fn return type
+        const init = fetchSpy.mock.calls[0]?.[1]; // second argument of the first fetch call, i.e. the request init
+        const headers = init.headers;
+        expect(headers["x-goog-api-key"]).toBe("key-xyz");
+    });
+    it("uses Bearer auth when auth mode is 'bearer' (Voyant Cloud gateway)", async () => {
+        const fetchSpy = vi.fn(async () => {
+            return new Response(JSON.stringify({ embeddings: [{ values: [0] }] }), { status: 200 });
+        });
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "vc-token-abc",
+            auth: "bearer",
+            baseUrl: "https://api.voyantjs.com/ai/v1/gemini",
+            fetchImpl: fetchSpy,
+        });
+        await provider.embed(["x"]);
+        // biome-ignore lint/suspicious/noExplicitAny: vi.fn return type
+        const init = fetchSpy.mock.calls[0]?.[1];
+        const headers = init.headers;
+        expect(headers.Authorization).toBe("Bearer vc-token-abc");
+        expect(headers["x-goog-api-key"]).toBeUndefined(); // only one auth header is sent per mode
+        // biome-ignore lint/suspicious/noExplicitAny: vi.fn return type
+        const url = fetchSpy.mock.calls[0]?.[0]; // first argument of the first fetch call, i.e. the request URL
+        expect(url).toBe("https://api.voyantjs.com/ai/v1/gemini/models/gemini-embedding-001:batchEmbedContents");
+    });
+    it("passes outputDimensionality + taskType in each request body", async () => {
+        const fetchSpy = vi.fn(async () => {
+            return new Response(JSON.stringify({ embeddings: [{ values: [0] }] }), { status: 200 });
+        });
+        const provider = createGeminiEmbeddingProvider({
+            apiKey: "key",
+            outputDimensionality: 768,
+            taskType: "RETRIEVAL_QUERY",
+            fetchImpl: fetchSpy,
+        });
+        await provider.embed(["q"]);
+        // biome-ignore lint/suspicious/noExplicitAny: vi.fn return type
+        const init = fetchSpy.mock.calls[0]?.[1];
+        const body = JSON.parse(String(init.body)); // the provider sends the body as a JSON string
+        expect(body.requests[0].outputDimensionality).toBe(768);
+        expect(body.requests[0].taskType).toBe("RETRIEVAL_QUERY");
+    });
+});

package/dist/embeddings/model-registry.d.ts ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Embedding model registry helpers.
+ *
+ * Each search-index document carries an `embedding_model_id` field that
+ * identifies which model produced its vector. The registry's job is to:
+ *
+ *   1. Validate at deployment startup that the configured embedding
+ *      provider's `dimensions` matches the configured `IndexerAdapter`'s
+ *      `vectorDimensions`. Mismatch → fail loudly.
+ *   2. Track the active model id so search queries can scope vector
+ *      lookups to documents using a compatible model. During a model
+ *      migration window the index can hold mixed-model documents — old
+ *      ones get skipped on vector queries and re-embedded by the
+ *      `bulkReindex(forceReembed: true)` job.
+ *
+ * See `docs/architecture/catalog-rag-architecture.md` §8.
+ */
+import type { IndexerCapabilities } from "@voyantjs/catalog";
+import type { EmbeddingProviderCapabilities } from "./contract.js";
+/**
+ * Validate that an embedding provider's capabilities are compatible with
+ * the search engine's vector configuration. Call this at deployment
+ * startup.
+ *
+ * NOTE(review): the function returns `void`, so incompatibility is
+ * presumably reported by throwing ("fail loudly") — the implementation is
+ * not visible from this declaration; confirm before relying on it.
+ *
+ * @param providerCapabilities - Capabilities declared by the embedding provider.
+ * @param indexerCapabilities - Capabilities declared by the `IndexerAdapter`.
+ */
+export declare function validateEmbeddingCompatibility(providerCapabilities: EmbeddingProviderCapabilities, indexerCapabilities: IndexerCapabilities): void;
+/**
+ * Returns true if a given document's `embedding_model_id` matches the
+ * deployment's active model. Vector queries should filter to active-model
+ * documents; non-matching documents fall through to keyword-only
+ * scoring until `bulkReindex(forceReembed: true)` re-embeds them.
+ *
+ * @param documentModelId - Model id stamped on the document; may be
+ *   `undefined`.
+ * @param activeModelId - Model id the deployment currently embeds with.
+ * @returns Whether the document counts as embedded under the active model.
+ */
+export declare function isActiveEmbeddingModel(documentModelId: string | undefined, activeModelId: string): boolean;
+/**
+ * Convenience: stamp an `IndexerDocument`'s `embedding_model_id` from a
+ * provider's capabilities. Use this when constructing documents in the
+ * embedding pipeline so the active model id propagates to the index.
+ *
+ * @param providerCapabilities - Capabilities of the provider that produced
+ *   (or will produce) the document's vector.
+ * @returns An object with a single `embedding_model_id` string property —
+ *   presumably the provider's `modelId`; verify against the implementation.
+ */
+export declare function stampEmbeddingModelId(providerCapabilities: EmbeddingProviderCapabilities): {
+    embedding_model_id: string;
+};
+/**
+ * Plan the embedding workload for a re-index pipeline. Given the current
+ * document set (each row tagged with its embedding_model_id) and the
+ * active model, returns lists of:
+ *   - `embedded` — already on the active model; no work
+ *   - `pending` — never embedded; needs first-time embedding
+ *   - `migrating` — embedded under an older model; needs re-embedding
+ *
+ * Drives the `bulkReindex(forceReembed: true)` migration UX.
+ *
+ * The plan also echoes `activeModelId` and reports `totalDocuments`. Input
+ * rows may omit `embedding_model_id` or set it to `null`.
+ * NOTE(review): the three lists presumably hold document `id`s and
+ * `totalDocuments` presumably equals the input length — confirm against the
+ * implementation, which is not visible from this declaration.
+ */
+export interface EmbeddingMigrationPlan {
+    embedded: string[];
+    pending: string[];
+    migrating: string[];
+    totalDocuments: number;
+    activeModelId: string;
+}
+export declare function planEmbeddingMigration(documents: ReadonlyArray<{
+    id: string;
+    embedding_model_id?: string | null;
+}>, activeModelId: string): EmbeddingMigrationPlan;
+//# sourceMappingURL=model-registry.d.ts.map

package/dist/embeddings/model-registry.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"model-registry.d.ts","sourceRoot":"","sources":["../../src/embeddings/model-registry.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAA;AAE5D,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,eAAe,CAAA;AAElE;;;;GAIG;AACH,wBAAgB,8BAA8B,CAC5C,oBAAoB,EAAE,6BAA6B,EACnD,mBAAmB,EAAE,mBAAmB,GACvC,IAAI,CAyBN;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACpC,eAAe,EAAE,MAAM,GAAG,SAAS,EACnC,aAAa,EAAE,MAAM,GACpB,OAAO,CAET;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,oBAAoB,EAAE,6BAA6B,GAAG;IAC1F,kBAAkB,EAAE,MAAM,CAAA;CAC3B,CAEA;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,sBAAsB;IACrC,QAAQ,EAAE,MAAM,EAAE,CAAA;IAClB,OAAO,EAAE,MAAM,EAAE,CAAA;IACjB,SAAS,EAAE,MAAM,EAAE,CAAA;IACnB,cAAc,EAAE,MAAM,CAAA;IACtB,aAAa,EAAE,MAAM,CAAA;CACtB;AAED,wBAAgB,sBAAsB,CACpC,SAAS,EAAE,aAAa,CAAC;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,kBAAkB,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,CAAC,EAC5E,aAAa,EAAE,MAAM,GACpB,sBAAsB,CAoBxB"}