npm - @forinda/kickjs-ai - Versions diffs - 2.3.0 - Mend

@forinda/kickjs-ai 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.mts ADDED Viewed

@@ -0,0 +1,1973 @@
+import { AdapterContext, AppAdapter, Constructor } from "@forinda/kickjs";
+import { ZodTypeAny } from "zod";
+//#region src/types.d.ts
+/**
+ * A chat message in the OpenAI/Anthropic-style conversation format.
+ *
+ * All four built-in providers (OpenAI, Anthropic, Google, Ollama)
+ * translate this shape into their native wire format. The `'tool'`
+ * role (together with `toolCallId`) and the `toolCalls` field on
+ * assistant messages support function calling.
+ */
+interface ChatMessage {
+  role: 'system' | 'user' | 'assistant' | 'tool';
+  content: string;
+  /** Tool call ID if `role === 'tool'`. Set by the framework during tool loops. */
+  toolCallId?: string;
+  /** Tool calls made by the assistant. Set by the provider. */
+  toolCalls?: Array<{
+    id: string;
+    name: string;
+    arguments: Record<string, unknown>;
+  }>;
+}
+/**
+ * A resolved tool definition that providers can include in their
+ * wire-format request payload. This is the shape `ChatInput.tools`
+ * carries once `AiAdapter.runAgent` has expanded `'auto'` against
+ * the registry of `@AiTool`-decorated controller methods.
+ *
+ * Providers translate this into their native tool-calling format
+ * (OpenAI's `tools`, Anthropic's `tools`, Google's function declarations,
+ * etc.). The shape is deliberately minimal — anything provider-specific
+ * lives in the provider implementation, not on this type.
+ *
+ * `AiToolDefinition` extends this interface with the HTTP routing
+ * fields (`httpMethod`, `mountPath`) used for dispatch.
+ */
+interface ChatToolDefinition {
+  /** Stable tool identifier, e.g. "TaskController.create". */
+  name: string;
+  /** Human-readable description shown to the model at call time. */
+  description: string;
+  /**
+   * JSON Schema for the tool input, converted from the Zod body schema
+   * on the underlying route. Providers pass this through to the wire
+   * payload verbatim; the schema only needs to be valid JSON Schema.
+   */
+  inputSchema: Record<string, unknown>;
+}
+/**
+ * Input to `AiProvider.chat()` and `AiProvider.stream()`.
+ *
+ * Providers accept this shape, map it to their native format, call the
+ * underlying API, and return a normalized `ChatResponse` (or stream of
+ * `ChatChunk`s).
+ */
+interface ChatInput {
+  /**
+   * Conversation history, in order. A system prompt, if any, can be
+   * supplied as the first message (a message with `role: 'system'`).
+   */
+  messages: ChatMessage[];
+  /**
+   * Optional model override. If omitted, the provider uses its default
+   * model. Accepts provider-specific model IDs (e.g. `gpt-4o`, `claude-opus-4-6`).
+   */
+  model?: string;
+  /**
+   * Tools the model can call.
+   *
+   * - `'auto'` — only meaningful when passed to `AiAdapter.runAgent`,
+   *   which resolves it against the `@AiTool` registry before handing
+   *   the request to the provider. Raw providers that receive `'auto'`
+   *   directly omit tools entirely rather than doing a hidden lookup.
+   * - An array of `ChatToolDefinition` — providers include these in
+   *   the wire payload directly.
+   * - Omitted — no tool-calling in this request.
+   */
+  tools?: 'auto' | ChatToolDefinition[];
+}
+/**
+ * Runtime options for a chat call. Every field is optional.
+ *
+ * `RunAgentOptions` extends this interface, and
+ * `AiAdapterOptions.defaults` accepts the same fields.
+ */
+interface ChatOptions {
+  temperature?: number;
+  maxTokens?: number;
+  topP?: number;
+  stopSequences?: string[];
+  /** Abort signal — cancel the request mid-flight. */
+  signal?: AbortSignal;
+}
+/** Normalized response from a non-streaming chat call. */
+interface ChatResponse {
+  /** The assistant's text output. */
+  content: string;
+  /** Any tool calls the model made. Usually executed by the agent loop. */
+  toolCalls?: Array<{
+    id: string;
+    name: string;
+    arguments: Record<string, unknown>;
+  }>;
+  /** Provider-reported token usage. */
+  usage?: {
+    promptTokens: number;
+    completionTokens: number;
+    totalTokens: number;
+  };
+  /**
+   * Finish reason from the provider.
+   *
+   * Because the union ends in `| string`, the declared type is
+   * effectively plain `string`: the four literals document the common
+   * values but the compiler accepts any string here, so callers should
+   * handle unrecognized reasons.
+   */
+  finishReason?: 'stop' | 'length' | 'tool_call' | 'content_filter' | string;
+}
+/** A single chunk from a streaming chat call. */
+interface ChatChunk {
+  /** Incremental text delta. Empty for chunks that only carry tool deltas. */
+  content: string;
+  /**
+   * Partial tool call delta, if the model is building one.
+   *
+   * Note that `argumentsDelta` is a string, unlike the parsed
+   * `arguments` object on `ChatResponse.toolCalls` — presumably a
+   * fragment of serialized arguments that the consumer accumulates
+   * across chunks; confirm against the provider implementation.
+   */
+  toolCallDelta?: {
+    id: string;
+    name?: string;
+    argumentsDelta?: string;
+  };
+  /** True on the final chunk. */
+  done: boolean;
+}
+/**
+ * Input to `AiProvider.embed()`.
+ *
+ * Accepts a single string or an array of strings. The response does
+ * NOT mirror the input shape: `AiProvider.embed()` is declared to
+ * return `number[][]` in both cases, so a single string yields an
+ * array containing one vector rather than a bare vector.
+ */
+type EmbedInput = string | string[];
+/**
+ * Input to `AiProvider.tool()` for one-shot tool execution outside the
+ * normal chat flow. Useful for programmatic workflows where you know
+ * which tool to call but want provider-specific argument normalization.
+ *
+ * `AiProvider.tool` is an optional method, so callers need to check
+ * that the active provider implements it before passing this input.
+ */
+interface ToolCallInput {
+  name: string;
+  arguments: Record<string, unknown>;
+}
+/** Response from the optional `AiProvider.tool()` method. */
+interface ToolCallResponse {
+  /** The raw tool result. Typed `unknown` because the shape depends on the tool. */
+  result: unknown;
+  /** Whether the provider considers the call successful. */
+  ok: boolean;
+}
+/**
+ * Provider abstraction. All built-in providers (OpenAI, Anthropic,
+ * Google, Ollama) implement this interface. Users can also implement
+ * it for custom/internal providers.
+ */
+interface AiProvider {
+  /** Provider identifier — `'openai'`, `'anthropic'`, `'google'`, `'ollama'`, or a custom string. */
+  name: string;
+  /** Non-streaming chat call. */
+  chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
+  /** Streaming chat call. Yields chunks until `done: true`. */
+  stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
+  /**
+   * Generate embeddings. The declared return type is always an array
+   * of vectors (`number[][]`), whether the input was a single string
+   * or an array of strings.
+   */
+  embed(input: EmbedInput): Promise<number[][]>;
+  /** One-shot tool execution. Optional — providers may omit. */
+  tool?(input: ToolCallInput): Promise<ToolCallResponse>;
+}
+/** Options for the `AiAdapter` constructor. */
+interface AiAdapterOptions {
+  /** The active provider. Registered under the `AI_PROVIDER` DI token. */
+  provider: AiProvider;
+  /**
+   * Default chat options applied to every call unless overridden at
+   * the call site. Useful for setting a project-wide temperature or
+   * model. The type is every `ChatOptions` field plus an optional
+   * `model`.
+   */
+  defaults?: ChatOptions & {
+    model?: string;
+  };
+}
+/**
+ * Options for the `@AiTool` decorator.
+ *
+ * Marks a controller method as callable by the LLM. The input schema
+ * is inferred from the route's `body` Zod schema, so you normally
+ * don't repeat it here; `inputSchema` exists as an override for
+ * routes that have no Zod body.
+ */
+interface AiToolOptions {
+  /** Tool name override. Defaults to `<ControllerName>.<methodName>`. */
+  name?: string;
+  /** Human-readable description shown to the LLM at tool-call time. */
+  description: string;
+  /** Optional input schema override if the route has no Zod body. */
+  inputSchema?: ZodTypeAny;
+}
+/**
+ * Resolved AI tool definition built by the adapter's startup scan.
+ *
+ * Bundles the tool's wire-format definition (`ChatToolDefinition`,
+ * i.e. `name`, `description`, `inputSchema`) with the HTTP routing
+ * info needed for dispatch (`httpMethod` + `mountPath`).
+ * `AiAdapter.runAgent` hands `ChatToolDefinition[]` to the provider
+ * and keeps `httpMethod`/`mountPath` internal for the dispatch loop.
+ */
+interface AiToolDefinition extends ChatToolDefinition {
+  /** HTTP method of the underlying route. */
+  httpMethod: string;
+  /** Full mount path of the underlying route (after apiPrefix + version). */
+  mountPath: string;
+}
+/**
+ * Options for `AiAdapter.runAgent()`.
+ *
+ * Runs a tool-calling loop: the provider responds, any tool calls are
+ * dispatched through the Express pipeline, results are fed back, and
+ * the loop continues until the model returns plain text or the
+ * `maxSteps` cap is hit.
+ *
+ * Inherits `temperature`, `maxTokens`, `topP`, `stopSequences`, and
+ * `signal` from `ChatOptions`.
+ */
+interface RunAgentOptions extends ChatOptions {
+  /** Starting conversation. System prompt can be the first message. */
+  messages: ChatMessage[];
+  /** Model override. Defaults to the provider's configured default. */
+  model?: string;
+  /**
+   * Tools the agent can call. Defaults to `'auto'` — every tool in
+   * the adapter's `@AiTool` registry. Pass an explicit array to
+   * restrict the agent to a subset.
+   */
+  tools?: 'auto' | ChatToolDefinition[];
+  /**
+   * Maximum number of chat → tool-call → dispatch → feedback cycles
+   * before the loop gives up. Prevents runaway loops on broken tool
+   * call behavior. Defaults to 8.
+   */
+  maxSteps?: number;
+}
+/** Result of `AiAdapter.runAgent()` — the final assistant response. */
+interface RunAgentResult {
+  /** The assistant's final text output after all tool calls resolved. */
+  content: string;
+  /** The full message history including tool calls and results. */
+  messages: ChatMessage[];
+  /** Number of chat iterations the loop ran before terminating. */
+  steps: number;
+  /**
+   * Aggregated usage across every provider call in the loop.
+   * Optional — presumably absent when the provider reports no usage
+   * (`ChatResponse.usage` is itself optional); confirm against the
+   * adapter implementation.
+   */
+  usage?: {
+    promptTokens: number;
+    completionTokens: number;
+    totalTokens: number;
+  };
+  /** True if the loop stopped because `maxSteps` was reached. */
+  maxStepsReached?: boolean;
+}
+//#endregion
+//#region src/memory/types.d.ts
+/**
+ * Chat memory contract.
+ *
+ * Memory is the persistence layer for multi-turn conversations. The
+ * agent loop inside `runAgent` maintains history WITHIN a single call,
+ * but a real chatbot needs to remember what the user said in their
+ * previous request — that's the job of this interface.
+ *
+ * Every backend (in-memory, sliding window, Drizzle, Redis) implements
+ * this same contract, so swapping storage is a DI binding change and
+ * nothing else. Services stay identical regardless of whether memory
+ * lives in a `Map`, a Postgres row, or a Redis list.
+ *
+ * ### Session scoping
+ *
+ * The interface has no session concept itself — every `ChatMemory`
+ * instance is implicitly scoped to one conversation. Services that
+ * serve multiple users construct one memory instance per session,
+ * typically via a factory bound to the request scope or a
+ * `sessionId` parameter on the backend.
+ *
+ * ### Message metadata
+ *
+ * The interface declares no type parameters: messages are stored and
+ * returned as plain `ChatMessage` values.
+ * NOTE(review): backends such as Drizzle or Redis may want per-message
+ * metadata (timestamps, speaker IDs, audit info), but there is no
+ * generic slot for it on this contract — confirm whether that is
+ * intentional.
+ *
+ * @module @forinda/kickjs-ai/memory/types
+ */
+interface ChatMemory {
+  /** Short identifier for logs and debug UIs. */
+  readonly name: string;
+  /**
+   * Return the full message history in chronological order.
+   *
+   * The returned array should be safe to pass directly into
+   * `provider.chat({ messages })` — backends are responsible for
+   * returning the shape the framework expects without requiring
+   * callers to transform it.
+   */
+  get(): Promise<ChatMessage[]>;
+  /**
+   * Append one or more messages to the history.
+   *
+   * Backends should persist in insertion order. Arrays are accepted
+   * for efficiency — storing a batch in one round-trip is faster
+   * than N individual calls for most real databases.
+   */
+  add(message: ChatMessage | ChatMessage[]): Promise<void>;
+  /**
+   * Drop every message from this session.
+   *
+   * Called by the /chat/reset route pattern and by tests between
+   * cases. Backends that persist to an external store should commit
+   * the clear transactionally so partial deletes can't happen.
+   */
+  clear(): Promise<void>;
+  /**
+   * Optional: return the number of stored messages. Not every
+   * backend can compute this cheaply — Redis lists and in-memory
+   * arrays can, Drizzle can via COUNT(*), but long-tail stores may
+   * decline by not implementing the method at all. When implemented
+   * it resolves to a number, so callers should check that `size` is
+   * defined before calling it and treat its absence as "unknown".
+   */
+  size?(): Promise<number>;
+}
+/**
+ * Options for `AiAdapter.runAgentWithMemory()`.
+ *
+ * The helper wraps `runAgent` with an automatic "read history →
+ * append user message → run loop → persist assistant response" cycle
+ * so services don't have to manage the plumbing themselves. Most
+ * real chatbots end up writing this wrapper anyway; shipping it in
+ * the framework saves everyone that boilerplate.
+ */
+interface RunAgentWithMemoryOptions {
+  /** Memory backend for this conversation. Typically scoped to a request or session. */
+  memory: ChatMemory;
+  /** The user's message for this turn. */
+  userMessage: string;
+  /**
+   * System prompt to prepend IF the memory is empty — i.e. it's the
+   * first turn of the conversation. Skipped on subsequent turns so
+   * the model sees a single, stable system prompt for the session.
+   */
+  systemPrompt?: string;
+  /** Model override. Defaults to the provider's configured default. */
+  model?: string;
+  /**
+   * Tools the agent can call. Defaults to `'auto'` — every tool in
+   * the adapter's `@AiTool` registry.
+   */
+  tools?: 'auto' | ChatToolDefinition[];
+  /** Maximum chat → tool-call → dispatch cycles per turn. Defaults to 8. */
+  maxSteps?: number;
+  /**
+   * Runtime chat options passed through to the provider. This comment
+   * covers all five fields that follow (`temperature`, `maxTokens`,
+   * `topP`, `stopSequences`, `signal`); they repeat the fields of
+   * `ChatOptions` with the same names and types.
+   */
+  temperature?: number;
+  maxTokens?: number;
+  topP?: number;
+  stopSequences?: string[];
+  signal?: AbortSignal;
+  /**
+   * When true, tool call results written to memory preserve their
+   * full content. When false (the default), tool results are
+   * dropped from memory on the grounds that they're usually large
+   * API responses the user doesn't need to see on a later turn.
+   * Turn this on for debugging sessions or full-transcript replay.
+   */
+  persistToolResults?: boolean;
+}
+//#endregion
+//#region src/ai.adapter.d.ts
+/**
+ * Register an AI provider in the DI container, discover every
+ * `@AiTool`-decorated controller method, and run agent loops that
+ * dispatch tool calls through the Express pipeline.
+ *
+ * The adapter plays the same role for AI as the MCP adapter plays for
+ * external clients: it's the glue between the framework's metadata
+ * (Zod schemas, route decorators, DI container) and a runtime that
+ * can actually call LLMs and execute tools. Both adapters reuse the
+ * framework's `onRouteMount` hook to discover tools at startup.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [
+ *     new AiAdapter({
+ *       provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
+ *     }),
+ *   ],
+ * })
+ * ```
+ *
+ * Then in any service:
+ *
+ * ```ts
+ * @Service()
+ * class AgentService {
+ *   @Autowired() private readonly ai!: AiAdapter
+ *
+ *   async handleQuery(userPrompt: string) {
+ *     const result = await this.ai.runAgent({
+ *       messages: [
+ *         { role: 'system', content: 'You can create tasks via tools.' },
+ *         { role: 'user', content: userPrompt },
+ *       ],
+ *       tools: 'auto',  // use every @AiTool-decorated method
+ *     })
+ *     return result.content
+ *   }
+ * }
+ * ```
+ */
+declare class AiAdapter implements AppAdapter {
+  readonly name = "AiAdapter";
+  private readonly provider;
+  /** Controllers collected during the mount phase, in insertion order. */
+  private readonly mountedControllers;
+  /** Tool definitions built during `beforeStart` from `@AiTool` metadata. */
+  private readonly tools;
+  /**
+   * Base URL of the running KickJS HTTP server, captured in `afterStart`.
+   * Agent tool dispatch makes internal HTTP requests against this base
+   * URL so calls flow through the normal Express pipeline (middleware,
+   * validation, auth, logging, error handling).
+   */
+  private serverBaseUrl;
+  constructor(options: AiAdapterOptions);
+  /** Return the active provider. Useful for services that want the raw API. */
+  getProvider(): AiProvider;
+  /** Return the discovered tool registry. Primarily for tests and debug UIs. */
+  getTools(): readonly AiToolDefinition[];
+  /**
+   * Override the server base URL. Used by tests that spin up an
+   * ephemeral http.Server and can't rely on the framework's
+   * `afterStart` hook to supply it.
+   *
+   * The parameter also accepts `null` — presumably to clear a
+   * previously set URL; confirm against the implementation.
+   */
+  setServerBaseUrl(url: string | null): void;
+  /**
+   * Record every mounted controller so `beforeStart` can walk them
+   * looking for `@AiTool` decorations. We don't scan here because
+   * onRouteMount fires per-controller and we want the scan to run
+   * once against the full set.
+   */
+  onRouteMount(controller: Constructor, mountPath: string): void;
+  /**
+   * Register the provider in the DI container and run the tool scan.
+   *
+   * The adapter itself is also registered under its class constructor
+   * so services can inject the adapter directly (to call `runAgent`)
+   * while other services inject just the provider via `AI_PROVIDER`
+   * for plain `chat` / `embed` calls.
+   */
+  beforeStart({
+    container
+  }: AdapterContext): void;
+  /**
+   * Capture the running server's address so agent dispatch can make
+   * internal HTTP requests against the actual port. Runs after the
+   * HTTP server is listening, so `server.address()` returns a real
+   * `AddressInfo` here.
+   */
+  afterStart(ctx: AdapterContext): void;
+  /** Best-effort cleanup. Providers are currently stateless HTTP clients. */
+  shutdown(): Promise<void>;
+  /**
+   * Run a tool-calling agent loop.
+   *
+   * Calls the provider with the given messages and tools, dispatches
+   * any tool calls the model emits, feeds the results back into the
+   * conversation, and repeats until the model responds with plain text
+   * (no more tool calls) or `maxSteps` is reached.
+   *
+   * Tool dispatch goes through the Express pipeline via internal HTTP
+   * requests — same pattern as the MCP adapter — so middleware, auth,
+   * validation, logging, and error handling all apply to tool calls
+   * the same way they apply to external client requests.
+   *
+   * @example
+   * ```ts
+   * const result = await adapter.runAgent({
+   *   messages: [
+   *     { role: 'system', content: 'Create tasks the user asks for.' },
+   *     { role: 'user', content: 'Create a high-priority task titled Ship v3.' },
+   *   ],
+   *   tools: 'auto',
+   *   maxSteps: 5,
+   * })
+   * console.log(result.content)   // assistant's final reply
+   * console.log(result.messages)  // full history including tool calls
+   * console.log(result.steps)     // how many rounds it took
+   * ```
+   */
+  runAgent(options: RunAgentOptions): Promise<RunAgentResult>;
+  /**
+   * Memory-aware agent turn.
+   *
+   * Wraps `runAgent` with an automatic "read history → append user
+   * message → run loop → persist assistant response" cycle. Services
+   * that want multi-turn conversations don't need to manage the
+   * plumbing themselves — pass a `ChatMemory` and a user message,
+   * get back the agent's response, and the memory is updated.
+   *
+   * System prompt handling:
+   *   - If the memory is empty AND `systemPrompt` is provided, the
+   *     system prompt is persisted as the first message in the
+   *     session. It stays put for every subsequent turn.
+   *   - On follow-up turns, the existing system prompt is reused
+   *     from memory; the `systemPrompt` option is ignored to keep
+   *     the session persona stable.
+   *
+   * Tool result persistence:
+   *   - By default, tool messages are NOT persisted to memory —
+   *     they're usually large API responses the user doesn't need
+   *     on later turns, and including them blows up prompt tokens
+   *     unnecessarily. Set `persistToolResults: true` to keep them
+   *     (useful for debugging / full-transcript replay).
+   *   - Assistant messages with tool calls ARE persisted so the
+   *     conversation shows what the agent did.
+   *
+   * @example
+   * ```ts
+   * @Service()
+   * class ChatService {
+   *   @Autowired() private ai!: AiAdapter
+   *   private readonly memory = new InMemoryChatMemory()
+   *
+   *   async handle(userMessage: string) {
+   *     const result = await this.ai.runAgentWithMemory({
+   *       memory: this.memory,
+   *       userMessage,
+   *       systemPrompt: 'You are a helpful assistant.',
+   *       tools: 'auto',
+   *     })
+   *     return result.content
+   *   }
+   * }
+   * ```
+   */
+  runAgentWithMemory(options: RunAgentWithMemoryOptions): Promise<RunAgentResult>;
+  /**
+   * Expand an agent `tools` option to an explicit array. `'auto'`
+   * resolves to the full discovered registry; an explicit array is
+   * passed through unchanged (so callers can restrict the agent to a
+   * subset of tools).
+   */
+  private resolveTools;
+  /**
+   * Dispatch a single tool call through the Express pipeline by
+   * making an internal HTTP request matching the underlying route's
+   * method + path + body/query.
+   *
+   * Returns a `ChatMessage` with `role: 'tool'` suitable for feeding
+   * back into the next `provider.chat` call. Non-2xx responses are
+   * surfaced as tool error messages rather than throwing, so the
+   * agent loop can let the model recover.
+   */
+  private dispatchToolCall;
+  /**
+   * Build an `AiToolDefinition` for a route decorated with `@AiTool`.
+   * Skips routes without the decorator so the registry only exposes
+   * deliberately opted-in methods.
+   */
+  private tryBuildTool;
+  /**
+   * Join a module mount path with the route-level sub-path. Same
+   * helper as McpAdapter's — kept local so the two packages don't
+   * couple via a shared util file.
+   */
+  private joinMountPath;
+  /**
+   * Substitute Express-style `:param` placeholders in the mount path
+   * with values pulled from the tool call arguments. Consumed keys
+   * are removed from the remaining args so they aren't sent twice
+   * (once in the path, once in the body/query).
+   */
+  private substitutePathParams;
+  /**
+   * Resolve the running server's base URL from a Node `http.Server`
+   * instance. Same handling as McpAdapter: IPv6 bracketing, rewrite
+   * of 0.0.0.0/:: to 127.0.0.1.
+   */
+  private resolveServerBaseUrl;
+}
+//#endregion
+//#region src/decorators.d.ts
+/**
+ * Mark a controller method as an AI-callable tool.
+ *
+ * At startup, the `AiAdapter` records each mounted controller via its
+ * `onRouteMount` hook and, in `beforeStart`, walks them for this
+ * decorator to build a tool registry. When a service calls
+ * `ai.runAgent({ ..., tools: 'auto' })` on the adapter, the framework
+ * passes the registered tools to the provider, the model may call
+ * them, and the framework dispatches back through the normal Express
+ * pipeline — so tool calls go through auth, validation, and logging
+ * just like external HTTP requests.
+ *
+ * Note that `'auto'` is only resolved by `AiAdapter.runAgent`; a raw
+ * provider's `chat()` that receives `'auto'` omits tools entirely.
+ *
+ * The input schema is derived from the route's `body` Zod schema:
+ *
+ * @example
+ * ```ts
+ * import { Controller, Post, type Ctx } from '@forinda/kickjs'
+ * import { AiTool } from '@forinda/kickjs-ai'
+ * import { createTaskSchema } from './dtos/create-task.dto'
+ *
+ * @Controller('/tasks')
+ * export class TaskController {
+ *   @Post('/', { body: createTaskSchema, name: 'CreateTask' })
+ *   @AiTool({ description: 'Create a new task' })
+ *   create(ctx: Ctx<KickRoutes.TaskController['create']>) {
+ *     return this.createTaskUseCase.execute(ctx.body)
+ *   }
+ * }
+ * ```
+ */
+declare function AiTool(options: AiToolOptions): MethodDecorator;
+/**
+ * Read the AI tool metadata attached to a method, if any.
+ *
+ * @param target - Object on which to look up the method's metadata.
+ * @param method - Name of the method to inspect.
+ * @returns The `AiToolOptions` attached to the method, or `undefined`
+ *   when none is attached.
+ */
+declare function getAiToolMeta(target: object, method: string): AiToolOptions | undefined;
+/**
+ * Check whether a method was decorated with `@AiTool`.
+ *
+ * @param target - Object on which to look up the method's metadata.
+ * @param method - Name of the method to inspect.
+ * @returns `true` if the method was decorated with `@AiTool`.
+ */
+declare function isAiTool(target: object, method: string): boolean;
+//#endregion
+//#region src/constants.d.ts
+/**
+ * Metadata key for the `@AiTool` decorator.
+ *
+ * Using `createToken` for metadata keys (rather than a raw `Symbol`)
+ * gives a collision-safe, type-carrying identifier: the phantom type
+ * parameter flows through `getMethodMetaOrUndefined` so consumers get
+ * `AiToolOptions` back without a manual cast, and reference-equality
+ * guarantees that two separate definitions can never shadow each other
+ * even if the package is loaded more than once.
+ *
+ * NOTE(review): this generated declaration types the constant as
+ * `any`, so the phantom type described above is not visible to
+ * consumers of this declaration file — confirm whether the build is
+ * expected to emit the token's real type.
+ */
+declare const AI_TOOL_METADATA: any;
+/**
+ * DI token for the active AI provider.
+ *
+ * Injected via `@Inject(AI_PROVIDER)` in services or use-cases that
+ * need to call an LLM. The adapter registers the concrete provider
+ * (OpenAI, Anthropic, Google, Ollama) during `beforeStart`.
+ *
+ * NOTE(review): the constant is declared as `any` here, so the token
+ * itself carries no type information — the injected value's type
+ * comes only from the annotation at the injection site (`AiProvider`
+ * in the example below).
+ *
+ * @example
+ * ```ts
+ * @Service()
+ * export class SummarizeService {
+ *   constructor(@Inject(AI_PROVIDER) private ai: AiProvider) {}
+ *
+ *   async summarize(text: string) {
+ *     const res = await this.ai.chat({
+ *       messages: [
+ *         { role: 'system', content: 'Summarize in 2 sentences.' },
+ *         { role: 'user', content: text },
+ *       ],
+ *     })
+ *     return res.content
+ *   }
+ * }
+ * ```
+ */
+declare const AI_PROVIDER: any;
+/**
+ * DI token for the active vector store backend.
+ *
+ * Injected via `@Inject(VECTOR_STORE)` in services that need
+ * retrieval-augmented generation. The adapter does not register a
+ * default — users bind the backend they want at bootstrap time,
+ * typically `InMemoryVectorStore` for development/tests and
+ * `PgVectorStore` / `QdrantStore` / `PineconeStore` for production.
+ *
+ * NOTE(review): the constant is declared as `any` here, so the token
+ * itself carries no type information for the bound store.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import {
+ *   AiAdapter,
+ *   InMemoryVectorStore,
+ *   OpenAIProvider,
+ *   VECTOR_STORE,
+ * } from '@forinda/kickjs-ai'
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [
+ *     new AiAdapter({
+ *       provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
+ *     }),
+ *   ],
+ *   plugins: [
+ *     {
+ *       name: 'vector-store',
+ *       register: (container) => {
+ *         container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
+ *       },
+ *     },
+ *   ],
+ * })
+ * ```
+ */
+declare const VECTOR_STORE: any;
+//#endregion
+//#region src/providers/openai.d.ts
+/**
+ * Configuration for the built-in OpenAI provider.
+ *
+ * The base URL is configurable so the same provider class can target
+ * any OpenAI-compatible endpoint — Azure OpenAI, Ollama's
+ * `/v1/chat/completions` shim, OpenRouter, vLLM, and so on. The
+ * provider only assumes the wire shape, not the hostname.
+ *
+ * Only `apiKey` is required; every other field is optional.
+ */
+interface OpenAIProviderOptions {
+  /** API key sent as `Authorization: Bearer <apiKey>`. Required. */
+  apiKey: string;
+  /** Override base URL. Defaults to `https://api.openai.com/v1`. */
+  baseURL?: string;
+  /** Default chat model used when `ChatInput.model` is not set. */
+  defaultChatModel?: string;
+  /** Default embedding model used by `embed()`. */
+  defaultEmbedModel?: string;
+  /**
+   * OpenAI organization header. Optional. Some accounts need it; most
+   * don't. If unset, the header is omitted entirely.
+   */
+  organization?: string;
+  /**
+   * Provider name to expose on `provider.name`. Defaults to `'openai'`
+   * but can be overridden to label compatible endpoints — e.g.
+   * `'ollama'` if pointing baseURL at a local Ollama instance.
+   */
+  name?: string;
+}
+/**
+ * Built-in OpenAI provider.
+ *
+ * Implements the framework's `AiProvider` interface using nothing but
+ * the global `fetch` API (Node 20+). Translates the framework's
+ * normalized chat shape to OpenAI's `/chat/completions` wire format
+ * and back, including streaming via SSE.
+ *
+ * Tool calling is wired in this provider: `chat()` surfaces tool
+ * calls via `ChatResponse.toolCalls` and `stream()` via
+ * `ChatChunk.toolCallDelta`, leaving execution to the caller. The
+ * loop that actually invokes tools and feeds results back to the
+ * model is `AiAdapter.runAgent`.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [
+ *     new AiAdapter({
+ *       provider: new OpenAIProvider({
+ *         apiKey: getEnv('OPENAI_API_KEY'),
+ *         defaultChatModel: 'gpt-4o-mini',
+ *       }),
+ *     }),
+ *   ],
+ * })
+ * ```
+ */
+declare class OpenAIProvider implements AiProvider {
+  readonly name: string;
+  private readonly baseURL;
+  private readonly defaultChatModel;
+  private readonly defaultEmbedModel;
+  /**
+   * Full header map passed to every request. Includes the bearer auth
+   * header and the optional openai-organization header. Constructed
+   * once in the constructor so per-call code just spreads it into the
+   * fetch init.
+   */
+  private readonly headers;
+  constructor(options: OpenAIProviderOptions);
+  /**
+   * Non-streaming chat completion.
+   *
+   * Translates the framework's `ChatInput` to OpenAI's chat completion
+   * payload, posts it, and normalizes the response back to a
+   * `ChatResponse`. Tool calls are surfaced on the response so callers
+   * can decide whether to feed them back into a tool registry.
+   */
+  chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
+  /**
+   * Streaming chat completion. Yields `ChatChunk`s as deltas arrive
+   * over the wire and emits one final chunk with `done: true` after
+   * the upstream `[DONE]` sentinel.
+   *
+   * Cancellation via `options.signal` is supported end-to-end — the
+   * underlying fetch is aborted and the consumer's `for await` loop
+   * throws `AbortError`.
+   */
+  stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
+  /**
+   * Generate embeddings for a string or array of strings.
+   *
+   * Returns vectors in input order. Single-string input still gets a
+   * length-1 array back, so callers can use the same indexed access
+   * pattern regardless of input shape.
+   */
+  embed(input: EmbedInput): Promise<number[][]>;
+  private buildChatPayload;
+  /**
+   * Translate a framework `ChatMessage` to OpenAI's wire format.
+   * Handles the `tool` role and the `tool_calls` field on assistant
+   * messages, both of which use slightly different shapes than the
+   * normalized form on `ChatMessage`.
+   */
+  private toOpenAIMessage;
+  /**
+   * Normalize an OpenAI chat completion response back to the
+   * framework's `ChatResponse` shape.
+   */
+  private normalizeChatResponse;
+  /**
+   * Extract the first tool-call delta from an OpenAI streaming chunk.
+   *
+   * The `tool_calls` array in a delta chunk can contain partial state
+   * for multiple parallel tool calls; this method picks the first one
+   * with a non-empty payload, which is enough for the v0 streaming
+   * surface. Multi-tool streaming is a follow-up.
+   */
+  private firstToolCallDelta;
+}
+//#endregion
+//#region src/providers/anthropic.d.ts
+/**
+ * Configuration for the Anthropic provider.
+ *
+ * The base URL is configurable so the same class can target an
+ * Anthropic-compatible proxy, an internal gateway that adds auth
+ * headers, or an air-gapped deployment. The provider only assumes
+ * Anthropic's Messages API wire shape, not the hostname.
+ *
+ * Only `apiKey` is required; every other field is optional.
+ */
+interface AnthropicProviderOptions {
+  /** API key sent as `x-api-key`. Required. */
+  apiKey: string;
+  /** Override base URL. Defaults to `https://api.anthropic.com/v1`. */
+  baseURL?: string;
+  /** Default chat model used when `ChatInput.model` is not set. */
+  defaultChatModel?: string;
+  /** Anthropic API version header. Defaults to `'2023-06-01'`. */
+  apiVersion?: string;
+  /**
+   * Default `max_tokens` for responses. Anthropic requires an explicit
+   * max_tokens on every request; the framework's ChatOptions.maxTokens
+   * takes precedence when set, but this supplies a fallback so callers
+   * don't have to set it every time. Defaults to 4096.
+   */
+  defaultMaxTokens?: number;
+  /** Provider name override. Defaults to `'anthropic'`. */
+  name?: string;
+}
+/**
+ * Built-in Anthropic provider.
+ *
+ * Implements the framework's `AiProvider` interface using Anthropic's
+ * Messages API (`/v1/messages`). Translates the normalized
+ * `ChatInput` shape to and from Anthropic's content-block format,
+ * including tool calling and streaming.
+ *
+ * ### Differences from OpenAI
+ *
+ * Anthropic's API has a few quirks the provider translates away:
+ *
+ * - **System prompt is separated.** The framework puts system
+ *   messages in the `messages` array; Anthropic wants them in a
+ *   top-level `system` field. The provider extracts the first system
+ *   message and filters out any others.
+ * - **Content is always a block array.** Even simple text replies
+ *   are wrapped in `[{ type: 'text', text: '...' }]`. The provider
+ *   flattens text blocks to a single string on the response.
+ * - **Tool calls use `tool_use` content blocks, not a separate
+ *   `tool_calls` field.** Normalization pulls them out of the
+ *   response content and into `ChatResponse.toolCalls`.
+ * - **Tool results are `user` messages with `tool_result` content
+ *   blocks**, not a `'tool'` role. The provider handles the
+ *   translation both ways.
+ * - **`max_tokens` is required on every request.** Framework
+ *   `ChatOptions.maxTokens` wins; otherwise falls back to
+ *   `defaultMaxTokens` (default 4096).
+ *
+ * ### Embeddings
+ *
+ * Anthropic does not ship an embeddings API. Calling `embed()` on
+ * this provider throws a descriptive error — users who need
+ * embeddings should construct a separate provider (OpenAI's
+ * `text-embedding-3-small` is a good default) and bind it
+ * alongside the Anthropic chat provider.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, AnthropicProvider } from '@forinda/kickjs-ai'
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [
+ *     new AiAdapter({
+ *       provider: new AnthropicProvider({
+ *         apiKey: getEnv('ANTHROPIC_API_KEY'),
+ *         defaultChatModel: 'claude-opus-4-6',
+ *       }),
+ *     }),
+ *   ],
+ * })
+ * ```
+ */
+declare class AnthropicProvider implements AiProvider {
+  readonly name: string;
+  private readonly baseURL;
+  private readonly defaultChatModel;
+  private readonly defaultMaxTokens;
+  private readonly headers;
+  constructor(options: AnthropicProviderOptions);
+  /**
+   * Non-streaming chat completion.
+   *
+   * Builds the Anthropic Messages payload, posts it, and normalizes
+   * the response back to the framework's `ChatResponse` shape.
+   *
+   * @param input - Normalized chat request to translate and send
+   * @param options - Per-call options; `maxTokens` here overrides the
+   *   provider's `defaultMaxTokens`
+   * @returns The normalized response, with text blocks flattened to a
+   *   single string and `tool_use` blocks surfaced as `toolCalls`
+   */
+  chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
+  /**
+   * Streaming chat completion. Yields `ChatChunk`s as Anthropic
+   * events arrive and emits a final chunk with `done: true` after
+   * the `message_stop` event.
+   *
+   * Anthropic's SSE stream uses distinct event types instead of the
+   * single-channel deltas OpenAI sends:
+   *
+   *   - `message_start` — session init, carries model + id
+   *   - `content_block_start` — new text or tool_use block begins
+   *   - `content_block_delta` — incremental text or partial tool JSON
+   *   - `content_block_stop` — block complete
+   *   - `message_delta` — stop_reason + final usage
+   *   - `message_stop` — end of stream
+   *
+   * The provider cares about text deltas (for streaming content) and
+   * input_json deltas (for tool call argument streaming). Everything
+   * else is noise for our purposes and gets filtered.
+   *
+   * @param input - Normalized chat request to translate and send
+   * @param options - Per-call options, same as for `chat`
+   * @returns An async iterable of chunks; the last one carries
+   *   `done: true`
+   */
+  stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
+  /**
+   * Anthropic does not ship an embeddings API. Throws a descriptive
+   * error rather than silently returning an empty vector — embedding
+   * workflows should use a dedicated provider (OpenAI text-embedding-3-*
+   * is the common pick) and bind it alongside this one in the
+   * `AI_PROVIDER` token registry if needed.
+   *
+   * @throws Always — this provider has no embeddings support
+   */
+  embed(_input: EmbedInput): Promise<number[][]>;
+  private buildMessagesPayload;
+  /**
+   * Extract the first system message from the framework's messages
+   * array and return it separately — Anthropic puts system prompts
+   * in a top-level `system` field, not in `messages`. Any additional
+   * system messages are dropped on the grounds that models handle
+   * one persona prompt per call and concatenating them silently
+   * would produce confusing behavior.
+   */
+  private splitSystemMessage;
+  /**
+   * Translate a framework `ChatMessage` to Anthropic's wire format.
+   *
+   * User and plain assistant messages become content blocks with a
+   * single `text` entry. Assistant messages with tool calls become
+   * a block list mixing `text` and `tool_use` entries. Framework
+   * `'tool'` role messages become Anthropic `'user'` messages with
+   * a `tool_result` block — that's how Anthropic represents tool
+   * call responses.
+   */
+  private toAnthropicMessage;
+  /**
+   * Normalize an Anthropic response back to the framework's
+   * `ChatResponse`. Flattens text content blocks into a single
+   * string and pulls `tool_use` blocks out into `toolCalls`.
+   */
+  private normalizeResponse;
+}
+//#endregion
+//#region src/providers/base.d.ts
+/**
+ * Provider-side helpers shared by every built-in `AiProvider`
+ * implementation.
+ *
+ * Each provider in `packages/ai/src/providers/` implements the
+ * `AiProvider` interface from `../types`. This file holds the bits
+ * that all of them need: HTTP error mapping, JSON parsing, SSE line
+ * splitting for streaming responses. Keeping these here means each
+ * provider's main file stays focused on the wire-format translation
+ * specific to its vendor.
+ */
+/**
+ * Error thrown by built-in providers when the upstream API returns a
+ * non-2xx status. Carries the HTTP status, the raw response body, and
+ * a parsed error object when available, so callers can branch on
+ * specific failure modes (auth, rate limit, content filter, etc.).
+ *
+ * Fields:
+ *   - `status` — HTTP status of the failed response
+ *   - `body` — raw response body text
+ *   - `parsedBody` — parsed form of the body; `undefined` when no
+ *     parsed object is available
+ *
+ * NOTE(review): the constructor takes no `parsedBody` argument, so it
+ * is presumably derived from `body` (e.g. JSON-parsed) — confirm
+ * against the implementation. `message` is optional; its default is
+ * not visible from this declaration.
+ */
+declare class ProviderError extends Error {
+  readonly status: number;
+  readonly body: string;
+  readonly parsedBody?: unknown;
+  constructor(status: number, body: string, message?: string);
+}
+//#endregion
+//#region src/prompts/prompt.d.ts
+/**
+ * Options for `createPrompt` and for the `Prompt` constructor, which
+ * accepts the same object. Every field is optional.
+ */
+interface CreatePromptOptions {
+  /**
+   * Short identifier used in logs, errors, and typegen output.
+   * Defaults to `'prompt'` if not provided — give every non-trivial
+   * template a real name so error messages point to the right place.
+   */
+  name?: string;
+  /**
+   * Message role the rendered prompt produces. Defaults to `'user'`.
+   * Set to `'system'` for persona / instruction prompts.
+   */
+  role?: ChatMessage['role'];
+  /**
+   * How missing variables at render time are handled:
+   *   - `'throw'` (default): throw a descriptive error. Catches bugs
+   *     early and matches what most users expect.
+   *   - `'warn'`: leave the placeholder as-is and log a warning via
+   *     console.warn. Useful for templates with optional sections
+   *     that the caller might not fill in.
+   *   - `'silent'`: leave the placeholder as-is and don't warn.
+   */
+  onMissing?: 'throw' | 'warn' | 'silent';
+}
+/**
+ * A reusable prompt template with `{{variable}}` placeholders and
+ * a typed variables object at the render site.
+ *
+ * The type parameter `TVars` is a record of the variables the
+ * template expects. Callers pass it explicitly:
+ *
+ * ```ts
+ * const summarize = createPrompt<{ text: string; sentenceCount: number }>(
+ *   'Summarize the following in {{sentenceCount}} sentences:\n\n{{text}}',
+ *   { name: 'summarize' },
+ * )
+ *
+ * const msg = summarize.render({ text: 'Long article...', sentenceCount: 3 })
+ * // → { role: 'user', content: 'Summarize the following in 3 sentences:\n\nLong article...' }
+ * ```
+ *
+ * TypeScript catches missing or mistyped variables at compile time:
+ *
+ * ```ts
+ * summarize.render({ text: 'x' })           // ✗ missing sentenceCount
+ * summarize.render({ text: 'x', count: 3 }) // ✗ wrong key name
+ * ```
+ *
+ * @remarks
+ * Runtime-only in v0 — the type parameter is opt-in and has to be
+ * provided explicitly. Workstream 5 adds a `kick typegen` pass that
+ * scans `createPrompt` call sites and generates the TVars shape
+ * automatically, so you can write `createPrompt('...')` and get
+ * the types for free.
+ */
+declare class Prompt<TVars extends Record<string, unknown> = Record<string, unknown>> {
+  readonly name: string;
+  readonly role: ChatMessage['role'];
+  private readonly template;
+  private readonly onMissing;
+  constructor(template: string, options?: CreatePromptOptions);
+  /**
+   * Substitute variables into the template and return a
+   * ready-to-use `ChatMessage`.
+   *
+   * Placeholder syntax is `{{name}}` — double curly braces around
+   * the variable name. Whitespace inside the braces is ignored
+   * (`{{ name }}` works too). Unknown variables in the template
+   * are left as-is, so Markdown or code blocks that happen to use
+   * `{{` for their own reasons don't break.
+   *
+   * NOTE(review): "left as-is" matches the `'warn'` and `'silent'`
+   * modes of `onMissing`; under the default `'throw'` a placeholder
+   * with no matching variable presumably throws instead — confirm
+   * which placeholders count as "missing" versus "unknown".
+   *
+   * @param vars - Values substituted for the template's placeholders
+   * @returns A `ChatMessage` with this prompt's `role` and the
+   *   rendered text as `content`
+   * @throws If `onMissing === 'throw'` and a required variable is absent
+   */
+  render(vars: TVars): ChatMessage;
+  /**
+   * Same as `render` but returns the raw string instead of wrapping
+   * it in a `ChatMessage`. Useful for building composite messages
+   * where several templates contribute to a single string.
+   *
+   * @param vars - Values substituted for the template's placeholders
+   * @returns The rendered text only
+   */
+  renderString(vars: TVars): string;
+  /** Return the raw template string. Useful for debugging and snapshot tests. */
+  getTemplate(): string;
+  /**
+   * Return the set of placeholder names the template references.
+   *
+   * Mostly useful for testing and for tooling that wants to show
+   * users what variables a prompt takes. Not a substitute for the
+   * compile-time type check — templates can always reference
+   * variables that aren't in TVars; this helper reads the string,
+   * not the type.
+   *
+   * @returns The placeholder names found in the template string
+   */
+  getPlaceholders(): string[];
+  private handleMissing;
+}
+/**
+ * Construct a reusable prompt template.
+ *
+ * Thin factory for the `Prompt` class — keeps call sites short and
+ * matches the naming convention of other kickjs-ai factories
+ * (`createToken`, etc.). Use the class form directly if you need
+ * subclassing or custom rendering logic.
+ *
+ * @typeParam TVars - Record of the variables the template expects;
+ *   has to be supplied explicitly to get compile-time checking at
+ *   the render site
+ * @param template - Template text with `{{variable}}` placeholders
+ * @param options - Optional name, role and missing-variable policy
+ * @returns A `Prompt` typed by `TVars`
+ *
+ * @example
+ * ```ts
+ * import { createPrompt } from '@forinda/kickjs-ai'
+ *
+ * const persona = createPrompt<{ name: string; tone: string }>(
+ *   'You are {{name}}, a {{tone}} assistant.',
+ *   { role: 'system', name: 'persona' },
+ * )
+ *
+ * const msg = persona.render({ name: 'Claude', tone: 'concise' })
+ * ```
+ */
+declare function createPrompt<TVars extends Record<string, unknown> = Record<string, unknown>>(template: string, options?: CreatePromptOptions): Prompt<TVars>;
+//#endregion
+//#region src/memory/in-memory.d.ts
+/**
+ * Zero-dependency in-memory chat memory.
+ *
+ * Backed by a plain array. Each instance represents ONE conversation
+ * — services that serve multiple sessions construct one instance per
+ * session, typically via a `sessionId → memory` map in a parent
+ * service or a request-scoped DI factory.
+ *
+ * Good for:
+ *   - Tests and prototypes
+ *   - Single-process CLI tools
+ *   - Short-lived request handlers that don't outlive the HTTP response
+ *
+ * Not good for:
+ *   - Multi-replica deployments (memory isn't shared across pods)
+ *   - Sessions that need to survive a restart
+ *   - Anything with a compliance retention policy
+ *
+ * For any of those, swap in a persistent backend (Drizzle, Redis,
+ * Postgres) that implements the same `ChatMemory` interface — the
+ * calling service doesn't change.
+ *
+ * Every method returns a Promise. `add` accepts either a single
+ * message or an array of messages.
+ *
+ * @example
+ * ```ts
+ * import { InMemoryChatMemory } from '@forinda/kickjs-ai'
+ *
+ * const memory = new InMemoryChatMemory()
+ * await memory.add({ role: 'user', content: 'hello' })
+ * const history = await memory.get()
+ * ```
+ */
+declare class InMemoryChatMemory implements ChatMemory {
+  readonly name = "in-memory";
+  private messages;
+  get(): Promise<ChatMessage[]>;
+  add(message: ChatMessage | ChatMessage[]): Promise<void>;
+  clear(): Promise<void>;
+  size(): Promise<number>;
+}
+//#endregion
+//#region src/memory/sliding-window.d.ts
+/**
+ * Options for `SlidingWindowChatMemory`.
+ */
+interface SlidingWindowChatMemoryOptions {
+  /** Underlying memory to wrap. */
+  inner: ChatMemory;
+  /**
+   * Maximum number of messages to keep in the sliding window. The
+   * LAST `maxMessages` messages are retained; anything older is
+   * dropped on every `get()` call and on every `add()` that pushes
+   * the count past the cap.
+   *
+   * A typical value is 20 — enough for several user/assistant
+   * exchanges with tool call overhead, short enough to keep prompt
+   * tokens under control. Tune up or down based on model context
+   * window and cost sensitivity.
+   */
+  maxMessages: number;
+  /**
+   * Whether to treat the FIRST system message as pinned — i.e. never
+   * evict it, even when the window would otherwise cap it out.
+   *
+   * This matches the common pattern of putting a single persona /
+   * instruction prompt at the start of every conversation. Without
+   * pinning, a long session would eventually drop the system prompt
+   * and the model would lose its instructions.
+   *
+   * The pinned message counts toward `maxMessages`: when it is kept,
+   * the remaining `maxMessages - 1` slots hold the most recent
+   * messages after it.
+   *
+   * Defaults to `true` because forgetting the system prompt is
+   * almost never what users want.
+   */
+  pinSystemPrompt?: boolean;
+}
+/**
+ * Sliding-window memory wrapper.
+ *
+ * Wraps any `ChatMemory` implementation with a bounded history: only
+ * the most recent N messages survive. Older messages are evicted on
+ * every `get()` and after every `add()` that pushes the count past
+ * the cap. The first system message is pinned by default so long
+ * sessions don't lose their persona; a pinned message occupies one
+ * of the N slots, leaving N - 1 for the most recent messages.
+ *
+ * Use this to keep prompt token usage predictable without writing
+ * eviction logic in every service. It composes with any backend —
+ * in-memory, Drizzle, Redis — because it only touches the inner
+ * memory through its public interface.
+ *
+ * @example
+ * ```ts
+ * import { InMemoryChatMemory, SlidingWindowChatMemory } from '@forinda/kickjs-ai'
+ *
+ * const memory = new SlidingWindowChatMemory({
+ *   inner: new InMemoryChatMemory(),
+ *   maxMessages: 20,
+ *   pinSystemPrompt: true,
+ * })
+ * ```
+ *
+ * @remarks
+ * Eviction writes back to the inner memory via `clear()` + `add()`.
+ * That's fine for in-memory backends where clearing is O(1), but
+ * costs a round-trip for network-backed stores. If you're wrapping
+ * a remote backend, consider an inner memory that supports native
+ * trimming — the wrapper's contract assumes clear+add is cheap.
+ */
+declare class SlidingWindowChatMemory implements ChatMemory {
+  readonly name: string;
+  private readonly inner;
+  private readonly maxMessages;
+  private readonly pinSystemPrompt;
+  constructor(options: SlidingWindowChatMemoryOptions);
+  get(): Promise<ChatMessage[]>;
+  add(message: ChatMessage | ChatMessage[]): Promise<void>;
+  clear(): Promise<void>;
+  size(): Promise<number>;
+  /**
+   * Apply the sliding window to an array of messages, returning the
+   * bounded view. Pure function so both `get()` and `add()` can use
+   * the same logic.
+   *
+   * When `pinSystemPrompt` is set and the first message is a system
+   * message, we keep it AND fill the remaining `maxMessages - 1`
+   * slots with the most recent messages after it. Otherwise we just
+   * take the tail of the array.
+   */
+  private applyWindow;
+}
+//#endregion
+//#region src/rag/types.d.ts
+/**
+ * RAG primitive types.
+ *
+ * The `VectorStore` interface is the contract every backend (in-memory,
+ * pgvector, Qdrant, Pinecone) implements. The framework's own `RagService`
+ * takes any `VectorStore` + an `AiProvider` and produces retrieval-
+ * augmented chat helpers, so swapping storage backends is a one-line
+ * change to the DI binding — services that consume `VECTOR_STORE` stay
+ * the same.
+ *
+ * The shapes here are deliberately minimal. Vendor-specific features
+ * (hybrid search, reranking, sparse vectors) live on the concrete
+ * implementations as extensions, not on this interface.
+ *
+ * @module @forinda/kickjs-ai/rag/types
+ */
+/**
+ * A single document stored in a vector store.
+ *
+ * The `content` field carries the original text — the vector alone
+ * isn't enough because RAG retrieval needs to feed the original text
+ * back into the LLM context. `metadata` is the escape hatch for
+ * anything the application wants to filter or track (author, date,
+ * tags, tenant ID, etc.).
+ *
+ * This is the shape `VectorStore.upsert` accepts. Search results
+ * come back as `VectorSearchHit`, which has no `vector` field and
+ * adds a `score`.
+ *
+ * @typeParam M — the metadata shape; defaults to a loose record so
+ * users don't need to parameterize the type unless they want the
+ * extra rigor.
+ */
+interface VectorDocument<M extends Record<string, unknown> = Record<string, unknown>> {
+  /** Unique identifier — repeated upsert with the same id replaces the previous version. */
+  id: string;
+  /** Original text the vector was computed from. */
+  content: string;
+  /** Dense embedding. Length must match the store's configured dimensions. */
+  vector: number[];
+  /** Optional arbitrary metadata used for filtering and display. */
+  metadata?: M;
+}
+/**
+ * A single search result from `VectorStore.query`.
+ *
+ * `score` is normalized across backends: higher = more similar.
+ * Cosine similarity returns values in [-1, 1]; most backends clamp to
+ * [0, 1] for usability. Services should treat the number as a
+ * monotonic rank, not an absolute probability.
+ *
+ * A hit has no `vector` field. NOTE(review): `id`, `content` and
+ * `metadata` are presumably those of the matching `VectorDocument` —
+ * confirm against the store implementations.
+ */
+interface VectorSearchHit<M extends Record<string, unknown> = Record<string, unknown>> {
+  id: string;
+  content: string;
+  score: number;
+  metadata?: M;
+}
+/**
+ * Options for `VectorStore.query`.
+ *
+ * `filter` is an equality map against `metadata` — backends that
+ * support richer predicates (range, $in, $not) should accept them
+ * here as well, using the MongoDB-style operator prefix convention.
+ * The in-memory store implements equality only, which is enough for
+ * most v0 use cases.
+ *
+ * The pgvector and Qdrant stores document one extension: an array
+ * value (`{ key: [v1, v2] }`) matches when the metadata value equals
+ * any element of the array.
+ */
+interface VectorQueryOptions {
+  /** The embedding of the query text. */
+  vector: number[];
+  /** Maximum number of hits to return. Defaults to 5. */
+  topK?: number;
+  /** Metadata equality filter. Hits whose metadata doesn't match are dropped. */
+  filter?: Record<string, unknown>;
+  /** Drop hits whose score falls below this threshold. */
+  minScore?: number;
+}
+/**
+ * Vector store contract. Backends:
+ *   - `InMemoryVectorStore` — in-package, zero deps, perfect for tests
+ *     and prototypes; up to a few thousand docs before linear scan hurts
+ *   - pgvector — runs inside any Postgres 13+ KickJS project (follow-up commit)
+ *   - Qdrant — dedicated vector DB with payload filtering (follow-up commit)
+ *   - Pinecone — managed cloud service (follow-up commit)
+ *
+ * Implementations must honor two contracts: upserts are idempotent on
+ * id, and query results are ordered by descending score.
+ *
+ * `count` is declared optional, so code written against this
+ * interface has to check for it before calling (`store.count?.()`).
+ */
+interface VectorStore<M extends Record<string, unknown> = Record<string, unknown>> {
+  /** Short identifier for logs, e.g. `'in-memory'`, `'pgvector'`. */
+  readonly name: string;
+  /**
+   * Insert or replace one or more documents. Re-upserting an existing
+   * id overwrites its vector, content, and metadata.
+   */
+  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
+  /**
+   * Search for the nearest vectors. Results are ordered by descending
+   * score, capped at `options.topK` (default 5), and filtered by
+   * `options.filter` / `options.minScore` if provided.
+   */
+  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
+  /** Remove documents by id. Missing ids are silently ignored. */
+  delete(id: string | string[]): Promise<void>;
+  /** Clear every document from the store. Mostly for tests and admin tools. */
+  deleteAll(): Promise<void>;
+  /** Optional count — not every backend supports it cheaply. */
+  count?(): Promise<number>;
+}
+/**
+ * Input to `RagService.index`.
+ *
+ * Same fields as `VectorDocument` except that there is no `vector`.
+ * NOTE(review): presumably the service computes the embedding from
+ * `content` before storing — confirm against `RagService`.
+ */
+interface RagIndexInput<M extends Record<string, unknown> = Record<string, unknown>> {
+  id: string;
+  content: string;
+  metadata?: M;
+}
+/**
+ * Options for `RagService.search` / `RagService.augmentChatInput`.
+ *
+ * Carries the same `topK`, `filter` and `minScore` fields as
+ * `VectorQueryOptions`, without the query `vector`.
+ */
+interface RagSearchOptions {
+  /** Maximum number of documents to retrieve. Defaults to 5. */
+  topK?: number;
+  /** Metadata equality filter forwarded to the underlying store. */
+  filter?: Record<string, unknown>;
+  /** Drop hits whose similarity score falls below this threshold. */
+  minScore?: number;
+}
+/** Options for `RagService.augmentChatInput`. */
+interface RagAugmentOptions extends RagSearchOptions {
+  /**
+   * Template for the retrieved-context system message. `{documents}`
+   * is replaced with the concatenated document contents. If omitted,
+   * a sensible default is used.
+   *
+   * NOTE(review): the placeholder is written with single braces
+   * here, unlike the `{{name}}` syntax `Prompt` templates use —
+   * confirm against the implementation.
+   */
+  systemTemplate?: string;
+  /**
+   * When true, prepend the context as a NEW system message. When false
+   * (the default), merge into the first existing system message or
+   * prepend if none exists. The merge path avoids producing chat
+   * histories with two competing system prompts, which confuses models.
+   */
+  asSeparateSystemMessage?: boolean;
+}
+//#endregion
+//#region src/rag/in-memory.d.ts
+/**
+ * Zero-dependency in-memory vector store.
+ *
+ * Backed by a plain `Map<string, VectorDocument>` with a linear-scan
+ * cosine-similarity search. Perfect for tests, prototypes, CLI tools,
+ * and any project with a bounded corpus (roughly < 10k documents
+ * before the scan starts taking more than a handful of milliseconds).
+ *
+ * For production workloads with larger corpora, swap in the pgvector,
+ * Qdrant, or Pinecone store — the `VectorStore` interface is the same,
+ * so services that consume `VECTOR_STORE` don't need to change.
+ *
+ * @example
+ * ```ts
+ * import { InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
+ *
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
+ * ```
+ *
+ * The class is entirely synchronous under the hood but wraps each
+ * method in a Promise so it matches the async interface every other
+ * backend implements. This keeps the calling code uniform regardless
+ * of which backend is wired in.
+ *
+ * Implements the optional `count()` method of `VectorStore`.
+ */
+declare class InMemoryVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
+  readonly name = "in-memory";
+  private readonly docs;
+  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
+  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
+  delete(id: string | string[]): Promise<void>;
+  deleteAll(): Promise<void>;
+  count(): Promise<number>;
+}
+/**
+ * Cosine similarity between two vectors. Returns a value in [-1, 1]
+ * where 1 means identical direction, 0 means orthogonal, -1 means
+ * opposite. The function is symmetric and scale-invariant.
+ *
+ * Returns 0 for length mismatches or zero-magnitude vectors rather
+ * than throwing — callers get a useless hit they can filter out via
+ * `minScore`, but the store doesn't crash on bad input.
+ *
+ * @param a - First vector
+ * @param b - Second vector
+ * @returns The similarity in [-1, 1], or 0 when the lengths differ or
+ *   either vector has zero magnitude
+ */
+declare function cosineSimilarity(a: number[], b: number[]): number;
+//#endregion
+//#region src/rag/pgvector.d.ts
+/**
+ * Minimal SQL executor contract.
+ *
+ * Covers everything `PgVectorStore` needs from a Postgres client: a
+ * single `query(text, params)` call that returns rows. The shape is
+ * deliberately narrower than node-postgres's `Pool.query` so it can
+ * be satisfied by any of:
+ *
+ *   - `pg.Pool` / `pg.Client` (node-postgres)
+ *   - `drizzle.$client` (the underlying pool on the Drizzle adapter)
+ *   - `postgres.js` (by @porsager, via a small adapter)
+ *   - A unit-test fake that records calls
+ *
+ * Users who already have a Postgres connection somewhere in their
+ * app can hand it to the store without installing `pg` twice.
+ *
+ * `query` takes the SQL text plus optional positional parameters and
+ * resolves to an object whose `rows` array holds the result rows,
+ * typed by the `T` type argument (`unknown` when omitted).
+ */
+interface SqlExecutor {
+  query<T = unknown>(text: string, params?: unknown[]): Promise<{
+    rows: T[];
+  }>;
+}
+/**
+ * Options for `PgVectorStore`.
+ *
+ * Exactly one of `client` or `connectionString` must be provided. If
+ * `connectionString` is set, the store dynamically imports `pg` on
+ * first use and creates its own pool; `pg` must be installed as a
+ * peer dep in that case. If `client` is set, the store uses the
+ * supplied executor and never touches `pg` directly.
+ *
+ * NOTE(review): both fields are typed as optional, so the "exactly
+ * one" rule is not enforced by the compiler — presumably it is
+ * checked at runtime; confirm against the implementation.
+ */
+interface PgVectorStoreOptions {
+  /** Pre-made SQL executor — any object with a `query(text, params)` method. */
+  client?: SqlExecutor;
+  /** Connection string used to create a pg.Pool if `client` is not provided. */
+  connectionString?: string;
+  /** Vector dimensionality. Must match the embedding model. Required. */
+  dimensions: number;
+  /** Postgres schema. Defaults to `'public'`. */
+  schema?: string;
+  /** Table name. Defaults to `'kickjs_embeddings'`. */
+  table?: string;
+  /**
+   * Skip the first-use schema bootstrap (`CREATE EXTENSION IF NOT
+   * EXISTS vector; CREATE TABLE IF NOT EXISTS ...`). Set this to true
+   * if you manage migrations manually or run in a read-only role.
+   */
+  skipSetup?: boolean;
+  /**
+   * Provider name to expose on `store.name`. Defaults to `'pgvector'`
+   * but can be overridden to label a Postgres-compatible backend
+   * (e.g. `'timescale'`, `'cockroach-vector'`).
+   */
+  name?: string;
+}
+/**
+ * pgvector-backed `VectorStore` implementation.
+ *
+ * Stores documents in a single table with a `vector` column indexed
+ * via pgvector's native operators. Cosine similarity is the scoring
+ * metric — computed as `1 - (vector <=> query_vector)` because the
+ * `<=>` operator returns cosine DISTANCE, not similarity.
+ *
+ * ### Lazy initialization
+ *
+ * The Postgres pool and schema are set up on first use, not in the
+ * constructor. That keeps the constructor synchronous, matches the
+ * rest of the `VectorStore` implementations, and lets users construct
+ * the store inside a module's `register(container)` method without
+ * awaiting inside DI resolution.
+ *
+ * ### Schema
+ *
+ * The default schema is:
+ *
+ * ```sql
+ * CREATE EXTENSION IF NOT EXISTS vector;
+ * CREATE TABLE IF NOT EXISTS <schema>.<table> (
+ *   id TEXT PRIMARY KEY,
+ *   content TEXT NOT NULL,
+ *   vector vector(<dimensions>) NOT NULL,
+ *   metadata JSONB
+ * );
+ * ```
+ *
+ * No index is created by default — pgvector's IVFFlat and HNSW
+ * indexes benefit from being created AFTER data is loaded, and the
+ * right choice depends on corpus size. Users should add an index
+ * themselves in a real migration when they're ready:
+ *
+ * ```sql
+ * CREATE INDEX ON kickjs_embeddings
+ *   USING hnsw (vector vector_cosine_ops);
+ * ```
+ *
+ * ### Metadata filtering
+ *
+ * Filters are translated to JSONB WHERE clauses:
+ *   - Scalar: `metadata->>'key' = $N` (coerced to text)
+ *   - Array:  `metadata->>'key' = ANY($N::text[])`
+ *
+ * Keys are validated against `[a-zA-Z0-9_.-]+` before being
+ * interpolated into SQL — anything else throws. Values go through
+ * parameter binding, so SQL injection via values is not possible.
+ *
+ * @example
+ * ```ts
+ * import { Pool } from 'pg'
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, PgVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
+ *
+ * const pool = new Pool({ connectionString: getEnv('DATABASE_URL') })
+ * const store = new PgVectorStore({ client: pool, dimensions: 1536 })
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [new AiAdapter({ provider })],
+ *   plugins: [
+ *     {
+ *       name: 'pgvector',
+ *       register: (container) => {
+ *         container.registerInstance(VECTOR_STORE, store)
+ *       },
+ *     },
+ *   ],
+ * })
+ * ```
+ */
+declare class PgVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
+  readonly name: string;
+  private readonly dimensions;
+  private readonly schema;
+  private readonly table;
+  private readonly fullyQualified;
+  private readonly skipSetup;
+  private client;
+  private readonly connectionString;
+  private setupPromise;
+  constructor(options: PgVectorStoreOptions);
+  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
+  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
+  delete(id: string | string[]): Promise<void>;
+  deleteAll(): Promise<void>;
+  count(): Promise<number>;
+  /**
+   * Release the internal connection pool, if the store created one.
+   *
+   * If the caller supplied their own `client`, this is a no-op —
+   * lifecycle of a user-owned pool stays with the user. This method
+   * is intentionally not on the `VectorStore` interface because most
+   * backends don't need explicit teardown; services that want to
+   * clean up call it via an adapter.shutdown hook.
+   */
+  close(): Promise<void>;
+  /**
+   * Ensure the pool exists and the schema is set up. Called by every
+   * public method before running any SQL. The setup migration runs
+   * at most once per store instance — subsequent calls reuse the
+   * cached promise.
+   */
+  private ensureReady;
+  /**
+   * Dynamically import `pg` and create a Pool from the configured
+   * connection string. Imported lazily so users who supply their own
+   * `client` never force `pg` to be installed.
+   *
+   * Throws a friendly error if `pg` is not installed — the same
+   * graceful-degradation pattern the CLI uses for optional packages.
+   */
+  private createPoolFromConnectionString;
+  /**
+   * Run the schema bootstrap: enable the pgvector extension, create
+   * the embeddings table if it doesn't exist, and nothing else.
+   *
+   * Indexes are deliberately not created here — pgvector's IVFFlat
+   * and HNSW indexes perform best when created after data is loaded,
+   * and the right choice depends on corpus size. Users should add
+   * their index in a real migration when they're ready.
+   */
+  private runSchemaSetup;
+}
+/**
+ * Serialize a JS number array to pgvector's wire format: a string
+ * like `'[0.1,0.2,0.3]'`. The `pg` driver doesn't know about vectors
+ * so we have to stringify ourselves and cast with `::vector` in the
+ * SQL. Non-finite values become `0` rather than `null` or `NaN` —
+ * pgvector rejects non-finite values in inserts.
+ *
+ * @param vector - Numbers to serialize
+ * @returns The bracketed, comma-separated literal, e.g. `'[0.1,0.2,0.3]'`
+ */
+declare function toPgVector(vector: number[]): string;
+/**
+ * Translate a metadata filter into a WHERE clause + bound parameters.
+ *
+ * - Scalar values become `metadata->>'key' = $N`
+ * - Array values become `metadata->>'key' = ANY($N::text[])`
+ *
+ * Keys must match `[a-zA-Z0-9_.-]+` — anything else is rejected. All
+ * values are coerced to string before binding, because `->>` returns
+ * text. Callers that need numeric range queries should issue raw SQL
+ * via their own executor; this helper covers the equality-case 90%.
+ *
+ * Exported for unit testing.
+ *
+ * @param filter - Metadata equality map, or `undefined` for no filter
+ * @param startAt - NOTE(review): presumably the number N of the first
+ *   `$N` placeholder to emit, so the clause can follow parameters the
+ *   caller has already bound — confirm against the implementation
+ * @returns `whereSql`, the SQL fragment, and `whereParams`, the values
+ *   to bind to its placeholders. NOTE(review): the exact output for an
+ *   `undefined` or empty filter is not visible from this declaration.
+ */
+declare function buildWhereClause(filter: Record<string, unknown> | undefined, startAt: number): {
+  whereSql: string;
+  whereParams: unknown[];
+};
+//#endregion
+//#region src/rag/qdrant.d.ts
+/**
+ * Options for `QdrantVectorStore`.
+ *
+ * Qdrant exposes a REST API under `/collections/{name}` — this store
+ * talks to it directly with `fetch`, so no client SDK is needed. The
+ * `apiKey` (sent as the `api-key` header) is optional because
+ * self-hosted Qdrant instances often run without auth; managed
+ * Qdrant Cloud always requires one.
+ */
+interface QdrantVectorStoreOptions {
+  /** Base URL of the Qdrant HTTP API. Defaults to `http://localhost:6333`. */
+  url?: string;
+  /** API key sent as `api-key` header. Optional for local/self-hosted. */
+  apiKey?: string;
+  /** Collection name. Required — Qdrant does not have a default collection. */
+  collection: string;
+  /** Vector dimensionality. Must match the embedding model. Required. */
+  dimensions: number;
+  /**
+   * Distance metric for the collection on first create. Qdrant supports
+   * `Cosine`, `Dot`, `Euclid`, and `Manhattan`. Defaults to `'Cosine'`
+   * since that's what every OpenAI/Anthropic-compatible embedding
+   * model ships.
+   */
+  distance?: 'Cosine' | 'Dot' | 'Euclid' | 'Manhattan';
+  /**
+   * Skip the first-use collection bootstrap. Turn this on if the
+   * collection is managed by your infra team or provisioned via
+   * Terraform, and the runtime role doesn't have create permission.
+   */
+  skipSetup?: boolean;
+  /** Provider name override. Defaults to `'qdrant'`. */
+  name?: string;
+}
+/**
+ * Qdrant-backed `VectorStore` implementation.
+ *
+ * Qdrant stores vectors as "points" inside a named "collection". Each
+ * point has an id, a dense vector, and an arbitrary JSON "payload" —
+ * the store uses the payload to carry both the original `content`
+ * string (so RAG retrieval can feed text back to the LLM) and the
+ * `metadata` record.
+ *
+ * ### Filtering
+ *
+ * The framework's equality-map filter (`{ key: value }` or
+ * `{ key: [v1, v2] }`) is translated into Qdrant's `filter.must`
+ * conditions against `payload.metadata.<key>`. Scalar values become
+ * `match: { value }`, arrays become `match: { any: [...] }`. Users
+ * who need richer queries (nested, range, should/must_not) can bypass
+ * this by extending the class, but equality covers the 90% case.
+ *
+ * ### Lazy collection creation
+ *
+ * On first write, the store calls `PUT /collections/{name}` with
+ * `vectors: { size, distance }` — idempotent, so it's safe to run on
+ * every boot. Pass `skipSetup: true` if your cluster is provisioned
+ * externally and the runtime API key doesn't have create permission.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, QdrantVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
+ *
+ * const store = new QdrantVectorStore({
+ *   url: getEnv('QDRANT_URL'),
+ *   apiKey: getEnv('QDRANT_API_KEY'),
+ *   collection: 'docs',
+ *   dimensions: 1536,
+ * })
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [new AiAdapter({ provider })],
+ *   plugins: [
+ *     {
+ *       name: 'qdrant',
+ *       register: (container) => {
+ *         container.registerInstance(VECTOR_STORE, store)
+ *       },
+ *     },
+ *   ],
+ * })
+ * ```
+ */
+declare class QdrantVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
+  readonly name: string;
+  private readonly url;
+  private readonly collection;
+  private readonly dimensions;
+  private readonly distance;
+  private readonly headers;
+  private readonly skipSetup;
+  /**
+   * Cached bootstrap promise. The first method call triggers collection
+   * creation; every subsequent call awaits the same promise so a
+   * successful check happens only once per store instance (this is an
+   * instance member, not a static one). On failure we clear the cache
+   * so the next call can retry (networks blink, DNS flaps).
+   *
+   * NOTE(review): the class-level docs say setup runs "on first write"
+   * while this comment says "first method call" — confirm whether reads
+   * (`query`, `count`) also trigger setup.
+   */
+  private setupPromise;
+  /** @param options - Connection and collection settings; see `QdrantVectorStoreOptions`. */
+  constructor(options: QdrantVectorStoreOptions);
+  /** Write documents to the collection. Accepts a single document or an array. */
+  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
+  /** Run a vector search described by `options` and resolve to the matching hits. */
+  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
+  /** Delete by id. Accepts a single id or an array of ids. */
+  delete(id: string | string[]): Promise<void>;
+  /**
+   * NOTE(review): presumably removes every point while keeping the
+   * collection itself — confirm against the implementation.
+   */
+  deleteAll(): Promise<void>;
+  /**
+   * Resolves to a number. NOTE(review): presumably the number of points
+   * stored in the collection — confirm against the implementation.
+   */
+  count(): Promise<number>;
+  /**
+   * Thin wrapper around `fetch` that applies the shared headers, JSON
+   * encodes the body, and maps non-2xx responses to `Error` instances
+   * with the response body attached for debugging. Matches the shape
+   * used by `providers/base.ts`, kept local here so the RAG module has
+   * no dependency on the provider internals.
+   */
+  private request;
+  /**
+   * Create the collection on first use. The `PUT /collections/{name}`
+   * endpoint is idempotent — calling it on an existing collection is a
+   * no-op with status 200. We cache the promise so concurrent callers
+   * share the same in-flight request and every subsequent call resolves
+   * immediately.
+   *
+   * NOTE(review): verify the "no-op with status 200" claim against the
+   * Qdrant API reference for the targeted server version — if creating
+   * an already-existing collection returns a conflict error instead,
+   * the implementation must tolerate that response.
+   */
+  private ensureCollection;
+  private runSetup;
+}
+/**
+ * Translate the framework's equality-map filter into Qdrant's
+ * `must` condition format.
+ *
+ * Scalars become `{ key, match: { value } }`. Arrays become
+ * `{ key, match: { any: [...] } }`. Keys are interpreted as paths into
+ * `payload.metadata`, matching how `upsert` nests the metadata record.
+ *
+ * Exported so tests (and future richer filter builders) can verify the
+ * translation without going through a live Qdrant instance.
+ *
+ * @param filter - Equality-map filter (`{ key: value }` or
+ *   `{ key: [v1, v2] }`).
+ * @returns An object whose `must` array holds the translated
+ *   conditions.
+ */
+declare function buildQdrantFilter(filter: Record<string, unknown>): {
+  must: Array<Record<string, unknown>>;
+};
+//#endregion
+//#region src/rag/pinecone.d.ts
+/**
+ * Options for `PineconeVectorStore`.
+ *
+ * Unlike Qdrant, Pinecone does not have a "create collection on first
+ * use" endpoint that's cheap to call — the index must be provisioned
+ * separately (via the Pinecone dashboard, API, or Terraform) before
+ * the store can use it. Every Pinecone index has its own hostname,
+ * which the SDK normally looks up; this store requires the caller to
+ * pass it directly via `indexHost` so there's zero runtime dependency
+ * on the Pinecone client.
+ */
+interface PineconeVectorStoreOptions {
+  /** Required API key, sent as `Api-Key` header. */
+  apiKey: string;
+  /**
+   * Fully qualified hostname for the Pinecone index, e.g.
+   * `my-index-abcdef1.svc.us-east-1-aws.pinecone.io`. Find it in
+   * the Pinecone dashboard or via the `describe_index` API. The
+   * scheme is optional — the store adds `https://` if it's missing.
+   */
+  indexHost: string;
+  /**
+   * Namespace for all operations. Pinecone partitions indexes with
+   * namespaces; omitting this uses the default (empty) namespace.
+   * Most multi-tenant apps use one namespace per tenant.
+   */
+  namespace?: string;
+  /**
+   * Vector dimensionality. Required — used to validate upsert shapes.
+   * NOTE(review): presumably must equal the dimensionality the index
+   * was provisioned with — confirm.
+   */
+  dimensions: number;
+  /** Provider name override. Defaults to `'pinecone'`. */
+  name?: string;
+}
+/**
+ * Pinecone-backed `VectorStore` implementation.
+ *
+ * Pinecone stores vectors with a flat id, a dense vector, and an
+ * arbitrary metadata object. Like the Qdrant store, this store uses
+ * that metadata to carry both the original `content` (for RAG
+ * retrieval) and the application's own metadata fields — they're
+ * merged into one Pinecone metadata record at write time and split
+ * back apart at read time.
+ *
+ * ### Filtering
+ *
+ * Pinecone has a native filter DSL that looks almost identical to
+ * MongoDB's — `{ key: { $eq: value } }`, `{ key: { $in: [...] } }`,
+ * etc. The framework's equality-map filter is translated directly:
+ * scalars become `$eq` and arrays become `$in`. Users who need the
+ * full DSL (range, $ne, $or) can pass a raw Pinecone filter through
+ * the same `filter` field — the translator is a no-op when the keys
+ * start with `$`, so advanced filters pass through unchanged.
+ *
+ * ### Index provisioning
+ *
+ * Pinecone indexes must be created out-of-band. This store does NOT
+ * provision indexes automatically — the dimensionality, metric, and
+ * pod type are infrastructure decisions that should live in
+ * Terraform or the Pinecone dashboard, not in runtime code.
+ *
+ * @example
+ * ```ts
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
+ * import { AiAdapter, PineconeVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
+ *
+ * const store = new PineconeVectorStore({
+ *   apiKey: getEnv('PINECONE_API_KEY'),
+ *   indexHost: getEnv('PINECONE_INDEX_HOST'),
+ *   dimensions: 1536,
+ *   namespace: 'docs',
+ * })
+ *
+ * export const app = await bootstrap({
+ *   modules,
+ *   adapters: [new AiAdapter({ provider })],
+ *   plugins: [
+ *     {
+ *       name: 'pinecone',
+ *       register: (container) => {
+ *         container.registerInstance(VECTOR_STORE, store)
+ *       },
+ *     },
+ *   ],
+ * })
+ * ```
+ */
+declare class PineconeVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
+  readonly name: string;
+  private readonly baseURL;
+  private readonly namespace;
+  private readonly dimensions;
+  private readonly headers;
+  /** @param options - Connection and index settings; see `PineconeVectorStoreOptions`. */
+  constructor(options: PineconeVectorStoreOptions);
+  /** Write documents to the index. Accepts a single document or an array. */
+  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
+  /** Run a vector search described by `options` and resolve to the matching hits. */
+  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
+  /** Delete by id. Accepts a single id or an array of ids. */
+  delete(id: string | string[]): Promise<void>;
+  /**
+   * NOTE(review): presumably removes every vector in the configured
+   * namespace only — confirm the scope against the implementation.
+   */
+  deleteAll(): Promise<void>;
+  /**
+   * Resolves to a number. NOTE(review): presumably the vector count for
+   * the configured namespace — confirm against the implementation.
+   */
+  count(): Promise<number>;
+  /**
+   * POST a JSON body to the Pinecone data-plane and return the parsed
+   * JSON response. Every Pinecone data-plane endpoint uses POST even
+   * for reads (`/query`, `/describe_index_stats`), so the helper
+   * doesn't bother parameterizing the method.
+   */
+  private request;
+}
+/**
+ * Translate the framework's equality-map filter into Pinecone's
+ * MongoDB-style filter DSL.
+ *
+ * Rules:
+ *   - Scalar value           → `{ key: { $eq: value } }`
+ *   - Array value            → `{ key: { $in: [...] } }`
+ *   - Key that starts with $ → passed through untouched, letting
+ *     callers hand-craft `{ $or: [...] }` or range conditions
+ *     without the translator mangling them
+ *   - Value already shaped like `{ $eq, $in, $gt, ... }` → passed
+ *     through untouched for the same reason
+ *
+ * Exported so tests can verify the translation offline.
+ *
+ * @param filter - Equality-map filter, optionally mixed with raw
+ *   Pinecone operators as described in the rules above.
+ * @returns The filter object in Pinecone's DSL.
+ */
+declare function buildPineconeFilter(filter: Record<string, unknown>): Record<string, unknown>;
+//#endregion
+//#region src/rag/rag-service.d.ts
+/**
+ * High-level RAG helper that ties an `AiProvider` (for embeddings)
+ * to a `VectorStore` (for retrieval) and produces the three operations
+ * every RAG-powered service needs: index documents, search by query,
+ * and augment a chat input with retrieved context.
+ *
+ * The service itself is a thin orchestrator — all the storage and
+ * model calls go through the injected interfaces, so swapping
+ * backends (in-memory → pgvector, OpenAI → Ollama) is a DI binding
+ * change, not a code change.
+ *
+ * @example
+ * ```ts
+ * import { Service, Inject } from '@forinda/kickjs'
+ * import { AI_PROVIDER, VECTOR_STORE, RagService } from '@forinda/kickjs-ai'
+ * import type { AiProvider, VectorStore } from '@forinda/kickjs-ai'
+ *
+ * @Service()
+ * class DocsService {
+ *   private readonly rag: RagService
+ *
+ *   constructor(
+ *     @Inject(AI_PROVIDER) provider: AiProvider,
+ *     @Inject(VECTOR_STORE) store: VectorStore,
+ *   ) {
+ *     this.rag = new RagService(provider, store)
+ *   }
+ *
+ *   async ingest(articles: Array<{ id: string; body: string }>) {
+ *     await this.rag.index(articles.map((a) => ({ id: a.id, content: a.body })))
+ *   }
+ *
+ *   async ask(question: string) {
+ *     const input = await this.rag.augmentChatInput(
+ *       { messages: [{ role: 'user', content: question }] },
+ *       question,
+ *       { topK: 3 },
+ *     )
+ *     // The constructor parameter `provider` is not in scope here, so
+ *     // reuse the one held by the RagService.
+ *     const res = await this.rag.getProvider().chat(input)
+ *     return res.content
+ *   }
+ * }
+ * ```
+ */
+declare class RagService<M extends Record<string, unknown> = Record<string, unknown>> {
+  private readonly provider;
+  private readonly store;
+  /**
+   * @param provider - Provider used to embed documents and queries.
+   * @param store - Vector store that documents are written to and searched in.
+   */
+  constructor(provider: AiProvider, store: VectorStore<M>);
+  /** Underlying provider — exposed for services that want to reuse it for chat. */
+  getProvider(): AiProvider;
+  /** Underlying store — useful for admin tools that want raw access. */
+  getStore(): VectorStore<M>;
+  /**
+   * Index a batch of documents: embed each one's content via the
+   * provider, then upsert into the store. Embedding happens in a
+   * single batched call, which is both faster and cheaper than one
+   * call per document for most providers.
+   *
+   * Documents with empty content are skipped rather than failing the
+   * whole batch — the store can't meaningfully retrieve empty strings
+   * and silently dropping them matches what users usually expect when
+   * a content field turns out to be blank.
+   *
+   * @param docs - Documents to embed and store.
+   */
+  index(docs: RagIndexInput<M>[]): Promise<void>;
+  /**
+   * Search the store for documents relevant to a natural-language
+   * query. Embeds the query once, then delegates to the store's
+   * `query` method with the resolved vector.
+   *
+   * @param query - Natural-language search text.
+   * @param options - Optional search settings; see `RagSearchOptions`.
+   * @returns The hits returned by the store.
+   */
+  search(query: string, options?: RagSearchOptions): Promise<VectorSearchHit<M>[]>;
+  /**
+   * Retrieve relevant documents for a query and inject them into a
+   * `ChatInput` as a system message. Returns a new input — the
+   * original is not mutated.
+   *
+   * Two injection modes:
+   *   - Merge (default): prepend the context to the first existing
+   *     system message if one exists, otherwise add a new one. Avoids
+   *     producing chat histories with competing system prompts.
+   *   - Separate (`asSeparateSystemMessage: true`): always insert a
+   *     new system message at the start. Useful when the existing
+   *     system prompt is small and you want to keep roles distinct.
+   *
+   * If no documents are retrieved, the input is returned unchanged.
+   *
+   * @param input - Chat input to augment; it is not mutated.
+   * @param query - Text used to retrieve the context documents. It is
+   *   passed separately from `input`, so it does not have to be taken
+   *   from the chat messages.
+   * @param options - Optional retrieval/injection settings; see
+   *   `RagAugmentOptions`.
+   * @returns The chat input with the retrieved context injected.
+   */
+  augmentChatInput(input: ChatInput, query: string, options?: RagAugmentOptions): Promise<ChatInput>;
+}
+//#endregion
+export { AI_PROVIDER, AI_TOOL_METADATA, AiAdapter, type AiAdapterOptions, type AiProvider, AiTool, type AiToolDefinition, type AiToolOptions, AnthropicProvider, type AnthropicProviderOptions, type ChatChunk, type ChatInput, type ChatMemory, type ChatMessage, type ChatOptions, type ChatResponse, type ChatToolDefinition, type CreatePromptOptions, type EmbedInput, InMemoryChatMemory, InMemoryVectorStore, OpenAIProvider, type OpenAIProviderOptions, PgVectorStore, type PgVectorStoreOptions, PineconeVectorStore, type PineconeVectorStoreOptions, Prompt, ProviderError, QdrantVectorStore, type QdrantVectorStoreOptions, type RagAugmentOptions, type RagIndexInput, type RagSearchOptions, RagService, type RunAgentOptions, type RunAgentResult, type RunAgentWithMemoryOptions, SlidingWindowChatMemory, type SlidingWindowChatMemoryOptions, type SqlExecutor, type ToolCallInput, type ToolCallResponse, VECTOR_STORE, type VectorDocument, type VectorQueryOptions, type VectorSearchHit, type VectorStore, buildPineconeFilter, buildQdrantFilter, buildWhereClause, cosineSimilarity, createPrompt, getAiToolMeta, isAiTool, toPgVector };
+//# sourceMappingURL=index.d.mts.map