@forinda/kickjs-ai 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,2474 @@
1
+ /**
2
+ * @forinda/kickjs-ai v2.3.0
3
+ *
4
+ * Copyright (c) Felix Orinda
5
+ *
6
+ * This source code is licensed under the MIT license found in the
7
+ * LICENSE file in the root directory of this source tree.
8
+ *
9
+ * @license MIT
10
+ */
11
+ import { Logger, METADATA, Scope, createToken, getClassMeta, getMethodMetaOrUndefined, setMethodMeta } from "@forinda/kickjs";
12
+ //#region src/constants.ts
13
+ /**
14
+ * Metadata key for the `@AiTool` decorator.
15
+ *
16
+ * Using `createToken` for metadata keys (rather than a raw `Symbol`)
17
+ * gives a collision-safe, type-carrying identifier: the phantom type
18
+ * parameter flows through `getMethodMetaOrUndefined` so consumers get
19
+ * `AiToolOptions` back without a manual cast, and reference-equality
20
+ * guarantees that two separate definitions can never shadow each other
21
+ * even if the package is loaded more than once.
22
+ */
23
+ const AI_TOOL_METADATA = createToken("kickjs.ai.tool");
24
+ /**
25
+ * DI token for the active AI provider.
26
+ *
27
+ * Injected via `@Inject(AI_PROVIDER)` in services or use-cases that
28
+ * need to call an LLM. The adapter registers the concrete provider
29
+ * (OpenAI, Anthropic, Google, Ollama) during `beforeStart`.
30
+ *
31
+ * @example
32
+ * ```ts
33
+ * @Service()
34
+ * export class SummarizeService {
35
+ * constructor(@Inject(AI_PROVIDER) private ai: AiProvider) {}
36
+ *
37
+ * async summarize(text: string) {
38
+ * const res = await this.ai.chat({
39
+ * messages: [
40
+ * { role: 'system', content: 'Summarize in 2 sentences.' },
41
+ * { role: 'user', content: text },
42
+ * ],
43
+ * })
44
+ * return res.content
45
+ * }
46
+ * }
47
+ * ```
48
+ */
49
+ const AI_PROVIDER = createToken("kickjs.ai.provider");
50
+ /**
51
+ * DI token for the active vector store backend.
52
+ *
53
+ * Injected via `@Inject(VECTOR_STORE)` in services that need
54
+ * retrieval-augmented generation. The adapter does not register a
55
+ * default — users bind the backend they want at bootstrap time,
56
+ * typically `InMemoryVectorStore` for development/tests and
57
+ * `PgVectorStore` / `QdrantStore` / `PineconeStore` for production.
58
+ *
59
+ * @example
60
+ * ```ts
61
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
62
+ * import { AiAdapter, InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
63
+ *
64
+ * export const app = await bootstrap({
65
+ * modules,
66
+ * adapters: [
67
+ * new AiAdapter({
68
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
69
+ * }),
70
+ * ],
71
+ * plugins: [
72
+ * {
73
+ * name: 'vector-store',
74
+ * register: (container) => {
75
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
76
+ * },
77
+ * },
78
+ * ],
79
+ * })
80
+ * ```
81
+ */
82
+ const VECTOR_STORE = createToken("kickjs.ai.vector_store");
83
+ //#endregion
84
+ //#region src/decorators.ts
85
+ /**
86
+ * Mark a controller method as an AI-callable tool.
87
+ *
88
+ * At startup, the `AiAdapter` scans all `@Controller` classes in the
89
+ * DI container for this decorator and builds a tool registry. When a
90
+ * service calls `ai.chat({ ..., tools: 'auto' })`, the framework
91
+ * passes the registered tools to the provider, the model may call
92
+ * them, and the framework dispatches back through the normal Express
93
+ * pipeline — so tool calls go through auth, validation, and logging
94
+ * just like external HTTP requests.
95
+ *
96
+ * The input schema is derived from the route's `body` Zod schema:
97
+ *
98
+ * @example
99
+ * ```ts
100
+ * import { Controller, Post, type Ctx } from '@forinda/kickjs'
101
+ * import { AiTool } from '@forinda/kickjs-ai'
102
+ * import { createTaskSchema } from './dtos/create-task.dto'
103
+ *
104
+ * @Controller('/tasks')
105
+ * export class TaskController {
106
+ * @Post('/', { body: createTaskSchema, name: 'CreateTask' })
107
+ * @AiTool({ description: 'Create a new task' })
108
+ * create(ctx: Ctx<KickRoutes.TaskController['create']>) {
109
+ * return this.createTaskUseCase.execute(ctx.body)
110
+ * }
111
+ * }
112
+ * ```
113
+ */
114
+ function AiTool(options) {
115
+ return (target, propertyKey) => {
116
+ setMethodMeta(AI_TOOL_METADATA, options, target, propertyKey);
117
+ };
118
+ }
119
+ /** Read the AI tool metadata attached to a method, if any. */
120
+ function getAiToolMeta(target, method) {
121
+ return getMethodMetaOrUndefined(AI_TOOL_METADATA, target, method);
122
+ }
123
+ /** Check whether a method was decorated with `@AiTool`. */
124
+ function isAiTool(target, method) {
125
+ return getAiToolMeta(target, method) !== void 0;
126
+ }
127
+ //#endregion
128
+ //#region src/zod-to-json-schema.ts
129
+ /**
130
+ * Minimal Zod v4+ schema parser.
131
+ *
132
+ * Mirrors the helper in `@forinda/kickjs-mcp` and the `zodSchemaParser`
133
+ * in `@forinda/kickjs-swagger`. Zod v4 ships with a native
134
+ * `.toJSONSchema()` instance method, so this is a type guard + a call.
135
+ *
136
+ * Kept in-package so the AI adapter has no cross-package dependency
137
+ * on MCP or Swagger. If KickJS ever extracts a shared
138
+ * `@forinda/kickjs-schema` utility, all three packages can switch
139
+ * to it in one PR.
140
+ */
141
+ /**
142
+ * Check whether a value looks like a Zod v4+ schema.
143
+ *
144
+ * Uses structural duck-typing: the object has `safeParse` (all Zod
145
+ * versions) AND `toJSONSchema` (Zod v4+). This avoids importing Zod
146
+ * as a value, which would force it to become a runtime dep.
147
+ */
148
+ function isZodSchema(schema) {
149
+ return schema != null && typeof schema === "object" && typeof schema.safeParse === "function" && typeof schema.toJSONSchema === "function";
150
+ }
151
+ /**
152
+ * Convert a Zod v4+ schema to a JSON Schema object, stripping the
153
+ * top-level `$schema` key so the output can be embedded inside an
154
+ * AI tool definition directly.
155
+ *
156
+ * Returns `null` if the input doesn't look like a Zod schema. Callers
157
+ * should fall back to an empty-object input schema in that case.
158
+ */
159
+ function zodToJsonSchema(schema) {
160
+ if (!isZodSchema(schema)) return null;
161
+ const { $schema: _ignored, ...rest } = schema.toJSONSchema();
162
+ return rest;
163
+ }
164
+ //#endregion
165
+ //#region src/ai.adapter.ts
166
+ const log = Logger.for("AiAdapter");
167
+ /**
168
+ * Register an AI provider in the DI container, discover every
169
+ * `@AiTool`-decorated controller method, and run agent loops that
170
+ * dispatch tool calls through the Express pipeline.
171
+ *
172
+ * The adapter plays the same role for AI as the MCP adapter plays for
173
+ * external clients: it's the glue between the framework's metadata
174
+ * (Zod schemas, route decorators, DI container) and a runtime that
175
+ * can actually call LLMs and execute tools. Both adapters reuse the
176
+ * framework's `onRouteMount` hook to discover tools at startup.
177
+ *
178
+ * @example
179
+ * ```ts
180
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
181
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
182
+ *
183
+ * export const app = await bootstrap({
184
+ * modules,
185
+ * adapters: [
186
+ * new AiAdapter({
187
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
188
+ * }),
189
+ * ],
190
+ * })
191
+ * ```
192
+ *
193
+ * Then in any service:
194
+ *
195
+ * ```ts
196
+ * @Service()
197
+ * class AgentService {
198
+ * @Autowired() private readonly ai!: AiAdapter
199
+ *
200
+ * async handleQuery(userPrompt: string) {
201
+ * const result = await this.ai.runAgent({
202
+ * messages: [
203
+ * { role: 'system', content: 'You can create tasks via tools.' },
204
+ * { role: 'user', content: userPrompt },
205
+ * ],
206
+ * tools: 'auto', // use every @AiTool-decorated method
207
+ * })
208
+ * return result.content
209
+ * }
210
+ * }
211
+ * ```
212
+ */
213
+ var AiAdapter = class AiAdapter {
214
+ name = "AiAdapter";
215
+ provider;
216
+ /** Controllers collected during the mount phase, in insertion order. */
217
+ mountedControllers = [];
218
+ /** Tool definitions built during `beforeStart` from `@AiTool` metadata. */
219
+ tools = [];
220
+ /**
221
+ * Base URL of the running KickJS HTTP server, captured in `afterStart`.
222
+ * Agent tool dispatch makes internal HTTP requests against this base
223
+ * URL so calls flow through the normal Express pipeline (middleware,
224
+ * validation, auth, logging, error handling).
225
+ */
226
+ serverBaseUrl = null;
227
+ constructor(options) {
228
+ this.provider = options.provider;
229
+ }
230
+ /** Return the active provider. Useful for services that want the raw API. */
231
+ getProvider() {
232
+ return this.provider;
233
+ }
234
+ /** Return the discovered tool registry. Primarily for tests and debug UIs. */
235
+ getTools() {
236
+ return this.tools;
237
+ }
238
+ /**
239
+ * Override the server base URL. Used by tests that spin up an
240
+ * ephemeral http.Server and can't rely on the framework's
241
+ * `afterStart` hook to supply it.
242
+ */
243
+ setServerBaseUrl(url) {
244
+ this.serverBaseUrl = url;
245
+ }
246
+ /**
247
+ * Record every mounted controller so `beforeStart` can walk them
248
+ * looking for `@AiTool` decorations. We don't scan here because
249
+ * onRouteMount fires per-controller and we want the scan to run
250
+ * once against the full set.
251
+ */
252
+ onRouteMount(controller, mountPath) {
253
+ this.mountedControllers.push({
254
+ controller,
255
+ mountPath
256
+ });
257
+ }
258
+ /**
259
+ * Register the provider in the DI container and run the tool scan.
260
+ *
261
+ * The adapter itself is also registered under its class constructor
262
+ * so services can inject the adapter directly (to call `runAgent`)
263
+ * while other services inject just the provider via `AI_PROVIDER`
264
+ * for plain `chat` / `embed` calls.
265
+ */
266
+ beforeStart({ container }) {
267
+ container.registerFactory(AI_PROVIDER, () => this.provider, Scope.SINGLETON);
268
+ container.registerInstance(AiAdapter, this);
269
+ for (const { controller, mountPath } of this.mountedControllers) {
270
+ const routes = getClassMeta(METADATA.ROUTES, controller, []);
271
+ for (const route of routes) {
272
+ const tool = this.tryBuildTool(controller, mountPath, route);
273
+ if (tool) this.tools.push(tool);
274
+ }
275
+ }
276
+ log.info(`AiAdapter ready — provider: ${this.provider.name}, ${this.tools.length} tool(s) discovered`);
277
+ }
278
+ /**
279
+ * Capture the running server's address so agent dispatch can make
280
+ * internal HTTP requests against the actual port. Runs after the
281
+ * HTTP server is listening, so `server.address()` returns a real
282
+ * `AddressInfo` here.
283
+ */
284
+ afterStart(ctx) {
285
+ this.serverBaseUrl = this.resolveServerBaseUrl(ctx.server);
286
+ log.debug(`AiAdapter agent dispatch target: ${this.serverBaseUrl ?? "(unknown)"}`);
287
+ }
288
+ /** Best-effort cleanup. Providers are currently stateless HTTP clients. */
289
+ async shutdown() {
290
+ this.serverBaseUrl = null;
291
+ log.debug("AiAdapter shutdown complete");
292
+ }
293
+ /**
294
+ * Run a tool-calling agent loop.
295
+ *
296
+ * Calls the provider with the given messages and tools, dispatches
297
+ * any tool calls the model emits, feeds the results back into the
298
+ * conversation, and repeats until the model responds with plain text
299
+ * (no more tool calls) or `maxSteps` is reached.
300
+ *
301
+ * Tool dispatch goes through the Express pipeline via internal HTTP
302
+ * requests — same pattern as the MCP adapter — so middleware, auth,
303
+ * validation, logging, and error handling all apply to tool calls
304
+ * the same way they apply to external client requests.
305
+ *
306
+ * @example
307
+ * ```ts
308
+ * const result = await adapter.runAgent({
309
+ * messages: [
310
+ * { role: 'system', content: 'Create tasks the user asks for.' },
311
+ * { role: 'user', content: 'Create a high-priority task titled Ship v3.' },
312
+ * ],
313
+ * tools: 'auto',
314
+ * maxSteps: 5,
315
+ * })
316
+ * console.log(result.content) // assistant's final reply
317
+ * console.log(result.messages) // full history including tool calls
318
+ * console.log(result.steps) // how many rounds it took
319
+ * ```
320
+ */
321
+ async runAgent(options) {
322
+ const maxSteps = options.maxSteps ?? 8;
323
+ const resolvedTools = this.resolveTools(options.tools ?? "auto");
324
+ const messages = [...options.messages];
325
+ let steps = 0;
326
+ const usage = {
327
+ promptTokens: 0,
328
+ completionTokens: 0,
329
+ totalTokens: 0
330
+ };
331
+ for (let i = 0; i < maxSteps; i++) {
332
+ steps++;
333
+ const response = await this.provider.chat({
334
+ messages,
335
+ model: options.model,
336
+ tools: resolvedTools.length > 0 ? resolvedTools : void 0
337
+ }, {
338
+ temperature: options.temperature,
339
+ maxTokens: options.maxTokens,
340
+ topP: options.topP,
341
+ stopSequences: options.stopSequences,
342
+ signal: options.signal
343
+ });
344
+ if (response.usage) {
345
+ usage.promptTokens += response.usage.promptTokens;
346
+ usage.completionTokens += response.usage.completionTokens;
347
+ usage.totalTokens += response.usage.totalTokens;
348
+ }
349
+ if (!response.toolCalls || response.toolCalls.length === 0) {
350
+ messages.push({
351
+ role: "assistant",
352
+ content: response.content
353
+ });
354
+ return {
355
+ content: response.content,
356
+ messages,
357
+ steps,
358
+ usage: usage.totalTokens > 0 ? usage : void 0
359
+ };
360
+ }
361
+ messages.push({
362
+ role: "assistant",
363
+ content: response.content,
364
+ toolCalls: response.toolCalls
365
+ });
366
+ const results = await Promise.all(response.toolCalls.map((call) => this.dispatchToolCall(call)));
367
+ for (const result of results) messages.push(result);
368
+ }
369
+ return {
370
+ content: messages.slice().reverse().find((m) => m.role === "assistant")?.content ?? "",
371
+ messages,
372
+ steps,
373
+ usage: usage.totalTokens > 0 ? usage : void 0,
374
+ maxStepsReached: true
375
+ };
376
+ }
377
+ /**
378
+ * Memory-aware agent turn.
379
+ *
380
+ * Wraps `runAgent` with an automatic "read history → append user
381
+ * message → run loop → persist assistant response" cycle. Services
382
+ * that want multi-turn conversations don't need to manage the
383
+ * plumbing themselves — pass a `ChatMemory` and a user message,
384
+ * get back the agent's response, and the memory is updated.
385
+ *
386
+ * System prompt handling:
387
+ * - If the memory is empty AND `systemPrompt` is provided, the
388
+ * system prompt is persisted as the first message in the
389
+ * session. It stays put for every subsequent turn.
390
+ * - On follow-up turns, the existing system prompt is reused
391
+ * from memory; the `systemPrompt` option is ignored to keep
392
+ * the session persona stable.
393
+ *
394
+ * Tool result persistence:
395
+ * - By default, tool messages are NOT persisted to memory —
396
+ * they're usually large API responses the user doesn't need
397
+ * on later turns, and including them blows up prompt tokens
398
+ * unnecessarily. Set `persistToolResults: true` to keep them
399
+ * (useful for debugging / full-transcript replay).
400
+ * - Assistant messages with tool calls ARE persisted so the
401
+ * conversation shows what the agent did.
402
+ *
403
+ * @example
404
+ * ```ts
405
+ * @Service()
406
+ * class ChatService {
407
+ * @Autowired() private ai!: AiAdapter
408
+ * private readonly memory = new InMemoryChatMemory()
409
+ *
410
+ * async handle(userMessage: string) {
411
+ * const result = await this.ai.runAgentWithMemory({
412
+ * memory: this.memory,
413
+ * userMessage,
414
+ * systemPrompt: 'You are a helpful assistant.',
415
+ * tools: 'auto',
416
+ * })
417
+ * return result.content
418
+ * }
419
+ * }
420
+ * ```
421
+ */
422
+ async runAgentWithMemory(options) {
423
+ const messages = [...await options.memory.get()];
424
+ if (messages.length === 0 && options.systemPrompt) {
425
+ const systemMessage = {
426
+ role: "system",
427
+ content: options.systemPrompt
428
+ };
429
+ messages.push(systemMessage);
430
+ await options.memory.add(systemMessage);
431
+ }
432
+ const userMessage = {
433
+ role: "user",
434
+ content: options.userMessage
435
+ };
436
+ messages.push(userMessage);
437
+ await options.memory.add(userMessage);
438
+ const result = await this.runAgent({
439
+ messages,
440
+ model: options.model,
441
+ tools: options.tools,
442
+ maxSteps: options.maxSteps,
443
+ temperature: options.temperature,
444
+ maxTokens: options.maxTokens,
445
+ topP: options.topP,
446
+ stopSequences: options.stopSequences,
447
+ signal: options.signal
448
+ });
449
+ const newMessages = result.messages.slice(messages.length);
450
+ const toPersist = options.persistToolResults ? newMessages : newMessages.filter((m) => m.role !== "tool");
451
+ if (toPersist.length > 0) await options.memory.add(toPersist);
452
+ return result;
453
+ }
454
+ /**
455
+ * Expand an agent `tools` option to an explicit array. `'auto'`
456
+ * resolves to the full discovered registry; an explicit array is
457
+ * passed through unchanged (so callers can restrict the agent to a
458
+ * subset of tools).
459
+ */
460
+ resolveTools(spec) {
461
+ if (spec === "auto") return this.tools;
462
+ return spec;
463
+ }
464
+ /**
465
+ * Dispatch a single tool call through the Express pipeline by
466
+ * making an internal HTTP request matching the underlying route's
467
+ * method + path + body/query.
468
+ *
469
+ * Returns a `ChatMessage` with `role: 'tool'` suitable for feeding
470
+ * back into the next `provider.chat` call. Non-2xx responses are
471
+ * surfaced as tool error messages rather than throwing, so the
472
+ * agent loop can let the model recover.
473
+ */
474
+ async dispatchToolCall(call) {
475
+ const tool = this.tools.find((t) => t.name === call.name);
476
+ if (!tool) return {
477
+ role: "tool",
478
+ toolCallId: call.id,
479
+ content: JSON.stringify({ error: `Tool not found: ${call.name}` })
480
+ };
481
+ if (!this.serverBaseUrl) return {
482
+ role: "tool",
483
+ toolCallId: call.id,
484
+ content: JSON.stringify({ error: `Cannot dispatch ${call.name}: HTTP server address not yet captured` })
485
+ };
486
+ const args = call.arguments ?? {};
487
+ const { path, remainingArgs } = this.substitutePathParams(tool.mountPath, args);
488
+ const method = tool.httpMethod.toUpperCase();
489
+ const hasBody = method === "POST" || method === "PUT" || method === "PATCH";
490
+ let url = `${this.serverBaseUrl}${path}`;
491
+ const init = {
492
+ method,
493
+ headers: {
494
+ accept: "application/json",
495
+ "x-ai-tool": tool.name
496
+ }
497
+ };
498
+ if (hasBody) {
499
+ init.headers["content-type"] = "application/json";
500
+ init.body = JSON.stringify(remainingArgs);
501
+ } else if (Object.keys(remainingArgs).length > 0) {
502
+ const qs = new URLSearchParams();
503
+ for (const [key, value] of Object.entries(remainingArgs)) {
504
+ if (value === void 0 || value === null) continue;
505
+ qs.append(key, typeof value === "string" ? value : JSON.stringify(value));
506
+ }
507
+ const sep = url.includes("?") ? "&" : "?";
508
+ url = `${url}${sep}${qs.toString()}`;
509
+ }
510
+ try {
511
+ const res = await fetch(url, init);
512
+ const text = await res.text();
513
+ const content = res.ok ? text || `(${res.status} ${res.statusText})` : JSON.stringify({
514
+ error: `Tool ${call.name} returned ${res.status}`,
515
+ body: text
516
+ });
517
+ return {
518
+ role: "tool",
519
+ toolCallId: call.id,
520
+ content
521
+ };
522
+ } catch (err) {
523
+ const message = err instanceof Error ? err.message : String(err);
524
+ log.error(err, `AiAdapter: tool dispatch failed for ${call.name}`);
525
+ return {
526
+ role: "tool",
527
+ toolCallId: call.id,
528
+ content: JSON.stringify({ error: `Dispatch error: ${message}` })
529
+ };
530
+ }
531
+ }
532
+ /**
533
+ * Build an `AiToolDefinition` for a route decorated with `@AiTool`.
534
+ * Skips routes without the decorator so the registry only exposes
535
+ * deliberately opted-in methods.
536
+ */
537
+ tryBuildTool(controller, mountPath, route) {
538
+ const meta = getAiToolMeta(controller.prototype, route.handlerName);
539
+ if (!meta) return null;
540
+ const inputSchema = zodToJsonSchema(meta.inputSchema ?? route.validation?.body ?? route.validation?.query) ?? {
541
+ type: "object",
542
+ properties: {},
543
+ additionalProperties: false
544
+ };
545
+ return {
546
+ name: meta.name ?? `${controller.name}.${route.handlerName}`,
547
+ description: meta.description,
548
+ inputSchema,
549
+ httpMethod: route.method.toUpperCase(),
550
+ mountPath: this.joinMountPath(mountPath, route.path)
551
+ };
552
+ }
553
+ /**
554
+ * Join a module mount path with the route-level sub-path. Same
555
+ * helper as McpAdapter's — kept local so the two packages don't
556
+ * couple via a shared util file.
557
+ */
558
+ joinMountPath(mountPath, routePath) {
559
+ const base = mountPath.endsWith("/") ? mountPath.slice(0, -1) : mountPath;
560
+ if (!routePath || routePath === "/") return base;
561
+ return `${base}${routePath.startsWith("/") ? routePath : `/${routePath}`}`;
562
+ }
563
+ /**
564
+ * Substitute Express-style `:param` placeholders in the mount path
565
+ * with values pulled from the tool call arguments. Consumed keys
566
+ * are removed from the remaining args so they aren't sent twice
567
+ * (once in the path, once in the body/query).
568
+ */
569
+ substitutePathParams(mountPath, args) {
570
+ const remaining = { ...args };
571
+ return {
572
+ path: mountPath.replace(/:([a-zA-Z_][a-zA-Z0-9_]*)/g, (_match, param) => {
573
+ if (param in remaining) {
574
+ const value = remaining[param];
575
+ delete remaining[param];
576
+ return encodeURIComponent(String(value));
577
+ }
578
+ return `:${param}`;
579
+ }),
580
+ remainingArgs: remaining
581
+ };
582
+ }
583
+ /**
584
+ * Resolve the running server's base URL from a Node `http.Server`
585
+ * instance. Same handling as McpAdapter: IPv6 bracketing, rewrite
586
+ * of 0.0.0.0/:: to 127.0.0.1.
587
+ */
588
+ resolveServerBaseUrl(server) {
589
+ if (!server) return null;
590
+ const address = server.address();
591
+ if (!address || typeof address === "string") return null;
592
+ let host = address.address;
593
+ if (host === "::" || host === "0.0.0.0" || host === "") host = "127.0.0.1";
594
+ if (host.includes(":") && !host.startsWith("[")) host = `[${host}]`;
595
+ return `http://${host}:${address.port}`;
596
+ }
597
+ };
598
+ //#endregion
599
+ //#region src/providers/base.ts
600
+ /**
601
+ * Provider-side helpers shared by every built-in `AiProvider`
602
+ * implementation.
603
+ *
604
+ * Each provider in `packages/ai/src/providers/` implements the
605
+ * `AiProvider` interface from `../types`. This file holds the bits
606
+ * that all of them need: HTTP error mapping, JSON parsing, SSE line
607
+ * splitting for streaming responses. Keeping these here means each
608
+ * provider's main file stays focused on the wire-format translation
609
+ * specific to its vendor.
610
+ */
611
+ /**
612
+ * Error thrown by built-in providers when the upstream API returns a
613
+ * non-2xx status. Carries the HTTP status, the raw response body, and
614
+ * a parsed error object when available, so callers can branch on
615
+ * specific failure modes (auth, rate limit, content filter, etc.).
616
+ */
617
+ var ProviderError = class extends Error {
618
+ status;
619
+ body;
620
+ parsedBody;
621
+ constructor(status, body, message) {
622
+ super(message ?? `Provider request failed with status ${status}`);
623
+ this.name = "ProviderError";
624
+ this.status = status;
625
+ this.body = body;
626
+ try {
627
+ this.parsedBody = JSON.parse(body);
628
+ } catch {}
629
+ }
630
+ };
631
+ /**
632
+ * POST a JSON payload to a URL and parse the JSON response. Throws a
633
+ * `ProviderError` on non-2xx status codes so the caller never has to
634
+ * check `res.ok` itself.
635
+ *
636
+ * Auth headers are the caller's responsibility. Different providers
637
+ * use different conventions — OpenAI uses `Authorization: Bearer ...`,
638
+ * Anthropic uses `x-api-key: ...`, Google uses `?key=...` in the URL —
639
+ * so this helper stays neutral and lets each provider build exactly
640
+ * the headers it needs.
641
+ */
642
+ async function postJson(url, body, options = {}) {
643
+ const res = await fetch(url, {
644
+ method: "POST",
645
+ headers: {
646
+ "content-type": "application/json",
647
+ ...options.headers
648
+ },
649
+ body: JSON.stringify(body),
650
+ signal: options.signal
651
+ });
652
+ if (!res.ok) {
653
+ const text = await res.text();
654
+ throw new ProviderError(res.status, text);
655
+ }
656
+ return await res.json();
657
+ }
658
+ /**
659
+ * POST a JSON payload and stream the response body as a sequence of
660
+ * SSE-style `data: ...` events. Each yielded value is the raw payload
661
+ * after the `data: ` prefix is stripped — provider code is responsible
662
+ * for parsing it as JSON (or detecting the `[DONE]` sentinel that
663
+ * OpenAI uses to signal end-of-stream).
664
+ *
665
+ * Implementation notes:
666
+ * - Uses the global `fetch` ReadableStream so it works in Node 20+
667
+ * without depending on `node-fetch` or `eventsource-parser`.
668
+ * - Buffers partial lines across chunk boundaries; an SSE event can
669
+ * arrive split across two TCP packets.
670
+ * - Skips empty lines and lines that don't start with `data: ` per
671
+ * the SSE spec.
672
+ * - Aborts cleanly via the optional AbortSignal — the caller's
673
+ * `for await` loop will throw `AbortError` if the signal fires.
674
+ */
675
+ async function* postJsonStream(url, body, options = {}) {
676
+ const res = await fetch(url, {
677
+ method: "POST",
678
+ headers: {
679
+ "content-type": "application/json",
680
+ accept: "text/event-stream",
681
+ ...options.headers
682
+ },
683
+ body: JSON.stringify(body),
684
+ signal: options.signal
685
+ });
686
+ if (!res.ok) {
687
+ const text = await res.text();
688
+ throw new ProviderError(res.status, text);
689
+ }
690
+ if (!res.body) throw new ProviderError(res.status, "", "Provider streaming response had no body");
691
+ const reader = res.body.getReader();
692
+ const decoder = new TextDecoder("utf-8");
693
+ let buffer = "";
694
+ try {
695
+ while (true) {
696
+ const { value, done } = await reader.read();
697
+ if (done) break;
698
+ buffer += decoder.decode(value, { stream: true });
699
+ let newlineIdx;
700
+ while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
701
+ const line = buffer.slice(0, newlineIdx).trim();
702
+ buffer = buffer.slice(newlineIdx + 1);
703
+ if (line.length === 0) continue;
704
+ if (!line.startsWith("data:")) continue;
705
+ const payload = line.slice(5).trim();
706
+ if (payload.length === 0) continue;
707
+ yield payload;
708
+ }
709
+ }
710
+ const tail = buffer.trim();
711
+ if (tail.startsWith("data:")) {
712
+ const payload = tail.slice(5).trim();
713
+ if (payload.length > 0) yield payload;
714
+ }
715
+ } finally {
716
+ try {
717
+ reader.releaseLock();
718
+ } catch {}
719
+ }
720
+ }
721
+ //#endregion
722
+ //#region src/providers/openai.ts
723
+ /**
724
+ * Built-in OpenAI provider.
725
+ *
726
+ * Implements the framework's `AiProvider` interface using nothing but
727
+ * the global `fetch` API (Node 20+). Translates the framework's
728
+ * normalized chat shape to OpenAI's `/chat/completions` wire format
729
+ * and back, including streaming via SSE.
730
+ *
731
+ * Tool calling is wired in this provider but the agent loop that
732
+ * actually invokes tools and feeds results back to the model lives in
733
+ * a later phase — for now, `chat()` and `stream()` surface tool calls
734
+ * via `ChatResponse.toolCalls` so callers can react.
735
+ *
736
+ * @example
737
+ * ```ts
738
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
739
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
740
+ *
741
+ * export const app = await bootstrap({
742
+ * modules,
743
+ * adapters: [
744
+ * new AiAdapter({
745
+ * provider: new OpenAIProvider({
746
+ * apiKey: getEnv('OPENAI_API_KEY'),
747
+ * defaultChatModel: 'gpt-4o-mini',
748
+ * }),
749
+ * }),
750
+ * ],
751
+ * })
752
+ * ```
753
+ */
754
+ var OpenAIProvider = class {
755
+ name;
756
+ baseURL;
757
+ defaultChatModel;
758
+ defaultEmbedModel;
759
+ /**
760
+ * Full header map passed to every request. Includes the bearer auth
761
+ * header and the optional openai-organization header. Constructed
762
+ * once in the constructor so per-call code just spreads it into the
763
+ * fetch init.
764
+ */
765
+ headers;
766
+ constructor(options) {
767
+ if (!options.apiKey) throw new Error("OpenAIProvider: apiKey is required");
768
+ this.baseURL = (options.baseURL ?? "https://api.openai.com/v1").replace(/\/$/, "");
769
+ this.defaultChatModel = options.defaultChatModel ?? "gpt-4o-mini";
770
+ this.defaultEmbedModel = options.defaultEmbedModel ?? "text-embedding-3-small";
771
+ this.name = options.name ?? "openai";
772
+ this.headers = {
773
+ authorization: `Bearer ${options.apiKey}`,
774
+ ...options.organization ? { "openai-organization": options.organization } : {}
775
+ };
776
+ }
777
+ /**
778
+ * Non-streaming chat completion.
779
+ *
780
+ * Translates the framework's `ChatInput` to OpenAI's chat completion
781
+ * payload, posts it, and normalizes the response back to a
782
+ * `ChatResponse`. Tool calls are surfaced on the response so callers
783
+ * can decide whether to feed them back into a tool registry.
784
+ */
785
+ async chat(input, options = {}) {
786
+ const payload = this.buildChatPayload(input, options, false);
787
+ const data = await postJson(`${this.baseURL}/chat/completions`, payload, {
788
+ headers: this.headers,
789
+ signal: options.signal
790
+ });
791
+ return this.normalizeChatResponse(data);
792
+ }
793
+ /**
794
+ * Streaming chat completion. Yields `ChatChunk`s as deltas arrive
795
+ * over the wire and emits one final chunk with `done: true` after
796
+ * the upstream `[DONE]` sentinel.
797
+ *
798
+ * Cancellation via `options.signal` is supported end-to-end — the
799
+ * underlying fetch is aborted and the consumer's `for await` loop
800
+ * throws `AbortError`.
801
+ */
802
+ async *stream(input, options = {}) {
803
+ const payload = this.buildChatPayload(input, options, true);
804
+ const events = postJsonStream(`${this.baseURL}/chat/completions`, payload, {
805
+ headers: this.headers,
806
+ signal: options.signal
807
+ });
808
+ let sawAnyChunk = false;
809
+ for await (const raw of events) {
810
+ if (raw === "[DONE]") {
811
+ yield {
812
+ content: "",
813
+ done: true
814
+ };
815
+ return;
816
+ }
817
+ let parsed;
818
+ try {
819
+ parsed = JSON.parse(raw);
820
+ } catch {
821
+ continue;
822
+ }
823
+ const choice = parsed.choices?.[0];
824
+ if (!choice) continue;
825
+ const deltaContent = choice.delta?.content ?? "";
826
+ const toolCallDelta = this.firstToolCallDelta(choice.delta?.tool_calls);
827
+ sawAnyChunk = true;
828
+ const chunk = {
829
+ content: deltaContent,
830
+ done: false
831
+ };
832
+ if (toolCallDelta) chunk.toolCallDelta = toolCallDelta;
833
+ yield chunk;
834
+ }
835
+ if (sawAnyChunk) yield {
836
+ content: "",
837
+ done: true
838
+ };
839
+ }
840
+ /**
841
+ * Generate embeddings for a string or array of strings.
842
+ *
843
+ * Returns vectors in input order. Single-string input still gets a
844
+ * length-1 array back, so callers can use the same indexed access
845
+ * pattern regardless of input shape.
846
+ */
847
+ async embed(input) {
848
+ const inputs = Array.isArray(input) ? input : [input];
849
+ if (inputs.length === 0) return [];
850
+ const data = await postJson(`${this.baseURL}/embeddings`, {
851
+ model: this.defaultEmbedModel,
852
+ input: inputs
853
+ }, { headers: this.headers });
854
+ if (!data.data || !Array.isArray(data.data)) throw new ProviderError(200, JSON.stringify(data), "OpenAI embedding response had no data");
855
+ return [...data.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
856
+ }
857
+ buildChatPayload(input, options, stream) {
858
+ const payload = {
859
+ model: input.model ?? this.defaultChatModel,
860
+ messages: input.messages.map((m) => this.toOpenAIMessage(m)),
861
+ stream
862
+ };
863
+ if (options.temperature !== void 0) payload.temperature = options.temperature;
864
+ if (options.maxTokens !== void 0) payload.max_tokens = options.maxTokens;
865
+ if (options.topP !== void 0) payload.top_p = options.topP;
866
+ if (options.stopSequences && options.stopSequences.length > 0) payload.stop = options.stopSequences;
867
+ if (Array.isArray(input.tools) && input.tools.length > 0) payload.tools = input.tools.map((t) => ({
868
+ type: "function",
869
+ function: {
870
+ name: t.name,
871
+ description: t.description,
872
+ parameters: t.inputSchema
873
+ }
874
+ }));
875
+ return payload;
876
+ }
877
+ /**
878
+ * Translate a framework `ChatMessage` to OpenAI's wire format.
879
+ * Handles the `tool` role and the `tool_calls` field on assistant
880
+ * messages, both of which use slightly different shapes than the
881
+ * normalized form on `ChatMessage`.
882
+ */
883
+ toOpenAIMessage(m) {
884
+ if (m.role === "tool") return {
885
+ role: "tool",
886
+ tool_call_id: m.toolCallId ?? "",
887
+ content: m.content
888
+ };
889
+ if (m.role === "assistant" && m.toolCalls && m.toolCalls.length > 0) return {
890
+ role: "assistant",
891
+ content: m.content,
892
+ tool_calls: m.toolCalls.map((tc) => ({
893
+ id: tc.id,
894
+ type: "function",
895
+ function: {
896
+ name: tc.name,
897
+ arguments: JSON.stringify(tc.arguments)
898
+ }
899
+ }))
900
+ };
901
+ return {
902
+ role: m.role,
903
+ content: m.content
904
+ };
905
+ }
906
+ /**
907
+ * Normalize an OpenAI chat completion response back to the
908
+ * framework's `ChatResponse` shape.
909
+ */
910
+ normalizeChatResponse(data) {
911
+ const choice = data.choices?.[0];
912
+ const message = choice?.message;
913
+ const content = typeof message?.content === "string" ? message.content : "";
914
+ const toolCalls = message?.tool_calls?.filter((tc) => Boolean(tc.function?.name)).map((tc) => {
915
+ let args = {};
916
+ try {
917
+ args = tc.function.arguments ? JSON.parse(tc.function.arguments) : {};
918
+ } catch {
919
+ args = { _raw: tc.function.arguments };
920
+ }
921
+ return {
922
+ id: tc.id,
923
+ name: tc.function.name,
924
+ arguments: args
925
+ };
926
+ });
927
+ const result = { content };
928
+ if (toolCalls && toolCalls.length > 0) result.toolCalls = toolCalls;
929
+ if (data.usage) result.usage = {
930
+ promptTokens: data.usage.prompt_tokens,
931
+ completionTokens: data.usage.completion_tokens,
932
+ totalTokens: data.usage.total_tokens
933
+ };
934
+ if (choice?.finish_reason) result.finishReason = choice.finish_reason;
935
+ return result;
936
+ }
937
+ /**
938
+ * Extract the first tool-call delta from an OpenAI streaming chunk.
939
+ *
940
+ * The `tool_calls` array in a delta chunk can contain partial state
941
+ * for multiple parallel tool calls; this method picks the first one
942
+ * with a non-empty payload, which is enough for the v0 streaming
943
+ * surface. Multi-tool streaming is a follow-up.
944
+ */
945
+ firstToolCallDelta(toolCalls) {
946
+ if (!toolCalls || toolCalls.length === 0) return void 0;
947
+ const first = toolCalls[0];
948
+ if (!first) return void 0;
949
+ const result = { id: first.id ?? "" };
950
+ if (first.function?.name) result.name = first.function.name;
951
+ if (first.function?.arguments !== void 0) result.argumentsDelta = first.function.arguments;
952
+ return result;
953
+ }
954
+ };
955
+ //#endregion
956
+ //#region src/providers/anthropic.ts
957
+ /**
958
+ * Built-in Anthropic provider.
959
+ *
960
+ * Implements the framework's `AiProvider` interface using Anthropic's
961
+ * Messages API (`/v1/messages`). Translates the normalized
962
+ * `ChatInput` shape to and from Anthropic's content-block format,
963
+ * including tool calling and streaming.
964
+ *
965
+ * ### Differences from OpenAI
966
+ *
967
+ * Anthropic's API has a few quirks the provider translates away:
968
+ *
969
+ * - **System prompt is separated.** The framework puts system
970
+ * messages in the `messages` array; Anthropic wants them in a
971
+ * top-level `system` field. The provider extracts the first system
972
+ * message and filters out any others.
973
+ * - **Content is always a block array.** Even simple text replies
974
+ * are wrapped in `[{ type: 'text', text: '...' }]`. The provider
975
+ * flattens text blocks to a single string on the response.
976
+ * - **Tool calls use `tool_use` content blocks, not a separate
977
+ * `tool_calls` field.** Normalization pulls them out of the
978
+ * response content and into `ChatResponse.toolCalls`.
979
+ * - **Tool results are `user` messages with `tool_result` content
980
+ * blocks**, not a `'tool'` role. The provider handles the
981
+ * translation both ways.
982
+ * - **`max_tokens` is required on every request.** Framework
983
+ * `ChatOptions.maxTokens` wins; otherwise falls back to
984
+ * `defaultMaxTokens` (default 4096).
985
+ *
986
+ * ### Embeddings
987
+ *
988
+ * Anthropic does not ship an embeddings API. Calling `embed()` on
989
+ * this provider throws a descriptive error — users who need
990
+ * embeddings should construct a separate provider (OpenAI's
991
+ * `text-embedding-3-small` is a good default) and bind it
992
+ * alongside the Anthropic chat provider.
993
+ *
994
+ * @example
995
+ * ```ts
996
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
997
+ * import { AiAdapter, AnthropicProvider } from '@forinda/kickjs-ai'
998
+ *
999
+ * export const app = await bootstrap({
1000
+ * modules,
1001
+ * adapters: [
1002
+ * new AiAdapter({
1003
+ * provider: new AnthropicProvider({
1004
+ * apiKey: getEnv('ANTHROPIC_API_KEY'),
1005
+ * defaultChatModel: 'claude-opus-4-6',
1006
+ * }),
1007
+ * }),
1008
+ * ],
1009
+ * })
1010
+ * ```
1011
+ */
1012
var AnthropicProvider = class {
  name;
  baseURL;
  defaultChatModel;
  defaultMaxTokens;
  headers;
  /**
   * @param options.apiKey            Required Anthropic API key.
   * @param options.baseURL           API root override (trailing slash stripped).
   * @param options.defaultChatModel  Model used when `ChatInput.model` is absent.
   * @param options.defaultMaxTokens  Fallback for Anthropic's mandatory `max_tokens`.
   * @param options.apiVersion        Value for the `anthropic-version` header.
   * @param options.name              Provider name used for diagnostics.
   * @throws If `apiKey` is missing.
   */
  constructor(options) {
    if (!options.apiKey) throw new Error("AnthropicProvider: apiKey is required");
    const root = options.baseURL ?? "https://api.anthropic.com/v1";
    this.baseURL = root.replace(/\/$/, "");
    this.defaultChatModel = options.defaultChatModel ?? "claude-opus-4-6";
    this.defaultMaxTokens = options.defaultMaxTokens ?? 4096;
    this.name = options.name ?? "anthropic";
    this.headers = {
      "x-api-key": options.apiKey,
      "anthropic-version": options.apiVersion ?? "2023-06-01"
    };
  }
  /**
   * Non-streaming chat completion: build the Messages payload, POST
   * it, and normalize the reply to the framework's `ChatResponse`.
   */
  async chat(input, options = {}) {
    const body = this.buildMessagesPayload(input, options, false);
    const raw = await postJson(`${this.baseURL}/messages`, body, {
      headers: this.headers,
      signal: options.signal
    });
    return this.normalizeResponse(raw);
  }
  /**
   * Streaming chat completion. Anthropic's SSE stream is typed by
   * event (`message_start`, `content_block_start`, `content_block_delta`,
   * `content_block_stop`, `message_delta`, `message_stop`); only text
   * deltas and tool-use / input-JSON deltas produce chunks here, and
   * a final `done: true` chunk follows `message_stop` (or stream end).
   */
  async *stream(input, options = {}) {
    const body = this.buildMessagesPayload(input, options, true);
    const sse = postJsonStream(`${this.baseURL}/messages`, body, {
      headers: this.headers,
      signal: options.signal
    });
    // Identity of the tool_use block currently being streamed, if any.
    let activeTool = null;
    let emitted = false;
    for await (const raw of sse) {
      let event;
      try {
        event = JSON.parse(raw);
      } catch {
        continue;
      }
      switch (event.type) {
        case "content_block_start": {
          const block = event.content_block;
          if (block?.type === "tool_use") {
            activeTool = { id: block.id ?? "", name: block.name ?? "" };
            emitted = true;
            yield {
              content: "",
              done: false,
              toolCallDelta: { id: activeTool.id, name: activeTool.name }
            };
          }
          break;
        }
        case "content_block_delta": {
          const delta = event.delta;
          if (delta?.type === "text_delta" && typeof delta.text === "string") {
            emitted = true;
            yield { content: delta.text, done: false };
          } else if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string" && activeTool) {
            emitted = true;
            yield {
              content: "",
              done: false,
              toolCallDelta: { id: activeTool.id, argumentsDelta: delta.partial_json }
            };
          }
          break;
        }
        case "content_block_stop":
          activeTool = null;
          break;
        case "message_stop":
          yield { content: "", done: true };
          return;
        default:
          break;
      }
    }
    // Defensive terminal chunk if the stream ended without message_stop.
    if (emitted) yield { content: "", done: true };
  }
  /**
   * Anthropic has no embeddings API. Throws a descriptive error so
   * embedding workflows reach for a dedicated provider instead of
   * silently getting empty vectors.
   */
  async embed(_input) {
    throw new Error("AnthropicProvider.embed is not available — Anthropic does not provide an embeddings API. Use OpenAIProvider (or another embeddings-capable provider) for embed calls, and keep Anthropic for chat.");
  }
  /**
   * Assemble the Anthropic Messages request body. The system prompt
   * is hoisted into the top-level `system` field, `max_tokens` is
   * always present (Anthropic requires it), and optional knobs /
   * tools are only attached when the caller supplied them.
   */
  buildMessagesPayload(input, options, stream) {
    const { systemPrompt, messages } = this.splitSystemMessage(input.messages);
    const hasTools = Array.isArray(input.tools) && input.tools.length > 0;
    return {
      model: input.model ?? this.defaultChatModel,
      max_tokens: options.maxTokens ?? this.defaultMaxTokens,
      messages: messages.map((message) => this.toAnthropicMessage(message)),
      ...(systemPrompt ? { system: systemPrompt } : {}),
      ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
      ...(options.topP !== undefined ? { top_p: options.topP } : {}),
      ...(options.stopSequences?.length ? { stop_sequences: options.stopSequences } : {}),
      ...(stream ? { stream: true } : {}),
      ...(hasTools ? {
        tools: input.tools.map((tool) => ({
          name: tool.name,
          description: tool.description,
          input_schema: tool.inputSchema
        }))
      } : {})
    };
  }
  /**
   * Pull the first system message out of the conversation — Anthropic
   * wants it as a top-level `system` field, not inside `messages`.
   * Additional system messages are dropped deliberately: one persona
   * prompt per call, concatenation would behave confusingly.
   */
  splitSystemMessage(messages) {
    let systemPrompt = null;
    const rest = messages.filter((message) => {
      if (message.role !== "system") return true;
      systemPrompt ??= message.content;
      return false;
    });
    return { systemPrompt, messages: rest };
  }
  /**
   * Translate a framework `ChatMessage` into Anthropic's content-block
   * format. Plain messages become a single `text` block; assistant
   * messages with tool calls mix `text` and `tool_use` blocks; the
   * framework `'tool'` role becomes a `user` message carrying a
   * `tool_result` block — Anthropic's representation of tool output.
   */
  toAnthropicMessage(message) {
    if (message.role === "tool") {
      return {
        role: "user",
        content: [{
          type: "tool_result",
          tool_use_id: message.toolCallId ?? "",
          content: message.content
        }]
      };
    }
    const calls = message.toolCalls ?? [];
    if (message.role === "assistant" && calls.length > 0) {
      const content = message.content ? [{ type: "text", text: message.content }] : [];
      for (const call of calls) {
        content.push({
          type: "tool_use",
          id: call.id,
          name: call.name,
          input: call.arguments
        });
      }
      return { role: "assistant", content };
    }
    return {
      role: message.role === "assistant" ? "assistant" : "user",
      content: [{ type: "text", text: message.content }]
    };
  }
  /**
   * Normalize an Anthropic response to `ChatResponse`: text blocks are
   * concatenated into one string, `tool_use` blocks become
   * `toolCalls`, and usage totals are derived from input + output
   * token counts.
   */
  normalizeResponse(data) {
    const textParts = [];
    const toolCalls = [];
    for (const block of data.content ?? []) {
      if (block.type === "text" && typeof block.text === "string") {
        textParts.push(block.text);
      }
      if (block.type === "tool_use" && block.name && block.id) {
        const args = block.input && typeof block.input === "object" ? block.input : {};
        toolCalls.push({ id: block.id, name: block.name, arguments: args });
      }
    }
    return {
      content: textParts.join(""),
      ...(toolCalls.length > 0 ? { toolCalls } : {}),
      ...(data.usage ? {
        usage: {
          promptTokens: data.usage.input_tokens,
          completionTokens: data.usage.output_tokens,
          totalTokens: data.usage.input_tokens + data.usage.output_tokens
        }
      } : {}),
      ...(data.stop_reason ? { finishReason: data.stop_reason } : {})
    };
  }
};
1262
+ //#endregion
1263
+ //#region src/prompts/prompt.ts
1264
+ /**
1265
+ * A reusable prompt template with `{{variable}}` placeholders and
1266
+ * a typed variables object at the render site.
1267
+ *
1268
+ * The type parameter `TVars` is a record of the variables the
1269
+ * template expects. Callers pass it explicitly:
1270
+ *
1271
+ * ```ts
1272
+ * const summarize = createPrompt<{ text: string; sentenceCount: number }>(
1273
+ * 'Summarize the following in {{sentenceCount}} sentences:\n\n{{text}}',
1274
+ * { name: 'summarize' },
1275
+ * )
1276
+ *
1277
+ * const msg = summarize.render({ text: 'Long article...', sentenceCount: 3 })
1278
+ * // → { role: 'user', content: 'Summarize the following in 3 sentences:\n\nLong article...' }
1279
+ * ```
1280
+ *
1281
+ * TypeScript catches missing or mistyped variables at compile time:
1282
+ *
1283
+ * ```ts
1284
+ * summarize.render({ text: 'x' }) // ✗ missing sentenceCount
1285
+ * summarize.render({ text: 'x', count: 3 }) // ✗ wrong key name
1286
+ * ```
1287
+ *
1288
+ * @remarks
1289
+ * Runtime-only in v0 — the type parameter is opt-in and has to be
1290
+ * provided explicitly. Workstream 5 adds a `kick typegen` pass that
1291
+ * scans `createPrompt` call sites and generates the TVars shape
1292
+ * automatically, so you can write `createPrompt('...')` and get
1293
+ * the types for free.
1294
+ */
1295
var Prompt = class {
  name;
  role;
  template;
  onMissing;
  /**
   * @param template           Prompt template with `{{variable}}` placeholders.
   * @param options.name       Label used in error/warning messages (default "prompt").
   * @param options.role       Role of the rendered `ChatMessage` (default "user").
   * @param options.onMissing  "throw" (default), "warn", or any other value to
   *                           silently leave missing placeholders in place.
   * @throws If `template` is not a string.
   */
  constructor(template, options = {}) {
    if (typeof template !== "string") throw new Error("createPrompt: template must be a string");
    this.template = template;
    this.name = options.name ?? "prompt";
    this.role = options.role ?? "user";
    this.onMissing = options.onMissing ?? "throw";
  }
  /**
   * Substitute variables into the template and return a ready-to-use
   * `ChatMessage` with this prompt's configured role.
   *
   * @throws If `onMissing === 'throw'` and a referenced variable is absent
   */
  render(vars) {
    return {
      role: this.role,
      content: this.renderString(vars)
    };
  }
  /**
   * Same as `render` but returns the raw string instead of wrapping
   * it in a `ChatMessage`. Placeholder syntax is `{{name}}`; internal
   * whitespace is ignored (`{{ name }}` works too). Unknown-looking
   * placeholders are handled per `onMissing`.
   *
   * Only the caller's OWN properties count as provided variables:
   * a placeholder like `{{toString}}` or `{{constructor}}` no longer
   * matches inherited `Object.prototype` members (which previously
   * interpolated built-in function source text into the prompt).
   */
  renderString(vars) {
    return this.template.replace(/\{\{\s*([a-zA-Z_][a-zA-Z0-9_.]*)\s*\}\}/g, (_match, key) => {
      // Own-property check (not `in`) so prototype members never leak in.
      if (!Object.prototype.hasOwnProperty.call(vars, key)) return this.handleMissing(key, _match);
      const value = vars[key];
      // null/undefined values are treated the same as absent keys.
      if (value === void 0 || value === null) return this.handleMissing(key, _match);
      return String(value);
    });
  }
  /** Return the raw template string. Useful for debugging and snapshot tests. */
  getTemplate() {
    return this.template;
  }
  /**
   * Return the distinct placeholder names the template references, in
   * first-appearance order. Reads the string only — it cannot see the
   * compile-time TVars type, so it is a tooling/testing aid, not a
   * substitute for the static check.
   */
  getPlaceholders() {
    const matches = this.template.matchAll(/\{\{\s*([a-zA-Z_][a-zA-Z0-9_.]*)\s*\}\}/g);
    const names = /* @__PURE__ */ new Set();
    for (const m of matches) {
      const name = m[1];
      if (name) names.add(name);
    }
    return [...names];
  }
  /**
   * Resolve a missing variable according to `onMissing`: throw, warn
   * and keep the placeholder, or silently keep the placeholder.
   */
  handleMissing(key, original) {
    if (this.onMissing === "throw") throw new Error(`Prompt(${this.name}): variable "${key}" is missing from the render call`);
    if (this.onMissing === "warn") console.warn(`Prompt(${this.name}): variable "${key}" is missing from the render call; leaving placeholder`);
    return original;
  }
};
1366
+ /**
1367
+ * Construct a reusable prompt template.
1368
+ *
1369
+ * Thin factory for the `Prompt` class — keeps call sites short and
1370
+ * matches the naming convention of other kickjs-ai factories
1371
+ * (`createToken`, etc.). Use the class form directly if you need
1372
+ * subclassing or custom rendering logic.
1373
+ *
1374
+ * @example
1375
+ * ```ts
1376
+ * import { createPrompt } from '@forinda/kickjs-ai'
1377
+ *
1378
+ * const persona = createPrompt<{ name: string; tone: string }>(
1379
+ * 'You are {{name}}, a {{tone}} assistant.',
1380
+ * { role: 'system', name: 'persona' },
1381
+ * )
1382
+ *
1383
+ * const msg = persona.render({ name: 'Claude', tone: 'concise' })
1384
+ * ```
1385
+ */
1386
/**
 * Factory for `Prompt` — shorthand for `new Prompt(template, options)`
 * that keeps call sites terse and matches the naming convention of
 * the package's other factories. Use the class directly when you
 * need subclassing or custom rendering.
 */
function createPrompt(template, options = {}) {
  const prompt = new Prompt(template, options);
  return prompt;
}
1389
+ //#endregion
1390
+ //#region src/memory/in-memory.ts
1391
+ /**
1392
+ * Zero-dependency in-memory chat memory.
1393
+ *
1394
+ * Backed by a plain array. Each instance represents ONE conversation
1395
+ * — services that serve multiple sessions construct one instance per
1396
+ * session, typically via a `sessionId → memory` map in a parent
1397
+ * service or a request-scoped DI factory.
1398
+ *
1399
+ * Good for:
1400
+ * - Tests and prototypes
1401
+ * - Single-process CLI tools
1402
+ * - Short-lived request handlers that don't outlive the HTTP response
1403
+ *
1404
+ * Not good for:
1405
+ * - Multi-replica deployments (memory isn't shared across pods)
1406
+ * - Sessions that need to survive a restart
1407
+ * - Anything with a compliance retention policy
1408
+ *
1409
+ * For any of those, swap in a persistent backend (Drizzle, Redis,
1410
+ * Postgres) that implements the same `ChatMemory` interface — the
1411
+ * calling service doesn't change.
1412
+ *
1413
+ * @example
1414
+ * ```ts
1415
+ * import { InMemoryChatMemory } from '@forinda/kickjs-ai'
1416
+ *
1417
+ * const memory = new InMemoryChatMemory()
1418
+ * await memory.add({ role: 'user', content: 'hello' })
1419
+ * const history = await memory.get()
1420
+ * ```
1421
+ */
1422
var InMemoryChatMemory = class {
  name = "in-memory";
  messages = [];
  /** Snapshot of the conversation so far (defensive copy). */
  async get() {
    return this.messages.slice();
  }
  /** Append one message, or an array of messages in order. */
  async add(message) {
    if (Array.isArray(message)) {
      this.messages.push(...message);
    } else {
      this.messages.push(message);
    }
  }
  /** Drop the entire history. */
  async clear() {
    this.messages = [];
  }
  /** Number of stored messages. */
  async size() {
    return this.messages.length;
  }
};
1439
+ //#endregion
1440
+ //#region src/memory/sliding-window.ts
1441
+ /**
1442
+ * Sliding-window memory wrapper.
1443
+ *
1444
+ * Wraps any `ChatMemory` implementation with a bounded history: only
1445
+ * the most recent N messages survive. Older messages are evicted on
1446
+ * every `get()` and after every `add()` that pushes the count past
1447
+ * the cap. The first system message is pinned by default so long
1448
+ * sessions don't lose their persona.
1449
+ *
1450
+ * Use this to keep prompt token usage predictable without writing
1451
+ * eviction logic in every service. It composes with any backend —
1452
+ * in-memory, Drizzle, Redis — because it only touches the inner
1453
+ * memory through its public interface.
1454
+ *
1455
+ * @example
1456
+ * ```ts
1457
+ * import { InMemoryChatMemory, SlidingWindowChatMemory } from '@forinda/kickjs-ai'
1458
+ *
1459
+ * const memory = new SlidingWindowChatMemory({
1460
+ * inner: new InMemoryChatMemory(),
1461
+ * maxMessages: 20,
1462
+ * pinSystemPrompt: true,
1463
+ * })
1464
+ * ```
1465
+ *
1466
+ * @remarks
1467
+ * Eviction writes back to the inner memory via `clear()` + `add()`.
1468
+ * That's fine for in-memory backends where clearing is O(1), but
1469
+ * costs a round-trip for network-backed stores. If you're wrapping
1470
+ * a remote backend, consider an inner memory that supports native
1471
+ * trimming — the wrapper's contract assumes clear+add is cheap.
1472
+ */
1473
var SlidingWindowChatMemory = class {
  name;
  inner;
  maxMessages;
  pinSystemPrompt;
  /**
   * @param options.inner            Backing `ChatMemory` (required).
   * @param options.maxMessages      Positive integer cap on retained messages.
   * @param options.pinSystemPrompt  Keep the first system message alive (default true).
   * @throws If `inner` is missing or `maxMessages` is not a positive integer.
   */
  constructor(options) {
    if (!options.inner) throw new Error("SlidingWindowChatMemory: `inner` memory is required");
    if (!Number.isInteger(options.maxMessages) || options.maxMessages <= 0) throw new Error("SlidingWindowChatMemory: `maxMessages` must be a positive integer");
    this.inner = options.inner;
    this.maxMessages = options.maxMessages;
    this.pinSystemPrompt = options.pinSystemPrompt ?? true;
    this.name = `sliding-window(${options.inner.name})`;
  }
  /** Read the inner history and return the windowed view. */
  async get() {
    const raw = await this.inner.get();
    return this.applyWindow(raw);
  }
  /**
   * Append to the inner memory, then write the trimmed history back
   * if the append pushed the count past the cap. The write-back uses
   * `clear()` + `add()` — cheap for in-memory backends, a round-trip
   * for network-backed ones.
   */
  async add(message) {
    await this.inner.add(message);
    const raw = await this.inner.get();
    const windowed = this.applyWindow(raw);
    if (windowed.length !== raw.length) {
      await this.inner.clear();
      await this.inner.add(windowed);
    }
  }
  /** Clear the inner memory entirely. */
  async clear() {
    await this.inner.clear();
  }
  /** Count messages, preferring the inner memory's native size() when present. */
  async size() {
    if (this.inner.size) return this.inner.size();
    return (await this.inner.get()).length;
  }
  /**
   * Apply the sliding window to `messages`, returning the bounded
   * view. Pure function so `get()` and `add()` share the logic.
   *
   * When `pinSystemPrompt` is set and the first message is a system
   * message, that message is kept and the remaining `maxMessages - 1`
   * slots are filled with the most recent messages after it. With
   * `maxMessages === 1` only the pinned system message survives —
   * previously `messages.slice(-0)` (which equals `slice(0)`, the
   * whole array) silently ignored the cap in that case.
   */
  applyWindow(messages) {
    if (messages.length <= this.maxMessages) return messages;
    if (this.pinSystemPrompt && messages[0]?.role === "system") {
      const tailBudget = this.maxMessages - 1;
      // Guard against slice(-0) === slice(0), which would keep everything.
      const tail = tailBudget > 0 ? messages.slice(-tailBudget) : [];
      return [messages[0], ...tail];
    }
    return messages.slice(-this.maxMessages);
  }
};
1522
+ //#endregion
1523
+ //#region src/rag/in-memory.ts
1524
+ /**
1525
+ * Zero-dependency in-memory vector store.
1526
+ *
1527
+ * Backed by a plain `Map<string, VectorDocument>` with a linear-scan
1528
+ * cosine-similarity search. Perfect for tests, prototypes, CLI tools,
1529
+ * and any project with a bounded corpus (roughly < 10k documents
1530
+ * before the scan starts taking more than a handful of milliseconds).
1531
+ *
1532
+ * For production workloads with larger corpora, swap in the pgvector,
1533
+ * Qdrant, or Pinecone store — the `VectorStore` interface is the same,
1534
+ * so services that consume `VECTOR_STORE` don't need to change.
1535
+ *
1536
+ * @example
1537
+ * ```ts
1538
+ * import { InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1539
+ *
1540
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
1541
+ * ```
1542
+ *
1543
+ * The class is entirely synchronous under the hood but wraps each
1544
+ * method in a Promise so it matches the async interface every other
1545
+ * backend implements. This keeps the calling code uniform regardless
1546
+ * of which backend is wired in.
1547
+ */
1548
var InMemoryVectorStore = class {
  name = "in-memory";
  docs = /* @__PURE__ */ new Map();
  /**
   * Insert or replace one document or a batch. Vectors are copied
   * defensively so later caller-side mutation can't corrupt the store.
   *
   * @throws If a document lacks an id or its vector is not an array.
   */
  async upsert(doc) {
    const batch = Array.isArray(doc) ? doc : [doc];
    for (const entry of batch) {
      if (!entry.id) throw new Error("InMemoryVectorStore.upsert: document id is required");
      if (!Array.isArray(entry.vector)) throw new Error(`InMemoryVectorStore.upsert: vector must be an array (id=${entry.id})`);
      this.docs.set(entry.id, {
        id: entry.id,
        content: entry.content,
        vector: entry.vector.slice(),
        metadata: entry.metadata
      });
    }
  }
  /**
   * Linear-scan cosine-similarity search. Results are sorted by score
   * descending (ties broken by id for determinism), filtered by the
   * optional metadata filter and `minScore`, and truncated to `topK`
   * (default 5).
   *
   * @throws If the query vector is missing or empty.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("InMemoryVectorStore.query: vector is required");
    const limit = options.topK ?? 5;
    const threshold = options.minScore ?? -Infinity;
    const hits = [];
    for (const doc of this.docs.values()) {
      if (options.filter && !matchesFilter(doc.metadata, options.filter)) continue;
      const score = cosineSimilarity(options.vector, doc.vector);
      if (score < threshold) continue;
      hits.push({
        id: doc.id,
        content: doc.content,
        score,
        metadata: doc.metadata
      });
    }
    hits.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
    return hits.slice(0, limit);
  }
  /** Remove one id or a batch of ids; unknown ids are ignored. */
  async delete(id) {
    for (const key of Array.isArray(id) ? id : [id]) this.docs.delete(key);
  }
  /** Remove every document. */
  async deleteAll() {
    this.docs.clear();
  }
  /** Number of stored documents. */
  async count() {
    return this.docs.size;
  }
};
1598
+ /**
1599
+ * Cosine similarity between two vectors. Returns a value in [-1, 1]
1600
+ * where 1 means identical direction, 0 means orthogonal, -1 means
1601
+ * opposite. The function is symmetric and scale-invariant.
1602
+ *
1603
+ * Returns 0 for length mismatches or zero-magnitude vectors rather
1604
+ * than throwing — callers get a useless hit they can filter out via
1605
+ * `minScore`, but the store doesn't crash on bad input.
1606
+ */
1607
/**
 * Cosine similarity of two equal-length vectors, in [-1, 1]: 1 is
 * identical direction, 0 is orthogonal, -1 is opposite. Symmetric
 * and scale-invariant.
 *
 * Returns 0 (instead of throwing) on length mismatch, empty input,
 * or a zero-magnitude operand — bad input yields a filterable low
 * score via `minScore` rather than a crash.
 */
function cosineSimilarity(a, b) {
  const n = a.length;
  if (n === 0 || n !== b.length) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < n; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
1622
+ /**
1623
+ * Simple equality-based metadata filter. Every key in `filter` must
1624
+ * exist on the metadata and be strictly equal. Array values on the
1625
+ * filter are treated as an `IN` clause — the metadata value must be
1626
+ * one of the listed values.
1627
+ *
1628
+ * This covers 90% of metadata filtering use cases without pulling in
1629
+ * a query-language dependency. Backends that support richer filters
1630
+ * (pgvector's WHERE, Qdrant's conditions, Pinecone's filter DSL) can
1631
+ * pass through their native syntax via the same `filter` field,
1632
+ * since the type is `Record<string, unknown>`.
1633
+ */
1634
/**
 * Equality-based metadata filter: every key in `filter` must match
 * the metadata value via strict equality, and array filter values
 * act as an IN clause (the metadata value must be one of them).
 * Documents without metadata never match — even an empty filter.
 */
function matchesFilter(metadata, filter) {
  if (!metadata) return false;
  return Object.entries(filter).every(([key, expected]) => {
    const actual = metadata[key];
    return Array.isArray(expected) ? expected.includes(actual) : actual === expected;
  });
}
1644
+ //#endregion
1645
+ //#region src/rag/pgvector.ts
1646
+ /**
1647
+ * pgvector-backed `VectorStore` implementation.
1648
+ *
1649
+ * Stores documents in a single table with a `vector` column indexed
1650
+ * via pgvector's native operators. Cosine similarity is the scoring
1651
+ * metric — computed as `1 - (vector <=> query_vector)` because the
1652
+ * `<=>` operator returns cosine DISTANCE, not similarity.
1653
+ *
1654
+ * ### Lazy initialization
1655
+ *
1656
+ * The Postgres pool and schema are set up on first use, not in the
1657
+ * constructor. That keeps the constructor synchronous, matches the
1658
+ * rest of the `VectorStore` implementations, and lets users construct
1659
+ * the store inside a module's `register(container)` method without
1660
+ * awaiting inside DI resolution.
1661
+ *
1662
+ * ### Schema
1663
+ *
1664
+ * The default schema is:
1665
+ *
1666
+ * ```sql
1667
+ * CREATE EXTENSION IF NOT EXISTS vector;
1668
+ * CREATE TABLE IF NOT EXISTS <schema>.<table> (
1669
+ * id TEXT PRIMARY KEY,
1670
+ * content TEXT NOT NULL,
1671
+ * vector vector(<dimensions>) NOT NULL,
1672
+ * metadata JSONB
1673
+ * );
1674
+ * ```
1675
+ *
1676
+ * No index is created by default — pgvector's IVFFlat and HNSW
1677
+ * indexes benefit from being created AFTER data is loaded, and the
1678
+ * right choice depends on corpus size. Users should add an index
1679
+ * themselves in a real migration when they're ready:
1680
+ *
1681
+ * ```sql
1682
+ * CREATE INDEX ON kickjs_embeddings
1683
+ * USING hnsw (vector vector_cosine_ops);
1684
+ * ```
1685
+ *
1686
+ * ### Metadata filtering
1687
+ *
1688
+ * Filters are translated to JSONB WHERE clauses:
1689
+ * - Scalar: `metadata->>'key' = $N` (coerced to text)
1690
+ * - Array: `metadata->>'key' = ANY($N::text[])`
1691
+ *
1692
+ * Keys are validated against `[a-zA-Z0-9_.-]+` before being
1693
+ * interpolated into SQL — anything else throws. Values go through
1694
+ * parameter binding, so SQL injection via values is not possible.
1695
+ *
1696
+ * @example
1697
+ * ```ts
1698
+ * import { Pool } from 'pg'
1699
+ * import { getEnv } from '@forinda/kickjs'
1700
+ * import { AiAdapter, PgVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1701
+ *
1702
+ * const pool = new Pool({ connectionString: getEnv('DATABASE_URL') })
1703
+ * const store = new PgVectorStore({ client: pool, dimensions: 1536 })
1704
+ *
1705
+ * export const app = await bootstrap({
1706
+ * modules,
1707
+ * adapters: [new AiAdapter({ provider })],
1708
+ * plugins: [
1709
+ * {
1710
+ * name: 'pgvector',
1711
+ * register: (container) => {
1712
+ * container.registerInstance(VECTOR_STORE, store)
1713
+ * },
1714
+ * },
1715
+ * ],
1716
+ * })
1717
+ * ```
1718
+ */
1719
var PgVectorStore = class {
  /** Store name surfaced to consumers (defaults to "pgvector"). */
  name;
  /** Expected vector length; every upsert/query vector is validated against it. */
  dimensions;
  schema;
  table;
  /** Pre-quoted `"schema"."table"` string interpolated into every SQL statement. */
  fullyQualified;
  /** When true, `ensureReady` skips the extension/table bootstrap entirely. */
  skipSetup;
  /** Query executor: the user-supplied client/pool, or a Pool created lazily from `connectionString`. */
  client;
  connectionString;
  /** Cached bootstrap promise so the schema setup runs at most once per instance. */
  setupPromise = null;
  /**
   * @param options requires `dimensions` plus either a ready-made `client`
   *   (anything exposing `query(sql, params)`) or a `connectionString` from
   *   which a `pg` Pool is created lazily on first use.
   * @throws if neither `client` nor `connectionString` is given, or if
   *   `dimensions` is not a positive integer.
   */
  constructor(options) {
    if (!options.client && !options.connectionString) throw new Error("PgVectorStore: either `client` or `connectionString` must be provided");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("PgVectorStore: `dimensions` must be a positive integer");
    this.dimensions = options.dimensions;
    this.schema = options.schema ?? "public";
    this.table = options.table ?? "kickjs_embeddings";
    // Quote both parts so reserved words / mixed-case identifiers are safe.
    this.fullyQualified = `${quoteIdent(this.schema)}.${quoteIdent(this.table)}`;
    this.skipSetup = options.skipSetup ?? false;
    this.name = options.name ?? "pgvector";
    this.client = options.client ?? null;
    this.connectionString = options.connectionString ?? null;
  }
  /**
   * Insert-or-update one document or a batch. The whole batch is
   * validated up front so a bad document fails before any SQL runs,
   * then written in a single multi-row `INSERT ... ON CONFLICT` upsert.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    for (const d of list) {
      if (!d.id) throw new Error("PgVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector)) throw new Error(`PgVectorStore.upsert: vector must be an array (id=${d.id})`);
      if (d.vector.length !== this.dimensions) throw new Error(`PgVectorStore.upsert: vector length ${d.vector.length} does not match configured dimensions ${this.dimensions} (id=${d.id})`);
    }
    const client = await this.ensureReady();
    // Build one statement with 4 positional params per row: id, content, vector, metadata.
    const values = [];
    const params = [];
    let p = 1;
    for (const d of list) {
      values.push(`($${p++}, $${p++}, $${p++}::vector, $${p++}::jsonb)`);
      params.push(d.id, d.content, toPgVector(d.vector), JSON.stringify(d.metadata ?? {}));
    }
    const sql = `INSERT INTO ${this.fullyQualified} (id, content, vector, metadata) VALUES ` + values.join(", ") + " ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vector = EXCLUDED.vector, metadata = EXCLUDED.metadata";
    await client.query(sql, params);
  }
  /**
   * Nearest-neighbour search scored by cosine similarity, computed as
   * `1 - (vector <=> query)` because pgvector's `<=>` is cosine DISTANCE.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("PgVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`PgVectorStore.query: vector length ${options.vector.length} does not match configured dimensions ${this.dimensions}`);
    const client = await this.ensureReady();
    const topK = options.topK ?? 5;
    const minScore = options.minScore ?? -Infinity;
    // $1 is the query vector, so filter params start at $2; LIMIT takes the next free slot.
    const { whereSql, whereParams } = buildWhereClause(options.filter, 2);
    const limitParamIdx = whereParams.length + 2;
    const sql = `SELECT id, content, metadata, (1 - (vector <=> \$1::vector)) AS score FROM ${this.fullyQualified} ` + whereSql + ` ORDER BY vector <=> $1::vector LIMIT $${limitParamIdx}`;
    const params = [
      toPgVector(options.vector),
      ...whereParams,
      topK
    ];
    const { rows } = await client.query(sql, params);
    // minScore filtering happens client-side, after LIMIT has been applied server-side.
    const hits = [];
    for (const row of rows) {
      if (row.score < minScore) continue;
      hits.push({
        id: row.id,
        content: row.content,
        score: row.score,
        metadata: row.metadata ?? void 0
      });
    }
    return hits;
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    await (await this.ensureReady()).query(`DELETE FROM ${this.fullyQualified} WHERE id = ANY($1::text[])`, [ids]);
  }
  /** Remove every row via TRUNCATE (table and schema are kept). */
  async deleteAll() {
    await (await this.ensureReady()).query(`TRUNCATE ${this.fullyQualified}`);
  }
  /**
   * Total row count. COUNT(*) is cast to text server-side and parsed
   * here, avoiding the `pg` driver's bigint handling for large counts.
   */
  async count() {
    const { rows } = await (await this.ensureReady()).query(`SELECT COUNT(*)::text AS count FROM ${this.fullyQualified}`);
    const raw = rows[0]?.count ?? "0";
    return Number.parseInt(raw, 10);
  }
  /**
   * Release the internal connection pool, if the store created one.
   *
   * If the caller supplied their own `client`, this is a no-op —
   * lifecycle of a user-owned pool stays with the user. This method
   * is intentionally not on the `VectorStore` interface because most
   * backends don't need explicit teardown; services that want to
   * clean up call it via an adapter.shutdown hook.
   */
  async close() {
    // Only end the client when BOTH a connectionString exists (we own the
    // pool) and a client was actually created.
    if (this.connectionString && this.client) {
      const withEnd = this.client;
      if (typeof withEnd.end === "function") await withEnd.end();
      this.client = null;
    }
  }
  /**
   * Ensure the pool exists and the schema is set up. Called by every
   * public method before running any SQL. The setup migration runs
   * at most once per store instance — subsequent calls reuse the
   * cached promise.
   */
  async ensureReady() {
    if (!this.client) this.client = await this.createPoolFromConnectionString();
    if (!this.skipSetup) {
      if (!this.setupPromise) this.setupPromise = this.runSchemaSetup(this.client);
      await this.setupPromise;
    }
    return this.client;
  }
  /**
   * Dynamically import `pg` and create a Pool from the configured
   * connection string. Imported lazily so users who supply their own
   * `client` never force `pg` to be installed.
   *
   * Throws a friendly error if `pg` is not installed — the same
   * graceful-degradation pattern the CLI uses for optional packages.
   */
  async createPoolFromConnectionString() {
    if (!this.connectionString) throw new Error("PgVectorStore: no client or connectionString configured (this should never happen)");
    // Indirect specifier keeps bundlers from statically resolving `pg`.
    const pgSpec = "pg";
    let pgModule;
    try {
      pgModule = await import(pgSpec);
    } catch {
      throw new Error("PgVectorStore: the `pg` package is not installed. Run `pnpm add pg` (or pass a pre-made executor via the `client` option) to use the pgvector store.");
    }
    // Handle both CJS-interop (`default.Pool`) and native ESM (`Pool`) shapes.
    const Pool = pgModule.default?.Pool ?? pgModule.Pool;
    if (!Pool) throw new Error("PgVectorStore: the `pg` module did not export a `Pool` class (unexpected version).");
    return new Pool({ connectionString: this.connectionString });
  }
  /**
   * Run the schema bootstrap: enable the pgvector extension, create
   * the embeddings table if it doesn't exist, and nothing else.
   *
   * Indexes are deliberately not created here — pgvector's IVFFlat
   * and HNSW indexes perform best when created after data is loaded,
   * and the right choice depends on corpus size. Users should add
   * their index in a real migration when they're ready.
   */
  async runSchemaSetup(client) {
    await client.query("CREATE EXTENSION IF NOT EXISTS vector");
    await client.query(`CREATE TABLE IF NOT EXISTS ${this.fullyQualified} (id TEXT PRIMARY KEY, content TEXT NOT NULL, vector vector(${this.dimensions}) NOT NULL, metadata JSONB )`);
  }
};
1865
/**
 * Serialize a JS number array into pgvector's textual wire format,
 * e.g. `'[0.1,0.2,0.3]'`. The `pg` driver has no vector type, so the
 * caller casts the string with `::vector` in SQL. Non-finite entries
 * (NaN, ±Infinity) are written as `0` rather than `null`/`NaN`,
 * because pgvector rejects non-finite values on insert.
 */
function toPgVector(vector) {
  const parts = [];
  for (const value of vector) {
    parts.push(Number.isFinite(value) ? String(value) : "0");
  }
  return "[" + parts.join(",") + "]";
}
1875
/**
 * Wrap a Postgres identifier in double quotes, doubling any embedded
 * quotes. Used for schema and table names so users can pass lowercase
 * identifiers without worrying about reserved words.
 */
function quoteIdent(ident) {
  const doubled = ident.split('"').join('""');
  return '"' + doubled + '"';
}
1883
/**
 * Translate an equality metadata filter into a SQL WHERE clause plus
 * bound parameters, with placeholders starting at `$<startAt>`.
 *
 * - Scalar values become `metadata->>'key' = $N`
 * - Array values become `metadata->>'key' = ANY($N::text[])`
 *
 * Keys must match `[a-zA-Z0-9_.-]+` — anything else is rejected,
 * because keys are interpolated into the SQL text. Values always go
 * through parameter binding and are coerced to strings, since `->>`
 * yields text. Callers needing numeric range queries should issue raw
 * SQL via their own executor; this helper covers the equality-case 90%.
 *
 * Exported for unit testing.
 */
function buildWhereClause(filter, startAt) {
  if (!filter || Object.keys(filter).length === 0) {
    return { whereSql: "", whereParams: [] };
  }
  const allowedKey = /^[a-zA-Z0-9_.\-]+$/;
  const clauses = [];
  const boundParams = [];
  let placeholder = startAt;
  for (const [key, value] of Object.entries(filter)) {
    if (!allowedKey.test(key)) {
      throw new Error(`PgVectorStore: metadata filter key "${key}" contains unsupported characters (allowed: letters, digits, underscore, dot, dash)`);
    }
    if (Array.isArray(value)) {
      clauses.push(`metadata->>'${key}' = ANY($${placeholder}::text[])`);
      boundParams.push(value.map(String));
    } else {
      const coerced = value === null || value === void 0 ? "" : String(value);
      clauses.push(`metadata->>'${key}' = $${placeholder}`);
      boundParams.push(coerced);
    }
    placeholder += 1;
  }
  return {
    whereSql: "WHERE " + clauses.join(" AND "),
    whereParams: boundParams
  };
}
1921
+ //#endregion
1922
+ //#region src/rag/qdrant.ts
1923
+ /**
1924
+ * Qdrant-backed `VectorStore` implementation.
1925
+ *
1926
+ * Qdrant stores vectors as "points" inside a named "collection". Each
1927
+ * point has an id, a dense vector, and an arbitrary JSON "payload" —
1928
+ * the store uses the payload to carry both the original `content`
1929
+ * string (so RAG retrieval can feed text back to the LLM) and the
1930
+ * `metadata` record.
1931
+ *
1932
+ * ### Filtering
1933
+ *
1934
+ * The framework's equality-map filter (`{ key: value }` or
1935
+ * `{ key: [v1, v2] }`) is translated into Qdrant's `filter.must`
1936
+ * conditions against `payload.metadata.<key>`. Scalar values become
1937
+ * `match: { value }`, arrays become `match: { any: [...] }`. Users
1938
+ * who need richer queries (nested, range, should/must_not) can bypass
1939
+ * this by extending the class, but equality covers the 90% case.
1940
+ *
1941
+ * ### Lazy collection creation
1942
+ *
1943
+ * On first write, the store calls `PUT /collections/{name}` with
1944
+ * `vectors: { size, distance }` — idempotent, so it's safe to run on
1945
+ * every boot. Pass `skipSetup: true` if your cluster is provisioned
1946
+ * externally and the runtime API key doesn't have create permission.
1947
+ *
1948
+ * @example
1949
+ * ```ts
1950
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
1951
+ * import { AiAdapter, QdrantVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1952
+ *
1953
+ * const store = new QdrantVectorStore({
1954
+ * url: getEnv('QDRANT_URL'),
1955
+ * apiKey: getEnv('QDRANT_API_KEY'),
1956
+ * collection: 'docs',
1957
+ * dimensions: 1536,
1958
+ * })
1959
+ *
1960
+ * export const app = await bootstrap({
1961
+ * modules,
1962
+ * adapters: [new AiAdapter({ provider })],
1963
+ * plugins: [
1964
+ * {
1965
+ * name: 'qdrant',
1966
+ * register: (container) => {
1967
+ * container.registerInstance(VECTOR_STORE, store)
1968
+ * },
1969
+ * },
1970
+ * ],
1971
+ * })
1972
+ * ```
1973
+ */
1974
var QdrantVectorStore = class {
  /** Store name surfaced to consumers (defaults to "qdrant"). */
  name;
  /** Base URL of the Qdrant HTTP API, with any trailing slash stripped. */
  url;
  collection;
  /** Expected vector length; validated on every upsert/query. */
  dimensions;
  /** Distance metric used when the collection is created (defaults to "Cosine"). */
  distance;
  /** Shared request headers; includes `api-key` only when one was configured. */
  headers;
  skipSetup;
  /**
   * Cached bootstrap promise. The first method call triggers collection
   * creation; every subsequent call awaits the same promise so the
   * check happens exactly once per process. On failure we clear the
   * cache so the next call can retry (networks blink, DNS flaps).
   */
  setupPromise = null;
  /**
   * @param options requires `collection` and a positive-integer
   *   `dimensions`; `url` defaults to the local Qdrant default port.
   * @throws on a missing collection name or invalid dimensions.
   */
  constructor(options) {
    if (!options.collection) throw new Error("QdrantVectorStore: collection is required");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("QdrantVectorStore: dimensions must be a positive integer");
    this.url = (options.url ?? "http://localhost:6333").replace(/\/$/, "");
    this.collection = options.collection;
    this.dimensions = options.dimensions;
    this.distance = options.distance ?? "Cosine";
    this.skipSetup = options.skipSetup ?? false;
    this.name = options.name ?? "qdrant";
    this.headers = {
      "content-type": "application/json",
      ...options.apiKey ? { "api-key": options.apiKey } : {}
    };
  }
  /**
   * Insert-or-update one document or a batch as Qdrant points. The
   * payload carries both `content` and `metadata` so retrieval can
   * return the original text.
   *
   * NOTE(review): Qdrant accepts only unsigned-integer or UUID point
   * ids; arbitrary string ids are rejected by the server — confirm
   * callers use UUID-shaped ids here.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    // Validate the whole batch before any network call.
    for (const d of list) {
      if (!d.id) throw new Error("QdrantVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector) || d.vector.length !== this.dimensions) throw new Error(`QdrantVectorStore.upsert: vector length ${d.vector?.length ?? 0} does not match collection dimensions ${this.dimensions} (id=${d.id})`);
    }
    await this.ensureCollection();
    const points = list.map((d) => ({
      id: d.id,
      vector: d.vector,
      payload: {
        content: d.content,
        metadata: d.metadata ?? {}
      }
    }));
    // `wait=true` makes the write synchronous so a subsequent query sees it.
    await this.request("PUT", `/collections/${this.collection}/points?wait=true`, { points });
  }
  /**
   * Nearest-neighbour search via `points/search`. `minScore` maps to
   * Qdrant's native `score_threshold`, so filtering happens server-side.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("QdrantVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`QdrantVectorStore.query: vector length ${options.vector.length} does not match collection dimensions ${this.dimensions}`);
    await this.ensureCollection();
    const topK = options.topK ?? 5;
    const minScore = options.minScore;
    const body = {
      vector: options.vector,
      limit: topK,
      with_payload: true
    };
    if (options.filter && Object.keys(options.filter).length > 0) body.filter = buildQdrantFilter(options.filter);
    if (minScore !== void 0) body.score_threshold = minScore;
    return (await this.request("POST", `/collections/${this.collection}/points/search`, body)).result.map((hit) => ({
      id: String(hit.id),
      content: hit.payload?.content ?? "",
      score: hit.score,
      metadata: hit.payload?.metadata ?? {}
    }));
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    await this.ensureCollection();
    await this.request("POST", `/collections/${this.collection}/points/delete?wait=true`, { points: ids });
  }
  /**
   * Drop the whole collection, then (unless `skipSetup`) recreate it
   * empty. The cached setup promise is cleared so recreation runs.
   */
  async deleteAll() {
    await this.request("DELETE", `/collections/${this.collection}`, void 0);
    this.setupPromise = null;
    if (!this.skipSetup) await this.ensureCollection();
  }
  /** Exact point count via the `points/count` endpoint. */
  async count() {
    await this.ensureCollection();
    return (await this.request("POST", `/collections/${this.collection}/points/count`, { exact: true })).result.count;
  }
  /**
   * Thin wrapper around `fetch` that applies the shared headers, JSON
   * encodes the body, and maps non-2xx responses to `Error` instances
   * with the response body attached for debugging. Matches the shape
   * used by `providers/base.ts`, kept local here so the RAG module has
   * no dependency on the provider internals.
   */
  async request(method, path, body) {
    const res = await fetch(`${this.url}${path}`, {
      method,
      headers: this.headers,
      body: body === void 0 ? void 0 : JSON.stringify(body)
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`QdrantVectorStore: ${method} ${path} failed with ${res.status}: ${text}`);
    }
    // Empty or non-JSON 2xx bodies resolve to undefined rather than throwing.
    const text = await res.text();
    if (!text) return void 0;
    try {
      return JSON.parse(text);
    } catch {
      return;
    }
  }
  /**
   * Create the collection on first use. The `PUT /collections/{name}`
   * endpoint is idempotent — calling it on an existing collection is a
   * no-op with status 200. We cache the promise so concurrent callers
   * share the same in-flight request and every subsequent call resolves
   * immediately.
   */
  ensureCollection() {
    if (this.skipSetup) return Promise.resolve();
    this.setupPromise ??= this.runSetup().catch((err) => {
      // Clear the cache on failure so the next call can retry.
      this.setupPromise = null;
      throw err;
    });
    return this.setupPromise;
  }
  /** Issue the idempotent collection-creation request. */
  async runSetup() {
    await this.request("PUT", `/collections/${this.collection}`, { vectors: {
      size: this.dimensions,
      distance: this.distance
    } });
  }
};
2103
/**
 * Translate the framework's equality-map filter into Qdrant's
 * `must` condition format.
 *
 * Scalars become `{ key, match: { value } }`; arrays become
 * `{ key, match: { any: [...] } }`. Keys are interpreted as paths into
 * `payload.metadata`, matching how `upsert` nests the metadata record.
 *
 * Exported so tests (and future richer filter builders) can verify the
 * translation without going through a live Qdrant instance.
 */
function buildQdrantFilter(filter) {
  const must = Object.entries(filter).map(([key, value]) => {
    const payloadPath = `metadata.${key}`;
    const match = Array.isArray(value) ? { any: value } : { value };
    return { key: payloadPath, match };
  });
  return { must };
}
2129
+ //#endregion
2130
+ //#region src/rag/pinecone.ts
2131
+ /**
2132
+ * Pinecone-backed `VectorStore` implementation.
2133
+ *
2134
+ * Pinecone stores vectors with a flat id, a dense vector, and an
2135
+ * arbitrary metadata object. Like Qdrant the store uses metadata to
2136
+ * carry both the original `content` (for RAG retrieval) and the
2137
+ * application's own metadata fields — they're merged into one
2138
+ * Pinecone metadata record at write time and split back apart at
2139
+ * read time.
2140
+ *
2141
+ * ### Filtering
2142
+ *
2143
+ * Pinecone has a native filter DSL that looks almost identical to
2144
+ * MongoDB's — `{ key: { $eq: value } }`, `{ key: { $in: [...] } }`,
2145
+ * etc. The framework's equality-map filter is translated directly:
2146
+ * scalars become `$eq` and arrays become `$in`. Users who need the
2147
+ * full DSL (range, $ne, $or) can pass a raw Pinecone filter through
2148
+ * the same `filter` field — the translator is a no-op when the keys
2149
+ * start with `$`, so advanced filters pass through unchanged.
2150
+ *
2151
+ * ### Index provisioning
2152
+ *
2153
+ * Pinecone indexes must be created out-of-band. This store does NOT
2154
+ * provision indexes automatically — the dimensionality, metric, and
2155
+ * pod type are infrastructure decisions that should live in
2156
+ * Terraform or the Pinecone dashboard, not in runtime code.
2157
+ *
2158
+ * @example
2159
+ * ```ts
2160
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
2161
+ * import { AiAdapter, PineconeVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
2162
+ *
2163
+ * const store = new PineconeVectorStore({
2164
+ * apiKey: getEnv('PINECONE_API_KEY'),
2165
+ * indexHost: getEnv('PINECONE_INDEX_HOST'),
2166
+ * dimensions: 1536,
2167
+ * namespace: 'docs',
2168
+ * })
2169
+ *
2170
+ * export const app = await bootstrap({
2171
+ * modules,
2172
+ * adapters: [new AiAdapter({ provider })],
2173
+ * plugins: [
2174
+ * {
2175
+ * name: 'pinecone',
2176
+ * register: (container) => {
2177
+ * container.registerInstance(VECTOR_STORE, store)
2178
+ * },
2179
+ * },
2180
+ * ],
2181
+ * })
2182
+ * ```
2183
+ */
2184
var PineconeVectorStore = class {
  /** Store name surfaced to consumers (defaults to "pinecone"). */
  name;
  /** Data-plane base URL derived from `indexHost` (https:// prepended if missing). */
  baseURL;
  /** Optional Pinecone namespace; included in request bodies only when set. */
  namespace;
  /** Expected vector length; validated on every upsert/query. */
  dimensions;
  /** Shared request headers including the API key and pinned API version. */
  headers;
  /**
   * @param options requires `apiKey`, `indexHost`, and a positive-integer
   *   `dimensions`; `namespace` and `name` are optional.
   * @throws when any required option is missing or invalid.
   */
  constructor(options) {
    if (!options.apiKey) throw new Error("PineconeVectorStore: apiKey is required");
    if (!options.indexHost) throw new Error("PineconeVectorStore: indexHost is required");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("PineconeVectorStore: dimensions must be a positive integer");
    const host = options.indexHost.replace(/\/$/, "");
    this.baseURL = host.startsWith("http") ? host : `https://${host}`;
    this.namespace = options.namespace;
    this.dimensions = options.dimensions;
    this.name = options.name ?? "pinecone";
    this.headers = {
      "content-type": "application/json",
      "Api-Key": options.apiKey,
      // Pin the API version so behavior doesn't drift with server updates.
      "X-Pinecone-API-Version": "2024-10"
    };
  }
  /**
   * Insert-or-update one document or a batch. `content` is merged into
   * the Pinecone metadata record alongside the application metadata and
   * split back apart at read time.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    // Validate the whole batch before any network call.
    for (const d of list) {
      if (!d.id) throw new Error("PineconeVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector) || d.vector.length !== this.dimensions) throw new Error(`PineconeVectorStore.upsert: vector length ${d.vector?.length ?? 0} does not match index dimensions ${this.dimensions} (id=${d.id})`);
    }
    const body = { vectors: list.map((d) => ({
      id: d.id,
      values: d.vector,
      metadata: {
        content: d.content,
        ...d.metadata ?? {}
      }
    })) };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/upsert", body);
  }
  /**
   * Nearest-neighbour search via `/query`. `minScore` is applied
   * client-side after the server returns `topK` matches.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("PineconeVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`PineconeVectorStore.query: vector length ${options.vector.length} does not match index dimensions ${this.dimensions}`);
    const topK = options.topK ?? 5;
    const body = {
      vector: options.vector,
      topK,
      includeMetadata: true
    };
    if (this.namespace) body.namespace = this.namespace;
    if (options.filter && Object.keys(options.filter).length > 0) body.filter = buildPineconeFilter(options.filter);
    const data = await this.request("/query", body);
    const minScore = options.minScore ?? -Infinity;
    return data.matches.filter((m) => m.score >= minScore).map((match) => {
      // Split the reserved `content` key back out of the merged metadata.
      const { content, ...metadata } = match.metadata ?? {};
      return {
        id: match.id,
        content: typeof content === "string" ? content : "",
        score: match.score,
        metadata
      };
    });
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    const body = { ids };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/delete", body);
  }
  /** Delete every vector (scoped to the namespace when one is configured). */
  async deleteAll() {
    const body = { deleteAll: true };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/delete", body);
  }
  /**
   * Vector count from `/describe_index_stats` — per-namespace when a
   * namespace is configured, otherwise the index total.
   */
  async count() {
    const data = await this.request("/describe_index_stats", this.namespace ? { filter: {} } : {});
    if (this.namespace) return data.namespaces?.[this.namespace]?.vectorCount ?? 0;
    return data.totalVectorCount ?? 0;
  }
  /**
   * POST a JSON body to the Pinecone data-plane and return the parsed
   * JSON response. Every Pinecone data-plane endpoint uses POST even
   * for reads (`/query`, `/describe_index_stats`), so the helper
   * doesn't bother parameterizing the method.
   */
  async request(path, body) {
    const res = await fetch(`${this.baseURL}${path}`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`PineconeVectorStore: POST ${path} failed with ${res.status}: ${text}`);
    }
    // Empty or non-JSON 2xx bodies resolve to undefined rather than throwing.
    const text = await res.text();
    if (!text) return void 0;
    try {
      return JSON.parse(text);
    } catch {
      return;
    }
  }
};
2288
/**
 * Translate the framework's equality-map filter into Pinecone's
 * MongoDB-style filter DSL.
 *
 * Rules:
 * - Scalar value → `{ key: { $eq: value } }`
 * - Array value → `{ key: { $in: [...] } }`
 * - Key that starts with $ → passed through untouched, letting
 *   callers hand-craft `{ $or: [...] }` or range conditions
 *   without the translator mangling them
 * - Value already shaped like `{ $eq, $in, $gt, ... }` → passed
 *   through untouched for the same reason
 *
 * Exported so tests can verify the translation offline.
 */
function buildPineconeFilter(filter) {
  const translated = {};
  for (const [key, value] of Object.entries(filter)) {
    if (key.startsWith("$") || isOperatorRecord(value)) {
      // Already in Pinecone's DSL — pass through untouched.
      translated[key] = value;
    } else if (Array.isArray(value)) {
      translated[key] = { $in: value };
    } else {
      translated[key] = { $eq: value };
    }
  }
  return translated;
}
/**
 * True when `value` is a plain object whose keys include at least one
 * `$`-prefixed operator (e.g. `{ $gt: 3 }`), meaning the caller wrote
 * a raw Pinecone condition that must not be re-wrapped.
 */
function isOperatorRecord(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) return false;
  return Object.keys(value).some((key) => key.startsWith("$"));
}
2324
+ //#endregion
2325
+ //#region src/rag/rag-service.ts
2326
// Default system-prompt template for RAG context injection. The literal
// `{documents}` placeholder is replaced with the formatted retrieved
// documents in `RagService.augmentChatInput`.
const DEFAULT_SYSTEM_TEMPLATE = "You have access to the following context documents. Use them when they are relevant to the user question; ignore them otherwise.\n\n{documents}\n\nIf the context doesn't contain enough information, say so plainly — don't invent answers.";
2327
+ /**
2328
+ * High-level RAG helper that ties an `AiProvider` (for embeddings)
2329
+ * to a `VectorStore` (for retrieval) and produces the three operations
2330
+ * every RAG-powered service needs: index documents, search by query,
2331
+ * and augment a chat input with retrieved context.
2332
+ *
2333
+ * The service itself is a thin orchestrator — all the storage and
2334
+ * model calls go through the injected interfaces, so swapping
2335
+ * backends (in-memory → pgvector, OpenAI → Ollama) is a DI binding
2336
+ * change, not a code change.
2337
+ *
2338
+ * @example
2339
+ * ```ts
2340
+ * import { Service, Autowired, Inject } from '@forinda/kickjs'
2341
+ * import { AI_PROVIDER, VECTOR_STORE, RagService } from '@forinda/kickjs-ai'
2342
+ * import type { AiProvider, VectorStore } from '@forinda/kickjs-ai'
2343
+ *
2344
+ * @Service()
2345
+ * class DocsService {
2346
+ * private readonly rag: RagService
2347
+ *
2348
+ * constructor(
2349
+ * @Inject(AI_PROVIDER) provider: AiProvider,
2350
+ * @Inject(VECTOR_STORE) store: VectorStore,
2351
+ * ) {
2352
+ * this.rag = new RagService(provider, store)
2353
+ * }
2354
+ *
2355
+ * async ingest(articles: Array<{ id: string; body: string }>) {
2356
+ * await this.rag.index(articles.map((a) => ({ id: a.id, content: a.body })))
2357
+ * }
2358
+ *
2359
+ * async ask(question: string) {
2360
+ * const input = await this.rag.augmentChatInput(
2361
+ * { messages: [{ role: 'user', content: question }] },
2362
+ * question,
2363
+ * { topK: 3 },
2364
+ * )
2365
+ * const res = await provider.chat(input)
2366
+ * return res.content
2367
+ * }
2368
+ * }
2369
+ * ```
2370
+ */
2371
var RagService = class {
  /**
   * @param provider embeddings source (and usually the chat model too)
   * @param store vector storage backend used for upsert and retrieval
   */
  constructor(provider, store) {
    this.provider = provider;
    this.store = store;
  }
  /** Underlying provider — exposed for services that want to reuse it for chat. */
  getProvider() {
    return this.provider;
  }
  /** Underlying store — useful for admin tools that want raw access. */
  getStore() {
    return this.store;
  }
  /**
   * Index a batch of documents: embed each one's content via the
   * provider, then upsert into the store. Embedding happens in a
   * single batched call, which is both faster and cheaper than one
   * call per document for most providers.
   *
   * Documents with empty content are skipped rather than failing the
   * whole batch — the store can't meaningfully retrieve empty strings
   * and silently dropping them matches what users usually expect when
   * a content field turns out to be blank.
   */
  async index(docs) {
    const nonEmpty = docs.filter((d) => d.content && d.content.trim().length > 0);
    if (nonEmpty.length === 0) return;
    const vectors = await this.provider.embed(nonEmpty.map((d) => d.content));
    // Guard against providers that drop or duplicate outputs.
    if (vectors.length !== nonEmpty.length) throw new Error(`RagService.index: provider returned ${vectors.length} vectors for ${nonEmpty.length} inputs`);
    const toUpsert = nonEmpty.map((doc, i) => ({
      id: doc.id,
      content: doc.content,
      vector: vectors[i],
      metadata: doc.metadata
    }));
    await this.store.upsert(toUpsert);
  }
  /**
   * Search the store for documents relevant to a natural-language
   * query. Embeds the query once, then delegates to the store's
   * `query` method with the resolved vector.
   */
  async search(query, options = {}) {
    // NOTE(review): embed() receives a bare string here but an array in
    // index() — assumes the provider normalizes both shapes; confirm.
    const [queryVector] = await this.provider.embed(query);
    if (!queryVector) return [];
    return this.store.query({
      vector: queryVector,
      topK: options.topK ?? 5,
      filter: options.filter,
      minScore: options.minScore
    });
  }
  /**
   * Retrieve relevant documents for a query and inject them into a
   * `ChatInput` as a system message. Returns a new input — the
   * original is not mutated.
   *
   * Two injection modes:
   * - Merge (default): prepend the context to the first existing
   *   system message if one exists, otherwise add a new one. Avoids
   *   producing chat histories with competing system prompts.
   * - Separate (`asSeparateSystemMessage: true`): always insert a
   *   new system message at the start. Useful when the existing
   *   system prompt is small and you want to keep roles distinct.
   *
   * If no documents are retrieved, the input is returned unchanged.
   */
  async augmentChatInput(input, query, options = {}) {
    const hits = await this.search(query, {
      topK: options.topK ?? 5,
      filter: options.filter,
      minScore: options.minScore
    });
    if (hits.length === 0) return input;
    const template = options.systemTemplate ?? DEFAULT_SYSTEM_TEMPLATE;
    // Each hit is rendered with its id and a 3-decimal score header.
    const documentBlock = hits.map((h, i) => `[Document ${i + 1} (id=${h.id}, score=${h.score.toFixed(3)})]\n${h.content}`).join("\n\n");
    const contextMessage = template.replace("{documents}", documentBlock);
    const newMessages = [];
    const existingSystemIdx = input.messages.findIndex((m) => m.role === "system");
    // Merge mode: rewrite the first system message in place, keep order.
    if (!options.asSeparateSystemMessage && existingSystemIdx !== -1) for (let i = 0; i < input.messages.length; i++) {
      const msg = input.messages[i];
      if (i === existingSystemIdx) newMessages.push({
        ...msg,
        content: `${contextMessage}\n\n---\n\n${msg.content}`
      });
      else newMessages.push(msg);
    }
    else {
      // Separate mode (or no existing system message): prepend a new one.
      newMessages.push({
        role: "system",
        content: contextMessage
      });
      newMessages.push(...input.messages);
    }
    return {
      ...input,
      messages: newMessages
    };
  }
};
2471
+ //#endregion
2472
+ export { AI_PROVIDER, AI_TOOL_METADATA, AiAdapter, AiTool, AnthropicProvider, InMemoryChatMemory, InMemoryVectorStore, OpenAIProvider, PgVectorStore, PineconeVectorStore, Prompt, ProviderError, QdrantVectorStore, RagService, SlidingWindowChatMemory, VECTOR_STORE, buildPineconeFilter, buildQdrantFilter, buildWhereClause, cosineSimilarity, createPrompt, getAiToolMeta, isAiTool, toPgVector };
2473
+
2474
+ //# sourceMappingURL=index.mjs.map