@forinda/kickjs-ai 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,2474 @@
1
+ /**
2
+ * @forinda/kickjs-ai v2.3.0
3
+ *
4
+ * Copyright (c) Felix Orinda
5
+ *
6
+ * This source code is licensed under the MIT license found in the
7
+ * LICENSE file in the root directory of this source tree.
8
+ *
9
+ * @license MIT
10
+ */
11
+ import { Logger, METADATA, Scope, createToken, getClassMeta, getMethodMetaOrUndefined, setMethodMeta } from "@forinda/kickjs";
12
+ //#region src/constants.ts
13
+ /**
14
+ * Metadata key for the `@AiTool` decorator.
15
+ *
16
+ * Using `createToken` for metadata keys (rather than a raw `Symbol`)
17
+ * gives a collision-safe, type-carrying identifier: the phantom type
18
+ * parameter flows through `getMethodMetaOrUndefined` so consumers get
19
+ * `AiToolOptions` back without a manual cast, and reference-equality
20
+ * guarantees that two separate definitions can never shadow each other
21
+ * even if the package is loaded more than once.
22
+ */
23
+ const AI_TOOL_METADATA = createToken("kickjs.ai.tool");
24
+ /**
25
+ * DI token for the active AI provider.
26
+ *
27
+ * Injected via `@Inject(AI_PROVIDER)` in services or use-cases that
28
+ * need to call an LLM. The adapter registers the concrete provider
29
+ * (OpenAI, Anthropic, Google, Ollama) during `beforeStart`.
30
+ *
31
+ * @example
32
+ * ```ts
33
+ * @Service()
34
+ * export class SummarizeService {
35
+ * constructor(@Inject(AI_PROVIDER) private ai: AiProvider) {}
36
+ *
37
+ * async summarize(text: string) {
38
+ * const res = await this.ai.chat({
39
+ * messages: [
40
+ * { role: 'system', content: 'Summarize in 2 sentences.' },
41
+ * { role: 'user', content: text },
42
+ * ],
43
+ * })
44
+ * return res.content
45
+ * }
46
+ * }
47
+ * ```
48
+ */
49
+ const AI_PROVIDER = createToken("kickjs.ai.provider");
50
+ /**
51
+ * DI token for the active vector store backend.
52
+ *
53
+ * Injected via `@Inject(VECTOR_STORE)` in services that need
54
+ * retrieval-augmented generation. The adapter does not register a
55
+ * default — users bind the backend they want at bootstrap time,
56
+ * typically `InMemoryVectorStore` for development/tests and
57
+ * `PgVectorStore` / `QdrantStore` / `PineconeStore` for production.
58
+ *
59
+ * @example
60
+ * ```ts
61
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
62
+ * import { AiAdapter, InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
63
+ *
64
+ * export const app = await bootstrap({
65
+ * modules,
66
+ * adapters: [
67
+ * new AiAdapter({
68
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
69
+ * }),
70
+ * ],
71
+ * plugins: [
72
+ * {
73
+ * name: 'vector-store',
74
+ * register: (container) => {
75
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
76
+ * },
77
+ * },
78
+ * ],
79
+ * })
80
+ * ```
81
+ */
82
+ const VECTOR_STORE = createToken("kickjs.ai.vector_store");
83
+ //#endregion
84
+ //#region src/decorators.ts
85
+ /**
86
+ * Mark a controller method as an AI-callable tool.
87
+ *
88
+ * At startup, the `AiAdapter` scans all `@Controller` classes in the
89
+ * DI container for this decorator and builds a tool registry. When a
90
+ * service calls `ai.chat({ ..., tools: 'auto' })`, the framework
91
+ * passes the registered tools to the provider, the model may call
92
+ * them, and the framework dispatches back through the normal Express
93
+ * pipeline — so tool calls go through auth, validation, and logging
94
+ * just like external HTTP requests.
95
+ *
96
+ * The input schema is derived from the route's `body` Zod schema:
97
+ *
98
+ * @example
99
+ * ```ts
100
+ * import { Controller, Post, type Ctx } from '@forinda/kickjs'
101
+ * import { AiTool } from '@forinda/kickjs-ai'
102
+ * import { createTaskSchema } from './dtos/create-task.dto'
103
+ *
104
+ * @Controller('/tasks')
105
+ * export class TaskController {
106
+ * @Post('/', { body: createTaskSchema, name: 'CreateTask' })
107
+ * @AiTool({ description: 'Create a new task' })
108
+ * create(ctx: Ctx<KickRoutes.TaskController['create']>) {
109
+ * return this.createTaskUseCase.execute(ctx.body)
110
+ * }
111
+ * }
112
+ * ```
113
+ */
114
+ function AiTool(options) {
115
+ return (target, propertyKey) => {
116
+ setMethodMeta(AI_TOOL_METADATA, options, target, propertyKey);
117
+ };
118
+ }
119
+ /** Read the AI tool metadata attached to a method, if any. */
120
+ function getAiToolMeta(target, method) {
121
+ return getMethodMetaOrUndefined(AI_TOOL_METADATA, target, method);
122
+ }
123
+ /** Check whether a method was decorated with `@AiTool`. */
124
+ function isAiTool(target, method) {
125
+ return getAiToolMeta(target, method) !== void 0;
126
+ }
127
+ //#endregion
128
+ //#region src/zod-to-json-schema.ts
129
+ /**
130
+ * Minimal Zod v4+ schema parser.
131
+ *
132
+ * Mirrors the helper in `@forinda/kickjs-mcp` and the `zodSchemaParser`
133
+ * in `@forinda/kickjs-swagger`. Zod v4 ships with a native
134
+ * `.toJSONSchema()` instance method, so this is a type guard + a call.
135
+ *
136
+ * Kept in-package so the AI adapter has no cross-package dependency
137
+ * on MCP or Swagger. If KickJS ever extracts a shared
138
+ * `@forinda/kickjs-schema` utility, all three packages can switch
139
+ * to it in one PR.
140
+ */
141
+ /**
142
+ * Check whether a value looks like a Zod v4+ schema.
143
+ *
144
+ * Uses structural duck-typing: the object has `safeParse` (all Zod
145
+ * versions) AND `toJSONSchema` (Zod v4+). This avoids importing Zod
146
+ * as a value, which would force it to become a runtime dep.
147
+ */
148
+ function isZodSchema(schema) {
149
+ return schema != null && typeof schema === "object" && typeof schema.safeParse === "function" && typeof schema.toJSONSchema === "function";
150
+ }
151
+ /**
152
+ * Convert a Zod v4+ schema to a JSON Schema object, stripping the
153
+ * top-level `$schema` key so the output can be embedded inside an
154
+ * AI tool definition directly.
155
+ *
156
+ * Returns `null` if the input doesn't look like a Zod schema. Callers
157
+ * should fall back to an empty-object input schema in that case.
158
+ */
159
+ function zodToJsonSchema(schema) {
160
+ if (!isZodSchema(schema)) return null;
161
+ const { $schema: _ignored, ...rest } = schema.toJSONSchema();
162
+ return rest;
163
+ }
164
+ //#endregion
165
+ //#region src/ai.adapter.ts
166
+ const log = Logger.for("AiAdapter");
167
+ /**
168
+ * Register an AI provider in the DI container, discover every
169
+ * `@AiTool`-decorated controller method, and run agent loops that
170
+ * dispatch tool calls through the Express pipeline.
171
+ *
172
+ * The adapter plays the same role for AI as the MCP adapter plays for
173
+ * external clients: it's the glue between the framework's metadata
174
+ * (Zod schemas, route decorators, DI container) and a runtime that
175
+ * can actually call LLMs and execute tools. Both adapters reuse the
176
+ * framework's `onRouteMount` hook to discover tools at startup.
177
+ *
178
+ * @example
179
+ * ```ts
180
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
181
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
182
+ *
183
+ * export const app = await bootstrap({
184
+ * modules,
185
+ * adapters: [
186
+ * new AiAdapter({
187
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
188
+ * }),
189
+ * ],
190
+ * })
191
+ * ```
192
+ *
193
+ * Then in any service:
194
+ *
195
+ * ```ts
196
+ * @Service()
197
+ * class AgentService {
198
+ * @Autowired() private readonly ai!: AiAdapter
199
+ *
200
+ * async handleQuery(userPrompt: string) {
201
+ * const result = await this.ai.runAgent({
202
+ * messages: [
203
+ * { role: 'system', content: 'You can create tasks via tools.' },
204
+ * { role: 'user', content: userPrompt },
205
+ * ],
206
+ * tools: 'auto', // use every @AiTool-decorated method
207
+ * })
208
+ * return result.content
209
+ * }
210
+ * }
211
+ * ```
212
+ */
213
+ var AiAdapter = class AiAdapter {
214
+ name = "AiAdapter";
215
+ provider;
216
+ /** Controllers collected during the mount phase, in insertion order. */
217
+ mountedControllers = [];
218
+ /** Tool definitions built during `beforeStart` from `@AiTool` metadata. */
219
+ tools = [];
220
+ /**
221
+ * Base URL of the running KickJS HTTP server, captured in `afterStart`.
222
+ * Agent tool dispatch makes internal HTTP requests against this base
223
+ * URL so calls flow through the normal Express pipeline (middleware,
224
+ * validation, auth, logging, error handling).
225
+ */
226
+ serverBaseUrl = null;
227
+ constructor(options) {
228
+ this.provider = options.provider;
229
+ }
230
+ /** Return the active provider. Useful for services that want the raw API. */
231
+ getProvider() {
232
+ return this.provider;
233
+ }
234
+ /** Return the discovered tool registry. Primarily for tests and debug UIs. */
235
+ getTools() {
236
+ return this.tools;
237
+ }
238
+ /**
239
+ * Override the server base URL. Used by tests that spin up an
240
+ * ephemeral http.Server and can't rely on the framework's
241
+ * `afterStart` hook to supply it.
242
+ */
243
+ setServerBaseUrl(url) {
244
+ this.serverBaseUrl = url;
245
+ }
246
+ /**
247
+ * Record every mounted controller so `beforeStart` can walk them
248
+ * looking for `@AiTool` decorations. We don't scan here because
249
+ * onRouteMount fires per-controller and we want the scan to run
250
+ * once against the full set.
251
+ */
252
+ onRouteMount(controller, mountPath) {
253
+ this.mountedControllers.push({
254
+ controller,
255
+ mountPath
256
+ });
257
+ }
258
+ /**
259
+ * Register the provider in the DI container and run the tool scan.
260
+ *
261
+ * The adapter itself is also registered under its class constructor
262
+ * so services can inject the adapter directly (to call `runAgent`)
263
+ * while other services inject just the provider via `AI_PROVIDER`
264
+ * for plain `chat` / `embed` calls.
265
+ */
266
+ beforeStart({ container }) {
267
+ container.registerFactory(AI_PROVIDER, () => this.provider, Scope.SINGLETON);
268
+ container.registerInstance(AiAdapter, this);
269
+ for (const { controller, mountPath } of this.mountedControllers) {
270
+ const routes = getClassMeta(METADATA.ROUTES, controller, []);
271
+ for (const route of routes) {
272
+ const tool = this.tryBuildTool(controller, mountPath, route);
273
+ if (tool) this.tools.push(tool);
274
+ }
275
+ }
276
+ log.info(`AiAdapter ready — provider: ${this.provider.name}, ${this.tools.length} tool(s) discovered`);
277
+ }
278
+ /**
279
+ * Capture the running server's address so agent dispatch can make
280
+ * internal HTTP requests against the actual port. Runs after the
281
+ * HTTP server is listening, so `server.address()` returns a real
282
+ * `AddressInfo` here.
283
+ */
284
+ afterStart(ctx) {
285
+ this.serverBaseUrl = this.resolveServerBaseUrl(ctx.server);
286
+ log.debug(`AiAdapter agent dispatch target: ${this.serverBaseUrl ?? "(unknown)"}`);
287
+ }
288
+ /** Best-effort cleanup. Providers are currently stateless HTTP clients. */
289
+ async shutdown() {
290
+ this.serverBaseUrl = null;
291
+ log.debug("AiAdapter shutdown complete");
292
+ }
293
+ /**
294
+ * Run a tool-calling agent loop.
295
+ *
296
+ * Calls the provider with the given messages and tools, dispatches
297
+ * any tool calls the model emits, feeds the results back into the
298
+ * conversation, and repeats until the model responds with plain text
299
+ * (no more tool calls) or `maxSteps` is reached.
300
+ *
301
+ * Tool dispatch goes through the Express pipeline via internal HTTP
302
+ * requests — same pattern as the MCP adapter — so middleware, auth,
303
+ * validation, logging, and error handling all apply to tool calls
304
+ * the same way they apply to external client requests.
305
+ *
306
+ * @example
307
+ * ```ts
308
+ * const result = await adapter.runAgent({
309
+ * messages: [
310
+ * { role: 'system', content: 'Create tasks the user asks for.' },
311
+ * { role: 'user', content: 'Create a high-priority task titled Ship v3.' },
312
+ * ],
313
+ * tools: 'auto',
314
+ * maxSteps: 5,
315
+ * })
316
+ * console.log(result.content) // assistant's final reply
317
+ * console.log(result.messages) // full history including tool calls
318
+ * console.log(result.steps) // how many rounds it took
319
+ * ```
320
+ */
321
+ async runAgent(options) {
322
+ const maxSteps = options.maxSteps ?? 8;
323
+ const resolvedTools = this.resolveTools(options.tools ?? "auto");
324
+ const messages = [...options.messages];
325
+ let steps = 0;
326
+ const usage = {
327
+ promptTokens: 0,
328
+ completionTokens: 0,
329
+ totalTokens: 0
330
+ };
331
+ for (let i = 0; i < maxSteps; i++) {
332
+ steps++;
333
+ const response = await this.provider.chat({
334
+ messages,
335
+ model: options.model,
336
+ tools: resolvedTools.length > 0 ? resolvedTools : void 0
337
+ }, {
338
+ temperature: options.temperature,
339
+ maxTokens: options.maxTokens,
340
+ topP: options.topP,
341
+ stopSequences: options.stopSequences,
342
+ signal: options.signal
343
+ });
344
+ if (response.usage) {
345
+ usage.promptTokens += response.usage.promptTokens;
346
+ usage.completionTokens += response.usage.completionTokens;
347
+ usage.totalTokens += response.usage.totalTokens;
348
+ }
349
+ if (!response.toolCalls || response.toolCalls.length === 0) {
350
+ messages.push({
351
+ role: "assistant",
352
+ content: response.content
353
+ });
354
+ return {
355
+ content: response.content,
356
+ messages,
357
+ steps,
358
+ usage: usage.totalTokens > 0 ? usage : void 0
359
+ };
360
+ }
361
+ messages.push({
362
+ role: "assistant",
363
+ content: response.content,
364
+ toolCalls: response.toolCalls
365
+ });
366
+ const results = await Promise.all(response.toolCalls.map((call) => this.dispatchToolCall(call)));
367
+ for (const result of results) messages.push(result);
368
+ }
369
+ return {
370
+ content: messages.slice().reverse().find((m) => m.role === "assistant")?.content ?? "",
371
+ messages,
372
+ steps,
373
+ usage: usage.totalTokens > 0 ? usage : void 0,
374
+ maxStepsReached: true
375
+ };
376
+ }
377
+ /**
378
+ * Memory-aware agent turn.
379
+ *
380
+ * Wraps `runAgent` with an automatic "read history → append user
381
+ * message → run loop → persist assistant response" cycle. Services
382
+ * that want multi-turn conversations don't need to manage the
383
+ * plumbing themselves — pass a `ChatMemory` and a user message,
384
+ * get back the agent's response, and the memory is updated.
385
+ *
386
+ * System prompt handling:
387
+ * - If the memory is empty AND `systemPrompt` is provided, the
388
+ * system prompt is persisted as the first message in the
389
+ * session. It stays put for every subsequent turn.
390
+ * - On follow-up turns, the existing system prompt is reused
391
+ * from memory; the `systemPrompt` option is ignored to keep
392
+ * the session persona stable.
393
+ *
394
+ * Tool result persistence:
395
+ * - By default, tool messages are NOT persisted to memory —
396
+ * they're usually large API responses the user doesn't need
397
+ * on later turns, and including them blows up prompt tokens
398
+ * unnecessarily. Set `persistToolResults: true` to keep them
399
+ * (useful for debugging / full-transcript replay).
400
+ * - Assistant messages with tool calls ARE persisted so the
401
+ * conversation shows what the agent did.
402
+ *
403
+ * @example
404
+ * ```ts
405
+ * @Service()
406
+ * class ChatService {
407
+ * @Autowired() private ai!: AiAdapter
408
+ * private readonly memory = new InMemoryChatMemory()
409
+ *
410
+ * async handle(userMessage: string) {
411
+ * const result = await this.ai.runAgentWithMemory({
412
+ * memory: this.memory,
413
+ * userMessage,
414
+ * systemPrompt: 'You are a helpful assistant.',
415
+ * tools: 'auto',
416
+ * })
417
+ * return result.content
418
+ * }
419
+ * }
420
+ * ```
421
+ */
422
+ async runAgentWithMemory(options) {
423
+ const messages = [...await options.memory.get()];
424
+ if (messages.length === 0 && options.systemPrompt) {
425
+ const systemMessage = {
426
+ role: "system",
427
+ content: options.systemPrompt
428
+ };
429
+ messages.push(systemMessage);
430
+ await options.memory.add(systemMessage);
431
+ }
432
+ const userMessage = {
433
+ role: "user",
434
+ content: options.userMessage
435
+ };
436
+ messages.push(userMessage);
437
+ await options.memory.add(userMessage);
438
+ const result = await this.runAgent({
439
+ messages,
440
+ model: options.model,
441
+ tools: options.tools,
442
+ maxSteps: options.maxSteps,
443
+ temperature: options.temperature,
444
+ maxTokens: options.maxTokens,
445
+ topP: options.topP,
446
+ stopSequences: options.stopSequences,
447
+ signal: options.signal
448
+ });
449
+ const newMessages = result.messages.slice(messages.length);
450
+ const toPersist = options.persistToolResults ? newMessages : newMessages.filter((m) => m.role !== "tool");
451
+ if (toPersist.length > 0) await options.memory.add(toPersist);
452
+ return result;
453
+ }
454
+ /**
455
+ * Expand an agent `tools` option to an explicit array. `'auto'`
456
+ * resolves to the full discovered registry; an explicit array is
457
+ * passed through unchanged (so callers can restrict the agent to a
458
+ * subset of tools).
459
+ */
460
+ resolveTools(spec) {
461
+ if (spec === "auto") return this.tools;
462
+ return spec;
463
+ }
464
+ /**
465
+ * Dispatch a single tool call through the Express pipeline by
466
+ * making an internal HTTP request matching the underlying route's
467
+ * method + path + body/query.
468
+ *
469
+ * Returns a `ChatMessage` with `role: 'tool'` suitable for feeding
470
+ * back into the next `provider.chat` call. Non-2xx responses are
471
+ * surfaced as tool error messages rather than throwing, so the
472
+ * agent loop can let the model recover.
473
+ */
474
+ async dispatchToolCall(call) {
475
+ const tool = this.tools.find((t) => t.name === call.name);
476
+ if (!tool) return {
477
+ role: "tool",
478
+ toolCallId: call.id,
479
+ content: JSON.stringify({ error: `Tool not found: ${call.name}` })
480
+ };
481
+ if (!this.serverBaseUrl) return {
482
+ role: "tool",
483
+ toolCallId: call.id,
484
+ content: JSON.stringify({ error: `Cannot dispatch ${call.name}: HTTP server address not yet captured` })
485
+ };
486
+ const args = call.arguments ?? {};
487
+ const { path, remainingArgs } = this.substitutePathParams(tool.mountPath, args);
488
+ const method = tool.httpMethod.toUpperCase();
489
+ const hasBody = method === "POST" || method === "PUT" || method === "PATCH";
490
+ let url = `${this.serverBaseUrl}${path}`;
491
+ const init = {
492
+ method,
493
+ headers: {
494
+ accept: "application/json",
495
+ "x-ai-tool": tool.name
496
+ }
497
+ };
498
+ if (hasBody) {
499
+ init.headers["content-type"] = "application/json";
500
+ init.body = JSON.stringify(remainingArgs);
501
+ } else if (Object.keys(remainingArgs).length > 0) {
502
+ const qs = new URLSearchParams();
503
+ for (const [key, value] of Object.entries(remainingArgs)) {
504
+ if (value === void 0 || value === null) continue;
505
+ qs.append(key, typeof value === "string" ? value : JSON.stringify(value));
506
+ }
507
+ const sep = url.includes("?") ? "&" : "?";
508
+ url = `${url}${sep}${qs.toString()}`;
509
+ }
510
+ try {
511
+ const res = await fetch(url, init);
512
+ const text = await res.text();
513
+ const content = res.ok ? text || `(${res.status} ${res.statusText})` : JSON.stringify({
514
+ error: `Tool ${call.name} returned ${res.status}`,
515
+ body: text
516
+ });
517
+ return {
518
+ role: "tool",
519
+ toolCallId: call.id,
520
+ content
521
+ };
522
+ } catch (err) {
523
+ const message = err instanceof Error ? err.message : String(err);
524
+ log.error(err, `AiAdapter: tool dispatch failed for ${call.name}`);
525
+ return {
526
+ role: "tool",
527
+ toolCallId: call.id,
528
+ content: JSON.stringify({ error: `Dispatch error: ${message}` })
529
+ };
530
+ }
531
+ }
532
+ /**
533
+ * Build an `AiToolDefinition` for a route decorated with `@AiTool`.
534
+ * Skips routes without the decorator so the registry only exposes
535
+ * deliberately opted-in methods.
536
+ */
537
+ tryBuildTool(controller, mountPath, route) {
538
+ const meta = getAiToolMeta(controller.prototype, route.handlerName);
539
+ if (!meta) return null;
540
+ const inputSchema = zodToJsonSchema(meta.inputSchema ?? route.validation?.body ?? route.validation?.query) ?? {
541
+ type: "object",
542
+ properties: {},
543
+ additionalProperties: false
544
+ };
545
+ return {
546
+ name: meta.name ?? `${controller.name}.${route.handlerName}`,
547
+ description: meta.description,
548
+ inputSchema,
549
+ httpMethod: route.method.toUpperCase(),
550
+ mountPath: this.joinMountPath(mountPath, route.path)
551
+ };
552
+ }
553
+ /**
554
+ * Join a module mount path with the route-level sub-path. Same
555
+ * helper as McpAdapter's — kept local so the two packages don't
556
+ * couple via a shared util file.
557
+ */
558
+ joinMountPath(mountPath, routePath) {
559
+ const base = mountPath.endsWith("/") ? mountPath.slice(0, -1) : mountPath;
560
+ if (!routePath || routePath === "/") return base;
561
+ return `${base}${routePath.startsWith("/") ? routePath : `/${routePath}`}`;
562
+ }
563
+ /**
564
+ * Substitute Express-style `:param` placeholders in the mount path
565
+ * with values pulled from the tool call arguments. Consumed keys
566
+ * are removed from the remaining args so they aren't sent twice
567
+ * (once in the path, once in the body/query).
568
+ */
569
+ substitutePathParams(mountPath, args) {
570
+ const remaining = { ...args };
571
+ return {
572
+ path: mountPath.replace(/:([a-zA-Z_][a-zA-Z0-9_]*)/g, (_match, param) => {
573
+ if (param in remaining) {
574
+ const value = remaining[param];
575
+ delete remaining[param];
576
+ return encodeURIComponent(String(value));
577
+ }
578
+ return `:${param}`;
579
+ }),
580
+ remainingArgs: remaining
581
+ };
582
+ }
583
+ /**
584
+ * Resolve the running server's base URL from a Node `http.Server`
585
+ * instance. Same handling as McpAdapter: IPv6 bracketing, rewrite
586
+ * of 0.0.0.0/:: to 127.0.0.1.
587
+ */
588
+ resolveServerBaseUrl(server) {
589
+ if (!server) return null;
590
+ const address = server.address();
591
+ if (!address || typeof address === "string") return null;
592
+ let host = address.address;
593
+ if (host === "::" || host === "0.0.0.0" || host === "") host = "127.0.0.1";
594
+ if (host.includes(":") && !host.startsWith("[")) host = `[${host}]`;
595
+ return `http://${host}:${address.port}`;
596
+ }
597
+ };
598
+ //#endregion
599
+ //#region src/providers/base.ts
600
+ /**
601
+ * Provider-side helpers shared by every built-in `AiProvider`
602
+ * implementation.
603
+ *
604
+ * Each provider in `packages/ai/src/providers/` implements the
605
+ * `AiProvider` interface from `../types`. This file holds the bits
606
+ * that all of them need: HTTP error mapping, JSON parsing, SSE line
607
+ * splitting for streaming responses. Keeping these here means each
608
+ * provider's main file stays focused on the wire-format translation
609
+ * specific to its vendor.
610
+ */
611
+ /**
612
+ * Error thrown by built-in providers when the upstream API returns a
613
+ * non-2xx status. Carries the HTTP status, the raw response body, and
614
+ * a parsed error object when available, so callers can branch on
615
+ * specific failure modes (auth, rate limit, content filter, etc.).
616
+ */
617
+ var ProviderError = class extends Error {
618
+ status;
619
+ body;
620
+ parsedBody;
621
+ constructor(status, body, message) {
622
+ super(message ?? `Provider request failed with status ${status}`);
623
+ this.name = "ProviderError";
624
+ this.status = status;
625
+ this.body = body;
626
+ try {
627
+ this.parsedBody = JSON.parse(body);
628
+ } catch {}
629
+ }
630
+ };
631
+ /**
632
+ * POST a JSON payload to a URL and parse the JSON response. Throws a
633
+ * `ProviderError` on non-2xx status codes so the caller never has to
634
+ * check `res.ok` itself.
635
+ *
636
+ * Auth headers are the caller's responsibility. Different providers
637
+ * use different conventions — OpenAI uses `Authorization: Bearer ...`,
638
+ * Anthropic uses `x-api-key: ...`, Google uses `?key=...` in the URL —
639
+ * so this helper stays neutral and lets each provider build exactly
640
+ * the headers it needs.
641
+ */
642
+ async function postJson(url, body, options = {}) {
643
+ const res = await fetch(url, {
644
+ method: "POST",
645
+ headers: {
646
+ "content-type": "application/json",
647
+ ...options.headers
648
+ },
649
+ body: JSON.stringify(body),
650
+ signal: options.signal
651
+ });
652
+ if (!res.ok) {
653
+ const text = await res.text();
654
+ throw new ProviderError(res.status, text);
655
+ }
656
+ return await res.json();
657
+ }
658
+ /**
659
+ * POST a JSON payload and stream the response body as a sequence of
660
+ * SSE-style `data: ...` events. Each yielded value is the raw payload
661
+ * after the `data: ` prefix is stripped — provider code is responsible
662
+ * for parsing it as JSON (or detecting the `[DONE]` sentinel that
663
+ * OpenAI uses to signal end-of-stream).
664
+ *
665
+ * Implementation notes:
666
+ * - Uses the global `fetch` ReadableStream so it works in Node 20+
667
+ * without depending on `node-fetch` or `eventsource-parser`.
668
+ * - Buffers partial lines across chunk boundaries; an SSE event can
669
+ * arrive split across two TCP packets.
670
+ * - Skips empty lines and lines that don't start with `data: ` per
671
+ * the SSE spec.
672
+ * - Aborts cleanly via the optional AbortSignal — the caller's
673
+ * `for await` loop will throw `AbortError` if the signal fires.
674
+ */
675
+ async function* postJsonStream(url, body, options = {}) {
676
+ const res = await fetch(url, {
677
+ method: "POST",
678
+ headers: {
679
+ "content-type": "application/json",
680
+ accept: "text/event-stream",
681
+ ...options.headers
682
+ },
683
+ body: JSON.stringify(body),
684
+ signal: options.signal
685
+ });
686
+ if (!res.ok) {
687
+ const text = await res.text();
688
+ throw new ProviderError(res.status, text);
689
+ }
690
+ if (!res.body) throw new ProviderError(res.status, "", "Provider streaming response had no body");
691
+ const reader = res.body.getReader();
692
+ const decoder = new TextDecoder("utf-8");
693
+ let buffer = "";
694
+ try {
695
+ while (true) {
696
+ const { value, done } = await reader.read();
697
+ if (done) break;
698
+ buffer += decoder.decode(value, { stream: true });
699
+ let newlineIdx;
700
+ while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
701
+ const line = buffer.slice(0, newlineIdx).trim();
702
+ buffer = buffer.slice(newlineIdx + 1);
703
+ if (line.length === 0) continue;
704
+ if (!line.startsWith("data:")) continue;
705
+ const payload = line.slice(5).trim();
706
+ if (payload.length === 0) continue;
707
+ yield payload;
708
+ }
709
+ }
710
+ const tail = buffer.trim();
711
+ if (tail.startsWith("data:")) {
712
+ const payload = tail.slice(5).trim();
713
+ if (payload.length > 0) yield payload;
714
+ }
715
+ } finally {
716
+ try {
717
+ reader.releaseLock();
718
+ } catch {}
719
+ }
720
+ }
721
+ //#endregion
722
+ //#region src/providers/openai.ts
723
+ /**
724
+ * Built-in OpenAI provider.
725
+ *
726
+ * Implements the framework's `AiProvider` interface using nothing but
727
+ * the global `fetch` API (Node 20+). Translates the framework's
728
+ * normalized chat shape to OpenAI's `/chat/completions` wire format
729
+ * and back, including streaming via SSE.
730
+ *
731
+ * Tool calling is wired in this provider but the agent loop that
732
+ * actually invokes tools and feeds results back to the model lives in
733
+ * a later phase — for now, `chat()` and `stream()` surface tool calls
734
+ * via `ChatResponse.toolCalls` so callers can react.
735
+ *
736
+ * @example
737
+ * ```ts
738
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
739
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
740
+ *
741
+ * export const app = await bootstrap({
742
+ * modules,
743
+ * adapters: [
744
+ * new AiAdapter({
745
+ * provider: new OpenAIProvider({
746
+ * apiKey: getEnv('OPENAI_API_KEY'),
747
+ * defaultChatModel: 'gpt-4o-mini',
748
+ * }),
749
+ * }),
750
+ * ],
751
+ * })
752
+ * ```
753
+ */
754
+ var OpenAIProvider = class {
755
+ name;
756
+ baseURL;
757
+ defaultChatModel;
758
+ defaultEmbedModel;
759
+ /**
760
+ * Full header map passed to every request. Includes the bearer auth
761
+ * header and the optional openai-organization header. Constructed
762
+ * once in the constructor so per-call code just spreads it into the
763
+ * fetch init.
764
+ */
765
+ headers;
766
+ constructor(options) {
767
+ if (!options.apiKey) throw new Error("OpenAIProvider: apiKey is required");
768
+ this.baseURL = (options.baseURL ?? "https://api.openai.com/v1").replace(/\/$/, "");
769
+ this.defaultChatModel = options.defaultChatModel ?? "gpt-4o-mini";
770
+ this.defaultEmbedModel = options.defaultEmbedModel ?? "text-embedding-3-small";
771
+ this.name = options.name ?? "openai";
772
+ this.headers = {
773
+ authorization: `Bearer ${options.apiKey}`,
774
+ ...options.organization ? { "openai-organization": options.organization } : {}
775
+ };
776
+ }
777
+ /**
778
+ * Non-streaming chat completion.
779
+ *
780
+ * Translates the framework's `ChatInput` to OpenAI's chat completion
781
+ * payload, posts it, and normalizes the response back to a
782
+ * `ChatResponse`. Tool calls are surfaced on the response so callers
783
+ * can decide whether to feed them back into a tool registry.
784
+ */
785
+ async chat(input, options = {}) {
786
+ const payload = this.buildChatPayload(input, options, false);
787
+ const data = await postJson(`${this.baseURL}/chat/completions`, payload, {
788
+ headers: this.headers,
789
+ signal: options.signal
790
+ });
791
+ return this.normalizeChatResponse(data);
792
+ }
793
+ /**
794
+ * Streaming chat completion. Yields `ChatChunk`s as deltas arrive
795
+ * over the wire and emits one final chunk with `done: true` after
796
+ * the upstream `[DONE]` sentinel.
797
+ *
798
+ * Cancellation via `options.signal` is supported end-to-end — the
799
+ * underlying fetch is aborted and the consumer's `for await` loop
800
+ * throws `AbortError`.
801
+ */
802
+ async *stream(input, options = {}) {
803
+ const payload = this.buildChatPayload(input, options, true);
804
+ const events = postJsonStream(`${this.baseURL}/chat/completions`, payload, {
805
+ headers: this.headers,
806
+ signal: options.signal
807
+ });
808
+ let sawAnyChunk = false;
809
+ for await (const raw of events) {
810
+ if (raw === "[DONE]") {
811
+ yield {
812
+ content: "",
813
+ done: true
814
+ };
815
+ return;
816
+ }
817
+ let parsed;
818
+ try {
819
+ parsed = JSON.parse(raw);
820
+ } catch {
821
+ continue;
822
+ }
823
+ const choice = parsed.choices?.[0];
824
+ if (!choice) continue;
825
+ const deltaContent = choice.delta?.content ?? "";
826
+ const toolCallDelta = this.firstToolCallDelta(choice.delta?.tool_calls);
827
+ sawAnyChunk = true;
828
+ const chunk = {
829
+ content: deltaContent,
830
+ done: false
831
+ };
832
+ if (toolCallDelta) chunk.toolCallDelta = toolCallDelta;
833
+ yield chunk;
834
+ }
835
+ if (sawAnyChunk) yield {
836
+ content: "",
837
+ done: true
838
+ };
839
+ }
840
+ /**
841
+ * Generate embeddings for a string or array of strings.
842
+ *
843
+ * Returns vectors in input order. Single-string input still gets a
844
+ * length-1 array back, so callers can use the same indexed access
845
+ * pattern regardless of input shape.
846
+ */
847
+ async embed(input) {
848
+ const inputs = Array.isArray(input) ? input : [input];
849
+ if (inputs.length === 0) return [];
850
+ const data = await postJson(`${this.baseURL}/embeddings`, {
851
+ model: this.defaultEmbedModel,
852
+ input: inputs
853
+ }, { headers: this.headers });
854
+ if (!data.data || !Array.isArray(data.data)) throw new ProviderError(200, JSON.stringify(data), "OpenAI embedding response had no data");
855
+ return [...data.data].sort((a, b) => a.index - b.index).map((d) => d.embedding);
856
+ }
857
+ buildChatPayload(input, options, stream) {
858
+ const payload = {
859
+ model: input.model ?? this.defaultChatModel,
860
+ messages: input.messages.map((m) => this.toOpenAIMessage(m)),
861
+ stream
862
+ };
863
+ if (options.temperature !== void 0) payload.temperature = options.temperature;
864
+ if (options.maxTokens !== void 0) payload.max_tokens = options.maxTokens;
865
+ if (options.topP !== void 0) payload.top_p = options.topP;
866
+ if (options.stopSequences && options.stopSequences.length > 0) payload.stop = options.stopSequences;
867
+ if (Array.isArray(input.tools) && input.tools.length > 0) payload.tools = input.tools.map((t) => ({
868
+ type: "function",
869
+ function: {
870
+ name: t.name,
871
+ description: t.description,
872
+ parameters: t.inputSchema
873
+ }
874
+ }));
875
+ return payload;
876
+ }
877
+ /**
878
+ * Translate a framework `ChatMessage` to OpenAI's wire format.
879
+ * Handles the `tool` role and the `tool_calls` field on assistant
880
+ * messages, both of which use slightly different shapes than the
881
+ * normalized form on `ChatMessage`.
882
+ */
883
+ toOpenAIMessage(m) {
884
+ if (m.role === "tool") return {
885
+ role: "tool",
886
+ tool_call_id: m.toolCallId ?? "",
887
+ content: m.content
888
+ };
889
+ if (m.role === "assistant" && m.toolCalls && m.toolCalls.length > 0) return {
890
+ role: "assistant",
891
+ content: m.content,
892
+ tool_calls: m.toolCalls.map((tc) => ({
893
+ id: tc.id,
894
+ type: "function",
895
+ function: {
896
+ name: tc.name,
897
+ arguments: JSON.stringify(tc.arguments)
898
+ }
899
+ }))
900
+ };
901
+ return {
902
+ role: m.role,
903
+ content: m.content
904
+ };
905
+ }
906
+ /**
907
+ * Normalize an OpenAI chat completion response back to the
908
+ * framework's `ChatResponse` shape.
909
+ */
910
+ normalizeChatResponse(data) {
911
+ const choice = data.choices?.[0];
912
+ const message = choice?.message;
913
+ const content = typeof message?.content === "string" ? message.content : "";
914
+ const toolCalls = message?.tool_calls?.filter((tc) => Boolean(tc.function?.name)).map((tc) => {
915
+ let args = {};
916
+ try {
917
+ args = tc.function.arguments ? JSON.parse(tc.function.arguments) : {};
918
+ } catch {
919
+ args = { _raw: tc.function.arguments };
920
+ }
921
+ return {
922
+ id: tc.id,
923
+ name: tc.function.name,
924
+ arguments: args
925
+ };
926
+ });
927
+ const result = { content };
928
+ if (toolCalls && toolCalls.length > 0) result.toolCalls = toolCalls;
929
+ if (data.usage) result.usage = {
930
+ promptTokens: data.usage.prompt_tokens,
931
+ completionTokens: data.usage.completion_tokens,
932
+ totalTokens: data.usage.total_tokens
933
+ };
934
+ if (choice?.finish_reason) result.finishReason = choice.finish_reason;
935
+ return result;
936
+ }
937
+ /**
938
+ * Extract the first tool-call delta from an OpenAI streaming chunk.
939
+ *
940
+ * The `tool_calls` array in a delta chunk can contain partial state
941
+ * for multiple parallel tool calls; this method picks the first one
942
+ * with a non-empty payload, which is enough for the v0 streaming
943
+ * surface. Multi-tool streaming is a follow-up.
944
+ */
945
+ firstToolCallDelta(toolCalls) {
946
+ if (!toolCalls || toolCalls.length === 0) return void 0;
947
+ const first = toolCalls[0];
948
+ if (!first) return void 0;
949
+ const result = { id: first.id ?? "" };
950
+ if (first.function?.name) result.name = first.function.name;
951
+ if (first.function?.arguments !== void 0) result.argumentsDelta = first.function.arguments;
952
+ return result;
953
+ }
954
+ };
955
+ //#endregion
956
+ //#region src/providers/anthropic.ts
957
+ /**
958
+ * Built-in Anthropic provider.
959
+ *
960
+ * Implements the framework's `AiProvider` interface using Anthropic's
961
+ * Messages API (`/v1/messages`). Translates the normalized
962
+ * `ChatInput` shape to and from Anthropic's content-block format,
963
+ * including tool calling and streaming.
964
+ *
965
+ * ### Differences from OpenAI
966
+ *
967
+ * Anthropic's API has a few quirks the provider translates away:
968
+ *
969
+ * - **System prompt is separated.** The framework puts system
970
+ * messages in the `messages` array; Anthropic wants them in a
971
+ * top-level `system` field. The provider extracts the first system
972
+ * message and filters out any others.
973
+ * - **Content is always a block array.** Even simple text replies
974
+ * are wrapped in `[{ type: 'text', text: '...' }]`. The provider
975
+ * flattens text blocks to a single string on the response.
976
+ * - **Tool calls use `tool_use` content blocks, not a separate
977
+ * `tool_calls` field.** Normalization pulls them out of the
978
+ * response content and into `ChatResponse.toolCalls`.
979
+ * - **Tool results are `user` messages with `tool_result` content
980
+ * blocks**, not a `'tool'` role. The provider handles the
981
+ * translation both ways.
982
+ * - **`max_tokens` is required on every request.** Framework
983
+ * `ChatOptions.maxTokens` wins; otherwise falls back to
984
+ * `defaultMaxTokens` (default 4096).
985
+ *
986
+ * ### Embeddings
987
+ *
988
+ * Anthropic does not ship an embeddings API. Calling `embed()` on
989
+ * this provider throws a descriptive error — users who need
990
+ * embeddings should construct a separate provider (OpenAI's
991
+ * `text-embedding-3-small` is a good default) and bind it
992
+ * alongside the Anthropic chat provider.
993
+ *
994
+ * @example
995
+ * ```ts
996
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
997
+ * import { AiAdapter, AnthropicProvider } from '@forinda/kickjs-ai'
998
+ *
999
+ * export const app = await bootstrap({
1000
+ * modules,
1001
+ * adapters: [
1002
+ * new AiAdapter({
1003
+ * provider: new AnthropicProvider({
1004
+ * apiKey: getEnv('ANTHROPIC_API_KEY'),
1005
+ * defaultChatModel: 'claude-opus-4-6',
1006
+ * }),
1007
+ * }),
1008
+ * ],
1009
+ * })
1010
+ * ```
1011
+ */
1012
var AnthropicProvider = class {
  name;
  baseURL;
  defaultChatModel;
  defaultMaxTokens;
  headers;
  /**
   * @param options.apiKey            Required Anthropic API key.
   * @param options.baseURL           API root override (trailing slash stripped).
   * @param options.defaultChatModel  Model used when `ChatInput.model` is absent.
   * @param options.defaultMaxTokens  Fallback for Anthropic's mandatory `max_tokens`.
   * @param options.apiVersion        Value for the `anthropic-version` header.
   * @param options.name              Provider name used for diagnostics.
   * @throws If `apiKey` is missing.
   */
  constructor(options) {
    if (!options.apiKey) throw new Error("AnthropicProvider: apiKey is required");
    const root = options.baseURL ?? "https://api.anthropic.com/v1";
    this.baseURL = root.replace(/\/$/, "");
    this.defaultChatModel = options.defaultChatModel ?? "claude-opus-4-6";
    this.defaultMaxTokens = options.defaultMaxTokens ?? 4096;
    this.name = options.name ?? "anthropic";
    this.headers = {
      "x-api-key": options.apiKey,
      "anthropic-version": options.apiVersion ?? "2023-06-01"
    };
  }
  /**
   * Non-streaming chat completion: build the Messages payload, POST
   * it, and normalize the reply to the framework's `ChatResponse`.
   */
  async chat(input, options = {}) {
    const body = this.buildMessagesPayload(input, options, false);
    const raw = await postJson(`${this.baseURL}/messages`, body, {
      headers: this.headers,
      signal: options.signal
    });
    return this.normalizeResponse(raw);
  }
  /**
   * Streaming chat completion. Anthropic's SSE stream is typed by
   * event (`message_start`, `content_block_start`, `content_block_delta`,
   * `content_block_stop`, `message_delta`, `message_stop`); only text
   * deltas and tool-use / input-JSON deltas produce chunks here, and
   * a final `done: true` chunk follows `message_stop` (or stream end).
   */
  async *stream(input, options = {}) {
    const body = this.buildMessagesPayload(input, options, true);
    const sse = postJsonStream(`${this.baseURL}/messages`, body, {
      headers: this.headers,
      signal: options.signal
    });
    // Identity of the tool_use block currently being streamed, if any.
    let activeTool = null;
    let emitted = false;
    for await (const raw of sse) {
      let event;
      try {
        event = JSON.parse(raw);
      } catch {
        continue;
      }
      switch (event.type) {
        case "content_block_start": {
          const block = event.content_block;
          if (block?.type === "tool_use") {
            activeTool = { id: block.id ?? "", name: block.name ?? "" };
            emitted = true;
            yield {
              content: "",
              done: false,
              toolCallDelta: { id: activeTool.id, name: activeTool.name }
            };
          }
          break;
        }
        case "content_block_delta": {
          const delta = event.delta;
          if (delta?.type === "text_delta" && typeof delta.text === "string") {
            emitted = true;
            yield { content: delta.text, done: false };
          } else if (delta?.type === "input_json_delta" && typeof delta.partial_json === "string" && activeTool) {
            emitted = true;
            yield {
              content: "",
              done: false,
              toolCallDelta: { id: activeTool.id, argumentsDelta: delta.partial_json }
            };
          }
          break;
        }
        case "content_block_stop":
          activeTool = null;
          break;
        case "message_stop":
          yield { content: "", done: true };
          return;
        default:
          break;
      }
    }
    // Defensive terminal chunk if the stream ended without message_stop.
    if (emitted) yield { content: "", done: true };
  }
  /**
   * Anthropic has no embeddings API. Throws a descriptive error so
   * embedding workflows reach for a dedicated provider instead of
   * silently getting empty vectors.
   */
  async embed(_input) {
    throw new Error("AnthropicProvider.embed is not available — Anthropic does not provide an embeddings API. Use OpenAIProvider (or another embeddings-capable provider) for embed calls, and keep Anthropic for chat.");
  }
  /**
   * Assemble the Anthropic Messages request body. The system prompt
   * is hoisted into the top-level `system` field, `max_tokens` is
   * always present (Anthropic requires it), and optional knobs /
   * tools are only attached when the caller supplied them.
   */
  buildMessagesPayload(input, options, stream) {
    const { systemPrompt, messages } = this.splitSystemMessage(input.messages);
    const hasTools = Array.isArray(input.tools) && input.tools.length > 0;
    return {
      model: input.model ?? this.defaultChatModel,
      max_tokens: options.maxTokens ?? this.defaultMaxTokens,
      messages: messages.map((message) => this.toAnthropicMessage(message)),
      ...(systemPrompt ? { system: systemPrompt } : {}),
      ...(options.temperature !== undefined ? { temperature: options.temperature } : {}),
      ...(options.topP !== undefined ? { top_p: options.topP } : {}),
      ...(options.stopSequences?.length ? { stop_sequences: options.stopSequences } : {}),
      ...(stream ? { stream: true } : {}),
      ...(hasTools ? {
        tools: input.tools.map((tool) => ({
          name: tool.name,
          description: tool.description,
          input_schema: tool.inputSchema
        }))
      } : {})
    };
  }
  /**
   * Pull the first system message out of the conversation — Anthropic
   * wants it as a top-level `system` field, not inside `messages`.
   * Additional system messages are dropped deliberately: one persona
   * prompt per call, concatenation would behave confusingly.
   */
  splitSystemMessage(messages) {
    let systemPrompt = null;
    const rest = messages.filter((message) => {
      if (message.role !== "system") return true;
      systemPrompt ??= message.content;
      return false;
    });
    return { systemPrompt, messages: rest };
  }
  /**
   * Translate a framework `ChatMessage` into Anthropic's content-block
   * format. Plain messages become a single `text` block; assistant
   * messages with tool calls mix `text` and `tool_use` blocks; the
   * framework `'tool'` role becomes a `user` message carrying a
   * `tool_result` block — Anthropic's representation of tool output.
   */
  toAnthropicMessage(message) {
    if (message.role === "tool") {
      return {
        role: "user",
        content: [{
          type: "tool_result",
          tool_use_id: message.toolCallId ?? "",
          content: message.content
        }]
      };
    }
    const calls = message.toolCalls ?? [];
    if (message.role === "assistant" && calls.length > 0) {
      const content = message.content ? [{ type: "text", text: message.content }] : [];
      for (const call of calls) {
        content.push({
          type: "tool_use",
          id: call.id,
          name: call.name,
          input: call.arguments
        });
      }
      return { role: "assistant", content };
    }
    return {
      role: message.role === "assistant" ? "assistant" : "user",
      content: [{ type: "text", text: message.content }]
    };
  }
  /**
   * Normalize an Anthropic response to `ChatResponse`: text blocks are
   * concatenated into one string, `tool_use` blocks become
   * `toolCalls`, and usage totals are derived from input + output
   * token counts.
   */
  normalizeResponse(data) {
    const textParts = [];
    const toolCalls = [];
    for (const block of data.content ?? []) {
      if (block.type === "text" && typeof block.text === "string") {
        textParts.push(block.text);
      }
      if (block.type === "tool_use" && block.name && block.id) {
        const args = block.input && typeof block.input === "object" ? block.input : {};
        toolCalls.push({ id: block.id, name: block.name, arguments: args });
      }
    }
    return {
      content: textParts.join(""),
      ...(toolCalls.length > 0 ? { toolCalls } : {}),
      ...(data.usage ? {
        usage: {
          promptTokens: data.usage.input_tokens,
          completionTokens: data.usage.output_tokens,
          totalTokens: data.usage.input_tokens + data.usage.output_tokens
        }
      } : {}),
      ...(data.stop_reason ? { finishReason: data.stop_reason } : {})
    };
  }
};
1262
+ //#endregion
1263
+ //#region src/prompts/prompt.ts
1264
+ /**
1265
+ * A reusable prompt template with `{{variable}}` placeholders and
1266
+ * a typed variables object at the render site.
1267
+ *
1268
+ * The type parameter `TVars` is a record of the variables the
1269
+ * template expects. Callers pass it explicitly:
1270
+ *
1271
+ * ```ts
1272
+ * const summarize = createPrompt<{ text: string; sentenceCount: number }>(
1273
+ * 'Summarize the following in {{sentenceCount}} sentences:\n\n{{text}}',
1274
+ * { name: 'summarize' },
1275
+ * )
1276
+ *
1277
+ * const msg = summarize.render({ text: 'Long article...', sentenceCount: 3 })
1278
+ * // → { role: 'user', content: 'Summarize the following in 3 sentences:\n\nLong article...' }
1279
+ * ```
1280
+ *
1281
+ * TypeScript catches missing or mistyped variables at compile time:
1282
+ *
1283
+ * ```ts
1284
+ * summarize.render({ text: 'x' }) // ✗ missing sentenceCount
1285
+ * summarize.render({ text: 'x', count: 3 }) // ✗ wrong key name
1286
+ * ```
1287
+ *
1288
+ * @remarks
1289
+ * Runtime-only in v0 — the type parameter is opt-in and has to be
1290
+ * provided explicitly. Workstream 5 adds a `kick typegen` pass that
1291
+ * scans `createPrompt` call sites and generates the TVars shape
1292
+ * automatically, so you can write `createPrompt('...')` and get
1293
+ * the types for free.
1294
+ */
1295
var Prompt = class {
  name;
  role;
  template;
  onMissing;
  /**
   * @param template           Prompt template with `{{variable}}` placeholders.
   * @param options.name       Label used in error/warning messages (default "prompt").
   * @param options.role       Role of the rendered `ChatMessage` (default "user").
   * @param options.onMissing  "throw" (default), "warn", or any other value to
   *                           silently leave missing placeholders in place.
   * @throws If `template` is not a string.
   */
  constructor(template, options = {}) {
    if (typeof template !== "string") throw new Error("createPrompt: template must be a string");
    this.template = template;
    this.name = options.name ?? "prompt";
    this.role = options.role ?? "user";
    this.onMissing = options.onMissing ?? "throw";
  }
  /**
   * Substitute variables into the template and return a ready-to-use
   * `ChatMessage` with this prompt's configured role.
   *
   * @throws If `onMissing === 'throw'` and a referenced variable is absent
   */
  render(vars) {
    return {
      role: this.role,
      content: this.renderString(vars)
    };
  }
  /**
   * Same as `render` but returns the raw string instead of wrapping
   * it in a `ChatMessage`. Placeholder syntax is `{{name}}`; internal
   * whitespace is ignored (`{{ name }}` works too). Unknown-looking
   * placeholders are handled per `onMissing`.
   *
   * Only the caller's OWN properties count as provided variables:
   * a placeholder like `{{toString}}` or `{{constructor}}` no longer
   * matches inherited `Object.prototype` members (which previously
   * interpolated built-in function source text into the prompt).
   */
  renderString(vars) {
    return this.template.replace(/\{\{\s*([a-zA-Z_][a-zA-Z0-9_.]*)\s*\}\}/g, (_match, key) => {
      // Own-property check (not `in`) so prototype members never leak in.
      if (!Object.prototype.hasOwnProperty.call(vars, key)) return this.handleMissing(key, _match);
      const value = vars[key];
      // null/undefined values are treated the same as absent keys.
      if (value === void 0 || value === null) return this.handleMissing(key, _match);
      return String(value);
    });
  }
  /** Return the raw template string. Useful for debugging and snapshot tests. */
  getTemplate() {
    return this.template;
  }
  /**
   * Return the distinct placeholder names the template references, in
   * first-appearance order. Reads the string only — it cannot see the
   * compile-time TVars type, so it is a tooling/testing aid, not a
   * substitute for the static check.
   */
  getPlaceholders() {
    const matches = this.template.matchAll(/\{\{\s*([a-zA-Z_][a-zA-Z0-9_.]*)\s*\}\}/g);
    const names = /* @__PURE__ */ new Set();
    for (const m of matches) {
      const name = m[1];
      if (name) names.add(name);
    }
    return [...names];
  }
  /**
   * Resolve a missing variable according to `onMissing`: throw, warn
   * and keep the placeholder, or silently keep the placeholder.
   */
  handleMissing(key, original) {
    if (this.onMissing === "throw") throw new Error(`Prompt(${this.name}): variable "${key}" is missing from the render call`);
    if (this.onMissing === "warn") console.warn(`Prompt(${this.name}): variable "${key}" is missing from the render call; leaving placeholder`);
    return original;
  }
};
1366
+ /**
1367
+ * Construct a reusable prompt template.
1368
+ *
1369
+ * Thin factory for the `Prompt` class — keeps call sites short and
1370
+ * matches the naming convention of other kickjs-ai factories
1371
+ * (`createToken`, etc.). Use the class form directly if you need
1372
+ * subclassing or custom rendering logic.
1373
+ *
1374
+ * @example
1375
+ * ```ts
1376
+ * import { createPrompt } from '@forinda/kickjs-ai'
1377
+ *
1378
+ * const persona = createPrompt<{ name: string; tone: string }>(
1379
+ * 'You are {{name}}, a {{tone}} assistant.',
1380
+ * { role: 'system', name: 'persona' },
1381
+ * )
1382
+ *
1383
+ * const msg = persona.render({ name: 'Claude', tone: 'concise' })
1384
+ * ```
1385
+ */
1386
/**
 * Factory for `Prompt` — shorthand for `new Prompt(template, options)`
 * that keeps call sites terse and matches the naming convention of
 * the package's other factories. Use the class directly when you
 * need subclassing or custom rendering.
 */
function createPrompt(template, options = {}) {
  const prompt = new Prompt(template, options);
  return prompt;
}
1389
+ //#endregion
1390
+ //#region src/memory/in-memory.ts
1391
+ /**
1392
+ * Zero-dependency in-memory chat memory.
1393
+ *
1394
+ * Backed by a plain array. Each instance represents ONE conversation
1395
+ * — services that serve multiple sessions construct one instance per
1396
+ * session, typically via a `sessionId → memory` map in a parent
1397
+ * service or a request-scoped DI factory.
1398
+ *
1399
+ * Good for:
1400
+ * - Tests and prototypes
1401
+ * - Single-process CLI tools
1402
+ * - Short-lived request handlers that don't outlive the HTTP response
1403
+ *
1404
+ * Not good for:
1405
+ * - Multi-replica deployments (memory isn't shared across pods)
1406
+ * - Sessions that need to survive a restart
1407
+ * - Anything with a compliance retention policy
1408
+ *
1409
+ * For any of those, swap in a persistent backend (Drizzle, Redis,
1410
+ * Postgres) that implements the same `ChatMemory` interface — the
1411
+ * calling service doesn't change.
1412
+ *
1413
+ * @example
1414
+ * ```ts
1415
+ * import { InMemoryChatMemory } from '@forinda/kickjs-ai'
1416
+ *
1417
+ * const memory = new InMemoryChatMemory()
1418
+ * await memory.add({ role: 'user', content: 'hello' })
1419
+ * const history = await memory.get()
1420
+ * ```
1421
+ */
1422
var InMemoryChatMemory = class {
  name = "in-memory";
  messages = [];
  /** Snapshot of the conversation so far (defensive copy). */
  async get() {
    return this.messages.slice();
  }
  /** Append one message, or an array of messages in order. */
  async add(message) {
    if (Array.isArray(message)) {
      this.messages.push(...message);
    } else {
      this.messages.push(message);
    }
  }
  /** Drop the entire history. */
  async clear() {
    this.messages = [];
  }
  /** Number of stored messages. */
  async size() {
    return this.messages.length;
  }
};
1439
+ //#endregion
1440
+ //#region src/memory/sliding-window.ts
1441
+ /**
1442
+ * Sliding-window memory wrapper.
1443
+ *
1444
+ * Wraps any `ChatMemory` implementation with a bounded history: only
1445
+ * the most recent N messages survive. Older messages are evicted on
1446
+ * every `get()` and after every `add()` that pushes the count past
1447
+ * the cap. The first system message is pinned by default so long
1448
+ * sessions don't lose their persona.
1449
+ *
1450
+ * Use this to keep prompt token usage predictable without writing
1451
+ * eviction logic in every service. It composes with any backend —
1452
+ * in-memory, Drizzle, Redis — because it only touches the inner
1453
+ * memory through its public interface.
1454
+ *
1455
+ * @example
1456
+ * ```ts
1457
+ * import { InMemoryChatMemory, SlidingWindowChatMemory } from '@forinda/kickjs-ai'
1458
+ *
1459
+ * const memory = new SlidingWindowChatMemory({
1460
+ * inner: new InMemoryChatMemory(),
1461
+ * maxMessages: 20,
1462
+ * pinSystemPrompt: true,
1463
+ * })
1464
+ * ```
1465
+ *
1466
+ * @remarks
1467
+ * Eviction writes back to the inner memory via `clear()` + `add()`.
1468
+ * That's fine for in-memory backends where clearing is O(1), but
1469
+ * costs a round-trip for network-backed stores. If you're wrapping
1470
+ * a remote backend, consider an inner memory that supports native
1471
+ * trimming — the wrapper's contract assumes clear+add is cheap.
1472
+ */
1473
var SlidingWindowChatMemory = class {
  name;
  inner;
  maxMessages;
  pinSystemPrompt;
  /**
   * @param options.inner            Backing `ChatMemory` (required).
   * @param options.maxMessages      Positive integer cap on retained messages.
   * @param options.pinSystemPrompt  Keep the first system message alive (default true).
   * @throws If `inner` is missing or `maxMessages` is not a positive integer.
   */
  constructor(options) {
    if (!options.inner) throw new Error("SlidingWindowChatMemory: `inner` memory is required");
    if (!Number.isInteger(options.maxMessages) || options.maxMessages <= 0) throw new Error("SlidingWindowChatMemory: `maxMessages` must be a positive integer");
    this.inner = options.inner;
    this.maxMessages = options.maxMessages;
    this.pinSystemPrompt = options.pinSystemPrompt ?? true;
    this.name = `sliding-window(${options.inner.name})`;
  }
  /** Read the inner history and return the windowed view. */
  async get() {
    const raw = await this.inner.get();
    return this.applyWindow(raw);
  }
  /**
   * Append to the inner memory, then write the trimmed history back
   * if the append pushed the count past the cap. The write-back uses
   * `clear()` + `add()` — cheap for in-memory backends, a round-trip
   * for network-backed ones.
   */
  async add(message) {
    await this.inner.add(message);
    const raw = await this.inner.get();
    const windowed = this.applyWindow(raw);
    if (windowed.length !== raw.length) {
      await this.inner.clear();
      await this.inner.add(windowed);
    }
  }
  /** Clear the inner memory entirely. */
  async clear() {
    await this.inner.clear();
  }
  /** Count messages, preferring the inner memory's native size() when present. */
  async size() {
    if (this.inner.size) return this.inner.size();
    return (await this.inner.get()).length;
  }
  /**
   * Apply the sliding window to `messages`, returning the bounded
   * view. Pure function so `get()` and `add()` share the logic.
   *
   * When `pinSystemPrompt` is set and the first message is a system
   * message, that message is kept and the remaining `maxMessages - 1`
   * slots are filled with the most recent messages after it. With
   * `maxMessages === 1` only the pinned system message survives —
   * previously `messages.slice(-0)` (which equals `slice(0)`, the
   * whole array) silently ignored the cap in that case.
   */
  applyWindow(messages) {
    if (messages.length <= this.maxMessages) return messages;
    if (this.pinSystemPrompt && messages[0]?.role === "system") {
      const tailBudget = this.maxMessages - 1;
      // Guard against slice(-0) === slice(0), which would keep everything.
      const tail = tailBudget > 0 ? messages.slice(-tailBudget) : [];
      return [messages[0], ...tail];
    }
    return messages.slice(-this.maxMessages);
  }
};
1522
+ //#endregion
1523
+ //#region src/rag/in-memory.ts
1524
+ /**
1525
+ * Zero-dependency in-memory vector store.
1526
+ *
1527
+ * Backed by a plain `Map<string, VectorDocument>` with a linear-scan
1528
+ * cosine-similarity search. Perfect for tests, prototypes, CLI tools,
1529
+ * and any project with a bounded corpus (roughly < 10k documents
1530
+ * before the scan starts taking more than a handful of milliseconds).
1531
+ *
1532
+ * For production workloads with larger corpora, swap in the pgvector,
1533
+ * Qdrant, or Pinecone store — the `VectorStore` interface is the same,
1534
+ * so services that consume `VECTOR_STORE` don't need to change.
1535
+ *
1536
+ * @example
1537
+ * ```ts
1538
+ * import { InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1539
+ *
1540
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
1541
+ * ```
1542
+ *
1543
+ * The class is entirely synchronous under the hood but wraps each
1544
+ * method in a Promise so it matches the async interface every other
1545
+ * backend implements. This keeps the calling code uniform regardless
1546
+ * of which backend is wired in.
1547
+ */
1548
var InMemoryVectorStore = class {
  name = "in-memory";
  docs = /* @__PURE__ */ new Map();
  /**
   * Insert or replace one document or a batch. Vectors are copied
   * defensively so later caller-side mutation can't corrupt the store.
   *
   * @throws If a document lacks an id or its vector is not an array.
   */
  async upsert(doc) {
    const batch = Array.isArray(doc) ? doc : [doc];
    for (const entry of batch) {
      if (!entry.id) throw new Error("InMemoryVectorStore.upsert: document id is required");
      if (!Array.isArray(entry.vector)) throw new Error(`InMemoryVectorStore.upsert: vector must be an array (id=${entry.id})`);
      this.docs.set(entry.id, {
        id: entry.id,
        content: entry.content,
        vector: entry.vector.slice(),
        metadata: entry.metadata
      });
    }
  }
  /**
   * Linear-scan cosine-similarity search. Results are sorted by score
   * descending (ties broken by id for determinism), filtered by the
   * optional metadata filter and `minScore`, and truncated to `topK`
   * (default 5).
   *
   * @throws If the query vector is missing or empty.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("InMemoryVectorStore.query: vector is required");
    const limit = options.topK ?? 5;
    const threshold = options.minScore ?? -Infinity;
    const hits = [];
    for (const doc of this.docs.values()) {
      if (options.filter && !matchesFilter(doc.metadata, options.filter)) continue;
      const score = cosineSimilarity(options.vector, doc.vector);
      if (score < threshold) continue;
      hits.push({
        id: doc.id,
        content: doc.content,
        score,
        metadata: doc.metadata
      });
    }
    hits.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
    return hits.slice(0, limit);
  }
  /** Remove one id or a batch of ids; unknown ids are ignored. */
  async delete(id) {
    for (const key of Array.isArray(id) ? id : [id]) this.docs.delete(key);
  }
  /** Remove every document. */
  async deleteAll() {
    this.docs.clear();
  }
  /** Number of stored documents. */
  async count() {
    return this.docs.size;
  }
};
1598
+ /**
1599
+ * Cosine similarity between two vectors. Returns a value in [-1, 1]
1600
+ * where 1 means identical direction, 0 means orthogonal, -1 means
1601
+ * opposite. The function is symmetric and scale-invariant.
1602
+ *
1603
+ * Returns 0 for length mismatches or zero-magnitude vectors rather
1604
+ * than throwing — callers get a useless hit they can filter out via
1605
+ * `minScore`, but the store doesn't crash on bad input.
1606
+ */
1607
/**
 * Cosine similarity of two equal-length vectors, in [-1, 1]: 1 is
 * identical direction, 0 is orthogonal, -1 is opposite. Symmetric
 * and scale-invariant.
 *
 * Returns 0 (instead of throwing) on length mismatch, empty input,
 * or a zero-magnitude operand — bad input yields a filterable low
 * score via `minScore` rather than a crash.
 */
function cosineSimilarity(a, b) {
  const n = a.length;
  if (n === 0 || n !== b.length) return 0;
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < n; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
1622
+ /**
1623
+ * Simple equality-based metadata filter. Every key in `filter` must
1624
+ * exist on the metadata and be strictly equal. Array values on the
1625
+ * filter are treated as an `IN` clause — the metadata value must be
1626
+ * one of the listed values.
1627
+ *
1628
+ * This covers 90% of metadata filtering use cases without pulling in
1629
+ * a query-language dependency. Backends that support richer filters
1630
+ * (pgvector's WHERE, Qdrant's conditions, Pinecone's filter DSL) can
1631
+ * pass through their native syntax via the same `filter` field,
1632
+ * since the type is `Record<string, unknown>`.
1633
+ */
1634
/**
 * Equality-based metadata filter: every key in `filter` must match
 * the metadata value via strict equality, and array filter values
 * act as an IN clause (the metadata value must be one of them).
 * Documents without metadata never match — even an empty filter.
 */
function matchesFilter(metadata, filter) {
  if (!metadata) return false;
  return Object.entries(filter).every(([key, expected]) => {
    const actual = metadata[key];
    return Array.isArray(expected) ? expected.includes(actual) : actual === expected;
  });
}
1644
+ //#endregion
1645
+ //#region src/rag/pgvector.ts
1646
+ /**
1647
+ * pgvector-backed `VectorStore` implementation.
1648
+ *
1649
+ * Stores documents in a single table with a `vector` column indexed
1650
+ * via pgvector's native operators. Cosine similarity is the scoring
1651
+ * metric — computed as `1 - (vector <=> query_vector)` because the
1652
+ * `<=>` operator returns cosine DISTANCE, not similarity.
1653
+ *
1654
+ * ### Lazy initialization
1655
+ *
1656
+ * The Postgres pool and schema are set up on first use, not in the
1657
+ * constructor. That keeps the constructor synchronous, matches the
1658
+ * rest of the `VectorStore` implementations, and lets users construct
1659
+ * the store inside a module's `register(container)` method without
1660
+ * awaiting inside DI resolution.
1661
+ *
1662
+ * ### Schema
1663
+ *
1664
+ * The default schema is:
1665
+ *
1666
+ * ```sql
1667
+ * CREATE EXTENSION IF NOT EXISTS vector;
1668
+ * CREATE TABLE IF NOT EXISTS <schema>.<table> (
1669
+ * id TEXT PRIMARY KEY,
1670
+ * content TEXT NOT NULL,
1671
+ * vector vector(<dimensions>) NOT NULL,
1672
+ * metadata JSONB
1673
+ * );
1674
+ * ```
1675
+ *
1676
+ * No index is created by default — pgvector's IVFFlat and HNSW
1677
+ * indexes benefit from being created AFTER data is loaded, and the
1678
+ * right choice depends on corpus size. Users should add an index
1679
+ * themselves in a real migration when they're ready:
1680
+ *
1681
+ * ```sql
1682
+ * CREATE INDEX ON kickjs_embeddings
1683
+ * USING hnsw (vector vector_cosine_ops);
1684
+ * ```
1685
+ *
1686
+ * ### Metadata filtering
1687
+ *
1688
+ * Filters are translated to JSONB WHERE clauses:
1689
+ * - Scalar: `metadata->>'key' = $N` (coerced to text)
1690
+ * - Array: `metadata->>'key' = ANY($N::text[])`
1691
+ *
1692
+ * Keys are validated against `[a-zA-Z0-9_.-]+` before being
1693
+ * interpolated into SQL — anything else throws. Values go through
1694
+ * parameter binding, so SQL injection via values is not possible.
1695
+ *
1696
+ * @example
1697
+ * ```ts
1698
+ * import { Pool } from 'pg'
1699
+ * import { getEnv } from '@forinda/kickjs'
1700
+ * import { AiAdapter, PgVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1701
+ *
1702
+ * const pool = new Pool({ connectionString: getEnv('DATABASE_URL') })
1703
+ * const store = new PgVectorStore({ client: pool, dimensions: 1536 })
1704
+ *
1705
+ * export const app = await bootstrap({
1706
+ * modules,
1707
+ * adapters: [new AiAdapter({ provider })],
1708
+ * plugins: [
1709
+ * {
1710
+ * name: 'pgvector',
1711
+ * register: (container) => {
1712
+ * container.registerInstance(VECTOR_STORE, store)
1713
+ * },
1714
+ * },
1715
+ * ],
1716
+ * })
1717
+ * ```
1718
+ */
1719
var PgVectorStore = class {
  /** Store name surfaced to consumers (defaults to "pgvector"). */
  name;
  /** Expected vector length; every upsert/query vector is validated against it. */
  dimensions;
  schema;
  table;
  /** Pre-quoted `"schema"."table"` string interpolated into every SQL statement. */
  fullyQualified;
  /** When true, `ensureReady` skips the extension/table bootstrap entirely. */
  skipSetup;
  /** Query executor: the user-supplied client/pool, or a Pool created lazily from `connectionString`. */
  client;
  connectionString;
  /** Cached bootstrap promise so the schema setup runs at most once per instance. */
  setupPromise = null;
  /**
   * @param options requires `dimensions` plus either a ready-made `client`
   *   (anything exposing `query(sql, params)`) or a `connectionString` from
   *   which a `pg` Pool is created lazily on first use.
   * @throws if neither `client` nor `connectionString` is given, or if
   *   `dimensions` is not a positive integer.
   */
  constructor(options) {
    if (!options.client && !options.connectionString) throw new Error("PgVectorStore: either `client` or `connectionString` must be provided");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("PgVectorStore: `dimensions` must be a positive integer");
    this.dimensions = options.dimensions;
    this.schema = options.schema ?? "public";
    this.table = options.table ?? "kickjs_embeddings";
    // Quote both parts so reserved words / mixed-case identifiers are safe.
    this.fullyQualified = `${quoteIdent(this.schema)}.${quoteIdent(this.table)}`;
    this.skipSetup = options.skipSetup ?? false;
    this.name = options.name ?? "pgvector";
    this.client = options.client ?? null;
    this.connectionString = options.connectionString ?? null;
  }
  /**
   * Insert-or-update one document or a batch. The whole batch is
   * validated up front so a bad document fails before any SQL runs,
   * then written in a single multi-row `INSERT ... ON CONFLICT` upsert.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    for (const d of list) {
      if (!d.id) throw new Error("PgVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector)) throw new Error(`PgVectorStore.upsert: vector must be an array (id=${d.id})`);
      if (d.vector.length !== this.dimensions) throw new Error(`PgVectorStore.upsert: vector length ${d.vector.length} does not match configured dimensions ${this.dimensions} (id=${d.id})`);
    }
    const client = await this.ensureReady();
    // Build one statement with 4 positional params per row: id, content, vector, metadata.
    const values = [];
    const params = [];
    let p = 1;
    for (const d of list) {
      values.push(`($${p++}, $${p++}, $${p++}::vector, $${p++}::jsonb)`);
      params.push(d.id, d.content, toPgVector(d.vector), JSON.stringify(d.metadata ?? {}));
    }
    const sql = `INSERT INTO ${this.fullyQualified} (id, content, vector, metadata) VALUES ` + values.join(", ") + " ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vector = EXCLUDED.vector, metadata = EXCLUDED.metadata";
    await client.query(sql, params);
  }
  /**
   * Nearest-neighbour search scored by cosine similarity, computed as
   * `1 - (vector <=> query)` because pgvector's `<=>` is cosine DISTANCE.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("PgVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`PgVectorStore.query: vector length ${options.vector.length} does not match configured dimensions ${this.dimensions}`);
    const client = await this.ensureReady();
    const topK = options.topK ?? 5;
    const minScore = options.minScore ?? -Infinity;
    // $1 is the query vector, so filter params start at $2; LIMIT takes the next free slot.
    const { whereSql, whereParams } = buildWhereClause(options.filter, 2);
    const limitParamIdx = whereParams.length + 2;
    const sql = `SELECT id, content, metadata, (1 - (vector <=> \$1::vector)) AS score FROM ${this.fullyQualified} ` + whereSql + ` ORDER BY vector <=> $1::vector LIMIT $${limitParamIdx}`;
    const params = [
      toPgVector(options.vector),
      ...whereParams,
      topK
    ];
    const { rows } = await client.query(sql, params);
    // minScore filtering happens client-side, after LIMIT has been applied server-side.
    const hits = [];
    for (const row of rows) {
      if (row.score < minScore) continue;
      hits.push({
        id: row.id,
        content: row.content,
        score: row.score,
        metadata: row.metadata ?? void 0
      });
    }
    return hits;
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    await (await this.ensureReady()).query(`DELETE FROM ${this.fullyQualified} WHERE id = ANY($1::text[])`, [ids]);
  }
  /** Remove every row via TRUNCATE (table and schema are kept). */
  async deleteAll() {
    await (await this.ensureReady()).query(`TRUNCATE ${this.fullyQualified}`);
  }
  /**
   * Total row count. COUNT(*) is cast to text server-side and parsed
   * here, avoiding the `pg` driver's bigint handling for large counts.
   */
  async count() {
    const { rows } = await (await this.ensureReady()).query(`SELECT COUNT(*)::text AS count FROM ${this.fullyQualified}`);
    const raw = rows[0]?.count ?? "0";
    return Number.parseInt(raw, 10);
  }
  /**
   * Release the internal connection pool, if the store created one.
   *
   * If the caller supplied their own `client`, this is a no-op —
   * lifecycle of a user-owned pool stays with the user. This method
   * is intentionally not on the `VectorStore` interface because most
   * backends don't need explicit teardown; services that want to
   * clean up call it via an adapter.shutdown hook.
   */
  async close() {
    // Only end the client when BOTH a connectionString exists (we own the
    // pool) and a client was actually created.
    if (this.connectionString && this.client) {
      const withEnd = this.client;
      if (typeof withEnd.end === "function") await withEnd.end();
      this.client = null;
    }
  }
  /**
   * Ensure the pool exists and the schema is set up. Called by every
   * public method before running any SQL. The setup migration runs
   * at most once per store instance — subsequent calls reuse the
   * cached promise.
   */
  async ensureReady() {
    if (!this.client) this.client = await this.createPoolFromConnectionString();
    if (!this.skipSetup) {
      if (!this.setupPromise) this.setupPromise = this.runSchemaSetup(this.client);
      await this.setupPromise;
    }
    return this.client;
  }
  /**
   * Dynamically import `pg` and create a Pool from the configured
   * connection string. Imported lazily so users who supply their own
   * `client` never force `pg` to be installed.
   *
   * Throws a friendly error if `pg` is not installed — the same
   * graceful-degradation pattern the CLI uses for optional packages.
   */
  async createPoolFromConnectionString() {
    if (!this.connectionString) throw new Error("PgVectorStore: no client or connectionString configured (this should never happen)");
    // Indirect specifier keeps bundlers from statically resolving `pg`.
    const pgSpec = "pg";
    let pgModule;
    try {
      pgModule = await import(pgSpec);
    } catch {
      throw new Error("PgVectorStore: the `pg` package is not installed. Run `pnpm add pg` (or pass a pre-made executor via the `client` option) to use the pgvector store.");
    }
    // Handle both CJS-interop (`default.Pool`) and native ESM (`Pool`) shapes.
    const Pool = pgModule.default?.Pool ?? pgModule.Pool;
    if (!Pool) throw new Error("PgVectorStore: the `pg` module did not export a `Pool` class (unexpected version).");
    return new Pool({ connectionString: this.connectionString });
  }
  /**
   * Run the schema bootstrap: enable the pgvector extension, create
   * the embeddings table if it doesn't exist, and nothing else.
   *
   * Indexes are deliberately not created here — pgvector's IVFFlat
   * and HNSW indexes perform best when created after data is loaded,
   * and the right choice depends on corpus size. Users should add
   * their index in a real migration when they're ready.
   */
  async runSchemaSetup(client) {
    await client.query("CREATE EXTENSION IF NOT EXISTS vector");
    await client.query(`CREATE TABLE IF NOT EXISTS ${this.fullyQualified} (id TEXT PRIMARY KEY, content TEXT NOT NULL, vector vector(${this.dimensions}) NOT NULL, metadata JSONB )`);
  }
};
1865
/**
 * Serialize a JS number array into pgvector's textual wire format,
 * e.g. `'[0.1,0.2,0.3]'`. The `pg` driver has no vector type, so the
 * caller casts the string with `::vector` in SQL. Non-finite entries
 * (NaN, ±Infinity) are written as `0` rather than `null`/`NaN`,
 * because pgvector rejects non-finite values on insert.
 */
function toPgVector(vector) {
  const parts = [];
  for (const value of vector) {
    parts.push(Number.isFinite(value) ? String(value) : "0");
  }
  return "[" + parts.join(",") + "]";
}
1875
/**
 * Wrap a Postgres identifier in double quotes, doubling any embedded
 * quotes. Used for schema and table names so users can pass lowercase
 * identifiers without worrying about reserved words.
 */
function quoteIdent(ident) {
  const doubled = ident.split('"').join('""');
  return '"' + doubled + '"';
}
1883
/**
 * Translate an equality metadata filter into a SQL WHERE clause plus
 * bound parameters, with placeholders starting at `$<startAt>`.
 *
 * - Scalar values become `metadata->>'key' = $N`
 * - Array values become `metadata->>'key' = ANY($N::text[])`
 *
 * Keys must match `[a-zA-Z0-9_.-]+` — anything else is rejected,
 * because keys are interpolated into the SQL text. Values always go
 * through parameter binding and are coerced to strings, since `->>`
 * yields text. Callers needing numeric range queries should issue raw
 * SQL via their own executor; this helper covers the equality-case 90%.
 *
 * Exported for unit testing.
 */
function buildWhereClause(filter, startAt) {
  if (!filter || Object.keys(filter).length === 0) {
    return { whereSql: "", whereParams: [] };
  }
  const allowedKey = /^[a-zA-Z0-9_.\-]+$/;
  const clauses = [];
  const boundParams = [];
  let placeholder = startAt;
  for (const [key, value] of Object.entries(filter)) {
    if (!allowedKey.test(key)) {
      throw new Error(`PgVectorStore: metadata filter key "${key}" contains unsupported characters (allowed: letters, digits, underscore, dot, dash)`);
    }
    if (Array.isArray(value)) {
      clauses.push(`metadata->>'${key}' = ANY($${placeholder}::text[])`);
      boundParams.push(value.map(String));
    } else {
      const coerced = value === null || value === void 0 ? "" : String(value);
      clauses.push(`metadata->>'${key}' = $${placeholder}`);
      boundParams.push(coerced);
    }
    placeholder += 1;
  }
  return {
    whereSql: "WHERE " + clauses.join(" AND "),
    whereParams: boundParams
  };
}
1921
+ //#endregion
1922
+ //#region src/rag/qdrant.ts
1923
+ /**
1924
+ * Qdrant-backed `VectorStore` implementation.
1925
+ *
1926
+ * Qdrant stores vectors as "points" inside a named "collection". Each
1927
+ * point has an id, a dense vector, and an arbitrary JSON "payload" —
1928
+ * the store uses the payload to carry both the original `content`
1929
+ * string (so RAG retrieval can feed text back to the LLM) and the
1930
+ * `metadata` record.
1931
+ *
1932
+ * ### Filtering
1933
+ *
1934
+ * The framework's equality-map filter (`{ key: value }` or
1935
+ * `{ key: [v1, v2] }`) is translated into Qdrant's `filter.must`
1936
+ * conditions against `payload.metadata.<key>`. Scalar values become
1937
+ * `match: { value }`, arrays become `match: { any: [...] }`. Users
1938
+ * who need richer queries (nested, range, should/must_not) can bypass
1939
+ * this by extending the class, but equality covers the 90% case.
1940
+ *
1941
+ * ### Lazy collection creation
1942
+ *
1943
+ * On first write, the store calls `PUT /collections/{name}` with
1944
+ * `vectors: { size, distance }` — idempotent, so it's safe to run on
1945
+ * every boot. Pass `skipSetup: true` if your cluster is provisioned
1946
+ * externally and the runtime API key doesn't have create permission.
1947
+ *
1948
+ * @example
1949
+ * ```ts
1950
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
1951
+ * import { AiAdapter, QdrantVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1952
+ *
1953
+ * const store = new QdrantVectorStore({
1954
+ * url: getEnv('QDRANT_URL'),
1955
+ * apiKey: getEnv('QDRANT_API_KEY'),
1956
+ * collection: 'docs',
1957
+ * dimensions: 1536,
1958
+ * })
1959
+ *
1960
+ * export const app = await bootstrap({
1961
+ * modules,
1962
+ * adapters: [new AiAdapter({ provider })],
1963
+ * plugins: [
1964
+ * {
1965
+ * name: 'qdrant',
1966
+ * register: (container) => {
1967
+ * container.registerInstance(VECTOR_STORE, store)
1968
+ * },
1969
+ * },
1970
+ * ],
1971
+ * })
1972
+ * ```
1973
+ */
1974
var QdrantVectorStore = class {
  /** Store name surfaced to consumers (defaults to "qdrant"). */
  name;
  /** Base URL of the Qdrant HTTP API, with any trailing slash stripped. */
  url;
  collection;
  /** Expected vector length; validated on every upsert/query. */
  dimensions;
  /** Distance metric used when the collection is created (defaults to "Cosine"). */
  distance;
  /** Shared request headers; includes `api-key` only when one was configured. */
  headers;
  skipSetup;
  /**
   * Cached bootstrap promise. The first method call triggers collection
   * creation; every subsequent call awaits the same promise so the
   * check happens exactly once per process. On failure we clear the
   * cache so the next call can retry (networks blink, DNS flaps).
   */
  setupPromise = null;
  /**
   * @param options requires `collection` and a positive-integer
   *   `dimensions`; `url` defaults to the local Qdrant default port.
   * @throws on a missing collection name or invalid dimensions.
   */
  constructor(options) {
    if (!options.collection) throw new Error("QdrantVectorStore: collection is required");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("QdrantVectorStore: dimensions must be a positive integer");
    this.url = (options.url ?? "http://localhost:6333").replace(/\/$/, "");
    this.collection = options.collection;
    this.dimensions = options.dimensions;
    this.distance = options.distance ?? "Cosine";
    this.skipSetup = options.skipSetup ?? false;
    this.name = options.name ?? "qdrant";
    this.headers = {
      "content-type": "application/json",
      ...options.apiKey ? { "api-key": options.apiKey } : {}
    };
  }
  /**
   * Insert-or-update one document or a batch as Qdrant points. The
   * payload carries both `content` and `metadata` so retrieval can
   * return the original text.
   *
   * NOTE(review): Qdrant accepts only unsigned-integer or UUID point
   * ids; arbitrary string ids are rejected by the server — confirm
   * callers use UUID-shaped ids here.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    // Validate the whole batch before any network call.
    for (const d of list) {
      if (!d.id) throw new Error("QdrantVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector) || d.vector.length !== this.dimensions) throw new Error(`QdrantVectorStore.upsert: vector length ${d.vector?.length ?? 0} does not match collection dimensions ${this.dimensions} (id=${d.id})`);
    }
    await this.ensureCollection();
    const points = list.map((d) => ({
      id: d.id,
      vector: d.vector,
      payload: {
        content: d.content,
        metadata: d.metadata ?? {}
      }
    }));
    // `wait=true` makes the write synchronous so a subsequent query sees it.
    await this.request("PUT", `/collections/${this.collection}/points?wait=true`, { points });
  }
  /**
   * Nearest-neighbour search via `points/search`. `minScore` maps to
   * Qdrant's native `score_threshold`, so filtering happens server-side.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("QdrantVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`QdrantVectorStore.query: vector length ${options.vector.length} does not match collection dimensions ${this.dimensions}`);
    await this.ensureCollection();
    const topK = options.topK ?? 5;
    const minScore = options.minScore;
    const body = {
      vector: options.vector,
      limit: topK,
      with_payload: true
    };
    if (options.filter && Object.keys(options.filter).length > 0) body.filter = buildQdrantFilter(options.filter);
    if (minScore !== void 0) body.score_threshold = minScore;
    return (await this.request("POST", `/collections/${this.collection}/points/search`, body)).result.map((hit) => ({
      id: String(hit.id),
      content: hit.payload?.content ?? "",
      score: hit.score,
      metadata: hit.payload?.metadata ?? {}
    }));
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    await this.ensureCollection();
    await this.request("POST", `/collections/${this.collection}/points/delete?wait=true`, { points: ids });
  }
  /**
   * Drop the whole collection, then (unless `skipSetup`) recreate it
   * empty. The cached setup promise is cleared so recreation runs.
   */
  async deleteAll() {
    await this.request("DELETE", `/collections/${this.collection}`, void 0);
    this.setupPromise = null;
    if (!this.skipSetup) await this.ensureCollection();
  }
  /** Exact point count via the `points/count` endpoint. */
  async count() {
    await this.ensureCollection();
    return (await this.request("POST", `/collections/${this.collection}/points/count`, { exact: true })).result.count;
  }
  /**
   * Thin wrapper around `fetch` that applies the shared headers, JSON
   * encodes the body, and maps non-2xx responses to `Error` instances
   * with the response body attached for debugging. Matches the shape
   * used by `providers/base.ts`, kept local here so the RAG module has
   * no dependency on the provider internals.
   */
  async request(method, path, body) {
    const res = await fetch(`${this.url}${path}`, {
      method,
      headers: this.headers,
      body: body === void 0 ? void 0 : JSON.stringify(body)
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`QdrantVectorStore: ${method} ${path} failed with ${res.status}: ${text}`);
    }
    // Empty or non-JSON 2xx bodies resolve to undefined rather than throwing.
    const text = await res.text();
    if (!text) return void 0;
    try {
      return JSON.parse(text);
    } catch {
      return;
    }
  }
  /**
   * Create the collection on first use. The `PUT /collections/{name}`
   * endpoint is idempotent — calling it on an existing collection is a
   * no-op with status 200. We cache the promise so concurrent callers
   * share the same in-flight request and every subsequent call resolves
   * immediately.
   */
  ensureCollection() {
    if (this.skipSetup) return Promise.resolve();
    this.setupPromise ??= this.runSetup().catch((err) => {
      // Clear the cache on failure so the next call can retry.
      this.setupPromise = null;
      throw err;
    });
    return this.setupPromise;
  }
  /** Issue the idempotent collection-creation request. */
  async runSetup() {
    await this.request("PUT", `/collections/${this.collection}`, { vectors: {
      size: this.dimensions,
      distance: this.distance
    } });
  }
};
2103
/**
 * Translate the framework's equality-map filter into Qdrant's
 * `must` condition format.
 *
 * Scalars become `{ key, match: { value } }`; arrays become
 * `{ key, match: { any: [...] } }`. Keys are interpreted as paths into
 * `payload.metadata`, matching how `upsert` nests the metadata record.
 *
 * Exported so tests (and future richer filter builders) can verify the
 * translation without going through a live Qdrant instance.
 */
function buildQdrantFilter(filter) {
  const must = Object.entries(filter).map(([key, value]) => {
    const payloadPath = `metadata.${key}`;
    const match = Array.isArray(value) ? { any: value } : { value };
    return { key: payloadPath, match };
  });
  return { must };
}
2129
+ //#endregion
2130
+ //#region src/rag/pinecone.ts
2131
+ /**
2132
+ * Pinecone-backed `VectorStore` implementation.
2133
+ *
2134
+ * Pinecone stores vectors with a flat id, a dense vector, and an
2135
+ * arbitrary metadata object. Like Qdrant the store uses metadata to
2136
+ * carry both the original `content` (for RAG retrieval) and the
2137
+ * application's own metadata fields — they're merged into one
2138
+ * Pinecone metadata record at write time and split back apart at
2139
+ * read time.
2140
+ *
2141
+ * ### Filtering
2142
+ *
2143
+ * Pinecone has a native filter DSL that looks almost identical to
2144
+ * MongoDB's — `{ key: { $eq: value } }`, `{ key: { $in: [...] } }`,
2145
+ * etc. The framework's equality-map filter is translated directly:
2146
+ * scalars become `$eq` and arrays become `$in`. Users who need the
2147
+ * full DSL (range, $ne, $or) can pass a raw Pinecone filter through
2148
+ * the same `filter` field — the translator is a no-op when the keys
2149
+ * start with `$`, so advanced filters pass through unchanged.
2150
+ *
2151
+ * ### Index provisioning
2152
+ *
2153
+ * Pinecone indexes must be created out-of-band. This store does NOT
2154
+ * provision indexes automatically — the dimensionality, metric, and
2155
+ * pod type are infrastructure decisions that should live in
2156
+ * Terraform or the Pinecone dashboard, not in runtime code.
2157
+ *
2158
+ * @example
2159
+ * ```ts
2160
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
2161
+ * import { AiAdapter, PineconeVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
2162
+ *
2163
+ * const store = new PineconeVectorStore({
2164
+ * apiKey: getEnv('PINECONE_API_KEY'),
2165
+ * indexHost: getEnv('PINECONE_INDEX_HOST'),
2166
+ * dimensions: 1536,
2167
+ * namespace: 'docs',
2168
+ * })
2169
+ *
2170
+ * export const app = await bootstrap({
2171
+ * modules,
2172
+ * adapters: [new AiAdapter({ provider })],
2173
+ * plugins: [
2174
+ * {
2175
+ * name: 'pinecone',
2176
+ * register: (container) => {
2177
+ * container.registerInstance(VECTOR_STORE, store)
2178
+ * },
2179
+ * },
2180
+ * ],
2181
+ * })
2182
+ * ```
2183
+ */
2184
var PineconeVectorStore = class {
  /** Store name surfaced to consumers (defaults to "pinecone"). */
  name;
  /** Data-plane base URL derived from `indexHost` (https:// prepended if missing). */
  baseURL;
  /** Optional Pinecone namespace; included in request bodies only when set. */
  namespace;
  /** Expected vector length; validated on every upsert/query. */
  dimensions;
  /** Shared request headers including the API key and pinned API version. */
  headers;
  /**
   * @param options requires `apiKey`, `indexHost`, and a positive-integer
   *   `dimensions`; `namespace` and `name` are optional.
   * @throws when any required option is missing or invalid.
   */
  constructor(options) {
    if (!options.apiKey) throw new Error("PineconeVectorStore: apiKey is required");
    if (!options.indexHost) throw new Error("PineconeVectorStore: indexHost is required");
    if (!Number.isInteger(options.dimensions) || options.dimensions <= 0) throw new Error("PineconeVectorStore: dimensions must be a positive integer");
    const host = options.indexHost.replace(/\/$/, "");
    this.baseURL = host.startsWith("http") ? host : `https://${host}`;
    this.namespace = options.namespace;
    this.dimensions = options.dimensions;
    this.name = options.name ?? "pinecone";
    this.headers = {
      "content-type": "application/json",
      "Api-Key": options.apiKey,
      // Pin the API version so behavior doesn't drift with server updates.
      "X-Pinecone-API-Version": "2024-10"
    };
  }
  /**
   * Insert-or-update one document or a batch. `content` is merged into
   * the Pinecone metadata record alongside the application metadata and
   * split back apart at read time.
   */
  async upsert(doc) {
    const list = Array.isArray(doc) ? doc : [doc];
    if (list.length === 0) return;
    // Validate the whole batch before any network call.
    for (const d of list) {
      if (!d.id) throw new Error("PineconeVectorStore.upsert: document id is required");
      if (!Array.isArray(d.vector) || d.vector.length !== this.dimensions) throw new Error(`PineconeVectorStore.upsert: vector length ${d.vector?.length ?? 0} does not match index dimensions ${this.dimensions} (id=${d.id})`);
    }
    const body = { vectors: list.map((d) => ({
      id: d.id,
      values: d.vector,
      metadata: {
        content: d.content,
        ...d.metadata ?? {}
      }
    })) };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/upsert", body);
  }
  /**
   * Nearest-neighbour search via `/query`. `minScore` is applied
   * client-side after the server returns `topK` matches.
   */
  async query(options) {
    if (!Array.isArray(options.vector) || options.vector.length === 0) throw new Error("PineconeVectorStore.query: vector is required");
    if (options.vector.length !== this.dimensions) throw new Error(`PineconeVectorStore.query: vector length ${options.vector.length} does not match index dimensions ${this.dimensions}`);
    const topK = options.topK ?? 5;
    const body = {
      vector: options.vector,
      topK,
      includeMetadata: true
    };
    if (this.namespace) body.namespace = this.namespace;
    if (options.filter && Object.keys(options.filter).length > 0) body.filter = buildPineconeFilter(options.filter);
    const data = await this.request("/query", body);
    const minScore = options.minScore ?? -Infinity;
    return data.matches.filter((m) => m.score >= minScore).map((match) => {
      // Split the reserved `content` key back out of the merged metadata.
      const { content, ...metadata } = match.metadata ?? {};
      return {
        id: match.id,
        content: typeof content === "string" ? content : "",
        score: match.score,
        metadata
      };
    });
  }
  /** Delete one id or a batch of ids; an empty batch is a no-op. */
  async delete(id) {
    const ids = Array.isArray(id) ? id : [id];
    if (ids.length === 0) return;
    const body = { ids };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/delete", body);
  }
  /** Delete every vector (scoped to the namespace when one is configured). */
  async deleteAll() {
    const body = { deleteAll: true };
    if (this.namespace) body.namespace = this.namespace;
    await this.request("/vectors/delete", body);
  }
  /**
   * Vector count from `/describe_index_stats` — per-namespace when a
   * namespace is configured, otherwise the index total.
   */
  async count() {
    const data = await this.request("/describe_index_stats", this.namespace ? { filter: {} } : {});
    if (this.namespace) return data.namespaces?.[this.namespace]?.vectorCount ?? 0;
    return data.totalVectorCount ?? 0;
  }
  /**
   * POST a JSON body to the Pinecone data-plane and return the parsed
   * JSON response. Every Pinecone data-plane endpoint uses POST even
   * for reads (`/query`, `/describe_index_stats`), so the helper
   * doesn't bother parameterizing the method.
   */
  async request(path, body) {
    const res = await fetch(`${this.baseURL}${path}`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body)
    });
    if (!res.ok) {
      const text = await res.text().catch(() => "");
      throw new Error(`PineconeVectorStore: POST ${path} failed with ${res.status}: ${text}`);
    }
    // Empty or non-JSON 2xx bodies resolve to undefined rather than throwing.
    const text = await res.text();
    if (!text) return void 0;
    try {
      return JSON.parse(text);
    } catch {
      return;
    }
  }
};
2288
/**
 * Translate the framework's equality-map filter into Pinecone's
 * MongoDB-style filter DSL.
 *
 * Rules:
 * - Scalar value → `{ key: { $eq: value } }`
 * - Array value → `{ key: { $in: [...] } }`
 * - Key that starts with $ → passed through untouched, letting
 *   callers hand-craft `{ $or: [...] }` or range conditions
 *   without the translator mangling them
 * - Value already shaped like `{ $eq, $in, $gt, ... }` → passed
 *   through untouched for the same reason
 *
 * Exported so tests can verify the translation offline.
 */
function buildPineconeFilter(filter) {
  const translated = {};
  for (const [key, value] of Object.entries(filter)) {
    if (key.startsWith("$") || isOperatorRecord(value)) {
      // Already in Pinecone's DSL — pass through untouched.
      translated[key] = value;
    } else if (Array.isArray(value)) {
      translated[key] = { $in: value };
    } else {
      translated[key] = { $eq: value };
    }
  }
  return translated;
}
/**
 * True when `value` is a plain object whose keys include at least one
 * `$`-prefixed operator (e.g. `{ $gt: 3 }`), meaning the caller wrote
 * a raw Pinecone condition that must not be re-wrapped.
 */
function isOperatorRecord(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) return false;
  return Object.keys(value).some((key) => key.startsWith("$"));
}
2324
+ //#endregion
2325
+ //#region src/rag/rag-service.ts
2326
// Default system-prompt template for RAG context injection. The literal
// `{documents}` placeholder is replaced with the formatted retrieved
// documents in `RagService.augmentChatInput`.
const DEFAULT_SYSTEM_TEMPLATE = "You have access to the following context documents. Use them when they are relevant to the user question; ignore them otherwise.\n\n{documents}\n\nIf the context doesn't contain enough information, say so plainly — don't invent answers.";
2327
+ /**
2328
+ * High-level RAG helper that ties an `AiProvider` (for embeddings)
2329
+ * to a `VectorStore` (for retrieval) and produces the three operations
2330
+ * every RAG-powered service needs: index documents, search by query,
2331
+ * and augment a chat input with retrieved context.
2332
+ *
2333
+ * The service itself is a thin orchestrator — all the storage and
2334
+ * model calls go through the injected interfaces, so swapping
2335
+ * backends (in-memory → pgvector, OpenAI → Ollama) is a DI binding
2336
+ * change, not a code change.
2337
+ *
2338
+ * @example
2339
+ * ```ts
2340
+ * import { Service, Autowired, Inject } from '@forinda/kickjs'
2341
+ * import { AI_PROVIDER, VECTOR_STORE, RagService } from '@forinda/kickjs-ai'
2342
+ * import type { AiProvider, VectorStore } from '@forinda/kickjs-ai'
2343
+ *
2344
+ * @Service()
2345
+ * class DocsService {
2346
+ * private readonly rag: RagService
2347
+ *
2348
+ * constructor(
2349
+ * @Inject(AI_PROVIDER) provider: AiProvider,
2350
+ * @Inject(VECTOR_STORE) store: VectorStore,
2351
+ * ) {
2352
+ * this.rag = new RagService(provider, store)
2353
+ * }
2354
+ *
2355
+ * async ingest(articles: Array<{ id: string; body: string }>) {
2356
+ * await this.rag.index(articles.map((a) => ({ id: a.id, content: a.body })))
2357
+ * }
2358
+ *
2359
+ * async ask(question: string) {
2360
+ * const input = await this.rag.augmentChatInput(
2361
+ * { messages: [{ role: 'user', content: question }] },
2362
+ * question,
2363
+ * { topK: 3 },
2364
+ * )
2365
+ * const res = await provider.chat(input)
2366
+ * return res.content
2367
+ * }
2368
+ * }
2369
+ * ```
2370
+ */
2371
var RagService = class {
  /**
   * @param provider embeddings source (and usually the chat model too)
   * @param store vector storage backend used for upsert and retrieval
   */
  constructor(provider, store) {
    this.provider = provider;
    this.store = store;
  }
  /** Underlying provider — exposed for services that want to reuse it for chat. */
  getProvider() {
    return this.provider;
  }
  /** Underlying store — useful for admin tools that want raw access. */
  getStore() {
    return this.store;
  }
  /**
   * Index a batch of documents: embed each one's content via the
   * provider, then upsert into the store. Embedding happens in a
   * single batched call, which is both faster and cheaper than one
   * call per document for most providers.
   *
   * Documents with empty content are skipped rather than failing the
   * whole batch — the store can't meaningfully retrieve empty strings
   * and silently dropping them matches what users usually expect when
   * a content field turns out to be blank.
   */
  async index(docs) {
    const nonEmpty = docs.filter((d) => d.content && d.content.trim().length > 0);
    if (nonEmpty.length === 0) return;
    const vectors = await this.provider.embed(nonEmpty.map((d) => d.content));
    // Guard against providers that drop or duplicate outputs.
    if (vectors.length !== nonEmpty.length) throw new Error(`RagService.index: provider returned ${vectors.length} vectors for ${nonEmpty.length} inputs`);
    const toUpsert = nonEmpty.map((doc, i) => ({
      id: doc.id,
      content: doc.content,
      vector: vectors[i],
      metadata: doc.metadata
    }));
    await this.store.upsert(toUpsert);
  }
  /**
   * Search the store for documents relevant to a natural-language
   * query. Embeds the query once, then delegates to the store's
   * `query` method with the resolved vector.
   */
  async search(query, options = {}) {
    // NOTE(review): embed() receives a bare string here but an array in
    // index() — assumes the provider normalizes both shapes; confirm.
    const [queryVector] = await this.provider.embed(query);
    if (!queryVector) return [];
    return this.store.query({
      vector: queryVector,
      topK: options.topK ?? 5,
      filter: options.filter,
      minScore: options.minScore
    });
  }
  /**
   * Retrieve relevant documents for a query and inject them into a
   * `ChatInput` as a system message. Returns a new input — the
   * original is not mutated.
   *
   * Two injection modes:
   * - Merge (default): prepend the context to the first existing
   *   system message if one exists, otherwise add a new one. Avoids
   *   producing chat histories with competing system prompts.
   * - Separate (`asSeparateSystemMessage: true`): always insert a
   *   new system message at the start. Useful when the existing
   *   system prompt is small and you want to keep roles distinct.
   *
   * If no documents are retrieved, the input is returned unchanged.
   */
  async augmentChatInput(input, query, options = {}) {
    const hits = await this.search(query, {
      topK: options.topK ?? 5,
      filter: options.filter,
      minScore: options.minScore
    });
    if (hits.length === 0) return input;
    const template = options.systemTemplate ?? DEFAULT_SYSTEM_TEMPLATE;
    // Each hit is rendered with its id and a 3-decimal score header.
    const documentBlock = hits.map((h, i) => `[Document ${i + 1} (id=${h.id}, score=${h.score.toFixed(3)})]\n${h.content}`).join("\n\n");
    const contextMessage = template.replace("{documents}", documentBlock);
    const newMessages = [];
    const existingSystemIdx = input.messages.findIndex((m) => m.role === "system");
    // Merge mode: rewrite the first system message in place, keep order.
    if (!options.asSeparateSystemMessage && existingSystemIdx !== -1) for (let i = 0; i < input.messages.length; i++) {
      const msg = input.messages[i];
      if (i === existingSystemIdx) newMessages.push({
        ...msg,
        content: `${contextMessage}\n\n---\n\n${msg.content}`
      });
      else newMessages.push(msg);
    }
    else {
      // Separate mode (or no existing system message): prepend a new one.
      newMessages.push({
        role: "system",
        content: contextMessage
      });
      newMessages.push(...input.messages);
    }
    return {
      ...input,
      messages: newMessages
    };
  }
};
2471
+ //#endregion
2472
+ export { AI_PROVIDER, AI_TOOL_METADATA, AiAdapter, AiTool, AnthropicProvider, InMemoryChatMemory, InMemoryVectorStore, OpenAIProvider, PgVectorStore, PineconeVectorStore, Prompt, ProviderError, QdrantVectorStore, RagService, SlidingWindowChatMemory, VECTOR_STORE, buildPineconeFilter, buildQdrantFilter, buildWhereClause, cosineSimilarity, createPrompt, getAiToolMeta, isAiTool, toPgVector };
2473
+
2474
+ //# sourceMappingURL=index.mjs.map