@forinda/kickjs-ai 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1973 @@
1
+
2
+ import { AdapterContext, AppAdapter, Constructor } from "@forinda/kickjs";
3
+ import { ZodTypeAny } from "zod";
4
+
5
+ //#region src/types.d.ts
6
+ /**
7
+ * A chat message in the OpenAI/Anthropic-style conversation format.
8
+ *
9
+ * All four built-in providers (OpenAI, Anthropic, Google, Ollama)
10
+ * translate this shape into their native wire format. The `tool` and
11
+ * `tool_calls` variants support function calling.
12
+ */
13
+ interface ChatMessage {
14
+ role: 'system' | 'user' | 'assistant' | 'tool';
15
+ content: string;
16
+ /** Tool call ID if `role === 'tool'`. Set by the framework during tool loops. */
17
+ toolCallId?: string;
18
+ /** Tool calls made by the assistant. Set by the provider. */
19
+ toolCalls?: Array<{
20
+ id: string;
21
+ name: string;
22
+ arguments: Record<string, unknown>;
23
+ }>;
24
+ }
25
+ /**
26
+ * A resolved tool definition that providers can include in their
27
+ * wire-format request payload. This is the shape `ChatInput.tools`
28
+ * carries once `AiAdapter.runAgent` has expanded `'auto'` against
29
+ * the registry of `@AiTool`-decorated controller methods.
30
+ *
31
+ * Providers translate this into their native tool-calling format
32
+ * (OpenAI's `tools`, Anthropic's `tools`, Google's function declarations,
33
+ * etc.). The shape is deliberately minimal — anything provider-specific
34
+ * lives in the provider implementation, not on this type.
35
+ */
36
+ interface ChatToolDefinition {
37
+ /** Stable tool identifier, e.g. "TaskController.create". */
38
+ name: string;
39
+ /** Human-readable description shown to the model at call time. */
40
+ description: string;
41
+ /**
42
+ * JSON Schema for the tool input, converted from the Zod body schema
43
+ * on the underlying route. Providers pass this through to the wire
44
+ * payload verbatim; the schema only needs to be valid JSON Schema.
45
+ */
46
+ inputSchema: Record<string, unknown>;
47
+ }
48
+ /**
49
+ * Input to `AiProvider.chat()` and `AiProvider.stream()`.
50
+ *
51
+ * Providers accept this shape, map it to their native format, call the
52
+ * underlying API, and return a normalized `ChatResponse` (or stream of
53
+ * `ChatChunk`s).
54
+ */
55
+ interface ChatInput {
56
+ /** Conversation history, in order. System prompt can be the first message. */
57
+ messages: ChatMessage[];
58
+ /**
59
+ * Optional model override. If omitted, the provider uses its default
60
+ * model. Accepts provider-specific model IDs (e.g. `gpt-4o`, `claude-opus-4-6`).
61
+ */
62
+ model?: string;
63
+ /**
64
+ * Tools the model can call.
65
+ *
66
+ * - `'auto'` — only meaningful when passed to `AiAdapter.runAgent`,
67
+ * which resolves it against the `@AiTool` registry before handing
68
+ * the request to the provider. Raw providers that receive `'auto'`
69
+ * directly omit tools entirely rather than doing a hidden lookup.
70
+ * - An array of `ChatToolDefinition` — providers include these in
71
+ * the wire payload directly.
72
+ * - Omitted — no tool-calling in this request.
73
+ */
74
+ tools?: 'auto' | ChatToolDefinition[];
75
+ }
76
+ /** Runtime options for a chat call. */
77
+ interface ChatOptions {
78
+ temperature?: number;
79
+ maxTokens?: number;
80
+ topP?: number;
81
+ stopSequences?: string[];
82
+ /** Abort signal — cancel the request mid-flight. */
83
+ signal?: AbortSignal;
84
+ }
85
+ /** Normalized response from a non-streaming chat call. */
86
+ interface ChatResponse {
87
+ /** The assistant's text output. */
88
+ content: string;
89
+ /** Any tool calls the model made. Usually executed by the agent loop. */
90
+ toolCalls?: Array<{
91
+ id: string;
92
+ name: string;
93
+ arguments: Record<string, unknown>;
94
+ }>;
95
+ /** Provider-reported token usage. */
96
+ usage?: {
97
+ promptTokens: number;
98
+ completionTokens: number;
99
+ totalTokens: number;
100
+ };
101
+ /** Finish reason from the provider. */
102
+ finishReason?: 'stop' | 'length' | 'tool_call' | 'content_filter' | string;
103
+ }
104
+ /** A single chunk from a streaming chat call. */
105
+ interface ChatChunk {
106
+ /** Incremental text delta. Empty for chunks that only carry tool deltas. */
107
+ content: string;
108
+ /** Partial tool call delta, if the model is building one. */
109
+ toolCallDelta?: {
110
+ id: string;
111
+ name?: string;
112
+ argumentsDelta?: string;
113
+ };
114
+ /** True on the final chunk. */
115
+ done: boolean;
116
+ }
117
+ /**
118
+ * Input to `AiProvider.embed()`.
119
+ *
120
+ * Accepts a single string or an array; the response always matches the
121
+ * input shape (single string → single vector, array → array of vectors).
122
+ */
123
+ type EmbedInput = string | string[];
124
+ /**
125
+ * Input to `AiProvider.tool()` for one-shot tool execution outside the
126
+ * normal chat flow. Useful for programmatic workflows where you know
127
+ * which tool to call but want provider-specific argument normalization.
128
+ */
129
+ interface ToolCallInput {
130
+ name: string;
131
+ arguments: Record<string, unknown>;
132
+ }
133
+ /** Response from `AiProvider.tool()`. */
134
+ interface ToolCallResponse {
135
+ /** The raw tool result. Shape depends on the tool. */
136
+ result: unknown;
137
+ /** Whether the provider considers the call successful. */
138
+ ok: boolean;
139
+ }
140
+ /**
141
+ * Provider abstraction. All built-in providers (OpenAI, Anthropic,
142
+ * Google, Ollama) implement this interface. Users can also implement
143
+ * it for custom/internal providers.
144
+ */
145
+ interface AiProvider {
146
+ /** Provider identifier — `'openai'`, `'anthropic'`, `'google'`, `'ollama'`, or a custom string. */
147
+ name: string;
148
+ /** Non-streaming chat call. */
149
+ chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
150
+ /** Streaming chat call. Yields chunks until `done: true`. */
151
+ stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
152
+ /** Generate embeddings. Shape matches the input shape. */
153
+ embed(input: EmbedInput): Promise<number[][]>;
154
+ /** One-shot tool execution. Optional — providers may omit. */
155
+ tool?(input: ToolCallInput): Promise<ToolCallResponse>;
156
+ }
157
+ /** Options for the `AiAdapter` constructor. */
158
+ interface AiAdapterOptions {
159
+ /** The active provider. Registered under the `AI_PROVIDER` DI token. */
160
+ provider: AiProvider;
161
+ /**
162
+ * Default chat options applied to every call unless overridden at
163
+ * the call site. Useful for setting a project-wide temperature or
164
+ * model.
165
+ */
166
+ defaults?: ChatOptions & {
167
+ model?: string;
168
+ };
169
+ }
170
+ /**
171
+ * Options for the `@AiTool` decorator.
172
+ *
173
+ * Marks a controller method as callable by the LLM. The input schema
174
+ * is inferred from the route's `body` Zod schema — you don't repeat
175
+ * it here.
176
+ */
177
+ interface AiToolOptions {
178
+ /** Tool name override. Defaults to `<ControllerName>.<methodName>`. */
179
+ name?: string;
180
+ /** Human-readable description shown to the LLM at tool-call time. */
181
+ description: string;
182
+ /** Optional input schema override if the route has no Zod body. */
183
+ inputSchema?: ZodTypeAny;
184
+ }
185
+ /**
186
+ * Resolved AI tool definition built by the adapter's startup scan.
187
+ *
188
+ * Bundles the tool's wire-format definition (`ChatToolDefinition`)
189
+ * with the HTTP routing info needed for dispatch (`httpMethod` +
190
+ * `mountPath`). `AiAdapter.runAgent` hands `ChatToolDefinition[]` to
191
+ * the provider and keeps `httpMethod`/`mountPath` internal for the
192
+ * dispatch loop.
193
+ */
194
+ interface AiToolDefinition extends ChatToolDefinition {
195
+ /** HTTP method of the underlying route. */
196
+ httpMethod: string;
197
+ /** Full mount path of the underlying route (after apiPrefix + version). */
198
+ mountPath: string;
199
+ }
200
+ /**
201
+ * Options for `AiAdapter.runAgent()`.
202
+ *
203
+ * Runs a tool-calling loop: the provider responds, any tool calls are
204
+ * dispatched through the Express pipeline, results are fed back, and
205
+ * the loop continues until the model returns plain text or the
206
+ * `maxSteps` cap is hit.
207
+ */
208
+ interface RunAgentOptions extends ChatOptions {
209
+ /** Starting conversation. System prompt can be the first message. */
210
+ messages: ChatMessage[];
211
+ /** Model override. Defaults to the provider's configured default. */
212
+ model?: string;
213
+ /**
214
+ * Tools the agent can call. Defaults to `'auto'` — every tool in
215
+ * the adapter's `@AiTool` registry. Pass an explicit array to
216
+ * restrict the agent to a subset.
217
+ */
218
+ tools?: 'auto' | ChatToolDefinition[];
219
+ /**
220
+ * Maximum number of chat → tool-call → dispatch → feedback cycles
221
+ * before the loop gives up. Prevents runaway loops on broken tool
222
+ * call behavior. Defaults to 8.
223
+ */
224
+ maxSteps?: number;
225
+ }
226
+ /** Result of `AiAdapter.runAgent()` — the final assistant response. */
227
+ interface RunAgentResult {
228
+ /** The assistant's final text output after all tool calls resolved. */
229
+ content: string;
230
+ /** The full message history including tool calls and results. */
231
+ messages: ChatMessage[];
232
+ /** Number of chat iterations the loop ran before terminating. */
233
+ steps: number;
234
+ /** Aggregated usage across every provider call in the loop. */
235
+ usage?: {
236
+ promptTokens: number;
237
+ completionTokens: number;
238
+ totalTokens: number;
239
+ };
240
+ /** True if the loop stopped because `maxSteps` was reached. */
241
+ maxStepsReached?: boolean;
242
+ }
243
+ //#endregion
244
+ //#region src/memory/types.d.ts
245
+ /**
246
+ * Chat memory contract.
247
+ *
248
+ * Memory is the persistence layer for multi-turn conversations. The
249
+ * agent loop inside `runAgent` maintains history WITHIN a single call,
250
+ * but a real chatbot needs to remember what the user said in their
251
+ * previous request — that's the job of this interface.
252
+ *
253
+ * Every backend (in-memory, sliding window, Drizzle, Redis) implements
254
+ * this same contract, so swapping storage is a DI binding change and
255
+ * nothing else. Services stay identical regardless of whether memory
256
+ * lives in a `Map`, a Postgres row, or a Redis list.
257
+ *
258
+ * ### Session scoping
259
+ *
260
+ * The interface has no session concept itself — every `ChatMemory`
261
+ * instance is implicitly scoped to one conversation. Services that
262
+ * serve multiple users construct one memory instance per session,
263
+ * typically via a factory bound to the request scope or a
264
+ * `sessionId` parameter on the backend.
265
+ *
266
+ * @typeParam M — optional metadata attached to every stored message.
267
+ * Most backends ignore this; Drizzle and Redis stores can use it
268
+ * for timestamps, speaker IDs, or audit info.
269
+ *
270
+ * @module @forinda/kickjs-ai/memory/types
271
+ */
272
+ interface ChatMemory {
273
+ /** Short identifier for logs and debug UIs. */
274
+ readonly name: string;
275
+ /**
276
+ * Return the full message history in chronological order.
277
+ *
278
+ * The returned array should be safe to pass directly into
279
+ * `provider.chat({ messages })` — backends are responsible for
280
+ * returning the shape the framework expects without requiring
281
+ * callers to transform it.
282
+ */
283
+ get(): Promise<ChatMessage[]>;
284
+ /**
285
+ * Append one or more messages to the history.
286
+ *
287
+ * Backends should persist in insertion order. Arrays are accepted
288
+ * for efficiency — storing a batch in one round-trip is faster
289
+ * than N individual calls for most real databases.
290
+ */
291
+ add(message: ChatMessage | ChatMessage[]): Promise<void>;
292
+ /**
293
+ * Drop every message from this session.
294
+ *
295
+ * Called by the /chat/reset route pattern and by tests between
296
+ * cases. Backends that persist to an external store should commit
297
+ * the clear transactionally so partial deletes can't happen.
298
+ */
299
+ clear(): Promise<void>;
300
+ /**
301
+ * Optional: return the number of stored messages. Not every
302
+ * backend can compute this cheaply — Redis lists and in-memory
303
+ * arrays can, Drizzle can via COUNT(*), but long-tail stores may
304
+ * decline. Callers should treat `undefined` returns as "unknown".
305
+ */
306
+ size?(): Promise<number>;
307
+ }
308
+ /**
309
+ * Options for `AiAdapter.runAgentWithMemory()`.
310
+ *
311
+ * The helper wraps `runAgent` with an automatic "read history →
312
+ * append user message → run loop → persist assistant response" cycle
313
+ * so services don't have to manage the plumbing themselves. Most
314
+ * real chatbots end up writing this wrapper anyway; shipping it in
315
+ * the framework saves everyone that boilerplate.
316
+ */
317
+ interface RunAgentWithMemoryOptions {
318
+ /** Memory backend for this conversation. Typically scoped to a request or session. */
319
+ memory: ChatMemory;
320
+ /** The user's message for this turn. */
321
+ userMessage: string;
322
+ /**
323
+ * System prompt to prepend IF the memory is empty — i.e. it's the
324
+ * first turn of the conversation. Skipped on subsequent turns so
325
+ * the model sees a single, stable system prompt for the session.
326
+ */
327
+ systemPrompt?: string;
328
+ /** Model override. Defaults to the provider's configured default. */
329
+ model?: string;
330
+ /**
331
+ * Tools the agent can call. Defaults to `'auto'` — every tool in
332
+ * the adapter's `@AiTool` registry.
333
+ */
334
+ tools?: 'auto' | ChatToolDefinition[];
335
+ /** Maximum chat → tool-call → dispatch cycles per turn. Defaults to 8. */
336
+ maxSteps?: number;
337
+ /** Runtime chat options passed through to the provider. */
338
+ temperature?: number;
339
+ maxTokens?: number;
340
+ topP?: number;
341
+ stopSequences?: string[];
342
+ signal?: AbortSignal;
343
+ /**
344
+ * When true, tool call results written to memory preserve their
345
+ * full content. When false (the default), tool results are
346
+ * dropped from memory on the grounds that they're usually large
347
+ * API responses the user doesn't need to see on a later turn.
348
+ * Turn this on for debugging sessions or full-transcript replay.
349
+ */
350
+ persistToolResults?: boolean;
351
+ }
352
+ //#endregion
353
+ //#region src/ai.adapter.d.ts
354
+ /**
355
+ * Register an AI provider in the DI container, discover every
356
+ * `@AiTool`-decorated controller method, and run agent loops that
357
+ * dispatch tool calls through the Express pipeline.
358
+ *
359
+ * The adapter plays the same role for AI as the MCP adapter plays for
360
+ * external clients: it's the glue between the framework's metadata
361
+ * (Zod schemas, route decorators, DI container) and a runtime that
362
+ * can actually call LLMs and execute tools. Both adapters reuse the
363
+ * framework's `onRouteMount` hook to discover tools at startup.
364
+ *
365
+ * @example
366
+ * ```ts
367
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
368
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
369
+ *
370
+ * export const app = await bootstrap({
371
+ * modules,
372
+ * adapters: [
373
+ * new AiAdapter({
374
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
375
+ * }),
376
+ * ],
377
+ * })
378
+ * ```
379
+ *
380
+ * Then in any service:
381
+ *
382
+ * ```ts
383
+ * @Service()
384
+ * class AgentService {
385
+ * @Autowired() private readonly ai!: AiAdapter
386
+ *
387
+ * async handleQuery(userPrompt: string) {
388
+ * const result = await this.ai.runAgent({
389
+ * messages: [
390
+ * { role: 'system', content: 'You can create tasks via tools.' },
391
+ * { role: 'user', content: userPrompt },
392
+ * ],
393
+ * tools: 'auto', // use every @AiTool-decorated method
394
+ * })
395
+ * return result.content
396
+ * }
397
+ * }
398
+ * ```
399
+ */
400
+ declare class AiAdapter implements AppAdapter {
401
+ readonly name = "AiAdapter";
402
+ private readonly provider;
403
+ /** Controllers collected during the mount phase, in insertion order. */
404
+ private readonly mountedControllers;
405
+ /** Tool definitions built during `beforeStart` from `@AiTool` metadata. */
406
+ private readonly tools;
407
+ /**
408
+ * Base URL of the running KickJS HTTP server, captured in `afterStart`.
409
+ * Agent tool dispatch makes internal HTTP requests against this base
410
+ * URL so calls flow through the normal Express pipeline (middleware,
411
+ * validation, auth, logging, error handling).
412
+ */
413
+ private serverBaseUrl;
414
+ constructor(options: AiAdapterOptions);
415
+ /** Return the active provider. Useful for services that want the raw API. */
416
+ getProvider(): AiProvider;
417
+ /** Return the discovered tool registry. Primarily for tests and debug UIs. */
418
+ getTools(): readonly AiToolDefinition[];
419
+ /**
420
+ * Override the server base URL. Used by tests that spin up an
421
+ * ephemeral http.Server and can't rely on the framework's
422
+ * `afterStart` hook to supply it.
423
+ */
424
+ setServerBaseUrl(url: string | null): void;
425
+ /**
426
+ * Record every mounted controller so `beforeStart` can walk them
427
+ * looking for `@AiTool` decorations. We don't scan here because
428
+ * onRouteMount fires per-controller and we want the scan to run
429
+ * once against the full set.
430
+ */
431
+ onRouteMount(controller: Constructor, mountPath: string): void;
432
+ /**
433
+ * Register the provider in the DI container and run the tool scan.
434
+ *
435
+ * The adapter itself is also registered under its class constructor
436
+ * so services can inject the adapter directly (to call `runAgent`)
437
+ * while other services inject just the provider via `AI_PROVIDER`
438
+ * for plain `chat` / `embed` calls.
439
+ */
440
+ beforeStart({
441
+ container
442
+ }: AdapterContext): void;
443
+ /**
444
+ * Capture the running server's address so agent dispatch can make
445
+ * internal HTTP requests against the actual port. Runs after the
446
+ * HTTP server is listening, so `server.address()` returns a real
447
+ * `AddressInfo` here.
448
+ */
449
+ afterStart(ctx: AdapterContext): void;
450
+ /** Best-effort cleanup. Providers are currently stateless HTTP clients. */
451
+ shutdown(): Promise<void>;
452
+ /**
453
+ * Run a tool-calling agent loop.
454
+ *
455
+ * Calls the provider with the given messages and tools, dispatches
456
+ * any tool calls the model emits, feeds the results back into the
457
+ * conversation, and repeats until the model responds with plain text
458
+ * (no more tool calls) or `maxSteps` is reached.
459
+ *
460
+ * Tool dispatch goes through the Express pipeline via internal HTTP
461
+ * requests — same pattern as the MCP adapter — so middleware, auth,
462
+ * validation, logging, and error handling all apply to tool calls
463
+ * the same way they apply to external client requests.
464
+ *
465
+ * @example
466
+ * ```ts
467
+ * const result = await adapter.runAgent({
468
+ * messages: [
469
+ * { role: 'system', content: 'Create tasks the user asks for.' },
470
+ * { role: 'user', content: 'Create a high-priority task titled Ship v3.' },
471
+ * ],
472
+ * tools: 'auto',
473
+ * maxSteps: 5,
474
+ * })
475
+ * console.log(result.content) // assistant's final reply
476
+ * console.log(result.messages) // full history including tool calls
477
+ * console.log(result.steps) // how many rounds it took
478
+ * ```
479
+ */
480
+ runAgent(options: RunAgentOptions): Promise<RunAgentResult>;
481
+ /**
482
+ * Memory-aware agent turn.
483
+ *
484
+ * Wraps `runAgent` with an automatic "read history → append user
485
+ * message → run loop → persist assistant response" cycle. Services
486
+ * that want multi-turn conversations don't need to manage the
487
+ * plumbing themselves — pass a `ChatMemory` and a user message,
488
+ * get back the agent's response, and the memory is updated.
489
+ *
490
+ * System prompt handling:
491
+ * - If the memory is empty AND `systemPrompt` is provided, the
492
+ * system prompt is persisted as the first message in the
493
+ * session. It stays put for every subsequent turn.
494
+ * - On follow-up turns, the existing system prompt is reused
495
+ * from memory; the `systemPrompt` option is ignored to keep
496
+ * the session persona stable.
497
+ *
498
+ * Tool result persistence:
499
+ * - By default, tool messages are NOT persisted to memory —
500
+ * they're usually large API responses the user doesn't need
501
+ * on later turns, and including them blows up prompt tokens
502
+ * unnecessarily. Set `persistToolResults: true` to keep them
503
+ * (useful for debugging / full-transcript replay).
504
+ * - Assistant messages with tool calls ARE persisted so the
505
+ * conversation shows what the agent did.
506
+ *
507
+ * @example
508
+ * ```ts
509
+ * @Service()
510
+ * class ChatService {
511
+ * @Autowired() private ai!: AiAdapter
512
+ * private readonly memory = new InMemoryChatMemory()
513
+ *
514
+ * async handle(userMessage: string) {
515
+ * const result = await this.ai.runAgentWithMemory({
516
+ * memory: this.memory,
517
+ * userMessage,
518
+ * systemPrompt: 'You are a helpful assistant.',
519
+ * tools: 'auto',
520
+ * })
521
+ * return result.content
522
+ * }
523
+ * }
524
+ * ```
525
+ */
526
+ runAgentWithMemory(options: RunAgentWithMemoryOptions): Promise<RunAgentResult>;
527
+ /**
528
+ * Expand an agent `tools` option to an explicit array. `'auto'`
529
+ * resolves to the full discovered registry; an explicit array is
530
+ * passed through unchanged (so callers can restrict the agent to a
531
+ * subset of tools).
532
+ */
533
+ private resolveTools;
534
+ /**
535
+ * Dispatch a single tool call through the Express pipeline by
536
+ * making an internal HTTP request matching the underlying route's
537
+ * method + path + body/query.
538
+ *
539
+ * Returns a `ChatMessage` with `role: 'tool'` suitable for feeding
540
+ * back into the next `provider.chat` call. Non-2xx responses are
541
+ * surfaced as tool error messages rather than throwing, so the
542
+ * agent loop can let the model recover.
543
+ */
544
+ private dispatchToolCall;
545
+ /**
546
+ * Build an `AiToolDefinition` for a route decorated with `@AiTool`.
547
+ * Skips routes without the decorator so the registry only exposes
548
+ * deliberately opted-in methods.
549
+ */
550
+ private tryBuildTool;
551
+ /**
552
+ * Join a module mount path with the route-level sub-path. Same
553
+ * helper as McpAdapter's — kept local so the two packages don't
554
+ * couple via a shared util file.
555
+ */
556
+ private joinMountPath;
557
+ /**
558
+ * Substitute Express-style `:param` placeholders in the mount path
559
+ * with values pulled from the tool call arguments. Consumed keys
560
+ * are removed from the remaining args so they aren't sent twice
561
+ * (once in the path, once in the body/query).
562
+ */
563
+ private substitutePathParams;
564
+ /**
565
+ * Resolve the running server's base URL from a Node `http.Server`
566
+ * instance. Same handling as McpAdapter: IPv6 bracketing, rewrite
567
+ * of 0.0.0.0/:: to 127.0.0.1.
568
+ */
569
+ private resolveServerBaseUrl;
570
+ }
571
+ //#endregion
572
+ //#region src/decorators.d.ts
573
+ /**
574
+ * Mark a controller method as an AI-callable tool.
575
+ *
576
+ * At startup, the `AiAdapter` scans all `@Controller` classes in the
577
+ * DI container for this decorator and builds a tool registry. When a
578
+ * service calls `ai.chat({ ..., tools: 'auto' })`, the framework
579
+ * passes the registered tools to the provider, the model may call
580
+ * them, and the framework dispatches back through the normal Express
581
+ * pipeline — so tool calls go through auth, validation, and logging
582
+ * just like external HTTP requests.
583
+ *
584
+ * The input schema is derived from the route's `body` Zod schema:
585
+ *
586
+ * @example
587
+ * ```ts
588
+ * import { Controller, Post, type Ctx } from '@forinda/kickjs'
589
+ * import { AiTool } from '@forinda/kickjs-ai'
590
+ * import { createTaskSchema } from './dtos/create-task.dto'
591
+ *
592
+ * @Controller('/tasks')
593
+ * export class TaskController {
594
+ * @Post('/', { body: createTaskSchema, name: 'CreateTask' })
595
+ * @AiTool({ description: 'Create a new task' })
596
+ * create(ctx: Ctx<KickRoutes.TaskController['create']>) {
597
+ * return this.createTaskUseCase.execute(ctx.body)
598
+ * }
599
+ * }
600
+ * ```
601
+ */
602
+ declare function AiTool(options: AiToolOptions): MethodDecorator;
603
+ /** Read the AI tool metadata attached to a method, if any. */
604
+ declare function getAiToolMeta(target: object, method: string): AiToolOptions | undefined;
605
+ /** Check whether a method was decorated with `@AiTool`. */
606
+ declare function isAiTool(target: object, method: string): boolean;
607
+ //#endregion
608
+ //#region src/constants.d.ts
609
+ /**
610
+ * Metadata key for the `@AiTool` decorator.
611
+ *
612
+ * Using `createToken` for metadata keys (rather than a raw `Symbol`)
613
+ * gives a collision-safe, type-carrying identifier: the phantom type
614
+ * parameter flows through `getMethodMetaOrUndefined` so consumers get
615
+ * `AiToolOptions` back without a manual cast, and reference-equality
616
+ * guarantees that two separate definitions can never shadow each other
617
+ * even if the package is loaded more than once.
618
+ */
619
+ declare const AI_TOOL_METADATA: any;
620
+ /**
621
+ * DI token for the active AI provider.
622
+ *
623
+ * Injected via `@Inject(AI_PROVIDER)` in services or use-cases that
624
+ * need to call an LLM. The adapter registers the concrete provider
625
+ * (OpenAI, Anthropic, Google, Ollama) during `beforeStart`.
626
+ *
627
+ * @example
628
+ * ```ts
629
+ * @Service()
630
+ * export class SummarizeService {
631
+ * constructor(@Inject(AI_PROVIDER) private ai: AiProvider) {}
632
+ *
633
+ * async summarize(text: string) {
634
+ * const res = await this.ai.chat({
635
+ * messages: [
636
+ * { role: 'system', content: 'Summarize in 2 sentences.' },
637
+ * { role: 'user', content: text },
638
+ * ],
639
+ * })
640
+ * return res.content
641
+ * }
642
+ * }
643
+ * ```
644
+ */
645
+ declare const AI_PROVIDER: any;
646
+ /**
647
+ * DI token for the active vector store backend.
648
+ *
649
+ * Injected via `@Inject(VECTOR_STORE)` in services that need
650
+ * retrieval-augmented generation. The adapter does not register a
651
+ * default — users bind the backend they want at bootstrap time,
652
+ * typically `InMemoryVectorStore` for development/tests and
653
+ * `PgVectorStore` / `QdrantStore` / `PineconeStore` for production.
654
+ *
655
+ * @example
656
+ * ```ts
657
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
658
+ * import { AiAdapter, InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
659
+ *
660
+ * export const app = await bootstrap({
661
+ * modules,
662
+ * adapters: [
663
+ * new AiAdapter({
664
+ * provider: new OpenAIProvider({ apiKey: getEnv('OPENAI_API_KEY') }),
665
+ * }),
666
+ * ],
667
+ * plugins: [
668
+ * {
669
+ * name: 'vector-store',
670
+ * register: (container) => {
671
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
672
+ * },
673
+ * },
674
+ * ],
675
+ * })
676
+ * ```
677
+ */
678
+ declare const VECTOR_STORE: any;
679
+ //#endregion
680
+ //#region src/providers/openai.d.ts
681
+ /**
682
+ * Configuration for the built-in OpenAI provider.
683
+ *
684
+ * The base URL is configurable so the same provider class can target
685
+ * any OpenAI-compatible endpoint — Azure OpenAI, Ollama's
686
+ * `/v1/chat/completions` shim, OpenRouter, vLLM, and so on. The
687
+ * provider only assumes the wire shape, not the hostname.
688
+ */
689
+ interface OpenAIProviderOptions {
690
+ /** API key sent as `Authorization: Bearer <apiKey>`. Required. */
691
+ apiKey: string;
692
+ /** Override base URL. Defaults to `https://api.openai.com/v1`. */
693
+ baseURL?: string;
694
+ /** Default chat model used when `ChatInput.model` is not set. */
695
+ defaultChatModel?: string;
696
+ /** Default embedding model used by `embed()`. */
697
+ defaultEmbedModel?: string;
698
+ /**
699
+ * OpenAI organization header. Optional. Some accounts need it; most
700
+ * don't. If unset, the header is omitted entirely.
701
+ */
702
+ organization?: string;
703
+ /**
704
+ * Provider name to expose on `provider.name`. Defaults to `'openai'`
705
+ * but can be overridden to label compatible endpoints — e.g.
706
+ * `'ollama'` if pointing baseURL at a local Ollama instance.
707
+ */
708
+ name?: string;
709
+ }
710
+ /**
711
+ * Built-in OpenAI provider.
712
+ *
713
+ * Implements the framework's `AiProvider` interface using nothing but
714
+ * the global `fetch` API (Node 20+). Translates the framework's
715
+ * normalized chat shape to OpenAI's `/chat/completions` wire format
716
+ * and back, including streaming via SSE.
717
+ *
718
+ * Tool calling is wired in this provider but the agent loop that
719
+ * actually invokes tools and feeds results back to the model lives in
720
+ * a later phase — for now, `chat()` and `stream()` surface tool calls
721
+ * via `ChatResponse.toolCalls` so callers can react.
722
+ *
723
+ * @example
724
+ * ```ts
725
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
726
+ * import { AiAdapter, OpenAIProvider } from '@forinda/kickjs-ai'
727
+ *
728
+ * export const app = await bootstrap({
729
+ * modules,
730
+ * adapters: [
731
+ * new AiAdapter({
732
+ * provider: new OpenAIProvider({
733
+ * apiKey: getEnv('OPENAI_API_KEY'),
734
+ * defaultChatModel: 'gpt-4o-mini',
735
+ * }),
736
+ * }),
737
+ * ],
738
+ * })
739
+ * ```
740
+ */
741
+ declare class OpenAIProvider implements AiProvider {
742
+ readonly name: string;
743
+ private readonly baseURL;
744
+ private readonly defaultChatModel;
745
+ private readonly defaultEmbedModel;
746
+ /**
747
+ * Full header map passed to every request. Includes the bearer auth
748
+ * header and the optional openai-organization header. Constructed
749
+ * once in the constructor so per-call code just spreads it into the
750
+ * fetch init.
751
+ */
752
+ private readonly headers;
753
+ constructor(options: OpenAIProviderOptions);
754
+ /**
755
+ * Non-streaming chat completion.
756
+ *
757
+ * Translates the framework's `ChatInput` to OpenAI's chat completion
758
+ * payload, posts it, and normalizes the response back to a
759
+ * `ChatResponse`. Tool calls are surfaced on the response so callers
760
+ * can decide whether to feed them back into a tool registry.
761
+ */
762
+ chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
763
+ /**
764
+ * Streaming chat completion. Yields `ChatChunk`s as deltas arrive
765
+ * over the wire and emits one final chunk with `done: true` after
766
+ * the upstream `[DONE]` sentinel.
767
+ *
768
+ * Cancellation via `options.signal` is supported end-to-end — the
769
+ * underlying fetch is aborted and the consumer's `for await` loop
770
+ * throws `AbortError`.
771
+ */
772
+ stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
773
+ /**
774
+ * Generate embeddings for a string or array of strings.
775
+ *
776
+ * Returns vectors in input order. Single-string input still gets a
777
+ * length-1 array back, so callers can use the same indexed access
778
+ * pattern regardless of input shape.
779
+ */
780
+ embed(input: EmbedInput): Promise<number[][]>;
781
+ private buildChatPayload;
782
+ /**
783
+ * Translate a framework `ChatMessage` to OpenAI's wire format.
784
+ * Handles the `tool` role and the `tool_calls` field on assistant
785
+ * messages, both of which use slightly different shapes than the
786
+ * normalized form on `ChatMessage`.
787
+ */
788
+ private toOpenAIMessage;
789
+ /**
790
+ * Normalize an OpenAI chat completion response back to the
791
+ * framework's `ChatResponse` shape.
792
+ */
793
+ private normalizeChatResponse;
794
+ /**
795
+ * Extract the first tool-call delta from an OpenAI streaming chunk.
796
+ *
797
+ * The `tool_calls` array in a delta chunk can contain partial state
798
+ * for multiple parallel tool calls; this method picks the first one
799
+ * with a non-empty payload, which is enough for the v0 streaming
800
+ * surface. Multi-tool streaming is a follow-up.
801
+ */
802
+ private firstToolCallDelta;
803
+ }
804
+ //#endregion
805
+ //#region src/providers/anthropic.d.ts
806
+ /**
807
+ * Configuration for the Anthropic provider.
808
+ *
809
+ * The base URL is configurable so the same class can target an
810
+ * Anthropic-compatible proxy, an internal gateway that adds auth
811
+ * headers, or an air-gapped deployment. The provider only assumes
812
+ * Anthropic's Messages API wire shape, not the hostname.
813
+ */
814
+ interface AnthropicProviderOptions {
815
+ /** API key sent as `x-api-key`. Required. */
816
+ apiKey: string;
817
+ /** Override base URL. Defaults to `https://api.anthropic.com/v1`. */
818
+ baseURL?: string;
819
+ /** Default chat model used when `ChatInput.model` is not set. */
820
+ defaultChatModel?: string;
821
+ /** Anthropic API version header. Defaults to `'2023-06-01'`. */
822
+ apiVersion?: string;
823
+ /**
824
+ * Default `max_tokens` for responses. Anthropic requires an explicit
825
+ * max_tokens on every request; the framework's ChatOptions.maxTokens
826
+ * takes precedence when set, but this supplies a fallback so callers
827
+ * don't have to set it every time.
828
+ */
829
+ defaultMaxTokens?: number;
830
+ /** Provider name override. Defaults to `'anthropic'`. */
831
+ name?: string;
832
+ }
833
+ /**
834
+ * Built-in Anthropic provider.
835
+ *
836
+ * Implements the framework's `AiProvider` interface using Anthropic's
837
+ * Messages API (`/v1/messages`). Translates the normalized
838
+ * `ChatInput` shape to and from Anthropic's content-block format,
839
+ * including tool calling and streaming.
840
+ *
841
+ * ### Differences from OpenAI
842
+ *
843
+ * Anthropic's API has a few quirks the provider translates away:
844
+ *
845
+ * - **System prompt is separated.** The framework puts system
846
+ * messages in the `messages` array; Anthropic wants them in a
847
+ * top-level `system` field. The provider extracts the first system
848
+ * message and filters out any others.
849
+ * - **Content is always a block array.** Even simple text replies
850
+ * are wrapped in `[{ type: 'text', text: '...' }]`. The provider
851
+ * flattens text blocks to a single string on the response.
852
+ * - **Tool calls use `tool_use` content blocks, not a separate
853
+ * `tool_calls` field.** Normalization pulls them out of the
854
+ * response content and into `ChatResponse.toolCalls`.
855
+ * - **Tool results are `user` messages with `tool_result` content
856
+ * blocks**, not a `'tool'` role. The provider handles the
857
+ * translation both ways.
858
+ * - **`max_tokens` is required on every request.** Framework
859
+ * `ChatOptions.maxTokens` wins; otherwise falls back to
860
+ * `defaultMaxTokens` (default 4096).
861
+ *
862
+ * ### Embeddings
863
+ *
864
+ * Anthropic does not ship an embeddings API. Calling `embed()` on
865
+ * this provider throws a descriptive error — users who need
866
+ * embeddings should construct a separate provider (OpenAI's
867
+ * `text-embedding-3-small` is a good default) and bind it
868
+ * alongside the Anthropic chat provider.
869
+ *
870
+ * @example
871
+ * ```ts
872
+ * import { bootstrap, getEnv } from '@forinda/kickjs'
873
+ * import { AiAdapter, AnthropicProvider } from '@forinda/kickjs-ai'
874
+ *
875
+ * export const app = await bootstrap({
876
+ * modules,
877
+ * adapters: [
878
+ * new AiAdapter({
879
+ * provider: new AnthropicProvider({
880
+ * apiKey: getEnv('ANTHROPIC_API_KEY'),
881
+ * defaultChatModel: 'claude-opus-4-6',
882
+ * }),
883
+ * }),
884
+ * ],
885
+ * })
886
+ * ```
887
+ */
888
+ declare class AnthropicProvider implements AiProvider {
889
+ readonly name: string;
890
+ private readonly baseURL;
891
+ private readonly defaultChatModel;
892
+ private readonly defaultMaxTokens;
893
+ private readonly headers;
894
+ constructor(options: AnthropicProviderOptions);
895
+ /**
896
+ * Non-streaming chat completion.
897
+ *
898
+ * Builds the Anthropic Messages payload, posts it, and normalizes
899
+ * the response back to the framework's `ChatResponse` shape.
900
+ */
901
+ chat(input: ChatInput, options?: ChatOptions): Promise<ChatResponse>;
902
+ /**
903
+ * Streaming chat completion. Yields `ChatChunk`s as Anthropic
904
+ * events arrive and emits a final chunk with `done: true` after
905
+ * the `message_stop` event.
906
+ *
907
+ * Anthropic's SSE stream uses distinct event types instead of the
908
+ * single-channel deltas OpenAI sends:
909
+ *
910
+ * - `message_start` — session init, carries model + id
911
+ * - `content_block_start` — new text or tool_use block begins
912
+ * - `content_block_delta` — incremental text or partial tool JSON
913
+ * - `content_block_stop` — block complete
914
+ * - `message_delta` — stop_reason + final usage
915
+ * - `message_stop` — end of stream
916
+ *
917
+ * The provider cares about text deltas (for streaming content) and
918
+ * input_json deltas (for tool call argument streaming). Everything
919
+ * else is noise for our purposes and gets filtered.
920
+ */
921
+ stream(input: ChatInput, options?: ChatOptions): AsyncIterable<ChatChunk>;
922
+ /**
923
+ * Anthropic does not ship an embeddings API. Throws a descriptive
924
+ * error rather than silently returning an empty vector — embedding
925
+ * workflows should use a dedicated provider (OpenAI text-embedding-3-*
926
+ * is the common pick) and bind it alongside this one in the
927
+ * `AI_PROVIDER` token registry if needed.
928
+ */
929
+ embed(_input: EmbedInput): Promise<number[][]>;
930
+ private buildMessagesPayload;
931
+ /**
932
+ * Extract the first system message from the framework's messages
933
+ * array and return it separately — Anthropic puts system prompts
934
+ * in a top-level `system` field, not in `messages`. Any additional
935
+ * system messages are dropped on the grounds that models handle
936
+ * one persona prompt per call and concatenating them silently
937
+ * would produce confusing behavior.
938
+ */
939
+ private splitSystemMessage;
940
+ /**
941
+ * Translate a framework `ChatMessage` to Anthropic's wire format.
942
+ *
943
+ * User and plain assistant messages become content blocks with a
944
+ * single `text` entry. Assistant messages with tool calls become
945
+ * a block list mixing `text` and `tool_use` entries. Framework
946
+ * `'tool'` role messages become Anthropic `'user'` messages with
947
+ * a `tool_result` block — that's how Anthropic represents tool
948
+ * call responses.
949
+ */
950
+ private toAnthropicMessage;
951
+ /**
952
+ * Normalize an Anthropic response back to the framework's
953
+ * `ChatResponse`. Flattens text content blocks into a single
954
+ * string and pulls `tool_use` blocks out into `toolCalls`.
955
+ */
956
+ private normalizeResponse;
957
+ }
958
+ //#endregion
959
+ //#region src/providers/base.d.ts
960
+ /**
961
+ * Provider-side helpers shared by every built-in `AiProvider`
962
+ * implementation.
963
+ *
964
+ * Each provider in `packages/ai/src/providers/` implements the
965
+ * `AiProvider` interface from `../types`. This file holds the bits
966
+ * that all of them need: HTTP error mapping, JSON parsing, SSE line
967
+ * splitting for streaming responses. Keeping these here means each
968
+ * provider's main file stays focused on the wire-format translation
969
+ * specific to its vendor.
970
+ */
971
+ /**
972
+ * Error thrown by built-in providers when the upstream API returns a
973
+ * non-2xx status. Carries the HTTP status, the raw response body, and
974
+ * a parsed error object when available, so callers can branch on
975
+ * specific failure modes (auth, rate limit, content filter, etc.).
976
+ */
977
+ declare class ProviderError extends Error {
978
+ readonly status: number;
979
+ readonly body: string;
980
+ readonly parsedBody?: unknown;
981
+ constructor(status: number, body: string, message?: string);
982
+ }
983
+ //#endregion
984
+ //#region src/prompts/prompt.d.ts
985
+ /**
986
+ * Options for `createPrompt`.
987
+ */
988
+ interface CreatePromptOptions {
989
+ /**
990
+ * Short identifier used in logs, errors, and typegen output.
991
+ * Defaults to `'prompt'` if not provided — give every non-trivial
992
+ * template a real name so error messages point to the right place.
993
+ */
994
+ name?: string;
995
+ /**
996
+ * Message role the rendered prompt produces. Defaults to `'user'`.
997
+ * Set to `'system'` for persona / instruction prompts.
998
+ */
999
+ role?: ChatMessage['role'];
1000
+ /**
1001
+ * How missing variables at render time are handled:
1002
+ * - `'throw'` (default): throw a descriptive error. Catches bugs
1003
+ * early and matches what most users expect.
1004
+ * - `'warn'`: leave the placeholder as-is and log a warning via
1005
+ * console.warn. Useful for templates with optional sections
1006
+ * that the caller might not fill in.
1007
+ * - `'silent'`: leave the placeholder as-is and don't warn.
1008
+ */
1009
+ onMissing?: 'throw' | 'warn' | 'silent';
1010
+ }
1011
+ /**
1012
+ * A reusable prompt template with `{{variable}}` placeholders and
1013
+ * a typed variables object at the render site.
1014
+ *
1015
+ * The type parameter `TVars` is a record of the variables the
1016
+ * template expects. Callers pass it explicitly:
1017
+ *
1018
+ * ```ts
1019
+ * const summarize = createPrompt<{ text: string; sentenceCount: number }>(
1020
+ * 'Summarize the following in {{sentenceCount}} sentences:\n\n{{text}}',
1021
+ * { name: 'summarize' },
1022
+ * )
1023
+ *
1024
+ * const msg = summarize.render({ text: 'Long article...', sentenceCount: 3 })
1025
+ * // → { role: 'user', content: 'Summarize the following in 3 sentences:\n\nLong article...' }
1026
+ * ```
1027
+ *
1028
+ * TypeScript catches missing or mistyped variables at compile time:
1029
+ *
1030
+ * ```ts
1031
+ * summarize.render({ text: 'x' }) // ✗ missing sentenceCount
1032
+ * summarize.render({ text: 'x', count: 3 }) // ✗ wrong key name
1033
+ * ```
1034
+ *
1035
+ * @remarks
1036
+ * Runtime-only in v0 — the type parameter is opt-in and has to be
1037
+ * provided explicitly. Workstream 5 adds a `kick typegen` pass that
1038
+ * scans `createPrompt` call sites and generates the TVars shape
1039
+ * automatically, so you can write `createPrompt('...')` and get
1040
+ * the types for free.
1041
+ */
1042
+ declare class Prompt<TVars extends Record<string, unknown> = Record<string, unknown>> {
1043
+ readonly name: string;
1044
+ readonly role: ChatMessage['role'];
1045
+ private readonly template;
1046
+ private readonly onMissing;
1047
+ constructor(template: string, options?: CreatePromptOptions);
1048
+ /**
1049
+ * Substitute variables into the template and return a
1050
+ * ready-to-use `ChatMessage`.
1051
+ *
1052
+ * Placeholder syntax is `{{name}}` — double curly braces around
1053
+ * the variable name. Whitespace inside the braces is ignored
1054
+ * (`{{ name }}` works too). Unknown variables in the template
1055
+ * are left as-is, so Markdown or code blocks that happen to use
1056
+ * `{{` for their own reasons don't break.
1057
+ *
1058
+ * @throws If `onMissing === 'throw'` and a required variable is absent
1059
+ */
1060
+ render(vars: TVars): ChatMessage;
1061
+ /**
1062
+ * Same as `render` but returns the raw string instead of wrapping
1063
+ * it in a `ChatMessage`. Useful for building composite messages
1064
+ * where several templates contribute to a single string.
1065
+ */
1066
+ renderString(vars: TVars): string;
1067
+ /** Return the raw template string. Useful for debugging and snapshot tests. */
1068
+ getTemplate(): string;
1069
+ /**
1070
+ * Return the set of placeholder names the template references.
1071
+ *
1072
+ * Mostly useful for testing and for tooling that wants to show
1073
+ * users what variables a prompt takes. Not a substitute for the
1074
+ * compile-time type check — templates can always reference
1075
+ * variables that aren't in TVars; this helper reads the string,
1076
+ * not the type.
1077
+ */
1078
+ getPlaceholders(): string[];
1079
+ private handleMissing;
1080
+ }
1081
+ /**
1082
+ * Construct a reusable prompt template.
1083
+ *
1084
+ * Thin factory for the `Prompt` class — keeps call sites short and
1085
+ * matches the naming convention of other kickjs-ai factories
1086
+ * (`createToken`, etc.). Use the class form directly if you need
1087
+ * subclassing or custom rendering logic.
1088
+ *
1089
+ * @example
1090
+ * ```ts
1091
+ * import { createPrompt } from '@forinda/kickjs-ai'
1092
+ *
1093
+ * const persona = createPrompt<{ name: string; tone: string }>(
1094
+ * 'You are {{name}}, a {{tone}} assistant.',
1095
+ * { role: 'system', name: 'persona' },
1096
+ * )
1097
+ *
1098
+ * const msg = persona.render({ name: 'Claude', tone: 'concise' })
1099
+ * ```
1100
+ */
1101
+ declare function createPrompt<TVars extends Record<string, unknown> = Record<string, unknown>>(template: string, options?: CreatePromptOptions): Prompt<TVars>;
1102
+ //#endregion
1103
+ //#region src/memory/in-memory.d.ts
1104
+ /**
1105
+ * Zero-dependency in-memory chat memory.
1106
+ *
1107
+ * Backed by a plain array. Each instance represents ONE conversation
1108
+ * — services that serve multiple sessions construct one instance per
1109
+ * session, typically via a `sessionId → memory` map in a parent
1110
+ * service or a request-scoped DI factory.
1111
+ *
1112
+ * Good for:
1113
+ * - Tests and prototypes
1114
+ * - Single-process CLI tools
1115
+ * - Short-lived request handlers that don't outlive the HTTP response
1116
+ *
1117
+ * Not good for:
1118
+ * - Multi-replica deployments (memory isn't shared across pods)
1119
+ * - Sessions that need to survive a restart
1120
+ * - Anything with a compliance retention policy
1121
+ *
1122
+ * For any of those, swap in a persistent backend (Drizzle, Redis,
1123
+ * Postgres) that implements the same `ChatMemory` interface — the
1124
+ * calling service doesn't change.
1125
+ *
1126
+ * @example
1127
+ * ```ts
1128
+ * import { InMemoryChatMemory } from '@forinda/kickjs-ai'
1129
+ *
1130
+ * const memory = new InMemoryChatMemory()
1131
+ * await memory.add({ role: 'user', content: 'hello' })
1132
+ * const history = await memory.get()
1133
+ * ```
1134
+ */
1135
+ declare class InMemoryChatMemory implements ChatMemory {
1136
+ readonly name = "in-memory";
1137
+ private messages;
1138
+ get(): Promise<ChatMessage[]>;
1139
+ add(message: ChatMessage | ChatMessage[]): Promise<void>;
1140
+ clear(): Promise<void>;
1141
+ size(): Promise<number>;
1142
+ }
1143
+ //#endregion
1144
+ //#region src/memory/sliding-window.d.ts
1145
+ /**
1146
+ * Options for `SlidingWindowChatMemory`.
1147
+ */
1148
+ interface SlidingWindowChatMemoryOptions {
1149
+ /** Underlying memory to wrap. */
1150
+ inner: ChatMemory;
1151
+ /**
1152
+ * Maximum number of messages to keep in the sliding window. The
1153
+ * LAST `maxMessages` messages are retained; anything older is
1154
+ * dropped on every `get()` call and on every `add()` that pushes
1155
+ * the count past the cap.
1156
+ *
1157
+ * A typical value is 20 — enough for several user/assistant
1158
+ * exchanges with tool call overhead, short enough to keep prompt
1159
+ * tokens under control. Tune up or down based on model context
1160
+ * window and cost sensitivity.
1161
+ */
1162
+ maxMessages: number;
1163
+ /**
1164
+ * Whether to treat the FIRST system message as pinned — i.e. never
1165
+ * evict it, even when the window would otherwise cap it out.
1166
+ *
1167
+ * This matches the common pattern of putting a single persona /
1168
+ * instruction prompt at the start of every conversation. Without
1169
+ * pinning, a long session would eventually drop the system prompt
1170
+ * and the model would lose its instructions.
1171
+ *
1172
+ * Defaults to `true` because forgetting the system prompt is
1173
+ * almost never what users want.
1174
+ */
1175
+ pinSystemPrompt?: boolean;
1176
+ }
1177
+ /**
1178
+ * Sliding-window memory wrapper.
1179
+ *
1180
+ * Wraps any `ChatMemory` implementation with a bounded history: only
1181
+ * the most recent N messages survive. Older messages are evicted on
1182
+ * every `get()` and after every `add()` that pushes the count past
1183
+ * the cap. The first system message is pinned by default so long
1184
+ * sessions don't lose their persona.
1185
+ *
1186
+ * Use this to keep prompt token usage predictable without writing
1187
+ * eviction logic in every service. It composes with any backend —
1188
+ * in-memory, Drizzle, Redis — because it only touches the inner
1189
+ * memory through its public interface.
1190
+ *
1191
+ * @example
1192
+ * ```ts
1193
+ * import { InMemoryChatMemory, SlidingWindowChatMemory } from '@forinda/kickjs-ai'
1194
+ *
1195
+ * const memory = new SlidingWindowChatMemory({
1196
+ * inner: new InMemoryChatMemory(),
1197
+ * maxMessages: 20,
1198
+ * pinSystemPrompt: true,
1199
+ * })
1200
+ * ```
1201
+ *
1202
+ * @remarks
1203
+ * Eviction writes back to the inner memory via `clear()` + `add()`.
1204
+ * That's fine for in-memory backends where clearing is O(1), but
1205
+ * costs a round-trip for network-backed stores. If you're wrapping
1206
+ * a remote backend, consider an inner memory that supports native
1207
+ * trimming — the wrapper's contract assumes clear+add is cheap.
1208
+ */
1209
+ declare class SlidingWindowChatMemory implements ChatMemory {
1210
+ readonly name: string;
1211
+ private readonly inner;
1212
+ private readonly maxMessages;
1213
+ private readonly pinSystemPrompt;
1214
+ constructor(options: SlidingWindowChatMemoryOptions);
1215
+ get(): Promise<ChatMessage[]>;
1216
+ add(message: ChatMessage | ChatMessage[]): Promise<void>;
1217
+ clear(): Promise<void>;
1218
+ size(): Promise<number>;
1219
+ /**
1220
+ * Apply the sliding window to an array of messages, returning the
1221
+ * bounded view. Pure function so both `get()` and `add()` can use
1222
+ * the same logic.
1223
+ *
1224
+ * When `pinSystemPrompt` is set and the first message is a system
1225
+ * message, we keep it AND fill the remaining `maxMessages - 1`
1226
+ * slots with the most recent messages after it. Otherwise we just
1227
+ * take the tail of the array.
1228
+ */
1229
+ private applyWindow;
1230
+ }
1231
+ //#endregion
1232
+ //#region src/rag/types.d.ts
1233
+ /**
1234
+ * RAG primitive types.
1235
+ *
1236
+ * The `VectorStore` interface is the contract every backend (in-memory,
1237
+ * pgvector, Qdrant, Pinecone) implements. The framework's own `RagService`
1238
+ * takes any `VectorStore` + an `AiProvider` and produces retrieval-
1239
+ * augmented chat helpers, so swapping storage backends is a one-line
1240
+ * change to the DI binding — services that consume `VECTOR_STORE` stay
1241
+ * the same.
1242
+ *
1243
+ * The shapes here are deliberately minimal. Vendor-specific features
1244
+ * (hybrid search, reranking, sparse vectors) live on the concrete
1245
+ * implementations as extensions, not on this interface.
1246
+ *
1247
+ * @module @forinda/kickjs-ai/rag/types
1248
+ */
1249
+ /**
1250
+ * A single document stored in a vector store.
1251
+ *
1252
+ * The `content` field carries the original text — the vector alone
1253
+ * isn't enough because RAG retrieval needs to feed the original text
1254
+ * back into the LLM context. `metadata` is the escape hatch for
1255
+ * anything the application wants to filter or track (author, date,
1256
+ * tags, tenant ID, etc.).
1257
+ *
1258
+ * @typeParam M — the metadata shape; defaults to a loose record so
1259
+ * users don't need to parameterize the type unless they want the
1260
+ * extra rigor.
1261
+ */
1262
+ interface VectorDocument<M extends Record<string, unknown> = Record<string, unknown>> {
1263
+ /** Unique identifier — repeated upsert with the same id replaces the previous version. */
1264
+ id: string;
1265
+ /** Original text the vector was computed from. */
1266
+ content: string;
1267
+ /** Dense embedding. Length must match the store's configured dimensions. */
1268
+ vector: number[];
1269
+ /** Optional arbitrary metadata used for filtering and display. */
1270
+ metadata?: M;
1271
+ }
1272
+ /**
1273
+ * A single search result from `VectorStore.query`.
1274
+ *
1275
+ * `score` is normalized across backends: higher = more similar.
1276
+ * Cosine similarity returns values in [-1, 1]; most backends clamp to
1277
+ * [0, 1] for usability. Services should treat the number as a
1278
+ * monotonic rank, not an absolute probability.
1279
+ */
1280
+ interface VectorSearchHit<M extends Record<string, unknown> = Record<string, unknown>> {
1281
+ id: string;
1282
+ content: string;
1283
+ score: number;
1284
+ metadata?: M;
1285
+ }
1286
+ /**
1287
+ * Options for `VectorStore.query`.
1288
+ *
1289
+ * `filter` is an equality map against `metadata` — backends that
1290
+ * support richer predicates (range, $in, $not) should accept them
1291
+ * here as well, using the MongoDB-style operator prefix convention.
1292
+ * The in-memory store implements equality only, which is enough for
1293
+ * most v0 use cases.
1294
+ */
1295
+ interface VectorQueryOptions {
1296
+ /** The embedding of the query text. */
1297
+ vector: number[];
1298
+ /** Maximum number of hits to return. Defaults to 5. */
1299
+ topK?: number;
1300
+ /** Metadata equality filter. Hits whose metadata doesn't match are dropped. */
1301
+ filter?: Record<string, unknown>;
1302
+ /** Drop hits whose score falls below this threshold. */
1303
+ minScore?: number;
1304
+ }
1305
+ /**
1306
+ * Vector store contract. Backends:
1307
+ * - `InMemoryVectorStore` — in-package, zero deps, perfect for tests
1308
+ * and prototypes; up to a few thousand docs before linear scan hurts
1309
+ * - pgvector — runs inside any Postgres 13+ KickJS project (follow-up commit)
1310
+ * - Qdrant — dedicated vector DB with payload filtering (follow-up commit)
1311
+ * - Pinecone — managed cloud service (follow-up commit)
1312
+ *
1313
+ * Implementations must honor two contracts: upserts are idempotent on
1314
+ * id, and query results are ordered by descending score.
1315
+ */
1316
+ interface VectorStore<M extends Record<string, unknown> = Record<string, unknown>> {
1317
+ /** Short identifier for logs, e.g. `'in-memory'`, `'pgvector'`. */
1318
+ readonly name: string;
1319
+ /**
1320
+ * Insert or replace one or more documents. Re-upserting an existing
1321
+ * id overwrites its vector, content, and metadata.
1322
+ */
1323
+ upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
1324
+ /**
1325
+ * Search for the nearest vectors. Results are ordered by descending
1326
+ * score, capped at `options.topK` (default 5), and filtered by
1327
+ * `options.filter` / `options.minScore` if provided.
1328
+ */
1329
+ query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
1330
+ /** Remove documents by id. Missing ids are silently ignored. */
1331
+ delete(id: string | string[]): Promise<void>;
1332
+ /** Clear every document from the store. Mostly for tests and admin tools. */
1333
+ deleteAll(): Promise<void>;
1334
+ /** Optional count — not every backend supports it cheaply. */
1335
+ count?(): Promise<number>;
1336
+ }
1337
+ /** Input to `RagService.index`. */
1338
+ interface RagIndexInput<M extends Record<string, unknown> = Record<string, unknown>> {
1339
+ id: string;
1340
+ content: string;
1341
+ metadata?: M;
1342
+ }
1343
+ /** Options for `RagService.search` / `RagService.augmentChatInput`. */
1344
+ interface RagSearchOptions {
1345
+ /** Maximum number of documents to retrieve. Defaults to 5. */
1346
+ topK?: number;
1347
+ /** Metadata equality filter forwarded to the underlying store. */
1348
+ filter?: Record<string, unknown>;
1349
+ /** Drop hits whose similarity score falls below this threshold. */
1350
+ minScore?: number;
1351
+ }
1352
+ /** Options for `RagService.augmentChatInput`. */
1353
+ interface RagAugmentOptions extends RagSearchOptions {
1354
+ /**
1355
+ * Template for the retrieved-context system message. `{documents}`
1356
+ * is replaced with the concatenated document contents. If omitted,
1357
+ * a sensible default is used.
1358
+ */
1359
+ systemTemplate?: string;
1360
+ /**
1361
+ * When true, prepend the context as a NEW system message. When false
1362
+ * (the default), merge into the first existing system message or
1363
+ * prepend if none exists. The merge path avoids producing chat
1364
+ * histories with two competing system prompts, which confuses models.
1365
+ */
1366
+ asSeparateSystemMessage?: boolean;
1367
+ }
1368
+ //#endregion
1369
+ //#region src/rag/in-memory.d.ts
1370
+ /**
1371
+ * Zero-dependency in-memory vector store.
1372
+ *
1373
+ * Backed by a plain `Map<string, VectorDocument>` with a linear-scan
1374
+ * cosine-similarity search. Perfect for tests, prototypes, CLI tools,
1375
+ * and any project with a bounded corpus (roughly < 10k documents
1376
+ * before the scan starts taking more than a handful of milliseconds).
1377
+ *
1378
+ * For production workloads with larger corpora, swap in the pgvector,
1379
+ * Qdrant, or Pinecone store — the `VectorStore` interface is the same,
1380
+ * so services that consume `VECTOR_STORE` don't need to change.
1381
+ *
1382
+ * @example
1383
+ * ```ts
1384
+ * import { InMemoryVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1385
+ *
1386
+ * container.registerInstance(VECTOR_STORE, new InMemoryVectorStore())
1387
+ * ```
1388
+ *
1389
+ * The class is entirely synchronous under the hood but wraps each
1390
+ * method in a Promise so it matches the async interface every other
1391
+ * backend implements. This keeps the calling code uniform regardless
1392
+ * of which backend is wired in.
1393
+ */
1394
+ declare class InMemoryVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
1395
+ readonly name = "in-memory";
1396
+ private readonly docs;
1397
+ upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
1398
+ query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
1399
+ delete(id: string | string[]): Promise<void>;
1400
+ deleteAll(): Promise<void>;
1401
+ count(): Promise<number>;
1402
+ }
1403
+ /**
1404
+ * Cosine similarity between two vectors. Returns a value in [-1, 1]
1405
+ * where 1 means identical direction, 0 means orthogonal, -1 means
1406
+ * opposite. The function is symmetric and scale-invariant.
1407
+ *
1408
+ * Returns 0 for length mismatches or zero-magnitude vectors rather
1409
+ * than throwing — callers get a useless hit they can filter out via
1410
+ * `minScore`, but the store doesn't crash on bad input.
1411
+ */
1412
+ declare function cosineSimilarity(a: number[], b: number[]): number;
1413
+ //#endregion
1414
+ //#region src/rag/pgvector.d.ts
1415
+ /**
1416
+ * Minimal SQL executor contract.
1417
+ *
1418
+ * Covers everything `PgVectorStore` needs from a Postgres client: a
1419
+ * single `query(text, params)` call that returns rows. The shape is
1420
+ * deliberately narrower than node-postgres's `Pool.query` so it can
1421
+ * be satisfied by any of:
1422
+ *
1423
+ * - `pg.Pool` / `pg.Client` (node-postgres)
1424
+ * - `drizzle.$client` (the underlying pool on the Drizzle adapter)
1425
+ * - `postgres.js` (by @porsager, via a small adapter)
1426
+ * - A unit-test fake that records calls
1427
+ *
1428
+ * Users who already have a Postgres connection somewhere in their
1429
+ * app can hand it to the store without installing `pg` twice.
1430
+ */
1431
+ interface SqlExecutor {
1432
+ query<T = unknown>(text: string, params?: unknown[]): Promise<{
1433
+ rows: T[];
1434
+ }>;
1435
+ }
1436
+ /**
1437
+ * Options for `PgVectorStore`.
1438
+ *
1439
+ * Exactly one of `client` or `connectionString` must be provided. If
1440
+ * `connectionString` is set, the store dynamically imports `pg` on
1441
+ * first use and creates its own pool; `pg` must be installed as a
1442
+ * peer dep in that case. If `client` is set, the store uses the
1443
+ * supplied executor and never touches `pg` directly.
1444
+ */
1445
+ interface PgVectorStoreOptions {
1446
+ /** Pre-made SQL executor — any object with a `query(text, params)` method. */
1447
+ client?: SqlExecutor;
1448
+ /** Connection string used to create a pg.Pool if `client` is not provided. */
1449
+ connectionString?: string;
1450
+ /** Vector dimensionality. Must match the embedding model. Required. */
1451
+ dimensions: number;
1452
+ /** Postgres schema. Defaults to `'public'`. */
1453
+ schema?: string;
1454
+ /** Table name. Defaults to `'kickjs_embeddings'`. */
1455
+ table?: string;
1456
+ /**
1457
+ * Skip the first-use schema bootstrap (`CREATE EXTENSION IF NOT
1458
+ * EXISTS vector; CREATE TABLE IF NOT EXISTS ...`). Set this to true
1459
+ * if you manage migrations manually or run in a read-only role.
1460
+ */
1461
+ skipSetup?: boolean;
1462
+ /**
1463
+ * Provider name to expose on `store.name`. Defaults to `'pgvector'`
1464
+ * but can be overridden to label a Postgres-compatible backend
1465
+ * (e.g. `'timescale'`, `'cockroach-vector'`).
1466
+ */
1467
+ name?: string;
1468
+ }
1469
+ /**
1470
+ * pgvector-backed `VectorStore` implementation.
1471
+ *
1472
+ * Stores documents in a single table with a `vector` column indexed
1473
+ * via pgvector's native operators. Cosine similarity is the scoring
1474
+ * metric — computed as `1 - (vector <=> query_vector)` because the
1475
+ * `<=>` operator returns cosine DISTANCE, not similarity.
1476
+ *
1477
+ * ### Lazy initialization
1478
+ *
1479
+ * The Postgres pool and schema are set up on first use, not in the
1480
+ * constructor. That keeps the constructor synchronous, matches the
1481
+ * rest of the `VectorStore` implementations, and lets users construct
1482
+ * the store inside a module's `register(container)` method without
1483
+ * awaiting inside DI resolution.
1484
+ *
1485
+ * ### Schema
1486
+ *
1487
+ * The default schema is:
1488
+ *
1489
+ * ```sql
1490
+ * CREATE EXTENSION IF NOT EXISTS vector;
1491
+ * CREATE TABLE IF NOT EXISTS <schema>.<table> (
1492
+ * id TEXT PRIMARY KEY,
1493
+ * content TEXT NOT NULL,
1494
+ * vector vector(<dimensions>) NOT NULL,
1495
+ * metadata JSONB
1496
+ * );
1497
+ * ```
1498
+ *
1499
+ * No index is created by default — pgvector's IVFFlat and HNSW
1500
+ * indexes benefit from being created AFTER data is loaded, and the
1501
+ * right choice depends on corpus size. Users should add an index
1502
+ * themselves in a real migration when they're ready:
1503
+ *
1504
+ * ```sql
1505
+ * CREATE INDEX ON kickjs_embeddings
1506
+ * USING hnsw (vector vector_cosine_ops);
1507
+ * ```
1508
+ *
1509
+ * ### Metadata filtering
1510
+ *
1511
+ * Filters are translated to JSONB WHERE clauses:
1512
+ * - Scalar: `metadata->>'key' = $N` (coerced to text)
1513
+ * - Array: `metadata->>'key' = ANY($N::text[])`
1514
+ *
1515
+ * Keys are validated against `[a-zA-Z0-9_.-]+` before being
1516
+ * interpolated into SQL — anything else throws. Values go through
1517
+ * parameter binding, so SQL injection via values is not possible.
1518
+ *
1519
+ * @example
1520
+ * ```ts
1521
+ * import { Pool } from 'pg'
1522
+ * import { getEnv } from '@forinda/kickjs'
1523
+ * import { AiAdapter, PgVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
1524
+ *
1525
+ * const pool = new Pool({ connectionString: getEnv('DATABASE_URL') })
1526
+ * const store = new PgVectorStore({ client: pool, dimensions: 1536 })
1527
+ *
1528
+ * export const app = await bootstrap({
1529
+ * modules,
1530
+ * adapters: [new AiAdapter({ provider })],
1531
+ * plugins: [
1532
+ * {
1533
+ * name: 'pgvector',
1534
+ * register: (container) => {
1535
+ * container.registerInstance(VECTOR_STORE, store)
1536
+ * },
1537
+ * },
1538
+ * ],
1539
+ * })
1540
+ * ```
1541
+ */
1542
declare class PgVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
  /** Store identifier reported to callers. Presumably a `name` override from `PgVectorStoreOptions` with a `'pgvector'`-style default — confirm against the options type. */
  readonly name: string;
  /** Vector dimensionality; used as `vector(<dimensions>)` in the table schema. */
  private readonly dimensions;
  /** Postgres schema the embeddings table lives in. */
  private readonly schema;
  /** Embeddings table name. */
  private readonly table;
  /** Pre-joined `<schema>.<table>` identifier used when generating SQL. */
  private readonly fullyQualified;
  /** When true, the first-use schema bootstrap (`runSchemaSetup`) is skipped. */
  private readonly skipSetup;
  // Mutable: created lazily from `connectionString` on first use when the
  // caller did not supply their own pool/client.
  private client;
  /** Connection string used by `createPoolFromConnectionString` when no `client` was given. */
  private readonly connectionString;
  /** Cached one-shot bootstrap promise — see `ensureReady`. */
  private setupPromise;
  constructor(options: PgVectorStoreOptions);
  /** Insert or replace one or many documents. Vectors are serialized with `toPgVector` and cast via `::vector`. */
  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
  /** Similarity search. Scores are cosine similarity, computed as `1 - (vector <=> query_vector)`. */
  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
  /** Delete one or many documents by id. */
  delete(id: string | string[]): Promise<void>;
  /** Remove every row from the embeddings table. */
  deleteAll(): Promise<void>;
  /** Number of stored documents. */
  count(): Promise<number>;
  /**
   * Release the internal connection pool, if the store created one.
   *
   * If the caller supplied their own `client`, this is a no-op —
   * lifecycle of a user-owned pool stays with the user. This method
   * is intentionally not on the `VectorStore` interface because most
   * backends don't need explicit teardown; services that want to
   * clean up call it via an adapter.shutdown hook.
   */
  close(): Promise<void>;
  /**
   * Ensure the pool exists and the schema is set up. Called by every
   * public method before running any SQL. The setup migration runs
   * at most once per store instance — subsequent calls reuse the
   * cached promise.
   */
  private ensureReady;
  /**
   * Dynamically import `pg` and create a Pool from the configured
   * connection string. Imported lazily so users who supply their own
   * `client` never force `pg` to be installed.
   *
   * Throws a friendly error if `pg` is not installed — the same
   * graceful-degradation pattern the CLI uses for optional packages.
   */
  private createPoolFromConnectionString;
  /**
   * Run the schema bootstrap: enable the pgvector extension, create
   * the embeddings table if it doesn't exist, and nothing else.
   *
   * Indexes are deliberately not created here — pgvector's IVFFlat
   * and HNSW indexes perform best when created after data is loaded,
   * and the right choice depends on corpus size. Users should add
   * their index in a real migration when they're ready.
   */
  private runSchemaSetup;
}
1595
/**
 * Serialize a JS number array to pgvector's wire format: a string
 * like `'[0.1,0.2,0.3]'`. The `pg` driver doesn't know about vectors
 * so we have to stringify ourselves and cast with `::vector` in the
 * SQL. Non-finite values become `0` rather than `null` or `NaN` —
 * pgvector rejects non-finite values in inserts.
 *
 * @param vector - Dense embedding vector to serialize.
 * @returns pgvector text literal, e.g. `'[0.1,0.2,0.3]'`.
 */
declare function toPgVector(vector: number[]): string;
1603
/**
 * Translate a metadata filter into a WHERE clause + bound parameters.
 *
 * - Scalar values become `metadata->>'key' = $N`
 * - Array values become `metadata->>'key' = ANY($N::text[])`
 *
 * Keys must match `[a-zA-Z0-9_.-]+` — anything else is rejected. All
 * values are coerced to string before binding, because `->>` returns
 * text. Callers that need numeric range queries should issue raw SQL
 * via their own executor; this helper covers the equality-case 90%.
 *
 * Exported for unit testing.
 *
 * @param filter - Equality map of metadata key → scalar or array of scalars; `undefined` yields an empty clause.
 * @param startAt - Index of the first `$N` placeholder, so callers can append to an existing parameter list.
 * @returns `whereSql` (possibly empty SQL fragment) and `whereParams` (values to bind, in placeholder order).
 */
declare function buildWhereClause(filter: Record<string, unknown> | undefined, startAt: number): {
  whereSql: string;
  whereParams: unknown[];
};
1620
+ //#endregion
1621
+ //#region src/rag/qdrant.d.ts
1622
/**
 * Options for `QdrantVectorStore`.
 *
 * Qdrant exposes a REST API under `/collections/{name}` — this store
 * talks to it directly with `fetch`, so no client SDK is needed. A
 * bearer `apiKey` is optional because self-hosted Qdrant instances
 * often run without auth; managed Qdrant Cloud always requires one.
 */
interface QdrantVectorStoreOptions {
  /** Base URL of the Qdrant HTTP API. Defaults to `http://localhost:6333`. */
  url?: string;
  /** API key sent as `api-key` header. Optional for local/self-hosted. */
  apiKey?: string;
  /** Collection name. Required — Qdrant does not have a default collection. */
  collection: string;
  /** Vector dimensionality. Must match the embedding model. Required. */
  dimensions: number;
  /**
   * Distance metric for the collection on first create. Qdrant supports
   * `Cosine`, `Dot`, `Euclid`, and `Manhattan`. Defaults to `'Cosine'`
   * since that's what every OpenAI/Anthropic-compatible embedding
   * model ships.
   */
  distance?: 'Cosine' | 'Dot' | 'Euclid' | 'Manhattan';
  /**
   * Skip the first-use collection bootstrap. Turn this on if the
   * collection is managed by your infra team or provisioned via
   * Terraform, and the runtime role doesn't have create permission.
   */
  skipSetup?: boolean;
  /** Provider name override. Defaults to `'qdrant'`. */
  name?: string;
}
1655
/**
 * Qdrant-backed `VectorStore` implementation.
 *
 * Qdrant stores vectors as "points" inside a named "collection". Each
 * point has an id, a dense vector, and an arbitrary JSON "payload" —
 * the store uses the payload to carry both the original `content`
 * string (so RAG retrieval can feed text back to the LLM) and the
 * `metadata` record.
 *
 * ### Filtering
 *
 * The framework's equality-map filter (`{ key: value }` or
 * `{ key: [v1, v2] }`) is translated into Qdrant's `filter.must`
 * conditions against `payload.metadata.<key>`. Scalar values become
 * `match: { value }`, arrays become `match: { any: [...] }`. Users
 * who need richer queries (nested, range, should/must_not) can bypass
 * this by extending the class, but equality covers the 90% case.
 *
 * ### Lazy collection creation
 *
 * On first write, the store calls `PUT /collections/{name}` with
 * `vectors: { size, distance }` — idempotent, so it's safe to run on
 * every boot. Pass `skipSetup: true` if your cluster is provisioned
 * externally and the runtime API key doesn't have create permission.
 *
 * @example
 * ```ts
 * import { bootstrap, getEnv } from '@forinda/kickjs'
 * import { AiAdapter, QdrantVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
 *
 * const store = new QdrantVectorStore({
 *   url: getEnv('QDRANT_URL'),
 *   apiKey: getEnv('QDRANT_API_KEY'),
 *   collection: 'docs',
 *   dimensions: 1536,
 * })
 *
 * export const app = await bootstrap({
 *   modules,
 *   adapters: [new AiAdapter({ provider })],
 *   plugins: [
 *     {
 *       name: 'qdrant',
 *       register: (container) => {
 *         container.registerInstance(VECTOR_STORE, store)
 *       },
 *     },
 *   ],
 * })
 * ```
 */
declare class QdrantVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
  /** Store identifier. Defaults to `'qdrant'` (see `QdrantVectorStoreOptions.name`). */
  readonly name: string;
  /** Base URL of the Qdrant HTTP API. */
  private readonly url;
  /** Collection every point is written to and queried from. */
  private readonly collection;
  /** Vector dimensionality used as `vectors.size` on collection create. */
  private readonly dimensions;
  /** Distance metric used on collection create (default `'Cosine'`). */
  private readonly distance;
  /** Shared request headers, including the optional `api-key`. */
  private readonly headers;
  /** When true, `ensureCollection` never attempts the bootstrap. */
  private readonly skipSetup;
  /**
   * Cached bootstrap promise. The first method call triggers collection
   * creation; every subsequent call awaits the same promise so the
   * check happens exactly once per process. On failure we clear the
   * cache so the next call can retry (networks blink, DNS flaps).
   */
  private setupPromise;
  constructor(options: QdrantVectorStoreOptions);
  /** Insert or replace points; `content` and `metadata` are carried in each point's payload. */
  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
  /** Similarity search; the equality-map `filter` is translated via `buildQdrantFilter`. */
  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
  /** Delete one or many points by id. */
  delete(id: string | string[]): Promise<void>;
  /** Remove every point from the collection. */
  deleteAll(): Promise<void>;
  /** Number of stored points. */
  count(): Promise<number>;
  /**
   * Thin wrapper around `fetch` that applies the shared headers, JSON
   * encodes the body, and maps non-2xx responses to `Error` instances
   * with the response body attached for debugging. Matches the shape
   * used by `providers/base.ts`, kept local here so the RAG module has
   * no dependency on the provider internals.
   */
  private request;
  /**
   * Create the collection on first use. The `PUT /collections/{name}`
   * endpoint is idempotent — calling it on an existing collection is a
   * no-op with status 200. We cache the promise so concurrent callers
   * share the same in-flight request and every subsequent call resolves
   * immediately.
   */
  private ensureCollection;
  // Actual bootstrap body awaited by `ensureCollection`.
  private runSetup;
}
1745
/**
 * Translate the framework's equality-map filter into Qdrant's
 * `must` condition format.
 *
 * Scalars become `{ key, match: { value } }`. Arrays become
 * `{ key, match: { any: [...] } }`. Keys are interpreted as paths into
 * `payload.metadata`, matching how `upsert` nests the metadata record.
 *
 * Exported so tests (and future richer filter builders) can verify the
 * translation without going through a live Qdrant instance.
 *
 * @param filter - Equality map of metadata key → scalar or array of scalars.
 * @returns Qdrant filter object with one `must` condition per key.
 */
declare function buildQdrantFilter(filter: Record<string, unknown>): {
  must: Array<Record<string, unknown>>;
};
1759
+ //#endregion
1760
+ //#region src/rag/pinecone.d.ts
1761
/**
 * Options for `PineconeVectorStore`.
 *
 * Unlike Qdrant, Pinecone does not have a "create collection on first
 * use" endpoint that's cheap to call — the index must be provisioned
 * separately (via the Pinecone dashboard, API, or Terraform) before
 * the store can use it. Every Pinecone index has its own hostname,
 * which the SDK normally looks up; this store requires the caller to
 * pass it directly via `indexHost` so there's zero runtime dependency
 * on the Pinecone client.
 */
interface PineconeVectorStoreOptions {
  /** Required API key, sent as `Api-Key` header. */
  apiKey: string;
  /**
   * Fully qualified hostname for the Pinecone index, e.g.
   * `my-index-abcdef1.svc.us-east-1-aws.pinecone.io`. Find it in
   * the Pinecone dashboard or via the `describe_index` API. The
   * scheme is optional — the store adds `https://` if it's missing.
   */
  indexHost: string;
  /**
   * Namespace for all operations. Pinecone partitions indexes with
   * namespaces; omitting this uses the default (empty) namespace.
   * Most multi-tenant apps use one namespace per tenant.
   */
  namespace?: string;
  /** Vector dimensionality. Required — used to validate upsert shapes. */
  dimensions: number;
  /** Provider name override. Defaults to `'pinecone'`. */
  name?: string;
}
1793
/**
 * Pinecone-backed `VectorStore` implementation.
 *
 * Pinecone stores vectors with a flat id, a dense vector, and an
 * arbitrary metadata object. Like Qdrant the store uses metadata to
 * carry both the original `content` (for RAG retrieval) and the
 * application's own metadata fields — they're merged into one
 * Pinecone metadata record at write time and split back apart at
 * read time.
 *
 * ### Filtering
 *
 * Pinecone has a native filter DSL that looks almost identical to
 * MongoDB's — `{ key: { $eq: value } }`, `{ key: { $in: [...] } }`,
 * etc. The framework's equality-map filter is translated directly:
 * scalars become `$eq` and arrays become `$in`. Users who need the
 * full DSL (range, $ne, $or) can pass a raw Pinecone filter through
 * the same `filter` field — the translator is a no-op when the keys
 * start with `$`, so advanced filters pass through unchanged.
 *
 * ### Index provisioning
 *
 * Pinecone indexes must be created out-of-band. This store does NOT
 * provision indexes automatically — the dimensionality, metric, and
 * pod type are infrastructure decisions that should live in
 * Terraform or the Pinecone dashboard, not in runtime code.
 *
 * @example
 * ```ts
 * import { bootstrap, getEnv } from '@forinda/kickjs'
 * import { AiAdapter, PineconeVectorStore, VECTOR_STORE } from '@forinda/kickjs-ai'
 *
 * const store = new PineconeVectorStore({
 *   apiKey: getEnv('PINECONE_API_KEY'),
 *   indexHost: getEnv('PINECONE_INDEX_HOST'),
 *   dimensions: 1536,
 *   namespace: 'docs',
 * })
 *
 * export const app = await bootstrap({
 *   modules,
 *   adapters: [new AiAdapter({ provider })],
 *   plugins: [
 *     {
 *       name: 'pinecone',
 *       register: (container) => {
 *         container.registerInstance(VECTOR_STORE, store)
 *       },
 *     },
 *   ],
 * })
 * ```
 */
declare class PineconeVectorStore<M extends Record<string, unknown> = Record<string, unknown>> implements VectorStore<M> {
  /** Store identifier. Defaults to `'pinecone'` (see `PineconeVectorStoreOptions.name`). */
  readonly name: string;
  /** Data-plane base URL derived from `indexHost` (scheme added if missing). */
  private readonly baseURL;
  /** Namespace applied to every operation; empty string means the default namespace. */
  private readonly namespace;
  /** Vector dimensionality used to validate upsert shapes. */
  private readonly dimensions;
  /** Shared request headers, including the required `Api-Key`. */
  private readonly headers;
  constructor(options: PineconeVectorStoreOptions);
  /** Insert or replace vectors; `content` and metadata are merged into one Pinecone metadata record. */
  upsert(doc: VectorDocument<M> | VectorDocument<M>[]): Promise<void>;
  /** Similarity search; the equality-map `filter` is translated via `buildPineconeFilter`. */
  query(options: VectorQueryOptions): Promise<VectorSearchHit<M>[]>;
  /** Delete one or many vectors by id. */
  delete(id: string | string[]): Promise<void>;
  /** Remove every vector in the configured namespace. */
  deleteAll(): Promise<void>;
  /** Number of stored vectors. */
  count(): Promise<number>;
  /**
   * POST a JSON body to the Pinecone data-plane and return the parsed
   * JSON response. Every Pinecone data-plane endpoint uses POST even
   * for reads (`/query`, `/describe_index_stats`), so the helper
   * doesn't bother parameterizing the method.
   */
  private request;
}
1866
/**
 * Translate the framework's equality-map filter into Pinecone's
 * MongoDB-style filter DSL.
 *
 * Rules:
 * - Scalar value → `{ key: { $eq: value } }`
 * - Array value  → `{ key: { $in: [...] } }`
 * - Key that starts with $ → passed through untouched, letting
 *   callers hand-craft `{ $or: [...] }` or range conditions
 *   without the translator mangling them
 * - Value already shaped like `{ $eq, $in, $gt, ... }` → passed
 *   through untouched for the same reason
 *
 * Exported so tests can verify the translation offline.
 *
 * @param filter - Equality map, or a raw Pinecone filter to pass through.
 * @returns Pinecone filter object suitable for the `/query` endpoint.
 */
declare function buildPineconeFilter(filter: Record<string, unknown>): Record<string, unknown>;
1882
+ //#endregion
1883
+ //#region src/rag/rag-service.d.ts
1884
/**
 * High-level RAG helper that ties an `AiProvider` (for embeddings)
 * to a `VectorStore` (for retrieval) and produces the three operations
 * every RAG-powered service needs: index documents, search by query,
 * and augment a chat input with retrieved context.
 *
 * The service itself is a thin orchestrator — all the storage and
 * model calls go through the injected interfaces, so swapping
 * backends (in-memory → pgvector, OpenAI → Ollama) is a DI binding
 * change, not a code change.
 *
 * @example
 * ```ts
 * import { Service, Autowired, Inject } from '@forinda/kickjs'
 * import { AI_PROVIDER, VECTOR_STORE, RagService } from '@forinda/kickjs-ai'
 * import type { AiProvider, VectorStore } from '@forinda/kickjs-ai'
 *
 * @Service()
 * class DocsService {
 *   private readonly rag: RagService
 *
 *   constructor(
 *     @Inject(AI_PROVIDER) provider: AiProvider,
 *     @Inject(VECTOR_STORE) store: VectorStore,
 *   ) {
 *     this.rag = new RagService(provider, store)
 *   }
 *
 *   async ingest(articles: Array<{ id: string; body: string }>) {
 *     await this.rag.index(articles.map((a) => ({ id: a.id, content: a.body })))
 *   }
 *
 *   async ask(question: string) {
 *     const input = await this.rag.augmentChatInput(
 *       { messages: [{ role: 'user', content: question }] },
 *       question,
 *       { topK: 3 },
 *     )
 *     const res = await provider.chat(input)
 *     return res.content
 *   }
 * }
 * ```
 */
declare class RagService<M extends Record<string, unknown> = Record<string, unknown>> {
  /** Embedding (and chat) backend — all model calls go through this interface. */
  private readonly provider;
  /** Retrieval backend — all storage calls go through this interface. */
  private readonly store;
  constructor(provider: AiProvider, store: VectorStore<M>);
  /** Underlying provider — exposed for services that want to reuse it for chat. */
  getProvider(): AiProvider;
  /** Underlying store — useful for admin tools that want raw access. */
  getStore(): VectorStore<M>;
  /**
   * Index a batch of documents: embed each one's content via the
   * provider, then upsert into the store. Embedding happens in a
   * single batched call, which is both faster and cheaper than one
   * call per document for most providers.
   *
   * Documents with empty content are skipped rather than failing the
   * whole batch — the store can't meaningfully retrieve empty strings
   * and silently dropping them matches what users usually expect when
   * a content field turns out to be blank.
   */
  index(docs: RagIndexInput<M>[]): Promise<void>;
  /**
   * Search the store for documents relevant to a natural-language
   * query. Embeds the query once, then delegates to the store's
   * `query` method with the resolved vector.
   */
  search(query: string, options?: RagSearchOptions): Promise<VectorSearchHit<M>[]>;
  /**
   * Retrieve relevant documents for a query and inject them into a
   * `ChatInput` as a system message. Returns a new input — the
   * original is not mutated.
   *
   * Two injection modes:
   * - Merge (default): prepend the context to the first existing
   *   system message if one exists, otherwise add a new one. Avoids
   *   producing chat histories with competing system prompts.
   * - Separate (`asSeparateSystemMessage: true`): always insert a
   *   new system message at the start. Useful when the existing
   *   system prompt is small and you want to keep roles distinct.
   *
   * If no documents are retrieved, the input is returned unchanged.
   */
  augmentChatInput(input: ChatInput, query: string, options?: RagAugmentOptions): Promise<ChatInput>;
}
1971
+ //#endregion
1972
+ export { AI_PROVIDER, AI_TOOL_METADATA, AiAdapter, type AiAdapterOptions, type AiProvider, AiTool, type AiToolDefinition, type AiToolOptions, AnthropicProvider, type AnthropicProviderOptions, type ChatChunk, type ChatInput, type ChatMemory, type ChatMessage, type ChatOptions, type ChatResponse, type ChatToolDefinition, type CreatePromptOptions, type EmbedInput, InMemoryChatMemory, InMemoryVectorStore, OpenAIProvider, type OpenAIProviderOptions, PgVectorStore, type PgVectorStoreOptions, PineconeVectorStore, type PineconeVectorStoreOptions, Prompt, ProviderError, QdrantVectorStore, type QdrantVectorStoreOptions, type RagAugmentOptions, type RagIndexInput, type RagSearchOptions, RagService, type RunAgentOptions, type RunAgentResult, type RunAgentWithMemoryOptions, SlidingWindowChatMemory, type SlidingWindowChatMemoryOptions, type SqlExecutor, type ToolCallInput, type ToolCallResponse, VECTOR_STORE, type VectorDocument, type VectorQueryOptions, type VectorSearchHit, type VectorStore, buildPineconeFilter, buildQdrantFilter, buildWhereClause, cosineSimilarity, createPrompt, getAiToolMeta, isAiTool, toPgVector };
1973
+ //# sourceMappingURL=index.d.mts.map