@theokit/sdk 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/bin/init-claude.mjs +34 -0
- package/claude-template/AGENTS.md +139 -0
- package/claude-template/CLAUDE.md +51 -0
- package/claude-template/dot-claude/rules/theokit-conventions.md +33 -0
- package/claude-template/dot-claude/settings.json +16 -0
- package/claude-template/dot-claude/skills/theokit-agent-core/SKILL.md +209 -0
- package/claude-template/dot-claude/skills/theokit-budget/SKILL.md +176 -0
- package/claude-template/dot-claude/skills/theokit-config/SKILL.md +139 -0
- package/claude-template/dot-claude/skills/theokit-cron/SKILL.md +148 -0
- package/claude-template/dot-claude/skills/theokit-di/SKILL.md +233 -0
- package/claude-template/dot-claude/skills/theokit-di-agent/SKILL.md +294 -0
- package/claude-template/dot-claude/skills/theokit-errors/SKILL.md +172 -0
- package/claude-template/dot-claude/skills/theokit-eval/SKILL.md +144 -0
- package/claude-template/dot-claude/skills/theokit-gateways/SKILL.md +209 -0
- package/claude-template/dot-claude/skills/theokit-memory/SKILL.md +176 -0
- package/claude-template/dot-claude/skills/theokit-rag/SKILL.md +226 -0
- package/claude-template/dot-claude/skills/theokit-streaming/SKILL.md +156 -0
- package/claude-template/dot-claude/skills/theokit-subscriptions/SKILL.md +148 -0
- package/claude-template/dot-claude/skills/theokit-tools/SKILL.md +170 -0
- package/claude-template/dot-claude/skills/theokit-workflows/SKILL.md +218 -0
- package/package.json +3 -1
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
---
|
|
2
|
+
user-invocable: false
|
|
3
|
+
description: Gateway architecture and 10 platform adapters (Telegram, Slack, Discord, WhatsApp, Teams, Email, SMS, Mattermost, LINE, Matrix).
|
|
4
|
+
paths:
|
|
5
|
+
- "**/*gateway*"
|
|
6
|
+
- "**/*Gateway*"
|
|
7
|
+
- "**/*telegram*"
|
|
8
|
+
- "**/*slack*"
|
|
9
|
+
- "**/*discord*"
|
|
10
|
+
- "**/*whatsapp*"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# TheoKit SDK -- Gateways
|
|
14
|
+
|
|
15
|
+
Quick reference for the gateway architecture and all 10 platform adapters.
|
|
16
|
+
|
|
17
|
+
## Architecture
|
|
18
|
+
|
|
19
|
+
The gateway system uses a base adapter pattern. Each platform adapter extends `BasePlatformAdapter` from `@theokit/gateway`. The core package defines:
|
|
20
|
+
|
|
21
|
+
- `BasePlatformAdapter` -- abstract class with `connect`, `disconnect`, `sendMessage`, `onInbound`.
|
|
22
|
+
- `GatewayMessageEvent` -- discriminated union of all platform inbound events (keyed by `platform`).
|
|
23
|
+
- `OutboundMessage` -- what `sendMessage` accepts.
|
|
24
|
+
- `SendResult` -- `{ ok, messageId?, error? }`.
|
|
25
|
+
- Session router for multi-platform agent dispatch.
|
|
26
|
+
|
|
27
|
+
## BasePlatformAdapter
|
|
28
|
+
|
|
29
|
+
```typescript
|
|
30
|
+
abstract class BasePlatformAdapter {
|
|
31
|
+
abstract readonly platform: PlatformName;
|
|
32
|
+
abstract connect(): Promise<boolean>;
|
|
33
|
+
abstract disconnect(): Promise<void>;
|
|
34
|
+
abstract sendMessage(out: OutboundMessage): Promise<SendResult>;
|
|
35
|
+
abstract onInbound(handler: (event: GatewayMessageEvent) => Promise<void>): () => void;
|
|
36
|
+
async startTyping(channelId: string): Promise<void> { /* noop */ }
|
|
37
|
+
async stopTyping(channelId: string): Promise<void> { /* noop */ }
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## GatewayMessageEvent (common fields)
|
|
42
|
+
|
|
43
|
+
```typescript
|
|
44
|
+
interface BaseMessageEvent {
|
|
45
|
+
id: string;
|
|
46
|
+
platform: PlatformName;
|
|
47
|
+
sender: { id: string; username?: string; displayName?: string };
|
|
48
|
+
channel: { id: string; type: "dm" | "group" | "thread"; topicId?: string };
|
|
49
|
+
text: string;
|
|
50
|
+
receivedAt: number;
|
|
51
|
+
replyTo?: string;
|
|
52
|
+
}
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## PlatformName
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
type PlatformName =
|
|
59
|
+
| "telegram" | "discord" | "slack" | "whatsapp" | "teams"
|
|
60
|
+
| "email" | "sms" | "mattermost" | "line" | "matrix";
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Platform adapters
|
|
64
|
+
|
|
65
|
+
### 1. Telegram (`@theokit/gateway-telegram`)
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
pnpm add @theokit/gateway-telegram @theokit/gateway grammy
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
import { TelegramAdapter } from "@theokit/gateway-telegram";
|
|
73
|
+
|
|
74
|
+
const adapter = new TelegramAdapter({ botToken: process.env.TELEGRAM_BOT_TOKEN! });
|
|
75
|
+
await adapter.connect();
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Platform-specific: `event.telegram.chatId`, `event.telegram.messageId`, `event.telegram.threadId?`.
|
|
79
|
+
|
|
80
|
+
### 2. Discord (`@theokit/gateway-discord`)
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
pnpm add @theokit/gateway-discord @theokit/gateway discord.js
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
import { DiscordAdapter } from "@theokit/gateway-discord";
|
|
88
|
+
|
|
89
|
+
const adapter = new DiscordAdapter({ botToken: process.env.DISCORD_BOT_TOKEN! });
|
|
90
|
+
await adapter.connect();
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Platform-specific: `event.discord.guildId`, `event.discord.channelId`, `event.discord.messageId`.
|
|
94
|
+
|
|
95
|
+
### 3. Slack (`@theokit/gateway-slack`)
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pnpm add @theokit/gateway-slack @theokit/gateway @slack/bolt @slack/web-api
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
import { SlackAdapter } from "@theokit/gateway-slack";
|
|
103
|
+
|
|
104
|
+
const adapter = new SlackAdapter({
|
|
105
|
+
botToken: process.env.SLACK_BOT_TOKEN!,
|
|
106
|
+
appToken: process.env.SLACK_APP_TOKEN!,
|
|
107
|
+
requireMention: true, // default; public channels need @bot mention
|
|
108
|
+
});
|
|
109
|
+
await adapter.connect();
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Platform-specific: `event.slack.teamId`, `event.slack.channelId`, `event.slack.ts`, `event.slack.threadTs?`.
|
|
113
|
+
|
|
114
|
+
### 4. WhatsApp (`@theokit/gateway-whatsapp`)
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
pnpm add @theokit/gateway-whatsapp @theokit/gateway
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Two backends: `"cloud"` (Meta Cloud API) and `"web"` (whatsapp-web.js bridge).
|
|
121
|
+
|
|
122
|
+
Platform-specific: `event.whatsapp.wamid`, `event.whatsapp.backend`, `event.whatsapp.phoneNumberId?`.
|
|
123
|
+
|
|
124
|
+
### 5. Teams (`@theokit/gateway-teams`)
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
pnpm add @theokit/gateway-teams @theokit/gateway botbuilder
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Platform-specific: `event.teams.activityId`, `event.teams.conversationId`, `event.teams.conversationType`, `event.teams.tenantId?`.
|
|
131
|
+
|
|
132
|
+
### 6. Email (`@theokit/gateway-email`)
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
pnpm add @theokit/gateway-email @theokit/gateway
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Uses IMAP for inbound and SMTP for outbound.
|
|
139
|
+
|
|
140
|
+
Platform-specific: `event.email.messageId`, `event.email.subject`, `event.email.fromAddress`, `event.email.recipients`.
|
|
141
|
+
|
|
142
|
+
### 7. SMS (`@theokit/gateway-sms`)
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
pnpm add @theokit/gateway-sms @theokit/gateway
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Backends: `"twilio"`, `"plivo"`, `"vonage"`. Inbound via webhook server.
|
|
149
|
+
|
|
150
|
+
Platform-specific: `event.sms.backend`, `event.sms.from` (E.164), `event.sms.to` (E.164).
|
|
151
|
+
|
|
152
|
+
### 8. Mattermost (`@theokit/gateway-mattermost`)
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
pnpm add @theokit/gateway-mattermost @theokit/gateway
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Platform-specific: `event.mattermost.postId`, `event.mattermost.channelId`, `event.mattermost.rootId?`.
|
|
159
|
+
|
|
160
|
+
### 9. LINE (`@theokit/gateway-line`)
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
pnpm add @theokit/gateway-line @theokit/gateway
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Inbound via webhook with signature verification. Reply tokens are cached (one-shot, 60s TTL).
|
|
167
|
+
|
|
168
|
+
Platform-specific: `event.line.sourceType`, `event.line.sourceId`, `event.line.messageId`.
|
|
169
|
+
|
|
170
|
+
### 10. Matrix (`@theokit/gateway-matrix`)
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
pnpm add @theokit/gateway-matrix @theokit/gateway matrix-js-sdk
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Platform-specific: `event.matrix.roomId`, `event.matrix.eventId`, `event.matrix.memberCount`.
|
|
177
|
+
|
|
178
|
+
## Common usage pattern
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
import { Agent } from "@theokit/sdk";
import { TelegramAdapter } from "@theokit/gateway-telegram";
|
|
182
|
+
|
|
183
|
+
const adapter = new TelegramAdapter({ botToken: process.env.TELEGRAM_BOT_TOKEN! });
|
|
184
|
+
await adapter.connect();
|
|
185
|
+
|
|
186
|
+
adapter.onInbound(async (event) => {
|
|
187
|
+
const agent = await Agent.getOrCreate(`${event.platform}-${event.sender.id}`, {
|
|
188
|
+
apiKey: process.env.THEOKIT_API_KEY!,
|
|
189
|
+
model: { id: "google/gemini-2.0-flash-001" },
|
|
190
|
+
local: { cwd: process.cwd() },
|
|
191
|
+
memory: { enabled: true, namespace: "bot", scope: "user", userId: event.sender.id },
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
await adapter.startTyping(event.channel.id);
|
|
195
|
+
const run = await agent.send(event.text);
|
|
196
|
+
const result = await run.wait();
|
|
197
|
+
await adapter.stopTyping(event.channel.id);
|
|
198
|
+
|
|
199
|
+
await adapter.sendMessage({
|
|
200
|
+
channel: event.channel,
|
|
201
|
+
text: result.result ?? "No response.",
|
|
202
|
+
replyTo: event.id,
|
|
203
|
+
});
|
|
204
|
+
});
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Error mapping convention
|
|
208
|
+
|
|
209
|
+
Each adapter maps platform errors to canonical `SendResult.error.code` values: `rate_limit`, `channel_not_found`, `no_permission`, `auth_error`, `message_too_long`, `platform_error`.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
---
|
|
2
|
+
user-invocable: false
|
|
3
|
+
description: Memory API, embedding providers, dreaming, active recall, and backends for @theokit/sdk.
|
|
4
|
+
paths:
|
|
5
|
+
- "**/*memory*"
|
|
6
|
+
- "**/*Memory*"
|
|
7
|
+
- "**/*embed*"
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# TheoKit SDK -- Memory
|
|
11
|
+
|
|
12
|
+
Quick reference for durable memory, embedding providers, dreaming, and backends.
|
|
13
|
+
|
|
14
|
+
## Enabling memory on an agent
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
import { Agent } from "@theokit/sdk";
|
|
18
|
+
|
|
19
|
+
const agent = await Agent.create({
|
|
20
|
+
apiKey: process.env.THEOKIT_API_KEY!,
|
|
21
|
+
model: { id: "google/gemini-2.0-flash-001" },
|
|
22
|
+
local: { cwd: process.cwd() },
|
|
23
|
+
memory: {
|
|
24
|
+
enabled: true,
|
|
25
|
+
namespace: "my-app",
|
|
26
|
+
userId: "user-123",
|
|
27
|
+
scope: "user",
|
|
28
|
+
},
|
|
29
|
+
});
|
|
30
|
+
|
|
31
|
+
await (await agent.send("Remember: my preferred test runner is Vitest.")).wait();
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## MemoryOptions
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
interface MemoryOptions {
|
|
38
|
+
enabled: boolean;
|
|
39
|
+
namespace?: string; // separates application domains
|
|
40
|
+
userId?: string; // isolates user memories
|
|
41
|
+
scope?: "agent" | "user" | "team";
|
|
42
|
+
storePath?: string; // relative to workspace; cannot escape
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Scopes
|
|
47
|
+
|
|
48
|
+
| Scope | Use for |
|
|
49
|
+
|---|---|
|
|
50
|
+
| `"agent"` | Durable state for one agent ID. Default scope. |
|
|
51
|
+
| `"user"` | Stable user preferences across agent instances. Requires `userId`. |
|
|
52
|
+
| `"team"` | Shared team facts safe for every authorized caller. |
|
|
53
|
+
|
|
54
|
+
## Safety rules
|
|
55
|
+
|
|
56
|
+
- Memory MUST NOT store API keys, bearer tokens, passwords, or credential material.
|
|
57
|
+
- Local `storePath` is resolved relative to the workspace. Path traversal raises `ConfigurationError`.
|
|
58
|
+
- Memory is keyed by `{ namespace, userId, scope }` and persists independently of the JavaScript process.
|
|
59
|
+
|
|
60
|
+
## SDKMemoryManager
|
|
61
|
+
|
|
62
|
+
```typescript
|
|
63
|
+
interface SDKMemoryManager {
|
|
64
|
+
// Reserved for explicit inspection and deletion APIs.
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
The agent uses memory during runs automatically. Public management APIs are narrow until deletion and audit semantics are finalized.
|
|
69
|
+
|
|
70
|
+
## Memory backends (v1.2+)
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
import { Memory } from "@theokit/sdk";
|
|
74
|
+
|
|
75
|
+
const memory = await Memory.create({
|
|
76
|
+
cwd: process.cwd(),
|
|
77
|
+
index: {
|
|
78
|
+
backend: "sqlite-vec", // default
|
|
79
|
+
embedding: { provider: "openai", model: "text-embedding-3-small" },
|
|
80
|
+
},
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
// Or use LanceDB for >100k facts:
|
|
84
|
+
const lanceMemory = await Memory.create({
|
|
85
|
+
cwd: process.cwd(),
|
|
86
|
+
index: {
|
|
87
|
+
backend: "lance",
|
|
88
|
+
embedding: { provider: "openai", model: "text-embedding-3-small" },
|
|
89
|
+
},
|
|
90
|
+
});
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
- `@lancedb/lancedb` is an optional peer dep. If missing with `backend: "lance"`, throws `ConfigurationError(code: "lance_backend_unavailable")`.
|
|
94
|
+
- Embedding dimension validated when opening existing index. Mismatch throws `ConfigurationError(code: "embedding_dimension_mismatch")`.
|
|
95
|
+
|
|
96
|
+
## Embedding providers (ADR D11)
|
|
97
|
+
|
|
98
|
+
Locked provider union: `openai`, `mistral`, `openrouter`, `voyage`, `deepinfra`.
|
|
99
|
+
|
|
100
|
+
```typescript
|
|
101
|
+
index: {
|
|
102
|
+
embedding: {
|
|
103
|
+
provider: "openai",
|
|
104
|
+
model: "text-embedding-3-small",
|
|
105
|
+
},
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Each provider adapter implements:
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
interface EmbeddingAdapter {
|
|
113
|
+
id: string;
|
|
114
|
+
model: string;
|
|
115
|
+
dimension: number;
|
|
116
|
+
embed(texts: string[]): Promise<number[][]>;
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Memory migration CLI (v1.2+)
|
|
121
|
+
|
|
122
|
+
Migrate SQLite memory index to LanceDB without data loss:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Dry-run (preview, no writes)
|
|
126
|
+
pnpm exec theokit-migrate-memory --cwd . --dry-run
|
|
127
|
+
|
|
128
|
+
# Real migration
|
|
129
|
+
pnpm exec theokit-migrate-memory --cwd .
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Options: `--cwd <path>`, `--dry-run`, `--keep-sqlite`, `--batch-size <n>`.
|
|
133
|
+
|
|
134
|
+
Algorithm: read SQLite, write to staging `lance-new/`, validate count + sample text match, atomic rename, prompt to delete SQLite.
|
|
135
|
+
|
|
136
|
+
## Dreaming (consolidation sweeps)
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
await Memory.runDreamingSweep({
|
|
140
|
+
cwd: process.cwd(),
|
|
141
|
+
embedding: { provider: "openai", model: "text-embedding-3-small" },
|
|
142
|
+
});
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Dreaming consolidates redundant facts into compressed summaries. In v1.x, consolidation is deterministic only (no LLM-mediated narrative). LLM narrative mode is deferred.
|
|
146
|
+
|
|
147
|
+
## Active recall
|
|
148
|
+
|
|
149
|
+
Active recall queries memory during `agent.send` to inject relevant facts into the LLM context. Configured via the memory options on the agent. Query modes:
|
|
150
|
+
|
|
151
|
+
- `"embedding"` -- cosine similarity search against the vector index.
|
|
152
|
+
- `"keyword"` -- keyword-based search.
|
|
153
|
+
- `"hybrid"` -- combines embedding and keyword results.
|
|
154
|
+
|
|
155
|
+
## Semantic cache (related)
|
|
156
|
+
|
|
157
|
+
`Cache.semantic` from `@theokit/sdk` reuses embedding adapters for LLM response caching with cosine-similarity matching. See the cache documentation for details.
|
|
158
|
+
|
|
159
|
+
## Resume behavior
|
|
160
|
+
|
|
161
|
+
Memory is durable by `{ namespace, userId, scope }`, not by process. Recreating or resuming an agent with the same memory config can recall durable facts. Inline secrets and MCP servers are NOT persisted through memory.
|
|
162
|
+
|
|
163
|
+
## Conversation storage (pluggable persistence)
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
import { Agent, InMemoryConversationStorage } from "@theokit/sdk";
|
|
167
|
+
|
|
168
|
+
const agent = await Agent.create({
|
|
169
|
+
apiKey, model,
|
|
170
|
+
conversationStorage: new InMemoryConversationStorage(),
|
|
171
|
+
});
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Built-in adapters: `FileSystemConversationStorage`, `InMemoryConversationStorage`. Custom adapters implement `ConversationStorageAdapter`.
|
|
175
|
+
|
|
176
|
+
When using custom storage, pass the adapter again to `Agent.resume`. Omitting it throws `ConfigurationError(code: "conversation_storage_required")` instead of silently falling back to filesystem storage.
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
---
|
|
2
|
+
user-invocable: false
|
|
3
|
+
description: RAG primitives -- VectorRetriever, CohereReranker, text splitters from @theokit/sdk/rag.
|
|
4
|
+
paths:
|
|
5
|
+
- "**/*retriev*"
|
|
6
|
+
- "**/*rerank*"
|
|
7
|
+
- "**/*splitter*"
|
|
8
|
+
- "**/*rag*"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# TheoKit SDK -- RAG
|
|
12
|
+
|
|
13
|
+
Quick reference for the RAG (Retrieval-Augmented Generation) sub-path at `@theokit/sdk/rag`.
|
|
14
|
+
|
|
15
|
+
## Installation
|
|
16
|
+
|
|
17
|
+
The RAG module ships with `@theokit/sdk` -- no additional install needed.
|
|
18
|
+
|
|
19
|
+
```typescript
|
|
20
|
+
import {
|
|
21
|
+
VectorRetriever,
|
|
22
|
+
CohereReranker,
|
|
23
|
+
NoopReranker,
|
|
24
|
+
splitByCharacter,
|
|
25
|
+
splitBySentence,
|
|
26
|
+
splitRecursive,
|
|
27
|
+
} from "@theokit/sdk/rag";
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Types
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
interface Document {
|
|
34
|
+
id: string;
|
|
35
|
+
text: string;
|
|
36
|
+
metadata?: Record<string, unknown>;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
interface Chunk {
|
|
40
|
+
text: string;
|
|
41
|
+
index: number;
|
|
42
|
+
metadata?: Record<string, unknown>;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
interface RetrievalResult {
|
|
46
|
+
text: string;
|
|
47
|
+
score: number;
|
|
48
|
+
metadata?: Record<string, unknown>;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
interface RankedChunk {
|
|
52
|
+
text: string;
|
|
53
|
+
score: number;
|
|
54
|
+
originalIndex: number;
|
|
55
|
+
metadata?: Record<string, unknown>;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
interface SplitOptions {
|
|
59
|
+
chunkSize: number;
|
|
60
|
+
overlap?: number;
|
|
61
|
+
}
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Interfaces
|
|
65
|
+
|
|
66
|
+
### Retriever
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
interface Retriever {
|
|
70
|
+
retrieve(query: string, options?: { topK?: number }): Promise<RetrievalResult[]>;
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Reranker
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
interface Reranker {
|
|
78
|
+
rerank(query: string, chunks: RetrievalResult[]): Promise<RankedChunk[]>;
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## VectorRetriever
|
|
83
|
+
|
|
84
|
+
Wraps any index that implements `search(query, topK)`.
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
interface VectorIndex {
|
|
88
|
+
search(query: string, topK: number): Promise<RetrievalResult[]>;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
interface VectorRetrieverOptions {
|
|
92
|
+
index: VectorIndex;
|
|
93
|
+
topK?: number; // default 5
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Usage:
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
import { VectorRetriever } from "@theokit/sdk/rag";
|
|
101
|
+
|
|
102
|
+
const retriever = new VectorRetriever({
|
|
103
|
+
index: myVectorIndex,
|
|
104
|
+
topK: 10,
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
const results = await retriever.retrieve("How does auth work?");
|
|
108
|
+
// results: RetrievalResult[] sorted by relevance
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
The `VectorIndex` interface is the DI boundary. Consumers depend on the interface; implementations (e.g., backed by Memory's SQLite-vec or LanceDB index) depend on the index adapter.
|
|
112
|
+
|
|
113
|
+
## CohereReranker
|
|
114
|
+
|
|
115
|
+
Calls the Cohere Rerank v2 API to re-score retrieval results by relevance.
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
interface CohereRerankerOptions {
|
|
119
|
+
apiKey: string;
|
|
120
|
+
model?: string; // default "rerank-v3.5"
|
|
121
|
+
}
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Usage:
|
|
125
|
+
|
|
126
|
+
```typescript
|
|
127
|
+
import { CohereReranker } from "@theokit/sdk/rag";
|
|
128
|
+
|
|
129
|
+
const reranker = new CohereReranker({
|
|
130
|
+
apiKey: process.env.COHERE_API_KEY!,
|
|
131
|
+
model: "rerank-v3.5",
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
const ranked = await reranker.rerank("auth middleware", retrievalResults);
|
|
135
|
+
// ranked: RankedChunk[] re-scored by Cohere
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## NoopReranker
|
|
139
|
+
|
|
140
|
+
Passes through results unchanged. Useful as a baseline or when reranking is not needed.
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
import { NoopReranker } from "@theokit/sdk/rag";
|
|
144
|
+
|
|
145
|
+
const reranker = new NoopReranker();
|
|
146
|
+
const ranked = await reranker.rerank(query, results);
|
|
147
|
+
// ranked: same items in the same order as results, each with originalIndex added
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Text splitters
|
|
151
|
+
|
|
152
|
+
Three strategies for splitting documents into chunks. All return `Chunk[]` with `text` and `index`. Empty input returns an empty array.
|
|
153
|
+
|
|
154
|
+
### splitByCharacter
|
|
155
|
+
|
|
156
|
+
Fixed-size character windows with optional overlap.
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
import { splitByCharacter } from "@theokit/sdk/rag";
|
|
160
|
+
|
|
161
|
+
const chunks = splitByCharacter(longText, { chunkSize: 500, overlap: 50 });
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### splitBySentence
|
|
165
|
+
|
|
166
|
+
Groups sentences into chunks up to `chunkSize` characters.
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
import { splitBySentence } from "@theokit/sdk/rag";
|
|
170
|
+
|
|
171
|
+
const chunks = splitBySentence(longText, { chunkSize: 500 });
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Splits on sentence boundaries (`.`, `!`, or `?` followed by whitespace). A sentence is never split across chunks.
|
|
175
|
+
|
|
176
|
+
### splitRecursive
|
|
177
|
+
|
|
178
|
+
Three-level cascading split: paragraph, then sentence, then character.
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
import { splitRecursive } from "@theokit/sdk/rag";
|
|
182
|
+
|
|
183
|
+
const chunks = splitRecursive(longText, { chunkSize: 500, overlap: 50 });
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Algorithm:
|
|
187
|
+
1. Split by double newlines (paragraphs).
|
|
188
|
+
2. Paragraphs that exceed `chunkSize` are split by sentence.
|
|
189
|
+
3. Sentences that still exceed `chunkSize` are split by character.
|
|
190
|
+
|
|
191
|
+
This is the recommended default for most RAG use cases.
|
|
192
|
+
|
|
193
|
+
## Full RAG pipeline example
|
|
194
|
+
|
|
195
|
+
```typescript
|
|
196
|
+
import { VectorRetriever, CohereReranker, splitRecursive } from "@theokit/sdk/rag";
|
|
197
|
+
|
|
198
|
+
// 1. Split documents
|
|
199
|
+
const chunks = splitRecursive(documentText, { chunkSize: 500, overlap: 50 });
|
|
200
|
+
|
|
201
|
+
// 2. Index chunks (your vector store)
|
|
202
|
+
await vectorStore.upsert(await Promise.all(chunks.map(async (c, i) => ({
|
|
203
|
+
id: `doc-${i}`,
|
|
204
|
+
text: c.text,
|
|
205
|
+
embedding: await embed(c.text),
|
|
206
|
+
}))));
|
|
207
|
+
|
|
208
|
+
// 3. Retrieve
|
|
209
|
+
const retriever = new VectorRetriever({ index: vectorStore, topK: 20 });
|
|
210
|
+
const results = await retriever.retrieve(userQuery);
|
|
211
|
+
|
|
212
|
+
// 4. Rerank
|
|
213
|
+
const reranker = new CohereReranker({ apiKey: process.env.COHERE_API_KEY! });
|
|
214
|
+
const ranked = await reranker.rerank(userQuery, results);
|
|
215
|
+
|
|
216
|
+
// 5. Use top results as agent context
|
|
217
|
+
const context = ranked.slice(0, 5).map((r) => r.text).join("\n\n");
|
|
218
|
+
const agent = await Agent.create({
|
|
219
|
+
systemPrompt: `Use this context:\n${context}`,
|
|
220
|
+
// ...
|
|
221
|
+
});
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## DI integration
|
|
225
|
+
|
|
226
|
+
Use `@Retriever` and `@Reranker` decorators from `@theokit/di-agent` to register RAG components in the DI container. See the theokit-di-agent skill for details.
|