@theokit/sdk 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ ---
2
+ user-invocable: false
3
+ description: All 15 agentic decorators from @theokit/di-agent for tools, workflows, evals, cron, and more.
4
+ paths:
5
+ - "**/*decorator*"
6
+ - "**/*Decorator*"
7
+ - "**/di-agent*"
8
+ ---
9
+
10
+ # TheoKit DI-Agent -- Agentic Decorators
11
+
12
+ Quick reference for `@theokit/di-agent` -- 15 decorators that wire agentic capabilities into DI-managed classes.
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ pnpm add @theokit/di-agent @theokit/di @theokit/sdk
18
+ ```
19
+
20
+ Requires `reflect-metadata` and TypeScript decorator support (see `@theokit/di` docs).
21
+
22
+ ## createAgentProvider
23
+
24
+ Bridges the `@theokit/di` container with the `@theokit/sdk` Agent. Reads decorator metadata from all registered classes and wires up tools, workflows, evals, cron jobs, etc.
25
+
26
+ ```typescript
27
+ import { Container } from "@theokit/di";
28
+ import { createAgentProvider } from "@theokit/di-agent";
29
+
30
+ const container = new Container();
31
+ container.register(MyToolService);
32
+ container.register(MyWorkflowService);
33
+
34
+ const { agent, dispose } = await createAgentProvider(container, {
35
+ apiKey: process.env.THEOKIT_API_KEY!,
36
+ model: { id: "google/gemini-2.0-flash-001" },
37
+ local: { cwd: process.cwd() },
38
+ });
39
+ ```
40
+
41
+ ## @Tool
42
+
43
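+ Registers a method as a custom tool exposed to the LLM. The example below uses `eval` only for brevity; tool input is chosen by the model, so never pass it to `eval` in real code — use a proper expression parser instead.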
44
+
45
+ ```typescript
46
+ import { Injectable } from "@theokit/di";
47
+ import { Tool } from "@theokit/di-agent";
48
+ import { z } from "zod";
49
+
50
+ @Injectable()
51
+ class MathService {
52
+ @Tool({
53
+ name: "calculate",
54
+ description: "Evaluate a math expression.",
55
+ inputSchema: z.object({ expression: z.string() }),
56
+ })
57
+ calculate(input: { expression: string }): string {
58
+ return String(eval(input.expression));
59
+ }
60
+ }
61
+ ```
62
+
63
+ ## @Workflow
64
+
65
+ Marks a method as a workflow step definition.
66
+
67
+ ```typescript
68
+ import { Workflow } from "@theokit/di-agent";
69
+
70
+ @Injectable()
71
+ class PipelineService {
72
+ @Workflow({ name: "data-pipeline", description: "ETL workflow." })
73
+ async run(input: { source: string }) {
74
+ // workflow implementation
75
+ }
76
+ }
77
+ ```
78
+
79
+ ## @EvalDecorator
80
+
81
+ Registers an eval suite on a method.
82
+
83
+ ```typescript
84
+ import { EvalDecorator } from "@theokit/di-agent";
85
+
86
+ @Injectable()
87
+ class QAService {
88
+ @EvalDecorator({
89
+ name: "qa-smoke",
90
+ dataset: [{ input: "Say ok.", expected: "ok" }],
91
+ })
92
+ async evaluate() { /* ... */ }
93
+ }
94
+ ```
95
+
96
+ ## @Cron
97
+
98
+ Registers a cron-scheduled agent task.
99
+
100
+ ```typescript
101
+ import { Cron } from "@theokit/di-agent";
102
+
103
+ @Injectable()
104
+ class ReportService {
105
+ @Cron({
106
+ expression: "0 9 * * *",
107
+ timezone: "America/Sao_Paulo",
108
+ message: "Summarize yesterday's commits.",
109
+ })
110
+ async dailyReport() { /* ... */ }
111
+ }
112
+ ```
113
+
114
+ ## @Subscription
115
+
116
+ Marks a method as a real-time subscription handler.
117
+
118
+ ```typescript
119
+ import { Subscription } from "@theokit/di-agent";
120
+
121
+ @Injectable()
122
+ class EventService {
123
+ @Subscription({ topic: "orders.created", description: "Handle new orders." })
124
+ async onOrder(event: unknown) { /* ... */ }
125
+ }
126
+ ```
127
+
128
+ ## @Auth
129
+
130
+ Registers authentication/authorization logic for agent operations.
131
+
132
+ ```typescript
133
+ import { Auth } from "@theokit/di-agent";
134
+
135
+ @Injectable()
136
+ class SecurityService {
137
+ @Auth({ strategy: "bearer", description: "JWT validation." })
138
+ async validate(token: string): Promise<boolean> { /* ... */ }
139
+ }
140
+ ```
141
+
142
+ ## @Retriever
143
+
144
+ Registers a retrieval method for RAG pipelines.
145
+
146
+ ```typescript
147
+ import { Retriever } from "@theokit/di-agent";
148
+
149
+ @Injectable()
150
+ class SearchService {
151
+ @Retriever({ name: "docs-search", description: "Search documentation." })
152
+ async search(query: string) { /* ... */ }
153
+ }
154
+ ```
155
+
156
+ ## @Reranker
157
+
158
+ Registers a reranking method for RAG pipelines.
159
+
160
+ ```typescript
161
+ import { Reranker } from "@theokit/di-agent";
162
+
163
+ @Injectable()
164
+ class RankService {
165
+ @Reranker({ name: "cohere-reranker", model: "rerank-v3.5" })
166
+ async rerank(query: string, docs: unknown[]) { /* ... */ }
167
+ }
168
+ ```
169
+
170
+ ## @TextSplitter
171
+
172
+ Registers a text splitting strategy.
173
+
174
+ ```typescript
175
+ import { TextSplitter } from "@theokit/di-agent";
176
+
177
+ @Injectable()
178
+ class SplitterService {
179
+ @TextSplitter({ strategy: "recursive", chunkSize: 1000, overlap: 100 })
180
+ split(text: string) { /* ... */ }
181
+ }
182
+ ```
183
+
184
+ ## @UseSandbox
185
+
186
+ Marks a class or method for sandboxed execution.
187
+
188
+ ```typescript
189
+ import { UseSandbox } from "@theokit/di-agent";
190
+
191
+ @Injectable()
192
+ class CodeRunner {
193
+ @UseSandbox({ enabled: true })
194
+ async execute(code: string) { /* ... */ }
195
+ }
196
+ ```
197
+
198
+ ## @SubAgent
199
+
200
+ Declares a subagent definition on a method.
201
+
202
+ ```typescript
203
+ import { SubAgent } from "@theokit/di-agent";
204
+
205
+ @Injectable()
206
+ class AgentOrchestrator {
207
+ @SubAgent({
208
+ name: "code-reviewer",
209
+ description: "Expert code reviewer.",
210
+ prompt: "Review for bugs and security issues.",
211
+ })
212
+ async review() { /* ... */ }
213
+ }
214
+ ```
215
+
216
+ ## @Hitl (Human-in-the-Loop)
217
+
218
+ Marks a method as requiring human approval before proceeding.
219
+
220
+ ```typescript
221
+ import { Hitl } from "@theokit/di-agent";
222
+
223
+ @Injectable()
224
+ class ApprovalService {
225
+ @Hitl({ description: "Requires manager approval.", timeout: 3600_000 })
226
+ async approve(request: unknown) { /* ... */ }
227
+ }
228
+ ```
229
+
230
+ ## @AutoSummarize
231
+
232
+ Enables automatic conversation summarization.
233
+
234
+ ```typescript
235
+ import { AutoSummarize } from "@theokit/di-agent";
236
+
237
+ @Injectable()
238
+ class ChatService {
239
+ @AutoSummarize({ maxTurns: 20, strategy: "rolling" })
240
+ async chat() { /* ... */ }
241
+ }
242
+ ```
243
+
244
+ ## @InjectAgent
245
+
246
+ Injects the current `SDKAgent` instance into a class.
247
+
248
+ ```typescript
249
+ import { Injectable } from "@theokit/di";
250
+ import { InjectAgent } from "@theokit/di-agent";
251
+ import type { SDKAgent } from "@theokit/sdk";
252
+
253
+ @Injectable()
254
+ class AgentAwareService {
255
+ constructor(@InjectAgent() private readonly agent: SDKAgent) {}
256
+
257
+ async doWork() {
258
+ const run = await this.agent.send("Do something");
259
+ await run.wait();
260
+ }
261
+ }
262
+ ```
263
+
264
+ ## @MemoryScopeDecorator
265
+
266
+ Configures memory scope for a class.
267
+
268
+ ```typescript
269
+ import { MemoryScopeDecorator } from "@theokit/di-agent";
270
+
271
+ @Injectable()
272
+ @MemoryScopeDecorator({ namespace: "billing", scope: "user" })
273
+ class BillingService { /* ... */ }
274
+ ```
275
+
276
+ ## Reading metadata (for framework authors)
277
+
278
+ Each decorator has a companion reader function:
279
+
280
+ ```typescript
281
+ import { readToolMetadata } from "@theokit/di-agent";
282
+ import { readWorkflowMetadata } from "@theokit/di-agent";
283
+ import { readCronMetadata } from "@theokit/di-agent";
284
+ // ... readEvalDecoratorMetadata, readRetrieverMetadata, etc.
285
+
286
+ const tools = readToolMetadata(MyToolService);
287
+ ```
288
+
289
+ ## AGENT_TOKEN
290
+
291
+ ```typescript
292
+ import { AGENT_TOKEN } from "@theokit/di-agent";
293
+ // Symbol token for agent injection in the DI container
294
+ ```
@@ -0,0 +1,172 @@
1
+ ---
2
+ user-invocable: false
3
+ paths:
4
+ - "**/*error*"
5
+ - "**/*Error*"
6
+ - "**/*exception*"
7
+ description: TheoKit SDK error hierarchy — TheokitAgentError, error codes, retry patterns
8
+ ---
9
+
10
+ # TheoKit Error Handling
11
+
12
+ Most SDK errors extend `TheokitAgentError` (`UnsupportedRunOperationError` and `AgentRunError` sit outside that hierarchy). Use `isRetryable` to drive
13
+ retry/backoff logic without coupling to specific subclasses.
14
+
15
+ ## Error hierarchy
16
+
17
+ ```
18
+ Error
19
+ +-- TheokitAgentError
20
+ | +-- AuthenticationError
21
+ | +-- RateLimitError
22
+ | +-- ConfigurationError
23
+ | | +-- IntegrationNotConnectedError
24
+ | +-- NetworkError
25
+ | +-- UnknownAgentError
26
+ |
27
+ +-- UnsupportedRunOperationError (separate hierarchy)
28
+ +-- AgentRunError (thrown by Agent.prompt with throwOnError)
29
+ ```
30
+
31
+ ## Error reference
32
+
33
+ | Error | When | `isRetryable` |
34
+ |---|---|---|
35
+ | `AuthenticationError` | Invalid API key, not logged in, insufficient permissions | `false` |
36
+ | `RateLimitError` | Too many requests or usage limits exceeded | `true` |
37
+ | `ConfigurationError` | Invalid model, bad request parameters, malformed options | `false` |
38
+ | `IntegrationNotConnectedError` | Cloud agent for a repo whose SCM is not connected | `false` |
39
+ | `NetworkError` | Service unavailable, timeout, transport failure | `true` |
40
+ | `UnknownAgentError` | Catch-all for unclassified errors | `false` |
41
+ | `UnsupportedRunOperationError` | Runtime does not support a `Run` operation | n/a |
42
+ | `AgentRunError` | Run finished with error status (only with `throwOnError: true`) | n/a |
43
+
44
+ ## `TheokitAgentError` properties
45
+
46
+ ```typescript
47
+ class TheokitAgentError extends Error {
48
+ readonly isRetryable: boolean;
49
+ readonly code?: string;
50
+ readonly protoErrorCode?: string;
51
+ readonly cause?: unknown;
52
+ readonly metadata?: ErrorMetadata; // v1.3+ provider HTTP errors
53
+ }
54
+ ```
55
+
56
+ ## `ErrorMetadata` (v1.3+)
57
+
58
+ When an error originates from a provider HTTP call:
59
+
60
+ ```typescript
61
+ interface ErrorMetadata {
62
+ provider: string; // "anthropic" | "openai" | "openrouter" | ...
63
+ endpoint: string; // "/v1/messages" | "/v1/chat/completions"
64
+ code: ErrorCode;
65
+ statusCode?: number;
66
+ retryAfter?: number; // seconds
67
+ raw?: unknown; // raw response body (truncated ~2KB)
68
+ }
69
+
70
+ type ErrorCode =
71
+ | "rate_limit" | "auth_failed" | "invalid_request"
72
+ | "timeout" | "server_error" | "context_too_long"
73
+ | "content_filtered" | "model_unavailable"
74
+ | "network" | "unknown";
75
+ ```
76
+
77
+ ## Retry pattern
78
+
79
+ ```typescript
80
+ import { TheokitAgentError, type Run } from "@theokit/sdk";
81
+
82
+ async function withRetry(send: () => Promise<Run>, attempts = 3): Promise<Run> {
83
+ let lastError: unknown;
84
+ for (let i = 0; i < attempts; i++) {
85
+ try {
86
+ return await send();
87
+ } catch (err) {
88
+ lastError = err;
89
+ if (err instanceof TheokitAgentError && err.isRetryable && i < attempts - 1) { // no backoff after the final attempt
90
+ await new Promise((r) => setTimeout(r, 2 ** i * 1000));
91
+ continue;
92
+ }
93
+ throw err;
94
+ }
95
+ }
96
+ throw lastError;
97
+ }
98
+ ```
99
+
100
+ ## Using metadata for programmatic handling
101
+
102
+ ```typescript
103
+ try {
104
+ await agent.send("...");
105
+ } catch (err) {
106
+ if (err instanceof TheokitAgentError && err.metadata) {
107
+ switch (err.metadata.code) {
108
+ case "rate_limit":
109
+ await wait(err.metadata.retryAfter ?? 60);
110
+ return retry();
111
+ case "auth_failed":
112
+ throw new Error(`Check API key for ${err.metadata.provider}`);
113
+ case "context_too_long":
114
+ // trigger prompt compression
115
+ break;
116
+ }
117
+ }
118
+ throw err;
119
+ }
120
+ ```
121
+
122
+ ## `IntegrationNotConnectedError`
123
+
124
+ ```typescript
125
+ import { IntegrationNotConnectedError } from "@theokit/sdk/errors";
126
+
127
+ try {
128
+ await Agent.create({ /* cloud with disconnected repo */ });
129
+ } catch (err) {
130
+ if (err instanceof IntegrationNotConnectedError) {
131
+ console.error(`Connect ${err.provider} at ${err.helpUrl}`);
132
+ }
133
+ }
134
+ ```
135
+
136
+ ## `UnsupportedRunOperationError`
137
+
138
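+ Check `run.supports(...)` before calling runtime-dependent operations; an operation the runtime does not support throws `UnsupportedRunOperationError`: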
139
+
140
+ ```typescript
141
+ if (run.supports("conversation")) {
142
+ const turns = await run.conversation();
143
+ } else {
144
+ console.log(run.unsupportedReason("conversation"));
145
+ }
146
+ ```
147
+
148
+ ## Tree-shaking
149
+
150
+ Import error classes from the `/errors` subpath to avoid pulling in the full SDK:
151
+
152
+ ```typescript
153
+ import { TheokitAgentError, RateLimitError } from "@theokit/sdk/errors";
154
+ ```
155
+
156
+ ## `throwOnError` on `Agent.prompt`
157
+
158
+ ```typescript
159
+ import { Agent, AgentRunError } from "@theokit/sdk";
160
+
161
+ try {
162
+ const result = await Agent.prompt("hi", {
163
+ apiKey: process.env.ANTHROPIC_API_KEY!,
164
+ model: { id: "claude-sonnet-4-5-20250929" },
165
+ throwOnError: true,
166
+ });
167
+ } catch (err) {
168
+ if (err instanceof AgentRunError && err.code === "auth_failed") {
169
+ // bad API key
170
+ }
171
+ }
172
+ ```
@@ -0,0 +1,144 @@
1
+ ---
2
+ user-invocable: false
3
+ paths:
4
+ - "**/*eval*"
5
+ - "**/*Eval*"
6
+ - "**/*scorer*"
7
+ description: TheoKit SDK Eval suite API reference — Eval.create, scorers, datasets, EvalRun
8
+ ---
9
+
10
+ # TheoKit Eval Suite
11
+
12
+ Eval-as-code primitive for production deploy gates. Run evals against real LLM
13
+ providers to measure quality, latency, and cost before shipping.
14
+
15
+ ## Quick start
16
+
17
+ ```typescript
18
+ import { Eval, Scorers } from "@theokit/sdk";
19
+
20
+ const run = await Eval.create({
21
+ name: "qa-smoke",
22
+ dataset: [
23
+ { input: "Reply with the word: ok.", expected: "ok" },
24
+ { input: "Say jazz in one word.", expected: "jazz" },
25
+ ],
26
+ scorers: [
27
+ Scorers.containsExpected({ caseSensitive: false }),
28
+ Scorers.regex(/[a-zA-Z]/),
29
+ ],
30
+ agent: {
31
+ apiKey: process.env.OPENROUTER_API_KEY,
32
+ model: { id: "openai/gpt-4o-mini" },
33
+ local: { cwd: process.cwd(), sandboxOptions: { enabled: false } },
34
+ },
35
+ concurrency: 4,
36
+ }).run();
37
+
38
+ console.log(run.aggregate.meanScore); // 0.95
39
+ console.log(run.aggregate.passRatio); // 1.0
40
+ console.log(run.aggregate.tokensInTotal); // 142
41
+ console.log(run.aggregate.durationMsP95); // 1830
42
+ ```
43
+
44
+ ## Built-in scorers (`Scorers`)
45
+
46
+ | Scorer | What it checks |
47
+ |---|---|
48
+ | `Scorers.exactMatch({ caseSensitive? })` | `output.trim() === expected.trim()` — refuses empty `expected` |
49
+ | `Scorers.containsExpected({ caseSensitive? })` | `output.includes(expected)` — refuses empty `expected` |
50
+ | `Scorers.regex(pattern)` | `pattern.test(output)` — test patterns against adversarial output to avoid ReDoS |
51
+ | `Scorers.jsonShape(zodSchema, { strict? })` | `JSON.parse(output)` + Zod validation — caps output at 1 MB before parse |
52
+ | `Scorers.llmJudge({ model, apiKey, criteria, rubric? })` | Second LLM scores against criteria — requires SEPARATE `apiKey` |
53
+
54
+ ### Custom scorer
55
+
56
+ A scorer is an async function returning a number between 0 and 1:
57
+
58
+ ```typescript
59
+ const myScorer = async (row: { input: string; output: string; expected?: string }) => {
60
+ return row.output.length < 100 ? 1.0 : 0.5;
61
+ };
62
+ ```
63
+
64
+ ## Dataset
65
+
66
+ The `dataset` field accepts an array of objects with `input` and optional `expected`:
67
+
68
+ ```typescript
69
+ interface EvalDatasetRow {
70
+ input: string;
71
+ expected?: string;
72
+ }
73
+ ```
74
+
75
+ Recommended ceiling: ~10k rows (v1 materializes the whole dataset in memory). For larger evals,
76
+ partition into multiple `Eval.create` calls.
77
+
78
+ ## `EvalRun` shape
79
+
80
+ ```typescript
81
+ interface EvalRun {
82
+ id: string;
83
+ name: string;
84
+ startedAt: number;
85
+ endedAt: number;
86
+ durationMs: number;
87
+ aggregate: EvalAggregate;
88
+ rows: ReadonlyArray<EvalRowResult>;
89
+ metadata?: Record<string, unknown>;
90
+ }
91
+
92
+ interface EvalAggregate {
93
+ meanScore: number;
94
+ medianScore: number;
95
+ passRatio: number; // fraction of rows whose mean score is >= 0.5
96
+ perScorer: Record<string, { mean: number; median: number; min: number; max: number }>;
97
+ totalRows: number;
98
+ errorRows: number;
99
+ durationMsP50: number;
100
+ durationMsP95: number;
101
+ tokensInTotal: number;
102
+ tokensOutTotal: number;
103
+ }
104
+ ```
105
+
106
+ `EvalRun` is plain JSON — `JSON.stringify(run)` works directly.
107
+
108
+ ## Concurrency
109
+
110
+ `concurrency` defaults to 4. Allowed range: `[1, 64]` (integer). 0 and
111
+ Infinity are rejected at `Eval.create` time.
112
+
113
+ ## Concurrent runs
114
+
115
+ Runs are single-flight per `name` within a process: if two `Eval.run` calls with the
115
+ same `name` overlap, the later call throws `EvalAlreadyRunningError`. Include the model
116
+ id in the name for matrix runs.
118
+
119
+ ## CLI integration
120
+
121
+ The `theokit eval` CLI invokes `Eval.run` internally. User-authored
122
+ `eval.config.{ts,mjs}` files are forward-compatible.
123
+
124
+ ## Telemetry
125
+
126
+ When `agent.telemetry.enabled === true`, `Eval.run` emits a parent `eval.run`
127
+ OTel span; `agent.send` / `llm.call` spans nest under it.
128
+
129
+ ## Cost forecasting
130
+
131
+ ```
132
+ aggregate.tokensInTotal x provider_input_price
133
+ + aggregate.tokensOutTotal x provider_output_price
134
+ ```
135
+
136
+ With `llmJudge`, add ~1 judge call per row. 1000 rows with `gpt-4o-mini`
137
+ cost roughly $3.00 total (base + judge).
138
+
139
+ ## Errors
140
+
141
+ | Error | When |
142
+ |---|---|
143
+ | `EvalAlreadyRunningError` | Same `name` already running in this process |
144
+ | `ConfigurationError` | Invalid concurrency, missing required fields |