@theokit/sdk 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/CHANGELOG.md +63 -22
  2. package/bin/init-claude.mjs +49 -15
  3. package/dist/a2a/index.cjs +12592 -12884
  4. package/dist/a2a/index.cjs.map +1 -1
  5. package/dist/a2a/index.js +12592 -12884
  6. package/dist/a2a/index.js.map +1 -1
  7. package/dist/{cron-BnywDYLq.d.cts → cron-Bse1MbaE.d.cts} +52 -2
  8. package/dist/{cron-CtZvJD9J.d.ts → cron-Ci_NUkUj.d.ts} +52 -2
  9. package/dist/cron.cjs +8763 -9062
  10. package/dist/cron.cjs.map +1 -1
  11. package/dist/cron.d.cts +1 -1
  12. package/dist/cron.d.ts +1 -1
  13. package/dist/cron.js +8763 -9062
  14. package/dist/cron.js.map +1 -1
  15. package/dist/eval.cjs +8776 -9075
  16. package/dist/eval.cjs.map +1 -1
  17. package/dist/eval.js +8776 -9075
  18. package/dist/eval.js.map +1 -1
  19. package/dist/index.cjs +11217 -10037
  20. package/dist/index.cjs.map +1 -1
  21. package/dist/index.d.cts +174 -3
  22. package/dist/index.d.ts +174 -3
  23. package/dist/index.js +11217 -10038
  24. package/dist/index.js.map +1 -1
  25. package/dist/internal/runtime/{cloud-config-serializer.d.ts → cloud/cloud-config-serializer.d.ts} +1 -1
  26. package/dist/internal/runtime/{real-cloud-run.d.ts → cloud/real-cloud-run.d.ts} +1 -1
  27. package/dist/internal/runtime/fixtures/fixture-scripts.d.ts +1 -1
  28. package/dist/internal/runtime/fork-agent.d.ts +1 -1
  29. package/dist/internal/runtime/{local-agent-bootstrap.d.ts → local-agent/local-agent-bootstrap.d.ts} +4 -4
  30. package/dist/internal/runtime/{local-agent-dispatch.d.ts → local-agent/local-agent-dispatch.d.ts} +7 -7
  31. package/dist/internal/runtime/{local-agent-memory-hooks.d.ts → local-agent/local-agent-memory-hooks.d.ts} +2 -2
  32. package/dist/internal/runtime/{local-agent-memory-provider.d.ts → local-agent/local-agent-memory-provider.d.ts} +2 -2
  33. package/dist/internal/runtime/{real-local-run.d.ts → local-agent/real-local-run.d.ts} +1 -1
  34. package/dist/internal/runtime/{memory-path-selector.d.ts → memory/memory-path-selector.d.ts} +1 -1
  35. package/dist/internal/runtime/{memory-provider.d.ts → memory/memory-provider.d.ts} +2 -2
  36. package/dist/internal/runtime/{memory-store.d.ts → memory/memory-store.d.ts} +1 -1
  37. package/dist/internal/runtime/skills/subagents-loader.d.ts +1 -0
  38. package/dist/types/agent.d.ts +3 -3
  39. package/dist/types/fork.d.ts +49 -0
  40. package/dist/types/goal-events.d.ts +1 -1
  41. package/package.json +15 -16
  42. /package/dist/internal/{runtime/cloud-agent.d.ts → agent-loop/budget-gate.d.ts} +0 -0
  43. /package/dist/internal/runtime/{budget-tracker.d.ts → budget/budget-tracker.d.ts} +0 -0
  44. /package/dist/internal/runtime/{budget.d.ts → budget/budget.d.ts} +0 -0
  45. /package/dist/internal/runtime/{cloud-run.d.ts → cloud/cloud-agent.d.ts} +0 -0
  46. /package/dist/internal/runtime/{cloud-payload-types.d.ts → cloud/cloud-payload-types.d.ts} +0 -0
  47. /package/dist/internal/runtime/{cloud-tool-parity.d.ts → cloud/cloud-run.d.ts} +0 -0
  48. /package/dist/internal/runtime/{local-agent-memory.d.ts → cloud/cloud-tool-parity.d.ts} +0 -0
  49. /package/dist/internal/runtime/{hooks-executor.d.ts → hooks/hooks-executor.d.ts} +0 -0
  50. /package/dist/internal/runtime/{hooks-frontmatter.d.ts → hooks/hooks-frontmatter.d.ts} +0 -0
  51. /package/dist/internal/runtime/{hooks-source.d.ts → hooks/hooks-source.d.ts} +0 -0
  52. /package/dist/internal/runtime/{local-agent-invalidate.d.ts → local-agent/local-agent-invalidate.d.ts} +0 -0
  53. /package/dist/internal/runtime/{local-agent-memory-direct.d.ts → local-agent/local-agent-memory-direct.d.ts} +0 -0
  54. /package/dist/internal/runtime/{local-agent-send.d.ts → local-agent/local-agent-memory.d.ts} +0 -0
  55. /package/dist/internal/runtime/{local-agent-personality-extensions.d.ts → local-agent/local-agent-personality-extensions.d.ts} +0 -0
  56. /package/dist/internal/runtime/{local-agent-plugins.d.ts → local-agent/local-agent-plugins.d.ts} +0 -0
  57. /package/dist/internal/runtime/{local-agent-runtime-extensions.d.ts → local-agent/local-agent-runtime-extensions.d.ts} +0 -0
  58. /package/dist/internal/runtime/{local-agent.d.ts → local-agent/local-agent-send.d.ts} +0 -0
  59. /package/dist/internal/runtime/{local-agent-task-wrap.d.ts → local-agent/local-agent-task-wrap.d.ts} +0 -0
  60. /package/dist/internal/runtime/{local-run.d.ts → local-agent/local-agent.d.ts} +0 -0
  61. /package/dist/internal/runtime/{skills-manager.d.ts → local-agent/local-run.d.ts} +0 -0
  62. /package/dist/internal/runtime/{agent-session-store.d.ts → session/agent-session-store.d.ts} +0 -0
  63. /package/dist/internal/runtime/{agent-session.d.ts → session/agent-session.d.ts} +0 -0
  64. /package/dist/internal/runtime/{session-types.d.ts → session/session-types.d.ts} +0 -0
  65. /package/dist/internal/runtime/{skill-frontmatter.d.ts → skills/skill-frontmatter.d.ts} +0 -0
  66. /package/dist/internal/runtime/{subagents-loader.d.ts → skills/skills-manager.d.ts} +0 -0
package/CHANGELOG.md CHANGED
@@ -1,7 +1,41 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.9.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 461c020: `createSquad` sequential agent-team convenience + `Agent.batch` boundary validation — first real npm publish.
8
+
9
+ - **`createSquad(options)`** — composes `Workflow.create()` + `agentStep` into a sequential agent team (own identity; not a framework copy). Throws `ConfigurationError` (`invalid_squad` for empty agents, `squad_process_unsupported` for hierarchical). Cross-validation Gap 1.
10
+ - **`Agent.batch`** now fail-fast validates `concurrency` + prompt items at the public boundary (`ConfigurationError` with `invalid_concurrency` / `invalid_batch_item`) before any side effect. Cross-validation Gap 3.
11
+
12
+ Note: these features were tagged as `v1.8.0` but that version's npm publish failed (CI build cycle, fixed in `turbo.json`); `1.8.0` / `1.8.1` on npm predate them. They are published to npm for the first time in `1.9.0`. The `[1.8.0]` CHANGELOG section is retained as the GitHub-released record and is not rewritten.
13
+
3
14
  ## [Unreleased]
4
15
 
16
+ ### Added
17
+
18
+ - `createSquad({ agents })` — a thin convenience for sequential agent teams. Runs agents in order, threading each output into the next agent's prompt; returns `{ result, status, steps }`. Composes `Workflow` + `agentStep` internally (no new orchestration engine). `process: "hierarchical"` throws a guiding `ConfigurationError` (use subagents / `@theokit/sdk-handoff`); empty `agents` → `ConfigurationError(code: "invalid_squad")`.
19
+
20
+ ### Fixed
21
+
22
+ - `Agent.batch` now validates its inputs at the boundary (fail-fast). Invalid `concurrency` (not a positive integer) throws `ConfigurationError(code: "invalid_concurrency")` with a user-facing message, and an empty/non-string prompt item throws `ConfigurationError(code: "invalid_batch_item")` — both BEFORE any credential pool is built or Task is registered. Previously invalid `concurrency` surfaced only deep inside the semaphore with a leaky "permits" message AND after task registration (a dangling Task could be registered with `task: true`), and empty-string prompts flowed silently to `agent.send`. New `validateBatchInput` pre-flight; whitespace-only prompts are intentionally still accepted (non-empty strings; the validator does not judge content). (arch-review cross-validation Gap 3)
23
+
24
+ ### Changed
25
+
26
+ - Reorganized the flat 62-file `src/internal/runtime/` god folder into sub-concern folders (arch-review M4): `local-agent/` (15), `cloud/` (6), `compression/` (6), `hooks/` (4), `budget/` (3), `memory/` (4), `session/` (3), `skills/` (3) — alongside the pre-existing `registry/`, `system-prompt/`, `context/`, `fixtures/`, `plugins/`. 18 cross-cutting singletons (abort-utils, async-\*, default-model, fork-agent, run-until, system-prompt, validate-\*, etc.) remain at the `runtime/` root (down from 62). Pure file moves + import-path updates (44 files moved, ~100 import sites rewritten incl. 2 lint-allowlist path updates); `internal/runtime` is not an exported subpath and has no tsup entry, so the change is fully internal — no API/behavior change. Full SDK suite GREEN (2629 tests); `madge --circular` unchanged (1 intentional type-only `memory/memory-provider` cycle).
27
+ - Broke 2 of 3 type-only dependency cycles in the public type barrel (arch-review ADR 0001). `ForkOptions`/`ForkResult` moved to a leaf `types/fork.ts`, so `types/agent.ts` no longer imports the `internal/runtime/fork-agent.ts` implementation (`fork-agent.ts` re-exports them for back-compat). Eliminates the `types/agent.ts → fork-agent.ts → {plugins/types,(self)}` cycles. No behavior/API change; `madge --circular` drops from 3 to 1 (the remaining `memory-provider` cycle is a genuine bidirectional type relationship, runtime-safe, left intentionally).
28
+
29
+ ### Fixed
30
+
31
+ - Budget pre-flight gate now **fails closed**: if a custom `budgetTracker.check()` throws (a contract violation — `check()` must return a decision), the agent loop denies the next iteration instead of silently proceeding past budget. Previously a throwing tracker defaulted to `allowed: true` (fail-open), letting a broken cost guard run unbounded. Extracted to a unit-tested `evaluateBudgetGate` helper. (arch-review L1)
32
+
33
+ ## [1.8.1] - 2026-06-12
34
+
35
+ ### Changed
36
+
37
+ - `theokit-init-claude` now merges into existing `.claude/` directories instead of refusing. Adds only missing files, preserves user customizations. Use `--force` to overwrite all files.
38
+
5
39
  ## [1.8.0] - 2026-06-12
6
40
 
7
41
  ### Added
@@ -19,7 +53,7 @@
19
53
 
20
54
  - **Compression config resolution module (T2.2 step 2/N of plan `sdk-superiority-2026-06-07`, ADR D440)**: `resolveCompressionConfig(agentModel, config): ResolvedCompressionConfig` bridges the compression-model-registry (step 1) with the `Agent.create({compression})` override surface. Resolves: (a) compression model — registry default OR explicit `config.model` override; (b) API key — first-match chain: explicit `config.apiKey` → `THEOKIT_COMPRESSION_API_KEY` env var → undefined (signals aux-LLM client to use agent's main CredentialPool); (c) maxAttempts (default 3) + grace (default 1). Pure config resolution — no I/O. 11 new tests at `tests/internal/runtime/compression-config.test.ts`. Foundation for step 3 (aux-llm-client with OTel span) and step 4 (agent-loop wire).
21
55
 
22
- - **Model capabilities introspection registry (T3.10c step 1 of plan `sdk-superiority-2026-06-07`, DR3 #17)**: pre-T3.10c the SDK had no way to query a model's capability flags before sending a request — consumers who sent vision content to a text-only model or structured-output requests to a model without json_schema support got an opaque 400 from the provider. T3.10c step 1 adds the foundation pure-function registry `resolveModelCapabilities(modelId): ModelCapabilities` with typed per-model flags: `supportsVision`, `supportsStructuredOutput`, `supportsToolUse`, `supportsCacheControl`, `maxContextTokens`, `maxOutputTokens`. Resolution algorithm: strip routing prefixes (openrouter/, vertex/, bedrock/), exact-match against the vendor-model registry, then infer vendor from model name (claude-* → anthropic/, gpt-* → openai/, gemini-* → google/) for routing-prefixed lookups. Unknown models return conservative defaults (all false, 4096/4096 token counts) — never optimistic assumptions. Initial registry covers OpenAI (gpt-4o/4o-mini/4-turbo/o1/o3) and Anthropic (claude-opus-4/sonnet-4/3-5-sonnet/3-haiku/3-opus) families. 9 new tests at `tests/internal/llm/model-capabilities.test.ts`. Foundation for step 2 (public `Theokit.models.capabilities()` API) and step 3 (Agent.create boundary gate + `CapabilityNotSupportedError`).
56
+ - **Model capabilities introspection registry (T3.10c step 1 of plan `sdk-superiority-2026-06-07`, DR3 #17)**: pre-T3.10c the SDK had no way to query a model's capability flags before sending a request — consumers who sent vision content to a text-only model or structured-output requests to a model without json_schema support got an opaque 400 from the provider. T3.10c step 1 adds the foundation pure-function registry `resolveModelCapabilities(modelId): ModelCapabilities` with typed per-model flags: `supportsVision`, `supportsStructuredOutput`, `supportsToolUse`, `supportsCacheControl`, `maxContextTokens`, `maxOutputTokens`. Resolution algorithm: strip routing prefixes (openrouter/, vertex/, bedrock/), exact-match against the vendor-model registry, then infer vendor from model name (claude-\* → anthropic/, gpt-\* → openai/, gemini-\* → google/) for routing-prefixed lookups. Unknown models return conservative defaults (all false, 4096/4096 token counts) — never optimistic assumptions. Initial registry covers OpenAI (gpt-4o/4o-mini/4-turbo/o1/o3) and Anthropic (claude-opus-4/sonnet-4/3-5-sonnet/3-haiku/3-opus) families. 9 new tests at `tests/internal/llm/model-capabilities.test.ts`. Foundation for step 2 (public `Theokit.models.capabilities()` API) and step 3 (Agent.create boundary gate + `CapabilityNotSupportedError`).
23
57
 
24
58
  ### Fixed
25
59
 
@@ -64,11 +98,11 @@
64
98
  ### Refactored
65
99
 
66
100
  - **Cycle #4 closed via `types/handoff-descriptor.ts` leaf with TAgent generic (iter-20)**: `HandoffDescriptor` + `HandoffOptions` + `HandoffContext` + `HandoffHistory` + `HandoffResult` moved to a new leaf file. The leaf has `HandoffDescriptor<TInput, TAgent>` parameterized over the target agent shape — no dependency on `SDKAgent` or any other agent.ts type. `types/handoff.ts` re-exports the leaf types with `TAgent = SDKAgent` pinned for back-compat callers. `types/agent.ts` now imports `HandoffDescriptor` from the leaf, breaking the bidirectional `types/agent.ts ↔ types/handoff.ts` edge. madge final state: **2 cycles** (only D428-acknowledged rollup-dts subscribe-at-sub-path remain). Cycle gate threshold tightened ≤ 2.
67
- - **`internal/runtime/plugins/` sub-folder promotion + T5.1 complete (4 of 4, FO#1)**: 2 plugin-* files moved from `internal/runtime/` to `internal/runtime/plugins/` via `git mv`. Direct file count: 50 → 48. **T5.1 complete across 4 iterations (15-18)**: cumulative 21 files moved across fixtures/ (5) + context/ (8) + registry/ (6) + plugins/ (2). `internal/runtime/` direct file count dropped 69 → 48. Audit ideal heuristic is 25; remaining 23-file gap is documented as out-of-scope (no further cohesive 5+ file cluster remains). 254/254 runtime + architecture tests GREEN.
68
- - **`internal/runtime/registry/` sub-folder promotion (T5.1 partial 3 of 4, FO#1)**: 6 *-registry* files moved from `internal/runtime/` to `internal/runtime/registry/` via `git mv`. Direct file count: 56 → 50. T5.1 status PARTIAL — 3 of 4 clusters done (fixtures + context + registry). Remaining: plugins/. Cross-package caller surgery covered: `src/agent.ts`, `src/index.ts`, 5 runtime siblings, 4 test files, 1 dynamic `import("./agent-factory-registry.js")` in `local-agent-runtime-extensions.ts`. 253/253 runtime + architecture tests GREEN; madge unchanged.
69
- - **`internal/runtime/context/` sub-folder promotion (T5.1 partial 2 of 4, FO#1)**: 8 context-* files moved from `internal/runtime/` to `internal/runtime/context/` via `git mv`. Direct file count: 64 → 56. T5.1 status PARTIAL — 2 of 4 clusters done (fixtures + context). Remaining: registry/, plugins/. Sibling callers (`local-agent`, `local-agent-bootstrap`, `system-prompt/local-assembly`) had their imports rewritten to `./context/context-X.js` (or `../context/context-X.js` from system-prompt/). 8 test files updated. 252/252 runtime + architecture tests GREEN.
70
- - **`internal/runtime/fixtures/` sub-folder promotion (T5.1 partial, FO#1)**: 5 fixture-* files moved from `internal/runtime/` to `internal/runtime/fixtures/` via `git mv`. Direct file count: 69 → 64. T5.1 status PARTIAL — fixtures is 1 of 4 clusters (context/registry/plugins remain for follow-up iterations). Internal-only refactor; sibling callers (`cloud-run`, `local-run`, `real-local-run`, `real-cloud-run`) had their imports rewritten to `./fixtures/fixture-X.js`. 251/251 runtime + architecture tests GREEN; madge cycle count unchanged.
71
- - **`internal/memory/storage/` sub-folder promotion (T10.1, FO#3)**: 7 storage-primitive files moved from `internal/memory/` to `internal/memory/storage/` via `git mv` — `markdown-store.ts`, `transcript-store.ts`, `session-loader.ts`, `session-summary-writer.ts`, `reader.ts`, `wiki-loader.ts`, `chunk-markdown.ts`. Direct file count in `internal/memory/`: 28 → 22 (under the 25-file god-folder heuristic). Internal-only refactor; zero public API surface change. All sibling imports, runtime/* callers, and test paths updated in the same slice. Architecture guard `tests/architecture/memory-folder-budget.test.ts` (NEW) asserts the budget. 140/140 architecture + memory tests GREEN; madge cycle count unchanged.
101
+ - **`internal/runtime/plugins/` sub-folder promotion + T5.1 complete (4 of 4, FO#1)**: 2 plugin-\* files moved from `internal/runtime/` to `internal/runtime/plugins/` via `git mv`. Direct file count: 50 → 48. **T5.1 complete across 4 iterations (15-18)**: cumulative 21 files moved across fixtures/ (5) + context/ (8) + registry/ (6) + plugins/ (2). `internal/runtime/` direct file count dropped 69 → 48. Audit ideal heuristic is 25; remaining 23-file gap is documented as out-of-scope (no further cohesive 5+ file cluster remains). 254/254 runtime + architecture tests GREEN.
102
+ - **`internal/runtime/registry/` sub-folder promotion (T5.1 partial 3 of 4, FO#1)**: 6 \*-registry\* files moved from `internal/runtime/` to `internal/runtime/registry/` via `git mv`. Direct file count: 56 → 50. T5.1 status PARTIAL — 3 of 4 clusters done (fixtures + context + registry). Remaining: plugins/. Cross-package caller surgery covered: `src/agent.ts`, `src/index.ts`, 5 runtime siblings, 4 test files, 1 dynamic `import("./agent-factory-registry.js")` in `local-agent-runtime-extensions.ts`. 253/253 runtime + architecture tests GREEN; madge unchanged.
103
+ - **`internal/runtime/context/` sub-folder promotion (T5.1 partial 2 of 4, FO#1)**: 8 context-\* files moved from `internal/runtime/` to `internal/runtime/context/` via `git mv`. Direct file count: 64 → 56. T5.1 status PARTIAL — 2 of 4 clusters done (fixtures + context). Remaining: registry/, plugins/. Sibling callers (`local-agent`, `local-agent-bootstrap`, `system-prompt/local-assembly`) had their imports rewritten to `./context/context-X.js` (or `../context/context-X.js` from system-prompt/). 8 test files updated. 252/252 runtime + architecture tests GREEN.
104
+ - **`internal/runtime/fixtures/` sub-folder promotion (T5.1 partial, FO#1)**: 5 fixture-\* files moved from `internal/runtime/` to `internal/runtime/fixtures/` via `git mv`. Direct file count: 69 → 64. T5.1 status PARTIAL — fixtures is 1 of 4 clusters (context/registry/plugins remain for follow-up iterations). Internal-only refactor; sibling callers (`cloud-run`, `local-run`, `real-local-run`, `real-cloud-run`) had their imports rewritten to `./fixtures/fixture-X.js`. 251/251 runtime + architecture tests GREEN; madge cycle count unchanged.
105
+ - **`internal/memory/storage/` sub-folder promotion (T10.1, FO#3)**: 7 storage-primitive files moved from `internal/memory/` to `internal/memory/storage/` via `git mv` — `markdown-store.ts`, `transcript-store.ts`, `session-loader.ts`, `session-summary-writer.ts`, `reader.ts`, `wiki-loader.ts`, `chunk-markdown.ts`. Direct file count in `internal/memory/`: 28 → 22 (under the 25-file god-folder heuristic). Internal-only refactor; zero public API surface change. All sibling imports, runtime/\* callers, and test paths updated in the same slice. Architecture guard `tests/architecture/memory-folder-budget.test.ts` (NEW) asserts the budget. 140/140 architecture + memory tests GREEN; madge cycle count unchanged.
72
106
  - **`dispatchSingleCall` orchestrator split (T10.4, PV#2)**: the 158 LOC body in `internal/agent-loop/tool-dispatch.ts` was decomposed into 7 named single-concern helpers (`applyRepairAndExtractCall`, `vetoFromForkWhitelist`, `startToolCallSpan`, `vetoFromPluginPreHook`, `vetoFromFileHookPreDecision`, `runToolWithLifecycle`, `finalizeSpanAndPostHook`). The orchestrator now reads as a ~28 LOC sequence; the previous complexity-suppression `biome-ignore` directive is removed. Zero public-API surface change; 51/51 regression tests (tool-dispatch + hooks + golden custom-tools) continue to pass.
73
107
 
74
108
  ### Fixed
@@ -120,12 +154,12 @@
120
154
 
121
155
  - **Load + chaos suite scaffold (T0.3 of plan `sdk-superiority-2026-06-07`)**: 6 new test files at `tests/load/{1000-concurrent-sse,leaky-generators,slow-consumer-backpressure}.test.ts` and `tests/chaos/{kill-mid-stream,partition-fs,oom-recovery}.test.ts`. Three harness modules ship alongside: `tests/load/_harness/sse-driver.ts` (in-process SSE driver — NOT autocannon — per SEPA brief § E; tracks p50/p95/p99 latencies + SSE event count via `\n\n` terminators per HTML LS § 9.2.6), `tests/load/_harness/socket-monitor.ts` (Linux-only `ss -tnp` probe with no-op fallback for Mac/Win; CI asserts `closeWaitCount ≤ threshold`), `tests/chaos/_harness/process-control.ts` (child-process spawn + SIGKILL injection per ADR D37 methodology). Today's scaffold uses 100 concurrent SSE (override via `T0_3_CONCURRENCY=1000`); T6.2 ratchets to the full 1000-conn p95 < 200ms perf gate, T6.3 wires the kill-mid-stream chaos against the SDK's real streaming surface, T6.4 wires partition-fs against persistence paths, T6.5 wires OOM against the memory subsystem.
122
156
  - **Real-LLM CI matrix scaffold (T0.2 of plan `sdk-superiority-2026-06-07`)**: 15 env-gated integration test files at `tests/integration/real-llm/{openai,anthropic,openrouter}-{tools,vision,stream,cache,structured}.test.ts`. Each file uses `describe.skipIf(...)` so the suite is silent when the relevant API key is absent. `tests/integration/real-llm/_helpers/real-llm-env.ts` centralizes the provider-key resolver with OpenRouter fallback for non-native scenarios (Anthropic cache stays native-only per SEPA initial brief § C). With keys set the matrix validates the happy path for tool use, streaming, vision content parts, prompt caching, and structured outputs across the 3 routes — expanded depth (cache_read_input_tokens > 0 assertion, parallel tool dispatch, error-retry) lands in T3.5 / T3.8 / T6.1. Default model `openai/gpt-4o-mini` per cost budget. Today: 15/15 files skip cleanly.
123
- - **OTel hot-path wiring foundation (T0.1 of plan `sdk-superiority-2026-06-07`)**: emit canonical spans `agent.create`, `agent.send` (parent), and `memory.recall` when `telemetry.enabled: true`. New closed-enum `internal/telemetry/span-names.ts` (14 names + `SpanName` literal type) anticipates the no-`(string & {})` discipline of T1.1. `TelemetryHandle` interface extended with `recordHistogram(name, valueMs, attrs)` and the OTel `metrics` namespace is lazy-loaded the same way `trace` is (graceful no-op when missing). First histogram name registered: `theokit_memory_recall_duration_ms` (recorded with `userId/namespace/scope/status` dimensions). Integration tests use a real `@opentelemetry/sdk-trace-base` `InMemorySpanExporter` (NOT module mocks) — added as devDep alongside `@opentelemetry/api` and `@opentelemetry/sdk-metrics`. Wiring triad: pillar (a) callers are `Agent.create` (production), `LocalAgent.send` (production), `runActiveMemory` (production); pillar (b) covered by `tests/telemetry/*.test.ts` (8 tests). Remaining acceptance items — `agent.send.<step>` 8 child spans, `tool.call`, `llm.call` spans — deferred to T1.7 / T2.4 / T3.* per SEPA brief (zero plan-deviation).
157
+ - **OTel hot-path wiring foundation (T0.1 of plan `sdk-superiority-2026-06-07`)**: emit canonical spans `agent.create`, `agent.send` (parent), and `memory.recall` when `telemetry.enabled: true`. New closed-enum `internal/telemetry/span-names.ts` (14 names + `SpanName` literal type) anticipates the no-`(string & {})` discipline of T1.1. `TelemetryHandle` interface extended with `recordHistogram(name, valueMs, attrs)` and the OTel `metrics` namespace is lazy-loaded the same way `trace` is (graceful no-op when missing). First histogram name registered: `theokit_memory_recall_duration_ms` (recorded with `userId/namespace/scope/status` dimensions). Integration tests use a real `@opentelemetry/sdk-trace-base` `InMemorySpanExporter` (NOT module mocks) — added as devDep alongside `@opentelemetry/api` and `@opentelemetry/sdk-metrics`. Wiring triad: pillar (a) callers are `Agent.create` (production), `LocalAgent.send` (production), `runActiveMemory` (production); pillar (b) covered by `tests/telemetry/*.test.ts` (8 tests). Remaining acceptance items — `agent.send.<step>` 8 child spans, `tool.call`, `llm.call` spans — deferred to T1.7 / T2.4 / T3.\* per SEPA brief (zero plan-deviation).
124
158
  - **`SecretRedactor` interface** at `internal/security/secret-redactor.ts` (T9.1 of plan `arch-review-fixes-2026-06-06`, ADR D437). Types-only — no runtime exports; canonical `redactSecrets` from `redact.ts` satisfies the interface structurally. Closes AF#16 (Martin Zone of Pain D=0.923) from the 2026-06-06 architecture audit through documentation + minimal abstraction without violating D68/D69/D70/D71/D73 (security primitives stay concrete + stable). Rationale + coupling metrics at `internal/security/README.md`.
125
159
 
126
160
  ### Changed
127
161
 
128
- - **Renamed `internal/runtime/system-prompt/providers/` → `internal/runtime/system-prompt/sources/`** (FO#6, plan `arch-review-fixes-2026-06-06` T10.3). The directory previously shared its basename with `internal/providers/` (LLM provider profiles per ADR D105-D107) — auditor flagged the duplicate folder name as a findability hazard. `sources/` better describes the semantic: these 5 modules are system-prompt *sources* (ActiveMemoryPromptProvider, BasePromptProvider, ContextPromptProvider, MemoryPromptProvider, SkillsPromptProvider), not LLM provider profiles. Internal-only rename; no public API touched. Git-rename detection preserved (5/5 files moved with `git mv`); import paths in `pipeline.ts` + 5 golden tests updated atomically.
162
+ - **Renamed `internal/runtime/system-prompt/providers/` → `internal/runtime/system-prompt/sources/`** (FO#6, plan `arch-review-fixes-2026-06-06` T10.3). The directory previously shared its basename with `internal/providers/` (LLM provider profiles per ADR D105-D107) — auditor flagged the duplicate folder name as a findability hazard. `sources/` better describes the semantic: these 5 modules are system-prompt _sources_ (ActiveMemoryPromptProvider, BasePromptProvider, ContextPromptProvider, MemoryPromptProvider, SkillsPromptProvider), not LLM provider profiles. Internal-only rename; no public API touched. Git-rename detection preserved (5/5 files moved with `git mv`); import paths in `pipeline.ts` + 5 golden tests updated atomically.
129
163
 
130
164
  ### Fixed
131
165
 
@@ -169,23 +203,23 @@
169
203
 
170
204
  ### Security threats addressed
171
205
 
172
- | Threat | Mitigation |
173
- |---|---|
174
- | Resume token replay | Consumer SHOULD bind token to session + rotate per reconnect; SDK ships TTL knob via custom `tracked()` envelope semantics |
175
- | WS connection hijacking | Auth at HTTP upgrade — `WsAdapter.upgrade(ctx, raw)` exposes the `request` so consumer middleware (G11 `defineAuth`) runs BEFORE upgrade. Rejected upgrade returns null → caller responds 401 |
176
- | Subscription input tampering | Zod schema validation BEFORE handler invocation; throws `SubscriptionInputError` carrying issues |
177
- | Resource exhaustion | Per-subscription `AbortSignal`; `SubscriptionRuntime.getActiveConnectionCount()` for ops visibility; consumer wires rate-limit middleware (P#10) at upgrade boundary |
178
- | Sensitive data in logs | Telemetry seam (D34) captures metadata only (`subscriptionName`, `lastEventId`, `connectionId`); never payloads (per D73 redact at output boundaries) |
179
- | Long-lived WS survives token expiry | `ctx.disconnect(code, reason)` lets consumer's auth middleware force-close when session revoked |
206
+ | Threat | Mitigation |
207
+ | ----------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
208
+ | Resume token replay | Consumer SHOULD bind token to session + rotate per reconnect; SDK ships TTL knob via custom `tracked()` envelope semantics |
209
+ | WS connection hijacking | Auth at HTTP upgrade — `WsAdapter.upgrade(ctx, raw)` exposes the `request` so consumer middleware (G11 `defineAuth`) runs BEFORE upgrade. Rejected upgrade returns null → caller responds 401 |
210
+ | Subscription input tampering | Zod schema validation BEFORE handler invocation; throws `SubscriptionInputError` carrying issues |
211
+ | Resource exhaustion | Per-subscription `AbortSignal`; `SubscriptionRuntime.getActiveConnectionCount()` for ops visibility; consumer wires rate-limit middleware (P#10) at upgrade boundary |
212
+ | Sensitive data in logs | Telemetry seam (D34) captures metadata only (`subscriptionName`, `lastEventId`, `connectionId`); never payloads (per D73 redact at output boundaries) |
213
+ | Long-lived WS survives token expiry | `ctx.disconnect(code, reason)` lets consumer's auth middleware force-close when session revoked |
180
214
 
181
215
  ### Multi-runtime compatibility matrix
182
216
 
183
- | Runtime | v1.7.0 | v1.8.x (planned) |
184
- |---|---|---|
185
- | Node 22+ | yes (canonical `ws` peer) | yes |
186
- | Cloudflare Workers | consumer adapter only | yes (`@theokit/sdk-ws-cloudflare`) |
187
- | Bun | consumer adapter only | yes (`@theokit/sdk-ws-bun`) |
188
- | Deno | consumer adapter only | yes (`@theokit/sdk-ws-deno`) |
217
+ | Runtime | v1.7.0 | v1.8.x (planned) |
218
+ | ------------------ | ------------------------- | ---------------------------------- |
219
+ | Node 22+ | yes (canonical `ws` peer) | yes |
220
+ | Cloudflare Workers | consumer adapter only | yes (`@theokit/sdk-ws-cloudflare`) |
221
+ | Bun | consumer adapter only | yes (`@theokit/sdk-ws-bun`) |
222
+ | Deno | consumer adapter only | yes (`@theokit/sdk-ws-deno`) |
189
223
 
190
224
  ### Notes
191
225
 
@@ -231,6 +265,7 @@
231
265
  ### Breaking Changes
232
266
 
233
267
  - **`Workflow` and `Eval` moved out of the main barrel into dedicated sub-paths.** The migration is mechanical (rewrite the `from` string); no behavior changes. `@theokit/sdk` main barrel no longer exports:
268
+
234
269
  - From workflow: `Workflow`, `WorkflowBuilder`, `agentStep`, `fn`, `WorkflowAlreadyRunningError`, `WorkflowCompensateNotImplementedError`, `WorkflowDuplicateStepIdError`, `WorkflowMaxIterationsExceededError`, `WorkflowNotSerializableError`, `WorkflowParallelError`, `WorkflowResumeStepNotFoundError`, `WorkflowSnapshotNotFoundError` — **import from `@theokit/sdk/workflow` instead**.
235
270
  - From eval: `Eval`, `EvalAlreadyRunningError`, `Scorers` — **import from `@theokit/sdk/eval` instead**.
236
271
  - From `types/*`: type aliases for workflow + eval (e.g., `EvalRun`, `Scorer`, `Score`, `EvalOptions`, `EvalAggregate`, `Step`, `FnStep`, etc.) no longer reach the main barrel via `types/index.ts`; surface only through the new sub-paths.
@@ -238,6 +273,7 @@
238
273
  Rationale: Interface Segregation. The barrel exported 17+ feature areas, forcing consumers to pay the DTS cost of `Workflow`+`Eval` even if they only used `Agent`+`Memory`. Sub-paths reduce DTS surface and align with the existing pattern (`@theokit/sdk/cron`, `/tools`, `/path-safety`, `/task-store`, `/errors`).
239
274
 
240
275
  **Migration:**
276
+
241
277
  ```ts
242
278
  // Before
243
279
  import { Workflow, Eval, Scorers } from "@theokit/sdk";
@@ -271,18 +307,22 @@
271
307
  - **`Memory.create({ index: { backend: "lance" } })` is now wired end-to-end.** The `LanceIndex` implementation existed since 2026-05-17 (ADR D43) but `IndexManager.open` did not dispatch — public API accepted `backend: "lance"` silently and always fell through to SQLite. Fix: factory dispatcher in `IndexManager.open` + new portable `MemoryIndex` interface + new `LanceMemoryAdapter` wrapper + `@lancedb/lancedb` declared as optional `peerDependency` (`^0.30.0`).
272
308
 
273
309
  **Migration path:** consumer that wants Lance:
310
+
274
311
  ```bash
275
312
  pnpm add @lancedb/lancedb apache-arrow@^18.1.0
276
313
  ```
314
+
277
315
  ```ts
278
316
  await Memory.create({
279
317
  index: { backend: "lance" },
280
318
  embedding: { provider: "openai", apiKey: process.env.OPENAI_API_KEY },
281
319
  });
282
320
  ```
321
+
283
322
  Default keeps SQLite (zero added deps, zero breaking change vs 1.3.0).
284
323
 
285
324
  **When to opt-in (benchmark evidence — `.claude/knowledge-base/benchmarks/memory-backends-2026-05-31.md`):**
325
+
286
326
  - Lance wins **43x** ingest throughput at 100k facts (59849 ops/s vs SQLite-vec 1875 ops/s).
287
327
  - Lance uses **65% less disk** at 100k (33.8 MB vs 93.5 MB).
288
328
  - SQLite-vec recall p95 stays competitive up to 100k (~25 ms). Use Lance when ingest velocity or disk pressure matters; SQLite handles latency well below 1M facts.
@@ -290,6 +330,7 @@
290
330
  **EC-1 hardening:** new `ConfigurationError({code:"invalid_memory_backend"})` for typo-protection — `backend: "lancedb"` (typo) now throws instead of silently falling back to SQLite. Same hardening for `lance_requires_embedding` and `lance_backend_unavailable` typed errors.
291
331
 
292
332
  **Gotchas:**
333
+
293
334
  - `@lancedb/lancedb` ships prebuilds for linux-x64-gnu, darwin-arm64, darwin-x64, win32-x64-msvc. Alpine/musl/ARM-Linux require `node-gyp` toolchain. SQLite default covers those cases.
294
335
  - Bundlers (Next.js/Vite/webpack/rollup) must externalize `@lancedb/lancedb`:
295
336
  - Next.js: `experimental.serverComponentsExternalPackages: ["@lancedb/lancedb"]`
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { cpSync, existsSync } from "node:fs";
2
+ import { cpSync, existsSync, mkdirSync, readdirSync } from "node:fs";
3
3
  import { join } from "node:path";
4
4
 
5
5
  // EC-1: Node version guard (matches SDK engines.node)
@@ -14,21 +14,55 @@ const cwd = process.cwd();
14
14
  const targetDir = join(cwd, ".claude");
15
15
  const force = process.argv.includes("--force");
16
16
 
17
- // EC-4: Check .claude/, AGENTS.md, CLAUDE.md independently
18
- const conflicts = [];
19
- if (existsSync(targetDir)) conflicts.push(".claude/");
20
- if (existsSync(join(cwd, "AGENTS.md"))) conflicts.push("AGENTS.md");
21
- if (existsSync(join(cwd, "CLAUDE.md"))) conflicts.push("CLAUDE.md");
17
+ /**
18
+ * Merge-copy: recursively copies src into dest, skipping files that already
19
+ * exist in dest. With --force, overwrites everything.
20
+ * Returns { added, skipped } counts.
21
+ */
22
+ function mergeCopy(src, dest) {
23
+ let added = 0;
24
+ let skipped = 0;
25
+ mkdirSync(dest, { recursive: true });
26
+ for (const entry of readdirSync(src, { withFileTypes: true })) {
27
+ const srcPath = join(src, entry.name);
28
+ const destPath = join(dest, entry.name);
29
+ if (entry.isDirectory()) {
30
+ const sub = mergeCopy(srcPath, destPath);
31
+ added += sub.added;
32
+ skipped += sub.skipped;
33
+ } else if (force || !existsSync(destPath)) {
34
+ cpSync(srcPath, destPath);
35
+ added++;
36
+ } else {
37
+ skipped++;
38
+ }
39
+ }
40
+ return { added, skipped };
41
+ }
22
42
 
23
- if (conflicts.length > 0 && !force) {
24
- console.error(`Already exists: ${conflicts.join(", ")}. Use --force to overwrite.`);
25
- process.exit(1);
43
+ // Merge .claude/ directory (skills, rules, settings)
44
+ const dotClaude = mergeCopy(join(templateDir, "dot-claude"), targetDir);
45
+
46
+ // Merge root files (AGENTS.md, CLAUDE.md)
47
+ let rootAdded = 0;
48
+ let rootSkipped = 0;
49
+ for (const file of ["AGENTS.md", "CLAUDE.md"]) {
50
+ const dest = join(cwd, file);
51
+ if (force || !existsSync(dest)) {
52
+ cpSync(join(templateDir, file), dest);
53
+ rootAdded++;
54
+ } else {
55
+ rootSkipped++;
56
+ }
26
57
  }
27
58
 
28
- cpSync(join(templateDir, "dot-claude"), targetDir, { recursive: true });
29
- cpSync(join(templateDir, "AGENTS.md"), join(cwd, "AGENTS.md"));
30
- cpSync(join(templateDir, "CLAUDE.md"), join(cwd, "CLAUDE.md"));
59
+ const totalAdded = dotClaude.added + rootAdded;
60
+ const totalSkipped = dotClaude.skipped + rootSkipped;
31
61
 
32
- console.log("Created .claude/ with TheoKit SDK configuration (15 domain skills).");
33
- console.log("Created AGENTS.md (cross-agent) and CLAUDE.md (Claude Code).");
34
- console.log("\nNext: open Claude Code and start building with TheoKit.");
62
+ if (totalAdded === 0) {
63
+ console.log("All TheoKit SDK files already present. Nothing to add.");
64
+ console.log("Use --force to overwrite existing files.");
65
+ } else {
66
+ console.log(`Added ${totalAdded} file(s). Skipped ${totalSkipped} existing file(s).`);
67
+ console.log("\nNext: open Claude Code and start building with TheoKit.");
68
+ }