@theokit/sdk 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/CHANGELOG.md +113 -0
  2. package/dist/a2a/index.cjs +103 -48
  3. package/dist/a2a/index.cjs.map +1 -1
  4. package/dist/a2a/index.js +104 -49
  5. package/dist/a2a/index.js.map +1 -1
  6. package/dist/compaction.cjs +78 -0
  7. package/dist/compaction.cjs.map +1 -0
  8. package/dist/compaction.d.cts +76 -0
  9. package/dist/compaction.d.ts +76 -0
  10. package/dist/compaction.js +70 -0
  11. package/dist/compaction.js.map +1 -0
  12. package/dist/{cron-B_H8rn-j.d.cts → cron-B656C3iq.d.cts} +8 -0
  13. package/dist/{cron-DX6HbHxd.d.ts → cron-CM2M9mhB.d.ts} +8 -0
  14. package/dist/cron.cjs +104 -57
  15. package/dist/cron.cjs.map +1 -1
  16. package/dist/cron.d.cts +1 -1
  17. package/dist/cron.d.ts +1 -1
  18. package/dist/cron.js +104 -57
  19. package/dist/cron.js.map +1 -1
  20. package/dist/eval.cjs +296 -73
  21. package/dist/eval.cjs.map +1 -1
  22. package/dist/eval.d.cts +2 -0
  23. package/dist/eval.d.ts +2 -0
  24. package/dist/eval.js +295 -75
  25. package/dist/eval.js.map +1 -1
  26. package/dist/index.cjs +135 -65
  27. package/dist/index.cjs.map +1 -1
  28. package/dist/index.d.cts +42 -7
  29. package/dist/index.d.ts +42 -7
  30. package/dist/index.js +135 -66
  31. package/dist/index.js.map +1 -1
  32. package/dist/internal/agent-loop/loop.d.ts +5 -0
  33. package/dist/internal/eval/code-runner.d.ts +28 -0
  34. package/dist/internal/llm/model-capabilities.d.ts +40 -0
  35. package/dist/internal/llm/model-identifier.d.ts +9 -1
  36. package/dist/internal/llm/model-option.d.ts +38 -0
  37. package/dist/internal/persistence/index.cjs +68 -0
  38. package/dist/internal/persistence/index.cjs.map +1 -1
  39. package/dist/internal/persistence/index.d.cts +1 -0
  40. package/dist/internal/persistence/index.d.ts +1 -0
  41. package/dist/internal/persistence/index.js +65 -1
  42. package/dist/internal/persistence/index.js.map +1 -1
  43. package/dist/internal/persistence/jsonl.d.cts +34 -0
  44. package/dist/internal/persistence/jsonl.d.ts +34 -0
  45. package/dist/internal/runtime/compression/compression-attempt.d.ts +24 -0
  46. package/dist/internal/runtime/compression/compression-config.d.ts +33 -0
  47. package/dist/internal/runtime/compression/compression-decision.d.ts +10 -0
  48. package/dist/internal/runtime/compression/compression-helpers.d.ts +18 -0
  49. package/dist/internal/runtime/compression/compression-model-registry.d.ts +41 -0
  50. package/dist/internal/runtime/compression/compression-summarizer.d.ts +29 -0
  51. package/dist/internal/runtime/context/project-instructions.d.ts +66 -0
  52. package/dist/internal/runtime/context/replay-history.d.ts +43 -0
  53. package/dist/internal/runtime/hooks/hooks-frontmatter.d.ts +1 -1
  54. package/dist/internal/runtime/skills/discover-skills.d.ts +68 -0
  55. package/dist/internal/runtime/skills/skills-block.d.ts +18 -0
  56. package/dist/internal/runtime/skills/subagent-tool-scope.d.ts +25 -0
  57. package/dist/messages.cjs +24 -0
  58. package/dist/messages.cjs.map +1 -0
  59. package/dist/messages.d.cts +33 -0
  60. package/dist/messages.d.ts +33 -0
  61. package/dist/messages.js +20 -0
  62. package/dist/messages.js.map +1 -0
  63. package/dist/models.cjs +233 -0
  64. package/dist/models.cjs.map +1 -0
  65. package/dist/models.d.cts +16 -0
  66. package/dist/models.d.ts +16 -0
  67. package/dist/models.js +228 -0
  68. package/dist/models.js.map +1 -0
  69. package/dist/permission-engine.d.ts +12 -4
  70. package/dist/project.cjs +149 -0
  71. package/dist/project.cjs.map +1 -0
  72. package/dist/project.d.cts +14 -0
  73. package/dist/project.d.ts +14 -0
  74. package/dist/project.js +146 -0
  75. package/dist/project.js.map +1 -0
  76. package/dist/sandbox/index.cjs +71 -1
  77. package/dist/sandbox/index.cjs.map +1 -1
  78. package/dist/sandbox/index.d.cts +1 -0
  79. package/dist/sandbox/index.d.ts +1 -0
  80. package/dist/sandbox/index.js +70 -2
  81. package/dist/sandbox/index.js.map +1 -1
  82. package/dist/sandbox/provision.d.cts +53 -0
  83. package/dist/sandbox/provision.d.ts +53 -0
  84. package/dist/sandbox/shell-escape.d.cts +8 -0
  85. package/dist/sandbox/shell-escape.d.ts +8 -0
  86. package/dist/scorers.d.ts +19 -1
  87. package/dist/skills.cjs +282 -0
  88. package/dist/skills.cjs.map +1 -0
  89. package/dist/skills.d.cts +19 -0
  90. package/dist/skills.d.ts +19 -0
  91. package/dist/skills.js +279 -0
  92. package/dist/skills.js.map +1 -0
  93. package/dist/subagents.cjs +24 -0
  94. package/dist/subagents.cjs.map +1 -0
  95. package/dist/subagents.d.cts +14 -0
  96. package/dist/subagents.d.ts +14 -0
  97. package/dist/subagents.js +21 -0
  98. package/dist/subagents.js.map +1 -0
  99. package/dist/types/agent.d.ts +8 -0
  100. package/dist/types/eval.d.ts +71 -0
  101. package/package.json +74 -14
package/CHANGELOG.md CHANGED
@@ -1,5 +1,107 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.5.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 301d4a3: Eval harness (M6, Tema E): first-party SWE-bench-style primitives over the existing `Eval`/`Scorers`/`SandboxBackend` surface, with zero new runtime dependencies.
8
+
9
+ - `loadJsonl(path, { map? })` from `@theokit/sdk/eval` — generic JSONL dataset loader with line-numbered `JsonlParseError`; the dataset schema is the caller's via `map`.
10
+ - Durable batch: `Eval.run({ persist: { path, key, resume }, classify })` flushes each row the instant it completes and resumes a crashed run by skipping already-persisted rows.
11
+ - `provisionRepo(sandbox, { repoUrl, ref, instanceId })` + `RepoProvisionError` — portable git clone+checkout over `SandboxBackend.execute`.
12
+ - `Scorers.verifyGate({ failToPass, passToPass })` — grades a patch by test exit-code via the sandbox; `EvalRowResult.artifact` carries `{ diff, applies }`.
13
+
14
+ - 32180fe: M7 (Tema F) SDK slice — PermissionEngine default-deny + plugin wiring.
15
+
16
+ - `PermissionEngine` now takes `{ defaultAction }` (default `"allow"`, backward-compatible) — opt into default-deny with `new PermissionEngine(rules, { defaultAction: "deny" })`. `PermissionAction`/`PermissionRule`/`PermissionEngineOptions` types are now exported.
17
+ - New `createPermissionPlugin(engine, opts?)` wires a `PermissionEngine` into the `definePlugin` `pre_tool_call` veto (the engine was previously exported-but-unwired): `deny` blocks, `ask` defers to `opts.onAsk` (fail-closed block by default), `allow` passes.
18
+
19
+ ## 2.4.0
20
+
21
+ ### Minor Changes
22
+
23
+ - a21949f: M1-5 — `@theokit/sdk/messages`: pure readers over the `SDKMessage` stream (plan `m1-sdkmessage-readers`).
24
+
25
+ Consumers reading the `SDKMessage` stream had to hand-roll a wire-event mapper. The SDK now ships three pure readers on a dedicated sub-path, promoting the proven first-party hand-roll onto the SDK's own types:
26
+
27
+ - `assistantText(msg)` — concatenates an assistant message's `text` blocks; `""` for any non-assistant message (or one with no text). `tool_use` blocks are ignored.
28
+ - `extractToolUses(msg)` — returns the assistant message's `ToolUseBlock`s; `[]` for non-assistant. Reads the assistant content blocks, NOT the separate `SDKToolUseMessage` (`type:"tool_call"`) lifecycle event.
29
+ - `costAmountUsd(cost)` — reads `RunResult.cost.amountUsd` preserving `number | undefined` verbatim. An unknown cost stays `undefined` (never coerced to `$0`), distinct from a real `$0` subscription-included route — the cost-honesty contract (ADR D377).
30
+
31
+ All three are pure (no I/O, inputs never mutated). Zero new dependencies.
32
+
33
+ - fb268f9: M1-4 — fire the `stop` file-based hook + honor `feedback` as a bounded re-prompt (plan `m1-stop-hook-reflection`).
34
+
35
+ The `HookEvent "stop"` was declared but never dispatched. A local agent now fires `stop` once when it finishes a turn cleanly (not on an errored run or an iteration-ceiling truncation). A `stop` hook returning `{"decision":"feedback","feedback":"…"}` re-prompts the agent with that text and the loop continues — a bounded reflection ladder capped at `MAX_STOP_FEEDBACK_ATTEMPTS` (2), mirroring the existing nudge ceiling, so a hook cannot loop forever. With `allow` or no hook, the run finishes normally; a `deny` at `stop` also finishes the run without re-prompting, because the answer has already been produced. Reuses the existing `HooksExecutor` — zero new dependencies. Hooks remain file-based (no programmatic callback).
36
+
37
+ - 5b8c9e7: M2-1 — `@theokit/sdk/compaction`: public compaction / context-management helpers (plan `m2-compaction-public-api`).
38
+
39
+ Promotes the SDK's compaction capability to a public sub-path so consumers can manage the context window without reaching into `internal/`:
40
+
41
+ - `compactTranscript(messages, { keepRecent = 6, summarize? })` — keep the last `keepRecent` turns verbatim, preserve leading system turns, and either summarize the older window (via an optional `summarize` callback that can wire the SDK's internal LLM summarizer) or drop it. Reuses the internal `selectCompressionWindow` (no second algorithm). Never mutates its input.
42
+ - `buildCheckpoint(label?)` / `filterFromLatestCheckpoint(messages)` / `CHECKPOINT_MARKER` — a string-sentinel checkpoint: mark a point in a transcript and later filter back to the turns after the most recent marker.
43
+ - `isContextOverflowError(err)` — true iff `err` is a `TheokitAgentError` (or subclass) reporting the typed `context_too_long` code (reads both `err.code` and `err.metadata?.code`; no brittle message regex).
44
+
45
+ Operates on the SDK's own `CompressibleMessage` type (re-exported). Zero new dependencies.
46
+
47
+ - 1cf9c16: M2-4 — per-model capability catalog public + OpenRouter slug-suffix fix (plan `m2-model-capabilities`).
48
+
49
+ - **New `@theokit/sdk/models` subpath.** `resolveModelCapabilities(modelId): ModelCapabilities` (previously dead `@internal`) is now public — returns a model's capability flags + `maxContextTokens`/`maxOutputTokens` from a static, OFFLINE catalog (pure, sync, no network). Pair `maxContextTokens` with `@theokit/sdk/compaction`'s `shouldCompact`.
50
+ - **Fix:** OpenRouter `:variant` suffixes (`:free`/`:nitro`/`:floor`/`:beta`) were not stripped before the catalog lookup, so `openrouter/openai/gpt-4o:free` fell back to conservative defaults (4096) instead of the real 128k window. The suffix is now stripped (alongside the existing routing-prefix strip); unknown models still get conservative defaults.
51
+
52
+ Zero new dependencies.
53
+
54
+ - b31283c: M2-2 — pre-call token estimate + compaction decision (plan `m2-token-estimate`).
55
+
56
+ Two pure, zero-dependency helpers on the `@theokit/sdk/compaction` subpath (siblings of `compactTranscript`/`isContextOverflowError`):
57
+
58
+ - `estimateTokens(text)` — a tokenizer-free token estimate via the ~4-chars-per-token heuristic (`ceil(text.length / 4)`): `""` → 0, any non-empty text → ≥ 1. A cheap PRE-CALL gate, not exact tokenization.
59
+ - `shouldCompact({ estimated, contextWindow, buffer })` — decide BEFORE sending whether to compact: `true` when `estimated >= contextWindow - buffer`. Pure; the caller supplies the window (e.g. from `resolveModelCapabilities`), keeping it decoupled from any per-model catalog.
60
+
61
+ No tokenizer dependency.
62
+
63
+ - 29b1c8c: M4-2 — hierarchical project-instruction reader/writer (plan `m4-project-instructions`).
64
+
65
+ New `@theokit/sdk/project` subpath:
66
+
67
+ - `readProjectInstructions(cwd, options?)` — walk up from `cwd` collecting `<dir>/<filename>` (default `THEO.md`; configurable) up to the filesystem root (or `options.stopDir`). Returns `{ files, content }`: `files` are the found files nearest-first (`{ path, content }[]`, read in full), `content` is a reduction chosen by `options.scope` — `"nearest"` (innermost) or `"merged"` (all joined root-first, nearest text last). NEVER throws — missing/unreadable/non-file paths are skipped.
68
+ - `writeProjectInstructions(cwd, content, options?)` — write `<cwd>/<filename>` atomically (temp + fsync + rename). Fails loud on write errors (unlike the best-effort reader).
69
+
70
+ Composes the SDK's hardened `walkUpForFile` discovery + the atomic `replaceFileAtomic` writer (Rule 9). Zero new dependencies.
71
+
72
+ - f9be17a: M4-1 — first-party skill discovery + `<skills>` block (plan `m4-skills-discovery`).
73
+
74
+ New `@theokit/sdk/skills` subpath exposing two pure first-party primitives the SDK runtime already uses internally:
75
+
76
+ - `discoverSkills(dir, options?)` — discover `<dir>/<name>/SKILL.md` files under an ARBITRARY directory (not a hardcoded `.theokit/skills` root), parsing strict YAML frontmatter (`name`/`description` required; `category`/`dependencies` optional) and returning `Skill[]` (the skill BODY is never included). A subdirectory whose realpath escapes `dir` via symlink is skipped (symlink-escape guard, reusing `@theokit/sdk/path-safety`). NEVER throws — a missing/unreadable/non-directory path yields `[]`. A `SKILL.md` with malformed frontmatter is excluded and optionally reported via `options.onInvalidSkill`; a directory WITHOUT a `SKILL.md` is silently skipped.
77
+ - `buildSkillsBlock(skills)` — render the prompt-injection-safe `<skills>` system-prompt block (name + description XML-escaped); returns `undefined` for an empty list.
78
+
79
+ The internal `SkillsManager` (`.theokit/skills` discovery) and `SkillsPromptProvider` (`<skills>` injection) now delegate to these primitives — single source of truth, behavior preserved (golden + contract tests unchanged). Zero new dependencies.
80
+
81
+ - f2265d7: M4-6 — sub-agent tool scoping via `AgentDefinition.tools` (plan `m4-tool-scoping`).
82
+
83
+ - `AgentDefinition` gains an optional `tools?: string[]` — a tool-name whitelist. When set, the sub-agent may ONLY call tools whose canonical (post-repair, lowercase) name is in the list; any other call is vetoed at dispatch. Absent/empty → unscoped (inherits the parent's full toolset). Backward-compatible.
84
+ - `.theokit/agents/*.md` subagents can declare it as a comma/space-separated frontmatter field (`tools: read_file, list_dir`).
85
+ - New `@theokit/sdk/subagents` subpath: `subagentToolWhitelist(definition): Set<string> | undefined` + `withSubagentToolScope(definition, fn)` enforce the whitelist via the SDK's existing `withToolWhitelist` dispatch veto — the same enforcement `Agent.fork`'s `allowedTools` uses, NOT `PermissionEngine`. A `tools: ["read_file"]` sub-agent provably cannot call `write_file`/`shell_exec`.
86
+
87
+ Zero new dependencies.
88
+
89
+ - f1de451: M5-8 — public `parseModelId` + `humanizeModelName` + `toModelOption` on `@theokit/sdk/models` (plan `m5-model-option`).
90
+
91
+ - `parseModelId(modelId): { provider, name }` is now public (promoted from `@internal`) — splits the provider prefix from the model name, OpenRouter-routing + tag-suffix aware.
92
+ - `humanizeModelName(modelId): string` — a best-effort, deterministic human label: strips the routing/vendor prefix, title-cases the core model segment (known acronyms upper-cased), and appends an OpenRouter `:variant` in parens (`"openrouter/openai/gpt-4o:free"` → `"GPT 4o (free)"`). Not vendor-canonical marketing names.
93
+ - `toModelOption(modelId): { value, label, provider }` — a dropdown-ready entry composing the two.
94
+
95
+ Lets `@theokit/ui` model selectors + the `create-theokit` template stop hand-rolling slug→label. Zero new dependencies.
96
+
97
+ ### Patch Changes
98
+
99
+ - 1abda16: M2-3 — `context_too_long` reaches the run boundary (plan `m2-context-overflow-boundary`).
100
+
101
+ Fixes a code-at-boundary bug: the loop captured the error code from the error's top-level `.code`, which the provider mappers set to a PROVIDER-PREFIXED string (`anthropic_context_too_long` / `${providerId}_context_too_long`), while the CANONICAL `ErrorCode` (`context_too_long`) lives on `metadata.code`. So `RunResult.error.code` surfaced the prefixed form and a consumer checking `result.error.code === "context_too_long"` missed it.
102
+
103
+ `registerLoopError` now prefers `cause.metadata?.code` over the top-level `.code`, so the canonical code reaches the boundary for every provider (verified by a 400-context-overflow contract test through the real `mapAnthropicError`/`mapOpenAICompatibleError`). The prefixed form remains on the thrown `TheokitAgentError.code` for telemetry. Set-once invariant preserved; top-level `.code` fallback unchanged when there is no `metadata.code`.
104
+
3
105
  ## 2.3.0
4
106
 
5
107
  ### Minor Changes
@@ -92,8 +194,19 @@
92
194
 
93
195
  ## [Unreleased]
94
196
 
197
+ ### Fixed
198
+
199
+ - **`context_too_long` reaches the run boundary (M2-3).** `registerLoopError` now prefers the canonical `cause.metadata?.code` over the provider-prefixed top-level `.code`, so `RunResult.error.code` is `context_too_long` (not `anthropic_context_too_long`) for every provider. Set-once + top-level fallback preserved.
200
+
201
+ ### Added
202
+
203
+ - **Pre-call token estimate + compaction decision (M2-2).** `estimateTokens(text)` (tokenizer-free ~4-chars/token; `""`→0, non-empty→≥1) + `shouldCompact({estimated,contextWindow,buffer})` (`true` when `estimated >= contextWindow - buffer`; pure, caller supplies the window) on the `@theokit/sdk/compaction` subpath. No tokenizer dep.
204
+ - **Per-model capability catalog public + OpenRouter slug-suffix fix (M2-4).** New `@theokit/sdk/models` subpath: `resolveModelCapabilities(modelId)` (was `@internal`) — pure/sync/offline capability flags + `maxContextTokens`/`maxOutputTokens`. Fixes an OpenRouter `:variant` suffix lookup miss (fell back to 4096 instead of the real window).
205
+
95
206
  ### Added
96
207
 
208
+ - `@theokit/sdk/compaction` — public compaction / context-management helpers so you manage the context window without reaching into `internal/`. `compactTranscript(messages, { keepRecent = 6, summarize? })` keeps the last `keepRecent` turns, preserves leading system turns, and either summarizes the older window (via an optional callback wiring the internal LLM summarizer) or drops it — reusing the internal compaction window (no second algorithm), never mutating its input. `buildCheckpoint`/`filterFromLatestCheckpoint`/`CHECKPOINT_MARKER` give a string-sentinel checkpoint to bound replay to "since the last checkpoint". `isContextOverflowError(err)` is true for a `TheokitAgentError` reporting the typed `context_too_long` code (checks `code` + `metadata.code`; no message regex). Operates on the SDK's own `CompressibleMessage` (re-exported); zero new dependencies. (M2-1)
209
+ - `@theokit/sdk/messages` — pure readers over the `SDKMessage` stream so you stop hand-rolling a wire-event mapper. `assistantText(msg)` concatenates an assistant message's text (`""` for non-assistant), `extractToolUses(msg)` returns its tool-use blocks (`[]` for non-assistant; reads the assistant content blocks, not the separate `tool_call` lifecycle event), and `costAmountUsd(cost)` reads `RunResult.cost.amountUsd` preserving `number | undefined` verbatim — an unknown cost stays `undefined`, never silently coerced to `$0` (cost-honesty, ADR D377). Zero new dependencies. (#34)
97
210
  - `createSquad({ agents })` — a thin convenience for sequential agent teams. Runs agents in order, threading each output into the next agent's prompt; returns `{ result, status, steps }`. Composes `Workflow` + `agentStep` internally (no new orchestration engine). `process: "hierarchical"` throws a guiding `ConfigurationError` (use subagents / `@theokit/sdk-handoff`); empty `agents` → `ConfigurationError(code: "invalid_squad")`.
98
211
 
99
212
  ### Fixed
@@ -4922,6 +4922,8 @@ function parseSubagentMarkdown(raw, filename) {
4922
4922
  if (fields.model !== void 0) {
4923
4923
  definition.model = fields.model === "inherit" ? "inherit" : { id: fields.model };
4924
4924
  }
4925
+ const tools = fields.tools?.split(/[\s,]+/).map((t) => t.trim()).filter((t) => t.length > 0);
4926
+ if (tools !== void 0 && tools.length > 0) definition.tools = tools;
4925
4927
  return { name, definition };
4926
4928
  }
4927
4929
  function splitFrontmatter2(raw, filename) {
@@ -5125,25 +5127,33 @@ ${lines.join("\n")}
5125
5127
  }
5126
5128
  });
5127
5129
 
5130
+ // src/internal/runtime/skills/skills-block.ts
5131
+ function buildSkillsBlock(skills) {
5132
+ if (skills.length === 0) return void 0;
5133
+ const lines = skills.map(
5134
+ (skill) => ` - ${escapeBlockBody(skill.name)}: ${escapeBlockBody(skill.description)}`
5135
+ );
5136
+ return `<skills>
5137
+ ${lines.join("\n")}
5138
+ </skills>`;
5139
+ }
5140
+ var init_skills_block = __esm({
5141
+ "src/internal/runtime/skills/skills-block.ts"() {
5142
+ init_escape();
5143
+ }
5144
+ });
5145
+
5128
5146
  // src/internal/runtime/system-prompt/sources/skills-provider.ts
5129
5147
  var SkillsPromptProvider;
5130
5148
  var init_skills_provider = __esm({
5131
5149
  "src/internal/runtime/system-prompt/sources/skills-provider.ts"() {
5132
- init_escape();
5150
+ init_skills_block();
5133
5151
  SkillsPromptProvider = class {
5134
5152
  id = "skills";
5135
5153
  priority = 20;
5136
5154
  contribute(ctx) {
5137
5155
  if (ctx.skillsAutoInject === false) return Promise.resolve(void 0);
5138
- if (ctx.skills.length === 0) return Promise.resolve(void 0);
5139
- const lines = ctx.skills.map((skill) => {
5140
- const name = escapeBlockBody(skill.name);
5141
- const description = escapeBlockBody(skill.description);
5142
- return ` - ${name}: ${description}`;
5143
- });
5144
- return Promise.resolve(`<skills>
5145
- ${lines.join("\n")}
5146
- </skills>`);
5156
+ return Promise.resolve(buildSkillsBlock(ctx.skills));
5147
5157
  }
5148
5158
  };
5149
5159
  }
@@ -6399,36 +6409,71 @@ var init_skill_frontmatter = __esm({
6399
6409
  init_yaml_frontmatter();
6400
6410
  }
6401
6411
  });
6402
- function tryParseSkill(raw, fallbackName, source) {
6412
+ async function discoverSkills(dir, options) {
6413
+ let entries;
6414
+ try {
6415
+ entries = await readWorkspaceDir(dir, "skills_read_error", "skills directory");
6416
+ } catch {
6417
+ return [];
6418
+ }
6419
+ const skills = [];
6420
+ for (const entry of entries) {
6421
+ if (!entry.isDirectory()) continue;
6422
+ let skillDir;
6423
+ try {
6424
+ skillDir = safePathJoin(dir, entry.name);
6425
+ assertNoSymlinkEscape(skillDir, dir);
6426
+ } catch {
6427
+ continue;
6428
+ }
6429
+ const skillPath = path.join(skillDir, "SKILL.md");
6430
+ let raw;
6431
+ try {
6432
+ raw = await promises.readFile(skillPath, "utf8");
6433
+ } catch {
6434
+ continue;
6435
+ }
6436
+ const skill = tryParseSkill(raw, entry.name, skillPath, options);
6437
+ if (skill !== void 0) skills.push(skill);
6438
+ }
6439
+ return skills;
6440
+ }
6441
+ function tryParseSkill(raw, fallbackName, source, options) {
6403
6442
  try {
6404
6443
  const frontmatter = parseSkillFrontmatter(raw, fallbackName);
6405
- const metadata = {
6444
+ const skill = {
6406
6445
  name: frontmatter.name,
6407
6446
  description: frontmatter.description,
6408
6447
  source
6409
6448
  };
6410
- if (frontmatter.category !== void 0) metadata.category = frontmatter.category;
6411
- if (frontmatter.dependencies !== void 0) metadata.dependencies = frontmatter.dependencies;
6412
- return metadata;
6449
+ if (frontmatter.category !== void 0) skill.category = frontmatter.category;
6450
+ if (frontmatter.dependencies !== void 0) skill.dependencies = frontmatter.dependencies;
6451
+ return skill;
6413
6452
  } catch (cause) {
6414
6453
  if (cause instanceof ConfigurationError) {
6415
- const code = cause.code ?? "unknown";
6416
- process.stderr.write(
6417
- `[theokit-sdk] skill ${fallbackName} skipped (${code}): ${cause.message}
6418
- `
6419
- );
6454
+ options?.onInvalidSkill?.({
6455
+ name: fallbackName,
6456
+ source,
6457
+ code: cause.code ?? "unknown",
6458
+ message: cause.message
6459
+ });
6420
6460
  return void 0;
6421
6461
  }
6422
6462
  throw cause;
6423
6463
  }
6424
6464
  }
6425
- var SkillsManager;
6426
- var init_skills_manager = __esm({
6427
- "src/internal/runtime/skills/skills-manager.ts"() {
6465
+ var init_discover_skills = __esm({
6466
+ "src/internal/runtime/skills/discover-skills.ts"() {
6428
6467
  init_errors();
6429
6468
  init_path_guard();
6430
6469
  init_workspace_dir();
6431
6470
  init_skill_frontmatter();
6471
+ }
6472
+ });
6473
+ var SkillsManager;
6474
+ var init_skills_manager = __esm({
6475
+ "src/internal/runtime/skills/skills-manager.ts"() {
6476
+ init_discover_skills();
6432
6477
  SkillsManager = class {
6433
6478
  constructor(cwd, _enabled, settingSourcesIncludeProject) {
6434
6479
  this.cwd = cwd;
@@ -6445,28 +6490,15 @@ var init_skills_manager = __esm({
6445
6490
  await this.refresh();
6446
6491
  }
6447
6492
  async refresh() {
6448
- this.skills = [];
6449
6493
  const skillsRoot = path.join(this.cwd, ".theokit", "skills");
6450
- const entries = await readWorkspaceDir(skillsRoot, "skills_read_error", "skills directory");
6451
- for (const entry of entries) {
6452
- if (!entry.isDirectory()) continue;
6453
- let skillDir;
6454
- try {
6455
- skillDir = safePathJoin(skillsRoot, entry.name);
6456
- assertNoSymlinkEscape(skillDir, skillsRoot);
6457
- } catch {
6458
- continue;
6459
- }
6460
- const skillPath = path.join(skillDir, "SKILL.md");
6461
- let raw;
6462
- try {
6463
- raw = await promises.readFile(skillPath, "utf8");
6464
- } catch {
6465
- continue;
6494
+ this.skills = await discoverSkills(skillsRoot, {
6495
+ onInvalidSkill: (info) => {
6496
+ process.stderr.write(
6497
+ `[theokit-sdk] skill ${info.name} skipped (${info.code}): ${info.message}
6498
+ `
6499
+ );
6466
6500
  }
6467
- const metadata = tryParseSkill(raw, entry.name, skillPath);
6468
- if (metadata !== void 0) this.skills.push(metadata);
6469
- }
6501
+ });
6470
6502
  }
6471
6503
  list() {
6472
6504
  return Promise.resolve(this.skills);
@@ -6928,6 +6960,7 @@ async function initLoopContext(inputs) {
6928
6960
  finalStatus: "finished",
6929
6961
  usage: new UsageAccumulator(),
6930
6962
  nudgeAttempts: 0,
6963
+ stopFeedbackAttempts: 0,
6931
6964
  ...memoryProviderHandle !== void 0 ? { memoryProviderHandle } : {},
6932
6965
  ...memorySystemPromptAdditions !== void 0 ? { memorySystemPromptAdditions } : {}
6933
6966
  };
@@ -7083,8 +7116,9 @@ function registerLoopError(ctx, cause) {
7083
7116
  if (ctx.error !== void 0) return;
7084
7117
  const rawMessage = cause?.message;
7085
7118
  const message = typeof rawMessage === "string" ? rawMessage : cause instanceof Error ? cause.message : String(cause);
7119
+ const metaCode = cause?.metadata?.code;
7086
7120
  const rawCode = cause?.code;
7087
- const code = typeof rawCode === "string" ? rawCode : void 0;
7121
+ const code = typeof metaCode === "string" ? metaCode : typeof rawCode === "string" ? rawCode : void 0;
7088
7122
  ctx.error = code !== void 0 ? { message, code, cause } : { message, cause };
7089
7123
  }
7090
7124
  async function runCollectorLoop(generator, inputs, ctx) {
@@ -8167,6 +8201,28 @@ function shouldNudgeAndContinue(ctx, llmOutput) {
8167
8201
  });
8168
8202
  return true;
8169
8203
  }
8204
+ async function reflectAfterStop(inputs, ctx) {
8205
+ const result = await inputs.hooks.run({
8206
+ event: "stop",
8207
+ agentId: inputs.agentId,
8208
+ runId: inputs.runId
8209
+ });
8210
+ if (result.blocked) return false;
8211
+ if (ctx.stopFeedbackAttempts >= MAX_STOP_FEEDBACK_ATTEMPTS) return false;
8212
+ const feedback = result.decisions.find(
8213
+ (d) => d.decision === "feedback" && (d.feedback ?? "").length > 0
8214
+ )?.feedback;
8215
+ if (feedback === void 0) return false;
8216
+ ctx.stopFeedbackAttempts += 1;
8217
+ ctx.messages.push({ role: "user", content: [{ type: "text", text: feedback }] });
8218
+ return true;
8219
+ }
8220
+ async function finishOrReflect(inputs, ctx, llmOutput) {
8221
+ if (shouldNudgeAndContinue(ctx, llmOutput)) return "continue";
8222
+ if (await reflectAfterStop(inputs, ctx)) return "continue";
8223
+ ctx.finalStatus = "finished";
8224
+ return "done";
8225
+ }
8170
8226
  async function runIteration(inputs, ctx) {
8171
8227
  const llmOutput = await streamLlmTurn(inputs, ctx);
8172
8228
  accumulateUsage(ctx.usage, llmOutput);
@@ -8200,9 +8256,7 @@ async function continueOrTerminate(inputs, ctx, llmOutput) {
8200
8256
  await emitAssistantTextStep(inputs, ctx, llmOutput.text);
8201
8257
  }
8202
8258
  if (llmOutput.stopReason !== "tool_use" || llmOutput.toolCalls.length === 0) {
8203
- if (shouldNudgeAndContinue(ctx, llmOutput)) return "continue";
8204
- ctx.finalStatus = "finished";
8205
- return "done";
8259
+ return finishOrReflect(inputs, ctx, llmOutput);
8206
8260
  }
8207
8261
  ctx.messages.push(buildAssistantTurn(llmOutput.text, llmOutput.toolCalls));
8208
8262
  const toolResults = await dispatchTools(inputs, ctx.tools, llmOutput.toolCalls, ctx.events);
@@ -8225,7 +8279,7 @@ async function continueOrTerminate(inputs, ctx, llmOutput) {
8225
8279
  pushToolConversationSteps(ctx, llmOutput.toolCalls, toolResults);
8226
8280
  return handleToolErrorContinuation(inputs, ctx, toolResults);
8227
8281
  }
8228
- var MAX_NUDGE_ATTEMPTS;
8282
+ var MAX_NUDGE_ATTEMPTS, MAX_STOP_FEEDBACK_ATTEMPTS;
8229
8283
  var init_loop = __esm({
8230
8284
  "src/internal/agent-loop/loop.ts"() {
8231
8285
  init_budget();
@@ -8238,6 +8292,7 @@ var init_loop = __esm({
8238
8292
  init_tool_dispatch();
8239
8293
  init_usage_and_cost();
8240
8294
  MAX_NUDGE_ATTEMPTS = 2;
8295
+ MAX_STOP_FEEDBACK_ATTEMPTS = 2;
8241
8296
  }
8242
8297
  });
8243
8298