@inbrowser/agent 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/diagnostics/index.d.ts +5 -0
- package/dist/diagnostics/index.d.ts.map +1 -0
- package/dist/diagnostics/index.js +3 -0
- package/dist/diagnostics/index.js.map +1 -0
- package/dist/diagnostics/timing.d.ts +48 -0
- package/dist/diagnostics/timing.d.ts.map +1 -0
- package/dist/diagnostics/timing.js +85 -0
- package/dist/diagnostics/timing.js.map +1 -0
- package/dist/diagnostics/truthfulness.d.ts +36 -0
- package/dist/diagnostics/truthfulness.d.ts.map +1 -0
- package/dist/diagnostics/truthfulness.js +180 -0
- package/dist/diagnostics/truthfulness.js.map +1 -0
- package/dist/dispatch-memoization.d.ts +84 -0
- package/dist/dispatch-memoization.d.ts.map +1 -0
- package/dist/dispatch-memoization.js +197 -0
- package/dist/dispatch-memoization.js.map +1 -0
- package/dist/eval/comparison-report.d.ts +164 -0
- package/dist/eval/comparison-report.d.ts.map +1 -0
- package/dist/eval/comparison-report.js +316 -0
- package/dist/eval/comparison-report.js.map +1 -0
- package/dist/eval/fixture.d.ts +74 -0
- package/dist/eval/fixture.d.ts.map +1 -0
- package/dist/eval/fixture.js +217 -0
- package/dist/eval/fixture.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +7 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/load-node.d.ts +16 -0
- package/dist/eval/load-node.d.ts.map +1 -0
- package/dist/eval/load-node.js +58 -0
- package/dist/eval/load-node.js.map +1 -0
- package/dist/eval/metric-collector.d.ts +209 -0
- package/dist/eval/metric-collector.d.ts.map +1 -0
- package/dist/eval/metric-collector.js +293 -0
- package/dist/eval/metric-collector.js.map +1 -0
- package/dist/eval/run-record.d.ts +76 -0
- package/dist/eval/run-record.d.ts.map +1 -0
- package/dist/eval/run-record.js +32 -0
- package/dist/eval/run-record.js.map +1 -0
- package/dist/eval/runner.d.ts +140 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +310 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/spec-framework.d.ts +113 -0
- package/dist/eval/spec-framework.d.ts.map +1 -0
- package/dist/eval/spec-framework.js +100 -0
- package/dist/eval/spec-framework.js.map +1 -0
- package/dist/eval/spec-helpers.d.ts +245 -0
- package/dist/eval/spec-helpers.d.ts.map +1 -0
- package/dist/eval/spec-helpers.js +605 -0
- package/dist/eval/spec-helpers.js.map +1 -0
- package/dist/index.d.ts +24 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/node.d.ts +1 -0
- package/dist/node.d.ts.map +1 -1
- package/dist/node.js +1 -0
- package/dist/node.js.map +1 -1
- package/dist/planner-executor.d.ts +132 -0
- package/dist/planner-executor.d.ts.map +1 -0
- package/dist/planner-executor.js +274 -0
- package/dist/planner-executor.js.map +1 -0
- package/dist/skill-catalog.d.ts +81 -0
- package/dist/skill-catalog.d.ts.map +1 -0
- package/dist/skill-catalog.js +388 -0
- package/dist/skill-catalog.js.map +1 -0
- package/dist/skill-router.d.ts +95 -0
- package/dist/skill-router.d.ts.map +1 -0
- package/dist/skill-router.js +130 -0
- package/dist/skill-router.js.map +1 -0
- package/dist/strategy.d.ts +20 -1
- package/dist/strategy.d.ts.map +1 -1
- package/dist/strategy.js +333 -13
- package/dist/strategy.js.map +1 -1
- package/dist/tools.d.ts +15 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -1
- package/dist/types/strategy.d.ts +48 -0
- package/dist/types/strategy.d.ts.map +1 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/dist/types/trace.d.ts +59 -9
- package/dist/types/trace.d.ts.map +1 -1
- package/dist/types/trace.js +5 -3
- package/dist/types/trace.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAEvD,YAAY,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAEjE,YAAY,EACV,YAAY,EACZ,eAAe,EACf,aAAa,EACb,UAAU,EACV,WAAW,EACX,aAAa,EACb,UAAU,EACV,OAAO,GACR,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,YAAY,EACV,WAAW,EACX,QAAQ,EACR,QAAQ,IAAI,YAAY,EACxB,WAAW,EACX,WAAW,EACX,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,SAAS,EACT,gBAAgB,EAChB,SAAS,EACT,WAAW,EACX,SAAS,EACT,eAAe,EACf,UAAU,EACV,QAAQ,GACT,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,YAAY,EACV,cAAc,EACd,oBAAoB,EACpB,mBAAmB,EACnB,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACnB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EACV,WAAW,EACX,WAAW,EACX,UAAU,EACV,QAAQ,EACR,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,MAAM,EACN,WAAW,EACX,YAAY,GACb,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,YAAY,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAEvD,YAAY,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AAEjE,YAAY,EACV,YAAY,EACZ,eAAe,EACf,aAAa,EACb,UAAU,EACV,WAAW,EACX,aAAa,EACb,UAAU,EACV,OAAO,GACR,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAEnD,YAAY,EACV,WAAW,EACX,QAAQ,EACR,QAAQ,IAAI,YAAY,EACxB,WAAW,EACX,WAAW,EACX,iBAAiB,GAClB,MAAM,iBAAiB,CAAC;AAEzB,YAAY,EACV,SAAS,EACT,gBAAgB,EAChB,SAAS,EACT,WAAW,EACX,SAAS,EACT,eAAe,EACf,UAAU,EACV,QAAQ,GACT,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAC;AAC7D,YAAY,EACV,cAAc,EACd,oBAAoB,EACpB,mBAAmB,EACnB,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACnB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EACV,WAAW,EACX,WAAW,EACX,UAAU,EACV,QAAQ,EACR,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,MAAM,EACN,WAAW,EACX,YAAY,GACb,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAExF,YAAY,EACV,gBAAgB,EAChB,WAAW,EACX,SAAS,EACT,gBAAgB,GACjB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AAEnE,YAAY,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAC5D,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAE/D,YAAY,EACV,YAAY,EACZ,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAElD,YAAY,EACV,aAAa,EACb,eAAe,EACf,gBAAgB,EAChB,aAAa,GACd,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAExD,YAAY,EAAE,QAAQ,EAAE,YAAY,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACpF,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAElF,YAAY,EAAE,cAAc,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AACpF,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,YAAY,EACV,sBAAsB,EACtB,WAAW,EACX,mBAAmB,EACnB,gBAAgB,GACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,6BAA6B,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAE5F,YAAY,EACV,MAAM,EACN,UAAU,EACV,eAAe,EACf,gBAAgB,EAChB,yBAAyB,EACzB,mBAAmB,GACpB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAC7D,OAAO,EAAE,eAAe,EAAE,MAAM,y
BAAyB,CAAC;AAE1D,YAAY,EACV,gBAAgB,EAChB,wBAAwB,EACxB,kBAAkB,GACnB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAE7D,YAAY,EACV,gBAAgB,EAChB,WAAW,EACX,SAAS,EACT,MAAM,EACN,YAAY,EACZ,UAAU,EACV,oBAAoB,EACpB,WAAW,EACX,eAAe,EACf,gBAAgB,GACjB,MAAM,iBAAiB,CAAC;AACzB,OAAO,EACL,iBAAiB,EACjB,WAAW,EACX,iCAAiC,EACjC,iCAAiC,EACjC,iCAAiC,EACjC,4DAA4D,EAC5D,mDAAmD,EACnD,2BAA2B,EAC3B,oCAAoC,EACpC,qCAAqC,EACrC,kBAAkB,EAClB,uBAAuB,EACvB,kBAAkB,EAClB,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,wBAAwB,EACxB,gDAAgD,EAChD,YAAY,EACZ,uCAAuC,EACvC,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,mBAAmB,EACnB,0BAA0B,EAC1B,2BAA2B,EAC3B,eAAe,GAChB,MAAM,iBAAiB,CAAC;AAQzB,YAAY,EAAE,SAAS,IAAI,aAAa,EAAE,MAAM,sBAAsB,CAAC;AACvE,YAAY,EACV,eAAe,EACf,eAAe,EACf,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,0BAA0B,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAKvF,YAAY,EACV,aAAa,EACb,iBAAiB,EACjB,mBAAmB,EACnB,YAAY,EACZ,YAAY,GACb,MAAM,4BAA4B,CAAC;AACpC,OAAO,EACL,eAAe,EACf,cAAc,EACd,mBAAmB,GACpB,MAAM,4BAA4B,CAAC;AAOpC,YAAY,EACV,mBAAmB,EACnB,iBAAiB,EACjB,eAAe,EACf,oBAAoB,EACpB,gBAAgB,EAChB,aAAa,EACb,QAAQ,GACT,MAAM,6BAA6B,CAAC;AACrC,OAAO,EACL,QAAQ,EACR,cAAc,EACd,UAAU,EACV,cAAc,GACf,MAAM,6BAA6B,CAAC;AAErC,YAAY,EACV,gBAAgB,EAChB,eAAe,EACf,aAAa,GACd,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,kBAAkB,EAClB,sBAAsB,EACtB,WAAW,GACZ,MAAM,cAAc,CAAC;AAEtB,YAAY,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AAEzD,YAAY,EACV,aAAa,EACb,eAAe,GAChB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AAErE,YAAY,EACV,eAAe,EACf,SAAS,EACT,YAAY,EACZ,eAAe,GAChB,MAAM,kBAAkB,CAAC;AAO1B,YAAY,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AAE5E,YAAY,EACV,SAAS,EACT,eAAe,EACf,MAAM,EACN,iBAAiB,GAClB,MAAM,mBAAmB,CAAC;AAE3B,YAAY,EACV,aAAa,EACb,mBAAmB,EACnB,aAAa,EACb,cAAc,EACd,SAAS,EACT,UAAU,GACX,MAAM,mBAAmB,CAAC;AAS3B,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAClF,YAAY,EAAE,mBAAmB,EAAE,MAAM,kBAAkB,CAAC;AAC5D,OAAO,EAAE,YAAY,EAAE,oBAAoB,E
AAE,MAAM,oBAAoB,CAAC;AACxE,YAAY,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACxE,OAAO,EACL,sBAAsB,EACtB,aAAa,EACb,aAAa,EACb,SAAS,EACT,YAAY,GACb,MAAM,mBAAmB,CAAC;AAC3B,YAAY,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAOzD,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AAClE,YAAY,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -15,10 +15,20 @@
|
|
|
15
15
|
export { EMPTY_WORKSPACE } from './types/workspace.js';
|
|
16
16
|
export { EMPTY_RUNTIME } from './types/runtime.js';
|
|
17
17
|
export { legacyProviderAsLlmClient } from './llm-adapter.js';
|
|
18
|
-
export { createToolRegistry, createDispatch } from './tools.js';
|
|
18
|
+
export { createToolRegistry, createDispatch, isParallelSafe, isPure } from './tools.js';
|
|
19
|
+
export { createMemoizedDispatch } from './dispatch-memoization.js';
|
|
19
20
|
export { DEFAULT_CAPABILITIES } from './types/capabilities.js';
|
|
20
21
|
export { createAgentSession } from './session.js';
|
|
21
22
|
export { createReactLoopStrategy } from './strategy.js';
|
|
23
|
+
export { SKILL_CATALOG, getSkillEntry, listSkillNames } from './skill-catalog.js';
|
|
24
|
+
export { routeSkill } from './skill-router.js';
|
|
25
|
+
export { createPlannerExecutorStrategy, defaultKeywordRouter } from './planner-executor.js';
|
|
26
|
+
export { turnTimingTable } from './diagnostics/timing.js';
|
|
27
|
+
export { analyzeTruthfulness } from './diagnostics/index.js';
|
|
28
|
+
export { CUSTOM_SPEC_NAMES, SKILL_NAMES, SPEC_FINAL_RULES_EXCLUDES_LITERAL, SPEC_FINAL_RULES_INCLUDES_LITERAL, SPEC_FINAL_RUNTIME_RUN_SUMMARY_OK, SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT, SPEC_PYRIC_AGENTS_LINT_CLEAN_AND_RULE_REJECTS_CHEAT, SPEC_REPORT_MENTIONS_ALL_OF, SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, SPEC_TRACE_CONTAINS_TOOL_CALL_BY_NAME, STARTER_SPEC_NAMES, applyWorkspaceOverrides, createSpecRegistry, evaluateSpec, finalRulesExcludesLiteral, finalRulesIncludesLiteral, finalRuntimeRunSummaryOk, gameRulesSimulatorAcceptsPositiveAndRejectsCheat, parseFixture, pyricAgentsLintCleanAndRuleRejectsCheat, registerAllSpecs, registerCustomSpecs, registerStarterSpecs, reportMentionsAllOf, reportMentionsAtLeastOneOf, traceContainsToolCallByName, validateFixture, } from './eval/index.js';
|
|
29
|
+
export { defaultSystemPromptBuilder, runFixture, runFixtures } from './eval/runner.js';
|
|
30
|
+
export { aggregateTrials, collectMetrics, extractTrialMetrics, } from './eval/metric-collector.js';
|
|
31
|
+
export { POLARITY, compareMetrics, renderJson, renderMarkdown, } from './eval/comparison-report.js';
|
|
22
32
|
export { computeTurnMetrics, createMetricsCollector, findPricing, } from './metrics.js';
|
|
23
33
|
export { noopStorage, createMemoryStorage } from './types/storage.js';
|
|
24
34
|
export { createLocalStorageAdapter } from './storage.js';
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAcvD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAqBnD,OAAO,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAC;AAuB7D,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAcvD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAqBnD,OAAO,EAAE,yBAAyB,EAAE,MAAM,kBAAkB,CAAC;AAuB7D,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAQxF,OAAO,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AAGnE,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAO/D,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAQlD,OAAO,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AAGxD,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAGlF,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAQ/C,OAAO,EAAE,6BAA6B,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAY5F,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAO1D,OAAO,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAC;AAc7D,OAAO,EACL,iBAAiB,EACjB,WAAW,EACX,iCAAiC,EACjC,iCAAiC,EACjC,iCAAiC,EACjC,4DAA4D,EAC5D,mDAAmD,EACnD,2BAA2B,EAC3B,oCAAoC,EACpC,qCAAqC,EACrC,kBAAkB,EAClB,uBAAuB,EACvB,kBAAkB,EAClB,YAAY,EACZ,yBAAyB,EACzB,yBAAyB,EACzB,wBAAwB,EACxB,gDAAgD,EAChD,YAAY,EACZ,uCAAuC,EACvC,gBAAgB,EAChB,mBAAmB,EACnB,oBAAoB,EACpB,mBAAmB,EACnB,0BAA0B,EAC1B,2BAA2B,EAC3B,eAAe,GAChB,MAAM,iBAAiB,CAAC;AAczB,OAAO,EAAE,0BAA0B,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAYvF,OAAO,EACL,eAAe,EACf,cAAc,EACd,mBAAmB,GACpB,MAAM,4BAA4B,CAAC;AAgBpC,OAAO,EACL,QAAQ,EACR,cAAc,EACd,UAAU,EACV,cAAc,GACf,MAAM,6BAA6B,CAAC;AAOrC,OAAO,EACL,kBAAkB,EAClB,sBAAsB,EACtB,WAAW,GACZ,MAAM,cAAc,CAAC;AAGtB,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACtE,OAAO,EAAE,yBAAyB,EAAE,MAAM,cAAc,CAAC;AAMzD,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AA+BrE,kEAAkE;AAClE,oEAAoE;AACpE,wDAAwD;AACxD,yEAAyE;AACzE,oCAAoC;AACpC,EAAE;AACF,kEAAkE;AAClE,0EAA0E;AAC1E,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElF,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAC;AAExE,OAAO,EACL,sBAAsB,EACtB,aAAa,EACb,aAAa,EACb,SAAS,EACT,YAAY,GACb,MAAM,mBAAmB,CAAC"}
|
package/dist/node.d.ts
CHANGED
|
@@ -19,4 +19,5 @@
|
|
|
19
19
|
*/
|
|
20
20
|
export { openEventLog, defaultProjectLogDir } from './events/log.js';
|
|
21
21
|
export { generateEventId, buildRollbackEvent, HOST_AGENT_ID, EventTooLargeError, DEFAULT_MAX_EVENT_BYTES, } from './events/log-core.js';
|
|
22
|
+
export { FixtureLoadError, loadFixture, loadFixtures } from './eval/load-node.js';
|
|
22
23
|
//# sourceMappingURL=node.d.ts.map
|
package/dist/node.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"node.d.ts","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"node.d.ts","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}
|
package/dist/node.js
CHANGED
|
@@ -19,4 +19,5 @@
|
|
|
19
19
|
*/
|
|
20
20
|
export { openEventLog, defaultProjectLogDir } from './events/log.js';
|
|
21
21
|
export { generateEventId, buildRollbackEvent, HOST_AGENT_ID, EventTooLargeError, DEFAULT_MAX_EVENT_BYTES, } from './events/log-core.js';
|
|
22
|
+
export { FixtureLoadError, loadFixture, loadFixtures } from './eval/load-node.js';
|
|
22
23
|
//# sourceMappingURL=node.js.map
|
package/dist/node.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"node.js","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC"}
|
|
1
|
+
{"version":3,"file":"node.js","sourceRoot":"","sources":["../src/node.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAAE,YAAY,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACrE,OAAO,EACL,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,kBAAkB,EAClB,uBAAuB,GACxB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC"}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createPlannerExecutorStrategy()` — phase five `AgentStrategy`.
|
|
3
|
+
*
|
|
4
|
+
* Takes a user prompt, routes it to a skill from the catalog,
|
|
5
|
+
* materializes that skill's prescribed plan, and executes each step
|
|
6
|
+
* with a bounded ReAct sub-loop. Scratch is dropped between steps:
|
|
7
|
+
* the next step starts fresh with a short summary of each prior step,
|
|
8
|
+
* not the full message history. The working window stays flat across
|
|
9
|
+
* a long workflow, which is the hypothesis this strategy exists to
|
|
10
|
+
* test.
|
|
11
|
+
*
|
|
12
|
+
* Lifecycle:
|
|
13
|
+
*
|
|
14
|
+
* 1. Route the prompt against the catalog. When the router returns
|
|
15
|
+
* no match, either fall back to `createReactLoopStrategy()`
|
|
16
|
+
* (default) or yield an `error` event (when `fallbackToReact`
|
|
17
|
+
* is `false`).
|
|
18
|
+
* 2. Look up the catalog entry. If the router returned a name not
|
|
19
|
+
* in the catalog (shouldn't happen for a catalog-derived router,
|
|
20
|
+
* defensive), behave as the no-match case.
|
|
21
|
+
* 3. Emit a `custom` event `'plan_started'` with the skill name and
|
|
22
|
+
* the step ids.
|
|
23
|
+
* 4. For each step, in order:
|
|
24
|
+
* a. Emit `custom` `'step_started'` with `{ stepId,
|
|
25
|
+
* description }`.
|
|
26
|
+
* b. Build a step-scoped system prompt that wraps the original
|
|
27
|
+
* system prompt with a suffix naming the step ("You are on
|
|
28
|
+
* step X of Y: <description>. Prior step summaries follow.")
|
|
29
|
+
* and one synthetic user message per prior step's summary.
|
|
30
|
+
* c. Drive a bounded `createReactLoopStrategy({ maxTurns })`
|
|
31
|
+
* sub-loop. Every inner `text`, `thinking`, `tool_call`,
|
|
32
|
+
* `tool_result`, and `turn_complete` event is streamed
|
|
33
|
+
* through unchanged so the host's UI continues working.
|
|
34
|
+
* d. After the sub-loop completes, capture the concatenated
|
|
35
|
+
* assistant text the inner loop emitted this step, call
|
|
36
|
+
* `summarizeStep(stepId, transcript)` to get a short
|
|
37
|
+
* summary, and emit `custom` `'step_completed'` with
|
|
38
|
+
* `{ stepId, summary }`.
|
|
39
|
+
* 5. Emit `custom` `'plan_completed'` and return.
|
|
40
|
+
*
|
|
41
|
+
* Tracer: per-step inner loops generate their own `llm_request` /
|
|
42
|
+
* `llm_response` / `turn_dispatch_complete` trace events. The trace's
|
|
43
|
+
* `requestId` carries a `${turnId}#${stepId}#${iteration}` shape so
|
|
44
|
+
* the eval harness can read per-step iteration counts off the trace
|
|
45
|
+
* without changes.
|
|
46
|
+
*
|
|
47
|
+
* What is intentionally NOT done in v1:
|
|
48
|
+
*
|
|
49
|
+
* - Per-step verifier gating. The catalog's `verifier?` is read but
|
|
50
|
+
* not enforced. The executor always advances to the next step.
|
|
51
|
+
* Gating progression on verifier outcomes is a follow-up.
|
|
52
|
+
* - Per-step tool subsetting. Every step sees the same dispatcher
|
|
53
|
+
* and the same tool list. A future enhancement can scope tools by
|
|
54
|
+
* step.
|
|
55
|
+
* - Smarter step summarization. The default `summarizeStep` is
|
|
56
|
+
* a trim plus `slice(0, 400)` with a trailing "…" — dumb truncation. Override with a
|
|
57
|
+
* real summarizer if needed.
|
|
58
|
+
*/
|
|
59
|
+
import type { SkillName } from './eval/fixture.js';
|
|
60
|
+
import type { SkillCatalog } from './skill-catalog.js';
|
|
61
|
+
import type { AgentStrategy } from './types/strategy.js';
|
|
62
|
+
/**
|
|
63
|
+
* Minimal router contract this strategy depends on. The sibling
|
|
64
|
+
* `strategy/skill-router` branch ships a concrete `routeSkill`
|
|
65
|
+
* function with the same signature; until that lands, callers can
|
|
66
|
+
* pass any function with this shape, and this module's
|
|
67
|
+
* `defaultKeywordRouter` does a trivial substring scan against the
|
|
68
|
+
* catalog's `triggerHints` so the strategy works end-to-end on its
|
|
69
|
+
* own.
|
|
70
|
+
*
|
|
71
|
+
* Once the router branch merges, downstream code can replace the
|
|
72
|
+
* default by passing `router: routeSkill` (or by wrapping it) into
|
|
73
|
+
* the options. No change is needed in this file.
|
|
74
|
+
*/
|
|
75
|
+
export interface SkillRouterMatch {
|
|
76
|
+
/** The chosen skill. Must be a name present in the catalog. */
|
|
77
|
+
skill: SkillName;
|
|
78
|
+
/** Higher is better. Consumers may surface it; the strategy uses
|
|
79
|
+
* only its presence (non-null match) to gate execution. */
|
|
80
|
+
score?: number;
|
|
81
|
+
}
|
|
82
|
+
export interface SkillRouterDecision {
|
|
83
|
+
/** The top match, or `null` when no entry crossed the router's
|
|
84
|
+
* internal threshold. */
|
|
85
|
+
match: SkillRouterMatch | null;
|
|
86
|
+
}
|
|
87
|
+
export type SkillRouter = (prompt: string, options?: {
|
|
88
|
+
catalog?: SkillCatalog;
|
|
89
|
+
}) => SkillRouterDecision;
|
|
90
|
+
export interface PlannerExecutorOptions {
|
|
91
|
+
/** Catalog override. Defaults to `SKILL_CATALOG`. */
|
|
92
|
+
catalog?: SkillCatalog;
|
|
93
|
+
/** Per-step bounded turn budget for the inner ReAct sub-loop.
|
|
94
|
+
* Default 4. */
|
|
95
|
+
stepMaxTurns?: number;
|
|
96
|
+
/** When the router returns no match, fall back to
|
|
97
|
+
* `createReactLoopStrategy()` for the rest of the turn. Default
|
|
98
|
+
* `true`. When `false`, the strategy yields an `error` event and
|
|
99
|
+
* returns. */
|
|
100
|
+
fallbackToReact?: boolean;
|
|
101
|
+
/**
|
|
102
|
+
* Summarize a single step's transcript (concatenated assistant
|
|
103
|
+
* text) into a short string that seeds the next step's context.
|
|
104
|
+
* Default: trims whitespace, then truncates to 400 characters (appending "…" when text was cut).
|
|
105
|
+
*/
|
|
106
|
+
summarizeStep?: (stepId: string, transcript: string) => string;
|
|
107
|
+
/**
|
|
108
|
+
* Router function. Defaults to a keyword scan against the catalog's
|
|
109
|
+
* `triggerHints`. The sibling `strategy/skill-router` branch ships
|
|
110
|
+
* a more sophisticated `routeSkill`; once that lands, callers can
|
|
111
|
+
* pass it here.
|
|
112
|
+
*/
|
|
113
|
+
router?: SkillRouter;
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Default keyword router. Scores each catalog entry by counting
|
|
117
|
+
* `triggerHints` substring hits in the lowercased prompt and returns
|
|
118
|
+
* the top scorer (or `null` when every entry scored zero). Catalog
|
|
119
|
+
* order breaks ties — the same ordering the sibling
|
|
120
|
+
* `strategy/skill-router` branch documents for its production router.
|
|
121
|
+
*
|
|
122
|
+
* This is intentionally tiny. The point is that the executor can run
|
|
123
|
+
* end-to-end (and be unit-tested) without depending on the router
|
|
124
|
+
* branch landing first.
|
|
125
|
+
*/
|
|
126
|
+
export declare const defaultKeywordRouter: SkillRouter;
|
|
127
|
+
/**
|
|
128
|
+
* Build the planner-executor strategy. Returns an `AgentStrategy`
|
|
129
|
+
* with `id: 'planner-executor'`.
|
|
130
|
+
*/
|
|
131
|
+
export declare function createPlannerExecutorStrategy(options?: PlannerExecutorOptions): AgentStrategy;
|
|
132
|
+
//# sourceMappingURL=planner-executor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"planner-executor.d.ts","sourceRoot":"","sources":["../src/planner-executor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyDG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AAEnD,OAAO,KAAK,EAAE,YAAY,EAAqB,MAAM,oBAAoB,CAAC;AAG1E,OAAO,KAAK,EAAE,aAAa,EAAmC,MAAM,qBAAqB,CAAC;AAE1F;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,+DAA+D;IAC/D,KAAK,EAAE,SAAS,CAAC;IACjB;gEAC4D;IAC5D,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC;8BAC0B;IAC1B,KAAK,EAAE,gBAAgB,GAAG,IAAI,CAAC;CAChC;AAED,MAAM,MAAM,WAAW,GAAG,CACxB,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE;IAAE,OAAO,CAAC,EAAE,YAAY,CAAA;CAAE,KACjC,mBAAmB,CAAC;AAEzB,MAAM,WAAW,sBAAsB;IACrC,qDAAqD;IACrD,OAAO,CAAC,EAAE,YAAY,CAAC;IACvB;qBACiB;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;mBAGe;IACf,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;;OAIG;IACH,aAAa,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,KAAK,MAAM,CAAC;IAC/D;;;;;OAKG;IACH,MAAM,CAAC,EAAE,WAAW,CAAC;CACtB;AAgBD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,oBAAoB,EAAE,WAoBlC,CAAC;AAEF;;;GAGG;AACH,wBAAgB,6BAA6B,CAAC,OAAO,GAAE,sBAA2B,GAAG,aAAa,CA8KjG"}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* `createPlannerExecutorStrategy()` — phase five `AgentStrategy`.
|
|
3
|
+
*
|
|
4
|
+
* Takes a user prompt, routes it to a skill from the catalog,
|
|
5
|
+
* materializes that skill's prescribed plan, and executes each step
|
|
6
|
+
* with a bounded ReAct sub-loop. Scratch is dropped between steps:
|
|
7
|
+
* the next step starts fresh with a short summary of each prior step,
|
|
8
|
+
* not the full message history. The working window stays flat across
|
|
9
|
+
* a long workflow, which is the hypothesis this strategy exists to
|
|
10
|
+
* test.
|
|
11
|
+
*
|
|
12
|
+
* Lifecycle:
|
|
13
|
+
*
|
|
14
|
+
* 1. Route the prompt against the catalog. When the router returns
|
|
15
|
+
* no match, either fall back to `createReactLoopStrategy()`
|
|
16
|
+
* (default) or yield an `error` event (when `fallbackToReact`
|
|
17
|
+
* is `false`).
|
|
18
|
+
* 2. Look up the catalog entry. If the router returned a name not
|
|
19
|
+
* in the catalog (shouldn't happen for a catalog-derived router,
|
|
20
|
+
* defensive), behave as the no-match case.
|
|
21
|
+
* 3. Emit a `custom` event `'plan_started'` with the skill name and
|
|
22
|
+
* the step ids.
|
|
23
|
+
* 4. For each step, in order:
|
|
24
|
+
* a. Emit `custom` `'step_started'` with `{ stepId,
|
|
25
|
+
* description }`.
|
|
26
|
+
* b. Build a step-scoped system prompt that wraps the original
|
|
27
|
+
* system prompt with a suffix naming the step ("You are on
|
|
28
|
+
* step X of Y: <description>. Prior step summaries follow.")
|
|
29
|
+
* and one synthetic user message per prior step's summary.
|
|
30
|
+
* c. Drive a bounded `createReactLoopStrategy({ maxTurns })`
|
|
31
|
+
* sub-loop. Every inner `text`, `thinking`, `tool_call`,
|
|
32
|
+
* `tool_result`, and `turn_complete` event is streamed
|
|
33
|
+
* through unchanged so the host's UI continues working.
|
|
34
|
+
* d. After the sub-loop completes, capture the concatenated
|
|
35
|
+
* assistant text the inner loop emitted this step, call
|
|
36
|
+
* `summarizeStep(stepId, transcript)` to get a short
|
|
37
|
+
* summary, and emit `custom` `'step_completed'` with
|
|
38
|
+
* `{ stepId, summary }`.
|
|
39
|
+
* 5. Emit `custom` `'plan_completed'` and return.
|
|
40
|
+
*
|
|
41
|
+
* Tracer: per-step inner loops generate their own `llm_request` /
|
|
42
|
+
* `llm_response` / `turn_dispatch_complete` trace events. The trace's
|
|
43
|
+
* `requestId` carries a `${turnId}#${stepId}#${iteration}` shape so
|
|
44
|
+
* the eval harness can read per-step iteration counts off the trace
|
|
45
|
+
* without changes.
|
|
46
|
+
*
|
|
47
|
+
* What is intentionally NOT done in v1:
|
|
48
|
+
*
|
|
49
|
+
* - Per-step verifier gating. The catalog's `verifier?` is read but
|
|
50
|
+
* not enforced. The executor always advances to the next step.
|
|
51
|
+
* Gating progression on verifier outcomes is a follow-up.
|
|
52
|
+
* - Per-step tool subsetting. Every step sees the same dispatcher
|
|
53
|
+
* and the same tool list. A future enhancement can scope tools by
|
|
54
|
+
* step.
|
|
55
|
+
* - Smarter step summarization. The default `summarizeStep` is
|
|
56
|
+
* a trim plus `slice(0, 400)` with a trailing "…" — dumb truncation. Override with a
|
|
57
|
+
* real summarizer if needed.
|
|
58
|
+
*/
|
|
59
|
+
import { SKILL_CATALOG, getSkillEntry } from './skill-catalog.js';
|
|
60
|
+
import { createReactLoopStrategy } from './strategy.js';
|
|
61
|
+
const DEFAULT_STEP_MAX_TURNS = 4;
|
|
62
|
+
const DEFAULT_SUMMARY_LIMIT = 400;
/**
 * Default step summarizer: trims the transcript and caps it at
 * `DEFAULT_SUMMARY_LIMIT` UTF-16 code units, appending "…" when anything
 * was cut off.
 *
 * The cut never lands inside a surrogate pair. If the last kept code unit
 * is a high surrogate (the first half of an astral character such as an
 * emoji), it is dropped as well, so the summary never ends in a lone
 * surrogate.
 *
 * @param _stepId Id of the step being summarized. Unused by the default
 *   implementation; present so the function matches the `summarizeStep`
 *   option signature.
 * @param transcript Concatenated assistant text emitted during the step.
 * @returns The trimmed transcript, truncated with a trailing "…" when it
 *   exceeds the limit.
 */
function defaultSummarizeStep(_stepId, transcript) {
    const trimmed = transcript.trim();
    if (trimmed.length <= DEFAULT_SUMMARY_LIMIT)
        return trimmed;
    let cut = DEFAULT_SUMMARY_LIMIT;
    // `slice` counts UTF-16 code units. Back off by one when the final kept
    // unit is a high surrogate whose low half would be sliced away.
    const lastKept = trimmed.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff)
        cut -= 1;
    return `${trimmed.slice(0, cut)}…`;
}
|
|
74
|
+
/**
|
|
75
|
+
* Default keyword router. Scores each catalog entry by counting
|
|
76
|
+
* `triggerHints` substring hits in the lowercased prompt and returns
|
|
77
|
+
* the top scorer (or `null` when every entry scored zero). Catalog
|
|
78
|
+
* order breaks ties — the same ordering the sibling
|
|
79
|
+
* `strategy/skill-router` branch documents for its production router.
|
|
80
|
+
*
|
|
81
|
+
* This is intentionally tiny. The point is that the executor can run
|
|
82
|
+
* end-to-end (and be unit-tested) without depending on the router
|
|
83
|
+
* branch landing first.
|
|
84
|
+
*/
|
|
85
|
+
export const defaultKeywordRouter = (prompt, options) => {
|
|
86
|
+
const catalog = options?.catalog ?? SKILL_CATALOG;
|
|
87
|
+
if (!prompt)
|
|
88
|
+
return { match: null };
|
|
89
|
+
const lowered = prompt.toLowerCase();
|
|
90
|
+
let best = null;
|
|
91
|
+
for (const entry of catalog) {
|
|
92
|
+
let score = 0;
|
|
93
|
+
for (const hint of entry.triggerHints) {
|
|
94
|
+
if (hint.length === 0)
|
|
95
|
+
continue;
|
|
96
|
+
if (lowered.includes(hint.toLowerCase()))
|
|
97
|
+
score += 1;
|
|
98
|
+
}
|
|
99
|
+
if (score > 0 && (best === null || score > best.score)) {
|
|
100
|
+
best = { entry, score };
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
if (best === null)
|
|
104
|
+
return { match: null };
|
|
105
|
+
return { match: { skill: best.entry.name, score: best.score } };
|
|
106
|
+
};
|
|
107
|
+
/**
|
|
108
|
+
* Build the planner-executor strategy. Returns an `AgentStrategy`
|
|
109
|
+
* with `id: 'planner-executor'`.
|
|
110
|
+
*/
|
|
111
|
+
export function createPlannerExecutorStrategy(options = {}) {
|
|
112
|
+
const catalog = options.catalog ?? SKILL_CATALOG;
|
|
113
|
+
const stepMaxTurns = options.stepMaxTurns ?? DEFAULT_STEP_MAX_TURNS;
|
|
114
|
+
const fallbackToReact = options.fallbackToReact !== false;
|
|
115
|
+
const summarizeStep = options.summarizeStep ?? defaultSummarizeStep;
|
|
116
|
+
const router = options.router ?? defaultKeywordRouter;
|
|
117
|
+
return {
|
|
118
|
+
id: 'planner-executor',
|
|
119
|
+
async *run(input, signal) {
|
|
120
|
+
if (signal.aborted) {
|
|
121
|
+
yield { kind: 'error', message: 'aborted' };
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
// 1. Route the prompt. When a custom catalog was supplied, look
|
|
125
|
+
// the entry up in that catalog directly so the executor honors
|
|
126
|
+
// the override; otherwise the production `SKILL_CATALOG` table
|
|
127
|
+
// is consulted via `getSkillEntry`.
|
|
128
|
+
const decision = router(input.prompt, { catalog });
|
|
129
|
+
const lookupEntry = (skill) => options.catalog === undefined
|
|
130
|
+
? getSkillEntry(skill)
|
|
131
|
+
: options.catalog.find((entry) => entry.name === skill);
|
|
132
|
+
const matchedEntry = decision.match === null ? undefined : lookupEntry(decision.match.skill);
|
|
133
|
+
// 2. No match or match-but-not-in-catalog → fallback path.
|
|
134
|
+
if (decision.match === null || matchedEntry === undefined) {
|
|
135
|
+
if (!fallbackToReact) {
|
|
136
|
+
yield {
|
|
137
|
+
kind: 'error',
|
|
138
|
+
message: 'planner-executor: no skill matched and fallbackToReact is disabled',
|
|
139
|
+
};
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
// Delegate the rest of the turn to a plain ReAct sub-strategy.
|
|
143
|
+
// Stream every event through unchanged.
|
|
144
|
+
const sub = createReactLoopStrategy();
|
|
145
|
+
for await (const ev of sub.run(input, signal)) {
|
|
146
|
+
yield ev;
|
|
147
|
+
}
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
// 3. Plan started.
|
|
151
|
+
const plan = matchedEntry.steps;
|
|
152
|
+
yield {
|
|
153
|
+
kind: 'custom',
|
|
154
|
+
name: 'plan_started',
|
|
155
|
+
data: {
|
|
156
|
+
skill: matchedEntry.name,
|
|
157
|
+
plan: plan.map((s) => s.id),
|
|
158
|
+
},
|
|
159
|
+
};
|
|
160
|
+
const stepSummaries = [];
|
|
161
|
+
const turnIdForReq = input.turnId ?? 'turn-anon';
|
|
162
|
+
// 4. Walk the steps.
|
|
163
|
+
for (let stepIndex = 0; stepIndex < plan.length; stepIndex++) {
|
|
164
|
+
if (signal.aborted) {
|
|
165
|
+
yield { kind: 'error', message: 'aborted' };
|
|
166
|
+
return;
|
|
167
|
+
}
|
|
168
|
+
const step = plan[stepIndex];
|
|
169
|
+
yield {
|
|
170
|
+
kind: 'custom',
|
|
171
|
+
name: 'step_started',
|
|
172
|
+
data: { stepId: step.id, description: step.description },
|
|
173
|
+
};
|
|
174
|
+
// 4b. Build the step-scoped system prompt + a fresh history
|
|
175
|
+
// composed of one synthetic user message per prior step's
|
|
176
|
+
// summary. The next step sees ONLY these summaries plus the
|
|
177
|
+
// original user prompt — never the raw scratch from prior
|
|
178
|
+
// sub-loops. This is the context-discipline mechanism.
|
|
179
|
+
const stepSystemPrompt = buildStepSystemPrompt(input.systemPrompt, step.id, step.description, stepIndex, plan.length, stepSummaries.length > 0);
|
|
180
|
+
const stepHistory = stepSummaries.map((s, i) => ({
|
|
181
|
+
id: `step-summary-${i}`,
|
|
182
|
+
role: 'user',
|
|
183
|
+
text: `[Prior step '${s.stepId}' summary] ${s.summary}`,
|
|
184
|
+
}));
|
|
185
|
+
// 4c. Drive a bounded ReAct sub-loop. The sub-loop's tracer is
|
|
186
|
+
// the OUTER tracer wrapped so that emitted `llm_request` /
|
|
187
|
+
// `llm_response` / `turn_dispatch_complete` events carry a
|
|
188
|
+
// step-scoped `requestId` prefix; the wrapping rewrites the
|
|
189
|
+
// request id from `${turnId}#${iteration}` to
|
|
190
|
+
// `${turnId}#${stepId}#${iteration}` so the eval harness can
|
|
191
|
+
// read per-step iteration counts off the trace.
|
|
192
|
+
const subStrategy = createReactLoopStrategy({ maxTurns: stepMaxTurns });
|
|
193
|
+
const stepInput = {
|
|
194
|
+
...input,
|
|
195
|
+
history: stepHistory,
|
|
196
|
+
systemPrompt: stepSystemPrompt,
|
|
197
|
+
turnId: `${turnIdForReq}#${step.id}`,
|
|
198
|
+
...(input.tracer ? { tracer: { emit: input.tracer.emit.bind(input.tracer) } } : {}),
|
|
199
|
+
};
|
|
200
|
+
let stepAssistantText = '';
|
|
201
|
+
let stepExceededBudget = false;
|
|
202
|
+
for await (const ev of subStrategy.run(stepInput, signal)) {
|
|
203
|
+
if (ev.kind === 'text') {
|
|
204
|
+
stepAssistantText += ev.chunk;
|
|
205
|
+
}
|
|
206
|
+
// Stream every event from the inner loop through unchanged.
|
|
207
|
+
// The host sees normal `text`/`thinking`/`tool_call`/
|
|
208
|
+
// `tool_result`/`turn_complete` events as if a single ReAct
|
|
209
|
+
// loop were running — plus the planner-executor's own
|
|
210
|
+
// `custom` plan events around them.
|
|
211
|
+
if (ev.kind === 'error') {
|
|
212
|
+
// A sub-loop maxTurns exhaustion (the message format from
|
|
213
|
+
// `createReactLoopStrategy` is `react-loop: exceeded
|
|
214
|
+
// maxTurns (N) without settling`) is treated as a soft
|
|
215
|
+
// step failure: we surface it via a `custom` event so the
|
|
216
|
+
// host can react, then advance to the next step with
|
|
217
|
+
// whatever text the step produced. Hitting the budget on
|
|
218
|
+
// one step shouldn't kill the plan — the next step's
|
|
219
|
+
// summary chain can still seed downstream work.
|
|
220
|
+
//
|
|
221
|
+
// All other errors (abort, provider failure, etc.)
|
|
222
|
+
// propagate and stop the plan.
|
|
223
|
+
if (/exceeded maxTurns/i.test(ev.message)) {
|
|
224
|
+
stepExceededBudget = true;
|
|
225
|
+
yield {
|
|
226
|
+
kind: 'custom',
|
|
227
|
+
name: 'step_budget_exhausted',
|
|
228
|
+
data: { stepId: step.id, message: ev.message },
|
|
229
|
+
};
|
|
230
|
+
break;
|
|
231
|
+
}
|
|
232
|
+
yield ev;
|
|
233
|
+
return;
|
|
234
|
+
}
|
|
235
|
+
yield ev;
|
|
236
|
+
}
|
|
237
|
+
// Silence the lint: a fresh `let` that the planner-executor
|
|
238
|
+
// tracks for follow-up branches that may surface it on the
|
|
239
|
+
// emitted `step_completed` event. v1 keeps it private.
|
|
240
|
+
void stepExceededBudget;
|
|
241
|
+
// 4d. Summarize and record.
|
|
242
|
+
const summary = summarizeStep(step.id, stepAssistantText);
|
|
243
|
+
stepSummaries.push({
|
|
244
|
+
stepId: step.id,
|
|
245
|
+
description: step.description,
|
|
246
|
+
summary,
|
|
247
|
+
});
|
|
248
|
+
yield {
|
|
249
|
+
kind: 'custom',
|
|
250
|
+
name: 'step_completed',
|
|
251
|
+
data: { stepId: step.id, summary },
|
|
252
|
+
};
|
|
253
|
+
}
|
|
254
|
+
// 5. Plan complete.
|
|
255
|
+
yield {
|
|
256
|
+
kind: 'custom',
|
|
257
|
+
name: 'plan_completed',
|
|
258
|
+
data: {
|
|
259
|
+
skill: matchedEntry.name,
|
|
260
|
+
steps: stepSummaries.map((s) => ({ stepId: s.stepId, summary: s.summary })),
|
|
261
|
+
},
|
|
262
|
+
};
|
|
263
|
+
},
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
function buildStepSystemPrompt(basePrompt, stepId, stepDescription, stepIndex, stepCount, hasPriorSummaries) {
|
|
267
|
+
// 1-indexed for human-readable display ("step 3 of 5").
|
|
268
|
+
const displayIndex = stepIndex + 1;
|
|
269
|
+
const suffix = hasPriorSummaries
|
|
270
|
+
? `You are on step ${displayIndex} of ${stepCount} (id: ${stepId}): ${stepDescription}. Prior step summaries follow as user messages — treat them as facts you have already established, not as new requests.`
|
|
271
|
+
: `You are on step ${displayIndex} of ${stepCount} (id: ${stepId}): ${stepDescription}. This is the first step.`;
|
|
272
|
+
return `${basePrompt}\n\n${suffix}`;
|
|
273
|
+
}
|
|
274
|
+
//# sourceMappingURL=planner-executor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"planner-executor.js","sourceRoot":"","sources":["../src/planner-executor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAyDG;AAGH,OAAO,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAElE,OAAO,EAAE,uBAAuB,EAAE,MAAM,eAAe,CAAC;AA8DxD,MAAM,sBAAsB,GAAG,CAAC,CAAC;AACjC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,OAAe,EAAE,UAAkB;IAC/D,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,CAAC;IAClC,IAAI,OAAO,CAAC,MAAM,IAAI,qBAAqB;QAAE,OAAO,OAAO,CAAC;IAC5D,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,qBAAqB,CAAC,GAAG,CAAC;AACvD,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,oBAAoB,GAAgB,CAC/C,MAAc,EACd,OAAoC,EACf,EAAE;IACvB,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,aAAa,CAAC;IAClD,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IACrC,IAAI,IAAI,GAAuD,IAAI,CAAC;IACpE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;YACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAChC,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;gBAAE,KAAK,IAAI,CAAC,CAAC;QACvD,CAAC;QACD,IAAI,KAAK,GAAG,CAAC,IAAI,CAAC,IAAI,KAAK,IAAI,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACvD,IAAI,GAAG,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IAC1C,OAAO,EAAE,KAAK,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC;AAClE,CAAC,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,6BAA6B,CAAC,UAAkC,EAAE;IAChF,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,aAAa,CAAC;IACjD,MAAM,YAAY,GAAG,OAAO,CAAC,YAAY,IAAI,sBAAsB,CAAC;IACpE,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,KAAK,KAAK,CAAC;IAC1D,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,IAAI,oBAAoB,CAAC;IACpE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,oBAAoB,CAAC;IAEtD,OAAO;QACL,EAAE,EAAE,kBAAkB;QACtB,KAAK,CAAC,CAAC,GAAG,CAAC,KAAuB,EAAE,MAAmB;YACrD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACnB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;gBAC5C,OAAO;YACT,CAAC;YAED,gEAAgE;
YAChE,+DAA+D;YAC/D,+DAA+D;YAC/D,oCAAoC;YACpC,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACnD,MAAM,WAAW,GAAG,CAAC,KAAgB,EAAE,EAAE,CACvC,OAAO,CAAC,OAAO,KAAK,SAAS;gBAC3B,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;gBACtB,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC;YAC5D,MAAM,YAAY,GAAG,QAAQ,CAAC,KAAK,KAAK,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAE7F,2DAA2D;YAC3D,IAAI,QAAQ,CAAC,KAAK,KAAK,IAAI,IAAI,YAAY,KAAK,SAAS,EAAE,CAAC;gBAC1D,IAAI,CAAC,eAAe,EAAE,CAAC;oBACrB,MAAM;wBACJ,IAAI,EAAE,OAAO;wBACb,OAAO,EAAE,oEAAoE;qBAC9E,CAAC;oBACF,OAAO;gBACT,CAAC;gBACD,+DAA+D;gBAC/D,wCAAwC;gBACxC,MAAM,GAAG,GAAG,uBAAuB,EAAE,CAAC;gBACtC,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,EAAE,CAAC;oBAC9C,MAAM,EAAE,CAAC;gBACX,CAAC;gBACD,OAAO;YACT,CAAC;YAED,mBAAmB;YACnB,MAAM,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC;YAChC,MAAM;gBACJ,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,cAAc;gBACpB,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY,CAAC,IAAI;oBACxB,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBAC5B;aACF,CAAC;YAEF,MAAM,aAAa,GAA+D,EAAE,CAAC;YACrF,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,IAAI,WAAW,CAAC;YAEjD,qBAAqB;YACrB,KAAK,IAAI,SAAS,GAAG,CAAC,EAAE,SAAS,GAAG,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC;gBAC7D,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;oBACnB,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;oBAC5C,OAAO;gBACT,CAAC;gBAED,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAE,CAAC;gBAC9B,MAAM;oBACJ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,cAAc;oBACpB,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE;iBACzD,CAAC;gBAEF,4DAA4D;gBAC5D,0DAA0D;gBAC1D,4DAA4D;gBAC5D,0DAA0D;gBAC1D,uDAAuD;gBACvD,MAAM,gBAAgB,GAAG,qBAAqB,CAC5C,KAAK,CAAC,YAAY,EAClB,IAAI,CAAC,EAAE,EACP,IAAI,CAAC,WAAW,EAChB,SAAS,EACT,IAAI,CAAC,MAAM,EACX,aAAa,CAAC,MAAM,GAAG,CAAC,CACzB,CAAC;gBACF,MAAM,WAAW,GAAkB,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;oBAC9D,EAAE,EAAE,gBAAgB,CAAC,EAAE;oBACvB,IAAI,EAAE,MAAM;oBACZ,IAAI,E
AAE,gBAAgB,CAAC,CAAC,MAAM,cAAc,CAAC,CAAC,OAAO,EAAE;iBACxD,CAAC,CAAC,CAAC;gBAEJ,+DAA+D;gBAC/D,2DAA2D;gBAC3D,2DAA2D;gBAC3D,4DAA4D;gBAC5D,8CAA8C;gBAC9C,6DAA6D;gBAC7D,gDAAgD;gBAChD,MAAM,WAAW,GAAG,uBAAuB,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC,CAAC;gBACxE,MAAM,SAAS,GAAqB;oBAClC,GAAG,KAAK;oBACR,OAAO,EAAE,WAAW;oBACpB,YAAY,EAAE,gBAAgB;oBAC9B,MAAM,EAAE,GAAG,YAAY,IAAI,IAAI,CAAC,EAAE,EAAE;oBACpC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;iBACpF,CAAC;gBAEF,IAAI,iBAAiB,GAAG,EAAE,CAAC;gBAC3B,IAAI,kBAAkB,GAAG,KAAK,CAAC;gBAC/B,IAAI,KAAK,EAAE,MAAM,EAAE,IAAI,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,CAAC;oBAC1D,IAAI,EAAE,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;wBACvB,iBAAiB,IAAI,EAAE,CAAC,KAAK,CAAC;oBAChC,CAAC;oBACD,4DAA4D;oBAC5D,sDAAsD;oBACtD,4DAA4D;oBAC5D,sDAAsD;oBACtD,oCAAoC;oBACpC,IAAI,EAAE,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;wBACxB,0DAA0D;wBAC1D,qDAAqD;wBACrD,uDAAuD;wBACvD,0DAA0D;wBAC1D,qDAAqD;wBACrD,yDAAyD;wBACzD,qDAAqD;wBACrD,gDAAgD;wBAChD,EAAE;wBACF,mDAAmD;wBACnD,+BAA+B;wBAC/B,IAAI,oBAAoB,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC1C,kBAAkB,GAAG,IAAI,CAAC;4BAC1B,MAAM;gCACJ,IAAI,EAAE,QAAQ;gCACd,IAAI,EAAE,uBAAuB;gCAC7B,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE,CAAC,OAAO,EAAE;6BAC/C,CAAC;4BACF,MAAM;wBACR,CAAC;wBACD,MAAM,EAAE,CAAC;wBACT,OAAO;oBACT,CAAC;oBACD,MAAM,EAAE,CAAC;gBACX,CAAC;gBACD,4DAA4D;gBAC5D,2DAA2D;gBAC3D,uDAAuD;gBACvD,KAAK,kBAAkB,CAAC;gBAExB,4BAA4B;gBAC5B,MAAM,OAAO,GAAG,aAAa,CAAC,IAAI,CAAC,EAAE,EAAE,iBAAiB,CAAC,CAAC;gBAC1D,aAAa,CAAC,IAAI,CAAC;oBACjB,MAAM,EAAE,IAAI,CAAC,EAAE;oBACf,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,OAAO;iBACR,CAAC,CAAC;gBACH,MAAM;oBACJ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,gBAAgB;oBACtB,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,EAAE,OAAO,EAAE;iBACnC,CAAC;YACJ,CAAC;YAED,oBAAoB;YACpB,MAAM;gBACJ,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,gBAAgB;gBACtB,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY,CAAC,IAAI;oBACxB,KAAK,EAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC
,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;iBAC5E;aACF,CAAC;QACJ,CAAC;KACF,CAAC;AACJ,CAAC;AAED,SAAS,qBAAqB,CAC5B,UAAkB,EAClB,MAAc,EACd,eAAuB,EACvB,SAAiB,EACjB,SAAiB,EACjB,iBAA0B;IAE1B,wDAAwD;IACxD,MAAM,YAAY,GAAG,SAAS,GAAG,CAAC,CAAC;IACnC,MAAM,MAAM,GAAG,iBAAiB;QAC9B,CAAC,CAAC,mBAAmB,YAAY,OAAO,SAAS,SAAS,MAAM,MAAM,eAAe,yHAAyH;QAC9M,CAAC,CAAC,mBAAmB,YAAY,OAAO,SAAS,SAAS,MAAM,MAAM,eAAe,2BAA2B,CAAC;IACnH,OAAO,GAAG,UAAU,OAAO,MAAM,EAAE,CAAC;AACtC,CAAC"}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Static catalog of skill workflows.
|
|
3
|
+
*
|
|
4
|
+
* Phase five of the implementation plan splits the planner-executor work
|
|
5
|
+
* into three branches: this catalog, the router that classifies a user
|
|
6
|
+
* prompt against the catalog, and the executor that walks a chosen
|
|
7
|
+
* skill's prescribed step sequence. This module is the data + types
|
|
8
|
+
* gate — no runtime logic. The router reads `triggerHints` to score
|
|
9
|
+
* candidate matches; the executor reads `steps` to materialize a plan.
|
|
10
|
+
*
|
|
11
|
+
* Each entry's `name` matches a value in the `SkillName` enum from
|
|
12
|
+
* `eval/fixture.ts` (re-exported here for convenience). Step
|
|
13
|
+
* descriptions are imperative, short, and model-agnostic. Per-step
|
|
14
|
+
* `verifier?` references existing starter / custom specs where one
|
|
15
|
+
* fits naturally; not every step has one. Leaf "compile / deploy /
|
|
16
|
+
* verify" steps tend to have a spec; intermediate read / draft steps
|
|
17
|
+
* usually don't.
|
|
18
|
+
*
|
|
19
|
+
* The catalog is hand-authored and intentionally `const`. The
|
|
20
|
+
* companion test in `test/skill-catalog.test.ts` asserts shape
|
|
21
|
+
* invariants: every `name` is in `SKILL_NAMES`, every entry has at
|
|
22
|
+
* least three trigger hints, every entry has between four and nine
|
|
23
|
+
* steps, every step id is unique and kebab-case within its plan, and
|
|
24
|
+
* every `verifier?.name` matches a spec registered by
|
|
25
|
+
* `registerAllSpecs(createSpecRegistry())`.
|
|
26
|
+
*/
|
|
27
|
+
import type { SkillName, SuccessSpecReference } from './eval/fixture.js';
|
|
28
|
+
/**
 * A single step of a skill's prescribed workflow. Steps are listed in
 * execution order on the owning catalog entry.
 *
 * The planner-executor reports `id` and `description` in its
 * `step_started` event and appends `id` to the turn id of the step's
 * bounded sub-loop. `verifier` uses the same `SuccessSpecReference`
 * shape that eval fixtures use, so a spec evaluator can consume it
 * without translation.
 *
 * NOTE(review): the planner-executor shipped alongside this file does
 * not appear to evaluate `verifier` between steps; treat gating on it
 * as intended rather than implemented behavior until confirmed.
 */
export interface PlanStep {
    /** Kebab-case identifier; stable, and unique among the steps of one plan. */
    id: string;
    /** Brief imperative statement of what the step does. */
    description: string;
    /** Success spec that can verify this step, when one fits. */
    verifier?: SuccessSpecReference;
}
|
|
47
|
+
/**
 * A catalog row describing one skill workflow: its identity (`name`,
 * `description`), the signals used to route a prompt to it
 * (`triggerHints`), and the plan to execute (`steps`).
 */
export interface SkillCatalogEntry {
    /** Skill identifier; one of the values in `SKILL_NAMES` (`eval/fixture.ts`). */
    name: SkillName;
    /** One-line summary of the skill. */
    description: string;
    /**
     * Keyword tokens looked for in the user prompt. The default keyword
     * router compares case-insensitively by substring, ignores empty
     * hints, and scores an entry by how many of its hints occur, so
     * order is irrelevant and a single hit makes the entry a candidate.
     */
    triggerHints: ReadonlyArray<string>;
    /** The workflow, as steps in the order they are to be performed. */
    steps: ReadonlyArray<PlanStep>;
}
|
|
64
|
+
/** An immutable list of catalog entries. */
export type SkillCatalog = ReadonlyArray<SkillCatalogEntry>;
/**
 * The built-in skill catalog. The planner-executor falls back to it
 * when no `catalog` option is supplied.
 */
export declare const SKILL_CATALOG: SkillCatalog;
|
|
67
|
+
/**
 * Find the catalog entry for a skill name.
 *
 * The planner-executor calls this to turn a router decision into a
 * plan whenever no custom catalog was supplied.
 *
 * @param name - Skill to look up.
 * @returns The matching entry, or `undefined` when the catalog has no
 * entry with that name.
 */
export declare function getSkillEntry(name: SkillName): SkillCatalogEntry | undefined;
|
|
74
|
+
/**
 * Enumerate the skill names that have a catalog entry, preserving
 * catalog order.
 *
 * Intended for iterating candidate skills and for the test suite's
 * check that the catalog covers every value in `SKILL_NAMES`.
 * NOTE(review): the default keyword router iterates catalog entries
 * directly rather than calling this; confirm remaining callers.
 *
 * @returns The names, in catalog order, as a read-only array.
 */
export declare function listSkillNames(): ReadonlyArray<SkillName>;
|
|
81
|
+
//# sourceMappingURL=skill-catalog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-catalog.d.ts","sourceRoot":"","sources":["../src/skill-catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AAWzE;;;;;;;;;;GAUG;AACH,MAAM,WAAW,QAAQ;IACvB,oDAAoD;IACpD,EAAE,EAAE,MAAM,CAAC;IACX,0DAA0D;IAC1D,WAAW,EAAE,MAAM,CAAC;IACpB,oEAAoE;IACpE,QAAQ,CAAC,EAAE,oBAAoB,CAAC;CACjC;AAED;;;GAGG;AACH,MAAM,WAAW,iBAAiB;IAChC,+DAA+D;IAC/D,IAAI,EAAE,SAAS,CAAC;IAChB,wEAAwE;IACxE,WAAW,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,YAAY,EAAE,SAAS,MAAM,EAAE,CAAC;IAChC,2DAA2D;IAC3D,KAAK,EAAE,SAAS,QAAQ,EAAE,CAAC;CAC5B;AAED,uCAAuC;AACvC,MAAM,MAAM,YAAY,GAAG,SAAS,iBAAiB,EAAE,CAAC;AAQxD,eAAO,MAAM,aAAa,EAAE,YA8V3B,CAAC;AAcF;;;;;GAKG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,SAAS,GAAG,iBAAiB,GAAG,SAAS,CAE5E;AAED;;;;;GAKG;AACH,wBAAgB,cAAc,IAAI,SAAS,SAAS,EAAE,CAErD"}
|