@inbrowser/agent 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/diagnostics/index.d.ts +5 -0
- package/dist/diagnostics/index.d.ts.map +1 -0
- package/dist/diagnostics/index.js +3 -0
- package/dist/diagnostics/index.js.map +1 -0
- package/dist/diagnostics/timing.d.ts +48 -0
- package/dist/diagnostics/timing.d.ts.map +1 -0
- package/dist/diagnostics/timing.js +85 -0
- package/dist/diagnostics/timing.js.map +1 -0
- package/dist/diagnostics/truthfulness.d.ts +36 -0
- package/dist/diagnostics/truthfulness.d.ts.map +1 -0
- package/dist/diagnostics/truthfulness.js +180 -0
- package/dist/diagnostics/truthfulness.js.map +1 -0
- package/dist/dispatch-memoization.d.ts +84 -0
- package/dist/dispatch-memoization.d.ts.map +1 -0
- package/dist/dispatch-memoization.js +197 -0
- package/dist/dispatch-memoization.js.map +1 -0
- package/dist/eval/comparison-report.d.ts +164 -0
- package/dist/eval/comparison-report.d.ts.map +1 -0
- package/dist/eval/comparison-report.js +316 -0
- package/dist/eval/comparison-report.js.map +1 -0
- package/dist/eval/fixture.d.ts +74 -0
- package/dist/eval/fixture.d.ts.map +1 -0
- package/dist/eval/fixture.js +217 -0
- package/dist/eval/fixture.js.map +1 -0
- package/dist/eval/index.d.ts +13 -0
- package/dist/eval/index.d.ts.map +1 -0
- package/dist/eval/index.js +7 -0
- package/dist/eval/index.js.map +1 -0
- package/dist/eval/load-node.d.ts +16 -0
- package/dist/eval/load-node.d.ts.map +1 -0
- package/dist/eval/load-node.js +58 -0
- package/dist/eval/load-node.js.map +1 -0
- package/dist/eval/metric-collector.d.ts +209 -0
- package/dist/eval/metric-collector.d.ts.map +1 -0
- package/dist/eval/metric-collector.js +293 -0
- package/dist/eval/metric-collector.js.map +1 -0
- package/dist/eval/run-record.d.ts +76 -0
- package/dist/eval/run-record.d.ts.map +1 -0
- package/dist/eval/run-record.js +32 -0
- package/dist/eval/run-record.js.map +1 -0
- package/dist/eval/runner.d.ts +140 -0
- package/dist/eval/runner.d.ts.map +1 -0
- package/dist/eval/runner.js +310 -0
- package/dist/eval/runner.js.map +1 -0
- package/dist/eval/spec-framework.d.ts +113 -0
- package/dist/eval/spec-framework.d.ts.map +1 -0
- package/dist/eval/spec-framework.js +100 -0
- package/dist/eval/spec-framework.js.map +1 -0
- package/dist/eval/spec-helpers.d.ts +245 -0
- package/dist/eval/spec-helpers.d.ts.map +1 -0
- package/dist/eval/spec-helpers.js +605 -0
- package/dist/eval/spec-helpers.js.map +1 -0
- package/dist/index.d.ts +24 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -1
- package/dist/index.js.map +1 -1
- package/dist/node.d.ts +1 -0
- package/dist/node.d.ts.map +1 -1
- package/dist/node.js +1 -0
- package/dist/node.js.map +1 -1
- package/dist/planner-executor.d.ts +132 -0
- package/dist/planner-executor.d.ts.map +1 -0
- package/dist/planner-executor.js +274 -0
- package/dist/planner-executor.js.map +1 -0
- package/dist/skill-catalog.d.ts +81 -0
- package/dist/skill-catalog.d.ts.map +1 -0
- package/dist/skill-catalog.js +388 -0
- package/dist/skill-catalog.js.map +1 -0
- package/dist/skill-router.d.ts +95 -0
- package/dist/skill-router.d.ts.map +1 -0
- package/dist/skill-router.js +130 -0
- package/dist/skill-router.js.map +1 -0
- package/dist/strategy.d.ts +20 -1
- package/dist/strategy.d.ts.map +1 -1
- package/dist/strategy.js +333 -13
- package/dist/strategy.js.map +1 -1
- package/dist/tools.d.ts +15 -1
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +18 -0
- package/dist/tools.js.map +1 -1
- package/dist/types/strategy.d.ts +48 -0
- package/dist/types/strategy.d.ts.map +1 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/types/tools.d.ts.map +1 -1
- package/dist/types/trace.d.ts +59 -9
- package/dist/types/trace.d.ts.map +1 -1
- package/dist/types/trace.js +5 -3
- package/dist/types/trace.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Static catalog of skill workflows.
|
|
3
|
+
*
|
|
4
|
+
* Phase five of the implementation plan splits the planner-executor work
|
|
5
|
+
* into three branches: this catalog, the router that classifies a user
|
|
6
|
+
* prompt against the catalog, and the executor that walks a chosen
|
|
7
|
+
* skill's prescribed step sequence. This module is the data + types
|
|
8
|
+
* gate — no runtime logic beyond two lookup helpers. The router reads `triggerHints` to score
|
|
9
|
+
* candidate matches; the executor reads `steps` to materialize a plan.
|
|
10
|
+
*
|
|
11
|
+
* Each entry's `name` matches a value in the `SkillName` enum from
|
|
12
|
+
* `eval/fixture.ts` (re-exported here for convenience). Step
|
|
13
|
+
* descriptions are imperative, short, and model-agnostic. Per-step
|
|
14
|
+
* `verifier?` references existing starter / custom specs where one
|
|
15
|
+
* fits naturally; not every step has one. Leaf "compile / deploy /
|
|
16
|
+
* verify" steps tend to have a spec; intermediate read / draft steps
|
|
17
|
+
* usually don't.
|
|
18
|
+
*
|
|
19
|
+
* The catalog is hand-authored and intentionally `const`. The
|
|
20
|
+
* companion test in `test/skill-catalog.test.ts` asserts shape
|
|
21
|
+
* invariants: every `name` is in `SKILL_NAMES`, every entry has at
|
|
22
|
+
* least three trigger hints, every entry has between four and nine
|
|
23
|
+
* steps, every step id is unique and kebab-case within its plan, and
|
|
24
|
+
* every `verifier?.name` matches a spec registered by
|
|
25
|
+
* `registerAllSpecs(createSpecRegistry())`.
|
|
26
|
+
*/
|
|
27
|
+
import { SPEC_FINAL_RULES_INCLUDES_LITERAL, SPEC_FINAL_RUNTIME_RUN_SUMMARY_OK, SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT, SPEC_PYRIC_AGENTS_LINT_CLEAN_AND_RULE_REJECTS_CHEAT, SPEC_REPORT_MENTIONS_ALL_OF, SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, SPEC_TRACE_CONTAINS_TOOL_CALL_BY_NAME, } from './eval/spec-helpers.js';
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// The catalog. One entry per in-scope skill, in `SKILL_NAMES` order.
|
|
30
|
+
// Step content is derived from each skill's `SKILL.md` playbook in the
|
|
31
|
+
// downstream firebase-agent-sdk repo.
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Hand-authored skill workflows, one array element per skill.
 *
 * Entry shape: `name`, `description`, `triggerHints` (keyword list) and
 * `steps`. Step shape: kebab-case `id`, imperative `description`, and an
 * optional `verifier` made of a spec `name` plus optional `args`.
 *
 * NOTE(review): a few hints are substrings of a sibling hint in the same
 * entry ('model' / 'modeling', 'prompt' / 'prompts'). If the router counts
 * hints by substring containment, a prompt with the longer word scores both
 * hints — confirm that weighting is intended.
 */
export const SKILL_CATALOG = [
    // --- firestore-rules-audit -------------------------------------------
    {
        name: 'firestore-rules-audit',
        description: 'Audit Firestore security rules for vulnerabilities, semantic errors, and structural anti-patterns.',
        triggerHints: ['firestore', 'audit', 'rules', 'security', 'vulnerability', 'review'],
        steps: [
            { id: 'inspect-rules', description: 'Fetch deployed Firestore rules, parse to AST, and run validator checks.' },
            { id: 'triage-findings', description: 'Group validator findings by severity: critical, high, medium, low.' },
            { id: 'cross-reference', description: 'Combine findings on the same path to surface compound vulnerabilities.' },
            { id: 'apply-context', description: 'Escalate or de-escalate findings based on path purpose and data sensitivity.' },
            {
                id: 'compile-report',
                description: 'Compile prioritized health report with score, findings, and remediation.',
                verifier: { name: SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, args: { tokens: ['critical', 'high', 'finding'] } },
            },
        ],
    },
    // --- firebase-project-audit ------------------------------------------
    {
        name: 'firebase-project-audit',
        description: 'Audit an unknown Firebase project for auth gaps, rule weaknesses, and structural issues across services.',
        triggerHints: ['firebase', 'project', 'audit', 'inspect', 'health', 'unknown'],
        steps: [
            { id: 'inspect-project', description: 'Inspect auth, rules, and database structure in a single crawl.' },
            { id: 'assess-auth', description: 'Evaluate enabled providers and surface auth configuration gaps.' },
            { id: 'assess-rules', description: 'Walk the rule tree and flag open paths, broken expressions, and warnings.' },
            { id: 'assess-structure', description: 'Walk the data structure for deep nesting, god nodes, and array patterns.' },
            { id: 'cross-reference-rules-data', description: 'Compare rule paths against data paths to find unprotected data and orphan rules.' },
            {
                id: 'compile-report',
                description: 'Compile prioritized report with health score, findings, and recommended skills to load.',
                verifier: { name: SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF, args: { tokens: ['health', 'finding', 'recommendation'] } },
            },
        ],
    },
    // --- rtdb-data-modeling ----------------------------------------------
    {
        name: 'rtdb-data-modeling',
        description: 'Analyze and recommend Realtime Database structures with flat collections, fan-out writes, and index tables.',
        triggerHints: ['rtdb', 'realtime', 'data', 'model', 'modeling', 'denormalize', 'structure', 'schema'],
        steps: [
            { id: 'crawl-structure', description: 'Crawl database structure to inventory top-level keys, depth, and child counts.' },
            { id: 'sample-data', description: 'Read representative paths to compare stored shape against per-screen needs.' },
            { id: 'check-god-nodes', description: 'Identify unbounded lists and god nodes that grow without per-user scoping.' },
            { id: 'evaluate-queries', description: 'Map each app list to a single orderBy path and flag multi-field filter needs.' },
            { id: 'propose-restructure', description: 'Propose flat collections, index tables, and summary collections via fan-out writes.' },
            {
                id: 'verify-restructure',
                description: 'Validate the proposed shape with a multi-path write and a re-crawl of the structure.',
                verifier: { name: SPEC_TRACE_CONTAINS_TOOL_CALL_BY_NAME, args: { tool: 'crawl_database_structure', minCount: 1 } },
            },
        ],
    },
    // --- firebase-security-rules -----------------------------------------
    {
        name: 'firebase-security-rules',
        description: 'Author, simulate, and deploy Realtime Database security rules with auth, validation, and immutability patterns.',
        triggerHints: ['rtdb', 'realtime', 'rules', 'security', 'authz', 'access', 'validate'],
        steps: [
            { id: 'inspect-current-rules', description: 'Inspect deployed rules and read linter warnings on existing expressions.' },
            { id: 'design-change', description: 'Identify the rule changes required by the request or by prior audit findings.' },
            { id: 'validate-expressions', description: 'Build and validate each rule expression before assembling the ruleset IR.' },
            { id: 'simulate-access', description: 'Simulate positive, negative, cross-user, and validation scenarios against the draft rules.' },
            {
                id: 'deploy-rules',
                description: 'Deploy the full ruleset after every simulation passes.',
                verifier: { name: SPEC_FINAL_RULES_INCLUDES_LITERAL, args: { literal: 'auth' } },
            },
            { id: 'verify-deployment', description: 'Re-inspect the deployed rules to confirm they match the intended IR.' },
        ],
    },
    // --- firebase-client-sdk ---------------------------------------------
    {
        name: 'firebase-client-sdk',
        description: 'Generate correct Firebase web client code for init, auth gating, listeners, fan-out writes, and queries.',
        triggerHints: ['client', 'sdk', 'web', 'javascript', 'app', 'listener', 'init'],
        steps: [
            { id: 'get-config', description: 'Fetch the project client config for initializeApp.' },
            { id: 'inspect-rules-for-auth', description: 'Inspect deployed rules to determine the auth state the client must satisfy.' },
            { id: 'check-indexes', description: 'Verify indexOn declarations cover every planned orderByChild query.' },
            { id: 'generate-init-and-auth', description: 'Emit initialization, the auth gate, and the sign-in flow.' },
            { id: 'generate-reads', description: 'Emit entity listeners, fan-in patterns, or one-time reads as needed.' },
            { id: 'generate-writes', description: 'Emit set, update at path, and multi-location update writes per intent.' },
            { id: 'add-cleanup-and-errors', description: 'Add listener unsubscription and permission-denied error handling.' },
            {
                id: 'verify-run',
                description: 'Run the generated code once and confirm the run summary reports success.',
                verifier: { name: SPEC_FINAL_RUNTIME_RUN_SUMMARY_OK },
            },
        ],
    },
    // --- pyric-agents ----------------------------------------------------
    {
        name: 'pyric-agents',
        description: 'Author Firestore rules and seed data through the pyric MCP tools with lint gating and reversible commits.',
        triggerHints: ['pyric', 'firestore', 'rules', 'seed', 'lint', 'mcp'],
        steps: [
            { id: 'draft-rules', description: 'Draft the Firestore rules source in conversation.' },
            { id: 'lint-rules', description: 'Lint the draft and iterate until errors are empty.' },
            { id: 'write-rules', description: 'Write the lint-clean rules through the gated write tool.' },
            { id: 'draft-seed', description: 'Compose seed data as collection-to-documents arrays.' },
            {
                id: 'write-seed',
                description: 'Write the shape-validated seed through the gated write tool.',
                verifier: { name: SPEC_PYRIC_AGENTS_LINT_CLEAN_AND_RULE_REJECTS_CHEAT },
            },
            { id: 'surface-audit-log', description: 'Point the user at the emitted plan and commit event ids.' },
        ],
    },
    // --- playground-prompts ----------------------------------------------
    {
        name: 'playground-prompts',
        description: 'Generate well-shaped 30-50 word playground prompts with a bounded domain and a rule-enforced security boundary.',
        triggerHints: ['playground', 'prompt', 'prompts', 'demo', 'test', 'scenario'],
        steps: [
            { id: 'pick-dimension', description: 'Pick the capability dimension to exercise (rules, transactions, queries, listeners).' },
            { id: 'pick-domain', description: 'Pick a bounded familiar domain with two collections and a relationship.' },
            { id: 'specify-security-boundary', description: 'Specify a security boundary tedious to enforce in client code alone.' },
            { id: 'draft-prompt', description: 'Draft a 30-50 word prompt that fills in actors, data model, and constraint.' },
            {
                id: 'tag-capabilities',
                description: 'Tag what the prompt exercises so the consumer can pick by capability.',
                verifier: {
                    name: SPEC_REPORT_MENTIONS_AT_LEAST_ONE_OF,
                    args: { tokens: ['exercises', 'capability', 'rules', 'state', 'membership'] },
                },
            },
        ],
    },
    // --- rtdb-game-rules -------------------------------------------------
    {
        name: 'rtdb-game-rules',
        description: 'Design and deploy Realtime Database security rules for turn-based games with turn guards and win checks.',
        triggerHints: ['rtdb', 'realtime', 'game', 'turn', 'multiplayer', 'rules'],
        steps: [
            { id: 'identify-players', description: 'Identify players, their marks, and how they map to authenticated UIDs.' },
            { id: 'design-turn-flow', description: 'Design the turn marker, alternation order, and turn-guard write rule.' },
            { id: 'design-board-and-moves', description: 'Design the board layout, move protocol, and per-cell validation.' },
            { id: 'design-win-and-end', description: 'Design win helpers, terminal states, and the winner validation rule.' },
            { id: 'compose-ruleset', description: 'Compose the full ruleset from turnGuard, flip, winCheckHelper, and helpers.' },
            {
                id: 'simulate-and-deploy',
                description: 'Simulate positive and cheating moves, then deploy the ruleset.',
                verifier: {
                    name: SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT,
                    args: { database: 'rtdb' },
                },
            },
        ],
    },
    // --- firestore-game-rules --------------------------------------------
    {
        name: 'firestore-game-rules',
        description: 'Design and deploy Firestore security rules for turn-based games with split-allow rules and dynamic field keys.',
        triggerHints: ['firestore', 'game', 'turn', 'multiplayer', 'rules', 'split-allow'],
        steps: [
            { id: 'design-board', description: 'Design a flat board map with statically named cell fields.' },
            { id: 'design-turns', description: 'Design turn enforcement with resource.data and a turn-flip validation.' },
            { id: 'design-move-validity', description: 'Design placement or movement validity with dynamic field keys or a config document.' },
            { id: 'design-win-detection', description: 'Enumerate winning lines or piece-counter wins as static expressions.' },
            { id: 'split-allow-rules', description: 'Split the update rule into normal-move, winning-move, and draw branches to stay under the complexity ceiling.' },
            {
                id: 'generate-and-deploy',
                description: 'Generate the rules via code, deploy them, and verify under the test API.',
                verifier: {
                    name: SPEC_GAME_RULES_SIMULATOR_ACCEPTS_POSITIVE_AND_REJECTS_CHEAT,
                    args: { database: 'firestore' },
                },
            },
            {
                id: 'iterate-on-complexity',
                description: 'If valid moves are denied, re-split the rules and move expensive checks into separate allow branches.',
                verifier: { name: SPEC_REPORT_MENTIONS_ALL_OF, args: { tokens: ['allow', 'update'] } },
            },
        ],
    },
];
|
|
363
|
+
// ---------------------------------------------------------------------------
|
|
364
|
+
// Helpers. The router and executor will read the catalog through these to
|
|
365
|
+
// keep their implementations narrow; the catalog itself is exported as a
|
|
366
|
+
// value so callers can iterate without going through them.
|
|
367
|
+
// ---------------------------------------------------------------------------
|
|
368
|
+
// Name -> entry index, filled once at module load by walking the catalog in
// order. A repeated name would keep the later entry, as with Map construction
// from a list of pairs.
const ENTRY_BY_NAME = new Map();
for (const catalogEntry of SKILL_CATALOG) {
    ENTRY_BY_NAME.set(catalogEntry.name, catalogEntry);
}
|
|
369
|
+
// Skill names in catalog order, computed once at module load.
const NAMES = Array.from(SKILL_CATALOG, (catalogEntry) => catalogEntry.name);
|
|
370
|
+
/**
|
|
371
|
+
* Look up a catalog entry by skill name. Returns `undefined` when the
|
|
372
|
+
* name is not in the catalog. The router will use this to materialize
|
|
373
|
+
* an entry after picking a winner; the executor will use it to
|
|
374
|
+
* unwrap a router decision into a plan.
|
|
375
|
+
*/
|
|
376
|
+
export function getSkillEntry(name) {
    // Map#get yields `undefined` for names that were never indexed, which is
    // this function's "not in the catalog" result.
    const entry = ENTRY_BY_NAME.get(name);
    return entry;
}
|
|
379
|
+
/**
|
|
380
|
+
* List every skill name present in the catalog, in catalog order. The
|
|
381
|
+
* router uses this to iterate candidates when scoring a prompt; the
|
|
382
|
+
* test suite uses it to assert the catalog covers every value in
|
|
383
|
+
* `SKILL_NAMES`.
|
|
384
|
+
*/
|
|
385
|
+
export function listSkillNames() {
    // Returns the module-level array itself (no copy is made), so the order
    // is exactly the catalog's order.
    const catalogOrderedNames = NAMES;
    return catalogOrderedNames;
}
|
|
388
|
+
//# sourceMappingURL=skill-catalog.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-catalog.js","sourceRoot":"","sources":["../src/skill-catalog.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAGH,OAAO,EACL,iCAAiC,EACjC,iCAAiC,EACjC,4DAA4D,EAC5D,mDAAmD,EACnD,2BAA2B,EAC3B,oCAAoC,EACpC,qCAAqC,GACtC,MAAM,wBAAwB,CAAC;AA2ChC,8EAA8E;AAC9E,qEAAqE;AACrE,uEAAuE;AACvE,sCAAsC;AACtC,8EAA8E;AAE9E,MAAM,CAAC,MAAM,aAAa,GAAiB;IACzC;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EACT,oGAAoG;QACtG,YAAY,EAAE,CAAC,WAAW,EAAE,OAAO,EAAE,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,QAAQ,CAAC;QACpF,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,eAAe;gBACnB,WAAW,EAAE,yEAAyE;aACvF;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,oEAAoE;aAClF;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,wEAAwE;aACtF;YACD;gBACE,EAAE,EAAE,eAAe;gBACnB,WAAW,EAAE,8EAA8E;aAC5F;YACD;gBACE,EAAE,EAAE,gBAAgB;gBACpB,WAAW,EAAE,0EAA0E;gBACvF,QAAQ,EAAE;oBACR,IAAI,EAAE,oCAAoC;oBAC1C,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,UAAU,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE;iBAClD;aACF;SACF;KACF;IACD;QACE,IAAI,EAAE,wBAAwB;QAC9B,WAAW,EACT,0GAA0G;QAC5G,YAAY,EAAE,CAAC,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC;QAC9E,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,gEAAgE;aAC9E;YACD;gBACE,EAAE,EAAE,aAAa;gBACjB,WAAW,EAAE,iEAAiE;aAC/E;YACD;gBACE,EAAE,EAAE,cAAc;gBAClB,WAAW,EAAE,2EAA2E;aACzF;YACD;gBACE,EAAE,EAAE,kBAAkB;gBACtB,WAAW,EAAE,0EAA0E;aACxF;YACD;gBACE,EAAE,EAAE,4BAA4B;gBAChC,WAAW,EACT,kFAAkF;aACrF;YACD;gBACE,EAAE,EAAE,gBAAgB;gBACpB,WAAW,EACT,yFAAyF;gBAC3F,QAAQ,EAAE;oBACR,IAAI,EAAE,oCAAoC;oBAC1C,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,QAAQ,EAAE,SAAS,EAAE,gBAAgB,CAAC,EAAE;iBAC1D;aACF;SACF;KACF;IACD;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EACT,6GAA6G;QAC/G,YAAY,EAAE;YACZ,MAAM;YACN,UAAU;YACV,MAAM;YACN,OAAO;YACP,UAAU;YACV,aAAa;YACb,WAAW;YACX,QAAQ;SACT;QACD,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EACT,gFAAgF;aACnF;YACD;gBACE,EAAE,EAAE,aAAa;gBACjB,WAAW,EAAE,6EAA6E;aAC3F;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,4EAA4E;aAC1F;YACD;gBACE,EAAE,EAAE,kBAAkB;gBACtB,WAAW,EACT,+EAA+E;aAClF;YACD;gBACE,EAAE,EAAE,qBAAqB;gBACzB,WAAW,EACT,qFAA
qF;aACxF;YACD;gBACE,EAAE,EAAE,oBAAoB;gBACxB,WAAW,EACT,sFAAsF;gBACxF,QAAQ,EAAE;oBACR,IAAI,EAAE,qCAAqC;oBAC3C,IAAI,EAAE,EAAE,IAAI,EAAE,0BAA0B,EAAE,QAAQ,EAAE,CAAC,EAAE;iBACxD;aACF;SACF;KACF;IACD;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EACT,iHAAiH;QACnH,YAAY,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,UAAU,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,CAAC;QACtF,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,uBAAuB;gBAC3B,WAAW,EAAE,0EAA0E;aACxF;YACD;gBACE,EAAE,EAAE,eAAe;gBACnB,WAAW,EACT,+EAA+E;aAClF;YACD;gBACE,EAAE,EAAE,sBAAsB;gBAC1B,WAAW,EAAE,2EAA2E;aACzF;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EACT,4FAA4F;aAC/F;YACD;gBACE,EAAE,EAAE,cAAc;gBAClB,WAAW,EAAE,wDAAwD;gBACrE,QAAQ,EAAE;oBACR,IAAI,EAAE,iCAAiC;oBACvC,IAAI,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE;iBAC1B;aACF;YACD;gBACE,EAAE,EAAE,mBAAmB;gBACvB,WAAW,EAAE,sEAAsE;aACpF;SACF;KACF;IACD;QACE,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EACT,0GAA0G;QAC5G,YAAY,EAAE,CAAC,QAAQ,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC;QAC/E,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,YAAY;gBAChB,WAAW,EAAE,oDAAoD;aAClE;YACD;gBACE,EAAE,EAAE,wBAAwB;gBAC5B,WAAW,EAAE,6EAA6E;aAC3F;YACD;gBACE,EAAE,EAAE,eAAe;gBACnB,WAAW,EAAE,qEAAqE;aACnF;YACD;gBACE,EAAE,EAAE,wBAAwB;gBAC5B,WAAW,EAAE,2DAA2D;aACzE;YACD;gBACE,EAAE,EAAE,gBAAgB;gBACpB,WAAW,EAAE,sEAAsE;aACpF;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,wEAAwE;aACtF;YACD;gBACE,EAAE,EAAE,wBAAwB;gBAC5B,WAAW,EAAE,mEAAmE;aACjF;YACD;gBACE,EAAE,EAAE,YAAY;gBAChB,WAAW,EAAE,0EAA0E;gBACvF,QAAQ,EAAE,EAAE,IAAI,EAAE,iCAAiC,EAAE;aACtD;SACF;KACF;IACD;QACE,IAAI,EAAE,cAAc;QACpB,WAAW,EACT,2GAA2G;QAC7G,YAAY,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,CAAC;QACpE,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,aAAa;gBACjB,WAAW,EAAE,mDAAmD;aACjE;YACD;gBACE,EAAE,EAAE,YAAY;gBAChB,WAAW,EAAE,oDAAoD;aAClE;YACD;gBACE,EAAE,EAAE,aAAa;gBACjB,WAAW,EAAE,0DAA0D;aACxE;YACD;gBACE,EAAE,EAAE,YAAY;gBAChB,WAAW,EAAE,sDAAsD;aACpE;YACD;gBACE,EAAE,EAAE,YAAY;gBAChB,WAAW,EAAE,8DAA8D;gBAC3E,QAAQ,EAAE;oBACR,IAAI,EAAE,mDAAmD;iBAC1D;aACF;YACD;gBACE,EAAE,EAAE,mBAAmB;gBACvB,WAAW,EAAE,0DAA0D;aACxE;SACF;KACF;IACD;Q
ACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EACT,iHAAiH;QACnH,YAAY,EAAE,CAAC,YAAY,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,CAAC;QAC7E,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,gBAAgB;gBACpB,WAAW,EACT,sFAAsF;aACzF;YACD;gBACE,EAAE,EAAE,aAAa;gBACjB,WAAW,EAAE,yEAAyE;aACvF;YACD;gBACE,EAAE,EAAE,2BAA2B;gBAC/B,WAAW,EAAE,sEAAsE;aACpF;YACD;gBACE,EAAE,EAAE,cAAc;gBAClB,WAAW,EAAE,6EAA6E;aAC3F;YACD;gBACE,EAAE,EAAE,kBAAkB;gBACtB,WAAW,EAAE,uEAAuE;gBACpF,QAAQ,EAAE;oBACR,IAAI,EAAE,oCAAoC;oBAC1C,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,WAAW,EAAE,YAAY,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,CAAC,EAAE;iBAC9E;aACF;SACF;KACF;IACD;QACE,IAAI,EAAE,iBAAiB;QACvB,WAAW,EACT,0GAA0G;QAC5G,YAAY,EAAE,CAAC,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,EAAE,OAAO,CAAC;QAC1E,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,kBAAkB;gBACtB,WAAW,EAAE,wEAAwE;aACtF;YACD;gBACE,EAAE,EAAE,kBAAkB;gBACtB,WAAW,EAAE,uEAAuE;aACrF;YACD;gBACE,EAAE,EAAE,wBAAwB;gBAC5B,WAAW,EAAE,kEAAkE;aAChF;YACD;gBACE,EAAE,EAAE,oBAAoB;gBACxB,WAAW,EAAE,sEAAsE;aACpF;YACD;gBACE,EAAE,EAAE,iBAAiB;gBACrB,WAAW,EAAE,6EAA6E;aAC3F;YACD;gBACE,EAAE,EAAE,qBAAqB;gBACzB,WAAW,EAAE,gEAAgE;gBAC7E,QAAQ,EAAE;oBACR,IAAI,EAAE,4DAA4D;oBAClE,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE;iBAC3B;aACF;SACF;KACF;IACD;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EACT,gHAAgH;QAClH,YAAY,EAAE,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,EAAE,OAAO,EAAE,aAAa,CAAC;QAClF,KAAK,EAAE;YACL;gBACE,EAAE,EAAE,cAAc;gBAClB,WAAW,EAAE,4DAA4D;aAC1E;YACD;gBACE,EAAE,EAAE,cAAc;gBAClB,WAAW,EAAE,wEAAwE;aACtF;YACD;gBACE,EAAE,EAAE,sBAAsB;gBAC1B,WAAW,EACT,qFAAqF;aACxF;YACD;gBACE,EAAE,EAAE,sBAAsB;gBAC1B,WAAW,EAAE,sEAAsE;aACpF;YACD;gBACE,EAAE,EAAE,mBAAmB;gBACvB,WAAW,EACT,+GAA+G;aAClH;YACD;gBACE,EAAE,EAAE,qBAAqB;gBACzB,WAAW,EAAE,0EAA0E;gBACvF,QAAQ,EAAE;oBACR,IAAI,EAAE,4DAA4D;oBAClE,IAAI,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE;iBAChC;aACF;YACD;gBACE,EAAE,EAAE,uBAAuB;gBAC3B,WAAW,EACT,uGAAuG;gBACzG,QAAQ,EAAE;oBACR,IAAI,EAAE,2BAA2B;oBACjC,IAAI,EAAE,EAAE,MAAM,EAAE,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE;iBACtC;aACF;SACF;KACF;CACF,CAAC;AAEF,8EAA8E;AAC9E,0EAA0E;AAC1E,yEAAyE;AACzE,2DAA2D;AAC3D,8EAA8E;AAE
9E,MAAM,aAAa,GAA8C,IAAI,GAAG,CACtE,aAAa,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAClD,CAAC;AAEF,MAAM,KAAK,GAAyB,aAAa,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;AAE7E;;;;;GAKG;AACH,MAAM,UAAU,aAAa,CAAC,IAAe;IAC3C,OAAO,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Keyword-based skill router.
|
|
3
|
+
*
|
|
4
|
+
* Phase five of the implementation plan splits the planner-executor
|
|
5
|
+
* work into three branches: the static `SkillCatalog`, this router
|
|
6
|
+
* that classifies a user prompt against the catalog, and the
|
|
7
|
+
* executor that walks a chosen skill's prescribed step sequence.
|
|
8
|
+
*
|
|
9
|
+
* The router is intentionally simple. It does not call a language
|
|
10
|
+
* model. It lowercases the prompt and counts how many of each
|
|
11
|
+
* catalog entry's `triggerHints` appear as substrings in the
|
|
12
|
+
* prompt. The entry with the highest hit count wins. Ties break
|
|
13
|
+
* in favor of entries whose literal `name` (e.g.
|
|
14
|
+
* `rtdb-game-rules`) appears in the prompt — either in the
|
|
15
|
+
* lowered text or in the prompt's kebab-case form (whitespace
|
|
16
|
+
* collapsed to dashes). After that, ties break by catalog order.
|
|
17
|
+
* When the score AND name-in-prompt signal both tie between the
|
|
18
|
+
* top entry and the runner-up, the router returns `match: null`
|
|
19
|
+
* rather than mis-route into a coin-flip pick.
|
|
20
|
+
*
|
|
21
|
+
* A v1 design point: confidence is the raw hit count. There is no
|
|
22
|
+
* normalization, no probability. The planner-executor decides
|
|
23
|
+
* whether a `RouterDecision.match` is good enough to act on. When
|
|
24
|
+
* no entry scores at or above `threshold` (default 1), the router
|
|
25
|
+
* returns `match: null` and the executor is expected to fall back
|
|
26
|
+
* to plain ReAct.
|
|
27
|
+
*
|
|
28
|
+
* Future work: an LLM-based router could layer on top of this
|
|
29
|
+
* keyword scoring (for example, only invoked when the keyword
|
|
30
|
+
* router returns null or returns a low-margin tie). That layering
|
|
31
|
+
* is explicitly deferred and is not in this branch.
|
|
32
|
+
*/
|
|
33
|
+
import type { SkillName } from './eval/fixture.js';
|
|
34
|
+
import { type SkillCatalog } from './skill-catalog.js';
|
|
35
|
+
/**
|
|
36
|
+
* A single scored entry in the router's ranking. `score` is the
|
|
37
|
+
* raw number of `triggerHints` that appeared as substrings of
|
|
38
|
+
* the lowercased prompt — no normalization or probability.
|
|
39
|
+
*/
|
|
40
|
+
export interface RouterMatch {
|
|
41
|
+
/** Name of the catalog skill this score belongs to. */
|
|
42
|
+
skill: SkillName;
|
|
43
|
+
/** Raw count of trigger hints found as substrings of the lowercased prompt. Higher is better. */
|
|
44
|
+
score: number;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* The router's verdict for one prompt. `match` is the top-scoring
|
|
48
|
+
* candidate when it cleared the threshold and is not ambiguously tied with the runner-up, otherwise `null`.
|
|
49
|
+
* `ranking` always reflects the full sorted scoring, useful for
|
|
50
|
+
* debugging accuracy and for callers that want to inspect runners-up.
|
|
51
|
+
*/
|
|
52
|
+
export interface RouterDecision {
|
|
53
|
+
/**
 * The top-scoring match. Null when no entry scored >= threshold, or when
 * the top entry ties with the runner-up on both score and name-in-prompt
 * (the ambiguity guard).
 */
|
|
54
|
+
match: RouterMatch | null;
|
|
55
|
+
/**
 * Every catalog entry with its score, sorted by score descending, then
 * name-in-prompt, then catalog order. Populated even when `match` is null.
 */
|
|
56
|
+
ranking: readonly RouterMatch[];
|
|
57
|
+
}
|
|
58
|
+
/**
 * Per-call overrides accepted by `routeSkill`.
 *
 * `threshold` sets the lowest `score` that still counts as a real
 * match; it defaults to 1, meaning one trigger hit is enough.
 * `catalog` swaps in a different catalog, which lets tests avoid
 * touching the global one.
 */
export interface RouterOptions {
    /** Lowest score that qualifies as a match. Defaults to 1. */
    threshold?: number;
    /** Catalog to route against instead of the default SKILL_CATALOG. */
    catalog?: SkillCatalog;
}
|
|
70
|
+
/**
 * Pick a catalog entry for a user prompt by keyword scoring.
 *
 * How the decision is made:
 *   1. The prompt is lowercased, and a kebab-case variant of it
 *      (whitespace runs replaced by dashes) is derived for name
 *      matching.
 *   2. Each entry's score is the number of its `triggerHints`
 *      whose lowercased form is a substring of the prompt.
 *   3. Entries are ranked by score (descending), then by
 *      name-in-prompt (entries whose literal skill name, e.g.
 *      `rtdb-game-rules`, occurs in the lowered prompt or its
 *      kebab-case variant come first), then by catalog order.
 *   4. The top entry becomes `match` when its score is at least
 *      `threshold` (default 1); otherwise `match` is `null`.
 *   5. Ambiguity guard: when the top entry and the runner-up have
 *      the same score and the same name-in-prompt signal, `match`
 *      is `null` instead of the catalog-order winner, because the
 *      router must never return a *wrong* skill and a coin-flip
 *      pick would be exactly that.
 *
 * `ranking` always lists every catalog entry, so runners-up can be
 * inspected whatever the match decision was.
 *
 * @param prompt - The user prompt to classify.
 * @param options - Optional threshold and catalog overrides.
 * @returns The match decision together with the full ranking.
 */
export declare function routeSkill(prompt: string, options?: RouterOptions): RouterDecision;
|
|
95
|
+
//# sourceMappingURL=skill-router.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-router.d.ts","sourceRoot":"","sources":["../src/skill-router.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAiB,KAAK,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAEtE;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAC1B,8CAA8C;IAC9C,KAAK,EAAE,SAAS,CAAC;IACjB,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;GAKG;AACH,MAAM,WAAW,cAAc;IAC7B,wEAAwE;IACxE,KAAK,EAAE,WAAW,GAAG,IAAI,CAAC;IAC1B,qEAAqE;IACrE,OAAO,EAAE,SAAS,WAAW,EAAE,CAAC;CACjC;AAED;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mEAAmE;IACnE,OAAO,CAAC,EAAE,YAAY,CAAC;CACxB;AAmCD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,UAAU,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,cAAc,CA6ClF"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Keyword-based skill router.
|
|
3
|
+
*
|
|
4
|
+
* Phase five of the implementation plan splits the planner-executor
|
|
5
|
+
* work into three branches: the static `SkillCatalog`, this router
|
|
6
|
+
* that classifies a user prompt against the catalog, and the
|
|
7
|
+
* executor that walks a chosen skill's prescribed step sequence.
|
|
8
|
+
*
|
|
9
|
+
* The router is intentionally simple. It does not call a language
|
|
10
|
+
* model. It lowercases the prompt and counts how many of each
|
|
11
|
+
* catalog entry's `triggerHints` appear as substrings in the
|
|
12
|
+
* prompt. The entry with the highest hit count wins. Ties break
|
|
13
|
+
* in favor of entries whose literal `name` (e.g.
|
|
14
|
+
* `rtdb-game-rules`) appears in the prompt — either in the
|
|
15
|
+
* lowered text or in the prompt's kebab-case form (whitespace
|
|
16
|
+
* collapsed to dashes). After that, ties break by catalog order.
|
|
17
|
+
* When the score AND name-in-prompt signal both tie between the
|
|
18
|
+
* top entry and the runner-up, the router returns `match: null`
|
|
19
|
+
* rather than mis-route into a coin-flip pick.
|
|
20
|
+
*
|
|
21
|
+
* A v1 design point: confidence is the raw hit count. There is no
|
|
22
|
+
* normalization, no probability. The planner-executor decides
|
|
23
|
+
* whether a `RouterDecision.match` is good enough to act on. When
|
|
24
|
+
* no entry scores at or above `threshold` (default 1), the router
|
|
25
|
+
* returns `match: null` and the executor is expected to fall back
|
|
26
|
+
* to plain ReAct.
|
|
27
|
+
*
|
|
28
|
+
* Future work: an LLM-based router could layer on top of this
|
|
29
|
+
* keyword scoring (for example, only invoked when the keyword
|
|
30
|
+
* router returns null or returns a low-margin tie). That layering
|
|
31
|
+
* is explicitly deferred and is not in this branch.
|
|
32
|
+
*/
|
|
33
|
+
import { SKILL_CATALOG } from './skill-catalog.js';
|
|
34
|
+
/**
 * Count how many distinct trigger hints occur as substrings of
 * `lowerPrompt`.
 *
 * Each hint is lowercased before the substring test, empty hints are
 * ignored, and a hint contributes at most one point however often it
 * occurs in the prompt ("number of hints that matched", not "total
 * occurrences"). Hints that are identical after lowercasing count as
 * one hint, so an entry that lists the same trigger twice cannot
 * inflate its own score.
 *
 * @param lowerPrompt - The prompt text, already lowercased by the caller.
 * @param hints - The trigger hints of one catalog entry.
 * @returns The number of distinct, non-empty hints found in the prompt.
 */
function scoreHints(lowerPrompt, hints) {
    // Lowercased hints already examined; keeps duplicates from scoring twice.
    const seen = new Set();
    let score = 0;
    for (const hint of hints) {
        const needle = hint.toLowerCase();
        if (needle.length === 0 || seen.has(needle)) {
            continue;
        }
        seen.add(needle);
        if (lowerPrompt.includes(needle)) {
            score += 1;
        }
    }
    return score;
}
|
|
51
|
+
/**
 * Tie-break signal: report whether a skill's literal `name` occurs in
 * the prompt.
 *
 * The name is looked for in both the lowercased prompt and its
 * kebab-case form (whitespace runs replaced by dashes), so a user who
 * types "rtdb game rules" still matches the `rtdb-game-rules` skill.
 * Because both prompt forms are lowercase, the name is lowercased
 * before the comparison. An empty name never counts as present, since
 * an empty string is a substring of every prompt and would make the
 * signal meaningless.
 *
 * @param lowerPrompt - The prompt text, already lowercased.
 * @param kebabPrompt - The kebab-case form of `lowerPrompt`.
 * @param name - The skill name to look for.
 * @returns `true` when the name appears in either prompt form.
 */
function nameInPrompt(lowerPrompt, kebabPrompt, name) {
    const needle = name.toLowerCase();
    if (needle.length === 0) {
        return false;
    }
    return lowerPrompt.includes(needle) || kebabPrompt.includes(needle);
}
|
|
62
|
+
/**
 * Derive the kebab-case form of an already-lowercased prompt by
 * replacing every run of whitespace with a single dash. Nothing is
 * trimmed, so leading or trailing whitespace becomes a leading or
 * trailing dash.
 *
 * @param lowerPrompt - The prompt text, already lowercased.
 * @returns The prompt with whitespace runs collapsed to dashes.
 */
function toKebabPrompt(lowerPrompt) {
    const whitespaceRun = /\s+/g;
    const dashed = lowerPrompt.replace(whitespaceRun, '-');
    return dashed;
}
|
|
65
|
+
/**
 * Pick a catalog entry for a user prompt by keyword scoring.
 *
 * How the decision is made:
 *   1. The prompt is lowercased, and a kebab-case variant of it
 *      (whitespace runs replaced by dashes) is derived for name
 *      matching.
 *   2. Each entry's score is the number of its `triggerHints`
 *      whose lowercased form is a substring of the prompt.
 *   3. Entries are ranked by score (descending), then by
 *      name-in-prompt (entries whose literal skill name occurs in
 *      the lowered prompt or its kebab-case variant come first),
 *      then by catalog order.
 *   4. The top entry becomes `match` when its score is at least
 *      `threshold` (default 1); otherwise `match` is `null`.
 *   5. Ambiguity guard: when the top entry and the runner-up have
 *      the same positive score and the same name-in-prompt signal,
 *      `match` is `null` instead of the catalog-order winner.
 *
 * `ranking` always lists every catalog entry, so runners-up can be
 * inspected whatever the match decision was.
 *
 * @param prompt - The user prompt to classify.
 * @param options - Optional `threshold` and `catalog` overrides.
 * @returns An object with the `match` decision and the full `ranking`.
 */
export function routeSkill(prompt, options) {
    const minScore = options?.threshold ?? 1;
    const entries = options?.catalog ?? SKILL_CATALOG;
    const lowered = prompt.toLowerCase();
    const kebab = toKebabPrompt(lowered);

    // Ordering: higher score first, then entries named in the prompt,
    // then whichever entry comes earlier in the catalog.
    const byRank = (left, right) => {
        if (left.score !== right.score) {
            return right.score - left.score;
        }
        if (left.nameInPrompt !== right.nameInPrompt) {
            return left.nameInPrompt ? -1 : 1;
        }
        return left.catalogIndex - right.catalogIndex;
    };

    // `map` builds a fresh array, so sorting it in place leaves the
    // caller's catalog untouched.
    const candidates = entries.map((entry, position) => {
        return {
            skill: entry.name,
            score: scoreHints(lowered, entry.triggerHints),
            nameInPrompt: nameInPrompt(lowered, kebab, entry.name),
            catalogIndex: position,
        };
    });
    candidates.sort(byRank);

    const ranking = candidates.map(({ skill, score }) => ({ skill, score }));

    const best = candidates[0];
    const clearsBar = best !== undefined && best.score >= minScore;
    if (!clearsBar) {
        return { match: null, ranking };
    }

    // Ambiguity guard. It applies only to a positive top score: a caller
    // passing `threshold: 0` is explicitly asking for the catalog-order
    // winner regardless of signal. When the runner-up has the same score
    // and the same name-in-prompt signal, there is not enough evidence to
    // commit, so the prompt is handed back (match: null) rather than
    // risking a wrong skill.
    if (best.score > 0 && candidates.length > 1) {
        const second = candidates[1];
        const sameScore = second.score === best.score;
        const sameNameSignal = second.nameInPrompt === best.nameInPrompt;
        if (sameScore && sameNameSignal) {
            return { match: null, ranking };
        }
    }

    return { match: { skill: best.skill, score: best.score }, ranking };
}
|
|
130
|
+
//# sourceMappingURL=skill-router.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-router.js","sourceRoot":"","sources":["../src/skill-router.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAGH,OAAO,EAAE,aAAa,EAAqB,MAAM,oBAAoB,CAAC;AAwCtE;;;;;GAKG;AACH,SAAS,UAAU,CAAC,WAAmB,EAAE,KAAwB;IAC/D,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAChC,IAAI,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;YAC7C,KAAK,IAAI,CAAC,CAAC;QACb,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;;;;;GAOG;AACH,SAAS,YAAY,CAAC,WAAmB,EAAE,WAAmB,EAAE,IAAe;IAC7E,OAAO,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,WAAmB;IACxC,OAAO,WAAW,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AAC1C,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,UAAU,CAAC,MAAc,EAAE,OAAuB;IAChE,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,OAAO,EAAE,OAAO,IAAI,aAAa,CAAC;IAClD,MAAM,WAAW,GAAG,MAAM,CAAC,WAAW,EAAE,CAAC;IACzC,MAAM,WAAW,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE/C,gEAAgE;IAChE,+DAA+D;IAC/D,eAAe;IACf,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;QAC5C,KAAK,EAAE,KAAK,CAAC,IAAI;QACjB,KAAK,EAAE,UAAU,CAAC,WAAW,EAAE,KAAK,CAAC,YAAY,CAAC;QAClD,YAAY,EAAE,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,KAAK,CAAC,IAAI,CAAC;QAChE,YAAY,EAAE,KAAK;KACpB,CAAC,CAAC,CAAC;IAEJ,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACnB,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK;YAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,IAAI,CAAC,CAAC,YAAY,KAAK,CAAC,CAAC,YAAY;YAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,OAAO,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,YAAY,CAAC;IACzC,CAAC,CAAC,CAAC;IAEH,MAAM,OAAO,GAAkB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAEvF,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACtB,IAAI,KAAK,GACP,GAAG,KAAK,SAAS,IAAI,GAAG,CAAC,KAAK,IAAI,SAAS,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,KAAK,
EAAE,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;IAE9F,+DAA+D;IAC/D,iEAAiE;IACjE,4DAA4D;IAC5D,qEAAqE;IACrE,+DAA+D;IAC/D,8DAA8D;IAC9D,gEAAgE;IAChE,wDAAwD;IACxD,6CAA6C;IAC7C,IAAI,KAAK,KAAK,IAAI,IAAI,GAAG,CAAC,KAAK,GAAG,CAAC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzD,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,QAAQ,CAAC,KAAK,KAAK,GAAG,CAAC,KAAK,IAAI,QAAQ,CAAC,YAAY,KAAK,GAAG,CAAC,YAAY,EAAE,CAAC;YAC/E,KAAK,GAAG,IAAI,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;AAC5B,CAAC"}
|
package/dist/strategy.d.ts
CHANGED
|
@@ -16,10 +16,29 @@
|
|
|
16
16
|
* parallel-branch ensembling) sit alongside this one — same
|
|
17
17
|
* `AgentStrategy` interface, different control flow.
|
|
18
18
|
*/
|
|
19
|
-
import type { AgentStrategy } from './types/strategy.js';
|
|
19
|
+
import type { AgentStrategy, ReflexionConfig } from './types/strategy.js';
|
|
20
20
|
interface ReactLoopOptions {
    /** Upper bound on loop iterations, guarding against runaway tool-call ping-pong. Default 24. */
    maxTurns?: number;
    /**
     * Opt-in parallel tool dispatch. Defaults to `false`, which keeps the
     * current behavior of serialising every call.
     *
     * When `true`, the tool calls produced in one turn are split by the
     * handler's `parallelSafe` tag: parallel-safe calls run concurrently
     * via `Promise.all`, and the remaining (mutation) calls run one after
     * another once the parallel group has settled. Results are yielded,
     * and `messages` are appended, in the original input order, so the
     * trace and the next-turn prompt are byte-for-byte identical to a
     * sequential run; only wall-clock time differs.
     */
    parallelDispatch?: boolean;
    /**
     * Opt-in critique-and-retry pass that runs after a candidate
     * final-answer turn. `ReflexionConfig` describes the verdict shape
     * and the retry/exhaust semantics. If this is absent or has
     * `enabled: false`, the loop behaves byte-for-byte as it did before
     * reflexion existed.
     */
    reflexion?: ReflexionConfig;
}
|
|
24
43
|
/**
 * Create an `AgentStrategy`, optionally configured through
 * `ReactLoopOptions`.
 *
 * @param options - Optional loop settings; see `ReactLoopOptions`.
 * @returns The strategy object.
 */
export declare function createReactLoopStrategy(options?: ReactLoopOptions): AgentStrategy;
|
|
25
44
|
export {};
|