@deepstrike/wasm 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,9 @@ interface SkillMetadata {
14
14
  whenToUse?: string;
15
15
  effort?: number;
16
16
  estimatedTokens?: number;
17
+ /** P1-B tool gating: tool ids this skill needs; when active the kernel narrows the toolset to
18
+ * `stable-core ∪ allowedTools`. Absent ⇒ no narrowing (back-compat). */
19
+ allowedTools?: string[];
17
20
  }
18
21
  export declare const KERNEL_ABI_VERSION = 1;
19
22
  export interface KernelRuntimeHandle {
@@ -24,6 +24,9 @@ export function skillMetadataToKernel(skill) {
24
24
  out.when_to_use = skill.whenToUse;
25
25
  if (skill.effort !== undefined)
26
26
  out.effort = skill.effort;
27
+ // P1-B: forward declared tool ids (additive; omitted when empty so existing skills' wire is unchanged).
28
+ if (skill.allowedTools?.length)
29
+ out.allowed_tools = skill.allowedTools;
27
30
  return out;
28
31
  }
29
32
  export function messageToKernelMessage(message) {
@@ -40,6 +40,19 @@ export interface MemoryPolicy {
40
40
  maxContentBytes?: number;
41
41
  maxNameLength?: number;
42
42
  }
43
+ /** P0-C tool-gating telemetry: per-LLM-turn metrics, emitted via `RuntimeOptions.onTurnMetrics`.
44
+ * Pure observation — no behavior change. `toolsExposed` vs `toolsCalled` quantifies over-exposure;
45
+ * consecutive equal `activeSkill` values measure skill dwell `D`; the cache split gives the
46
+ * prompt-cache hit baseline. Mirrors the node SDK. */
47
+ export interface TurnMetrics {
48
+ turn: number;
49
+ toolsExposed: number;
50
+ toolsCalled: number;
51
+ activeSkill?: string;
52
+ inputTokens: number;
53
+ cacheReadTokens: number;
54
+ cacheCreationTokens: number;
55
+ }
43
56
  export interface RuntimeOptions {
44
57
  provider: LLMProvider;
45
58
  /** M1/G3 intelligence routing: resolve a per-node provider from a workflow node's `modelHint`.
@@ -90,6 +103,18 @@ export interface RuntimeOptions {
90
103
  requiredEvidence: string[];
91
104
  }) => Promise<MilestoneCheckResult> | MilestoneCheckResult;
92
105
  runSpec?: AgentRunSpec;
106
+ /** P0-A tool gating: a static per-run tool profile — only these tool ids (plus the meta-tools)
107
+ * are exposed to the model each turn. Lowers to the same `capability_filter` sub-agents use;
108
+ * byte-stable across the run, so it never busts the prompt-cache prefix. Augments `runSpec`'s
109
+ * filter when both set; synthesizes a minimal spec otherwise. Omitted/empty ⇒ no gating. */
110
+ allowedToolIds?: string[];
111
+ /** P0-C: optional per-turn metrics sink for tool-gating telemetry (see `TurnMetrics`). Pure
112
+ * observation; invoked once per LLM turn. Never throws into the run loop (errors are swallowed). */
113
+ onTurnMetrics?: (metrics: TurnMetrics) => void;
114
+ /** P1-B/D stable-core: tool ids always exposed under skill gating. Empty/absent ⇒ skills narrow
115
+ * to exactly their declared tools + meta-tools. (wasm skills come from `skillContentMap`; gating
116
+ * engages only once that carries per-skill tool lists.) */
117
+ stableCoreToolIds?: string[];
93
118
  dreamProvider?: LLMProvider;
94
119
  dreamSummarizer?: DreamSummarizer;
95
120
  dreamSystemPrompt?: string;
@@ -313,6 +313,13 @@ export class RuntimeRunner {
313
313
  skills: metas.map(skillMetadataToKernel),
314
314
  });
315
315
  }
316
+ // P1-B/D: configure stable-core tool ids (always exposed under skill gating).
317
+ if (this.opts.stableCoreToolIds?.length) {
318
+ kernelApply(runtime, this.pendingObservations, {
319
+ kind: "set_stable_core_tools",
320
+ tool_ids: this.opts.stableCoreToolIds,
321
+ });
322
+ }
316
323
  if (this.opts.dreamStore && this.opts.agentId) {
317
324
  kernelApply(runtime, this.pendingObservations, { kind: "set_memory_enabled", enabled: true });
318
325
  }
@@ -337,18 +344,45 @@ export class RuntimeRunner {
337
344
  if (priorEvents && priorEvents.length > 0) {
338
345
  const repaired = repairEventsForRecovery(priorEvents, maxBytes);
339
346
  seedProviderReplayFromEvents(this.opts.provider, repaired);
347
+ const replayed = replayMessages(repaired, maxBytes);
340
348
  kernelApply(runtime, this.pendingObservations, {
341
349
  kind: "preload_history",
342
- messages: replayMessages(repaired, maxBytes).map(messageToKernelMessage),
350
+ messages: replayed.map(messageToKernelMessage),
343
351
  });
352
+ // P1-B B3: rebuild active-skill gating after a wake (active_skills is not snapshotted).
353
+ for (const m of replayed) {
354
+ for (const tc of m.toolCalls ?? []) {
355
+ if (tc.name !== "skill")
356
+ continue;
357
+ try {
358
+ const name = JSON.parse(tc.arguments || "{}").name;
359
+ if (name)
360
+ kernelApply(runtime, this.pendingObservations, { kind: "skill_activated", name });
361
+ }
362
+ catch { /* skip */ }
363
+ }
364
+ }
344
365
  }
345
366
  const sessionStart = Date.now();
346
367
  const startPayload = {
347
368
  kind: "start_run",
348
369
  task: { goal, criteria },
349
370
  };
350
- if (this.opts.runSpec) {
351
- startPayload.run_spec = agentRunSpecToKernel(this.opts.runSpec);
371
+ // P0-A: lower an explicit `runSpec` and/or the `allowedToolIds` profile to the kernel's
372
+ // `capability_filter` (reuses the existing run_spec wire — no new ABI). Unset on both ⇒ no
373
+ // gating (iron rule: no config = old behavior).
374
+ const allowedToolIds = this.opts.allowedToolIds;
375
+ const hasProfile = allowedToolIds !== undefined && allowedToolIds.length > 0;
376
+ if (this.opts.runSpec || hasProfile) {
377
+ const baseSpec = this.opts.runSpec ?? {
378
+ identity: { agentId: this.opts.agentId ?? "root", sessionId, isSubAgent: false },
379
+ role: "custom",
380
+ goal,
381
+ };
382
+ const spec = hasProfile
383
+ ? { ...baseSpec, capabilityFilter: { ...baseSpec.capabilityFilter, allowedIds: allowedToolIds } }
384
+ : baseSpec;
385
+ startPayload.run_spec = agentRunSpecToKernel(spec);
352
386
  }
353
387
  const osProfile = assertNativeProfile(this.opts.osProfile ?? "native");
354
388
  const attentionPolicy = this.opts.attentionPolicy ?? osProfile.attentionPolicy;
@@ -404,6 +438,8 @@ export class RuntimeRunner {
404
438
  ? kernelAction(runtime, this.pendingObservations, { kind: "resume" })
405
439
  : kernelAction(runtime, this.pendingObservations, startPayload);
406
440
  let hasAttemptedReactiveCompact = false;
441
+ // P0-C: the skill loaded and in effect going into the current turn → per-turn `activeSkill` metric.
442
+ let activeSkill;
407
443
  while (!runtime.isTerminal()) {
408
444
  if (action.kind === "execute_tool") {
409
445
  await this.applyKernelPageIn(runtime, sessionId);
@@ -437,6 +473,8 @@ export class RuntimeRunner {
437
473
  let turnTokens = 0;
438
474
  let turnInputTokens = 0;
439
475
  let turnOutputTokens = 0;
476
+ let turnCacheReadTokens = 0;
477
+ let turnCacheCreationTokens = 0;
440
478
  let shouldRetry = false;
441
479
  const abortSignal = this.abortController?.signal;
442
480
  try {
@@ -449,6 +487,9 @@ export class RuntimeRunner {
449
487
  turnTokens = usageEvt.totalTokens;
450
488
  turnInputTokens = usageEvt.inputTokens ?? 0;
451
489
  turnOutputTokens = usageEvt.outputTokens ?? 0;
490
+ // P0-C: capture the prompt-cache split for the tool-gating hit-rate baseline.
491
+ turnCacheReadTokens = usageEvt.cacheReadInputTokens ?? 0;
492
+ turnCacheCreationTokens = usageEvt.cacheCreationInputTokens ?? 0;
452
493
  continue;
453
494
  }
454
495
  yield evt;
@@ -527,6 +568,31 @@ export class RuntimeRunner {
527
568
  toolCalls: finalToolCalls,
528
569
  providerReplay,
529
570
  }));
571
+ // P0-C: per-turn tool-gating telemetry. `activeSkill` reflects the skill in effect GOING INTO
572
+ // this turn; a `skill` call here only takes effect next turn — emit first, then advance.
573
+ if (this.opts.onTurnMetrics) {
574
+ try {
575
+ this.opts.onTurnMetrics({
576
+ turn: runtime.turn(),
577
+ toolsExposed: tools.length,
578
+ toolsCalled: finalToolCalls.length,
579
+ activeSkill,
580
+ inputTokens: turnInputTokens,
581
+ cacheReadTokens: turnCacheReadTokens,
582
+ cacheCreationTokens: turnCacheCreationTokens,
583
+ });
584
+ }
585
+ catch { /* metrics must never break the run */ }
586
+ }
587
+ const skillCall = finalToolCalls.find(c => c.name === "skill");
588
+ if (skillCall) {
589
+ try {
590
+ const name = JSON.parse(skillCall.arguments || "{}").name;
591
+ if (name)
592
+ activeSkill = name;
593
+ }
594
+ catch { /* malformed skill args — leave activeSkill unchanged */ }
595
+ }
530
596
  }
531
597
  else if (action.kind === "execute_tool") {
532
598
  const allCalls = action.calls;
@@ -637,6 +703,20 @@ export class RuntimeRunner {
637
703
  this.pendingSpoolOutputs.set(call.id, { tool: call.name, output: result.output });
638
704
  }
639
705
  }
706
+ // P1-B B3: a successfully-resolved `skill` call activates that skill for the next turn.
707
+ for (const call of allCalls) {
708
+ if (call.name !== "skill")
709
+ continue;
710
+ const res = toolResults.find(r => r.callId === call.id);
711
+ if (!res || res.isError)
712
+ continue;
713
+ try {
714
+ const name = JSON.parse(call.arguments || "{}").name;
715
+ if (name)
716
+ kernelApply(runtime, this.pendingObservations, { kind: "skill_activated", name });
717
+ }
718
+ catch { /* skip */ }
719
+ }
640
720
  action = kernelAction(runtime, this.pendingObservations, {
641
721
  kind: "tool_results",
642
722
  results: toolResults.map(toolResultToKernel),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@deepstrike/wasm",
3
- "version": "0.2.18",
3
+ "version": "0.2.19",
4
4
  "description": "DeepStrike WASM SDK — browser, Cloudflare Workers, Deno Deploy",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -15,7 +15,7 @@
15
15
  "test": "node --experimental-vm-modules node_modules/.bin/jest"
16
16
  },
17
17
  "dependencies": {
18
- "@deepstrike/wasm-kernel": "0.2.18"
18
+ "@deepstrike/wasm-kernel": "0.2.19"
19
19
  },
20
20
  "devDependencies": {
21
21
  "@types/jest": "^30.0.0",