@inbrowser/agent 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE +21 -0
  2. package/dist/diagnostics/index.d.ts +5 -0
  3. package/dist/diagnostics/index.d.ts.map +1 -0
  4. package/dist/diagnostics/index.js +3 -0
  5. package/dist/diagnostics/index.js.map +1 -0
  6. package/dist/diagnostics/timing.d.ts +48 -0
  7. package/dist/diagnostics/timing.d.ts.map +1 -0
  8. package/dist/diagnostics/timing.js +85 -0
  9. package/dist/diagnostics/timing.js.map +1 -0
  10. package/dist/diagnostics/truthfulness.d.ts +36 -0
  11. package/dist/diagnostics/truthfulness.d.ts.map +1 -0
  12. package/dist/diagnostics/truthfulness.js +180 -0
  13. package/dist/diagnostics/truthfulness.js.map +1 -0
  14. package/dist/dispatch-memoization.d.ts +84 -0
  15. package/dist/dispatch-memoization.d.ts.map +1 -0
  16. package/dist/dispatch-memoization.js +197 -0
  17. package/dist/dispatch-memoization.js.map +1 -0
  18. package/dist/eval/comparison-report.d.ts +164 -0
  19. package/dist/eval/comparison-report.d.ts.map +1 -0
  20. package/dist/eval/comparison-report.js +316 -0
  21. package/dist/eval/comparison-report.js.map +1 -0
  22. package/dist/eval/fixture.d.ts +74 -0
  23. package/dist/eval/fixture.d.ts.map +1 -0
  24. package/dist/eval/fixture.js +217 -0
  25. package/dist/eval/fixture.js.map +1 -0
  26. package/dist/eval/index.d.ts +13 -0
  27. package/dist/eval/index.d.ts.map +1 -0
  28. package/dist/eval/index.js +7 -0
  29. package/dist/eval/index.js.map +1 -0
  30. package/dist/eval/load-node.d.ts +16 -0
  31. package/dist/eval/load-node.d.ts.map +1 -0
  32. package/dist/eval/load-node.js +58 -0
  33. package/dist/eval/load-node.js.map +1 -0
  34. package/dist/eval/metric-collector.d.ts +209 -0
  35. package/dist/eval/metric-collector.d.ts.map +1 -0
  36. package/dist/eval/metric-collector.js +293 -0
  37. package/dist/eval/metric-collector.js.map +1 -0
  38. package/dist/eval/run-record.d.ts +76 -0
  39. package/dist/eval/run-record.d.ts.map +1 -0
  40. package/dist/eval/run-record.js +32 -0
  41. package/dist/eval/run-record.js.map +1 -0
  42. package/dist/eval/runner.d.ts +140 -0
  43. package/dist/eval/runner.d.ts.map +1 -0
  44. package/dist/eval/runner.js +310 -0
  45. package/dist/eval/runner.js.map +1 -0
  46. package/dist/eval/spec-framework.d.ts +113 -0
  47. package/dist/eval/spec-framework.d.ts.map +1 -0
  48. package/dist/eval/spec-framework.js +100 -0
  49. package/dist/eval/spec-framework.js.map +1 -0
  50. package/dist/eval/spec-helpers.d.ts +245 -0
  51. package/dist/eval/spec-helpers.d.ts.map +1 -0
  52. package/dist/eval/spec-helpers.js +605 -0
  53. package/dist/eval/spec-helpers.js.map +1 -0
  54. package/dist/index.d.ts +24 -3
  55. package/dist/index.d.ts.map +1 -1
  56. package/dist/index.js +11 -1
  57. package/dist/index.js.map +1 -1
  58. package/dist/node.d.ts +1 -0
  59. package/dist/node.d.ts.map +1 -1
  60. package/dist/node.js +1 -0
  61. package/dist/node.js.map +1 -1
  62. package/dist/planner-executor.d.ts +132 -0
  63. package/dist/planner-executor.d.ts.map +1 -0
  64. package/dist/planner-executor.js +274 -0
  65. package/dist/planner-executor.js.map +1 -0
  66. package/dist/skill-catalog.d.ts +81 -0
  67. package/dist/skill-catalog.d.ts.map +1 -0
  68. package/dist/skill-catalog.js +388 -0
  69. package/dist/skill-catalog.js.map +1 -0
  70. package/dist/skill-router.d.ts +95 -0
  71. package/dist/skill-router.d.ts.map +1 -0
  72. package/dist/skill-router.js +130 -0
  73. package/dist/skill-router.js.map +1 -0
  74. package/dist/strategy.d.ts +20 -1
  75. package/dist/strategy.d.ts.map +1 -1
  76. package/dist/strategy.js +333 -13
  77. package/dist/strategy.js.map +1 -1
  78. package/dist/tools.d.ts +15 -1
  79. package/dist/tools.d.ts.map +1 -1
  80. package/dist/tools.js +18 -0
  81. package/dist/tools.js.map +1 -1
  82. package/dist/types/strategy.d.ts +48 -0
  83. package/dist/types/strategy.d.ts.map +1 -1
  84. package/dist/types/tools.d.ts +18 -0
  85. package/dist/types/tools.d.ts.map +1 -1
  86. package/dist/types/trace.d.ts +59 -9
  87. package/dist/types/trace.d.ts.map +1 -1
  88. package/dist/types/trace.js +5 -3
  89. package/dist/types/trace.js.map +1 -1
  90. package/package.json +1 -1
@@ -16,9 +16,11 @@
16
16
  * one — incur zero cost.
17
17
  *
18
18
  * Phase 1 captures `llm_request` only (the priority the playground
19
- * needs to ship a prompt visualizer). `llm_response` shape is
20
- * defined here so consumers can write against the full union from
21
- * day one; emit-site wiring lands in Phase 2.
19
+ * needs to ship a prompt visualizer). `llm_response` and
20
+ * `turn_dispatch_complete` complete the per-iteration wall-clock
21
+ * triple — request-dispatched, response-completed, tool-dispatch-
22
+ * completed — that the eval harness uses to split language-model
23
+ * time from tool-dispatch time.
22
24
  */
23
25
  import type { NormalizedMessage } from './chat.js';
24
26
  /**
@@ -36,7 +38,12 @@ export interface LlmRequestTrace {
36
38
  turnId: string;
37
39
  /** 0-indexed ReAct iteration within this turn. */
38
40
  iteration: number;
39
- /** Wall-clock ms at the moment the request was about to dispatch. */
41
+ /** Wall-clock ms captured immediately before the strategy hands
42
+ * the request to `LlmClient.chat()`. Pair with
43
+ * `LlmResponseTrace.ts` (response completed) and
44
+ * `TurnDispatchCompleteTrace.ts` (tool dispatch completed) to
45
+ * derive the language-model vs tool-dispatch wall-clock split for
46
+ * this iteration. */
40
47
  ts: number;
41
48
  /** The system-prompt string the strategy received from
42
49
  * `StrategyRunInput.systemPrompt`. Captured verbatim. */
@@ -72,15 +79,21 @@ export interface ToolDeclarationView {
72
79
  parameters: unknown;
73
80
  }
74
81
  /**
75
- * Response-side snapshot. Defined now so consumers can switch on
76
- * `TraceEvent.kind` and handle the full union from the start. The
77
- * emit-site in `strategy.ts` lands in Phase 2 — strategies are free
78
- * to emit it earlier when the data is available.
82
+ * Response-side snapshot. Emitted once per ReAct iteration, paired
83
+ * one-to-one with `LlmRequestTrace` via `requestId`. Captures the
84
+ * full assistant output and the timestamp at which the chat()
85
+ * iterator drained — `ts - LlmRequestTrace.ts` is the iteration's
86
+ * language-model wall-clock segment.
79
87
  */
80
88
  export interface LlmResponseTrace {
81
89
  /** Same id as the matching `LlmRequestTrace.requestId`. */
82
90
  requestId: string;
83
- /** Wall-clock ms when the chat() iterator completed. */
91
+ /** Wall-clock ms captured immediately after the `chat()` iterator
92
+ * has yielded its terminal event for this iteration (typically
93
+ * `turn_complete`, or `error` on a streaming failure). Not
94
+ * emitted on mid-stream error — callers should treat a missing
95
+ * `llm_response` as "language-model time unknown for this
96
+ * iteration." */
84
97
  ts: number;
85
98
  /** Full assistant text emitted this iteration. */
86
99
  text: string;
@@ -102,12 +115,49 @@ export interface LlmResponseTrace {
102
115
  cachedTokens?: number;
103
116
  };
104
117
  }
118
+ /**
119
+ * End-of-iteration tool-dispatch marker. Emitted once per ReAct
120
+ * iteration that actually ran tool calls, immediately after the
121
+ * per-turn dispatch loop drained. Paired one-to-one with
122
+ * `LlmResponseTrace` via `requestId`. NOT emitted for the final
123
+ * assistant turn (no tool calls → no dispatch segment to close).
124
+ *
125
+ * `ts - LlmResponseTrace.ts` is the iteration's tool-dispatch
126
+ * wall-clock segment; `ts - LlmRequestTrace.ts` is the iteration's
127
+ * total wall-clock from request dispatch through tool-result append.
128
+ *
129
+ * Only the aggregate is captured. Per-tool wall-clock can be added
130
+ * later — the existing `tool_call`/`tool_result` events on the
131
+ * strategy event stream are the right place for that, not the
132
+ * trace.
133
+ */
134
+ export interface TurnDispatchCompleteTrace {
135
+ /** Same id as the matching `LlmRequestTrace.requestId`. */
136
+ requestId: string;
137
+ /** Mirror of `LlmRequestTrace.turnId`, carried for grouping
138
+ * consumers that key by turn rather than by iteration. */
139
+ turnId: string;
140
+ /** 0-indexed ReAct iteration within the turn. Mirrors
141
+ * `LlmRequestTrace.iteration`. */
142
+ iteration: number;
143
+ /** Wall-clock ms captured immediately after the last tool result
144
+ * for this iteration was appended to the messages array, before
145
+ * the loop steps to the next iteration. */
146
+ ts: number;
147
+ /** Number of tool calls executed in this iteration. Always >= 1
148
+ * in practice (an iteration with zero tool calls does not emit
149
+ * this event). */
150
+ toolCallCount: number;
151
+ }
105
152
  export type TraceEvent = {
106
153
  kind: 'llm_request';
107
154
  data: LlmRequestTrace;
108
155
  } | {
109
156
  kind: 'llm_response';
110
157
  data: LlmResponseTrace;
158
+ } | {
159
+ kind: 'turn_dispatch_complete';
160
+ data: TurnDispatchCompleteTrace;
111
161
  };
112
162
  /**
113
163
  * Pluggable trace sink. Hosts implement `emit()` to push events to a
@@ -1 +1 @@
1
- {"version":3,"file":"trace.d.ts","sourceRoot":"","sources":["../../src/types/trace.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAEnD;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B;;+CAE2C;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB,qEAAqE;IACrE,EAAE,EAAE,MAAM,CAAC;IACX;8DAC0D;IAC1D,YAAY,EAAE,MAAM,CAAC;IACrB;;;mCAG+B;IAC/B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;iDAG6C;IAC7C,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B;;;oBAGgB;IAChB,GAAG,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,OAAO,CAAA;KAAE,CAAC;CAC7C;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,OAAO,CAAC;CACrB;AAED;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2DAA2D;IAC3D,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,EAAE,EAAE,MAAM,CAAC;IACX,kDAAkD;IAClD,IAAI,EAAE,MAAM,CAAC;IACb;gCAC4B;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,yDAAyD;IACzD,SAAS,EAAE;QACT,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,OAAO,CAAC;QACd,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,EAAE,CAAC;IACJ;kDAC8C;IAC9C,KAAK,CAAC,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC/E;AAED,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,IAAI,EAAE,eAAe,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,IAAI,EAAE,gBAAgB,CAAA;CAAE,CAAC;AAErD;;;;;;;;;GASG;AACH,MAAM,WAAW,MAAM;IACrB,IAAI,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;CAC/B"}
1
+ {"version":3,"file":"trace.d.ts","sourceRoot":"","sources":["../../src/types/trace.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAC;AAEnD;;;;;GAKG;AACH,MAAM,WAAW,eAAe;IAC9B;;+CAE2C;IAC3C,SAAS,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,MAAM,EAAE,MAAM,CAAC;IACf,kDAAkD;IAClD,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;0BAKsB;IACtB,EAAE,EAAE,MAAM,CAAC;IACX;8DAC0D;IAC1D,YAAY,EAAE,MAAM,CAAC;IACrB;;;mCAG+B;IAC/B,QAAQ,EAAE,iBAAiB,EAAE,CAAC;IAC9B;;;iDAG6C;IAC7C,KAAK,EAAE,mBAAmB,EAAE,CAAC;IAC7B;;;oBAGgB;IAChB,GAAG,EAAE;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,OAAO,CAAA;KAAE,CAAC;CAC7C;AAED;;;;;GAKG;AACH,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,OAAO,CAAC;CACrB;AAED;;;;;;GAMG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2DAA2D;IAC3D,SAAS,EAAE,MAAM,CAAC;IAClB;;;;;sBAKkB;IAClB,EAAE,EAAE,MAAM,CAAC;IACX,kDAAkD;IAClD,IAAI,EAAE,MAAM,CAAC;IACb;gCAC4B;IAC5B,QAAQ,EAAE,MAAM,CAAC;IACjB,yDAAyD;IACzD,SAAS,EAAE;QACT,EAAE,EAAE,MAAM,CAAC;QACX,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,OAAO,CAAC;QACd,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,EAAE,CAAC;IACJ;kDAC8C;IAC9C,KAAK,CAAC,EAAE;QAAE,YAAY,EAAE,MAAM,CAAC;QAAC,YAAY,EAAE,MAAM,CAAC;QAAC,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC;CAC/E;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,yBAAyB;IACxC,2DAA2D;IAC3D,SAAS,EAAE,MAAM,CAAC;IAClB;+DAC2D;IAC3D,MAAM,EAAE,MAAM,CAAC;IACf;uCACmC;IACnC,SAAS,EAAE,MAAM,CAAC;IAClB;;gDAE4C;IAC5C,EAAE,EAAE,MAAM,CAAC;IACX;;uBAEmB;IACnB,aAAa,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,aAAa,CAAC;IAAC,IAAI,EAAE,eAAe,CAAA;CAAE,GAC9C;IAAE,IAAI,EAAE,cAAc,CAAC;IAAC,IAAI,EAAE,gBAAgB,CAAA;CAAE,GAChD;IAAE,IAAI,EAAE,wBAAwB,CAAC;IAAC,IAAI,EAAE,yBAAyB,CAAA;CAAE,CAAC;AAExE;;;;;;;;;GASG;AACH,MAAM,WAAW,MAAM;IACrB,IAAI,CAAC,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;CAC/B"}
@@ -16,9 +16,11 @@
16
16
  * one — incur zero cost.
17
17
  *
18
18
  * Phase 1 captures `llm_request` only (the priority the playground
19
- * needs to ship a prompt visualizer). `llm_response` shape is
20
- * defined here so consumers can write against the full union from
21
- * day one; emit-site wiring lands in Phase 2.
19
+ * needs to ship a prompt visualizer). `llm_response` and
20
+ * `turn_dispatch_complete` complete the per-iteration wall-clock
21
+ * triple — request-dispatched, response-completed, tool-dispatch-
22
+ * completed — that the eval harness uses to split language-model
23
+ * time from tool-dispatch time.
22
24
  */
23
25
  export {};
24
26
  //# sourceMappingURL=trace.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"trace.js","sourceRoot":"","sources":["../../src/types/trace.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG"}
1
+ {"version":3,"file":"trace.js","sourceRoot":"","sources":["../../src/types/trace.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inbrowser/agent",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Agent runtime + CLI. Library exports AgentSession, AgentStrategy, ToolRegistry, LlmClient, MetricsCollector, SandboxObserver. Ships an `agent` binary with NDJSON output, --json stdin, --dry-run, schema introspection, and inverse-mode MCP serve. Domain-agnostic — hosts wire their own AgentDefinitions.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",