@agentuity/runtime 0.0.69 → 0.0.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. package/AGENTS.md +88 -10
  2. package/README.md +0 -2
  3. package/dist/_config.d.ts +16 -0
  4. package/dist/_config.d.ts.map +1 -1
  5. package/dist/_config.js +16 -0
  6. package/dist/_config.js.map +1 -1
  7. package/dist/_context.d.ts +17 -15
  8. package/dist/_context.d.ts.map +1 -1
  9. package/dist/_context.js +17 -8
  10. package/dist/_context.js.map +1 -1
  11. package/dist/_server.d.ts.map +1 -1
  12. package/dist/_server.js +23 -14
  13. package/dist/_server.js.map +1 -1
  14. package/dist/_services.d.ts.map +1 -1
  15. package/dist/_services.js +2 -29
  16. package/dist/_services.js.map +1 -1
  17. package/dist/_validation.d.ts +3 -3
  18. package/dist/_validation.d.ts.map +1 -1
  19. package/dist/_validation.js.map +1 -1
  20. package/dist/agent.d.ts +289 -107
  21. package/dist/agent.d.ts.map +1 -1
  22. package/dist/agent.js +206 -149
  23. package/dist/agent.js.map +1 -1
  24. package/dist/app.d.ts +6 -18
  25. package/dist/app.d.ts.map +1 -1
  26. package/dist/app.js +1 -1
  27. package/dist/app.js.map +1 -1
  28. package/dist/eval.d.ts +4 -4
  29. package/dist/eval.d.ts.map +1 -1
  30. package/dist/index.d.ts +2 -1
  31. package/dist/index.d.ts.map +1 -1
  32. package/dist/index.js +1 -0
  33. package/dist/index.js.map +1 -1
  34. package/dist/otel/config.d.ts.map +1 -1
  35. package/dist/otel/config.js +5 -2
  36. package/dist/otel/config.js.map +1 -1
  37. package/dist/otel/console.d.ts +10 -6
  38. package/dist/otel/console.d.ts.map +1 -1
  39. package/dist/otel/console.js +31 -13
  40. package/dist/otel/console.js.map +1 -1
  41. package/dist/otel/logger.d.ts.map +1 -1
  42. package/dist/otel/logger.js +0 -19
  43. package/dist/otel/logger.js.map +1 -1
  44. package/dist/otel/otel.d.ts +2 -1
  45. package/dist/otel/otel.d.ts.map +1 -1
  46. package/dist/otel/otel.js +28 -15
  47. package/dist/otel/otel.js.map +1 -1
  48. package/dist/services/local/_db.d.ts.map +1 -1
  49. package/dist/services/local/_db.js +1 -22
  50. package/dist/services/local/_db.js.map +1 -1
  51. package/dist/services/local/_router.d.ts.map +1 -1
  52. package/dist/services/local/_router.js +0 -32
  53. package/dist/services/local/_router.js.map +1 -1
  54. package/dist/services/local/index.d.ts +0 -1
  55. package/dist/services/local/index.d.ts.map +1 -1
  56. package/dist/services/local/index.js +0 -1
  57. package/dist/services/local/index.js.map +1 -1
  58. package/dist/session.d.ts +2 -2
  59. package/dist/validator.d.ts +140 -0
  60. package/dist/validator.d.ts.map +1 -0
  61. package/dist/validator.js +146 -0
  62. package/dist/validator.js.map +1 -0
  63. package/dist/workbench.d.ts.map +1 -1
  64. package/dist/workbench.js +38 -29
  65. package/dist/workbench.js.map +1 -1
  66. package/package.json +6 -6
  67. package/src/_config.ts +19 -0
  68. package/src/_context.ts +25 -31
  69. package/src/_server.ts +26 -14
  70. package/src/_services.ts +0 -28
  71. package/src/_validation.ts +3 -7
  72. package/src/agent.ts +555 -348
  73. package/src/app.ts +5 -18
  74. package/src/eval.ts +6 -6
  75. package/src/index.ts +2 -1
  76. package/src/otel/config.ts +5 -2
  77. package/src/otel/console.ts +34 -20
  78. package/src/otel/logger.ts +0 -18
  79. package/src/otel/otel.ts +43 -29
  80. package/src/services/local/_db.ts +1 -27
  81. package/src/services/local/_router.ts +0 -46
  82. package/src/services/local/index.ts +0 -1
  83. package/src/session.ts +2 -2
  84. package/src/validator.ts +277 -0
  85. package/src/workbench.ts +38 -32
  86. package/dist/agent.validator.test.d.ts +0 -2
  87. package/dist/agent.validator.test.d.ts.map +0 -1
  88. package/dist/agent.validator.test.js +0 -508
  89. package/dist/agent.validator.test.js.map +0 -1
  90. package/dist/services/local/objectstore.d.ts +0 -19
  91. package/dist/services/local/objectstore.d.ts.map +0 -1
  92. package/dist/services/local/objectstore.js +0 -117
  93. package/dist/services/local/objectstore.js.map +0 -1
  94. package/src/agent.validator.test.ts +0 -587
  95. package/src/services/local/objectstore.ts +0 -177
package/src/agent.ts CHANGED
@@ -2,26 +2,24 @@
2
2
  import {
3
3
  StructuredError,
4
4
  type KeyValueStorage,
5
- type ObjectStorage,
6
5
  type StandardSchemaV1,
7
6
  type StreamStorage,
8
7
  type VectorStorage,
8
+ type InferOutput,
9
9
  toCamelCase,
10
10
  } from '@agentuity/core';
11
11
  import { context, SpanStatusCode, type Tracer, trace } from '@opentelemetry/api';
12
12
  import type { Context, MiddlewareHandler } from 'hono';
13
13
  import type { Handler } from 'hono/types';
14
14
  import { validator } from 'hono/validator';
15
- import { getAgentContext, runInAgentContext, type RequestAgentContextArgs } from './_context';
15
+ import { AGENT_RUNTIME, INTERNAL_AGENT, CURRENT_AGENT } from './_config';
16
+ import {
17
+ getAgentContext,
18
+ setupRequestAgentContext,
19
+ type RequestAgentContextArgs,
20
+ } from './_context';
16
21
  import type { Logger } from './logger';
17
- import type {
18
- Eval,
19
- EvalContext,
20
- EvalRunResult,
21
- EvalMetadata,
22
- EvalFunction,
23
- ExternalEvalMetadata,
24
- } from './eval';
22
+ import type { Eval, EvalContext, EvalRunResult, EvalFunction } from './eval';
25
23
  import { internal } from './logger/internal';
26
24
  import { getApp } from './app';
27
25
  import type { Thread, Session } from './session';
@@ -53,24 +51,36 @@ export type AgentEventCallback<TAgent extends Agent<any, any, any>> =
53
51
  data: Error
54
52
  ) => Promise<void> | void);
55
53
 
54
+ /**
55
+ * Runtime state container for agents and event listeners.
56
+ * Isolates global state into context for better testing.
57
+ */
58
+ export interface AgentRuntimeState {
59
+ agents: Map<string, Agent<any, any, any, any, any>>;
60
+ agentConfigs: Map<string, unknown>;
61
+ agentEventListeners: WeakMap<
62
+ Agent<any, any, any, any, any>,
63
+ Map<AgentEventName, Set<AgentEventCallback<any>>>
64
+ >;
65
+ }
66
+
56
67
  /**
57
68
  * Context object passed to every agent handler providing access to runtime services and state.
58
69
  *
59
70
  * @template TAgentRegistry - Registry of all available agents (auto-generated, strongly-typed)
60
- * @template TCurrent - Current agent runner type
61
- * @template TParent - Parent agent runner type (if called from another agent)
62
71
  * @template TConfig - Agent-specific configuration type from setup function
63
72
  * @template TAppState - Application-wide state type from createApp
64
73
  *
65
74
  * @example
66
75
  * ```typescript
67
- * const agent = createAgent({
76
+ * const agent = createAgent('my-agent', {
68
77
  * handler: async (ctx, input) => {
69
78
  * // Logging
70
79
  * ctx.logger.info('Processing request', { input });
71
80
  *
72
- * // Call another agent
73
- * const result = await ctx.agent.otherAgent.run({ data: input });
81
+ * // Call another agent: import it at the top of the module, not inside the handler
82
+ * // import otherAgent from './other-agent';
83
+ * const result = await otherAgent.run({ data: input });
74
84
  *
75
85
  * // Store data
76
86
  * await ctx.kv.set('key', { value: result });
@@ -89,12 +99,17 @@ export type AgentEventCallback<TAgent extends Agent<any, any, any>> =
89
99
  * ```
90
100
  */
91
101
  export interface AgentContext<
92
- TAgentRegistry extends AgentRegistry = AgentRegistry,
93
- TCurrent extends AgentRunner<any, any, any> | undefined = AgentRunner<any, any, any> | undefined,
94
- TParent extends AgentRunner<any, any, any> | undefined = AgentRunner<any, any, any> | undefined,
102
+ _TAgentRegistry extends AgentRegistry = AgentRegistry,
95
103
  TConfig = unknown,
96
104
  TAppState = Record<string, never>,
97
105
  > {
106
+ /**
107
+ * Internal runtime state (agents, configs, event listeners).
108
+ * Stored with Symbol key to prevent accidental access.
109
+ * Use getAgentRuntime(ctx) to access.
110
+ * @internal
111
+ */
112
+ [AGENT_RUNTIME]: AgentRuntimeState;
98
113
  /**
99
114
  * Schedule a background task that continues after the response is sent.
100
115
  * Useful for cleanup, logging, or async operations that don't block the response.
@@ -111,34 +126,6 @@ export interface AgentContext<
111
126
  */
112
127
  waitUntil: (promise: Promise<void> | (() => void | Promise<void>)) => void;
113
128
 
114
- /**
115
- * Registry of all agents in the application. Strongly-typed and auto-generated.
116
- * Use to call other agents from within your handler.
117
- *
118
- * @example
119
- * ```typescript
120
- * const emailResult = await ctx.agent.email.run({ to: 'user@example.com' });
121
- * const smsResult = await ctx.agent.sms.run({ phone: '+1234567890' });
122
- * ```
123
- */
124
- agent: TAgentRegistry;
125
-
126
- /**
127
- * Information about the currently executing agent.
128
- */
129
- current: TCurrent;
130
-
131
- /**
132
- * Information about the parent agent (if this agent was called by another agent).
133
- * Use ctx.agent.parentName for strongly-typed access.
134
- */
135
- parent: TParent;
136
-
137
- /**
138
- * Name of the current agent being executed.
139
- */
140
- agentName: AgentName;
141
-
142
129
  /**
143
130
  * Structured logger with OpenTelemetry integration.
144
131
  * Logs are automatically correlated with traces.
@@ -187,19 +174,6 @@ export interface AgentContext<
187
174
  */
188
175
  kv: KeyValueStorage;
189
176
 
190
- /**
191
- * Object storage for files and blobs (S3-compatible).
192
- *
193
- * @example
194
- * ```typescript
195
- * await ctx.objectstore.put('images/photo.jpg', buffer);
196
- * const file = await ctx.objectstore.get('images/photo.jpg');
197
- * await ctx.objectstore.delete('images/photo.jpg');
198
- * const objects = await ctx.objectstore.list('images/');
199
- * ```
200
- */
201
- objectstore: ObjectStorage;
202
-
203
177
  /**
204
178
  * Stream storage for real-time data streams and logs.
205
179
  *
@@ -286,13 +260,13 @@ export interface AgentContext<
286
260
 
287
261
  type InternalAgentMetadata = {
288
262
  /**
289
- * the unique identifier for this project, agent and deployment.
263
+ * the unique name for the agent (user-provided).
290
264
  */
291
- id: string;
265
+ name: string;
292
266
  /**
293
- * the unique identifier for this project and agent across multiple deployments.
267
+ * the unique identifier for this project, agent and deployment.
294
268
  */
295
- identifier: string;
269
+ id: string;
296
270
  /**
297
271
  * the unique identifier for this agent across multiple deployments
298
272
  */
@@ -318,10 +292,6 @@ type InternalAgentMetadata = {
318
292
  };
319
293
 
320
294
  type ExternalAgentMetadata = {
321
- /**
322
- * the human readable name for the agent.
323
- */
324
- name: string;
325
295
  /**
326
296
  * the human readable description for the agent
327
297
  */
@@ -333,25 +303,22 @@ type AgentMetadata = InternalAgentMetadata & ExternalAgentMetadata;
333
303
  /**
334
304
  * Configuration object for creating an agent evaluation function.
335
305
  *
336
- * @template TInput - Input schema type from the parent agent
337
- * @template TOutput - Output schema type from the parent agent
306
+ * @template TInput - Input schema type from the agent
307
+ * @template TOutput - Output schema type from the agent
338
308
  */
339
309
  export interface CreateEvalConfig<
340
310
  TInput extends StandardSchemaV1 | undefined = any,
341
311
  TOutput extends StandardSchemaV1 | undefined = any,
342
312
  > {
343
313
  /**
344
- * Optional metadata for the evaluation function.
314
+ * Optional description of what this evaluation does.
345
315
  *
346
316
  * @example
347
317
  * ```typescript
348
- * metadata: {
349
- * name: 'Validate positive output',
350
- * description: 'Ensures output is greater than zero'
351
- * }
318
+ * description: 'Ensures output is greater than zero'
352
319
  * ```
353
320
  */
354
- metadata?: Partial<ExternalEvalMetadata>;
321
+ description?: string;
355
322
 
356
323
  /**
357
324
  * Evaluation handler function that tests the agent's behavior.
@@ -386,8 +353,8 @@ export interface CreateEvalConfig<
386
353
  * ```
387
354
  */
388
355
  handler: EvalFunction<
389
- TInput extends StandardSchemaV1 ? StandardSchemaV1.InferOutput<TInput> : undefined,
390
- TOutput extends StandardSchemaV1 ? StandardSchemaV1.InferOutput<TOutput> : undefined
356
+ TInput extends StandardSchemaV1 ? InferOutput<TInput> : undefined,
357
+ TOutput extends StandardSchemaV1 ? InferOutput<TOutput> : undefined
391
358
  >;
392
359
  }
393
360
 
@@ -395,7 +362,7 @@ export interface CreateEvalConfig<
395
362
  type CreateEvalMethod<
396
363
  TInput extends StandardSchemaV1 | undefined = any,
397
364
  TOutput extends StandardSchemaV1 | undefined = any,
398
- > = (config: CreateEvalConfig<TInput, TOutput>) => Eval<TInput, TOutput>;
365
+ > = (name: string, config: CreateEvalConfig<TInput, TOutput>) => Eval<TInput, TOutput>;
399
366
 
400
367
  /**
401
368
  * Validator function type with method overloads for different validation scenarios.
@@ -452,7 +419,7 @@ export interface AgentValidator<
452
419
  {
453
420
  // eslint-disable-next-line @typescript-eslint/no-empty-object-type
454
421
  in: {};
455
- out: { json: StandardSchemaV1.InferOutput<TInput> };
422
+ out: { json: InferOutput<TInput> };
456
423
  }
457
424
  >
458
425
  : Handler<any, any, any>;
@@ -485,7 +452,7 @@ export interface AgentValidator<
485
452
  {
486
453
  // eslint-disable-next-line @typescript-eslint/no-empty-object-type
487
454
  in: {};
488
- out: { json: StandardSchemaV1.InferOutput<TOverrideOutput> };
455
+ out: { json: InferOutput<TOverrideOutput> };
489
456
  }
490
457
  >;
491
458
 
@@ -532,7 +499,7 @@ export interface AgentValidator<
532
499
  // eslint-disable-next-line @typescript-eslint/no-empty-object-type
533
500
  in: {};
534
501
  out: {
535
- json: StandardSchemaV1.InferOutput<TOverrideInput>;
502
+ json: InferOutput<TOverrideInput>;
536
503
  };
537
504
  }
538
505
  >;
@@ -565,7 +532,8 @@ export interface AgentValidator<
565
532
  * });
566
533
  *
567
534
  * // Create evals for testing
568
- * const eval1 = agent.createEval({
535
+ * const eval1 = agent.createEval('check-positive', {
536
+ * description: 'Ensures result is greater than 5',
569
537
  * handler: async (run, result) => {
570
538
  * return result > 5; // Assert output is greater than 5
571
539
  * }
@@ -586,12 +554,9 @@ export type Agent<
586
554
 
587
555
  /**
588
556
  * The main handler function that processes agent requests.
589
- * Receives AgentContext and validated input, returns output or stream.
557
+ * Gets AgentContext from AsyncLocalStorage, receives validated input, returns output or stream.
590
558
  */
591
- handler: (
592
- ctx: AgentContext<any, any, any, TConfig, TAppState>,
593
- ...args: any[]
594
- ) => any | Promise<any>;
559
+ handler: (...args: any[]) => any | Promise<any>;
595
560
 
596
561
  /**
597
562
  * Creates a type-safe validation middleware for routes using StandardSchema validation.
@@ -685,8 +650,8 @@ export type Agent<
685
650
  * });
686
651
  *
687
652
  * // Create eval to validate output
688
- * agent.createEval({
689
- * metadata: { name: 'Check positive output' },
653
+ * agent.createEval('check-positive', {
654
+ * description: 'Ensures output is a positive number',
690
655
  * handler: async (run, result) => {
691
656
  * return result > 0; // Assert output is positive
692
657
  * }
@@ -725,7 +690,7 @@ export type Agent<
725
690
  callback: (
726
691
  eventName: 'started',
727
692
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
728
- context: AgentContext<any, any, any, TConfig, TAppState>
693
+ context: AgentContext<any, TConfig, TAppState>
729
694
  ) => Promise<void> | void
730
695
  ): void;
731
696
 
@@ -747,7 +712,7 @@ export type Agent<
747
712
  callback: (
748
713
  eventName: 'completed',
749
714
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
750
- context: AgentContext<any, any, any, TConfig, TAppState>
715
+ context: AgentContext<any, TConfig, TAppState>
751
716
  ) => Promise<void> | void
752
717
  ): void;
753
718
 
@@ -769,7 +734,7 @@ export type Agent<
769
734
  callback: (
770
735
  eventName: 'errored',
771
736
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
772
- context: AgentContext<any, any, any, TConfig, TAppState>,
737
+ context: AgentContext<any, TConfig, TAppState>,
773
738
  data: Error
774
739
  ) => Promise<void> | void
775
740
  ): void;
@@ -785,7 +750,7 @@ export type Agent<
785
750
  callback: (
786
751
  eventName: 'started',
787
752
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
788
- context: AgentContext<any, any, any, TConfig, TAppState>
753
+ context: AgentContext<any, TConfig, TAppState>
789
754
  ) => Promise<void> | void
790
755
  ): void;
791
756
 
@@ -800,7 +765,7 @@ export type Agent<
800
765
  callback: (
801
766
  eventName: 'completed',
802
767
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
803
- context: AgentContext<any, any, any, TConfig, TAppState>
768
+ context: AgentContext<any, TConfig, TAppState>
804
769
  ) => Promise<void> | void
805
770
  ): void;
806
771
 
@@ -815,7 +780,7 @@ export type Agent<
815
780
  callback: (
816
781
  eventName: 'errored',
817
782
  agent: Agent<TInput, TOutput, TStream, TConfig, TAppState>,
818
- context: AgentContext<any, any, any, TConfig, TAppState>,
783
+ context: AgentContext<any, TConfig, TAppState>,
819
784
  data: Error
820
785
  ) => Promise<void> | void
821
786
  ): void;
@@ -823,14 +788,14 @@ export type Agent<
823
788
  (TOutput extends StandardSchemaV1 ? { outputSchema: TOutput } : { outputSchema?: never }) &
824
789
  (TStream extends true ? { stream: true } : { stream?: false });
825
790
 
826
- type InferSchemaInput<T> = T extends StandardSchemaV1 ? StandardSchemaV1.InferOutput<T> : never;
791
+ type InferSchemaInput<T> = T extends StandardSchemaV1 ? InferOutput<T> : never;
827
792
 
828
793
  type InferStreamOutput<TOutput, TStream extends boolean> = TStream extends true
829
794
  ? TOutput extends StandardSchemaV1
830
- ? ReadableStream<StandardSchemaV1.InferOutput<TOutput>>
795
+ ? ReadableStream<InferOutput<TOutput>>
831
796
  : ReadableStream<unknown>
832
797
  : TOutput extends StandardSchemaV1
833
- ? StandardSchemaV1.InferOutput<TOutput>
798
+ ? InferOutput<TOutput>
834
799
  : void;
835
800
 
836
801
  type SchemaInput<TSchema> = TSchema extends { input: infer I } ? I : undefined;
@@ -844,10 +809,10 @@ type SchemaStream<TSchema> = TSchema extends { stream: infer S }
844
809
  type SchemaHandlerReturn<TSchema> =
845
810
  SchemaStream<TSchema> extends true
846
811
  ? SchemaOutput<TSchema> extends StandardSchemaV1
847
- ? ReadableStream<StandardSchemaV1.InferOutput<SchemaOutput<TSchema>>>
812
+ ? ReadableStream<InferOutput<SchemaOutput<TSchema>>>
848
813
  : ReadableStream<unknown>
849
814
  : SchemaOutput<TSchema> extends StandardSchemaV1
850
- ? StandardSchemaV1.InferOutput<SchemaOutput<TSchema>>
815
+ ? InferOutput<SchemaOutput<TSchema>>
851
816
  : void;
852
817
 
853
818
  // Handler signature based on schema + setup result (no self-reference)
@@ -855,21 +820,37 @@ type AgentHandlerFromConfig<TSchema, TSetupReturn, TAppState = AppState> =
855
820
  SchemaInput<TSchema> extends infer I
856
821
  ? I extends StandardSchemaV1
857
822
  ? (
858
- ctx: AgentContext<any, any, any, TSetupReturn, TAppState>,
859
- input: StandardSchemaV1.InferOutput<I>
823
+ ctx: AgentContext<any, TSetupReturn, TAppState>,
824
+ input: InferOutput<I>
860
825
  ) => Promise<SchemaHandlerReturn<TSchema>> | SchemaHandlerReturn<TSchema>
861
826
  : (
862
- ctx: AgentContext<any, any, any, TSetupReturn, TAppState>
827
+ ctx: AgentContext<any, TSetupReturn, TAppState>
863
828
  ) => Promise<SchemaHandlerReturn<TSchema>> | SchemaHandlerReturn<TSchema>
864
829
  : (
865
- ctx: AgentContext<any, any, any, TSetupReturn, TAppState>
830
+ ctx: AgentContext<any, TSetupReturn, TAppState>
866
831
  ) => Promise<SchemaHandlerReturn<TSchema>> | SchemaHandlerReturn<TSchema>;
867
832
 
868
833
  /**
869
834
  * Configuration object for creating an agent with automatic type inference.
870
835
  *
836
+ * Passed as the second parameter to createAgent(name, config).
837
+ *
871
838
  * @template TSchema - Schema definition object containing optional input, output, and stream properties
872
839
  * @template TConfig - Function type that returns agent-specific configuration from setup
840
+ *
841
+ * @example
842
+ * ```typescript
843
+ * const agent = createAgent('greeting', {
844
+ * description: 'Generates personalized greetings',
845
+ * schema: {
846
+ * input: z.object({ name: z.string(), age: z.number() }),
847
+ * output: z.string()
848
+ * },
849
+ * handler: async (ctx, { name, age }) => {
850
+ * return `Hello, ${name}! You are ${age} years old.`;
851
+ * }
852
+ * });
853
+ * ```
873
854
  */
874
855
  export interface CreateAgentConfig<
875
856
  TSchema extends
@@ -896,17 +877,22 @@ export interface CreateAgentConfig<
896
877
  schema?: TSchema;
897
878
 
898
879
  /**
899
- * Agent metadata visible in the Agentuity platform.
880
+ * Optional description of what this agent does, visible in the Agentuity platform.
900
881
  *
901
882
  * @example
902
883
  * ```typescript
903
- * metadata: {
904
- * name: 'Greeting Agent',
905
- * description: 'Returns personalized greetings'
906
- * }
884
+ * description: 'Returns personalized greetings'
907
885
  * ```
908
886
  */
909
- metadata: ExternalAgentMetadata;
887
+ description?: string;
888
+
889
+ /**
890
+ * Optional metadata object (typically injected by build plugin during compilation).
891
+ * Contains agent identification and versioning information.
892
+ *
893
+ * @internal - Usually populated by build tooling, not manually set
894
+ */
895
+ metadata?: Partial<AgentMetadata>;
910
896
 
911
897
  /**
912
898
  * Optional async function called once on app startup to initialize agent-specific resources.
@@ -970,17 +956,187 @@ export interface CreateAgentConfig<
970
956
  ) => Promise<void> | void;
971
957
  }
972
958
 
959
+ /**
960
+ * The public interface returned by createAgent().
961
+ * Provides methods to run the agent, create evaluations, and manage event listeners.
962
+ *
963
+ * @template TInput - Input schema type (StandardSchemaV1 or undefined if no input)
964
+ * @template TOutput - Output schema type (StandardSchemaV1 or undefined if no output)
965
+ * @template TStream - Whether the agent returns a stream (true/false)
966
+ *
967
+ * @example
968
+ * ```typescript
969
+ * const agent = createAgent('greeting', {
970
+ * schema: {
971
+ * input: z.object({ name: z.string() }),
972
+ * output: z.string()
973
+ * },
974
+ * handler: async (ctx, { name }) => `Hello, ${name}!`
975
+ * });
976
+ *
977
+ * // Run the agent
978
+ * const result = await agent.run({ name: 'Alice' });
979
+ *
980
+ * // Create evaluation
981
+ * const evalDef = agent.createEval('greeting-accuracy', {
982
+ * description: 'Checks if greeting includes the user name',
983
+ * handler: async (ctx, input, output) => {
984
+ * return { score: output.includes(input.name) ? 1 : 0 };
985
+ * }
986
+ * });
987
+ *
988
+ * // Listen to events
989
+ * agent.addEventListener('completed', async (eventName, agent, context) => {
990
+ * console.log('Agent completed successfully');
991
+ * });
992
+ * ```
993
+ */
973
994
  export interface AgentRunner<
974
995
  TInput extends StandardSchemaV1 | undefined = any,
975
996
  TOutput extends StandardSchemaV1 | undefined = any,
976
997
  TStream extends boolean = false,
977
998
  > {
999
+ /** Agent metadata (id, name, description, etc.) */
978
1000
  metadata: AgentMetadata;
1001
+
1002
+ /**
1003
+ * Execute the agent with validated input.
1004
+ * If agent has no input schema, call with no arguments.
1005
+ * If agent has input schema, pass validated input object.
1006
+ *
1007
+ * @example
1008
+ * ```typescript
1009
+ * // Agent with input
1010
+ * const result = await agent.run({ name: 'Alice' });
1011
+ *
1012
+ * // Agent without input
1013
+ * const result = await agent.run();
1014
+ * ```
1015
+ */
979
1016
  run: undefined extends TInput
980
1017
  ? () => Promise<InferStreamOutput<Exclude<TOutput, undefined>, TStream>>
981
1018
  : (
982
1019
  input: InferSchemaInput<Exclude<TInput, undefined>>
983
1020
  ) => Promise<InferStreamOutput<Exclude<TOutput, undefined>, TStream>>;
1021
+
1022
+ /**
1023
+ * Create Hono validator middleware for this agent.
1024
+ * Automatically validates request input against the agent's schema.
1025
+ *
1026
+ * @example
1027
+ * ```typescript
1028
+ * import myAgent from './my-agent';
1029
+ * router.post('/', myAgent.validator(), async (c) => {
1030
+ * const data = c.req.valid('json'); // Fully typed!
1031
+ * return c.json(await myAgent.run(data));
1032
+ * });
1033
+ * ```
1034
+ */
1035
+ validator: AgentValidator<TInput, TOutput>;
1036
+
1037
+ /** Input schema (if defined) */
1038
+ inputSchema?: TInput;
1039
+
1040
+ /** Output schema (if defined) */
1041
+ outputSchema?: TOutput;
1042
+
1043
+ /** Whether agent returns a stream */
1044
+ stream?: TStream;
1045
+
1046
+ /**
1047
+ * Create an evaluation for this agent.
1048
+ * Evaluations run automatically after the agent completes.
1049
+ *
1050
+ * @example
1051
+ * ```typescript
1052
+ * const accuracyEval = agent.createEval('accuracy', {
1053
+ * description: 'Validates output length is non-zero',
1054
+ * handler: async (ctx, input, output) => ({
1055
+ * score: output.length > 0 ? 1 : 0,
1056
+ * metadata: { outputLength: output.length }
1057
+ * })
1058
+ * });
1059
+ * ```
1060
+ */
1061
+ createEval: CreateEvalMethod<TInput, TOutput>;
1062
+
1063
+ /**
1064
+ * Add event listener for 'started' or 'completed' events.
1065
+ * Listeners fire sequentially in the order they were added.
1066
+ *
1067
+ * @param eventName - 'started' or 'completed'
1068
+ * @param callback - Function to call when event fires
1069
+ *
1070
+ * @example
1071
+ * ```typescript
1072
+ * agent.addEventListener('started', async (eventName, agent, context) => {
1073
+ * context.logger.info('Agent execution started');
1074
+ * });
1075
+ * ```
1076
+ */
1077
+ addEventListener(
1078
+ eventName: 'started' | 'completed',
1079
+ callback: (
1080
+ eventName: 'started' | 'completed',
1081
+ agent: Agent<TInput, TOutput, TStream, any, any>,
1082
+ context: AgentContext<any, any, any>
1083
+ ) => Promise<void> | void
1084
+ ): void;
1085
+
1086
+ /**
1087
+ * Add event listener for 'errored' event.
1088
+ * Fires when agent handler throws an error.
1089
+ *
1090
+ * @param eventName - 'errored'
1091
+ * @param callback - Function to call when error occurs
1092
+ *
1093
+ * @example
1094
+ * ```typescript
1095
+ * agent.addEventListener('errored', async (eventName, agent, context, error) => {
1096
+ * context.logger.error('Agent failed', { error: error.message });
1097
+ * });
1098
+ * ```
1099
+ */
1100
+ addEventListener(
1101
+ eventName: 'errored',
1102
+ callback: (
1103
+ eventName: 'errored',
1104
+ agent: Agent<TInput, TOutput, TStream, any, any>,
1105
+ context: AgentContext<any, any, any>,
1106
+ error: Error
1107
+ ) => Promise<void> | void
1108
+ ): void;
1109
+
1110
+ /**
1111
+ * Remove event listener for 'started' or 'completed' events.
1112
+ *
1113
+ * @param eventName - 'started' or 'completed'
1114
+ * @param callback - The same callback function that was added
1115
+ */
1116
+ removeEventListener(
1117
+ eventName: 'started' | 'completed',
1118
+ callback: (
1119
+ eventName: 'started' | 'completed',
1120
+ agent: Agent<TInput, TOutput, TStream, any, any>,
1121
+ context: AgentContext<any, any, any>
1122
+ ) => Promise<void> | void
1123
+ ): void;
1124
+
1125
+ /**
1126
+ * Remove event listener for 'errored' event.
1127
+ *
1128
+ * @param eventName - 'errored'
1129
+ * @param callback - The same callback function that was added
1130
+ */
1131
+ removeEventListener(
1132
+ eventName: 'errored',
1133
+ callback: (
1134
+ eventName: 'errored',
1135
+ agent: Agent<TInput, TOutput, TStream, any, any>,
1136
+ context: AgentContext<any, any, any>,
1137
+ error: Error
1138
+ ) => Promise<void> | void
1139
+ ): void;
984
1140
  }
985
1141
 
986
1142
  // Will be populated at runtime with strongly typed agents
@@ -995,26 +1151,49 @@ const agentEventListeners = new WeakMap<
995
1151
  // Map to store agent configs returned from setup (keyed by agent name)
996
1152
  const agentConfigs = new Map<string, unknown>();
997
1153
 
1154
+ /**
1155
+ * Get the global runtime state (for production use).
1156
+ * In tests, use TestAgentContext, which has isolated runtime state.
1157
+ */
1158
+ export function getGlobalRuntimeState(): AgentRuntimeState {
1159
+ return {
1160
+ agents,
1161
+ agentConfigs,
1162
+ agentEventListeners,
1163
+ };
1164
+ }
1165
+
1166
+ /**
1167
+ * Get the runtime state from an AgentContext.
1168
+ * @internal
1169
+ */
1170
+ export function getAgentRuntime(ctx: AgentContext<any, any, any>): AgentRuntimeState {
1171
+ return ctx[AGENT_RUNTIME];
1172
+ }
1173
+
998
1174
  // Helper to fire event listeners sequentially, abort on first error
999
1175
  async function fireAgentEvent(
1176
+ runtime: AgentRuntimeState,
1000
1177
  agent: Agent<any, any, any, any, any>,
1001
1178
  eventName: 'started' | 'completed',
1002
- context: AgentContext<any, any, any, any, any>
1179
+ context: AgentContext<any, any, any>
1003
1180
  ): Promise<void>;
1004
1181
  async function fireAgentEvent(
1182
+ runtime: AgentRuntimeState,
1005
1183
  agent: Agent<any, any, any, any, any>,
1006
1184
  eventName: 'errored',
1007
- context: AgentContext<any, any, any, any, any>,
1185
+ context: AgentContext<any, any, any>,
1008
1186
  data: Error
1009
1187
  ): Promise<void>;
1010
1188
  async function fireAgentEvent(
1189
+ runtime: AgentRuntimeState,
1011
1190
  agent: Agent<any, any, any, any, any>,
1012
1191
  eventName: AgentEventName,
1013
- context: AgentContext<any, any, any, any, any>,
1192
+ context: AgentContext<any, any, any>,
1014
1193
  data?: Error
1015
1194
  ): Promise<void> {
1016
1195
  // Fire agent-level listeners
1017
- const listeners = agentEventListeners.get(agent);
1196
+ const listeners = runtime.agentEventListeners.get(agent);
1018
1197
  if (listeners) {
1019
1198
  const callbacks = listeners.get(eventName);
1020
1199
  if (callbacks && callbacks.size > 0) {
@@ -1109,17 +1288,22 @@ export interface CreateAgentConfigExplicit<
1109
1288
  };
1110
1289
 
1111
1290
  /**
1112
- * Agent metadata.
1291
+ * Optional description of what this agent does.
1113
1292
  *
1114
1293
  * @example
1115
1294
  * ```typescript
1116
- * metadata: {
1117
- * name: 'My Agent',
1118
- * description: 'Does something useful'
1119
- * }
1295
+ * description: 'Does something useful'
1120
1296
  * ```
1121
1297
  */
1122
- metadata: ExternalAgentMetadata;
1298
+ description?: string;
1299
+
1300
+ /**
1301
+ * Optional metadata object (typically injected by build plugin during compilation).
1302
+ * Contains agent identification and versioning information.
1303
+ *
1304
+ * @internal - Usually populated by build tooling, not manually set
1305
+ */
1306
+ metadata?: Partial<AgentMetadata>;
1123
1307
 
1124
1308
  /**
1125
1309
  * Optional setup function receiving app state, returns agent config.
@@ -1169,43 +1353,39 @@ export interface CreateAgentConfigExplicit<
1169
1353
  ? TStream extends true
1170
1354
  ? TOutput extends StandardSchemaV1
1171
1355
  ? (
1172
- c: AgentContext<any, any, any, TConfig, TAppState>,
1173
- input: StandardSchemaV1.InferOutput<TInput>
1356
+ c: AgentContext<any, TConfig, TAppState>,
1357
+ input: InferOutput<TInput>
1174
1358
  ) =>
1175
- | Promise<ReadableStream<StandardSchemaV1.InferOutput<TOutput>>>
1176
- | ReadableStream<StandardSchemaV1.InferOutput<TOutput>>
1359
+ | Promise<ReadableStream<InferOutput<TOutput>>>
1360
+ | ReadableStream<InferOutput<TOutput>>
1177
1361
  : (
1178
- c: AgentContext<any, any, any, TConfig, TAppState>,
1179
- input: StandardSchemaV1.InferOutput<TInput>
1362
+ c: AgentContext<any, TConfig, TAppState>,
1363
+ input: InferOutput<TInput>
1180
1364
  ) => Promise<ReadableStream<unknown>> | ReadableStream<unknown>
1181
1365
  : TOutput extends StandardSchemaV1
1182
1366
  ? (
1183
- c: AgentContext<any, any, any, TConfig, TAppState>,
1184
- input: StandardSchemaV1.InferOutput<TInput>
1185
- ) =>
1186
- | Promise<StandardSchemaV1.InferOutput<TOutput>>
1187
- | StandardSchemaV1.InferOutput<TOutput>
1367
+ c: AgentContext<any, TConfig, TAppState>,
1368
+ input: InferOutput<TInput>
1369
+ ) => Promise<InferOutput<TOutput>> | InferOutput<TOutput>
1188
1370
  : (
1189
- c: AgentContext<any, any, any, TConfig, TAppState>,
1190
- input: StandardSchemaV1.InferOutput<TInput>
1371
+ c: AgentContext<any, TConfig, TAppState>,
1372
+ input: InferOutput<TInput>
1191
1373
  ) => Promise<void> | void
1192
1374
  : TStream extends true
1193
1375
  ? TOutput extends StandardSchemaV1
1194
1376
  ? (
1195
- c: AgentContext<any, any, any, TConfig, TAppState>
1377
+ c: AgentContext<any, TConfig, TAppState>
1196
1378
  ) =>
1197
- | Promise<ReadableStream<StandardSchemaV1.InferOutput<TOutput>>>
1198
- | ReadableStream<StandardSchemaV1.InferOutput<TOutput>>
1379
+ | Promise<ReadableStream<InferOutput<TOutput>>>
1380
+ | ReadableStream<InferOutput<TOutput>>
1199
1381
  : (
1200
- c: AgentContext<any, any, any, TConfig, TAppState>
1382
+ c: AgentContext<any, TConfig, TAppState>
1201
1383
  ) => Promise<ReadableStream<unknown>> | ReadableStream<unknown>
1202
1384
  : TOutput extends StandardSchemaV1
1203
1385
  ? (
1204
- c: AgentContext<any, any, any, TConfig, TAppState>
1205
- ) =>
1206
- | Promise<StandardSchemaV1.InferOutput<TOutput>>
1207
- | StandardSchemaV1.InferOutput<TOutput>
1208
- : (c: AgentContext<any, any, any, TConfig, TAppState>) => Promise<void> | void;
1386
+ c: AgentContext<any, TConfig, TAppState>
1387
+ ) => Promise<InferOutput<TOutput>> | InferOutput<TOutput>
1388
+ : (c: AgentContext<any, TConfig, TAppState>) => Promise<void> | void;
1209
1389
  }
1210
1390
 
1211
1391
  /**
@@ -1216,17 +1396,15 @@ export interface CreateAgentConfigExplicit<
1216
1396
  * @template TSchema - Schema definition object containing optional input, output, and stream properties
1217
1397
  * @template TConfig - Function type that returns agent-specific configuration from setup
1218
1398
  *
1399
+ * @param name - Unique agent name (must be unique within the project)
1219
1400
  * @param config - Agent configuration object
1220
1401
  *
1221
- * @returns Agent instance that can be registered with the runtime
1402
+ * @returns AgentRunner with a run method for executing the agent
1222
1403
  *
1223
1404
  * @example
1224
1405
  * ```typescript
1225
- * const agent = createAgent({
1226
- * metadata: {
1227
- * name: 'Greeting Agent',
1228
- * description: 'Returns personalized greetings'
1229
- * },
1406
+ * const agent = createAgent('greeting-agent', {
1407
+ * description: 'Returns personalized greetings',
1230
1408
  * schema: {
1231
1409
  * input: z.object({ name: z.string(), age: z.number() }),
1232
1410
  * output: z.string()
@@ -1236,6 +1414,9 @@ export interface CreateAgentConfigExplicit<
1236
1414
  * return `Hello, ${name}! You are ${age} years old.`;
1237
1415
  * }
1238
1416
  * });
1417
+ *
1418
+ * // Call the agent directly
1419
+ * const result = await agent.run({ name: 'Alice', age: 30 });
1239
1420
  * ```
1240
1421
  */
1241
1422
  export function createAgent<
@@ -1248,14 +1429,9 @@ export function createAgent<
1248
1429
  | undefined = undefined,
1249
1430
  TConfig extends (app: AppState) => any = any,
1250
1431
  >(
1432
+ name: string,
1251
1433
  config: CreateAgentConfig<TSchema, TConfig>
1252
- ): Agent<
1253
- SchemaInput<TSchema>,
1254
- SchemaOutput<TSchema>,
1255
- SchemaStream<TSchema>,
1256
- TConfig extends (app: AppState) => infer R ? Awaited<R> : undefined,
1257
- AppState
1258
- >;
1434
+ ): AgentRunner<SchemaInput<TSchema>, SchemaOutput<TSchema>, SchemaStream<TSchema>>;
1259
1435
 
1260
1436
  /**
1261
1437
  * Creates an agent with explicit generic type parameters.
@@ -1268,9 +1444,10 @@ export function createAgent<
1268
1444
  * @template TConfig - Type returned by setup function
1269
1445
  * @template TAppState - Custom app state type from createApp
1270
1446
  *
1447
+ * @param name - Unique agent name (must be unique within the project)
1271
1448
  * @param config - Agent configuration object
1272
1449
  *
1273
- * @returns Agent instance with explicit types
1450
+ * @returns AgentRunner with explicit types and a run method
1274
1451
  *
1275
1452
  * @example
1276
1453
  * ```typescript
@@ -1280,11 +1457,8 @@ export function createAgent<
1280
1457
  * const agent = createAgent<
1281
1458
  * z.ZodObject<any>, // TInput
1282
1459
  * z.ZodString, // TOutput
1283
- * false, // TStream
1284
- * MyConfig, // TConfig
1285
- * MyAppState // TAppState
1286
- * >({
1287
- * metadata: { name: 'Custom Agent' },
1460
+ * false // TStream
1461
+ * >('custom-agent', {
1288
1462
  * setup: async (app) => ({ cache: new Map() }),
1289
1463
  * handler: async (ctx, input) => {
1290
1464
  * const db = ctx.app.db;
@@ -1301,8 +1475,9 @@ export function createAgent<
1301
1475
  TConfig = unknown,
1302
1476
  TAppState = AppState,
1303
1477
  >(
1478
+ name: string,
1304
1479
  config: CreateAgentConfigExplicit<TInput, TOutput, TStream, TConfig, TAppState>
1305
- ): Agent<TInput, TOutput, TStream, TConfig, TAppState>;
1480
+ ): AgentRunner<TInput, TOutput, TStream>;
1306
1481
 
1307
1482
  // Implementation
1308
1483
  export function createAgent<
@@ -1312,8 +1487,9 @@ export function createAgent<
1312
1487
  TConfig = unknown,
1313
1488
  TAppState = AppState,
1314
1489
  >(
1490
+ name: string,
1315
1491
  config: CreateAgentConfigExplicit<TInput, TOutput, TStream, TConfig, TAppState>
1316
- ): Agent<TInput, TOutput, TStream, TConfig, TAppState> {
1492
+ ): AgentRunner<TInput, TOutput, TStream> {
1317
1493
  const inputSchema = config.schema?.input;
1318
1494
  const outputSchema = config.schema?.output;
1319
1495
 
@@ -1321,7 +1497,7 @@ export function createAgent<
1321
1497
  // Evals should only be added via agent.createEval() after agent creation
1322
1498
  const evalsArray: Eval[] = [];
1323
1499
 
1324
- const handler = async (_ctx: Context, input?: any) => {
1500
+ const handler = async (input?: any) => {
1325
1501
  let validatedInput: any = undefined;
1326
1502
 
1327
1503
  if (inputSchema) {
@@ -1335,17 +1511,16 @@ export function createAgent<
1335
1511
  validatedInput = inputResult.value;
1336
1512
  }
1337
1513
 
1338
- const agentCtx = getAgentContext() as AgentContext<any, any, any, TConfig, TAppState>;
1514
+ const agentCtx = getAgentContext() as AgentContext<any, TConfig, TAppState>;
1339
1515
 
1340
- // Get the agent instance from the agents Map to fire events
1341
- // The agent will be registered in the agents Map before the handler is called
1342
- const agentName = agentCtx.agentName;
1343
- const registeredAgent = agentName ? agents.get(agentName) : undefined;
1516
+ // Store current agent for telemetry (using Symbol to keep it internal)
1517
+ (agentCtx as any)[CURRENT_AGENT] = agent;
1344
1518
 
1345
- // Fire 'started' event (only if agent is registered)
1346
- if (registeredAgent) {
1347
- await fireAgentEvent(registeredAgent, 'started', agentCtx);
1348
- }
1519
+ // Get the agent instance from the runtime state to fire events
1520
+ const runtime = getAgentRuntime(agentCtx);
1521
+
1522
+ // Fire 'started' event
1523
+ await fireAgentEvent(runtime, agent as Agent, 'started', agentCtx);
1349
1524
 
1350
1525
  try {
1351
1526
  const result = inputSchema
@@ -1353,7 +1528,8 @@ export function createAgent<
1353
1528
  : await (config.handler as any)(agentCtx);
1354
1529
 
1355
1530
  let validatedOutput: any = result;
1356
- if (outputSchema) {
1531
+ // Skip output validation for streaming agents (they return ReadableStream)
1532
+ if (outputSchema && !config.schema?.stream) {
1357
1533
  const outputResult = await outputSchema['~standard'].validate(result);
1358
1534
  if (outputResult.issues) {
1359
1535
  throw new ValidationError({
@@ -1369,50 +1545,44 @@ export function createAgent<
1369
1545
  agentCtx.state.set('_evalOutput', validatedOutput);
1370
1546
 
1371
1547
  // Fire 'completed' event - evals will run via event listener
1372
- if (registeredAgent) {
1373
- await fireAgentEvent(registeredAgent, 'completed', agentCtx);
1374
- }
1548
+ await fireAgentEvent(runtime, agent as Agent, 'completed', agentCtx);
1375
1549
 
1376
1550
  return validatedOutput;
1377
1551
  } catch (error) {
1378
1552
  // Fire 'errored' event
1379
- if (registeredAgent) {
1380
- await fireAgentEvent(registeredAgent, 'errored', agentCtx, error as Error);
1381
- }
1553
+ await fireAgentEvent(runtime, agent as Agent, 'errored', agentCtx, error as Error);
1382
1554
  throw error;
1383
1555
  }
1384
1556
  };
1385
1557
 
1386
1558
  // Infer input/output types from agent schema
1387
- type AgentInput = TInput extends StandardSchemaV1
1388
- ? StandardSchemaV1.InferOutput<TInput>
1389
- : undefined;
1390
- type AgentOutput = TOutput extends StandardSchemaV1
1391
- ? StandardSchemaV1.InferOutput<TOutput>
1392
- : undefined;
1559
+ type AgentInput = TInput extends StandardSchemaV1 ? InferOutput<TInput> : undefined;
1560
+ type AgentOutput = TOutput extends StandardSchemaV1 ? InferOutput<TOutput> : undefined;
1393
1561
 
1394
1562
  // Create createEval method that infers types from agent and automatically adds to agent
1395
- const createEval = (evalConfig: {
1396
- metadata?: Partial<EvalMetadata>;
1397
- handler: EvalFunction<AgentInput, AgentOutput>;
1398
- }): Eval<TInput, TOutput> => {
1399
- const evalName = evalConfig.metadata?.name || 'unnamed';
1563
+ const createEval = (
1564
+ evalName: string,
1565
+ evalConfig: {
1566
+ description?: string;
1567
+ handler: EvalFunction<AgentInput, AgentOutput>;
1568
+ }
1569
+ ): Eval<TInput, TOutput> => {
1400
1570
  // Trace log to verify evals file is imported
1401
1571
  internal.debug(
1402
- `createEval called for agent "${config?.metadata?.name || 'unknown'}": registering eval "${evalName}"`
1572
+ `createEval called for agent "${name || 'unknown'}": registering eval "${evalName}"`
1403
1573
  );
1404
1574
 
1405
- // Get filename (can be provided via __filename or set by bundler)
1406
- const filename = evalConfig.metadata?.filename || '';
1575
+ // Get filename (set by bundler)
1576
+ const filename = '';
1407
1577
 
1408
1578
  // Use name as identifier for consistency (same as agents)
1409
1579
  const identifier = evalName;
1410
1580
 
1411
1581
  // Use build-time injected id/version if available, otherwise generate at runtime
1412
1582
  // Build-time injection happens via bundler AST transformation
1413
- let evalId = evalConfig.metadata?.id;
1414
- let stableEvalId = evalConfig.metadata?.evalId;
1415
- let version = evalConfig.metadata?.version;
1583
+ let evalId: string | undefined;
1584
+ let stableEvalId: string | undefined;
1585
+ let version: string | undefined;
1416
1586
 
1417
1587
  // Generate version from available metadata if not provided (deterministic hash)
1418
1588
  // At build-time, version is hash of file contents; at runtime we use metadata
@@ -1453,8 +1623,8 @@ export function createAgent<
1453
1623
  evalId: stableEvalId,
1454
1624
  version,
1455
1625
  identifier,
1456
- name: evalConfig.metadata?.name || '',
1457
- description: evalConfig.metadata?.description || '',
1626
+ name: evalName,
1627
+ description: evalConfig.description || '',
1458
1628
  filename,
1459
1629
  },
1460
1630
  handler: evalConfig.handler,
@@ -1471,15 +1641,31 @@ export function createAgent<
1471
1641
  // Automatically add eval to agent's evals array
1472
1642
  evalsArray.push(evalType);
1473
1643
  internal.debug(
1474
- `Added eval "${evalName}" to agent "${config?.metadata?.name || 'unknown'}". Total evals: ${evalsArray.length}`
1644
+ `Added eval "${evalName}" to agent "${name || 'unknown'}". Total evals: ${evalsArray.length}`
1475
1645
  );
1476
1646
 
1477
1647
  return evalType as Eval<TInput, TOutput>;
1478
1648
  };
1479
1649
 
1650
+ // Build metadata - merge user-provided metadata with defaults
1651
+ // The build plugin injects metadata via config.metadata during AST transformation
1652
+ const metadata: Partial<AgentMetadata> = {
1653
+ // Defaults (used when running without build, e.g., dev mode)
1654
+ name,
1655
+ description: config.description,
1656
+ id: '',
1657
+ agentId: '',
1658
+ filename: '',
1659
+ version: '',
1660
+ inputSchemaCode: '',
1661
+ outputSchemaCode: '',
1662
+ // Merge in build-time injected metadata (overrides defaults)
1663
+ ...config.metadata,
1664
+ };
1665
+
1480
1666
  const agent: any = {
1481
1667
  handler,
1482
- metadata: config.metadata,
1668
+ metadata,
1483
1669
  evals: evalsArray,
1484
1670
  createEval,
1485
1671
  setup: config.setup,
@@ -1505,14 +1691,12 @@ export function createAgent<
1505
1691
 
1506
1692
  // Automatically add event listener for 'completed' event to run evals
1507
1693
  (agent as Agent).addEventListener('completed', async (_event, _agent, ctx) => {
1508
- // Get the agent instance from the agents Map to access its current evals array
1694
+ // Use the agent instance passed to event listener to access its evals array
1509
1695
  // This ensures we get evals that were added via agent.createEval() after agent creation
1510
- const agentName = ctx.agentName;
1511
- const registeredAgent = agentName ? agents.get(agentName) : undefined;
1512
- const agentEvals = registeredAgent?.evals || evalsArray;
1696
+ const agentEvals = _agent?.evals || evalsArray;
1513
1697
 
1514
1698
  internal.debug(
1515
- `Checking evals: agentName=${agentName}, evalsArray.length=${evalsArray?.length || 0}, agent.evals.length=${registeredAgent?.evals?.length || 0}`
1699
+ `Checking evals: agent=${_agent.metadata?.name}, evalsArray.length=${evalsArray?.length || 0}, agent.evals.length=${_agent?.evals?.length || 0}`
1516
1700
  );
1517
1701
 
1518
1702
  if (agentEvals && agentEvals.length > 0) {
@@ -1852,7 +2036,57 @@ export function createAgent<
1852
2036
  return composed as unknown as Handler;
1853
2037
  }) as AgentValidator<TInput, TOutput>;
1854
2038
 
1855
- return agent as Agent<TInput, TOutput, TStream, TConfig, TAppState>;
2039
+ // Register the agent for runtime use
2040
+ // @ts-expect-error - metadata might be incomplete until build plugin injects InternalAgentMetadata
2041
+ agents.set(name, agent as Agent<TInput, TOutput, TStream, TConfig, TAppState>);
2042
+
2043
+ // Create and return AgentRunner
2044
+ const runner: any = {
2045
+ metadata: metadata as AgentMetadata,
2046
+ validator: agent.validator,
2047
+ inputSchema: inputSchema as TInput,
2048
+ outputSchema: outputSchema as TOutput,
2049
+ stream: (config.schema?.stream as TStream) || (false as TStream),
2050
+ createEval,
2051
+ addEventListener: agent.addEventListener,
2052
+ removeEventListener: agent.removeEventListener,
2053
+ run: inputSchema
2054
+ ? async (input: InferSchemaInput<Exclude<TInput, undefined>>) => {
2055
+ // Get tracer from AsyncLocalStorage context if available
2056
+ try {
2057
+ const agentCtx = getAgentContext();
2058
+ if (agentCtx?.tracer) {
2059
+ return runWithSpan<any, TInput, TOutput, TStream>(
2060
+ agentCtx.tracer,
2061
+ agent as Agent<TInput, TOutput, TStream>,
2062
+ async () => await agent.handler(input)
2063
+ );
2064
+ }
2065
+ } catch {
2066
+ // Context not available, skip span creation
2067
+ }
2068
+ return await agent.handler(input);
2069
+ }
2070
+ : async () => {
2071
+ // Get tracer from AsyncLocalStorage context if available
2072
+ try {
2073
+ const agentCtx = getAgentContext();
2074
+ if (agentCtx?.tracer) {
2075
+ return runWithSpan<any, TInput, TOutput, TStream>(
2076
+ agentCtx.tracer,
2077
+ agent as Agent<TInput, TOutput, TStream>,
2078
+ async () => await agent.handler()
2079
+ );
2080
+ }
2081
+ } catch {
2082
+ // Context not available, skip span creation
2083
+ }
2084
+ return await agent.handler();
2085
+ },
2086
+ [INTERNAL_AGENT]: agent, // Store reference to internal agent for testing
2087
+ };
2088
+
2089
+ return runner as AgentRunner<TInput, TOutput, TStream>;
1856
2090
  }
1857
2091
 
1858
2092
  const runWithSpan = async <
@@ -1897,7 +2131,7 @@ const createAgentRunner = <
1897
2131
  return runWithSpan<any, TInput, TOutput, TStream>(
1898
2132
  tracer,
1899
2133
  agent,
1900
- async () => await agent.handler(ctx as unknown as AgentContext<any, any, any>, input)
2134
+ async () => await agent.handler(input)
1901
2135
  );
1902
2136
  },
1903
2137
  } as AgentRunner<TInput, TOutput, TStream>;
@@ -1908,7 +2142,7 @@ const createAgentRunner = <
1908
2142
  return runWithSpan<any, TInput, TOutput, TStream>(
1909
2143
  tracer,
1910
2144
  agent,
1911
- async () => await agent.handler(ctx as unknown as AgentContext<any, any, any>)
2145
+ async () => await agent.handler()
1912
2146
  );
1913
2147
  },
1914
2148
  } as AgentRunner<TInput, TOutput, TStream>;
@@ -1923,108 +2157,30 @@ export const populateAgentsRegistry = (ctx: Context): any => {
1923
2157
  const agentsObj: any = {};
1924
2158
  // Track ownership of camelCase keys to detect collisions between different raw names
1925
2159
  const ownershipMap = new Map<string, string>();
1926
- const childOwnershipMap = new Map<string, string>();
1927
2160
 
1928
- // Build nested structure for agents and subagents
2161
+ // Build flat registry of agents
1929
2162
  for (const [name, agentFn] of agents) {
1930
2163
  const runner = createAgentRunner(agentFn, ctx);
2164
+ const key = toCamelCase(name);
1931
2165
 
1932
- if (name.includes('.')) {
1933
- // Subagent: "parent.child"
1934
- const parts = name.split('.');
1935
- if (parts.length !== 2) {
1936
- internal.warn(`Invalid subagent name format: "${name}". Expected "parent.child".`);
1937
- continue;
1938
- }
1939
- const rawParentName = parts[0];
1940
- const rawChildName = parts[1];
1941
- if (rawParentName && rawChildName) {
1942
- // Convert parent name to camelCase for registry key
1943
- const parentKey = toCamelCase(rawParentName);
1944
-
1945
- // Validate parentKey is non-empty
1946
- if (!parentKey) {
1947
- internal.warn(
1948
- `Agent name "${rawParentName}" converts to empty camelCase key. Skipping.`
1949
- );
1950
- continue;
1951
- }
1952
-
1953
- // Detect collision on parent key - check ownership
1954
- const existingOwner = ownershipMap.get(parentKey);
1955
- if (existingOwner && existingOwner !== rawParentName) {
1956
- internal.error(
1957
- `Agent registry collision: "${rawParentName}" conflicts with "${existingOwner}" (both map to camelCase key "${parentKey}")`
1958
- );
1959
- throw new Error(`Agent registry collision detected for key "${parentKey}"`);
1960
- }
1961
-
1962
- if (!agentsObj[parentKey]) {
1963
- // Ensure parent exists - look up by raw name in agents map
1964
- const parentAgent = agents.get(rawParentName);
1965
- if (parentAgent) {
1966
- agentsObj[parentKey] = createAgentRunner(parentAgent, ctx);
1967
- // Record ownership
1968
- ownershipMap.set(parentKey, rawParentName);
1969
- }
1970
- }
1971
-
1972
- // Attach subagent to parent using camelCase property name
1973
- const childKey = toCamelCase(rawChildName);
1974
-
1975
- // Validate childKey is non-empty
1976
- if (!childKey) {
1977
- internal.warn(
1978
- `Agent name "${rawChildName}" converts to empty camelCase key. Skipping subagent "${name}".`
1979
- );
1980
- continue;
1981
- }
1982
-
1983
- // Detect collision on child key - check ownership
1984
- const childOwnershipKey = `${parentKey}.${childKey}`;
1985
- const existingChildOwner = childOwnershipMap.get(childOwnershipKey);
1986
- if (existingChildOwner && existingChildOwner !== name) {
1987
- internal.error(
1988
- `Agent registry collision: subagent "${name}" conflicts with "${existingChildOwner}" (both map to camelCase key "${childOwnershipKey}")`
1989
- );
1990
- throw new Error(
1991
- `Agent registry collision detected for subagent key "${childOwnershipKey}"`
1992
- );
1993
- }
1994
-
1995
- if (agentsObj[parentKey]) {
1996
- if (agentsObj[parentKey][childKey] === undefined) {
1997
- agentsObj[parentKey][childKey] = runner;
1998
- // Record ownership
1999
- childOwnershipMap.set(childOwnershipKey, name);
2000
- }
2001
- }
2002
- }
2003
- } else {
2004
- // Parent agent or standalone agent - convert to camelCase for registry key
2005
- const parentKey = toCamelCase(name);
2006
-
2007
- // Validate parentKey is non-empty
2008
- if (!parentKey) {
2009
- internal.warn(`Agent name "${name}" converts to empty camelCase key. Skipping.`);
2010
- continue;
2011
- }
2012
-
2013
- // Detect collision on parent key - check ownership
2014
- const existingOwner = ownershipMap.get(parentKey);
2015
- if (existingOwner && existingOwner !== name) {
2016
- internal.error(
2017
- `Agent registry collision: "${name}" conflicts with "${existingOwner}" (both map to camelCase key "${parentKey}")`
2018
- );
2019
- throw new Error(`Agent registry collision detected for key "${parentKey}"`);
2020
- }
2166
+ // Validate key is non-empty
2167
+ if (!key) {
2168
+ internal.warn(`Agent name "${name}" converts to empty camelCase key. Skipping.`);
2169
+ continue;
2170
+ }
2021
2171
 
2022
- if (!agentsObj[parentKey]) {
2023
- agentsObj[parentKey] = runner;
2024
- // Record ownership
2025
- ownershipMap.set(parentKey, name);
2026
- }
2172
+ // Detect collision on key - check ownership
2173
+ const existingOwner = ownershipMap.get(key);
2174
+ if (existingOwner && existingOwner !== name) {
2175
+ internal.error(
2176
+ `Agent registry collision: "${name}" conflicts with "${existingOwner}" (both map to camelCase key "${key}")`
2177
+ );
2178
+ throw new Error(`Agent registry collision detected for key "${key}"`);
2027
2179
  }
2180
+
2181
+ agentsObj[key] = runner;
2182
+ // Record ownership
2183
+ ownershipMap.set(key, name);
2028
2184
  }
2029
2185
 
2030
2186
  return agentsObj;
@@ -2035,36 +2191,16 @@ export const createAgentMiddleware = (agentName: AgentName | ''): MiddlewareHand
2035
2191
  // Populate agents object with strongly-typed keys
2036
2192
  const agentsObj = populateAgentsRegistry(ctx);
2037
2193
 
2038
- // Set agent registry on context for access via c.var.agent
2039
- ctx.set('agent', agentsObj);
2040
-
2041
- // Determine current and parent agents
2042
- let currentAgent: AgentRunner | undefined;
2043
- let parentAgent: AgentRunner | undefined;
2044
-
2045
- if (agentName?.includes('.')) {
2046
- // This is a subagent
2047
- const parts = agentName.split('.');
2048
- const rawParentName = parts[0];
2049
- const rawChildName = parts[1];
2050
- if (rawParentName && rawChildName) {
2051
- // Use camelCase keys to look up in agentsObj (which uses camelCase keys)
2052
- const parentKey = toCamelCase(rawParentName);
2053
- const childKey = toCamelCase(rawChildName);
2054
- currentAgent = agentsObj[parentKey]?.[childKey];
2055
- parentAgent = agentsObj[parentKey];
2194
+ // Track agent ID for session telemetry
2195
+ if (agentName) {
2196
+ const agentKey = toCamelCase(agentName);
2197
+ const agent = agentsObj[agentKey];
2198
+ const _ctx = privateContext(ctx);
2199
+ if (agent?.metadata?.id) {
2200
+ // we add both so that you can query by either
2201
+ _ctx.var.agentIds.add(agent.metadata.id);
2202
+ _ctx.var.agentIds.add(agent.metadata.agentId);
2056
2203
  }
2057
- } else if (agentName) {
2058
- // This is a parent or standalone agent - use camelCase key
2059
- const parentKey = toCamelCase(agentName);
2060
- currentAgent = agentsObj[parentKey];
2061
- }
2062
-
2063
- const _ctx = privateContext(ctx);
2064
- if (currentAgent?.metadata?.id) {
2065
- // we add both so that you can query by either
2066
- _ctx.var.agentIds.add(currentAgent.metadata.id);
2067
- _ctx.var.agentIds.add(currentAgent.metadata.agentId);
2068
2204
  }
2069
2205
 
2070
2206
  const sessionId = ctx.var.sessionId;
@@ -2073,18 +2209,9 @@ export const createAgentMiddleware = (agentName: AgentName | ''): MiddlewareHand
2073
2209
  const config = agentName ? getAgentConfig(agentName as AgentName) : undefined;
2074
2210
  const app = ctx.var.app;
2075
2211
 
2076
- const args: RequestAgentContextArgs<
2077
- AgentRegistry,
2078
- AgentRunner | undefined,
2079
- AgentRunner | undefined,
2080
- unknown,
2081
- unknown
2082
- > = {
2212
+ const args: RequestAgentContextArgs<AgentRegistry, unknown, unknown> = {
2083
2213
  agent: agentsObj,
2084
- current: currentAgent,
2085
- parent: parentAgent,
2086
- agentName: agentName as AgentName,
2087
- logger: ctx.var.logger.child({ agent: agentName }),
2214
+ logger: ctx.var.logger,
2088
2215
  tracer: ctx.var.tracer,
2089
2216
  sessionId,
2090
2217
  session,
@@ -2092,9 +2219,10 @@ export const createAgentMiddleware = (agentName: AgentName | ''): MiddlewareHand
2092
2219
  handler: ctx.var.waitUntilHandler,
2093
2220
  config: config || {},
2094
2221
  app: app || {},
2222
+ runtime: getGlobalRuntimeState(),
2095
2223
  };
2096
2224
 
2097
- return runInAgentContext(ctx as unknown as Record<string, unknown>, args, next);
2225
+ return setupRequestAgentContext(ctx as unknown as Record<string, unknown>, args, next);
2098
2226
  };
2099
2227
  };
2100
2228
 
@@ -2111,10 +2239,89 @@ export const runAgentSetups = async (appState: AppState): Promise<void> => {
2111
2239
  };
2112
2240
 
2113
2241
  export const runAgentShutdowns = async (appState: AppState): Promise<void> => {
2114
- for (const [name, agent] of agents.entries()) {
2242
+ const runtime = getGlobalRuntimeState();
2243
+ for (const [name, agent] of runtime.agents.entries()) {
2115
2244
  if (agent.shutdown) {
2116
- const config = getAgentConfig(name as AgentName);
2245
+ const config = runtime.agentConfigs.get(name) as any;
2117
2246
  await agent.shutdown(appState, config);
2118
2247
  }
2119
2248
  }
2120
2249
  };
2250
+
2251
+ /**
2252
+ * Run an agent within a specific AgentContext.
2253
+ * Sets up AsyncLocalStorage with the provided context and executes the agent.
2254
+ *
2255
+ * This is the recommended way to test agents in unit tests. It automatically:
2256
+ * - Registers the agent in the runtime state so event listeners fire
2257
+ * - Sets up AsyncLocalStorage so getAgentContext() works inside the agent
2258
+ * - Handles both agents with input and agents without input
2259
+ *
2260
+ * **Use cases:**
2261
+ * - Unit testing agents with TestAgentContext
2262
+ * - Running agents outside HTTP request flow
2263
+ * - Custom agent execution environments
2264
+ * - Testing event listeners and evaluations
2265
+ *
2266
+ * @template TInput - Type of the input parameter
2267
+ * @template TOutput - Type of the return value
2268
+ *
2269
+ * @param ctx - The AgentContext to use (typically TestAgentContext in tests)
2270
+ * @param agent - The AgentRunner to execute (returned from createAgent)
2271
+ * @param input - Input data (required if agent has input schema, omit otherwise)
2272
+ *
2273
+ * @returns Promise resolving to the agent's output
2274
+ *
2275
+ * @example
2276
+ * ```typescript
2277
+ * import { runInAgentContext, TestAgentContext } from '@agentuity/runtime/test';
2278
+ *
2279
+ * test('greeting agent', async () => {
2280
+ * const ctx = new TestAgentContext();
2281
+ * const result = await runInAgentContext(ctx, greetingAgent, {
2282
+ * name: 'Alice',
2283
+ * age: 30
2284
+ * });
2285
+ * expect(result).toBe('Hello, Alice! You are 30 years old.');
2286
+ * });
2287
+ *
2288
+ * test('no-input agent', async () => {
2289
+ * const ctx = new TestAgentContext();
2290
+ * const result = await runInAgentContext(ctx, statusAgent);
2291
+ * expect(result).toEqual({ status: 'ok' });
2292
+ * });
2293
+ * ```
2294
+ */
2295
+ export async function runInAgentContext<TInput, TOutput>(
2296
+ ctx: AgentContext<any, any, any>,
2297
+ agent: AgentRunner<any, any, any>,
2298
+ input?: TInput
2299
+ ): Promise<TOutput> {
2300
+ const { getAgentAsyncLocalStorage } = await import('./_context');
2301
+ const storage = getAgentAsyncLocalStorage();
2302
+
2303
+ // Register agent in runtime state so events fire (lookup by metadata.name)
2304
+ const agentName = agent.metadata.name;
2305
+ const runtime = getAgentRuntime(ctx);
2306
+
2307
+ // Get internal agent from runner (stored via symbol) or global registry
2308
+ const internalAgent = (agent as any)[INTERNAL_AGENT] || agents.get(agentName);
2309
+
2310
+ if (internalAgent && agentName) {
2311
+ runtime.agents.set(agentName, internalAgent);
2312
+
2313
+ // Copy event listeners from global to context runtime
2314
+ const globalListeners = agentEventListeners.get(internalAgent);
2315
+ if (globalListeners) {
2316
+ runtime.agentEventListeners.set(internalAgent, globalListeners);
2317
+ }
2318
+ }
2319
+
2320
+ return storage.run(ctx, async () => {
2321
+ if (input !== undefined) {
2322
+ return await (agent.run as any)(input);
2323
+ } else {
2324
+ return await (agent.run as any)();
2325
+ }
2326
+ });
2327
+ }