observability-toolkit 1.8.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/README.md +126 -5
  2. package/dist/backends/index.d.ts +163 -0
  3. package/dist/backends/index.d.ts.map +1 -1
  4. package/dist/backends/index.js +57 -0
  5. package/dist/backends/index.js.map +1 -1
  6. package/dist/backends/index.test.js +55 -1
  7. package/dist/backends/index.test.js.map +1 -1
  8. package/dist/backends/local-jsonl.d.ts +30 -0
  9. package/dist/backends/local-jsonl.d.ts.map +1 -1
  10. package/dist/backends/local-jsonl.js +912 -550
  11. package/dist/backends/local-jsonl.js.map +1 -1
  12. package/dist/backends/signoz-api-rate-limiter.test.js +2 -1
  13. package/dist/backends/signoz-api-rate-limiter.test.js.map +1 -1
  14. package/dist/backends/signoz-api.d.ts +16 -2
  15. package/dist/backends/signoz-api.d.ts.map +1 -1
  16. package/dist/backends/signoz-api.js +650 -534
  17. package/dist/backends/signoz-api.js.map +1 -1
  18. package/dist/backends/signoz-api.test.js +6 -5
  19. package/dist/backends/signoz-api.test.js.map +1 -1
  20. package/dist/lib/agent-as-judge.d.ts +388 -0
  21. package/dist/lib/agent-as-judge.d.ts.map +1 -0
  22. package/dist/lib/agent-as-judge.js +740 -0
  23. package/dist/lib/agent-as-judge.js.map +1 -0
  24. package/dist/lib/agent-as-judge.test.d.ts +5 -0
  25. package/dist/lib/agent-as-judge.test.d.ts.map +1 -0
  26. package/dist/lib/agent-as-judge.test.js +816 -0
  27. package/dist/lib/agent-as-judge.test.js.map +1 -0
  28. package/dist/lib/cache.d.ts +15 -2
  29. package/dist/lib/cache.d.ts.map +1 -1
  30. package/dist/lib/cache.js +16 -2
  31. package/dist/lib/cache.js.map +1 -1
  32. package/dist/lib/circuit-breaker.d.ts +18 -0
  33. package/dist/lib/circuit-breaker.d.ts.map +1 -1
  34. package/dist/lib/circuit-breaker.js +41 -8
  35. package/dist/lib/circuit-breaker.js.map +1 -1
  36. package/dist/lib/confident-export.d.ts +101 -0
  37. package/dist/lib/confident-export.d.ts.map +1 -0
  38. package/dist/lib/confident-export.js +393 -0
  39. package/dist/lib/confident-export.js.map +1 -0
  40. package/dist/lib/confident-export.test.d.ts +7 -0
  41. package/dist/lib/confident-export.test.d.ts.map +1 -0
  42. package/dist/lib/confident-export.test.js +835 -0
  43. package/dist/lib/confident-export.test.js.map +1 -0
  44. package/dist/lib/constants.d.ts +75 -0
  45. package/dist/lib/constants.d.ts.map +1 -1
  46. package/dist/lib/constants.js +104 -1
  47. package/dist/lib/constants.js.map +1 -1
  48. package/dist/lib/datadog-export.d.ts +156 -0
  49. package/dist/lib/datadog-export.d.ts.map +1 -0
  50. package/dist/lib/datadog-export.js +464 -0
  51. package/dist/lib/datadog-export.js.map +1 -0
  52. package/dist/lib/datadog-export.test.d.ts +14 -0
  53. package/dist/lib/datadog-export.test.d.ts.map +1 -0
  54. package/dist/lib/datadog-export.test.js +890 -0
  55. package/dist/lib/datadog-export.test.js.map +1 -0
  56. package/dist/lib/evaluation-hooks.d.ts +49 -0
  57. package/dist/lib/evaluation-hooks.d.ts.map +1 -0
  58. package/dist/lib/evaluation-hooks.js +488 -0
  59. package/dist/lib/evaluation-hooks.js.map +1 -0
  60. package/dist/lib/evaluation-hooks.test.d.ts +8 -0
  61. package/dist/lib/evaluation-hooks.test.d.ts.map +1 -0
  62. package/dist/lib/evaluation-hooks.test.js +624 -0
  63. package/dist/lib/evaluation-hooks.test.js.map +1 -0
  64. package/dist/lib/export-utils.d.ts +99 -0
  65. package/dist/lib/export-utils.d.ts.map +1 -0
  66. package/dist/lib/export-utils.js +238 -0
  67. package/dist/lib/export-utils.js.map +1 -0
  68. package/dist/lib/export-utils.test.d.ts +5 -0
  69. package/dist/lib/export-utils.test.d.ts.map +1 -0
  70. package/dist/lib/export-utils.test.js +193 -0
  71. package/dist/lib/export-utils.test.js.map +1 -0
  72. package/dist/lib/file-utils.d.ts +17 -2
  73. package/dist/lib/file-utils.d.ts.map +1 -1
  74. package/dist/lib/file-utils.js +24 -5
  75. package/dist/lib/file-utils.js.map +1 -1
  76. package/dist/lib/file-utils.test.js +30 -0
  77. package/dist/lib/file-utils.test.js.map +1 -1
  78. package/dist/lib/histogram.d.ts +119 -0
  79. package/dist/lib/histogram.d.ts.map +1 -0
  80. package/dist/lib/histogram.js +202 -0
  81. package/dist/lib/histogram.js.map +1 -0
  82. package/dist/lib/histogram.test.d.ts +5 -0
  83. package/dist/lib/histogram.test.d.ts.map +1 -0
  84. package/dist/lib/histogram.test.js +381 -0
  85. package/dist/lib/histogram.test.js.map +1 -0
  86. package/dist/lib/instrumentation.d.ts +153 -0
  87. package/dist/lib/instrumentation.d.ts.map +1 -0
  88. package/dist/lib/instrumentation.integration.test.d.ts +2 -0
  89. package/dist/lib/instrumentation.integration.test.d.ts.map +1 -0
  90. package/dist/lib/instrumentation.integration.test.js +589 -0
  91. package/dist/lib/instrumentation.integration.test.js.map +1 -0
  92. package/dist/lib/instrumentation.js +520 -0
  93. package/dist/lib/instrumentation.js.map +1 -0
  94. package/dist/lib/instrumentation.test.d.ts +2 -0
  95. package/dist/lib/instrumentation.test.d.ts.map +1 -0
  96. package/dist/lib/instrumentation.test.js +821 -0
  97. package/dist/lib/instrumentation.test.js.map +1 -0
  98. package/dist/lib/langfuse-export.d.ts +125 -0
  99. package/dist/lib/langfuse-export.d.ts.map +1 -0
  100. package/dist/lib/langfuse-export.js +367 -0
  101. package/dist/lib/langfuse-export.js.map +1 -0
  102. package/dist/lib/langfuse-export.test.d.ts +7 -0
  103. package/dist/lib/langfuse-export.test.d.ts.map +1 -0
  104. package/dist/lib/langfuse-export.test.js +1007 -0
  105. package/dist/lib/langfuse-export.test.js.map +1 -0
  106. package/dist/lib/llm-as-judge.d.ts +657 -0
  107. package/dist/lib/llm-as-judge.d.ts.map +1 -0
  108. package/dist/lib/llm-as-judge.js +1397 -0
  109. package/dist/lib/llm-as-judge.js.map +1 -0
  110. package/dist/lib/llm-as-judge.test.d.ts +2 -0
  111. package/dist/lib/llm-as-judge.test.d.ts.map +1 -0
  112. package/dist/lib/llm-as-judge.test.js +2409 -0
  113. package/dist/lib/llm-as-judge.test.js.map +1 -0
  114. package/dist/lib/logger.d.ts +1 -1
  115. package/dist/lib/logger.d.ts.map +1 -1
  116. package/dist/lib/logger.js.map +1 -1
  117. package/dist/lib/metrics.d.ts +62 -0
  118. package/dist/lib/metrics.d.ts.map +1 -0
  119. package/dist/lib/metrics.js +166 -0
  120. package/dist/lib/metrics.js.map +1 -0
  121. package/dist/lib/metrics.test.d.ts +5 -0
  122. package/dist/lib/metrics.test.d.ts.map +1 -0
  123. package/dist/lib/metrics.test.js +189 -0
  124. package/dist/lib/metrics.test.js.map +1 -0
  125. package/dist/lib/parse-stats.d.ts +119 -0
  126. package/dist/lib/parse-stats.d.ts.map +1 -0
  127. package/dist/lib/parse-stats.js +206 -0
  128. package/dist/lib/parse-stats.js.map +1 -0
  129. package/dist/lib/parse-stats.test.d.ts +5 -0
  130. package/dist/lib/parse-stats.test.d.ts.map +1 -0
  131. package/dist/lib/parse-stats.test.js +283 -0
  132. package/dist/lib/parse-stats.test.js.map +1 -0
  133. package/dist/lib/phoenix-export.d.ts +109 -0
  134. package/dist/lib/phoenix-export.d.ts.map +1 -0
  135. package/dist/lib/phoenix-export.js +429 -0
  136. package/dist/lib/phoenix-export.js.map +1 -0
  137. package/dist/lib/phoenix-export.test.d.ts +11 -0
  138. package/dist/lib/phoenix-export.test.d.ts.map +1 -0
  139. package/dist/lib/phoenix-export.test.js +725 -0
  140. package/dist/lib/phoenix-export.test.js.map +1 -0
  141. package/dist/lib/server-utils.d.ts +6 -1
  142. package/dist/lib/server-utils.d.ts.map +1 -1
  143. package/dist/lib/server-utils.js +9 -1
  144. package/dist/lib/server-utils.js.map +1 -1
  145. package/dist/lib/shared-schemas.d.ts +6 -0
  146. package/dist/lib/shared-schemas.d.ts.map +1 -1
  147. package/dist/lib/shared-schemas.js +11 -4
  148. package/dist/lib/shared-schemas.js.map +1 -1
  149. package/dist/lib/verification-events.d.ts +100 -0
  150. package/dist/lib/verification-events.d.ts.map +1 -0
  151. package/dist/lib/verification-events.js +162 -0
  152. package/dist/lib/verification-events.js.map +1 -0
  153. package/dist/lib/verification-events.test.d.ts +5 -0
  154. package/dist/lib/verification-events.test.d.ts.map +1 -0
  155. package/dist/lib/verification-events.test.js +193 -0
  156. package/dist/lib/verification-events.test.js.map +1 -0
  157. package/dist/server.d.ts +5 -0
  158. package/dist/server.d.ts.map +1 -1
  159. package/dist/server.js +77 -21
  160. package/dist/server.js.map +1 -1
  161. package/dist/tools/context-stats.d.ts.map +1 -1
  162. package/dist/tools/context-stats.js +6 -8
  163. package/dist/tools/context-stats.js.map +1 -1
  164. package/dist/tools/export-confident.d.ts +145 -0
  165. package/dist/tools/export-confident.d.ts.map +1 -0
  166. package/dist/tools/export-confident.js +134 -0
  167. package/dist/tools/export-confident.js.map +1 -0
  168. package/dist/tools/export-confident.test.d.ts +7 -0
  169. package/dist/tools/export-confident.test.d.ts.map +1 -0
  170. package/dist/tools/export-confident.test.js +332 -0
  171. package/dist/tools/export-confident.test.js.map +1 -0
  172. package/dist/tools/export-datadog.d.ts +160 -0
  173. package/dist/tools/export-datadog.d.ts.map +1 -0
  174. package/dist/tools/export-datadog.js +160 -0
  175. package/dist/tools/export-datadog.js.map +1 -0
  176. package/dist/tools/export-datadog.test.d.ts +8 -0
  177. package/dist/tools/export-datadog.test.d.ts.map +1 -0
  178. package/dist/tools/export-datadog.test.js +419 -0
  179. package/dist/tools/export-datadog.test.js.map +1 -0
  180. package/dist/tools/export-langfuse.d.ts +137 -0
  181. package/dist/tools/export-langfuse.d.ts.map +1 -0
  182. package/dist/tools/export-langfuse.js +131 -0
  183. package/dist/tools/export-langfuse.js.map +1 -0
  184. package/dist/tools/export-langfuse.test.d.ts +7 -0
  185. package/dist/tools/export-langfuse.test.d.ts.map +1 -0
  186. package/dist/tools/export-langfuse.test.js +303 -0
  187. package/dist/tools/export-langfuse.test.js.map +1 -0
  188. package/dist/tools/export-phoenix.d.ts +145 -0
  189. package/dist/tools/export-phoenix.d.ts.map +1 -0
  190. package/dist/tools/export-phoenix.js +135 -0
  191. package/dist/tools/export-phoenix.js.map +1 -0
  192. package/dist/tools/export-phoenix.test.d.ts +7 -0
  193. package/dist/tools/export-phoenix.test.d.ts.map +1 -0
  194. package/dist/tools/export-phoenix.test.js +316 -0
  195. package/dist/tools/export-phoenix.test.js.map +1 -0
  196. package/dist/tools/health-check.d.ts +26 -0
  197. package/dist/tools/health-check.d.ts.map +1 -1
  198. package/dist/tools/health-check.js +36 -7
  199. package/dist/tools/health-check.js.map +1 -1
  200. package/dist/tools/index.d.ts +6 -0
  201. package/dist/tools/index.d.ts.map +1 -1
  202. package/dist/tools/index.js +6 -0
  203. package/dist/tools/index.js.map +1 -1
  204. package/dist/tools/inject-evaluations.d.ts +1315 -0
  205. package/dist/tools/inject-evaluations.d.ts.map +1 -0
  206. package/dist/tools/inject-evaluations.js +121 -0
  207. package/dist/tools/inject-evaluations.js.map +1 -0
  208. package/dist/tools/inject-evaluations.test.d.ts +5 -0
  209. package/dist/tools/inject-evaluations.test.d.ts.map +1 -0
  210. package/dist/tools/inject-evaluations.test.js +359 -0
  211. package/dist/tools/inject-evaluations.test.js.map +1 -0
  212. package/dist/tools/query-evaluations.d.ts +25 -4
  213. package/dist/tools/query-evaluations.d.ts.map +1 -1
  214. package/dist/tools/query-evaluations.js +10 -0
  215. package/dist/tools/query-evaluations.js.map +1 -1
  216. package/dist/tools/query-llm-events.js +2 -2
  217. package/dist/tools/query-llm-events.js.map +1 -1
  218. package/dist/tools/query-logs.d.ts +8 -8
  219. package/dist/tools/query-logs.js +3 -3
  220. package/dist/tools/query-logs.js.map +1 -1
  221. package/dist/tools/query-metrics.d.ts +4 -4
  222. package/dist/tools/query-metrics.js +2 -2
  223. package/dist/tools/query-metrics.js.map +1 -1
  224. package/dist/tools/query-traces.d.ts +8 -8
  225. package/dist/tools/query-verifications.d.ts +111 -0
  226. package/dist/tools/query-verifications.d.ts.map +1 -0
  227. package/dist/tools/query-verifications.js +101 -0
  228. package/dist/tools/query-verifications.js.map +1 -0
  229. package/dist/tools/query-verifications.test.d.ts +5 -0
  230. package/dist/tools/query-verifications.test.d.ts.map +1 -0
  231. package/dist/tools/query-verifications.test.js +156 -0
  232. package/dist/tools/query-verifications.test.js.map +1 -0
  233. package/dist/types/evaluation-hooks.d.ts +176 -0
  234. package/dist/types/evaluation-hooks.d.ts.map +1 -0
  235. package/dist/types/evaluation-hooks.js +49 -0
  236. package/dist/types/evaluation-hooks.js.map +1 -0
  237. package/package.json +10 -2
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inject-evaluations.d.ts","sourceRoot":"","sources":["../../src/tools/inject-evaluations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AA2DxB;;GAEG;AACH,eAAO,MAAM,uBAAuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAcnC,CAAC;AAEF,MAAM,MAAM,sBAAsB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,MAAM,CAAC;QACd,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACrC,KAAK,EAAE,sBAAsB,GAC5B,OAAO,CAAC,uBAAuB,CAAC,CAiDlC;AAED,eAAO,MAAM,qBAAqB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKjC,CAAC"}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * Inject external evaluation results into JSONL storage
3
+ *
4
+ * Enables external evaluators (webhooks, APIs, human reviewers) to submit
5
+ * quality scores that integrate with the evaluation query system.
6
+ */
7
+ import { z } from 'zod';
8
+ import { TELEMETRY_DIR } from '../lib/constants.js';
9
+ import { processEvaluation, processBatch, verifyHmacSignature, MAX_BATCH_SIZE, } from '../lib/evaluation-hooks.js';
10
/**
 * Schema for one per-step score in an agent trajectory evaluation.
 *
 * `step` is either a string or a number, `score` must fall in the inclusive
 * range 0..1, and `evidence` / `explanation` may be omitted.
 */
const stepIdentifier = z.union([z.string(), z.number()]);
const stepUnitScore = z.number().min(0).max(1);
const stepScoreShape = {
    step: stepIdentifier.describe('Step identifier (index or name)'),
    score: stepUnitScore.describe('Score for this step (0-1)'),
    evidence: z.record(z.unknown()).optional().describe('Supporting evidence'),
    explanation: z.string().optional().describe('Explanation for step score'),
};
const stepScoreSchema = z.object(stepScoreShape);
19
/**
 * Schema describing the verification of a single tool call made by an agent.
 *
 * Required: `toolName`, the `toolCorrect` / `argsCorrect` booleans and a
 * `score` in the inclusive range 0..1. Everything else is optional.
 */
const optionalToolText = () => z.string().optional();
const toolVerificationShape = {
    toolName: z.string().describe('Name of the tool'),
    toolCallId: optionalToolText().describe('Tool call ID for correlation'),
    toolCorrect: z.boolean().describe('Whether correct tool was selected'),
    argsCorrect: z.boolean().describe('Whether arguments were correct'),
    resultCorrect: z.boolean().optional().describe('Whether result was correct'),
    score: z.number().min(0).max(1).describe('Weighted correctness score'),
    expectedTool: optionalToolText().describe('Expected tool if different'),
    evidence: z.record(z.unknown()).optional().describe('Supporting evidence'),
};
const toolVerificationSchema = z.object(toolVerificationShape);
32
/**
 * Single evaluation payload schema.
 *
 * Required fields: `evaluationName`, `evaluator` (both 1-256 characters) and
 * `evaluatorType` (one of 'llm', 'human', 'rule', 'classifier'). All other
 * fields are optional, but the trailing refinement rejects a payload that has
 * neither `scoreValue` nor `scoreLabel`.
 *
 * `scoreValue` is restricted to finite numbers: +/-Infinity would serialize to
 * `null` in JSON, so a non-finite score could not be stored faithfully.
 */
const evaluationPayloadSchema = z.object({
    evaluationName: z.string().min(1).max(256).describe('Evaluation metric name (e.g., "relevance", "faithfulness")'),
    // .finite() rejects Infinity / -Infinity, which JSON cannot represent.
    scoreValue: z.number().finite().optional().describe('Numeric score (0-1 normalized recommended)'),
    scoreLabel: z.string().optional().describe('Human-readable label (e.g., "pass", "fail")'),
    scoreUnit: z.string().optional().describe('Score unit/scale (e.g., "ratio_0_1", "percentage")'),
    explanation: z.string().max(10240).optional().describe('Explanation/rationale'),
    evaluator: z.string().min(1).max(256).describe('Evaluator identity (e.g., "claude-3-sonnet", "human-reviewer-001")'),
    evaluatorType: z.enum(['llm', 'human', 'rule', 'classifier']).describe('Type of evaluator'),
    responseId: z.string().max(128).optional().describe('Response ID for correlation'),
    traceId: z.string().max(64).optional().describe('Trace ID for correlation'),
    sessionId: z.string().max(128).optional().describe('Session ID'),
    agentId: z.string().max(128).optional().describe('Subject agent ID'),
    agentName: z.string().max(256).optional().describe('Subject agent name'),
    stepScores: z.array(stepScoreSchema).max(1000).optional().describe('Per-step evaluation breakdown'),
    toolVerifications: z.array(toolVerificationSchema).max(500).optional().describe('Tool call correctness'),
    trajectoryLength: z.number().int().nonnegative().optional().describe('Agent trajectory length'),
    metadata: z.record(z.unknown()).optional().describe('Custom metadata'),
}).refine(data => data.scoreValue !== undefined || data.scoreLabel !== undefined, { message: 'At least one of scoreValue or scoreLabel is required' });
53
/**
 * Input schema for the inject-evaluations tool.
 *
 * Accepts a single `evaluation`, a non-empty `evaluations` batch (capped at
 * MAX_BATCH_SIZE entries), or both, plus an optional `signature` / `secret`
 * pair. Input carrying neither a single evaluation nor a non-empty batch is
 * rejected by the refinement.
 */
function hasEvaluationPayload({ evaluation, evaluations }) {
    if (evaluation !== undefined) {
        return true;
    }
    return evaluations !== undefined && evaluations.length > 0;
}
export const injectEvaluationsSchema = z
    .object({
        // Simple mode: exactly one evaluation
        evaluation: evaluationPayloadSchema.optional().describe('Single evaluation to inject'),
        // Batch mode: several evaluations at once
        evaluations: z
            .array(evaluationPayloadSchema)
            .max(MAX_BATCH_SIZE)
            .optional()
            .describe(`Batch of evaluations (max ${MAX_BATCH_SIZE})`),
        // Optional HMAC signature verification inputs
        signature: z.string().optional().describe('HMAC-SHA256 signature (format: sha256=<hex>)'),
        secret: z.string().optional().describe('HMAC secret for signature verification'),
    })
    .refine(hasEvaluationPayload, { message: 'Either evaluation or evaluations array is required' });
66
/**
 * Inject evaluation(s) into JSONL storage.
 *
 * Processing order:
 *   1. Signature inputs are checked for completeness. Supplying only one of
 *      `signature` / `secret` is rejected, because a half-configured request
 *      cannot be verified and must not be accepted as if it had been.
 *   2. The payload is located: a single `evaluation` takes precedence over the
 *      `evaluations` batch when both are present.
 *   3. When both `signature` and `secret` are given, the JSON-serialized
 *      payload is verified with `verifyHmacSignature`.
 *   4. The payload is handed to `processEvaluation` (single) or `processBatch`
 *      (batch) with the telemetry directory and batch-size limit as options.
 *
 * @param {object} input - Tool input (see `injectEvaluationsSchema`).
 * @param {object} [input.evaluation] - Single evaluation to inject.
 * @param {object[]} [input.evaluations] - Batch of evaluations to inject.
 * @param {string} [input.signature] - HMAC signature of the payload.
 * @param {string} [input.secret] - Secret used to verify `signature`.
 * @returns {Promise<{success: boolean, message: string, processedCount?: number, errors?: Array}>}
 *   Failure results produced here carry only `success` and `message`;
 *   otherwise the fields are copied from the processing result.
 */
export async function injectEvaluations(input) {
    const { evaluation, evaluations, signature, secret } = input;
    const hasSignature = Boolean(signature);
    const hasSecret = Boolean(secret);
    // Fail closed: a signature without a secret (or the reverse) can never be
    // verified, so the request must not be ingested as if it were trusted.
    if (hasSignature !== hasSecret) {
        return {
            success: false,
            message: 'Signature verification failed: both signature and secret are required',
        };
    }
    const hasSingle = Boolean(evaluation);
    const hasBatch = Boolean(evaluations) && evaluations.length > 0;
    // Bail out before verification so an absent payload is never serialized
    // (JSON.stringify(undefined) yields undefined, not a string).
    if (!hasSingle && !hasBatch) {
        return {
            success: false,
            message: 'No evaluations provided',
        };
    }
    if (hasSignature && hasSecret) {
        // The raw request body is not available here, so the signature is
        // checked against the re-serialized payload.
        const payload = JSON.stringify(hasSingle ? evaluation : evaluations);
        const verification = verifyHmacSignature(payload, signature, secret);
        if (!verification.valid) {
            return {
                success: false,
                message: `Signature verification failed: ${verification.error}`,
            };
        }
    }
    const options = {
        telemetryDir: TELEMETRY_DIR,
        maxBatchSize: MAX_BATCH_SIZE,
    };
    // A single evaluation wins over the batch when both are supplied.
    const result = hasSingle
        ? await processEvaluation(evaluation, options)
        : await processBatch({ evaluations }, options);
    return {
        success: result.success,
        message: result.message,
        processedCount: result.processedCount,
        errors: result.errors,
    };
}
115
/**
 * Tool descriptor bundling the tool name, its human-readable description,
 * the input schema and the handler function.
 */
const injectEvaluationsDescription = 'Inject evaluations from external evaluators. Supports single or batch mode with optional HMAC verification.';
export const injectEvaluationsTool = {
    name: 'obs_inject_evaluations',
    description: injectEvaluationsDescription,
    inputSchema: injectEvaluationsSchema,
    handler: injectEvaluations,
};
121
+ //# sourceMappingURL=inject-evaluations.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inject-evaluations.js","sourceRoot":"","sources":["../../src/tools/inject-evaluations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EACL,iBAAiB,EACjB,YAAY,EACZ,mBAAmB,EACnB,cAAc,GACf,MAAM,4BAA4B,CAAC;AAGpC;;GAEG;AACH,MAAM,eAAe,GAAG,CAAC,CAAC,MAAM,CAAC;IAC/B,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,iCAAiC,CAAC;IACnF,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,2BAA2B,CAAC;IACrE,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qBAAqB,CAAC;IAC1E,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC;CAC1E,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,sBAAsB,GAAG,CAAC,CAAC,MAAM,CAAC;IACtC,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACjD,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;IAC1E,WAAW,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;IACtE,WAAW,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,gCAAgC,CAAC;IACnE,aAAa,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IAC5E,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IACtE,YAAY,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC;IAC1E,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qBAAqB,CAAC;CAC3E,CAAC,CAAC;AAEH;;GAEG;AACH,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IACvC,cAAc,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;IACjH,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4CAA4C,CAAC;IACxF,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,6CAA6C,CAAC;IACzF,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,oDAAoD,CAAC;IAC/F,WAAW,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,uBA
AuB,CAAC;IAC/E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,oEAAoE,CAAC;IACpH,aAAa,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,QAAQ,CAAC,mBAAmB,CAAC;IAC3F,UAAU,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,6BAA6B,CAAC;IAClF,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;IAC3E,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,YAAY,CAAC;IAChE,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;IACpE,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,oBAAoB,CAAC;IACxE,UAAU,EAAE,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;IACnG,iBAAiB,EAAE,CAAC,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,uBAAuB,CAAC;IACxG,gBAAgB,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,WAAW,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,yBAAyB,CAAC;IAC/F,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iBAAiB,CAAC;CACvE,CAAC,CAAC,MAAM,CACP,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,KAAK,SAAS,IAAI,IAAI,CAAC,UAAU,KAAK,SAAS,EACtE,EAAE,OAAO,EAAE,sDAAsD,EAAE,CACpE,CAAC;AAEF;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;IAC9C,kCAAkC;IAClC,UAAU,EAAE,uBAAuB,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,6BAA6B,CAAC;IAEtF,aAAa;IACb,WAAW,EAAE,CAAC,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,QAAQ,EAAE;SACzE,QAAQ,CAAC,6BAA6B,cAAc,GAAG,CAAC;IAE3D,kDAAkD;IAClD,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8CAA8C,CAAC;IACzF,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wCAAwC,CAAC;CACjF,CAAC,CAAC,MAAM,CACP,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,UAAU,KAAK,SAAS,IAAI,CAAC,IAAI,CAAC,WAAW,KAAK,SAAS,IAAI,IAAI,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,EACxG,EAAE,OAAO,EAAE,oDAAoD,EAAE,CAClE,CAAC;AAmBF;;GAEG;AACH,MAAM,C
AAC,KAAK,UAAU,iBAAiB,CACrC,KAA6B;IAE7B,MAAM,OAAO,GAAwB;QACnC,YAAY,EAAE,aAAa;QAC3B,YAAY,EAAE,cAAc;KAC7B,CAAC;IAEF,+BAA+B;IAC/B,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QACpC,sDAAsD;QACtD,yDAAyD;QACzD,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,UAAU,IAAI,KAAK,CAAC,WAAW,CAAC,CAAC;QACtE,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC3E,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YAClB,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,OAAO,EAAE,kCAAkC,MAAM,CAAC,KAAK,EAAE;aAC1D,CAAC;QACJ,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,KAAK,CAAC,UAAsC,EAAE,OAAO,CAAC,CAAC;QAC9F,OAAO;YACL,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC;IACJ,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,CAAC,WAAW,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtD,MAAM,KAAK,GAAwB;YACjC,WAAW,EAAE,KAAK,CAAC,WAAyC;SAC7D,CAAC;QACF,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAClD,OAAO;YACL,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,cAAc,EAAE,MAAM,CAAC,cAAc;YACrC,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB,CAAC;IACJ,CAAC;IAED,OAAO;QACL,OAAO,EAAE,KAAK;QACd,OAAO,EAAE,yBAAyB;KACnC,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACnC,IAAI,EAAE,wBAAwB;IAC9B,WAAW,EAAE,6GAA6G;IAC1H,WAAW,EAAE,uBAAuB;IACpC,OAAO,EAAE,iBAAiB;CAC3B,CAAC"}
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Tests for inject-evaluations tool
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=inject-evaluations.test.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inject-evaluations.test.d.ts","sourceRoot":"","sources":["../../src/tools/inject-evaluations.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}
@@ -0,0 +1,359 @@
1
+ /**
2
+ * Tests for inject-evaluations tool
3
+ */
4
+ import { describe, it, beforeEach } from 'node:test';
5
+ import assert from 'node:assert';
6
+ import { createHmac } from 'crypto';
7
+ import { mkdirSync, rmSync } from 'fs';
8
+ import { join } from 'path';
9
+ import { tmpdir } from 'os';
10
+ import { injectEvaluations, injectEvaluationsSchema, injectEvaluationsTool, } from './inject-evaluations.js';
11
+ import { validateToolDefinition } from '../test-helpers/tool-validators.js';
12
+ import { resetHookStats } from '../lib/evaluation-hooks.js';
13
/**
 * Test helper: create a fresh directory under the OS temp dir.
 *
 * The directory name combines the current timestamp with a random base-36
 * suffix.
 *
 * @returns {string} Path of the created directory.
 */
function createTempDir() {
    const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
    const tempPath = join(tmpdir(), `inject-eval-test-${uniqueSuffix}`);
    mkdirSync(tempPath, { recursive: true });
    return tempPath;
}
19
/**
 * Test helper: recursively delete a directory, best-effort.
 *
 * Any error raised by the removal is deliberately swallowed so that cleanup
 * problems never fail a test.
 *
 * @param {string} dir - Directory to remove.
 */
function removeTempDir(dir) {
    const removal = { recursive: true, force: true };
    try {
        rmSync(dir, removal);
    }
    catch {
        // Cleanup is best-effort; failures are intentionally ignored
    }
}
27
+ describe('inject-evaluations', () => {
28
+ beforeEach(() => {
29
+ resetHookStats();
30
+ });
31
+ describe('tool definition', () => {
32
+ validateToolDefinition(injectEvaluationsTool, 'obs_inject_evaluations', injectEvaluationsSchema, injectEvaluations, ['inject', 'evaluations']);
33
+ });
34
+ describe('schema validation', () => {
35
+ it('should require evaluation or evaluations', () => {
36
+ assert.throws(() => {
37
+ injectEvaluationsSchema.parse({});
38
+ });
39
+ });
40
+ it('should accept single evaluation', () => {
41
+ const result = injectEvaluationsSchema.parse({
42
+ evaluation: {
43
+ evaluationName: 'test',
44
+ scoreValue: 0.8,
45
+ evaluator: 'test-evaluator',
46
+ evaluatorType: 'llm',
47
+ },
48
+ });
49
+ assert.ok(result.evaluation);
50
+ });
51
+ it('should accept evaluations array', () => {
52
+ const result = injectEvaluationsSchema.parse({
53
+ evaluations: [
54
+ {
55
+ evaluationName: 'test',
56
+ scoreValue: 0.8,
57
+ evaluator: 'test-evaluator',
58
+ evaluatorType: 'llm',
59
+ },
60
+ ],
61
+ });
62
+ assert.ok(result.evaluations);
63
+ assert.strictEqual(result.evaluations.length, 1);
64
+ });
65
+ it('should require scoreValue or scoreLabel', () => {
66
+ assert.throws(() => {
67
+ injectEvaluationsSchema.parse({
68
+ evaluation: {
69
+ evaluationName: 'test',
70
+ evaluator: 'test-evaluator',
71
+ evaluatorType: 'llm',
72
+ },
73
+ });
74
+ });
75
+ });
76
+ it('should accept scoreLabel without scoreValue', () => {
77
+ const result = injectEvaluationsSchema.parse({
78
+ evaluation: {
79
+ evaluationName: 'test',
80
+ scoreLabel: 'pass',
81
+ evaluator: 'test-evaluator',
82
+ evaluatorType: 'llm',
83
+ },
84
+ });
85
+ assert.ok(result.evaluation);
86
+ assert.strictEqual(result.evaluation.scoreLabel, 'pass');
87
+ });
88
+ it('should enforce evaluatorType enum', () => {
89
+ assert.throws(() => {
90
+ injectEvaluationsSchema.parse({
91
+ evaluation: {
92
+ evaluationName: 'test',
93
+ scoreValue: 0.8,
94
+ evaluator: 'test-evaluator',
95
+ evaluatorType: 'invalid',
96
+ },
97
+ });
98
+ });
99
+ });
100
+ it('should accept all valid evaluator types', () => {
101
+ const types = ['llm', 'human', 'rule', 'classifier'];
102
+ for (const evaluatorType of types) {
103
+ const result = injectEvaluationsSchema.parse({
104
+ evaluation: {
105
+ evaluationName: 'test',
106
+ scoreValue: 0.8,
107
+ evaluator: 'test-evaluator',
108
+ evaluatorType,
109
+ },
110
+ });
111
+ assert.strictEqual(result.evaluation?.evaluatorType, evaluatorType);
112
+ }
113
+ });
114
+ it('should enforce max batch size', () => {
115
+ assert.throws(() => {
116
+ injectEvaluationsSchema.parse({
117
+ evaluations: Array(101).fill({
118
+ evaluationName: 'test',
119
+ scoreValue: 0.8,
120
+ evaluator: 'test-evaluator',
121
+ evaluatorType: 'llm',
122
+ }),
123
+ });
124
+ });
125
+ });
126
+ it('should accept optional signature fields', () => {
127
+ const result = injectEvaluationsSchema.parse({
128
+ evaluation: {
129
+ evaluationName: 'test',
130
+ scoreValue: 0.8,
131
+ evaluator: 'test-evaluator',
132
+ evaluatorType: 'llm',
133
+ },
134
+ signature: 'sha256=abc123',
135
+ secret: 'my-secret',
136
+ });
137
+ assert.strictEqual(result.signature, 'sha256=abc123');
138
+ assert.strictEqual(result.secret, 'my-secret');
139
+ });
140
+ it('should accept stepScores', () => {
141
+ const result = injectEvaluationsSchema.parse({
142
+ evaluation: {
143
+ evaluationName: 'test',
144
+ scoreValue: 0.8,
145
+ evaluator: 'test-evaluator',
146
+ evaluatorType: 'llm',
147
+ stepScores: [
148
+ { step: 0, score: 0.9 },
149
+ { step: 'reasoning', score: 0.8, explanation: 'Good logic' },
150
+ ],
151
+ },
152
+ });
153
+ assert.strictEqual(result.evaluation?.stepScores?.length, 2);
154
+ });
155
+ it('should accept toolVerifications', () => {
156
+ const result = injectEvaluationsSchema.parse({
157
+ evaluation: {
158
+ evaluationName: 'test',
159
+ scoreValue: 0.8,
160
+ evaluator: 'test-evaluator',
161
+ evaluatorType: 'llm',
162
+ toolVerifications: [
163
+ {
164
+ toolName: 'search',
165
+ toolCorrect: true,
166
+ argsCorrect: true,
167
+ score: 1.0,
168
+ },
169
+ ],
170
+ },
171
+ });
172
+ assert.strictEqual(result.evaluation?.toolVerifications?.length, 1);
173
+ });
174
+ it('should enforce stepScores max', () => {
175
+ assert.throws(() => {
176
+ injectEvaluationsSchema.parse({
177
+ evaluation: {
178
+ evaluationName: 'test',
179
+ scoreValue: 0.8,
180
+ evaluator: 'test-evaluator',
181
+ evaluatorType: 'llm',
182
+ stepScores: Array(1001).fill({ step: 0, score: 0.5 }),
183
+ },
184
+ });
185
+ });
186
+ });
187
+ it('should enforce toolVerifications max', () => {
188
+ assert.throws(() => {
189
+ injectEvaluationsSchema.parse({
190
+ evaluation: {
191
+ evaluationName: 'test',
192
+ scoreValue: 0.8,
193
+ evaluator: 'test-evaluator',
194
+ evaluatorType: 'llm',
195
+ toolVerifications: Array(501).fill({
196
+ toolName: 'test',
197
+ toolCorrect: true,
198
+ argsCorrect: true,
199
+ score: 1.0,
200
+ }),
201
+ },
202
+ });
203
+ });
204
+ });
205
+ });
206
+ describe('signature verification', () => {
207
+ it('should verify correct signature format', () => {
208
+ const payload = JSON.stringify({
209
+ evaluationName: 'test',
210
+ scoreValue: 0.8,
211
+ evaluator: 'test-evaluator',
212
+ evaluatorType: 'llm',
213
+ });
214
+ const secret = 'test-secret';
215
+ const signature = 'sha256=' + createHmac('sha256', secret).update(payload).digest('hex');
216
+ assert.ok(signature.startsWith('sha256='));
217
+ assert.strictEqual(signature.length, 7 + 64);
218
+ });
219
+ it('should reject invalid signature in handler', async () => {
220
+ // Use an invalid signature (wrong length or format)
221
+ const result = await injectEvaluations({
222
+ evaluation: {
223
+ evaluationName: 'test',
224
+ scoreValue: 0.8,
225
+ evaluator: 'test-evaluator',
226
+ evaluatorType: 'llm',
227
+ },
228
+ signature: 'sha256=' + 'a'.repeat(64), // Valid format but wrong signature
229
+ secret: 'test-secret',
230
+ });
231
+ assert.strictEqual(result.success, false);
232
+ assert.ok(result.message.includes('Signature verification failed'));
233
+ });
234
+ });
235
+ describe('Agent-as-Judge fields', () => {
236
+ it('should accept agentId and agentName', () => {
237
+ const result = injectEvaluationsSchema.parse({
238
+ evaluation: {
239
+ evaluationName: 'task_completion',
240
+ scoreValue: 0.9,
241
+ evaluator: 'agent-judge',
242
+ evaluatorType: 'llm',
243
+ agentId: 'agent-123',
244
+ agentName: 'TestAgent',
245
+ },
246
+ });
247
+ assert.strictEqual(result.evaluation?.agentId, 'agent-123');
248
+ assert.strictEqual(result.evaluation?.agentName, 'TestAgent');
249
+ });
250
+ it('should accept trajectoryLength', () => {
251
+ const result = injectEvaluationsSchema.parse({
252
+ evaluation: {
253
+ evaluationName: 'efficiency',
254
+ scoreValue: 0.7,
255
+ evaluator: 'agent-judge',
256
+ evaluatorType: 'llm',
257
+ trajectoryLength: 5,
258
+ },
259
+ });
260
+ assert.strictEqual(result.evaluation?.trajectoryLength, 5);
261
+ });
262
+ it('should reject negative trajectoryLength', () => {
263
+ assert.throws(() => {
264
+ injectEvaluationsSchema.parse({
265
+ evaluation: {
266
+ evaluationName: 'efficiency',
267
+ scoreValue: 0.7,
268
+ evaluator: 'agent-judge',
269
+ evaluatorType: 'llm',
270
+ trajectoryLength: -1,
271
+ },
272
+ });
273
+ });
274
+ });
275
+ });
276
+ describe('metadata handling', () => {
277
+ it('should accept custom metadata', () => {
278
+ const result = injectEvaluationsSchema.parse({
279
+ evaluation: {
280
+ evaluationName: 'test',
281
+ scoreValue: 0.8,
282
+ evaluator: 'test-evaluator',
283
+ evaluatorType: 'llm',
284
+ metadata: {
285
+ source: 'external-api',
286
+ version: '1.0',
287
+ custom_field: 42,
288
+ },
289
+ },
290
+ });
291
+ assert.ok(result.evaluation?.metadata);
292
+ assert.strictEqual(result.evaluation?.metadata?.source, 'external-api');
293
+ });
294
+ });
295
+ describe('score normalization', () => {
296
+ it('should accept scoreUnit field', () => {
297
+ const result = injectEvaluationsSchema.parse({
298
+ evaluation: {
299
+ evaluationName: 'test',
300
+ scoreValue: 85,
301
+ scoreUnit: 'percentage',
302
+ evaluator: 'test-evaluator',
303
+ evaluatorType: 'llm',
304
+ },
305
+ });
306
+ assert.strictEqual(result.evaluation?.scoreValue, 85);
307
+ assert.strictEqual(result.evaluation?.scoreUnit, 'percentage');
308
+ });
309
+ });
310
+ describe('string field length limits', () => {
311
+ it('should enforce evaluationName max length', () => {
312
+ const result = injectEvaluationsSchema.safeParse({
313
+ evaluation: {
314
+ evaluationName: 'a'.repeat(257),
315
+ scoreValue: 0.8,
316
+ evaluator: 'test',
317
+ evaluatorType: 'llm',
318
+ },
319
+ });
320
+ assert.strictEqual(result.success, false);
321
+ });
322
+ it('should enforce evaluator max length', () => {
323
+ const result = injectEvaluationsSchema.safeParse({
324
+ evaluation: {
325
+ evaluationName: 'test',
326
+ scoreValue: 0.8,
327
+ evaluator: 'a'.repeat(257),
328
+ evaluatorType: 'llm',
329
+ },
330
+ });
331
+ assert.strictEqual(result.success, false);
332
+ });
333
+ it('should enforce agentId max length', () => {
334
+ const result = injectEvaluationsSchema.safeParse({
335
+ evaluation: {
336
+ evaluationName: 'test',
337
+ scoreValue: 0.8,
338
+ evaluator: 'test',
339
+ evaluatorType: 'llm',
340
+ agentId: 'a'.repeat(129),
341
+ },
342
+ });
343
+ assert.strictEqual(result.success, false);
344
+ });
345
+ it('should enforce explanation max length', () => {
346
+ const result = injectEvaluationsSchema.safeParse({
347
+ evaluation: {
348
+ evaluationName: 'test',
349
+ scoreValue: 0.8,
350
+ evaluator: 'test',
351
+ evaluatorType: 'llm',
352
+ explanation: 'a'.repeat(10241),
353
+ },
354
+ });
355
+ assert.strictEqual(result.success, false);
356
+ });
357
+ });
358
+ });
359
+ //# sourceMappingURL=inject-evaluations.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"inject-evaluations.test.js","sourceRoot":"","sources":["../../src/tools/inject-evaluations.test.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,UAAU,EAAa,MAAM,WAAW,CAAC;AAChE,OAAO,MAAM,MAAM,aAAa,CAAC;AACjC,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,SAAS,EAAE,MAAM,EAAgB,MAAM,IAAI,CAAC;AACrD,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,MAAM,EAAE,MAAM,IAAI,CAAC;AAE5B,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,qBAAqB,GACtB,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,cAAc,EAAgB,MAAM,4BAA4B,CAAC;AAE1E,4CAA4C;AAC5C,SAAS,aAAa;IACpB,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,oBAAoB,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IACpG,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACpC,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,IAAI,CAAC;QACH,MAAM,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAChD,CAAC;IAAC,MAAM,CAAC;QACP,wBAAwB;IAC1B,CAAC;AACH,CAAC;AAED,QAAQ,CAAC,oBAAoB,EAAE,GAAG,EAAE;IAClC,UAAU,CAAC,GAAG,EAAE;QACd,cAAc,EAAE,CAAC;IACnB,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;QAC/B,sBAAsB,CACpB,qBAAqB,EACrB,wBAAwB,EACxB,uBAAuB,EACvB,iBAAiB,EACjB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAC1B,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACpC,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;iBACrB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,WAAW,EAAE;oBACX;wBACE,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,KAAK;qBACrB;iBACF;aACF,
CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;YAC9B,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACnD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,UAAU,EAAE;wBACV,cAAc,EAAE,MAAM;wBACtB,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,KAAK;qBACrB;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,MAAM;oBAClB,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;iBACrB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAC7B,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,UAAU,EAAE;wBACV,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,SAAS;qBACzB;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;YACrD,KAAK,MAAM,aAAa,IAAI,KAAK,EAAE,CAAC;gBAClC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;oBAC3C,UAAU,EAAE;wBACV,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa;qBACd;iBACF,CAAC,CAAC;gBACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,aAAa,EAAE,aAAa,CAAC,CAAC;YACtE,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,WAAW,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;wBAC3B,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,KAAK;qBACrB,CAAC;iBACH,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;iBACrB;gBACD,SAAS,EAAE,eAAe;gBAC1B,MAAM,EAAE,WAAW;aACpB,CAAC,CAAC;YACH,
MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC;YACtD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0BAA0B,EAAE,GAAG,EAAE;YAClC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;oBACpB,UAAU,EAAE;wBACV,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE;wBACvB,EAAE,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,GAAG,EAAE,WAAW,EAAE,YAAY,EAAE;qBAC7D;iBACF;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;QAC/D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;oBACpB,iBAAiB,EAAE;wBACjB;4BACE,QAAQ,EAAE,QAAQ;4BAClB,WAAW,EAAE,IAAI;4BACjB,WAAW,EAAE,IAAI;4BACjB,KAAK,EAAE,GAAG;yBACX;qBACF;iBACF;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,iBAAiB,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,UAAU,EAAE;wBACV,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,KAAK;wBACpB,UAAU,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC;qBACtD;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;YAC9C,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,UAAU,EAAE;wBACV,cAAc,EAAE,MAAM;wBACtB,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,gBAAgB;wBAC3B,aAAa,EAAE,KAAK;wBACpB,iBAAiB,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC;4BACjC,QAAQ,EAAE,MAAM;4BAChB,WAAW,EAAE,IAAI;4BACjB,WAAW,EAAE,IAAI;4BACjB,KAAK,EAAE,GAAG;yBACX,CAAC;qBACH;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;QACtC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC;gBAC7B,cAAc,EAAE,MAAM;gBACtB,UAAU,EAAE,GAAG;g
BACf,SAAS,EAAE,gBAAgB;gBAC3B,aAAa,EAAE,KAAK;aACrB,CAAC,CAAC;YACH,MAAM,MAAM,GAAG,aAAa,CAAC;YAC7B,MAAM,SAAS,GAAG,SAAS,GAAG,UAAU,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAEzF,MAAM,CAAC,EAAE,CAAC,SAAS,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC;YAC3C,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,MAAM,EAAE,CAAC,GAAG,EAAE,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4CAA4C,EAAE,KAAK,IAAI,EAAE;YAC1D,oDAAoD;YACpD,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC;gBACrC,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;iBACrB;gBACD,SAAS,EAAE,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,mCAAmC;gBAC1E,MAAM,EAAE,aAAa;aACtB,CAAC,CAAC;YAEH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;YAC1C,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,+BAA+B,CAAC,CAAC,CAAC;QACtE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACrC,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,iBAAiB;oBACjC,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,aAAa;oBACxB,aAAa,EAAE,KAAK;oBACpB,OAAO,EAAE,WAAW;oBACpB,SAAS,EAAE,WAAW;iBACvB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,OAAO,EAAE,WAAW,CAAC,CAAC;YAC5D,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,YAAY;oBAC5B,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,aAAa;oBACxB,aAAa,EAAE,KAAK;oBACpB,gBAAgB,EAAE,CAAC;iBACpB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC;QAC7D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,yCAAyC,EAAE,GAAG,EAAE;YACjD,MAAM,CAAC,MAAM,CAAC,GAAG,EAAE;gBACjB,uBAAuB,CAAC,KAAK,CAAC;oBAC5B,UAAU,EAAE;wBACV,cAAc,EAAE,YAAY;wBAC5B,UAAU,EAAE,GAAG;wBACf,SAAS,EAAE,aAAa;wBACxB,aAAa,EAAE,KAAK;wBACpB,gBAAgB,EAAE,CAAC,CAAC;qBACrB;iBACF,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;QACjC,EAAE,CAAC,+BA
A+B,EAAE,GAAG,EAAE;YACvC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;oBACpB,QAAQ,EAAE;wBACR,MAAM,EAAE,cAAc;wBACtB,OAAO,EAAE,KAAK;wBACd,YAAY,EAAE,EAAE;qBACjB;iBACF;aACF,CAAC,CAAC;YACH,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;YACvC,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;QAC1E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;QACnC,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,MAAM,GAAG,uBAAuB,CAAC,KAAK,CAAC;gBAC3C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,EAAE;oBACd,SAAS,EAAE,YAAY;oBACvB,SAAS,EAAE,gBAAgB;oBAC3B,aAAa,EAAE,KAAK;iBACrB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,EAAE,EAAE,CAAC,CAAC;YACtD,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,UAAU,EAAE,SAAS,EAAE,YAAY,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,4BAA4B,EAAE,GAAG,EAAE;QAC1C,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC;gBAC/C,UAAU,EAAE;oBACV,cAAc,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC;oBAC/B,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,MAAM;oBACjB,aAAa,EAAE,KAAK;iBACrB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC;gBAC/C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC;oBAC1B,aAAa,EAAE,KAAK;iBACrB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC;gBAC/C,UAAU,EAAE;oBACV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,MAAM;oBACjB,aAAa,EAAE,KAAK;oBACpB,OAAO,EAAE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC;iBACzB;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,MAAM,GAAG,uBAAuB,CAAC,SAAS,CAAC;gBAC/C,UAAU,EAAE;oBA
CV,cAAc,EAAE,MAAM;oBACtB,UAAU,EAAE,GAAG;oBACf,SAAS,EAAE,MAAM;oBACjB,aAAa,EAAE,KAAK;oBACpB,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC;iBAC/B;aACF,CAAC,CAAC;YACH,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
@@ -21,9 +21,14 @@ export declare const queryEvaluationsSchema: z.ZodObject<{
21
21
  evaluator: z.ZodOptional<z.ZodString>;
22
22
  evaluatorType: z.ZodOptional<z.ZodEnum<["llm", "human", "rule", "classifier"]>>;
23
23
  responseId: z.ZodOptional<z.ZodString>;
24
+ agentId: z.ZodOptional<z.ZodString>;
25
+ agentName: z.ZodOptional<z.ZodString>;
24
26
  }, "strip", z.ZodTypeAny, {
25
27
  limit: number;
26
28
  traceId?: string | undefined;
29
+ sessionId?: string | undefined;
30
+ agentId?: string | undefined;
31
+ agentName?: string | undefined;
27
32
  aggregation?: "avg" | "min" | "max" | "count" | "p50" | "p95" | "p99" | undefined;
28
33
  groupBy?: ("evaluationName" | "scoreLabel" | "evaluator")[] | undefined;
29
34
  evaluationName?: string | undefined;
@@ -33,12 +38,14 @@ export declare const queryEvaluationsSchema: z.ZodObject<{
33
38
  scoreMax?: number | undefined;
34
39
  evaluatorType?: "llm" | "human" | "rule" | "classifier" | undefined;
35
40
  responseId?: string | undefined;
36
- sessionId?: string | undefined;
37
41
  endDate?: string | undefined;
38
42
  startDate?: string | undefined;
39
43
  }, {
40
44
  limit?: number | undefined;
41
45
  traceId?: string | undefined;
46
+ sessionId?: string | undefined;
47
+ agentId?: string | undefined;
48
+ agentName?: string | undefined;
42
49
  aggregation?: "avg" | "min" | "max" | "count" | "p50" | "p95" | "p99" | undefined;
43
50
  groupBy?: ("evaluationName" | "scoreLabel" | "evaluator")[] | undefined;
44
51
  evaluationName?: string | undefined;
@@ -48,7 +55,6 @@ export declare const queryEvaluationsSchema: z.ZodObject<{
48
55
  scoreMax?: number | undefined;
49
56
  evaluatorType?: "llm" | "human" | "rule" | "classifier" | undefined;
50
57
  responseId?: string | undefined;
51
- sessionId?: string | undefined;
52
58
  endDate?: string | undefined;
53
59
  startDate?: string | undefined;
54
60
  }>;
@@ -80,6 +86,12 @@ export interface EvaluationResponse {
80
86
  traceId?: string;
81
87
  /** Session ID for session-scoped queries */
82
88
  sessionId?: string;
89
+ /** Subject agent ID being evaluated (gen_ai.agent.id) */
90
+ agentId?: string;
91
+ /** Subject agent name being evaluated (gen_ai.agent.name) */
92
+ agentName?: string;
93
+ /** Number of steps in agent trajectory */
94
+ trajectoryLength?: number;
83
95
  }
84
96
  /** Summary data structure */
85
97
  interface EvaluationSummary {
@@ -126,6 +138,9 @@ export declare function queryEvaluations(rawInput: QueryEvaluationsInput, backen
126
138
  responseId: string | undefined;
127
139
  traceId: string | undefined;
128
140
  sessionId: string | undefined;
141
+ agentId: string | undefined;
142
+ agentName: string | undefined;
143
+ trajectoryLength: number | undefined;
129
144
  }[];
130
145
  truncated?: boolean | undefined;
131
146
  originalCount?: number | undefined;
@@ -149,9 +164,14 @@ export declare const queryEvaluationsTool: {
149
164
  evaluator: z.ZodOptional<z.ZodString>;
150
165
  evaluatorType: z.ZodOptional<z.ZodEnum<["llm", "human", "rule", "classifier"]>>;
151
166
  responseId: z.ZodOptional<z.ZodString>;
167
+ agentId: z.ZodOptional<z.ZodString>;
168
+ agentName: z.ZodOptional<z.ZodString>;
152
169
  }, "strip", z.ZodTypeAny, {
153
170
  limit: number;
154
171
  traceId?: string | undefined;
172
+ sessionId?: string | undefined;
173
+ agentId?: string | undefined;
174
+ agentName?: string | undefined;
155
175
  aggregation?: "avg" | "min" | "max" | "count" | "p50" | "p95" | "p99" | undefined;
156
176
  groupBy?: ("evaluationName" | "scoreLabel" | "evaluator")[] | undefined;
157
177
  evaluationName?: string | undefined;
@@ -161,12 +181,14 @@ export declare const queryEvaluationsTool: {
161
181
  scoreMax?: number | undefined;
162
182
  evaluatorType?: "llm" | "human" | "rule" | "classifier" | undefined;
163
183
  responseId?: string | undefined;
164
- sessionId?: string | undefined;
165
184
  endDate?: string | undefined;
166
185
  startDate?: string | undefined;
167
186
  }, {
168
187
  limit?: number | undefined;
169
188
  traceId?: string | undefined;
189
+ sessionId?: string | undefined;
190
+ agentId?: string | undefined;
191
+ agentName?: string | undefined;
170
192
  aggregation?: "avg" | "min" | "max" | "count" | "p50" | "p95" | "p99" | undefined;
171
193
  groupBy?: ("evaluationName" | "scoreLabel" | "evaluator")[] | undefined;
172
194
  evaluationName?: string | undefined;
@@ -176,7 +198,6 @@ export declare const queryEvaluationsTool: {
176
198
  scoreMax?: number | undefined;
177
199
  evaluatorType?: "llm" | "human" | "rule" | "classifier" | undefined;
178
200
  responseId?: string | undefined;
179
- sessionId?: string | undefined;
180
201
  endDate?: string | undefined;
181
202
  startDate?: string | undefined;
182
203
  }>;
@@ -1 +1 @@
1
- {"version":3,"file":"query-evaluations.d.ts","sourceRoot":"","sources":["../../src/tools/query-evaluations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAc7I,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAcjC,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAG3E,MAAM,WAAW,uBAAuB;IACtC,YAAY,CAAC,EAAE,gBAAgB,CAAC;CACjC;AAED,iCAAiC;AACjC,MAAM,WAAW,kBAAkB;IACjC,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,cAAc,EAAE,MAAM,CAAC;IACvB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yDAAyD;IACzD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4DAA4D;IAC5D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,uDAAuD;IACvD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uDAAuD;IACvD,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAuHD,6BAA6B;AAC7B,UAAU,iBAAiB;IACzB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,8BAA8B;AAC9B,UAAU,iBAAiB;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,gBAAgB,EAAE,GAAG,iBAAiB,CA6B/E;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAC/B,WAAW,EAAE,gBAAgB,EAAE,EAC/B,WAAW,EAAE,qBAAqB,EAClC,OAAO,EAAE,sBAAsB,EAAE,GAChC,iBAAiB,EAAE,CAiDrB;AAED,wBAAsB,gBAAgB,CAAC,QAAQ,EAAE,qBAAqB,EAAE,cAAc,CAAC,EAAE,uBAAuB;;;0BA7G5F,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;sBAC1B,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;qBACvB,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;uBACpB,MAAM;;;;;;;;;;;;;;;;;;GA6QtB;AAED,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKhC,CAAC"}
1
+ {"version":3,"file":"query-evaluations.d.ts","sourceRoot":"","sources":["../../src/tools/query-evaluations.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAIxB,OAAO,KAAK,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,sBAAsB,EAAE,aAAa,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AAc7I,eAAO,MAAM,sBAAsB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EAiBjC,CAAC;AAEH,MAAM,MAAM,qBAAqB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,sBAAsB,CAAC,CAAC;AAG3E,MAAM,WAAW,uBAAuB;IACtC,YAAY,CAAC,EAAE,gBAAgB,CAAC;CACjC;AAED,iCAAiC;AACjC,MAAM,WAAW,kBAAkB;IACjC,sCAAsC;IACtC,SAAS,EAAE,MAAM,CAAC;IAClB,sDAAsD;IACtD,cAAc,EAAE,MAAM,CAAC;IACvB,oDAAoD;IACpD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,yDAAyD;IACzD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,4DAA4D;IAC5D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,uDAAuD;IACvD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,uDAAuD;IACvD,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB,yDAAyD;IACzD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,6DAA6D;IAC7D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAuHD,6BAA6B;AAC7B,UAAU,iBAAiB;IACzB,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACzC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,8BAA8B;AAC9B,UAAU,iBAAiB;IACzB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;GAGG;AACH,wBAAgB,YAAY,CAAC,WAAW,EAAE,gBAAgB,EAAE,GAAG,iBAAiB,CA6B/E;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAC/B,WAAW,EAAE,gBAAgB,EAAE,EAC/B,WAAW,EAAE,qBAAqB,EAClC,OAAO,EAAE,sBAAsB,EAAE,GAChC,iBAAiB,EAAE,CAiDrB;AAED,wBAAsB,gBAAgB,CAAC,QAAQ,EAAE,qBAAqB,EAAE,cAAc,CAAC,EAAE,uBAAuB;;;0BA7G5F,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;sBAC1B,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;qBACvB,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC;uBACpB,MAAM;;;;;;;;;;;;;;;;;;;;;GAoRtB;AAED,eAAO
,MAAM,oBAAoB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAKhC,CAAC"}