agentv 2.6.0 → 2.7.1-next.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +128 -33
  2. package/dist/chunk-3L2L5GIL.js +51 -0
  3. package/dist/{chunk-BKMQNEUD.js.map → chunk-3L2L5GIL.js.map} +1 -1
  4. package/dist/{chunk-BKMQNEUD.js → chunk-5H446C7X.js} +2 -45
  5. package/dist/{chunk-LJVS3JAK.js → chunk-BL4PVUAT.js} +6 -4
  6. package/dist/{chunk-LJVS3JAK.js.map → chunk-BL4PVUAT.js.map} +1 -1
  7. package/dist/{chunk-MGK6HHRR.js → chunk-BWLYFF5N.js} +9813 -11237
  8. package/dist/chunk-BWLYFF5N.js.map +1 -0
  9. package/dist/chunk-C5GOHBQM.js +84 -0
  10. package/dist/chunk-C5GOHBQM.js.map +1 -0
  11. package/dist/chunk-EJEG3DU2.js +5476 -0
  12. package/dist/chunk-EJEG3DU2.js.map +1 -0
  13. package/dist/chunk-FV32QHPB.js +565 -0
  14. package/dist/chunk-FV32QHPB.js.map +1 -0
  15. package/dist/chunk-H5FFZCKI.js +2957 -0
  16. package/dist/chunk-H5FFZCKI.js.map +1 -0
  17. package/dist/chunk-JK6V4KVD.js +114 -0
  18. package/dist/chunk-JK6V4KVD.js.map +1 -0
  19. package/dist/chunk-LRULMAAA.js +1711 -0
  20. package/dist/chunk-LRULMAAA.js.map +1 -0
  21. package/dist/chunk-SR4I5KET.js +1238 -0
  22. package/dist/chunk-SR4I5KET.js.map +1 -0
  23. package/dist/chunk-VQ2ZO7XJ.js +2098 -0
  24. package/dist/chunk-VQ2ZO7XJ.js.map +1 -0
  25. package/dist/chunk-XALGXSKB.js +21 -0
  26. package/dist/chunk-XALGXSKB.js.map +1 -0
  27. package/dist/cli.js +8 -2
  28. package/dist/cli.js.map +1 -1
  29. package/dist/dist-R3OCWGXH.js +257 -0
  30. package/dist/dist-R3OCWGXH.js.map +1 -0
  31. package/dist/esm-5Q4BZALM-5REQWAUV.js +924 -0
  32. package/dist/esm-5Q4BZALM-5REQWAUV.js.map +1 -0
  33. package/dist/esm-DX3WQKEN.js +32 -0
  34. package/dist/esm-DX3WQKEN.js.map +1 -0
  35. package/dist/esm-QNEMCJPL.js +933 -0
  36. package/dist/esm-QNEMCJPL.js.map +1 -0
  37. package/dist/esm-R77SNOF5.js +65 -0
  38. package/dist/esm-R77SNOF5.js.map +1 -0
  39. package/dist/esm-RVQPUGWH.js +1207 -0
  40. package/dist/esm-RVQPUGWH.js.map +1 -0
  41. package/dist/getMachineId-bsd-HSK5LZMG.js +41 -0
  42. package/dist/getMachineId-bsd-HSK5LZMG.js.map +1 -0
  43. package/dist/getMachineId-darwin-4DP6CCJV.js +41 -0
  44. package/dist/getMachineId-darwin-4DP6CCJV.js.map +1 -0
  45. package/dist/getMachineId-linux-44LJ5UJB.js +33 -0
  46. package/dist/getMachineId-linux-44LJ5UJB.js.map +1 -0
  47. package/dist/getMachineId-unsupported-NVK6IATM.js +24 -0
  48. package/dist/getMachineId-unsupported-NVK6IATM.js.map +1 -0
  49. package/dist/getMachineId-win-YZ36S7VA.js +43 -0
  50. package/dist/getMachineId-win-YZ36S7VA.js.map +1 -0
  51. package/dist/index.js +10 -2
  52. package/dist/interactive-33TCZXLF.js +333 -0
  53. package/dist/interactive-33TCZXLF.js.map +1 -0
  54. package/dist/otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js +9 -0
  55. package/dist/otlp-json-file-exporter-77FDBRSY-EZAPHWP6.js.map +1 -0
  56. package/dist/simple-trace-file-exporter-S76DMABU-5FCJESD2.js +9 -0
  57. package/dist/simple-trace-file-exporter-S76DMABU-5FCJESD2.js.map +1 -0
  58. package/dist/src-2N5EJ2N6.js +1733 -0
  59. package/dist/src-2N5EJ2N6.js.map +1 -0
  60. package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +84 -0
  61. package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +144 -0
  62. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +67 -0
  63. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +101 -0
  64. package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +433 -0
  65. package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +36 -0
  66. package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +118 -0
  67. package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +251 -0
  68. package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +77 -0
  69. package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +50 -0
  70. package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +78 -0
  71. package/dist/templates/.agentv/.env.example +23 -23
  72. package/dist/templates/.agentv/config.yaml +15 -15
  73. package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +38 -13
  74. package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +9 -6
  75. package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +4 -4
  76. package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +7 -9
  77. package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +4 -4
  78. package/dist/templates/.github/prompts/agentv-optimize.prompt.md +3 -3
  79. package/dist/{token-D3IYDJQZ.js → token-POXF46NU.js} +6 -4
  80. package/dist/{token-D3IYDJQZ.js.map → token-POXF46NU.js.map} +1 -1
  81. package/dist/{token-util-FWFPR2BV.js → token-util-6GWYZWGE.js} +4 -3
  82. package/dist/token-util-6GWYZWGE.js.map +1 -0
  83. package/package.json +7 -3
  84. package/dist/chunk-MGK6HHRR.js.map +0 -1
  85. /package/dist/{token-util-FWFPR2BV.js.map → chunk-5H446C7X.js.map} +0 -0
@@ -0,0 +1,21 @@
1
+ import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
+ import {
3
+ __esm
4
+ } from "./chunk-5H446C7X.js";
5
+
6
+ // ../../node_modules/.bun/@opentelemetry+resources@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/resources/build/esm/detectors/platform/node/machine-id/execAsync.js
7
+ import * as child_process from "child_process";
8
+ import * as util from "util";
9
+ var execAsync;
10
+ var init_execAsync = __esm({
11
+ "../../node_modules/.bun/@opentelemetry+resources@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/resources/build/esm/detectors/platform/node/machine-id/execAsync.js"() {
12
+ "use strict";
13
+ execAsync = util.promisify(child_process.exec);
14
+ }
15
+ });
16
+
17
+ export {
18
+ execAsync,
19
+ init_execAsync
20
+ };
21
+ //# sourceMappingURL=chunk-XALGXSKB.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../node_modules/.bun/@opentelemetry+resources@2.5.1+460773ef8ff1e07c/node_modules/@opentelemetry/resources/src/detectors/platform/node/machine-id/execAsync.ts"],"sourcesContent":["/*\n * Copyright The OpenTelemetry Authors\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * https://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport * as child_process from 'child_process';\nimport * as util from 'util';\n\nexport const execAsync = util.promisify(child_process.exec);\n"],"mappings":";;;;;;AAgBA,YAAY,mBAAmB;AAC/B,YAAY,UAAU;AAjBtB,IAmBa;AAnBb;;;AAmBO,IAAM,YAAiB,eAAwB,kBAAI;;;","names":[]}
package/dist/cli.js CHANGED
@@ -2,8 +2,14 @@
2
2
  import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
3
3
  import {
4
4
  runCli
5
- } from "./chunk-MGK6HHRR.js";
6
- import "./chunk-BKMQNEUD.js";
5
+ } from "./chunk-EJEG3DU2.js";
6
+ import "./chunk-H5FFZCKI.js";
7
+ import "./chunk-BWLYFF5N.js";
8
+ import "./chunk-JK6V4KVD.js";
9
+ import "./chunk-C5GOHBQM.js";
10
+ import "./chunk-3L2L5GIL.js";
11
+ import "./chunk-LRULMAAA.js";
12
+ import "./chunk-5H446C7X.js";
7
13
 
8
14
  // src/cli.ts
9
15
  runCli().then(() => {
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n console.error(error);\n process.exit(1);\n });\n"],"mappings":";;;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { runCli } from './index.js';\n\nrunCli()\n .then(() => {\n process.exit(0);\n })\n .catch((error) => {\n console.error(error);\n process.exit(1);\n });\n"],"mappings":";;;;;;;;;;;;;;AAGA,OAAO,EACJ,KAAK,MAAM;AACV,UAAQ,KAAK,CAAC;AAChB,CAAC,EACA,MAAM,CAAC,UAAU;AAChB,UAAQ,MAAM,KAAK;AACnB,UAAQ,KAAK,CAAC;AAChB,CAAC;","names":[]}
@@ -0,0 +1,257 @@
1
+ import { createRequire } from 'node:module'; const require = createRequire(import.meta.url);
2
+ import {
3
+ AgentJudgeEvaluator,
4
+ CodeEvaluator,
5
+ CompositeEvaluator,
6
+ CostEvaluator,
7
+ DEFAULT_EVALUATOR_TEMPLATE,
8
+ DEFAULT_EVAL_PATTERNS,
9
+ DEFAULT_EXPLORATION_TOOLS,
10
+ DeterministicAssertionEvaluator,
11
+ EvaluatorRegistry,
12
+ ExecutionMetricsEvaluator,
13
+ FieldAccuracyEvaluator,
14
+ LatencyEvaluator,
15
+ LlmJudgeEvaluator,
16
+ OTEL_BACKEND_PRESETS,
17
+ OtelStreamingObserver,
18
+ OtelTraceExporter,
19
+ ProviderRegistry,
20
+ ResponseCache,
21
+ TEST_MESSAGE_ROLES,
22
+ TemplateNotDirectoryError,
23
+ TemplateNotFoundError,
24
+ TokenUsageEvaluator,
25
+ ToolTrajectoryEvaluator,
26
+ WorkspaceCreationError,
27
+ assembleLlmJudgePrompt,
28
+ avgToolDurationMs,
29
+ buildDirectoryChain,
30
+ buildOutputSchema,
31
+ buildPromptInputs,
32
+ buildRubricOutputSchema,
33
+ buildScoreRangeOutputSchema,
34
+ buildSearchRoots,
35
+ calculateRubricScore,
36
+ captureFileChanges,
37
+ clampScore,
38
+ cleanupEvalWorkspaces,
39
+ cleanupWorkspace,
40
+ computeTraceSummary,
41
+ consumeClaudeLogEntries,
42
+ consumeCodexLogEntries,
43
+ consumeCopilotCliLogEntries,
44
+ consumeCopilotSdkLogEntries,
45
+ consumePiLogEntries,
46
+ createAgentKernel,
47
+ createBuiltinProviderRegistry,
48
+ createBuiltinRegistry,
49
+ createProvider,
50
+ createTempWorkspace,
51
+ deepEqual,
52
+ defineConfig,
53
+ detectFormat,
54
+ discoverAssertions,
55
+ discoverProviders,
56
+ ensureVSCodeSubagents,
57
+ evaluate,
58
+ executeScript,
59
+ executeWorkspaceScript,
60
+ explorationRatio,
61
+ extractCacheConfig,
62
+ extractJsonBlob,
63
+ extractTargetFromSuite,
64
+ extractTargetsFromSuite,
65
+ extractTargetsFromTestCase,
66
+ extractTrialsConfig,
67
+ fileExists,
68
+ findGitRoot,
69
+ freeformEvaluationSchema,
70
+ generateRubrics,
71
+ getHitCount,
72
+ getWorkspacePath,
73
+ initializeBaseline,
74
+ isEvaluatorKind,
75
+ isGuidelineFile,
76
+ isJsonObject,
77
+ isJsonValue,
78
+ isNonEmptyString,
79
+ isTestMessage,
80
+ isTestMessageRole,
81
+ listTargetNames,
82
+ loadConfig,
83
+ loadEvalCaseById,
84
+ loadEvalCases,
85
+ loadEvalSuite,
86
+ loadTestById,
87
+ loadTestSuite,
88
+ loadTests,
89
+ loadTsConfig,
90
+ mergeExecutionMetrics,
91
+ negateScore,
92
+ normalizeLineEndings,
93
+ parseJsonFromText,
94
+ parseJsonSafe,
95
+ readJsonFile,
96
+ readTargetDefinitions,
97
+ readTestSuiteMetadata,
98
+ readTextFile,
99
+ resolveAndCreateProvider,
100
+ resolveFileReference,
101
+ resolveTargetDefinition,
102
+ resolveWorkspaceTemplate,
103
+ rubricEvaluationSchema,
104
+ runContainsAssertion,
105
+ runEqualsAssertion,
106
+ runEvalCase,
107
+ runEvaluation,
108
+ runIsJsonAssertion,
109
+ runRegexAssertion,
110
+ scoreToVerdict,
111
+ shouldEnableCache,
112
+ shouldSkipCacheForTemperature,
113
+ subscribeToClaudeLogEntries,
114
+ subscribeToCodexLogEntries,
115
+ subscribeToCopilotCliLogEntries,
116
+ subscribeToCopilotSdkLogEntries,
117
+ subscribeToPiLogEntries,
118
+ substituteVariables,
119
+ toCamelCaseDeep,
120
+ toSnakeCaseDeep,
121
+ tokensPerTool,
122
+ trimBaselineResult
123
+ } from "./chunk-BWLYFF5N.js";
124
+ import {
125
+ SimpleTraceFileExporter
126
+ } from "./chunk-JK6V4KVD.js";
127
+ import {
128
+ OtlpJsonFileExporter
129
+ } from "./chunk-C5GOHBQM.js";
130
+ import "./chunk-3L2L5GIL.js";
131
+ import "./chunk-LRULMAAA.js";
132
+ import "./chunk-5H446C7X.js";
133
+ export {
134
+ AgentJudgeEvaluator,
135
+ CodeEvaluator,
136
+ CompositeEvaluator,
137
+ CostEvaluator,
138
+ DEFAULT_EVALUATOR_TEMPLATE,
139
+ DEFAULT_EVAL_PATTERNS,
140
+ DEFAULT_EXPLORATION_TOOLS,
141
+ DeterministicAssertionEvaluator,
142
+ EvaluatorRegistry,
143
+ ExecutionMetricsEvaluator,
144
+ FieldAccuracyEvaluator,
145
+ LatencyEvaluator,
146
+ LlmJudgeEvaluator,
147
+ OTEL_BACKEND_PRESETS,
148
+ OtelStreamingObserver,
149
+ OtelTraceExporter,
150
+ OtlpJsonFileExporter,
151
+ ProviderRegistry,
152
+ ResponseCache,
153
+ SimpleTraceFileExporter,
154
+ TEST_MESSAGE_ROLES,
155
+ TemplateNotDirectoryError,
156
+ TemplateNotFoundError,
157
+ TokenUsageEvaluator,
158
+ ToolTrajectoryEvaluator,
159
+ WorkspaceCreationError,
160
+ assembleLlmJudgePrompt,
161
+ avgToolDurationMs,
162
+ buildDirectoryChain,
163
+ buildOutputSchema,
164
+ buildPromptInputs,
165
+ buildRubricOutputSchema,
166
+ buildScoreRangeOutputSchema,
167
+ buildSearchRoots,
168
+ calculateRubricScore,
169
+ captureFileChanges,
170
+ clampScore,
171
+ cleanupEvalWorkspaces,
172
+ cleanupWorkspace,
173
+ computeTraceSummary,
174
+ consumeClaudeLogEntries,
175
+ consumeCodexLogEntries,
176
+ consumeCopilotCliLogEntries,
177
+ consumeCopilotSdkLogEntries,
178
+ consumePiLogEntries,
179
+ createAgentKernel,
180
+ createBuiltinProviderRegistry,
181
+ createBuiltinRegistry,
182
+ createProvider,
183
+ createTempWorkspace,
184
+ deepEqual,
185
+ defineConfig,
186
+ detectFormat,
187
+ discoverAssertions,
188
+ discoverProviders,
189
+ ensureVSCodeSubagents,
190
+ evaluate,
191
+ executeScript,
192
+ executeWorkspaceScript,
193
+ explorationRatio,
194
+ extractCacheConfig,
195
+ extractJsonBlob,
196
+ extractTargetFromSuite,
197
+ extractTargetsFromSuite,
198
+ extractTargetsFromTestCase,
199
+ extractTrialsConfig,
200
+ fileExists,
201
+ findGitRoot,
202
+ freeformEvaluationSchema,
203
+ generateRubrics,
204
+ getHitCount,
205
+ getWorkspacePath,
206
+ initializeBaseline,
207
+ isEvaluatorKind,
208
+ isGuidelineFile,
209
+ isJsonObject,
210
+ isJsonValue,
211
+ isNonEmptyString,
212
+ isTestMessage,
213
+ isTestMessageRole,
214
+ listTargetNames,
215
+ loadConfig,
216
+ loadEvalCaseById,
217
+ loadEvalCases,
218
+ loadEvalSuite,
219
+ loadTestById,
220
+ loadTestSuite,
221
+ loadTests,
222
+ loadTsConfig,
223
+ mergeExecutionMetrics,
224
+ negateScore,
225
+ normalizeLineEndings,
226
+ parseJsonFromText,
227
+ parseJsonSafe,
228
+ readJsonFile,
229
+ readTargetDefinitions,
230
+ readTestSuiteMetadata,
231
+ readTextFile,
232
+ resolveAndCreateProvider,
233
+ resolveFileReference,
234
+ resolveTargetDefinition,
235
+ resolveWorkspaceTemplate,
236
+ rubricEvaluationSchema,
237
+ runContainsAssertion,
238
+ runEqualsAssertion,
239
+ runEvalCase,
240
+ runEvaluation,
241
+ runIsJsonAssertion,
242
+ runRegexAssertion,
243
+ scoreToVerdict,
244
+ shouldEnableCache,
245
+ shouldSkipCacheForTemperature,
246
+ subscribeToClaudeLogEntries,
247
+ subscribeToCodexLogEntries,
248
+ subscribeToCopilotCliLogEntries,
249
+ subscribeToCopilotSdkLogEntries,
250
+ subscribeToPiLogEntries,
251
+ substituteVariables,
252
+ toCamelCaseDeep,
253
+ toSnakeCaseDeep,
254
+ tokensPerTool,
255
+ trimBaselineResult
256
+ };
257
+ //# sourceMappingURL=dist-R3OCWGXH.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}