agentv 4.37.0-next.1 → 4.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-GFNKYREE.js → artifact-writer-MK5X5MSO.js} +4 -4
- package/dist/{chunk-P4LSNFZR.js → chunk-3G4BK6Z5.js} +21 -20
- package/dist/{chunk-P4LSNFZR.js.map → chunk-3G4BK6Z5.js.map} +1 -1
- package/dist/{chunk-N6E5XFOM.js → chunk-DKUAETXE.js} +3 -3
- package/dist/{chunk-M7AMFWBZ.js → chunk-EKMMIULD.js} +42 -37
- package/dist/chunk-EKMMIULD.js.map +1 -0
- package/dist/{chunk-OYI35QFW.js → chunk-NLTIK3LV.js} +32 -250
- package/dist/chunk-NLTIK3LV.js.map +1 -0
- package/dist/{chunk-RL4S2FBZ.js → chunk-VBHHZQS6.js} +902 -488
- package/dist/chunk-VBHHZQS6.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/dashboard/assets/index-BpnllKET.css +1 -0
- package/dist/dashboard/assets/index-Cm9SUopp.js +118 -0
- package/dist/dashboard/assets/{index-BDRYJsGF.js → index-SIl6NbIJ.js} +1 -1
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-OY3JSP6Z.js → dist-HVLBDG5F.js} +17 -13
- package/dist/index.js +5 -5
- package/dist/{interactive-CQELHITQ.js → interactive-QFAAM4SI.js} +5 -5
- package/dist/skills/agentv-eval-writer/SKILL.md +28 -36
- package/dist/skills/agentv-eval-writer/references/eval-schema.json +57 -210
- package/dist/{ts-eval-loader-RBTB2HG2-H5TRXZLO.js → ts-eval-loader-TJT6BGFF-DI7XNSO4.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-M7AMFWBZ.js.map +0 -1
- package/dist/chunk-OYI35QFW.js.map +0 -1
- package/dist/chunk-RL4S2FBZ.js.map +0 -1
- package/dist/dashboard/assets/index-9tV-u4HJ.css +0 -1
- package/dist/dashboard/assets/index-DuESU7zZ.js +0 -118
- /package/dist/{artifact-writer-GFNKYREE.js.map → artifact-writer-MK5X5MSO.js.map} +0 -0
- /package/dist/{chunk-N6E5XFOM.js.map → chunk-DKUAETXE.js.map} +0 -0
- /package/dist/{dist-OY3JSP6Z.js.map → dist-HVLBDG5F.js.map} +0 -0
- /package/dist/{interactive-CQELHITQ.js.map → interactive-QFAAM4SI.js.map} +0 -0
- /package/dist/{ts-eval-loader-RBTB2HG2-H5TRXZLO.js.map → ts-eval-loader-TJT6BGFF-DI7XNSO4.js.map} +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import{r as u,W as T}from"./index-
|
|
1
|
+
import{r as u,W as T}from"./index-Cm9SUopp.js";function ie(e,t){(t==null||t>e.length)&&(t=e.length);for(var r=0,n=Array(t);r<t;r++)n[r]=e[r];return n}function be(e){if(Array.isArray(e))return e}function ye(e,t,r){return(t=Ee(t))in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function we(e,t){var r=e==null?null:typeof Symbol<"u"&&e[Symbol.iterator]||e["@@iterator"];if(r!=null){var n,o,i,s,p=[],g=!0,v=!1;try{if(i=(r=r.call(e)).next,t!==0)for(;!(g=(n=i.call(r)).done)&&(p.push(n.value),p.length!==t);g=!0);}catch(P){v=!0,o=P}finally{try{if(!g&&r.return!=null&&(s=r.return(),Object(s)!==s))return}finally{if(v)throw o}}return p}}function Oe(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
|
|
2
2
|
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function ae(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),r.push.apply(r,n)}return r}function ue(e){for(var t=1;t<arguments.length;t++){var r=arguments[t]!=null?arguments[t]:{};t%2?ae(Object(r),!0).forEach(function(n){ye(e,n,r[n])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(r)):ae(Object(r)).forEach(function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(r,n))})}return e}function je(e,t){if(e==null)return{};var r,n,o=Me(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n<i.length;n++)r=i[n],t.indexOf(r)===-1&&{}.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function Me(e,t){if(e==null)return{};var r={};for(var n in e)if({}.hasOwnProperty.call(e,n)){if(t.indexOf(n)!==-1)continue;r[n]=e[n]}return r}function Pe(e,t){return be(e)||we(e,t)||Ie(e,t)||Oe()}function Se(e,t){if(typeof e!="object"||!e)return e;var r=e[Symbol.toPrimitive];if(r!==void 0){var n=r.call(e,t);if(typeof n!="object")return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return(t==="string"?String:Number)(e)}function Ee(e){var t=Se(e,"string");return typeof t=="symbol"?t:t+""}function Ie(e,t){if(e){if(typeof e=="string")return ie(e,t);var r={}.toString.call(e).slice(8,-1);return r==="Object"&&e.constructor&&(r=e.constructor.name),r==="Map"||r==="Set"?Array.from(e):r==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r)?ie(e,t):void 0}}function Re(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function ce(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter(function(o){return Object.getOwnPropertyDescriptor(e,o).enumerable})),r.push.apply(r,n)}return r}function le(e){for(var t=1;t<arguments.length;t++){var r=arguments[t]!=null?arguments[t]:{};t%2?ce(Object(r),!0).forEach(function(n){Re(e,n,r[n])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(r)):ce(Object(r)).forEach(function(n){Object.defineProperty(e,n,Object.getOwnPropertyDescriptor(r,n))})}return e}function Ce(){for(var e=arguments.length,t=new Array(e),r=0;r<e;r++)t[r]=arguments[r];return function(n){return t.reduceRight(function(o,i){return i(o)},n)}}function q(e){return function t(){for(var r=this,n=arguments.length,o=new Array(n),i=0;i<n;i++)o[i]=arguments[i];return o.length>=e.length?e.apply(this,o):function(){for(var s=arguments.length,p=new Array(s),g=0;g<s;g++)p[g]=arguments[g];return t.apply(r,[].concat(o,p))}}}function K(e){return{}.toString.call(e).includes("Object")}function Le(e){return!Object.keys(e).length}function U(e){return typeof e=="function"}function Ae(e,t){return Object.prototype.hasOwnProperty.call(e,t)}function Te(e,t){return K(t)||E("changeType"),Object.keys(t).some(function(r){return!Ae(e,r)})&&E("changeField"),t}function $e(e){U(e)||E("selectorType")}function De(e){U(e)||K(e)||E("handlerType"),K(e)&&Object.values(e).some(function(t){return!U(t)})&&E("handlersType")}function Ve(e){e||E("initialIsRequired"),K(e)||E("initialType"),Le(e)&&E("initialContent")}function xe(e,t){throw new Error(e[t]||e.default)}var Ne={initialIsRequired:"initial state is required",initialType:"initial state should be an object",initialContent:"initial state shouldn't be an empty object",handlerType:"handler should be an object or a function",handlersType:"all handlers should be a functions",selectorType:"selector should be a function",changeType:"provided value of changes should be an object",changeField:'it seams you want to change a field in the state which is not specified in the "initial" state',default:"an unknown error accured in `state-local` package"},E=q(xe)(Ne),_={changes:Te,selector:$e,handler:De,initial:Ve};function qe(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};_.initial(e),_.handler(t);var r={current:e},n=q(Fe)(r,t),o=q(Ue)(r),i=q(_.changes)(e),s=q(ze)(r);function p(){var v=arguments.length>0&&arguments[0]!==void 0?arguments[0]:function(P){return P};return _.selector(v),v(r.current)}function g(v){Ce(n,o,i,s)(v)}return[p,g]}function ze(e,t){return U(t)?t(e.current):t}function Ue(e,t){return e.current=le(le({},e.current),t),t}function Fe(e,t,r){return U(t)?t(e.current):Object.keys(r).forEach(function(n){var o;return(o=t[n])===null||o===void 0?void 0:o.call(t,e.current[n])}),r}var We={create:qe},He={paths:{vs:"https://cdn.jsdelivr.net/npm/monaco-editor@0.55.1/min/vs"}};function _e(e){return function t(){for(var r=this,n=arguments.length,o=new Array(n),i=0;i<n;i++)o[i]=arguments[i];return o.length>=e.length?e.apply(this,o):function(){for(var s=arguments.length,p=new Array(s),g=0;g<s;g++)p[g]=arguments[g];return t.apply(r,[].concat(o,p))}}}function Be(e){return{}.toString.call(e).includes("Object")}function Ke(e){return e||se("configIsRequired"),Be(e)||se("configType"),e.urls?(Ge(),{paths:{vs:e.urls.monacoBase}}):e}function Ge(){console.warn(fe.deprecation)}function Ye(e,t){throw new Error(e[t]||e.default)}var fe={configIsRequired:"the configuration object is required",configType:"the configuration object should be an object",default:"an unknown error accured in `@monaco-editor/loader` package",deprecation:`Deprecation warning!
|
|
3
3
|
You are using deprecated way of configuration.
|
|
4
4
|
|
|
@@ -4,8 +4,8 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
6
|
<title>AgentV</title>
|
|
7
|
-
<script type="module" crossorigin src="/assets/index-
|
|
8
|
-
<link rel="stylesheet" crossorigin href="/assets/index-
|
|
7
|
+
<script type="module" crossorigin src="/assets/index-Cm9SUopp.js"></script>
|
|
8
|
+
<link rel="stylesheet" crossorigin href="/assets/index-BpnllKET.css">
|
|
9
9
|
</head>
|
|
10
10
|
<body class="bg-gray-950 text-gray-100">
|
|
11
11
|
<div id="root"></div>
|
|
@@ -6,7 +6,6 @@ import {
|
|
|
6
6
|
OtelTraceExporter,
|
|
7
7
|
RunBudgetTracker,
|
|
8
8
|
TranscriptProvider,
|
|
9
|
-
addProject,
|
|
10
9
|
buildWipBranchName,
|
|
11
10
|
checkoutResultsRepoBranch,
|
|
12
11
|
commitAndPushResultsBranch,
|
|
@@ -15,24 +14,18 @@ import {
|
|
|
15
14
|
defineConfig,
|
|
16
15
|
deleteWipBranch,
|
|
17
16
|
deriveCategory,
|
|
18
|
-
deriveProjectId,
|
|
19
17
|
directPushResults,
|
|
20
18
|
directorySizeBytes,
|
|
21
19
|
discoverClaudeSessions,
|
|
22
20
|
discoverCodexSessions,
|
|
23
|
-
discoverProjects,
|
|
24
21
|
ensureResultsRepoClone,
|
|
25
22
|
generateRubrics,
|
|
26
23
|
getOutputFilenames,
|
|
27
|
-
getProject,
|
|
28
|
-
getProjectForPath,
|
|
29
|
-
getProjectsRegistryPath,
|
|
30
24
|
getResultsRepoLocalPaths,
|
|
31
25
|
getResultsRepoStatus,
|
|
32
26
|
getResultsRepoSyncStatus,
|
|
33
27
|
groupTranscriptJsonLines,
|
|
34
28
|
listGitRuns,
|
|
35
|
-
loadProjectRegistry,
|
|
36
29
|
loadTsConfig,
|
|
37
30
|
materializeGitRun,
|
|
38
31
|
normalizeResultsConfig,
|
|
@@ -44,11 +37,9 @@ import {
|
|
|
44
37
|
pushWipCheckpoint,
|
|
45
38
|
readTranscriptFile,
|
|
46
39
|
readTranscriptJsonl,
|
|
47
|
-
removeProject,
|
|
48
40
|
resolveResultsRepoRunsDir,
|
|
49
41
|
resolveResultsRepoUrl,
|
|
50
42
|
runBeforeSessionHook,
|
|
51
|
-
saveProjectRegistry,
|
|
52
43
|
scanRepoDeps,
|
|
53
44
|
setupWipWorktree,
|
|
54
45
|
stageResultsArtifacts,
|
|
@@ -57,13 +48,12 @@ import {
|
|
|
57
48
|
syncResultsRepo,
|
|
58
49
|
syncResultsRepoForProject,
|
|
59
50
|
toTranscriptJsonLines,
|
|
60
|
-
touchProject,
|
|
61
51
|
traceFromTranscriptJsonLines,
|
|
62
52
|
traceToTranscriptJsonLines,
|
|
63
53
|
transpileEvalYaml,
|
|
64
54
|
transpileEvalYamlFile,
|
|
65
55
|
trimBaselineResult
|
|
66
|
-
} from "./chunk-
|
|
56
|
+
} from "./chunk-NLTIK3LV.js";
|
|
67
57
|
import {
|
|
68
58
|
OtlpJsonFileExporter
|
|
69
59
|
} from "./chunk-QOBQ5XYF.js";
|
|
@@ -147,6 +137,7 @@ import {
|
|
|
147
137
|
TraceToolWireSchema,
|
|
148
138
|
WorkspaceCreationError,
|
|
149
139
|
WorkspacePoolManager,
|
|
140
|
+
addProject,
|
|
150
141
|
appendErrorEventToTrace,
|
|
151
142
|
appendReplayFixtureRecord,
|
|
152
143
|
assembleLlmGraderPrompt,
|
|
@@ -178,10 +169,12 @@ import {
|
|
|
178
169
|
createProvider,
|
|
179
170
|
createTempWorkspace,
|
|
180
171
|
deepEqual,
|
|
172
|
+
deriveProjectId,
|
|
181
173
|
detectFormat,
|
|
182
174
|
discoverAssertions,
|
|
183
175
|
discoverCopilotSessions,
|
|
184
176
|
discoverGraders,
|
|
177
|
+
discoverProjects,
|
|
185
178
|
discoverProviders,
|
|
186
179
|
ensureVSCodeSubagents,
|
|
187
180
|
evaluate,
|
|
@@ -213,6 +206,9 @@ import {
|
|
|
213
206
|
getAgentvConfigDir,
|
|
214
207
|
getAgentvDataDir,
|
|
215
208
|
getAgentvHome,
|
|
209
|
+
getProject,
|
|
210
|
+
getProjectForPath,
|
|
211
|
+
getProjectsRegistryPath,
|
|
216
212
|
getSelectedTrajectoryEvents,
|
|
217
213
|
getSubagentsRoot,
|
|
218
214
|
getTextContent,
|
|
@@ -237,6 +233,7 @@ import {
|
|
|
237
233
|
loadEvalCaseById,
|
|
238
234
|
loadEvalCases,
|
|
239
235
|
loadEvalSuite,
|
|
236
|
+
loadProjectRegistry,
|
|
240
237
|
loadTestById,
|
|
241
238
|
loadTestSuite,
|
|
242
239
|
loadTests,
|
|
@@ -244,6 +241,7 @@ import {
|
|
|
244
241
|
mergeExecutionMetrics,
|
|
245
242
|
negateScore,
|
|
246
243
|
normalizeLineEndings,
|
|
244
|
+
normalizeRepoIdentity,
|
|
247
245
|
parseAgentSkillsEvals,
|
|
248
246
|
parseCopilotEvents,
|
|
249
247
|
parseJsonFromText,
|
|
@@ -255,11 +253,13 @@ import {
|
|
|
255
253
|
readTestSuiteMetadata,
|
|
256
254
|
readTextFile,
|
|
257
255
|
readTraceEnvelopeReplayRecords,
|
|
256
|
+
removeProject,
|
|
258
257
|
replayFixtureRecordToProviderResponse,
|
|
259
258
|
replayLookupIdentityMatches,
|
|
260
259
|
resolveAndCreateProvider,
|
|
261
260
|
resolveDelegatedTargetDefinition,
|
|
262
261
|
resolveFileReference,
|
|
262
|
+
resolveRepoCloneUrl,
|
|
263
263
|
resolveResultsConfigForProject,
|
|
264
264
|
resolveTargetDefinition,
|
|
265
265
|
resolveWorkspaceTemplate,
|
|
@@ -278,6 +278,7 @@ import {
|
|
|
278
278
|
runRegexAssertion,
|
|
279
279
|
runStartsWithAssertion,
|
|
280
280
|
sameReplayEvalPath,
|
|
281
|
+
saveProjectRegistry,
|
|
281
282
|
scoreRangeEvaluationSchema,
|
|
282
283
|
scoreToVerdict,
|
|
283
284
|
serializeReplayFixtureRecord,
|
|
@@ -295,13 +296,14 @@ import {
|
|
|
295
296
|
toTraceArtifactWire,
|
|
296
297
|
toTraceEnvelopeWire,
|
|
297
298
|
tokensPerTool,
|
|
299
|
+
touchProject,
|
|
298
300
|
traceEnvelopeReplayRecordToProviderResponse,
|
|
299
301
|
traceEnvelopeToMessages,
|
|
300
302
|
traceEnvelopeToTraceArtifact,
|
|
301
303
|
traceEnvelopeToTraceSummary,
|
|
302
304
|
trackChild,
|
|
303
305
|
trackedChildCount
|
|
304
|
-
} from "./chunk-
|
|
306
|
+
} from "./chunk-VBHHZQS6.js";
|
|
305
307
|
import "./chunk-NPVGBFF6.js";
|
|
306
308
|
import "./chunk-M7BUKBAF.js";
|
|
307
309
|
import "./chunk-5H446C7X.js";
|
|
@@ -516,6 +518,7 @@ export {
|
|
|
516
518
|
mergeExecutionMetrics,
|
|
517
519
|
negateScore,
|
|
518
520
|
normalizeLineEndings,
|
|
521
|
+
normalizeRepoIdentity,
|
|
519
522
|
normalizeResultsConfig,
|
|
520
523
|
parseAgentSkillsEvals,
|
|
521
524
|
parseClaudeSession,
|
|
@@ -542,6 +545,7 @@ export {
|
|
|
542
545
|
resolveAndCreateProvider,
|
|
543
546
|
resolveDelegatedTargetDefinition,
|
|
544
547
|
resolveFileReference,
|
|
548
|
+
resolveRepoCloneUrl,
|
|
545
549
|
resolveResultsConfigForProject,
|
|
546
550
|
resolveResultsRepoRunsDir,
|
|
547
551
|
resolveResultsRepoUrl,
|
|
@@ -602,4 +606,4 @@ export {
|
|
|
602
606
|
transpileEvalYamlFile,
|
|
603
607
|
trimBaselineResult
|
|
604
608
|
};
|
|
605
|
-
//# sourceMappingURL=dist-
|
|
609
|
+
//# sourceMappingURL=dist-HVLBDG5F.js.map
|
package/dist/index.js
CHANGED
|
@@ -4,13 +4,13 @@ import {
|
|
|
4
4
|
preprocessArgv,
|
|
5
5
|
runCli,
|
|
6
6
|
usesDeprecatedStudioAlias
|
|
7
|
-
} from "./chunk-
|
|
8
|
-
import "./chunk-
|
|
9
|
-
import "./chunk-
|
|
10
|
-
import "./chunk-
|
|
7
|
+
} from "./chunk-3G4BK6Z5.js";
|
|
8
|
+
import "./chunk-EKMMIULD.js";
|
|
9
|
+
import "./chunk-DKUAETXE.js";
|
|
10
|
+
import "./chunk-NLTIK3LV.js";
|
|
11
11
|
import "./chunk-QOBQ5XYF.js";
|
|
12
12
|
import "./chunk-BPGJ4HBU.js";
|
|
13
|
-
import "./chunk-
|
|
13
|
+
import "./chunk-VBHHZQS6.js";
|
|
14
14
|
import "./chunk-NPVGBFF6.js";
|
|
15
15
|
import "./chunk-M7BUKBAF.js";
|
|
16
16
|
import "./chunk-5H446C7X.js";
|
|
@@ -7,16 +7,16 @@ import {
|
|
|
7
7
|
findRepoRoot,
|
|
8
8
|
getCategories,
|
|
9
9
|
runEvalCommand
|
|
10
|
-
} from "./chunk-
|
|
11
|
-
import "./chunk-
|
|
12
|
-
import "./chunk-
|
|
10
|
+
} from "./chunk-EKMMIULD.js";
|
|
11
|
+
import "./chunk-DKUAETXE.js";
|
|
12
|
+
import "./chunk-NLTIK3LV.js";
|
|
13
13
|
import "./chunk-QOBQ5XYF.js";
|
|
14
14
|
import "./chunk-BPGJ4HBU.js";
|
|
15
15
|
import {
|
|
16
16
|
getAgentvConfigDir,
|
|
17
17
|
listTargetNames,
|
|
18
18
|
readTargetDefinitions
|
|
19
|
-
} from "./chunk-
|
|
19
|
+
} from "./chunk-VBHHZQS6.js";
|
|
20
20
|
import "./chunk-NPVGBFF6.js";
|
|
21
21
|
import "./chunk-M7BUKBAF.js";
|
|
22
22
|
import "./chunk-5H446C7X.js";
|
|
@@ -360,4 +360,4 @@ ${ANSI_DIM}Retrying execution errors...${ANSI_RESET}
|
|
|
360
360
|
export {
|
|
361
361
|
launchInteractiveWizard
|
|
362
362
|
};
|
|
363
|
-
//# sourceMappingURL=interactive-
|
|
363
|
+
//# sourceMappingURL=interactive-QFAAM4SI.js.map
|
|
@@ -283,66 +283,58 @@ Run scripts before/after each test. Define at suite level or override per case:
|
|
|
283
283
|
```yaml
|
|
284
284
|
workspace:
|
|
285
285
|
template: ./workspace-templates/my-project
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
286
|
+
repos:
|
|
287
|
+
- path: ./repo
|
|
288
|
+
repo: sympy/sympy
|
|
289
|
+
base_commit: "abc123"
|
|
290
|
+
hooks:
|
|
291
|
+
before_all:
|
|
292
|
+
command: ["bun", "run", "setup.ts"]
|
|
293
|
+
timeout_ms: 120000
|
|
294
|
+
after_each:
|
|
295
|
+
reset: fast
|
|
296
|
+
after_all:
|
|
297
|
+
command: ["bun", "run", "teardown.ts"]
|
|
291
298
|
|
|
292
299
|
tests:
|
|
293
300
|
- id: case-1
|
|
294
301
|
input: Fix the bug
|
|
295
302
|
criteria: Bug is fixed
|
|
296
303
|
metadata:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
type: git
|
|
303
|
-
url: https://github.com/sympy/sympy.git
|
|
304
|
-
checkout:
|
|
305
|
-
base_commit: "abc123"
|
|
306
|
-
docker:
|
|
307
|
-
image: swebench/sweb.eval.django__django:latest
|
|
308
|
-
```
|
|
309
|
-
|
|
310
|
-
**Lifecycle:** template copy → repo clone → setup → git baseline → agent → file changes → teardown → repo reset → cleanup
|
|
304
|
+
source_repo: sympy/sympy
|
|
305
|
+
source_commit: "abc123"
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
**Lifecycle:** template copy → repo materialization → workspace before_all → target before_all → git baseline → before_each hooks → agent → file changes → after_each hooks → after_all hooks → cleanup
|
|
311
309
|
**Merge:** Case-level fields replace suite-level fields.
|
|
312
310
|
**Commands receive stdin JSON:** `{workspace_path, test_id, eval_run_id, case_input, case_metadata}`
|
|
313
311
|
**Setup failure:** aborts case. **Teardown failure:** non-fatal (warning).
|
|
314
|
-
For SWE-bench-style evals, keep operational checkout state under `workspace.repos[].
|
|
312
|
+
For SWE-bench-style evals, keep operational checkout state under `workspace.repos[].base_commit`; treat `metadata.source_commit` as informational only.
|
|
315
313
|
|
|
316
314
|
### Repository Lifecycle
|
|
317
315
|
|
|
318
|
-
|
|
316
|
+
Materialize repos into the eval workspace automatically. Repo entries declare identity and checkout pins only; AgentV resolves acquisition from registered projects, `git_cache.mirrors`, its mirror cache, then remote clone. For shared repo workspaces, pooling is the default:
|
|
319
317
|
|
|
320
318
|
```yaml
|
|
321
319
|
workspace:
|
|
322
320
|
repos:
|
|
323
321
|
- path: ./repo
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
checkout:
|
|
328
|
-
ref: main
|
|
329
|
-
ancestor: 1 # parent commit
|
|
330
|
-
clone:
|
|
331
|
-
depth: 10
|
|
322
|
+
repo: https://github.com/org/repo.git
|
|
323
|
+
commit: main
|
|
324
|
+
ancestor: 1 # parent commit
|
|
332
325
|
hooks:
|
|
333
326
|
after_each:
|
|
334
327
|
reset: fast # none | fast | strict
|
|
335
328
|
isolation: shared # shared | per_test
|
|
336
329
|
mode: pooled # pooled | temp | static
|
|
337
|
-
hooks:
|
|
338
|
-
enabled: true # set false to skip all hooks
|
|
339
330
|
```
|
|
340
331
|
|
|
341
|
-
- `
|
|
342
|
-
- `
|
|
343
|
-
- `
|
|
344
|
-
- `
|
|
345
|
-
- `
|
|
332
|
+
- `repo`: full clone URL or GitHub `org/name` shorthand
|
|
333
|
+
- `commit`: branch, tag, or SHA to check out
|
|
334
|
+
- `base_commit`: alias for `commit` for SWE-bench-style datasets
|
|
335
|
+
- `ancestor`: walk N commits back from the checked-out ref
|
|
336
|
+
- `sparse`: sparse checkout paths array
|
|
337
|
+
- Do not use legacy `source`, `type`, `checkout`, `resolve`, or `clone` fields under `workspace.repos[]`
|
|
346
338
|
- `mode`: `pooled` (default for shared repos), `temp`, or `static`
|
|
347
339
|
- `path`: workspace path used when `mode: static`; when empty/missing the workspace is auto-materialised (template copied + repos cloned); populated dirs are reused as-is
|
|
348
340
|
- `hooks.enabled`: boolean (default `true`); set `false` to skip all lifecycle hooks
|
|
@@ -5093,78 +5093,27 @@
|
|
|
5093
5093
|
"path": {
|
|
5094
5094
|
"type": "string"
|
|
5095
5095
|
},
|
|
5096
|
-
"
|
|
5097
|
-
"
|
|
5098
|
-
|
|
5099
|
-
"type": "object",
|
|
5100
|
-
"properties": {
|
|
5101
|
-
"type": {
|
|
5102
|
-
"type": "string",
|
|
5103
|
-
"const": "git"
|
|
5104
|
-
},
|
|
5105
|
-
"url": {
|
|
5106
|
-
"type": "string",
|
|
5107
|
-
"format": "uri"
|
|
5108
|
-
}
|
|
5109
|
-
},
|
|
5110
|
-
"required": ["type", "url"],
|
|
5111
|
-
"additionalProperties": false
|
|
5112
|
-
},
|
|
5113
|
-
{
|
|
5114
|
-
"type": "object",
|
|
5115
|
-
"properties": {
|
|
5116
|
-
"type": {
|
|
5117
|
-
"type": "string",
|
|
5118
|
-
"const": "local"
|
|
5119
|
-
},
|
|
5120
|
-
"path": {
|
|
5121
|
-
"type": "string"
|
|
5122
|
-
}
|
|
5123
|
-
},
|
|
5124
|
-
"required": ["type", "path"],
|
|
5125
|
-
"additionalProperties": false
|
|
5126
|
-
}
|
|
5127
|
-
]
|
|
5096
|
+
"repo": {
|
|
5097
|
+
"type": "string",
|
|
5098
|
+
"minLength": 1
|
|
5128
5099
|
},
|
|
5129
|
-
"
|
|
5130
|
-
"type": "
|
|
5131
|
-
"
|
|
5132
|
-
"ref": {
|
|
5133
|
-
"type": "string"
|
|
5134
|
-
},
|
|
5135
|
-
"base_commit": {
|
|
5136
|
-
"type": "string",
|
|
5137
|
-
"minLength": 1
|
|
5138
|
-
},
|
|
5139
|
-
"resolve": {
|
|
5140
|
-
"type": "string",
|
|
5141
|
-
"enum": ["remote", "local"]
|
|
5142
|
-
},
|
|
5143
|
-
"ancestor": {
|
|
5144
|
-
"type": "integer",
|
|
5145
|
-
"minimum": 0
|
|
5146
|
-
}
|
|
5147
|
-
},
|
|
5148
|
-
"additionalProperties": false
|
|
5100
|
+
"commit": {
|
|
5101
|
+
"type": "string",
|
|
5102
|
+
"minLength": 1
|
|
5149
5103
|
},
|
|
5150
|
-
"
|
|
5151
|
-
"type": "
|
|
5152
|
-
"
|
|
5153
|
-
|
|
5154
|
-
|
|
5155
|
-
|
|
5156
|
-
|
|
5157
|
-
|
|
5158
|
-
|
|
5159
|
-
|
|
5160
|
-
|
|
5161
|
-
|
|
5162
|
-
|
|
5163
|
-
"type": "string"
|
|
5164
|
-
}
|
|
5165
|
-
}
|
|
5166
|
-
},
|
|
5167
|
-
"additionalProperties": false
|
|
5104
|
+
"base_commit": {
|
|
5105
|
+
"type": "string",
|
|
5106
|
+
"minLength": 1
|
|
5107
|
+
},
|
|
5108
|
+
"ancestor": {
|
|
5109
|
+
"type": "integer",
|
|
5110
|
+
"minimum": 0
|
|
5111
|
+
},
|
|
5112
|
+
"sparse": {
|
|
5113
|
+
"type": "array",
|
|
5114
|
+
"items": {
|
|
5115
|
+
"type": "string"
|
|
5116
|
+
}
|
|
5168
5117
|
}
|
|
5169
5118
|
},
|
|
5170
5119
|
"additionalProperties": false
|
|
@@ -11667,78 +11616,27 @@
|
|
|
11667
11616
|
"path": {
|
|
11668
11617
|
"type": "string"
|
|
11669
11618
|
},
|
|
11670
|
-
"
|
|
11671
|
-
"
|
|
11672
|
-
|
|
11673
|
-
"type": "object",
|
|
11674
|
-
"properties": {
|
|
11675
|
-
"type": {
|
|
11676
|
-
"type": "string",
|
|
11677
|
-
"const": "git"
|
|
11678
|
-
},
|
|
11679
|
-
"url": {
|
|
11680
|
-
"type": "string",
|
|
11681
|
-
"format": "uri"
|
|
11682
|
-
}
|
|
11683
|
-
},
|
|
11684
|
-
"required": ["type", "url"],
|
|
11685
|
-
"additionalProperties": false
|
|
11686
|
-
},
|
|
11687
|
-
{
|
|
11688
|
-
"type": "object",
|
|
11689
|
-
"properties": {
|
|
11690
|
-
"type": {
|
|
11691
|
-
"type": "string",
|
|
11692
|
-
"const": "local"
|
|
11693
|
-
},
|
|
11694
|
-
"path": {
|
|
11695
|
-
"type": "string"
|
|
11696
|
-
}
|
|
11697
|
-
},
|
|
11698
|
-
"required": ["type", "path"],
|
|
11699
|
-
"additionalProperties": false
|
|
11700
|
-
}
|
|
11701
|
-
]
|
|
11619
|
+
"repo": {
|
|
11620
|
+
"type": "string",
|
|
11621
|
+
"minLength": 1
|
|
11702
11622
|
},
|
|
11703
|
-
"
|
|
11704
|
-
"type": "
|
|
11705
|
-
"
|
|
11706
|
-
"ref": {
|
|
11707
|
-
"type": "string"
|
|
11708
|
-
},
|
|
11709
|
-
"base_commit": {
|
|
11710
|
-
"type": "string",
|
|
11711
|
-
"minLength": 1
|
|
11712
|
-
},
|
|
11713
|
-
"resolve": {
|
|
11714
|
-
"type": "string",
|
|
11715
|
-
"enum": ["remote", "local"]
|
|
11716
|
-
},
|
|
11717
|
-
"ancestor": {
|
|
11718
|
-
"type": "integer",
|
|
11719
|
-
"minimum": 0
|
|
11720
|
-
}
|
|
11721
|
-
},
|
|
11722
|
-
"additionalProperties": false
|
|
11623
|
+
"commit": {
|
|
11624
|
+
"type": "string",
|
|
11625
|
+
"minLength": 1
|
|
11723
11626
|
},
|
|
11724
|
-
"
|
|
11725
|
-
"type": "
|
|
11726
|
-
"
|
|
11727
|
-
|
|
11728
|
-
|
|
11729
|
-
|
|
11730
|
-
|
|
11731
|
-
|
|
11732
|
-
|
|
11733
|
-
|
|
11734
|
-
|
|
11735
|
-
|
|
11736
|
-
|
|
11737
|
-
"type": "string"
|
|
11738
|
-
}
|
|
11739
|
-
}
|
|
11740
|
-
},
|
|
11741
|
-
"additionalProperties": false
|
|
11627
|
+
"base_commit": {
|
|
11628
|
+
"type": "string",
|
|
11629
|
+
"minLength": 1
|
|
11630
|
+
},
|
|
11631
|
+
"ancestor": {
|
|
11632
|
+
"type": "integer",
|
|
11633
|
+
"minimum": 0
|
|
11634
|
+
},
|
|
11635
|
+
"sparse": {
|
|
11636
|
+
"type": "array",
|
|
11637
|
+
"items": {
|
|
11638
|
+
"type": "string"
|
|
11639
|
+
}
|
|
11742
11640
|
}
|
|
11743
11641
|
},
|
|
11744
11642
|
"additionalProperties": false
|
|
@@ -17006,78 +16904,27 @@
|
|
|
17006
16904
|
"path": {
|
|
17007
16905
|
"type": "string"
|
|
17008
16906
|
},
|
|
17009
|
-
"
|
|
17010
|
-
"
|
|
17011
|
-
|
|
17012
|
-
"type": "object",
|
|
17013
|
-
"properties": {
|
|
17014
|
-
"type": {
|
|
17015
|
-
"type": "string",
|
|
17016
|
-
"const": "git"
|
|
17017
|
-
},
|
|
17018
|
-
"url": {
|
|
17019
|
-
"type": "string",
|
|
17020
|
-
"format": "uri"
|
|
17021
|
-
}
|
|
17022
|
-
},
|
|
17023
|
-
"required": ["type", "url"],
|
|
17024
|
-
"additionalProperties": false
|
|
17025
|
-
},
|
|
17026
|
-
{
|
|
17027
|
-
"type": "object",
|
|
17028
|
-
"properties": {
|
|
17029
|
-
"type": {
|
|
17030
|
-
"type": "string",
|
|
17031
|
-
"const": "local"
|
|
17032
|
-
},
|
|
17033
|
-
"path": {
|
|
17034
|
-
"type": "string"
|
|
17035
|
-
}
|
|
17036
|
-
},
|
|
17037
|
-
"required": ["type", "path"],
|
|
17038
|
-
"additionalProperties": false
|
|
17039
|
-
}
|
|
17040
|
-
]
|
|
16907
|
+
"repo": {
|
|
16908
|
+
"type": "string",
|
|
16909
|
+
"minLength": 1
|
|
17041
16910
|
},
|
|
17042
|
-
"
|
|
17043
|
-
"type": "
|
|
17044
|
-
"
|
|
17045
|
-
"ref": {
|
|
17046
|
-
"type": "string"
|
|
17047
|
-
},
|
|
17048
|
-
"base_commit": {
|
|
17049
|
-
"type": "string",
|
|
17050
|
-
"minLength": 1
|
|
17051
|
-
},
|
|
17052
|
-
"resolve": {
|
|
17053
|
-
"type": "string",
|
|
17054
|
-
"enum": ["remote", "local"]
|
|
17055
|
-
},
|
|
17056
|
-
"ancestor": {
|
|
17057
|
-
"type": "integer",
|
|
17058
|
-
"minimum": 0
|
|
17059
|
-
}
|
|
17060
|
-
},
|
|
17061
|
-
"additionalProperties": false
|
|
16911
|
+
"commit": {
|
|
16912
|
+
"type": "string",
|
|
16913
|
+
"minLength": 1
|
|
17062
16914
|
},
|
|
17063
|
-
"
|
|
17064
|
-
"type": "
|
|
17065
|
-
"
|
|
17066
|
-
|
|
17067
|
-
|
|
17068
|
-
|
|
17069
|
-
|
|
17070
|
-
|
|
17071
|
-
|
|
17072
|
-
|
|
17073
|
-
|
|
17074
|
-
|
|
17075
|
-
|
|
17076
|
-
"type": "string"
|
|
17077
|
-
}
|
|
17078
|
-
}
|
|
17079
|
-
},
|
|
17080
|
-
"additionalProperties": false
|
|
16915
|
+
"base_commit": {
|
|
16916
|
+
"type": "string",
|
|
16917
|
+
"minLength": 1
|
|
16918
|
+
},
|
|
16919
|
+
"ancestor": {
|
|
16920
|
+
"type": "integer",
|
|
16921
|
+
"minimum": 0
|
|
16922
|
+
},
|
|
16923
|
+
"sparse": {
|
|
16924
|
+
"type": "array",
|
|
16925
|
+
"items": {
|
|
16926
|
+
"type": "string"
|
|
16927
|
+
}
|
|
17081
16928
|
}
|
|
17082
16929
|
},
|
|
17083
16930
|
"additionalProperties": false
|
|
@@ -2,7 +2,7 @@ import { createRequire } from 'node:module'; const require = createRequire(impor
|
|
|
2
2
|
import {
|
|
3
3
|
loadTsEvalFile,
|
|
4
4
|
loadTsEvalSuite
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-VBHHZQS6.js";
|
|
6
6
|
import "./chunk-NPVGBFF6.js";
|
|
7
7
|
import "./chunk-M7BUKBAF.js";
|
|
8
8
|
import "./chunk-5H446C7X.js";
|
|
@@ -10,4 +10,4 @@ export {
|
|
|
10
10
|
loadTsEvalFile,
|
|
11
11
|
loadTsEvalSuite
|
|
12
12
|
};
|
|
13
|
-
//# sourceMappingURL=ts-eval-loader-
|
|
13
|
+
//# sourceMappingURL=ts-eval-loader-TJT6BGFF-DI7XNSO4.js.map
|