@wix/evalforge-types 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +438 -432
- package/build/index.js.map +4 -4
- package/build/index.mjs +438 -432
- package/build/index.mjs.map +4 -4
- package/build/types/common/github-source.d.ts +12 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/models.d.ts +2 -5
- package/build/types/evaluation/eval-result.d.ts +2 -2
- package/build/types/evaluation/eval-run.d.ts +2 -2
- package/build/types/target/agent.d.ts +6 -6
- package/build/types/target/skill.d.ts +3 -11
- package/build/types/template/template.d.ts +20 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -172,14 +172,27 @@ var TenantEntitySchema = BaseEntitySchema.extend({
|
|
|
172
172
|
projectId: import_zod.z.string()
|
|
173
173
|
});
|
|
174
174
|
|
|
175
|
-
// src/common/
|
|
175
|
+
// src/common/github-source.ts
|
|
176
176
|
var import_zod2 = require("zod");
|
|
177
|
+
var GitHubSourceSchema = import_zod2.z.object({
|
|
178
|
+
/** GitHub org or user, e.g. "wix" */
|
|
179
|
+
owner: import_zod2.z.string().min(1),
|
|
180
|
+
/** Repository name, e.g. "skills" */
|
|
181
|
+
repo: import_zod2.z.string().min(1),
|
|
182
|
+
/** Folder path within the repo, e.g. "wix-cli/skills/wix-cli-dashboard-page" */
|
|
183
|
+
path: import_zod2.z.string().min(1),
|
|
184
|
+
/** Git ref (branch, tag, or SHA), e.g. "master" or "v1.2.0" */
|
|
185
|
+
ref: import_zod2.z.string().min(1)
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
// src/common/mcp.ts
|
|
189
|
+
var import_zod3 = require("zod");
|
|
177
190
|
var MCP_SERVERS_JSON_KEY = "mcpServers";
|
|
178
191
|
var MCPEntitySchema = TenantEntitySchema.extend({
|
|
179
192
|
/** Display name for the MCP entity (independent of the server key in config) */
|
|
180
|
-
name:
|
|
193
|
+
name: import_zod3.z.string().min(1),
|
|
181
194
|
/** Keyed MCP server config — top-level key is the server name, value is its config */
|
|
182
|
-
config:
|
|
195
|
+
config: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.unknown())
|
|
183
196
|
});
|
|
184
197
|
var CreateMcpInputSchema = MCPEntitySchema.omit({
|
|
185
198
|
id: true,
|
|
@@ -188,10 +201,10 @@ var CreateMcpInputSchema = MCPEntitySchema.omit({
|
|
|
188
201
|
deleted: true
|
|
189
202
|
});
|
|
190
203
|
var UpdateMcpInputSchema = CreateMcpInputSchema.partial();
|
|
191
|
-
var MCPServerConfigSchema =
|
|
204
|
+
var MCPServerConfigSchema = import_zod3.z.record(import_zod3.z.string(), import_zod3.z.unknown());
|
|
192
205
|
|
|
193
206
|
// src/common/models.ts
|
|
194
|
-
var
|
|
207
|
+
var import_zod4 = require("zod");
|
|
195
208
|
var ModelIds = /* @__PURE__ */ ((ModelIds2) => {
|
|
196
209
|
ModelIds2["CLAUDE_3_HAIKU_1_0"] = "CLAUDE_3_HAIKU_1_0";
|
|
197
210
|
ModelIds2["CLAUDE_3_OPUS_1_0"] = "CLAUDE_3_OPUS_1_0";
|
|
@@ -203,25 +216,29 @@ var ModelIds = /* @__PURE__ */ ((ModelIds2) => {
|
|
|
203
216
|
ModelIds2["CLAUDE_4_SONNET_1_0"] = "CLAUDE_4_SONNET_1_0";
|
|
204
217
|
return ModelIds2;
|
|
205
218
|
})(ModelIds || {});
|
|
206
|
-
var ModelIdsSchema =
|
|
207
|
-
var
|
|
219
|
+
var ModelIdsSchema = import_zod4.z.enum(ModelIds);
|
|
220
|
+
var nullToUndefined = (val) => val === null ? void 0 : val;
|
|
221
|
+
var ModelConfigSchema = import_zod4.z.object({
|
|
208
222
|
model: ModelIdsSchema,
|
|
209
|
-
temperature:
|
|
210
|
-
|
|
223
|
+
temperature: import_zod4.z.preprocess(
|
|
224
|
+
nullToUndefined,
|
|
225
|
+
import_zod4.z.number().min(0).max(1).optional()
|
|
226
|
+
),
|
|
227
|
+
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional())
|
|
211
228
|
});
|
|
212
|
-
var ModelPricingSchema =
|
|
213
|
-
inputPer1M:
|
|
214
|
-
outputPer1M:
|
|
229
|
+
var ModelPricingSchema = import_zod4.z.object({
|
|
230
|
+
inputPer1M: import_zod4.z.number(),
|
|
231
|
+
outputPer1M: import_zod4.z.number()
|
|
215
232
|
});
|
|
216
|
-
var ModelSchema =
|
|
233
|
+
var ModelSchema = import_zod4.z.object({
|
|
217
234
|
/** AI Gateway model ID */
|
|
218
235
|
id: ModelIdsSchema,
|
|
219
236
|
/** Display name */
|
|
220
|
-
name:
|
|
237
|
+
name: import_zod4.z.string(),
|
|
221
238
|
/** Provider (always 'anthropic') */
|
|
222
|
-
provider:
|
|
239
|
+
provider: import_zod4.z.literal("anthropic"),
|
|
223
240
|
/** Provider's model identifier (e.g., "claude-3-5-sonnet-20241022") */
|
|
224
|
-
providerModelId:
|
|
241
|
+
providerModelId: import_zod4.z.string(),
|
|
225
242
|
/** Pricing per 1M tokens */
|
|
226
243
|
pricing: ModelPricingSchema
|
|
227
244
|
});
|
|
@@ -294,10 +311,10 @@ var TargetSchema = TenantEntitySchema.extend({
|
|
|
294
311
|
});
|
|
295
312
|
|
|
296
313
|
// src/target/agent.ts
|
|
297
|
-
var
|
|
314
|
+
var import_zod5 = require("zod");
|
|
298
315
|
var AgentSchema = TargetSchema.extend({
|
|
299
316
|
/** Command to run the agent */
|
|
300
|
-
runCommand:
|
|
317
|
+
runCommand: import_zod5.z.string(),
|
|
301
318
|
/** Optional model configuration override */
|
|
302
319
|
modelConfig: ModelConfigSchema.optional()
|
|
303
320
|
});
|
|
@@ -312,61 +329,51 @@ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
|
312
329
|
});
|
|
313
330
|
|
|
314
331
|
// src/target/skill.ts
|
|
315
|
-
var
|
|
332
|
+
var import_zod6 = require("zod");
|
|
316
333
|
var SKILL_FOLDER_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
317
334
|
var SEMVER_REGEX = /^\d+\.\d+\.\d+$/;
|
|
318
|
-
var SkillVersionOriginSchema =
|
|
335
|
+
var SkillVersionOriginSchema = import_zod6.z.enum(["manual", "pr", "master"]);
|
|
319
336
|
function isValidSkillFolderName(name) {
|
|
320
337
|
return typeof name === "string" && name.length > 0 && SKILL_FOLDER_NAME_REGEX.test(name.trim());
|
|
321
338
|
}
|
|
322
|
-
var SkillMetadataSchema =
|
|
323
|
-
name:
|
|
324
|
-
description:
|
|
325
|
-
allowedTools:
|
|
326
|
-
skills:
|
|
327
|
-
});
|
|
328
|
-
var GitHubSourceSchema = import_zod5.z.object({
|
|
329
|
-
/** GitHub org or user, e.g. "wix" */
|
|
330
|
-
owner: import_zod5.z.string().min(1),
|
|
331
|
-
/** Repository name, e.g. "skills" */
|
|
332
|
-
repo: import_zod5.z.string().min(1),
|
|
333
|
-
/** Folder path to the skill directory, e.g. "wix-cli/skills/wix-cli-dashboard-page" */
|
|
334
|
-
path: import_zod5.z.string().min(1),
|
|
335
|
-
/** Git ref (branch, tag, or SHA), e.g. "master" or "v1.2.0" */
|
|
336
|
-
ref: import_zod5.z.string().min(1)
|
|
339
|
+
var SkillMetadataSchema = import_zod6.z.object({
|
|
340
|
+
name: import_zod6.z.string(),
|
|
341
|
+
description: import_zod6.z.string(),
|
|
342
|
+
allowedTools: import_zod6.z.array(import_zod6.z.string()).optional(),
|
|
343
|
+
skills: import_zod6.z.array(import_zod6.z.string()).optional()
|
|
337
344
|
});
|
|
338
|
-
var SkillFileSchema =
|
|
345
|
+
var SkillFileSchema = import_zod6.z.object({
|
|
339
346
|
/** Relative path within the skill directory, e.g. "SKILL.md" or "references/API_SPEC.md" */
|
|
340
|
-
path:
|
|
347
|
+
path: import_zod6.z.string().min(1),
|
|
341
348
|
/** File content (UTF-8 text) */
|
|
342
|
-
content:
|
|
349
|
+
content: import_zod6.z.string()
|
|
343
350
|
});
|
|
344
|
-
var SkillVersionSchema =
|
|
345
|
-
id:
|
|
346
|
-
projectId:
|
|
347
|
-
skillId:
|
|
351
|
+
var SkillVersionSchema = import_zod6.z.object({
|
|
352
|
+
id: import_zod6.z.string(),
|
|
353
|
+
projectId: import_zod6.z.string(),
|
|
354
|
+
skillId: import_zod6.z.string(),
|
|
348
355
|
/** Semver string (e.g. "1.2.0") or Falcon fingerprint */
|
|
349
|
-
version:
|
|
356
|
+
version: import_zod6.z.string(),
|
|
350
357
|
/** How this version was created */
|
|
351
358
|
origin: SkillVersionOriginSchema,
|
|
352
359
|
/** Where this snapshot was taken from */
|
|
353
360
|
source: GitHubSourceSchema.optional(),
|
|
354
361
|
/** Frozen snapshot of all files in the skill directory */
|
|
355
|
-
files:
|
|
362
|
+
files: import_zod6.z.array(SkillFileSchema).optional(),
|
|
356
363
|
/** Optional notes about this version (changelog, reason for change) */
|
|
357
|
-
notes:
|
|
358
|
-
createdAt:
|
|
364
|
+
notes: import_zod6.z.string().optional(),
|
|
365
|
+
createdAt: import_zod6.z.string()
|
|
359
366
|
});
|
|
360
|
-
var CreateSkillVersionInputSchema =
|
|
367
|
+
var CreateSkillVersionInputSchema = import_zod6.z.object({
|
|
361
368
|
/** GitHub source to snapshot from. If not provided, uses the Skill's source. */
|
|
362
369
|
source: GitHubSourceSchema.optional(),
|
|
363
370
|
/** Version string for this snapshot (e.g. "1.0.0", "1.0.3"). */
|
|
364
|
-
version:
|
|
365
|
-
notes:
|
|
371
|
+
version: import_zod6.z.string().min(1),
|
|
372
|
+
notes: import_zod6.z.string().optional(),
|
|
366
373
|
/** Origin of this version. Defaults to 'manual' in backend. */
|
|
367
374
|
origin: SkillVersionOriginSchema.optional(),
|
|
368
375
|
/** Pre-edited files to store directly (bypasses GitHub fetch when provided) */
|
|
369
|
-
files:
|
|
376
|
+
files: import_zod6.z.array(SkillFileSchema).optional()
|
|
370
377
|
});
|
|
371
378
|
var SkillSchema = TargetSchema.extend({
|
|
372
379
|
/** GitHub source reference for live content fetching */
|
|
@@ -382,15 +389,15 @@ var SkillInputBaseSchema = SkillSchema.omit({
|
|
|
382
389
|
source: true
|
|
383
390
|
}).extend({
|
|
384
391
|
/** Optional - not stored on Skill; content description lives in SkillVersion */
|
|
385
|
-
description:
|
|
392
|
+
description: import_zod6.z.string().optional(),
|
|
386
393
|
/** GitHub source reference for live content fetching */
|
|
387
394
|
source: GitHubSourceSchema.optional()
|
|
388
395
|
});
|
|
389
|
-
var InitialVersionInputSchema =
|
|
390
|
-
files:
|
|
391
|
-
notes:
|
|
396
|
+
var InitialVersionInputSchema = import_zod6.z.object({
|
|
397
|
+
files: import_zod6.z.array(SkillFileSchema).optional(),
|
|
398
|
+
notes: import_zod6.z.string().optional(),
|
|
392
399
|
source: GitHubSourceSchema.optional(),
|
|
393
|
-
version:
|
|
400
|
+
version: import_zod6.z.string().optional(),
|
|
394
401
|
origin: SkillVersionOriginSchema.optional()
|
|
395
402
|
});
|
|
396
403
|
var CreateSkillInputSchema = SkillInputBaseSchema.extend({
|
|
@@ -408,10 +415,10 @@ var SkillWithLatestVersionSchema = SkillSchema.extend({
|
|
|
408
415
|
});
|
|
409
416
|
|
|
410
417
|
// src/target/skills-group.ts
|
|
411
|
-
var
|
|
418
|
+
var import_zod7 = require("zod");
|
|
412
419
|
var SkillsGroupSchema = TenantEntitySchema.extend({
|
|
413
420
|
/** IDs of skills in this group */
|
|
414
|
-
skillIds:
|
|
421
|
+
skillIds: import_zod7.z.array(import_zod7.z.string())
|
|
415
422
|
});
|
|
416
423
|
var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
417
424
|
id: true,
|
|
@@ -422,10 +429,10 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
|
422
429
|
var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
|
|
423
430
|
|
|
424
431
|
// src/target/sub-agent.ts
|
|
425
|
-
var
|
|
432
|
+
var import_zod8 = require("zod");
|
|
426
433
|
var SubAgentSchema = TargetSchema.extend({
|
|
427
434
|
/** The full sub-agent markdown content (YAML frontmatter + body) */
|
|
428
|
-
subAgentMd:
|
|
435
|
+
subAgentMd: import_zod8.z.string()
|
|
429
436
|
});
|
|
430
437
|
var SubAgentInputBaseSchema = SubAgentSchema.omit({
|
|
431
438
|
id: true,
|
|
@@ -437,10 +444,10 @@ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
|
|
|
437
444
|
var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
|
|
438
445
|
|
|
439
446
|
// src/test/index.ts
|
|
440
|
-
var
|
|
447
|
+
var import_zod19 = require("zod");
|
|
441
448
|
|
|
442
449
|
// src/test/base.ts
|
|
443
|
-
var
|
|
450
|
+
var import_zod9 = require("zod");
|
|
444
451
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
445
452
|
TestType2["LLM"] = "LLM";
|
|
446
453
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -453,7 +460,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
453
460
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
454
461
|
return TestType2;
|
|
455
462
|
})(TestType || {});
|
|
456
|
-
var TestTypeSchema =
|
|
463
|
+
var TestTypeSchema = import_zod9.z.enum(TestType);
|
|
457
464
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
458
465
|
TestImportance2["LOW"] = "low";
|
|
459
466
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -461,153 +468,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
461
468
|
TestImportance2["CRITICAL"] = "critical";
|
|
462
469
|
return TestImportance2;
|
|
463
470
|
})(TestImportance || {});
|
|
464
|
-
var TestImportanceSchema =
|
|
465
|
-
var BaseTestSchema =
|
|
466
|
-
id:
|
|
471
|
+
var TestImportanceSchema = import_zod9.z.enum(TestImportance);
|
|
472
|
+
var BaseTestSchema = import_zod9.z.object({
|
|
473
|
+
id: import_zod9.z.string(),
|
|
467
474
|
type: TestTypeSchema,
|
|
468
|
-
name:
|
|
469
|
-
description:
|
|
475
|
+
name: import_zod9.z.string().min(3),
|
|
476
|
+
description: import_zod9.z.string().optional(),
|
|
470
477
|
importance: TestImportanceSchema.optional()
|
|
471
478
|
});
|
|
472
479
|
|
|
473
480
|
// src/test/llm.ts
|
|
474
|
-
var
|
|
481
|
+
var import_zod10 = require("zod");
|
|
475
482
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
476
|
-
type:
|
|
483
|
+
type: import_zod10.z.literal("LLM" /* LLM */),
|
|
477
484
|
/** Maximum steps for the LLM to take */
|
|
478
|
-
maxSteps:
|
|
485
|
+
maxSteps: import_zod10.z.number().min(1).max(100),
|
|
479
486
|
/** Prompt to send to the evaluator */
|
|
480
|
-
prompt:
|
|
487
|
+
prompt: import_zod10.z.string().min(1),
|
|
481
488
|
/** ID of the evaluator agent to use */
|
|
482
|
-
evaluatorId:
|
|
489
|
+
evaluatorId: import_zod10.z.string()
|
|
483
490
|
});
|
|
484
491
|
|
|
485
492
|
// src/test/tool.ts
|
|
486
|
-
var
|
|
493
|
+
var import_zod11 = require("zod");
|
|
487
494
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
488
|
-
type:
|
|
495
|
+
type: import_zod11.z.literal("TOOL" /* TOOL */),
|
|
489
496
|
/** Name of the tool that should be called */
|
|
490
|
-
toolName:
|
|
497
|
+
toolName: import_zod11.z.string().min(3),
|
|
491
498
|
/** Expected arguments for the tool call */
|
|
492
|
-
args:
|
|
499
|
+
args: import_zod11.z.record(import_zod11.z.string(), import_zod11.z.any()),
|
|
493
500
|
/** Expected content in the tool results */
|
|
494
|
-
resultsContent:
|
|
501
|
+
resultsContent: import_zod11.z.string()
|
|
495
502
|
});
|
|
496
503
|
|
|
497
504
|
// src/test/site-config.ts
|
|
498
|
-
var
|
|
505
|
+
var import_zod12 = require("zod");
|
|
499
506
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
500
|
-
type:
|
|
507
|
+
type: import_zod12.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
501
508
|
/** URL to call */
|
|
502
|
-
url:
|
|
509
|
+
url: import_zod12.z.string().url(),
|
|
503
510
|
/** HTTP method */
|
|
504
|
-
method:
|
|
511
|
+
method: import_zod12.z.enum(["GET", "POST"]),
|
|
505
512
|
/** Request body (for POST) */
|
|
506
|
-
body:
|
|
513
|
+
body: import_zod12.z.string().optional(),
|
|
507
514
|
/** Expected HTTP status code */
|
|
508
|
-
expectedStatusCode:
|
|
515
|
+
expectedStatusCode: import_zod12.z.number().int().min(100).max(599),
|
|
509
516
|
/** Expected response content */
|
|
510
|
-
expectedResponse:
|
|
517
|
+
expectedResponse: import_zod12.z.string().optional(),
|
|
511
518
|
/** JMESPath expression to extract from response */
|
|
512
|
-
expectedResponseJMESPath:
|
|
519
|
+
expectedResponseJMESPath: import_zod12.z.string().optional()
|
|
513
520
|
});
|
|
514
521
|
|
|
515
522
|
// src/test/command-execution.ts
|
|
516
|
-
var
|
|
523
|
+
var import_zod13 = require("zod");
|
|
517
524
|
var AllowedCommands = [
|
|
518
525
|
"yarn install --no-immutable && yarn build",
|
|
519
526
|
"npm run build",
|
|
520
527
|
"yarn typecheck"
|
|
521
528
|
];
|
|
522
529
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
523
|
-
type:
|
|
530
|
+
type: import_zod13.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
524
531
|
/** Command to execute (must be in AllowedCommands) */
|
|
525
|
-
command:
|
|
532
|
+
command: import_zod13.z.string().refine((value) => AllowedCommands.includes(value), {
|
|
526
533
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
527
534
|
}),
|
|
528
535
|
/** Expected exit code (default: 0) */
|
|
529
|
-
expectedExitCode:
|
|
536
|
+
expectedExitCode: import_zod13.z.number().default(0).optional()
|
|
530
537
|
});
|
|
531
538
|
|
|
532
539
|
// src/test/file-presence.ts
|
|
533
|
-
var
|
|
540
|
+
var import_zod14 = require("zod");
|
|
534
541
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
535
|
-
type:
|
|
542
|
+
type: import_zod14.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
536
543
|
/** Paths to check */
|
|
537
|
-
paths:
|
|
544
|
+
paths: import_zod14.z.array(import_zod14.z.string()),
|
|
538
545
|
/** Whether files should exist (true) or not exist (false) */
|
|
539
|
-
shouldExist:
|
|
546
|
+
shouldExist: import_zod14.z.boolean()
|
|
540
547
|
});
|
|
541
548
|
|
|
542
549
|
// src/test/file-content.ts
|
|
543
|
-
var
|
|
544
|
-
var FileContentCheckSchema =
|
|
550
|
+
var import_zod15 = require("zod");
|
|
551
|
+
var FileContentCheckSchema = import_zod15.z.object({
|
|
545
552
|
/** Strings that must be present in the file */
|
|
546
|
-
contains:
|
|
553
|
+
contains: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
547
554
|
/** Strings that must NOT be present in the file */
|
|
548
|
-
notContains:
|
|
555
|
+
notContains: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
549
556
|
/** Regex pattern the content must match */
|
|
550
|
-
matches:
|
|
557
|
+
matches: import_zod15.z.string().optional(),
|
|
551
558
|
/** JSON path checks for structured content */
|
|
552
|
-
jsonPath:
|
|
553
|
-
|
|
554
|
-
path:
|
|
555
|
-
value:
|
|
559
|
+
jsonPath: import_zod15.z.array(
|
|
560
|
+
import_zod15.z.object({
|
|
561
|
+
path: import_zod15.z.string(),
|
|
562
|
+
value: import_zod15.z.unknown()
|
|
556
563
|
})
|
|
557
564
|
).optional(),
|
|
558
565
|
/** Lines that should be added (for diff checking) */
|
|
559
|
-
added:
|
|
566
|
+
added: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
560
567
|
/** Lines that should be removed (for diff checking) */
|
|
561
|
-
removed:
|
|
568
|
+
removed: import_zod15.z.array(import_zod15.z.string()).optional()
|
|
562
569
|
});
|
|
563
570
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
564
|
-
type:
|
|
571
|
+
type: import_zod15.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
565
572
|
/** Path to the file to check */
|
|
566
|
-
path:
|
|
573
|
+
path: import_zod15.z.string(),
|
|
567
574
|
/** Content checks to perform */
|
|
568
575
|
checks: FileContentCheckSchema
|
|
569
576
|
});
|
|
570
577
|
|
|
571
578
|
// src/test/build-check.ts
|
|
572
|
-
var
|
|
579
|
+
var import_zod16 = require("zod");
|
|
573
580
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
574
|
-
type:
|
|
581
|
+
type: import_zod16.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
575
582
|
/** Build command to execute */
|
|
576
|
-
command:
|
|
583
|
+
command: import_zod16.z.string(),
|
|
577
584
|
/** Whether the build should succeed */
|
|
578
|
-
expectSuccess:
|
|
585
|
+
expectSuccess: import_zod16.z.boolean(),
|
|
579
586
|
/** Maximum allowed warnings (optional) */
|
|
580
|
-
allowedWarnings:
|
|
587
|
+
allowedWarnings: import_zod16.z.number().optional(),
|
|
581
588
|
/** Timeout in milliseconds */
|
|
582
|
-
timeout:
|
|
589
|
+
timeout: import_zod16.z.number().optional()
|
|
583
590
|
});
|
|
584
591
|
|
|
585
592
|
// src/test/vitest.ts
|
|
586
|
-
var
|
|
593
|
+
var import_zod17 = require("zod");
|
|
587
594
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
588
|
-
type:
|
|
595
|
+
type: import_zod17.z.literal("VITEST" /* VITEST */),
|
|
589
596
|
/** Test file content */
|
|
590
|
-
testFile:
|
|
597
|
+
testFile: import_zod17.z.string(),
|
|
591
598
|
/** Name of the test file */
|
|
592
|
-
testFileName:
|
|
599
|
+
testFileName: import_zod17.z.string(),
|
|
593
600
|
/** Minimum pass rate required (0-100) */
|
|
594
|
-
minPassRate:
|
|
601
|
+
minPassRate: import_zod17.z.number().min(0).max(100)
|
|
595
602
|
});
|
|
596
603
|
|
|
597
604
|
// src/test/playwright-nl.ts
|
|
598
|
-
var
|
|
605
|
+
var import_zod18 = require("zod");
|
|
599
606
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
600
|
-
type:
|
|
607
|
+
type: import_zod18.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
601
608
|
/** Natural language steps to execute */
|
|
602
|
-
steps:
|
|
609
|
+
steps: import_zod18.z.array(import_zod18.z.string()),
|
|
603
610
|
/** Expected outcome description */
|
|
604
|
-
expectedOutcome:
|
|
611
|
+
expectedOutcome: import_zod18.z.string(),
|
|
605
612
|
/** Timeout in milliseconds */
|
|
606
|
-
timeout:
|
|
613
|
+
timeout: import_zod18.z.number().optional()
|
|
607
614
|
});
|
|
608
615
|
|
|
609
616
|
// src/test/index.ts
|
|
610
|
-
var TestSchema =
|
|
617
|
+
var TestSchema = import_zod19.z.discriminatedUnion("type", [
|
|
611
618
|
LLMTestSchema,
|
|
612
619
|
ToolTestSchema,
|
|
613
620
|
SiteConfigTestSchema,
|
|
@@ -620,66 +627,66 @@ var TestSchema = import_zod18.z.discriminatedUnion("type", [
|
|
|
620
627
|
]);
|
|
621
628
|
|
|
622
629
|
// src/scenario/assertions.ts
|
|
623
|
-
var
|
|
624
|
-
var SkillWasCalledAssertionSchema =
|
|
625
|
-
type:
|
|
630
|
+
var import_zod20 = require("zod");
|
|
631
|
+
var SkillWasCalledAssertionSchema = import_zod20.z.object({
|
|
632
|
+
type: import_zod20.z.literal("skill_was_called"),
|
|
626
633
|
/** Names of the skills that must have been called (matched against trace Skill tool args) */
|
|
627
|
-
skillNames:
|
|
634
|
+
skillNames: import_zod20.z.array(import_zod20.z.string().min(1)).min(1)
|
|
628
635
|
});
|
|
629
|
-
var BuildPassedAssertionSchema =
|
|
630
|
-
type:
|
|
636
|
+
var BuildPassedAssertionSchema = import_zod20.z.object({
|
|
637
|
+
type: import_zod20.z.literal("build_passed"),
|
|
631
638
|
/** Command to run (default: "yarn build") */
|
|
632
|
-
command:
|
|
639
|
+
command: import_zod20.z.string().optional(),
|
|
633
640
|
/** Expected exit code (default: 0) */
|
|
634
|
-
expectedExitCode:
|
|
641
|
+
expectedExitCode: import_zod20.z.number().int().optional()
|
|
635
642
|
});
|
|
636
|
-
var LlmJudgeAssertionSchema =
|
|
637
|
-
type:
|
|
643
|
+
var LlmJudgeAssertionSchema = import_zod20.z.object({
|
|
644
|
+
type: import_zod20.z.literal("llm_judge"),
|
|
638
645
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
639
|
-
prompt:
|
|
646
|
+
prompt: import_zod20.z.string(),
|
|
640
647
|
/** Optional system prompt for the judge (default asks for JSON with score) */
|
|
641
|
-
systemPrompt:
|
|
648
|
+
systemPrompt: import_zod20.z.string().optional(),
|
|
642
649
|
/** Minimum score to pass (0-100, default 70) */
|
|
643
|
-
minScore:
|
|
650
|
+
minScore: import_zod20.z.number().int().min(0).max(100).optional(),
|
|
644
651
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
645
|
-
model:
|
|
646
|
-
maxTokens:
|
|
647
|
-
temperature:
|
|
652
|
+
model: import_zod20.z.string().optional(),
|
|
653
|
+
maxTokens: import_zod20.z.number().int().optional(),
|
|
654
|
+
temperature: import_zod20.z.number().min(0).max(1).optional()
|
|
648
655
|
});
|
|
649
|
-
var AssertionSchema =
|
|
656
|
+
var AssertionSchema = import_zod20.z.union([
|
|
650
657
|
SkillWasCalledAssertionSchema,
|
|
651
658
|
BuildPassedAssertionSchema,
|
|
652
659
|
LlmJudgeAssertionSchema
|
|
653
660
|
]);
|
|
654
661
|
|
|
655
662
|
// src/scenario/environment.ts
|
|
656
|
-
var
|
|
657
|
-
var LocalProjectConfigSchema =
|
|
663
|
+
var import_zod21 = require("zod");
|
|
664
|
+
var LocalProjectConfigSchema = import_zod21.z.object({
|
|
658
665
|
/** Template ID to use for the local project */
|
|
659
|
-
templateId:
|
|
666
|
+
templateId: import_zod21.z.string().optional(),
|
|
660
667
|
/** Files to create in the project */
|
|
661
|
-
files:
|
|
662
|
-
|
|
663
|
-
path:
|
|
664
|
-
content:
|
|
668
|
+
files: import_zod21.z.array(
|
|
669
|
+
import_zod21.z.object({
|
|
670
|
+
path: import_zod21.z.string().min(1),
|
|
671
|
+
content: import_zod21.z.string().min(1)
|
|
665
672
|
})
|
|
666
673
|
).optional()
|
|
667
674
|
});
|
|
668
|
-
var MetaSiteConfigSchema =
|
|
669
|
-
configurations:
|
|
670
|
-
|
|
671
|
-
name:
|
|
672
|
-
apiCalls:
|
|
673
|
-
|
|
674
|
-
url:
|
|
675
|
-
method:
|
|
676
|
-
body:
|
|
675
|
+
var MetaSiteConfigSchema = import_zod21.z.object({
|
|
676
|
+
configurations: import_zod21.z.array(
|
|
677
|
+
import_zod21.z.object({
|
|
678
|
+
name: import_zod21.z.string().min(1),
|
|
679
|
+
apiCalls: import_zod21.z.array(
|
|
680
|
+
import_zod21.z.object({
|
|
681
|
+
url: import_zod21.z.string().url(),
|
|
682
|
+
method: import_zod21.z.enum(["POST", "PUT"]),
|
|
683
|
+
body: import_zod21.z.string()
|
|
677
684
|
})
|
|
678
685
|
)
|
|
679
686
|
})
|
|
680
687
|
).optional()
|
|
681
688
|
});
|
|
682
|
-
var EnvironmentSchema =
|
|
689
|
+
var EnvironmentSchema = import_zod21.z.object({
|
|
683
690
|
/** Local project configuration */
|
|
684
691
|
localProject: LocalProjectConfigSchema.optional(),
|
|
685
692
|
/** Meta site configuration */
|
|
@@ -687,54 +694,54 @@ var EnvironmentSchema = import_zod20.z.object({
|
|
|
687
694
|
});
|
|
688
695
|
|
|
689
696
|
// src/scenario/test-scenario.ts
|
|
690
|
-
var
|
|
697
|
+
var import_zod23 = require("zod");
|
|
691
698
|
|
|
692
699
|
// src/assertion/assertion.ts
|
|
693
|
-
var
|
|
694
|
-
var AssertionTypeSchema =
|
|
700
|
+
var import_zod22 = require("zod");
|
|
701
|
+
var AssertionTypeSchema = import_zod22.z.enum([
|
|
695
702
|
"skill_was_called",
|
|
696
703
|
"build_passed",
|
|
697
704
|
"llm_judge"
|
|
698
705
|
]);
|
|
699
|
-
var AssertionParameterTypeSchema =
|
|
706
|
+
var AssertionParameterTypeSchema = import_zod22.z.enum([
|
|
700
707
|
"string",
|
|
701
708
|
"number",
|
|
702
709
|
"boolean"
|
|
703
710
|
]);
|
|
704
|
-
var AssertionParameterSchema =
|
|
711
|
+
var AssertionParameterSchema = import_zod22.z.object({
|
|
705
712
|
/** Parameter name (used as key in params object) */
|
|
706
|
-
name:
|
|
713
|
+
name: import_zod22.z.string().min(1),
|
|
707
714
|
/** Display label for the parameter */
|
|
708
|
-
label:
|
|
715
|
+
label: import_zod22.z.string().min(1),
|
|
709
716
|
/** Parameter type */
|
|
710
717
|
type: AssertionParameterTypeSchema,
|
|
711
718
|
/** Whether this parameter is required */
|
|
712
|
-
required:
|
|
719
|
+
required: import_zod22.z.boolean(),
|
|
713
720
|
/** Default value (optional, used when not provided) */
|
|
714
|
-
defaultValue:
|
|
721
|
+
defaultValue: import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean()]).optional(),
|
|
715
722
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
716
|
-
advanced:
|
|
723
|
+
advanced: import_zod22.z.boolean().optional()
|
|
717
724
|
});
|
|
718
|
-
var ScenarioAssertionLinkSchema =
|
|
725
|
+
var ScenarioAssertionLinkSchema = import_zod22.z.object({
|
|
719
726
|
/** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
|
|
720
|
-
assertionId:
|
|
727
|
+
assertionId: import_zod22.z.string(),
|
|
721
728
|
/** Parameter values for this assertion in this scenario */
|
|
722
|
-
params:
|
|
723
|
-
|
|
724
|
-
|
|
729
|
+
params: import_zod22.z.record(
|
|
730
|
+
import_zod22.z.string(),
|
|
731
|
+
import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean(), import_zod22.z.null()])
|
|
725
732
|
).optional()
|
|
726
733
|
});
|
|
727
|
-
var SkillWasCalledConfigSchema =
|
|
734
|
+
var SkillWasCalledConfigSchema = import_zod22.z.object({
|
|
728
735
|
/** Names of the skills that must have been called */
|
|
729
|
-
skillNames:
|
|
736
|
+
skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
|
|
730
737
|
});
|
|
731
|
-
var BuildPassedConfigSchema =
|
|
738
|
+
var BuildPassedConfigSchema = import_zod22.z.strictObject({
|
|
732
739
|
/** Command to run (default: "yarn build") */
|
|
733
|
-
command:
|
|
740
|
+
command: import_zod22.z.string().optional(),
|
|
734
741
|
/** Expected exit code (default: 0) */
|
|
735
|
-
expectedExitCode:
|
|
742
|
+
expectedExitCode: import_zod22.z.number().int().optional()
|
|
736
743
|
});
|
|
737
|
-
var LlmJudgeConfigSchema =
|
|
744
|
+
var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
738
745
|
/**
|
|
739
746
|
* Prompt template with placeholders:
|
|
740
747
|
* - {{output}}: agent's final output
|
|
@@ -745,28 +752,28 @@ var LlmJudgeConfigSchema = import_zod21.z.object({
|
|
|
745
752
|
* - {{trace}}: step-by-step trace of tool calls
|
|
746
753
|
* - Custom parameters defined in the parameters array
|
|
747
754
|
*/
|
|
748
|
-
prompt:
|
|
755
|
+
prompt: import_zod22.z.string().min(1),
|
|
749
756
|
/** Optional system prompt for the judge */
|
|
750
|
-
systemPrompt:
|
|
757
|
+
systemPrompt: import_zod22.z.string().optional(),
|
|
751
758
|
/** Minimum score to pass (0-100, default 70) */
|
|
752
|
-
minScore:
|
|
759
|
+
minScore: import_zod22.z.number().int().min(0).max(100).optional(),
|
|
753
760
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
754
|
-
model:
|
|
761
|
+
model: import_zod22.z.string().optional(),
|
|
755
762
|
/** Max output tokens */
|
|
756
|
-
maxTokens:
|
|
763
|
+
maxTokens: import_zod22.z.number().int().optional(),
|
|
757
764
|
/** Temperature (0-1) */
|
|
758
|
-
temperature:
|
|
765
|
+
temperature: import_zod22.z.number().min(0).max(1).optional(),
|
|
759
766
|
/** User-defined parameters for this assertion */
|
|
760
|
-
parameters:
|
|
767
|
+
parameters: import_zod22.z.array(AssertionParameterSchema).optional()
|
|
761
768
|
});
|
|
762
|
-
var AssertionConfigSchema =
|
|
769
|
+
var AssertionConfigSchema = import_zod22.z.union([
|
|
763
770
|
LlmJudgeConfigSchema,
|
|
764
771
|
// requires prompt - check first
|
|
765
772
|
SkillWasCalledConfigSchema,
|
|
766
773
|
// requires skillName
|
|
767
774
|
BuildPassedConfigSchema,
|
|
768
775
|
// all optional, uses strictObject to reject unknown keys
|
|
769
|
-
|
|
776
|
+
import_zod22.z.object({})
|
|
770
777
|
// fallback empty config
|
|
771
778
|
]);
|
|
772
779
|
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
@@ -811,23 +818,23 @@ function getLlmJudgeConfig(assertion) {
|
|
|
811
818
|
}
|
|
812
819
|
|
|
813
820
|
// src/scenario/test-scenario.ts
|
|
814
|
-
var ExpectedFileSchema =
|
|
821
|
+
var ExpectedFileSchema = import_zod23.z.object({
|
|
815
822
|
/** Relative path where the file should be created */
|
|
816
|
-
path:
|
|
823
|
+
path: import_zod23.z.string(),
|
|
817
824
|
/** Optional expected content */
|
|
818
|
-
content:
|
|
825
|
+
content: import_zod23.z.string().optional()
|
|
819
826
|
});
|
|
820
827
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
821
828
|
/** The prompt sent to the agent to trigger the task */
|
|
822
|
-
triggerPrompt:
|
|
829
|
+
triggerPrompt: import_zod23.z.string().min(10),
|
|
823
830
|
/** ID of the template to use for this scenario (null = no template) */
|
|
824
|
-
templateId:
|
|
831
|
+
templateId: import_zod23.z.string().nullish(),
|
|
825
832
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
826
|
-
assertions:
|
|
833
|
+
assertions: import_zod23.z.array(AssertionSchema).optional(),
|
|
827
834
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
828
|
-
assertionIds:
|
|
835
|
+
assertionIds: import_zod23.z.array(import_zod23.z.string()).optional(),
|
|
829
836
|
/** Linked assertions with per-scenario parameter values */
|
|
830
|
-
assertionLinks:
|
|
837
|
+
assertionLinks: import_zod23.z.array(ScenarioAssertionLinkSchema).optional()
|
|
831
838
|
});
|
|
832
839
|
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
833
840
|
id: true,
|
|
@@ -838,10 +845,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
838
845
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
839
846
|
|
|
840
847
|
// src/suite/test-suite.ts
|
|
841
|
-
var
|
|
848
|
+
var import_zod24 = require("zod");
|
|
842
849
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
843
850
|
/** IDs of test scenarios in this suite */
|
|
844
|
-
scenarioIds:
|
|
851
|
+
scenarioIds: import_zod24.z.array(import_zod24.z.string())
|
|
845
852
|
});
|
|
846
853
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
847
854
|
id: true,
|
|
@@ -852,21 +859,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
852
859
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
853
860
|
|
|
854
861
|
// src/evaluation/metrics.ts
|
|
855
|
-
var
|
|
856
|
-
var TokenUsageSchema =
|
|
857
|
-
prompt:
|
|
858
|
-
completion:
|
|
859
|
-
total:
|
|
860
|
-
});
|
|
861
|
-
var EvalMetricsSchema =
|
|
862
|
-
totalAssertions:
|
|
863
|
-
passed:
|
|
864
|
-
failed:
|
|
865
|
-
skipped:
|
|
866
|
-
errors:
|
|
867
|
-
passRate:
|
|
868
|
-
avgDuration:
|
|
869
|
-
totalDuration:
|
|
862
|
+
var import_zod25 = require("zod");
|
|
863
|
+
var TokenUsageSchema = import_zod25.z.object({
|
|
864
|
+
prompt: import_zod25.z.number(),
|
|
865
|
+
completion: import_zod25.z.number(),
|
|
866
|
+
total: import_zod25.z.number()
|
|
867
|
+
});
|
|
868
|
+
var EvalMetricsSchema = import_zod25.z.object({
|
|
869
|
+
totalAssertions: import_zod25.z.number(),
|
|
870
|
+
passed: import_zod25.z.number(),
|
|
871
|
+
failed: import_zod25.z.number(),
|
|
872
|
+
skipped: import_zod25.z.number(),
|
|
873
|
+
errors: import_zod25.z.number(),
|
|
874
|
+
passRate: import_zod25.z.number(),
|
|
875
|
+
avgDuration: import_zod25.z.number(),
|
|
876
|
+
totalDuration: import_zod25.z.number()
|
|
870
877
|
});
|
|
871
878
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
872
879
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -876,7 +883,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
876
883
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
877
884
|
return EvalStatus2;
|
|
878
885
|
})(EvalStatus || {});
|
|
879
|
-
var EvalStatusSchema =
|
|
886
|
+
var EvalStatusSchema = import_zod25.z.enum(EvalStatus);
|
|
880
887
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
881
888
|
LLMStepType2["COMPLETION"] = "completion";
|
|
882
889
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -884,52 +891,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
884
891
|
LLMStepType2["THINKING"] = "thinking";
|
|
885
892
|
return LLMStepType2;
|
|
886
893
|
})(LLMStepType || {});
|
|
887
|
-
var LLMTraceStepSchema =
|
|
888
|
-
id:
|
|
889
|
-
stepNumber:
|
|
890
|
-
type:
|
|
891
|
-
model:
|
|
892
|
-
provider:
|
|
893
|
-
startedAt:
|
|
894
|
-
durationMs:
|
|
894
|
+
var LLMTraceStepSchema = import_zod25.z.object({
|
|
895
|
+
id: import_zod25.z.string(),
|
|
896
|
+
stepNumber: import_zod25.z.number(),
|
|
897
|
+
type: import_zod25.z.enum(LLMStepType),
|
|
898
|
+
model: import_zod25.z.string(),
|
|
899
|
+
provider: import_zod25.z.string(),
|
|
900
|
+
startedAt: import_zod25.z.string(),
|
|
901
|
+
durationMs: import_zod25.z.number(),
|
|
895
902
|
tokenUsage: TokenUsageSchema,
|
|
896
|
-
costUsd:
|
|
897
|
-
toolName:
|
|
898
|
-
toolArguments:
|
|
899
|
-
inputPreview:
|
|
900
|
-
outputPreview:
|
|
901
|
-
success:
|
|
902
|
-
error:
|
|
903
|
-
});
|
|
904
|
-
var LLMBreakdownStatsSchema =
|
|
905
|
-
count:
|
|
906
|
-
durationMs:
|
|
907
|
-
tokens:
|
|
908
|
-
costUsd:
|
|
909
|
-
});
|
|
910
|
-
var LLMTraceSummarySchema =
|
|
911
|
-
totalSteps:
|
|
912
|
-
totalDurationMs:
|
|
903
|
+
costUsd: import_zod25.z.number(),
|
|
904
|
+
toolName: import_zod25.z.string().optional(),
|
|
905
|
+
toolArguments: import_zod25.z.string().optional(),
|
|
906
|
+
inputPreview: import_zod25.z.string().optional(),
|
|
907
|
+
outputPreview: import_zod25.z.string().optional(),
|
|
908
|
+
success: import_zod25.z.boolean(),
|
|
909
|
+
error: import_zod25.z.string().optional()
|
|
910
|
+
});
|
|
911
|
+
var LLMBreakdownStatsSchema = import_zod25.z.object({
|
|
912
|
+
count: import_zod25.z.number(),
|
|
913
|
+
durationMs: import_zod25.z.number(),
|
|
914
|
+
tokens: import_zod25.z.number(),
|
|
915
|
+
costUsd: import_zod25.z.number()
|
|
916
|
+
});
|
|
917
|
+
var LLMTraceSummarySchema = import_zod25.z.object({
|
|
918
|
+
totalSteps: import_zod25.z.number(),
|
|
919
|
+
totalDurationMs: import_zod25.z.number(),
|
|
913
920
|
totalTokens: TokenUsageSchema,
|
|
914
|
-
totalCostUsd:
|
|
915
|
-
stepTypeBreakdown:
|
|
916
|
-
modelBreakdown:
|
|
917
|
-
modelsUsed:
|
|
918
|
-
});
|
|
919
|
-
var LLMTraceSchema =
|
|
920
|
-
id:
|
|
921
|
-
steps:
|
|
921
|
+
totalCostUsd: import_zod25.z.number(),
|
|
922
|
+
stepTypeBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
923
|
+
modelBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema),
|
|
924
|
+
modelsUsed: import_zod25.z.array(import_zod25.z.string())
|
|
925
|
+
});
|
|
926
|
+
var LLMTraceSchema = import_zod25.z.object({
|
|
927
|
+
id: import_zod25.z.string(),
|
|
928
|
+
steps: import_zod25.z.array(LLMTraceStepSchema),
|
|
922
929
|
summary: LLMTraceSummarySchema
|
|
923
930
|
});
|
|
924
931
|
|
|
925
932
|
// src/evaluation/eval-result.ts
|
|
926
|
-
var
|
|
933
|
+
var import_zod28 = require("zod");
|
|
927
934
|
|
|
928
935
|
// src/evaluation/eval-run.ts
|
|
929
|
-
var
|
|
936
|
+
var import_zod27 = require("zod");
|
|
930
937
|
|
|
931
938
|
// src/evaluation/live-trace.ts
|
|
932
|
-
var
|
|
939
|
+
var import_zod26 = require("zod");
|
|
933
940
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
934
941
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
935
942
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -943,37 +950,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
943
950
|
LiveTraceEventType2["USER"] = "user";
|
|
944
951
|
return LiveTraceEventType2;
|
|
945
952
|
})(LiveTraceEventType || {});
|
|
946
|
-
var LiveTraceEventSchema =
|
|
953
|
+
var LiveTraceEventSchema = import_zod26.z.object({
|
|
947
954
|
/** The evaluation run ID */
|
|
948
|
-
evalRunId:
|
|
955
|
+
evalRunId: import_zod26.z.string(),
|
|
949
956
|
/** The scenario ID being executed */
|
|
950
|
-
scenarioId:
|
|
957
|
+
scenarioId: import_zod26.z.string(),
|
|
951
958
|
/** The scenario name for display */
|
|
952
|
-
scenarioName:
|
|
959
|
+
scenarioName: import_zod26.z.string(),
|
|
953
960
|
/** The target ID (skill, agent, etc.) */
|
|
954
|
-
targetId:
|
|
961
|
+
targetId: import_zod26.z.string(),
|
|
955
962
|
/** The target name for display */
|
|
956
|
-
targetName:
|
|
963
|
+
targetName: import_zod26.z.string(),
|
|
957
964
|
/** Step number in the current scenario execution */
|
|
958
|
-
stepNumber:
|
|
965
|
+
stepNumber: import_zod26.z.number(),
|
|
959
966
|
/** Type of trace event */
|
|
960
|
-
type:
|
|
967
|
+
type: import_zod26.z.enum(LiveTraceEventType),
|
|
961
968
|
/** Tool name if this is a tool_use event */
|
|
962
|
-
toolName:
|
|
969
|
+
toolName: import_zod26.z.string().optional(),
|
|
963
970
|
/** Tool arguments preview (truncated JSON) */
|
|
964
|
-
toolArgs:
|
|
971
|
+
toolArgs: import_zod26.z.string().optional(),
|
|
965
972
|
/** Output preview (truncated text) */
|
|
966
|
-
outputPreview:
|
|
973
|
+
outputPreview: import_zod26.z.string().optional(),
|
|
967
974
|
/** File path for file operations */
|
|
968
|
-
filePath:
|
|
975
|
+
filePath: import_zod26.z.string().optional(),
|
|
969
976
|
/** Elapsed time in milliseconds for progress events */
|
|
970
|
-
elapsedMs:
|
|
977
|
+
elapsedMs: import_zod26.z.number().optional(),
|
|
971
978
|
/** Thinking/reasoning text from Claude */
|
|
972
|
-
thinking:
|
|
979
|
+
thinking: import_zod26.z.string().optional(),
|
|
973
980
|
/** Timestamp when this event occurred */
|
|
974
|
-
timestamp:
|
|
981
|
+
timestamp: import_zod26.z.string(),
|
|
975
982
|
/** Whether this is the final event for this scenario */
|
|
976
|
-
isComplete:
|
|
983
|
+
isComplete: import_zod26.z.boolean()
|
|
977
984
|
});
|
|
978
985
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
979
986
|
function parseTraceEventLine(line) {
|
|
@@ -1001,14 +1008,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1001
1008
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
1002
1009
|
return TriggerType2;
|
|
1003
1010
|
})(TriggerType || {});
|
|
1004
|
-
var TriggerMetadataSchema =
|
|
1005
|
-
version:
|
|
1006
|
-
resourceUpdated:
|
|
1011
|
+
var TriggerMetadataSchema = import_zod27.z.object({
|
|
1012
|
+
version: import_zod27.z.string().optional(),
|
|
1013
|
+
resourceUpdated: import_zod27.z.array(import_zod27.z.string()).optional()
|
|
1007
1014
|
});
|
|
1008
|
-
var TriggerSchema =
|
|
1009
|
-
id:
|
|
1015
|
+
var TriggerSchema = import_zod27.z.object({
|
|
1016
|
+
id: import_zod27.z.string(),
|
|
1010
1017
|
metadata: TriggerMetadataSchema.optional(),
|
|
1011
|
-
type:
|
|
1018
|
+
type: import_zod27.z.enum(TriggerType)
|
|
1012
1019
|
});
|
|
1013
1020
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1014
1021
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -1026,28 +1033,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
1026
1033
|
FailureSeverity2["LOW"] = "low";
|
|
1027
1034
|
return FailureSeverity2;
|
|
1028
1035
|
})(FailureSeverity || {});
|
|
1029
|
-
var DiffLineTypeSchema =
|
|
1030
|
-
var DiffLineSchema =
|
|
1036
|
+
var DiffLineTypeSchema = import_zod27.z.enum(["added", "removed", "unchanged"]);
|
|
1037
|
+
var DiffLineSchema = import_zod27.z.object({
|
|
1031
1038
|
type: DiffLineTypeSchema,
|
|
1032
|
-
content:
|
|
1033
|
-
lineNumber:
|
|
1034
|
-
});
|
|
1035
|
-
var DiffContentSchema =
|
|
1036
|
-
path:
|
|
1037
|
-
expected:
|
|
1038
|
-
actual:
|
|
1039
|
-
diffLines:
|
|
1040
|
-
renamedFrom:
|
|
1041
|
-
});
|
|
1042
|
-
var CommandExecutionSchema =
|
|
1043
|
-
command:
|
|
1044
|
-
exitCode:
|
|
1045
|
-
output:
|
|
1046
|
-
duration:
|
|
1047
|
-
});
|
|
1048
|
-
var FileModificationSchema =
|
|
1049
|
-
path:
|
|
1050
|
-
action:
|
|
1039
|
+
content: import_zod27.z.string(),
|
|
1040
|
+
lineNumber: import_zod27.z.number()
|
|
1041
|
+
});
|
|
1042
|
+
var DiffContentSchema = import_zod27.z.object({
|
|
1043
|
+
path: import_zod27.z.string(),
|
|
1044
|
+
expected: import_zod27.z.string(),
|
|
1045
|
+
actual: import_zod27.z.string(),
|
|
1046
|
+
diffLines: import_zod27.z.array(DiffLineSchema),
|
|
1047
|
+
renamedFrom: import_zod27.z.string().optional()
|
|
1048
|
+
});
|
|
1049
|
+
var CommandExecutionSchema = import_zod27.z.object({
|
|
1050
|
+
command: import_zod27.z.string(),
|
|
1051
|
+
exitCode: import_zod27.z.number(),
|
|
1052
|
+
output: import_zod27.z.string().optional(),
|
|
1053
|
+
duration: import_zod27.z.number()
|
|
1054
|
+
});
|
|
1055
|
+
var FileModificationSchema = import_zod27.z.object({
|
|
1056
|
+
path: import_zod27.z.string(),
|
|
1057
|
+
action: import_zod27.z.enum(["created", "modified", "deleted"])
|
|
1051
1058
|
});
|
|
1052
1059
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1053
1060
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1055,81 +1062,81 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1055
1062
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1056
1063
|
return TemplateFileStatus2;
|
|
1057
1064
|
})(TemplateFileStatus || {});
|
|
1058
|
-
var TemplateFileSchema =
|
|
1065
|
+
var TemplateFileSchema = import_zod27.z.object({
|
|
1059
1066
|
/** Relative path within the template */
|
|
1060
|
-
path:
|
|
1067
|
+
path: import_zod27.z.string(),
|
|
1061
1068
|
/** Full file content after execution */
|
|
1062
|
-
content:
|
|
1069
|
+
content: import_zod27.z.string(),
|
|
1063
1070
|
/** File status (new, modified, unchanged) */
|
|
1064
|
-
status:
|
|
1065
|
-
});
|
|
1066
|
-
var ApiCallSchema =
|
|
1067
|
-
endpoint:
|
|
1068
|
-
tokensUsed:
|
|
1069
|
-
duration:
|
|
1070
|
-
});
|
|
1071
|
-
var ExecutionTraceSchema =
|
|
1072
|
-
commands:
|
|
1073
|
-
filesModified:
|
|
1074
|
-
apiCalls:
|
|
1075
|
-
totalDuration:
|
|
1076
|
-
});
|
|
1077
|
-
var FailureAnalysisSchema =
|
|
1078
|
-
category:
|
|
1079
|
-
severity:
|
|
1080
|
-
summary:
|
|
1081
|
-
details:
|
|
1082
|
-
rootCause:
|
|
1083
|
-
suggestedFix:
|
|
1084
|
-
relatedAssertions:
|
|
1085
|
-
codeSnippet:
|
|
1086
|
-
similarIssues:
|
|
1087
|
-
patternId:
|
|
1071
|
+
status: import_zod27.z.enum(["new", "modified", "unchanged"])
|
|
1072
|
+
});
|
|
1073
|
+
var ApiCallSchema = import_zod27.z.object({
|
|
1074
|
+
endpoint: import_zod27.z.string(),
|
|
1075
|
+
tokensUsed: import_zod27.z.number(),
|
|
1076
|
+
duration: import_zod27.z.number()
|
|
1077
|
+
});
|
|
1078
|
+
var ExecutionTraceSchema = import_zod27.z.object({
|
|
1079
|
+
commands: import_zod27.z.array(CommandExecutionSchema),
|
|
1080
|
+
filesModified: import_zod27.z.array(FileModificationSchema),
|
|
1081
|
+
apiCalls: import_zod27.z.array(ApiCallSchema),
|
|
1082
|
+
totalDuration: import_zod27.z.number()
|
|
1083
|
+
});
|
|
1084
|
+
var FailureAnalysisSchema = import_zod27.z.object({
|
|
1085
|
+
category: import_zod27.z.enum(FailureCategory),
|
|
1086
|
+
severity: import_zod27.z.enum(FailureSeverity),
|
|
1087
|
+
summary: import_zod27.z.string(),
|
|
1088
|
+
details: import_zod27.z.string(),
|
|
1089
|
+
rootCause: import_zod27.z.string(),
|
|
1090
|
+
suggestedFix: import_zod27.z.string(),
|
|
1091
|
+
relatedAssertions: import_zod27.z.array(import_zod27.z.string()),
|
|
1092
|
+
codeSnippet: import_zod27.z.string().optional(),
|
|
1093
|
+
similarIssues: import_zod27.z.array(import_zod27.z.string()).optional(),
|
|
1094
|
+
patternId: import_zod27.z.string().optional(),
|
|
1088
1095
|
// Extended fields for detailed debugging
|
|
1089
1096
|
diff: DiffContentSchema.optional(),
|
|
1090
1097
|
executionTrace: ExecutionTraceSchema.optional()
|
|
1091
1098
|
});
|
|
1092
1099
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1093
1100
|
/** Agent ID for this run */
|
|
1094
|
-
agentId:
|
|
1101
|
+
agentId: import_zod27.z.string().optional(),
|
|
1095
1102
|
/** Skills group ID for this run */
|
|
1096
|
-
skillsGroupId:
|
|
1103
|
+
skillsGroupId: import_zod27.z.string().optional(),
|
|
1097
1104
|
/** Map of skillId to skillVersionId for this run */
|
|
1098
|
-
skillVersions:
|
|
1105
|
+
skillVersions: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.string()).optional(),
|
|
1099
1106
|
/** Scenario IDs to run */
|
|
1100
|
-
scenarioIds:
|
|
1107
|
+
scenarioIds: import_zod27.z.array(import_zod27.z.string()),
|
|
1101
1108
|
/** Current status */
|
|
1102
1109
|
status: EvalStatusSchema,
|
|
1103
1110
|
/** Progress percentage (0-100) */
|
|
1104
|
-
progress:
|
|
1111
|
+
progress: import_zod27.z.number(),
|
|
1105
1112
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1106
|
-
results:
|
|
1113
|
+
results: import_zod27.z.array(import_zod27.z.lazy(() => EvalRunResultSchema)),
|
|
1107
1114
|
/** Aggregated metrics across all results */
|
|
1108
1115
|
aggregateMetrics: EvalMetricsSchema,
|
|
1109
1116
|
/** Failure analyses */
|
|
1110
|
-
failureAnalyses:
|
|
1117
|
+
failureAnalyses: import_zod27.z.array(FailureAnalysisSchema).optional(),
|
|
1111
1118
|
/** Aggregated LLM trace summary */
|
|
1112
1119
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1113
1120
|
/** What triggered this run */
|
|
1114
1121
|
trigger: TriggerSchema.optional(),
|
|
1115
1122
|
/** When the run started (set when evaluation is triggered) */
|
|
1116
|
-
startedAt:
|
|
1123
|
+
startedAt: import_zod27.z.string().optional(),
|
|
1117
1124
|
/** When the run completed */
|
|
1118
|
-
completedAt:
|
|
1125
|
+
completedAt: import_zod27.z.string().optional(),
|
|
1119
1126
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1120
|
-
liveTraceEvents:
|
|
1127
|
+
liveTraceEvents: import_zod27.z.array(LiveTraceEventSchema).optional(),
|
|
1121
1128
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1122
|
-
jobId:
|
|
1129
|
+
jobId: import_zod27.z.string().optional(),
|
|
1123
1130
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1124
|
-
jobStatus:
|
|
1131
|
+
jobStatus: import_zod27.z.string().optional(),
|
|
1125
1132
|
/** Remote job error message if the job failed */
|
|
1126
|
-
jobError:
|
|
1133
|
+
jobError: import_zod27.z.string().optional(),
|
|
1127
1134
|
/** Timestamp of the last job status check */
|
|
1128
|
-
jobStatusCheckedAt:
|
|
1135
|
+
jobStatusCheckedAt: import_zod27.z.string().optional(),
|
|
1129
1136
|
/** MCP server IDs to enable for this run (optional) */
|
|
1130
|
-
mcpIds:
|
|
1137
|
+
mcpIds: import_zod27.z.array(import_zod27.z.string()).optional(),
|
|
1131
1138
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1132
|
-
subAgentIds:
|
|
1139
|
+
subAgentIds: import_zod27.z.array(import_zod27.z.string()).optional()
|
|
1133
1140
|
});
|
|
1134
1141
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1135
1142
|
id: true,
|
|
@@ -1142,28 +1149,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1142
1149
|
startedAt: true,
|
|
1143
1150
|
completedAt: true
|
|
1144
1151
|
});
|
|
1145
|
-
var EvaluationProgressSchema =
|
|
1146
|
-
runId:
|
|
1147
|
-
targetId:
|
|
1148
|
-
totalScenarios:
|
|
1149
|
-
completedScenarios:
|
|
1150
|
-
scenarioProgress:
|
|
1151
|
-
|
|
1152
|
-
scenarioId:
|
|
1153
|
-
currentStep:
|
|
1154
|
-
error:
|
|
1152
|
+
var EvaluationProgressSchema = import_zod27.z.object({
|
|
1153
|
+
runId: import_zod27.z.string(),
|
|
1154
|
+
targetId: import_zod27.z.string(),
|
|
1155
|
+
totalScenarios: import_zod27.z.number(),
|
|
1156
|
+
completedScenarios: import_zod27.z.number(),
|
|
1157
|
+
scenarioProgress: import_zod27.z.array(
|
|
1158
|
+
import_zod27.z.object({
|
|
1159
|
+
scenarioId: import_zod27.z.string(),
|
|
1160
|
+
currentStep: import_zod27.z.string(),
|
|
1161
|
+
error: import_zod27.z.string().optional()
|
|
1155
1162
|
})
|
|
1156
1163
|
),
|
|
1157
|
-
createdAt:
|
|
1164
|
+
createdAt: import_zod27.z.number()
|
|
1158
1165
|
});
|
|
1159
|
-
var EvaluationLogSchema =
|
|
1160
|
-
runId:
|
|
1161
|
-
scenarioId:
|
|
1162
|
-
log:
|
|
1163
|
-
level:
|
|
1164
|
-
message:
|
|
1165
|
-
args:
|
|
1166
|
-
error:
|
|
1166
|
+
var EvaluationLogSchema = import_zod27.z.object({
|
|
1167
|
+
runId: import_zod27.z.string(),
|
|
1168
|
+
scenarioId: import_zod27.z.string(),
|
|
1169
|
+
log: import_zod27.z.object({
|
|
1170
|
+
level: import_zod27.z.enum(["info", "error", "debug"]),
|
|
1171
|
+
message: import_zod27.z.string().optional(),
|
|
1172
|
+
args: import_zod27.z.array(import_zod27.z.any()).optional(),
|
|
1173
|
+
error: import_zod27.z.string().optional()
|
|
1167
1174
|
})
|
|
1168
1175
|
});
|
|
1169
1176
|
var LLM_TIMEOUT = 12e4;
|
|
@@ -1176,95 +1183,95 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1176
1183
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1177
1184
|
return AssertionResultStatus2;
|
|
1178
1185
|
})(AssertionResultStatus || {});
|
|
1179
|
-
var AssertionResultSchema =
|
|
1180
|
-
id:
|
|
1181
|
-
assertionId:
|
|
1182
|
-
assertionType:
|
|
1183
|
-
assertionName:
|
|
1184
|
-
status:
|
|
1185
|
-
message:
|
|
1186
|
-
expected:
|
|
1187
|
-
actual:
|
|
1188
|
-
duration:
|
|
1189
|
-
details:
|
|
1190
|
-
llmTraceSteps:
|
|
1191
|
-
});
|
|
1192
|
-
var EvalRunResultSchema =
|
|
1193
|
-
id:
|
|
1194
|
-
targetId:
|
|
1195
|
-
targetName:
|
|
1186
|
+
var AssertionResultSchema = import_zod28.z.object({
|
|
1187
|
+
id: import_zod28.z.string(),
|
|
1188
|
+
assertionId: import_zod28.z.string(),
|
|
1189
|
+
assertionType: import_zod28.z.string(),
|
|
1190
|
+
assertionName: import_zod28.z.string(),
|
|
1191
|
+
status: import_zod28.z.enum(AssertionResultStatus),
|
|
1192
|
+
message: import_zod28.z.string().optional(),
|
|
1193
|
+
expected: import_zod28.z.string().optional(),
|
|
1194
|
+
actual: import_zod28.z.string().optional(),
|
|
1195
|
+
duration: import_zod28.z.number().optional(),
|
|
1196
|
+
details: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.unknown()).optional(),
|
|
1197
|
+
llmTraceSteps: import_zod28.z.array(LLMTraceStepSchema).optional()
|
|
1198
|
+
});
|
|
1199
|
+
var EvalRunResultSchema = import_zod28.z.object({
|
|
1200
|
+
id: import_zod28.z.string(),
|
|
1201
|
+
targetId: import_zod28.z.string(),
|
|
1202
|
+
targetName: import_zod28.z.string().optional(),
|
|
1196
1203
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1197
|
-
skillVersionId:
|
|
1204
|
+
skillVersionId: import_zod28.z.string().optional(),
|
|
1198
1205
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1199
|
-
skillVersion:
|
|
1200
|
-
scenarioId:
|
|
1201
|
-
scenarioName:
|
|
1206
|
+
skillVersion: import_zod28.z.string().optional(),
|
|
1207
|
+
scenarioId: import_zod28.z.string(),
|
|
1208
|
+
scenarioName: import_zod28.z.string(),
|
|
1202
1209
|
modelConfig: ModelConfigSchema.optional(),
|
|
1203
|
-
assertionResults:
|
|
1210
|
+
assertionResults: import_zod28.z.array(AssertionResultSchema),
|
|
1204
1211
|
metrics: EvalMetricsSchema.optional(),
|
|
1205
|
-
passed:
|
|
1206
|
-
failed:
|
|
1207
|
-
passRate:
|
|
1208
|
-
duration:
|
|
1209
|
-
outputText:
|
|
1210
|
-
files:
|
|
1211
|
-
fileDiffs:
|
|
1212
|
+
passed: import_zod28.z.number(),
|
|
1213
|
+
failed: import_zod28.z.number(),
|
|
1214
|
+
passRate: import_zod28.z.number(),
|
|
1215
|
+
duration: import_zod28.z.number(),
|
|
1216
|
+
outputText: import_zod28.z.string().optional(),
|
|
1217
|
+
files: import_zod28.z.array(ExpectedFileSchema).optional(),
|
|
1218
|
+
fileDiffs: import_zod28.z.array(DiffContentSchema).optional(),
|
|
1212
1219
|
/** Full template files after execution with status indicators */
|
|
1213
|
-
templateFiles:
|
|
1214
|
-
startedAt:
|
|
1215
|
-
completedAt:
|
|
1220
|
+
templateFiles: import_zod28.z.array(TemplateFileSchema).optional(),
|
|
1221
|
+
startedAt: import_zod28.z.string().optional(),
|
|
1222
|
+
completedAt: import_zod28.z.string().optional(),
|
|
1216
1223
|
llmTrace: LLMTraceSchema.optional()
|
|
1217
1224
|
});
|
|
1218
|
-
var PromptResultSchema =
|
|
1219
|
-
text:
|
|
1220
|
-
files:
|
|
1221
|
-
finishReason:
|
|
1222
|
-
reasoning:
|
|
1223
|
-
reasoningDetails:
|
|
1224
|
-
toolCalls:
|
|
1225
|
-
toolResults:
|
|
1226
|
-
warnings:
|
|
1227
|
-
sources:
|
|
1228
|
-
steps:
|
|
1229
|
-
generationTimeMs:
|
|
1230
|
-
prompt:
|
|
1231
|
-
systemPrompt:
|
|
1232
|
-
usage:
|
|
1233
|
-
totalTokens:
|
|
1234
|
-
totalMicrocentsSpent:
|
|
1225
|
+
var PromptResultSchema = import_zod28.z.object({
|
|
1226
|
+
text: import_zod28.z.string(),
|
|
1227
|
+
files: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1228
|
+
finishReason: import_zod28.z.string().optional(),
|
|
1229
|
+
reasoning: import_zod28.z.string().optional(),
|
|
1230
|
+
reasoningDetails: import_zod28.z.unknown().optional(),
|
|
1231
|
+
toolCalls: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1232
|
+
toolResults: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1233
|
+
warnings: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1234
|
+
sources: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1235
|
+
steps: import_zod28.z.array(import_zod28.z.unknown()),
|
|
1236
|
+
generationTimeMs: import_zod28.z.number(),
|
|
1237
|
+
prompt: import_zod28.z.string(),
|
|
1238
|
+
systemPrompt: import_zod28.z.string(),
|
|
1239
|
+
usage: import_zod28.z.object({
|
|
1240
|
+
totalTokens: import_zod28.z.number().optional(),
|
|
1241
|
+
totalMicrocentsSpent: import_zod28.z.number().optional()
|
|
1235
1242
|
})
|
|
1236
1243
|
});
|
|
1237
|
-
var EvaluationResultSchema =
|
|
1238
|
-
id:
|
|
1239
|
-
runId:
|
|
1240
|
-
timestamp:
|
|
1244
|
+
var EvaluationResultSchema = import_zod28.z.object({
|
|
1245
|
+
id: import_zod28.z.string(),
|
|
1246
|
+
runId: import_zod28.z.string(),
|
|
1247
|
+
timestamp: import_zod28.z.number(),
|
|
1241
1248
|
promptResult: PromptResultSchema,
|
|
1242
|
-
testResults:
|
|
1243
|
-
tags:
|
|
1244
|
-
feedback:
|
|
1245
|
-
score:
|
|
1246
|
-
suiteId:
|
|
1247
|
-
});
|
|
1248
|
-
var LeanEvaluationResultSchema =
|
|
1249
|
-
id:
|
|
1250
|
-
runId:
|
|
1251
|
-
timestamp:
|
|
1252
|
-
tags:
|
|
1253
|
-
scenarioId:
|
|
1254
|
-
scenarioVersion:
|
|
1255
|
-
targetId:
|
|
1256
|
-
targetVersion:
|
|
1257
|
-
suiteId:
|
|
1258
|
-
score:
|
|
1259
|
-
time:
|
|
1260
|
-
microcentsSpent:
|
|
1249
|
+
testResults: import_zod28.z.array(import_zod28.z.unknown()),
|
|
1250
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1251
|
+
feedback: import_zod28.z.string().optional(),
|
|
1252
|
+
score: import_zod28.z.number(),
|
|
1253
|
+
suiteId: import_zod28.z.string().optional()
|
|
1254
|
+
});
|
|
1255
|
+
var LeanEvaluationResultSchema = import_zod28.z.object({
|
|
1256
|
+
id: import_zod28.z.string(),
|
|
1257
|
+
runId: import_zod28.z.string(),
|
|
1258
|
+
timestamp: import_zod28.z.number(),
|
|
1259
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1260
|
+
scenarioId: import_zod28.z.string(),
|
|
1261
|
+
scenarioVersion: import_zod28.z.number().optional(),
|
|
1262
|
+
targetId: import_zod28.z.string(),
|
|
1263
|
+
targetVersion: import_zod28.z.number().optional(),
|
|
1264
|
+
suiteId: import_zod28.z.string().optional(),
|
|
1265
|
+
score: import_zod28.z.number(),
|
|
1266
|
+
time: import_zod28.z.number().optional(),
|
|
1267
|
+
microcentsSpent: import_zod28.z.number().optional()
|
|
1261
1268
|
});
|
|
1262
1269
|
|
|
1263
1270
|
// src/project/project.ts
|
|
1264
|
-
var
|
|
1271
|
+
var import_zod29 = require("zod");
|
|
1265
1272
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1266
|
-
appId:
|
|
1267
|
-
appSecret:
|
|
1273
|
+
appId: import_zod29.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
1274
|
+
appSecret: import_zod29.z.string().optional().describe("The secret of the app in Dev Center")
|
|
1268
1275
|
});
|
|
1269
1276
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1270
1277
|
id: true,
|
|
@@ -1275,10 +1282,9 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1275
1282
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
1276
1283
|
|
|
1277
1284
|
// src/template/template.ts
|
|
1278
|
-
var import_zod29 = require("zod");
|
|
1279
1285
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
1280
|
-
/**
|
|
1281
|
-
|
|
1286
|
+
/** GitHub source reference for fetching template files */
|
|
1287
|
+
source: GitHubSourceSchema.optional()
|
|
1282
1288
|
});
|
|
1283
1289
|
var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
1284
1290
|
id: true,
|