@wix/evalforge-types 0.28.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +435 -433
- package/build/index.js.map +4 -4
- package/build/index.mjs +435 -433
- package/build/index.mjs.map +4 -4
- package/build/types/common/github-source.d.ts +12 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/target/skill.d.ts +3 -11
- package/build/types/template/template.d.ts +20 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -172,14 +172,27 @@ var TenantEntitySchema = BaseEntitySchema.extend({
|
|
|
172
172
|
projectId: import_zod.z.string()
|
|
173
173
|
});
|
|
174
174
|
|
|
175
|
-
// src/common/
|
|
175
|
+
// src/common/github-source.ts
|
|
176
176
|
var import_zod2 = require("zod");
|
|
177
|
+
var GitHubSourceSchema = import_zod2.z.object({
|
|
178
|
+
/** GitHub org or user, e.g. "wix" */
|
|
179
|
+
owner: import_zod2.z.string().min(1),
|
|
180
|
+
/** Repository name, e.g. "skills" */
|
|
181
|
+
repo: import_zod2.z.string().min(1),
|
|
182
|
+
/** Folder path within the repo, e.g. "wix-cli/skills/wix-cli-dashboard-page" */
|
|
183
|
+
path: import_zod2.z.string().min(1),
|
|
184
|
+
/** Git ref (branch, tag, or SHA), e.g. "master" or "v1.2.0" */
|
|
185
|
+
ref: import_zod2.z.string().min(1)
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
// src/common/mcp.ts
|
|
189
|
+
var import_zod3 = require("zod");
|
|
177
190
|
var MCP_SERVERS_JSON_KEY = "mcpServers";
|
|
178
191
|
var MCPEntitySchema = TenantEntitySchema.extend({
|
|
179
192
|
/** Display name for the MCP entity (independent of the server key in config) */
|
|
180
|
-
name:
|
|
193
|
+
name: import_zod3.z.string().min(1),
|
|
181
194
|
/** Keyed MCP server config — top-level key is the server name, value is its config */
|
|
182
|
-
config:
|
|
195
|
+
config: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.unknown())
|
|
183
196
|
});
|
|
184
197
|
var CreateMcpInputSchema = MCPEntitySchema.omit({
|
|
185
198
|
id: true,
|
|
@@ -188,10 +201,10 @@ var CreateMcpInputSchema = MCPEntitySchema.omit({
|
|
|
188
201
|
deleted: true
|
|
189
202
|
});
|
|
190
203
|
var UpdateMcpInputSchema = CreateMcpInputSchema.partial();
|
|
191
|
-
var MCPServerConfigSchema =
|
|
204
|
+
var MCPServerConfigSchema = import_zod3.z.record(import_zod3.z.string(), import_zod3.z.unknown());
|
|
192
205
|
|
|
193
206
|
// src/common/models.ts
|
|
194
|
-
var
|
|
207
|
+
var import_zod4 = require("zod");
|
|
195
208
|
var ModelIds = /* @__PURE__ */ ((ModelIds2) => {
|
|
196
209
|
ModelIds2["CLAUDE_3_HAIKU_1_0"] = "CLAUDE_3_HAIKU_1_0";
|
|
197
210
|
ModelIds2["CLAUDE_3_OPUS_1_0"] = "CLAUDE_3_OPUS_1_0";
|
|
@@ -203,29 +216,29 @@ var ModelIds = /* @__PURE__ */ ((ModelIds2) => {
|
|
|
203
216
|
ModelIds2["CLAUDE_4_SONNET_1_0"] = "CLAUDE_4_SONNET_1_0";
|
|
204
217
|
return ModelIds2;
|
|
205
218
|
})(ModelIds || {});
|
|
206
|
-
var ModelIdsSchema =
|
|
219
|
+
var ModelIdsSchema = import_zod4.z.enum(ModelIds);
|
|
207
220
|
var nullToUndefined = (val) => val === null ? void 0 : val;
|
|
208
|
-
var ModelConfigSchema =
|
|
221
|
+
var ModelConfigSchema = import_zod4.z.object({
|
|
209
222
|
model: ModelIdsSchema,
|
|
210
|
-
temperature:
|
|
223
|
+
temperature: import_zod4.z.preprocess(
|
|
211
224
|
nullToUndefined,
|
|
212
|
-
|
|
225
|
+
import_zod4.z.number().min(0).max(1).optional()
|
|
213
226
|
),
|
|
214
|
-
maxTokens:
|
|
227
|
+
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional())
|
|
215
228
|
});
|
|
216
|
-
var ModelPricingSchema =
|
|
217
|
-
inputPer1M:
|
|
218
|
-
outputPer1M:
|
|
229
|
+
var ModelPricingSchema = import_zod4.z.object({
|
|
230
|
+
inputPer1M: import_zod4.z.number(),
|
|
231
|
+
outputPer1M: import_zod4.z.number()
|
|
219
232
|
});
|
|
220
|
-
var ModelSchema =
|
|
233
|
+
var ModelSchema = import_zod4.z.object({
|
|
221
234
|
/** AI Gateway model ID */
|
|
222
235
|
id: ModelIdsSchema,
|
|
223
236
|
/** Display name */
|
|
224
|
-
name:
|
|
237
|
+
name: import_zod4.z.string(),
|
|
225
238
|
/** Provider (always 'anthropic') */
|
|
226
|
-
provider:
|
|
239
|
+
provider: import_zod4.z.literal("anthropic"),
|
|
227
240
|
/** Provider's model identifier (e.g., "claude-3-5-sonnet-20241022") */
|
|
228
|
-
providerModelId:
|
|
241
|
+
providerModelId: import_zod4.z.string(),
|
|
229
242
|
/** Pricing per 1M tokens */
|
|
230
243
|
pricing: ModelPricingSchema
|
|
231
244
|
});
|
|
@@ -298,10 +311,10 @@ var TargetSchema = TenantEntitySchema.extend({
|
|
|
298
311
|
});
|
|
299
312
|
|
|
300
313
|
// src/target/agent.ts
|
|
301
|
-
var
|
|
314
|
+
var import_zod5 = require("zod");
|
|
302
315
|
var AgentSchema = TargetSchema.extend({
|
|
303
316
|
/** Command to run the agent */
|
|
304
|
-
runCommand:
|
|
317
|
+
runCommand: import_zod5.z.string(),
|
|
305
318
|
/** Optional model configuration override */
|
|
306
319
|
modelConfig: ModelConfigSchema.optional()
|
|
307
320
|
});
|
|
@@ -316,61 +329,51 @@ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
|
316
329
|
});
|
|
317
330
|
|
|
318
331
|
// src/target/skill.ts
|
|
319
|
-
var
|
|
332
|
+
var import_zod6 = require("zod");
|
|
320
333
|
var SKILL_FOLDER_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
321
334
|
var SEMVER_REGEX = /^\d+\.\d+\.\d+$/;
|
|
322
|
-
var SkillVersionOriginSchema =
|
|
335
|
+
var SkillVersionOriginSchema = import_zod6.z.enum(["manual", "pr", "master"]);
|
|
323
336
|
function isValidSkillFolderName(name) {
|
|
324
337
|
return typeof name === "string" && name.length > 0 && SKILL_FOLDER_NAME_REGEX.test(name.trim());
|
|
325
338
|
}
|
|
326
|
-
var SkillMetadataSchema =
|
|
327
|
-
name:
|
|
328
|
-
description:
|
|
329
|
-
allowedTools:
|
|
330
|
-
skills:
|
|
331
|
-
});
|
|
332
|
-
var GitHubSourceSchema = import_zod5.z.object({
|
|
333
|
-
/** GitHub org or user, e.g. "wix" */
|
|
334
|
-
owner: import_zod5.z.string().min(1),
|
|
335
|
-
/** Repository name, e.g. "skills" */
|
|
336
|
-
repo: import_zod5.z.string().min(1),
|
|
337
|
-
/** Folder path to the skill directory, e.g. "wix-cli/skills/wix-cli-dashboard-page" */
|
|
338
|
-
path: import_zod5.z.string().min(1),
|
|
339
|
-
/** Git ref (branch, tag, or SHA), e.g. "master" or "v1.2.0" */
|
|
340
|
-
ref: import_zod5.z.string().min(1)
|
|
339
|
+
var SkillMetadataSchema = import_zod6.z.object({
|
|
340
|
+
name: import_zod6.z.string(),
|
|
341
|
+
description: import_zod6.z.string(),
|
|
342
|
+
allowedTools: import_zod6.z.array(import_zod6.z.string()).optional(),
|
|
343
|
+
skills: import_zod6.z.array(import_zod6.z.string()).optional()
|
|
341
344
|
});
|
|
342
|
-
var SkillFileSchema =
|
|
345
|
+
var SkillFileSchema = import_zod6.z.object({
|
|
343
346
|
/** Relative path within the skill directory, e.g. "SKILL.md" or "references/API_SPEC.md" */
|
|
344
|
-
path:
|
|
347
|
+
path: import_zod6.z.string().min(1),
|
|
345
348
|
/** File content (UTF-8 text) */
|
|
346
|
-
content:
|
|
349
|
+
content: import_zod6.z.string()
|
|
347
350
|
});
|
|
348
|
-
var SkillVersionSchema =
|
|
349
|
-
id:
|
|
350
|
-
projectId:
|
|
351
|
-
skillId:
|
|
351
|
+
var SkillVersionSchema = import_zod6.z.object({
|
|
352
|
+
id: import_zod6.z.string(),
|
|
353
|
+
projectId: import_zod6.z.string(),
|
|
354
|
+
skillId: import_zod6.z.string(),
|
|
352
355
|
/** Semver string (e.g. "1.2.0") or Falcon fingerprint */
|
|
353
|
-
version:
|
|
356
|
+
version: import_zod6.z.string(),
|
|
354
357
|
/** How this version was created */
|
|
355
358
|
origin: SkillVersionOriginSchema,
|
|
356
359
|
/** Where this snapshot was taken from */
|
|
357
360
|
source: GitHubSourceSchema.optional(),
|
|
358
361
|
/** Frozen snapshot of all files in the skill directory */
|
|
359
|
-
files:
|
|
362
|
+
files: import_zod6.z.array(SkillFileSchema).optional(),
|
|
360
363
|
/** Optional notes about this version (changelog, reason for change) */
|
|
361
|
-
notes:
|
|
362
|
-
createdAt:
|
|
364
|
+
notes: import_zod6.z.string().optional(),
|
|
365
|
+
createdAt: import_zod6.z.string()
|
|
363
366
|
});
|
|
364
|
-
var CreateSkillVersionInputSchema =
|
|
367
|
+
var CreateSkillVersionInputSchema = import_zod6.z.object({
|
|
365
368
|
/** GitHub source to snapshot from. If not provided, uses the Skill's source. */
|
|
366
369
|
source: GitHubSourceSchema.optional(),
|
|
367
370
|
/** Version string for this snapshot (e.g. "1.0.0", "1.0.3"). */
|
|
368
|
-
version:
|
|
369
|
-
notes:
|
|
371
|
+
version: import_zod6.z.string().min(1),
|
|
372
|
+
notes: import_zod6.z.string().optional(),
|
|
370
373
|
/** Origin of this version. Defaults to 'manual' in backend. */
|
|
371
374
|
origin: SkillVersionOriginSchema.optional(),
|
|
372
375
|
/** Pre-edited files to store directly (bypasses GitHub fetch when provided) */
|
|
373
|
-
files:
|
|
376
|
+
files: import_zod6.z.array(SkillFileSchema).optional()
|
|
374
377
|
});
|
|
375
378
|
var SkillSchema = TargetSchema.extend({
|
|
376
379
|
/** GitHub source reference for live content fetching */
|
|
@@ -386,15 +389,15 @@ var SkillInputBaseSchema = SkillSchema.omit({
|
|
|
386
389
|
source: true
|
|
387
390
|
}).extend({
|
|
388
391
|
/** Optional - not stored on Skill; content description lives in SkillVersion */
|
|
389
|
-
description:
|
|
392
|
+
description: import_zod6.z.string().optional(),
|
|
390
393
|
/** GitHub source reference for live content fetching */
|
|
391
394
|
source: GitHubSourceSchema.optional()
|
|
392
395
|
});
|
|
393
|
-
var InitialVersionInputSchema =
|
|
394
|
-
files:
|
|
395
|
-
notes:
|
|
396
|
+
var InitialVersionInputSchema = import_zod6.z.object({
|
|
397
|
+
files: import_zod6.z.array(SkillFileSchema).optional(),
|
|
398
|
+
notes: import_zod6.z.string().optional(),
|
|
396
399
|
source: GitHubSourceSchema.optional(),
|
|
397
|
-
version:
|
|
400
|
+
version: import_zod6.z.string().optional(),
|
|
398
401
|
origin: SkillVersionOriginSchema.optional()
|
|
399
402
|
});
|
|
400
403
|
var CreateSkillInputSchema = SkillInputBaseSchema.extend({
|
|
@@ -412,10 +415,10 @@ var SkillWithLatestVersionSchema = SkillSchema.extend({
|
|
|
412
415
|
});
|
|
413
416
|
|
|
414
417
|
// src/target/skills-group.ts
|
|
415
|
-
var
|
|
418
|
+
var import_zod7 = require("zod");
|
|
416
419
|
var SkillsGroupSchema = TenantEntitySchema.extend({
|
|
417
420
|
/** IDs of skills in this group */
|
|
418
|
-
skillIds:
|
|
421
|
+
skillIds: import_zod7.z.array(import_zod7.z.string())
|
|
419
422
|
});
|
|
420
423
|
var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
421
424
|
id: true,
|
|
@@ -426,10 +429,10 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
|
426
429
|
var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
|
|
427
430
|
|
|
428
431
|
// src/target/sub-agent.ts
|
|
429
|
-
var
|
|
432
|
+
var import_zod8 = require("zod");
|
|
430
433
|
var SubAgentSchema = TargetSchema.extend({
|
|
431
434
|
/** The full sub-agent markdown content (YAML frontmatter + body) */
|
|
432
|
-
subAgentMd:
|
|
435
|
+
subAgentMd: import_zod8.z.string()
|
|
433
436
|
});
|
|
434
437
|
var SubAgentInputBaseSchema = SubAgentSchema.omit({
|
|
435
438
|
id: true,
|
|
@@ -441,10 +444,10 @@ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
|
|
|
441
444
|
var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
|
|
442
445
|
|
|
443
446
|
// src/test/index.ts
|
|
444
|
-
var
|
|
447
|
+
var import_zod19 = require("zod");
|
|
445
448
|
|
|
446
449
|
// src/test/base.ts
|
|
447
|
-
var
|
|
450
|
+
var import_zod9 = require("zod");
|
|
448
451
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
449
452
|
TestType2["LLM"] = "LLM";
|
|
450
453
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -457,7 +460,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
457
460
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
458
461
|
return TestType2;
|
|
459
462
|
})(TestType || {});
|
|
460
|
-
var TestTypeSchema =
|
|
463
|
+
var TestTypeSchema = import_zod9.z.enum(TestType);
|
|
461
464
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
462
465
|
TestImportance2["LOW"] = "low";
|
|
463
466
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -465,153 +468,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
465
468
|
TestImportance2["CRITICAL"] = "critical";
|
|
466
469
|
return TestImportance2;
|
|
467
470
|
})(TestImportance || {});
|
|
468
|
-
var TestImportanceSchema =
|
|
469
|
-
var BaseTestSchema =
|
|
470
|
-
id:
|
|
471
|
+
var TestImportanceSchema = import_zod9.z.enum(TestImportance);
|
|
472
|
+
var BaseTestSchema = import_zod9.z.object({
|
|
473
|
+
id: import_zod9.z.string(),
|
|
471
474
|
type: TestTypeSchema,
|
|
472
|
-
name:
|
|
473
|
-
description:
|
|
475
|
+
name: import_zod9.z.string().min(3),
|
|
476
|
+
description: import_zod9.z.string().optional(),
|
|
474
477
|
importance: TestImportanceSchema.optional()
|
|
475
478
|
});
|
|
476
479
|
|
|
477
480
|
// src/test/llm.ts
|
|
478
|
-
var
|
|
481
|
+
var import_zod10 = require("zod");
|
|
479
482
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
480
|
-
type:
|
|
483
|
+
type: import_zod10.z.literal("LLM" /* LLM */),
|
|
481
484
|
/** Maximum steps for the LLM to take */
|
|
482
|
-
maxSteps:
|
|
485
|
+
maxSteps: import_zod10.z.number().min(1).max(100),
|
|
483
486
|
/** Prompt to send to the evaluator */
|
|
484
|
-
prompt:
|
|
487
|
+
prompt: import_zod10.z.string().min(1),
|
|
485
488
|
/** ID of the evaluator agent to use */
|
|
486
|
-
evaluatorId:
|
|
489
|
+
evaluatorId: import_zod10.z.string()
|
|
487
490
|
});
|
|
488
491
|
|
|
489
492
|
// src/test/tool.ts
|
|
490
|
-
var
|
|
493
|
+
var import_zod11 = require("zod");
|
|
491
494
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
492
|
-
type:
|
|
495
|
+
type: import_zod11.z.literal("TOOL" /* TOOL */),
|
|
493
496
|
/** Name of the tool that should be called */
|
|
494
|
-
toolName:
|
|
497
|
+
toolName: import_zod11.z.string().min(3),
|
|
495
498
|
/** Expected arguments for the tool call */
|
|
496
|
-
args:
|
|
499
|
+
args: import_zod11.z.record(import_zod11.z.string(), import_zod11.z.any()),
|
|
497
500
|
/** Expected content in the tool results */
|
|
498
|
-
resultsContent:
|
|
501
|
+
resultsContent: import_zod11.z.string()
|
|
499
502
|
});
|
|
500
503
|
|
|
501
504
|
// src/test/site-config.ts
|
|
502
|
-
var
|
|
505
|
+
var import_zod12 = require("zod");
|
|
503
506
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
504
|
-
type:
|
|
507
|
+
type: import_zod12.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
505
508
|
/** URL to call */
|
|
506
|
-
url:
|
|
509
|
+
url: import_zod12.z.string().url(),
|
|
507
510
|
/** HTTP method */
|
|
508
|
-
method:
|
|
511
|
+
method: import_zod12.z.enum(["GET", "POST"]),
|
|
509
512
|
/** Request body (for POST) */
|
|
510
|
-
body:
|
|
513
|
+
body: import_zod12.z.string().optional(),
|
|
511
514
|
/** Expected HTTP status code */
|
|
512
|
-
expectedStatusCode:
|
|
515
|
+
expectedStatusCode: import_zod12.z.number().int().min(100).max(599),
|
|
513
516
|
/** Expected response content */
|
|
514
|
-
expectedResponse:
|
|
517
|
+
expectedResponse: import_zod12.z.string().optional(),
|
|
515
518
|
/** JMESPath expression to extract from response */
|
|
516
|
-
expectedResponseJMESPath:
|
|
519
|
+
expectedResponseJMESPath: import_zod12.z.string().optional()
|
|
517
520
|
});
|
|
518
521
|
|
|
519
522
|
// src/test/command-execution.ts
|
|
520
|
-
var
|
|
523
|
+
var import_zod13 = require("zod");
|
|
521
524
|
var AllowedCommands = [
|
|
522
525
|
"yarn install --no-immutable && yarn build",
|
|
523
526
|
"npm run build",
|
|
524
527
|
"yarn typecheck"
|
|
525
528
|
];
|
|
526
529
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
527
|
-
type:
|
|
530
|
+
type: import_zod13.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
528
531
|
/** Command to execute (must be in AllowedCommands) */
|
|
529
|
-
command:
|
|
532
|
+
command: import_zod13.z.string().refine((value) => AllowedCommands.includes(value), {
|
|
530
533
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
531
534
|
}),
|
|
532
535
|
/** Expected exit code (default: 0) */
|
|
533
|
-
expectedExitCode:
|
|
536
|
+
expectedExitCode: import_zod13.z.number().default(0).optional()
|
|
534
537
|
});
|
|
535
538
|
|
|
536
539
|
// src/test/file-presence.ts
|
|
537
|
-
var
|
|
540
|
+
var import_zod14 = require("zod");
|
|
538
541
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
539
|
-
type:
|
|
542
|
+
type: import_zod14.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
540
543
|
/** Paths to check */
|
|
541
|
-
paths:
|
|
544
|
+
paths: import_zod14.z.array(import_zod14.z.string()),
|
|
542
545
|
/** Whether files should exist (true) or not exist (false) */
|
|
543
|
-
shouldExist:
|
|
546
|
+
shouldExist: import_zod14.z.boolean()
|
|
544
547
|
});
|
|
545
548
|
|
|
546
549
|
// src/test/file-content.ts
|
|
547
|
-
var
|
|
548
|
-
var FileContentCheckSchema =
|
|
550
|
+
var import_zod15 = require("zod");
|
|
551
|
+
var FileContentCheckSchema = import_zod15.z.object({
|
|
549
552
|
/** Strings that must be present in the file */
|
|
550
|
-
contains:
|
|
553
|
+
contains: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
551
554
|
/** Strings that must NOT be present in the file */
|
|
552
|
-
notContains:
|
|
555
|
+
notContains: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
553
556
|
/** Regex pattern the content must match */
|
|
554
|
-
matches:
|
|
557
|
+
matches: import_zod15.z.string().optional(),
|
|
555
558
|
/** JSON path checks for structured content */
|
|
556
|
-
jsonPath:
|
|
557
|
-
|
|
558
|
-
path:
|
|
559
|
-
value:
|
|
559
|
+
jsonPath: import_zod15.z.array(
|
|
560
|
+
import_zod15.z.object({
|
|
561
|
+
path: import_zod15.z.string(),
|
|
562
|
+
value: import_zod15.z.unknown()
|
|
560
563
|
})
|
|
561
564
|
).optional(),
|
|
562
565
|
/** Lines that should be added (for diff checking) */
|
|
563
|
-
added:
|
|
566
|
+
added: import_zod15.z.array(import_zod15.z.string()).optional(),
|
|
564
567
|
/** Lines that should be removed (for diff checking) */
|
|
565
|
-
removed:
|
|
568
|
+
removed: import_zod15.z.array(import_zod15.z.string()).optional()
|
|
566
569
|
});
|
|
567
570
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
568
|
-
type:
|
|
571
|
+
type: import_zod15.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
569
572
|
/** Path to the file to check */
|
|
570
|
-
path:
|
|
573
|
+
path: import_zod15.z.string(),
|
|
571
574
|
/** Content checks to perform */
|
|
572
575
|
checks: FileContentCheckSchema
|
|
573
576
|
});
|
|
574
577
|
|
|
575
578
|
// src/test/build-check.ts
|
|
576
|
-
var
|
|
579
|
+
var import_zod16 = require("zod");
|
|
577
580
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
578
|
-
type:
|
|
581
|
+
type: import_zod16.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
579
582
|
/** Build command to execute */
|
|
580
|
-
command:
|
|
583
|
+
command: import_zod16.z.string(),
|
|
581
584
|
/** Whether the build should succeed */
|
|
582
|
-
expectSuccess:
|
|
585
|
+
expectSuccess: import_zod16.z.boolean(),
|
|
583
586
|
/** Maximum allowed warnings (optional) */
|
|
584
|
-
allowedWarnings:
|
|
587
|
+
allowedWarnings: import_zod16.z.number().optional(),
|
|
585
588
|
/** Timeout in milliseconds */
|
|
586
|
-
timeout:
|
|
589
|
+
timeout: import_zod16.z.number().optional()
|
|
587
590
|
});
|
|
588
591
|
|
|
589
592
|
// src/test/vitest.ts
|
|
590
|
-
var
|
|
593
|
+
var import_zod17 = require("zod");
|
|
591
594
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
592
|
-
type:
|
|
595
|
+
type: import_zod17.z.literal("VITEST" /* VITEST */),
|
|
593
596
|
/** Test file content */
|
|
594
|
-
testFile:
|
|
597
|
+
testFile: import_zod17.z.string(),
|
|
595
598
|
/** Name of the test file */
|
|
596
|
-
testFileName:
|
|
599
|
+
testFileName: import_zod17.z.string(),
|
|
597
600
|
/** Minimum pass rate required (0-100) */
|
|
598
|
-
minPassRate:
|
|
601
|
+
minPassRate: import_zod17.z.number().min(0).max(100)
|
|
599
602
|
});
|
|
600
603
|
|
|
601
604
|
// src/test/playwright-nl.ts
|
|
602
|
-
var
|
|
605
|
+
var import_zod18 = require("zod");
|
|
603
606
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
604
|
-
type:
|
|
607
|
+
type: import_zod18.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
605
608
|
/** Natural language steps to execute */
|
|
606
|
-
steps:
|
|
609
|
+
steps: import_zod18.z.array(import_zod18.z.string()),
|
|
607
610
|
/** Expected outcome description */
|
|
608
|
-
expectedOutcome:
|
|
611
|
+
expectedOutcome: import_zod18.z.string(),
|
|
609
612
|
/** Timeout in milliseconds */
|
|
610
|
-
timeout:
|
|
613
|
+
timeout: import_zod18.z.number().optional()
|
|
611
614
|
});
|
|
612
615
|
|
|
613
616
|
// src/test/index.ts
|
|
614
|
-
var TestSchema =
|
|
617
|
+
var TestSchema = import_zod19.z.discriminatedUnion("type", [
|
|
615
618
|
LLMTestSchema,
|
|
616
619
|
ToolTestSchema,
|
|
617
620
|
SiteConfigTestSchema,
|
|
@@ -624,66 +627,66 @@ var TestSchema = import_zod18.z.discriminatedUnion("type", [
|
|
|
624
627
|
]);
|
|
625
628
|
|
|
626
629
|
// src/scenario/assertions.ts
|
|
627
|
-
var
|
|
628
|
-
var SkillWasCalledAssertionSchema =
|
|
629
|
-
type:
|
|
630
|
+
var import_zod20 = require("zod");
|
|
631
|
+
var SkillWasCalledAssertionSchema = import_zod20.z.object({
|
|
632
|
+
type: import_zod20.z.literal("skill_was_called"),
|
|
630
633
|
/** Names of the skills that must have been called (matched against trace Skill tool args) */
|
|
631
|
-
skillNames:
|
|
634
|
+
skillNames: import_zod20.z.array(import_zod20.z.string().min(1)).min(1)
|
|
632
635
|
});
|
|
633
|
-
var BuildPassedAssertionSchema =
|
|
634
|
-
type:
|
|
636
|
+
var BuildPassedAssertionSchema = import_zod20.z.object({
|
|
637
|
+
type: import_zod20.z.literal("build_passed"),
|
|
635
638
|
/** Command to run (default: "yarn build") */
|
|
636
|
-
command:
|
|
639
|
+
command: import_zod20.z.string().optional(),
|
|
637
640
|
/** Expected exit code (default: 0) */
|
|
638
|
-
expectedExitCode:
|
|
641
|
+
expectedExitCode: import_zod20.z.number().int().optional()
|
|
639
642
|
});
|
|
640
|
-
var LlmJudgeAssertionSchema =
|
|
641
|
-
type:
|
|
643
|
+
var LlmJudgeAssertionSchema = import_zod20.z.object({
|
|
644
|
+
type: import_zod20.z.literal("llm_judge"),
|
|
642
645
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
643
|
-
prompt:
|
|
646
|
+
prompt: import_zod20.z.string(),
|
|
644
647
|
/** Optional system prompt for the judge (default asks for JSON with score) */
|
|
645
|
-
systemPrompt:
|
|
648
|
+
systemPrompt: import_zod20.z.string().optional(),
|
|
646
649
|
/** Minimum score to pass (0-100, default 70) */
|
|
647
|
-
minScore:
|
|
650
|
+
minScore: import_zod20.z.number().int().min(0).max(100).optional(),
|
|
648
651
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
649
|
-
model:
|
|
650
|
-
maxTokens:
|
|
651
|
-
temperature:
|
|
652
|
+
model: import_zod20.z.string().optional(),
|
|
653
|
+
maxTokens: import_zod20.z.number().int().optional(),
|
|
654
|
+
temperature: import_zod20.z.number().min(0).max(1).optional()
|
|
652
655
|
});
|
|
653
|
-
var AssertionSchema =
|
|
656
|
+
var AssertionSchema = import_zod20.z.union([
|
|
654
657
|
SkillWasCalledAssertionSchema,
|
|
655
658
|
BuildPassedAssertionSchema,
|
|
656
659
|
LlmJudgeAssertionSchema
|
|
657
660
|
]);
|
|
658
661
|
|
|
659
662
|
// src/scenario/environment.ts
|
|
660
|
-
var
|
|
661
|
-
var LocalProjectConfigSchema =
|
|
663
|
+
var import_zod21 = require("zod");
|
|
664
|
+
var LocalProjectConfigSchema = import_zod21.z.object({
|
|
662
665
|
/** Template ID to use for the local project */
|
|
663
|
-
templateId:
|
|
666
|
+
templateId: import_zod21.z.string().optional(),
|
|
664
667
|
/** Files to create in the project */
|
|
665
|
-
files:
|
|
666
|
-
|
|
667
|
-
path:
|
|
668
|
-
content:
|
|
668
|
+
files: import_zod21.z.array(
|
|
669
|
+
import_zod21.z.object({
|
|
670
|
+
path: import_zod21.z.string().min(1),
|
|
671
|
+
content: import_zod21.z.string().min(1)
|
|
669
672
|
})
|
|
670
673
|
).optional()
|
|
671
674
|
});
|
|
672
|
-
var MetaSiteConfigSchema =
|
|
673
|
-
configurations:
|
|
674
|
-
|
|
675
|
-
name:
|
|
676
|
-
apiCalls:
|
|
677
|
-
|
|
678
|
-
url:
|
|
679
|
-
method:
|
|
680
|
-
body:
|
|
675
|
+
var MetaSiteConfigSchema = import_zod21.z.object({
|
|
676
|
+
configurations: import_zod21.z.array(
|
|
677
|
+
import_zod21.z.object({
|
|
678
|
+
name: import_zod21.z.string().min(1),
|
|
679
|
+
apiCalls: import_zod21.z.array(
|
|
680
|
+
import_zod21.z.object({
|
|
681
|
+
url: import_zod21.z.string().url(),
|
|
682
|
+
method: import_zod21.z.enum(["POST", "PUT"]),
|
|
683
|
+
body: import_zod21.z.string()
|
|
681
684
|
})
|
|
682
685
|
)
|
|
683
686
|
})
|
|
684
687
|
).optional()
|
|
685
688
|
});
|
|
686
|
-
var EnvironmentSchema =
|
|
689
|
+
var EnvironmentSchema = import_zod21.z.object({
|
|
687
690
|
/** Local project configuration */
|
|
688
691
|
localProject: LocalProjectConfigSchema.optional(),
|
|
689
692
|
/** Meta site configuration */
|
|
@@ -691,54 +694,54 @@ var EnvironmentSchema = import_zod20.z.object({
|
|
|
691
694
|
});
|
|
692
695
|
|
|
693
696
|
// src/scenario/test-scenario.ts
|
|
694
|
-
var
|
|
697
|
+
var import_zod23 = require("zod");
|
|
695
698
|
|
|
696
699
|
// src/assertion/assertion.ts
|
|
697
|
-
var
|
|
698
|
-
var AssertionTypeSchema =
|
|
700
|
+
var import_zod22 = require("zod");
|
|
701
|
+
var AssertionTypeSchema = import_zod22.z.enum([
|
|
699
702
|
"skill_was_called",
|
|
700
703
|
"build_passed",
|
|
701
704
|
"llm_judge"
|
|
702
705
|
]);
|
|
703
|
-
var AssertionParameterTypeSchema =
|
|
706
|
+
var AssertionParameterTypeSchema = import_zod22.z.enum([
|
|
704
707
|
"string",
|
|
705
708
|
"number",
|
|
706
709
|
"boolean"
|
|
707
710
|
]);
|
|
708
|
-
var AssertionParameterSchema =
|
|
711
|
+
var AssertionParameterSchema = import_zod22.z.object({
|
|
709
712
|
/** Parameter name (used as key in params object) */
|
|
710
|
-
name:
|
|
713
|
+
name: import_zod22.z.string().min(1),
|
|
711
714
|
/** Display label for the parameter */
|
|
712
|
-
label:
|
|
715
|
+
label: import_zod22.z.string().min(1),
|
|
713
716
|
/** Parameter type */
|
|
714
717
|
type: AssertionParameterTypeSchema,
|
|
715
718
|
/** Whether this parameter is required */
|
|
716
|
-
required:
|
|
719
|
+
required: import_zod22.z.boolean(),
|
|
717
720
|
/** Default value (optional, used when not provided) */
|
|
718
|
-
defaultValue:
|
|
721
|
+
defaultValue: import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean()]).optional(),
|
|
719
722
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
720
|
-
advanced:
|
|
723
|
+
advanced: import_zod22.z.boolean().optional()
|
|
721
724
|
});
|
|
722
|
-
var ScenarioAssertionLinkSchema =
|
|
725
|
+
var ScenarioAssertionLinkSchema = import_zod22.z.object({
|
|
723
726
|
/** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
|
|
724
|
-
assertionId:
|
|
727
|
+
assertionId: import_zod22.z.string(),
|
|
725
728
|
/** Parameter values for this assertion in this scenario */
|
|
726
|
-
params:
|
|
727
|
-
|
|
728
|
-
|
|
729
|
+
params: import_zod22.z.record(
|
|
730
|
+
import_zod22.z.string(),
|
|
731
|
+
import_zod22.z.union([import_zod22.z.string(), import_zod22.z.number(), import_zod22.z.boolean(), import_zod22.z.null()])
|
|
729
732
|
).optional()
|
|
730
733
|
});
|
|
731
|
-
var SkillWasCalledConfigSchema =
|
|
734
|
+
var SkillWasCalledConfigSchema = import_zod22.z.object({
|
|
732
735
|
/** Names of the skills that must have been called */
|
|
733
|
-
skillNames:
|
|
736
|
+
skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
|
|
734
737
|
});
|
|
735
|
-
var BuildPassedConfigSchema =
|
|
738
|
+
var BuildPassedConfigSchema = import_zod22.z.strictObject({
|
|
736
739
|
/** Command to run (default: "yarn build") */
|
|
737
|
-
command:
|
|
740
|
+
command: import_zod22.z.string().optional(),
|
|
738
741
|
/** Expected exit code (default: 0) */
|
|
739
|
-
expectedExitCode:
|
|
742
|
+
expectedExitCode: import_zod22.z.number().int().optional()
|
|
740
743
|
});
|
|
741
|
-
var LlmJudgeConfigSchema =
|
|
744
|
+
var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
742
745
|
/**
|
|
743
746
|
* Prompt template with placeholders:
|
|
744
747
|
* - {{output}}: agent's final output
|
|
@@ -749,28 +752,28 @@ var LlmJudgeConfigSchema = import_zod21.z.object({
|
|
|
749
752
|
* - {{trace}}: step-by-step trace of tool calls
|
|
750
753
|
* - Custom parameters defined in the parameters array
|
|
751
754
|
*/
|
|
752
|
-
prompt:
|
|
755
|
+
prompt: import_zod22.z.string().min(1),
|
|
753
756
|
/** Optional system prompt for the judge */
|
|
754
|
-
systemPrompt:
|
|
757
|
+
systemPrompt: import_zod22.z.string().optional(),
|
|
755
758
|
/** Minimum score to pass (0-100, default 70) */
|
|
756
|
-
minScore:
|
|
759
|
+
minScore: import_zod22.z.number().int().min(0).max(100).optional(),
|
|
757
760
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
758
|
-
model:
|
|
761
|
+
model: import_zod22.z.string().optional(),
|
|
759
762
|
/** Max output tokens */
|
|
760
|
-
maxTokens:
|
|
763
|
+
maxTokens: import_zod22.z.number().int().optional(),
|
|
761
764
|
/** Temperature (0-1) */
|
|
762
|
-
temperature:
|
|
765
|
+
temperature: import_zod22.z.number().min(0).max(1).optional(),
|
|
763
766
|
/** User-defined parameters for this assertion */
|
|
764
|
-
parameters:
|
|
767
|
+
parameters: import_zod22.z.array(AssertionParameterSchema).optional()
|
|
765
768
|
});
|
|
766
|
-
var AssertionConfigSchema =
|
|
769
|
+
var AssertionConfigSchema = import_zod22.z.union([
|
|
767
770
|
LlmJudgeConfigSchema,
|
|
768
771
|
// requires prompt - check first
|
|
769
772
|
SkillWasCalledConfigSchema,
|
|
770
773
|
// requires skillName
|
|
771
774
|
BuildPassedConfigSchema,
|
|
772
775
|
// all optional, uses strictObject to reject unknown keys
|
|
773
|
-
|
|
776
|
+
import_zod22.z.object({})
|
|
774
777
|
// fallback empty config
|
|
775
778
|
]);
|
|
776
779
|
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
@@ -815,23 +818,23 @@ function getLlmJudgeConfig(assertion) {
|
|
|
815
818
|
}
|
|
816
819
|
|
|
817
820
|
// src/scenario/test-scenario.ts
|
|
818
|
-
var ExpectedFileSchema =
|
|
821
|
+
var ExpectedFileSchema = import_zod23.z.object({
|
|
819
822
|
/** Relative path where the file should be created */
|
|
820
|
-
path:
|
|
823
|
+
path: import_zod23.z.string(),
|
|
821
824
|
/** Optional expected content */
|
|
822
|
-
content:
|
|
825
|
+
content: import_zod23.z.string().optional()
|
|
823
826
|
});
|
|
824
827
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
825
828
|
/** The prompt sent to the agent to trigger the task */
|
|
826
|
-
triggerPrompt:
|
|
829
|
+
triggerPrompt: import_zod23.z.string().min(10),
|
|
827
830
|
/** ID of the template to use for this scenario (null = no template) */
|
|
828
|
-
templateId:
|
|
831
|
+
templateId: import_zod23.z.string().nullish(),
|
|
829
832
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
830
|
-
assertions:
|
|
833
|
+
assertions: import_zod23.z.array(AssertionSchema).optional(),
|
|
831
834
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
832
|
-
assertionIds:
|
|
835
|
+
assertionIds: import_zod23.z.array(import_zod23.z.string()).optional(),
|
|
833
836
|
/** Linked assertions with per-scenario parameter values */
|
|
834
|
-
assertionLinks:
|
|
837
|
+
assertionLinks: import_zod23.z.array(ScenarioAssertionLinkSchema).optional()
|
|
835
838
|
});
|
|
836
839
|
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
837
840
|
id: true,
|
|
@@ -842,10 +845,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
842
845
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
843
846
|
|
|
844
847
|
// src/suite/test-suite.ts
|
|
845
|
-
var
|
|
848
|
+
var import_zod24 = require("zod");
|
|
846
849
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
847
850
|
/** IDs of test scenarios in this suite */
|
|
848
|
-
scenarioIds:
|
|
851
|
+
scenarioIds: import_zod24.z.array(import_zod24.z.string())
|
|
849
852
|
});
|
|
850
853
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
851
854
|
id: true,
|
|
@@ -856,21 +859,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
856
859
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
857
860
|
|
|
858
861
|
// src/evaluation/metrics.ts
|
|
859
|
-
var
|
|
860
|
-
var TokenUsageSchema =
|
|
861
|
-
prompt:
|
|
862
|
-
completion:
|
|
863
|
-
total:
|
|
864
|
-
});
|
|
865
|
-
var EvalMetricsSchema =
|
|
866
|
-
totalAssertions:
|
|
867
|
-
passed:
|
|
868
|
-
failed:
|
|
869
|
-
skipped:
|
|
870
|
-
errors:
|
|
871
|
-
passRate:
|
|
872
|
-
avgDuration:
|
|
873
|
-
totalDuration:
|
|
862
|
+
var import_zod25 = require("zod");
|
|
863
|
+
var TokenUsageSchema = import_zod25.z.object({
|
|
864
|
+
prompt: import_zod25.z.number(),
|
|
865
|
+
completion: import_zod25.z.number(),
|
|
866
|
+
total: import_zod25.z.number()
|
|
867
|
+
});
|
|
868
|
+
var EvalMetricsSchema = import_zod25.z.object({
|
|
869
|
+
totalAssertions: import_zod25.z.number(),
|
|
870
|
+
passed: import_zod25.z.number(),
|
|
871
|
+
failed: import_zod25.z.number(),
|
|
872
|
+
skipped: import_zod25.z.number(),
|
|
873
|
+
errors: import_zod25.z.number(),
|
|
874
|
+
passRate: import_zod25.z.number(),
|
|
875
|
+
avgDuration: import_zod25.z.number(),
|
|
876
|
+
totalDuration: import_zod25.z.number()
|
|
874
877
|
});
|
|
875
878
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
876
879
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -880,7 +883,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
880
883
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
881
884
|
return EvalStatus2;
|
|
882
885
|
})(EvalStatus || {});
|
|
883
|
-
var EvalStatusSchema =
|
|
886
|
+
var EvalStatusSchema = import_zod25.z.enum(EvalStatus);
|
|
884
887
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
885
888
|
LLMStepType2["COMPLETION"] = "completion";
|
|
886
889
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -888,52 +891,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
888
891
|
LLMStepType2["THINKING"] = "thinking";
|
|
889
892
|
return LLMStepType2;
|
|
890
893
|
})(LLMStepType || {});
|
|
891
|
-
var LLMTraceStepSchema =
|
|
892
|
-
id:
|
|
893
|
-
stepNumber:
|
|
894
|
-
type:
|
|
895
|
-
model:
|
|
896
|
-
provider:
|
|
897
|
-
startedAt:
|
|
898
|
-
durationMs:
|
|
894
|
+
var LLMTraceStepSchema = import_zod25.z.object({
|
|
895
|
+
id: import_zod25.z.string(),
|
|
896
|
+
stepNumber: import_zod25.z.number(),
|
|
897
|
+
type: import_zod25.z.enum(LLMStepType),
|
|
898
|
+
model: import_zod25.z.string(),
|
|
899
|
+
provider: import_zod25.z.string(),
|
|
900
|
+
startedAt: import_zod25.z.string(),
|
|
901
|
+
durationMs: import_zod25.z.number(),
|
|
899
902
|
tokenUsage: TokenUsageSchema,
|
|
900
|
-
costUsd:
|
|
901
|
-
toolName:
|
|
902
|
-
toolArguments:
|
|
903
|
-
inputPreview:
|
|
904
|
-
outputPreview:
|
|
905
|
-
success:
|
|
906
|
-
error:
|
|
907
|
-
});
|
|
908
|
-
var LLMBreakdownStatsSchema =
|
|
909
|
-
count:
|
|
910
|
-
durationMs:
|
|
911
|
-
tokens:
|
|
912
|
-
costUsd:
|
|
913
|
-
});
|
|
914
|
-
var LLMTraceSummarySchema =
|
|
915
|
-
totalSteps:
|
|
916
|
-
totalDurationMs:
|
|
903
|
+
costUsd: import_zod25.z.number(),
|
|
904
|
+
toolName: import_zod25.z.string().optional(),
|
|
905
|
+
toolArguments: import_zod25.z.string().optional(),
|
|
906
|
+
inputPreview: import_zod25.z.string().optional(),
|
|
907
|
+
outputPreview: import_zod25.z.string().optional(),
|
|
908
|
+
success: import_zod25.z.boolean(),
|
|
909
|
+
error: import_zod25.z.string().optional()
|
|
910
|
+
});
|
|
911
|
+
var LLMBreakdownStatsSchema = import_zod25.z.object({
|
|
912
|
+
count: import_zod25.z.number(),
|
|
913
|
+
durationMs: import_zod25.z.number(),
|
|
914
|
+
tokens: import_zod25.z.number(),
|
|
915
|
+
costUsd: import_zod25.z.number()
|
|
916
|
+
});
|
|
917
|
+
var LLMTraceSummarySchema = import_zod25.z.object({
|
|
918
|
+
totalSteps: import_zod25.z.number(),
|
|
919
|
+
totalDurationMs: import_zod25.z.number(),
|
|
917
920
|
totalTokens: TokenUsageSchema,
|
|
918
|
-
totalCostUsd:
|
|
919
|
-
stepTypeBreakdown:
|
|
920
|
-
modelBreakdown:
|
|
921
|
-
modelsUsed:
|
|
922
|
-
});
|
|
923
|
-
var LLMTraceSchema =
|
|
924
|
-
id:
|
|
925
|
-
steps:
|
|
921
|
+
totalCostUsd: import_zod25.z.number(),
|
|
922
|
+
stepTypeBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
923
|
+
modelBreakdown: import_zod25.z.record(import_zod25.z.string(), LLMBreakdownStatsSchema),
|
|
924
|
+
modelsUsed: import_zod25.z.array(import_zod25.z.string())
|
|
925
|
+
});
|
|
926
|
+
var LLMTraceSchema = import_zod25.z.object({
|
|
927
|
+
id: import_zod25.z.string(),
|
|
928
|
+
steps: import_zod25.z.array(LLMTraceStepSchema),
|
|
926
929
|
summary: LLMTraceSummarySchema
|
|
927
930
|
});
|
|
928
931
|
|
|
929
932
|
// src/evaluation/eval-result.ts
|
|
930
|
-
var
|
|
933
|
+
var import_zod28 = require("zod");
|
|
931
934
|
|
|
932
935
|
// src/evaluation/eval-run.ts
|
|
933
|
-
var
|
|
936
|
+
var import_zod27 = require("zod");
|
|
934
937
|
|
|
935
938
|
// src/evaluation/live-trace.ts
|
|
936
|
-
var
|
|
939
|
+
var import_zod26 = require("zod");
|
|
937
940
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
938
941
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
939
942
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -947,37 +950,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
947
950
|
LiveTraceEventType2["USER"] = "user";
|
|
948
951
|
return LiveTraceEventType2;
|
|
949
952
|
})(LiveTraceEventType || {});
|
|
950
|
-
var LiveTraceEventSchema =
|
|
953
|
+
var LiveTraceEventSchema = import_zod26.z.object({
|
|
951
954
|
/** The evaluation run ID */
|
|
952
|
-
evalRunId:
|
|
955
|
+
evalRunId: import_zod26.z.string(),
|
|
953
956
|
/** The scenario ID being executed */
|
|
954
|
-
scenarioId:
|
|
957
|
+
scenarioId: import_zod26.z.string(),
|
|
955
958
|
/** The scenario name for display */
|
|
956
|
-
scenarioName:
|
|
959
|
+
scenarioName: import_zod26.z.string(),
|
|
957
960
|
/** The target ID (skill, agent, etc.) */
|
|
958
|
-
targetId:
|
|
961
|
+
targetId: import_zod26.z.string(),
|
|
959
962
|
/** The target name for display */
|
|
960
|
-
targetName:
|
|
963
|
+
targetName: import_zod26.z.string(),
|
|
961
964
|
/** Step number in the current scenario execution */
|
|
962
|
-
stepNumber:
|
|
965
|
+
stepNumber: import_zod26.z.number(),
|
|
963
966
|
/** Type of trace event */
|
|
964
|
-
type:
|
|
967
|
+
type: import_zod26.z.enum(LiveTraceEventType),
|
|
965
968
|
/** Tool name if this is a tool_use event */
|
|
966
|
-
toolName:
|
|
969
|
+
toolName: import_zod26.z.string().optional(),
|
|
967
970
|
/** Tool arguments preview (truncated JSON) */
|
|
968
|
-
toolArgs:
|
|
971
|
+
toolArgs: import_zod26.z.string().optional(),
|
|
969
972
|
/** Output preview (truncated text) */
|
|
970
|
-
outputPreview:
|
|
973
|
+
outputPreview: import_zod26.z.string().optional(),
|
|
971
974
|
/** File path for file operations */
|
|
972
|
-
filePath:
|
|
975
|
+
filePath: import_zod26.z.string().optional(),
|
|
973
976
|
/** Elapsed time in milliseconds for progress events */
|
|
974
|
-
elapsedMs:
|
|
977
|
+
elapsedMs: import_zod26.z.number().optional(),
|
|
975
978
|
/** Thinking/reasoning text from Claude */
|
|
976
|
-
thinking:
|
|
979
|
+
thinking: import_zod26.z.string().optional(),
|
|
977
980
|
/** Timestamp when this event occurred */
|
|
978
|
-
timestamp:
|
|
981
|
+
timestamp: import_zod26.z.string(),
|
|
979
982
|
/** Whether this is the final event for this scenario */
|
|
980
|
-
isComplete:
|
|
983
|
+
isComplete: import_zod26.z.boolean()
|
|
981
984
|
});
|
|
982
985
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
983
986
|
function parseTraceEventLine(line) {
|
|
@@ -1005,14 +1008,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1005
1008
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
1006
1009
|
return TriggerType2;
|
|
1007
1010
|
})(TriggerType || {});
|
|
1008
|
-
var TriggerMetadataSchema =
|
|
1009
|
-
version:
|
|
1010
|
-
resourceUpdated:
|
|
1011
|
+
var TriggerMetadataSchema = import_zod27.z.object({
|
|
1012
|
+
version: import_zod27.z.string().optional(),
|
|
1013
|
+
resourceUpdated: import_zod27.z.array(import_zod27.z.string()).optional()
|
|
1011
1014
|
});
|
|
1012
|
-
var TriggerSchema =
|
|
1013
|
-
id:
|
|
1015
|
+
var TriggerSchema = import_zod27.z.object({
|
|
1016
|
+
id: import_zod27.z.string(),
|
|
1014
1017
|
metadata: TriggerMetadataSchema.optional(),
|
|
1015
|
-
type:
|
|
1018
|
+
type: import_zod27.z.enum(TriggerType)
|
|
1016
1019
|
});
|
|
1017
1020
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1018
1021
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -1030,28 +1033,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
1030
1033
|
FailureSeverity2["LOW"] = "low";
|
|
1031
1034
|
return FailureSeverity2;
|
|
1032
1035
|
})(FailureSeverity || {});
|
|
1033
|
-
var DiffLineTypeSchema =
|
|
1034
|
-
var DiffLineSchema =
|
|
1036
|
+
var DiffLineTypeSchema = import_zod27.z.enum(["added", "removed", "unchanged"]);
|
|
1037
|
+
var DiffLineSchema = import_zod27.z.object({
|
|
1035
1038
|
type: DiffLineTypeSchema,
|
|
1036
|
-
content:
|
|
1037
|
-
lineNumber:
|
|
1038
|
-
});
|
|
1039
|
-
var DiffContentSchema =
|
|
1040
|
-
path:
|
|
1041
|
-
expected:
|
|
1042
|
-
actual:
|
|
1043
|
-
diffLines:
|
|
1044
|
-
renamedFrom:
|
|
1045
|
-
});
|
|
1046
|
-
var CommandExecutionSchema =
|
|
1047
|
-
command:
|
|
1048
|
-
exitCode:
|
|
1049
|
-
output:
|
|
1050
|
-
duration:
|
|
1051
|
-
});
|
|
1052
|
-
var FileModificationSchema =
|
|
1053
|
-
path:
|
|
1054
|
-
action:
|
|
1039
|
+
content: import_zod27.z.string(),
|
|
1040
|
+
lineNumber: import_zod27.z.number()
|
|
1041
|
+
});
|
|
1042
|
+
var DiffContentSchema = import_zod27.z.object({
|
|
1043
|
+
path: import_zod27.z.string(),
|
|
1044
|
+
expected: import_zod27.z.string(),
|
|
1045
|
+
actual: import_zod27.z.string(),
|
|
1046
|
+
diffLines: import_zod27.z.array(DiffLineSchema),
|
|
1047
|
+
renamedFrom: import_zod27.z.string().optional()
|
|
1048
|
+
});
|
|
1049
|
+
var CommandExecutionSchema = import_zod27.z.object({
|
|
1050
|
+
command: import_zod27.z.string(),
|
|
1051
|
+
exitCode: import_zod27.z.number(),
|
|
1052
|
+
output: import_zod27.z.string().optional(),
|
|
1053
|
+
duration: import_zod27.z.number()
|
|
1054
|
+
});
|
|
1055
|
+
var FileModificationSchema = import_zod27.z.object({
|
|
1056
|
+
path: import_zod27.z.string(),
|
|
1057
|
+
action: import_zod27.z.enum(["created", "modified", "deleted"])
|
|
1055
1058
|
});
|
|
1056
1059
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1057
1060
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1059,81 +1062,81 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1059
1062
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1060
1063
|
return TemplateFileStatus2;
|
|
1061
1064
|
})(TemplateFileStatus || {});
|
|
1062
|
-
var TemplateFileSchema =
|
|
1065
|
+
var TemplateFileSchema = import_zod27.z.object({
|
|
1063
1066
|
/** Relative path within the template */
|
|
1064
|
-
path:
|
|
1067
|
+
path: import_zod27.z.string(),
|
|
1065
1068
|
/** Full file content after execution */
|
|
1066
|
-
content:
|
|
1069
|
+
content: import_zod27.z.string(),
|
|
1067
1070
|
/** File status (new, modified, unchanged) */
|
|
1068
|
-
status:
|
|
1069
|
-
});
|
|
1070
|
-
var ApiCallSchema =
|
|
1071
|
-
endpoint:
|
|
1072
|
-
tokensUsed:
|
|
1073
|
-
duration:
|
|
1074
|
-
});
|
|
1075
|
-
var ExecutionTraceSchema =
|
|
1076
|
-
commands:
|
|
1077
|
-
filesModified:
|
|
1078
|
-
apiCalls:
|
|
1079
|
-
totalDuration:
|
|
1080
|
-
});
|
|
1081
|
-
var FailureAnalysisSchema =
|
|
1082
|
-
category:
|
|
1083
|
-
severity:
|
|
1084
|
-
summary:
|
|
1085
|
-
details:
|
|
1086
|
-
rootCause:
|
|
1087
|
-
suggestedFix:
|
|
1088
|
-
relatedAssertions:
|
|
1089
|
-
codeSnippet:
|
|
1090
|
-
similarIssues:
|
|
1091
|
-
patternId:
|
|
1071
|
+
status: import_zod27.z.enum(["new", "modified", "unchanged"])
|
|
1072
|
+
});
|
|
1073
|
+
var ApiCallSchema = import_zod27.z.object({
|
|
1074
|
+
endpoint: import_zod27.z.string(),
|
|
1075
|
+
tokensUsed: import_zod27.z.number(),
|
|
1076
|
+
duration: import_zod27.z.number()
|
|
1077
|
+
});
|
|
1078
|
+
var ExecutionTraceSchema = import_zod27.z.object({
|
|
1079
|
+
commands: import_zod27.z.array(CommandExecutionSchema),
|
|
1080
|
+
filesModified: import_zod27.z.array(FileModificationSchema),
|
|
1081
|
+
apiCalls: import_zod27.z.array(ApiCallSchema),
|
|
1082
|
+
totalDuration: import_zod27.z.number()
|
|
1083
|
+
});
|
|
1084
|
+
var FailureAnalysisSchema = import_zod27.z.object({
|
|
1085
|
+
category: import_zod27.z.enum(FailureCategory),
|
|
1086
|
+
severity: import_zod27.z.enum(FailureSeverity),
|
|
1087
|
+
summary: import_zod27.z.string(),
|
|
1088
|
+
details: import_zod27.z.string(),
|
|
1089
|
+
rootCause: import_zod27.z.string(),
|
|
1090
|
+
suggestedFix: import_zod27.z.string(),
|
|
1091
|
+
relatedAssertions: import_zod27.z.array(import_zod27.z.string()),
|
|
1092
|
+
codeSnippet: import_zod27.z.string().optional(),
|
|
1093
|
+
similarIssues: import_zod27.z.array(import_zod27.z.string()).optional(),
|
|
1094
|
+
patternId: import_zod27.z.string().optional(),
|
|
1092
1095
|
// Extended fields for detailed debugging
|
|
1093
1096
|
diff: DiffContentSchema.optional(),
|
|
1094
1097
|
executionTrace: ExecutionTraceSchema.optional()
|
|
1095
1098
|
});
|
|
1096
1099
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1097
1100
|
/** Agent ID for this run */
|
|
1098
|
-
agentId:
|
|
1101
|
+
agentId: import_zod27.z.string().optional(),
|
|
1099
1102
|
/** Skills group ID for this run */
|
|
1100
|
-
skillsGroupId:
|
|
1103
|
+
skillsGroupId: import_zod27.z.string().optional(),
|
|
1101
1104
|
/** Map of skillId to skillVersionId for this run */
|
|
1102
|
-
skillVersions:
|
|
1105
|
+
skillVersions: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.string()).optional(),
|
|
1103
1106
|
/** Scenario IDs to run */
|
|
1104
|
-
scenarioIds:
|
|
1107
|
+
scenarioIds: import_zod27.z.array(import_zod27.z.string()),
|
|
1105
1108
|
/** Current status */
|
|
1106
1109
|
status: EvalStatusSchema,
|
|
1107
1110
|
/** Progress percentage (0-100) */
|
|
1108
|
-
progress:
|
|
1111
|
+
progress: import_zod27.z.number(),
|
|
1109
1112
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1110
|
-
results:
|
|
1113
|
+
results: import_zod27.z.array(import_zod27.z.lazy(() => EvalRunResultSchema)),
|
|
1111
1114
|
/** Aggregated metrics across all results */
|
|
1112
1115
|
aggregateMetrics: EvalMetricsSchema,
|
|
1113
1116
|
/** Failure analyses */
|
|
1114
|
-
failureAnalyses:
|
|
1117
|
+
failureAnalyses: import_zod27.z.array(FailureAnalysisSchema).optional(),
|
|
1115
1118
|
/** Aggregated LLM trace summary */
|
|
1116
1119
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1117
1120
|
/** What triggered this run */
|
|
1118
1121
|
trigger: TriggerSchema.optional(),
|
|
1119
1122
|
/** When the run started (set when evaluation is triggered) */
|
|
1120
|
-
startedAt:
|
|
1123
|
+
startedAt: import_zod27.z.string().optional(),
|
|
1121
1124
|
/** When the run completed */
|
|
1122
|
-
completedAt:
|
|
1125
|
+
completedAt: import_zod27.z.string().optional(),
|
|
1123
1126
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1124
|
-
liveTraceEvents:
|
|
1127
|
+
liveTraceEvents: import_zod27.z.array(LiveTraceEventSchema).optional(),
|
|
1125
1128
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1126
|
-
jobId:
|
|
1129
|
+
jobId: import_zod27.z.string().optional(),
|
|
1127
1130
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1128
|
-
jobStatus:
|
|
1131
|
+
jobStatus: import_zod27.z.string().optional(),
|
|
1129
1132
|
/** Remote job error message if the job failed */
|
|
1130
|
-
jobError:
|
|
1133
|
+
jobError: import_zod27.z.string().optional(),
|
|
1131
1134
|
/** Timestamp of the last job status check */
|
|
1132
|
-
jobStatusCheckedAt:
|
|
1135
|
+
jobStatusCheckedAt: import_zod27.z.string().optional(),
|
|
1133
1136
|
/** MCP server IDs to enable for this run (optional) */
|
|
1134
|
-
mcpIds:
|
|
1137
|
+
mcpIds: import_zod27.z.array(import_zod27.z.string()).optional(),
|
|
1135
1138
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1136
|
-
subAgentIds:
|
|
1139
|
+
subAgentIds: import_zod27.z.array(import_zod27.z.string()).optional()
|
|
1137
1140
|
});
|
|
1138
1141
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1139
1142
|
id: true,
|
|
@@ -1146,28 +1149,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1146
1149
|
startedAt: true,
|
|
1147
1150
|
completedAt: true
|
|
1148
1151
|
});
|
|
1149
|
-
var EvaluationProgressSchema =
|
|
1150
|
-
runId:
|
|
1151
|
-
targetId:
|
|
1152
|
-
totalScenarios:
|
|
1153
|
-
completedScenarios:
|
|
1154
|
-
scenarioProgress:
|
|
1155
|
-
|
|
1156
|
-
scenarioId:
|
|
1157
|
-
currentStep:
|
|
1158
|
-
error:
|
|
1152
|
+
var EvaluationProgressSchema = import_zod27.z.object({
|
|
1153
|
+
runId: import_zod27.z.string(),
|
|
1154
|
+
targetId: import_zod27.z.string(),
|
|
1155
|
+
totalScenarios: import_zod27.z.number(),
|
|
1156
|
+
completedScenarios: import_zod27.z.number(),
|
|
1157
|
+
scenarioProgress: import_zod27.z.array(
|
|
1158
|
+
import_zod27.z.object({
|
|
1159
|
+
scenarioId: import_zod27.z.string(),
|
|
1160
|
+
currentStep: import_zod27.z.string(),
|
|
1161
|
+
error: import_zod27.z.string().optional()
|
|
1159
1162
|
})
|
|
1160
1163
|
),
|
|
1161
|
-
createdAt:
|
|
1164
|
+
createdAt: import_zod27.z.number()
|
|
1162
1165
|
});
|
|
1163
|
-
var EvaluationLogSchema =
|
|
1164
|
-
runId:
|
|
1165
|
-
scenarioId:
|
|
1166
|
-
log:
|
|
1167
|
-
level:
|
|
1168
|
-
message:
|
|
1169
|
-
args:
|
|
1170
|
-
error:
|
|
1166
|
+
var EvaluationLogSchema = import_zod27.z.object({
|
|
1167
|
+
runId: import_zod27.z.string(),
|
|
1168
|
+
scenarioId: import_zod27.z.string(),
|
|
1169
|
+
log: import_zod27.z.object({
|
|
1170
|
+
level: import_zod27.z.enum(["info", "error", "debug"]),
|
|
1171
|
+
message: import_zod27.z.string().optional(),
|
|
1172
|
+
args: import_zod27.z.array(import_zod27.z.any()).optional(),
|
|
1173
|
+
error: import_zod27.z.string().optional()
|
|
1171
1174
|
})
|
|
1172
1175
|
});
|
|
1173
1176
|
var LLM_TIMEOUT = 12e4;
|
|
@@ -1180,95 +1183,95 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1180
1183
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1181
1184
|
return AssertionResultStatus2;
|
|
1182
1185
|
})(AssertionResultStatus || {});
|
|
1183
|
-
var AssertionResultSchema =
|
|
1184
|
-
id:
|
|
1185
|
-
assertionId:
|
|
1186
|
-
assertionType:
|
|
1187
|
-
assertionName:
|
|
1188
|
-
status:
|
|
1189
|
-
message:
|
|
1190
|
-
expected:
|
|
1191
|
-
actual:
|
|
1192
|
-
duration:
|
|
1193
|
-
details:
|
|
1194
|
-
llmTraceSteps:
|
|
1195
|
-
});
|
|
1196
|
-
var EvalRunResultSchema =
|
|
1197
|
-
id:
|
|
1198
|
-
targetId:
|
|
1199
|
-
targetName:
|
|
1186
|
+
var AssertionResultSchema = import_zod28.z.object({
|
|
1187
|
+
id: import_zod28.z.string(),
|
|
1188
|
+
assertionId: import_zod28.z.string(),
|
|
1189
|
+
assertionType: import_zod28.z.string(),
|
|
1190
|
+
assertionName: import_zod28.z.string(),
|
|
1191
|
+
status: import_zod28.z.enum(AssertionResultStatus),
|
|
1192
|
+
message: import_zod28.z.string().optional(),
|
|
1193
|
+
expected: import_zod28.z.string().optional(),
|
|
1194
|
+
actual: import_zod28.z.string().optional(),
|
|
1195
|
+
duration: import_zod28.z.number().optional(),
|
|
1196
|
+
details: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.unknown()).optional(),
|
|
1197
|
+
llmTraceSteps: import_zod28.z.array(LLMTraceStepSchema).optional()
|
|
1198
|
+
});
|
|
1199
|
+
var EvalRunResultSchema = import_zod28.z.object({
|
|
1200
|
+
id: import_zod28.z.string(),
|
|
1201
|
+
targetId: import_zod28.z.string(),
|
|
1202
|
+
targetName: import_zod28.z.string().optional(),
|
|
1200
1203
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1201
|
-
skillVersionId:
|
|
1204
|
+
skillVersionId: import_zod28.z.string().optional(),
|
|
1202
1205
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1203
|
-
skillVersion:
|
|
1204
|
-
scenarioId:
|
|
1205
|
-
scenarioName:
|
|
1206
|
+
skillVersion: import_zod28.z.string().optional(),
|
|
1207
|
+
scenarioId: import_zod28.z.string(),
|
|
1208
|
+
scenarioName: import_zod28.z.string(),
|
|
1206
1209
|
modelConfig: ModelConfigSchema.optional(),
|
|
1207
|
-
assertionResults:
|
|
1210
|
+
assertionResults: import_zod28.z.array(AssertionResultSchema),
|
|
1208
1211
|
metrics: EvalMetricsSchema.optional(),
|
|
1209
|
-
passed:
|
|
1210
|
-
failed:
|
|
1211
|
-
passRate:
|
|
1212
|
-
duration:
|
|
1213
|
-
outputText:
|
|
1214
|
-
files:
|
|
1215
|
-
fileDiffs:
|
|
1212
|
+
passed: import_zod28.z.number(),
|
|
1213
|
+
failed: import_zod28.z.number(),
|
|
1214
|
+
passRate: import_zod28.z.number(),
|
|
1215
|
+
duration: import_zod28.z.number(),
|
|
1216
|
+
outputText: import_zod28.z.string().optional(),
|
|
1217
|
+
files: import_zod28.z.array(ExpectedFileSchema).optional(),
|
|
1218
|
+
fileDiffs: import_zod28.z.array(DiffContentSchema).optional(),
|
|
1216
1219
|
/** Full template files after execution with status indicators */
|
|
1217
|
-
templateFiles:
|
|
1218
|
-
startedAt:
|
|
1219
|
-
completedAt:
|
|
1220
|
+
templateFiles: import_zod28.z.array(TemplateFileSchema).optional(),
|
|
1221
|
+
startedAt: import_zod28.z.string().optional(),
|
|
1222
|
+
completedAt: import_zod28.z.string().optional(),
|
|
1220
1223
|
llmTrace: LLMTraceSchema.optional()
|
|
1221
1224
|
});
|
|
1222
|
-
var PromptResultSchema =
|
|
1223
|
-
text:
|
|
1224
|
-
files:
|
|
1225
|
-
finishReason:
|
|
1226
|
-
reasoning:
|
|
1227
|
-
reasoningDetails:
|
|
1228
|
-
toolCalls:
|
|
1229
|
-
toolResults:
|
|
1230
|
-
warnings:
|
|
1231
|
-
sources:
|
|
1232
|
-
steps:
|
|
1233
|
-
generationTimeMs:
|
|
1234
|
-
prompt:
|
|
1235
|
-
systemPrompt:
|
|
1236
|
-
usage:
|
|
1237
|
-
totalTokens:
|
|
1238
|
-
totalMicrocentsSpent:
|
|
1225
|
+
var PromptResultSchema = import_zod28.z.object({
|
|
1226
|
+
text: import_zod28.z.string(),
|
|
1227
|
+
files: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1228
|
+
finishReason: import_zod28.z.string().optional(),
|
|
1229
|
+
reasoning: import_zod28.z.string().optional(),
|
|
1230
|
+
reasoningDetails: import_zod28.z.unknown().optional(),
|
|
1231
|
+
toolCalls: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1232
|
+
toolResults: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1233
|
+
warnings: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1234
|
+
sources: import_zod28.z.array(import_zod28.z.unknown()).optional(),
|
|
1235
|
+
steps: import_zod28.z.array(import_zod28.z.unknown()),
|
|
1236
|
+
generationTimeMs: import_zod28.z.number(),
|
|
1237
|
+
prompt: import_zod28.z.string(),
|
|
1238
|
+
systemPrompt: import_zod28.z.string(),
|
|
1239
|
+
usage: import_zod28.z.object({
|
|
1240
|
+
totalTokens: import_zod28.z.number().optional(),
|
|
1241
|
+
totalMicrocentsSpent: import_zod28.z.number().optional()
|
|
1239
1242
|
})
|
|
1240
1243
|
});
|
|
1241
|
-
var EvaluationResultSchema =
|
|
1242
|
-
id:
|
|
1243
|
-
runId:
|
|
1244
|
-
timestamp:
|
|
1244
|
+
var EvaluationResultSchema = import_zod28.z.object({
|
|
1245
|
+
id: import_zod28.z.string(),
|
|
1246
|
+
runId: import_zod28.z.string(),
|
|
1247
|
+
timestamp: import_zod28.z.number(),
|
|
1245
1248
|
promptResult: PromptResultSchema,
|
|
1246
|
-
testResults:
|
|
1247
|
-
tags:
|
|
1248
|
-
feedback:
|
|
1249
|
-
score:
|
|
1250
|
-
suiteId:
|
|
1251
|
-
});
|
|
1252
|
-
var LeanEvaluationResultSchema =
|
|
1253
|
-
id:
|
|
1254
|
-
runId:
|
|
1255
|
-
timestamp:
|
|
1256
|
-
tags:
|
|
1257
|
-
scenarioId:
|
|
1258
|
-
scenarioVersion:
|
|
1259
|
-
targetId:
|
|
1260
|
-
targetVersion:
|
|
1261
|
-
suiteId:
|
|
1262
|
-
score:
|
|
1263
|
-
time:
|
|
1264
|
-
microcentsSpent:
|
|
1249
|
+
testResults: import_zod28.z.array(import_zod28.z.unknown()),
|
|
1250
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1251
|
+
feedback: import_zod28.z.string().optional(),
|
|
1252
|
+
score: import_zod28.z.number(),
|
|
1253
|
+
suiteId: import_zod28.z.string().optional()
|
|
1254
|
+
});
|
|
1255
|
+
var LeanEvaluationResultSchema = import_zod28.z.object({
|
|
1256
|
+
id: import_zod28.z.string(),
|
|
1257
|
+
runId: import_zod28.z.string(),
|
|
1258
|
+
timestamp: import_zod28.z.number(),
|
|
1259
|
+
tags: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1260
|
+
scenarioId: import_zod28.z.string(),
|
|
1261
|
+
scenarioVersion: import_zod28.z.number().optional(),
|
|
1262
|
+
targetId: import_zod28.z.string(),
|
|
1263
|
+
targetVersion: import_zod28.z.number().optional(),
|
|
1264
|
+
suiteId: import_zod28.z.string().optional(),
|
|
1265
|
+
score: import_zod28.z.number(),
|
|
1266
|
+
time: import_zod28.z.number().optional(),
|
|
1267
|
+
microcentsSpent: import_zod28.z.number().optional()
|
|
1265
1268
|
});
|
|
1266
1269
|
|
|
1267
1270
|
// src/project/project.ts
|
|
1268
|
-
var
|
|
1271
|
+
var import_zod29 = require("zod");
|
|
1269
1272
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1270
|
-
appId:
|
|
1271
|
-
appSecret:
|
|
1273
|
+
appId: import_zod29.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
1274
|
+
appSecret: import_zod29.z.string().optional().describe("The secret of the app in Dev Center")
|
|
1272
1275
|
});
|
|
1273
1276
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1274
1277
|
id: true,
|
|
@@ -1279,10 +1282,9 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1279
1282
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
1280
1283
|
|
|
1281
1284
|
// src/template/template.ts
|
|
1282
|
-
var import_zod29 = require("zod");
|
|
1283
1285
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
1284
|
-
/**
|
|
1285
|
-
|
|
1286
|
+
/** GitHub source reference for fetching template files */
|
|
1287
|
+
source: GitHubSourceSchema.optional()
|
|
1286
1288
|
});
|
|
1287
1289
|
var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
1288
1290
|
id: true,
|