@verica-app/cli 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/README.md +71 -5
- package/dist/cli.js +71 -12
- package/package.json +5 -8
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `@verica-app/cli` are documented here.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
|
|
6
|
+
This package is **pre-1.0**: while on `0.x`, a breaking change bumps the **minor**
|
|
7
|
+
version and additive features/fixes bump the **patch** (see [Stability](./README.md#stability)).
|
|
8
|
+
|
|
9
|
+
## [Unreleased]
|
|
10
|
+
|
|
11
|
+
## [0.1.3] - 2026-06-18
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- `--tools <file>` flag and `tools:` in `.verica.yml` to push a prompt's tool
|
|
16
|
+
definitions from the repo. The manifest accepts either a **path to a JSON file**
|
|
17
|
+
or an **inline array**. Each tool may use Verica's flat shape
|
|
18
|
+
(`{ name, description, parameters }`) **or** the OpenAI wrapper
|
|
19
|
+
(`{ type: "function", function: { … } }`), which is auto-unwrapped — paste your
|
|
20
|
+
real schemas as-is.
|
|
21
|
+
|
|
22
|
+
### Changed
|
|
23
|
+
|
|
24
|
+
- **Prompt push is now field-level.** `--prompt` (user template), `--system-prompt`,
|
|
25
|
+
and `--tools` are independent: push only the fields you changed and every omitted
|
|
26
|
+
field is **inherited from the eval's current prompt version**. A new version is
|
|
27
|
+
created only if the merged content differs.
|
|
28
|
+
- Previously, a push that included `--prompt` without `--system-prompt` produced a
|
|
29
|
+
version with **no system prompt** (it was dropped, not inherited). If your CI
|
|
30
|
+
relied on that, pass all the fields you intend to set.
|
|
31
|
+
- `--system-prompt` (or `--tools`) can now be pushed **on its own** — earlier the
|
|
32
|
+
CLI sent a prompt block only when `--prompt` was present.
|
|
33
|
+
- Prompt templates reference dataset columns by their **bare name** (`{{ pais }}`),
|
|
34
|
+
and grader/judge prompts reference the model output via `{{ output.text }}` /
|
|
35
|
+
`{{ output.tool_calls }}`. The legacy `{{ item.* }}` / `{{ sample.* }}` forms still
|
|
36
|
+
resolve, so existing prompt files keep working.
|
|
37
|
+
|
|
38
|
+
## [0.1.2] - 2026-06-18
|
|
39
|
+
|
|
40
|
+
### Changed
|
|
41
|
+
|
|
42
|
+
- Default to the hosted API (`https://verica.app`). `VERICA_BASE_URL` and `--base-url`
|
|
43
|
+
are now overrides for local dev / self-hosting only — clients no longer configure a URL.
|
|
44
|
+
|
|
45
|
+
## [0.1.1] - 2026-06-18
|
|
46
|
+
|
|
47
|
+
### Changed
|
|
48
|
+
|
|
49
|
+
- Published under the `@verica-app` scope, with a tag-triggered release workflow
|
|
50
|
+
(`cli-v*` → `npm publish --provenance`). No behavior change.
|
|
51
|
+
|
|
52
|
+
## [0.1.0] - 2026-06-18
|
|
53
|
+
|
|
54
|
+
### Added
|
|
55
|
+
|
|
56
|
+
- Initial release. The `run` command:
|
|
57
|
+
- `--eval <id>` (single) or `--manifest <file>` (`.verica.yml`, multi-prompt).
|
|
58
|
+
- Pushes prompt content (`--prompt` / `--system-prompt`), versioned by content equality.
|
|
59
|
+
- `--model` · `--sampling <file.json>` for execution config.
|
|
60
|
+
- `--wait` polls to a terminal status; the exit code reflects the gate.
|
|
61
|
+
- `--junit <file>` · `--junit-mode rows|gate` for a JUnit report; `--json` for
|
|
62
|
+
machine-readable results.
|
|
63
|
+
- `--threshold` · `--baseline-ref` · `--baseline-run` to override the gate per branch.
|
|
64
|
+
- Git provenance (`--git-sha` / `--git-ref`) auto-detected from common CI env vars.
|
|
65
|
+
- Exit codes: `0` passed · `1` gate failed · `2` validation/transport error.
|
|
66
|
+
- `VERICA_TOKEN` is the only required secret; provider keys stay in Verica (BYOK).
|
package/README.md
CHANGED
|
@@ -27,6 +27,8 @@ npm i -D @verica-app/cli
|
|
|
27
27
|
verica run \
|
|
28
28
|
--eval eval_8x2k9d \
|
|
29
29
|
--prompt prompts/support-agent.txt \
|
|
30
|
+
--system-prompt prompts/support-agent.system.txt `# optional` \
|
|
31
|
+
--tools prompts/support-agent.tools.json `# optional` \
|
|
30
32
|
--model gpt-4.1-mini \
|
|
31
33
|
--wait \
|
|
32
34
|
--junit verica-results.xml \
|
|
@@ -44,24 +46,32 @@ verica run --manifest .verica.yml --wait --junit report.xml
|
|
|
44
46
|
evals:
|
|
45
47
|
- id: eval_8x2k9d
|
|
46
48
|
prompt: prompts/support-agent.txt
|
|
49
|
+
systemPrompt: prompts/support-agent.system.txt
|
|
50
|
+
tools: prompts/support-agent.tools.json # a path to a JSON file…
|
|
47
51
|
sampling: { temperature: 0.2, maxTokens: 512 }
|
|
48
52
|
model: gpt-4.1-mini
|
|
49
53
|
- id: eval_3p1m7q
|
|
50
54
|
prompt: prompts/triage.txt
|
|
55
|
+
tools: # …or an inline array
|
|
56
|
+
- name: get_order
|
|
57
|
+
description: Look up an order by id
|
|
58
|
+
parameters: { type: object, properties: { id: { type: string } }, required: [id] }
|
|
51
59
|
model: claude-sonnet-4-6
|
|
52
60
|
```
|
|
53
61
|
|
|
54
62
|
## Environment
|
|
55
63
|
|
|
56
|
-
| Var
|
|
57
|
-
|
|
|
58
|
-
| `VERICA_TOKEN`
|
|
59
|
-
|
|
64
|
+
| Var | Required | Notes |
|
|
65
|
+
| -------------- | -------- | -------------------------------------------- |
|
|
66
|
+
| `VERICA_TOKEN` | yes | Workspace API token (Settings → API tokens). |
|
|
67
|
+
|
|
68
|
+
`VERICA_TOKEN` is the only thing you set. The CLI talks to the hosted Verica API by
|
|
69
|
+
default — you don't configure a URL.
|
|
60
70
|
|
|
61
71
|
## Key flags
|
|
62
72
|
|
|
63
73
|
- `--eval <id>` / `--manifest <file>` — what to run.
|
|
64
|
-
- `--prompt <file>` / `--system-prompt <file>` — prompt content to push (versioned by content).
|
|
74
|
+
- `--prompt <file>` / `--system-prompt <file>` / `--tools <file>` — prompt content to push (versioned by content). See [Prompt content](#prompt-content-what-you-push).
|
|
65
75
|
- `--model <model>` · `--sampling <file.json>` — execution config.
|
|
66
76
|
- `--wait` — poll to completion; the exit code reflects the gate.
|
|
67
77
|
- `--junit <file>` · `--junit-mode rows|gate` — JUnit report (default `rows`).
|
|
@@ -69,9 +79,65 @@ evals:
|
|
|
69
79
|
- `--threshold <0..1>` · `--baseline-ref <ref>` · `--baseline-run <id>` — override the gate per branch.
|
|
70
80
|
- `--git-sha` / `--git-ref` — provenance (auto-detected from CI env otherwise).
|
|
71
81
|
|
|
82
|
+
> Local dev / self-hosting only: point the CLI at another instance with `--base-url`
|
|
83
|
+
> (or the `VERICA_BASE_URL` env var). Clients never need this.
|
|
84
|
+
|
|
85
|
+
## Prompt content (what you push)
|
|
86
|
+
|
|
87
|
+
The repo owns the **prompt**: the user template (`--prompt`), the system prompt
|
|
88
|
+
(`--system-prompt`), and the tool definitions (`--tools`). The **dataset, graders,
|
|
89
|
+
gate, and any few-shot/simulated turns stay in Verica** — they're the test scenario,
|
|
90
|
+
managed by whoever owns the eval.
|
|
91
|
+
|
|
92
|
+
Each of the three prompt fields is **independent and optional**: push the ones you
|
|
93
|
+
changed and every omitted field is **inherited from the current version**. A push
|
|
94
|
+
creates a new prompt version only if the merged content actually differs.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
verica run --eval eval_8x2k9d --system-prompt prompts/agent.system.txt --model gpt-4.1-mini --wait
|
|
98
|
+
# user template + tools inherited; only the system prompt re-versions
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Templates reference dataset columns by name — e.g. `What is the capital of {{ pais }}?`
|
|
102
|
+
(the column is `pais`). Grader/judge prompts can also reference the model output via
|
|
103
|
+
`{{ output.text }}` / `{{ output.tool_calls }}`.
|
|
104
|
+
|
|
105
|
+
**Tools** are pushed as JSON — a `--tools <file>`, or a path / inline array under
|
|
106
|
+
`tools:` in the manifest. Each entry may be Verica's flat shape **or** the OpenAI
|
|
107
|
+
wrapper (auto-unwrapped), so you can paste your real schemas as-is:
|
|
108
|
+
|
|
109
|
+
```json
|
|
110
|
+
[
|
|
111
|
+
{ "name": "get_order", "description": "Look up an order by id",
|
|
112
|
+
"parameters": { "type": "object", "properties": { "id": { "type": "string" } }, "required": ["id"] } },
|
|
113
|
+
{ "type": "function", "function": { "name": "cancel_order", "description": "…", "parameters": { "type": "object" } } }
|
|
114
|
+
]
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
Tools are never executed — the model's *decision* to call one (and with which
|
|
118
|
+
arguments) is what the eval grades.
|
|
119
|
+
|
|
72
120
|
## Exit codes
|
|
73
121
|
|
|
74
122
|
`0` passed · `1` gate failed · `2` validation/transport error.
|
|
75
123
|
|
|
124
|
+
## Stability
|
|
125
|
+
|
|
126
|
+
This CLI is **pre-1.0 (`0.x`)**. The command surface, the `--json` payload, the JUnit
|
|
127
|
+
output, and the prompt-push behavior are still settling and may change. Exit codes
|
|
128
|
+
(`0`/`1`/`2`) are stable.
|
|
129
|
+
|
|
130
|
+
During `0.x` the **minor** version is the breaking lever, so pin accordingly:
|
|
131
|
+
|
|
132
|
+
```jsonc
|
|
133
|
+
// package.json
|
|
134
|
+
"@verica-app/cli": "~0.1" // >=0.1.0 <0.2.0 — gets patches, not breaking minors
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
We bump the **minor** for any breaking change (flags, output shapes, push behavior) and
|
|
138
|
+
the **patch** for additive features and fixes. **1.0** will freeze the commands, flags,
|
|
139
|
+
exit codes, and output shapes under standard semver. See
|
|
140
|
+
[CHANGELOG.md](./CHANGELOG.md) for what changed in each release.
|
|
141
|
+
|
|
76
142
|
MIT licensed. There's no IP in the client — the engine, graders, gate, and crypto all
|
|
77
143
|
run server-side behind the token API.
|
package/dist/cli.js
CHANGED
|
@@ -4067,9 +4067,16 @@ var samplingParamsSchema = external_exports.object({
|
|
|
4067
4067
|
reasoning: external_exports.boolean().optional()
|
|
4068
4068
|
});
|
|
4069
4069
|
var runRequestSchema = external_exports.object({
|
|
4070
|
-
/**
|
|
4070
|
+
/**
|
|
4071
|
+
* Prompt content to push. Omit the whole block to run the eval's current
|
|
4072
|
+
* (UI-managed) prompt unchanged. Each field is independent: supply only the
|
|
4073
|
+
* ones you're changing — every omitted field (template / systemPrompt / tools)
|
|
4074
|
+
* is inherited from the current version, and a new version is created only if
|
|
4075
|
+
* the merged result differs (e.g. push just `systemPrompt` to re-version the
|
|
4076
|
+
* system prompt while keeping the user template).
|
|
4077
|
+
*/
|
|
4071
4078
|
prompt: external_exports.object({
|
|
4072
|
-
template: external_exports.string(),
|
|
4079
|
+
template: external_exports.string().optional(),
|
|
4073
4080
|
systemPrompt: external_exports.string().optional(),
|
|
4074
4081
|
tools: external_exports.array(toolDefinitionSchema).optional()
|
|
4075
4082
|
}).optional(),
|
|
@@ -4289,6 +4296,7 @@ function parseManifest(raw, source = ".verica.yml") {
|
|
|
4289
4296
|
id: e.id,
|
|
4290
4297
|
prompt: typeof e.prompt === "string" ? e.prompt : void 0,
|
|
4291
4298
|
systemPrompt: typeof e.systemPrompt === "string" ? e.systemPrompt : void 0,
|
|
4299
|
+
tools: typeof e.tools === "string" || Array.isArray(e.tools) ? e.tools : void 0,
|
|
4292
4300
|
model: typeof e.model === "string" ? e.model : void 0,
|
|
4293
4301
|
sampling: e.sampling ?? void 0
|
|
4294
4302
|
};
|
|
@@ -4298,6 +4306,27 @@ async function loadManifest(path) {
|
|
|
4298
4306
|
return parseManifest(await readFile(path, "utf8"), path);
|
|
4299
4307
|
}
|
|
4300
4308
|
|
|
4309
|
+
// src/tools.ts
|
|
4310
|
+
function unwrap(entry) {
|
|
4311
|
+
if (entry !== null && typeof entry === "object" && entry.type === "function" && typeof entry.function === "object" && entry.function !== null) {
|
|
4312
|
+
return entry.function;
|
|
4313
|
+
}
|
|
4314
|
+
return entry;
|
|
4315
|
+
}
|
|
4316
|
+
function normalizeToolDefinitions(raw) {
|
|
4317
|
+
if (!Array.isArray(raw)) {
|
|
4318
|
+
throw new Error("tools must be a JSON array of tool definitions.");
|
|
4319
|
+
}
|
|
4320
|
+
return raw.map((entry, i) => {
|
|
4321
|
+
const parsed = toolDefinitionSchema.safeParse(unwrap(entry));
|
|
4322
|
+
if (!parsed.success) {
|
|
4323
|
+
const why = parsed.error.issues.map((issue) => issue.message).join("; ");
|
|
4324
|
+
throw new Error(`tools[${i}] is not a valid tool definition: ${why}`);
|
|
4325
|
+
}
|
|
4326
|
+
return parsed.data;
|
|
4327
|
+
});
|
|
4328
|
+
}
|
|
4329
|
+
|
|
4301
4330
|
// src/junit.ts
|
|
4302
4331
|
function esc(s) {
|
|
4303
4332
|
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
|
|
@@ -4392,7 +4421,7 @@ async function runCommand(opts) {
|
|
|
4392
4421
|
);
|
|
4393
4422
|
err(` ${accepted.resultUrl}`);
|
|
4394
4423
|
if (!opts.wait) {
|
|
4395
|
-
summaries.push(
|
|
4424
|
+
summaries.push(buildSummary(entry.id, { status: "queued", accepted }));
|
|
4396
4425
|
continue;
|
|
4397
4426
|
}
|
|
4398
4427
|
const run = await pollUntilTerminal(client, accepted.runId, {
|
|
@@ -4406,12 +4435,12 @@ async function runCommand(opts) {
|
|
|
4406
4435
|
opts.junitMode === "rows" ? rowsSuite(entry.id, await client.getResults(accepted.runId)) : gateSuite(entry.id, run)
|
|
4407
4436
|
);
|
|
4408
4437
|
}
|
|
4409
|
-
summaries.push({
|
|
4438
|
+
summaries.push(buildSummary(entry.id, { status: "waited", runId: accepted.runId, run }));
|
|
4410
4439
|
} catch (e) {
|
|
4411
4440
|
worst = EXIT.error;
|
|
4412
4441
|
const message = e instanceof Error ? e.message : String(e);
|
|
4413
4442
|
err(`\u2717 ${entry.id}: ${message}`);
|
|
4414
|
-
summaries.push(
|
|
4443
|
+
summaries.push(buildSummary(entry.id, { status: "error", message }));
|
|
4415
4444
|
}
|
|
4416
4445
|
}
|
|
4417
4446
|
if (opts.junitFile && suites.length > 0) {
|
|
@@ -4432,6 +4461,7 @@ async function resolveEntries(opts) {
|
|
|
4432
4461
|
id: opts.evalId,
|
|
4433
4462
|
prompt: opts.promptFile,
|
|
4434
4463
|
systemPrompt: opts.systemPromptFile,
|
|
4464
|
+
tools: opts.toolsFile,
|
|
4435
4465
|
model: opts.model
|
|
4436
4466
|
}
|
|
4437
4467
|
];
|
|
@@ -4442,18 +4472,39 @@ async function buildRequest(entry, ctx) {
|
|
|
4442
4472
|
}
|
|
4443
4473
|
const template = entry.prompt ? await readFile2(entry.prompt, "utf8") : void 0;
|
|
4444
4474
|
const systemPrompt = entry.systemPrompt ? await readFile2(entry.systemPrompt, "utf8") : void 0;
|
|
4475
|
+
const tools = await resolveTools(entry.tools);
|
|
4445
4476
|
let sampling = entry.sampling;
|
|
4446
4477
|
if (!sampling && ctx.samplingFile) {
|
|
4447
4478
|
sampling = JSON.parse(await readFile2(ctx.samplingFile, "utf8"));
|
|
4448
4479
|
}
|
|
4480
|
+
const prompt = template !== void 0 || systemPrompt !== void 0 || tools !== void 0 ? {
|
|
4481
|
+
...template !== void 0 ? { template } : {},
|
|
4482
|
+
...systemPrompt !== void 0 ? { systemPrompt } : {},
|
|
4483
|
+
...tools !== void 0 ? { tools } : {}
|
|
4484
|
+
} : void 0;
|
|
4449
4485
|
return {
|
|
4450
4486
|
model: entry.model,
|
|
4451
|
-
...
|
|
4487
|
+
...prompt ? { prompt } : {},
|
|
4452
4488
|
...sampling ? { samplingParams: sampling } : {},
|
|
4453
4489
|
...ctx.git ? { git: ctx.git } : {},
|
|
4454
4490
|
...ctx.gate ? { gate: ctx.gate } : {}
|
|
4455
4491
|
};
|
|
4456
4492
|
}
|
|
4493
|
+
function buildSummary(evalId, outcome) {
|
|
4494
|
+
switch (outcome.status) {
|
|
4495
|
+
case "queued":
|
|
4496
|
+
return { evalId, runId: outcome.accepted.runId, resultUrl: outcome.accepted.resultUrl };
|
|
4497
|
+
case "waited":
|
|
4498
|
+
return { evalId, runId: outcome.runId, ...outcome.run };
|
|
4499
|
+
case "error":
|
|
4500
|
+
return { evalId, error: outcome.message };
|
|
4501
|
+
}
|
|
4502
|
+
}
|
|
4503
|
+
async function resolveTools(tools) {
|
|
4504
|
+
if (tools === void 0) return void 0;
|
|
4505
|
+
const raw = typeof tools === "string" ? JSON.parse(await readFile2(tools, "utf8")) : tools;
|
|
4506
|
+
return normalizeToolDefinitions(raw);
|
|
4507
|
+
}
|
|
4457
4508
|
function resolveGit(opts) {
|
|
4458
4509
|
const sha = opts.gitSha ?? process.env.GITHUB_SHA ?? process.env.CI_COMMIT_SHA;
|
|
4459
4510
|
const ref = opts.gitRef ?? process.env.GITHUB_REF ?? process.env.CI_COMMIT_REF_NAME;
|
|
@@ -4485,6 +4536,7 @@ function printSummary(evalId, run) {
|
|
|
4485
4536
|
}
|
|
4486
4537
|
|
|
4487
4538
|
// src/cli.ts
|
|
4539
|
+
var DEFAULT_BASE_URL = "https://verica.app";
|
|
4488
4540
|
var USAGE = `verica \u2014 run a Verica eval from CI and gate the merge on the result.
|
|
4489
4541
|
|
|
4490
4542
|
Usage:
|
|
@@ -4493,8 +4545,13 @@ Usage:
|
|
|
4493
4545
|
|
|
4494
4546
|
Options:
|
|
4495
4547
|
--eval <id> Eval to run (or use --manifest for many).
|
|
4496
|
-
--prompt <file>
|
|
4497
|
-
--system-prompt <file> System-prompt file
|
|
4548
|
+
--prompt <file> User prompt (template) file to push (versioned by content).
|
|
4549
|
+
--system-prompt <file> System-prompt file. Either prompt file is optional and
|
|
4550
|
+
independent: push one and the other is inherited from
|
|
4551
|
+
the current version (omit both to run it unchanged).
|
|
4552
|
+
--tools <file> Tool definitions to push: a JSON file (Verica-flat or
|
|
4553
|
+
OpenAI {type:function,\u2026} entries). Omit to inherit the
|
|
4554
|
+
current version's tools. Inline arrays: .verica.yml only.
|
|
4498
4555
|
--model <model> Model to sample under (overrides the manifest).
|
|
4499
4556
|
--sampling <file> JSON sampling params (temperature, maxTokens, \u2026).
|
|
4500
4557
|
--manifest <file> .verica.yml mapping prompts \u2192 eval IDs (multi-prompt).
|
|
@@ -4507,14 +4564,15 @@ Options:
|
|
|
4507
4564
|
--baseline-run <id> No-regression baseline = this specific run.
|
|
4508
4565
|
--git-sha <sha> Commit SHA (else auto-detected from CI env).
|
|
4509
4566
|
--git-ref <ref> Git ref (else auto-detected from CI env).
|
|
4510
|
-
--base-url <url> API base URL (
|
|
4567
|
+
--base-url <url> Override the API base URL (dev/self-host only).
|
|
4511
4568
|
--poll-interval <sec> Initial poll interval (default 3).
|
|
4512
4569
|
--timeout <sec> Max wait (default 1800).
|
|
4513
4570
|
--help Show this help.
|
|
4514
4571
|
|
|
4515
4572
|
Env:
|
|
4516
4573
|
VERICA_TOKEN Workspace API token (required).
|
|
4517
|
-
VERICA_BASE_URL API base URL
|
|
4574
|
+
VERICA_BASE_URL Override the API base URL \u2014 dev/self-host only;
|
|
4575
|
+
defaults to ${DEFAULT_BASE_URL}.
|
|
4518
4576
|
|
|
4519
4577
|
Exit codes: 0 passed, 1 gate failed, 2 validation/transport error.`;
|
|
4520
4578
|
function finiteNumber(v) {
|
|
@@ -4529,6 +4587,7 @@ async function main() {
|
|
|
4529
4587
|
eval: { type: "string" },
|
|
4530
4588
|
prompt: { type: "string" },
|
|
4531
4589
|
"system-prompt": { type: "string" },
|
|
4590
|
+
tools: { type: "string" },
|
|
4532
4591
|
model: { type: "string" },
|
|
4533
4592
|
sampling: { type: "string" },
|
|
4534
4593
|
manifest: { type: "string" },
|
|
@@ -4554,8 +4613,7 @@ async function main() {
|
|
|
4554
4613
|
}
|
|
4555
4614
|
const token = process.env.VERICA_TOKEN;
|
|
4556
4615
|
if (!token) throw new Error("VERICA_TOKEN is required (a workspace API token).");
|
|
4557
|
-
const baseUrl = values["base-url"]
|
|
4558
|
-
if (!baseUrl) throw new Error("Set --base-url or the VERICA_BASE_URL environment variable.");
|
|
4616
|
+
const baseUrl = values["base-url"]?.trim() || process.env.VERICA_BASE_URL?.trim() || DEFAULT_BASE_URL;
|
|
4559
4617
|
const threshold = finiteNumber(values.threshold);
|
|
4560
4618
|
if (values.threshold !== void 0 && threshold === void 0) {
|
|
4561
4619
|
throw new Error(`--threshold must be a number between 0 and 1 (got "${values.threshold}").`);
|
|
@@ -4566,6 +4624,7 @@ async function main() {
|
|
|
4566
4624
|
evalId: values.eval,
|
|
4567
4625
|
promptFile: values.prompt,
|
|
4568
4626
|
systemPromptFile: values["system-prompt"],
|
|
4627
|
+
toolsFile: values.tools,
|
|
4569
4628
|
samplingFile: values.sampling,
|
|
4570
4629
|
model: values.model,
|
|
4571
4630
|
manifestFile: values.manifest,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@verica-app/cli",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Run a Verica eval from CI and block the merge on the result.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -12,18 +12,15 @@
|
|
|
12
12
|
"prompt",
|
|
13
13
|
"testing"
|
|
14
14
|
],
|
|
15
|
-
"repository": {
|
|
16
|
-
"type": "git",
|
|
17
|
-
"url": "git+https://github.com/mtn-labs/evals.git",
|
|
18
|
-
"directory": "packages/cli"
|
|
19
|
-
},
|
|
20
|
-
"homepage": "https://github.com/mtn-labs/evals/tree/main/packages/cli#readme",
|
|
15
|
+
"homepage": "https://verica.app",
|
|
21
16
|
"type": "module",
|
|
22
17
|
"bin": {
|
|
23
18
|
"verica": "./dist/cli.js"
|
|
24
19
|
},
|
|
25
20
|
"files": [
|
|
26
|
-
"dist"
|
|
21
|
+
"dist",
|
|
22
|
+
"README.md",
|
|
23
|
+
"CHANGELOG.md"
|
|
27
24
|
],
|
|
28
25
|
"publishConfig": {
|
|
29
26
|
"access": "public"
|