@kindlm/cli 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +142 -0
- package/dist/index.cjs +8 -8
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +8 -8
- package/dist/index.js.map +1 -1
- package/dist/kindlm.js +9 -9
- package/dist/kindlm.js.map +1 -1
- package/package.json +6 -5
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 KindLM Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# KindLM
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
Behavioral regression testing for AI agents. Test what your agents **do** — not just what they say.
|
|
6
|
+
|
|
7
|
+
## Why KindLM?
|
|
8
|
+
|
|
9
|
+
LLM evals measure text quality. KindLM tests **behavior** — the tool calls your agent makes, the decisions it takes, and whether it leaks PII or violates compliance rules. It runs in CI so regressions never ship.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Tool call assertions** — verify agents call the right tools with the right arguments, in the right order
|
|
14
|
+
- **Schema validation** — structured output checked against JSON Schema (AJV)
|
|
15
|
+
- **PII detection** — catch leaked SSNs, credit cards, emails, phone numbers, IBANs
|
|
16
|
+
- **LLM-as-judge** — score responses against natural-language criteria (0.0–1.0)
|
|
17
|
+
- **Drift detection** — semantic + field-level comparison against saved baselines
|
|
18
|
+
- **Keyword guards** — require or forbid specific phrases in output
|
|
19
|
+
- **Latency & cost budgets** — fail tests that exceed time or token-cost thresholds
|
|
20
|
+
- **EU AI Act compliance** — generate Annex IV documentation from test results
|
|
21
|
+
- **CI-native** — exit code 0/1, JUnit XML reporter, GitHub Actions ready
|
|
22
|
+
|
|
23
|
+
## Supported Providers
|
|
24
|
+
|
|
25
|
+
| Provider | Example config |
|
|
26
|
+
|----------|---------------|
|
|
27
|
+
| OpenAI | `openai:gpt-4o` |
|
|
28
|
+
| Anthropic | `anthropic:claude-sonnet-4-5-20250929` |
|
|
29
|
+
| Google Gemini | `google:gemini-2.0-flash` |
|
|
30
|
+
| Mistral | `mistral:mistral-large-latest` |
|
|
31
|
+
| Cohere | `cohere:command-r-plus` |
|
|
32
|
+
| Ollama | `ollama:llama3` |
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
Try it instantly:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
npx @kindlm/cli init
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Or install globally:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
npm install -g @kindlm/cli
|
|
46
|
+
kindlm init
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Edit the generated `kindlm.yaml`:
|
|
50
|
+
|
|
51
|
+
```yaml
|
|
52
|
+
kindlm: 1
|
|
53
|
+
project: "my-agent"
|
|
54
|
+
|
|
55
|
+
suite:
|
|
56
|
+
name: "refund-agent"
|
|
57
|
+
|
|
58
|
+
providers:
|
|
59
|
+
openai:
|
|
60
|
+
apiKeyEnv: "OPENAI_API_KEY"
|
|
61
|
+
|
|
62
|
+
models:
|
|
63
|
+
- id: "gpt-4o"
|
|
64
|
+
provider: "openai"
|
|
65
|
+
model: "gpt-4o"
|
|
66
|
+
params:
|
|
67
|
+
temperature: 0
|
|
68
|
+
|
|
69
|
+
prompts:
|
|
70
|
+
refund:
|
|
71
|
+
system: "You are a refund support agent. Use lookup_order(order_id) to find orders."
|
|
72
|
+
user: "{{message}}"
|
|
73
|
+
|
|
74
|
+
tests:
|
|
75
|
+
- name: "looks-up-order"
|
|
76
|
+
prompt: "refund"
|
|
77
|
+
vars:
|
|
78
|
+
message: "I want to return order #12345"
|
|
79
|
+
tools:
|
|
80
|
+
- name: "lookup_order"
|
|
81
|
+
responses:
|
|
82
|
+
- when: { order_id: "12345" }
|
|
83
|
+
then: { order_id: "12345", status: "eligible" }
|
|
84
|
+
expect:
|
|
85
|
+
toolCalls:
|
|
86
|
+
- tool: "lookup_order"
|
|
87
|
+
argsMatch: { order_id: "12345" }
|
|
88
|
+
guardrails:
|
|
89
|
+
pii:
|
|
90
|
+
enabled: true
|
|
91
|
+
judge:
|
|
92
|
+
- criteria: "Response is empathetic and professional"
|
|
93
|
+
minScore: 0.8
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Run your tests:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
kindlm test
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Output:
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
refund-agent / looks-up-order
|
|
106
|
+
|
|
107
|
+
gpt-4o
|
|
108
|
+
✓ looks-up-order (1.3s)
|
|
109
|
+
✓ tool_called: lookup_order
|
|
110
|
+
✓ pii: no PII detected
|
|
111
|
+
✓ judge: 0.92 ≥ 0.80
|
|
112
|
+
|
|
113
|
+
1 passed, 0 failed
|
|
114
|
+
Gates: ✓ PASSED
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## CI Integration
|
|
118
|
+
|
|
119
|
+
```yaml
|
|
120
|
+
# .github/workflows/test.yml
|
|
121
|
+
- run: npm install -g @kindlm/cli
|
|
122
|
+
- run: kindlm test --reporter junit > results.xml
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Repository Layout
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
packages/
|
|
129
|
+
core/ @kindlm/core — Business logic, zero I/O dependencies
|
|
130
|
+
cli/ @kindlm/cli — CLI entry point
|
|
131
|
+
cloud/ @kindlm/cloud — Cloudflare Workers API + D1 database
|
|
132
|
+
docs/ Technical specs and documentation
|
|
133
|
+
site/ Documentation website (Next.js)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Documentation
|
|
137
|
+
|
|
138
|
+
Full docs: [kindlm.dev](https://kindlm.dev) | Source: [`docs/`](./docs/)
|
|
139
|
+
|
|
140
|
+
## License
|
|
141
|
+
|
|
142
|
+
MIT (core + CLI) | AGPL (cloud)
|
package/dist/index.cjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"use strict";var
|
|
1
|
+
"use strict";var vt=Object.create;var re=Object.defineProperty;var wt=Object.getOwnPropertyDescriptor;var St=Object.getOwnPropertyNames;var Tt=Object.getPrototypeOf,bt=Object.prototype.hasOwnProperty;var kt=(e,t)=>{for(var o in t)re(e,o,{get:t[o],enumerable:!0})},ke=(e,t,o,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of St(t))!bt.call(e,r)&&r!==o&&re(e,r,{get:()=>t[r],enumerable:!(n=wt(t,r))||n.enumerable});return e};var E=(e,t,o)=>(o=e!=null?vt(Tt(e)):{},ke(t||!e||!e.__esModule?re(o,"default",{value:e,enumerable:!0}):o,e)),Et=e=>ke(re({},"__esModule",{value:!0}),e);var qt={};kt(qt,{createProgram:()=>Xt});module.exports=Et(qt);var ft=require("commander");var oe=require("fs"),Ee=require("path"),_=E(require("chalk"),1),xt=`kindlm: 1
|
|
2
2
|
project: my-project
|
|
3
3
|
|
|
4
4
|
suite:
|
|
@@ -42,11 +42,11 @@ defaults:
|
|
|
42
42
|
repeat: 1
|
|
43
43
|
concurrency: 4
|
|
44
44
|
timeoutMs: 60000
|
|
45
|
-
`;function
|
|
46
|
-
Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",
|
|
47
|
-
`),
|
|
48
|
-
`).trim()}),
|
|
49
|
-
`).trim()}),
|
|
50
|
-
`),y=!1,d=0;for(;d<f.length;){let C=f[d]??"";if(C.startsWith("```")){y=!y,y&&B(o,30),d++;continue}if(y){B(o,14);let l=o.y;o.save(),o.rect(o.page.margins.left,l-2,i,14).fill("#f5f5f4"),o.restore(),o.fontSize(9).font("Courier").fillColor("#44403c").text(C,{width:i}),d++;continue}if(!C.trim()){o.moveDown(.4),d++;continue}let b=f[d+1]??"";if(C.includes("|")&&d+1<f.length&&b.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],g=d;for(;g<f.length&&(f[g]??"").includes("|");)l.push(f[g]??""),g++;let I=Pt(l);if(I){$t(o,I,i),d=g;continue}}if(C.match(/^\s*\|[-:]+/)||C.match(/^---+$/)){d++;continue}let h=C.match(/^(#{3,4})\s+(.+)$/);if(h?.[1]&&h[2]){let l=h[1].length,g=De(l);B(o,g+10),o.moveDown(.3),o.fontSize(g).font("Helvetica-Bold").fillColor("#1c1917").text(h[2].trim(),{width:i}),o.moveDown(.3),d++;continue}if(C.match(/^\s*[-*] /)){B(o,14),o.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{indent:12,width:i-12}),d++;continue}B(o,14),o.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{width:i}),d++}It(o)}o.end(),a.on("finish",()=>n(t)),a.on("error",r)})}function ze(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{try{let{runnerResult:n,config:r,yamlContent:o}=await V({configPath:t.config,runs:t.runs?parseInt(t.runs,10):void 0,gate:t.gate?parseFloat(t.gate):void 0}),{runResult:a,aggregated:i}=n,s=(0,P.evaluateGates)(r.gates,i),m=Ot(t.reporter).generate(a,s);if(console.log(m.content),t.compliance){let y=(0,P.createComplianceReporter)().generate(a,s);if(console.log(""),console.log(y.content),t.pdf){let 
d=await Ke(y.content,t.pdf);console.log(""),console.log(w.default.green(`PDF report saved to ${d}`))}}try{Ne({runnerResult:n,suiteName:r.suite.name,configHash:_e(o),timestamp:new Date().toISOString()})}catch{}let u=a.failed===0&&a.errored===0&&s.passed;process.exit(u?0:1)}catch(n){if(n instanceof P.ProviderError){let r=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(w.default.red(`${r}: ${n.message}`)),n.retryable&&console.error(w.default.yellow("This error may be transient. Try again or increase --timeout."))}else if(At(n)){let o=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(w.default.red(`${o}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(w.default.red("Request timed out. Check network connectivity or increase timeout.")):console.error(w.default.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}var Et={bold:e=>w.default.bold(e),red:e=>w.default.red(e),green:e=>w.default.green(e),yellow:e=>w.default.yellow(e),cyan:e=>w.default.cyan(e),dim:e=>w.default.dim(e),greenBold:e=>w.default.green.bold(e),redBold:e=>w.default.red.bold(e)};function Ot(e){switch(e){case"json":return(0,P.createJsonReporter)();case"junit":return(0,P.createJunitReporter)();default:return(0,P.createPrettyReporter)(Et)}}function At(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}var k=require("path"),Je=require("fs"),p=$(require("chalk"),1),S=require("@kindlm/core");var D=require("fs"),ae=require("path");function Ge(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function ce(e){let t=(0,ae.join)(e,"baselines");return{read(n){let r=(0,ae.join)(t,`${Ge(n)}.json`);try{return{success:!0,data:(0,D.readFileSync)(r,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found 
for suite "${n}" at ${r}`}}}},write(n,r){try{(0,D.mkdirSync)(t,{recursive:!0});let o=(0,ae.join)(t,`${Ge(n)}.json`);return(0,D.writeFileSync)(o,r,"utf-8"),{success:!0,data:void 0}}catch(o){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${o instanceof Error?o.message:String(o)}`}}}},list(){try{return(0,D.mkdirSync)(t,{recursive:!0}),{success:!0,data:(0,D.readdirSync)(t).filter(o=>o.endsWith(".json")).map(o=>o.replace(/\.json$/,""))}}catch(n){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${n instanceof Error?n.message:String(n)}`}}}}}}function We(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),{config:i,runnerResult:s}=await V({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0}),{aggregated:c}=s,m=(0,S.buildBaselineData)(i.suite.name,c,new Date().toISOString()),u=(0,S.writeBaseline)(m,a);u.success||(console.error(p.default.red(`Failed to save baseline: ${u.error.message}`)),process.exit(1));let f=Object.keys(m.results).length;console.log(""),console.log(p.default.green(`Baseline saved for suite "${i.suite.name}" (${f} test${f===1?"":"s"})`)),console.log(p.default.dim(` Location: ${o}/baselines/`)),process.exit(0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let 
r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),i=(0,k.resolve)(process.cwd(),n.config),s;try{s=(0,Je.readFileSync)(i,"utf-8")}catch{console.error(p.default.red(`Config file not found: ${i}`)),process.exit(1)}let c=H(),m=(0,S.parseConfig)(s,{configDir:r,fileReader:c});m.success||(console.error(p.default.red(`Config validation failed: ${m.error.message}`)),process.exit(1));let u=m.data.suite.name,f=(0,S.readBaseline)(u,a);f.success||(f.error.code==="BASELINE_NOT_FOUND"?console.error(p.default.red(`No baseline found for suite "${u}". Run \`kindlm baseline set\` first.`)):console.error(p.default.red(`Failed to read baseline: ${f.error.message}`)),process.exit(1));let y=f.data,{runnerResult:d}=await V({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0,baselineData:y}),{aggregated:C}=d,b=(0,S.buildBaselineData)(u,C,new Date().toISOString()),h=(0,S.compareBaseline)(y,b.results);if(console.log(""),console.log(p.default.bold(`Baseline comparison for "${u}"`)),console.log(p.default.dim(` Baseline from: ${y.createdAt}`)),console.log(""),h.regressions.length>0){console.log(p.default.red.bold(` Regressions (${h.regressions.length}):`));for(let l of h.regressions)console.log(p.default.red(` ${l.testName}: ${Z(l.baselinePassRate)} \u2192 ${Z(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(p.default.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(h.improvements.length>0){console.log(p.default.green.bold(` Improvements (${h.improvements.length}):`));for(let l of h.improvements)console.log(p.default.green(` ${l.testName}: ${Z(l.baselinePassRate)} \u2192 ${Z(l.currentPassRate)}`));console.log("")}if(h.unchanged.length>0){console.log(p.default.dim(` Unchanged (${h.unchanged.length}):`));for(let l of h.unchanged)console.log(p.default.dim(` ${l.testName}: ${Z(l.passRate)}`));console.log("")}if(h.newTests.length>0){console.log(p.default.cyan(` New tests (${h.newTests.length}):`));for(let l of 
h.newTests)console.log(p.default.cyan(` ${l}`));console.log("")}if(h.removedTests.length>0){console.log(p.default.yellow(` Removed tests (${h.removedTests.length}):`));for(let l of h.removedTests)console.log(p.default.yellow(` ${l}`));console.log("")}process.exit(h.regressions.length>0?1:0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(n=>{try{let r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),i=(0,S.listBaselines)(a);i.success||(console.error(p.default.red(`Failed to list baselines: ${i.error.message}`)),process.exit(1));let s=i.data;s.length===0&&(console.log(p.default.dim("No baselines saved yet. Run `kindlm baseline set` to create one.")),process.exit(0)),console.log(p.default.bold("Saved baselines:")),console.log("");for(let c of s){let m=a.read(c);if(!m.success){console.log(` ${c} ${p.default.dim("(unreadable)")}`);continue}let u=(0,S.deserializeBaseline)(m.data);if(!u.success){console.log(` ${c} ${p.default.dim("(corrupt)")}`);continue}let f=Object.keys(u.data.results).length;console.log(` ${p.default.cyan(u.data.suiteName)} \u2014 ${f} test${f===1?"":"s"}, saved ${p.default.dim(u.data.createdAt)}`)}process.exit(0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}})}function Z(e){return`${(e*100).toFixed(1)}%`}var qe=require("readline"),Ve=require("stream"),A=$(require("chalk"),1);var O=require("fs"),pe=require("path"),he=require("os");function ye(){return(0,pe.join)((0,he.homedir)(),".kindlm","credentials")}function le(){try{let e=(0,O.readFileSync)(ye(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function Ye(e){let t=ye(),n=(0,pe.join)((0,he.homedir)(),".kindlm");(0,O.mkdirSync)(n,{recursive:!0,mode:448});let 
r={token:e,savedAt:new Date().toISOString()};(0,O.writeFileSync)(t,JSON.stringify(r,null,2),{mode:384}),(0,O.chmodSync)(t,384)}function Xe(){try{(0,O.unlinkSync)(ye())}catch{}}var Mt="https://api.kindlm.com";var j=class extends Error{status;constructor(t,n){super(n),this.name="CloudApiError",this.status=t}};function Y(){let e=process.env.KINDLM_CLOUD_URL??Mt;if(e.startsWith("http://")&&!Nt(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. Use HTTPS or target localhost for development.`);return e}function Nt(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function Lt(e){return new Promise(t=>setTimeout(t,e))}function Q(e,t){async function n(r,o,a){let i=`${e}${o}`,s={Authorization:`Bearer ${t}`},c={method:r,headers:s};a!==void 0&&(s["Content-Type"]="application/json",c.body=JSON.stringify(a));let m;for(let u=0;u<=1;u++){u>0&&await Lt(1e3);let f=new AbortController,y=setTimeout(()=>f.abort(),3e4);c.signal=f.signal;try{let d=await fetch(i,c);if(!d.ok){if(d.status>=500&&u<1){m=new j(d.status,`HTTP ${d.status}`);continue}let b=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(b=l.error)}catch{}throw new j(d.status,b)}if(d.status===204)return;let C=d.headers.get("content-type")??"";if(!C.includes("application/json"))throw new j(d.status,`Expected JSON response but got content-type: ${C}`);return await d.json()}catch(d){if(d instanceof j)throw d;if(m=d instanceof Error?d:new Error(String(d)),u<1)continue}finally{clearTimeout(y)}}throw m??new Error("Request failed")}return{baseUrl:e,get:r=>n("GET",r),post:(r,o)=>n("POST",r,o),patch:(r,o)=>n("PATCH",r,o),delete:r=>n("DELETE",r)}}function Ze(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored 
credentials").action(async t=>{try{if(t.logout){Xe(),console.log(A.default.green("Logged out. Credentials removed."));return}if(t.status){await _t();return}let n=t.token??process.env.KINDLM_API_TOKEN??await Dt();n.startsWith("klm_")||(console.error(A.default.red('Invalid token format. KindLM tokens start with "klm_".')),process.exit(1));let r=Q(Y(),n);try{await r.get("/v1/auth/tokens")}catch(o){throw o instanceof j&&o.status===401&&(console.error(A.default.red("Invalid or expired token.")),process.exit(1)),o}Ye(n),console.log(A.default.green("Authenticated successfully. Token saved."))}catch(n){console.error(A.default.red(`Login failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}async function _t(){let e=le();if(!e){console.log(A.default.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=Q(Y(),e);try{await t.get("/v1/auth/tokens"),console.log(A.default.green("Authenticated.")),console.log(` Cloud URL: ${Y()}`)}catch(n){n instanceof j&&n.status===401?console.log(A.default.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(A.default.yellow(`Cannot reach Cloud API: ${n instanceof Error?n.message:String(n)}`))}}function Dt(){return new Promise((e,t)=>{let n=new Ve.Writable({write(o,a,i){i()}});process.stderr.write("Paste your KindLM API token: ");let r=(0,qe.createInterface)({input:process.stdin,output:n,terminal:!0});r.question("",o=>{r.close(),process.stderr.write(`
|
|
51
|
-
`);let a=
|
|
45
|
+
`;function xe(e){e.command("init").description("Create a kindlm.yaml template").option("--force","Overwrite existing kindlm.yaml").action(t=>{let o=(0,Ee.resolve)(process.cwd(),"kindlm.yaml");(0,oe.existsSync)(o)&&!t.force&&(console.error(_.default.red("kindlm.yaml already exists. Use --force to overwrite.")),process.exit(1));try{(0,oe.writeFileSync)(o,xt,"utf-8")}catch(n){let r=n instanceof Error&&"code"in n?n.code:void 0;console.error(r==="EACCES"||r==="EROFS"?_.default.red("Cannot create kindlm.yaml: permission denied"):_.default.red(`Cannot create kindlm.yaml: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}console.log(_.default.green("Created kindlm.yaml")),console.log(""),console.log("Next steps:"),console.log(` 1. Edit ${_.default.bold("kindlm.yaml")} with your test configuration`),console.log(` 2. Set your API key: ${_.default.bold("export OPENAI_API_KEY=sk-...")}`),console.log(` 3. Run tests: ${_.default.bold("kindlm test")}`)})}var Pe=require("fs"),se=require("path"),O=E(require("chalk"),1),$e=require("@kindlm/core");var Ie=require("fs");function D(){return{readFile(e){try{return{success:!0,data:(0,Ie.readFileSync)(e,"utf-8")}}catch(t){return{success:!1,error:{code:"CONFIG_FILE_REF_ERROR",message:`Cannot read file: ${e}: ${t instanceof Error?t.message:String(t)}`}}}}}}function Oe(e){e.command("validate").description("Validate kindlm.yaml configuration").option("-c, --config <path>","Path to config file","kindlm.yaml").action(t=>{let o=(0,se.resolve)(process.cwd(),t.config),n=(0,se.dirname)(o),r;try{r=(0,Pe.readFileSync)(o,"utf-8")}catch{console.error(O.default.red(`Config file not found: ${o}`)),process.exit(1)}let a=D(),s=(0,$e.parseConfig)(r,{configDir:n,fileReader:a});if(!s.success){console.error(O.default.red("Validation failed:"));let c=s.error.details;if(c&&Array.isArray(c.errors))for(let f of c.errors)console.error(O.default.red(` - ${f}`));else console.error(O.default.red(` ${s.error.message}`));process.exit(1)}let 
i=s.data;console.log(O.default.green("Config is valid!")),console.log(""),console.log(` Suite: ${O.default.bold(i.suite.name)}`),console.log(` Tests: ${O.default.bold(String(i.tests.length))}`),console.log(` Models: ${O.default.bold(String(i.models.length))}`)})}var j=E(require("chalk"),1),J=require("@kindlm/core");var ae=require("fs"),ce=require("path"),v=E(require("chalk"),1),K=require("@kindlm/core");var Me=require("@kindlm/core");function ie(){return{async fetch(e,t){let o=new AbortController,n=t.timeoutMs?setTimeout(()=>o.abort(),t.timeoutMs):void 0;try{let r=await globalThis.fetch(e,{method:t.method,headers:t.headers,body:t.body,signal:o.signal});return{ok:r.ok,status:r.status,json:()=>r.json()}}catch(r){throw r instanceof DOMException&&r.name==="AbortError"||r instanceof Error&&r.name==="AbortError"?new Me.ProviderError("TIMEOUT","Request timed out",408,!0):r}finally{n!==void 0&&clearTimeout(n)}}}}var Ae=E(require("ora"),1);function U(){let e;return{start(t){e=(0,Ae.default)({text:t,stream:process.stderr}).start()},succeed(t){e?.succeed(t),e=void 0},fail(t){e?.fail(t),e=void 0},stop(){e?.stop(),e=void 0}}}var Ne=require("child_process"),Y=require("@kindlm/core");function Le(){return{async execute(e,t){return new Promise(o=>{let n=(0,Ne.spawn)("sh",["-c",e],{cwd:t.cwd,env:{...process.env,...t.env},stdio:["ignore","pipe","pipe"]}),r=[],a=[];n.stdout.on("data",i=>r.push(i)),n.stderr.on("data",i=>a.push(i));let s=setTimeout(()=>{n.kill("SIGTERM"),setTimeout(()=>{n.killed||n.kill("SIGKILL")},1e3)},t.timeoutMs);n.on("close",(i,c)=>{if(clearTimeout(s),c==="SIGTERM"||c==="SIGKILL"){o((0,Y.err)({code:"PROVIDER_TIMEOUT",message:`Command timed out after ${t.timeoutMs}ms`}));return}o((0,Y.ok)({stdout:Buffer.concat(r).toString("utf-8"),stderr:Buffer.concat(a).toString("utf-8"),exitCode:i??1}))}),n.on("error",i=>{clearTimeout(s),o((0,Y.err)({code:"UNKNOWN_ERROR",message:`Failed to spawn command: ${i.message}`}))})})}}}var It=1048576;async function X(e){let 
t=U(),o=!1,n=()=>{o&&process.exit(130),o=!0,t.stop(),console.error(v.default.yellow(`
|
|
46
|
+
Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",n);try{return await Pt(e,t)}finally{process.removeListener("SIGINT",n)}}async function Pt(e,t){let o=(0,ce.resolve)(process.cwd(),e.configPath),n=(0,ce.dirname)(o);try{let u=(0,ae.statSync)(o);u.size>It&&(console.error(v.default.red(`Config file exceeds 1MB limit (${(u.size/1048576).toFixed(1)}MB): ${o}`)),process.exit(1))}catch{console.error(v.default.red(`Config file not found: ${o}`)),process.exit(1)}let r;try{r=(0,ae.readFileSync)(o,"utf-8")}catch{console.error(v.default.red(`Config file not found: ${o}`)),process.exit(1)}let a=D(),s=(0,K.parseConfig)(r,{configDir:n,fileReader:a});if(!s.success){console.error(v.default.red("Config validation failed:"));let u=s.error.details;if(u&&Array.isArray(u.errors))for(let T of u.errors)console.error(v.default.red(` - ${T}`));else console.error(v.default.red(` ${s.error.message}`));process.exit(1)}let i=s.data;e.suite!==void 0&&i.suite.name!==e.suite&&(console.error(v.default.red(`Suite "${e.suite}" not found. Available suite: "${i.suite.name}"`)),process.exit(1)),e.runs!==void 0&&((!Number.isInteger(e.runs)||e.runs<1)&&(console.error(v.default.red(`Invalid --runs value: ${e.runs}. Must be a positive integer (>= 1).`)),process.exit(1)),i.defaults.repeat=e.runs),e.gate!==void 0&&((Number.isNaN(e.gate)||e.gate<0||e.gate>100)&&(console.error(v.default.red(`Invalid --gate value: ${e.gate}. Must be between 0 and 100.`)),process.exit(1)),e.gate>0&&e.gate<=1&&console.error(v.default.yellow(`Warning: --gate ${e.gate} looks like a decimal. Did you mean --gate ${Math.round(e.gate*100)}? 
(--gate uses 0-100 scale)`)),i.gates?i.gates.passRateMin=e.gate/100:i.gates={passRateMin:e.gate/100});let c=ie(),f=new Map,m=i.providers;for(let[u,T]of Object.entries(m)){if(!T)continue;let ee="";if(T.apiKeyEnv){let N=process.env[T.apiKeyEnv];N||(console.error(v.default.red(`Missing environment variable: ${T.apiKeyEnv}`)),process.exit(1)),ee=N.trim()}else u!=="ollama"&&(console.error(v.default.red(`Provider "${u}" requires apiKeyEnv to be configured`)),process.exit(1));let F;try{F=(0,K.createProvider)(u,c)}catch(N){let Re=N instanceof Error?N.message:String(N);console.error(v.default.red(`Failed to create provider "${u}": ${Re}`)),process.exit(1)}await F.initialize({apiKey:ee,baseUrl:T.baseUrl,organization:T.organization,timeoutMs:i.defaults.timeoutMs,maxRetries:2}),f.set(u,F)}let p=0,g=$t(i),d=u=>{u.type==="test_start"?t.start(`Running ${u.test} [${u.model}] (${p}/${g})`):u.type==="test_complete"&&p++},S=i.tests.some(u=>u.command)?Le():void 0,l=await(0,K.createRunner)(i,{adapters:f,configDir:n,fileReader:a,onProgress:d,baselineData:e.baselineData,commandExecutor:S}).run();return t.stop(),l.success||(console.error(v.default.red(`Run failed: ${l.error.message}`)),process.exit(1)),{config:i,runnerResult:l.data,configDir:n,yamlContent:r}}function $t(e){let t=0;for(let o of e.tests){if(o.skip)continue;let n=o.repeat??e.defaults.repeat;if(o.command)t+=n;else{let r=o.models?.length??e.models.length;t+=r*n}}return t}var z=require("fs"),Ce=require("path"),_e=require("crypto");function De(){return(0,Ce.join)(process.cwd(),".kindlm","last-run.json")}function je(e){let t=De(),o=(0,Ce.join)(process.cwd(),".kindlm");(0,z.mkdirSync)(o,{recursive:!0,mode:448});let n={...e,runnerResult:{...e.runnerResult,aggregated:e.runnerResult.aggregated.map(r=>{let a=r.runs.flatMap(s=>s.assertions.filter(i=>!i.passed).map(i=>i.failureMessage)).filter(s=>s!==void 0);return{...r,failureMessages:a,runs:[]}})}};(0,z.writeFileSync)(t,JSON.stringify(n),{mode:384})}function He(){try{let 
e=(0,z.readFileSync)(De(),"utf-8"),t=JSON.parse(e);return t.runnerResult?.runResult&&Array.isArray(t.runnerResult.aggregated)&&typeof t.suiteName=="string"&&typeof t.configHash=="string"&&typeof t.timestamp=="string"?t:null}catch{return null}}function Fe(e){return(0,_e.createHash)("sha256").update(e).digest("hex")}var Ue=E(require("pdfkit"),1),Ke=require("fs"),ze=require("fs/promises"),Ge=require("path");function Ot(e){let t=e.split(`
|
|
47
|
+
`),o=[],n="",r=2,a=[];for(let s of t){let i=s.match(/^(#{2,4})\s+(.+)$/);i&&i[1]?.length===2?((n||a.length>0)&&o.push({heading:n,headingLevel:r,body:a.join(`
|
|
48
|
+
`).trim()}),n=i[2]?.trim()??"",r=2,a=[]):a.push(s)}return(n||a.length>0)&&o.push({heading:n,headingLevel:r,body:a.join(`
|
|
49
|
+
`).trim()}),o}function Mt(e){return e.match(/^# (.+)$/m)?.[1]?.trim()??"KindLM Compliance Report"}function At(e){return e.match(/SHA-256:\s*`([a-f0-9]+)`/i)?.[1]??null}function Be(e){switch(e){case 2:return 18;case 3:return 15;case 4:return 13;default:return 13}}function Nt(e){if(e.length<2)return null;let t=e[0]??"",o=e[1]??"";if(!t.includes("|")||!o.match(/^\s*\|[-:\s|]+\|\s*$/))return null;let n=s=>s.split("|").slice(1,-1).map(i=>i.trim()),r={cells:n(t)},a=[];for(let s=2;s<e.length;s++){let i=e[s]??"";if(!i.includes("|"))break;a.push({cells:n(i)})}return{header:r,rows:a}}function B(e,t){let o=e.page.margins.bottom;e.page.height-o-30-e.y<t&&(e.addPage(),Je(e))}function Je(e){let t=new Date().toISOString();e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("KindLM Compliance Report",60,40),e.text(t,60,40,{align:"right"}),e.moveDown(3)}function Lt(e){e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("Generated by KindLM \xB7 kindlm.com",60,e.page.height-50,{align:"center",width:e.page.width-120})}function _t(e,t,o){let n=t.header.cells.length,r=o/n,a=e.page.margins.left,s=18;B(e,s*2);let i=(c,f,m)=>{let p=e.y;m&&(e.save(),e.rect(a,p-2,o,s).fill(m),e.restore());for(let g=0;g<c.length;g++){let d=a+g*r;e.fontSize(8).font(f?"Helvetica-Bold":"Courier").fillColor("#44403c").text(c[g]??"",d+4,p,{width:r-8,height:s,lineBreak:!1})}e.y=p+s};i(t.header.cells,!0,"#f5f5f4");for(let c of t.rows)B(e,s),i(c.cells,!1)}async function We(e,t){return await(0,ze.mkdir)((0,Ge.dirname)(t),{recursive:!0}),new Promise((o,n)=>{let r=new Ue.default({size:"A4",margins:{top:72,bottom:72,left:60,right:60},info:{Title:"KindLM EU AI Act Compliance Report",Author:"KindLM",Creator:"KindLM CLI"}}),a=(0,Ke.createWriteStream)(t);r.pipe(a);let 
s=r.page.width-r.page.margins.left-r.page.margins.right,i=Mt(e),c=At(e);r.moveDown(6),r.fontSize(28).font("Helvetica-Bold").fillColor("#1c1917").text(i,{align:"center",width:s}),r.moveDown(.5),r.fontSize(14).font("Helvetica").fillColor("#57534e").text("EU AI Act Annex IV Documentation",{align:"center",width:s}),r.moveDown(1),r.fontSize(10).fillColor("#a8a29e").text(`Generated: ${new Date().toISOString()}`,{align:"center",width:s}),c&&(r.moveDown(.3),r.fontSize(9).font("Courier").fillColor("#78716c").text(`SHA-256: ${c}`,{align:"center",width:s})),r.moveDown(2),r.fontSize(10).font("Helvetica").fillColor("#6366f1").text("kindlm.com",{align:"center",link:"https://kindlm.com",width:s});let f=Ot(e);for(let m of f){if(r.addPage(),Je(r),m.heading){let y=Be(m.headingLevel);r.fontSize(y).font("Helvetica-Bold").fillColor("#1c1917").text(m.heading,{width:s}),r.moveDown(.5),r.moveTo(60,r.y).lineTo(60+s,r.y).strokeColor("#e7e5e4").lineWidth(1).stroke(),r.moveDown(.8)}let p=m.body.split(`
|
|
50
|
+
`),g=!1,d=0;for(;d<p.length;){let y=p[d]??"";if(y.startsWith("```")){g=!g,g&&B(r,30),d++;continue}if(g){B(r,14);let l=r.y;r.save(),r.rect(r.page.margins.left,l-2,s,14).fill("#f5f5f4"),r.restore(),r.fontSize(9).font("Courier").fillColor("#44403c").text(y,{width:s}),d++;continue}if(!y.trim()){r.moveDown(.4),d++;continue}let S=p[d+1]??"";if(y.includes("|")&&d+1<p.length&&S.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],u=d;for(;u<p.length&&(p[u]??"").includes("|");)l.push(p[u]??""),u++;let T=Nt(l);if(T){_t(r,T,s),d=u;continue}}if(y.match(/^\s*\|[-:]+/)||y.match(/^---+$/)){d++;continue}let R=y.match(/^(#{3,4})\s+(.+)$/);if(R?.[1]&&R[2]){let l=R[1].length,u=Be(l);B(r,u+10),r.moveDown(.3),r.fontSize(u).font("Helvetica-Bold").fillColor("#1c1917").text(R[2].trim(),{width:s}),r.moveDown(.3),d++;continue}if(y.match(/^\s*[-*] /)){B(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(y.trim(),{indent:12,width:s-12}),d++;continue}B(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(y.trim(),{width:s}),d++}Lt(r)}r.end(),a.on("finish",()=>o(t)),a.on("error",n)})}var I=E(require("chalk"),1),G=require("@kindlm/core"),Dt={bold:e=>I.default.bold(e),red:e=>I.default.red(e),green:e=>I.default.green(e),yellow:e=>I.default.yellow(e),cyan:e=>I.default.cyan(e),dim:e=>I.default.dim(e),greenBold:e=>I.default.green.bold(e),redBold:e=>I.default.red.bold(e)},jt=["pretty","json","junit"];function le(e){switch(e){case"json":return(0,G.createJsonReporter)();case"junit":return(0,G.createJunitReporter)();case"pretty":return(0,G.createPrettyReporter)(Dt);default:console.error(I.default.red(`Unknown reporter: '${e}'. 
Available: ${jt.join(", ")}`)),process.exit(1)}}var de=require("child_process");function me(){try{let e=(0,de.execSync)("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=(0,de.execSync)("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,n=(0,de.execSync)("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:n}}catch{return{commitSha:null,branch:null,dirty:!1}}}function Ve(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{t.pdf&&!t.compliance&&(console.error(j.default.red("--pdf requires --compliance")),process.exit(1));let o=le(t.reporter);try{let{runnerResult:n,config:r,yamlContent:a}=await X({configPath:t.config,runs:t.runs!==void 0?parseInt(t.runs,10):void 0,gate:t.gate!==void 0?parseFloat(t.gate):void 0,suite:t.suite}),{runResult:s,aggregated:i}=n,c=(0,J.evaluateGates)(r.gates,i),f=await o.generate(s,c);console.log(f.content);let m,p;if(t.compliance){let d=me(),y={runId:crypto.randomUUID(),kindlmVersion:"1.0.0",gitCommitSha:d.commitSha??void 0,modelIds:r.models.map(u=>u.id),...r.compliance?.metadata??{}};if(m=(await(0,J.createComplianceReporter)(y).generate(s,c)).content,p=m.match(/Tamper Evidence Hash \(SHA-256\):\*\* `([a-f0-9]{64})`/)?.[1],t.reporter==="pretty"||!t.reporter?(console.log(""),console.log(m)):(console.error(""),console.error(m)),t.pdf){let u=await We(m,t.pdf);console.log(""),console.log(j.default.green(`PDF report saved to ${u}`))}}try{je({runnerResult:n,suiteName:r.suite.name,configHash:Fe(a),timestamp:new 
Date().toISOString(),complianceReport:m,complianceHash:p})}catch{}let g=s.failed===0&&s.errored===0&&c.passed;process.exit(g?0:1)}catch(n){if(n instanceof J.ProviderError){let r=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(j.default.red(`${r}: ${n.message}`)),n.retryable&&console.error(j.default.yellow("This error may be transient. Try again or increase timeoutMs in your kindlm.yaml defaults."))}else if(Ht(n)){let a=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(j.default.red(`${a}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(j.default.red("Request timed out. Check network connectivity or increase timeout.")):console.error(j.default.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}function Ht(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}var b=require("path"),Xe=require("fs"),h=E(require("chalk"),1),w=require("@kindlm/core");var M=require("fs"),ue=require("path");function Ye(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function fe(e){let t=(0,ue.join)(e,"baselines");return{read(o){let n=(0,ue.join)(t,`${Ye(o)}.json`);try{return{success:!0,data:(0,M.readFileSync)(n,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found for suite "${o}" at ${n}`}}}},write(o,n){try{(0,M.mkdirSync)(t,{recursive:!0});let r=(0,ue.join)(t,`${Ye(o)}.json`);return(0,M.writeFileSync)(r,n,"utf-8"),{success:!0,data:void 0}}catch(r){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${r instanceof 
Error?r.message:String(r)}`}}}},list(){try{return(0,M.mkdirSync)(t,{recursive:!0}),{success:!0,data:(0,M.readdirSync)(t).filter(r=>r.endsWith(".json")).map(r=>r.replace(/\.json$/,""))}}catch(o){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${o instanceof Error?o.message:String(o)}`}}}}}}function qe(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").option("--force","Save baseline even if all tests failed").action(async o=>{try{let n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),{config:s,runnerResult:i}=await X({configPath:o.config,runs:o.runs!==void 0?parseInt(o.runs,10):void 0}),{runResult:c,aggregated:f}=i;(c.totalTests>0?c.passed/c.totalTests:0)===0&&!o.force&&(console.error(h.default.red("All tests failed or errored. 
Refusing to save a failing baseline.")),console.error(h.default.yellow("Use --force to save anyway.")),process.exit(1));let p=(0,w.buildBaselineData)(s.suite.name,f,new Date().toISOString()),g=(0,w.writeBaseline)(p,a);g.success||(console.error(h.default.red(`Failed to save baseline: ${g.error.message}`)),process.exit(1));let d=Object.keys(p.results).length;console.log(""),console.log(h.default.green(`Baseline saved for suite "${s.suite.name}" (${d} test${d===1?"":"s"})`)),console.log(h.default.dim(` Location: ${r}/baselines/`))}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async o=>{try{let n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),s=(0,b.resolve)(process.cwd(),o.config),i;try{i=(0,Xe.readFileSync)(s,"utf-8")}catch{console.error(h.default.red(`Config file not found: ${s}`)),process.exit(1)}let c=D(),f=(0,w.parseConfig)(i,{configDir:n,fileReader:c});f.success||(console.error(h.default.red(`Config validation failed: ${f.error.message}`)),process.exit(1));let m=f.data.suite.name,p=(0,w.readBaseline)(m,a);p.success||(p.error.code==="BASELINE_NOT_FOUND"?console.error(h.default.red(`No baseline found for suite "${m}". 
Run \`kindlm baseline set\` first.`)):console.error(h.default.red(`Failed to read baseline: ${p.error.message}`)),process.exit(1));let g=p.data,{runnerResult:d}=await X({configPath:o.config,runs:o.runs!==void 0?parseInt(o.runs,10):void 0,baselineData:g}),{aggregated:y}=d,S=(0,w.buildBaselineData)(m,y,new Date().toISOString()),R=(0,w.compareBaseline)(g,S.results);if(console.log(""),console.log(h.default.bold(`Baseline comparison for "${m}"`)),console.log(h.default.dim(` Baseline from: ${g.createdAt}`)),console.log(""),R.regressions.length>0){console.log(h.default.red.bold(` Regressions (${R.regressions.length}):`));for(let l of R.regressions)console.log(h.default.red(` ${l.testName}: ${q(l.baselinePassRate)} \u2192 ${q(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(h.default.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(R.improvements.length>0){console.log(h.default.green.bold(` Improvements (${R.improvements.length}):`));for(let l of R.improvements)console.log(h.default.green(` ${l.testName}: ${q(l.baselinePassRate)} \u2192 ${q(l.currentPassRate)}`));console.log("")}if(R.unchanged.length>0){console.log(h.default.dim(` Unchanged (${R.unchanged.length}):`));for(let l of R.unchanged)console.log(h.default.dim(` ${l.testName}: ${q(l.passRate)}`));console.log("")}if(R.newTests.length>0){console.log(h.default.cyan(` New tests (${R.newTests.length}):`));for(let l of R.newTests)console.log(h.default.cyan(` ${l}`));console.log("")}if(R.removedTests.length>0){console.log(h.default.yellow(` Removed tests (${R.removedTests.length}):`));for(let l of R.removedTests)console.log(h.default.yellow(` ${l}`));console.log("")}process.exit(R.regressions.length>0?1:0)}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(o=>{try{let 
n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),s=(0,w.listBaselines)(a);s.success||(console.error(h.default.red(`Failed to list baselines: ${s.error.message}`)),process.exit(1));let i=s.data;if(i.length===0){console.log(h.default.dim("No baselines saved yet. Run `kindlm baseline set` to create one."));return}console.log(h.default.bold("Saved baselines:")),console.log("");for(let c of i){let f=a.read(c);if(!f.success){console.log(` ${c} ${h.default.dim("(unreadable)")}`);continue}let m=(0,w.deserializeBaseline)(f.data);if(!m.success){console.log(` ${c} ${h.default.dim("(corrupt)")}`);continue}let p=Object.keys(m.data.results).length;console.log(` ${h.default.cyan(m.data.suiteName)} \u2014 ${p} test${p===1?"":"s"}, saved ${h.default.dim(m.data.createdAt)}`)}}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function q(e){return`${(e*100).toFixed(1)}%`}var et=require("readline"),tt=require("stream"),P=E(require("chalk"),1);var H=require("fs"),ve=require("path"),we=require("os");function Se(){return(0,ve.join)((0,we.homedir)(),".kindlm","credentials")}function pe(){try{let e=(0,H.readFileSync)(Se(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function Ze(e){let t=Se(),o=(0,ve.join)((0,we.homedir)(),".kindlm");(0,H.mkdirSync)(o,{recursive:!0,mode:448});let n={token:e,savedAt:new Date().toISOString()};(0,H.writeFileSync)(t,JSON.stringify(n,null,2),{mode:384})}function Qe(){try{(0,H.unlinkSync)(Se())}catch{}}var Ft="https://api.kindlm.com";var A=class extends Error{status;constructor(t,o){super(o),this.name="CloudApiError",this.status=t}};function W(){let e=process.env.KINDLM_CLOUD_URL??Ft;if(e.startsWith("http://")&&!Bt(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. 
Use HTTPS or target localhost for development.`);return e}function Bt(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function Ut(e){return new Promise(t=>setTimeout(t,e))}function Z(e,t){async function o(n,r,a){let s=`${e}${r}`,i={Authorization:`Bearer ${t}`},c={method:n,headers:i};a!==void 0&&(i["Content-Type"]="application/json",c.body=JSON.stringify(a));let f;for(let m=0;m<=1;m++){m>0&&await Ut(1e3);let p=new AbortController,g=setTimeout(()=>p.abort(),3e4);c.signal=p.signal;try{let d=await fetch(s,c);if(!d.ok){if(d.status>=500&&m<1){f=new A(d.status,`HTTP ${d.status}`);continue}let S=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(S=l.error)}catch{}throw new A(d.status,S)}if(d.status===204)return;let y=d.headers.get("content-type")??"";if(!y.includes("application/json"))throw new A(d.status,`Expected JSON response but got content-type: ${y}`);return await d.json()}catch(d){if(d instanceof A)throw d;if(f=d instanceof Error?d:new Error(String(d)),m<1)continue}finally{clearTimeout(g)}}throw f??new Error("Request failed")}return{baseUrl:e,get:n=>o("GET",n),post:(n,r)=>o("POST",n,r),patch:(n,r)=>o("PATCH",n,r),delete:n=>o("DELETE",n)}}function nt(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored credentials").action(async t=>{try{if(t.logout){Qe(),console.log(P.default.green("Logged out. Credentials removed."));return}if(t.status){await Kt();return}let o=t.token??process.env.KINDLM_API_TOKEN??await zt();o.startsWith("klm_")||(console.error(P.default.red('Invalid token format. 
KindLM tokens start with "klm_".')),process.exit(1));let n=Z(W(),o);try{await n.get("/v1/auth/tokens")}catch(r){throw r instanceof A&&r.status===401&&(console.error(P.default.red("Invalid or expired token.")),process.exit(1)),r}Ze(o),console.log(P.default.green("Authenticated successfully. Token saved."))}catch(o){console.error(P.default.red(`Login failed: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}async function Kt(){let e=pe();if(!e){console.log(P.default.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=Z(W(),e);try{await t.get("/v1/auth/tokens"),console.log(P.default.green("Authenticated.")),console.log(` Cloud URL: ${W()}`)}catch(o){o instanceof A&&o.status===401?console.log(P.default.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(P.default.yellow(`Cannot reach Cloud API: ${o instanceof Error?o.message:String(o)}`))}}function zt(){return new Promise((e,t)=>{let o=new tt.Writable({write(r,a,s){s()}});process.stderr.write("Paste your KindLM API token: ");let n=(0,et.createInterface)({input:process.stdin,output:o,terminal:!0});n.question("",r=>{n.close(),process.stderr.write(`
|
|
51
|
+
`);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}var st=require("path"),it=require("child_process"),ge=E(require("chalk"),1);function rt(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function V(e){return encodeURIComponent(e)}async function ot(e,t,o){let n=await Gt(e,o.projectName),r=await Jt(e,n,o.suiteName,o.configHash),a=await e.post(`/v1/projects/${V(n)}/runs`,{suiteId:r,commitSha:o.commitSha,branch:o.branch,environment:o.environment,triggeredBy:o.triggeredBy}),s=Wt(t.aggregated),i=50;try{for(let l=0;l<s.length;l+=i){let u=s.slice(l,l+i);await e.post(`/v1/runs/${V(a.id)}/results`,{results:u})}}catch(l){try{await e.patch(`/v1/runs/${V(a.id)}`,{status:"failed"})}catch{}throw l}let{runResult:c}=t,f=c.totalTests>0?c.passed/c.totalTests:0,m=new Set(t.aggregated.map(l=>l.modelId)),p=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),g=p.length>0?p.reduce((l,u)=>l+u,0)/p.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),y=d.length>0?d.reduce((l,u)=>l+u,0)/d.length:void 0,S=t.aggregated.reduce((l,u)=>l+u.totalCostUsd,0),R=S>0?S:void 0;return await e.patch(`/v1/runs/${V(a.id)}`,{status:"completed",passRate:f,testCount:c.totalTests,modelCount:m.size,judgeAvgScore:g,latencyAvgMs:y,costEstimateUsd:R,complianceReport:o.complianceReport,complianceHash:o.complianceHash,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:n}}async function Gt(e,t){let{projects:o}=await e.get("/v1/projects"),n=o.find(a=>a.name===t);return n?n.id:(await e.post("/v1/projects",{name:t})).id}async function Jt(e,t,o,n){let{suites:r}=await e.get(`/v1/projects/${V(t)}/suites`),a=r.find(i=>i.name===o);return 
a?a.id:(await e.post(`/v1/projects/${V(t)}/suites`,{name:o,configHash:n})).id}function Wt(e){return e.map(t=>{let o=t,n=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):o.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:n.length>0?JSON.stringify(n):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function at(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let o=t.token??process.env.KINDLM_API_TOKEN??pe();o||(console.error(ge.default.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let n=He();n||(console.error(ge.default.red('No test run found. 
Run "kindlm test" first.')),process.exit(1));let r=me(),a=rt(),s=t.project??Yt(),i=Z(W(),o),c=U();c.start("Uploading results to KindLM Cloud...");try{let f=await ot(i,n.runnerResult,{projectName:s,suiteName:n.suiteName,configHash:n.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local",complianceReport:n.complianceReport,complianceHash:n.complianceHash});c.succeed("Uploaded successfully."),console.log(` Run ID: ${f.runId}`),console.log(` Project: ${s}`),console.log(` Suite: ${n.suiteName}`)}catch(f){throw c.fail("Upload failed."),f}}catch(o){console.error(ge.default.red(`Upload failed: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}function Vt(e){try{let n=new URL(e).pathname.split("/").filter(Boolean),r=n[n.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let o=t[1].split("/");return o[o.length-1]??null}return null}function Yt(){try{let e=(0,it.execSync)("git remote get-url origin",{encoding:"utf-8"}).trim(),t=Vt(e);if(t)return t}catch{}return(0,st.basename)(process.cwd())}var he=require("fs"),ye=require("path"),mt=require("child_process"),Q=E(require("chalk"),1),k=require("@kindlm/core");var ct=require("http"),lt=require("@kindlm/core");function dt(e){let t=[],o=null,n=[];function r(){for(let s of n)s()}function a(s,i){if(i.setHeader("Access-Control-Allow-Origin","*"),i.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),i.setHeader("Access-Control-Allow-Headers","Content-Type"),s.method==="OPTIONS"){i.writeHead(204),i.end();return}if(s.method!=="POST"||s.url!=="/v1/traces"){i.writeHead(404,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Not found"}));return}let c=10*1024*1024,f=[],m=0,p=!1;s.on("data",g=>{if(m+=g.length,m>c){p=!0,i.writeHead(413,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Payload too 
large"})),s.destroy();return}f.push(g)}),s.on("end",()=>{if(!p)try{let g=Buffer.concat(f).toString("utf-8"),d=JSON.parse(g),y=(0,lt.parseOtlpPayload)(d);y.success?(t.push(...y.data),r(),i.writeHead(200,{"Content-Type":"application/json"}),i.end(JSON.stringify({partialSuccess:{}}))):(i.writeHead(400,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:y.error.message})))}catch{i.writeHead(400,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((s,i)=>{o=(0,ct.createServer)(a),o.on("error",i),o.listen(e,()=>s())})},stop(){return new Promise(s=>{o?o.close(()=>s()):s()})},getSpans(){return[...t]},waitForSpans({timeoutMs:s}){return new Promise(i=>{if(t.length>0){i([...t]);return}let c=setTimeout(()=>{n=n.filter(m=>m!==f),i([...t])},s),f=()=>{clearTimeout(c),n=n.filter(m=>m!==f),setTimeout(()=>i([...t]),500)};n.push(f)})}}}function ut(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let o=U();try{let n=(0,ye.resolve)(process.cwd(),t.config),r=(0,ye.dirname)(n);try{(0,he.statSync)(n).size>1048576&&(console.error(Q.default.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(Q.default.red(`Config file not found: ${n}`)),process.exit(1)}let a;try{a=(0,he.readFileSync)(n,"utf-8")}catch{console.error(Q.default.red(`Config file not found: ${n}`)),process.exit(1)}let s=D(),i=(0,k.parseConfig)(a,{configDir:r,fileReader:s});i.success||(console.error(Q.default.red(`Config validation failed: ${i.error.message}`)),process.exit(1));let 
c=i.data,f=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},m=parseInt(t.port,10)||f.port,p=parseInt(t.timeout,10)||f.timeoutMs,g=dt(m);await g.start();let d;try{o.start(`Listening for OTLP traces on port ${m}...`),t.command&&(d=(0,mt.spawn)("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${m}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}),d.on("error",C=>{o.fail(`Command failed: ${C.message}`)}));let y=await g.waitForSpans({timeoutMs:p});y.length===0&&(o.fail("No traces received"),process.exit(1)),o.succeed(`Received ${y.length} spans`);let S=(0,k.filterSpans)(y,f.spanFilter),R=(0,k.mapSpansToResult)(S,f.spanMapping),l=ie(),u=new Map,T=c.providers;for(let[C,x]of Object.entries(T)){if(!x)continue;let L="";if(x.apiKeyEnv){let $=process.env[x.apiKeyEnv];$&&(L=$.trim())}if(!(!L&&C!=="ollama"))try{let $=(0,k.createProvider)(C,l);await $.initialize({apiKey:L,baseUrl:x.baseUrl,organization:x.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),u.set(C,$)}catch{}}let ee=c.defaults.judgeModel??c.models[0]?.id,F=c.models.find(C=>C.id===ee),N=F?u.get(F.provider):void 0,Re=(0,k.buildContextFromTrace)(R,{configDir:r,judgeAdapter:N,judgeModel:F?.model}),Te=[];for(let C of c.tests){if(C.skip)continue;let x=(0,k.createAssertionsFromExpect)(C.expect),L=[];for(let $ of x){let Ct=await $.evaluate(Re);L.push(...Ct)}Te.push({testName:C.name,assertions:L})}let pt=c.tests.filter(C=>C.skip).length,be=R.latencyMs,te=Te.map(({testName:C,assertions:x})=>{let 
L=x.every($=>$.passed);return{name:C,modelId:"trace",status:L?"passed":"failed",assertions:x,latencyMs:be,costUsd:0}}),gt=te.filter(C=>C.status==="passed").length,ne=te.filter(C=>C.status==="failed").length,ht={suites:[{name:c.suite.name,status:ne>0?"failed":"passed",tests:te}],totalTests:te.length,passed:gt,failed:ne,errored:0,skipped:pt,durationMs:be},yt={passed:ne===0,gates:[]},Rt=await le(t.reporter).generate(ht,yt);console.log(Rt.content),process.exit(ne>0?1:0)}finally{d?.kill(),await g.stop()}}catch(n){o.fail(`Trace command failed: ${n instanceof Error?n.message:String(n)}`),process.exit(1)}})}function Xt(){let e=new ft.Command;return e.name("kindlm").description("AI agent behavioral regression testing").version("1.0.0"),xe(e),Oe(e),Ve(e),qe(e),nt(e),at(e),ut(e),e}0&&(module.exports={createProgram});
|
|
52
52
|
//# sourceMappingURL=index.cjs.map
|