@kindlm/cli 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 KindLM Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,142 @@
1
+ # KindLM
2
+
3
+ ![CI](https://github.com/petrkindlmann/kindlm/actions/workflows/ci.yml/badge.svg)
4
+
5
+ Behavioral regression testing for AI agents. Test what your agents **do** — not just what they say.
6
+
7
+ ## Why KindLM?
8
+
9
+ LLM evals measure text quality. KindLM tests **behavior** — the tool calls your agent makes, the decisions it takes, and whether it leaks PII or violates compliance rules. It runs in CI so regressions never ship.
10
+
11
+ ## Features
12
+
13
+ - **Tool call assertions** — verify agents call the right tools with the right arguments, in the right order
14
+ - **Schema validation** — structured output checked against JSON Schema (AJV)
15
+ - **PII detection** — catch leaked SSNs, credit cards, emails, phone numbers, IBANs
16
+ - **LLM-as-judge** — score responses against natural-language criteria (0.0–1.0)
17
+ - **Drift detection** — semantic + field-level comparison against saved baselines
18
+ - **Keyword guards** — require or forbid specific phrases in output
19
+ - **Latency & cost budgets** — fail tests that exceed time or token-cost thresholds
20
+ - **EU AI Act compliance** — generate Annex IV documentation from test results
21
+ - **CI-native** — exit code 0/1, JUnit XML reporter, GitHub Actions ready
22
+
23
+ ## Supported Providers
24
+
25
+ | Provider | Example config |
26
+ |----------|---------------|
27
+ | OpenAI | `openai:gpt-4o` |
28
+ | Anthropic | `anthropic:claude-sonnet-4-5-20250929` |
29
+ | Google Gemini | `google:gemini-2.0-flash` |
30
+ | Mistral | `mistral:mistral-large-latest` |
31
+ | Cohere | `cohere:command-r-plus` |
32
+ | Ollama | `ollama:llama3` |
33
+
34
+ ## Quick Start
35
+
36
+ Try it instantly:
37
+
38
+ ```bash
39
+ npx @kindlm/cli init
40
+ ```
41
+
42
+ Or install globally:
43
+
44
+ ```bash
45
+ npm install -g @kindlm/cli
46
+ kindlm init
47
+ ```
48
+
49
+ Edit the generated `kindlm.yaml`:
50
+
51
+ ```yaml
52
+ kindlm: 1
53
+ project: "my-agent"
54
+
55
+ suite:
56
+ name: "refund-agent"
57
+
58
+ providers:
59
+ openai:
60
+ apiKeyEnv: "OPENAI_API_KEY"
61
+
62
+ models:
63
+ - id: "gpt-4o"
64
+ provider: "openai"
65
+ model: "gpt-4o"
66
+ params:
67
+ temperature: 0
68
+
69
+ prompts:
70
+ refund:
71
+ system: "You are a refund support agent. Use lookup_order(order_id) to find orders."
72
+ user: "{{message}}"
73
+
74
+ tests:
75
+ - name: "looks-up-order"
76
+ prompt: "refund"
77
+ vars:
78
+ message: "I want to return order #12345"
79
+ tools:
80
+ - name: "lookup_order"
81
+ responses:
82
+ - when: { order_id: "12345" }
83
+ then: { order_id: "12345", status: "eligible" }
84
+ expect:
85
+ toolCalls:
86
+ - tool: "lookup_order"
87
+ argsMatch: { order_id: "12345" }
88
+ guardrails:
89
+ pii:
90
+ enabled: true
91
+ judge:
92
+ - criteria: "Response is empathetic and professional"
93
+ minScore: 0.8
94
+ ```
95
+
96
+ Run your tests:
97
+
98
+ ```bash
99
+ kindlm test
100
+ ```
101
+
102
+ Output:
103
+
104
+ ```
105
+ refund-agent / looks-up-order
106
+
107
+ gpt-4o
108
+ ✓ looks-up-order (1.3s)
109
+ ✓ tool_called: lookup_order
110
+ ✓ pii: no PII detected
111
+ ✓ judge: 0.92 ≥ 0.80
112
+
113
+ 1 passed, 0 failed
114
+ Gates: ✓ PASSED
115
+ ```
116
+
117
+ ## CI Integration
118
+
119
+ ```yaml
120
+ # .github/workflows/test.yml (excerpt — add these entries under your job's `steps:`)
121
+ - run: npm install -g @kindlm/cli
122
+ - run: kindlm test --reporter junit --output results.xml
123
+ ```
124
+
125
+ ## Repository Layout
126
+
127
+ ```
128
+ packages/
129
+ core/ @kindlm/core — Business logic, zero I/O dependencies
130
+ cli/ @kindlm/cli — CLI entry point
131
+ cloud/ @kindlm/cloud — Cloudflare Workers API + D1 database
132
+ docs/ Technical specs and documentation
133
+ site/ Documentation website (Next.js)
134
+ ```
135
+
136
+ ## Documentation
137
+
138
+ Full docs: [kindlm.dev](https://kindlm.dev) | Source: [`docs/`](./docs/)
139
+
140
+ ## License
141
+
142
+ MIT (core + CLI) | AGPL (cloud)
package/dist/index.cjs CHANGED
@@ -1,4 +1,4 @@
1
- "use strict";var ft=Object.create;var te=Object.defineProperty;var gt=Object.getOwnPropertyDescriptor;var pt=Object.getOwnPropertyNames;var ht=Object.getPrototypeOf,yt=Object.prototype.hasOwnProperty;var Ct=(e,t)=>{for(var n in t)te(e,n,{get:t[n],enumerable:!0})},Se=(e,t,n,r)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of pt(t))!yt.call(e,o)&&o!==n&&te(e,o,{get:()=>t[o],enumerable:!(r=gt(t,o))||r.enumerable});return e};var $=(e,t,n)=>(n=e!=null?ft(ht(e)):{},Se(t||!e||!e.__esModule?te(n,"default",{value:e,enumerable:!0}):n,e)),vt=e=>Se(te({},"__esModule",{value:!0}),e);var zt={};Ct(zt,{createProgram:()=>Kt});module.exports=vt(zt);var dt=require("commander");var ne=require("fs"),Te=require("path"),z=$(require("chalk"),1),Rt=`kindlm: 1
1
+ "use strict";var vt=Object.create;var re=Object.defineProperty;var wt=Object.getOwnPropertyDescriptor;var St=Object.getOwnPropertyNames;var Tt=Object.getPrototypeOf,bt=Object.prototype.hasOwnProperty;var kt=(e,t)=>{for(var o in t)re(e,o,{get:t[o],enumerable:!0})},ke=(e,t,o,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let r of St(t))!bt.call(e,r)&&r!==o&&re(e,r,{get:()=>t[r],enumerable:!(n=wt(t,r))||n.enumerable});return e};var E=(e,t,o)=>(o=e!=null?vt(Tt(e)):{},ke(t||!e||!e.__esModule?re(o,"default",{value:e,enumerable:!0}):o,e)),Et=e=>ke(re({},"__esModule",{value:!0}),e);var qt={};kt(qt,{createProgram:()=>Xt});module.exports=Et(qt);var ft=require("commander");var oe=require("fs"),Ee=require("path"),_=E(require("chalk"),1),xt=`kindlm: 1
2
2
  project: my-project
3
3
 
4
4
  suite:
@@ -42,11 +42,11 @@ defaults:
42
42
  repeat: 1
43
43
  concurrency: 4
44
44
  timeoutMs: 60000
45
- `;function be(e){e.command("init").description("Create a kindlm.yaml template").option("--force","Overwrite existing kindlm.yaml").action(t=>{let n=(0,Te.resolve)(process.cwd(),"kindlm.yaml");(0,ne.existsSync)(n)&&!t.force&&(console.error(z.default.red("kindlm.yaml already exists. Use --force to overwrite.")),process.exit(1)),(0,ne.writeFileSync)(n,Rt,"utf-8"),console.log(z.default.green("Created kindlm.yaml")),console.log(""),console.log("Next steps:"),console.log(` 1. Edit ${z.default.bold("kindlm.yaml")} with your test configuration`),console.log(` 2. Set your API key: ${z.default.bold("export OPENAI_API_KEY=sk-...")}`),console.log(` 3. Run tests: ${z.default.bold("kindlm test")}`)})}var xe=require("fs"),oe=require("path"),_=$(require("chalk"),1),Pe=require("@kindlm/core");var ke=require("fs");function H(){return{readFile(e){try{return{success:!0,data:(0,ke.readFileSync)(e,"utf-8")}}catch(t){return{success:!1,error:{code:"CONFIG_FILE_REF_ERROR",message:`Cannot read file: ${e}: ${t instanceof Error?t.message:String(t)}`}}}}}}function Ie(e){e.command("validate").description("Validate kindlm.yaml configuration").option("-c, --config <path>","Path to config file","kindlm.yaml").action(t=>{let n=(0,oe.resolve)(process.cwd(),t.config),r=(0,oe.dirname)(n),o;try{o=(0,xe.readFileSync)(n,"utf-8")}catch{console.error(_.default.red(`Config file not found: ${n}`)),process.exit(1)}let a=H(),i=(0,Pe.parseConfig)(o,{configDir:r,fileReader:a});if(!i.success){console.error(_.default.red("Validation failed:"));let c=i.error.details;if(c&&Array.isArray(c.errors))for(let m of c.errors)console.error(_.default.red(` - ${m}`));else console.error(_.default.red(` ${i.error.message}`));process.exit(1)}let s=i.data;console.log(_.default.green("Config is valid!")),console.log(""),console.log(` Suite: ${_.default.bold(s.suite.name)}`),console.log(` Tests: ${_.default.bold(String(s.tests.length))}`),console.log(` Models: ${_.default.bold(String(s.models.length))}`)})}var 
w=$(require("chalk"),1),P=require("@kindlm/core");var se=require("fs"),ie=require("path"),E=$(require("chalk"),1),J=require("@kindlm/core");function re(){return{async fetch(e,t){let n=new AbortController,r=t.timeoutMs?setTimeout(()=>n.abort(),t.timeoutMs):void 0;try{let o=await globalThis.fetch(e,{method:t.method,headers:t.headers,body:t.body,signal:n.signal});return{ok:o.ok,status:o.status,json:()=>o.json()}}finally{r!==void 0&&clearTimeout(r)}}}}var $e=$(require("ora"),1);function G(){let e;return{start(t){e=(0,$e.default)(t).start()},succeed(t){e?.succeed(t),e=void 0},fail(t){e?.fail(t),e=void 0},stop(){e?.stop(),e=void 0}}}var Ee=require("child_process"),q=require("@kindlm/core");function Oe(){return{async execute(e,t){return new Promise(n=>{let r=(0,Ee.spawn)("sh",["-c",e],{cwd:t.cwd,env:{...process.env,...t.env},stdio:["ignore","pipe","pipe"]}),o=[],a=[];r.stdout.on("data",s=>o.push(s)),r.stderr.on("data",s=>a.push(s));let i=setTimeout(()=>{r.kill("SIGTERM"),setTimeout(()=>{r.killed||r.kill("SIGKILL")},1e3)},t.timeoutMs);r.on("close",(s,c)=>{if(clearTimeout(i),c==="SIGTERM"||c==="SIGKILL"){n((0,q.err)({code:"PROVIDER_TIMEOUT",message:`Command timed out after ${t.timeoutMs}ms`}));return}n((0,q.ok)({stdout:Buffer.concat(o).toString("utf-8"),stderr:Buffer.concat(a).toString("utf-8"),exitCode:s??1}))}),r.on("error",s=>{clearTimeout(i),n((0,q.err)({code:"UNKNOWN_ERROR",message:`Failed to spawn command: ${s.message}`}))})})}}}var wt=1048576;async function V(e){let t=G(),n=!1,r=()=>{n&&process.exit(130),n=!0,t.stop(),console.error(E.default.yellow(`
46
- Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",r);try{return await St(e,t)}finally{process.removeListener("SIGINT",r)}}async function St(e,t){let n=(0,ie.resolve)(process.cwd(),e.configPath),r=(0,ie.dirname)(n);try{let g=(0,se.statSync)(n);g.size>wt&&(console.error(E.default.red(`Config file exceeds 1MB limit (${(g.size/1048576).toFixed(1)}MB): ${n}`)),process.exit(1))}catch{console.error(E.default.red(`Config file not found: ${n}`)),process.exit(1)}let o;try{o=(0,se.readFileSync)(n,"utf-8")}catch{console.error(E.default.red(`Config file not found: ${n}`)),process.exit(1)}let a=H(),i=(0,J.parseConfig)(o,{configDir:r,fileReader:a});i.success||(console.error(E.default.red(`Config validation failed: ${i.error.message}`)),process.exit(1));let s=i.data;e.runs!==void 0&&(s.defaults.repeat=e.runs),e.gate!==void 0&&(s.gates?s.gates.passRateMin=e.gate/100:s.gates={passRateMin:e.gate/100});let c=re(),m=new Map,u=s.providers;for(let[g,I]of Object.entries(u)){if(!I)continue;let U="";if(I.apiKeyEnv){let F=process.env[I.apiKeyEnv];F||(console.error(E.default.red(`Missing environment variable: ${I.apiKeyEnv}`)),process.exit(1)),U=F.trim()}else g!=="ollama"&&(console.error(E.default.red(`Provider "${g}" requires apiKeyEnv to be configured`)),process.exit(1));let X;try{X=(0,J.createProvider)(g,c)}catch(F){let K=F instanceof Error?F.message:String(F);console.error(E.default.red(`Failed to create provider "${g}": ${K}`)),process.exit(1)}await X.initialize({apiKey:U,baseUrl:I.baseUrl,organization:I.organization,timeoutMs:s.defaults.timeoutMs,maxRetries:2}),m.set(g,X)}let f=0,y=Tt(s),d=g=>{g.type==="test_start"?t.start(`Running ${g.test} [${g.model}] (${f}/${y})`):g.type==="test_complete"&&f++},b=s.tests.some(g=>g.command)?Oe():void 0,l=await(0,J.createRunner)(s,{adapters:m,configDir:r,fileReader:a,onProgress:d,baselineData:e.baselineData,commandExecutor:b}).run();return t.stop(),l.success||(console.error(E.default.red(`Run failed: 
${l.error.message}`)),process.exit(1)),{config:s,runnerResult:l.data,configDir:r,yamlContent:o}}function Tt(e){let t=0;for(let n of e.tests){if(n.skip)continue;let r=n.repeat??e.defaults.repeat;if(n.command)t+=r;else{let o=n.models?.length??e.models.length;t+=o*r}}return t}var W=require("fs"),ge=require("path"),Ae=require("crypto");function Me(){return(0,ge.join)(process.cwd(),".kindlm","last-run.json")}function Ne(e){let t=Me(),n=(0,ge.join)(process.cwd(),".kindlm");(0,W.mkdirSync)(n,{recursive:!0,mode:448});let r={...e,runnerResult:{...e.runnerResult,aggregated:e.runnerResult.aggregated.map(o=>{let a=o.runs.flatMap(i=>i.assertions.filter(s=>!s.passed).map(s=>s.failureMessage)).filter(i=>i!==void 0);return{...o,failureMessages:a,runs:[]}})}};(0,W.writeFileSync)(t,JSON.stringify(r),{mode:384})}function Le(){try{let e=(0,W.readFileSync)(Me(),"utf-8"),t=JSON.parse(e);return t.runnerResult?.runResult&&Array.isArray(t.runnerResult.aggregated)&&typeof t.suiteName=="string"&&typeof t.configHash=="string"&&typeof t.timestamp=="string"?t:null}catch{return null}}function _e(e){return(0,Ae.createHash)("sha256").update(e).digest("hex")}var je=$(require("pdfkit"),1),Fe=require("fs"),He=require("fs/promises"),Be=require("path");function bt(e){let t=e.split(`
47
- `),n=[],r="",o=2,a=[];for(let i of t){let s=i.match(/^(#{2,4})\s+(.+)$/);s&&s[1]?.length===2?((r||a.length>0)&&n.push({heading:r,headingLevel:o,body:a.join(`
48
- `).trim()}),r=s[2]?.trim()??"",o=2,a=[]):a.push(i)}return(r||a.length>0)&&n.push({heading:r,headingLevel:o,body:a.join(`
49
- `).trim()}),n}function kt(e){return e.match(/^# (.+)$/m)?.[1]?.trim()??"KindLM Compliance Report"}function xt(e){return e.match(/SHA-256:\s*`([a-f0-9]+)`/i)?.[1]??null}function De(e){switch(e){case 2:return 18;case 3:return 15;case 4:return 13;default:return 13}}function Pt(e){if(e.length<2)return null;let t=e[0]??"",n=e[1]??"";if(!t.includes("|")||!n.match(/^\s*\|[-:\s|]+\|\s*$/))return null;let r=i=>i.split("|").slice(1,-1).map(s=>s.trim()),o={cells:r(t)},a=[];for(let i=2;i<e.length;i++){let s=e[i]??"";if(!s.includes("|"))break;a.push({cells:r(s)})}return{header:o,rows:a}}function B(e,t){let n=e.page.margins.bottom;e.page.height-n-30-e.y<t&&(e.addPage(),Ue(e))}function Ue(e){let t=new Date().toISOString();e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("KindLM Compliance Report",60,40),e.text(t,60,40,{align:"right"}),e.moveDown(3)}function It(e){e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("Generated by KindLM \xB7 kindlm.com",60,e.page.height-50,{align:"center",width:e.page.width-120})}function $t(e,t,n){let r=t.header.cells.length,o=n/r,a=e.page.margins.left,i=18;B(e,i*2);let s=(c,m,u)=>{let f=e.y;u&&(e.save(),e.rect(a,f-2,n,i).fill(u),e.restore());for(let y=0;y<c.length;y++){let d=a+y*o;e.fontSize(8).font(m?"Helvetica-Bold":"Courier").fillColor("#44403c").text(c[y]??"",d+4,f,{width:o-8,height:i,lineBreak:!1})}e.y=f+i};s(t.header.cells,!0,"#f5f5f4");for(let c of t.rows)B(e,i),s(c.cells,!1)}async function Ke(e,t){return await(0,He.mkdir)((0,Be.dirname)(t),{recursive:!0}),new Promise((n,r)=>{let o=new je.default({size:"A4",margins:{top:72,bottom:72,left:60,right:60},info:{Title:"KindLM EU AI Act Compliance Report",Author:"KindLM",Creator:"KindLM CLI"}}),a=(0,Fe.createWriteStream)(t);o.pipe(a);let 
i=o.page.width-o.page.margins.left-o.page.margins.right,s=kt(e),c=xt(e);o.moveDown(6),o.fontSize(28).font("Helvetica-Bold").fillColor("#1c1917").text(s,{align:"center",width:i}),o.moveDown(.5),o.fontSize(14).font("Helvetica").fillColor("#57534e").text("EU AI Act Annex IV Documentation",{align:"center",width:i}),o.moveDown(1),o.fontSize(10).fillColor("#a8a29e").text(`Generated: ${new Date().toISOString()}`,{align:"center",width:i}),c&&(o.moveDown(.3),o.fontSize(9).font("Courier").fillColor("#78716c").text(`SHA-256: ${c}`,{align:"center",width:i})),o.moveDown(2),o.fontSize(10).font("Helvetica").fillColor("#6366f1").text("kindlm.com",{align:"center",link:"https://kindlm.com",width:i});let m=bt(e);for(let u of m){if(o.addPage(),Ue(o),u.heading){let C=De(u.headingLevel);o.fontSize(C).font("Helvetica-Bold").fillColor("#1c1917").text(u.heading,{width:i}),o.moveDown(.5),o.moveTo(60,o.y).lineTo(60+i,o.y).strokeColor("#e7e5e4").lineWidth(1).stroke(),o.moveDown(.8)}let f=u.body.split(`
50
- `),y=!1,d=0;for(;d<f.length;){let C=f[d]??"";if(C.startsWith("```")){y=!y,y&&B(o,30),d++;continue}if(y){B(o,14);let l=o.y;o.save(),o.rect(o.page.margins.left,l-2,i,14).fill("#f5f5f4"),o.restore(),o.fontSize(9).font("Courier").fillColor("#44403c").text(C,{width:i}),d++;continue}if(!C.trim()){o.moveDown(.4),d++;continue}let b=f[d+1]??"";if(C.includes("|")&&d+1<f.length&&b.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],g=d;for(;g<f.length&&(f[g]??"").includes("|");)l.push(f[g]??""),g++;let I=Pt(l);if(I){$t(o,I,i),d=g;continue}}if(C.match(/^\s*\|[-:]+/)||C.match(/^---+$/)){d++;continue}let h=C.match(/^(#{3,4})\s+(.+)$/);if(h?.[1]&&h[2]){let l=h[1].length,g=De(l);B(o,g+10),o.moveDown(.3),o.fontSize(g).font("Helvetica-Bold").fillColor("#1c1917").text(h[2].trim(),{width:i}),o.moveDown(.3),d++;continue}if(C.match(/^\s*[-*] /)){B(o,14),o.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{indent:12,width:i-12}),d++;continue}B(o,14),o.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{width:i}),d++}It(o)}o.end(),a.on("finish",()=>n(t)),a.on("error",r)})}function ze(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{try{let{runnerResult:n,config:r,yamlContent:o}=await V({configPath:t.config,runs:t.runs?parseInt(t.runs,10):void 0,gate:t.gate?parseFloat(t.gate):void 0}),{runResult:a,aggregated:i}=n,s=(0,P.evaluateGates)(r.gates,i),m=Ot(t.reporter).generate(a,s);if(console.log(m.content),t.compliance){let y=(0,P.createComplianceReporter)().generate(a,s);if(console.log(""),console.log(y.content),t.pdf){let 
d=await Ke(y.content,t.pdf);console.log(""),console.log(w.default.green(`PDF report saved to ${d}`))}}try{Ne({runnerResult:n,suiteName:r.suite.name,configHash:_e(o),timestamp:new Date().toISOString()})}catch{}let u=a.failed===0&&a.errored===0&&s.passed;process.exit(u?0:1)}catch(n){if(n instanceof P.ProviderError){let r=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(w.default.red(`${r}: ${n.message}`)),n.retryable&&console.error(w.default.yellow("This error may be transient. Try again or increase --timeout."))}else if(At(n)){let o=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(w.default.red(`${o}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(w.default.red("Request timed out. Check network connectivity or increase timeout.")):console.error(w.default.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}var Et={bold:e=>w.default.bold(e),red:e=>w.default.red(e),green:e=>w.default.green(e),yellow:e=>w.default.yellow(e),cyan:e=>w.default.cyan(e),dim:e=>w.default.dim(e),greenBold:e=>w.default.green.bold(e),redBold:e=>w.default.red.bold(e)};function Ot(e){switch(e){case"json":return(0,P.createJsonReporter)();case"junit":return(0,P.createJunitReporter)();default:return(0,P.createPrettyReporter)(Et)}}function At(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}var k=require("path"),Je=require("fs"),p=$(require("chalk"),1),S=require("@kindlm/core");var D=require("fs"),ae=require("path");function Ge(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function ce(e){let t=(0,ae.join)(e,"baselines");return{read(n){let r=(0,ae.join)(t,`${Ge(n)}.json`);try{return{success:!0,data:(0,D.readFileSync)(r,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found 
for suite "${n}" at ${r}`}}}},write(n,r){try{(0,D.mkdirSync)(t,{recursive:!0});let o=(0,ae.join)(t,`${Ge(n)}.json`);return(0,D.writeFileSync)(o,r,"utf-8"),{success:!0,data:void 0}}catch(o){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${o instanceof Error?o.message:String(o)}`}}}},list(){try{return(0,D.mkdirSync)(t,{recursive:!0}),{success:!0,data:(0,D.readdirSync)(t).filter(o=>o.endsWith(".json")).map(o=>o.replace(/\.json$/,""))}}catch(n){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${n instanceof Error?n.message:String(n)}`}}}}}}function We(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),{config:i,runnerResult:s}=await V({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0}),{aggregated:c}=s,m=(0,S.buildBaselineData)(i.suite.name,c,new Date().toISOString()),u=(0,S.writeBaseline)(m,a);u.success||(console.error(p.default.red(`Failed to save baseline: ${u.error.message}`)),process.exit(1));let f=Object.keys(m.results).length;console.log(""),console.log(p.default.green(`Baseline saved for suite "${i.suite.name}" (${f} test${f===1?"":"s"})`)),console.log(p.default.dim(` Location: ${o}/baselines/`)),process.exit(0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let 
r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),i=(0,k.resolve)(process.cwd(),n.config),s;try{s=(0,Je.readFileSync)(i,"utf-8")}catch{console.error(p.default.red(`Config file not found: ${i}`)),process.exit(1)}let c=H(),m=(0,S.parseConfig)(s,{configDir:r,fileReader:c});m.success||(console.error(p.default.red(`Config validation failed: ${m.error.message}`)),process.exit(1));let u=m.data.suite.name,f=(0,S.readBaseline)(u,a);f.success||(f.error.code==="BASELINE_NOT_FOUND"?console.error(p.default.red(`No baseline found for suite "${u}". Run \`kindlm baseline set\` first.`)):console.error(p.default.red(`Failed to read baseline: ${f.error.message}`)),process.exit(1));let y=f.data,{runnerResult:d}=await V({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0,baselineData:y}),{aggregated:C}=d,b=(0,S.buildBaselineData)(u,C,new Date().toISOString()),h=(0,S.compareBaseline)(y,b.results);if(console.log(""),console.log(p.default.bold(`Baseline comparison for "${u}"`)),console.log(p.default.dim(` Baseline from: ${y.createdAt}`)),console.log(""),h.regressions.length>0){console.log(p.default.red.bold(` Regressions (${h.regressions.length}):`));for(let l of h.regressions)console.log(p.default.red(` ${l.testName}: ${Z(l.baselinePassRate)} \u2192 ${Z(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(p.default.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(h.improvements.length>0){console.log(p.default.green.bold(` Improvements (${h.improvements.length}):`));for(let l of h.improvements)console.log(p.default.green(` ${l.testName}: ${Z(l.baselinePassRate)} \u2192 ${Z(l.currentPassRate)}`));console.log("")}if(h.unchanged.length>0){console.log(p.default.dim(` Unchanged (${h.unchanged.length}):`));for(let l of h.unchanged)console.log(p.default.dim(` ${l.testName}: ${Z(l.passRate)}`));console.log("")}if(h.newTests.length>0){console.log(p.default.cyan(` New tests (${h.newTests.length}):`));for(let l of 
h.newTests)console.log(p.default.cyan(` ${l}`));console.log("")}if(h.removedTests.length>0){console.log(p.default.yellow(` Removed tests (${h.removedTests.length}):`));for(let l of h.removedTests)console.log(p.default.yellow(` ${l}`));console.log("")}process.exit(h.regressions.length>0?1:0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(n=>{try{let r=(0,k.dirname)((0,k.resolve)(process.cwd(),n.config)),o=(0,k.join)(r,".kindlm"),a=ce(o),i=(0,S.listBaselines)(a);i.success||(console.error(p.default.red(`Failed to list baselines: ${i.error.message}`)),process.exit(1));let s=i.data;s.length===0&&(console.log(p.default.dim("No baselines saved yet. Run `kindlm baseline set` to create one.")),process.exit(0)),console.log(p.default.bold("Saved baselines:")),console.log("");for(let c of s){let m=a.read(c);if(!m.success){console.log(` ${c} ${p.default.dim("(unreadable)")}`);continue}let u=(0,S.deserializeBaseline)(m.data);if(!u.success){console.log(` ${c} ${p.default.dim("(corrupt)")}`);continue}let f=Object.keys(u.data.results).length;console.log(` ${p.default.cyan(u.data.suiteName)} \u2014 ${f} test${f===1?"":"s"}, saved ${p.default.dim(u.data.createdAt)}`)}process.exit(0)}catch(r){console.error(p.default.red(`Error: ${r instanceof Error?r.message:String(r)}`)),process.exit(1)}})}function Z(e){return`${(e*100).toFixed(1)}%`}var qe=require("readline"),Ve=require("stream"),A=$(require("chalk"),1);var O=require("fs"),pe=require("path"),he=require("os");function ye(){return(0,pe.join)((0,he.homedir)(),".kindlm","credentials")}function le(){try{let e=(0,O.readFileSync)(ye(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function Ye(e){let t=ye(),n=(0,pe.join)((0,he.homedir)(),".kindlm");(0,O.mkdirSync)(n,{recursive:!0,mode:448});let 
r={token:e,savedAt:new Date().toISOString()};(0,O.writeFileSync)(t,JSON.stringify(r,null,2),{mode:384}),(0,O.chmodSync)(t,384)}function Xe(){try{(0,O.unlinkSync)(ye())}catch{}}var Mt="https://api.kindlm.com";var j=class extends Error{status;constructor(t,n){super(n),this.name="CloudApiError",this.status=t}};function Y(){let e=process.env.KINDLM_CLOUD_URL??Mt;if(e.startsWith("http://")&&!Nt(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. Use HTTPS or target localhost for development.`);return e}function Nt(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function Lt(e){return new Promise(t=>setTimeout(t,e))}function Q(e,t){async function n(r,o,a){let i=`${e}${o}`,s={Authorization:`Bearer ${t}`},c={method:r,headers:s};a!==void 0&&(s["Content-Type"]="application/json",c.body=JSON.stringify(a));let m;for(let u=0;u<=1;u++){u>0&&await Lt(1e3);let f=new AbortController,y=setTimeout(()=>f.abort(),3e4);c.signal=f.signal;try{let d=await fetch(i,c);if(!d.ok){if(d.status>=500&&u<1){m=new j(d.status,`HTTP ${d.status}`);continue}let b=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(b=l.error)}catch{}throw new j(d.status,b)}if(d.status===204)return;let C=d.headers.get("content-type")??"";if(!C.includes("application/json"))throw new j(d.status,`Expected JSON response but got content-type: ${C}`);return await d.json()}catch(d){if(d instanceof j)throw d;if(m=d instanceof Error?d:new Error(String(d)),u<1)continue}finally{clearTimeout(y)}}throw m??new Error("Request failed")}return{baseUrl:e,get:r=>n("GET",r),post:(r,o)=>n("POST",r,o),patch:(r,o)=>n("PATCH",r,o),delete:r=>n("DELETE",r)}}function Ze(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored 
credentials").action(async t=>{try{if(t.logout){Xe(),console.log(A.default.green("Logged out. Credentials removed."));return}if(t.status){await _t();return}let n=t.token??process.env.KINDLM_API_TOKEN??await Dt();n.startsWith("klm_")||(console.error(A.default.red('Invalid token format. KindLM tokens start with "klm_".')),process.exit(1));let r=Q(Y(),n);try{await r.get("/v1/auth/tokens")}catch(o){throw o instanceof j&&o.status===401&&(console.error(A.default.red("Invalid or expired token.")),process.exit(1)),o}Ye(n),console.log(A.default.green("Authenticated successfully. Token saved."))}catch(n){console.error(A.default.red(`Login failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}async function _t(){let e=le();if(!e){console.log(A.default.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=Q(Y(),e);try{await t.get("/v1/auth/tokens"),console.log(A.default.green("Authenticated.")),console.log(` Cloud URL: ${Y()}`)}catch(n){n instanceof j&&n.status===401?console.log(A.default.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(A.default.yellow(`Cannot reach Cloud API: ${n instanceof Error?n.message:String(n)}`))}}function Dt(){return new Promise((e,t)=>{let n=new Ve.Writable({write(o,a,i){i()}});process.stderr.write("Paste your KindLM API token: ");let r=(0,qe.createInterface)({input:process.stdin,output:n,terminal:!0});r.question("",o=>{r.close(),process.stderr.write(`
51
- `);let a=o.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}var nt=require("path"),ot=require("child_process"),me=$(require("chalk"),1);var de=require("child_process");function Qe(){try{let e=(0,de.execSync)("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=(0,de.execSync)("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,r=(0,de.execSync)("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:r}}catch{return{commitSha:null,branch:null,dirty:!1}}}function et(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function ee(e){return encodeURIComponent(e)}async function tt(e,t,n){let r=await jt(e,n.projectName),o=await Ft(e,r,n.suiteName,n.configHash),a=await e.post(`/v1/runs/${ee(r)}/runs`,{suiteId:o,commitSha:n.commitSha,branch:n.branch,environment:n.environment,triggeredBy:n.triggeredBy}),i=Ht(t.aggregated),s=50;for(let l=0;l<i.length;l+=s){let g=i.slice(l,l+s);await e.post(`/v1/results/${ee(a.id)}/results`,{results:g})}let{runResult:c}=t,m=c.totalTests>0?c.passed/c.totalTests:0,u=new Set(t.aggregated.map(l=>l.modelId)),f=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),y=f.length>0?f.reduce((l,g)=>l+g,0)/f.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),C=d.length>0?d.reduce((l,g)=>l+g,0)/d.length:void 0,b=t.aggregated.reduce((l,g)=>l+g.totalCostUsd,0),h=b>0?b:void 0;return await e.patch(`/v1/runs/${ee(a.id)}`,{status:"completed",passRate:m,testCount:c.totalTests,modelCount:u.size,judgeAvgScore:y,latencyAvgMs:C,costEstimateUsd:h,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:r}}async function 
jt(e,t){let{projects:n}=await e.get("/v1/projects"),r=n.find(a=>a.name===t);return r?r.id:(await e.post("/v1/projects",{name:t})).id}async function Ft(e,t,n,r){let{suites:o}=await e.get(`/v1/suites/${ee(t)}/suites`),a=o.find(s=>s.name===n);return a?a.id:(await e.post(`/v1/suites/${ee(t)}/suites`,{name:n,configHash:r})).id}function Ht(e){return e.map(t=>{let n=t,r=t.runs.length>0?t.runs.flatMap(o=>o.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(o=>o!==void 0):n.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:r.length>0?JSON.stringify(r):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function rt(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let n=t.token??process.env.KINDLM_API_TOKEN??le();n||(console.error(me.default.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let r=Le();r||(console.error(me.default.red('No test run found. 
Run "kindlm test" first.')),process.exit(1));let o=Qe(),a=et(),i=t.project??Ut(),s=Q(Y(),n),c=G();c.start("Uploading results to KindLM Cloud...");try{let m=await tt(s,r.runnerResult,{projectName:i,suiteName:r.suiteName,configHash:r.configHash,commitSha:a.commitSha??o.commitSha??void 0,branch:a.branch??o.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local"});c.succeed("Uploaded successfully."),console.log(` Run ID: ${m.runId}`),console.log(` Project: ${i}`),console.log(` Suite: ${r.suiteName}`)}catch(m){throw c.fail("Upload failed."),m}}catch(n){console.error(me.default.red(`Upload failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function Bt(e){try{let r=new URL(e).pathname.split("/").filter(Boolean),o=r[r.length-1];if(o)return o.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let n=t[1].split("/");return n[n.length-1]??null}return null}function Ut(){try{let e=(0,ot.execSync)("git remote get-url origin",{encoding:"utf-8"}).trim(),t=Bt(e);if(t)return t}catch{}return(0,nt.basename)(process.cwd())}var ue=require("fs"),fe=require("path"),ct=require("child_process"),x=$(require("chalk"),1),T=require("@kindlm/core");var st=require("http"),it=require("@kindlm/core");function at(e){let t=[],n=null,r=[];function o(){for(let i of r)i()}function a(i,s){if(s.setHeader("Access-Control-Allow-Origin","*"),s.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),s.setHeader("Access-Control-Allow-Headers","Content-Type"),i.method==="OPTIONS"){s.writeHead(204),s.end();return}if(i.method!=="POST"||i.url!=="/v1/traces"){s.writeHead(404,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Not found"}));return}let c=[];i.on("data",m=>c.push(m)),i.on("end",()=>{try{let 
m=Buffer.concat(c).toString("utf-8"),u=JSON.parse(m),f=(0,it.parseOtlpPayload)(u);f.success?(t.push(...f.data),o(),s.writeHead(200,{"Content-Type":"application/json"}),s.end(JSON.stringify({partialSuccess:{}}))):(s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:f.error.message})))}catch{s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((i,s)=>{n=(0,st.createServer)(a),n.on("error",s),n.listen(e,()=>i())})},stop(){return new Promise(i=>{n?n.close(()=>i()):i()})},getSpans(){return[...t]},waitForSpans({timeoutMs:i}){return new Promise(s=>{if(t.length>0){s([...t]);return}let c=setTimeout(()=>{r=r.filter(u=>u!==m),s([...t])},i),m=()=>{clearTimeout(c),r=r.filter(u=>u!==m),setTimeout(()=>s([...t]),500)};r.push(m)})}}}function lt(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let n=G();try{let r=(0,fe.resolve)(process.cwd(),t.config),o=(0,fe.dirname)(r);try{(0,ue.statSync)(r).size>1048576&&(console.error(x.default.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(x.default.red(`Config file not found: ${r}`)),process.exit(1)}let a;try{a=(0,ue.readFileSync)(r,"utf-8")}catch{console.error(x.default.red(`Config file not found: ${r}`)),process.exit(1)}let i=H(),s=(0,T.parseConfig)(a,{configDir:o,fileReader:i});s.success||(console.error(x.default.red(`Config validation failed: ${s.error.message}`)),process.exit(1));let 
c=s.data,m=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},u=parseInt(t.port,10)||m.port,f=parseInt(t.timeout,10)||m.timeoutMs,y=at(u);await y.start(),n.start(`Listening for OTLP traces on port ${u}...`),t.command&&(0,ct.spawn)("sh",["-c",t.command],{cwd:o,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${u}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}).on("error",R=>{n.fail(`Command failed: ${R.message}`)});let d=await y.waitForSpans({timeoutMs:f});await y.stop(),d.length===0&&(n.fail("No traces received"),process.exit(1)),n.succeed(`Received ${d.length} spans`);let C=(0,T.filterSpans)(d,m.spanFilter),b=(0,T.mapSpansToResult)(C,m.spanMapping),h=re(),l=new Map,g=c.providers;for(let[v,R]of Object.entries(g)){if(!R)continue;let M="";if(R.apiKeyEnv){let N=process.env[R.apiKeyEnv];N&&(M=N.trim())}if(!(!M&&v!=="ollama"))try{let N=(0,T.createProvider)(v,h);await N.initialize({apiKey:M,baseUrl:R.baseUrl,organization:R.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),l.set(v,N)}catch{}}let I=c.defaults.judgeModel??c.models[0]?.id,U=c.models.find(v=>v.id===I),X=U?l.get(U.provider):void 0,F=(0,T.buildContextFromTrace)(b,{configDir:o,judgeAdapter:X,judgeModel:U?.model}),K=[];for(let v of c.tests){if(v.skip)continue;let R=(0,T.createAssertionsFromExpect)(v.expect),M=[];for(let N of R){let L=await N.evaluate(F);M.push(...L)}K.push({testName:v.name,assertions:M})}let Ce=K.reduce((v,R)=>v+R.assertions.length,0),ve=K.reduce((v,R)=>v+R.assertions.filter(M=>M.passed).length,0),Re=Ce-ve;console.log(),console.log(x.default.bold("Trace Test Results")),console.log(x.default.dim("\u2500".repeat(50)));for(let{testName:v,assertions:R}of K){let 
N=R.every(L=>L.passed)?x.default.green("\u2713"):x.default.red("\u2717");console.log(`${N} ${v}`);for(let L of R){let mt=L.passed?x.default.green(" \u2713"):x.default.red(" \u2717"),ut=L.failureMessage?`${L.label}: ${L.failureMessage}`:L.label;console.log(`${mt} ${ut}`)}}console.log(),console.log(`${x.default.bold("Total:")} ${ve} passed, ${Re} failed out of ${Ce} assertions`);let we=(0,T.evaluateGates)(c.gates,[]);if(!we.passed)for(let v of we.gates.filter(R=>!R.passed))console.log(x.default.red(`Gate failed: ${v.message}`));process.exit(Re>0?1:0)}catch(r){n.fail(`Trace command failed: ${r instanceof Error?r.message:String(r)}`),process.exit(1)}})}function Kt(){let e=new dt.Command;return e.name("kindlm").description("AI agent behavioral regression testing").version("0.4.0"),be(e),Ie(e),ze(e),We(e),Ze(e),rt(e),lt(e),e}0&&(module.exports={createProgram});
45
+ `;function xe(e){e.command("init").description("Create a kindlm.yaml template").option("--force","Overwrite existing kindlm.yaml").action(t=>{let o=(0,Ee.resolve)(process.cwd(),"kindlm.yaml");(0,oe.existsSync)(o)&&!t.force&&(console.error(_.default.red("kindlm.yaml already exists. Use --force to overwrite.")),process.exit(1));try{(0,oe.writeFileSync)(o,xt,"utf-8")}catch(n){let r=n instanceof Error&&"code"in n?n.code:void 0;console.error(r==="EACCES"||r==="EROFS"?_.default.red("Cannot create kindlm.yaml: permission denied"):_.default.red(`Cannot create kindlm.yaml: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}console.log(_.default.green("Created kindlm.yaml")),console.log(""),console.log("Next steps:"),console.log(` 1. Edit ${_.default.bold("kindlm.yaml")} with your test configuration`),console.log(` 2. Set your API key: ${_.default.bold("export OPENAI_API_KEY=sk-...")}`),console.log(` 3. Run tests: ${_.default.bold("kindlm test")}`)})}var Pe=require("fs"),se=require("path"),O=E(require("chalk"),1),$e=require("@kindlm/core");var Ie=require("fs");function D(){return{readFile(e){try{return{success:!0,data:(0,Ie.readFileSync)(e,"utf-8")}}catch(t){return{success:!1,error:{code:"CONFIG_FILE_REF_ERROR",message:`Cannot read file: ${e}: ${t instanceof Error?t.message:String(t)}`}}}}}}function Oe(e){e.command("validate").description("Validate kindlm.yaml configuration").option("-c, --config <path>","Path to config file","kindlm.yaml").action(t=>{let o=(0,se.resolve)(process.cwd(),t.config),n=(0,se.dirname)(o),r;try{r=(0,Pe.readFileSync)(o,"utf-8")}catch{console.error(O.default.red(`Config file not found: ${o}`)),process.exit(1)}let a=D(),s=(0,$e.parseConfig)(r,{configDir:n,fileReader:a});if(!s.success){console.error(O.default.red("Validation failed:"));let c=s.error.details;if(c&&Array.isArray(c.errors))for(let f of c.errors)console.error(O.default.red(` - ${f}`));else console.error(O.default.red(` ${s.error.message}`));process.exit(1)}let 
i=s.data;console.log(O.default.green("Config is valid!")),console.log(""),console.log(` Suite: ${O.default.bold(i.suite.name)}`),console.log(` Tests: ${O.default.bold(String(i.tests.length))}`),console.log(` Models: ${O.default.bold(String(i.models.length))}`)})}var j=E(require("chalk"),1),J=require("@kindlm/core");var ae=require("fs"),ce=require("path"),v=E(require("chalk"),1),K=require("@kindlm/core");var Me=require("@kindlm/core");function ie(){return{async fetch(e,t){let o=new AbortController,n=t.timeoutMs?setTimeout(()=>o.abort(),t.timeoutMs):void 0;try{let r=await globalThis.fetch(e,{method:t.method,headers:t.headers,body:t.body,signal:o.signal});return{ok:r.ok,status:r.status,json:()=>r.json()}}catch(r){throw r instanceof DOMException&&r.name==="AbortError"||r instanceof Error&&r.name==="AbortError"?new Me.ProviderError("TIMEOUT","Request timed out",408,!0):r}finally{n!==void 0&&clearTimeout(n)}}}}var Ae=E(require("ora"),1);function U(){let e;return{start(t){e=(0,Ae.default)({text:t,stream:process.stderr}).start()},succeed(t){e?.succeed(t),e=void 0},fail(t){e?.fail(t),e=void 0},stop(){e?.stop(),e=void 0}}}var Ne=require("child_process"),Y=require("@kindlm/core");function Le(){return{async execute(e,t){return new Promise(o=>{let n=(0,Ne.spawn)("sh",["-c",e],{cwd:t.cwd,env:{...process.env,...t.env},stdio:["ignore","pipe","pipe"]}),r=[],a=[];n.stdout.on("data",i=>r.push(i)),n.stderr.on("data",i=>a.push(i));let s=setTimeout(()=>{n.kill("SIGTERM"),setTimeout(()=>{n.killed||n.kill("SIGKILL")},1e3)},t.timeoutMs);n.on("close",(i,c)=>{if(clearTimeout(s),c==="SIGTERM"||c==="SIGKILL"){o((0,Y.err)({code:"PROVIDER_TIMEOUT",message:`Command timed out after ${t.timeoutMs}ms`}));return}o((0,Y.ok)({stdout:Buffer.concat(r).toString("utf-8"),stderr:Buffer.concat(a).toString("utf-8"),exitCode:i??1}))}),n.on("error",i=>{clearTimeout(s),o((0,Y.err)({code:"UNKNOWN_ERROR",message:`Failed to spawn command: ${i.message}`}))})})}}}var It=1048576;async function X(e){let 
t=U(),o=!1,n=()=>{o&&process.exit(130),o=!0,t.stop(),console.error(v.default.yellow(`
46
+ Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",n);try{return await Pt(e,t)}finally{process.removeListener("SIGINT",n)}}async function Pt(e,t){let o=(0,ce.resolve)(process.cwd(),e.configPath),n=(0,ce.dirname)(o);try{let u=(0,ae.statSync)(o);u.size>It&&(console.error(v.default.red(`Config file exceeds 1MB limit (${(u.size/1048576).toFixed(1)}MB): ${o}`)),process.exit(1))}catch{console.error(v.default.red(`Config file not found: ${o}`)),process.exit(1)}let r;try{r=(0,ae.readFileSync)(o,"utf-8")}catch{console.error(v.default.red(`Config file not found: ${o}`)),process.exit(1)}let a=D(),s=(0,K.parseConfig)(r,{configDir:n,fileReader:a});if(!s.success){console.error(v.default.red("Config validation failed:"));let u=s.error.details;if(u&&Array.isArray(u.errors))for(let T of u.errors)console.error(v.default.red(` - ${T}`));else console.error(v.default.red(` ${s.error.message}`));process.exit(1)}let i=s.data;e.suite!==void 0&&i.suite.name!==e.suite&&(console.error(v.default.red(`Suite "${e.suite}" not found. Available suite: "${i.suite.name}"`)),process.exit(1)),e.runs!==void 0&&((!Number.isInteger(e.runs)||e.runs<1)&&(console.error(v.default.red(`Invalid --runs value: ${e.runs}. Must be a positive integer (>= 1).`)),process.exit(1)),i.defaults.repeat=e.runs),e.gate!==void 0&&((Number.isNaN(e.gate)||e.gate<0||e.gate>100)&&(console.error(v.default.red(`Invalid --gate value: ${e.gate}. Must be between 0 and 100.`)),process.exit(1)),e.gate>0&&e.gate<=1&&console.error(v.default.yellow(`Warning: --gate ${e.gate} looks like a decimal. Did you mean --gate ${Math.round(e.gate*100)}? 
(--gate uses 0-100 scale)`)),i.gates?i.gates.passRateMin=e.gate/100:i.gates={passRateMin:e.gate/100});let c=ie(),f=new Map,m=i.providers;for(let[u,T]of Object.entries(m)){if(!T)continue;let ee="";if(T.apiKeyEnv){let N=process.env[T.apiKeyEnv];N||(console.error(v.default.red(`Missing environment variable: ${T.apiKeyEnv}`)),process.exit(1)),ee=N.trim()}else u!=="ollama"&&(console.error(v.default.red(`Provider "${u}" requires apiKeyEnv to be configured`)),process.exit(1));let F;try{F=(0,K.createProvider)(u,c)}catch(N){let Re=N instanceof Error?N.message:String(N);console.error(v.default.red(`Failed to create provider "${u}": ${Re}`)),process.exit(1)}await F.initialize({apiKey:ee,baseUrl:T.baseUrl,organization:T.organization,timeoutMs:i.defaults.timeoutMs,maxRetries:2}),f.set(u,F)}let p=0,g=$t(i),d=u=>{u.type==="test_start"?t.start(`Running ${u.test} [${u.model}] (${p}/${g})`):u.type==="test_complete"&&p++},S=i.tests.some(u=>u.command)?Le():void 0,l=await(0,K.createRunner)(i,{adapters:f,configDir:n,fileReader:a,onProgress:d,baselineData:e.baselineData,commandExecutor:S}).run();return t.stop(),l.success||(console.error(v.default.red(`Run failed: ${l.error.message}`)),process.exit(1)),{config:i,runnerResult:l.data,configDir:n,yamlContent:r}}function $t(e){let t=0;for(let o of e.tests){if(o.skip)continue;let n=o.repeat??e.defaults.repeat;if(o.command)t+=n;else{let r=o.models?.length??e.models.length;t+=r*n}}return t}var z=require("fs"),Ce=require("path"),_e=require("crypto");function De(){return(0,Ce.join)(process.cwd(),".kindlm","last-run.json")}function je(e){let t=De(),o=(0,Ce.join)(process.cwd(),".kindlm");(0,z.mkdirSync)(o,{recursive:!0,mode:448});let n={...e,runnerResult:{...e.runnerResult,aggregated:e.runnerResult.aggregated.map(r=>{let a=r.runs.flatMap(s=>s.assertions.filter(i=>!i.passed).map(i=>i.failureMessage)).filter(s=>s!==void 0);return{...r,failureMessages:a,runs:[]}})}};(0,z.writeFileSync)(t,JSON.stringify(n),{mode:384})}function He(){try{let 
e=(0,z.readFileSync)(De(),"utf-8"),t=JSON.parse(e);return t.runnerResult?.runResult&&Array.isArray(t.runnerResult.aggregated)&&typeof t.suiteName=="string"&&typeof t.configHash=="string"&&typeof t.timestamp=="string"?t:null}catch{return null}}function Fe(e){return(0,_e.createHash)("sha256").update(e).digest("hex")}var Ue=E(require("pdfkit"),1),Ke=require("fs"),ze=require("fs/promises"),Ge=require("path");function Ot(e){let t=e.split(`
47
+ `),o=[],n="",r=2,a=[];for(let s of t){let i=s.match(/^(#{2,4})\s+(.+)$/);i&&i[1]?.length===2?((n||a.length>0)&&o.push({heading:n,headingLevel:r,body:a.join(`
48
+ `).trim()}),n=i[2]?.trim()??"",r=2,a=[]):a.push(s)}return(n||a.length>0)&&o.push({heading:n,headingLevel:r,body:a.join(`
49
+ `).trim()}),o}function Mt(e){return e.match(/^# (.+)$/m)?.[1]?.trim()??"KindLM Compliance Report"}function At(e){return e.match(/SHA-256:\s*`([a-f0-9]+)`/i)?.[1]??null}function Be(e){switch(e){case 2:return 18;case 3:return 15;case 4:return 13;default:return 13}}function Nt(e){if(e.length<2)return null;let t=e[0]??"",o=e[1]??"";if(!t.includes("|")||!o.match(/^\s*\|[-:\s|]+\|\s*$/))return null;let n=s=>s.split("|").slice(1,-1).map(i=>i.trim()),r={cells:n(t)},a=[];for(let s=2;s<e.length;s++){let i=e[s]??"";if(!i.includes("|"))break;a.push({cells:n(i)})}return{header:r,rows:a}}function B(e,t){let o=e.page.margins.bottom;e.page.height-o-30-e.y<t&&(e.addPage(),Je(e))}function Je(e){let t=new Date().toISOString();e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("KindLM Compliance Report",60,40),e.text(t,60,40,{align:"right"}),e.moveDown(3)}function Lt(e){e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("Generated by KindLM \xB7 kindlm.com",60,e.page.height-50,{align:"center",width:e.page.width-120})}function _t(e,t,o){let n=t.header.cells.length,r=o/n,a=e.page.margins.left,s=18;B(e,s*2);let i=(c,f,m)=>{let p=e.y;m&&(e.save(),e.rect(a,p-2,o,s).fill(m),e.restore());for(let g=0;g<c.length;g++){let d=a+g*r;e.fontSize(8).font(f?"Helvetica-Bold":"Courier").fillColor("#44403c").text(c[g]??"",d+4,p,{width:r-8,height:s,lineBreak:!1})}e.y=p+s};i(t.header.cells,!0,"#f5f5f4");for(let c of t.rows)B(e,s),i(c.cells,!1)}async function We(e,t){return await(0,ze.mkdir)((0,Ge.dirname)(t),{recursive:!0}),new Promise((o,n)=>{let r=new Ue.default({size:"A4",margins:{top:72,bottom:72,left:60,right:60},info:{Title:"KindLM EU AI Act Compliance Report",Author:"KindLM",Creator:"KindLM CLI"}}),a=(0,Ke.createWriteStream)(t);r.pipe(a);let 
s=r.page.width-r.page.margins.left-r.page.margins.right,i=Mt(e),c=At(e);r.moveDown(6),r.fontSize(28).font("Helvetica-Bold").fillColor("#1c1917").text(i,{align:"center",width:s}),r.moveDown(.5),r.fontSize(14).font("Helvetica").fillColor("#57534e").text("EU AI Act Annex IV Documentation",{align:"center",width:s}),r.moveDown(1),r.fontSize(10).fillColor("#a8a29e").text(`Generated: ${new Date().toISOString()}`,{align:"center",width:s}),c&&(r.moveDown(.3),r.fontSize(9).font("Courier").fillColor("#78716c").text(`SHA-256: ${c}`,{align:"center",width:s})),r.moveDown(2),r.fontSize(10).font("Helvetica").fillColor("#6366f1").text("kindlm.com",{align:"center",link:"https://kindlm.com",width:s});let f=Ot(e);for(let m of f){if(r.addPage(),Je(r),m.heading){let y=Be(m.headingLevel);r.fontSize(y).font("Helvetica-Bold").fillColor("#1c1917").text(m.heading,{width:s}),r.moveDown(.5),r.moveTo(60,r.y).lineTo(60+s,r.y).strokeColor("#e7e5e4").lineWidth(1).stroke(),r.moveDown(.8)}let p=m.body.split(`
50
+ `),g=!1,d=0;for(;d<p.length;){let y=p[d]??"";if(y.startsWith("```")){g=!g,g&&B(r,30),d++;continue}if(g){B(r,14);let l=r.y;r.save(),r.rect(r.page.margins.left,l-2,s,14).fill("#f5f5f4"),r.restore(),r.fontSize(9).font("Courier").fillColor("#44403c").text(y,{width:s}),d++;continue}if(!y.trim()){r.moveDown(.4),d++;continue}let S=p[d+1]??"";if(y.includes("|")&&d+1<p.length&&S.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],u=d;for(;u<p.length&&(p[u]??"").includes("|");)l.push(p[u]??""),u++;let T=Nt(l);if(T){_t(r,T,s),d=u;continue}}if(y.match(/^\s*\|[-:]+/)||y.match(/^---+$/)){d++;continue}let R=y.match(/^(#{3,4})\s+(.+)$/);if(R?.[1]&&R[2]){let l=R[1].length,u=Be(l);B(r,u+10),r.moveDown(.3),r.fontSize(u).font("Helvetica-Bold").fillColor("#1c1917").text(R[2].trim(),{width:s}),r.moveDown(.3),d++;continue}if(y.match(/^\s*[-*] /)){B(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(y.trim(),{indent:12,width:s-12}),d++;continue}B(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(y.trim(),{width:s}),d++}Lt(r)}r.end(),a.on("finish",()=>o(t)),a.on("error",n)})}var I=E(require("chalk"),1),G=require("@kindlm/core"),Dt={bold:e=>I.default.bold(e),red:e=>I.default.red(e),green:e=>I.default.green(e),yellow:e=>I.default.yellow(e),cyan:e=>I.default.cyan(e),dim:e=>I.default.dim(e),greenBold:e=>I.default.green.bold(e),redBold:e=>I.default.red.bold(e)},jt=["pretty","json","junit"];function le(e){switch(e){case"json":return(0,G.createJsonReporter)();case"junit":return(0,G.createJunitReporter)();case"pretty":return(0,G.createPrettyReporter)(Dt);default:console.error(I.default.red(`Unknown reporter: '${e}'. 
Available: ${jt.join(", ")}`)),process.exit(1)}}var de=require("child_process");function me(){try{let e=(0,de.execSync)("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=(0,de.execSync)("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,n=(0,de.execSync)("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:n}}catch{return{commitSha:null,branch:null,dirty:!1}}}function Ve(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{t.pdf&&!t.compliance&&(console.error(j.default.red("--pdf requires --compliance")),process.exit(1));let o=le(t.reporter);try{let{runnerResult:n,config:r,yamlContent:a}=await X({configPath:t.config,runs:t.runs!==void 0?parseInt(t.runs,10):void 0,gate:t.gate!==void 0?parseFloat(t.gate):void 0,suite:t.suite}),{runResult:s,aggregated:i}=n,c=(0,J.evaluateGates)(r.gates,i),f=await o.generate(s,c);console.log(f.content);let m,p;if(t.compliance){let d=me(),y={runId:crypto.randomUUID(),kindlmVersion:"1.0.0",gitCommitSha:d.commitSha??void 0,modelIds:r.models.map(u=>u.id),...r.compliance?.metadata??{}};if(m=(await(0,J.createComplianceReporter)(y).generate(s,c)).content,p=m.match(/Tamper Evidence Hash \(SHA-256\):\*\* `([a-f0-9]{64})`/)?.[1],t.reporter==="pretty"||!t.reporter?(console.log(""),console.log(m)):(console.error(""),console.error(m)),t.pdf){let u=await We(m,t.pdf);console.log(""),console.log(j.default.green(`PDF report saved to ${u}`))}}try{je({runnerResult:n,suiteName:r.suite.name,configHash:Fe(a),timestamp:new 
Date().toISOString(),complianceReport:m,complianceHash:p})}catch{}let g=s.failed===0&&s.errored===0&&c.passed;process.exit(g?0:1)}catch(n){if(n instanceof J.ProviderError){let r=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(j.default.red(`${r}: ${n.message}`)),n.retryable&&console.error(j.default.yellow("This error may be transient. Try again or increase timeoutMs in your kindlm.yaml defaults."))}else if(Ht(n)){let a=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(j.default.red(`${a}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(j.default.red("Request timed out. Check network connectivity or increase timeout.")):console.error(j.default.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}function Ht(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}var b=require("path"),Xe=require("fs"),h=E(require("chalk"),1),w=require("@kindlm/core");var M=require("fs"),ue=require("path");function Ye(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function fe(e){let t=(0,ue.join)(e,"baselines");return{read(o){let n=(0,ue.join)(t,`${Ye(o)}.json`);try{return{success:!0,data:(0,M.readFileSync)(n,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found for suite "${o}" at ${n}`}}}},write(o,n){try{(0,M.mkdirSync)(t,{recursive:!0});let r=(0,ue.join)(t,`${Ye(o)}.json`);return(0,M.writeFileSync)(r,n,"utf-8"),{success:!0,data:void 0}}catch(r){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${r instanceof 
Error?r.message:String(r)}`}}}},list(){try{return(0,M.mkdirSync)(t,{recursive:!0}),{success:!0,data:(0,M.readdirSync)(t).filter(r=>r.endsWith(".json")).map(r=>r.replace(/\.json$/,""))}}catch(o){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${o instanceof Error?o.message:String(o)}`}}}}}}function qe(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").option("--force","Save baseline even if all tests failed").action(async o=>{try{let n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),{config:s,runnerResult:i}=await X({configPath:o.config,runs:o.runs!==void 0?parseInt(o.runs,10):void 0}),{runResult:c,aggregated:f}=i;(c.totalTests>0?c.passed/c.totalTests:0)===0&&!o.force&&(console.error(h.default.red("All tests failed or errored. 
Refusing to save a failing baseline.")),console.error(h.default.yellow("Use --force to save anyway.")),process.exit(1));let p=(0,w.buildBaselineData)(s.suite.name,f,new Date().toISOString()),g=(0,w.writeBaseline)(p,a);g.success||(console.error(h.default.red(`Failed to save baseline: ${g.error.message}`)),process.exit(1));let d=Object.keys(p.results).length;console.log(""),console.log(h.default.green(`Baseline saved for suite "${s.suite.name}" (${d} test${d===1?"":"s"})`)),console.log(h.default.dim(` Location: ${r}/baselines/`))}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async o=>{try{let n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),s=(0,b.resolve)(process.cwd(),o.config),i;try{i=(0,Xe.readFileSync)(s,"utf-8")}catch{console.error(h.default.red(`Config file not found: ${s}`)),process.exit(1)}let c=D(),f=(0,w.parseConfig)(i,{configDir:n,fileReader:c});f.success||(console.error(h.default.red(`Config validation failed: ${f.error.message}`)),process.exit(1));let m=f.data.suite.name,p=(0,w.readBaseline)(m,a);p.success||(p.error.code==="BASELINE_NOT_FOUND"?console.error(h.default.red(`No baseline found for suite "${m}". 
Run \`kindlm baseline set\` first.`)):console.error(h.default.red(`Failed to read baseline: ${p.error.message}`)),process.exit(1));let g=p.data,{runnerResult:d}=await X({configPath:o.config,runs:o.runs!==void 0?parseInt(o.runs,10):void 0,baselineData:g}),{aggregated:y}=d,S=(0,w.buildBaselineData)(m,y,new Date().toISOString()),R=(0,w.compareBaseline)(g,S.results);if(console.log(""),console.log(h.default.bold(`Baseline comparison for "${m}"`)),console.log(h.default.dim(` Baseline from: ${g.createdAt}`)),console.log(""),R.regressions.length>0){console.log(h.default.red.bold(` Regressions (${R.regressions.length}):`));for(let l of R.regressions)console.log(h.default.red(` ${l.testName}: ${q(l.baselinePassRate)} \u2192 ${q(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(h.default.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(R.improvements.length>0){console.log(h.default.green.bold(` Improvements (${R.improvements.length}):`));for(let l of R.improvements)console.log(h.default.green(` ${l.testName}: ${q(l.baselinePassRate)} \u2192 ${q(l.currentPassRate)}`));console.log("")}if(R.unchanged.length>0){console.log(h.default.dim(` Unchanged (${R.unchanged.length}):`));for(let l of R.unchanged)console.log(h.default.dim(` ${l.testName}: ${q(l.passRate)}`));console.log("")}if(R.newTests.length>0){console.log(h.default.cyan(` New tests (${R.newTests.length}):`));for(let l of R.newTests)console.log(h.default.cyan(` ${l}`));console.log("")}if(R.removedTests.length>0){console.log(h.default.yellow(` Removed tests (${R.removedTests.length}):`));for(let l of R.removedTests)console.log(h.default.yellow(` ${l}`));console.log("")}process.exit(R.regressions.length>0?1:0)}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(o=>{try{let 
n=(0,b.dirname)((0,b.resolve)(process.cwd(),o.config)),r=(0,b.join)(n,".kindlm"),a=fe(r),s=(0,w.listBaselines)(a);s.success||(console.error(h.default.red(`Failed to list baselines: ${s.error.message}`)),process.exit(1));let i=s.data;if(i.length===0){console.log(h.default.dim("No baselines saved yet. Run `kindlm baseline set` to create one."));return}console.log(h.default.bold("Saved baselines:")),console.log("");for(let c of i){let f=a.read(c);if(!f.success){console.log(` ${c} ${h.default.dim("(unreadable)")}`);continue}let m=(0,w.deserializeBaseline)(f.data);if(!m.success){console.log(` ${c} ${h.default.dim("(corrupt)")}`);continue}let p=Object.keys(m.data.results).length;console.log(` ${h.default.cyan(m.data.suiteName)} \u2014 ${p} test${p===1?"":"s"}, saved ${h.default.dim(m.data.createdAt)}`)}}catch(n){console.error(h.default.red(`Error: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function q(e){return`${(e*100).toFixed(1)}%`}var et=require("readline"),tt=require("stream"),P=E(require("chalk"),1);var H=require("fs"),ve=require("path"),we=require("os");function Se(){return(0,ve.join)((0,we.homedir)(),".kindlm","credentials")}function pe(){try{let e=(0,H.readFileSync)(Se(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function Ze(e){let t=Se(),o=(0,ve.join)((0,we.homedir)(),".kindlm");(0,H.mkdirSync)(o,{recursive:!0,mode:448});let n={token:e,savedAt:new Date().toISOString()};(0,H.writeFileSync)(t,JSON.stringify(n,null,2),{mode:384})}function Qe(){try{(0,H.unlinkSync)(Se())}catch{}}var Ft="https://api.kindlm.com";var A=class extends Error{status;constructor(t,o){super(o),this.name="CloudApiError",this.status=t}};function W(){let e=process.env.KINDLM_CLOUD_URL??Ft;if(e.startsWith("http://")&&!Bt(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. 
Use HTTPS or target localhost for development.`);return e}function Bt(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function Ut(e){return new Promise(t=>setTimeout(t,e))}function Z(e,t){async function o(n,r,a){let s=`${e}${r}`,i={Authorization:`Bearer ${t}`},c={method:n,headers:i};a!==void 0&&(i["Content-Type"]="application/json",c.body=JSON.stringify(a));let f;for(let m=0;m<=1;m++){m>0&&await Ut(1e3);let p=new AbortController,g=setTimeout(()=>p.abort(),3e4);c.signal=p.signal;try{let d=await fetch(s,c);if(!d.ok){if(d.status>=500&&m<1){f=new A(d.status,`HTTP ${d.status}`);continue}let S=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(S=l.error)}catch{}throw new A(d.status,S)}if(d.status===204)return;let y=d.headers.get("content-type")??"";if(!y.includes("application/json"))throw new A(d.status,`Expected JSON response but got content-type: ${y}`);return await d.json()}catch(d){if(d instanceof A)throw d;if(f=d instanceof Error?d:new Error(String(d)),m<1)continue}finally{clearTimeout(g)}}throw f??new Error("Request failed")}return{baseUrl:e,get:n=>o("GET",n),post:(n,r)=>o("POST",n,r),patch:(n,r)=>o("PATCH",n,r),delete:n=>o("DELETE",n)}}function nt(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored credentials").action(async t=>{try{if(t.logout){Qe(),console.log(P.default.green("Logged out. Credentials removed."));return}if(t.status){await Kt();return}let o=t.token??process.env.KINDLM_API_TOKEN??await zt();o.startsWith("klm_")||(console.error(P.default.red('Invalid token format. 
KindLM tokens start with "klm_".')),process.exit(1));let n=Z(W(),o);try{await n.get("/v1/auth/tokens")}catch(r){throw r instanceof A&&r.status===401&&(console.error(P.default.red("Invalid or expired token.")),process.exit(1)),r}Ze(o),console.log(P.default.green("Authenticated successfully. Token saved."))}catch(o){console.error(P.default.red(`Login failed: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}async function Kt(){let e=pe();if(!e){console.log(P.default.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=Z(W(),e);try{await t.get("/v1/auth/tokens"),console.log(P.default.green("Authenticated.")),console.log(` Cloud URL: ${W()}`)}catch(o){o instanceof A&&o.status===401?console.log(P.default.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(P.default.yellow(`Cannot reach Cloud API: ${o instanceof Error?o.message:String(o)}`))}}function zt(){return new Promise((e,t)=>{let o=new tt.Writable({write(r,a,s){s()}});process.stderr.write("Paste your KindLM API token: ");let n=(0,et.createInterface)({input:process.stdin,output:o,terminal:!0});n.question("",r=>{n.close(),process.stderr.write(`
51
+ `);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}var st=require("path"),it=require("child_process"),ge=E(require("chalk"),1);function rt(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function V(e){return encodeURIComponent(e)}async function ot(e,t,o){let n=await Gt(e,o.projectName),r=await Jt(e,n,o.suiteName,o.configHash),a=await e.post(`/v1/projects/${V(n)}/runs`,{suiteId:r,commitSha:o.commitSha,branch:o.branch,environment:o.environment,triggeredBy:o.triggeredBy}),s=Wt(t.aggregated),i=50;try{for(let l=0;l<s.length;l+=i){let u=s.slice(l,l+i);await e.post(`/v1/runs/${V(a.id)}/results`,{results:u})}}catch(l){try{await e.patch(`/v1/runs/${V(a.id)}`,{status:"failed"})}catch{}throw l}let{runResult:c}=t,f=c.totalTests>0?c.passed/c.totalTests:0,m=new Set(t.aggregated.map(l=>l.modelId)),p=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),g=p.length>0?p.reduce((l,u)=>l+u,0)/p.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),y=d.length>0?d.reduce((l,u)=>l+u,0)/d.length:void 0,S=t.aggregated.reduce((l,u)=>l+u.totalCostUsd,0),R=S>0?S:void 0;return await e.patch(`/v1/runs/${V(a.id)}`,{status:"completed",passRate:f,testCount:c.totalTests,modelCount:m.size,judgeAvgScore:g,latencyAvgMs:y,costEstimateUsd:R,complianceReport:o.complianceReport,complianceHash:o.complianceHash,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:n}}async function Gt(e,t){let{projects:o}=await e.get("/v1/projects"),n=o.find(a=>a.name===t);return n?n.id:(await e.post("/v1/projects",{name:t})).id}async function Jt(e,t,o,n){let{suites:r}=await 
e.get(`/v1/projects/${V(t)}/suites`),a=r.find(i=>i.name===o);return a?a.id:(await e.post(`/v1/projects/${V(t)}/suites`,{name:o,configHash:n})).id}function Wt(e){return e.map(t=>{let o=t,n=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):o.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:n.length>0?JSON.stringify(n):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function at(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let o=t.token??process.env.KINDLM_API_TOKEN??pe();o||(console.error(ge.default.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let n=He();n||(console.error(ge.default.red('No test run found. 
Run "kindlm test" first.')),process.exit(1));let r=me(),a=rt(),s=t.project??Yt(),i=Z(W(),o),c=U();c.start("Uploading results to KindLM Cloud...");try{let f=await ot(i,n.runnerResult,{projectName:s,suiteName:n.suiteName,configHash:n.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local",complianceReport:n.complianceReport,complianceHash:n.complianceHash});c.succeed("Uploaded successfully."),console.log(` Run ID: ${f.runId}`),console.log(` Project: ${s}`),console.log(` Suite: ${n.suiteName}`)}catch(f){throw c.fail("Upload failed."),f}}catch(o){console.error(ge.default.red(`Upload failed: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}function Vt(e){try{let n=new URL(e).pathname.split("/").filter(Boolean),r=n[n.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let o=t[1].split("/");return o[o.length-1]??null}return null}function Yt(){try{let e=(0,it.execSync)("git remote get-url origin",{encoding:"utf-8"}).trim(),t=Vt(e);if(t)return t}catch{}return(0,st.basename)(process.cwd())}var he=require("fs"),ye=require("path"),mt=require("child_process"),Q=E(require("chalk"),1),k=require("@kindlm/core");var ct=require("http"),lt=require("@kindlm/core");function dt(e){let t=[],o=null,n=[];function r(){for(let s of n)s()}function a(s,i){if(i.setHeader("Access-Control-Allow-Origin","*"),i.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),i.setHeader("Access-Control-Allow-Headers","Content-Type"),s.method==="OPTIONS"){i.writeHead(204),i.end();return}if(s.method!=="POST"||s.url!=="/v1/traces"){i.writeHead(404,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Not found"}));return}let c=10*1024*1024,f=[],m=0,p=!1;s.on("data",g=>{if(m+=g.length,m>c){p=!0,i.writeHead(413,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Payload too 
large"})),s.destroy();return}f.push(g)}),s.on("end",()=>{if(!p)try{let g=Buffer.concat(f).toString("utf-8"),d=JSON.parse(g),y=(0,lt.parseOtlpPayload)(d);y.success?(t.push(...y.data),r(),i.writeHead(200,{"Content-Type":"application/json"}),i.end(JSON.stringify({partialSuccess:{}}))):(i.writeHead(400,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:y.error.message})))}catch{i.writeHead(400,{"Content-Type":"application/json"}),i.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((s,i)=>{o=(0,ct.createServer)(a),o.on("error",i),o.listen(e,()=>s())})},stop(){return new Promise(s=>{o?o.close(()=>s()):s()})},getSpans(){return[...t]},waitForSpans({timeoutMs:s}){return new Promise(i=>{if(t.length>0){i([...t]);return}let c=setTimeout(()=>{n=n.filter(m=>m!==f),i([...t])},s),f=()=>{clearTimeout(c),n=n.filter(m=>m!==f),setTimeout(()=>i([...t]),500)};n.push(f)})}}}function ut(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let o=U();try{let n=(0,ye.resolve)(process.cwd(),t.config),r=(0,ye.dirname)(n);try{(0,he.statSync)(n).size>1048576&&(console.error(Q.default.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(Q.default.red(`Config file not found: ${n}`)),process.exit(1)}let a;try{a=(0,he.readFileSync)(n,"utf-8")}catch{console.error(Q.default.red(`Config file not found: ${n}`)),process.exit(1)}let s=D(),i=(0,k.parseConfig)(a,{configDir:r,fileReader:s});i.success||(console.error(Q.default.red(`Config validation failed: ${i.error.message}`)),process.exit(1));let 
c=i.data,f=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},m=parseInt(t.port,10)||f.port,p=parseInt(t.timeout,10)||f.timeoutMs,g=dt(m);await g.start();let d;try{o.start(`Listening for OTLP traces on port ${m}...`),t.command&&(d=(0,mt.spawn)("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${m}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}),d.on("error",C=>{o.fail(`Command failed: ${C.message}`)}));let y=await g.waitForSpans({timeoutMs:p});y.length===0&&(o.fail("No traces received"),process.exit(1)),o.succeed(`Received ${y.length} spans`);let S=(0,k.filterSpans)(y,f.spanFilter),R=(0,k.mapSpansToResult)(S,f.spanMapping),l=ie(),u=new Map,T=c.providers;for(let[C,x]of Object.entries(T)){if(!x)continue;let L="";if(x.apiKeyEnv){let $=process.env[x.apiKeyEnv];$&&(L=$.trim())}if(!(!L&&C!=="ollama"))try{let $=(0,k.createProvider)(C,l);await $.initialize({apiKey:L,baseUrl:x.baseUrl,organization:x.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),u.set(C,$)}catch{}}let ee=c.defaults.judgeModel??c.models[0]?.id,F=c.models.find(C=>C.id===ee),N=F?u.get(F.provider):void 0,Re=(0,k.buildContextFromTrace)(R,{configDir:r,judgeAdapter:N,judgeModel:F?.model}),Te=[];for(let C of c.tests){if(C.skip)continue;let x=(0,k.createAssertionsFromExpect)(C.expect),L=[];for(let $ of x){let Ct=await $.evaluate(Re);L.push(...Ct)}Te.push({testName:C.name,assertions:L})}let pt=c.tests.filter(C=>C.skip).length,be=R.latencyMs,te=Te.map(({testName:C,assertions:x})=>{let 
L=x.every($=>$.passed);return{name:C,modelId:"trace",status:L?"passed":"failed",assertions:x,latencyMs:be,costUsd:0}}),gt=te.filter(C=>C.status==="passed").length,ne=te.filter(C=>C.status==="failed").length,ht={suites:[{name:c.suite.name,status:ne>0?"failed":"passed",tests:te}],totalTests:te.length,passed:gt,failed:ne,errored:0,skipped:pt,durationMs:be},yt={passed:ne===0,gates:[]},Rt=await le(t.reporter).generate(ht,yt);console.log(Rt.content),process.exit(ne>0?1:0)}finally{d?.kill(),await g.stop()}}catch(n){o.fail(`Trace command failed: ${n instanceof Error?n.message:String(n)}`),process.exit(1)}})}function Xt(){let e=new ft.Command;return e.name("kindlm").description("AI agent behavioral regression testing").version("1.0.0"),xe(e),Oe(e),Ve(e),qe(e),nt(e),at(e),ut(e),e}0&&(module.exports={createProgram});
52
52
  //# sourceMappingURL=index.cjs.map