@kindlm/cli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/kindlm.js CHANGED
@@ -49,7 +49,7 @@ Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",o);try{return
49
49
  `).trim()}),o=s[2]?.trim()??"",r=2,a=[]):a.push(i)}return(o||a.length>0)&&n.push({heading:o,headingLevel:r,body:a.join(`
50
50
  `).trim()}),n}function ft(e){return e.match(/^# (.+)$/m)?.[1]?.trim()??"KindLM Compliance Report"}function pt(e){return e.match(/SHA-256:\s*`([a-f0-9]+)`/i)?.[1]??null}function pe(e){switch(e){case 2:return 18;case 3:return 15;case 4:return 13;default:return 13}}function gt(e){if(e.length<2)return null;let t=e[0]??"",n=e[1]??"";if(!t.includes("|")||!n.match(/^\s*\|[-:\s|]+\|\s*$/))return null;let o=i=>i.split("|").slice(1,-1).map(s=>s.trim()),r={cells:o(t)},a=[];for(let i=2;i<e.length;i++){let s=e[i]??"";if(!s.includes("|"))break;a.push({cells:o(s)})}return{header:r,rows:a}}function N(e,t){let n=e.page.margins.bottom;e.page.height-n-30-e.y<t&&(e.addPage(),ge(e))}function ge(e){let t=new Date().toISOString();e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("KindLM Compliance Report",60,40),e.text(t,60,40,{align:"right"}),e.moveDown(3)}function ht(e){e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("Generated by KindLM \xB7 kindlm.com",60,e.page.height-50,{align:"center",width:e.page.width-120})}function yt(e,t,n){let o=t.header.cells.length,r=n/o,a=e.page.margins.left,i=18;N(e,i*2);let s=(c,m,u)=>{let f=e.y;u&&(e.save(),e.rect(a,f-2,n,i).fill(u),e.restore());for(let y=0;y<c.length;y++){let d=a+y*r;e.fontSize(8).font(m?"Helvetica-Bold":"Courier").fillColor("#44403c").text(c[y]??"",d+4,f,{width:r-8,height:i,lineBreak:!1})}e.y=f+i};s(t.header.cells,!0,"#f5f5f4");for(let c of t.rows)N(e,i),s(c.cells,!1)}async function he(e,t){return await dt(mt(t),{recursive:!0}),new Promise((n,o)=>{let r=new ct({size:"A4",margins:{top:72,bottom:72,left:60,right:60},info:{Title:"KindLM EU AI Act Compliance Report",Author:"KindLM",Creator:"KindLM CLI"}}),a=lt(t);r.pipe(a);let i=r.page.width-r.page.margins.left-r.page.margins.right,s=ft(e),c=pt(e);r.moveDown(6),r.fontSize(28).font("Helvetica-Bold").fillColor("#1c1917").text(s,{align:"center",width:i}),r.moveDown(.5),r.fontSize(14).font("Helvetica").fillColor("#57534e").text("EU AI Act Annex IV Documentation",{align:"center",width:i}),r.moveDown(1),r.fontSize(10).fillColor("#a8a29e").text(`Generated: ${new Date().toISOString()}`,{align:"center",width:i}),c&&(r.moveDown(.3),r.fontSize(9).font("Courier").fillColor("#78716c").text(`SHA-256: ${c}`,{align:"center",width:i})),r.moveDown(2),r.fontSize(10).font("Helvetica").fillColor("#6366f1").text("kindlm.com",{align:"center",link:"https://kindlm.com",width:i});let m=ut(e);for(let u of m){if(r.addPage(),ge(r),u.heading){let C=pe(u.headingLevel);r.fontSize(C).font("Helvetica-Bold").fillColor("#1c1917").text(u.heading,{width:i}),r.moveDown(.5),r.moveTo(60,r.y).lineTo(60+i,r.y).strokeColor("#e7e5e4").lineWidth(1).stroke(),r.moveDown(.8)}let f=u.body.split(`
51
51
  `),y=!1,d=0;for(;d<f.length;){let C=f[d]??"";if(C.startsWith("```")){y=!y,y&&N(r,30),d++;continue}if(y){N(r,14);let l=r.y;r.save(),r.rect(r.page.margins.left,l-2,i,14).fill("#f5f5f4"),r.restore(),r.fontSize(9).font("Courier").fillColor("#44403c").text(C,{width:i}),d++;continue}if(!C.trim()){r.moveDown(.4),d++;continue}let S=f[d+1]??"";if(C.includes("|")&&d+1<f.length&&S.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],p=d;for(;p<f.length&&(f[p]??"").includes("|");)l.push(f[p]??""),p++;let b=gt(l);if(b){yt(r,b,i),d=p;continue}}if(C.match(/^\s*\|[-:]+/)||C.match(/^---+$/)){d++;continue}let h=C.match(/^(#{3,4})\s+(.+)$/);if(h?.[1]&&h[2]){let l=h[1].length,p=pe(l);N(r,p+10),r.moveDown(.3),r.fontSize(p).font("Helvetica-Bold").fillColor("#1c1917").text(h[2].trim(),{width:i}),r.moveDown(.3),d++;continue}if(C.match(/^\s*[-*] /)){N(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{indent:12,width:i-12}),d++;continue}N(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{width:i}),d++}ht(r)}r.end(),a.on("finish",()=>n(t)),a.on("error",o)})}function ye(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{try{let{runnerResult:n,config:o,yamlContent:r}=await B({configPath:t.config,runs:t.runs?parseInt(t.runs,10):void 0,gate:t.gate?parseFloat(t.gate):void 0}),{runResult:a,aggregated:i}=n,s=Ct(o.gates,i),m=xt(t.reporter).generate(a,s);if(console.log(m.content),t.compliance){let y=St().generate(a,s);if(console.log(""),console.log(y.content),t.pdf){let d=await he(y.content,t.pdf);console.log(""),console.log(w.green(`PDF report saved to ${d}`))}}try{me({runnerResult:n,suiteName:o.suite.name,configHash:fe(r),timestamp:new Date().toISOString()})}catch{}let u=a.failed===0&&a.errored===0&&s.passed;process.exit(u?0:1)}catch(n){if(n instanceof Tt){let o=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(w.red(`${o}: ${n.message}`)),n.retryable&&console.error(w.yellow("This error may be transient. Try again or increase --timeout."))}else if(kt(n)){let r=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(w.red(`${r}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(w.red("Request timed out. Check network connectivity or increase timeout.")):console.error(w.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}var bt={bold:e=>w.bold(e),red:e=>w.red(e),green:e=>w.green(e),yellow:e=>w.yellow(e),cyan:e=>w.cyan(e),dim:e=>w.dim(e),greenBold:e=>w.green.bold(e),redBold:e=>w.red.bold(e)};function xt(e){switch(e){case"json":return Rt();case"junit":return wt();default:return vt(bt)}}function kt(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}import{resolve as W,dirname as q,join as V}from"path";import{readFileSync as It}from"fs";import g from"chalk";import{parseConfig as Ot,readBaseline as At,writeBaseline as Mt,listBaselines as Nt,buildBaselineData as Re,compareBaseline as Lt,deserializeBaseline as _t}from"@kindlm/core";import{readFileSync as Pt,writeFileSync as $t,mkdirSync as Ce,readdirSync as Et}from"fs";import{join as X}from"path";function ve(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function J(e){let t=X(e,"baselines");return{read(n){let o=X(t,`${ve(n)}.json`);try{return{success:!0,data:Pt(o,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found for suite "${n}" at ${o}`}}}},write(n,o){try{Ce(t,{recursive:!0});let r=X(t,`${ve(n)}.json`);return $t(r,o,"utf-8"),{success:!0,data:void 0}}catch(r){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${r instanceof Error?r.message:String(r)}`}}}},list(){try{return Ce(t,{recursive:!0}),{success:!0,data:Et(t).filter(r=>r.endsWith(".json")).map(r=>r.replace(/\.json$/,""))}}catch(n){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${n instanceof Error?n.message:String(n)}`}}}}}}function we(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),{config:i,runnerResult:s}=await B({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0}),{aggregated:c}=s,m=Re(i.suite.name,c,new Date().toISOString()),u=Mt(m,a);u.success||(console.error(g.red(`Failed to save baseline: ${u.error.message}`)),process.exit(1));let f=Object.keys(m.results).length;console.log(""),console.log(g.green(`Baseline saved for suite "${i.suite.name}" (${f} test${f===1?"":"s"})`)),console.log(g.dim(` Location: ${r}/baselines/`)),process.exit(0)}catch(o){console.error(g.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),i=W(process.cwd(),n.config),s;try{s=It(i,"utf-8")}catch{console.error(g.red(`Config file not found: ${i}`)),process.exit(1)}let c=A(),m=Ot(s,{configDir:o,fileReader:c});m.success||(console.error(g.red(`Config validation failed: ${m.error.message}`)),process.exit(1));let u=m.data.suite.name,f=At(u,a);f.success||(f.error.code==="BASELINE_NOT_FOUND"?console.error(g.red(`No baseline found for suite "${u}". Run \`kindlm baseline set\` first.`)):console.error(g.red(`Failed to read baseline: ${f.error.message}`)),process.exit(1));let y=f.data,{runnerResult:d}=await B({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0,baselineData:y}),{aggregated:C}=d,S=Re(u,C,new Date().toISOString()),h=Lt(y,S.results);if(console.log(""),console.log(g.bold(`Baseline comparison for "${u}"`)),console.log(g.dim(` Baseline from: ${y.createdAt}`)),console.log(""),h.regressions.length>0){console.log(g.red.bold(` Regressions (${h.regressions.length}):`));for(let l of h.regressions)console.log(g.red(` ${l.testName}: ${U(l.baselinePassRate)} \u2192 ${U(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(g.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(h.improvements.length>0){console.log(g.green.bold(` Improvements (${h.improvements.length}):`));for(let l of h.improvements)console.log(g.green(` ${l.testName}: ${U(l.baselinePassRate)} \u2192 ${U(l.currentPassRate)}`));console.log("")}if(h.unchanged.length>0){console.log(g.dim(` Unchanged (${h.unchanged.length}):`));for(let l of h.unchanged)console.log(g.dim(` ${l.testName}: ${U(l.passRate)}`));console.log("")}if(h.newTests.length>0){console.log(g.cyan(` New tests (${h.newTests.length}):`));for(let l of h.newTests)console.log(g.cyan(` ${l}`));console.log("")}if(h.removedTests.length>0){console.log(g.yellow(` Removed tests (${h.removedTests.length}):`));for(let l of h.removedTests)console.log(g.yellow(` ${l}`));console.log("")}process.exit(h.regressions.length>0?1:0)}catch(o){console.error(g.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),i=Nt(a);i.success||(console.error(g.red(`Failed to list baselines: ${i.error.message}`)),process.exit(1));let s=i.data;s.length===0&&(console.log(g.dim("No baselines saved yet. Run `kindlm baseline set` to create one.")),process.exit(0)),console.log(g.bold("Saved baselines:")),console.log("");for(let c of s){let m=a.read(c);if(!m.success){console.log(` ${c} ${g.dim("(unreadable)")}`);continue}let u=_t(m.data);if(!u.success){console.log(` ${c} ${g.dim("(corrupt)")}`);continue}let f=Object.keys(u.data.results).length;console.log(` ${g.cyan(u.data.suiteName)} \u2014 ${f} test${f===1?"":"s"}, saved ${g.dim(u.data.createdAt)}`)}process.exit(0)}catch(o){console.error(g.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}function U(e){return`${(e*100).toFixed(1)}%`}import{createInterface as Gt}from"readline";import{Writable as Jt}from"stream";import I from"chalk";import{readFileSync as Dt,writeFileSync as jt,mkdirSync as Ft,unlinkSync as Ht,chmodSync as Bt}from"fs";import{join as Se}from"path";import{homedir as Te}from"os";function Z(){return Se(Te(),".kindlm","credentials")}function Y(){try{let e=Dt(Z(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function be(e){let t=Z(),n=Se(Te(),".kindlm");Ft(n,{recursive:!0,mode:448});let o={token:e,savedAt:new Date().toISOString()};jt(t,JSON.stringify(o,null,2),{mode:384}),Bt(t,384)}function xe(){try{Ht(Z())}catch{}}var Ut="https://api.kindlm.com";var E=class extends Error{status;constructor(t,n){super(n),this.name="CloudApiError",this.status=t}};function j(){let e=process.env.KINDLM_CLOUD_URL??Ut;if(e.startsWith("http://")&&!Kt(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. Use HTTPS or target localhost for development.`);return e}function Kt(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function zt(e){return new Promise(t=>setTimeout(t,e))}function K(e,t){async function n(o,r,a){let i=`${e}${r}`,s={Authorization:`Bearer ${t}`},c={method:o,headers:s};a!==void 0&&(s["Content-Type"]="application/json",c.body=JSON.stringify(a));let m;for(let u=0;u<=1;u++){u>0&&await zt(1e3);let f=new AbortController,y=setTimeout(()=>f.abort(),3e4);c.signal=f.signal;try{let d=await fetch(i,c);if(!d.ok){if(d.status>=500&&u<1){m=new E(d.status,`HTTP ${d.status}`);continue}let S=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(S=l.error)}catch{}throw new E(d.status,S)}if(d.status===204)return;let C=d.headers.get("content-type")??"";if(!C.includes("application/json"))throw new E(d.status,`Expected JSON response but got content-type: ${C}`);return await d.json()}catch(d){if(d instanceof E)throw d;if(m=d instanceof Error?d:new Error(String(d)),u<1)continue}finally{clearTimeout(y)}}throw m??new Error("Request failed")}return{baseUrl:e,get:o=>n("GET",o),post:(o,r)=>n("POST",o,r),patch:(o,r)=>n("PATCH",o,r),delete:o=>n("DELETE",o)}}function ke(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored credentials").action(async t=>{try{if(t.logout){xe(),console.log(I.green("Logged out. Credentials removed."));return}if(t.status){await Wt();return}let n=t.token??process.env.KINDLM_API_TOKEN??await Yt();n.startsWith("klm_")||(console.error(I.red('Invalid token format. KindLM tokens start with "klm_".')),process.exit(1));let o=K(j(),n);try{await o.get("/v1/auth/tokens")}catch(r){throw r instanceof E&&r.status===401&&(console.error(I.red("Invalid or expired token.")),process.exit(1)),r}be(n),console.log(I.green("Authenticated successfully. Token saved."))}catch(n){console.error(I.red(`Login failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}async function Wt(){let e=Y();if(!e){console.log(I.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=K(j(),e);try{await t.get("/v1/auth/tokens"),console.log(I.green("Authenticated.")),console.log(` Cloud URL: ${j()}`)}catch(n){n instanceof E&&n.status===401?console.log(I.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(I.yellow(`Cannot reach Cloud API: ${n instanceof Error?n.message:String(n)}`))}}function Yt(){return new Promise((e,t)=>{let n=new Jt({write(r,a,i){i()}});process.stderr.write("Paste your KindLM API token: ");let o=Gt({input:process.stdin,output:n,terminal:!0});o.question("",r=>{o.close(),process.stderr.write(`
52
- `);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}import{basename as Zt}from"path";import{execSync as Qt}from"child_process";import ee from"chalk";import{execSync as Q}from"child_process";function Pe(){try{let e=Q("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=Q("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,o=Q("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:o}}catch{return{commitSha:null,branch:null,dirty:!1}}}function $e(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function z(e){return encodeURIComponent(e)}async function Ee(e,t,n){let o=await Xt(e,n.projectName),r=await qt(e,o,n.suiteName,n.configHash),a=await e.post(`/v1/runs/${z(o)}/runs`,{suiteId:r,commitSha:n.commitSha,branch:n.branch,environment:n.environment,triggeredBy:n.triggeredBy}),i=Vt(t.aggregated),s=50;for(let l=0;l<i.length;l+=s){let p=i.slice(l,l+s);await e.post(`/v1/results/${z(a.id)}/results`,{results:p})}let{runResult:c}=t,m=c.totalTests>0?c.passed/c.totalTests:0,u=new Set(t.aggregated.map(l=>l.modelId)),f=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),y=f.length>0?f.reduce((l,p)=>l+p,0)/f.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),C=d.length>0?d.reduce((l,p)=>l+p,0)/d.length:void 0,S=t.aggregated.reduce((l,p)=>l+p.totalCostUsd,0),h=S>0?S:void 0;return await e.patch(`/v1/runs/${z(a.id)}`,{status:"completed",passRate:m,testCount:c.totalTests,modelCount:u.size,judgeAvgScore:y,latencyAvgMs:C,costEstimateUsd:h,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:o}}async function Xt(e,t){let{projects:n}=await e.get("/v1/projects"),o=n.find(a=>a.name===t);return o?o.id:(await e.post("/v1/projects",{name:t})).id}async function qt(e,t,n,o){let{suites:r}=await e.get(`/v1/suites/${z(t)}/suites`),a=r.find(s=>s.name===n);return a?a.id:(await e.post(`/v1/suites/${z(t)}/suites`,{name:n,configHash:o})).id}function Vt(e){return e.map(t=>{let n=t,o=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):n.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:o.length>0?JSON.stringify(o):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function Ie(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let n=t.token??process.env.KINDLM_API_TOKEN??Y();n||(console.error(ee.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let o=ue();o||(console.error(ee.red('No test run found. Run "kindlm test" first.')),process.exit(1));let r=Pe(),a=$e(),i=t.project??tn(),s=K(j(),n),c=D();c.start("Uploading results to KindLM Cloud...");try{let m=await Ee(s,o.runnerResult,{projectName:i,suiteName:o.suiteName,configHash:o.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local"});c.succeed("Uploaded successfully."),console.log(` Run ID: ${m.runId}`),console.log(` Project: ${i}`),console.log(` Suite: ${o.suiteName}`)}catch(m){throw c.fail("Upload failed."),m}}catch(n){console.error(ee.red(`Upload failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function en(e){try{let o=new URL(e).pathname.split("/").filter(Boolean),r=o[o.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let n=t[1].split("/");return n[n.length-1]??null}return null}function tn(){try{let e=Qt("git remote get-url origin",{encoding:"utf-8"}).trim(),t=en(e);if(t)return t}catch{}return Zt(process.cwd())}import{readFileSync as rn,statSync as sn}from"fs";import{resolve as an,dirname as cn}from"path";import{spawn as ln}from"child_process";import T from"chalk";import{parseConfig as dn,createProvider as mn,filterSpans as un,mapSpansToResult as fn,buildContextFromTrace as pn,createAssertionsFromExpect as gn,evaluateGates as hn}from"@kindlm/core";import{createServer as nn}from"http";import{parseOtlpPayload as on}from"@kindlm/core";function Oe(e){let t=[],n=null,o=[];function r(){for(let i of o)i()}function a(i,s){if(s.setHeader("Access-Control-Allow-Origin","*"),s.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),s.setHeader("Access-Control-Allow-Headers","Content-Type"),i.method==="OPTIONS"){s.writeHead(204),s.end();return}if(i.method!=="POST"||i.url!=="/v1/traces"){s.writeHead(404,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Not found"}));return}let c=[];i.on("data",m=>c.push(m)),i.on("end",()=>{try{let m=Buffer.concat(c).toString("utf-8"),u=JSON.parse(m),f=on(u);f.success?(t.push(...f.data),r(),s.writeHead(200,{"Content-Type":"application/json"}),s.end(JSON.stringify({partialSuccess:{}}))):(s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:f.error.message})))}catch{s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((i,s)=>{n=nn(a),n.on("error",s),n.listen(e,()=>i())})},stop(){return new Promise(i=>{n?n.close(()=>i()):i()})},getSpans(){return[...t]},waitForSpans({timeoutMs:i}){return new Promise(s=>{if(t.length>0){s([...t]);return}let c=setTimeout(()=>{o=o.filter(u=>u!==m),s([...t])},i),m=()=>{clearTimeout(c),o=o.filter(u=>u!==m),setTimeout(()=>s([...t]),500)};o.push(m)})}}}function Ae(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let n=D();try{let o=an(process.cwd(),t.config),r=cn(o);try{sn(o).size>1048576&&(console.error(T.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let a;try{a=rn(o,"utf-8")}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let i=A(),s=dn(a,{configDir:r,fileReader:i});s.success||(console.error(T.red(`Config validation failed: ${s.error.message}`)),process.exit(1));let c=s.data,m=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},u=parseInt(t.port,10)||m.port,f=parseInt(t.timeout,10)||m.timeoutMs,y=Oe(u);await y.start(),n.start(`Listening for OTLP traces on port ${u}...`),t.command&&ln("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${u}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}).on("error",R=>{n.fail(`Command failed: ${R.message}`)});let d=await y.waitForSpans({timeoutMs:f});await y.stop(),d.length===0&&(n.fail("No traces received"),process.exit(1)),n.succeed(`Received ${d.length} spans`);let C=un(d,m.spanFilter),S=fn(C,m.spanMapping),h=G(),l=new Map,p=c.providers;for(let[v,R]of Object.entries(p)){if(!R)continue;let x="";if(R.apiKeyEnv){let k=process.env[R.apiKeyEnv];k&&(x=k.trim())}if(!(!x&&v!=="ollama"))try{let k=mn(v,h);await k.initialize({apiKey:x,baseUrl:R.baseUrl,organization:R.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),l.set(v,k)}catch{}}let b=c.defaults.judgeModel??c.models[0]?.id,L=c.models.find(v=>v.id===b),F=L?l.get(L.provider):void 0,O=pn(S,{configDir:r,judgeAdapter:F,judgeModel:L?.model}),_=[];for(let v of c.tests){if(v.skip)continue;let R=gn(v.expect),x=[];for(let k of R){let P=await k.evaluate(O);x.push(...P)}_.push({testName:v.name,assertions:x})}let te=_.reduce((v,R)=>v+R.assertions.length,0),ne=_.reduce((v,R)=>v+R.assertions.filter(x=>x.passed).length,0),oe=te-ne;console.log(),console.log(T.bold("Trace Test Results")),console.log(T.dim("\u2500".repeat(50)));for(let{testName:v,assertions:R}of _){let k=R.every(P=>P.passed)?T.green("\u2713"):T.red("\u2717");console.log(`${k} ${v}`);for(let P of R){let Ne=P.passed?T.green(" \u2713"):T.red(" \u2717"),Le=P.failureMessage?`${P.label}: ${P.failureMessage}`:P.label;console.log(`${Ne} ${Le}`)}}console.log(),console.log(`${T.bold("Total:")} ${ne} passed, ${oe} failed out of ${te} assertions`);let re=hn(c.gates,[]);if(!re.passed)for(let v of re.gates.filter(R=>!R.passed))console.log(T.red(`Gate failed: ${v.message}`));process.exit(oe>0?1:0)}catch(o){n.fail(`Trace command failed: ${o instanceof Error?o.message:String(o)}`),process.exit(1)}})}function Me(){let e=new yn;return e.name("kindlm").description("AI agent behavioral regression testing").version("0.0.0"),se(e),ie(e),ye(e),we(e),ke(e),Ie(e),Ae(e),e}process.on("unhandledRejection",e=>{let t=e instanceof Error?e.message:String(e);process.stderr.write(`
52
+ `);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}import{basename as Zt}from"path";import{execSync as Qt}from"child_process";import ee from"chalk";import{execSync as Q}from"child_process";function Pe(){try{let e=Q("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=Q("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,o=Q("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:o}}catch{return{commitSha:null,branch:null,dirty:!1}}}function $e(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function z(e){return encodeURIComponent(e)}async function Ee(e,t,n){let o=await Xt(e,n.projectName),r=await qt(e,o,n.suiteName,n.configHash),a=await e.post(`/v1/runs/${z(o)}/runs`,{suiteId:r,commitSha:n.commitSha,branch:n.branch,environment:n.environment,triggeredBy:n.triggeredBy}),i=Vt(t.aggregated),s=50;for(let l=0;l<i.length;l+=s){let p=i.slice(l,l+s);await e.post(`/v1/results/${z(a.id)}/results`,{results:p})}let{runResult:c}=t,m=c.totalTests>0?c.passed/c.totalTests:0,u=new Set(t.aggregated.map(l=>l.modelId)),f=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),y=f.length>0?f.reduce((l,p)=>l+p,0)/f.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),C=d.length>0?d.reduce((l,p)=>l+p,0)/d.length:void 0,S=t.aggregated.reduce((l,p)=>l+p.totalCostUsd,0),h=S>0?S:void 0;return await e.patch(`/v1/runs/${z(a.id)}`,{status:"completed",passRate:m,testCount:c.totalTests,modelCount:u.size,judgeAvgScore:y,latencyAvgMs:C,costEstimateUsd:h,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:o}}async function Xt(e,t){let{projects:n}=await e.get("/v1/projects"),o=n.find(a=>a.name===t);return o?o.id:(await e.post("/v1/projects",{name:t})).id}async function qt(e,t,n,o){let{suites:r}=await e.get(`/v1/suites/${z(t)}/suites`),a=r.find(s=>s.name===n);return a?a.id:(await e.post(`/v1/suites/${z(t)}/suites`,{name:n,configHash:o})).id}function Vt(e){return e.map(t=>{let n=t,o=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):n.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:o.length>0?JSON.stringify(o):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function Ie(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let n=t.token??process.env.KINDLM_API_TOKEN??Y();n||(console.error(ee.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let o=ue();o||(console.error(ee.red('No test run found. Run "kindlm test" first.')),process.exit(1));let r=Pe(),a=$e(),i=t.project??tn(),s=K(j(),n),c=D();c.start("Uploading results to KindLM Cloud...");try{let m=await Ee(s,o.runnerResult,{projectName:i,suiteName:o.suiteName,configHash:o.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local"});c.succeed("Uploaded successfully."),console.log(` Run ID: ${m.runId}`),console.log(` Project: ${i}`),console.log(` Suite: ${o.suiteName}`)}catch(m){throw c.fail("Upload failed."),m}}catch(n){console.error(ee.red(`Upload failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function en(e){try{let o=new URL(e).pathname.split("/").filter(Boolean),r=o[o.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let n=t[1].split("/");return n[n.length-1]??null}return null}function tn(){try{let e=Qt("git remote get-url origin",{encoding:"utf-8"}).trim(),t=en(e);if(t)return t}catch{}return Zt(process.cwd())}import{readFileSync as rn,statSync as sn}from"fs";import{resolve as an,dirname as cn}from"path";import{spawn as ln}from"child_process";import T from"chalk";import{parseConfig as dn,createProvider as mn,filterSpans as un,mapSpansToResult as fn,buildContextFromTrace as pn,createAssertionsFromExpect as gn,evaluateGates as hn}from"@kindlm/core";import{createServer as nn}from"http";import{parseOtlpPayload as on}from"@kindlm/core";function Oe(e){let t=[],n=null,o=[];function r(){for(let i of o)i()}function a(i,s){if(s.setHeader("Access-Control-Allow-Origin","*"),s.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),s.setHeader("Access-Control-Allow-Headers","Content-Type"),i.method==="OPTIONS"){s.writeHead(204),s.end();return}if(i.method!=="POST"||i.url!=="/v1/traces"){s.writeHead(404,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Not found"}));return}let c=[];i.on("data",m=>c.push(m)),i.on("end",()=>{try{let m=Buffer.concat(c).toString("utf-8"),u=JSON.parse(m),f=on(u);f.success?(t.push(...f.data),r(),s.writeHead(200,{"Content-Type":"application/json"}),s.end(JSON.stringify({partialSuccess:{}}))):(s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:f.error.message})))}catch{s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((i,s)=>{n=nn(a),n.on("error",s),n.listen(e,()=>i())})},stop(){return new Promise(i=>{n?n.close(()=>i()):i()})},getSpans(){return[...t]},waitForSpans({timeoutMs:i}){return new Promise(s=>{if(t.length>0){s([...t]);return}let c=setTimeout(()=>{o=o.filter(u=>u!==m),s([...t])},i),m=()=>{clearTimeout(c),o=o.filter(u=>u!==m),setTimeout(()=>s([...t]),500)};o.push(m)})}}}function Ae(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let n=D();try{let o=an(process.cwd(),t.config),r=cn(o);try{sn(o).size>1048576&&(console.error(T.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let a;try{a=rn(o,"utf-8")}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let i=A(),s=dn(a,{configDir:r,fileReader:i});s.success||(console.error(T.red(`Config validation failed: ${s.error.message}`)),process.exit(1));let c=s.data,m=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},u=parseInt(t.port,10)||m.port,f=parseInt(t.timeout,10)||m.timeoutMs,y=Oe(u);await y.start(),n.start(`Listening for OTLP traces on port ${u}...`),t.command&&ln("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${u}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}).on("error",R=>{n.fail(`Command failed: ${R.message}`)});let d=await y.waitForSpans({timeoutMs:f});await y.stop(),d.length===0&&(n.fail("No traces received"),process.exit(1)),n.succeed(`Received ${d.length} spans`);let C=un(d,m.spanFilter),S=fn(C,m.spanMapping),h=G(),l=new Map,p=c.providers;for(let[v,R]of Object.entries(p)){if(!R)continue;let x="";if(R.apiKeyEnv){let k=process.env[R.apiKeyEnv];k&&(x=k.trim())}if(!(!x&&v!=="ollama"))try{let k=mn(v,h);await k.initialize({apiKey:x,baseUrl:R.baseUrl,organization:R.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),l.set(v,k)}catch{}}let b=c.defaults.judgeModel??c.models[0]?.id,L=c.models.find(v=>v.id===b),F=L?l.get(L.provider):void 0,O=pn(S,{configDir:r,judgeAdapter:F,judgeModel:L?.model}),_=[];for(let v of c.tests){if(v.skip)continue;let R=gn(v.expect),x=[];for(let k of R){let P=await k.evaluate(O);x.push(...P)}_.push({testName:v.name,assertions:x})}let te=_.reduce((v,R)=>v+R.assertions.length,0),ne=_.reduce((v,R)=>v+R.assertions.filter(x=>x.passed).length,0),oe=te-ne;console.log(),console.log(T.bold("Trace Test Results")),console.log(T.dim("\u2500".repeat(50)));for(let{testName:v,assertions:R}of _){let k=R.every(P=>P.passed)?T.green("\u2713"):T.red("\u2717");console.log(`${k} ${v}`);for(let P of R){let Ne=P.passed?T.green(" \u2713"):T.red(" \u2717"),Le=P.failureMessage?`${P.label}: ${P.failureMessage}`:P.label;console.log(`${Ne} ${Le}`)}}console.log(),console.log(`${T.bold("Total:")} ${ne} passed, ${oe} failed out of ${te} assertions`);let re=hn(c.gates,[]);if(!re.passed)for(let v of re.gates.filter(R=>!R.passed))console.log(T.red(`Gate failed: ${v.message}`));process.exit(oe>0?1:0)}catch(o){n.fail(`Trace command failed: ${o instanceof Error?o.message:String(o)}`),process.exit(1)}})}function Me(){let e=new yn;return e.name("kindlm").description("AI agent behavioral regression testing").version("0.4.0"),se(e),ie(e),ye(e),we(e),ke(e),Ie(e),Ae(e),e}process.on("unhandledRejection",e=>{let t=e instanceof Error?e.message:String(e);process.stderr.write(`
53
53
  Unhandled error: ${t}
54
54
  `),process.exit(1)});process.on("uncaughtException",e=>{process.stderr.write(`
55
55
  Fatal error: ${e.message}