@kindlm/cli 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -48,5 +48,5 @@ Interrupted. Exiting...`)),process.exit(130)};process.on("SIGINT",o);try{return
48
48
  `).trim()}),o=s[2]?.trim()??"",r=2,a=[]):a.push(i)}return(o||a.length>0)&&n.push({heading:o,headingLevel:r,body:a.join(`
49
49
  `).trim()}),n}function ut(e){return e.match(/^# (.+)$/m)?.[1]?.trim()??"KindLM Compliance Report"}function ft(e){return e.match(/SHA-256:\s*`([a-f0-9]+)`/i)?.[1]??null}function ge(e){switch(e){case 2:return 18;case 3:return 15;case 4:return 13;default:return 13}}function gt(e){if(e.length<2)return null;let t=e[0]??"",n=e[1]??"";if(!t.includes("|")||!n.match(/^\s*\|[-:\s|]+\|\s*$/))return null;let o=i=>i.split("|").slice(1,-1).map(s=>s.trim()),r={cells:o(t)},a=[];for(let i=2;i<e.length;i++){let s=e[i]??"";if(!s.includes("|"))break;a.push({cells:o(s)})}return{header:r,rows:a}}function N(e,t){let n=e.page.margins.bottom;e.page.height-n-30-e.y<t&&(e.addPage(),pe(e))}function pe(e){let t=new Date().toISOString();e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("KindLM Compliance Report",60,40),e.text(t,60,40,{align:"right"}),e.moveDown(3)}function pt(e){e.fontSize(8).font("Helvetica").fillColor("#a8a29e").text("Generated by KindLM \xB7 kindlm.com",60,e.page.height-50,{align:"center",width:e.page.width-120})}function ht(e,t,n){let o=t.header.cells.length,r=n/o,a=e.page.margins.left,i=18;N(e,i*2);let s=(c,m,u)=>{let f=e.y;u&&(e.save(),e.rect(a,f-2,n,i).fill(u),e.restore());for(let y=0;y<c.length;y++){let d=a+y*r;e.fontSize(8).font(m?"Helvetica-Bold":"Courier").fillColor("#44403c").text(c[y]??"",d+4,f,{width:r-8,height:i,lineBreak:!1})}e.y=f+i};s(t.header.cells,!0,"#f5f5f4");for(let c of t.rows)N(e,i),s(c.cells,!1)}async function he(e,t){return await lt(dt(t),{recursive:!0}),new Promise((n,o)=>{let r=new at({size:"A4",margins:{top:72,bottom:72,left:60,right:60},info:{Title:"KindLM EU AI Act Compliance Report",Author:"KindLM",Creator:"KindLM CLI"}}),a=ct(t);r.pipe(a);let i=r.page.width-r.page.margins.left-r.page.margins.right,s=ut(e),c=ft(e);r.moveDown(6),r.fontSize(28).font("Helvetica-Bold").fillColor("#1c1917").text(s,{align:"center",width:i}),r.moveDown(.5),r.fontSize(14).font("Helvetica").fillColor("#57534e").text("EU AI Act Annex IV Documentation",{align:"center",width:i}),r.moveDown(1),r.fontSize(10).fillColor("#a8a29e").text(`Generated: ${new Date().toISOString()}`,{align:"center",width:i}),c&&(r.moveDown(.3),r.fontSize(9).font("Courier").fillColor("#78716c").text(`SHA-256: ${c}`,{align:"center",width:i})),r.moveDown(2),r.fontSize(10).font("Helvetica").fillColor("#6366f1").text("kindlm.com",{align:"center",link:"https://kindlm.com",width:i});let m=mt(e);for(let u of m){if(r.addPage(),pe(r),u.heading){let C=ge(u.headingLevel);r.fontSize(C).font("Helvetica-Bold").fillColor("#1c1917").text(u.heading,{width:i}),r.moveDown(.5),r.moveTo(60,r.y).lineTo(60+i,r.y).strokeColor("#e7e5e4").lineWidth(1).stroke(),r.moveDown(.8)}let f=u.body.split(`
50
50
  `),y=!1,d=0;for(;d<f.length;){let C=f[d]??"";if(C.startsWith("```")){y=!y,y&&N(r,30),d++;continue}if(y){N(r,14);let l=r.y;r.save(),r.rect(r.page.margins.left,l-2,i,14).fill("#f5f5f4"),r.restore(),r.fontSize(9).font("Courier").fillColor("#44403c").text(C,{width:i}),d++;continue}if(!C.trim()){r.moveDown(.4),d++;continue}let S=f[d+1]??"";if(C.includes("|")&&d+1<f.length&&S.match(/^\s*\|[-:\s|]+\|\s*$/)){let l=[],g=d;for(;g<f.length&&(f[g]??"").includes("|");)l.push(f[g]??""),g++;let b=gt(l);if(b){ht(r,b,i),d=g;continue}}if(C.match(/^\s*\|[-:]+/)||C.match(/^---+$/)){d++;continue}let h=C.match(/^(#{3,4})\s+(.+)$/);if(h?.[1]&&h[2]){let l=h[1].length,g=ge(l);N(r,g+10),r.moveDown(.3),r.fontSize(g).font("Helvetica-Bold").fillColor("#1c1917").text(h[2].trim(),{width:i}),r.moveDown(.3),d++;continue}if(C.match(/^\s*[-*] /)){N(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{indent:12,width:i-12}),d++;continue}N(r,14),r.fontSize(10).font("Helvetica").fillColor("#44403c").text(C.trim(),{width:i}),d++}pt(r)}r.end(),a.on("finish",()=>n(t)),a.on("error",o)})}function ye(e){e.command("test").description("Run test suites").option("-s, --suite <name>","Run a specific suite").option("--compliance","Generate compliance report").option("--reporter <type>","Output format: pretty, json, junit","pretty").option("--runs <count>","Override run count").option("--gate <percent>","Fail if pass rate below threshold").option("--pdf <path>","Export compliance report as PDF (requires --compliance)").option("-c, --config <path>","Path to config file","kindlm.yaml").action(async t=>{try{let{runnerResult:n,config:o,yamlContent:r}=await B({configPath:t.config,runs:t.runs?parseInt(t.runs,10):void 0,gate:t.gate?parseFloat(t.gate):void 0}),{runResult:a,aggregated:i}=n,s=yt(o.gates,i),m=bt(t.reporter).generate(a,s);if(console.log(m.content),t.compliance){let y=wt().generate(a,s);if(console.log(""),console.log(y.content),t.pdf){let d=await he(y.content,t.pdf);console.log(""),console.log(w.green(`PDF report saved to ${d}`))}}try{me({runnerResult:n,suiteName:o.suite.name,configHash:fe(r),timestamp:new Date().toISOString()})}catch{}let u=a.failed===0&&a.errored===0&&s.passed;process.exit(u?0:1)}catch(n){if(n instanceof St){let o=n.code==="TIMEOUT"?"Provider timeout":n.code==="NETWORK_ERROR"?"Network error":n.code==="AUTH_FAILED"?"Authentication failed":n.code==="RATE_LIMITED"?"Rate limited":`Provider error (${n.code})`;console.error(w.red(`${o}: ${n.message}`)),n.retryable&&console.error(w.yellow("This error may be transient. Try again or increase --timeout."))}else if(kt(n)){let r=n.code.startsWith("CONFIG_")?"Config error":"Error";console.error(w.red(`${r}: ${n.message}`))}else n instanceof Error&&n.name==="AbortError"?console.error(w.red("Request timed out. Check network connectivity or increase timeout.")):console.error(w.red(`Error: ${n instanceof Error?n.message:String(n)}`));process.exit(1)}})}var Tt={bold:e=>w.bold(e),red:e=>w.red(e),green:e=>w.green(e),yellow:e=>w.yellow(e),cyan:e=>w.cyan(e),dim:e=>w.dim(e),greenBold:e=>w.green.bold(e),redBold:e=>w.red.bold(e)};function bt(e){switch(e){case"json":return vt();case"junit":return Rt();default:return Ct(Tt)}}function kt(e){return typeof e=="object"&&e!==null&&"code"in e&&"message"in e&&typeof e.code=="string"&&typeof e.message=="string"}import{resolve as W,dirname as q,join as V}from"path";import{readFileSync as $t}from"fs";import p from"chalk";import{parseConfig as Et,readBaseline as Ot,writeBaseline as At,listBaselines as Mt,buildBaselineData as Re,compareBaseline as Nt,deserializeBaseline as Lt}from"@kindlm/core";import{readFileSync as xt,writeFileSync as Pt,mkdirSync as Ce,readdirSync as It}from"fs";import{join as X}from"path";function ve(e){return e.replace(/[^a-zA-Z0-9_-]/g,"_")}function J(e){let t=X(e,"baselines");return{read(n){let o=X(t,`${ve(n)}.json`);try{return{success:!0,data:xt(o,"utf-8")}}catch{return{success:!1,error:{code:"BASELINE_NOT_FOUND",message:`No baseline found for suite "${n}" at ${o}`}}}},write(n,o){try{Ce(t,{recursive:!0});let r=X(t,`${ve(n)}.json`);return Pt(r,o,"utf-8"),{success:!0,data:void 0}}catch(r){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to write baseline: ${r instanceof Error?r.message:String(r)}`}}}},list(){try{return Ce(t,{recursive:!0}),{success:!0,data:It(t).filter(r=>r.endsWith(".json")).map(r=>r.replace(/\.json$/,""))}}catch(n){return{success:!1,error:{code:"UNKNOWN_ERROR",message:`Failed to list baselines: ${n instanceof Error?n.message:String(n)}`}}}}}}function we(e){let t=e.command("baseline").description("Manage test baselines");t.command("set").description("Save current results as baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),{config:i,runnerResult:s}=await B({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0}),{aggregated:c}=s,m=Re(i.suite.name,c,new Date().toISOString()),u=At(m,a);u.success||(console.error(p.red(`Failed to save baseline: ${u.error.message}`)),process.exit(1));let f=Object.keys(m.results).length;console.log(""),console.log(p.green(`Baseline saved for suite "${i.suite.name}" (${f} test${f===1?"":"s"})`)),console.log(p.dim(` Location: ${r}/baselines/`)),process.exit(0)}catch(o){console.error(p.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}}),t.command("compare").description("Compare latest against baseline").option("-c, --config <path>","Path to config file","kindlm.yaml").option("--runs <count>","Override run count").action(async n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),i=W(process.cwd(),n.config),s;try{s=$t(i,"utf-8")}catch{console.error(p.red(`Config file not found: ${i}`)),process.exit(1)}let c=A(),m=Et(s,{configDir:o,fileReader:c});m.success||(console.error(p.red(`Config validation failed: ${m.error.message}`)),process.exit(1));let u=m.data.suite.name,f=Ot(u,a);f.success||(f.error.code==="BASELINE_NOT_FOUND"?console.error(p.red(`No baseline found for suite "${u}". Run \`kindlm baseline set\` first.`)):console.error(p.red(`Failed to read baseline: ${f.error.message}`)),process.exit(1));let y=f.data,{runnerResult:d}=await B({configPath:n.config,runs:n.runs?parseInt(n.runs,10):void 0,baselineData:y}),{aggregated:C}=d,S=Re(u,C,new Date().toISOString()),h=Nt(y,S.results);if(console.log(""),console.log(p.bold(`Baseline comparison for "${u}"`)),console.log(p.dim(` Baseline from: ${y.createdAt}`)),console.log(""),h.regressions.length>0){console.log(p.red.bold(` Regressions (${h.regressions.length}):`));for(let l of h.regressions)console.log(p.red(` ${l.testName}: ${U(l.baselinePassRate)} \u2192 ${U(l.currentPassRate)}`)),l.newFailureCodes.length>0&&console.log(p.red(` New failures: ${l.newFailureCodes.join(", ")}`));console.log("")}if(h.improvements.length>0){console.log(p.green.bold(` Improvements (${h.improvements.length}):`));for(let l of h.improvements)console.log(p.green(` ${l.testName}: ${U(l.baselinePassRate)} \u2192 ${U(l.currentPassRate)}`));console.log("")}if(h.unchanged.length>0){console.log(p.dim(` Unchanged (${h.unchanged.length}):`));for(let l of h.unchanged)console.log(p.dim(` ${l.testName}: ${U(l.passRate)}`));console.log("")}if(h.newTests.length>0){console.log(p.cyan(` New tests (${h.newTests.length}):`));for(let l of h.newTests)console.log(p.cyan(` ${l}`));console.log("")}if(h.removedTests.length>0){console.log(p.yellow(` Removed tests (${h.removedTests.length}):`));for(let l of h.removedTests)console.log(p.yellow(` ${l}`));console.log("")}process.exit(h.regressions.length>0?1:0)}catch(o){console.error(p.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}}),t.command("list").description("List saved baselines").option("-c, --config <path>","Path to config file","kindlm.yaml").action(n=>{try{let o=q(W(process.cwd(),n.config)),r=V(o,".kindlm"),a=J(r),i=Mt(a);i.success||(console.error(p.red(`Failed to list baselines: ${i.error.message}`)),process.exit(1));let s=i.data;s.length===0&&(console.log(p.dim("No baselines saved yet. Run `kindlm baseline set` to create one.")),process.exit(0)),console.log(p.bold("Saved baselines:")),console.log("");for(let c of s){let m=a.read(c);if(!m.success){console.log(` ${c} ${p.dim("(unreadable)")}`);continue}let u=Lt(m.data);if(!u.success){console.log(` ${c} ${p.dim("(corrupt)")}`);continue}let f=Object.keys(u.data.results).length;console.log(` ${p.cyan(u.data.suiteName)} \u2014 ${f} test${f===1?"":"s"}, saved ${p.dim(u.data.createdAt)}`)}process.exit(0)}catch(o){console.error(p.red(`Error: ${o instanceof Error?o.message:String(o)}`)),process.exit(1)}})}function U(e){return`${(e*100).toFixed(1)}%`}import{createInterface as zt}from"readline";import{Writable as Gt}from"stream";import E from"chalk";import{readFileSync as _t,writeFileSync as Dt,mkdirSync as jt,unlinkSync as Ft,chmodSync as Ht}from"fs";import{join as Se}from"path";import{homedir as Te}from"os";function Z(){return Se(Te(),".kindlm","credentials")}function Y(){try{let e=_t(Z(),"utf-8"),t=JSON.parse(e);return typeof t.token=="string"&&t.token.length>0?t.token:null}catch{return null}}function be(e){let t=Z(),n=Se(Te(),".kindlm");jt(n,{recursive:!0,mode:448});let o={token:e,savedAt:new Date().toISOString()};Dt(t,JSON.stringify(o,null,2),{mode:384}),Ht(t,384)}function ke(){try{Ft(Z())}catch{}}var Bt="https://api.kindlm.com";var $=class extends Error{status;constructor(t,n){super(n),this.name="CloudApiError",this.status=t}};function j(){let e=process.env.KINDLM_CLOUD_URL??Bt;if(e.startsWith("http://")&&!Ut(e))throw new Error(`Refusing to use insecure HTTP for Cloud API: ${e}. Use HTTPS or target localhost for development.`);return e}function Ut(e){try{let t=new URL(e);return t.hostname==="localhost"||t.hostname==="127.0.0.1"||t.hostname==="::1"}catch{return!1}}function Kt(e){return new Promise(t=>setTimeout(t,e))}function K(e,t){async function n(o,r,a){let i=`${e}${r}`,s={Authorization:`Bearer ${t}`},c={method:o,headers:s};a!==void 0&&(s["Content-Type"]="application/json",c.body=JSON.stringify(a));let m;for(let u=0;u<=1;u++){u>0&&await Kt(1e3);let f=new AbortController,y=setTimeout(()=>f.abort(),3e4);c.signal=f.signal;try{let d=await fetch(i,c);if(!d.ok){if(d.status>=500&&u<1){m=new $(d.status,`HTTP ${d.status}`);continue}let S=`HTTP ${d.status}`;if((d.headers.get("content-type")??"").includes("application/json"))try{let l=await d.json();l.error&&(S=l.error)}catch{}throw new $(d.status,S)}if(d.status===204)return;let C=d.headers.get("content-type")??"";if(!C.includes("application/json"))throw new $(d.status,`Expected JSON response but got content-type: ${C}`);return await d.json()}catch(d){if(d instanceof $)throw d;if(m=d instanceof Error?d:new Error(String(d)),u<1)continue}finally{clearTimeout(y)}}throw m??new Error("Request failed")}return{baseUrl:e,get:o=>n("GET",o),post:(o,r)=>n("POST",o,r),patch:(o,r)=>n("PATCH",o,r),delete:o=>n("DELETE",o)}}function xe(e){e.command("login").description("Authenticate with KindLM Cloud").option("-t, --token <token>","API token (skips interactive prompt)").option("--status","Show current authentication status").option("--logout","Remove stored credentials").action(async t=>{try{if(t.logout){ke(),console.log(E.green("Logged out. Credentials removed."));return}if(t.status){await Jt();return}let n=t.token??process.env.KINDLM_API_TOKEN??await Wt();n.startsWith("klm_")||(console.error(E.red('Invalid token format. KindLM tokens start with "klm_".')),process.exit(1));let o=K(j(),n);try{await o.get("/v1/auth/tokens")}catch(r){throw r instanceof $&&r.status===401&&(console.error(E.red("Invalid or expired token.")),process.exit(1)),r}be(n),console.log(E.green("Authenticated successfully. Token saved."))}catch(n){console.error(E.red(`Login failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}async function Jt(){let e=Y();if(!e){console.log(E.yellow('Not authenticated. Run "kindlm login" to authenticate.'));return}let t=K(j(),e);try{await t.get("/v1/auth/tokens"),console.log(E.green("Authenticated.")),console.log(` Cloud URL: ${j()}`)}catch(n){n instanceof $&&n.status===401?console.log(E.yellow('Stored token is invalid or expired. Run "kindlm login" to re-authenticate.')):console.log(E.yellow(`Cannot reach Cloud API: ${n instanceof Error?n.message:String(n)}`))}}function Wt(){return new Promise((e,t)=>{let n=new Gt({write(r,a,i){i()}});process.stderr.write("Paste your KindLM API token: ");let o=zt({input:process.stdin,output:n,terminal:!0});o.question("",r=>{o.close(),process.stderr.write(`
51
- `);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}import{basename as Vt}from"path";import{execSync as Zt}from"child_process";import ee from"chalk";import{execSync as Q}from"child_process";function Pe(){try{let e=Q("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=Q("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,o=Q("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:o}}catch{return{commitSha:null,branch:null,dirty:!1}}}function Ie(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function z(e){return encodeURIComponent(e)}async function $e(e,t,n){let o=await Yt(e,n.projectName),r=await Xt(e,o,n.suiteName,n.configHash),a=await e.post(`/v1/runs/${z(o)}/runs`,{suiteId:r,commitSha:n.commitSha,branch:n.branch,environment:n.environment,triggeredBy:n.triggeredBy}),i=qt(t.aggregated),s=50;for(let l=0;l<i.length;l+=s){let g=i.slice(l,l+s);await e.post(`/v1/results/${z(a.id)}/results`,{results:g})}let{runResult:c}=t,m=c.totalTests>0?c.passed/c.totalTests:0,u=new Set(t.aggregated.map(l=>l.modelId)),f=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),y=f.length>0?f.reduce((l,g)=>l+g,0)/f.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),C=d.length>0?d.reduce((l,g)=>l+g,0)/d.length:void 0,S=t.aggregated.reduce((l,g)=>l+g.totalCostUsd,0),h=S>0?S:void 0;return await e.patch(`/v1/runs/${z(a.id)}`,{status:"completed",passRate:m,testCount:c.totalTests,modelCount:u.size,judgeAvgScore:y,latencyAvgMs:C,costEstimateUsd:h,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:o}}async function Yt(e,t){let{projects:n}=await e.get("/v1/projects"),o=n.find(a=>a.name===t);return o?o.id:(await e.post("/v1/projects",{name:t})).id}async function Xt(e,t,n,o){let{suites:r}=await e.get(`/v1/suites/${z(t)}/suites`),a=r.find(s=>s.name===n);return a?a.id:(await e.post(`/v1/suites/${z(t)}/suites`,{name:n,configHash:o})).id}function qt(e){return e.map(t=>{let n=t,o=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):n.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:o.length>0?JSON.stringify(o):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function Ee(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let n=t.token??process.env.KINDLM_API_TOKEN??Y();n||(console.error(ee.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let o=ue();o||(console.error(ee.red('No test run found. Run "kindlm test" first.')),process.exit(1));let r=Pe(),a=Ie(),i=t.project??en(),s=K(j(),n),c=D();c.start("Uploading results to KindLM Cloud...");try{let m=await $e(s,o.runnerResult,{projectName:i,suiteName:o.suiteName,configHash:o.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local"});c.succeed("Uploaded successfully."),console.log(` Run ID: ${m.runId}`),console.log(` Project: ${i}`),console.log(` Suite: ${o.suiteName}`)}catch(m){throw c.fail("Upload failed."),m}}catch(n){console.error(ee.red(`Upload failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function Qt(e){try{let o=new URL(e).pathname.split("/").filter(Boolean),r=o[o.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let n=t[1].split("/");return n[n.length-1]??null}return null}function en(){try{let e=Zt("git remote get-url origin",{encoding:"utf-8"}).trim(),t=Qt(e);if(t)return t}catch{}return Vt(process.cwd())}import{readFileSync as on,statSync as rn}from"fs";import{resolve as sn,dirname as an}from"path";import{spawn as cn}from"child_process";import T from"chalk";import{parseConfig as ln,createProvider as dn,filterSpans as mn,mapSpansToResult as un,buildContextFromTrace as fn,createAssertionsFromExpect as gn,evaluateGates as pn}from"@kindlm/core";import{createServer as tn}from"http";import{parseOtlpPayload as nn}from"@kindlm/core";function Oe(e){let t=[],n=null,o=[];function r(){for(let i of o)i()}function a(i,s){if(s.setHeader("Access-Control-Allow-Origin","*"),s.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),s.setHeader("Access-Control-Allow-Headers","Content-Type"),i.method==="OPTIONS"){s.writeHead(204),s.end();return}if(i.method!=="POST"||i.url!=="/v1/traces"){s.writeHead(404,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Not found"}));return}let c=[];i.on("data",m=>c.push(m)),i.on("end",()=>{try{let m=Buffer.concat(c).toString("utf-8"),u=JSON.parse(m),f=nn(u);f.success?(t.push(...f.data),r(),s.writeHead(200,{"Content-Type":"application/json"}),s.end(JSON.stringify({partialSuccess:{}}))):(s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:f.error.message})))}catch{s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((i,s)=>{n=tn(a),n.on("error",s),n.listen(e,()=>i())})},stop(){return new Promise(i=>{n?n.close(()=>i()):i()})},getSpans(){return[...t]},waitForSpans({timeoutMs:i}){return new Promise(s=>{if(t.length>0){s([...t]);return}let c=setTimeout(()=>{o=o.filter(u=>u!==m),s([...t])},i),m=()=>{clearTimeout(c),o=o.filter(u=>u!==m),setTimeout(()=>s([...t]),500)};o.push(m)})}}}function Ae(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let n=D();try{let o=sn(process.cwd(),t.config),r=an(o);try{rn(o).size>1048576&&(console.error(T.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let a;try{a=on(o,"utf-8")}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let i=A(),s=ln(a,{configDir:r,fileReader:i});s.success||(console.error(T.red(`Config validation failed: ${s.error.message}`)),process.exit(1));let c=s.data,m=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},u=parseInt(t.port,10)||m.port,f=parseInt(t.timeout,10)||m.timeoutMs,y=Oe(u);await y.start(),n.start(`Listening for OTLP traces on port ${u}...`),t.command&&cn("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${u}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}).on("error",R=>{n.fail(`Command failed: ${R.message}`)});let d=await y.waitForSpans({timeoutMs:f});await y.stop(),d.length===0&&(n.fail("No traces received"),process.exit(1)),n.succeed(`Received ${d.length} spans`);let C=mn(d,m.spanFilter),S=un(C,m.spanMapping),h=G(),l=new Map,g=c.providers;for(let[v,R]of Object.entries(g)){if(!R)continue;let k="";if(R.apiKeyEnv){let x=process.env[R.apiKeyEnv];x&&(k=x.trim())}if(!(!k&&v!=="ollama"))try{let x=dn(v,h);await x.initialize({apiKey:k,baseUrl:R.baseUrl,organization:R.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),l.set(v,x)}catch{}}let b=c.defaults.judgeModel??c.models[0]?.id,L=c.models.find(v=>v.id===b),F=L?l.get(L.provider):void 0,O=fn(S,{configDir:r,judgeAdapter:F,judgeModel:L?.model}),_=[];for(let v of c.tests){if(v.skip)continue;let R=gn(v.expect),k=[];for(let x of R){let P=await x.evaluate(O);k.push(...P)}_.push({testName:v.name,assertions:k})}let te=_.reduce((v,R)=>v+R.assertions.length,0),ne=_.reduce((v,R)=>v+R.assertions.filter(k=>k.passed).length,0),oe=te-ne;console.log(),console.log(T.bold("Trace Test Results")),console.log(T.dim("\u2500".repeat(50)));for(let{testName:v,assertions:R}of _){let x=R.every(P=>P.passed)?T.green("\u2713"):T.red("\u2717");console.log(`${x} ${v}`);for(let P of R){let Me=P.passed?T.green(" \u2713"):T.red(" \u2717"),Ne=P.failureMessage?`${P.label}: ${P.failureMessage}`:P.label;console.log(`${Me} ${Ne}`)}}console.log(),console.log(`${T.bold("Total:")} ${ne} passed, ${oe} failed out of ${te} assertions`);let re=pn(c.gates,[]);if(!re.passed)for(let v of re.gates.filter(R=>!R.passed))console.log(T.red(`Gate failed: ${v.message}`));process.exit(oe>0?1:0)}catch(o){n.fail(`Trace command failed: ${o instanceof Error?o.message:String(o)}`),process.exit(1)}})}function dr(){let e=new hn;return e.name("kindlm").description("AI agent behavioral regression testing").version("0.0.0"),se(e),ie(e),ye(e),we(e),xe(e),Ee(e),Ae(e),e}export{dr as createProgram};
51
+ `);let a=r.trim();if(!a){t(new Error("No token provided"));return}e(a)})})}import{basename as Vt}from"path";import{execSync as Zt}from"child_process";import ee from"chalk";import{execSync as Q}from"child_process";function Pe(){try{let e=Q("git rev-parse HEAD",{encoding:"utf-8"}).trim()||null,t=Q("git rev-parse --abbrev-ref HEAD",{encoding:"utf-8"}).trim()||null,o=Q("git status --porcelain",{encoding:"utf-8"}).trim().length>0;return{commitSha:e,branch:t,dirty:o}}catch{return{commitSha:null,branch:null,dirty:!1}}}function Ie(){return process.env.GITHUB_ACTIONS?{name:"github_actions",isCI:!0,commitSha:process.env.GITHUB_SHA??null,branch:process.env.GITHUB_REF_NAME??null}:process.env.GITLAB_CI?{name:"gitlab_ci",isCI:!0,commitSha:process.env.CI_COMMIT_SHA??null,branch:process.env.CI_COMMIT_BRANCH??null}:process.env.CI?{name:null,isCI:!0,commitSha:null,branch:null}:{name:null,isCI:!1,commitSha:null,branch:null}}function z(e){return encodeURIComponent(e)}async function $e(e,t,n){let o=await Yt(e,n.projectName),r=await Xt(e,o,n.suiteName,n.configHash),a=await e.post(`/v1/runs/${z(o)}/runs`,{suiteId:r,commitSha:n.commitSha,branch:n.branch,environment:n.environment,triggeredBy:n.triggeredBy}),i=qt(t.aggregated),s=50;for(let l=0;l<i.length;l+=s){let g=i.slice(l,l+s);await e.post(`/v1/results/${z(a.id)}/results`,{results:g})}let{runResult:c}=t,m=c.totalTests>0?c.passed/c.totalTests:0,u=new Set(t.aggregated.map(l=>l.modelId)),f=t.aggregated.map(l=>l.assertionScores.judge?.mean).filter(l=>l!==void 0),y=f.length>0?f.reduce((l,g)=>l+g,0)/f.length:void 0,d=t.aggregated.map(l=>l.latencyAvgMs),C=d.length>0?d.reduce((l,g)=>l+g,0)/d.length:void 0,S=t.aggregated.reduce((l,g)=>l+g.totalCostUsd,0),h=S>0?S:void 0;return await e.patch(`/v1/runs/${z(a.id)}`,{status:"completed",passRate:m,testCount:c.totalTests,modelCount:u.size,judgeAvgScore:y,latencyAvgMs:C,costEstimateUsd:h,finishedAt:new Date().toISOString()}),{runId:a.id,projectId:o}}async function Yt(e,t){let{projects:n}=await e.get("/v1/projects"),o=n.find(a=>a.name===t);return o?o.id:(await e.post("/v1/projects",{name:t})).id}async function Xt(e,t,n,o){let{suites:r}=await e.get(`/v1/suites/${z(t)}/suites`),a=r.find(s=>s.name===n);return a?a.id:(await e.post(`/v1/suites/${z(t)}/suites`,{name:n,configHash:o})).id}function qt(e){return e.map(t=>{let n=t,o=t.runs.length>0?t.runs.flatMap(r=>r.assertions.filter(a=>!a.passed).map(a=>a.failureMessage)).filter(r=>r!==void 0):n.failureMessages??[];return{testCaseName:t.testCaseName,modelId:t.modelId,passed:t.passed?1:0,passRate:t.passRate,runCount:t.runCount,judgeAvg:t.assertionScores.judge?.mean??null,driftScore:t.assertionScores.drift?.mean??null,latencyAvgMs:t.latencyAvgMs??null,costUsd:t.totalCostUsd??null,totalTokens:t.totalTokens??null,failureCodes:t.failureCodes.length>0?JSON.stringify(t.failureCodes):null,failureMessages:o.length>0?JSON.stringify(o):null,assertionScores:Object.keys(t.assertionScores).length>0?JSON.stringify(t.assertionScores):null}})}function Ee(e){e.command("upload").description("Push last run results to KindLM Cloud").option("-t, --token <token>","API token (overrides stored token)").option("-p, --project <name>","Project name").action(async t=>{try{let n=t.token??process.env.KINDLM_API_TOKEN??Y();n||(console.error(ee.red('Not authenticated. Run "kindlm login" first or pass --token.')),process.exit(1));let o=ue();o||(console.error(ee.red('No test run found. Run "kindlm test" first.')),process.exit(1));let r=Pe(),a=Ie(),i=t.project??en(),s=K(j(),n),c=D();c.start("Uploading results to KindLM Cloud...");try{let m=await $e(s,o.runnerResult,{projectName:i,suiteName:o.suiteName,configHash:o.configHash,commitSha:a.commitSha??r.commitSha??void 0,branch:a.branch??r.branch??void 0,environment:a.isCI?"ci":"local",triggeredBy:a.name??"local"});c.succeed("Uploaded successfully."),console.log(` Run ID: ${m.runId}`),console.log(` Project: ${i}`),console.log(` Suite: ${o.suiteName}`)}catch(m){throw c.fail("Upload failed."),m}}catch(n){console.error(ee.red(`Upload failed: ${n instanceof Error?n.message:String(n)}`)),process.exit(1)}})}function Qt(e){try{let o=new URL(e).pathname.split("/").filter(Boolean),r=o[o.length-1];if(r)return r.replace(/\.git$/,"")}catch{}let t=e.match(/^[\w.-]+@[\w.-]+:(.+?)(?:\.git)?$/);if(t?.[1]){let n=t[1].split("/");return n[n.length-1]??null}return null}function en(){try{let e=Zt("git remote get-url origin",{encoding:"utf-8"}).trim(),t=Qt(e);if(t)return t}catch{}return Vt(process.cwd())}import{readFileSync as on,statSync as rn}from"fs";import{resolve as sn,dirname as an}from"path";import{spawn as cn}from"child_process";import T from"chalk";import{parseConfig as ln,createProvider as dn,filterSpans as mn,mapSpansToResult as un,buildContextFromTrace as fn,createAssertionsFromExpect as gn,evaluateGates as pn}from"@kindlm/core";import{createServer as tn}from"http";import{parseOtlpPayload as nn}from"@kindlm/core";function Oe(e){let t=[],n=null,o=[];function r(){for(let i of o)i()}function a(i,s){if(s.setHeader("Access-Control-Allow-Origin","*"),s.setHeader("Access-Control-Allow-Methods","POST, OPTIONS"),s.setHeader("Access-Control-Allow-Headers","Content-Type"),i.method==="OPTIONS"){s.writeHead(204),s.end();return}if(i.method!=="POST"||i.url!=="/v1/traces"){s.writeHead(404,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Not found"}));return}let c=[];i.on("data",m=>c.push(m)),i.on("end",()=>{try{let m=Buffer.concat(c).toString("utf-8"),u=JSON.parse(m),f=nn(u);f.success?(t.push(...f.data),r(),s.writeHead(200,{"Content-Type":"application/json"}),s.end(JSON.stringify({partialSuccess:{}}))):(s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:f.error.message})))}catch{s.writeHead(400,{"Content-Type":"application/json"}),s.end(JSON.stringify({error:"Invalid JSON"}))}})}return{start(){return new Promise((i,s)=>{n=tn(a),n.on("error",s),n.listen(e,()=>i())})},stop(){return new Promise(i=>{n?n.close(()=>i()):i()})},getSpans(){return[...t]},waitForSpans({timeoutMs:i}){return new Promise(s=>{if(t.length>0){s([...t]);return}let c=setTimeout(()=>{o=o.filter(u=>u!==m),s([...t])},i),m=()=>{clearTimeout(c),o=o.filter(u=>u!==m),setTimeout(()=>s([...t]),500)};o.push(m)})}}}function Ae(e){e.command("trace").description("Ingest OpenTelemetry traces and run assertions against them").option("-c, --config <path>","Config file path","kindlm.yaml").option("--port <port>","OTLP HTTP port","4318").option("--command <cmd>","Command to spawn (traces are collected while it runs)").option("--timeout <ms>","Timeout in ms to wait for traces","30000").option("--reporter <type>","Report format: pretty, json, junit","pretty").action(async t=>{let n=D();try{let o=sn(process.cwd(),t.config),r=an(o);try{rn(o).size>1048576&&(console.error(T.red("Config file exceeds 1MB limit")),process.exit(1))}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let a;try{a=on(o,"utf-8")}catch{console.error(T.red(`Config file not found: ${o}`)),process.exit(1)}let i=A(),s=ln(a,{configDir:r,fileReader:i});s.success||(console.error(T.red(`Config validation failed: ${s.error.message}`)),process.exit(1));let c=s.data,m=c.trace??{port:parseInt(t.port,10),timeoutMs:parseInt(t.timeout,10),spanMapping:{outputTextAttr:"gen_ai.completion.0.content",modelAttr:"gen_ai.response.model",systemAttr:"gen_ai.system",inputTokensAttr:"gen_ai.usage.input_tokens",outputTokensAttr:"gen_ai.usage.output_tokens"}},u=parseInt(t.port,10)||m.port,f=parseInt(t.timeout,10)||m.timeoutMs,y=Oe(u);await y.start(),n.start(`Listening for OTLP traces on port ${u}...`),t.command&&cn("sh",["-c",t.command],{cwd:r,env:{...process.env,OTEL_EXPORTER_OTLP_ENDPOINT:`http://localhost:${u}`,OTEL_EXPORTER_OTLP_PROTOCOL:"http/json"},stdio:"inherit"}).on("error",R=>{n.fail(`Command failed: ${R.message}`)});let d=await y.waitForSpans({timeoutMs:f});await y.stop(),d.length===0&&(n.fail("No traces received"),process.exit(1)),n.succeed(`Received ${d.length} spans`);let C=mn(d,m.spanFilter),S=un(C,m.spanMapping),h=G(),l=new Map,g=c.providers;for(let[v,R]of Object.entries(g)){if(!R)continue;let k="";if(R.apiKeyEnv){let x=process.env[R.apiKeyEnv];x&&(k=x.trim())}if(!(!k&&v!=="ollama"))try{let x=dn(v,h);await x.initialize({apiKey:k,baseUrl:R.baseUrl,organization:R.organization,timeoutMs:c.defaults.timeoutMs,maxRetries:2}),l.set(v,x)}catch{}}let b=c.defaults.judgeModel??c.models[0]?.id,L=c.models.find(v=>v.id===b),F=L?l.get(L.provider):void 0,O=fn(S,{configDir:r,judgeAdapter:F,judgeModel:L?.model}),_=[];for(let v of c.tests){if(v.skip)continue;let R=gn(v.expect),k=[];for(let x of R){let P=await x.evaluate(O);k.push(...P)}_.push({testName:v.name,assertions:k})}let te=_.reduce((v,R)=>v+R.assertions.length,0),ne=_.reduce((v,R)=>v+R.assertions.filter(k=>k.passed).length,0),oe=te-ne;console.log(),console.log(T.bold("Trace Test Results")),console.log(T.dim("\u2500".repeat(50)));for(let{testName:v,assertions:R}of _){let x=R.every(P=>P.passed)?T.green("\u2713"):T.red("\u2717");console.log(`${x} ${v}`);for(let P of R){let Me=P.passed?T.green(" \u2713"):T.red(" \u2717"),Ne=P.failureMessage?`${P.label}: ${P.failureMessage}`:P.label;console.log(`${Me} ${Ne}`)}}console.log(),console.log(`${T.bold("Total:")} ${ne} passed, ${oe} failed out of ${te} assertions`);let re=pn(c.gates,[]);if(!re.passed)for(let v of re.gates.filter(R=>!R.passed))console.log(T.red(`Gate failed: ${v.message}`));process.exit(oe>0?1:0)}catch(o){n.fail(`Trace command failed: ${o instanceof Error?o.message:String(o)}`),process.exit(1)}})}function dr(){let e=new hn;return e.name("kindlm").description("AI agent behavioral regression testing").version("0.4.0"),se(e),ie(e),ye(e),we(e),xe(e),Ee(e),Ae(e),e}export{dr as createProgram};
52
52
  //# sourceMappingURL=index.js.map