coding-agent-benchmarks 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,7 +20,7 @@ export declare class BaselineManager {
20
20
  /**
21
21
  * Save a baseline result
22
22
  */
23
- saveBaseline(result: EvaluationResult, adapter: AdapterType, model?: string): void;
23
+ saveBaseline(result: EvaluationResult, adapter: AdapterType, model: string): void;
24
24
  /**
25
25
  * Load a baseline result
26
26
  */
@@ -28,7 +28,7 @@ export declare class BaselineManager {
28
28
  /**
29
29
  * Compare current result with baseline
30
30
  */
31
- compareWithBaseline(result: EvaluationResult, adapter: AdapterType, model?: string): {
31
+ compareWithBaseline(result: EvaluationResult, adapter: AdapterType, model: string): {
32
32
  baselineScore: number;
33
33
  delta: number;
34
34
  isImprovement: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"baselineManager.d.ts","sourceRoot":"","sources":["../../src/utils/baselineManager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAEpE,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,aAAa,EAAE,MAAM;IAIjC;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,YAAY,CACV,MAAM,EAAE,gBAAgB,EACxB,OAAO,EAAE,WAAW,EACpB,KAAK,GAAE,MAAkB,GACxB,IAAI;IAsBP;;OAEG;IACH,YAAY,CACV,OAAO,EAAE,WAAW,EACpB,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,MAAM,GACjB,YAAY,GAAG,IAAI;IAgBtB;;OAEG;IACH,mBAAmB,CACjB,MAAM,EAAE,gBAAgB,EACxB,OAAO,EAAE,WAAW,EACpB,KAAK,GAAE,MAAkB,GACxB;QACD,aAAa,EAAE,MAAM,CAAC;QACtB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,OAAO,CAAC;KACxB,GAAG,IAAI;IAiBR;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,YAAY,EAAE;IA4BnE;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAe/B;;OAEG;IACH,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO;IAgBhF;;OAEG;IACH,kBAAkB,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM;CA8CjE"}
1
+ {"version":3,"file":"baselineManager.d.ts","sourceRoot":"","sources":["../../src/utils/baselineManager.ts"],"names":[],"mappings":"AAAA;;GAEG;AAIH,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAEpE,MAAM,WAAW,YAAY;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,SAAS,EAAE,CAAC;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,WAAW,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,qBAAa,eAAe;IAC1B,OAAO,CAAC,WAAW,CAAS;gBAEhB,aAAa,EAAE,MAAM;IAIjC;;OAEG;IACH,OAAO,CAAC,eAAe;IAQvB;;OAEG;IACH,YAAY,CACV,MAAM,EAAE,gBAAgB,EACxB,OAAO,EAAE,WAAW,EACpB,KAAK,EAAE,MAAM,GACZ,IAAI;IAsBP;;OAEG;IACH,YAAY,CACV,OAAO,EAAE,WAAW,EACpB,KAAK,EAAE,MAAM,EACb,UAAU,EAAE,MAAM,GACjB,YAAY,GAAG,IAAI;IAgBtB;;OAEG;IACH,mBAAmB,CACjB,MAAM,EAAE,gBAAgB,EACxB,OAAO,EAAE,WAAW,EACpB,KAAK,EAAE,MAAM,GACZ;QACD,aAAa,EAAE,MAAM,CAAC;QACtB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,OAAO,CAAC;KACxB,GAAG,IAAI;IAiBR;;OAEG;IACH,aAAa,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,YAAY,EAAE;IA4BnE;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAe/B;;OAEG;IACH,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,OAAO;IAgBhF;;OAEG;IACH,kBAAkB,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,MAAM;CA8CjE"}
@@ -52,7 +52,7 @@ class BaselineManager {
52
52
  /**
53
53
  * Save a baseline result
54
54
  */
55
- saveBaseline(result, adapter, model = 'default') {
55
+ saveBaseline(result, adapter, model) {
56
56
  const baseline = {
57
57
  scenarioId: result.scenario.id,
58
58
  score: result.score,
@@ -90,7 +90,7 @@ class BaselineManager {
90
90
  /**
91
91
  * Compare current result with baseline
92
92
  */
93
- compareWithBaseline(result, adapter, model = 'default') {
93
+ compareWithBaseline(result, adapter, model) {
94
94
  const baseline = this.loadBaseline(adapter, model, result.scenario.id);
95
95
  if (!baseline) {
96
96
  return null;
@@ -1 +1 @@
1
- {"version":3,"file":"baselineManager.js","sourceRoot":"","sources":["../../src/utils/baselineManager.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAY7B,MAAa,eAAe;IAG1B,YAAY,aAAqB;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;IAC1E,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAAoB,EACpB,KAAa,EACb,UAAkB;QAElB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,UAAU,OAAO,CAAC,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,YAAY,CACV,MAAwB,EACxB,OAAoB,EACpB,QAAgB,SAAS;QAEzB,MAAM,QAAQ,GAAiB;YAC7B,UAAU,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO;YACP,KAAK;SACN,CAAC;QAEF,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC9E,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAEvC,0BAA0B;QAC1B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,sBAAsB;QACtB,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,YAAY,CACV,OAAoB,EACpB,KAAa,EACb,UAAkB;QAElB,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;QAEtE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YACvD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;QAC7C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,+BAA+B,UAAU,GAAG,EAAE,KAAK,CAAC,CAAC;YAClE,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,mBAAmB,CACjB,MAAwB,EACxB,OAAoB,EACpB,QAAgB,SAAS;QAMzB,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAEvE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC;QAC5C,MAAM,aAAa,GAAG,KAAK,GAAG,CAAC,CAAC;QAEhC,OAAO;YACL,aAAa,EAAE,QAAQ,CAAC,KAAK;YAC7B,KAAK;YACL,aAAa;SACd,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,OAAoB,EAAE,KAAc;QAChD,MAAM,SAAS,GAAmB,EAAE,CAAC;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACxD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,uDAAuD;QACvD,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,+BAA+B;YAC/B,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACjD,IAAI,CAAC,uBAAuB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,GAAW,EAAE,SAAyB;QACpE,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;oBAC/D,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;oBACrD,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC3B,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,IAAI,CAAC,2BAA2B,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;gBAC1D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,OAAoB,EAAE,KAAa,EAAE,UAAkB;QACpE,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;QAEtE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,CAAC;YACH,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,iCAAiC,UAAU,GAAG,EAAE,KAAK,CAAC,CAAC;YACpE,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,OAAoB,EAAE,KAAc;QACrD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAExD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,CAAC;QACX,CAAC;QAED,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;wBAC3B,IAAI,CAAC;4BACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;4BACzC,KAAK,EAAE,CAAC;wBACV,CAAC;wBAAC,OAAO,KAAK,EAAE,CAAC;4BACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;wBACnD,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,oBAAoB;YACpB,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACjD,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;oBACvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC3B,IAAI,CAAC;gCACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;gCACzC,KAAK,EAAE,CAAC;4BACV,CAAC;4BAAC,OAAO,KAAK,EAAE,CAAC;gCACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;4BACnD,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAvND,0CAuNC"}
1
+ {"version":3,"file":"baselineManager.js","sourceRoot":"","sources":["../../src/utils/baselineManager.ts"],"names":[],"mappings":";AAAA;;GAEG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEH,uCAAyB;AACzB,2CAA6B;AAY7B,MAAa,eAAe;IAG1B,YAAY,aAAqB;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,EAAE,aAAa,EAAE,WAAW,CAAC,CAAC;IAC1E,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,OAAoB,EACpB,KAAa,EACb,UAAkB;QAElB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,EAAE,KAAK,EAAE,GAAG,UAAU,OAAO,CAAC,CAAC;IAC3E,CAAC;IAED;;OAEG;IACH,YAAY,CACV,MAAwB,EACxB,OAAoB,EACpB,KAAa;QAEb,MAAM,QAAQ,GAAiB;YAC7B,UAAU,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE;YAC9B,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO;YACP,KAAK;SACN,CAAC;QAEF,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAC9E,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;QAEvC,0BAA0B;QAC1B,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,sBAAsB;QACtB,EAAE,CAAC,aAAa,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACH,YAAY,CACV,OAAoB,EACpB,KAAa,EACb,UAAkB;QAElB,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;QAEtE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YACvD,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;QAC7C,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,+BAA+B,UAAU,GAAG,EAAE,KAAK,CAAC,CAAC;YAClE,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACH,mBAAmB,CACjB,MAAwB,EACxB,OAAoB,EACpB,KAAa;QAMb,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QAEvE,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC;QAC5C,MAAM,aAAa,GAAG,KAAK,GAAG,CAAC,CAAC;QAEhC,OAAO;YACL,aAAa,EAAE,QAAQ,CAAC,KAAK;YAC7B,KAAK;YACL,aAAa;SACd,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,OAAoB,EAAE,KAAc;QAChD,MAAM,SAAS,GAAmB,EAAE,CAAC;QAErC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QACxD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,uDAAuD;QACvD,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,IAAI,CAAC,uBAAuB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YACpD,CAAC;QACH,CAAC;aAAM,CAAC;YACN,+BAA+B;YAC/B,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACjD,IAAI,CAAC,uBAAuB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,GAAW,EAAE,SAAyB;QACpE,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAClC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;oBAC/D,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;oBACrD,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAC3B,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,OAAO,CAAC,IAAI,CAAC,2BAA2B,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;gBAC1D,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,OAAoB,EAAE,KAAa,EAAE,UAAkB;QACpE,MAAM,YAAY,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC,CAAC;QAEtE,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,CAAC;YACH,EAAE,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,iCAAiC,UAAU,GAAG,EAAE,KAAK,CAAC,CAAC;YACpE,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED;;OAEG;IACH,kBAAkB,CAAC,OAAoB,EAAE,KAAc;QACrD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC;QAExD,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,OAAO,CAAC,CAAC;QACX,CAAC;QAED,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;YAC9C,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC5B,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBACvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;wBAC3B,IAAI,CAAC;4BACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;4BACzC,KAAK,EAAE,CAAC;wBACV,CAAC;wBAAC,OAAO,KAAK,EAAE,CAAC;4BACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;wBACnD,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,oBAAoB;YACpB,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;YACtE,KAAK,MAAM,GAAG,IAAI,SAAS,EAAE,CAAC;gBAC5B,IAAI,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC;oBACtB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC;oBACjD,MAAM,KAAK,GAAG,EAAE,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;oBACvC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;wBACzB,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;4BAC3B,IAAI,CAAC;gCACH,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;gCACzC,KAAK,EAAE,CAAC;4BACV,CAAC;4BAAC,OAAO,KAAK,EAAE,CAAC;gCACf,OAAO,CAAC,IAAI,CAAC,oBAAoB,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;4BACnD,CAAC;wBACH,CAAC;oBACH,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;CACF;AAvND,0CAuNC"}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "coding-agent-benchmarks",
3
- "version": "0.1.3",
4
- "description": "Open-source framework for evaluating coding agents (GitHub Copilot CLI, Claude Code, etc.) against coding standards and best practices",
3
+ "version": "0.2.0",
4
+ "description": "Testing coding agents (GitHub Copilot CLI, Claude Code, etc.) with your repo's context to evaluate their code generation quality.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
7
  "bin": {
@@ -18,10 +18,24 @@
18
18
  "benchmark",
19
19
  "evaluation",
20
20
  "copilot",
21
+ "github-copilot",
22
+ "claude",
21
23
  "claude-code",
22
24
  "ai",
25
+ "llm",
26
+ "openai",
27
+ "gpt",
23
28
  "testing",
24
- "code-quality"
29
+ "code-quality",
30
+ "code-generation",
31
+ "code-standards",
32
+ "lint",
33
+ "eslint",
34
+ "ci-cd",
35
+ "automation",
36
+ "developer-tools",
37
+ "prompt-testing",
38
+ "ai-testing"
25
39
  ],
26
40
  "author": "chiItepin",
27
41
  "license": "MIT",
@@ -38,12 +52,15 @@
38
52
  "LICENSE"
39
53
  ],
40
54
  "dependencies": {
41
- "commander": "^13.1.0"
55
+ "chalk": "^4.1.2",
56
+ "commander": "^13.1.0",
57
+ "log-update": "^4.0.0"
42
58
  },
43
59
  "devDependencies": {
44
60
  "@types/node": "^25.0.9",
45
61
  "eslint": "^9.39.2",
46
62
  "prettier": "^3.8.0",
63
+ "tsup": "^8.5.1",
47
64
  "tsx": "^4.21.0",
48
65
  "typescript": "^5.9.3"
49
66
  }