task-o-matic-core 0.1.4 → 0.1.5-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/README.md +946 -222
  2. package/dist/index.d.ts +1 -3
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +1 -3
  5. package/dist/lib/ai-service/prd-operations.d.ts.map +1 -1
  6. package/dist/lib/ai-service/prd-operations.js +3 -36
  7. package/dist/lib/benchmark/executor.d.ts +93 -0
  8. package/dist/lib/benchmark/executor.d.ts.map +1 -0
  9. package/dist/lib/benchmark/executor.js +395 -0
  10. package/dist/lib/benchmark/index.d.ts +16 -0
  11. package/dist/lib/benchmark/index.d.ts.map +1 -0
  12. package/dist/lib/benchmark/index.js +36 -0
  13. package/dist/lib/benchmark/metrics-collector.d.ts +84 -0
  14. package/dist/lib/benchmark/metrics-collector.d.ts.map +1 -0
  15. package/dist/lib/benchmark/metrics-collector.js +297 -0
  16. package/dist/lib/benchmark/operations/index.d.ts +70 -0
  17. package/dist/lib/benchmark/operations/index.d.ts.map +1 -0
  18. package/dist/lib/benchmark/operations/index.js +298 -0
  19. package/dist/lib/benchmark/orchestrator.d.ts +88 -0
  20. package/dist/lib/benchmark/orchestrator.d.ts.map +1 -0
  21. package/dist/lib/benchmark/orchestrator.js +337 -0
  22. package/dist/lib/benchmark/store.d.ts +140 -0
  23. package/dist/lib/benchmark/store.d.ts.map +1 -0
  24. package/dist/lib/benchmark/store.js +417 -0
  25. package/dist/lib/benchmark/types.d.ts +243 -60
  26. package/dist/lib/benchmark/types.d.ts.map +1 -1
  27. package/dist/lib/benchmark/types.js +7 -0
  28. package/dist/lib/benchmark/worktree-manager.d.ts +127 -0
  29. package/dist/lib/benchmark/worktree-manager.d.ts.map +1 -0
  30. package/dist/lib/benchmark/worktree-manager.js +325 -0
  31. package/dist/lib/benchmark/worktree-pool.d.ts +97 -0
  32. package/dist/lib/benchmark/worktree-pool.d.ts.map +1 -0
  33. package/dist/lib/benchmark/worktree-pool.js +198 -0
  34. package/dist/lib/executors/opencode-executor.js +5 -5
  35. package/dist/lib/index.d.ts +0 -5
  36. package/dist/lib/index.d.ts.map +1 -1
  37. package/dist/lib/index.js +1 -7
  38. package/dist/lib/task-execution-core.js +17 -1
  39. package/dist/lib/task-review.d.ts +7 -0
  40. package/dist/lib/task-review.d.ts.map +1 -1
  41. package/dist/lib/task-review.js +30 -10
  42. package/dist/services/prd.d.ts.map +1 -1
  43. package/dist/services/prd.js +20 -44
  44. package/dist/services/tasks.d.ts.map +1 -1
  45. package/dist/services/tasks.js +12 -54
  46. package/dist/test/benchmark/metrics.test.d.ts +7 -0
  47. package/dist/test/benchmark/metrics.test.d.ts.map +1 -0
  48. package/dist/test/benchmark/metrics.test.js +267 -0
  49. package/dist/test/benchmark/orchestrator.test.d.ts +12 -0
  50. package/dist/test/benchmark/orchestrator.test.d.ts.map +1 -0
  51. package/dist/test/benchmark/orchestrator.test.js +316 -0
  52. package/dist/test/benchmark/store.test.d.ts +7 -0
  53. package/dist/test/benchmark/store.test.d.ts.map +1 -0
  54. package/dist/test/benchmark/store.test.js +356 -0
  55. package/dist/test/benchmark/worktree.test.d.ts +7 -0
  56. package/dist/test/benchmark/worktree.test.d.ts.map +1 -0
  57. package/dist/test/benchmark/worktree.test.js +347 -0
  58. package/dist/test/lib/task-review.test.d.ts +2 -0
  59. package/dist/test/lib/task-review.test.d.ts.map +1 -0
  60. package/dist/test/lib/task-review.test.js +178 -0
  61. package/dist/test/services/task-service.test.js +31 -8
  62. package/package.json +2 -2
  63. package/dist/lib/benchmark/registry.d.ts +0 -11
  64. package/dist/lib/benchmark/registry.d.ts.map +0 -1
  65. package/dist/lib/benchmark/registry.js +0 -212
  66. package/dist/lib/benchmark/runner.d.ts +0 -6
  67. package/dist/lib/benchmark/runner.d.ts.map +0 -1
  68. package/dist/lib/benchmark/runner.js +0 -150
  69. package/dist/lib/benchmark/storage.d.ts +0 -13
  70. package/dist/lib/benchmark/storage.d.ts.map +0 -1
  71. package/dist/lib/benchmark/storage.js +0 -100
  72. package/dist/services/benchmark.d.ts +0 -26
  73. package/dist/services/benchmark.d.ts.map +0 -1
  74. package/dist/services/benchmark.js +0 -343
  75. package/dist/services/workflow-benchmark.d.ts +0 -34
  76. package/dist/services/workflow-benchmark.d.ts.map +0 -1
  77. package/dist/services/workflow-benchmark.js +0 -318
package/dist/index.d.ts CHANGED
@@ -15,10 +15,8 @@ export * from "./lib/task-loop-execution";
15
15
  export * from "./services/tasks";
16
16
  export * from "./services/workflow";
17
17
  export * from "./services/prd";
18
- export * from "./services/benchmark";
19
18
  export * from "./services/project-analysis";
20
- export * from "./lib/benchmark/registry";
21
- export * from "./lib/benchmark/types";
19
+ export * from "./lib/benchmark";
22
20
  export * from "./utils/ai-service-factory";
23
21
  export * from "./utils/task-o-matic-error";
24
22
  export * from "./utils/stack-detector";
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,cAAc,CAAC;AAC7B,cAAc,0BAA0B,CAAC;AACzC,cAAc,kBAAkB,CAAC;AACjC,cAAc,uBAAuB,CAAC;AACtC,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,iCAAiC,CAAC;AAChD,cAAc,2CAA2C,CAAC;AAC1D,cAAc,kCAAkC,CAAC;AACjD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,2BAA2B,CAAC;AAE1C,cAAc,kBAAkB,CAAC;AACjC,cAAc,qBAAqB,CAAC;AACpC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,sBAAsB,CAAC;AACrC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AAEtC,cAAc,4BAA4B,CAAC;AAC3C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,wBAAwB,CAAC;AAEvC,cAAc,aAAa,CAAC;AAO5B,cAAc,+BAA+B,CAAC;AAC9C,cAAc,sBAAsB,CAAC;AAErC,cAAc,WAAW,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,cAAc,CAAC;AAC7B,cAAc,0BAA0B,CAAC;AACzC,cAAc,kBAAkB,CAAC;AACjC,cAAc,uBAAuB,CAAC;AACtC,cAAc,gCAAgC,CAAC;AAC/C,cAAc,kCAAkC,CAAC;AACjD,cAAc,iCAAiC,CAAC;AAChD,cAAc,2CAA2C,CAAC;AAC1D,cAAc,kCAAkC,CAAC;AACjD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,2BAA2B,CAAC;AAE1C,cAAc,kBAAkB,CAAC;AACjC,cAAc,qBAAqB,CAAC;AACpC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,6BAA6B,CAAC;AAC5C,cAAc,iBAAiB,CAAC;AAEhC,cAAc,4BAA4B,CAAC;AAC3C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,wBAAwB,CAAC;AAEvC,cAAc,aAAa,CAAC;AAO5B,cAAc,+BAA+B,CAAC;AAC9C,cAAc,sBAAsB,CAAC;AAErC,cAAc,WAAW,CAAC"}
package/dist/index.js CHANGED
@@ -31,10 +31,8 @@ __exportStar(require("./lib/task-loop-execution"), exports);
31
31
  __exportStar(require("./services/tasks"), exports);
32
32
  __exportStar(require("./services/workflow"), exports);
33
33
  __exportStar(require("./services/prd"), exports);
34
- __exportStar(require("./services/benchmark"), exports);
35
34
  __exportStar(require("./services/project-analysis"), exports);
36
- __exportStar(require("./lib/benchmark/registry"), exports);
37
- __exportStar(require("./lib/benchmark/types"), exports);
35
+ __exportStar(require("./lib/benchmark"), exports);
38
36
  __exportStar(require("./utils/ai-service-factory"), exports);
39
37
  __exportStar(require("./utils/task-o-matic-error"), exports);
40
38
  __exportStar(require("./utils/stack-detector"), exports);
package/dist/lib/ai-service/prd-operations.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"prd-operations.d.ts","sourceRoot":"","sources":["../../../src/lib/ai-service/prd-operations.ts"],"names":[],"mappings":"AAEA,OAAO,EACL,QAAQ,EAER,gBAAgB,EAChB,gBAAgB,EAChB,WAAW,EAIX,SAAS,EACV,MAAM,aAAa,CAAC;AAerB,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAMnD,qBAAa,aAAc,SAAQ,cAAc;IACzC,QAAQ,CACZ,UAAU,EAAE,MAAM,EAClB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,gBAAgB,CAAC;IA6MtB,SAAS,CACb,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,CAAC;IA+GZ,oBAAoB,CACxB,UAAU,EAAE,MAAM,EAClB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,EAAE,CAAC;IA6Hd,kBAAkB,CACtB,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,MAAM,EAAE,EACnB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,WAAW,CAAC,EAAE;QACZ,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,EACD,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IA8F5B,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC;IAqBZ,WAAW,CACf,IAAI,EAAE,MAAM,EAAE,EACd,mBAAmB,EAAE,MAAM,EAC3B,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC;IA6BlB;;OAEG;IACG,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,WAAW,C
AAC,EAAE,MAAM,EACpB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC;QAAE,MAAM,EAAE,SAAS,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC;IAiJpD;;;OAGG;IACG,uBAAuB,CAC3B,eAAe,EAAE;QACf,WAAW,EAAE,MAAM,CAAC;QACpB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;QACtB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;KACvB,EACD,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,CAAC;IA0FlB;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAqB5B"}
1
+ {"version":3,"file":"prd-operations.d.ts","sourceRoot":"","sources":["../../../src/lib/ai-service/prd-operations.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EAER,gBAAgB,EAChB,gBAAgB,EAChB,WAAW,EAIX,SAAS,EACV,MAAM,aAAa,CAAC;AAYrB,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAOnD,qBAAa,aAAc,SAAQ,cAAc;IACzC,QAAQ,CACZ,UAAU,EAAE,MAAM,EAClB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,gBAAgB,CAAC;IA6MtB,SAAS,CACb,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,MAAM,EAChB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,CAAC;IA+GZ,oBAAoB,CACxB,UAAU,EAAE,MAAM,EAClB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,EAAE,CAAC;IA2Hd,kBAAkB,CACtB,UAAU,EAAE,MAAM,EAClB,SAAS,EAAE,MAAM,EAAE,EACnB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,WAAW,CAAC,EAAE;QACZ,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,EACD,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IA8F5B,WAAW,CACf,WAAW,EAAE,MAAM,EACnB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC;IAqBZ,WAAW,CACf,IAAI,EAAE,MAAM,EAAE,EACd,mBAAmB,EAAE,MAAM,EAC3B,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,GACjC,OAAO,CAAC,MAAM,CAAC;IA6BlB;;OAEG;IACG,YAAY,CAChB,UAAU,EAAE,MAAM,EAClB,WAAW,C
AAC,EAAE,MAAM,EACpB,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,EACpB,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,gBAAgB,CAAC,EAAE,MAAM,EACzB,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC;QAAE,MAAM,EAAE,SAAS,CAAC;QAAC,SAAS,EAAE,MAAM,CAAA;KAAE,CAAC;IAiJpD;;;OAGG;IACG,uBAAuB,CAC3B,eAAe,EAAE;QACf,WAAW,EAAE,MAAM,CAAC;QACpB,kBAAkB,CAAC,EAAE,MAAM,CAAC;QAC5B,QAAQ,EAAE,MAAM,CAAC;QACjB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;QACtB,KAAK,EAAE,MAAM,CAAC;QACd,aAAa,EAAE,MAAM,CAAC;KACvB,EACD,MAAM,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,EAC1B,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,WAAW,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,EAClC,qBAAqB,CAAC,EAAE,OAAO,GAC9B,OAAO,CAAC,MAAM,CAAC;IA0FlB;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAqB5B"}
package/dist/lib/ai-service/prd-operations.js CHANGED
@@ -1,37 +1,4 @@
1
1
  "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
2
  Object.defineProperty(exports, "__esModule", { value: true });
36
3
  exports.PRDOperations = void 0;
37
4
  const ai_1 = require("ai");
@@ -40,6 +7,7 @@ const prompts_1 = require("../../prompts");
40
7
  const filesystem_tools_1 = require("./filesystem-tools");
41
8
  const base_operations_1 = require("./base-operations");
42
9
  const task_o_matic_error_1 = require("../../utils/task-o-matic-error");
10
+ const prompts_2 = require("../../prompts");
43
11
  class PRDOperations extends base_operations_1.BaseOperations {
44
12
  async parsePRD(prdContent, config, promptOverride, userMessage, streamingOptions, retryConfig, workingDirectory, enableFilesystemTools) {
45
13
  // console.log(
@@ -321,7 +289,6 @@ Use these tools to understand the current project structure, existing code patte
321
289
  }
322
290
  prompt = promptResult.prompt;
323
291
  }
324
- const { PRD_QUESTION_SYSTEM_PROMPT } = await Promise.resolve().then(() => __importStar(require("../../prompts")));
325
292
  let response;
326
293
  if (enableFilesystemTools) {
327
294
  const model = this.modelProvider.getModel({
@@ -332,7 +299,7 @@ Use these tools to understand the current project structure, existing code patte
332
299
  const result = await (0, ai_1.streamText)({
333
300
  model,
334
301
  tools: allTools,
335
- system: PRD_QUESTION_SYSTEM_PROMPT +
302
+ system: prompts_2.PRD_QUESTION_SYSTEM_PROMPT +
336
303
  `\n\nYou have access to filesystem tools to check existing code/structure if needed.`,
337
304
  messages: [{ role: "user", content: userMessage || prompt }],
338
305
  maxRetries: 0,
@@ -360,7 +327,7 @@ Use these tools to understand the current project structure, existing code patte
360
327
  response = await result.text;
361
328
  }
362
329
  else {
363
- response = await this.streamText("", config, PRD_QUESTION_SYSTEM_PROMPT, userMessage || prompt, streamingOptions, { maxAttempts: 1 });
330
+ response = await this.streamText("", config, prompts_2.PRD_QUESTION_SYSTEM_PROMPT, userMessage || prompt, streamingOptions, { maxAttempts: 1 });
364
331
  }
365
332
  const parseResult = this.jsonParser.parseJSONFromResponse(response);
366
333
  if (!parseResult.success) {
@@ -0,0 +1,93 @@
1
+ /**
2
+ * BenchmarkExecutor - Per-worktree execution logic for benchmarks
3
+ *
4
+ * This class handles executing benchmark operations in isolated worktrees.
5
+ * It supports operations, single task execution, task loops, and full workflows.
6
+ * Each execution captures timing, token metrics, and code changes.
7
+ */
8
+ import type { Worktree } from "./worktree-manager";
9
+ import { MetricsCollector } from "./metrics-collector";
10
+ import type { BenchmarkModelConfig, BenchmarkModelResult, OperationBenchmarkInput, ExecutionBenchmarkInput, ExecuteLoopBenchmarkInput, WorkflowBenchmarkInput, BenchmarkableOperation } from "./types";
11
+ /**
12
+ * BenchmarkExecutor runs benchmark operations in isolated worktrees
13
+ * and collects comprehensive metrics for comparison.
14
+ */
15
+ export declare class BenchmarkExecutor {
16
+ private metricsCollector;
17
+ private operationRegistry;
18
+ constructor(metricsCollector?: MetricsCollector, operationRegistry?: Map<string, BenchmarkableOperation>);
19
+ /**
20
+ * Register an operation for benchmarking
21
+ */
22
+ registerOperation(operation: BenchmarkableOperation): void;
23
+ /**
24
+ * Get a registered operation by ID
25
+ */
26
+ getOperation(id: string): BenchmarkableOperation | undefined;
27
+ /**
28
+ * List all registered operations
29
+ */
30
+ listOperations(): BenchmarkableOperation[];
31
+ /**
32
+ * Execute a registered operation in a worktree
33
+ *
34
+ * @param worktree - The worktree to execute in
35
+ * @param model - Model configuration to use
36
+ * @param input - Operation input parameters
37
+ * @param baseCommit - Base commit for metrics comparison
38
+ * @returns Benchmark result with metrics
39
+ */
40
+ executeOperation(worktree: Worktree, model: BenchmarkModelConfig, input: OperationBenchmarkInput, baseCommit: string): Promise<BenchmarkModelResult>;
41
+ /**
42
+ * Execute a single task in a worktree
43
+ *
44
+ * @param worktree - The worktree to execute in
45
+ * @param model - Model configuration to use
46
+ * @param input - Task execution input
47
+ * @param baseCommit - Base commit for metrics comparison
48
+ * @returns Benchmark result with metrics
49
+ */
50
+ executeTask(worktree: Worktree, model: BenchmarkModelConfig, input: ExecutionBenchmarkInput, baseCommit: string): Promise<BenchmarkModelResult>;
51
+ /**
52
+ * Execute a task loop in a worktree
53
+ *
54
+ * @param worktree - The worktree to execute in
55
+ * @param model - Model configuration to use
56
+ * @param input - Execute loop input
57
+ * @param baseCommit - Base commit for metrics comparison
58
+ * @returns Benchmark result with metrics
59
+ */
60
+ executeLoop(worktree: Worktree, model: BenchmarkModelConfig, input: ExecuteLoopBenchmarkInput, baseCommit: string): Promise<BenchmarkModelResult>;
61
+ /**
62
+ * Execute a full workflow in a worktree
63
+ *
64
+ * For workflow benchmarks, we execute a series of steps:
65
+ * 1. Initialize project (if needed)
66
+ * 2. Parse PRD to generate tasks
67
+ * 3. Execute tasks with the given model
68
+ *
69
+ * @param worktree - The worktree to execute in
70
+ * @param model - Model configuration to use
71
+ * @param input - Workflow input
72
+ * @param baseCommit - Base commit for metrics comparison
73
+ * @returns Benchmark result with metrics
74
+ */
75
+ executeWorkflow(worktree: Worktree, model: BenchmarkModelConfig, input: WorkflowBenchmarkInput, baseCommit: string): Promise<BenchmarkModelResult>;
76
+ /**
77
+ * Build AI options for a model configuration
78
+ */
79
+ private buildModelAIOptions;
80
+ /**
81
+ * Collect all metrics for a completed execution
82
+ */
83
+ private collectMetrics;
84
+ /**
85
+ * Create a success result
86
+ */
87
+ private createSuccessResult;
88
+ /**
89
+ * Create an error result
90
+ */
91
+ private createErrorResult;
92
+ }
93
+ //# sourceMappingURL=executor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../../src/lib/benchmark/executor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EACV,oBAAoB,EACpB,oBAAoB,EAEpB,uBAAuB,EACvB,uBAAuB,EACvB,yBAAyB,EACzB,sBAAsB,EAGtB,sBAAsB,EACvB,MAAM,SAAS,CAAC;AAqBjB;;;GAGG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,iBAAiB,CAAsC;gBAG7D,gBAAgB,CAAC,EAAE,gBAAgB,EACnC,iBAAiB,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,sBAAsB,CAAC;IAMzD;;OAEG;IACH,iBAAiB,CAAC,SAAS,EAAE,sBAAsB,GAAG,IAAI;IAI1D;;OAEG;IACH,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,sBAAsB,GAAG,SAAS;IAI5D;;OAEG;IACH,cAAc,IAAI,sBAAsB,EAAE;IAI1C;;;;;;;;OAQG;IACG,gBAAgB,CACpB,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,oBAAoB,EAC3B,KAAK,EAAE,uBAAuB,EAC9B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,oBAAoB,CAAC;IAuFhC;;;;;;;;OAQG;IACG,WAAW,CACf,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,oBAAoB,EAC3B,KAAK,EAAE,uBAAuB,EAC9B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,oBAAoB,CAAC;IA0EhC;;;;;;;;OAQG;IACG,WAAW,CACf,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,oBAAoB,EAC3B,KAAK,EAAE,yBAAyB,EAChC,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,oBAAoB,CAAC;IAmEhC;;;;;;;;;;;;;OAaG;IACG,eAAe,CACnB,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,oBAAoB,EAC3B,KAAK,EAAE,sBAAsB,EAC7B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,oBAAoB,CAAC;IA8GhC;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAsB3B;;OAEG;YACW,cAAc;IAkC5B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAkB3B;;OAEG;IACH,OAAO,CAAC,iBAAiB;CA6B1B"}
@@ -0,0 +1,395 @@
1
+ "use strict";
2
+ /**
3
+ * BenchmarkExecutor - Per-worktree execution logic for benchmarks
4
+ *
5
+ * This class handles executing benchmark operations in isolated worktrees.
6
+ * It supports operations, single task execution, task loops, and full workflows.
7
+ * Each execution captures timing, token metrics, and code changes.
8
+ */
9
+ Object.defineProperty(exports, "__esModule", { value: true });
10
+ exports.BenchmarkExecutor = void 0;
11
+ const metrics_collector_1 = require("./metrics-collector");
12
+ const logger_1 = require("../logger");
13
+ const config_1 = require("../config");
14
+ const task_execution_core_1 = require("../task-execution-core");
15
+ const task_loop_execution_1 = require("../task-loop-execution");
16
+ const workflow_1 = require("../../services/workflow");
17
+ const prd_1 = require("../../services/prd");
18
+ const node_fs_1 = require("node:fs");
19
+ const node_path_1 = require("node:path");
20
+ const task_o_matic_error_1 = require("../../utils/task-o-matic-error");
21
+ /**
22
+ * BenchmarkExecutor runs benchmark operations in isolated worktrees
23
+ * and collects comprehensive metrics for comparison.
24
+ */
25
+ class BenchmarkExecutor {
26
+ metricsCollector;
27
+ operationRegistry;
28
+ constructor(metricsCollector, operationRegistry) {
29
+ this.metricsCollector = metricsCollector ?? new metrics_collector_1.MetricsCollector();
30
+ this.operationRegistry = operationRegistry ?? new Map();
31
+ }
32
+ /**
33
+ * Register an operation for benchmarking
34
+ */
35
+ registerOperation(operation) {
36
+ this.operationRegistry.set(operation.id, operation);
37
+ }
38
+ /**
39
+ * Get a registered operation by ID
40
+ */
41
+ getOperation(id) {
42
+ return this.operationRegistry.get(id);
43
+ }
44
+ /**
45
+ * List all registered operations
46
+ */
47
+ listOperations() {
48
+ return Array.from(this.operationRegistry.values());
49
+ }
50
+ /**
51
+ * Execute a registered operation in a worktree
52
+ *
53
+ * @param worktree - The worktree to execute in
54
+ * @param model - Model configuration to use
55
+ * @param input - Operation input parameters
56
+ * @param baseCommit - Base commit for metrics comparison
57
+ * @returns Benchmark result with metrics
58
+ */
59
+ async executeOperation(worktree, model, input, baseCommit) {
60
+ const modelId = `${model.provider}:${model.model}`;
61
+ const startedAt = Date.now();
62
+ let timeToFirstOutput;
63
+ logger_1.logger.info(`Executing operation ${input.operationId} with ${modelId}`);
64
+ try {
65
+ // Get the operation from registry
66
+ const operation = this.operationRegistry.get(input.operationId);
67
+ if (!operation) {
68
+ throw (0, task_o_matic_error_1.createStandardError)(task_o_matic_error_1.TaskOMaticErrorCodes.INVALID_INPUT, `Operation not found: ${input.operationId}`, {
69
+ context: `The requested operation "${input.operationId}" is not registered.`,
70
+ suggestions: ["Check the operation ID", "List available operations"],
71
+ });
72
+ }
73
+ // Validate input
74
+ if (!operation.validateInput(input.params)) {
75
+ throw (0, task_o_matic_error_1.createStandardError)(task_o_matic_error_1.TaskOMaticErrorCodes.INVALID_INPUT, `Invalid input for operation ${input.operationId}`, {
76
+ context: `The provided parameters do not match the requirements for "${input.operationId}".`,
77
+ });
78
+ }
79
+ // Setup working directory to the worktree
80
+ await (0, config_1.setupWorkingDirectory)(worktree.path);
81
+ // Build AI options for this model
82
+ const aiOptions = this.buildModelAIOptions(model);
83
+ // Token tracking
84
+ const tokenTracker = { prompt: 0, completion: 0 };
85
+ // Create streaming options with token tracking
86
+ const streamingOptions = {
87
+ enabled: true,
88
+ onChunk: () => {
89
+ if (!timeToFirstOutput) {
90
+ timeToFirstOutput = Date.now() - startedAt;
91
+ }
92
+ },
93
+ onFinish: (result) => {
94
+ if (result.usage) {
95
+ tokenTracker.prompt += result.usage.promptTokens ?? 0;
96
+ tokenTracker.completion += result.usage.completionTokens ?? 0;
97
+ }
98
+ },
99
+ };
100
+ // Execute the operation
101
+ const output = await operation.execute(input.params, aiOptions, streamingOptions);
102
+ const completedAt = Date.now();
103
+ // Collect metrics
104
+ const metrics = await this.collectMetrics(worktree.path, baseCommit, startedAt, completedAt, timeToFirstOutput, tokenTracker);
105
+ return this.createSuccessResult(worktree, modelId, output, metrics, completedAt - startedAt);
106
+ }
107
+ catch (error) {
108
+ const completedAt = Date.now();
109
+ return this.createErrorResult(worktree, modelId, error, completedAt - startedAt, startedAt, completedAt, timeToFirstOutput);
110
+ }
111
+ }
112
+ /**
113
+ * Execute a single task in a worktree
114
+ *
115
+ * @param worktree - The worktree to execute in
116
+ * @param model - Model configuration to use
117
+ * @param input - Task execution input
118
+ * @param baseCommit - Base commit for metrics comparison
119
+ * @returns Benchmark result with metrics
120
+ */
121
+ async executeTask(worktree, model, input, baseCommit) {
122
+ const modelId = `${model.provider}:${model.model}`;
123
+ const startedAt = Date.now();
124
+ let timeToFirstOutput;
125
+ logger_1.logger.info(`Executing task ${input.taskId} with ${modelId}`);
126
+ try {
127
+ // Setup working directory to the worktree
128
+ await (0, config_1.setupWorkingDirectory)(worktree.path);
129
+ // Token tracking
130
+ const tokenTracker = { prompt: 0, completion: 0 };
131
+ // Build execution config with model override
132
+ // The executeTaskCore takes taskId and a TaskExecutionConfig object
133
+ const config = {
134
+ tool: "opencode", // Default tool
135
+ enableRetry: (input.maxRetries ?? 1) > 1,
136
+ maxRetries: input.maxRetries ?? 1,
137
+ verificationCommands: input.verificationCommands ?? [],
138
+ executorConfig: {
139
+ model: `${model.provider}:${model.model}`,
140
+ },
141
+ };
142
+ // Execute the task
143
+ const result = await (0, task_execution_core_1.executeTaskCore)(input.taskId, config);
144
+ const completedAt = Date.now();
145
+ if (!timeToFirstOutput) {
146
+ timeToFirstOutput = completedAt - startedAt;
147
+ }
148
+ // Collect metrics
149
+ const metrics = await this.collectMetrics(worktree.path, baseCommit, startedAt, completedAt, timeToFirstOutput, tokenTracker, input.verificationCommands);
150
+ // TaskExecutionResult has success and attempts, extract error from last attempt if failed
151
+ const lastAttempt = result.attempts[result.attempts.length - 1];
152
+ const errorMsg = result.success ? undefined : lastAttempt?.error;
153
+ const status = result.success ? "success" : "failed";
154
+ return {
155
+ modelId,
156
+ worktree,
157
+ status,
158
+ duration: completedAt - startedAt,
159
+ output: result,
160
+ error: errorMsg,
161
+ metrics,
162
+ timestamp: completedAt,
163
+ };
164
+ }
165
+ catch (error) {
166
+ const completedAt = Date.now();
167
+ return this.createErrorResult(worktree, modelId, error, completedAt - startedAt, startedAt, completedAt, timeToFirstOutput);
168
+ }
169
+ }
170
+ /**
171
+ * Execute a task loop in a worktree
172
+ *
173
+ * @param worktree - The worktree to execute in
174
+ * @param model - Model configuration to use
175
+ * @param input - Execute loop input
176
+ * @param baseCommit - Base commit for metrics comparison
177
+ * @returns Benchmark result with metrics
178
+ */
179
+ async executeLoop(worktree, model, input, baseCommit) {
180
+ const modelId = `${model.provider}:${model.model}`;
181
+ const startedAt = Date.now();
182
+ let timeToFirstOutput;
183
+ logger_1.logger.info(`Executing task loop with ${modelId}`);
184
+ try {
185
+ // Setup working directory to the worktree
186
+ await (0, config_1.setupWorkingDirectory)(worktree.path);
187
+ // Token tracking
188
+ const tokenTracker = { prompt: 0, completion: 0 };
189
+ // Override model in loop options
190
+ const loopOptions = {
191
+ ...input.loopOptions,
192
+ config: {
193
+ ...input.loopOptions.config,
194
+ model: `${model.provider}:${model.model}`,
195
+ },
196
+ };
197
+ // Execute the loop
198
+ const result = await (0, task_loop_execution_1.executeTaskLoop)(loopOptions);
199
+ const completedAt = Date.now();
200
+ if (!timeToFirstOutput) {
201
+ timeToFirstOutput = completedAt - startedAt;
202
+ }
203
+ // Collect metrics
204
+ const metrics = await this.collectMetrics(worktree.path, baseCommit, startedAt, completedAt, timeToFirstOutput, tokenTracker, input.loopOptions.config?.verificationCommands);
205
+ const hasFailures = result.failedTasks > 0;
206
+ return {
207
+ modelId,
208
+ worktree,
209
+ status: hasFailures ? "failed" : "success",
210
+ duration: completedAt - startedAt,
211
+ output: result,
212
+ error: hasFailures ? `${result.failedTasks} tasks failed` : undefined,
213
+ metrics,
214
+ timestamp: completedAt,
215
+ };
216
+ }
217
+ catch (error) {
218
+ const completedAt = Date.now();
219
+ return this.createErrorResult(worktree, modelId, error, completedAt - startedAt, startedAt, completedAt, timeToFirstOutput);
220
+ }
221
+ }
222
+ /**
223
+ * Execute a full workflow in a worktree
224
+ *
225
+ * For workflow benchmarks, we execute a series of steps:
226
+ * 1. Initialize project (if needed)
227
+ * 2. Parse PRD to generate tasks
228
+ * 3. Execute tasks with the given model
229
+ *
230
+ * @param worktree - The worktree to execute in
231
+ * @param model - Model configuration to use
232
+ * @param input - Workflow input
233
+ * @param baseCommit - Base commit for metrics comparison
234
+ * @returns Benchmark result with metrics
235
+ */
236
+ async executeWorkflow(worktree, model, input, baseCommit) {
237
+ const modelId = `${model.provider}:${model.model}`;
238
+ const startedAt = Date.now();
239
+ let timeToFirstOutput;
240
+ logger_1.logger.info(`Executing workflow with ${modelId}`);
241
+ try {
242
+ // Setup working directory to the worktree
243
+ const projectDir = input.projectDir ?? worktree.path;
244
+ await (0, config_1.setupWorkingDirectory)(projectDir);
245
+ // Token tracking
246
+ const tokenTracker = { prompt: 0, completion: 0 };
247
+ // Create workflow service
248
+ const workflowService = new workflow_1.WorkflowService();
249
+ // Build AI options for the model
250
+ const aiOptions = {
251
+ aiProvider: model.provider,
252
+ aiModel: model.model,
253
+ aiReasoning: model.reasoningTokens?.toString(),
254
+ };
255
+ // Execute workflow steps based on collected responses
256
+ const results = {};
257
+ // Step 1: Initialize project if needed
258
+ if (input.collectedResponses.projectName) {
259
+ results.init = await workflowService.initializeProject({
260
+ projectName: input.collectedResponses.projectName,
261
+ projectDir,
262
+ initMethod: input.collectedResponses.initMethod,
263
+ projectDescription: input.collectedResponses.projectDescription,
264
+ aiOptions,
265
+ stackConfig: input.collectedResponses.stackConfig,
266
+ });
267
+ }
268
+ // Step 2: Parse PRD if content is provided
269
+ if (input.collectedResponses.prdContent || input.collectedResponses.prdFile) {
270
+ const prdService = new prd_1.PRDService();
271
+ if (input.collectedResponses.prdContent) {
272
+ // If we have PRD content, save it first
273
+ const prdDir = (0, node_path_1.join)(projectDir, ".task-o-matic", "prd");
274
+ (0, node_fs_1.mkdirSync)(prdDir, { recursive: true });
275
+ const prdPath = (0, node_path_1.join)(prdDir, "benchmark-prd.md");
276
+ (0, node_fs_1.writeFileSync)(prdPath, input.collectedResponses.prdContent);
277
+ results.prdParse = await prdService.parsePRD({
278
+ file: prdPath,
279
+ aiOptions,
280
+ });
281
+ }
282
+ else if (input.collectedResponses.prdFile) {
283
+ results.prdParse = await prdService.parsePRD({
284
+ file: input.collectedResponses.prdFile,
285
+ aiOptions,
286
+ });
287
+ }
288
+ }
289
+ // Step 3: Execute tasks if requested
290
+ if (input.collectedResponses.generateTasks) {
291
+ const loopOptions = {
292
+ filters: { status: "todo" },
293
+ tool: (input.workflowOptions.executeTool ?? "opencode"),
294
+ config: {
295
+ maxRetries: input.workflowOptions.executeMaxRetries ?? 3,
296
+ verificationCommands: input.workflowOptions.verificationCommands ?? [],
297
+ model: modelId,
298
+ },
299
+ };
300
+ results.execution = await (0, task_loop_execution_1.executeTaskLoop)(loopOptions);
301
+ }
302
+ const completedAt = Date.now();
303
+ if (!timeToFirstOutput) {
304
+ timeToFirstOutput = completedAt - startedAt;
305
+ }
306
+ // Collect metrics
307
+ const verificationCommands = input.workflowOptions.verificationCommands ?? [];
308
+ const metrics = await this.collectMetrics(projectDir, baseCommit, startedAt, completedAt, timeToFirstOutput, tokenTracker, verificationCommands);
309
+ return this.createSuccessResult(worktree, modelId, results, metrics, completedAt - startedAt);
310
+ }
311
+ catch (error) {
312
+ const completedAt = Date.now();
313
+ return this.createErrorResult(worktree, modelId, error, completedAt - startedAt, startedAt, completedAt, timeToFirstOutput);
314
+ }
315
+ }
316
+ /**
317
+ * Build AI options for a model configuration
318
+ */
319
+ buildModelAIOptions(model) {
320
+ // Get API key from environment based on provider
321
+ const envKeyMap = {
322
+ anthropic: "ANTHROPIC_API_KEY",
323
+ openai: "OPENAI_API_KEY",
324
+ openrouter: "OPENROUTER_API_KEY",
325
+ google: "GOOGLE_API_KEY",
326
+ gemini: "GEMINI_API_KEY",
327
+ zai: "ZAI_API_KEY",
328
+ };
329
+ const envKey = envKeyMap[model.provider] ?? `${model.provider.toUpperCase()}_API_KEY`;
330
+ const apiKey = process.env[envKey];
331
+ return {
332
+ aiProvider: model.provider,
333
+ aiModel: model.model,
334
+ aiKey: apiKey,
335
+ aiReasoning: model.reasoningTokens?.toString(),
336
+ };
337
+ }
338
+ /**
339
+ * Collect all metrics for a completed execution
340
+ */
341
+ async collectMetrics(worktreePath, baseCommit, startedAt, completedAt, timeToFirstOutput, tokenTracker, verificationCommands) {
342
+ const timing = {
343
+ startedAt,
344
+ completedAt,
345
+ duration: completedAt - startedAt,
346
+ timeToFirstOutput,
347
+ };
348
+ const tokens = tokenTracker.prompt > 0 || tokenTracker.completion > 0
349
+ ? {
350
+ prompt: tokenTracker.prompt,
351
+ completion: tokenTracker.completion,
352
+ total: tokenTracker.prompt + tokenTracker.completion,
353
+ }
354
+ : undefined;
355
+ return this.metricsCollector.collectAll(worktreePath, baseCommit, timing, tokens, verificationCommands);
356
+ }
357
+ /**
358
+ * Create a success result
359
+ */
360
+ createSuccessResult(worktree, modelId, output, metrics, duration) {
361
+ return {
362
+ modelId,
363
+ worktree,
364
+ status: "success",
365
+ duration,
366
+ output,
367
+ metrics,
368
+ timestamp: Date.now(),
369
+ };
370
+ }
371
+ /**
372
+ * Create an error result
373
+ */
374
+ createErrorResult(worktree, modelId, error, duration, startedAt, completedAt, timeToFirstOutput) {
375
+ const errorMessage = error instanceof Error ? error.message : String(error);
376
+ logger_1.logger.error(`Execution failed for ${modelId}: ${errorMessage}`);
377
+ return {
378
+ modelId,
379
+ worktree,
380
+ status: "error",
381
+ duration,
382
+ error: errorMessage,
383
+ metrics: {
384
+ timing: {
385
+ startedAt,
386
+ completedAt,
387
+ duration,
388
+ timeToFirstOutput,
389
+ },
390
+ },
391
+ timestamp: completedAt,
392
+ };
393
+ }
394
+ }
395
+ exports.BenchmarkExecutor = BenchmarkExecutor;
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Benchmark System
3
+ *
4
+ * A unified system for benchmarking AI models across various operations.
5
+ * Supports parallel execution using git worktrees, persistent storage,
6
+ * and comprehensive metrics collection.
7
+ */
8
+ export * from "./types";
9
+ export * from "./worktree-manager";
10
+ export * from "./worktree-pool";
11
+ export * from "./store";
12
+ export { MetricsCollector, type VerificationOptions } from "./metrics-collector";
13
+ export * from "./executor";
14
+ export * from "./orchestrator";
15
+ export * from "./operations";
16
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/lib/benchmark/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAGH,cAAc,SAAS,CAAC;AACxB,cAAc,oBAAoB,CAAC;AACnC,cAAc,iBAAiB,CAAC;AAChC,cAAc,SAAS,CAAC;AACxB,OAAO,EAAE,gBAAgB,EAAE,KAAK,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAGjF,cAAc,YAAY,CAAC;AAC3B,cAAc,gBAAgB,CAAC;AAG/B,cAAc,cAAc,CAAC"}
@@ -0,0 +1,36 @@
1
+ "use strict";
2
+ /**
3
+ * Benchmark System
4
+ *
5
+ * A unified system for benchmarking AI models across various operations.
6
+ * Supports parallel execution using git worktrees, persistent storage,
7
+ * and comprehensive metrics collection.
8
+ */
9
// Re-export helper: exposes property `k` of module `m` on `o` under the name
// `k2` (defaults to `k`). An already-installed helper on `this` is reused.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Fallback for engines without Object.create: plain value copy
        return function (o, m, k, k2) {
            if (k2 === undefined) k2 = k;
            o[k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        if (k2 === undefined) k2 = k;
        var desc = Object.getOwnPropertyDescriptor(m, k);
        // The source descriptor is reused only when it is an accessor on an ES
        // module, or a data property that is neither writable nor configurable.
        var reusable = desc && ("get" in desc ? m.__esModule : !(desc.writable || desc.configurable));
        if (!reusable) {
            // Otherwise install a getter so the binding stays live
            desc = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, k2, desc);
    };
})();
20
// Re-export helper: binds every enumerable property of module `m` onto
// `exports`, skipping "default" and any name `exports` already owns.
var __exportStar = (this && this.__exportStar) || function (m, exports) {
    for (var name in m) {
        if (name === "default") continue;
        if (Object.prototype.hasOwnProperty.call(exports, name)) continue;
        __createBinding(exports, m, name);
    }
};
23
+ Object.defineProperty(exports, "__esModule", { value: true });
24
+ exports.MetricsCollector = void 0;
25
+ // Infrastructure
26
+ __exportStar(require("./types"), exports);
27
+ __exportStar(require("./worktree-manager"), exports);
28
+ __exportStar(require("./worktree-pool"), exports);
29
+ __exportStar(require("./store"), exports);
30
+ var metrics_collector_1 = require("./metrics-collector");
31
+ Object.defineProperty(exports, "MetricsCollector", { enumerable: true, get: function () { return metrics_collector_1.MetricsCollector; } });
32
+ // Execution & Coordination
33
+ __exportStar(require("./executor"), exports);
34
+ __exportStar(require("./orchestrator"), exports);
35
+ // Operations
36
+ __exportStar(require("./operations"), exports);