@united-workforce/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/LICENSE +21 -0
  2. package/dist/cli.d.ts +3 -0
  3. package/dist/cli.d.ts.map +1 -0
  4. package/dist/cli.js +14 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/commands/diff.d.ts +3 -0
  7. package/dist/commands/diff.d.ts.map +1 -0
  8. package/dist/commands/diff.js +36 -0
  9. package/dist/commands/diff.js.map +1 -0
  10. package/dist/commands/format.d.ts +11 -0
  11. package/dist/commands/format.d.ts.map +1 -0
  12. package/dist/commands/format.js +114 -0
  13. package/dist/commands/format.js.map +1 -0
  14. package/dist/commands/index.d.ts +8 -0
  15. package/dist/commands/index.d.ts.map +1 -0
  16. package/dist/commands/index.js +7 -0
  17. package/dist/commands/index.js.map +1 -0
  18. package/dist/commands/list.d.ts +3 -0
  19. package/dist/commands/list.d.ts.map +1 -0
  20. package/dist/commands/list.js +35 -0
  21. package/dist/commands/list.js.map +1 -0
  22. package/dist/commands/read.d.ts +10 -0
  23. package/dist/commands/read.d.ts.map +1 -0
  24. package/dist/commands/read.js +37 -0
  25. package/dist/commands/read.js.map +1 -0
  26. package/dist/commands/report.d.ts +3 -0
  27. package/dist/commands/report.d.ts.map +1 -0
  28. package/dist/commands/report.js +30 -0
  29. package/dist/commands/report.js.map +1 -0
  30. package/dist/commands/run.d.ts +3 -0
  31. package/dist/commands/run.d.ts.map +1 -0
  32. package/dist/commands/run.js +64 -0
  33. package/dist/commands/run.js.map +1 -0
  34. package/dist/commands/types.d.ts +9 -0
  35. package/dist/commands/types.d.ts.map +1 -0
  36. package/dist/commands/types.js +2 -0
  37. package/dist/commands/types.js.map +1 -0
  38. package/dist/index.d.ts +8 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +6 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/judge/builtin/frontmatter.d.ts +8 -0
  43. package/dist/judge/builtin/frontmatter.d.ts.map +1 -0
  44. package/dist/judge/builtin/frontmatter.js +75 -0
  45. package/dist/judge/builtin/frontmatter.js.map +1 -0
  46. package/dist/judge/builtin/hallucination.d.ts +10 -0
  47. package/dist/judge/builtin/hallucination.d.ts.map +1 -0
  48. package/dist/judge/builtin/hallucination.js +16 -0
  49. package/dist/judge/builtin/hallucination.js.map +1 -0
  50. package/dist/judge/builtin/index.d.ts +7 -0
  51. package/dist/judge/builtin/index.d.ts.map +1 -0
  52. package/dist/judge/builtin/index.js +6 -0
  53. package/dist/judge/builtin/index.js.map +1 -0
  54. package/dist/judge/builtin/read-steps.d.ts +4 -0
  55. package/dist/judge/builtin/read-steps.d.ts.map +1 -0
  56. package/dist/judge/builtin/read-steps.js +12 -0
  57. package/dist/judge/builtin/read-steps.js.map +1 -0
  58. package/dist/judge/builtin/token-stats.d.ts +8 -0
  59. package/dist/judge/builtin/token-stats.d.ts.map +1 -0
  60. package/dist/judge/builtin/token-stats.js +35 -0
  61. package/dist/judge/builtin/token-stats.js.map +1 -0
  62. package/dist/judge/builtin/types.d.ts +15 -0
  63. package/dist/judge/builtin/types.d.ts.map +1 -0
  64. package/dist/judge/builtin/types.js +2 -0
  65. package/dist/judge/builtin/types.js.map +1 -0
  66. package/dist/judge/builtin/upstream.d.ts +10 -0
  67. package/dist/judge/builtin/upstream.d.ts.map +1 -0
  68. package/dist/judge/builtin/upstream.js +16 -0
  69. package/dist/judge/builtin/upstream.js.map +1 -0
  70. package/dist/judge/index.d.ts +3 -0
  71. package/dist/judge/index.d.ts.map +1 -0
  72. package/dist/judge/index.js +2 -0
  73. package/dist/judge/index.js.map +1 -0
  74. package/dist/judge/types.d.ts +15 -0
  75. package/dist/judge/types.d.ts.map +1 -0
  76. package/dist/judge/types.js +2 -0
  77. package/dist/judge/types.js.map +1 -0
  78. package/dist/runner/collect.d.ts +16 -0
  79. package/dist/runner/collect.d.ts.map +1 -0
  80. package/dist/runner/collect.js +129 -0
  81. package/dist/runner/collect.js.map +1 -0
  82. package/dist/runner/execute.d.ts +9 -0
  83. package/dist/runner/execute.d.ts.map +1 -0
  84. package/dist/runner/execute.js +72 -0
  85. package/dist/runner/execute.js.map +1 -0
  86. package/dist/runner/index.d.ts +5 -0
  87. package/dist/runner/index.d.ts.map +1 -0
  88. package/dist/runner/index.js +4 -0
  89. package/dist/runner/index.js.map +1 -0
  90. package/dist/runner/prepare.d.ts +7 -0
  91. package/dist/runner/prepare.d.ts.map +1 -0
  92. package/dist/runner/prepare.js +38 -0
  93. package/dist/runner/prepare.js.map +1 -0
  94. package/dist/runner/types.d.ts +70 -0
  95. package/dist/runner/types.d.ts.map +1 -0
  96. package/dist/runner/types.js +2 -0
  97. package/dist/runner/types.js.map +1 -0
  98. package/dist/storage/index.d.ts +4 -0
  99. package/dist/storage/index.d.ts.map +1 -0
  100. package/dist/storage/index.js +3 -0
  101. package/dist/storage/index.js.map +1 -0
  102. package/dist/storage/schemas.d.ts +7 -0
  103. package/dist/storage/schemas.d.ts.map +1 -0
  104. package/dist/storage/schemas.js +118 -0
  105. package/dist/storage/schemas.js.map +1 -0
  106. package/dist/storage/store.d.ts +10 -0
  107. package/dist/storage/store.d.ts.map +1 -0
  108. package/dist/storage/store.js +36 -0
  109. package/dist/storage/store.js.map +1 -0
  110. package/dist/storage/types.d.ts +30 -0
  111. package/dist/storage/types.d.ts.map +1 -0
  112. package/dist/storage/types.js +2 -0
  113. package/dist/storage/types.js.map +1 -0
  114. package/dist/task/index.d.ts +3 -0
  115. package/dist/task/index.d.ts.map +1 -0
  116. package/dist/task/index.js +2 -0
  117. package/dist/task/index.js.map +1 -0
  118. package/dist/task/loader.d.ts +6 -0
  119. package/dist/task/loader.d.ts.map +1 -0
  120. package/dist/task/loader.js +69 -0
  121. package/dist/task/loader.js.map +1 -0
  122. package/dist/task/types.d.ts +27 -0
  123. package/dist/task/types.d.ts.map +1 -0
  124. package/dist/task/types.js +2 -0
  125. package/dist/task/types.js.map +1 -0
  126. package/package.json +45 -0
  127. package/src/cli.ts +22 -0
  128. package/src/commands/diff.ts +38 -0
  129. package/src/commands/format.ts +148 -0
  130. package/src/commands/index.ts +7 -0
  131. package/src/commands/list.ts +43 -0
  132. package/src/commands/read.ts +41 -0
  133. package/src/commands/report.ts +32 -0
  134. package/src/commands/run.ts +84 -0
  135. package/src/commands/types.ts +9 -0
  136. package/src/index.ts +34 -0
  137. package/src/judge/builtin/frontmatter.ts +95 -0
  138. package/src/judge/builtin/hallucination.ts +17 -0
  139. package/src/judge/builtin/index.ts +6 -0
  140. package/src/judge/builtin/read-steps.ts +14 -0
  141. package/src/judge/builtin/token-stats.ts +53 -0
  142. package/src/judge/builtin/types.ts +16 -0
  143. package/src/judge/builtin/upstream.ts +17 -0
  144. package/src/judge/index.ts +10 -0
  145. package/src/judge/types.ts +15 -0
  146. package/src/runner/collect.ts +172 -0
  147. package/src/runner/execute.ts +87 -0
  148. package/src/runner/index.ts +15 -0
  149. package/src/runner/prepare.ts +45 -0
  150. package/src/runner/types.ts +85 -0
  151. package/src/storage/index.ts +9 -0
  152. package/src/storage/schemas.ts +123 -0
  153. package/src/storage/store.ts +42 -0
  154. package/src/storage/types.ts +33 -0
  155. package/src/task/index.ts +2 -0
  156. package/src/task/loader.ts +74 -0
  157. package/src/task/types.ts +28 -0
@@ -0,0 +1,28 @@
1
+ /** One judge entry as declared in task.yaml. `entry` and `schema` are nullable; per their field docs they are required when `builtin` is false. */
2
+ export type JudgeEntry = {
3
+ name: string;
4
+ weight: number; // NOTE(review): presumably the judge's relative weight when results are combined — confirm against the consumer.
5
+ builtin: boolean; // Distinguishes builtin judges from non-builtin ones; the latter require `entry` and `schema` (see below).
6
+ /** Path to the judge entry script, relative to the task root. Required for non-builtin judges; presumably null for builtin ones (TODO confirm). */
7
+ entry: string | null;
8
+ /** Path to the OCAS schema JSON describing the judge's data. Required for non-builtin judges; presumably null for builtin ones (TODO confirm). */
9
+ schema: string | null;
10
+ };
11
+
12
+ /** Limits for eval execution: a cap on steps and a timeout. */
13
+ export type TaskLimits = {
14
+ maxSteps: number; // NOTE(review): what counts as a "step" is not defined here — confirm against the runner.
15
+ timeoutMinutes: number; // Timeout, expressed in minutes per the field name.
16
+ };
17
+
18
+ /** Parsed task.yaml manifest: task name and description, the workflow reference, the initial prompt, execution limits, and the judge entries. */
19
+ export type TaskManifest = {
20
+ name: string;
21
+ description: string;
22
+ /** Either a workflow name or a relative path to a workflow .yaml file. */
23
+ workflow: string;
24
+ /** Initial prompt used when the thread is started. */
25
+ prompt: string;
26
+ limits: TaskLimits; // Step and timeout limits for the run (see TaskLimits).
27
+ judges: JudgeEntry[]; // Judge entries declared for this task (see JudgeEntry).
28
+ };