@aigne/cli 1.60.0-beta → 1.74.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (360) hide show
  1. package/README.md +12 -12
  2. package/dist/_virtual/rolldown_runtime.cjs +29 -0
  3. package/dist/bunwrapper.cjs +22 -0
  4. package/dist/bunwrapper.d.cts +1 -0
  5. package/dist/bunwrapper.d.mts +1 -0
  6. package/dist/bunwrapper.mjs +23 -0
  7. package/dist/bunwrapper.mjs.map +1 -0
  8. package/dist/cli.cjs +42 -0
  9. package/dist/cli.d.cts +9 -0
  10. package/dist/cli.d.cts.map +1 -0
  11. package/dist/cli.d.mts +10 -0
  12. package/dist/cli.d.mts.map +1 -0
  13. package/dist/cli.mjs +41 -0
  14. package/dist/cli.mjs.map +1 -0
  15. package/dist/commands/aigne.cjs +23 -0
  16. package/dist/commands/aigne.mjs +22 -0
  17. package/dist/commands/aigne.mjs.map +1 -0
  18. package/dist/commands/app/agent.cjs +117 -0
  19. package/dist/commands/app/agent.mjs +113 -0
  20. package/dist/commands/app/agent.mjs.map +1 -0
  21. package/dist/commands/app/app.cjs +92 -0
  22. package/dist/commands/app/app.mjs +90 -0
  23. package/dist/commands/app/app.mjs.map +1 -0
  24. package/dist/commands/app/cli.cjs +6 -0
  25. package/dist/commands/app/cli.d.cts +1 -0
  26. package/dist/commands/app/cli.d.mts +1 -0
  27. package/dist/commands/app/cli.mjs +8 -0
  28. package/dist/commands/app/cli.mjs.map +1 -0
  29. package/dist/commands/app/upgrade.cjs +243 -0
  30. package/dist/commands/app/upgrade.mjs +240 -0
  31. package/dist/commands/app/upgrade.mjs.map +1 -0
  32. package/dist/commands/app.cjs +53 -0
  33. package/dist/commands/app.mjs +53 -0
  34. package/dist/commands/app.mjs.map +1 -0
  35. package/dist/commands/create.cjs +66 -0
  36. package/dist/commands/create.mjs +65 -0
  37. package/dist/commands/create.mjs.map +1 -0
  38. package/dist/commands/deploy.cjs +237 -0
  39. package/dist/commands/deploy.mjs +237 -0
  40. package/dist/commands/deploy.mjs.map +1 -0
  41. package/dist/commands/eval.cjs +88 -0
  42. package/dist/commands/eval.mjs +88 -0
  43. package/dist/commands/eval.mjs.map +1 -0
  44. package/dist/commands/hub.cjs +297 -0
  45. package/dist/commands/hub.mjs +294 -0
  46. package/dist/commands/hub.mjs.map +1 -0
  47. package/dist/commands/observe.cjs +49 -0
  48. package/dist/commands/observe.mjs +46 -0
  49. package/dist/commands/observe.mjs.map +1 -0
  50. package/dist/commands/run-skill.cjs +84 -0
  51. package/dist/commands/run-skill.mjs +81 -0
  52. package/dist/commands/run-skill.mjs.map +1 -0
  53. package/dist/commands/run.cjs +172 -0
  54. package/dist/commands/run.mjs +171 -0
  55. package/dist/commands/run.mjs.map +1 -0
  56. package/dist/commands/serve-mcp.cjs +68 -0
  57. package/dist/commands/serve-mcp.mjs +67 -0
  58. package/dist/commands/serve-mcp.mjs.map +1 -0
  59. package/dist/commands/test.cjs +40 -0
  60. package/dist/commands/test.mjs +39 -0
  61. package/dist/commands/test.mjs.map +1 -0
  62. package/dist/constants.cjs +28 -0
  63. package/dist/constants.d.cts +9 -0
  64. package/dist/constants.d.cts.map +1 -0
  65. package/dist/constants.d.mts +9 -0
  66. package/dist/constants.d.mts.map +1 -0
  67. package/dist/constants.mjs +24 -0
  68. package/dist/constants.mjs.map +1 -0
  69. package/dist/global.d.cjs +0 -0
  70. package/dist/global.d.cts +6 -0
  71. package/dist/global.d.cts.map +1 -0
  72. package/dist/global.d.mts +6 -0
  73. package/dist/global.d.mts.map +1 -0
  74. package/dist/index.cjs +0 -0
  75. package/dist/index.d.cts +2 -0
  76. package/dist/index.d.mts +2 -0
  77. package/dist/index.mjs +1 -0
  78. package/dist/tracer/terminal.cjs +336 -0
  79. package/dist/tracer/terminal.mjs +332 -0
  80. package/dist/tracer/terminal.mjs.map +1 -0
  81. package/dist/type.cjs +0 -0
  82. package/dist/type.d.cts +10 -0
  83. package/dist/type.d.cts.map +1 -0
  84. package/dist/type.d.mts +10 -0
  85. package/dist/type.d.mts.map +1 -0
  86. package/dist/type.mjs +1 -0
  87. package/dist/ui/utils/terminal-input.cjs +145 -0
  88. package/dist/ui/utils/terminal-input.mjs +144 -0
  89. package/dist/ui/utils/terminal-input.mjs.map +1 -0
  90. package/dist/ui/utils/text-buffer.cjs +865 -0
  91. package/dist/ui/utils/text-buffer.mjs +865 -0
  92. package/dist/ui/utils/text-buffer.mjs.map +1 -0
  93. package/dist/ui/utils/text-utils.cjs +85 -0
  94. package/dist/ui/utils/text-utils.mjs +78 -0
  95. package/dist/ui/utils/text-utils.mjs.map +1 -0
  96. package/dist/utils/agent-v1.cjs +180 -0
  97. package/dist/utils/agent-v1.d.cts +138 -0
  98. package/dist/utils/agent-v1.d.cts.map +1 -0
  99. package/dist/utils/agent-v1.d.mts +138 -0
  100. package/dist/utils/agent-v1.d.mts.map +1 -0
  101. package/dist/utils/agent-v1.mjs +179 -0
  102. package/dist/utils/agent-v1.mjs.map +1 -0
  103. package/dist/utils/aigne-hub/constants.cjs +22 -0
  104. package/dist/utils/aigne-hub/constants.mjs +18 -0
  105. package/dist/utils/aigne-hub/constants.mjs.map +1 -0
  106. package/dist/utils/aigne-hub/credential.cjs +179 -0
  107. package/dist/utils/aigne-hub/credential.mjs +175 -0
  108. package/dist/utils/aigne-hub/credential.mjs.map +1 -0
  109. package/dist/utils/aigne-hub/crypto.cjs +41 -0
  110. package/dist/utils/aigne-hub/crypto.mjs +33 -0
  111. package/dist/utils/aigne-hub/crypto.mjs.map +1 -0
  112. package/dist/utils/aigne-hub/model.cjs +112 -0
  113. package/dist/utils/aigne-hub/model.d.cts +19 -0
  114. package/dist/utils/aigne-hub/model.d.cts.map +1 -0
  115. package/dist/utils/aigne-hub/model.d.mts +19 -0
  116. package/dist/utils/aigne-hub/model.d.mts.map +1 -0
  117. package/dist/utils/aigne-hub/model.mjs +106 -0
  118. package/dist/utils/aigne-hub/model.mjs.map +1 -0
  119. package/dist/utils/aigne-hub/store/file.cjs +64 -0
  120. package/dist/utils/aigne-hub/store/file.mjs +64 -0
  121. package/dist/utils/aigne-hub/store/file.mjs.map +1 -0
  122. package/dist/utils/aigne-hub/store/index.cjs +37 -0
  123. package/dist/utils/aigne-hub/store/index.mjs +37 -0
  124. package/dist/utils/aigne-hub/store/index.mjs.map +1 -0
  125. package/dist/utils/aigne-hub/store/keytar.cjs +61 -0
  126. package/dist/utils/aigne-hub/store/keytar.mjs +61 -0
  127. package/dist/utils/aigne-hub/store/keytar.mjs.map +1 -0
  128. package/dist/utils/aigne-hub/store/migrate.cjs +46 -0
  129. package/dist/utils/aigne-hub/store/migrate.mjs +45 -0
  130. package/dist/utils/aigne-hub/store/migrate.mjs.map +1 -0
  131. package/dist/utils/aigne-hub/type.d.cts +18 -0
  132. package/dist/utils/aigne-hub/type.d.cts.map +1 -0
  133. package/dist/utils/aigne-hub/type.d.mts +18 -0
  134. package/dist/utils/aigne-hub/type.d.mts.map +1 -0
  135. package/dist/utils/aigne-hub-user.cjs +11 -0
  136. package/dist/utils/aigne-hub-user.d.cts +23 -0
  137. package/dist/utils/aigne-hub-user.d.cts.map +1 -0
  138. package/dist/utils/aigne-hub-user.d.mts +23 -0
  139. package/dist/utils/aigne-hub-user.d.mts.map +1 -0
  140. package/dist/utils/aigne-hub-user.mjs +11 -0
  141. package/dist/utils/aigne-hub-user.mjs.map +1 -0
  142. package/dist/utils/ascii-logo.cjs +30 -0
  143. package/dist/utils/ascii-logo.d.cts +5 -0
  144. package/dist/utils/ascii-logo.d.cts.map +1 -0
  145. package/dist/utils/ascii-logo.d.mts +5 -0
  146. package/dist/utils/ascii-logo.d.mts.map +1 -0
  147. package/dist/utils/{ascii-logo.js → ascii-logo.mjs} +13 -3
  148. package/dist/utils/ascii-logo.mjs.map +1 -0
  149. package/dist/utils/download.cjs +25 -0
  150. package/dist/utils/download.d.cts +7 -0
  151. package/dist/utils/download.d.cts.map +1 -0
  152. package/dist/utils/download.d.mts +7 -0
  153. package/dist/utils/download.d.mts.map +1 -0
  154. package/dist/utils/download.mjs +25 -0
  155. package/dist/utils/download.mjs.map +1 -0
  156. package/dist/utils/evaluation/core.cjs +84 -0
  157. package/dist/utils/evaluation/core.mjs +84 -0
  158. package/dist/utils/evaluation/core.mjs.map +1 -0
  159. package/dist/utils/evaluation/dataset.cjs +47 -0
  160. package/dist/utils/evaluation/dataset.mjs +46 -0
  161. package/dist/utils/evaluation/dataset.mjs.map +1 -0
  162. package/dist/utils/evaluation/evaluator.cjs +109 -0
  163. package/dist/utils/evaluation/{evaluator.js → evaluator.mjs} +48 -45
  164. package/dist/utils/evaluation/evaluator.mjs.map +1 -0
  165. package/dist/utils/evaluation/reporter.cjs +225 -0
  166. package/dist/utils/evaluation/reporter.mjs +220 -0
  167. package/dist/utils/evaluation/reporter.mjs.map +1 -0
  168. package/dist/utils/evaluation/runner.cjs +85 -0
  169. package/dist/utils/evaluation/runner.mjs +85 -0
  170. package/dist/utils/evaluation/runner.mjs.map +1 -0
  171. package/dist/utils/get-url-origin.cjs +12 -0
  172. package/dist/utils/get-url-origin.d.cts +5 -0
  173. package/dist/utils/get-url-origin.d.cts.map +1 -0
  174. package/dist/utils/get-url-origin.d.mts +5 -0
  175. package/dist/utils/get-url-origin.d.mts.map +1 -0
  176. package/dist/utils/get-url-origin.mjs +12 -0
  177. package/dist/utils/get-url-origin.mjs.map +1 -0
  178. package/dist/utils/inquirer/checkbox.cjs +265 -0
  179. package/dist/utils/inquirer/checkbox.mjs +262 -0
  180. package/dist/utils/inquirer/checkbox.mjs.map +1 -0
  181. package/dist/utils/listr.cjs +226 -0
  182. package/dist/utils/listr.d.cts +71 -0
  183. package/dist/utils/listr.d.cts.map +1 -0
  184. package/dist/utils/listr.d.mts +71 -0
  185. package/dist/utils/listr.d.mts.map +1 -0
  186. package/dist/utils/listr.mjs +222 -0
  187. package/dist/utils/listr.mjs.map +1 -0
  188. package/dist/utils/load-aigne.cjs +77 -0
  189. package/dist/utils/load-aigne.d.cts +29 -0
  190. package/dist/utils/load-aigne.d.cts.map +1 -0
  191. package/dist/utils/load-aigne.d.mts +29 -0
  192. package/dist/utils/load-aigne.d.mts.map +1 -0
  193. package/dist/utils/load-aigne.mjs +74 -0
  194. package/dist/utils/load-aigne.mjs.map +1 -0
  195. package/dist/utils/run-chat-loop.cjs +90 -0
  196. package/dist/utils/run-chat-loop.d.cts +20 -0
  197. package/dist/utils/run-chat-loop.d.cts.map +1 -0
  198. package/dist/utils/run-chat-loop.d.mts +20 -0
  199. package/dist/utils/run-chat-loop.d.mts.map +1 -0
  200. package/dist/utils/run-chat-loop.mjs +89 -0
  201. package/dist/utils/run-chat-loop.mjs.map +1 -0
  202. package/dist/utils/run-with-aigne.cjs +131 -0
  203. package/dist/utils/run-with-aigne.d.cts +46 -0
  204. package/dist/utils/run-with-aigne.d.cts.map +1 -0
  205. package/dist/utils/run-with-aigne.d.mts +46 -0
  206. package/dist/utils/run-with-aigne.d.mts.map +1 -0
  207. package/dist/utils/run-with-aigne.mjs +126 -0
  208. package/dist/utils/run-with-aigne.mjs.map +1 -0
  209. package/dist/utils/serve-mcp.cjs +91 -0
  210. package/dist/utils/serve-mcp.d.cts +20 -0
  211. package/dist/utils/serve-mcp.d.cts.map +1 -0
  212. package/dist/utils/serve-mcp.d.mts +20 -0
  213. package/dist/utils/serve-mcp.d.mts.map +1 -0
  214. package/dist/utils/serve-mcp.mjs +89 -0
  215. package/dist/utils/serve-mcp.mjs.map +1 -0
  216. package/dist/utils/spinner.cjs +19 -0
  217. package/dist/utils/spinner.d.cts +5 -0
  218. package/dist/utils/spinner.d.cts.map +1 -0
  219. package/dist/utils/spinner.d.mts +5 -0
  220. package/dist/utils/spinner.d.mts.map +1 -0
  221. package/dist/utils/spinner.mjs +19 -0
  222. package/dist/utils/spinner.mjs.map +1 -0
  223. package/dist/utils/string-utils.cjs +11 -0
  224. package/dist/utils/string-utils.d.cts +5 -0
  225. package/dist/utils/string-utils.d.cts.map +1 -0
  226. package/dist/utils/string-utils.d.mts +5 -0
  227. package/dist/utils/string-utils.d.mts.map +1 -0
  228. package/dist/utils/string-utils.mjs +10 -0
  229. package/dist/utils/string-utils.mjs.map +1 -0
  230. package/dist/utils/time.cjs +14 -0
  231. package/dist/utils/time.d.cts +5 -0
  232. package/dist/utils/time.d.cts.map +1 -0
  233. package/dist/utils/time.d.mts +5 -0
  234. package/dist/utils/time.d.mts.map +1 -0
  235. package/dist/utils/time.mjs +14 -0
  236. package/dist/utils/time.mjs.map +1 -0
  237. package/dist/utils/url.cjs +8 -0
  238. package/dist/utils/url.d.cts +5 -0
  239. package/dist/utils/url.d.cts.map +1 -0
  240. package/dist/utils/url.d.mts +5 -0
  241. package/dist/utils/url.d.mts.map +1 -0
  242. package/dist/utils/url.mjs +8 -0
  243. package/dist/utils/url.mjs.map +1 -0
  244. package/dist/utils/yargs.cjs +191 -0
  245. package/dist/utils/yargs.d.cts +96 -0
  246. package/dist/utils/yargs.d.cts.map +1 -0
  247. package/dist/utils/yargs.d.mts +96 -0
  248. package/dist/utils/yargs.d.mts.map +1 -0
  249. package/dist/utils/yargs.mjs +186 -0
  250. package/dist/utils/yargs.mjs.map +1 -0
  251. package/package.json +122 -45
  252. package/CHANGELOG.md +0 -5019
  253. package/dist/bunwrapper.d.ts +0 -2
  254. package/dist/bunwrapper.js +0 -18
  255. package/dist/cli.d.ts +0 -7
  256. package/dist/cli.js +0 -42
  257. package/dist/commands/aigne.d.ts +0 -4
  258. package/dist/commands/aigne.js +0 -35
  259. package/dist/commands/app/agent.d.ts +0 -26
  260. package/dist/commands/app/agent.js +0 -122
  261. package/dist/commands/app/app.d.ts +0 -7
  262. package/dist/commands/app/app.js +0 -92
  263. package/dist/commands/app/cli.d.ts +0 -1
  264. package/dist/commands/app/cli.js +0 -2
  265. package/dist/commands/app/upgrade.d.ts +0 -54
  266. package/dist/commands/app/upgrade.js +0 -236
  267. package/dist/commands/app.d.ts +0 -4
  268. package/dist/commands/app.js +0 -54
  269. package/dist/commands/create.d.ts +0 -6
  270. package/dist/commands/create.js +0 -74
  271. package/dist/commands/deploy.d.ts +0 -11
  272. package/dist/commands/deploy.js +0 -255
  273. package/dist/commands/eval.d.ts +0 -11
  274. package/dist/commands/eval.js +0 -110
  275. package/dist/commands/hub.d.ts +0 -3
  276. package/dist/commands/hub.js +0 -323
  277. package/dist/commands/observe.d.ts +0 -7
  278. package/dist/commands/observe.js +0 -41
  279. package/dist/commands/run-skill.d.ts +0 -6
  280. package/dist/commands/run-skill.js +0 -102
  281. package/dist/commands/run.d.ts +0 -9
  282. package/dist/commands/run.js +0 -187
  283. package/dist/commands/serve-mcp.d.ts +0 -20
  284. package/dist/commands/serve-mcp.js +0 -67
  285. package/dist/commands/test.d.ts +0 -9
  286. package/dist/commands/test.js +0 -33
  287. package/dist/constants.d.ts +0 -7
  288. package/dist/constants.js +0 -21
  289. package/dist/index.d.ts +0 -1
  290. package/dist/index.js +0 -1
  291. package/dist/tracer/terminal.d.ts +0 -62
  292. package/dist/tracer/terminal.js +0 -404
  293. package/dist/type.d.ts +0 -5
  294. package/dist/type.js +0 -1
  295. package/dist/ui/utils/terminal-input.d.ts +0 -19
  296. package/dist/ui/utils/terminal-input.js +0 -123
  297. package/dist/ui/utils/text-buffer.d.ts +0 -87
  298. package/dist/ui/utils/text-buffer.js +0 -1059
  299. package/dist/ui/utils/text-utils.d.ts +0 -37
  300. package/dist/ui/utils/text-utils.js +0 -185
  301. package/dist/utils/agent-v1.d.ts +0 -134
  302. package/dist/utils/agent-v1.js +0 -213
  303. package/dist/utils/aigne-hub/constants.d.ts +0 -6
  304. package/dist/utils/aigne-hub/constants.js +0 -12
  305. package/dist/utils/aigne-hub/credential.d.ts +0 -20
  306. package/dist/utils/aigne-hub/credential.js +0 -182
  307. package/dist/utils/aigne-hub/crypto.d.ts +0 -4
  308. package/dist/utils/aigne-hub/crypto.js +0 -30
  309. package/dist/utils/aigne-hub/model.d.ts +0 -13
  310. package/dist/utils/aigne-hub/model.js +0 -122
  311. package/dist/utils/aigne-hub/store/file.d.ts +0 -15
  312. package/dist/utils/aigne-hub/store/file.js +0 -69
  313. package/dist/utils/aigne-hub/store/index.d.ts +0 -5
  314. package/dist/utils/aigne-hub/store/index.js +0 -43
  315. package/dist/utils/aigne-hub/store/keytar.d.ts +0 -15
  316. package/dist/utils/aigne-hub/store/keytar.js +0 -67
  317. package/dist/utils/aigne-hub/store/migrate.d.ts +0 -2
  318. package/dist/utils/aigne-hub/store/migrate.js +0 -57
  319. package/dist/utils/aigne-hub/type.d.ts +0 -38
  320. package/dist/utils/aigne-hub/type.js +0 -1
  321. package/dist/utils/aigne-hub-user.d.ts +0 -16
  322. package/dist/utils/aigne-hub-user.js +0 -10
  323. package/dist/utils/ascii-logo.d.ts +0 -1
  324. package/dist/utils/download.d.ts +0 -3
  325. package/dist/utils/download.js +0 -19
  326. package/dist/utils/evaluation/core.d.ts +0 -8
  327. package/dist/utils/evaluation/core.js +0 -83
  328. package/dist/utils/evaluation/dataset.d.ts +0 -15
  329. package/dist/utils/evaluation/dataset.js +0 -61
  330. package/dist/utils/evaluation/evaluator.d.ts +0 -9
  331. package/dist/utils/evaluation/reporter.d.ts +0 -28
  332. package/dist/utils/evaluation/reporter.js +0 -221
  333. package/dist/utils/evaluation/runner.d.ts +0 -16
  334. package/dist/utils/evaluation/runner.js +0 -129
  335. package/dist/utils/evaluation/type.d.ts +0 -69
  336. package/dist/utils/evaluation/type.js +0 -1
  337. package/dist/utils/get-url-origin.d.ts +0 -1
  338. package/dist/utils/get-url-origin.js +0 -8
  339. package/dist/utils/inquirer/checkbox.d.ts +0 -55
  340. package/dist/utils/inquirer/checkbox.js +0 -319
  341. package/dist/utils/listr.d.ts +0 -64
  342. package/dist/utils/listr.js +0 -265
  343. package/dist/utils/load-aigne.d.ts +0 -18
  344. package/dist/utils/load-aigne.js +0 -80
  345. package/dist/utils/run-chat-loop.d.ts +0 -15
  346. package/dist/utils/run-chat-loop.js +0 -87
  347. package/dist/utils/run-with-aigne.d.ts +0 -27
  348. package/dist/utils/run-with-aigne.js +0 -157
  349. package/dist/utils/serve-mcp.d.ts +0 -9
  350. package/dist/utils/serve-mcp.js +0 -93
  351. package/dist/utils/spinner.d.ts +0 -1
  352. package/dist/utils/spinner.js +0 -14
  353. package/dist/utils/string-utils.d.ts +0 -1
  354. package/dist/utils/string-utils.js +0 -4
  355. package/dist/utils/time.d.ts +0 -1
  356. package/dist/utils/time.js +0 -12
  357. package/dist/utils/url.d.ts +0 -1
  358. package/dist/utils/url.js +0 -3
  359. package/dist/utils/yargs.d.ts +0 -94
  360. package/dist/utils/yargs.js +0 -210
@@ -0,0 +1,7 @@
1
+ //#region src/utils/download.d.ts
2
+ declare function downloadAndExtract(url: string, dir: string, options?: {
3
+ strip?: number;
4
+ }): Promise<void>;
5
+ //#endregion
6
+ export { downloadAndExtract };
7
+ //# sourceMappingURL=download.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"download.d.mts","names":[],"sources":["../../src/utils/download.ts"],"mappings":";iBAOsB,kBAAA,CAAA,GAAA,UAAA,GAAA,UAAA,OAAA;EAAA,KAAA;AAAA,IAGY,OAAA"}
@@ -0,0 +1,25 @@
1
+ import { mkdir } from "node:fs/promises";
2
+ import { fetch } from "@aigne/core/utils/fetch";
3
+ import { Readable } from "node:stream";
4
+ import { finished } from "node:stream/promises";
5
+ import { x } from "tar";
6
+
7
+ //#region src/utils/download.ts
8
+ async function downloadAndExtract(url, dir, options = {}) {
9
+ const response = await fetch(url);
10
+ if (!response.body) throw new Error(`Failed to download package from ${url}: Unexpected to get empty response`);
11
+ try {
12
+ await mkdir(dir, { recursive: true });
13
+ await finished(Readable.fromWeb(response.body).pipe(x({
14
+ C: dir,
15
+ ...options
16
+ })));
17
+ } catch (error) {
18
+ error.message = `Failed to extract package from ${url}: ${error.message}`;
19
+ throw error;
20
+ }
21
+ }
22
+
23
+ //#endregion
24
+ export { downloadAndExtract };
25
+ //# sourceMappingURL=download.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"download.mjs","names":[],"sources":["../../src/utils/download.ts"],"sourcesContent":["import { mkdir } from \"node:fs/promises\";\nimport { Readable } from \"node:stream\";\nimport { finished } from \"node:stream/promises\";\nimport type { ReadableStream } from \"node:stream/web\";\nimport { fetch } from \"@aigne/core/utils/fetch\";\nimport { x } from \"tar\";\n\nexport async function downloadAndExtract(\n url: string,\n dir: string,\n options: { strip?: number } = {},\n) {\n const response = await fetch(url);\n\n if (!response.body) {\n throw new Error(`Failed to download package from ${url}: Unexpected to get empty response`);\n }\n\n try {\n await mkdir(dir, { recursive: true });\n\n await finished(\n Readable.fromWeb(response.body as unknown as ReadableStream).pipe(x({ C: dir, ...options })),\n );\n } catch (error) {\n error.message = `Failed to extract package from ${url}: ${error.message}`;\n throw error;\n }\n}\n"],"mappings":";;;;;;;AAOA,eAAsB,mBACpB,KACA,KACA,UAA8B,EAAE,EAChC;CACA,MAAM,WAAW,MAAM,MAAM,IAAI;AAEjC,KAAI,CAAC,SAAS,KACZ,OAAM,IAAI,MAAM,mCAAmC,IAAI,oCAAoC;AAG7F,KAAI;AACF,QAAM,MAAM,KAAK,EAAE,WAAW,MAAM,CAAC;AAErC,QAAM,SACJ,SAAS,QAAQ,SAAS,KAAkC,CAAC,KAAK,EAAE;GAAE,GAAG;GAAK,GAAG;GAAS,CAAC,CAAC,CAC7F;UACM,OAAO;AACd,QAAM,UAAU,kCAAkC,IAAI,IAAI,MAAM;AAChE,QAAM"}
@@ -0,0 +1,84 @@
1
+ const require_rolldown_runtime = require('../../_virtual/rolldown_runtime.cjs');
2
+ const require_reporter = require('./reporter.cjs');
3
+ let _aigne_listr2 = require("@aigne/listr2");
4
+
5
+ //#region src/utils/evaluation/core.ts
6
+ function aggregateSummary(results, duration) {
7
+ const total = results.length;
8
+ const scores = results.flatMap((r) => r.evaluations.map((e) => e.score));
9
+ const successRate = Number((scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0).toFixed(2));
10
+ const latencies = results.map((r) => r.latency || 0);
11
+ const totalTokens = results.reduce((a, r) => a + (r.usage?.inputTokens || 0) + (r.usage?.outputTokens || 0), 0);
12
+ const errors = results.filter((r) => r.error).length;
13
+ return {
14
+ total,
15
+ successRate,
16
+ duration: Number(duration.toFixed(3)),
17
+ avgLatency: latencies.reduce((a, b) => a + b, 0) / (latencies.length || 1),
18
+ maxLatency: Math.max(...latencies, 0),
19
+ minLatency: Math.min(...latencies, 0),
20
+ totalTokens,
21
+ errorCount: errors,
22
+ scoreDistribution: {
23
+ min: Math.min(...scores, 0),
24
+ max: Math.max(...scores, 0),
25
+ mean: successRate,
26
+ median: scores.length ? scores.sort((a, b) => a - b)[Math.floor(scores.length / 2)] : 0,
27
+ variance: scores.length > 1 ? scores.reduce((a, b) => a + (b - successRate) ** 2, 0) / scores.length : 0
28
+ }
29
+ };
30
+ }
31
+ async function runEvaluationPipeline(params) {
32
+ const now = Date.now();
33
+ const { dataset, runner, evaluators, reporters = [new require_reporter.ConsoleReporter()], options } = params;
34
+ const results = [];
35
+ const { items } = await new _aigne_listr2.Listr([{
36
+ title: "Load dataset",
37
+ task: async (ctx, _task) => {
38
+ ctx.items = await dataset.loadWithOptions();
39
+ }
40
+ }], { registerSignalListeners: false }).run();
41
+ await new _aigne_listr2.Listr(items.map((item) => {
42
+ const input = JSON.stringify(item.input);
43
+ return {
44
+ title: `Run evaluations with input: ${input.length > 100 ? `${input.slice(0, 100)}...` : input}`,
45
+ task: async (ctx, task) => {
46
+ task.output = `Start running agent with input: ${JSON.stringify(item.input, null, 2)}`;
47
+ const runnerResults = await runner.run([item], options);
48
+ for await (const result of runnerResults) {
49
+ task.output = `Start running evaluation with: ${JSON.stringify({
50
+ input: result.input,
51
+ output: result.output,
52
+ expected: result.expected
53
+ }, null, 2)}`;
54
+ const evaluations = [];
55
+ for (const evaluator of evaluators) {
56
+ const evals = await evaluator.evaluate(result);
57
+ evaluations.push(...evals);
58
+ }
59
+ results.push({
60
+ ...result,
61
+ evaluations
62
+ });
63
+ task.output = `Finish running evaluation`;
64
+ }
65
+ ctx.results = results;
66
+ }
67
+ };
68
+ }), {
69
+ concurrent: options?.concurrency ? Math.min(items.length, options?.concurrency) : false,
70
+ exitOnError: true,
71
+ rendererOptions: { collapseSubtasks: false },
72
+ registerSignalListeners: false
73
+ }).run();
74
+ const summary = aggregateSummary(results, (Date.now() - now) / 1e3);
75
+ const report = {
76
+ dataset: dataset.name,
77
+ results,
78
+ summary
79
+ };
80
+ for (const reporter of reporters) await reporter.report(report);
81
+ }
82
+
83
+ //#endregion
84
+ exports.runEvaluationPipeline = runEvaluationPipeline;
@@ -0,0 +1,84 @@
1
+ import { ConsoleReporter } from "./reporter.mjs";
2
+ import { Listr } from "@aigne/listr2";
3
+
4
+ //#region src/utils/evaluation/core.ts
5
+ function aggregateSummary(results, duration) {
6
+ const total = results.length;
7
+ const scores = results.flatMap((r) => r.evaluations.map((e) => e.score));
8
+ const successRate = Number((scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0).toFixed(2));
9
+ const latencies = results.map((r) => r.latency || 0);
10
+ const totalTokens = results.reduce((a, r) => a + (r.usage?.inputTokens || 0) + (r.usage?.outputTokens || 0), 0);
11
+ const errors = results.filter((r) => r.error).length;
12
+ return {
13
+ total,
14
+ successRate,
15
+ duration: Number(duration.toFixed(3)),
16
+ avgLatency: latencies.reduce((a, b) => a + b, 0) / (latencies.length || 1),
17
+ maxLatency: Math.max(...latencies, 0),
18
+ minLatency: Math.min(...latencies, 0),
19
+ totalTokens,
20
+ errorCount: errors,
21
+ scoreDistribution: {
22
+ min: Math.min(...scores, 0),
23
+ max: Math.max(...scores, 0),
24
+ mean: successRate,
25
+ median: scores.length ? scores.sort((a, b) => a - b)[Math.floor(scores.length / 2)] : 0,
26
+ variance: scores.length > 1 ? scores.reduce((a, b) => a + (b - successRate) ** 2, 0) / scores.length : 0
27
+ }
28
+ };
29
+ }
30
+ async function runEvaluationPipeline(params) {
31
+ const now = Date.now();
32
+ const { dataset, runner, evaluators, reporters = [new ConsoleReporter()], options } = params;
33
+ const results = [];
34
+ const { items } = await new Listr([{
35
+ title: "Load dataset",
36
+ task: async (ctx, _task) => {
37
+ ctx.items = await dataset.loadWithOptions();
38
+ }
39
+ }], { registerSignalListeners: false }).run();
40
+ await new Listr(items.map((item) => {
41
+ const input = JSON.stringify(item.input);
42
+ return {
43
+ title: `Run evaluations with input: ${input.length > 100 ? `${input.slice(0, 100)}...` : input}`,
44
+ task: async (ctx, task) => {
45
+ task.output = `Start running agent with input: ${JSON.stringify(item.input, null, 2)}`;
46
+ const runnerResults = await runner.run([item], options);
47
+ for await (const result of runnerResults) {
48
+ task.output = `Start running evaluation with: ${JSON.stringify({
49
+ input: result.input,
50
+ output: result.output,
51
+ expected: result.expected
52
+ }, null, 2)}`;
53
+ const evaluations = [];
54
+ for (const evaluator of evaluators) {
55
+ const evals = await evaluator.evaluate(result);
56
+ evaluations.push(...evals);
57
+ }
58
+ results.push({
59
+ ...result,
60
+ evaluations
61
+ });
62
+ task.output = `Finish running evaluation`;
63
+ }
64
+ ctx.results = results;
65
+ }
66
+ };
67
+ }), {
68
+ concurrent: options?.concurrency ? Math.min(items.length, options?.concurrency) : false,
69
+ exitOnError: true,
70
+ rendererOptions: { collapseSubtasks: false },
71
+ registerSignalListeners: false
72
+ }).run();
73
+ const summary = aggregateSummary(results, (Date.now() - now) / 1e3);
74
+ const report = {
75
+ dataset: dataset.name,
76
+ results,
77
+ summary
78
+ };
79
+ for (const reporter of reporters) await reporter.report(report);
80
+ }
81
+
82
+ //#endregion
83
+ export { runEvaluationPipeline };
84
+ //# sourceMappingURL=core.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"core.mjs","names":[],"sources":["../../../src/utils/evaluation/core.ts"],"sourcesContent":["import { Listr } from \"@aigne/listr2\";\nimport { ConsoleReporter } from \"./reporter.js\";\nimport type {\n Dataset,\n DatasetItem,\n Evaluation,\n EvaluationResult,\n EvaluationSummary,\n Evaluator,\n Report,\n Reporter,\n Runner,\n RunOptions,\n} from \"./type.js\";\n\nfunction aggregateSummary(results: EvaluationResult[], duration: number): EvaluationSummary {\n const total = results.length;\n const scores = results.flatMap((r) => r.evaluations.map((e) => e.score));\n const successRate = Number(\n (scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0).toFixed(2),\n );\n\n const latencies = results.map((r) => r.latency || 0);\n const totalTokens = results.reduce(\n (a, r) => a + (r.usage?.inputTokens || 0) + (r.usage?.outputTokens || 0),\n 0,\n );\n\n const errors = results.filter((r) => r.error).length;\n\n return {\n total,\n successRate,\n duration: Number(duration.toFixed(3)),\n avgLatency: latencies.reduce((a, b) => a + b, 0) / (latencies.length || 1),\n maxLatency: Math.max(...latencies, 0),\n minLatency: Math.min(...latencies, 0),\n totalTokens,\n errorCount: errors,\n scoreDistribution: {\n min: Math.min(...scores, 0),\n max: Math.max(...scores, 0),\n mean: successRate,\n median: scores.length ? scores.sort((a, b) => a - b)[Math.floor(scores.length / 2)] : 0,\n variance:\n scores.length > 1\n ? scores.reduce((a, b) => a + (b - successRate) ** 2, 0) / scores.length\n : 0,\n },\n };\n}\n\ntype EvaluationPipelineContext = {\n items: DatasetItem[];\n results: EvaluationResult[];\n};\n\nexport async function runEvaluationPipeline(params: {\n dataset: Dataset;\n runner: Runner;\n evaluators: Evaluator[];\n reporters?: Reporter[];\n options?: RunOptions;\n}) {\n const now = Date.now();\n const { dataset, runner, evaluators, reporters = [new ConsoleReporter()], options } = params;\n\n const results: EvaluationPipelineContext[\"results\"] = [];\n\n const task1 = new Listr<{ items: DatasetItem[] }>(\n [\n {\n title: \"Load dataset\",\n task: async (ctx, _task) => {\n ctx.items = await dataset.loadWithOptions();\n },\n },\n ],\n {\n registerSignalListeners: false,\n },\n );\n\n const { items } = await task1.run();\n\n const task2 = new Listr<EvaluationPipelineContext>(\n items.map((item) => {\n const input = JSON.stringify(item.input);\n return {\n title: `Run evaluations with input: ${input.length > 100 ? `${input.slice(0, 100)}...` : input}`,\n task: async (ctx, task) => {\n task.output = `Start running agent with input: ${JSON.stringify(item.input, null, 2)}`;\n\n const runnerResults = await runner.run([item], options);\n\n for await (const result of runnerResults) {\n task.output = `Start running evaluation with: ${JSON.stringify(\n {\n input: result.input,\n output: result.output,\n expected: result.expected,\n },\n null,\n 2,\n )}`;\n\n const evaluations: Evaluation[] = [];\n for (const evaluator of evaluators) {\n const evals = await evaluator.evaluate(result);\n evaluations.push(...evals);\n }\n\n results.push({ ...result, evaluations });\n\n task.output = `Finish running evaluation`;\n }\n\n ctx.results = results;\n },\n };\n }),\n {\n concurrent: options?.concurrency ? Math.min(items.length, options?.concurrency) : false,\n exitOnError: true,\n rendererOptions: {\n collapseSubtasks: false,\n },\n registerSignalListeners: false,\n },\n );\n\n await task2.run();\n\n const summary: EvaluationSummary = aggregateSummary(results, (Date.now() - now) / 1000);\n const report: Report = { dataset: dataset.name, results, summary };\n\n for (const reporter of reporters) {\n await reporter.report(report);\n }\n}\n"],"mappings":";;;;AAeA,SAAS,iBAAiB,SAA6B,UAAqC;CAC1F,MAAM,QAAQ,QAAQ;CACtB,MAAM,SAAS,QAAQ,SAAS,MAAM,EAAE,YAAY,KAAK,MAAM,EAAE,MAAM,CAAC;CACxE,MAAM,cAAc,QACjB,OAAO,SAAS,IAAI,OAAO,QAAQ,GAAG,MAAM,IAAI,GAAG,EAAE,GAAG,OAAO,SAAS,GAAG,QAAQ,EAAE,CACvF;CAED,MAAM,YAAY,QAAQ,KAAK,MAAM,EAAE,WAAW,EAAE;CACpD,MAAM,cAAc,QAAQ,QACzB,GAAG,MAAM,KAAK,EAAE,OAAO,eAAe,MAAM,EAAE,OAAO,gBAAgB,IACtE,EACD;CAED,MAAM,SAAS,QAAQ,QAAQ,MAAM,EAAE,MAAM,CAAC;AAE9C,QAAO;EACL;EACA;EACA,UAAU,OAAO,SAAS,QAAQ,EAAE,CAAC;EACrC,YAAY,UAAU,QAAQ,GAAG,MAAM,IAAI,GAAG,EAAE,IAAI,UAAU,UAAU;EACxE,YAAY,KAAK,IAAI,GAAG,WAAW,EAAE;EACrC,YAAY,KAAK,IAAI,GAAG,WAAW,EAAE;EACrC;EACA,YAAY;EACZ,mBAAmB;GACjB,KAAK,KAAK,IAAI,GAAG,QAAQ,EAAE;GAC3B,KAAK,KAAK,IAAI,GAAG,QAAQ,EAAE;GAC3B,MAAM;GACN,QAAQ,OAAO,SAAS,OAAO,MAAM,GAAG,MAAM,IAAI,EAAE,CAAC,KAAK,MAAM,OAAO,SAAS,EAAE,IAAI;GACtF,UACE,OAAO,SAAS,IACZ,OAAO,QAAQ,GAAG,MAAM,KAAK,IAAI,gBAAgB,GAAG,EAAE,GAAG,OAAO,SAChE;GACP;EACF;;AAQH,eAAsB,sBAAsB,QAMzC;CACD,MAAM,MAAM,KAAK,KAAK;CACtB,MAAM,EAAE,SAAS,QAAQ,YAAY,YAAY,CAAC,IAAI,iBAAiB,CAAC,EAAE,YAAY;CAEtF,MAAM,UAAgD,EAAE;CAgBxD,MAAM,EAAE,UAAU,MAdJ,IAAI,MAChB,CACE;EACE,OAAO;EACP,MAAM,OAAO,KAAK,UAAU;AAC1B,OAAI,QAAQ,MAAM,QAAQ,iBAAiB;;EAE9C,CACF,EACD,EACE,yBAAyB,OAC1B,CACF,CAE6B,KAAK;AAgDnC,OA9Cc,IAAI,MAChB,MAAM,KAAK,SAAS;EAClB,MAAM,QAAQ,KAAK,UAAU,KAAK,MAAM;AACxC,SAAO;GACL,OAAO,+BAA+B,MAAM,SAAS,MAAM,GAAG,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO;GACzF,MAAM,OAAO,KAAK,SAAS;AACzB,SAAK,SAAS,mCAAmC,KAAK,UAAU,KAAK,OAAO,MAAM,EAAE;IAEpF,MAAM,gBAAgB,MAAM,OAAO,IAAI,CAAC,KAAK,EAAE,QAAQ;AAEvD,eAAW,MAAM,UAAU,eAAe;AACxC,UAAK,SAAS,kCAAkC,KAAK,UACnD;MACE,OAAO,OAAO;MACd,QAAQ,OAAO;MACf,UAAU,OAAO;MAClB,EACD,MACA,EACD;KAED,MAAM,cAA4B,EAAE;AACpC,UAAK,MAAM,aAAa,YAAY;MAClC,MAAM,QAAQ,MAAM,UAAU,SAAS,OAAO;AAC9C,kBAAY,KAAK,GAAG,MAAM;;AAG5B,aAAQ,KAAK;MAAE,GAAG;MAAQ;MAAa,CAAC;AAExC,UAAK,SAAS;;AAGhB,QAAI,UAAU;;GAEjB;GACD,EACF;EACE,YAAY,SAAS,cAAc,KAAK,IAAI,MAAM,QAAQ,SAAS,YAAY,GAAG;EAClF,aAAa;EACb,iBAAiB,EACf,kBAAkB,OACnB;EACD,yBAAyB;EAC1B,CACF,CAEW,KAAK;CAEjB,MAAM,UAA6B,iBAAiB,UAAU,KAAK,KAAK,GAAG,OAAO,IAAK;CACvF,MAAM,SAAiB;EAAE,SAAS,QAAQ;EAAM;EAAS;EAAS;AAElE,MAAK,MAAM,YAAY,UACrB,OAAM,SAAS,OAAO,OAAO"}
@@ -0,0 +1,47 @@
1
+ const require_rolldown_runtime = require('../../_virtual/rolldown_runtime.cjs');
2
+ let node_fs_promises = require("node:fs/promises");
3
+ node_fs_promises = require_rolldown_runtime.__toESM(node_fs_promises);
4
+ let zod = require("zod");
5
+
6
+ //#region src/utils/evaluation/dataset.ts
7
+ const recordSchema = zod.z.record(zod.z.any());
8
+ const datasetItemSchema = zod.z.object({
9
+ id: zod.z.union([zod.z.string(), zod.z.number()]),
10
+ input: recordSchema,
11
+ output: recordSchema.optional(),
12
+ expected: recordSchema.optional(),
13
+ metadata: recordSchema.optional(),
14
+ tags: zod.z.array(zod.z.string()).optional(),
15
+ selected: zod.z.boolean().optional()
16
+ });
17
+ const datasetSchema = zod.z.array(datasetItemSchema);
18
+ var FileDataset = class {
19
+ name = "file-dataset";
20
+ filePath;
21
+ constructor(filePath) {
22
+ this.filePath = filePath;
23
+ }
24
+ async load() {
25
+ let list;
26
+ try {
27
+ list = await node_fs_promises.default.readFile(this.filePath, "utf-8");
28
+ } catch (err) {
29
+ throw new Error(`Failed to read dataset file: ${err.message}`);
30
+ }
31
+ let parsed;
32
+ try {
33
+ parsed = JSON.parse(list);
34
+ } catch (err) {
35
+ throw new Error(`Invalid JSON in dataset file: ${err.message}`);
36
+ }
37
+ const result = await datasetSchema.safeParseAsync(parsed);
38
+ if (!result.success) throw new Error(`Invalid dataset file: ${JSON.stringify(result.error.format())}`);
39
+ return result.data;
40
+ }
41
+ async loadWithOptions() {
42
+ return this.load();
43
+ }
44
+ };
45
+
46
+ //#endregion
47
+ exports.FileDataset = FileDataset;
@@ -0,0 +1,46 @@
1
+ import fs from "node:fs/promises";
2
+ import { z as z$1 } from "zod";
3
+
4
+ //#region src/utils/evaluation/dataset.ts
5
+ const recordSchema = z$1.record(z$1.any());
6
+ const datasetItemSchema = z$1.object({
7
+ id: z$1.union([z$1.string(), z$1.number()]),
8
+ input: recordSchema,
9
+ output: recordSchema.optional(),
10
+ expected: recordSchema.optional(),
11
+ metadata: recordSchema.optional(),
12
+ tags: z$1.array(z$1.string()).optional(),
13
+ selected: z$1.boolean().optional()
14
+ });
15
+ const datasetSchema = z$1.array(datasetItemSchema);
16
+ var FileDataset = class {
17
+ name = "file-dataset";
18
+ filePath;
19
+ constructor(filePath) {
20
+ this.filePath = filePath;
21
+ }
22
+ async load() {
23
+ let list;
24
+ try {
25
+ list = await fs.readFile(this.filePath, "utf-8");
26
+ } catch (err) {
27
+ throw new Error(`Failed to read dataset file: ${err.message}`);
28
+ }
29
+ let parsed;
30
+ try {
31
+ parsed = JSON.parse(list);
32
+ } catch (err) {
33
+ throw new Error(`Invalid JSON in dataset file: ${err.message}`);
34
+ }
35
+ const result = await datasetSchema.safeParseAsync(parsed);
36
+ if (!result.success) throw new Error(`Invalid dataset file: ${JSON.stringify(result.error.format())}`);
37
+ return result.data;
38
+ }
39
+ async loadWithOptions() {
40
+ return this.load();
41
+ }
42
+ };
43
+
44
+ //#endregion
45
+ export { FileDataset };
46
+ //# sourceMappingURL=dataset.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dataset.mjs","names":["z"],"sources":["../../../src/utils/evaluation/dataset.ts"],"sourcesContent":["import fs from \"node:fs/promises\";\nimport { z } from \"zod\";\nimport type { Dataset, DatasetItem } from \"./type.js\";\n\nconst recordSchema = z.record(z.any());\n\nconst datasetItemSchema = z.object({\n id: z.union([z.string(), z.number()]),\n input: recordSchema,\n output: recordSchema.optional(),\n expected: recordSchema.optional(),\n metadata: recordSchema.optional(),\n tags: z.array(z.string()).optional(),\n selected: z.boolean().optional(),\n});\n\nconst datasetSchema = z.array(datasetItemSchema);\n\nexport class FileDataset implements Dataset {\n name = \"file-dataset\";\n private filePath: string;\n\n constructor(filePath: string) {\n this.filePath = filePath;\n }\n\n async load(): Promise<DatasetItem[]> {\n let list: string;\n try {\n list = await fs.readFile(this.filePath, \"utf-8\");\n } catch (err) {\n throw new Error(`Failed to read dataset file: ${err.message}`);\n }\n\n let parsed: DatasetItem[];\n try {\n parsed = JSON.parse(list);\n } catch (err) {\n throw new Error(`Invalid JSON in dataset file: ${err.message}`);\n }\n\n const result = await datasetSchema.safeParseAsync(parsed);\n if (!result.success) {\n throw new Error(`Invalid dataset file: ${JSON.stringify(result.error.format())}`);\n }\n\n return result.data;\n }\n\n async loadWithOptions(): Promise<DatasetItem[]> {\n return this.load();\n }\n}\n\nexport class JsonDataset implements Dataset {\n name = \"json-dataset\";\n private data: DatasetItem[];\n\n constructor(data: DatasetItem[]) {\n this.data = data;\n }\n\n async load(): Promise<DatasetItem[]> {\n const result = await datasetSchema.safeParseAsync(this.data);\n\n if (!result.success) {\n throw new Error(`Invalid dataset file: ${JSON.stringify(result.error.format())}`);\n }\n\n return result.data;\n }\n\n async loadWithOptions(): Promise<DatasetItem[]> {\n return this.load();\n }\n}\n"],"mappings":";;;;AAIA,MAAM,eAAeA,IAAE,OAAOA,IAAE,KAAK,CAAC;AAEtC,MAAM,oBAAoBA,IAAE,OAAO;CACjC,IAAIA,IAAE,MAAM,CAACA,IAAE,QAAQ,EAAEA,IAAE,QAAQ,CAAC,CAAC;CACrC,OAAO;CACP,QAAQ,aAAa,UAAU;CAC/B,UAAU,aAAa,UAAU;CACjC,UAAU,aAAa,UAAU;CACjC,MAAMA,IAAE,MAAMA,IAAE,QAAQ,CAAC,CAAC,UAAU;CACpC,UAAUA,IAAE,SAAS,CAAC,UAAU;CACjC,CAAC;AAEF,MAAM,gBAAgBA,IAAE,MAAM,kBAAkB;AAEhD,IAAa,cAAb,MAA4C;CAC1C,OAAO;CACP,AAAQ;CAER,YAAY,UAAkB;AAC5B,OAAK,WAAW;;CAGlB,MAAM,OAA+B;EACnC,IAAI;AACJ,MAAI;AACF,UAAO,MAAM,GAAG,SAAS,KAAK,UAAU,QAAQ;WACzC,KAAK;AACZ,SAAM,IAAI,MAAM,gCAAgC,IAAI,UAAU;;EAGhE,IAAI;AACJ,MAAI;AACF,YAAS,KAAK,MAAM,KAAK;WAClB,KAAK;AACZ,SAAM,IAAI,MAAM,iCAAiC,IAAI,UAAU;;EAGjE,MAAM,SAAS,MAAM,cAAc,eAAe,OAAO;AACzD,MAAI,CAAC,OAAO,QACV,OAAM,IAAI,MAAM,yBAAyB,KAAK,UAAU,OAAO,MAAM,QAAQ,CAAC,GAAG;AAGnF,SAAO,OAAO;;CAGhB,MAAM,kBAA0C;AAC9C,SAAO,KAAK,MAAM"}
@@ -0,0 +1,109 @@
1
+ const require_rolldown_runtime = require('../../_virtual/rolldown_runtime.cjs');
2
+ let _aigne_core = require("@aigne/core");
3
+ let zod = require("zod");
4
+
5
+ //#region src/utils/evaluation/evaluator.ts
6
+ const defaultAgent = _aigne_core.AIAgent.from({
7
+ name: "LLMEvaluator",
8
+ instructions: `
9
+ # Instructions
10
+ You are an expert evaluator. Your task is to evaluate the quality of AI-generated responses.
11
+ You will be given:
12
+ 1. User Input (Prompt)
13
+ 2. AI-generated Output
14
+ 3. Expected Output
15
+
16
+ ## Evaluation Methods
17
+ Follow these three correlation checks before assigning a score:
18
+ 1. **AI Output vs User Input**: Check if the AI response is relevant to the user input.
19
+ 2. **Expected Output vs User Input**: Check if the expected output is relevant to the user input.
20
+ 3. **AI Output vs Expected Output**: Check the similarity and alignment between the AI output and the expected output.
21
+
22
+ Then assign a rating and a score based on the overall quality.
23
+
24
+ ## Criteria
25
+ - **Instruction following**: Does the AI response follow the prompt’s requirements?
26
+ - **Groundedness**: Is the AI response consistent with the expected output and free from irrelevant information?
27
+ - **Completeness**: Does the AI response fully address the task?
28
+ - **Accuracy/Correctness**: Is the AI response factually correct and logically consistent?
29
+ - **Fluency**: Is the AI response clear, structured, and easy to read?
30
+
31
+ ## Rating Rubric (1–5)
32
+ - **5 - Very Good**: Highly relevant, closely aligned with the expected output, accurate, complete, and fluent.
33
+ - **4 - Good**: Relevant, mostly aligned with the expected output, generally accurate and complete, only minor issues.
34
+ - **3 - Ok**: Somewhat relevant, partially aligned, or missing important details.
35
+ - **2 - Bad**: Weak relevance, low similarity with expected output, contains significant errors or omissions.
36
+ - **1 - Very Bad**: Irrelevant, fails to align with expected output, or completely incorrect.
37
+
38
+ ## Evaluation Steps
39
+ 1. Compare the **semantic content** of AI Output vs Expected Output.
40
+ - Ignore JSON keys, object structure, formatting, whitespace, capitalization, and minor punctuation differences.
41
+ - If meaning is the same but phrasing differs slightly, assign a higher score (4–5).
42
+ - If AI output deviates significantly, assign a lower score (1–2).
43
+ - If AI output is empty, assign a lower score (1–2).
44
+ 2. Assess against criteria: instruction following, groundedness, completeness, correctness, fluency.
45
+ 3. Assign a 1–5 integer score.
46
+ 4. Provide reasoning, and explicitly justify why this result is **not** a 1/2/3 case (why it avoids being a negative example).
47
+
48
+ # Response Output Format
49
+ Your output must strictly follow this three-line format:
50
+ - First line: rating (Very Good, Good, Ok, Bad, Very Bad)
51
+ - Second line: reasoning (must include justification why it is not a 1, 2, or 3 if scored higher)
52
+ - Third line: SCORE: [1-5]
53
+
54
+ Example:
55
+ Good
56
+ The response follows most instructions and is largely consistent with the expected output, but it omits one detail. This prevents it from being 5. However, it is more accurate and complete than an "Ok" response, so it deserves 4.
57
+ SCORE: 4
58
+
59
+ # User Inputs and AI-generated Response
60
+ ### Input
61
+ {{input}}
62
+
63
+ ### AI-generated Output
64
+ {{output}}
65
+
66
+ ### Expected Output
67
+ {{expectedOutput}}
68
+ `,
69
+ inputSchema: zod.z.object({
70
+ input: zod.z.string().describe("The input content to analyze"),
71
+ output: zod.z.string().describe("The output content to analyze"),
72
+ expectedOutput: zod.z.string().describe("The expected output content to analyze")
73
+ }),
74
+ outputSchema: zod.z.object({
75
+ rating: zod.z.enum([
76
+ "Very Good",
77
+ "Good",
78
+ "Ok",
79
+ "Bad",
80
+ "Very Bad"
81
+ ]).describe("The rating of the output"),
82
+ reasoning: zod.z.string().describe("The reasoning of the rating, including justification"),
83
+ score: zod.z.number().int().min(1).max(5).describe("The score of the output, 1–5, 5 is the best")
84
+ })
85
+ });
86
+ var LLMEvaluator = class {
87
+ name = "llm-as-judge";
88
+ constructor(aigne = new _aigne_core.AIGNE(), agent = defaultAgent) {
89
+ this.aigne = aigne;
90
+ this.agent = agent;
91
+ }
92
+ async evaluate(dataset) {
93
+ const result = await this.aigne.invoke(this.agent, {
94
+ input: typeof dataset.input === "string" ? dataset.input : JSON.stringify(dataset.input, null, 2),
95
+ output: dataset.output ? JSON.stringify(dataset.output, null, 2) : "",
96
+ expectedOutput: JSON.stringify(dataset.expected, null, 2)
97
+ }, { returnMetadata: true });
98
+ return [{
99
+ name: this.name,
100
+ rating: result.rating,
101
+ score: result.score,
102
+ reason: result.reasoning,
103
+ usage: result?.$meta?.usage ?? {}
104
+ }];
105
+ }
106
+ };
107
+
108
+ //#endregion
109
+ exports.LLMEvaluator = LLMEvaluator;
@@ -1,6 +1,10 @@
1
1
  import { AIAgent, AIGNE } from "@aigne/core";
2
- import { z } from "zod";
3
- const EVALUATOR_PROMPT = `
2
+ import { z as z$1 } from "zod";
3
+
4
+ //#region src/utils/evaluation/evaluator.ts
5
+ const defaultAgent = AIAgent.from({
6
+ name: "LLMEvaluator",
7
+ instructions: `
4
8
  # Instructions
5
9
  You are an expert evaluator. Your task is to evaluate the quality of AI-generated responses.
6
10
  You will be given:
@@ -60,47 +64,46 @@ SCORE: 4
60
64
 
61
65
  ### Expected Output
62
66
  {{expectedOutput}}
63
- `;
64
- const defaultAgent = AIAgent.from({
65
- name: "LLMEvaluator",
66
- instructions: EVALUATOR_PROMPT,
67
- inputSchema: z.object({
68
- input: z.string().describe("The input content to analyze"),
69
- output: z.string().describe("The output content to analyze"),
70
- expectedOutput: z.string().describe("The expected output content to analyze"),
71
- }),
72
- outputSchema: z.object({
73
- rating: z
74
- .enum(["Very Good", "Good", "Ok", "Bad", "Very Bad"])
75
- .describe("The rating of the output"),
76
- reasoning: z.string().describe("The reasoning of the rating, including justification"),
77
- score: z.number().int().min(1).max(5).describe("The score of the output, 1–5, 5 is the best"),
78
- }),
67
+ `,
68
+ inputSchema: z$1.object({
69
+ input: z$1.string().describe("The input content to analyze"),
70
+ output: z$1.string().describe("The output content to analyze"),
71
+ expectedOutput: z$1.string().describe("The expected output content to analyze")
72
+ }),
73
+ outputSchema: z$1.object({
74
+ rating: z$1.enum([
75
+ "Very Good",
76
+ "Good",
77
+ "Ok",
78
+ "Bad",
79
+ "Very Bad"
80
+ ]).describe("The rating of the output"),
81
+ reasoning: z$1.string().describe("The reasoning of the rating, including justification"),
82
+ score: z$1.number().int().min(1).max(5).describe("The score of the output, 1–5, 5 is the best")
83
+ })
79
84
  });
80
- export class LLMEvaluator {
81
- aigne;
82
- agent;
83
- name = "llm-as-judge";
84
- constructor(aigne = new AIGNE(), agent = defaultAgent) {
85
- this.aigne = aigne;
86
- this.agent = agent;
87
- }
88
- async evaluate(dataset) {
89
- const result = await this.aigne.invoke(this.agent, {
90
- input: typeof dataset.input === "string"
91
- ? dataset.input
92
- : JSON.stringify(dataset.input, null, 2),
93
- output: dataset.output ? JSON.stringify(dataset.output, null, 2) : "",
94
- expectedOutput: JSON.stringify(dataset.expected, null, 2),
95
- }, { returnMetadata: true });
96
- return [
97
- {
98
- name: this.name,
99
- rating: result.rating,
100
- score: result.score,
101
- reason: result.reasoning,
102
- usage: result?.$meta?.usage ?? {},
103
- },
104
- ];
105
- }
106
- }
85
+ var LLMEvaluator = class {
86
+ name = "llm-as-judge";
87
+ constructor(aigne = new AIGNE(), agent = defaultAgent) {
88
+ this.aigne = aigne;
89
+ this.agent = agent;
90
+ }
91
+ async evaluate(dataset) {
92
+ const result = await this.aigne.invoke(this.agent, {
93
+ input: typeof dataset.input === "string" ? dataset.input : JSON.stringify(dataset.input, null, 2),
94
+ output: dataset.output ? JSON.stringify(dataset.output, null, 2) : "",
95
+ expectedOutput: JSON.stringify(dataset.expected, null, 2)
96
+ }, { returnMetadata: true });
97
+ return [{
98
+ name: this.name,
99
+ rating: result.rating,
100
+ score: result.score,
101
+ reason: result.reasoning,
102
+ usage: result?.$meta?.usage ?? {}
103
+ }];
104
+ }
105
+ };
106
+
107
+ //#endregion
108
+ export { LLMEvaluator };
109
+ //# sourceMappingURL=evaluator.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.mjs","names":["z"],"sources":["../../../src/utils/evaluation/evaluator.ts"],"sourcesContent":["import { type Agent, AIAgent, AIGNE } from \"@aigne/core\";\nimport { z } from \"zod\";\nimport type { DatasetItem, Evaluation, Evaluator } from \"./type.js\";\n\nconst EVALUATOR_PROMPT = `\n# Instructions\nYou are an expert evaluator. Your task is to evaluate the quality of AI-generated responses.\nYou will be given:\n1. User Input (Prompt)\n2. AI-generated Output\n3. Expected Output\n\n## Evaluation Methods\nFollow these three correlation checks before assigning a score:\n1. **AI Output vs User Input**: Check if the AI response is relevant to the user input.\n2. **Expected Output vs User Input**: Check if the expected output is relevant to the user input.\n3. **AI Output vs Expected Output**: Check the similarity and alignment between the AI output and the expected output.\n\nThen assign a rating and a score based on the overall quality.\n\n## Criteria\n- **Instruction following**: Does the AI response follow the prompt’s requirements?\n- **Groundedness**: Is the AI response consistent with the expected output and free from irrelevant information?\n- **Completeness**: Does the AI response fully address the task?\n- **Accuracy/Correctness**: Is the AI response factually correct and logically consistent?\n- **Fluency**: Is the AI response clear, structured, and easy to read?\n\n## Rating Rubric (1–5)\n- **5 - Very Good**: Highly relevant, closely aligned with the expected output, accurate, complete, and fluent.\n- **4 - Good**: Relevant, mostly aligned with the expected output, generally accurate and complete, only minor issues.\n- **3 - Ok**: Somewhat relevant, partially aligned, or missing important details.\n- **2 - Bad**: Weak relevance, low similarity with expected output, contains significant errors or omissions.\n- **1 - Very Bad**: Irrelevant, fails to align with expected output, or completely incorrect.\n\n## Evaluation Steps\n1. Compare the **semantic content** of AI Output vs Expected Output.\n - Ignore JSON keys, object structure, formatting, whitespace, capitalization, and minor punctuation differences.\n - If meaning is the same but phrasing differs slightly, assign a higher score (4–5).\n - If AI output deviates significantly, assign a lower score (1–2).\n - If AI output is empty, assign a lower score (1–2).\n2. Assess against criteria: instruction following, groundedness, completeness, correctness, fluency.\n3. Assign a 1–5 integer score.\n4. Provide reasoning, and explicitly justify why this result is **not** a 1/2/3 case (why it avoids being a negative example).\n\n# Response Output Format\nYour output must strictly follow this three-line format:\n- First line: rating (Very Good, Good, Ok, Bad, Very Bad)\n- Second line: reasoning (must include justification why it is not a 1, 2, or 3 if scored higher)\n- Third line: SCORE: [1-5]\n\nExample:\nGood\nThe response follows most instructions and is largely consistent with the expected output, but it omits one detail. This prevents it from being 5. However, it is more accurate and complete than an \"Ok\" response, so it deserves 4.\nSCORE: 4\n\n# User Inputs and AI-generated Response\n### Input\n{{input}}\n\n### AI-generated Output\n{{output}}\n\n### Expected Output\n{{expectedOutput}}\n`;\n\nconst defaultAgent = AIAgent.from({\n name: \"LLMEvaluator\",\n instructions: EVALUATOR_PROMPT,\n inputSchema: z.object({\n input: z.string().describe(\"The input content to analyze\"),\n output: z.string().describe(\"The output content to analyze\"),\n expectedOutput: z.string().describe(\"The expected output content to analyze\"),\n }),\n outputSchema: z.object({\n rating: z\n .enum([\"Very Good\", \"Good\", \"Ok\", \"Bad\", \"Very Bad\"])\n .describe(\"The rating of the output\"),\n reasoning: z.string().describe(\"The reasoning of the rating, including justification\"),\n score: z.number().int().min(1).max(5).describe(\"The score of the output, 1–5, 5 is the best\"),\n }),\n});\n\nexport class LLMEvaluator implements Evaluator {\n name = \"llm-as-judge\";\n\n constructor(\n private readonly aigne: AIGNE = new AIGNE(),\n private readonly agent: Agent = defaultAgent,\n ) {}\n\n async evaluate(dataset: DatasetItem): Promise<Evaluation[]> {\n const result = await this.aigne.invoke(\n this.agent,\n {\n input:\n typeof dataset.input === \"string\"\n ? dataset.input\n : JSON.stringify(dataset.input, null, 2),\n output: dataset.output ? JSON.stringify(dataset.output, null, 2) : \"\",\n expectedOutput: JSON.stringify(dataset.expected, null, 2),\n },\n { returnMetadata: true },\n );\n\n return [\n {\n name: this.name,\n rating: result.rating,\n score: result.score,\n reason: result.reasoning,\n usage: result?.$meta?.usage ?? {},\n },\n ];\n }\n}\n"],"mappings":";;;;AAkEA,MAAM,eAAe,QAAQ,KAAK;CAChC,MAAM;CACN,cAhEuB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiEvB,aAAaA,IAAE,OAAO;EACpB,OAAOA,IAAE,QAAQ,CAAC,SAAS,+BAA+B;EAC1D,QAAQA,IAAE,QAAQ,CAAC,SAAS,gCAAgC;EAC5D,gBAAgBA,IAAE,QAAQ,CAAC,SAAS,yCAAyC;EAC9E,CAAC;CACF,cAAcA,IAAE,OAAO;EACrB,QAAQA,IACL,KAAK;GAAC;GAAa;GAAQ;GAAM;GAAO;GAAW,CAAC,CACpD,SAAS,2BAA2B;EACvC,WAAWA,IAAE,QAAQ,CAAC,SAAS,uDAAuD;EACtF,OAAOA,IAAE,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,8CAA8C;EAC9F,CAAC;CACH,CAAC;AAEF,IAAa,eAAb,MAA+C;CAC7C,OAAO;CAEP,YACE,AAAiB,QAAe,IAAI,OAAO,EAC3C,AAAiB,QAAe,cAChC;EAFiB;EACA;;CAGnB,MAAM,SAAS,SAA6C;EAC1D,MAAM,SAAS,MAAM,KAAK,MAAM,OAC9B,KAAK,OACL;GACE,OACE,OAAO,QAAQ,UAAU,WACrB,QAAQ,QACR,KAAK,UAAU,QAAQ,OAAO,MAAM,EAAE;GAC5C,QAAQ,QAAQ,SAAS,KAAK,UAAU,QAAQ,QAAQ,MAAM,EAAE,GAAG;GACnE,gBAAgB,KAAK,UAAU,QAAQ,UAAU,MAAM,EAAE;GAC1D,EACD,EAAE,gBAAgB,MAAM,CACzB;AAED,SAAO,CACL;GACE,MAAM,KAAK;GACX,QAAQ,OAAO;GACf,OAAO,OAAO;GACd,QAAQ,OAAO;GACf,OAAO,QAAQ,OAAO,SAAS,EAAE;GAClC,CACF"}