@exercode/problem-utils 1.5.5 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- "use strict";var e=require("../_virtual/_rollupPluginBabelHelpers.cjs"),r=require("node:fs"),s=require("node:path"),t=require("@ai-sdk/google"),n=require("ai"),a=require("zod"),i=require("../helpers/parseArgs.cjs"),o=require("../helpers/printTestCaseResult.cjs"),u=require("../helpers/readTestCases.cjs"),c=require("../types/decisionCode.cjs"),p=a.z.object({model:a.z.enum(["google/gemini-2.5-flash-lite"])});function l(){return(l=e.asyncToGenerator(e.regenerator().m(function a(l,d){var C,g,f,m,v,q,T,j,b,h,D,E,R,k,w,A,x,y;return e.regenerator().w(function(a){for(;;)switch(a.p=a.n){case 0:return C=i.parseArgs(process.argv),g=p.parse(C.params),a.n=1,u.readTestCases(s.join(l,"test_cases"));case 1:return f=a.v,a.n=2,r.promises.readFile(s.join(C.cwd,"prompt.txt"),"utf8");case 2:m=a.v,v=e.createForOfIteratorHelper(f),a.p=3,v.s();case 4:if((q=v.n()).done){a.n=11;break}return T=q.value,j=Date.now(),a.p=5,a.n=6,n.generateText({model:t.google(g.model.slice(7)),prompt:m.replaceAll("{input}",null!=(b=T.input)?b:"")});case 6:return h=a.v,D=h.text,E=Date.now(),w=e.objectSpread2,A={testCaseId:T.id,decisionCode:c.DecisionCode.ACCEPTED,stdin:T.input,stdout:D,timeSeconds:(E-j)/1e3},a.n=7,d.test({testCase:T,result:{output:D}});case 7:if(R=w(A,a.v),o.printTestCaseResult(R),R.decisionCode===c.DecisionCode.ACCEPTED){a.n=8;break}return a.a(3,11);case 8:a.n=10;break;case 9:return a.p=9,x=a.v,k=Date.now(),o.printTestCaseResult({testCaseId:T.id,decisionCode:c.DecisionCode.RUNTIME_ERROR,stdin:T.input,stderr:x instanceof Error?x.message:String(x),timeSeconds:(k-j)/1e3}),a.a(3,11);case 10:a.n=4;break;case 11:a.n=13;break;case 12:a.p=12,y=a.v,v.e(y);case 13:return a.p=13,v.f(),a.f(13);case 14:return a.a(2)}},a,null,[[5,9],[3,12,13,14]])}))).apply(this,arguments)}exports.llmJudgePreset=function(e,r){return l.apply(this,arguments)};
1
+ "use strict";var e=require("../_virtual/_rollupPluginBabelHelpers.cjs"),r=require("node:fs"),s=require("node:path"),t=require("@ai-sdk/google"),n=require("ai"),a=require("zod"),i=require("../helpers/parseArgs.cjs"),o=require("../helpers/printTestCaseResult.cjs"),u=require("../helpers/readTestCases.cjs"),c=require("../types/decisionCode.cjs"),l=a.z.object({model:a.z.enum(["google/gemini-2.5-flash-lite"])});function p(){return(p=e.asyncToGenerator(e.regenerator().m(function a(p,d){var C,g,m,f,v,q,T,b,j,h,D,E,R,k,w,A,P,x,y,I;return e.regenerator().w(function(a){for(;;)switch(a.p=a.n){case 0:return C=i.parseArgs(process.argv),g=l.parse(C.params),a.n=1,u.readTestCases(s.join(p,"test_cases"));case 1:return m=a.v,a.n=2,r.promises.readFile(s.join(C.cwd,"prompt.txt"),"utf8");case 2:f=a.v,v=e.createForOfIteratorHelper(m),a.p=3,v.s();case 4:if((q=v.n()).done){a.n=11;break}return T=q.value,b=Date.now(),a.p=5,a.n=6,n.generateText({model:t.google(g.model.slice(7)),prompt:null!=(j=null==(h=d.buildPrompt)?void 0:h.call(d,{prompt:f,testCase:T}))?j:f.replaceAll("{input}",null!=(D=T.input)?D:"")});case 6:return E=a.v,R=E.text,k=Date.now(),P=e.objectSpread2,x={testCaseId:T.id,decisionCode:c.DecisionCode.ACCEPTED,stdin:T.input,stdout:R,timeSeconds:(k-b)/1e3},a.n=7,d.test({testCase:T,result:{output:R}});case 7:if(w=P(x,a.v),o.printTestCaseResult(w),w.decisionCode===c.DecisionCode.ACCEPTED){a.n=8;break}return a.a(3,11);case 8:a.n=10;break;case 9:return a.p=9,y=a.v,A=Date.now(),o.printTestCaseResult({testCaseId:T.id,decisionCode:c.DecisionCode.RUNTIME_ERROR,stdin:T.input,stderr:y instanceof Error?y.message:String(y),timeSeconds:(A-b)/1e3}),a.a(3,11);case 10:a.n=4;break;case 11:a.n=13;break;case 12:a.p=12,I=a.v,v.e(I);case 13:return a.p=13,v.f(),a.f(13);case 14:return a.a(2)}},a,null,[[5,9],[3,12,13,14]])}))).apply(this,arguments)}exports.llmJudgePreset=function(e,r){return p.apply(this,arguments)};
2
2
  //# sourceMappingURL=llm.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm.cjs","sources":["../../src/presets/llm.ts"],"sourcesContent":["import fs from 'node:fs';\nimport path from 'node:path';\n\nimport { google } from '@ai-sdk/google';\nimport { generateText } from 'ai';\nimport { z } from 'zod';\n\nimport { parseArgs } from '../helpers/parseArgs.js';\nimport { printTestCaseResult } from '../helpers/printTestCaseResult.js';\nimport { readTestCases } from '../helpers/readTestCases.js';\nimport { DecisionCode } from '../types/decisionCode.js';\nimport type { TestCaseResult } from '../types/testCaseResult.js';\n\nconst PROMPT_FILENAME = 'prompt.txt';\n\nconst judgeParamsSchema = z.object({\n model: z.enum(['google/gemini-2.5-flash-lite']),\n});\n\ninterface LlmJudgePresetOptions {\n test: (context: {\n testCase: { id: string; input?: string; output?: string };\n result: { output: string };\n }) => Partial<TestCaseResult> | Promise<Partial<TestCaseResult>>;\n}\n\n/**\n * A preset judge function for running and testing a user prompt in LLM.\n *\n * @example\n * Create `judge.ts`:\n * ```ts\n * import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';\n * import { DecisionCode } from '@exercode/problem-utils';\n *\n * await llmJudgePreset(import.meta.dirname, {\n * test: (context) {\n * return { decisionCode: context.result.output ? DecisionCode.ACCEPTED : DecisionCode.WRONG_ANSWER };\n * }\n * });\n * ```\n *\n * Run with the required parameters:\n * ```bash\n * bun judge.ts model_answers/java '{ \"model\": \"gemini-2.5-flash-lite\" }'\n * ```\n */\nexport async function llmJudgePreset(problemDir: string, options: LlmJudgePresetOptions): Promise<void> {\n const args = parseArgs(process.argv);\n const params = judgeParamsSchema.parse(args.params);\n\n const testCases = await readTestCases(path.join(problemDir, 'test_cases'));\n\n const prompt = await fs.promises.readFile(path.join(args.cwd, PROMPT_FILENAME), 'utf8');\n\n for (const testCase of testCases) {\n const startTimeMilliseconds = Date.now();\n try {\n // requires `GOOGLE_GENERATIVE_AI_API_KEY`\n const { text } = await generateText({\n model: google(params.model.slice('google/'.length)),\n prompt: prompt.replaceAll('{input}', testCase.input ?? ''),\n });\n\n const stopTimeMilliseconds = Date.now();\n\n const testCaseResult = {\n testCaseId: testCase.id,\n decisionCode: DecisionCode.ACCEPTED,\n stdin: testCase.input,\n stdout: text,\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n ...(await options.test({ testCase, result: { output: text } })),\n };\n\n printTestCaseResult(testCaseResult);\n\n if (testCaseResult.decisionCode !== DecisionCode.ACCEPTED) break;\n } catch (error) {\n const stopTimeMilliseconds = Date.now();\n\n printTestCaseResult({\n testCaseId: testCase.id,\n decisionCode: DecisionCode.RUNTIME_ERROR,\n stdin: testCase.input,\n stderr: error instanceof Error ? error.message : String(error),\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n });\n\n break;\n }\n }\n}\n"],"names":["judgeParamsSchema","z","object","model","_llmJudgePreset","_asyncToGenerator","_regenerator","m","_callee","problemDir","options","args","params","testCases","prompt","_iterator","_step","testCase","startTimeMilliseconds","_testCase$input","_yield$generateText","text","stopTimeMilliseconds","testCaseResult","_stopTimeMilliseconds","_t","_t2","_t3","_t4","w","_context","p","n","parseArgs","process","argv","parse","readTestCases","path","join","v","fs","promises","readFile","cwd","_createForOfIteratorHelper","s","done","value","Date","now","generateText","google","slice","replaceAll","input","_objectSpread","testCaseId","id","decisionCode","DecisionCode","ACCEPTED","stdin","stdout","timeSeconds","test","result","output","printTestCaseResult","a","RUNTIME_ERROR","stderr","Error","message","String","e","f","apply","this","arguments","_x","_x2"],"mappings":"wVAeMA,EAAoBC,EAAAA,EAAEC,OAAO,CACjCC,MAAOF,EAAAA,EAAC,KAAM,CAAC,mCA4EhB,SAAAG,IAAA,OAAAA,EAAAC,EAAAA,iBAAAC,gBAAAC,EA7CM,SAAAC,EAA8BC,EAAoBC,GAA8B,IAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAtB,gBAAAuB,EAAA,SAAAC,GAAA,cAAAA,EAAAC,EAAAD,EAAAE,GAAA,KAAA,EAElC,OAD7CrB,EAAOsB,EAAAA,UAAUC,QAAQC,MACzBvB,EAASZ,EAAkBoC,MAAMzB,EAAKC,QAAOkB,EAAAE,EAAA,EAE3BK,EAAAA,cAAcC,EAAKC,KAAK9B,EAAY,eAAc,KAAA,EAA3D,OAATI,EAASiB,EAAAU,EAAAV,EAAAE,EAAA,EAEMS,EAAGC,SAASC,SAASL,EAAKC,KAAK5B,EAAKiC,IAxCnC,cAwC0D,QAAO,KAAA,EAAjF9B,EAAMgB,EAAAU,EAAAzB,EAAA8B,EAAAA,0BAEWhC,GAASiB,EAAAC,EAAA,EAAAhB,EAAA+B,IAAA,KAAA,EAAA,IAAA9B,EAAAD,EAAAiB,KAAAe,KAAA,CAAAjB,EAAAE,EAAA,GAAA,KAAA,CACU,OAD/Bf,EAAQD,EAAAgC,MACX9B,EAAwB+B,KAAKC,MAAKpB,EAAAC,EAAA,EAAAD,EAAAE,EAAA,EAGfmB,eAAa,CAClChD,MAAOiD,EAAAA,OAAOxC,EAAOT,MAAMkD,MAAM,IACjCvC,OAAQA,EAAOwC,WAAW,UAAyB,OAAhBnC,EAAEF,EAASsC,OAAKpC,EAAI,MACvD,KAAA,EASkE,OATlEC,EAAAU,EAAAU,EAHMnB,EAAID,EAAJC,KAKFC,EAAuB2B,KAAKC,MAAKzB,EAAA+B,EAAAA,cAAA9B,EAAA,CAGrC+B,WAAYxC,EAASyC,GACrBC,aAAcC,EAAAA,aAAaC,SAC3BC,MAAO7C,EAASsC,MAChBQ,OAAQ1C,EACR2C,aAAc1C,EAAuBJ,GAAyB,KAAIY,EAAAE,EAAA,EACxDtB,EAAQuD,KAAK,CAAEhD,SAAAA,EAAUiD,OAAQ,CAAEC,OAAQ9C,KAAS,KAAA,EAG5B,GAT9BE,EAAcE,EAAAC,EAAAI,EAAAU,GASpB4B,EAAAA,oBAAoB7C,GAEhBA,EAAeoC,eAAiBC,EAAAA,aAAaC,SAAQ,CAAA/B,EAAAE,EAAA,EAAA,KAAA,CAAA,OAAAF,EAAAuC,EAAA,EAAA,IAAA,KAAA,EAAAvC,EAAAE,EAAA,GAAA,MAAA,KAAA,EAUtD,OAVsDF,EAAAC,EAAA,EAAAJ,EAAAG,EAAAU,EAEnDlB,EAAuB2B,KAAKC,MAElCkB,sBAAoB,CAClBX,WAAYxC,EAASyC,GACrBC,aAAcC,EAAAA,aAAaU,cAC3BR,MAAO7C,EAASsC,MAChBgB,OAAQ5C,aAAiB6C,MAAQ7C,EAAM8C,QAAUC,OAAM/C,GACvDqC,aAAc1C,EAAuBJ,GAAyB,MAC7DY,EAAAuC,EAAA,EAAA,IAAA,KAAA,GAAAvC,EAAAE,EAAA,EAAA,MAAA,KAAA,GAAAF,EAAAE,EAAA,GAAA,MAAA,KAAA,GAAAF,EAAAC,EAAA,GAAAH,EAAAE,EAAAU,EAAAzB,EAAA4D,EAAA/C,GAAA,KAAA,GAAA,OAAAE,EAAAC,EAAA,GAAAhB,EAAA6D,IAAA9C,EAAA8C,EAAA,IAAA,KAAA,GAAA,OAAA9C,EAAAuC,EAAA,GAAA,EAAA7D,EAAA,KAAA,CAAA,CAAA,EAAA,GAAA,CAAA,EAAA,GAAA,GAAA,KAAA,KAKRqE,MAAAC,KAAAC,UAAA,wBA7CD,SAAoCC,EAAAC,GAAA,OAAA7E,EAAAyE,MAAAC,KAAAC,UAAA"}
1
+ {"version":3,"file":"llm.cjs","sources":["../../src/presets/llm.ts"],"sourcesContent":["import fs from 'node:fs';\nimport path from 'node:path';\n\nimport { google } from '@ai-sdk/google';\nimport type { ModelMessage } from 'ai';\nimport { generateText } from 'ai';\nimport { z } from 'zod';\n\nimport { parseArgs } from '../helpers/parseArgs.js';\nimport { printTestCaseResult } from '../helpers/printTestCaseResult.js';\nimport { readTestCases } from '../helpers/readTestCases.js';\nimport { DecisionCode } from '../types/decisionCode.js';\nimport type { TestCaseResult } from '../types/testCaseResult.js';\n\nconst PROMPT_FILENAME = 'prompt.txt';\n\nconst judgeParamsSchema = z.object({\n model: z.enum(['google/gemini-2.5-flash-lite']),\n});\n\ninterface LlmJudgePresetOptions {\n buildPrompt?: (context: {\n prompt: string;\n testCase: { id: string; input?: string; output?: string };\n }) => string | ModelMessage[];\n test: (context: {\n testCase: { id: string; input?: string; output?: string };\n result: { output: string };\n }) => Partial<TestCaseResult> | Promise<Partial<TestCaseResult>>;\n}\n\n/**\n * A preset judge function for running and testing a user prompt in LLM.\n *\n * @example\n * Create `judge.ts`:\n * ```ts\n * import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';\n * import { DecisionCode } from '@exercode/problem-utils';\n *\n * await llmJudgePreset(import.meta.dirname, {\n * test: (context) {\n * return { decisionCode: context.result.output ? DecisionCode.ACCEPTED : DecisionCode.WRONG_ANSWER };\n * }\n * });\n * ```\n *\n * Run with the required parameters:\n * ```bash\n * bun judge.ts model_answers/java '{ \"model\": \"gemini-2.5-flash-lite\" }'\n * ```\n */\nexport async function llmJudgePreset(problemDir: string, options: LlmJudgePresetOptions): Promise<void> {\n const args = parseArgs(process.argv);\n const params = judgeParamsSchema.parse(args.params);\n\n const testCases = await readTestCases(path.join(problemDir, 'test_cases'));\n\n const prompt = await fs.promises.readFile(path.join(args.cwd, PROMPT_FILENAME), 'utf8');\n\n for (const testCase of testCases) {\n const startTimeMilliseconds = Date.now();\n try {\n // requires `GOOGLE_GENERATIVE_AI_API_KEY`\n const { text } = await generateText({\n model: google(params.model.slice('google/'.length)),\n prompt: options.buildPrompt?.({ prompt, testCase }) ?? prompt.replaceAll('{input}', testCase.input ?? ''),\n });\n\n const stopTimeMilliseconds = Date.now();\n\n const testCaseResult = {\n testCaseId: testCase.id,\n decisionCode: DecisionCode.ACCEPTED,\n stdin: testCase.input,\n stdout: text,\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n ...(await options.test({ testCase, result: { output: text } })),\n };\n\n printTestCaseResult(testCaseResult);\n\n if (testCaseResult.decisionCode !== DecisionCode.ACCEPTED) break;\n } catch (error) {\n const stopTimeMilliseconds = Date.now();\n\n printTestCaseResult({\n testCaseId: testCase.id,\n decisionCode: DecisionCode.RUNTIME_ERROR,\n stdin: testCase.input,\n stderr: error instanceof Error ? error.message : String(error),\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n });\n\n break;\n }\n }\n}\n"],"names":["judgeParamsSchema","z","object","model","_llmJudgePreset","_asyncToGenerator","_regenerator","m","_callee","problemDir","options","args","params","testCases","prompt","_iterator","_step","testCase","startTimeMilliseconds","_options$buildPrompt","_options$buildPrompt2","_testCase$input","_yield$generateText","text","stopTimeMilliseconds","testCaseResult","_stopTimeMilliseconds","_t","_t2","_t3","_t4","w","_context","p","n","parseArgs","process","argv","parse","readTestCases","path","join","v","fs","promises","readFile","cwd","_createForOfIteratorHelper","s","done","value","Date","now","generateText","google","slice","buildPrompt","call","replaceAll","input","_objectSpread","testCaseId","id","decisionCode","DecisionCode","ACCEPTED","stdin","stdout","timeSeconds","test","result","output","printTestCaseResult","a","RUNTIME_ERROR","stderr","Error","message","String","e","f","apply","this","arguments","_x","_x2"],"mappings":"wVAgBMA,EAAoBC,EAAAA,EAAEC,OAAO,CACjCC,MAAOF,EAAAA,EAAC,KAAM,CAAC,mCAgFhB,SAAAG,IAAA,OAAAA,EAAAC,EAAAA,iBAAAC,gBAAAC,EA7CM,SAAAC,EAA8BC,EAAoBC,GAA8B,IAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAxB,gBAAAyB,EAAA,SAAAC,GAAA,cAAAA,EAAAC,EAAAD,EAAAE,GAAA,KAAA,EAElC,OAD7CvB,EAAOwB,EAAAA,UAAUC,QAAQC,MACzBzB,EAASZ,EAAkBsC,MAAM3B,EAAKC,QAAOoB,EAAAE,EAAA,EAE3BK,EAAAA,cAAcC,EAAKC,KAAKhC,EAAY,eAAc,KAAA,EAA3D,OAATI,EAASmB,EAAAU,EAAAV,EAAAE,EAAA,EAEMS,EAAGC,SAASC,SAASL,EAAKC,KAAK9B,EAAKmC,IA5CnC,cA4C0D,QAAO,KAAA,EAAjFhC,EAAMkB,EAAAU,EAAA3B,EAAAgC,EAAAA,0BAEWlC,GAASmB,EAAAC,EAAA,EAAAlB,EAAAiC,IAAA,KAAA,EAAA,IAAAhC,EAAAD,EAAAmB,KAAAe,KAAA,CAAAjB,EAAAE,EAAA,GAAA,KAAA,CACU,OAD/BjB,EAAQD,EAAAkC,MACXhC,EAAwBiC,KAAKC,MAAKpB,EAAAC,EAAA,EAAAD,EAAAE,EAAA,EAGfmB,eAAa,CAClClD,MAAOmD,EAAAA,OAAO1C,EAAOT,MAAMoD,MAAM,IACjCzC,OAAmD,OAA7CK,EAAqB,OAArBC,EAAEV,EAAQ8C,kBAAW,EAAnBpC,EAAAqC,KAAA/C,EAAsB,CAAEI,OAAAA,EAAQG,SAAAA,KAAWE,EAAIL,EAAO4C,WAAW,UAAyB,OAAhBrC,EAAEJ,EAAS0C,OAAKtC,EAAI,MACtG,KAAA,EASkE,OATlEC,EAAAU,EAAAU,EAHMnB,EAAID,EAAJC,KAKFC,EAAuB2B,KAAKC,MAAKzB,EAAAiC,EAAAA,cAAAhC,EAAA,CAGrCiC,WAAY5C,EAAS6C,GACrBC,aAAcC,EAAAA,aAAaC,SAC3BC,MAAOjD,EAAS0C,MAChBQ,OAAQ5C,EACR6C,aAAc5C,EAAuBN,GAAyB,KAAIc,EAAAE,EAAA,EACxDxB,EAAQ2D,KAAK,CAAEpD,SAAAA,EAAUqD,OAAQ,CAAEC,OAAQhD,KAAS,KAAA,EAG5B,GAT9BE,EAAcE,EAAAC,EAAAI,EAAAU,GASpB8B,EAAAA,oBAAoB/C,GAEhBA,EAAesC,eAAiBC,EAAAA,aAAaC,SAAQ,CAAAjC,EAAAE,EAAA,EAAA,KAAA,CAAA,OAAAF,EAAAyC,EAAA,EAAA,IAAA,KAAA,EAAAzC,EAAAE,EAAA,GAAA,MAAA,KAAA,EAUtD,OAVsDF,EAAAC,EAAA,EAAAJ,EAAAG,EAAAU,EAEnDlB,EAAuB2B,KAAKC,MAElCoB,sBAAoB,CAClBX,WAAY5C,EAAS6C,GACrBC,aAAcC,EAAAA,aAAaU,cAC3BR,MAAOjD,EAAS0C,MAChBgB,OAAQ9C,aAAiB+C,MAAQ/C,EAAMgD,QAAUC,OAAMjD,GACvDuC,aAAc5C,EAAuBN,GAAyB,MAC7Dc,EAAAyC,EAAA,EAAA,IAAA,KAAA,GAAAzC,EAAAE,EAAA,EAAA,MAAA,KAAA,GAAAF,EAAAE,EAAA,GAAA,MAAA,KAAA,GAAAF,EAAAC,EAAA,GAAAH,EAAAE,EAAAU,EAAA3B,EAAAgE,EAAAjD,GAAA,KAAA,GAAA,OAAAE,EAAAC,EAAA,GAAAlB,EAAAiE,IAAAhD,EAAAgD,EAAA,IAAA,KAAA,GAAA,OAAAhD,EAAAyC,EAAA,GAAA,EAAAjE,EAAA,KAAA,CAAA,CAAA,EAAA,GAAA,CAAA,EAAA,GAAA,GAAA,KAAA,KAKRyE,MAAAC,KAAAC,UAAA,wBA7CD,SAAoCC,EAAAC,GAAA,OAAAjF,EAAA6E,MAAAC,KAAAC,UAAA"}
@@ -1,5 +1,14 @@
1
+ import type { ModelMessage } from 'ai';
1
2
  import type { TestCaseResult } from '../types/testCaseResult.js';
2
3
  interface LlmJudgePresetOptions {
4
+ buildPrompt?: (context: {
5
+ prompt: string;
6
+ testCase: {
7
+ id: string;
8
+ input?: string;
9
+ output?: string;
10
+ };
11
+ }) => string | ModelMessage[];
3
12
  test: (context: {
4
13
  testCase: {
5
14
  id: string;
@@ -1,2 +1,2 @@
1
- import{asyncToGenerator as e,regenerator as r,objectSpread2 as t,createForOfIteratorHelper as s}from"../_virtual/_rollupPluginBabelHelpers.js";import o from"node:fs";import n from"node:path";import{google as a}from"@ai-sdk/google";import{generateText as i}from"ai";import{z as p}from"zod";import{parseArgs as c}from"../helpers/parseArgs.js";import{printTestCaseResult as m}from"../helpers/printTestCaseResult.js";import{readTestCases as u}from"../helpers/readTestCases.js";import{DecisionCode as l}from"../types/decisionCode.js";var d=p.object({model:p.enum(["google/gemini-2.5-flash-lite"])});function f(e,r){return C.apply(this,arguments)}function C(){return(C=e(r().m(function e(p,f){var C,g,v,j,b,h,E,k,w,T,D,R,y,A,I,S,_,x;return r().w(function(e){for(;;)switch(e.p=e.n){case 0:return C=c(process.argv),g=d.parse(C.params),e.n=1,u(n.join(p,"test_cases"));case 1:return v=e.v,e.n=2,o.promises.readFile(n.join(C.cwd,"prompt.txt"),"utf8");case 2:j=e.v,b=s(v),e.p=3,b.s();case 4:if((h=b.n()).done){e.n=11;break}return E=h.value,k=Date.now(),e.p=5,e.n=6,i({model:a(g.model.slice(7)),prompt:j.replaceAll("{input}",null!=(w=E.input)?w:"")});case 6:return T=e.v,D=T.text,R=Date.now(),I=t,S={testCaseId:E.id,decisionCode:l.ACCEPTED,stdin:E.input,stdout:D,timeSeconds:(R-k)/1e3},e.n=7,f.test({testCase:E,result:{output:D}});case 7:if(y=I(S,e.v),m(y),y.decisionCode===l.ACCEPTED){e.n=8;break}return e.a(3,11);case 8:e.n=10;break;case 9:return e.p=9,_=e.v,A=Date.now(),m({testCaseId:E.id,decisionCode:l.RUNTIME_ERROR,stdin:E.input,stderr:_ instanceof Error?_.message:String(_),timeSeconds:(A-k)/1e3}),e.a(3,11);case 10:e.n=4;break;case 11:e.n=13;break;case 12:e.p=12,x=e.v,b.e(x);case 13:return e.p=13,b.f(),e.f(13);case 14:return e.a(2)}},e,null,[[5,9],[3,12,13,14]])}))).apply(this,arguments)}export{f as llmJudgePreset};
1
+ import{asyncToGenerator as e,regenerator as r,objectSpread2 as t,createForOfIteratorHelper as s}from"../_virtual/_rollupPluginBabelHelpers.js";import o from"node:fs";import n from"node:path";import{google as a}from"@ai-sdk/google";import{generateText as i}from"ai";import{z as p}from"zod";import{parseArgs as l}from"../helpers/parseArgs.js";import{printTestCaseResult as c}from"../helpers/printTestCaseResult.js";import{readTestCases as m}from"../helpers/readTestCases.js";import{DecisionCode as u}from"../types/decisionCode.js";var d=p.object({model:p.enum(["google/gemini-2.5-flash-lite"])});function f(e,r){return C.apply(this,arguments)}function C(){return(C=e(r().m(function e(p,f){var C,v,g,b,j,h,E,k,w,T,D,R,y,A,I,P,S,_,x,F;return r().w(function(e){for(;;)switch(e.p=e.n){case 0:return C=l(process.argv),v=d.parse(C.params),e.n=1,m(n.join(p,"test_cases"));case 1:return g=e.v,e.n=2,o.promises.readFile(n.join(C.cwd,"prompt.txt"),"utf8");case 2:b=e.v,j=s(g),e.p=3,j.s();case 4:if((h=j.n()).done){e.n=11;break}return E=h.value,k=Date.now(),e.p=5,e.n=6,i({model:a(v.model.slice(7)),prompt:null!=(w=null==(T=f.buildPrompt)?void 0:T.call(f,{prompt:b,testCase:E}))?w:b.replaceAll("{input}",null!=(D=E.input)?D:"")});case 6:return R=e.v,y=R.text,A=Date.now(),S=t,_={testCaseId:E.id,decisionCode:u.ACCEPTED,stdin:E.input,stdout:y,timeSeconds:(A-k)/1e3},e.n=7,f.test({testCase:E,result:{output:y}});case 7:if(I=S(_,e.v),c(I),I.decisionCode===u.ACCEPTED){e.n=8;break}return e.a(3,11);case 8:e.n=10;break;case 9:return e.p=9,x=e.v,P=Date.now(),c({testCaseId:E.id,decisionCode:u.RUNTIME_ERROR,stdin:E.input,stderr:x instanceof Error?x.message:String(x),timeSeconds:(P-k)/1e3}),e.a(3,11);case 10:e.n=4;break;case 11:e.n=13;break;case 12:e.p=12,F=e.v,j.e(F);case 13:return e.p=13,j.f(),e.f(13);case 14:return e.a(2)}},e,null,[[5,9],[3,12,13,14]])}))).apply(this,arguments)}export{f as llmJudgePreset};
2
2
  //# sourceMappingURL=llm.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm.js","sources":["../../src/presets/llm.ts"],"sourcesContent":["import fs from 'node:fs';\nimport path from 'node:path';\n\nimport { google } from '@ai-sdk/google';\nimport { generateText } from 'ai';\nimport { z } from 'zod';\n\nimport { parseArgs } from '../helpers/parseArgs.js';\nimport { printTestCaseResult } from '../helpers/printTestCaseResult.js';\nimport { readTestCases } from '../helpers/readTestCases.js';\nimport { DecisionCode } from '../types/decisionCode.js';\nimport type { TestCaseResult } from '../types/testCaseResult.js';\n\nconst PROMPT_FILENAME = 'prompt.txt';\n\nconst judgeParamsSchema = z.object({\n model: z.enum(['google/gemini-2.5-flash-lite']),\n});\n\ninterface LlmJudgePresetOptions {\n test: (context: {\n testCase: { id: string; input?: string; output?: string };\n result: { output: string };\n }) => Partial<TestCaseResult> | Promise<Partial<TestCaseResult>>;\n}\n\n/**\n * A preset judge function for running and testing a user prompt in LLM.\n *\n * @example\n * Create `judge.ts`:\n * ```ts\n * import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';\n * import { DecisionCode } from '@exercode/problem-utils';\n *\n * await llmJudgePreset(import.meta.dirname, {\n * test: (context) {\n * return { decisionCode: context.result.output ? DecisionCode.ACCEPTED : DecisionCode.WRONG_ANSWER };\n * }\n * });\n * ```\n *\n * Run with the required parameters:\n * ```bash\n * bun judge.ts model_answers/java '{ \"model\": \"gemini-2.5-flash-lite\" }'\n * ```\n */\nexport async function llmJudgePreset(problemDir: string, options: LlmJudgePresetOptions): Promise<void> {\n const args = parseArgs(process.argv);\n const params = judgeParamsSchema.parse(args.params);\n\n const testCases = await readTestCases(path.join(problemDir, 'test_cases'));\n\n const prompt = await fs.promises.readFile(path.join(args.cwd, PROMPT_FILENAME), 'utf8');\n\n for (const testCase of testCases) {\n const startTimeMilliseconds = Date.now();\n try {\n // requires `GOOGLE_GENERATIVE_AI_API_KEY`\n const { text } = await generateText({\n model: google(params.model.slice('google/'.length)),\n prompt: prompt.replaceAll('{input}', testCase.input ?? ''),\n });\n\n const stopTimeMilliseconds = Date.now();\n\n const testCaseResult = {\n testCaseId: testCase.id,\n decisionCode: DecisionCode.ACCEPTED,\n stdin: testCase.input,\n stdout: text,\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n ...(await options.test({ testCase, result: { output: text } })),\n };\n\n printTestCaseResult(testCaseResult);\n\n if (testCaseResult.decisionCode !== DecisionCode.ACCEPTED) break;\n } catch (error) {\n const stopTimeMilliseconds = Date.now();\n\n printTestCaseResult({\n testCaseId: testCase.id,\n decisionCode: DecisionCode.RUNTIME_ERROR,\n stdin: testCase.input,\n stderr: error instanceof Error ? error.message : String(error),\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n });\n\n break;\n }\n }\n}\n"],"names":["judgeParamsSchema","z","object","model","llmJudgePreset","_x","_x2","_llmJudgePreset","apply","this","arguments","_asyncToGenerator","_regenerator","m","_callee","problemDir","options","args","params","testCases","prompt","_iterator","_step","testCase","startTimeMilliseconds","_testCase$input","_yield$generateText","text","stopTimeMilliseconds","testCaseResult","_stopTimeMilliseconds","_t","_t2","_t3","_t4","w","_context","p","n","parseArgs","process","argv","parse","readTestCases","path","join","v","fs","promises","readFile","cwd","_createForOfIteratorHelper","s","done","value","Date","now","generateText","google","slice","replaceAll","input","_objectSpread","testCaseId","id","decisionCode","DecisionCode","ACCEPTED","stdin","stdout","timeSeconds","test","result","output","printTestCaseResult","a","RUNTIME_ERROR","stderr","Error","message","String","e","f"],"mappings":"ihBAaA,IAEMA,EAAoBC,EAAEC,OAAO,CACjCC,MAAOF,EAAC,KAAM,CAAC,mCA+BjB,SAAsBG,EAAcC,EAAAC,GAAA,OAAAC,EAAAC,MAAAC,KAAAC,UAAA,CA6CnC,SAAAH,IAAA,OAAAA,EAAAI,EAAAC,IAAAC,EA7CM,SAAAC,EAA8BC,EAAoBC,GAA8B,IAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAtB,IAAAuB,EAAA,SAAAC,GAAA,cAAAA,EAAAC,EAAAD,EAAAE,GAAA,KAAA,EAElC,OAD7CrB,EAAOsB,EAAUC,QAAQC,MACzBvB,EAASlB,EAAkB0C,MAAMzB,EAAKC,QAAOkB,EAAAE,EAAA,EAE3BK,EAAcC,EAAKC,KAAK9B,EAAY,eAAc,KAAA,EAA3D,OAATI,EAASiB,EAAAU,EAAAV,EAAAE,EAAA,EAEMS,EAAGC,SAASC,SAASL,EAAKC,KAAK5B,EAAKiC,IAxCnC,cAwC0D,QAAO,KAAA,EAAjF9B,EAAMgB,EAAAU,EAAAzB,EAAA8B,EAEWhC,GAASiB,EAAAC,EAAA,EAAAhB,EAAA+B,IAAA,KAAA,EAAA,IAAA9B,EAAAD,EAAAiB,KAAAe,KAAA,CAAAjB,EAAAE,EAAA,GAAA,KAAA,CACU,OAD/Bf,EAAQD,EAAAgC,MACX9B,EAAwB+B,KAAKC,MAAKpB,EAAAC,EAAA,EAAAD,EAAAE,EAAA,EAGfmB,EAAa,CAClCtD,MAAOuD,EAAOxC,EAAOf,MAAMwD,MAAM,IACjCvC,OAAQA,EAAOwC,WAAW,UAAyB,OAAhBnC,EAAEF,EAASsC,OAAKpC,EAAI,MACvD,KAAA,EASkE,OATlEC,EAAAU,EAAAU,EAHMnB,EAAID,EAAJC,KAKFC,EAAuB2B,KAAKC,MAAKzB,EAAA+B,EAAA9B,EAAA,CAGrC+B,WAAYxC,EAASyC,GACrBC,aAAcC,EAAaC,SAC3BC,MAAO7C,EAASsC,MAChBQ,OAAQ1C,EACR2C,aAAc1C,EAAuBJ,GAAyB,KAAIY,EAAAE,EAAA,EACxDtB,EAAQuD,KAAK,CAAEhD,SAAAA,EAAUiD,OAAQ,CAAEC,OAAQ9C,KAAS,KAAA,EAG5B,GAT9BE,EAAcE,EAAAC,EAAAI,EAAAU,GASpB4B,EAAoB7C,GAEhBA,EAAeoC,eAAiBC,EAAaC,SAAQ,CAAA/B,EAAAE,EAAA,EAAA,KAAA,CAAA,OAAAF,EAAAuC,EAAA,EAAA,IAAA,KAAA,EAAAvC,EAAAE,EAAA,GAAA,MAAA,KAAA,EAUtD,OAVsDF,EAAAC,EAAA,EAAAJ,EAAAG,EAAAU,EAEnDlB,EAAuB2B,KAAKC,MAElCkB,EAAoB,CAClBX,WAAYxC,EAASyC,GACrBC,aAAcC,EAAaU,cAC3BR,MAAO7C,EAASsC,MAChBgB,OAAQ5C,aAAiB6C,MAAQ7C,EAAM8C,QAAUC,OAAM/C,GACvDqC,aAAc1C,EAAuBJ,GAAyB,MAC7DY,EAAAuC,EAAA,EAAA,IAAA,KAAA,GAAAvC,EAAAE,EAAA,EAAA,MAAA,KAAA,GAAAF,EAAAE,EAAA,GAAA,MAAA,KAAA,GAAAF,EAAAC,EAAA,GAAAH,EAAAE,EAAAU,EAAAzB,EAAA4D,EAAA/C,GAAA,KAAA,GAAA,OAAAE,EAAAC,EAAA,GAAAhB,EAAA6D,IAAA9C,EAAA8C,EAAA,IAAA,KAAA,GAAA,OAAA9C,EAAAuC,EAAA,GAAA,EAAA7D,EAAA,KAAA,CAAA,CAAA,EAAA,GAAA,CAAA,EAAA,GAAA,GAAA,KAAA,KAKRN,MAAAC,KAAAC,UAAA"}
1
+ {"version":3,"file":"llm.js","sources":["../../src/presets/llm.ts"],"sourcesContent":["import fs from 'node:fs';\nimport path from 'node:path';\n\nimport { google } from '@ai-sdk/google';\nimport type { ModelMessage } from 'ai';\nimport { generateText } from 'ai';\nimport { z } from 'zod';\n\nimport { parseArgs } from '../helpers/parseArgs.js';\nimport { printTestCaseResult } from '../helpers/printTestCaseResult.js';\nimport { readTestCases } from '../helpers/readTestCases.js';\nimport { DecisionCode } from '../types/decisionCode.js';\nimport type { TestCaseResult } from '../types/testCaseResult.js';\n\nconst PROMPT_FILENAME = 'prompt.txt';\n\nconst judgeParamsSchema = z.object({\n model: z.enum(['google/gemini-2.5-flash-lite']),\n});\n\ninterface LlmJudgePresetOptions {\n buildPrompt?: (context: {\n prompt: string;\n testCase: { id: string; input?: string; output?: string };\n }) => string | ModelMessage[];\n test: (context: {\n testCase: { id: string; input?: string; output?: string };\n result: { output: string };\n }) => Partial<TestCaseResult> | Promise<Partial<TestCaseResult>>;\n}\n\n/**\n * A preset judge function for running and testing a user prompt in LLM.\n *\n * @example\n * Create `judge.ts`:\n * ```ts\n * import { llmJudgePreset } from '@exercode/problem-utils/presets/llm';\n * import { DecisionCode } from '@exercode/problem-utils';\n *\n * await llmJudgePreset(import.meta.dirname, {\n * test: (context) {\n * return { decisionCode: context.result.output ? DecisionCode.ACCEPTED : DecisionCode.WRONG_ANSWER };\n * }\n * });\n * ```\n *\n * Run with the required parameters:\n * ```bash\n * bun judge.ts model_answers/java '{ \"model\": \"gemini-2.5-flash-lite\" }'\n * ```\n */\nexport async function llmJudgePreset(problemDir: string, options: LlmJudgePresetOptions): Promise<void> {\n const args = parseArgs(process.argv);\n const params = judgeParamsSchema.parse(args.params);\n\n const testCases = await readTestCases(path.join(problemDir, 'test_cases'));\n\n const prompt = await fs.promises.readFile(path.join(args.cwd, PROMPT_FILENAME), 'utf8');\n\n for (const testCase of testCases) {\n const startTimeMilliseconds = Date.now();\n try {\n // requires `GOOGLE_GENERATIVE_AI_API_KEY`\n const { text } = await generateText({\n model: google(params.model.slice('google/'.length)),\n prompt: options.buildPrompt?.({ prompt, testCase }) ?? prompt.replaceAll('{input}', testCase.input ?? ''),\n });\n\n const stopTimeMilliseconds = Date.now();\n\n const testCaseResult = {\n testCaseId: testCase.id,\n decisionCode: DecisionCode.ACCEPTED,\n stdin: testCase.input,\n stdout: text,\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n ...(await options.test({ testCase, result: { output: text } })),\n };\n\n printTestCaseResult(testCaseResult);\n\n if (testCaseResult.decisionCode !== DecisionCode.ACCEPTED) break;\n } catch (error) {\n const stopTimeMilliseconds = Date.now();\n\n printTestCaseResult({\n testCaseId: testCase.id,\n decisionCode: DecisionCode.RUNTIME_ERROR,\n stdin: testCase.input,\n stderr: error instanceof Error ? error.message : String(error),\n timeSeconds: (stopTimeMilliseconds - startTimeMilliseconds) / 1000,\n });\n\n break;\n }\n }\n}\n"],"names":["judgeParamsSchema","z","object","model","llmJudgePreset","_x","_x2","_llmJudgePreset","apply","this","arguments","_asyncToGenerator","_regenerator","m","_callee","problemDir","options","args","params","testCases","prompt","_iterator","_step","testCase","startTimeMilliseconds","_options$buildPrompt","_options$buildPrompt2","_testCase$input","_yield$generateText","text","stopTimeMilliseconds","testCaseResult","_stopTimeMilliseconds","_t","_t2","_t3","_t4","w","_context","p","n","parseArgs","process","argv","parse","readTestCases","path","join","v","fs","promises","readFile","cwd","_createForOfIteratorHelper","s","done","value","Date","now","generateText","google","slice","buildPrompt","call","replaceAll","input","_objectSpread","testCaseId","id","decisionCode","DecisionCode","ACCEPTED","stdin","stdout","timeSeconds","test","result","output","printTestCaseResult","a","RUNTIME_ERROR","stderr","Error","message","String","e","f"],"mappings":"ihBAcA,IAEMA,EAAoBC,EAAEC,OAAO,CACjCC,MAAOF,EAAC,KAAM,CAAC,mCAmCjB,SAAsBG,EAAcC,EAAAC,GAAA,OAAAC,EAAAC,MAAAC,KAAAC,UAAA,CA6CnC,SAAAH,IAAA,OAAAA,EAAAI,EAAAC,IAAAC,EA7CM,SAAAC,EAA8BC,EAAoBC,GAA8B,IAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAAC,EAAA,OAAAxB,IAAAyB,EAAA,SAAAC,GAAA,cAAAA,EAAAC,EAAAD,EAAAE,GAAA,KAAA,EAElC,OAD7CvB,EAAOwB,EAAUC,QAAQC,MACzBzB,EAASlB,EAAkB4C,MAAM3B,EAAKC,QAAOoB,EAAAE,EAAA,EAE3BK,EAAcC,EAAKC,KAAKhC,EAAY,eAAc,KAAA,EAA3D,OAATI,EAASmB,EAAAU,EAAAV,EAAAE,EAAA,EAEMS,EAAGC,SAASC,SAASL,EAAKC,KAAK9B,EAAKmC,IA5CnC,cA4C0D,QAAO,KAAA,EAAjFhC,EAAMkB,EAAAU,EAAA3B,EAAAgC,EAEWlC,GAASmB,EAAAC,EAAA,EAAAlB,EAAAiC,IAAA,KAAA,EAAA,IAAAhC,EAAAD,EAAAmB,KAAAe,KAAA,CAAAjB,EAAAE,EAAA,GAAA,KAAA,CACU,OAD/BjB,EAAQD,EAAAkC,MACXhC,EAAwBiC,KAAKC,MAAKpB,EAAAC,EAAA,EAAAD,EAAAE,EAAA,EAGfmB,EAAa,CAClCxD,MAAOyD,EAAO1C,EAAOf,MAAM0D,MAAM,IACjCzC,OAAmD,OAA7CK,EAAqB,OAArBC,EAAEV,EAAQ8C,kBAAW,EAAnBpC,EAAAqC,KAAA/C,EAAsB,CAAEI,OAAAA,EAAQG,SAAAA,KAAWE,EAAIL,EAAO4C,WAAW,UAAyB,OAAhBrC,EAAEJ,EAAS0C,OAAKtC,EAAI,MACtG,KAAA,EASkE,OATlEC,EAAAU,EAAAU,EAHMnB,EAAID,EAAJC,KAKFC,EAAuB2B,KAAKC,MAAKzB,EAAAiC,EAAAhC,EAAA,CAGrCiC,WAAY5C,EAAS6C,GACrBC,aAAcC,EAAaC,SAC3BC,MAAOjD,EAAS0C,MAChBQ,OAAQ5C,EACR6C,aAAc5C,EAAuBN,GAAyB,KAAIc,EAAAE,EAAA,EACxDxB,EAAQ2D,KAAK,CAAEpD,SAAAA,EAAUqD,OAAQ,CAAEC,OAAQhD,KAAS,KAAA,EAG5B,GAT9BE,EAAcE,EAAAC,EAAAI,EAAAU,GASpB8B,EAAoB/C,GAEhBA,EAAesC,eAAiBC,EAAaC,SAAQ,CAAAjC,EAAAE,EAAA,EAAA,KAAA,CAAA,OAAAF,EAAAyC,EAAA,EAAA,IAAA,KAAA,EAAAzC,EAAAE,EAAA,GAAA,MAAA,KAAA,EAUtD,OAVsDF,EAAAC,EAAA,EAAAJ,EAAAG,EAAAU,EAEnDlB,EAAuB2B,KAAKC,MAElCoB,EAAoB,CAClBX,WAAY5C,EAAS6C,GACrBC,aAAcC,EAAaU,cAC3BR,MAAOjD,EAAS0C,MAChBgB,OAAQ9C,aAAiB+C,MAAQ/C,EAAMgD,QAAUC,OAAMjD,GACvDuC,aAAc5C,EAAuBN,GAAyB,MAC7Dc,EAAAyC,EAAA,EAAA,IAAA,KAAA,GAAAzC,EAAAE,EAAA,EAAA,MAAA,KAAA,GAAAF,EAAAE,EAAA,GAAA,MAAA,KAAA,GAAAF,EAAAC,EAAA,GAAAH,EAAAE,EAAAU,EAAA3B,EAAAgE,EAAAjD,GAAA,KAAA,GAAA,OAAAE,EAAAC,EAAA,GAAAlB,EAAAiE,IAAAhD,EAAAgD,EAAA,IAAA,KAAA,GAAA,OAAAhD,EAAAyC,EAAA,GAAA,EAAAjE,EAAA,KAAA,CAAA,CAAA,EAAA,GAAA,CAAA,EAAA,GAAA,GAAA,KAAA,KAKRN,MAAAC,KAAAC,UAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@exercode/problem-utils",
3
- "version": "1.5.5",
3
+ "version": "1.6.0",
4
4
  "description": ":100: A set of utilities for judging programs on Exercode (https://exercode.willbooster.com/).",
5
5
  "keywords": [
6
6
  "exercode",