even-pf 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bun.lock CHANGED
@@ -14,11 +14,11 @@
14
14
  "@types/bun": "latest",
15
15
  },
16
16
  "optionalDependencies": {
17
- "even-pf-darwin-arm64": "0.3.4",
18
- "even-pf-darwin-x64": "0.3.4",
19
- "even-pf-linux-arm64": "0.3.4",
20
- "even-pf-linux-x64": "0.3.4",
21
- "even-pf-windows-x64": "0.3.4",
17
+ "even-pf-darwin-arm64": "0.4.2",
18
+ "even-pf-darwin-x64": "0.4.2",
19
+ "even-pf-linux-arm64": "0.4.2",
20
+ "even-pf-linux-x64": "0.4.2",
21
+ "even-pf-windows-x64": "0.4.2",
22
22
  },
23
23
  "peerDependencies": {
24
24
  "typescript": "^5.9.3",
@@ -36,16 +36,6 @@
36
36
 
37
37
  "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
38
38
 
39
- "even-pf-darwin-arm64": ["even-pf-darwin-arm64@0.3.4", "", { "os": "darwin", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-x2vTM0ogvlFhUiHqb13kXJTKPRPU/VdoZa1G51c3IHsZz7wdDpkD/DxcEvxAmO28MbJtfjxig8nRFMvld5J6jg=="],
40
-
41
- "even-pf-darwin-x64": ["even-pf-darwin-x64@0.3.4", "", { "os": "darwin", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-L2hzBvSLFcWMB/MJQeZTQHI8mqpGMQ7T0tSPXjv4S1tFglF8ZtdxggDAhmItEyyqVfsAT6LY+HyOpJnUAga9tg=="],
42
-
43
- "even-pf-linux-arm64": ["even-pf-linux-arm64@0.3.4", "", { "os": "linux", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-/5nLtKs+8xvTHEkrVPQQ5XQBTKROmF42z6+fo4AOkOj/TbDGwCher6RYYMHQ6pD7M0jjF5AdSlj5HLEGf/N9Qg=="],
44
-
45
- "even-pf-linux-x64": ["even-pf-linux-x64@0.3.4", "", { "os": "linux", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-UN0wz2svjcjckugzFyc4tHxllrTM7IScSmnLDq5z9AB5cplHZrvAg8cYcvz20YEcHsr7aUkxrhA7iDv5KKYhkA=="],
46
-
47
- "even-pf-windows-x64": ["even-pf-windows-x64@0.3.4", "", { "os": "win32", "cpu": "x64", "bin": { "even-pf": "bin/even-pf.exe" } }, "sha512-ni84uLUdo95TlACDUyz7Ia7+4wigSByvUuR+IrXbLzkN90mZTsJoZVbAoJMR8CnOlPPEClcPHqkTcYl1lbLOwA=="],
48
-
49
39
  "smol-toml": ["smol-toml@1.6.0", "", {}, "sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw=="],
50
40
 
51
41
  "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
package/epf.example.toml CHANGED
@@ -12,6 +12,8 @@ top_p = 1
12
12
  frequency_penalty = 0
13
13
  presence_penalty = 0
14
14
  reasoning_effort = "high"
15
+ max_retries = 1
16
+ retry_delay_ms = 1000
15
17
 
16
18
  [llm.models.output_comparison]
17
19
  sdk = "openrouter"
@@ -22,6 +24,8 @@ top_p = 1
22
24
  frequency_penalty = 0
23
25
  presence_penalty = 0
24
26
  reasoning_effort = "high"
27
+ max_retries = 1
28
+ retry_delay_ms = 1000
25
29
 
26
30
  [llm.prompt_replacement]
27
31
  role = "role_placeholder"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "even-pf",
3
- "version": "0.4.1",
3
+ "version": "0.4.2",
4
4
  "description": "AI-assisted responsible grading tool for programming assignments",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -31,11 +31,11 @@
31
31
  "zod-defaults": "^0.2.3"
32
32
  },
33
33
  "optionalDependencies": {
34
- "even-pf-linux-x64": "0.4.1",
35
- "even-pf-linux-arm64": "0.4.1",
36
- "even-pf-windows-x64": "0.4.1",
37
- "even-pf-darwin-x64": "0.4.1",
38
- "even-pf-darwin-arm64": "0.4.1"
34
+ "even-pf-linux-x64": "0.4.2",
35
+ "even-pf-linux-arm64": "0.4.2",
36
+ "even-pf-windows-x64": "0.4.2",
37
+ "even-pf-darwin-x64": "0.4.2",
38
+ "even-pf-darwin-arm64": "0.4.2"
39
39
  },
40
40
  "files": [
41
41
  "bin/even-pf.js",
package/src/cli.ts CHANGED
@@ -4,6 +4,7 @@ import "./version.ts";
4
4
 
5
5
  import {OpenRouter} from "@openrouter/sdk";
6
6
 
7
+ import {ARGS} from "./util/args.ts";
7
8
  import {CONFIG} from "./util/config.ts";
8
9
  import {executeTestingWorkflow} from "./workflow/testing-workflow.ts";
9
10
  import {executeAnalysisWorkflow} from "./workflow/analysis-workflow.ts";
@@ -20,36 +21,65 @@ const workflowDependencies: WorkflowDependencies = {
20
21
  }
21
22
 
22
23
  // Parallelize workflows with Promise.allSettled
23
- const analysisWorkflows = CONFIG.analysis_workflows;
24
- const testingWorkflows = CONFIG.testing_workflows;
25
- console.log(`Starting execution of ${analysisWorkflows.length} workflows...`);
26
- console.log(analysisWorkflows.map((w) => w.slug));
27
- let workflowRuns: Promise<void>[] = [];
24
+ const onlySlugs: string[] | undefined = ARGS.values.only_workflows;
25
+ const skipSlugs: string[] | undefined = ARGS.values.skip_workflow;
26
+
27
+ function applyWorkflowFilters<T extends { slug: string }>(workflows: T[]): T[] {
28
+ let filtered = workflows;
29
+ if (onlySlugs && onlySlugs.length > 0) {
30
+ filtered = filtered.filter((w) => {
31
+ if (onlySlugs.includes(w.slug)) {
32
+ return true;
33
+ }
34
+ console.log(`Skipping workflow '${w.slug}' (not in --only_workflows list)`);
35
+ return false;
36
+ });
37
+ }
38
+ if (skipSlugs && skipSlugs.length > 0) {
39
+ filtered = filtered.filter((w) => {
40
+ if (skipSlugs.includes(w.slug)) {
41
+ console.log(`Skipping workflow '${w.slug}' (matched --skip_workflow)`);
42
+ return false;
43
+ }
44
+ return true;
45
+ });
46
+ }
47
+ return filtered;
48
+ }
49
+
50
+ const analysisWorkflows = applyWorkflowFilters(CONFIG.analysis_workflows);
51
+ const testingWorkflows = applyWorkflowFilters(CONFIG.testing_workflows);
52
+ console.log(`Starting execution of ${analysisWorkflows.length} analysis + ${testingWorkflows.length} testing workflows...`);
53
+ console.log([...analysisWorkflows, ...testingWorkflows].map((w) => w.slug));
54
+ const workflowRuns: Promise<void>[] = [];
55
+ const workflowRunSlugs: string[] = [];
28
56
  analysisWorkflows.forEach((workflow) => {
29
57
  for (let i = 0; i < workflow.runs; i++) {
30
58
  workflowRuns.push(executeAnalysisWorkflow(workflow, i+1, workflowDependencies));
59
+ workflowRunSlugs.push(workflow.slug);
31
60
  }
32
61
  });
33
62
  testingWorkflows.forEach((workflow) => {
34
63
  for (let i = 0; i < workflow.runs; i++) {
35
64
  workflowRuns.push(executeTestingWorkflow(workflow, i+1, workflowDependencies));
65
+ workflowRunSlugs.push(workflow.slug);
36
66
  }
37
67
  });
38
- workflowDependencies.outputViewer.display(); // For start the server early.
68
+ workflowDependencies.outputViewer.display(); // Start the server early.
39
69
  const workflowsResults = await Promise.allSettled(workflowRuns);
40
70
  // Summarize with indices to include slugs in failure logs
41
71
  const failedIndices: number[] = [];
42
72
  const succeededIndices: number[] = [];
43
73
  workflowsResults.forEach((r, i) => {
44
- if (r.status === "rejected") failedIndices.push(i);
45
- else succeededIndices.push(i);
74
+ if (r.status === "rejected") { failedIndices.push(i); }
75
+ else { succeededIndices.push(i); }
46
76
  });
47
77
 
48
78
  console.log(`Workflows completed. Succeeded: ${succeededIndices.length}; Failed: ${failedIndices.length}`);
49
79
  if (failedIndices.length > 0) {
50
80
  failedIndices.forEach((i) => {
51
81
  const r = workflowsResults[i] as PromiseRejectedResult;
52
- const slug = analysisWorkflows[i]?.slug ?? `#${i + 1}`;
82
+ const slug = workflowRunSlugs[i] ?? `#${i + 1}`;
53
83
  console.warn(`Workflow '${slug}' failed:`, r.reason);
54
84
  });
55
85
  }
package/src/util/args.ts CHANGED
@@ -1,4 +1,4 @@
1
- import {parseArgs} from "util";
1
+ import { parseArgs } from "util";
2
2
 
3
3
  // console.log(Bun.argv);
4
4
  export const ARGS = parseArgs({
@@ -23,6 +23,11 @@ export const ARGS = parseArgs({
23
23
  short: "S",
24
24
  multiple: true,
25
25
  },
26
+ only_workflows: {
27
+ type: "string",
28
+ short: "O",
29
+ multiple: true,
30
+ },
26
31
  completion_inputs_destination: {
27
32
  type: "string",
28
33
  },
@@ -1,4 +1,4 @@
1
- import {z} from "zod";
1
+ import { z } from "zod";
2
2
 
3
3
  export enum OutputViewingModeEnum {
4
4
  Local = "local",
@@ -20,6 +20,8 @@ export const ModelConfigSchema = z.object({
20
20
  frequency_penalty: z.number().min(-2).max(2).default(0),
21
21
  presence_penalty: z.number().min(-2).max(2).default(0),
22
22
  reasoning_effort: z.enum(["low", "medium", "high"]).default("high"),
23
+ max_retries: z.number().min(0).default(1), // 0 for no retry
24
+ retry_delay_ms: z.number().min(0).default(1000),
23
25
  });
24
26
 
25
27
  export const LLMConfigSchema = z.object({
@@ -45,7 +47,7 @@ export const AnalysisWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
45
47
  prompt: z.string(),
46
48
  })
47
49
 
48
- export enum LLMJudgeInputModeEnum{
50
+ export enum LLMJudgeInputModeEnum {
49
51
  None = "NONE",
50
52
  Diff = "DIFF",
51
53
  Full = "FULL",
package/src/util/llm.ts CHANGED
@@ -5,6 +5,10 @@ import type {WorkflowDependencies} from "../workflow";
5
5
  import {recordCompletionInput} from "./eval-harness.ts";
6
6
 
7
7
 
8
+ async function delay(ms: number): Promise<void> {
9
+ return new Promise(resolve => setTimeout(resolve, ms));
10
+ }
11
+
8
12
  export async function generateCompletion(deps: WorkflowDependencies,
9
13
  log: (..._: any[])=>void,
10
14
  warn: (..._: any[])=>void,
@@ -15,31 +19,30 @@ export async function generateCompletion(deps: WorkflowDependencies,
15
19
  if (!modelSettings) {
16
20
  throw new Error(`No model settings found for model "${model}"`);
17
21
  }
18
-
22
+
19
23
  let replacedCount = 0;
20
24
  for (const [replacementKey, replacementValue] of Object.entries(CONFIG.llm.prompt_replacement)) {
21
- if (systemPrompt.includes(replacementKey)) {replacedCount++}
25
+ if (systemPrompt.includes(replacementKey)) {replacedCount++;}
22
26
  systemPrompt = systemPrompt.replaceAll(`{{${replacementKey}}}`, replacementValue);
23
27
  if (typeof content === "string") {
24
- if (content.includes(replacementKey)) {replacedCount++}
28
+ if (content.includes(replacementKey)) {replacedCount++;}
25
29
  content = content.replaceAll(`{{${replacementKey}}}`, replacementValue);
26
30
  }
27
31
  else {
28
32
  for (let i = 0; i < content.length; i++) {
29
33
  const element = content[i];
30
34
  if (element && "type" in element && element.type === "text" && typeof element.text === "string") {
31
- if (element.text.includes(replacementKey)) {replacedCount++}
35
+ if (element.text.includes(replacementKey)) {replacedCount++;}
32
36
  content[i] = {
33
37
  ...element,
34
38
  text: element.text.replaceAll(`{{${replacementKey}}}`, replacementValue),
35
- }
39
+ };
36
40
  }
37
41
  }
38
-
39
42
  }
40
43
  }
41
44
  log(`Replaced ${replacedCount} instances of prompt variables in system prompt and content`);
42
-
45
+
43
46
  let messages: (SystemMessage | UserMessage)[] = [
44
47
  {
45
48
  role: "system",
@@ -51,30 +54,64 @@ export async function generateCompletion(deps: WorkflowDependencies,
51
54
  }
52
55
  ];
53
56
  setTimeout(async ()=> await recordCompletionInput(messages), 5);
54
-
55
- log("Sending chat completion request...");
56
- let startTime = Date.now();
57
- let completion = await deps.openRouter.chat.send({
58
- model: modelSettings.model_name,
59
- maxCompletionTokens: modelSettings.max_completion_tokens,
60
- messages: messages,
61
- stream: false,
62
- seed: deps.seed,
63
- frequencyPenalty: modelSettings.frequency_penalty,
64
- presencePenalty: modelSettings.presence_penalty,
65
- temperature: modelSettings.temperature,
66
- reasoning: {
67
- effort: modelSettings.reasoning_effort,
68
- },
69
- });
70
- log(`Completion response generated in ${(Date.now() - startTime) / 1000} seconds`);
71
- if (completion.choices.length < 1){
72
- warn("No choices returned from completion");
73
- console.log(completion);
57
+
58
+ const maxRetries = modelSettings.max_retries;
59
+ const retryDelayMs = modelSettings.retry_delay_ms;
60
+ const totalAttempts = maxRetries + 1;
61
+
62
+ let lastError: unknown = null;
63
+
64
+ for (let attempt = 0; attempt < totalAttempts; attempt++) {
65
+ const attemptLabel = `${attempt + 1}/${totalAttempts}`;
66
+
67
+ if (attempt > 0) {
68
+ const backoffMs = retryDelayMs * (2 ** (attempt - 1)) + Math.random() * 200;
69
+ warn(`Retrying after ${Math.round(backoffMs)}ms (attempt ${attemptLabel})...`);
70
+ await delay(backoffMs);
71
+ }
72
+
73
+ log(`Sending chat completion request (attempt ${attemptLabel})...`);
74
+ let startTime = Date.now();
75
+
76
+ try {
77
+ let completion = await deps.openRouter.chat.send({
78
+ model: modelSettings.model_name,
79
+ maxCompletionTokens: modelSettings.max_completion_tokens,
80
+ messages: messages,
81
+ stream: false,
82
+ seed: deps.seed,
83
+ frequencyPenalty: modelSettings.frequency_penalty,
84
+ presencePenalty: modelSettings.presence_penalty,
85
+ temperature: modelSettings.temperature,
86
+ reasoning: {
87
+ effort: modelSettings.reasoning_effort,
88
+ },
89
+ });
90
+ log(`Completion response received in ${(Date.now() - startTime) / 1000}s (attempt ${attemptLabel})`);
91
+
92
+ const text = completion.choices[0]?.message.content?.toString() ?? "";
93
+
94
+ if (completion.choices.length < 1 || text.length === 0) {
95
+ warn(`Empty completion on attempt ${attemptLabel}`);
96
+ console.log(completion);
97
+ // Retry if attempts remain; otherwise return empty
98
+ if (attempt < maxRetries) {
99
+ continue;
100
+ }
101
+ warn("Exhausted all retries — returning empty completion");
102
+ return {text: "", model: completion.model};
103
+ }
104
+
105
+ return {text, model: completion.model};
106
+
107
+ } catch (error) {
108
+ const message = error instanceof Error ? error.message : String(error);
109
+ warn(`Chat completion error on attempt ${attemptLabel}: ${message}`);
110
+ lastError = error;
111
+ // Loop continues to next attempt (or exits if this was the last)
112
+ }
74
113
  }
75
-
76
- return {
77
- text: completion.choices[0]?.message.content?.toString() ?? "",
78
- model: completion.model,
79
- };
114
+
115
+ warn("Exhausted all retries due to errors — re-throwing last error");
116
+ throw lastError;
80
117
  }
@@ -6,6 +6,7 @@ import {OutputViewingModeEnum} from "./config-schema.ts";
6
6
  type FileRecord = {
7
7
  type: "markdown" | "text";
8
8
  content: string;
9
+ modification_time: Date
9
10
  }
10
11
 
11
12
  const CORS_HEADERS = {
@@ -25,16 +26,17 @@ function jsonResponse(data: unknown, status = 200): Response {
25
26
  }
26
27
 
27
28
  export class OutputViewer {
28
- filesRecords: Record<string, FileRecord> = {};
29
+ fileRecords: Record<string, FileRecord> = {};
29
30
  displayed: boolean = false;
30
31
 
31
- addFile(filename: string, _: FileRecord): void {
32
- this.filesRecords[filename] = _;
32
+ addFile(filename: string, fileRecord: Omit<FileRecord, "modification_time">): void {
33
+ this.fileRecords[filename] = {
34
+ ...fileRecord,
35
+ modification_time: new Date(),
36
+ };
33
37
  }
34
38
 
35
39
  serve(): string {
36
- let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
37
-
38
40
  let server = Bun.serve({
39
41
  port: CONFIG.output_viewing.api_port,
40
42
  routes: {
@@ -42,10 +44,12 @@ export class OutputViewer {
42
44
  if (req.method === "OPTIONS") {
43
45
  return new Response(null, { status: 204, headers: CORS_HEADERS });
44
46
  }
47
+ let files = Object.entries(this.fileRecords).sort((a, b) => a[0].localeCompare(b[0]));
45
48
  return jsonResponse({
46
49
  files: files.map(([filename, fileRecord]) => ({
47
50
  name: filename,
48
51
  type: fileRecord.type,
52
+ modification_time: fileRecord.modification_time,
49
53
  })),
50
54
  });
51
55
  },
@@ -54,7 +58,7 @@ export class OutputViewer {
54
58
  return new Response(null, { status: 204, headers: CORS_HEADERS });
55
59
  }
56
60
  let slug = req.params.slug;
57
- let record = this.filesRecords[slug];
61
+ let record = this.fileRecords[slug];
58
62
  if (!record) {
59
63
  return jsonResponse({ error: "Not Found" }, 404);
60
64
  }
@@ -72,7 +76,7 @@ export class OutputViewer {
72
76
  return jsonResponse({ error: "Not Found" }, 404);
73
77
  },
74
78
  });
75
- console.log(server.url);
79
+ console.log(server.url.toString());
76
80
  return server.url.toString();
77
81
  }
78
82
 
@@ -80,14 +84,14 @@ export class OutputViewer {
80
84
  let frontendURL = "";
81
85
  switch (CONFIG.output_viewing.mode) {
82
86
  case OutputViewingModeEnum.Local:
83
- if (Object.keys(this.filesRecords).length === 0) {
87
+ if (Object.keys(this.fileRecords).length === 0) {
84
88
  console.warn("No files to display (you can probably ignore this warning if your workflows haven't completed yet)");
85
89
  return;
86
90
  }
87
91
 
88
92
  console.log("Click the following links to view the outputs in your browser:");
89
93
 
90
- let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
94
+ let files = Object.entries(this.fileRecords).sort((a, b) => a[0].localeCompare(b[0]));
91
95
  for (const [filename, fileRecord] of files) {
92
96
  let params = new URLSearchParams();
93
97
  params.set("name", filename);