npm - even-pf - Versions diffs - 0.4.0 → 0.4.2 - Mend

even-pf 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/README.md +12 -0
package/bun.lock +5 -15
package/epf.example.toml +4 -0
package/package.json +7 -7
package/src/cli.ts +39 -9
package/src/util/args.ts +6 -1
package/src/util/config-schema.ts +4 -2
package/src/util/llm.ts +69 -32
package/src/util/output-viewer.ts +13 -9

package/README.md CHANGED Viewed

@@ -28,11 +28,23 @@ Make sure you have a config file in your home or current directory. Alternativel
 This project was created using `bun init` in bun v1.3.2. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
+### Publishing
 After making changes, you might want to bump the version.
 ```bash
 bun run bump <semver>
 ```
+Build the executables:
+```bash
+bun run build:all
+```
+Then publish to npm:
+```bash
+bun run publish:all
+```
+Do not run `bun publish` directly: the executables are distributed as separate packages and need to be published separately.
 ## Specs
 ### File-viewer Frontend
 Because the tool might be running on a remote server, we use a simple file-viewer frontend to make the Markdown files easy to view.

package/bun.lock CHANGED Viewed

@@ -14,11 +14,11 @@
         "@types/bun": "latest",
       },
       "optionalDependencies": {
-        "even-pf-darwin-arm64": "0.3.4",
-        "even-pf-darwin-x64": "0.3.4",
-        "even-pf-linux-arm64": "0.3.4",
-        "even-pf-linux-x64": "0.3.4",
-        "even-pf-windows-x64": "0.3.4",
+        "even-pf-darwin-arm64": "0.4.2",
+        "even-pf-darwin-x64": "0.4.2",
+        "even-pf-linux-arm64": "0.4.2",
+        "even-pf-linux-x64": "0.4.2",
+        "even-pf-windows-x64": "0.4.2",
       },
       "peerDependencies": {
         "typescript": "^5.9.3",
@@ -36,16 +36,6 @@
     "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
-    "even-pf-darwin-arm64": ["even-pf-darwin-arm64@0.3.4", "", { "os": "darwin", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-x2vTM0ogvlFhUiHqb13kXJTKPRPU/VdoZa1G51c3IHsZz7wdDpkD/DxcEvxAmO28MbJtfjxig8nRFMvld5J6jg=="],
-    "even-pf-darwin-x64": ["even-pf-darwin-x64@0.3.4", "", { "os": "darwin", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-L2hzBvSLFcWMB/MJQeZTQHI8mqpGMQ7T0tSPXjv4S1tFglF8ZtdxggDAhmItEyyqVfsAT6LY+HyOpJnUAga9tg=="],
-    "even-pf-linux-arm64": ["even-pf-linux-arm64@0.3.4", "", { "os": "linux", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-/5nLtKs+8xvTHEkrVPQQ5XQBTKROmF42z6+fo4AOkOj/TbDGwCher6RYYMHQ6pD7M0jjF5AdSlj5HLEGf/N9Qg=="],
-    "even-pf-linux-x64": ["even-pf-linux-x64@0.3.4", "", { "os": "linux", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-UN0wz2svjcjckugzFyc4tHxllrTM7IScSmnLDq5z9AB5cplHZrvAg8cYcvz20YEcHsr7aUkxrhA7iDv5KKYhkA=="],
-    "even-pf-windows-x64": ["even-pf-windows-x64@0.3.4", "", { "os": "win32", "cpu": "x64", "bin": { "even-pf": "bin/even-pf.exe" } }, "sha512-ni84uLUdo95TlACDUyz7Ia7+4wigSByvUuR+IrXbLzkN90mZTsJoZVbAoJMR8CnOlPPEClcPHqkTcYl1lbLOwA=="],
     "smol-toml": ["smol-toml@1.6.0", "", {}, "sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw=="],
     "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],

package/epf.example.toml CHANGED Viewed

@@ -12,6 +12,8 @@ top_p = 1
 frequency_penalty = 0
 presence_penalty = 0
 reasoning_effort = "high"
+max_retries = 1
+retry_delay_ms = 1000
 [llm.models.output_comparison]
 sdk = "openrouter"
@@ -22,6 +24,8 @@ top_p = 1
 frequency_penalty = 0
 presence_penalty = 0
 reasoning_effort = "high"
+max_retries = 1
+retry_delay_ms = 1000
 [llm.prompt_replacement]
 role = "role_placeholder"

package/package.json CHANGED Viewed

@@ -1,13 +1,13 @@
 {
   "name": "even-pf",
-  "version": "0.4.0",
+  "version": "0.4.2",
   "description": "AI-assisted responsible grading tool for programming assignments",
   "module": "src/cli.ts",
   "type": "module",
   "license": "UNLICENSED",
   "scripts": {
     "start": "bun run src/cli.ts",
-    "build-dev": "bun build src/cli.ts --compile --outfile build/epf",
+    "build:dev": "bun build src/cli.ts --compile --outfile build/epf",
     "build:all": "bun scripts/build-all.ts",
     "publish:all": "bun scripts/publish-all.ts",
     "publish:dry": "bun scripts/publish-all.ts --dry-run",
@@ -31,11 +31,11 @@
     "zod-defaults": "^0.2.3"
   },
   "optionalDependencies": {
-    "even-pf-linux-x64": "0.4.0",
-    "even-pf-linux-arm64": "0.4.0",
-    "even-pf-windows-x64": "0.4.0",
-    "even-pf-darwin-x64": "0.4.0",
-    "even-pf-darwin-arm64": "0.4.0"
+    "even-pf-linux-x64": "0.4.2",
+    "even-pf-linux-arm64": "0.4.2",
+    "even-pf-windows-x64": "0.4.2",
+    "even-pf-darwin-x64": "0.4.2",
+    "even-pf-darwin-arm64": "0.4.2"
   },
   "files": [
     "bin/even-pf.js",

package/src/cli.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import "./version.ts";
 import {OpenRouter} from "@openrouter/sdk";
+import {ARGS} from "./util/args.ts";
 import {CONFIG} from "./util/config.ts";
 import {executeTestingWorkflow} from "./workflow/testing-workflow.ts";
 import {executeAnalysisWorkflow} from "./workflow/analysis-workflow.ts";
@@ -20,36 +21,65 @@ const workflowDependencies: WorkflowDependencies = {
 }
 // Parallelize workflows with Promise.allSettled
-const analysisWorkflows = CONFIG.analysis_workflows;
-const testingWorkflows = CONFIG.testing_workflows;
-console.log(`Starting execution of ${analysisWorkflows.length} workflows...`);
-console.log(analysisWorkflows.map((w) => w.slug));
-let workflowRuns: Promise<void>[] = [];
+const onlySlugs: string[] | undefined = ARGS.values.only_workflows;
+const skipSlugs: string[] | undefined = ARGS.values.skip_workflow;
+function applyWorkflowFilters<T extends { slug: string }>(workflows: T[]): T[] {
+    let filtered = workflows;
+    if (onlySlugs && onlySlugs.length > 0) {
+        filtered = filtered.filter((w) => {
+            if (onlySlugs.includes(w.slug)) {
+                return true;
+            }
+            console.log(`Skipping workflow '${w.slug}' (not in --only_workflows list)`);
+            return false;
+        });
+    }
+    if (skipSlugs && skipSlugs.length > 0) {
+        filtered = filtered.filter((w) => {
+            if (skipSlugs.includes(w.slug)) {
+                console.log(`Skipping workflow '${w.slug}' (matched --skip_workflow)`);
+                return false;
+            }
+            return true;
+        });
+    }
+    return filtered;
+}
+const analysisWorkflows = applyWorkflowFilters(CONFIG.analysis_workflows);
+const testingWorkflows = applyWorkflowFilters(CONFIG.testing_workflows);
+console.log(`Starting execution of ${analysisWorkflows.length} analysis + ${testingWorkflows.length} testing workflows...`);
+console.log([...analysisWorkflows, ...testingWorkflows].map((w) => w.slug));
+const workflowRuns: Promise<void>[] = [];
+const workflowRunSlugs: string[] = [];
 analysisWorkflows.forEach((workflow) => {
     for (let i = 0; i < workflow.runs; i++) {
         workflowRuns.push(executeAnalysisWorkflow(workflow, i+1, workflowDependencies));
+        workflowRunSlugs.push(workflow.slug);
     }
 });
 testingWorkflows.forEach((workflow) => {
     for (let i = 0; i < workflow.runs; i++) {
         workflowRuns.push(executeTestingWorkflow(workflow, i+1, workflowDependencies));
+        workflowRunSlugs.push(workflow.slug);
     }
 });
-workflowDependencies.outputViewer.display();  // For start the server early.
+workflowDependencies.outputViewer.display();  // Start the server early.
 const workflowsResults = await Promise.allSettled(workflowRuns);
 // Summarize with indices to include slugs in failure logs
 const failedIndices: number[] = [];
 const succeededIndices: number[] = [];
 workflowsResults.forEach((r, i) => {
-    if (r.status === "rejected") failedIndices.push(i);
-    else succeededIndices.push(i);
+    if (r.status === "rejected") { failedIndices.push(i); }
+    else { succeededIndices.push(i); }
 });
 console.log(`Workflows completed. Succeeded: ${succeededIndices.length}; Failed: ${failedIndices.length}`);
 if (failedIndices.length > 0) {
     failedIndices.forEach((i) => {
         const r = workflowsResults[i] as PromiseRejectedResult;
-        const slug = analysisWorkflows[i]?.slug ?? `#${i + 1}`;
+        const slug = workflowRunSlugs[i] ?? `#${i + 1}`;
         console.warn(`Workflow '${slug}' failed:`, r.reason);
     });
 }

package/src/util/args.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import {parseArgs} from "util";
+import { parseArgs } from "util";
 // console.log(Bun.argv);
 export const ARGS = parseArgs({
@@ -23,6 +23,11 @@ export const ARGS = parseArgs({
             short: "S",
             multiple: true,
         },
+        only_workflows: {
+            type: "string",
+            short: "O",
+            multiple: true,
+        },
         completion_inputs_destination: {
             type: "string",
         },

package/src/util/config-schema.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import {z} from "zod";
+import { z } from "zod";
 export enum OutputViewingModeEnum {
     Local = "local",
@@ -20,6 +20,8 @@ export const ModelConfigSchema = z.object({
     frequency_penalty: z.number().min(-2).max(2).default(0),
     presence_penalty: z.number().min(-2).max(2).default(0),
     reasoning_effort: z.enum(["low", "medium", "high"]).default("high"),
+    max_retries: z.number().min(0).default(1),  // 0 for no retry
+    retry_delay_ms: z.number().min(0).default(1000),
 });
 export const LLMConfigSchema = z.object({
@@ -45,7 +47,7 @@ export const AnalysisWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
     prompt: z.string(),
 })
-export enum LLMJudgeInputModeEnum{
+export enum LLMJudgeInputModeEnum {
     None = "NONE",
     Diff = "DIFF",
     Full = "FULL",

package/src/util/llm.ts CHANGED Viewed

@@ -5,6 +5,10 @@ import type {WorkflowDependencies} from "../workflow";
 import {recordCompletionInput} from "./eval-harness.ts";
+async function delay(ms: number): Promise<void> {
+    return new Promise(resolve => setTimeout(resolve, ms));
+}
 export async function generateCompletion(deps: WorkflowDependencies,
                                          log: (..._: any[])=>void,
                                          warn: (..._: any[])=>void,
@@ -15,31 +19,30 @@ export async function generateCompletion(deps: WorkflowDependencies,
     if (!modelSettings) {
         throw new Error(`No model settings found for model "${model}"`);
     }
     let replacedCount = 0;
     for (const [replacementKey, replacementValue] of Object.entries(CONFIG.llm.prompt_replacement)) {
-        if (systemPrompt.includes(replacementKey)) {replacedCount++}
+        if (systemPrompt.includes(replacementKey)) {replacedCount++;}
         systemPrompt = systemPrompt.replaceAll(`{{${replacementKey}}}`, replacementValue);
         if (typeof content === "string") {
-            if (content.includes(replacementKey)) {replacedCount++}
+            if (content.includes(replacementKey)) {replacedCount++;}
             content = content.replaceAll(`{{${replacementKey}}}`, replacementValue);
         }
         else {
             for (let i = 0; i < content.length; i++) {
                 const element = content[i];
                 if (element && "type" in element && element.type === "text" && typeof element.text === "string") {
-                    if (element.text.includes(replacementKey)) {replacedCount++}
+                    if (element.text.includes(replacementKey)) {replacedCount++;}
                     content[i] = {
                         ...element,
                         text: element.text.replaceAll(`{{${replacementKey}}}`, replacementValue),
-                    }
+                    };
                 }
             }
         }
     }
     log(`Replaced ${replacedCount} instances of prompt variables in system prompt and content`);
     let messages: (SystemMessage | UserMessage)[] = [
         {
             role: "system",
@@ -51,30 +54,64 @@ export async function generateCompletion(deps: WorkflowDependencies,
         }
     ];
     setTimeout(async ()=> await recordCompletionInput(messages), 5);
-    log("Sending chat completion request...");
-    let startTime = Date.now();
-    let completion = await deps.openRouter.chat.send({
-        model: modelSettings.model_name,
-        maxCompletionTokens: modelSettings.max_completion_tokens,
-        messages: messages,
-        stream: false,
-        seed: deps.seed,
-        frequencyPenalty: modelSettings.frequency_penalty,
-        presencePenalty: modelSettings.presence_penalty,
-        temperature: modelSettings.temperature,
-        reasoning: {
-            effort: modelSettings.reasoning_effort,
-        },
-    });
-    log(`Completion response generated in ${(Date.now() - startTime) / 1000} seconds`);
-    if (completion.choices.length < 1){
-        warn("No choices returned from completion");
-        console.log(completion);
+    const maxRetries = modelSettings.max_retries;
+    const retryDelayMs = modelSettings.retry_delay_ms;
+    const totalAttempts = maxRetries + 1;
+    let lastError: unknown = null;
+    for (let attempt = 0; attempt < totalAttempts; attempt++) {
+        const attemptLabel = `${attempt + 1}/${totalAttempts}`;
+        if (attempt > 0) {
+            const backoffMs = retryDelayMs * (2 ** (attempt - 1)) + Math.random() * 200;
+            warn(`Retrying after ${Math.round(backoffMs)}ms (attempt ${attemptLabel})...`);
+            await delay(backoffMs);
+        }
+        log(`Sending chat completion request (attempt ${attemptLabel})...`);
+        let startTime = Date.now();
+        try {
+            let completion = await deps.openRouter.chat.send({
+                model: modelSettings.model_name,
+                maxCompletionTokens: modelSettings.max_completion_tokens,
+                messages: messages,
+                stream: false,
+                seed: deps.seed,
+                frequencyPenalty: modelSettings.frequency_penalty,
+                presencePenalty: modelSettings.presence_penalty,
+                temperature: modelSettings.temperature,
+                reasoning: {
+                    effort: modelSettings.reasoning_effort,
+                },
+            });
+            log(`Completion response received in ${(Date.now() - startTime) / 1000}s (attempt ${attemptLabel})`);
+            const text = completion.choices[0]?.message.content?.toString() ?? "";
+            if (completion.choices.length < 1 || text.length === 0) {
+                warn(`Empty completion on attempt ${attemptLabel}`);
+                console.log(completion);
+                // Retry if attempts remain; otherwise return empty
+                if (attempt < maxRetries) {
+                    continue;
+                }
+                warn("Exhausted all retries — returning empty completion");
+                return {text: "", model: completion.model};
+            }
+            return {text, model: completion.model};
+        } catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            warn(`Chat completion error on attempt ${attemptLabel}: ${message}`);
+            lastError = error;
+            // Loop continues to next attempt (or exits if this was the last)
+        }
     }
-    return {
-        text: completion.choices[0]?.message.content?.toString() ?? "",
-        model: completion.model,
-    };
+    warn("Exhausted all retries due to errors — re-throwing last error");
+    throw lastError;
 }

package/src/util/output-viewer.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import {OutputViewingModeEnum} from "./config-schema.ts";
 type FileRecord = {
     type: "markdown" | "text";
     content: string;
+    modification_time: Date
 }
 const CORS_HEADERS = {
@@ -25,16 +26,17 @@ function jsonResponse(data: unknown, status = 200): Response {
 }
 export class OutputViewer {
-    filesRecords: Record<string, FileRecord> = {};
+    fileRecords: Record<string, FileRecord> = {};
     displayed: boolean = false;
-    addFile(filename: string, _: FileRecord): void {
-        this.filesRecords[filename] = _;
+    addFile(filename: string, fileRecord: Omit<FileRecord, "modification_time">): void {
+        this.fileRecords[filename] = {
+            ...fileRecord,
+            modification_time: new Date(),
+        };
     }
     serve(): string {
-        let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
         let server = Bun.serve({
             port: CONFIG.output_viewing.api_port,
             routes: {
@@ -42,10 +44,12 @@ export class OutputViewer {
                     if (req.method === "OPTIONS") {
                         return new Response(null, { status: 204, headers: CORS_HEADERS });
                     }
+                    let files = Object.entries(this.fileRecords).sort((a, b) => a[0].localeCompare(b[0]));
                     return jsonResponse({
                         files: files.map(([filename, fileRecord]) => ({
                             name: filename,
                             type: fileRecord.type,
+                            modification_time: fileRecord.modification_time,
                         })),
                     });
                 },
@@ -54,7 +58,7 @@ export class OutputViewer {
                         return new Response(null, { status: 204, headers: CORS_HEADERS });
                     }
                     let slug = req.params.slug;
-                    let record = this.filesRecords[slug];
+                    let record = this.fileRecords[slug];
                     if (!record) {
                         return jsonResponse({ error: "Not Found" }, 404);
                     }
@@ -72,7 +76,7 @@ export class OutputViewer {
                 return jsonResponse({ error: "Not Found" }, 404);
             },
         });
-        console.log(server.url);
+        console.log(server.url.toString());
         return server.url.toString();
     }
@@ -80,14 +84,14 @@ export class OutputViewer {
         let frontendURL = "";
         switch (CONFIG.output_viewing.mode) {
             case OutputViewingModeEnum.Local:
-                if (Object.keys(this.filesRecords).length === 0) {
+                if (Object.keys(this.fileRecords).length === 0) {
                     console.warn("No files to display (you can probably ignore this warning if your workflows haven't completed yet)");
                     return;
                 }
                 console.log("Click the following links to view the outputs in your browser:");
-                let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
+                let files = Object.entries(this.fileRecords).sort((a, b) => a[0].localeCompare(b[0]));
                 for (const [filename, fileRecord] of files) {
                     let params = new URLSearchParams();
                     params.set("name", filename);