npm - even-pf - Versions diffs - 0.1.0 - Mend

even-pf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/.gitignore +4 -0
package/README.md +20 -0
package/bun.lock +46 -0
package/bunfig.toml +2 -0
package/epf.example.toml +42 -0
package/package.json +37 -0
package/src/generate-config.ts +30 -0
package/src/index.ts +54 -0
package/src/util/config-schema.ts +69 -0
package/src/util/config.ts +64 -0
package/src/util/file-payload.ts +87 -0
package/src/workflow/analysis-workflow.ts +88 -0
package/src/workflow/index.ts +5 -0
package/src/workflow/testing-workflow.ts +152 -0
package/tsconfig.json +29 -0

package/.gitignore ADDED Viewed

@@ -0,0 +1,4 @@
+/node_modules
+/build
+/outputs

package/README.md ADDED Viewed

@@ -0,0 +1,20 @@
+# Project Even-Playfield (E-PF)
+AI-assisted responsible grading tool for programming assignments. A GPT-wrapper in CLI for CS graders.
+To install dependencies:
+```bash
+bun install
+```
+To install as a tool globally:
+```bash
+bun link
+```
+To run:
+```bash
+bunx e-pf
+```
+Make sure you have a config file named `epf.toml` in your current or home directory. Alternatively, pass the config path or URL as the first command-line argument, or set the environment variable `EPF_CONFIG_URL`.
+This project was created using `bun init` in bun v1.3.2. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.

package/bun.lock ADDED Viewed

@@ -0,0 +1,46 @@
+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "tools",
+      "dependencies": {
+        "@openrouter/sdk": "^0.5.1",
+        "chalk": "^5.6.2",
+        "smol-toml": "^1.5.2",
+        "zod-defaults": "^0.2.3",
+      },
+      "devDependencies": {
+        "@types/bun": "latest",
+      },
+      "peerDependencies": {
+        "typescript": "^5",
+      },
+    },
+  },
+  "packages": {
+    "@openrouter/sdk": ["@openrouter/sdk@0.5.1", "", { "dependencies": { "zod": "^3.25.0 || ^4.0.0" } }, "sha512-Kl0N1jIj7A3lnkM5dO3SGP8JP3jAozzs6JWcHVuZUBt5DsGKxFGNH1Y15bCfsJiLNA2ylAQpCN3aNcgEYkkL5Q=="],
+    "@types/bun": ["@types/bun@1.3.2", "", { "dependencies": { "bun-types": "1.3.2" } }, "sha512-t15P7k5UIgHKkxwnMNkJbWlh/617rkDGEdSsDbu+qNHTaz9SKf7aC8fiIlUdD5RPpH6GEkP0cK7WlvmrEBRtWg=="],
+    "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="],
+    "@types/react": ["@types/react@19.2.6", "", { "dependencies": { "csstype": "^3.2.2" } }, "sha512-p/jUvulfgU7oKtj6Xpk8cA2Y1xKTtICGpJYeJXz2YVO2UcvjQgeRMLDGfDeqeRW2Ta+0QNFwcc8X3GH8SxZz6w=="],
+    "bun-types": ["bun-types@1.3.2", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-i/Gln4tbzKNuxP70OWhJRZz1MRfvqExowP7U6JKoI8cntFrtxg7RJK3jvz7wQW54UuvNC8tbKHHri5fy74FVqg=="],
+    "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
+    "csstype": ["csstype@3.2.3", "", {}, "sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ=="],
+    "smol-toml": ["smol-toml@1.5.2", "", {}, "sha512-QlaZEqcAH3/RtNyet1IPIYPsEWAaYyXXv1Krsi+1L/QHppjX4Ifm8MQsBISz9vE8cHicIq3clogsheili5vhaQ=="],
+    "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
+    "undici-types": ["undici-types@7.16.0", "", {}, "sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw=="],
+    "zod": ["zod@4.1.12", "", {}, "sha512-JInaHOamG8pt5+Ey8kGmdcAcg3OL9reK8ltczgHTAwNhMys/6ThXHityHxVV2p3fkw/c+MAvBHFVYHFZDmjMCQ=="],
+    "zod-defaults": ["zod-defaults@0.2.3", "", { "peerDependencies": { "zod": "^4.1.12" } }, "sha512-7pYkOH1/c+Ril5AZUYtsbhMkehkI8CMqzFZ7YZXfC9SMLRvZuLyonQE7BAIVSNxeTpqTZmW5BLxGSzWMnKNdIw=="],
+  }
+}

package/bunfig.toml ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console]
2	+ depth = 6

package/epf.example.toml ADDED Viewed

@@ -0,0 +1,42 @@
+[openrouter]
+api_key = ""
+model = ""
+[hyperparameters]
+max_completion_tokens = 20000
+temperature = 0.9
+top_p = 1
+frequency_penalty = 0
+presence_penalty = 0
+reasoning_effort = "high"
+[[analysis_workflows]]
+slug = ""
+runs = 1
+output_filename = ""
+prompt = ""
+[[analysis_workflows.input_files_searches]]
+file_glob = ""
+search_directory = "."
+excluded_files = []
+[[testing_workflows]]
+slug = ""
+runs = 1
+output_filename = ""
+setup_commands = []
+cleanup_commands = []
+[[testing_workflows.test_cases]]
+name = ""
+work_directory = "."
+single_run_command = ""
+interactive_steps = []
+[testing_workflows.test_cases.single_run_expected_output]
+prefix_strip_string = ""
+postfix_strip_string = ""
+substring = ""
+llm_judge_input_mode = "NONE"
+llm_judge_prompt = ""

package/package.json ADDED Viewed

@@ -0,0 +1,37 @@
+{
+  "name": "even-pf",
+  "version": "0.1.0",
+  "module": "src/index.ts",
+  "type": "module",
+  "license": "UNLICENSED",
+  "scripts": {
+    "start": "bun run src/index.ts",
+    "build": "bun build src/index.ts --compile --outfile build/epf",
+    "config-gen": "bun run --console-depth 6 src/generate-config.ts"
+  },
+  "bin": {
+    "e-pf": "src/index.ts"
+  },
+  "devDependencies": {
+    "@types/bun": "latest"
+  },
+  "peerDependencies": {
+    "typescript": "^5"
+  },
+  "dependencies": {
+    "@openrouter/sdk": "^0.5.1",
+    "chalk": "^5.6.2",
+    "smol-toml": "^1.5.2",
+    "zod-defaults": "^0.2.3"
+  },
+  "files": [
+    ".gitignore",
+    "src",
+    "bun.lock",
+    "bunfig.toml",
+    "epf.example.toml",
+    "package.json",
+    "README.md",
+    "tsconfig.json"
+  ]
+}

package/src/generate-config.ts ADDED Viewed

@@ -0,0 +1,30 @@
+import {stringify} from "smol-toml";
+import {getDefaultsForSchema} from "zod-defaults";
+import {
+    ConfigSchema,
+    AnalysisWorkflowEntrySchema,
+    FileSearchEntrySchema,
+    TestingWorkflowEntrySchema, TestCaseSchema
+} from "./util/config-schema.ts";
+// Builds an example config from the schema defaults and writes it to disk as TOML.
+console.log("generate-config.ts");
+
+// Start from the top-level defaults, then give each workflow list exactly one
+// default-valued entry so the generated file shows every available key.
+const exampleConfig = getDefaultsForSchema(ConfigSchema);
+
+const exampleAnalysisWorkflow = getDefaultsForSchema(AnalysisWorkflowEntrySchema);
+exampleAnalysisWorkflow.input_files_searches = [getDefaultsForSchema(FileSearchEntrySchema)];
+exampleConfig.analysis_workflows = [exampleAnalysisWorkflow];
+
+const exampleTestingWorkflow = getDefaultsForSchema(TestingWorkflowEntrySchema);
+exampleTestingWorkflow.test_cases = [getDefaultsForSchema(TestCaseSchema)];
+exampleConfig.testing_workflows = [exampleTestingWorkflow];
+
+console.log(exampleConfig);
+
+// Serialize and persist the example file.
+const outputFilename = "epf.example.toml";
+const serializedConfig = stringify(exampleConfig);
+await Bun.file(outputFilename).write(serializedConfig);
+
+console.log(`Default config written to ${outputFilename}`);
+console.log("generate-config.ts done");

package/src/index.ts ADDED Viewed

@@ -0,0 +1,54 @@
+#!/usr/bin/env bun
+import {OpenRouter} from "@openrouter/sdk";
+import {CONFIG} from "./util/config.ts";
+import {executeTestingWorkflow} from "./workflow/testing-workflow.ts";
+import {executeAnalysisWorkflow} from "./workflow/analysis-workflow.ts";
+import type {WorkflowDependencies} from "./workflow";
+// CLI entry point: launches every configured workflow run concurrently and
+// prints a success/failure summary once all of them have settled.
+console.log("EPF index.ts");
+
+// Shared services handed to every workflow run.
+const workflowDependencies: WorkflowDependencies = {
+    openRouter: new OpenRouter({
+        apiKey: CONFIG.openrouter.api_key,
+    })
+};
+
+const analysisWorkflows = CONFIG.analysis_workflows;
+const testingWorkflows = CONFIG.testing_workflows;
+console.log(`Starting execution of ${analysisWorkflows.length} analysis and ${testingWorkflows.length} testing workflows...`);
+console.log([...analysisWorkflows, ...testingWorkflows].map((w) => w.slug));
+
+// Each scheduled run keeps its own label next to its promise. A workflow with
+// `runs > 1` contributes several entries, so a position in this list cannot be
+// mapped back to a workflow by indexing into the config arrays.
+const workflowRuns: { label: string, promise: Promise<void> }[] = [];
+for (const workflow of analysisWorkflows) {
+    for (let run = 1; run <= workflow.runs; run++) {
+        workflowRuns.push({
+            label: `${workflow.slug} (analysis, run ${run})`,
+            promise: executeAnalysisWorkflow(workflow, run, workflowDependencies),
+        });
+    }
+}
+for (const workflow of testingWorkflows) {
+    for (let run = 1; run <= workflow.runs; run++) {
+        workflowRuns.push({
+            label: `${workflow.slug} (testing, run ${run})`,
+            promise: executeTestingWorkflow(workflow, run, workflowDependencies),
+        });
+    }
+}
+
+// allSettled: one failing run must not hide the outcome of the others.
+const workflowsResults = await Promise.allSettled(workflowRuns.map((entry) => entry.promise));
+
+// Split the outcomes, carrying the run label along for the failure report.
+let succeededCount = 0;
+const failures: { label: string, reason: unknown }[] = [];
+workflowsResults.forEach((result, i) => {
+    if (result.status === "rejected") {
+        failures.push({ label: workflowRuns[i]?.label ?? `#${i + 1}`, reason: result.reason });
+    } else {
+        succeededCount++;
+    }
+});
+
+console.log(`Workflows completed. Succeeded: ${succeededCount}; Failed: ${failures.length}`);
+for (const failure of failures) {
+    console.warn(`Workflow '${failure.label}' failed:`, failure.reason);
+}
+console.log("index.ts done");

package/src/util/config-schema.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import {z} from "zod";
+/** One glob search: which pattern to match, where to look, and which matches to drop. */
+export const FileSearchEntrySchema = z.object({
+    file_glob: z.string().min(1),
+    search_directory: z.string().default("."),
+    excluded_files: z.array(z.string()).default([]),
+});
+
+/** Fields shared by the workflow entry schemas below. */
+export const BaseWorkflowEntrySchema = z.object({
+    slug: z.string(),
+    runs: z.number().min(1).default(1),
+    input_files_searches: z.array(FileSearchEntrySchema).default([]),
+    output_filename: z.string().min(1),
+});
+
+/** An analysis workflow: the base fields plus a prompt. */
+export const AnalysisWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
+    prompt: z.string(),
+});
+
+/** Allowed values of `llm_judge_input_mode` in an expected-output section. */
+export enum LLMJudgeInputModeEnum {
+    None = "NONE",
+    Diff = "DIFF",
+    Full = "FULL",
+}
+
+const LLMJudgeInputModeSchema = z.enum(LLMJudgeInputModeEnum);
+
+/** Describes the output a command is expected to produce. */
+const ExpectedOutputSchema = z.object({
+    prefix_strip_string: z.string().min(0),
+    postfix_strip_string: z.string().min(0),
+    substring: z.string().min(0),
+    llm_judge_input_mode: LLMJudgeInputModeSchema.default(LLMJudgeInputModeEnum.None),
+    llm_judge_prompt: z.string().min(0),
+});
+
+/** A single interactive step: an input string and the output expected in response. */
+const InteractiveStepSchema = z.object({
+    input: z.string(),
+    expected_output: ExpectedOutputSchema,
+});
+
+/** One test case: a command to run once, its expected output, and optional interactive steps. */
+export const TestCaseSchema = z.object({
+    name: z.string(),
+    work_directory: z.string().default("."),
+    single_run_command: z.string(),
+    single_run_expected_output: ExpectedOutputSchema,
+    interactive_steps: z.array(InteractiveStepSchema),
+});
+
+/** A testing workflow: the base fields without file searches, plus setup, test cases and cleanup. */
+export const TestingWorkflowEntrySchema = BaseWorkflowEntrySchema
+    .extend({
+        setup_commands: z.array(z.string()).default([]),
+        test_cases: z.array(TestCaseSchema).default([]),
+        cleanup_commands: z.array(z.string()).default([]),
+    })
+    .omit({
+        input_files_searches: true,
+    });
+/**
+ * Root schema of the config file.
+ *
+ * Both workflow lists default to an empty array, matching every nested list in
+ * the workflow schemas, so a config that only declares one kind of workflow
+ * (or none) still validates.
+ */
+export const ConfigSchema = z.object({
+    // OpenRouter credentials and the model used for every request.
+    openrouter: z.object({
+        api_key: z.string(),
+        model: z.string(),
+    }),
+    // Sampling parameters for completion requests.
+    hyperparameters: z.object({
+        max_completion_tokens: z.number().min(1).default(20000),
+        // NOTE(review): capped at 1 here; confirm whether the upstream API accepts a wider range.
+        temperature: z.number().min(0).max(1).default(0.9),
+        top_p: z.number().min(0).max(1).default(1),
+        frequency_penalty: z.number().min(-2).max(2).default(0),
+        presence_penalty: z.number().min(-2).max(2).default(0),
+        reasoning_effort: z.enum(["low", "medium", "high"]).default("high"),
+    }),
+    analysis_workflows: z.array(AnalysisWorkflowEntrySchema).default([]),
+    testing_workflows: z.array(TestingWorkflowEntrySchema).default([]),
+});

package/src/util/config.ts ADDED Viewed

@@ -0,0 +1,64 @@
+import os from "node:os";
+import { readFileSync, existsSync } from "node:fs";
+import {z} from "zod";
+import {ConfigSchema} from "./config-schema.ts";
+const homeDir: string = os.homedir();
+const defaultConfigFileName = "epf.toml";
+const configURLEnvVar = "EPF_CONFIG_URL";
+type Config = z.infer<typeof ConfigSchema>;
+
+/**
+ * Decide where the config comes from. Precedence: first command-line argument,
+ * then the EPF_CONFIG_URL environment variable, then `epf.toml` in the current
+ * directory, then `epf.toml` in the home directory.
+ * @returns A file path or an http(s) URL.
+ * @throws Error when none of the sources yields a location.
+ */
+function resolveConfigLocation(): string {
+    if (process.argv.length >= 3) {
+        const fromArgument = process.argv[2]!;
+        console.log(`Found config from command line argument: ${fromArgument}`);
+        return fromArgument;
+    }
+
+    if (process.env[configURLEnvVar]) {
+        console.log(`Found config from environment variable ${configURLEnvVar}`);
+        return process.env[configURLEnvVar]!;
+    }
+
+    if (existsSync(defaultConfigFileName)) {
+        console.log(`Found config from current directory`);
+        return defaultConfigFileName;
+    }
+
+    const homeConfigPath = `${homeDir}/${defaultConfigFileName}`;
+    if (existsSync(homeConfigPath)) {
+        console.log(`Found config from home directory`);
+        return homeConfigPath;
+    }
+
+    throw new Error(`Config file ${defaultConfigFileName} not found`);
+}
+
+/**
+ * Load the config text (over HTTP for http(s) locations, from disk otherwise),
+ * parse it as TOML and validate it against ConfigSchema.
+ * @returns The validated config.
+ * @throws Error when the config cannot be located, fetched, or validated.
+ */
+async function readConfig() {
+    console.log(`Loading config`);
+    const configFilePath = resolveConfigLocation();
+
+    const isRemote = configFilePath.startsWith("http://") || configFilePath.startsWith("https://");
+    let configFileContents: string;
+    if (isRemote) {
+        console.log(`Fetching config from URL: ${configFilePath}`);
+        const configResponse = await fetch(configFilePath);
+        if (!configResponse.ok) {
+            throw new Error(`Failed to fetch config from URL: ${configFilePath}, status: ${configResponse.status}`);
+        }
+        configFileContents = await configResponse.text();
+    } else {
+        console.log(`Loading config from file: ${configFilePath}`);
+        configFileContents = readFileSync(configFilePath).toString();
+    }
+
+    const rawConfig = Bun.TOML.parse(configFileContents);
+    const validation = ConfigSchema.safeParse(rawConfig);
+    if (!validation.success) {
+        console.error("Config file is invalid:", validation.error.format());
+        throw new Error("Config file is invalid");
+    }
+
+    console.log(`Config loaded from ${configFilePath}`);
+    return validation.data as Config;
+}
+
+// Loaded once at import time; every importing module shares this value.
+export const CONFIG = await readConfig();

package/src/util/file-payload.ts ADDED Viewed

@@ -0,0 +1,87 @@
+/**
+ * Helper class for generating file content payloads with language-specific formatting
+ */
+export class FilePayloadGenerator {
+    /** Maps a lower-cased file extension (including the dot) to a code-fence language tag. */
+    private static readonly LANGUAGE_MAP: Record<string, string> = {
+        '.cs': 'csharp',
+        '.cpp': 'cpp',
+        '.cc': 'cpp',
+        '.cxx': 'cpp',
+        '.c': 'c',
+        '.h': 'cpp',
+        '.hpp': 'cpp',
+        '.hxx': 'cpp',
+        '.java': 'java',
+        '.js': 'javascript',
+        '.ts': 'typescript',
+        '.py': 'python',
+        '.rb': 'ruby',
+        '.go': 'go',
+        '.rs': 'rust',
+        '.swift': 'swift',
+        '.kt': 'kotlin',
+        '.php': 'php',
+        '.scala': 'scala',
+        '.sh': 'bash',
+        '.bash': 'bash',
+        '.zsh': 'zsh',
+        '.ps1': 'powershell',
+        '.md': 'markdown',
+        '.json': 'json',
+        '.xml': 'xml',
+        '.yaml': 'yaml',
+        '.yml': 'yaml',
+        '.toml': 'toml',
+        '.html': 'html',
+        '.css': 'css',
+        '.scss': 'scss',
+        '.sql': 'sql',
+    };
+
+    /**
+     * Get the language identifier for a file based on its extension
+     * @param filePath The path to the file
+     * @returns The language identifier (e.g., 'csharp', 'cpp'), or 'text' when
+     *          the file has no extension or the extension is not mapped
+     */
+    private static getLanguageFromPath(filePath: string): string {
+        // Only the final path segment may contribute an extension, so a dot in a
+        // directory name is never mistaken for one.
+        const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
+        const baseName = filePath.substring(lastSeparator + 1);
+
+        const dotIndex = baseName.lastIndexOf('.');
+        if (dotIndex === -1) {
+            return 'text';
+        }
+
+        const ext = baseName.substring(dotIndex).toLowerCase();
+        // Own-property check: a plain object lookup would also hit inherited
+        // members such as `constructor`.
+        const mapped = Object.hasOwn(this.LANGUAGE_MAP, ext) ? this.LANGUAGE_MAP[ext] : undefined;
+        return mapped || 'text';
+    }
+
+    /**
+     * Generate a formatted payload for a single file
+     * @param filePath The path to the file
+     * @param content The content of the file
+     * @returns A formatted string with file path and content in a code block
+     */
+    static formatFileContent(filePath: string, content: string): string {
+        const language = this.getLanguageFromPath(filePath);
+        return `${filePath}\n\`\`\`${language}\n${content}\n\`\`\``;
+    }
+
+    /**
+     * Generate payloads for multiple files
+     * @param files Array of file paths
+     * @returns Array of formatted file content strings, in the same order as `files`
+     */
+    static async generatePayloads(files: string[]): Promise<string[]> {
+        // The reads are independent, so issue them together; Promise.all keeps input order.
+        return Promise.all(
+            files.map(async (file) => this.formatFileContent(file, await Bun.file(file).text()))
+        );
+    }
+
+    /**
+     * Add a custom language mapping
+     * @param extension The file extension (including the dot, e.g., '.custom')
+     * @param language The language identifier to use
+     */
+    static addLanguageMapping(extension: string, language: string): void {
+        this.LANGUAGE_MAP[extension.toLowerCase()] = language;
+    }
+}

package/src/workflow/analysis-workflow.ts ADDED Viewed

@@ -0,0 +1,88 @@
+import {Glob} from "bun";
+import {CONFIG} from "../util/config.ts";
+import {FilePayloadGenerator} from "../util/file-payload.ts";
+import type {WorkflowDependencies} from "./index.ts";
+import chalk from "chalk";
+/**
+ * Run one analysis workflow: collect the files matched by the workflow's glob
+ * searches, send them with the workflow prompt to the configured model, and
+ * write the completion text to the workflow's output file.
+ *
+ * @param workflow The analysis workflow entry from the config
+ * @param runNum 1-based run number; substituted for `[run]` in the output filename
+ * @param deps Shared services (the OpenRouter client)
+ * @returns Resolves once the output file is written, or early when no file matched
+ */
+export async function executeAnalysisWorkflow(workflow: typeof CONFIG.analysis_workflows[number], runNum: number, deps: WorkflowDependencies) {
+    console.log(`Executing analysis workflow: ${workflow.slug}`);
+    const log = (...args: Parameters<typeof console.log>) => {
+        console.log(chalk.cyan(`[${workflow.slug}]`), ...args);
+    };
+    const warn = (...args: Parameters<typeof console.warn>) => {
+        console.warn(chalk.red(`[${workflow.slug}]`), ...args);
+    };
+
+    const allFiles = (
+        await Promise.all(
+            workflow.input_files_searches.map(async (fileSearch) => {
+                const fileExclusionsSet = new Set(fileSearch.excluded_files);
+                const glob = new Glob(fileSearch.file_glob);
+                const searchDirectory = fileSearch.search_directory;
+                // The scan yields paths relative to the search directory, while the
+                // files are later read relative to the process working directory.
+                // Re-anchor every match unless the search directory is the cwd itself.
+                const trimmedDirectory = searchDirectory.replace(/[\\/]+$/, "");
+                const scansCwd = searchDirectory === "" || trimmedDirectory === ".";
+                const matches: string[] = [];
+                for await (const file of glob.scan(searchDirectory)) {
+                    // Exclusions are matched against the path relative to the search directory.
+                    if (fileExclusionsSet.has(file)) {
+                        log(`Excluding file: ${file}`);
+                        continue;
+                    }
+                    matches.push(scansCwd ? file : `${trimmedDirectory}/${file}`);
+                }
+                log(`Found ${matches.length} files for search: ${fileSearch.file_glob} in ${fileSearch.search_directory}`, matches);
+                return matches;
+            })
+        )
+    ).flat();
+
+    if (allFiles.length === 0) {
+        warn(`No files found for workflow, skipping...`);
+        return;
+    }
+    log(`Found ${allFiles.length} files for workflow`);
+
+    const fileContentsPayload = await FilePayloadGenerator.generatePayloads(allFiles);
+
+    log("Sending chat completion request...");
+    const startTime = Date.now();
+    // Second-resolution timestamp; also substituted for `[seed]` in the output filename.
+    const seed = Math.floor(Date.now() / 1000);
+    const completion = await deps.openRouter.chat.send({
+        model: CONFIG.openrouter.model,
+        maxCompletionTokens: CONFIG.hyperparameters.max_completion_tokens,
+        messages: [
+            {
+                role: "system",
+                content: workflow.prompt,
+            },
+            {
+                role: "user",
+                // One text part per input file.
+                content: fileContentsPayload.map((file) => {
+                    return {
+                        type: "text",
+                        text: file,
+                    };
+                }),
+            }
+        ],
+        stream: false,
+        seed: seed,
+        frequencyPenalty: CONFIG.hyperparameters.frequency_penalty,
+        presencePenalty: CONFIG.hyperparameters.presence_penalty,
+        temperature: CONFIG.hyperparameters.temperature,
+        // Previously validated in the config but never forwarded to the request.
+        topP: CONFIG.hyperparameters.top_p,
+        reasoning: {
+            effort: CONFIG.hyperparameters.reasoning_effort,
+        },
+    });
+    log(`Completion response generated in ${(Date.now() - startTime) / 1000} seconds`);
+
+    if (completion.choices.length < 1){
+        warn("No choices returned from completion");
+        console.log(completion);
+    }
+    // Falls back to an empty string so an output file is still produced.
+    const completionText = completion.choices[0]?.message.content?.toString() ?? "";
+
+    // TODO: Add more template variables
+    const outputFileName = workflow.output_filename
+        .replaceAll("[seed]", seed.toString())
+        .replaceAll("[slug]", workflow.slug)
+        // Model ids contain '/', which would otherwise create sub-directories.
+        .replaceAll("[model]", `(${completion.model.replaceAll("/", "--")})`)
+        .replaceAll("[run]", runNum.toString());
+
+    await Bun.write(outputFileName, completionText);
+    log(`Completion written to ${outputFileName}`);
+}

package/src/workflow/index.ts ADDED Viewed

@@ -0,0 +1,5 @@
+import type {OpenRouter} from "@openrouter/sdk";
+/** Shared services passed into every workflow run. */
+export type WorkflowDependencies = {
+    /** OpenRouter client used to send chat completion requests. */
+    openRouter: OpenRouter;
+};

package/src/workflow/testing-workflow.ts ADDED Viewed

@@ -0,0 +1,152 @@
+import {$} from "bun";
+import {CONFIG} from "../util/config.ts";
+import chalk from "chalk";
+import {LLMJudgeInputModeEnum} from "../util/config-schema.ts";
+import type {WorkflowDependencies} from "./index.ts";
+/**
+ * Run one testing workflow: execute the setup commands, run every test case,
+ * execute the cleanup commands, then print a per-case summary table.
+ *
+ * A test case is judged either by a plain substring check on the sanitized
+ * command output (judge mode NONE) or by an LLM judge (judge mode FULL). Cases
+ * that cannot be evaluated (interactive steps, unsupported judge modes) are
+ * reported as skipped rather than silently dropped from the summary.
+ *
+ * @param workflow The testing workflow entry from the config
+ * @param runNum 1-based run number (currently unused by this workflow)
+ * @param deps Shared services (the OpenRouter client)
+ */
+export async function executeTestingWorkflow(workflow: typeof CONFIG.testing_workflows[number], runNum: number, deps: WorkflowDependencies) {
+    console.log(`Executing testing workflow: ${workflow.slug}`);
+    const log = (...args: Parameters<typeof console.log>) => {
+        console.log(chalk.cyan(`[${workflow.slug}]`), ...args);
+    };
+    const warn = (...args: Parameters<typeof console.warn>) => {
+        console.warn(chalk.red(`[${workflow.slug}]`), ...args);
+    };
+    const debug = (...args: Parameters<typeof console.debug>) => {
+        console.debug(chalk.gray(`[${workflow.slug}]`), ...args.map(e => typeof e === "string" ? chalk.gray(e) : e));
+    };
+
+    log(`$PATH: ${process.env.PATH}`);
+    for (const command of workflow.setup_commands){
+        log(`Running setup command: ${command}`);
+        // nothrow: a failing setup command does not abort the workflow.
+        await $`${{raw: command}}`.nothrow();
+    }
+
+    // true = passed, false = failed, undefined = skipped. The arrays are filled up
+    // front so they have no holes and every test case gets a row in the summary.
+    const testCasesResults: (boolean | undefined)[] = new Array(workflow.test_cases.length).fill(undefined);
+    const testCasesResultsExplanation: string[] = new Array(workflow.test_cases.length).fill("");
+
+    for (let i = 0; i < workflow.test_cases.length; i++) {
+        try {
+            const testCase = workflow.test_cases[i]!;
+            const expected = testCase.single_run_expected_output;
+            log(`Running test case ${i + 1}/${workflow.test_cases.length}: ${testCase.name}`);
+            if (testCase.interactive_steps.length > 0) {
+                warn("Interactive steps are not supported in this version. Skipping interactive steps.");
+                continue;
+            }
+
+            const {stdout, stderr, exitCode} = await $`${{raw: testCase.single_run_command}}`.cwd(testCase.work_directory).nothrow().quiet();
+            console.log();  // Blank line for readability
+            debug(`Test case stdout (${stdout.length}):\n${stdout}`);
+            debug(`Test case stderr (${stderr.length}):\n${stderr}`);
+            debug(`Exit code: ${exitCode}`);
+
+            // Drop everything up to and including the first prefix marker, and
+            // everything from the last postfix marker onward.
+            let commandOutput = stdout.toString();
+            if (expected.prefix_strip_string.length > 0) {
+                const prefixIndex = commandOutput.indexOf(expected.prefix_strip_string);
+                if (prefixIndex !== -1) {
+                    commandOutput = commandOutput.substring(prefixIndex + expected.prefix_strip_string.length);
+                }
+            }
+            if (expected.postfix_strip_string.length > 0) {
+                const postfixIndex = commandOutput.lastIndexOf(expected.postfix_strip_string);
+                if (postfixIndex !== -1) {
+                    commandOutput = commandOutput.substring(0, postfixIndex);
+                }
+            }
+            debug("Sanitized command output for evaluation:\n", commandOutput);
+
+            switch (expected.llm_judge_input_mode) {
+                case LLMJudgeInputModeEnum.None: {
+                    // Evaluate the sanitized output, not the raw stdout, so the
+                    // configured prefix/postfix stripping actually takes effect.
+                    if (commandOutput.includes(expected.substring)) {
+                        log(`Test case '${testCase.name}' passed: expected substring found in output.`);
+                        testCasesResults[i] = true;
+                    }
+                    else {
+                        warn(`Test case '${testCase.name}' failed: expected substring NOT found in output.`);
+                        testCasesResults[i] = false;
+                    }
+                    break;
+                }
+                case LLMJudgeInputModeEnum.Full: {
+                    log("Evaluating full output with LLM judge...");
+                    const seed = Math.floor(Date.now() / 1000);
+                    const completion = await deps.openRouter.chat.send({
+                        model: CONFIG.openrouter.model,
+                        maxCompletionTokens: CONFIG.hyperparameters.max_completion_tokens,
+                        messages: [
+                            {
+                                role: "system",
+                                content: expected.llm_judge_prompt,
+                            },
+                            {
+                                role: "user",
+                                content: JSON.stringify({
+                                    "expected_output_substring": expected.substring,
+                                    "actual_output": commandOutput,
+                                }),
+                            }
+                        ],
+                        stream: false,
+                        seed: seed,
+                        frequencyPenalty: CONFIG.hyperparameters.frequency_penalty,
+                        presencePenalty: CONFIG.hyperparameters.presence_penalty,
+                        // The judge uses a fixed temperature of 0 instead of the configured value.
+                        temperature: 0,
+                        reasoning: {
+                            effort: CONFIG.hyperparameters.reasoning_effort,
+                        },
+                    });
+                    if (completion.choices.length < 1){
+                        warn("No choices returned from completion");
+                        console.log(completion);
+                    }
+                    const completionText = completion.choices[0]?.message.content?.toString() ?? "";
+                    log(`LLM judge completion:\n${completionText}`);
+
+                    const llmJudgeResult = completionText.toLowerCase().includes("pass");  // TODO: More robust parsing
+                    if (llmJudgeResult) {
+                        log(chalk.green(`Test case '${testCase.name}' passed according to LLM judge.`));
+                        testCasesResults[i] = true;
+                    }
+                    else {
+                        warn(chalk.yellowBright(`Test case '${testCase.name}' failed according to LLM judge.`));
+                        testCasesResults[i] = false;
+                    }
+
+                    // The explanation is optional: it is only filled in when the judge
+                    // replied with JSON that carries a `summary` field.
+                    try {
+                        const judgeResultObject = JSON.parse(completionText);
+                        if ("summary" in judgeResultObject) {
+                            testCasesResultsExplanation[i] = judgeResultObject.summary;
+                        }
+                    } catch (e) {
+                        warn("Failed to parse LLM judge output as JSON. Make sure the LLM prompt requests JSON output.");
+                    }
+                    break;
+                }
+                default:
+                    warn(`LLM judge input mode '${expected.llm_judge_input_mode}' is not supported in this version. Skipping LLM judging.`);
+                    break;
+            }
+        } catch (e) {
+            warn(`Error occurred while executing test case ${i + 1}:`, e);
+            testCasesResults[i] = false;
+        }
+    }
+
+    for (const command of workflow.cleanup_commands){
+        log(`Running cleanup command: ${command}`);
+        await $`${{raw: command}}`.nothrow();
+    }
+
+    const passedCount = testCasesResults.filter((r) => r === true).length;
+    const skippedCount = testCasesResults.filter((r) => r === undefined).length;
+    log(`Testing workflow completed. Passed ${passedCount}/${workflow.test_cases.length} test cases.`);
+    if (skippedCount > 0) {
+        warn(`${skippedCount} test case(s) were skipped.`);
+    }
+    console.table(testCasesResults.map((entry, idx) => {
+        let status: string;
+        if (entry === undefined) {
+            status = chalk.yellow("SKIP");
+        } else {
+            status = entry ? chalk.green("PASS") : chalk.red("FAIL");
+        }
+        return [
+            workflow.test_cases[idx]?.name,
+            status,
+            testCasesResultsExplanation[idx] || "",
+        ];
+    }));
+    log(`Finished testing workflow: ${workflow.slug}`);
+}

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,29 @@
+{
+  "compilerOptions": {
+    // Environment setup & latest features
+    "lib": ["ESNext"],
+    "target": "ESNext",
+    "module": "Preserve",
+    "moduleDetection": "force",
+    "jsx": "react-jsx",
+    "allowJs": true,
+    // Bundler mode
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "verbatimModuleSyntax": true,
+    "noEmit": true,
+    // Best practices
+    "strict": true,
+    "skipLibCheck": true,
+    "noFallthroughCasesInSwitch": true,
+    "noUncheckedIndexedAccess": true,
+    "noImplicitOverride": true,
+    // Some stricter flags (disabled by default)
+    "noUnusedLocals": false,
+    "noUnusedParameters": false,
+    "noPropertyAccessFromIndexSignature": false
+  }
+}