even-pf 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -8,6 +8,11 @@ To run:
8
8
  bunx --bun even-pf [config]
9
9
  ```
10
10
 
11
+ The `config` argument can be an absolute path, a relative path, or a URL.
12
+ If it is not specified, the tool first checks the `EPF_CONFIG_URL` environment variable, then looks for `epf.toml` in the current directory and the home directory.
13
+
14
+ If you're using the tool in a resource-constrained environment, you can use platform-specific executables like [even-pf-linux-x64](https://www.npmjs.com/package/even-pf-linux-x64).
15
+
11
16
  ## Development
12
17
  To install dependencies:
13
18
  ```bash
@@ -16,13 +21,18 @@ bun install
16
21
 
17
22
  To install as a tool globally:
18
23
  ```bash
19
- bun link
24
+ bun link -g e-pf
20
25
  ```
21
26
 
22
27
  Make sure you have a config file in your home or current directory. Alternatively, you can set the `EPF_CONFIG_URL` environment variable.
23
28
 
24
29
  This project was created using `bun init` in bun v1.3.2. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
25
30
 
31
+ After making changes, you might want to bump the version.
32
+ ```bash
33
+ bun run bump <semver>
34
+ ```
35
+
26
36
  ## Specs
27
37
  ### File-viewer Frontend
28
38
  Because the tool might be running on a remote server, we use a simple file-viewer frontend to make the Markdown files easy to view.
package/bin/even-pf.js CHANGED
@@ -1,8 +1,8 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env bun
2
2
  "use strict";
3
3
 
4
- import {spawnSync} from "child_process";
5
- import {chmodSync, statSync} from "fs";
4
+ const { spawnSync } = require("child_process");
5
+ const { chmodSync, statSync } = require("fs");
6
6
 
7
7
  // Map process.platform + process.arch to the sub-package name and binary filename
8
8
  const PLATFORM_MAP = {
@@ -56,3 +56,4 @@ if (result.error) {
56
56
  }
57
57
 
58
58
  process.exit(result.status ?? 0);
59
+
package/bun.lock CHANGED
@@ -13,6 +13,13 @@
13
13
  "devDependencies": {
14
14
  "@types/bun": "latest",
15
15
  },
16
+ "optionalDependencies": {
17
+ "even-pf-darwin-arm64": "0.3.4",
18
+ "even-pf-darwin-x64": "0.3.4",
19
+ "even-pf-linux-arm64": "0.3.4",
20
+ "even-pf-linux-x64": "0.3.4",
21
+ "even-pf-windows-x64": "0.3.4",
22
+ },
16
23
  "peerDependencies": {
17
24
  "typescript": "^5.9.3",
18
25
  },
@@ -29,6 +36,16 @@
29
36
 
30
37
  "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
31
38
 
39
+ "even-pf-darwin-arm64": ["even-pf-darwin-arm64@0.3.4", "", { "os": "darwin", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-x2vTM0ogvlFhUiHqb13kXJTKPRPU/VdoZa1G51c3IHsZz7wdDpkD/DxcEvxAmO28MbJtfjxig8nRFMvld5J6jg=="],
40
+
41
+ "even-pf-darwin-x64": ["even-pf-darwin-x64@0.3.4", "", { "os": "darwin", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-L2hzBvSLFcWMB/MJQeZTQHI8mqpGMQ7T0tSPXjv4S1tFglF8ZtdxggDAhmItEyyqVfsAT6LY+HyOpJnUAga9tg=="],
42
+
43
+ "even-pf-linux-arm64": ["even-pf-linux-arm64@0.3.4", "", { "os": "linux", "cpu": "arm64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-/5nLtKs+8xvTHEkrVPQQ5XQBTKROmF42z6+fo4AOkOj/TbDGwCher6RYYMHQ6pD7M0jjF5AdSlj5HLEGf/N9Qg=="],
44
+
45
+ "even-pf-linux-x64": ["even-pf-linux-x64@0.3.4", "", { "os": "linux", "cpu": "x64", "bin": { "even-pf": "bin/even-pf" } }, "sha512-UN0wz2svjcjckugzFyc4tHxllrTM7IScSmnLDq5z9AB5cplHZrvAg8cYcvz20YEcHsr7aUkxrhA7iDv5KKYhkA=="],
46
+
47
+ "even-pf-windows-x64": ["even-pf-windows-x64@0.3.4", "", { "os": "win32", "cpu": "x64", "bin": { "even-pf": "bin/even-pf.exe" } }, "sha512-ni84uLUdo95TlACDUyz7Ia7+4wigSByvUuR+IrXbLzkN90mZTsJoZVbAoJMR8CnOlPPEClcPHqkTcYl1lbLOwA=="],
48
+
32
49
  "smol-toml": ["smol-toml@1.6.0", "", {}, "sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw=="],
33
50
 
34
51
  "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
package/epf.example.toml CHANGED
@@ -1,3 +1,8 @@
1
+ [output_viewing]
2
+ mode = "webui"
3
+ api_port = 0
4
+ webui_base_url = "https://ta-tools-dashboard.vercel.app"
5
+
1
6
  [llm.models.general_analysis]
2
7
  sdk = "openrouter"
3
8
  model_name = ""
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "even-pf",
3
- "version": "0.3.3",
3
+ "version": "0.4.0",
4
4
  "description": "AI-assisted responsible grading tool for programming assignments",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -15,7 +15,8 @@
15
15
  "config-gen": "bun run --console-depth 6 src/generate-config.ts"
16
16
  },
17
17
  "bin": {
18
- "even-pf": "bin/even-pf.js"
18
+ "even-pf": "bin/even-pf.js",
19
+ "e-pf": "src/cli.ts"
19
20
  },
20
21
  "devDependencies": {
21
22
  "@types/bun": "latest"
@@ -30,11 +31,11 @@
30
31
  "zod-defaults": "^0.2.3"
31
32
  },
32
33
  "optionalDependencies": {
33
- "even-pf-linux-x64": "0.3.3",
34
- "even-pf-linux-arm64": "0.3.3",
35
- "even-pf-windows-x64": "0.3.3",
36
- "even-pf-darwin-x64": "0.3.3",
37
- "even-pf-darwin-arm64": "0.3.3"
34
+ "even-pf-linux-x64": "0.4.0",
35
+ "even-pf-linux-arm64": "0.4.0",
36
+ "even-pf-windows-x64": "0.4.0",
37
+ "even-pf-darwin-x64": "0.4.0",
38
+ "even-pf-darwin-arm64": "0.4.0"
38
39
  },
39
40
  "files": [
40
41
  "bin/even-pf.js",
package/src/cli.ts CHANGED
@@ -35,7 +35,7 @@ testingWorkflows.forEach((workflow) => {
35
35
  workflowRuns.push(executeTestingWorkflow(workflow, i+1, workflowDependencies));
36
36
  }
37
37
  });
38
-
38
+ workflowDependencies.outputViewer.display(); // For start the server early.
39
39
  const workflowsResults = await Promise.allSettled(workflowRuns);
40
40
  // Summarize with indices to include slugs in failure logs
41
41
  const failedIndices: number[] = [];
@@ -1,80 +1,92 @@
1
- import {z} from "zod";
2
-
3
- export const ModelConfigSchema = z.object({
4
- sdk: z.enum(["openrouter"]).default("openrouter"),
5
- model_name: z.string().default(""),
6
- max_completion_tokens: z.number().min(1).default(20000),
7
- temperature: z.number().min(0).max(1).default(0.9),
8
- top_p: z.number().min(0).max(1).default(1),
9
- frequency_penalty: z.number().min(-2).max(2).default(0),
10
- presence_penalty: z.number().min(-2).max(2).default(0),
11
- reasoning_effort: z.enum(["low", "medium", "high"]).default("high"),
12
- });
13
-
14
- export const LLMConfigSchema = z.object({
15
- models: z.record(z.string(), ModelConfigSchema),
16
- prompt_replacement: z.record(z.string(), z.string()),
17
- });
18
-
19
- export const FileSearchEntrySchema = z.object({
20
- file_glob: z.string().min(1),
21
- search_directory: z.string().default("."),
22
- excluded_files: z.array(z.string()).default([]),
23
- });
24
-
25
- export const BaseWorkflowEntrySchema = z.object({
26
- slug: z.string(),
27
- model: z.string().default("general_analysis"),
28
- runs: z.number().min(1).default(1),
29
- input_files_searches: z.array(FileSearchEntrySchema).default([]),
30
- output_filename: z.string().min(1),
31
- });
32
-
33
- export const AnalysisWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
34
- prompt: z.string(),
35
- })
36
-
37
- export enum LLMJudgeInputModeEnum{
38
- None = "NONE",
39
- Diff = "DIFF",
40
- Full = "FULL",
41
- }
42
- const LLMJudgeInputModeSchema = z.enum(LLMJudgeInputModeEnum);
43
-
44
- const ExpectedOutputSchema = z.object({
45
- prefix_strip_string: z.string().min(0),
46
- postfix_strip_string: z.string().min(0),
47
- substring: z.string().min(0),
48
- llm_judge_input_mode: LLMJudgeInputModeSchema.default(LLMJudgeInputModeEnum.None),
49
- llm_judge_prompt: z.string().min(0),
50
- });
51
-
52
- export const TestCaseSchema = z.object({
53
- name: z.string(),
54
- work_directory: z.string().default("."),
55
- single_run_command: z.string(),
56
- single_run_expected_output: ExpectedOutputSchema,
57
- interactive_steps: z.array(z.object({
58
- input: z.string(),
59
- expected_output: ExpectedOutputSchema,
60
- })),
61
- });
62
-
63
- export const TestingWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
64
- setup_commands: z.array(z.string()).default([]),
65
- test_cases: z.array(TestCaseSchema).default([]),
66
- cleanup_commands: z.array(z.string()).default([]),
67
- }).omit({
68
- input_files_searches: true,
69
- });
70
-
71
- export const ConfigSchema = z.object({
72
- llm: LLMConfigSchema,
73
- vendors: z.object({
74
- openrouter: z.object({
75
- api_key: z.string(),
76
- }),
77
- }),
78
- analysis_workflows: z.array(AnalysisWorkflowEntrySchema),
79
- testing_workflows: z.array(TestingWorkflowEntrySchema),
80
- });
1
+ import {z} from "zod";
2
+
3
+ export enum OutputViewingModeEnum {
4
+ Local = "local",
5
+ WebUI = "webui",
6
+ }
7
+
8
+ export const OutputViewingConfigSchema = z.object({
9
+ mode: z.enum(OutputViewingModeEnum).default(OutputViewingModeEnum.WebUI),
10
+ api_port: z.number().min(0).max(65535).default(0), // 0 means random available port
11
+ webui_base_url: z.string().default("https://ta-tools-dashboard.vercel.app"),
12
+ });
13
+
14
+ export const ModelConfigSchema = z.object({
15
+ sdk: z.enum(["openrouter"]).default("openrouter"),
16
+ model_name: z.string().default(""),
17
+ max_completion_tokens: z.number().min(1).default(20000),
18
+ temperature: z.number().min(0).max(1).default(0.9),
19
+ top_p: z.number().min(0).max(1).default(1),
20
+ frequency_penalty: z.number().min(-2).max(2).default(0),
21
+ presence_penalty: z.number().min(-2).max(2).default(0),
22
+ reasoning_effort: z.enum(["low", "medium", "high"]).default("high"),
23
+ });
24
+
25
+ export const LLMConfigSchema = z.object({
26
+ models: z.record(z.string(), ModelConfigSchema),
27
+ prompt_replacement: z.record(z.string(), z.string()),
28
+ });
29
+
30
+ export const FileSearchEntrySchema = z.object({
31
+ file_glob: z.string().min(1),
32
+ search_directory: z.string().default("."),
33
+ excluded_files: z.array(z.string()).default([]),
34
+ });
35
+
36
+ export const BaseWorkflowEntrySchema = z.object({
37
+ slug: z.string(),
38
+ model: z.string().default("general_analysis"),
39
+ runs: z.number().min(1).default(1),
40
+ input_files_searches: z.array(FileSearchEntrySchema).default([]),
41
+ output_filename: z.string().min(1),
42
+ });
43
+
44
+ export const AnalysisWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
45
+ prompt: z.string(),
46
+ })
47
+
48
+ export enum LLMJudgeInputModeEnum{
49
+ None = "NONE",
50
+ Diff = "DIFF",
51
+ Full = "FULL",
52
+ }
53
+ const LLMJudgeInputModeSchema = z.enum(LLMJudgeInputModeEnum);
54
+
55
+ const ExpectedOutputSchema = z.object({
56
+ prefix_strip_string: z.string().min(0),
57
+ postfix_strip_string: z.string().min(0),
58
+ substring: z.string().min(0),
59
+ llm_judge_input_mode: LLMJudgeInputModeSchema.default(LLMJudgeInputModeEnum.None),
60
+ llm_judge_prompt: z.string().min(0),
61
+ });
62
+
63
+ export const TestCaseSchema = z.object({
64
+ name: z.string(),
65
+ work_directory: z.string().default("."),
66
+ single_run_command: z.string(),
67
+ single_run_expected_output: ExpectedOutputSchema,
68
+ interactive_steps: z.array(z.object({
69
+ input: z.string(),
70
+ expected_output: ExpectedOutputSchema,
71
+ })),
72
+ });
73
+
74
+ export const TestingWorkflowEntrySchema = BaseWorkflowEntrySchema.extend({
75
+ setup_commands: z.array(z.string()).default([]),
76
+ test_cases: z.array(TestCaseSchema).default([]),
77
+ cleanup_commands: z.array(z.string()).default([]),
78
+ }).omit({
79
+ input_files_searches: true,
80
+ });
81
+
82
+ export const ConfigSchema = z.object({
83
+ output_viewing: OutputViewingConfigSchema,
84
+ llm: LLMConfigSchema,
85
+ vendors: z.object({
86
+ openrouter: z.object({
87
+ api_key: z.string(),
88
+ }),
89
+ }),
90
+ analysis_workflows: z.array(AnalysisWorkflowEntrySchema).default([]),
91
+ testing_workflows: z.array(TestingWorkflowEntrySchema).default([]),
92
+ });
@@ -1,54 +1,119 @@
1
- import chalk from "chalk";
2
-
3
- type FileRecord = {
4
- type: "markdown" | "text";
5
- content: string;
6
- }
7
-
8
- export class OutputViewer {
9
- filesRecords: Record<string, FileRecord> = {};
10
-
11
-
12
- addFile(filename: string, _: FileRecord): void {
13
- this.filesRecords[filename] = _;
14
- }
15
-
16
- private serve(): void {
17
- let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
18
-
19
- let server = Bun.serve({
20
- port: 0,
21
- routes: {
22
- "/:slug": (req) => {
23
- let slug = req.params.slug;
24
- console.log(`Request for slug: "${slug}"`);
25
- return new Response(this.filesRecords[slug]?.content ?? "Not Found");
26
- }
27
- },
28
- fetch(req) {
29
- return new Response("Not Found (fallback)", { status: 404 });
30
- },
31
- });
32
- console.log(server.url);
33
- }
34
-
35
- display() {
36
- if (Object.keys(this.filesRecords).length === 0) {
37
- console.warn("No files to display");
38
- return;
39
- }
40
-
41
- console.log("Click the following links to view the outputs in your browser:");
42
-
43
- const FRONTEND_URL = "https://ta-tools-dashboard.vercel.app/tools/md-viewer"; //TODO: not hardcode this
44
- let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
45
- for (const [filename, fileRecord] of files) {
46
- let params = new URLSearchParams();
47
- params.set("name", filename);
48
- params.set("comp", "gzip");
49
- params.set("data", Bun.gzipSync(fileRecord.content).toBase64());
50
- let url = `${FRONTEND_URL}#${params.toString()}`;
51
- console.log(`${chalk.cyan(filename)}: ${url}` + "\n");
52
- }
53
- }
54
- }
1
+ import chalk from "chalk";
2
+
3
+ import {CONFIG} from "./config.ts";
4
+ import {OutputViewingModeEnum} from "./config-schema.ts";
5
+
6
+ type FileRecord = {
7
+ type: "markdown" | "text";
8
+ content: string;
9
+ }
10
+
11
+ const CORS_HEADERS = {
12
+ "Access-Control-Allow-Origin": "*",
13
+ "Access-Control-Allow-Methods": "GET, OPTIONS",
14
+ "Access-Control-Allow-Headers": "Content-Type",
15
+ };
16
+
17
+ function jsonResponse(data: unknown, status = 200): Response {
18
+ return new Response(JSON.stringify(data), {
19
+ status,
20
+ headers: {
21
+ "Content-Type": "application/json",
22
+ ...CORS_HEADERS,
23
+ },
24
+ });
25
+ }
26
+
27
+ export class OutputViewer {
28
+ filesRecords: Record<string, FileRecord> = {};
29
+ displayed: boolean = false;
30
+
31
+ addFile(filename: string, _: FileRecord): void {
32
+ this.filesRecords[filename] = _;
33
+ }
34
+
35
+ serve(): string {
36
+ let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
37
+
38
+ let server = Bun.serve({
39
+ port: CONFIG.output_viewing.api_port,
40
+ routes: {
41
+ "/": (req) => {
42
+ if (req.method === "OPTIONS") {
43
+ return new Response(null, { status: 204, headers: CORS_HEADERS });
44
+ }
45
+ return jsonResponse({
46
+ files: files.map(([filename, fileRecord]) => ({
47
+ name: filename,
48
+ type: fileRecord.type,
49
+ })),
50
+ });
51
+ },
52
+ "/:slug": (req) => {
53
+ if (req.method === "OPTIONS") {
54
+ return new Response(null, { status: 204, headers: CORS_HEADERS });
55
+ }
56
+ let slug = req.params.slug;
57
+ let record = this.filesRecords[slug];
58
+ if (!record) {
59
+ return jsonResponse({ error: "Not Found" }, 404);
60
+ }
61
+ return jsonResponse({
62
+ name: slug,
63
+ type: record.type,
64
+ content: record.content,
65
+ });
66
+ }
67
+ },
68
+ fetch(req) {
69
+ if (req.method === "OPTIONS") {
70
+ return new Response(null, { status: 204, headers: CORS_HEADERS });
71
+ }
72
+ return jsonResponse({ error: "Not Found" }, 404);
73
+ },
74
+ });
75
+ console.log(server.url);
76
+ return server.url.toString();
77
+ }
78
+
79
+ display() {
80
+ let frontendURL = "";
81
+ switch (CONFIG.output_viewing.mode) {
82
+ case OutputViewingModeEnum.Local:
83
+ if (Object.keys(this.filesRecords).length === 0) {
84
+ console.warn("No files to display (you can probably ignore this warning if your workflows haven't completed yet)");
85
+ return;
86
+ }
87
+
88
+ console.log("Click the following links to view the outputs in your browser:");
89
+
90
+ let files = Object.entries(this.filesRecords).sort((a, b) => a[0].localeCompare(b[0]));
91
+ for (const [filename, fileRecord] of files) {
92
+ let params = new URLSearchParams();
93
+ params.set("name", filename);
94
+ params.set("comp", "gzip");
95
+ params.set("data", Bun.gzipSync(fileRecord.content).toBase64());
96
+ frontendURL = `${CONFIG.output_viewing.webui_base_url}/tools/results-viewer#${params.toString()}`;
97
+ console.log(`${chalk.cyan(filename)}: ${frontendURL}` + "\n");
98
+ }
99
+ break
100
+ case OutputViewingModeEnum.WebUI:
101
+ if (this.displayed){
102
+ console.log("Output viewer API is already running");
103
+ console.log(frontendURL + "\n");
104
+ console.log("Press Ctrl+C to stop")
105
+ return;
106
+ }
107
+ this.displayed = true;
108
+ let apiURL = this.serve();
109
+ let params = new URLSearchParams();
110
+ params.set("api", apiURL);
111
+ frontendURL = `${CONFIG.output_viewing.webui_base_url}/tools/results-viewer#${params.toString()}`;
112
+
113
+ console.log(chalk.cyan("Open the following URL to view all outputs:"));
114
+ console.log(frontendURL);
115
+ console.log("Press Ctrl+C to stop the server")
116
+ }
117
+
118
+ }
119
+ }
package/src/version.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  // Injected at compile time by Bun.build({ define }) in scripts/build-all.ts
2
2
  declare const EPF_VERSION: string;
3
3
 
4
- console.log(`even-pf v${EPF_VERSION}`);
4
+ const version = typeof EPF_VERSION !== "undefined" ? `v${EPF_VERSION}` : "dev";
5
+ console.log(`even-pf ${version}`);
5
6