@syntheticlab/synbad 0.0.3 → 0.0.5

package/README.md CHANGED
@@ -8,6 +8,55 @@ inference quality as high as possible.
  If you find bugs in Synthetic's model hosting, please contribute the bugs here!
  We will fix them.

+ ## Results
+
+ We keep a running tally of provider+model results for GLM-4.6, Kimi K2
+ Thinking, and MiniMax M2. Feel free to add more provider results!
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Synthetic|GLM-4.6 |:white_check_mark: 100%|
+ |Synthetic|Kimi K2 Thinking|:white_check_mark: 100%|
+ |Synthetic|MiniMax M2 |:white_check_mark: 100%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Fireworks|GLM-4.6 |:white_check_mark: 100%|
+ |Fireworks|Kimi K2 Thinking|:x: 86%|
+ |Fireworks|MiniMax M2 |:x: 29%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Together |GLM-4.6 |:white_check_mark: 100%|
+ |Together |Kimi K2 Thinking|:x: 71%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Parasail |GLM-4.6 |:x: 71%|
+ |Parasail |Kimi K2 Thinking|:x: 57%|
+
+ ## How do I contribute inference bugs?
+
+ If you already have some problematic JSON, head over to the
+ [Contributing](#contributing) section. If you don't, don't worry! Synbad makes
+ it easy to capture the problematic JSON you're encountering.
+
+ First, run the Synbad Proxy, specifying the local port you want to use and the
+ inference host you want to target. For example, to forward requests from
+ `localhost:3000` to Synthetic's API, you'd run:
+
+ ```bash
+ synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1
+ ```
+
+ Then, configure your coding agent (or whichever local tool you're using) to
+ point to `http://localhost:3000` (or whichever port you selected). The Synbad
+ Proxy will log all request bodies to `stdout`, so all you need to do is
+ reproduce the bug using your tool or coding agent, and then copy the JSON it
+ printed to `stdout`.
+
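A quick way to capture that JSON without hand-copying: since the proxy writes its UI messages to `stderr` and only request bodies to `stdout`, you can redirect `stdout` straight into a file (the filename below is just an example):

```bash
# Request bodies land in the file; progress messages stay on stderr.
# "captured-request.json" is an illustrative name.
synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1 > captured-request.json
```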
+ Now you have reproducible JSON to file a bug via Synbad!
+
  ## Contributing

  First, clone this repo from GitHub. Then `cd` into it and run:
@@ -30,16 +79,17 @@ For example, we can test reasoning parsing very simply (as we do in the

  ```typescript
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

+ // Insert your JSON. You can paste your results from the Synbad proxy here.
  export const json = {
    messages: [
-     { role: "user", content: "Why does 1+1=2?" },
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  }
  ```
@@ -48,18 +98,43 @@ The `asserts.ts` file re-exports all of the built-in Node.js assertion
  functions, and also adds a few extra ones, e.g. `isNotNullish`, which checks
  whether an object is `null` or `undefined`.

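As a rough sketch (illustrative only; the real implementation lives in `source/asserts.ts` and may differ), an `isNotNullish`-style helper can be written as a TypeScript assertion function:

```typescript
// Sketch of an isNotNullish-style assertion: narrows the type and throws
// on null/undefined. The package's actual implementation may differ.
export function isNotNullish<T>(value: T): asserts value is NonNullable<T> {
  if (value === null || value === undefined) {
    throw new Error(`Expected a non-nullish value, got ${String(value)}`);
  }
}
```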
51
- To run your new eval, use the `synbad.sh` script in this repo. Assuming you're
52
- testing the `evals/reasoning/reasoning-parsing` test, for GLM-4.6 on Synthetic,
53
- and you want to run it 5 times since it isn't consistently failing:
101
+ To run your new eval, use the `synbad.sh` script in this repo, which
102
+ auto-recompiles everything (including your new test!) before running the evals.
103
+ Assuming you're testing the `evals/reasoning/reasoning-parsing` test, for
104
+ GLM-4.6 on Synthetic, and you want to run it 5 times since it isn't
105
+ consistently failing:
54
106
 
55
107
  ```bash
56
- ./synbad.sh --env-var SYNTHETIC_API_KEY \
108
+ ./synbad.sh eval --env-var SYNTHETIC_API_KEY \
57
109
  --base-url "https://api.synthetic.new/openai/v1" \
58
110
  --only evals/reasoning/reasoning-parsing \
59
111
  --model "hf:zai-org/GLM-4.6" \
60
112
  --count 5
61
113
  ```
62
114
 
115
+ ### Handling reasoning parsing
116
+
117
+ The OpenAI spec didn't originally include reasoning content parsing, since the
118
+ original OpenAI models didn't reason. The open-source community added support
119
+ for reasoning later, but there are two competing specs:
120
+
121
+ 1. Storing the reasoning content in `message.reasoning_content`, or
122
+ 2. Storing the reasoning content in `message.reasoning`.
123
+
124
+ To make sure your evals work with a wider range of inference providers, use
125
+ the `getReasoning` function when testing reasoning parsing like so:
126
+
127
+ ```typescript
128
+ import { getReasoning } from "../../source/chat-completion.ts";
129
+
130
+ // In your test:
131
+
132
+ const reasoning = getReasoning(response.choices[0].message);
133
+ ```
134
+
135
+ This ensures your test will use the correct reasoning content data regardless
136
+ of which spec the underlying inference provider is using.
137
+
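Concretely, both spellings resolve to the same value (behavior matching the `getReasoning` implementation shown further down in this diff):

```typescript
import { getReasoning } from "../../source/chat-completion.ts";

getReasoning({ reasoning_content: "First, note that..." }); // "First, note that..."
getReasoning({ reasoning: "First, note that..." });         // "First, note that..."
getReasoning({});                                           // undefined
```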
  ## Running Synbad

  First, install it:
@@ -71,7 +146,7 @@ npm install -g @syntheticlab/synbad
  Then run:

  ```bash
- synbad --env-var SYNTHETIC_API_KEY \
+ synbad eval --env-var SYNTHETIC_API_KEY \
    --base-url "https://api.synthetic.new/openai/v1" \
    --model "hf:zai-org/GLM-4.6"
  ```
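Presumably `--env-var` names the environment variable the runner reads your API key from (an assumption; the key value below is a placeholder), so set it beforehand:

```bash
# Assumption: --env-var tells synbad which environment variable holds the key.
export SYNTHETIC_API_KEY="your-api-key-here"
synbad eval --env-var SYNTHETIC_API_KEY \
  --base-url "https://api.synthetic.new/openai/v1" \
  --model "hf:zai-org/GLM-4.6"
```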
@@ -1,6 +1,7 @@
  import * as assert from "../../source/asserts.js";
+ import { getReasoning } from "../../source/chat-completion.js";
  export function test(response) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }
  export const json = {
@@ -1,5 +1,5 @@
- import OpenAI from "openai";
- export declare function test(response: OpenAI.ChatCompletion): void;
+ import { ChatResponse } from "../../source/chat-completion.ts";
+ export declare function test(response: ChatResponse): void;
  export declare const json: {
    messages: ({
      role: string;
@@ -1,10 +1,11 @@
  import * as assert from "../../source/asserts.js";
+ import { getReasoning } from "../../source/chat-completion.js";
  export function test(response) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }
  export const json = {
-   "messages": [
-     { "role": "user", "content": "Why does 1+1=2?" }
+   messages: [
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  };
@@ -1,9 +1,14 @@
  import { t } from "structural";
  import OpenAI from "openai";
+ export declare function getReasoning(msg: {
+   reasoning_content?: string;
+   reasoning?: string;
+ }): string | undefined;
  export type ChatResponse = OpenAI.ChatCompletion & {
    choices: Array<{
      message: {
        reasoning_content?: string;
+       reasoning?: string;
      };
    }>;
  };
@@ -1,4 +1,7 @@
  import { t } from "structural";
+ export function getReasoning(msg) {
+   return msg.reasoning_content || msg.reasoning;
+ }
  const TextContentPart = t.subtype({
    type: t.value("text"),
    text: t.str,
@@ -7,9 +7,11 @@ var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExte
    }
    return path;
  };
- import { Command } from "@commander-js/extra-typings";
+ import * as http from "http";
+ import * as https from "https";
  import fs from "fs/promises";
  import path from "path";
+ import { Command } from "@commander-js/extra-typings";
  import OpenAI from "openai";
  const cli = new Command()
    .name("synbad")
@@ -79,6 +81,103 @@ ${passed}/${found} evals passed. Failures:
  - ${Array.from(failures).map(evalName).join("\n- ")}
  `.trim());
  });
+ cli.command("proxy")
+   .requiredOption("-p, --port <number>", "Port to listen on")
+   .requiredOption("-t, --target <url>", "Target URL to proxy to")
+   .action(async (options) => {
+     const port = parseInt(options.port, 10);
+     const targetUrl = new URL(options.target);
+     stderrLog(`🚀 Starting proxy on port ${port}`);
+     stderrLog(`📯 Proxying to: ${targetUrl.origin}`);
+     const server = http.createServer(async (req, res) => {
+       try {
+         const timestamp = new Date().toISOString();
+         // Log request metadata
+         stderrLog(`\n[${timestamp}] 📥 ${req.method} ${req.url}`);
+         // Construct target URL - handle target path correctly
+         const incomingPath = req.url || "";
+         const targetBasePath = targetUrl.pathname.replace(/\/$/, ''); // Remove trailing slash
+         const targetPath = targetBasePath + incomingPath;
+         const target = `${targetUrl.origin}${targetPath}`;
+         // Prepare request headers (remove problematic ones)
+         const requestHeaders = { ...req.headers };
+         delete requestHeaders["host"];
+         delete requestHeaders["content-length"];
+         delete requestHeaders["transfer-encoding"];
+         stderrLog(`[${timestamp}] ➡️ Forwarding to: ${target}`);
+         stderrLog(`[${timestamp}] 📦 Writing request data to stdout...`);
+         // Choose the right module based on target protocol
+         const httpModule = targetUrl.protocol === "https:" ? https : http;
+         // Create proxy request
+         const proxyReq = httpModule.request({
+           hostname: targetUrl.hostname,
+           port: targetUrl.port || (targetUrl.protocol === "https:" ? 443 : 80),
+           path: targetPath,
+           method: req.method,
+           headers: requestHeaders,
+         }, (proxyRes) => {
+           // Log response status and headers
+           stderrLog(`[${timestamp}] 📤 Response to ${req.url}: ${proxyRes.statusCode} ${proxyRes.statusMessage}`);
+           stderrLog(`[${timestamp}] 📦 Loading response...`);
+           // Filter problematic response headers
+           const responseHeaders = { ...proxyRes.headers };
+           delete responseHeaders["transfer-encoding"];
+           delete responseHeaders["content-length"];
+           res.writeHead(proxyRes.statusCode || 200, responseHeaders);
+           // Stream response data immediately to client
+           proxyRes.on("data", (chunk) => {
+             res.write(chunk);
+           });
+           proxyRes.on("end", () => {
+             stderrLog(`[${timestamp}] ✅ Response complete`);
+             res.end();
+           });
+         });
+         // Handle proxy request errors
+         proxyReq.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Proxy request error:`, e);
+           if (!res.headersSent) {
+             res.writeHead(500, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Proxy error", message: e.message }));
+           }
+         });
+         // Handle client request errors
+         req.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Client request error:`, e);
+           proxyReq.destroy();
+           if (!res.headersSent) {
+             res.writeHead(400, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Client error", message: e.message }));
+           }
+         });
+         req.on("data", (chunk) => {
+           process.stdout.write(chunk);
+           proxyReq.write(chunk);
+         });
+         req.on("end", () => {
+           process.stdout.write("\n");
+           console.log(`[${timestamp}] ✅ Request complete`);
+           proxyReq.end();
+         });
+       }
+       catch (e) {
+         const timestamp = new Date().toISOString();
+         console.error(`[${timestamp}] ❌ Server error:`, e);
+         if (!res.headersSent) {
+           res.writeHead(500, { "Content-Type": "application/json" });
+           res.end(JSON.stringify({ error: "Server error", message: e.message }));
+         }
+       }
+     });
+     server.on("error", (e) => {
+       console.error("❌ Server error:", e);
+     });
+     server.listen(port, () => {
+       stderrLog(`✅ Server listening on http://localhost:${port}`);
+       stderrLog(`📡 All HTTP request data will be logged to stdout`);
+       stderrLog("🤓 Terminal UI messages (such as this one) will be logged to stderr");
+     });
+   });
  function evalName(file) {
    return `${path.basename(path.dirname(file))}/${path.basename(file).replace(/.js$/, "")}`;
  }
@@ -111,4 +210,11 @@ async function* findTestFiles(dir) {
      }
    }
  }
+ function stderrLog(item, ...items) {
+   let formatted = item;
+   if (items.length > 0) {
+     formatted += " " + items.join(" ");
+   }
+   process.stderr.write(formatted + "\n");
+ }
  cli.parse();
@@ -1,8 +1,8 @@
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

@@ -1,7 +1,7 @@
- import OpenAI from "openai";
+ import { ChatResponse } from "../../source/chat-completion.ts";
  import * as assert from "../../source/asserts.ts";

- export function test(response: OpenAI.ChatCompletion) {
+ export function test(response: ChatResponse) {
    const { tool_calls } = response.choices[0].message;
    assert.isNotNullish(tool_calls);
    assert.isNotEmptyArray(tool_calls);
@@ -1,13 +1,13 @@
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

  export const json = {
-   "messages": [
-     {"role": "user", "content": "Why does 1+1=2?"}
+   messages: [
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@syntheticlab/synbad",
-   "version": "0.0.3",
+   "version": "0.0.5",
    "description": "LLM inference provider evals",
    "main": "dist/source/index.js",
    "bin": {
@@ -1,10 +1,15 @@
  import { t } from "structural";
  import OpenAI from "openai";

+ export function getReasoning(msg: { reasoning_content?: string, reasoning?: string }) {
+   return msg.reasoning_content || msg.reasoning;
+ }
+
  export type ChatResponse = OpenAI.ChatCompletion & {
    choices: Array<{
      message: {
        reasoning_content?: string,
+       reasoning?: string,
      },
    }>
  };
package/source/index.ts CHANGED
@@ -1,7 +1,9 @@
  #!/usr/bin/env node
- import { Command } from "@commander-js/extra-typings";
+ import * as http from "http";
+ import * as https from "https";
  import fs from "fs/promises";
  import path from "path";
+ import { Command } from "@commander-js/extra-typings";
  import OpenAI from "openai";

  const cli = new Command()
@@ -85,6 +87,127 @@ ${passed}/${found} evals passed. Failures:
  `.trim());
  });

+ cli.command("proxy")
+   .requiredOption("-p, --port <number>", "Port to listen on")
+   .requiredOption("-t, --target <url>", "Target URL to proxy to")
+   .action(async (options) => {
+     const port = parseInt(options.port, 10);
+     const targetUrl = new URL(options.target);
+
+     stderrLog(`🚀 Starting proxy on port ${port}`);
+     stderrLog(`📯 Proxying to: ${targetUrl.origin}`);
+
+     const server = http.createServer(async (req, res) => {
+       try {
+         const timestamp = new Date().toISOString();
+
+         // Log request metadata
+         stderrLog(`\n[${timestamp}] 📥 ${req.method} ${req.url}`);
+
+         // Construct target URL - handle target path correctly
+         const incomingPath = req.url || "";
+         const targetBasePath = targetUrl.pathname.replace(/\/$/, ''); // Remove trailing slash
+         const targetPath = targetBasePath + incomingPath;
+         const target = `${targetUrl.origin}${targetPath}`;
+
+         // Prepare request headers (remove problematic ones)
+         const requestHeaders = { ...req.headers };
+         delete requestHeaders["host"];
+         delete requestHeaders["content-length"];
+         delete requestHeaders["transfer-encoding"];
+
+         stderrLog(`[${timestamp}] ➡️ Forwarding to: ${target}`);
+         stderrLog(`[${timestamp}] 📦 Writing request data to stdout...`);
+
+         // Choose the right module based on target protocol
+         const httpModule = targetUrl.protocol === "https:" ? https : http;
+
+         // Create proxy request
+         const proxyReq = httpModule.request(
+           {
+             hostname: targetUrl.hostname,
+             port: targetUrl.port || (targetUrl.protocol === "https:" ? 443 : 80),
+             path: targetPath,
+             method: req.method,
+             headers: requestHeaders,
+           },
+           (proxyRes) => {
+             // Log response status and headers
+             stderrLog(
+               `[${timestamp}] 📤 Response to ${req.url}: ${proxyRes.statusCode} ${proxyRes.statusMessage}`
+             );
+             stderrLog(`[${timestamp}] 📦 Loading response...`);
+
+             // Filter problematic response headers
+             const responseHeaders = { ...proxyRes.headers };
+             delete responseHeaders["transfer-encoding"];
+             delete responseHeaders["content-length"];
+
+             res.writeHead(proxyRes.statusCode || 200, responseHeaders);
+
+             // Stream response data immediately to client
+             proxyRes.on("data", (chunk) => {
+               res.write(chunk);
+             });
+
+             proxyRes.on("end", () => {
+               stderrLog(`[${timestamp}] ✅ Response complete`);
+               res.end();
+             });
+           }
+         );
+
+         // Handle proxy request errors
+         proxyReq.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Proxy request error:`, e);
+           if (!res.headersSent) {
+             res.writeHead(500, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Proxy error", message: e.message }));
+           }
+         });
+
+         // Handle client request errors
+         req.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Client request error:`, e);
+           proxyReq.destroy();
+           if (!res.headersSent) {
+             res.writeHead(400, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Client error", message: e.message }));
+           }
+         });
+
+         req.on("data", (chunk) => {
+           process.stdout.write(chunk);
+           proxyReq.write(chunk);
+         });
+
+         req.on("end", () => {
+           process.stdout.write("\n");
+           console.log(`[${timestamp}] ✅ Request complete`);
+           proxyReq.end();
+         });
+
+       } catch (e) {
+         const timestamp = new Date().toISOString();
+         console.error(`[${timestamp}] ❌ Server error:`, e);
+         if (!res.headersSent) {
+           res.writeHead(500, { "Content-Type": "application/json" });
+           res.end(JSON.stringify({ error: "Server error", message: (e as Error).message }));
+         }
+       }
+     });
+
+     server.on("error", (e) => {
+       console.error("❌ Server error:", e);
+     });
+
+     server.listen(port, () => {
+       stderrLog(`✅ Server listening on http://localhost:${port}`);
+       stderrLog(`📡 All HTTP request data will be logged to stdout`);
+       stderrLog("🤓 Terminal UI messages (such as this one) will be logged to stderr");
+     });
+   });
+
  function evalName(file: string) {
    return `${path.basename(path.dirname(file))}/${path.basename(file).replace(/.js$/, "")}`
  }
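As a quick smoke test of the path handling above (hypothetical request; headers and body are illustrative), a request to the local proxy gets its path appended to the target's base path, so this lands on `/openai/v1/chat/completions` and the request body is echoed to the proxy's `stdout`:

```bash
# Assumes `synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1` is running.
curl http://localhost:3000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $SYNTHETIC_API_KEY" \
  -d '{"model": "hf:zai-org/GLM-4.6", "messages": [{"role": "user", "content": "Why does 1+1=2?"}]}'
```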
@@ -118,4 +241,12 @@ async function* findTestFiles(dir: string): AsyncGenerator<string> {
    }
  }

+ function stderrLog(item: string, ...items: string[]) {
+   let formatted = item;
+   if (items.length > 0) {
+     formatted += " " + items.join(" ");
+   }
+   process.stderr.write(formatted + "\n");
+ }
+
  cli.parse();