@syntheticlab/synbad 0.0.3 → 0.0.5

package/README.md CHANGED
@@ -8,6 +8,55 @@ inference quality as high as possible.
  If you find bugs in Synthetic's model hosting, please contribute the bugs here!
  We will fix them.

+ ## Results
+
+ We keep a running tally of provider+model results for GLM-4.6, Kimi K2
+ Thinking, and MiniMax M2. Feel free to add more provider results!
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Synthetic|GLM-4.6 |:white_check_mark: 100%|
+ |Synthetic|Kimi K2 Thinking|:white_check_mark: 100%|
+ |Synthetic|MiniMax M2 |:white_check_mark: 100%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Fireworks|GLM-4.6 |:white_check_mark: 100%|
+ |Fireworks|Kimi K2 Thinking|:x: 86%|
+ |Fireworks|MiniMax M2 |:x: 29%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Together |GLM-4.6 |:white_check_mark: 100%|
+ |Together |Kimi K2 Thinking|:x: 71%|
+
+ |Provider |Model |Success Rate|
+ |---------|----------------|------------|
+ |Parasail |GLM-4.6 |:x: 71%|
+ |Parasail |Kimi K2 Thinking|:x: 57%|
+
+ ## How do I contribute inference bugs?
+
+ If you already have some problematic JSON, head over to the
+ [Contributing](#contributing) section. If you don't, don't worry! Synbad makes
+ it easy to capture the problematic JSON you're encountering.
+
+ First, run the Synbad Proxy, specifying the local port you want to use and the
+ inference host you want to target. For example, to forward requests from
+ `localhost:3000` to Synthetic's API, you'd run:
+
+ ```bash
+ synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1
+ ```
+
+ Then, configure your coding agent (or whichever local tool you're using) to
+ point to `http://localhost:3000` (or whichever port you selected). The Synbad
+ Proxy will log all request bodies to `stdout`, so all you need to do is
+ reproduce the bug using your tool or coding agent, and then copy the JSON it
+ printed to `stdout`.
+
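A quick way to capture that JSON without hand-copying: since the proxy writes its UI messages to `stderr` and only request bodies to `stdout`, you can redirect `stdout` straight into a file (the filename below is just an example):

```bash
# Request bodies land in the file; progress messages stay on stderr.
# "captured-request.json" is an illustrative name.
synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1 > captured-request.json
```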
+ Now you have reproducible JSON to file a bug via Synbad!
+
  ## Contributing

  First, clone this repo from GitHub. Then `cd` into it and run:
@@ -30,16 +79,17 @@ For example, we can test reasoning parsing very simply (as we do in the

  ```typescript
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

+ // Insert your JSON. You can paste your results from the Synbad proxy here.
  export const json = {
    messages: [
-     { role: "user", content: "Why does 1+1=2?" },
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  }
  ```
@@ -48,18 +98,43 @@ The `asserts.ts` file re-exports all of the built-in Node.js assertion
  functions, and also adds a few extra ones, e.g. `isNotNullish`, which checks
  whether an object is `null` or `undefined`.

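As a rough sketch (illustrative only; the real implementation lives in `source/asserts.ts` and may differ), an `isNotNullish`-style helper can be written as a TypeScript assertion function:

```typescript
// Sketch of an isNotNullish-style assertion: narrows the type and throws
// on null/undefined. The package's actual implementation may differ.
export function isNotNullish<T>(value: T): asserts value is NonNullable<T> {
  if (value === null || value === undefined) {
    throw new Error(`Expected a non-nullish value, got ${String(value)}`);
  }
}
```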
51
- To run your new eval, use the `synbad.sh` script in this repo. Assuming you're
52
- testing the `evals/reasoning/reasoning-parsing` test, for GLM-4.6 on Synthetic,
53
- and you want to run it 5 times since it isn't consistently failing:
101
+ To run your new eval, use the `synbad.sh` script in this repo, which
102
+ auto-recompiles everything (including your new test!) before running the evals.
103
+ Assuming you're testing the `evals/reasoning/reasoning-parsing` test, for
104
+ GLM-4.6 on Synthetic, and you want to run it 5 times since it isn't
105
+ consistently failing:
54
106
 
55
107
  ```bash
56
- ./synbad.sh --env-var SYNTHETIC_API_KEY \
108
+ ./synbad.sh eval --env-var SYNTHETIC_API_KEY \
57
109
  --base-url "https://api.synthetic.new/openai/v1" \
58
110
  --only evals/reasoning/reasoning-parsing \
59
111
  --model "hf:zai-org/GLM-4.6" \
60
112
  --count 5
61
113
  ```
62
114
 
115
+ ### Handling reasoning parsing
116
+
117
+ The OpenAI spec didn't originally include reasoning content parsing, since the
118
+ original OpenAI models didn't reason. The open-source community added support
119
+ for reasoning later, but there are two competing specs:
120
+
121
+ 1. Storing the reasoning content in `message.reasoning_content`, or
122
+ 2. Storing the reasoning content in `message.reasoning`.
123
+
124
+ To make sure your evals work with a wider range of inference providers, use
125
+ the `getReasoning` function when testing reasoning parsing like so:
126
+
127
+ ```typescript
128
+ import { getReasoning } from "../../source/chat-completion.ts";
129
+
130
+ // In your test:
131
+
132
+ const reasoning = getReasoning(response.choices[0].message);
133
+ ```
134
+
135
+ This ensures your test will use the correct reasoning content data regardless
136
+ of which spec the underlying inference provider is using.
137
+
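Concretely, both spellings resolve to the same value (behavior matching the `getReasoning` implementation shown further down in this diff):

```typescript
import { getReasoning } from "../../source/chat-completion.ts";

getReasoning({ reasoning_content: "First, note that..." }); // "First, note that..."
getReasoning({ reasoning: "First, note that..." });         // "First, note that..."
getReasoning({});                                           // undefined
```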
  ## Running Synbad

  First, install it:
@@ -71,7 +146,7 @@ npm install -g @syntheticlab/synbad
  Then run:

  ```bash
- synbad --env-var SYNTHETIC_API_KEY \
+ synbad eval --env-var SYNTHETIC_API_KEY \
    --base-url "https://api.synthetic.new/openai/v1" \
    --model "hf:zai-org/GLM-4.6"
  ```
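Presumably `--env-var` names the environment variable the runner reads your API key from (an assumption; the key value below is a placeholder), so set it beforehand:

```bash
# Assumption: --env-var tells synbad which environment variable holds the key.
export SYNTHETIC_API_KEY="your-api-key-here"
synbad eval --env-var SYNTHETIC_API_KEY \
  --base-url "https://api.synthetic.new/openai/v1" \
  --model "hf:zai-org/GLM-4.6"
```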
@@ -1,6 +1,7 @@
  import * as assert from "../../source/asserts.js";
+ import { getReasoning } from "../../source/chat-completion.js";
  export function test(response) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }
  export const json = {
@@ -1,5 +1,5 @@
- import OpenAI from "openai";
- export declare function test(response: OpenAI.ChatCompletion): void;
+ import { ChatResponse } from "../../source/chat-completion.ts";
+ export declare function test(response: ChatResponse): void;
  export declare const json: {
    messages: ({
      role: string;
@@ -1,10 +1,11 @@
  import * as assert from "../../source/asserts.js";
+ import { getReasoning } from "../../source/chat-completion.js";
  export function test(response) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }
  export const json = {
-   "messages": [
-     { "role": "user", "content": "Why does 1+1=2?" }
+   messages: [
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  };
@@ -1,9 +1,14 @@
  import { t } from "structural";
  import OpenAI from "openai";
+ export declare function getReasoning(msg: {
+   reasoning_content?: string;
+   reasoning?: string;
+ }): string | undefined;
  export type ChatResponse = OpenAI.ChatCompletion & {
    choices: Array<{
      message: {
        reasoning_content?: string;
+       reasoning?: string;
      };
    }>;
  };
@@ -1,4 +1,7 @@
  import { t } from "structural";
+ export function getReasoning(msg) {
+   return msg.reasoning_content || msg.reasoning;
+ }
  const TextContentPart = t.subtype({
    type: t.value("text"),
    text: t.str,
@@ -7,9 +7,11 @@ var __rewriteRelativeImportExtension = (this && this.__rewriteRelativeImportExte
    }
    return path;
  };
- import { Command } from "@commander-js/extra-typings";
+ import * as http from "http";
+ import * as https from "https";
  import fs from "fs/promises";
  import path from "path";
+ import { Command } from "@commander-js/extra-typings";
  import OpenAI from "openai";
  const cli = new Command()
    .name("synbad")
@@ -79,6 +81,103 @@ ${passed}/${found} evals passed. Failures:
  - ${Array.from(failures).map(evalName).join("\n- ")}
  `.trim());
  });
+ cli.command("proxy")
+   .requiredOption("-p, --port <number>", "Port to listen on")
+   .requiredOption("-t, --target <url>", "Target URL to proxy to")
+   .action(async (options) => {
+     const port = parseInt(options.port, 10);
+     const targetUrl = new URL(options.target);
+     stderrLog(`🚀 Starting proxy on port ${port}`);
+     stderrLog(`📯 Proxying to: ${targetUrl.origin}`);
+     const server = http.createServer(async (req, res) => {
+       try {
+         const timestamp = new Date().toISOString();
+         // Log request metadata
+         stderrLog(`\n[${timestamp}] 📥 ${req.method} ${req.url}`);
+         // Construct target URL - handle target path correctly
+         const incomingPath = req.url || "";
+         const targetBasePath = targetUrl.pathname.replace(/\/$/, ''); // Remove trailing slash
+         const targetPath = targetBasePath + incomingPath;
+         const target = `${targetUrl.origin}${targetPath}`;
+         // Prepare request headers (remove problematic ones)
+         const requestHeaders = { ...req.headers };
+         delete requestHeaders["host"];
+         delete requestHeaders["content-length"];
+         delete requestHeaders["transfer-encoding"];
+         stderrLog(`[${timestamp}] ➡️ Forwarding to: ${target}`);
+         stderrLog(`[${timestamp}] 📦 Writing request data to stdout...`);
+         // Choose the right module based on target protocol
+         const httpModule = targetUrl.protocol === "https:" ? https : http;
+         // Create proxy request
+         const proxyReq = httpModule.request({
+           hostname: targetUrl.hostname,
+           port: targetUrl.port || (targetUrl.protocol === "https:" ? 443 : 80),
+           path: targetPath,
+           method: req.method,
+           headers: requestHeaders,
+         }, (proxyRes) => {
+           // Log response status and headers
+           stderrLog(`[${timestamp}] 📤 Response to ${req.url}: ${proxyRes.statusCode} ${proxyRes.statusMessage}`);
+           stderrLog(`[${timestamp}] 📦 Loading response...`);
+           // Filter problematic response headers
+           const responseHeaders = { ...proxyRes.headers };
+           delete responseHeaders["transfer-encoding"];
+           delete responseHeaders["content-length"];
+           res.writeHead(proxyRes.statusCode || 200, responseHeaders);
+           // Stream response data immediately to client
+           proxyRes.on("data", (chunk) => {
+             res.write(chunk);
+           });
+           proxyRes.on("end", () => {
+             stderrLog(`[${timestamp}] ✅ Response complete`);
+             res.end();
+           });
+         });
+         // Handle proxy request errors
+         proxyReq.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Proxy request error:`, e);
+           if (!res.headersSent) {
+             res.writeHead(500, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Proxy error", message: e.message }));
+           }
+         });
+         // Handle client request errors
+         req.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Client request error:`, e);
+           proxyReq.destroy();
+           if (!res.headersSent) {
+             res.writeHead(400, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Client error", message: e.message }));
+           }
+         });
+         req.on("data", (chunk) => {
+           process.stdout.write(chunk);
+           proxyReq.write(chunk);
+         });
+         req.on("end", () => {
+           process.stdout.write("\n");
+           console.log(`[${timestamp}] ✅ Request complete`);
+           proxyReq.end();
+         });
+       }
+       catch (e) {
+         const timestamp = new Date().toISOString();
+         console.error(`[${timestamp}] ❌ Server error:`, e);
+         if (!res.headersSent) {
+           res.writeHead(500, { "Content-Type": "application/json" });
+           res.end(JSON.stringify({ error: "Server error", message: e.message }));
+         }
+       }
+     });
+     server.on("error", (e) => {
+       console.error("❌ Server error:", e);
+     });
+     server.listen(port, () => {
+       stderrLog(`✅ Server listening on http://localhost:${port}`);
+       stderrLog(`📡 All HTTP request data will be logged to stdout`);
+       stderrLog("🤓 Terminal UI messages (such as this one) will be logged to stderr");
+     });
+   });
  function evalName(file) {
    return `${path.basename(path.dirname(file))}/${path.basename(file).replace(/.js$/, "")}`;
  }
@@ -111,4 +210,11 @@ async function* findTestFiles(dir) {
      }
    }
  }
+ function stderrLog(item, ...items) {
+   let formatted = item;
+   if (items.length > 0) {
+     formatted += " " + items.join(" ");
+   }
+   process.stderr.write(formatted + "\n");
+ }
  cli.parse();
@@ -1,8 +1,8 @@
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

@@ -1,7 +1,7 @@
- import OpenAI from "openai";
+ import { ChatResponse } from "../../source/chat-completion.ts";
  import * as assert from "../../source/asserts.ts";

- export function test(response: OpenAI.ChatCompletion) {
+ export function test(response: ChatResponse) {
    const { tool_calls } = response.choices[0].message;
    assert.isNotNullish(tool_calls);
    assert.isNotEmptyArray(tool_calls);
@@ -1,13 +1,13 @@
  import * as assert from "../../source/asserts.ts";
- import { ChatResponse } from "../../source/chat-completion.ts";
+ import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";

  export function test(response: ChatResponse) {
-   const reasoning = response.choices[0].message.reasoning_content;
+   const reasoning = getReasoning(response.choices[0].message);
    assert.isNotNullish(reasoning);
  }

  export const json = {
-   "messages": [
-     {"role": "user", "content": "Why does 1+1=2?"}
+   messages: [
+     { role: "user", content: "Why does 1+1=2?" }
    ],
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@syntheticlab/synbad",
-   "version": "0.0.3",
+   "version": "0.0.5",
    "description": "LLM inference provider evals",
    "main": "dist/source/index.js",
    "bin": {
@@ -1,10 +1,15 @@
  import { t } from "structural";
  import OpenAI from "openai";

+ export function getReasoning(msg: { reasoning_content?: string, reasoning?: string }) {
+   return msg.reasoning_content || msg.reasoning;
+ }
+
  export type ChatResponse = OpenAI.ChatCompletion & {
    choices: Array<{
      message: {
        reasoning_content?: string,
+       reasoning?: string,
      },
    }>
  };
package/source/index.ts CHANGED
@@ -1,7 +1,9 @@
  #!/usr/bin/env node
- import { Command } from "@commander-js/extra-typings";
+ import * as http from "http";
+ import * as https from "https";
  import fs from "fs/promises";
  import path from "path";
+ import { Command } from "@commander-js/extra-typings";
  import OpenAI from "openai";

  const cli = new Command()
@@ -85,6 +87,127 @@ ${passed}/${found} evals passed. Failures:
  `.trim());
  });

+ cli.command("proxy")
+   .requiredOption("-p, --port <number>", "Port to listen on")
+   .requiredOption("-t, --target <url>", "Target URL to proxy to")
+   .action(async (options) => {
+     const port = parseInt(options.port, 10);
+     const targetUrl = new URL(options.target);
+
+     stderrLog(`🚀 Starting proxy on port ${port}`);
+     stderrLog(`📯 Proxying to: ${targetUrl.origin}`);
+
+     const server = http.createServer(async (req, res) => {
+       try {
+         const timestamp = new Date().toISOString();
+
+         // Log request metadata
+         stderrLog(`\n[${timestamp}] 📥 ${req.method} ${req.url}`);
+
+         // Construct target URL - handle target path correctly
+         const incomingPath = req.url || "";
+         const targetBasePath = targetUrl.pathname.replace(/\/$/, ''); // Remove trailing slash
+         const targetPath = targetBasePath + incomingPath;
+         const target = `${targetUrl.origin}${targetPath}`;
+
+         // Prepare request headers (remove problematic ones)
+         const requestHeaders = { ...req.headers };
+         delete requestHeaders["host"];
+         delete requestHeaders["content-length"];
+         delete requestHeaders["transfer-encoding"];
+
+         stderrLog(`[${timestamp}] ➡️ Forwarding to: ${target}`);
+         stderrLog(`[${timestamp}] 📦 Writing request data to stdout...`);
+
+         // Choose the right module based on target protocol
+         const httpModule = targetUrl.protocol === "https:" ? https : http;
+
+         // Create proxy request
+         const proxyReq = httpModule.request(
+           {
+             hostname: targetUrl.hostname,
+             port: targetUrl.port || (targetUrl.protocol === "https:" ? 443 : 80),
+             path: targetPath,
+             method: req.method,
+             headers: requestHeaders,
+           },
+           (proxyRes) => {
+             // Log response status and headers
+             stderrLog(
+               `[${timestamp}] 📤 Response to ${req.url}: ${proxyRes.statusCode} ${proxyRes.statusMessage}`
+             );
+             stderrLog(`[${timestamp}] 📦 Loading response...`);
+
+             // Filter problematic response headers
+             const responseHeaders = { ...proxyRes.headers };
+             delete responseHeaders["transfer-encoding"];
+             delete responseHeaders["content-length"];
+
+             res.writeHead(proxyRes.statusCode || 200, responseHeaders);
+
+             // Stream response data immediately to client
+             proxyRes.on("data", (chunk) => {
+               res.write(chunk);
+             });
+
+             proxyRes.on("end", () => {
+               stderrLog(`[${timestamp}] ✅ Response complete`);
+               res.end();
+             });
+           }
+         );
+
+         // Handle proxy request errors
+         proxyReq.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Proxy request error:`, e);
+           if (!res.headersSent) {
+             res.writeHead(500, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Proxy error", message: e.message }));
+           }
+         });
+
+         // Handle client request errors
+         req.on("error", (e) => {
+           console.error(`[${timestamp}] ❌ Client request error:`, e);
+           proxyReq.destroy();
+           if (!res.headersSent) {
+             res.writeHead(400, { "Content-Type": "application/json" });
+             res.end(JSON.stringify({ error: "Client error", message: e.message }));
+           }
+         });
+
+         req.on("data", (chunk) => {
+           process.stdout.write(chunk);
+           proxyReq.write(chunk);
+         });
+
+         req.on("end", () => {
+           process.stdout.write("\n");
+           console.log(`[${timestamp}] ✅ Request complete`);
+           proxyReq.end();
+         });
+
+       } catch (e) {
+         const timestamp = new Date().toISOString();
+         console.error(`[${timestamp}] ❌ Server error:`, e);
+         if (!res.headersSent) {
+           res.writeHead(500, { "Content-Type": "application/json" });
+           res.end(JSON.stringify({ error: "Server error", message: (e as Error).message }));
+         }
+       }
+     });
+
+     server.on("error", (e) => {
+       console.error("❌ Server error:", e);
+     });
+
+     server.listen(port, () => {
+       stderrLog(`✅ Server listening on http://localhost:${port}`);
+       stderrLog(`📡 All HTTP request data will be logged to stdout`);
+       stderrLog("🤓 Terminal UI messages (such as this one) will be logged to stderr");
+     });
+   });
+
  function evalName(file: string) {
    return `${path.basename(path.dirname(file))}/${path.basename(file).replace(/.js$/, "")}`
  }
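As a quick smoke test of the path handling above (hypothetical request; headers and body are illustrative), a request to the local proxy gets its path appended to the target's base path, so this lands on `/openai/v1/chat/completions` and the request body is echoed to the proxy's `stdout`:

```bash
# Assumes `synbad proxy -p 3000 -t https://api.synthetic.new/openai/v1` is running.
curl http://localhost:3000/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $SYNTHETIC_API_KEY" \
  -d '{"model": "hf:zai-org/GLM-4.6", "messages": [{"role": "user", "content": "Why does 1+1=2?"}]}'
```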
@@ -118,4 +241,12 @@ async function* findTestFiles(dir: string): AsyncGenerator<string> {
    }
  }

+ function stderrLog(item: string, ...items: string[]) {
+   let formatted = item;
+   if (items.length > 0) {
+     formatted += " " + items.join(" ");
+   }
+   process.stderr.write(formatted + "\n");
+ }
+
  cli.parse();