@syntheticlab/synbad 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +111 -26
  2. package/dist/evals/reasoning/multiturn-reasoning-parsing.d.ts +2 -2
  3. package/dist/evals/reasoning/multiturn-reasoning-parsing.js +2 -2
  4. package/dist/evals/reasoning/reasoning-claude-tool-call.d.ts +2 -2
  5. package/dist/evals/reasoning/reasoning-claude-tool-call.js +1 -2
  6. package/dist/evals/reasoning/reasoning-parsing.d.ts +2 -2
  7. package/dist/evals/reasoning/reasoning-parsing.js +4 -4
  8. package/dist/evals/reasoning/response-in-reasoning.d.ts +45 -0
  9. package/dist/evals/reasoning/response-in-reasoning.js +59 -0
  10. package/dist/evals/tools/claude-dash.d.ts +2 -2
  11. package/dist/evals/tools/claude-dash.js +1 -2
  12. package/dist/evals/tools/crush-list-files.d.ts +2 -5
  13. package/dist/evals/tools/crush-list-files.js +6 -8
  14. package/dist/evals/tools/multi-turn-tools.d.ts +46 -0
  15. package/dist/evals/tools/multi-turn-tools.js +100 -0
  16. package/dist/evals/tools/no-fn-args.d.ts +22 -0
  17. package/dist/evals/tools/no-fn-args.js +31 -0
  18. package/dist/evals/tools/octo-list-no-optional-args.d.ts +209 -0
  19. package/dist/evals/tools/octo-list-no-optional-args.js +73 -0
  20. package/dist/evals/tools/parallel-tool.d.ts +2 -2
  21. package/dist/evals/tools/parallel-tool.js +1 -2
  22. package/dist/evals/tools/simple-tool.d.ts +2 -2
  23. package/dist/evals/tools/simple-tool.js +3 -2
  24. package/dist/evals/tools/tool-dash-underscore.d.ts +26 -0
  25. package/dist/evals/tools/tool-dash-underscore.js +37 -0
  26. package/dist/evals/tools/tool-path-corruption.d.ts +26 -0
  27. package/dist/evals/tools/tool-path-corruption.js +41 -0
  28. package/dist/source/asserts.d.ts +4 -1
  29. package/dist/source/asserts.js +36 -0
  30. package/dist/source/chat-completion.d.ts +5 -0
  31. package/dist/source/chat-completion.js +1 -0
  32. package/dist/source/evals.d.ts +9 -0
  33. package/dist/source/evals.js +53 -0
  34. package/dist/source/evals.test.d.ts +1 -0
  35. package/dist/source/evals.test.js +12 -0
  36. package/dist/source/exports.d.ts +2 -0
  37. package/dist/source/exports.js +1 -0
  38. package/dist/source/index.js +204 -38
  39. package/evals/reasoning/multiturn-reasoning-parsing.ts +3 -3
  40. package/evals/reasoning/reasoning-claude-tool-call.ts +2 -3
  41. package/evals/reasoning/reasoning-parsing.ts +5 -5
  42. package/evals/reasoning/response-in-reasoning.ts +65 -0
  43. package/evals/tools/claude-dash.ts +2 -3
  44. package/evals/tools/crush-list-files.ts +11 -13
  45. package/evals/tools/multi-turn-tools.ts +104 -0
  46. package/evals/tools/no-fn-args.ts +34 -0
  47. package/evals/tools/octo-list-no-optional-args.ts +81 -0
  48. package/evals/tools/parallel-tool.ts +2 -3
  49. package/evals/tools/simple-tool.ts +4 -3
  50. package/evals/tools/tool-dash-underscore.ts +40 -0
  51. package/evals/tools/tool-path-corruption.ts +46 -0
  52. package/package.json +10 -3
  53. package/source/asserts.ts +37 -1
  54. package/source/chat-completion.ts +6 -0
  55. package/source/evals.test.ts +13 -0
  56. package/source/evals.ts +56 -0
  57. package/source/exports.ts +2 -0
  58. package/source/index.ts +246 -38
@@ -0,0 +1,81 @@
1
+ import { ChatMessage } from "../../source/chat-completion.ts";
2
+ import * as assert from "../../source/asserts.ts";
3
+
4
/**
 * Checks that the model called the `list` tool without supplying its only
 * (optional) argument, `dirPath`.
 *
 * Passes when there is exactly one function tool call, it targets `list`, and
 * its parsed arguments satisfy at least one of the alternatives handed to
 * `assert.or` (presumably "any alternative passing is enough" — confirm
 * against the helper's implementation).
 *
 * @param message - Assistant message; only `tool_calls` is inspected.
 * @throws When an assertion fails, or when `arguments` is not valid JSON
 *   (`JSON.parse` throws).
 */
export function test({ tool_calls }: ChatMessage) {
  assert.isNotNullish(tool_calls);
  assert.isNotEmptyArray(tool_calls);
  assert.strictEqual(tool_calls.length, 1);
  assert.strictEqual(tool_calls[0].type, "function");
  // The prompt asks for the list tool specifically. Without this check any
  // other tool invoked with empty arguments would be accepted as a pass.
  assert.strictEqual(tool_calls[0].function.name, "list");
  const parsed = JSON.parse(tool_calls[0].function.arguments);
  assert.or(
    () => assert.isNullish(parsed),
    () => assert.deepEqual(parsed, {}),
    () => assert.isNullish(parsed.dirPath),
    () => assert.strictEqual(parsed.dirPath, ""),
  );
}
17
+
18
+ export const json = {
19
+ "messages":[
20
+ {
21
+ "role":"system",
22
+ "content":"You are a coding assistant called Octo."
23
+ },
24
+ {
25
+ "role":"user",
26
+ "content":"call the list tool with no args"
27
+ }
28
+ ],
29
+ tools: [
30
+ {
31
+ "type":"function",
32
+ "function":{
33
+ "name":"append",
34
+ "description":"The append tool",
35
+ "parameters":{
36
+ "type":"object",
37
+ "required":["filePath","text"],
38
+ "properties":{
39
+ "filePath":{
40
+ "description":"The path to the file",
41
+ "type":"string"
42
+ },
43
+ "text":{
44
+ "description":"The text to append",
45
+ "type":"string"
46
+ }
47
+ }
48
+ },
49
+ "strict":true
50
+ }
51
+ },
52
+ {
53
+ "type":"function",
54
+ "function":{
55
+ "name":"create",
56
+ "description":"The create tool",
57
+ "parameters":{
58
+ "type":"object",
59
+ "required":["filePath","content"],
60
+ "properties":{
61
+ "filePath":{
62
+ "description":"Path where the file should be created",
63
+ "type":"string"
64
+ },"content":{
65
+ "description":"Content to write to the file",
66
+ "type":"string"
67
+ }
68
+ }
69
+ },
70
+ "strict":true
71
+ }
72
+ },
73
+ {"type":"function","function":{"name":"edit","description":"The edit tool","parameters":{"type":"object","required":["filePath","search","replace"],"properties":{"filePath":{"description":"The path to the file","type":"string"},"search":{"description":"The search string to replace. Must EXACTLY match the text you intend to replace, including\nwhitespace, punctuation, etc. Make sure to give a few lines of context above and below so you\ndon't accidentally replace a different matching substring in the same file.","type":"string"},"replace":{"description":"The string you want to insert into the file","type":"string"}}},"strict":true}},
74
+ {"type":"function","function":{"name":"fetch","description":"The fetch tool","parameters":{"type":"object","required":["url"],"properties":{"url":{"description":"Full url to fetch, e.g. https://...","type":"string"},"includeMarkup":{"description":"Include the HTML markup? Defaults to false. By default or when set to false, markup will be\nstripped and converted to plain text. Prefer markup stripping, and only set this to true if the\noutput is confusing: otherwise you may download a massive amount of data","type":"boolean"}}},"strict":true}},
75
+ {"type":"function","function":{"name":"list","description":"The list tool","parameters":{"type":"object","required":[],"properties":{"dirPath":{"description":"Path to the directory","type":"string"}}},"strict":true}},
76
+ {"type":"function","function":{"name":"prepend","description":"The prepend tool","parameters":{"type":"object","required":["filePath","text"],"properties":{"filePath":{"description":"The path to the file","type":"string"},"text":{"description":"The text to prepend","type":"string"}}},"strict":true}},
77
+ {"type":"function","function":{"name":"read","description":"The read tool","parameters":{"type":"object","required":["filePath"],"properties":{"filePath":{"description":"Path to file to read","type":"string"}}},"strict":true}},
78
+ {"type":"function","function":{"name":"rewrite","description":"The rewrite tool","parameters":{"type":"object","required":["filePath","text"],"properties":{"filePath":{"description":"The path to the file","type":"string"},"text":{"description":"The replaced file contents. This will rewrite and replace the entire file","type":"string"}}},"strict":true}},
79
+ {"type":"function","function":{"name":"shell","description":"The shell tool","parameters":{"type":"object","required":["timeout","cmd"],"properties":{"timeout":{"description":"A timeout for the command, in milliseconds. Be generous. You MUST specify this.","type":"number"},"cmd":{"description":"The command to run","type":"string"}}},"strict":true}}
80
+ ],
81
+ };
@@ -1,8 +1,7 @@
1
- import OpenAI from "openai";
2
1
  import * as assert from "../../source/asserts.ts";
2
+ import { ChatMessage } from "../../source/chat-completion.ts";
3
3
 
4
- export function test(response: OpenAI.ChatCompletion) {
5
- const { tool_calls } = response.choices[0].message;
4
+ export function test({ tool_calls }: ChatMessage) {
6
5
  assert.isNotNullish(tool_calls);
7
6
  assert.isNotEmptyArray(tool_calls);
8
7
  assert.strictEqual(tool_calls.length, 2);
@@ -1,8 +1,7 @@
1
- import { ChatResponse } from "../../source/chat-completion.ts";
1
+ import { ChatMessage } from "../../source/chat-completion.ts";
2
2
  import * as assert from "../../source/asserts.ts";
3
3
 
4
- export function test(response: ChatResponse) {
5
- const { tool_calls } = response.choices[0].message;
4
+ export function test({ content, tool_calls }: ChatMessage) {
6
5
  assert.isNotNullish(tool_calls);
7
6
  assert.isNotEmptyArray(tool_calls);
8
7
  assert.strictEqual(tool_calls.length, 1);
@@ -10,6 +9,8 @@ export function test(response: ChatResponse) {
10
9
  assert.strictEqual(tool_calls[0].function.name, "get_weather");
11
10
  const args = JSON.parse(tool_calls[0].function.arguments);
12
11
  assert.match(args.location.toLowerCase(), /paris/);
12
+ // Assert the tool call didn't leak into the content
13
+ assert.doesNotMatch(content || "", /get_weather/);
13
14
  }
14
15
 
15
16
  export const json = {
@@ -0,0 +1,40 @@
1
+ import { ChatMessage } from "../../source/chat-completion.ts";
2
+ import * as assert from "../../source/asserts.ts";
3
+
4
/**
 * Checks that a tool whose name mixes dashes and underscores
 * (`get-weather__v1`) is called with its name intact and a Paris location,
 * and that the call was not echoed into the message text.
 *
 * @param message - Assistant message; `content` and `tool_calls` are inspected.
 * @throws When an assertion fails, or when `arguments` is not valid JSON
 *   (`JSON.parse` throws).
 */
export function test({ content, tool_calls }: ChatMessage) {
  assert.isNotNullish(tool_calls);
  assert.isNotEmptyArray(tool_calls);
  assert.strictEqual(tool_calls.length, 1);
  assert.strictEqual(tool_calls[0].type, "function");
  assert.strictEqual(tool_calls[0].function.name, "get-weather__v1");
  const args = JSON.parse(tool_calls[0].function.arguments);
  assert.match(args.location.toLowerCase(), /paris/);
  // Assert the tool call didn't leak into the content. The tool here is
  // "get-weather__v1", so match the dashed spelling as well as the
  // underscored one; /get_weather/ alone can never hit the real name.
  assert.doesNotMatch(content ?? "", /get[-_]weather/);
}
15
+
16
+ export const json = {
17
+ "messages": [
18
+ {"role": "user", "content": "What's the weather in Paris?"}
19
+ ],
20
+ "tools": [
21
+ {
22
+ "type": "function",
23
+ "function": {
24
+ "name": "get-weather__v1",
25
+ "description": "Get current weather for a location",
26
+ "parameters": {
27
+ "type": "object",
28
+ "properties": {
29
+ "location": {
30
+ "type": "string",
31
+ "description": "City name"
32
+ }
33
+ },
34
+ "required": ["location"]
35
+ }
36
+ }
37
+ }
38
+ ],
39
+ "tool_choice": "auto",
40
+ }
@@ -0,0 +1,46 @@
1
+ import { ChatMessage } from "../../source/chat-completion.ts";
2
+ import * as assert from "../../source/asserts.ts";
3
+
4
/** File path named in the user prompt; the tool call must carry it through. */
const PATH = "/development/evals/reasoning/Scratch/reasoning-claude-tool-call.ts";

/**
 * Checks that the model issued exactly one `read` function call whose
 * `filePath` argument contains the path from the prompt.
 *
 * @param message - Assistant message; only `tool_calls` is inspected.
 * @throws When an assertion fails, or when `arguments` is not valid JSON
 *   (`JSON.parse` throws).
 */
export function test({ tool_calls }: ChatMessage) {
  assert.isNotNullish(tool_calls);
  assert.isNotEmptyArray(tool_calls);
  assert.strictEqual(tool_calls.length, 1);

  const call = tool_calls[0];
  assert.strictEqual(call.type, "function");
  assert.strictEqual(call.function.name, "read");

  const { filePath } = JSON.parse(call.function.arguments);
  assert.stringContains(filePath, PATH);
}
16
+
17
+ export const json = {
18
+ "messages": [
19
+ {
20
+ "role": "user",
21
+ "content": "Read and summarize the file /development/evals/reasoning/Scratch/reasoning-claude-tool-call.ts"
22
+ }
23
+ ],
24
+ "tools": [
25
+ {
26
+ "type": "function",
27
+ "function": {
28
+ "name": "read",
29
+ "description": "The read tool",
30
+ "parameters": {
31
+ "type": "object",
32
+ "required": [
33
+ "filePath"
34
+ ],
35
+ "properties": {
36
+ "filePath": {
37
+ "description": "Path to file to read",
38
+ "type": "string"
39
+ }
40
+ }
41
+ },
42
+ "strict": true
43
+ }
44
+ },
45
+ ],
46
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@syntheticlab/synbad",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "description": "LLM inference provider evals",
5
5
  "main": "dist/source/index.js",
6
6
  "bin": {
@@ -9,7 +9,7 @@
9
9
  "preferGlobal": true,
10
10
  "type": "module",
11
11
  "scripts": {
12
- "test": "echo \"Error: no test specified\" && exit 1",
12
+ "test": "vitest",
13
13
  "build": "tsc",
14
14
  "prepublishOnly": "tsc"
15
15
  },
@@ -23,6 +23,12 @@
23
23
  "package-lock.json",
24
24
  "tsconfig.json"
25
25
  ],
26
+ "exports": {
27
+ ".": {
28
+ "types": "./dist/source/exports.d.ts",
29
+ "import": "./dist/source/exports.js"
30
+ }
31
+ },
26
32
  "dependencies": {
27
33
  "@commander-js/extra-typings": "^14.0.0",
28
34
  "commander": "^14.0.2",
@@ -32,6 +38,7 @@
32
38
  },
33
39
  "devDependencies": {
34
40
  "@types/node": "^24.10.1",
35
- "tsx": "^4.20.6"
41
+ "tsx": "^4.20.6",
42
+ "vitest": "^4.0.17"
36
43
  }
37
44
  }
package/source/asserts.ts CHANGED
@@ -61,7 +61,13 @@ export function isEmptyArray(a: any[]) {
61
61
  });
62
62
  }
63
63
 
64
- export function isNotEmptyArray(a: any[]) {
64
+ export function isNotEmptyArray(a: any[] | undefined) {
65
+ if(a == null) {
66
+ throw new assert.AssertionError({
67
+ message: "Expected a non-empty array",
68
+ actual: a,
69
+ });
70
+ }
65
71
  if(a.length !== 0) return true;
66
72
  throw new assert.AssertionError({
67
73
  message: "Expected a non-empty array",
@@ -76,3 +82,33 @@ export function startsWith(a: string, prefix: string) {
76
82
  actual: a,
77
83
  });
78
84
  }
85
+
86
/**
 * Asserts that `num` is strictly greater than `target`.
 *
 * @param num - Value under test.
 * @param target - Exclusive lower bound.
 * @returns `true` when `num > target`.
 * @throws assert.AssertionError (with `actual` set to `num`) otherwise.
 */
export function gt(num: number, target: number) {
  const holds = num > target;
  if (!holds) {
    throw new assert.AssertionError({
      message: `Expected ${num} > ${target}`,
      actual: num,
    });
  }
  return true;
}
93
+
94
/**
 * Asserts that `num` is greater than or equal to `target`.
 *
 * @param num - Value under test.
 * @param target - Inclusive lower bound.
 * @returns `true` when `num >= target`.
 * @throws assert.AssertionError (with `actual` set to `num`) otherwise.
 */
export function gte(num: number, target: number) {
  const holds = num >= target;
  if (!holds) {
    throw new assert.AssertionError({
      message: `Expected ${num} >= ${target}`,
      actual: num,
    });
  }
  return true;
}
101
+
102
/**
 * Asserts that `str` is a string and includes `expected` as a substring.
 *
 * @param str - Value under test; checked at runtime because callers may pass
 *   untyped data.
 * @param expected - Substring that must occur in `str`.
 * @returns `true` when `str` contains `expected`.
 * @throws assert.AssertionError when `str` is not a string (with `actual` set
 *   to its `typeof`), or when the substring is absent (with `actual` set to
 *   `str`).
 */
export function stringContains(str: string, expected: string) {
  const kind = typeof str;
  if (kind !== "string") {
    throw new assert.AssertionError({
      message: "Expected input to be of type string.",
      actual: kind,
    });
  }

  if (!str.includes(expected)) {
    throw new assert.AssertionError({
      message: `Expected string to contain: "${expected}"`,
      actual: str,
    });
  }
  return true;
}
@@ -10,10 +10,15 @@ export type ChatResponse = OpenAI.ChatCompletion & {
10
10
  message: {
11
11
  reasoning_content?: string,
12
12
  reasoning?: string,
13
+ tool_calls?: Array<{
14
+ index: number,
15
+ }>
13
16
  },
14
17
  }>
15
18
  };
16
19
 
20
+ export type ChatMessage = ChatResponse["choices"][number]["message"];
21
+
17
22
  const TextContentPart = t.subtype({
18
23
  type: t.value("text"),
19
24
  text: t.str,
@@ -56,6 +61,7 @@ const AssistantMessageSchema = t.subtype({
56
61
  name: t.str,
57
62
  })),
58
63
  reasoning_content: t.optional(t.str.or(t.nil)),
64
+ reasoning: t.optional(t.str.or(t.nil)),
59
65
  });
60
66
 
61
67
  const UserMessageSchema = t.subtype({
@@ -0,0 +1,13 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { getEvals } from "./evals.ts";
3
+
4
// Smoke test: every eval returned by getEvals() exposes a string name, a
// truthy request fixture and a test function.
describe("get-evals", () => {
  it("works", async () => {
    const evals = await getEvals();
    // Plain iteration: the loop runs only for its assertions, so there is no
    // reason to build (and discard) a mapped array.
    for (const { test, json, name } of evals) {
      expect(name).toBeTypeOf("string");
      expect(json).toBeTruthy();
      expect(test).toBeTypeOf("function");
    }
  });
});
@@ -0,0 +1,56 @@
1
import fs from "fs/promises";
import path from "path";
import { pathToFileURL } from "url";
import { ChatMessage } from "./chat-completion.ts";
4
+
5
/**
 * One loaded eval module, as assembled by `getEvals`.
 *
 * - `name`: identifier derived from the eval file's location via `evalName`.
 * - `json`: the module's exported `json` value.
 * - `test`: the module's exported `test` function, called with a chat message.
 */
export type Eval = {
  name: string;
  json: any;
  test: (response: ChatMessage) => any;
};
10
+
11
/**
 * Loads every eval module found under the `evals` directory that sits next to
 * this module's own directory.
 *
 * Each discovered file is imported dynamically and its `test` and `json`
 * exports are collected together with a name derived from the file path.
 * Reasoning evals are included (`skipReasoning` is passed as `false`).
 *
 * @returns The loaded evals, in the order `findTestFiles` yields them.
 * @throws Whatever the directory walk or a module import throws.
 */
export async function getEvals(): Promise<Eval[]> {
  const evals: Eval[] = [];
  const evalsPath = path.join(import.meta.dirname, "..", "evals");

  for await (const testFile of findTestFiles(evalsPath, false)) {
    // The ESM loader only accepts URLs with a supported scheme; a raw absolute
    // Windows path ("C:\...") is parsed as scheme "c:" and rejected. Going
    // through a file:// URL works on every platform.
    const { test, json } = await import(pathToFileURL(testFile).href);
    evals.push({ test, json, name: evalName(testFile) });
  }

  return evals;
}
22
+
23
/**
 * Builds an eval's display name from its file path: the name of the directory
 * containing the file, a slash, and the file name without a trailing `.js`.
 *
 * @param file - Path to the eval file.
 * @returns A name of the form `<parent-dir>/<file-name-without-.js>`.
 */
export function evalName(file: string) {
  const group = path.basename(path.dirname(file));
  // The dot must be escaped: /.js$/ would also strip any character followed
  // by "js" (e.g. "nodejs" -> "nod").
  const name = path.basename(file).replace(/\.js$/, "");
  return `${group}/${name}`;
}
26
+
27
/**
 * Recursively yields the paths of `.js` files under `dir`.
 *
 * If `dir` itself cannot be stat'ed, `${dir}.js` is tried as a single-file
 * fallback and yielded when it is a regular file; otherwise the error from
 * stat'ing `dir` is rethrown.
 *
 * @param dir - Directory to walk, or an eval path given without its `.js`
 *   extension.
 * @param skipReasoning - When true, sub-directories named `reasoning` are not
 *   descended into.
 * @returns An async generator of file paths.
 * @throws The original stat error when neither `dir` nor `${dir}.js` is
 *   usable, or any error raised while reading directory entries.
 */
export async function* findTestFiles(dir: string, skipReasoning: boolean): AsyncGenerator<string> {
  try {
    await fs.stat(dir);
  } catch (e) {
    const pathname = `${dir}.js`;
    let fallback;
    try {
      fallback = await fs.stat(pathname);
    } catch {
      // Report the failure for the path the caller asked for, not the one for
      // the speculative ".js" probe.
      throw e;
    }
    if (fallback.isFile()) {
      yield pathname;
      return;
    }
    throw e;
  }

  const entryNames = await fs.readdir(dir);
  const entries = await Promise.all(entryNames.map(async (entry) => {
    const entryPath = path.join(dir, entry);
    return {
      path: entryPath,
      stat: await fs.stat(entryPath),
    };
  }));

  for (const entry of entries) {
    if (entry.stat.isFile() && entry.path.endsWith(".js")) {
      yield entry.path;
    }
    if (entry.stat.isDirectory()) {
      if (skipReasoning && path.basename(entry.path) === "reasoning") continue;
      yield* findTestFiles(entry.path, skipReasoning);
    }
  }
}
56
+
@@ -0,0 +1,2 @@
1
// Public entry point of the package: the eval loader plus the types callers
// need to consume its results.
export { getEvals, type Eval as SynbadEval } from "./evals.ts";
// ChatMessage is a type alias with no runtime binding, so it must be
// re-exported with the `type` modifier; otherwise single-file transpilers
// keep the re-export and the module fails to link at runtime.
export { type ChatMessage as SynbadChatMessage } from "./chat-completion.ts";