@syntheticlab/synbad 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -0
- package/dist/evals/reasoning/multiturn-reasoning-parsing.js +2 -1
- package/dist/evals/reasoning/reasoning-claude-tool-call.d.ts +2 -2
- package/dist/evals/reasoning/reasoning-parsing.js +2 -1
- package/dist/source/chat-completion.d.ts +5 -0
- package/dist/source/chat-completion.js +3 -0
- package/evals/reasoning/multiturn-reasoning-parsing.ts +2 -2
- package/evals/reasoning/reasoning-claude-tool-call.ts +2 -2
- package/evals/reasoning/reasoning-parsing.ts +2 -2
- package/package.json +1 -1
- package/source/chat-completion.ts +5 -0
package/README.md
CHANGED
|
@@ -8,6 +8,33 @@ inference quality as high as possible.
|
|
|
8
8
|
If you find bugs in Synthetic's model hosting, please contribute the bugs here!
|
|
9
9
|
We will fix them.
|
|
10
10
|
|
|
11
|
+
## Results
|
|
12
|
+
|
|
13
|
+
We keep a running tally of provider+model results for GLM-4.6, Kimi K2
|
|
14
|
+
Thinking, and MiniMax M2. Feel free to add more provider results!
|
|
15
|
+
|
|
16
|
+
|Provider |Model |Success Rate|
|
|
17
|
+
|---------|----------------|------------|
|
|
18
|
+
|Synthetic|GLM-4.6 |:white_check_mark: 100%|
|
|
19
|
+
|Synthetic|Kimi K2 Thinking|:white_check_mark: 100%|
|
|
20
|
+
|Synthetic|MiniMax M2 |:white_check_mark: 100%|
|
|
21
|
+
|
|
22
|
+
|Provider |Model |Success Rate|
|
|
23
|
+
|---------|----------------|------------|
|
|
24
|
+
|Fireworks|GLM-4.6 |:white_check_mark: 100%|
|
|
25
|
+
|Fireworks|Kimi K2 Thinking|:x: 86%|
|
|
26
|
+
|Fireworks|MiniMax M2 |:x: 29%|
|
|
27
|
+
|
|
28
|
+
|Provider |Model |Success Rate|
|
|
29
|
+
|---------|----------------|------------|
|
|
30
|
+
|Together |GLM-4.6 |:white_check_mark: 100%|
|
|
31
|
+
|Together |Kimi K2 Thinking|:x: 71%|
|
|
32
|
+
|
|
33
|
+
|Provider |Model |Success Rate|
|
|
34
|
+
|---------|----------------|------------|
|
|
35
|
+
|Parasail |GLM-4.6 |:x: 71%|
|
|
36
|
+
|Parasail |Kimi K2 Thinking|:x: 57%|
|
|
37
|
+
|
|
11
38
|
## Contributing
|
|
12
39
|
|
|
13
40
|
First, clone this repo from GitHub. Then `cd` into it and run:
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as assert from "../../source/asserts.js";
|
|
2
|
+
import { getReasoning } from "../../source/chat-completion.js";
|
|
2
3
|
export function test(response) {
|
|
3
|
-
const reasoning = response.choices[0].message.reasoning_content;
|
|
4
|
+
const reasoning = getReasoning(response.choices[0].message);
|
|
4
5
|
assert.isNotNullish(reasoning);
|
|
5
6
|
}
|
|
6
7
|
export const json = {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import
|
|
2
|
-
export declare function test(response:
|
|
1
|
+
import { ChatResponse } from "../../source/chat-completion.ts";
|
|
2
|
+
export declare function test(response: ChatResponse): void;
|
|
3
3
|
export declare const json: {
|
|
4
4
|
messages: ({
|
|
5
5
|
role: string;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as assert from "../../source/asserts.js";
|
|
2
|
+
import { getReasoning } from "../../source/chat-completion.js";
|
|
2
3
|
export function test(response) {
|
|
3
|
-
const reasoning = response.choices[0].message.reasoning_content;
|
|
4
|
+
const reasoning = getReasoning(response.choices[0].message);
|
|
4
5
|
assert.isNotNullish(reasoning);
|
|
5
6
|
}
|
|
6
7
|
export const json = {
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
import { t } from "structural";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
|
+
export declare function getReasoning(msg: {
|
|
4
|
+
reasoning_content?: string;
|
|
5
|
+
reasoning?: string;
|
|
6
|
+
}): string | undefined;
|
|
3
7
|
export type ChatResponse = OpenAI.ChatCompletion & {
|
|
4
8
|
choices: Array<{
|
|
5
9
|
message: {
|
|
6
10
|
reasoning_content?: string;
|
|
11
|
+
reasoning?: string;
|
|
7
12
|
};
|
|
8
13
|
}>;
|
|
9
14
|
};
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as assert from "../../source/asserts.ts";
|
|
2
|
-
import { ChatResponse } from "../../source/chat-completion.ts";
|
|
2
|
+
import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";
|
|
3
3
|
|
|
4
4
|
export function test(response: ChatResponse) {
|
|
5
|
-
const reasoning = response.choices[0].message.reasoning_content;
|
|
5
|
+
const reasoning = getReasoning(response.choices[0].message);
|
|
6
6
|
assert.isNotNullish(reasoning);
|
|
7
7
|
}
|
|
8
8
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { ChatResponse } from "../../source/chat-completion.ts";
|
|
2
2
|
import * as assert from "../../source/asserts.ts";
|
|
3
3
|
|
|
4
|
-
export function test(response:
|
|
4
|
+
export function test(response: ChatResponse) {
|
|
5
5
|
const { tool_calls } = response.choices[0].message;
|
|
6
6
|
assert.isNotNullish(tool_calls);
|
|
7
7
|
assert.isNotEmptyArray(tool_calls);
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import * as assert from "../../source/asserts.ts";
|
|
2
|
-
import { ChatResponse } from "../../source/chat-completion.ts";
|
|
2
|
+
import { ChatResponse, getReasoning } from "../../source/chat-completion.ts";
|
|
3
3
|
|
|
4
4
|
export function test(response: ChatResponse) {
|
|
5
|
-
const reasoning = response.choices[0].message.reasoning_content;
|
|
5
|
+
const reasoning = getReasoning(response.choices[0].message);
|
|
6
6
|
assert.isNotNullish(reasoning);
|
|
7
7
|
}
|
|
8
8
|
|
package/package.json
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
import { t } from "structural";
|
|
2
2
|
import OpenAI from "openai";
|
|
3
3
|
|
|
4
|
+
export function getReasoning(msg: { reasoning_content?: string, reasoning?: string }) {
|
|
5
|
+
return msg.reasoning_content || msg.reasoning;
|
|
6
|
+
}
|
|
7
|
+
|
|
4
8
|
export type ChatResponse = OpenAI.ChatCompletion & {
|
|
5
9
|
choices: Array<{
|
|
6
10
|
message: {
|
|
7
11
|
reasoning_content?: string,
|
|
12
|
+
reasoning?: string,
|
|
8
13
|
},
|
|
9
14
|
}>
|
|
10
15
|
};
|