@botbotgo/better-call 0.1.12 → 0.1.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -17
- package/benchmarks/bfcl-real-remote-completed-summary.json +119 -1
- package/docs/banner.svg +2 -5
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
# BetterCall
|
|
6
6
|
|
|
7
|
-
**One-line wrapper.
|
|
7
|
+
**One-line wrapper. Eight full BFCL remote runs completed. Best: 73.4% → 83.8%.**
|
|
8
8
|
|
|
9
9
|
```ts
|
|
10
10
|
const tools = betterTools([searchTool, calculatorTool]);
|
|
@@ -142,6 +142,9 @@ qwen3.5:2b
|
|
|
142
142
|
lfm2.5-thinking:latest
|
|
143
143
|
Raw 50.8% | ####################....................
|
|
144
144
|
BetterCall 54.8% | ######################..................
|
|
145
|
+
qwen3.5:4b
|
|
146
|
+
Raw 43.6% | #################.......................
|
|
147
|
+
BetterCall 43.4% | #################.......................
|
|
145
148
|
gemma4:e2b
|
|
146
149
|
Raw 24.3% | ##########..............................
|
|
147
150
|
BetterCall 24.7% | ##########..............................
|
|
@@ -155,26 +158,27 @@ gemma4:e2b
|
|
|
155
158
|
| 4 | `qwen3.5:0.8b` | 3,625 | 54.6% | 56.9% | +2.3pp | 901 |
|
|
156
159
|
| 5 | `qwen3.5:2b` | 3,625 | 53.9% | 54.9% | +1.0pp | 1,308 |
|
|
157
160
|
| 6 | `lfm2.5-thinking:latest` | 3,625 | 50.8% | 54.8% | +4.0pp | 1,142 |
|
|
158
|
-
| 7 | `
|
|
161
|
+
| 7 | `qwen3.5:4b` | 3,625 | 43.6% | 43.4% | -0.2pp | 1,847 |
|
|
162
|
+
| 8 | `gemma4:e2b` | 3,625 | 24.3% | 24.7% | +0.4pp | 2,641 |
|
|
159
163
|
|
|
160
|
-
Latest completed model category detail: `
|
|
164
|
+
Latest completed model category detail: `qwen3.5:4b`.
|
|
161
165
|
|
|
162
166
|
| Category | Cases | Raw | BetterCall repair | Lift | Request errors |
|
|
163
167
|
| --- | ---: | ---: | ---: | ---: | ---: |
|
|
164
|
-
| `simple_python` | 400 |
|
|
165
|
-
| `simple_java` | 100 |
|
|
166
|
-
| `simple_javascript` | 50 |
|
|
167
|
-
| `multiple` | 200 |
|
|
168
|
-
| `parallel` | 200 |
|
|
169
|
-
| `parallel_multiple` | 200 |
|
|
170
|
-
| `irrelevance` | 240 |
|
|
171
|
-
| `live_simple` | 258 |
|
|
172
|
-
| `live_multiple` | 1,053 |
|
|
173
|
-
| `live_parallel` | 16 |
|
|
174
|
-
| `live_parallel_multiple` | 24 |
|
|
175
|
-
| `live_irrelevance` | 884 |
|
|
176
|
-
|
|
177
|
-
|
|
168
|
+
| `simple_python` | 400 | 81.3% | 81.3% | +0.0pp | 54 |
|
|
169
|
+
| `simple_java` | 100 | 56.0% | 56.0% | +0.0pp | 32 |
|
|
170
|
+
| `simple_javascript` | 50 | 48.0% | 48.0% | +0.0pp | 18 |
|
|
171
|
+
| `multiple` | 200 | 83.5% | 83.5% | +0.0pp | 20 |
|
|
172
|
+
| `parallel` | 200 | 70.0% | 70.0% | +0.0pp | 45 |
|
|
173
|
+
| `parallel_multiple` | 200 | 47.0% | 47.0% | +0.0pp | 96 |
|
|
174
|
+
| `irrelevance` | 240 | 68.8% | 68.8% | +0.0pp | 75 |
|
|
175
|
+
| `live_simple` | 258 | 66.7% | 66.3% | -0.4pp | 45 |
|
|
176
|
+
| `live_multiple` | 1,053 | 41.6% | 41.0% | -0.6pp | 538 |
|
|
177
|
+
| `live_parallel` | 16 | 0.0% | 0.0% | +0.0pp | 16 |
|
|
178
|
+
| `live_parallel_multiple` | 24 | 0.0% | 0.0% | +0.0pp | 24 |
|
|
179
|
+
| `live_irrelevance` | 884 | 0.0% | 0.0% | +0.0pp | 884 |
|
|
180
|
+
|
|
181
|
+
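This `qwen3.5:4b` run hit sustained remote request failures across all categories, most severely in the live categories (`live_parallel`, `live_parallel_multiple`, and `live_irrelevance` returned request errors on every case); those failures are counted as incorrect by the benchmark.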
|
|
178
182
|
|
|
179
183
|
Historical targeted wrapper benchmark:
|
|
180
184
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "BFCL v4 full remote Ollama runs",
|
|
3
3
|
"source": "Real model calls against all supported BFCL v4 single-turn tool-call categories completed in this repository",
|
|
4
4
|
"note": "Endpoint redacted. Scores count request errors/timeouts as incorrect. This is not an official BFCL leaderboard submission.",
|
|
5
|
-
"generatedAt": "2026-05-
|
|
5
|
+
"generatedAt": "2026-05-09T01:31:12.909Z",
|
|
6
6
|
"results": [
|
|
7
7
|
{
|
|
8
8
|
"model": "granite4.1:3b",
|
|
@@ -712,6 +712,124 @@
|
|
|
712
712
|
}
|
|
713
713
|
]
|
|
714
714
|
},
|
|
715
|
+
{
|
|
716
|
+
"model": "qwen3.5:4b",
|
|
717
|
+
"total": 3625,
|
|
718
|
+
"rawCorrect": 1581,
|
|
719
|
+
"betterCorrect": 1574,
|
|
720
|
+
"errors": 1847,
|
|
721
|
+
"repaired": 0,
|
|
722
|
+
"categories": [
|
|
723
|
+
{
|
|
724
|
+
"model": "qwen3.5:4b",
|
|
725
|
+
"category": "simple_python",
|
|
726
|
+
"total": 400,
|
|
727
|
+
"rawCorrect": 325,
|
|
728
|
+
"betterCorrect": 325,
|
|
729
|
+
"errors": 54,
|
|
730
|
+
"repaired": 0
|
|
731
|
+
},
|
|
732
|
+
{
|
|
733
|
+
"model": "qwen3.5:4b",
|
|
734
|
+
"category": "simple_java",
|
|
735
|
+
"total": 100,
|
|
736
|
+
"rawCorrect": 56,
|
|
737
|
+
"betterCorrect": 56,
|
|
738
|
+
"errors": 32,
|
|
739
|
+
"repaired": 0
|
|
740
|
+
},
|
|
741
|
+
{
|
|
742
|
+
"model": "qwen3.5:4b",
|
|
743
|
+
"category": "simple_javascript",
|
|
744
|
+
"total": 50,
|
|
745
|
+
"rawCorrect": 24,
|
|
746
|
+
"betterCorrect": 24,
|
|
747
|
+
"errors": 18,
|
|
748
|
+
"repaired": 0
|
|
749
|
+
},
|
|
750
|
+
{
|
|
751
|
+
"model": "qwen3.5:4b",
|
|
752
|
+
"category": "multiple",
|
|
753
|
+
"total": 200,
|
|
754
|
+
"rawCorrect": 167,
|
|
755
|
+
"betterCorrect": 167,
|
|
756
|
+
"errors": 20,
|
|
757
|
+
"repaired": 0
|
|
758
|
+
},
|
|
759
|
+
{
|
|
760
|
+
"model": "qwen3.5:4b",
|
|
761
|
+
"category": "parallel",
|
|
762
|
+
"total": 200,
|
|
763
|
+
"rawCorrect": 140,
|
|
764
|
+
"betterCorrect": 140,
|
|
765
|
+
"errors": 45,
|
|
766
|
+
"repaired": 0
|
|
767
|
+
},
|
|
768
|
+
{
|
|
769
|
+
"model": "qwen3.5:4b",
|
|
770
|
+
"category": "parallel_multiple",
|
|
771
|
+
"total": 200,
|
|
772
|
+
"rawCorrect": 94,
|
|
773
|
+
"betterCorrect": 94,
|
|
774
|
+
"errors": 96,
|
|
775
|
+
"repaired": 0
|
|
776
|
+
},
|
|
777
|
+
{
|
|
778
|
+
"model": "qwen3.5:4b",
|
|
779
|
+
"category": "irrelevance",
|
|
780
|
+
"total": 240,
|
|
781
|
+
"rawCorrect": 165,
|
|
782
|
+
"betterCorrect": 165,
|
|
783
|
+
"errors": 75,
|
|
784
|
+
"repaired": 0
|
|
785
|
+
},
|
|
786
|
+
{
|
|
787
|
+
"model": "qwen3.5:4b",
|
|
788
|
+
"category": "live_simple",
|
|
789
|
+
"total": 258,
|
|
790
|
+
"rawCorrect": 172,
|
|
791
|
+
"betterCorrect": 171,
|
|
792
|
+
"errors": 45,
|
|
793
|
+
"repaired": 0
|
|
794
|
+
},
|
|
795
|
+
{
|
|
796
|
+
"model": "qwen3.5:4b",
|
|
797
|
+
"category": "live_multiple",
|
|
798
|
+
"total": 1053,
|
|
799
|
+
"rawCorrect": 438,
|
|
800
|
+
"betterCorrect": 432,
|
|
801
|
+
"errors": 538,
|
|
802
|
+
"repaired": 0
|
|
803
|
+
},
|
|
804
|
+
{
|
|
805
|
+
"model": "qwen3.5:4b",
|
|
806
|
+
"category": "live_parallel",
|
|
807
|
+
"total": 16,
|
|
808
|
+
"rawCorrect": 0,
|
|
809
|
+
"betterCorrect": 0,
|
|
810
|
+
"errors": 16,
|
|
811
|
+
"repaired": 0
|
|
812
|
+
},
|
|
813
|
+
{
|
|
814
|
+
"model": "qwen3.5:4b",
|
|
815
|
+
"category": "live_parallel_multiple",
|
|
816
|
+
"total": 24,
|
|
817
|
+
"rawCorrect": 0,
|
|
818
|
+
"betterCorrect": 0,
|
|
819
|
+
"errors": 24,
|
|
820
|
+
"repaired": 0
|
|
821
|
+
},
|
|
822
|
+
{
|
|
823
|
+
"model": "qwen3.5:4b",
|
|
824
|
+
"category": "live_irrelevance",
|
|
825
|
+
"total": 884,
|
|
826
|
+
"rawCorrect": 0,
|
|
827
|
+
"betterCorrect": 0,
|
|
828
|
+
"errors": 884,
|
|
829
|
+
"repaired": 0
|
|
830
|
+
}
|
|
831
|
+
]
|
|
832
|
+
},
|
|
715
833
|
{
|
|
716
834
|
"model": "gemma4:e2b",
|
|
717
835
|
"total": 3625,
|
package/docs/banner.svg
CHANGED
|
@@ -28,9 +28,6 @@
|
|
|
28
28
|
<feMergeNode in="SourceGraphic"/>
|
|
29
29
|
</feMerge>
|
|
30
30
|
</filter>
|
|
31
|
-
<marker id="growth-arrow" viewBox="0 0 16 16" refX="13" refY="8" markerWidth="18" markerHeight="18" orient="auto">
|
|
32
|
-
<path d="M1 1 L15 8 L1 15 Z" fill="#35ff87"/>
|
|
33
|
-
</marker>
|
|
34
31
|
</defs>
|
|
35
32
|
|
|
36
33
|
<rect width="1400" height="520" fill="url(#background)"/>
|
|
@@ -58,8 +55,8 @@
|
|
|
58
55
|
<rect x="0" y="0" width="870" height="520" fill="url(#left-fade)"/>
|
|
59
56
|
|
|
60
57
|
<path d="M858 326 C 928 302, 992 312, 1060 292 S 1168 276, 1268 238" fill="none" stroke="#67d4ff" stroke-width="8" stroke-linecap="round" stroke-linejoin="round" opacity="0.52"/>
|
|
61
|
-
<path d="M858 246 C 926 206, 994 214, 1060 182 S 1168 154,
|
|
62
|
-
<polygon points="
|
|
58
|
+
<path d="M858 246 C 926 206, 994 214, 1060 182 S 1168 154, 1288 72" fill="none" stroke="#35ff87" stroke-width="15" stroke-linecap="round" stroke-linejoin="round" opacity="0.92"/>
|
|
59
|
+
<polygon points="1270,58 1312,44 1292,90" fill="#35ff87" opacity="0.92"/>
|
|
63
60
|
|
|
64
61
|
<g transform="translate(104 166)">
|
|
65
62
|
<text x="0" y="0" font-family="Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif" font-size="90" font-weight="850" fill="#f8fbff">BetterCall</text>
|