aiforcecli-chat 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. package/License.MD +49 -0
  2. package/README.md +642 -0
  3. package/aiforcecli.config.example.json +66 -0
  4. package/assets/README.md +14 -0
  5. package/dist/cli.js +2 -0
  6. package/dist/index.js +2 -0
  7. package/package.json +62 -0
  8. package/tools/scorecard/README.md +92 -0
  9. package/tools/scorecard/config.json +134 -0
  10. package/tools/scorecard/fetch.mjs +335 -0
  11. package/tools/scorecard/generate.mjs +289 -0
  12. package/tools/scorecard/generated/example/invalid-rows.json +1 -0
  13. package/tools/scorecard/generated/example/scorecard-report.md +147 -0
  14. package/tools/scorecard/generated/example/scorecard.compact.json +61 -0
  15. package/tools/scorecard/generated/example/scorecard.json +1492 -0
  16. package/tools/scorecard/generated/example/unmapped-models.json +1492 -0
  17. package/tools/scorecard/generated/raw/aider_polyglot.html +21071 -0
  18. package/tools/scorecard/generated/raw/terminal_bench_2_1.html +2 -0
  19. package/tools/scorecard/generated/scorecard/invalid-rows.json +1 -0
  20. package/tools/scorecard/generated/scorecard/scorecard-report.md +133 -0
  21. package/tools/scorecard/generated/scorecard/scorecard.compact.json +51 -0
  22. package/tools/scorecard/generated/scorecard/scorecard.json +1181 -0
  23. package/tools/scorecard/generated/scorecard/unmapped-models.json +1492 -0
  24. package/tools/scorecard/generated/scorecard-example/invalid-rows.json +1 -0
  25. package/tools/scorecard/generated/scorecard-example/scorecard-report.md +40 -0
  26. package/tools/scorecard/generated/scorecard-example/scorecard.compact.json +22 -0
  27. package/tools/scorecard/generated/scorecard-example/scorecard.json +389 -0
  28. package/tools/scorecard/generated/scorecard-example/unmapped-models.json +1 -0
  29. package/tools/scorecard/generated/scorecard-fetch/raw/aider_polyglot.html +21071 -0
  30. package/tools/scorecard/generated/scorecard-fetch/raw/terminal_bench_2_1.html +2 -0
  31. package/tools/scorecard/snapshots/example.normalized.example.json +38 -0
  32. package/tools/scorecard/snapshots/live.aider_polyglot.json +1318 -0
  33. package/tools/scorecard/snapshots/live.terminal_bench_2_1.json +294 -0
@@ -0,0 +1,294 @@
1
+ {
2
+ "source": "terminal_bench_2_1",
3
+ "fetchedAt": "2026-06-16T21:10:47.903Z",
4
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
5
+ "parser": "terminalBench",
6
+ "rows": [
7
+ {
8
+ "source": "terminal_bench",
9
+ "benchmark": "terminal_bench",
10
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
11
+ "modelRaw": "Codex CLI GPT-5.5",
12
+ "metric": "accuracy",
13
+ "score": 0.8337078651685393,
14
+ "scoreScale": "0-1",
15
+ "date": "2026-05-01",
16
+ "extra": {
17
+ "agent": "Codex CLI",
18
+ "model": [
19
+ "GPT-5.5"
20
+ ],
21
+ "stderr": 0.011123028018664792,
22
+ "verified": true,
23
+ "agentName": "codex",
24
+ "agentVersion": "0.125.0",
25
+ "modelNames": [
26
+ "gpt-5.5"
27
+ ],
28
+ "modelProviders": [
29
+ "openai"
30
+ ]
31
+ }
32
+ },
33
+ {
34
+ "source": "terminal_bench",
35
+ "benchmark": "terminal_bench",
36
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
37
+ "modelRaw": "Claude Code Claude Opus 4.8",
38
+ "metric": "accuracy",
39
+ "score": 0.7887640449438202,
40
+ "scoreScale": "0-1",
41
+ "date": "2026-05-29",
42
+ "extra": {
43
+ "agent": "Claude Code",
44
+ "model": [
45
+ "Claude Opus 4.8"
46
+ ],
47
+ "stderr": 0.012612328270024521,
48
+ "verified": true,
49
+ "agentName": "claude-code",
50
+ "agentVersion": "2.1.152",
51
+ "modelNames": [
52
+ "claude-opus-4-8"
53
+ ],
54
+ "modelProviders": [
55
+ "anthropic"
56
+ ]
57
+ }
58
+ },
59
+ {
60
+ "source": "terminal_bench",
61
+ "benchmark": "terminal_bench",
62
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
63
+ "modelRaw": "Terminus 2 GPT-5.5",
64
+ "metric": "accuracy",
65
+ "score": 0.7820224719101123,
66
+ "scoreScale": "0-1",
67
+ "date": "2026-05-01",
68
+ "extra": {
69
+ "agent": "Terminus 2",
70
+ "model": [
71
+ "GPT-5.5"
72
+ ],
73
+ "stderr": 0.011996717137113833,
74
+ "verified": true,
75
+ "agentName": "terminus-2",
76
+ "agentVersion": "2.0.0",
77
+ "modelNames": [
78
+ "gpt-5.5"
79
+ ],
80
+ "modelProviders": [
81
+ "openai"
82
+ ]
83
+ }
84
+ },
85
+ {
86
+ "source": "terminal_bench",
87
+ "benchmark": "terminal_bench",
88
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
89
+ "modelRaw": "Terminus 2 Claude Opus 4.8",
90
+ "metric": "accuracy",
91
+ "score": 0.7460674157303371,
92
+ "scoreScale": "0-1",
93
+ "date": "2026-05-29",
94
+ "extra": {
95
+ "agent": "Terminus 2",
96
+ "model": [
97
+ "Claude Opus 4.8"
98
+ ],
99
+ "stderr": 0.012308372078767778,
100
+ "verified": true,
101
+ "agentName": "terminus-2",
102
+ "agentVersion": "2.0.0",
103
+ "modelNames": [
104
+ "claude-opus-4-8"
105
+ ],
106
+ "modelProviders": [
107
+ "anthropic"
108
+ ]
109
+ }
110
+ },
111
+ {
112
+ "source": "terminal_bench",
113
+ "benchmark": "terminal_bench",
114
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
115
+ "modelRaw": "Terminus 2 Gemini 3 Pro",
116
+ "metric": "accuracy",
117
+ "score": 0.7438202247191011,
118
+ "scoreScale": "0-1",
119
+ "date": "2026-05-01",
120
+ "extra": {
121
+ "agent": "Terminus 2",
122
+ "model": [
123
+ "Gemini 3 Pro"
124
+ ],
125
+ "stderr": 0.013199258566821045,
126
+ "verified": true,
127
+ "agentName": "terminus-2",
128
+ "agentVersion": "2.0.0",
129
+ "modelNames": [
130
+ "gemini-3-pro-preview"
131
+ ],
132
+ "modelProviders": [
133
+ "gemini"
134
+ ]
135
+ }
136
+ },
137
+ {
138
+ "source": "terminal_bench",
139
+ "benchmark": "terminal_bench",
140
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
141
+ "modelRaw": "Gemini CLI Gemini 3.1 Pro",
142
+ "metric": "accuracy",
143
+ "score": 0.7065543071161049,
144
+ "scoreScale": "0-1",
145
+ "date": "2026-05-05",
146
+ "extra": {
147
+ "agent": "Gemini CLI",
148
+ "model": [
149
+ "Gemini 3.1 Pro"
150
+ ],
151
+ "stderr": 0.014843703568740315,
152
+ "verified": true,
153
+ "agentName": "gemini-cli",
154
+ "agentVersion": "0.40.0",
155
+ "modelNames": [
156
+ "gemini-3.1-pro-preview"
157
+ ],
158
+ "modelProviders": [
159
+ "gemini"
160
+ ]
161
+ }
162
+ },
163
+ {
164
+ "source": "terminal_bench",
165
+ "benchmark": "terminal_bench",
166
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
167
+ "modelRaw": "Terminus 2 Gemini 3.1 Pro",
168
+ "metric": "accuracy",
169
+ "score": 0.7031835205992509,
170
+ "scoreScale": "0-1",
171
+ "date": "2026-05-05",
172
+ "extra": {
173
+ "agent": "Terminus 2",
174
+ "model": [
175
+ "Gemini 3.1 Pro"
176
+ ],
177
+ "stderr": 0.014791636846043224,
178
+ "verified": true,
179
+ "agentName": "terminus-2",
180
+ "agentVersion": "2.0.0",
181
+ "modelNames": [
182
+ "gemini-3.1-pro-preview"
183
+ ],
184
+ "modelProviders": [
185
+ "gemini"
186
+ ]
187
+ }
188
+ },
189
+ {
190
+ "source": "terminal_bench",
191
+ "benchmark": "terminal_bench",
192
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
193
+ "modelRaw": "Claude Code Claude Opus 4.7",
194
+ "metric": "accuracy",
195
+ "score": 0.6971910112359551,
196
+ "scoreScale": "0-1",
197
+ "date": "2026-05-01",
198
+ "extra": {
199
+ "agent": "Claude Code",
200
+ "model": [
201
+ "Claude Opus 4.7"
202
+ ],
203
+ "stderr": 0.013864003010396704,
204
+ "verified": true,
205
+ "agentName": "claude-code",
206
+ "agentVersion": "2.1.123",
207
+ "modelNames": [
208
+ "claude-opus-4-7"
209
+ ],
210
+ "modelProviders": [
211
+ "anthropic"
212
+ ]
213
+ }
214
+ },
215
+ {
216
+ "source": "terminal_bench",
217
+ "benchmark": "terminal_bench",
218
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
219
+ "modelRaw": "Gemini CLI Gemini 3 Pro",
220
+ "metric": "accuracy",
221
+ "score": 0.6629213483146067,
222
+ "scoreScale": "0-1",
223
+ "date": "2026-05-02",
224
+ "extra": {
225
+ "agent": "Gemini CLI",
226
+ "model": [
227
+ "Gemini 3 Pro"
228
+ ],
229
+ "stderr": 0.013669129281569032,
230
+ "verified": true,
231
+ "agentName": "gemini-cli",
232
+ "agentVersion": "0.40.0",
233
+ "modelNames": [
234
+ "gemini-3-pro-preview"
235
+ ],
236
+ "modelProviders": [
237
+ "gemini"
238
+ ]
239
+ }
240
+ },
241
+ {
242
+ "source": "terminal_bench",
243
+ "benchmark": "terminal_bench",
244
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
245
+ "modelRaw": "Terminus 2 Claude Opus 4.7",
246
+ "metric": "accuracy",
247
+ "score": 0.6606741573033708,
248
+ "scoreScale": "0-1",
249
+ "date": "2026-05-01",
250
+ "extra": {
251
+ "agent": "Terminus 2",
252
+ "model": [
253
+ "Claude Opus 4.7"
254
+ ],
255
+ "stderr": 0.013669129281569032,
256
+ "verified": true,
257
+ "agentName": "terminus-2",
258
+ "agentVersion": "2.0.0",
259
+ "modelNames": [
260
+ "claude-opus-4-7"
261
+ ],
262
+ "modelProviders": [
263
+ "anthropic"
264
+ ]
265
+ }
266
+ },
267
+ {
268
+ "source": "terminal_bench",
269
+ "benchmark": "terminal_bench",
270
+ "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.1",
271
+ "modelRaw": "Claude Code GLM 5.1",
272
+ "metric": "accuracy",
273
+ "score": 0.5865168539325842,
274
+ "scoreScale": "0-1",
275
+ "date": "2026-05-02",
276
+ "extra": {
277
+ "agent": "Claude Code",
278
+ "model": [
279
+ "GLM 5.1"
280
+ ],
281
+ "stderr": 0.012410517996839619,
282
+ "verified": true,
283
+ "agentName": "claude-code",
284
+ "agentVersion": "2.1.123",
285
+ "modelNames": [
286
+ "glm-5.1"
287
+ ],
288
+ "modelProviders": [
289
+ "z-ai"
290
+ ]
291
+ }
292
+ }
293
+ ]
294
+ }