qualspec 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.qualspec_cassettes/comparison_test.yml +439 -0
- data/.qualspec_cassettes/quick_test.yml +232 -0
- data/.rspec +3 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +70 -0
- data/CHANGELOG.md +16 -0
- data/README.md +84 -0
- data/Rakefile +8 -0
- data/docs/configuration.md +132 -0
- data/docs/evaluation-suites.md +180 -0
- data/docs/getting-started.md +102 -0
- data/docs/recording.md +196 -0
- data/docs/rspec-integration.md +233 -0
- data/docs/rubrics.md +174 -0
- data/examples/cassettes/qualspec_rspec_integration_basic_evaluation_evaluates_responses_with_inline_criteria.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_basic_evaluation_provides_detailed_feedback_on_failure.yml +64 -0
- data/examples/cassettes/qualspec_rspec_integration_comparative_evaluation_compares_multiple_responses.yml +74 -0
- data/examples/cassettes/qualspec_rspec_integration_score_matchers_supports_score_comparisons.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_vcr_integration_records_and_plays_back_api_calls_automatically.yml +65 -0
- data/examples/cassettes/qualspec_rspec_integration_with_context_uses_context_in_evaluation.yml +67 -0
- data/examples/cassettes/qualspec_rspec_integration_with_rubrics_evaluates_using_builtin_rubrics.yml +67 -0
- data/examples/comparison.rb +22 -0
- data/examples/model_comparison.rb +38 -0
- data/examples/persona_test.rb +49 -0
- data/examples/quick_test.rb +28 -0
- data/examples/report.html +399 -0
- data/examples/rspec_example_spec.rb +153 -0
- data/exe/qualspec +142 -0
- data/lib/qualspec/builtin_rubrics.rb +83 -0
- data/lib/qualspec/client.rb +127 -0
- data/lib/qualspec/configuration.rb +32 -0
- data/lib/qualspec/evaluation.rb +52 -0
- data/lib/qualspec/judge.rb +217 -0
- data/lib/qualspec/recorder.rb +55 -0
- data/lib/qualspec/rspec/configuration.rb +49 -0
- data/lib/qualspec/rspec/evaluation_result.rb +142 -0
- data/lib/qualspec/rspec/helpers.rb +155 -0
- data/lib/qualspec/rspec/matchers.rb +163 -0
- data/lib/qualspec/rspec.rb +66 -0
- data/lib/qualspec/rubric.rb +43 -0
- data/lib/qualspec/suite/behavior.rb +43 -0
- data/lib/qualspec/suite/builtin_behaviors.rb +84 -0
- data/lib/qualspec/suite/candidate.rb +30 -0
- data/lib/qualspec/suite/dsl.rb +64 -0
- data/lib/qualspec/suite/html_reporter.rb +673 -0
- data/lib/qualspec/suite/reporter.rb +274 -0
- data/lib/qualspec/suite/runner.rb +261 -0
- data/lib/qualspec/suite/scenario.rb +57 -0
- data/lib/qualspec/version.rb +5 -0
- data/lib/qualspec.rb +103 -0
- data/sig/qualspec.rbs +4 -0
- metadata +142 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 282d6bd28dd788cbd3cc9abc7a7c521ceb29d6f5589ba30b210680ff9aa82151
|
|
4
|
+
data.tar.gz: 24f9e2395ea3bb2b79a7ca93ed0b0cdee974220304e8a82214cbbf7aeabe0727
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: e1fb1878f58f112e082611e29ed5b064eaa866a801c79cf8385919b73c7e616ede3871fdc11b8fc650ca2e522c075c1c9b4c2d9ce8f9cdbf8fb237b3e2d4505c
|
|
7
|
+
data.tar.gz: 2cb94b86bda191b0261de74185384afa2728cb94638ad33d63b3c8d7bb73fa0ff6853ba9f4f54917d5cc8768feff5f3e082672700f137fd1a3da8ded9ae9e7a6
|
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
---
|
|
2
|
+
http_interactions:
|
|
3
|
+
- request:
|
|
4
|
+
method: post
|
|
5
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
6
|
+
body:
|
|
7
|
+
encoding: UTF-8
|
|
8
|
+
string: '{"model":"google/gemini-2.0-flash-001","messages":[{"role":"user","content":"Say
|
|
9
|
+
hello in a friendly way!"}]}'
|
|
10
|
+
headers:
|
|
11
|
+
Content-Type:
|
|
12
|
+
- application/json
|
|
13
|
+
Authorization:
|
|
14
|
+
- Bearer <API_KEY>
|
|
15
|
+
User-Agent:
|
|
16
|
+
- Faraday v2.14.0
|
|
17
|
+
response:
|
|
18
|
+
status:
|
|
19
|
+
code: 200
|
|
20
|
+
message: OK
|
|
21
|
+
headers:
|
|
22
|
+
date:
|
|
23
|
+
- Thu, 25 Dec 2025 01:32:54 GMT
|
|
24
|
+
content-type:
|
|
25
|
+
- application/json
|
|
26
|
+
transfer-encoding:
|
|
27
|
+
- chunked
|
|
28
|
+
connection:
|
|
29
|
+
- keep-alive
|
|
30
|
+
content-encoding:
|
|
31
|
+
- gzip
|
|
32
|
+
access-control-allow-origin:
|
|
33
|
+
- "*"
|
|
34
|
+
vary:
|
|
35
|
+
- Accept-Encoding
|
|
36
|
+
permissions-policy:
|
|
37
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
38
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
39
|
+
referrer-policy:
|
|
40
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
41
|
+
x-content-type-options:
|
|
42
|
+
- nosniff
|
|
43
|
+
server:
|
|
44
|
+
- cloudflare
|
|
45
|
+
cf-ray:
|
|
46
|
+
- 9b349653a95eace2-MSP
|
|
47
|
+
body:
|
|
48
|
+
encoding: ASCII-8BIT
|
|
49
|
+
string: !binary |-
|
|
50
|
+
CiAgICAgICAgIAp7ImlkIjoiZ2VuLTE3NjY2MjYzNzMtU09KdTA5TjJkSDN4d3BFSWZsRlIiLCJwcm92aWRlciI6Ikdvb2dsZSBBSSBTdHVkaW8iLCJtb2RlbCI6Imdvb2dsZS9nZW1pbmktMi4wLWZsYXNoLTAwMSIsIm9iamVjdCI6ImNoYXQuY29tcGxldGlvbiIsImNyZWF0ZWQiOjE3NjY2MjYzNzMsImNob2ljZXMiOlt7ImxvZ3Byb2JzIjpudWxsLCJmaW5pc2hfcmVhc29uIjoic3RvcCIsIm5hdGl2ZV9maW5pc2hfcmVhc29uIjoiU1RPUCIsImluZGV4IjowLCJtZXNzYWdlIjp7InJvbGUiOiJhc3Npc3RhbnQiLCJjb250ZW50IjoiSGV5IHRoZXJlISBIb3cncyBpdCBnb2luZz8g8J+YilxuIiwicmVmdXNhbCI6bnVsbCwicmVhc29uaW5nIjpudWxsfX1dLCJ1c2FnZSI6eyJwcm9tcHRfdG9rZW5zIjo3LCJjb21wbGV0aW9uX3Rva2VucyI6MTEsInRvdGFsX3Rva2VucyI6MTgsImNvc3QiOjAuMDAwMDA1MSwiaXNfYnlvayI6ZmFsc2UsInByb21wdF90b2tlbnNfZGV0YWlscyI6eyJjYWNoZWRfdG9rZW5zIjowLCJhdWRpb190b2tlbnMiOjAsInZpZGVvX3Rva2VucyI6MH0sImNvc3RfZGV0YWlscyI6eyJ1cHN0cmVhbV9pbmZlcmVuY2VfY29zdCI6bnVsbCwidXBzdHJlYW1faW5mZXJlbmNlX3Byb21wdF9jb3N0Ijo3ZS03LCJ1cHN0cmVhbV9pbmZlcmVuY2VfY29tcGxldGlvbnNfY29zdCI6MC4wMDAwMDQ0fSwiY29tcGxldGlvbl90b2tlbnNfZGV0YWlscyI6eyJyZWFzb25pbmdfdG9rZW5zIjowLCJpbWFnZV90b2tlbnMiOjB9fX0=
|
|
51
|
+
recorded_at: Thu, 25 Dec 2025 01:32:54 GMT
|
|
52
|
+
- request:
|
|
53
|
+
method: post
|
|
54
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
55
|
+
body:
|
|
56
|
+
encoding: UTF-8
|
|
57
|
+
string: '{"model":"google/gemini-2.5-pro-preview","messages":[{"role":"user","content":"Say
|
|
58
|
+
hello in a friendly way!"}]}'
|
|
59
|
+
headers:
|
|
60
|
+
Content-Type:
|
|
61
|
+
- application/json
|
|
62
|
+
Authorization:
|
|
63
|
+
- Bearer <API_KEY>
|
|
64
|
+
User-Agent:
|
|
65
|
+
- Faraday v2.14.0
|
|
66
|
+
response:
|
|
67
|
+
status:
|
|
68
|
+
code: 200
|
|
69
|
+
message: OK
|
|
70
|
+
headers:
|
|
71
|
+
date:
|
|
72
|
+
- Thu, 25 Dec 2025 01:32:56 GMT
|
|
73
|
+
content-type:
|
|
74
|
+
- application/json
|
|
75
|
+
transfer-encoding:
|
|
76
|
+
- chunked
|
|
77
|
+
connection:
|
|
78
|
+
- keep-alive
|
|
79
|
+
content-encoding:
|
|
80
|
+
- gzip
|
|
81
|
+
access-control-allow-origin:
|
|
82
|
+
- "*"
|
|
83
|
+
vary:
|
|
84
|
+
- Accept-Encoding
|
|
85
|
+
permissions-policy:
|
|
86
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
87
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
88
|
+
referrer-policy:
|
|
89
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
90
|
+
x-content-type-options:
|
|
91
|
+
- nosniff
|
|
92
|
+
server:
|
|
93
|
+
- cloudflare
|
|
94
|
+
cf-ray:
|
|
95
|
+
- 9b3496577dafa1d9-MSP
|
|
96
|
+
body:
|
|
97
|
+
encoding: ASCII-8BIT
|
|
98
|
+
string: !binary |-
|
|
99
|
+
CiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAoKICAgICAgICAgCgogICAgICAgICAKCiAgICAgICAgIAp7ImlkIjoiZ2VuLTE3NjY2MjYzNzQtem1KdVk4cDNscHcza2FKMG5Td1UiLCJwcm92aWRlciI6Ikdvb2dsZSIsIm1vZGVsIjoiZ29vZ2xlL2dlbWluaS0yLjUtcHJvIiwib2JqZWN0IjoiY2hhdC5jb21wbGV0aW9uIiwiY3JlYXRlZCI6MTc2NjYyNjM3NCwiY2hvaWNlcyI6W3sibG9ncHJvYnMiOm51bGwsImZpbmlzaF9yZWFzb24iOiJzdG9wIiwibmF0aXZlX2ZpbmlzaF9yZWFzb24iOiJTVE9QIiwiaW5kZXgiOjAsIm1lc3NhZ2UiOnsicm9sZSI6ImFzc2lzdGFudCIsImNvbnRlbnQiOiJXZWxsLCBoZWxsbyB0aGVyZSEg8J+Ri1xuXG5JdCdzIGdyZWF0IHRvIGhlYXIgZnJvbSB5b3UuIEkgaG9wZSB5b3UncmUgaGF2aW5nIGEgZmFudGFzdGljIGRheSFcblxuV2hhdCBjYW4gSSBoZWxwIHlvdSB3aXRoPyIsInJlZnVzYWwiOm51bGwsInJlYXNvbmluZyI6IioqQ3JhZnRpbmcgYSBXYXJtIEdyZWV0aW5nKipcblxuSSd2ZSBob21lZCBpbiBvbiB0aGUgY29yZSBvZiB0aGUgcHJvbXB0OiBmcmllbmRseS4gSSdtIGV4cGxvcmluZyB3YXlzIHRvIGluZnVzZSB0aGUgZ3JlZXRpbmcgd2l0aCB3YXJtdGgsIGF2b2lkaW5nIGEgcm9ib3RpYyB0b25lLiBDb25zaWRlcmluZyBwaHJhc2VzIHRoYXQgY29udmV5IGdlbnVpbmUgYXBwcm9hY2hhYmlsaXR5IGlzIGtleS4gVGhlIGdvYWwgaXMgdG8gbWFrZSB0aGUgdXNlciBmZWVsIGFja25vd2xlZGdlZCBhbmQgd2VsY29tZSwgYmV5b25kIGEgYmFzaWMgc2FsdXRhdGlvbi5cblxuXG4qKlByaW9yaXRpemluZyBVc2VyIE5lZWRzKipcblxuSSdtIG5vdyB6ZXJvaW5nIGluIG9uIHRoZSBwcmFjdGljYWwgYXNwZWN0OiBpbnRlZ3JhdGluZyB0aGUgdXNlcidzIG5lZWQgd2l0aCBhIHdhcm0gd2VsY29tZS4gVGhlIGZvY3VzIGlzIG9uIG9mZmVyaW5nIGltbWVkaWF0ZSBhc3Npc3RhbmNlLiBJJ20gd2VpZ2hpbmcgdGhlIGJhbGFuY2UgYmV0d2VlbiBmcmllbmRsaW5lc3MgYW5kIGRpcmVjdG5lc3MsIGVuc3VyaW5nIHRoZSBncmVldGluZyBpcyBib3RoIHdlbGNvbWluZyBhbmQgc2V0cyB0aGUgc3RhZ2UgZm9yIHRhc2sgY29tcGxldGlvbi5cblxuXG4qKlJlZmluaW5nIEdyZWV0aW5nIENob2ljZXMqKlxuXG5JJ20gbm93IGNvbXBhcmluZyBhbmQgY29udHJhc3RpbmcgdGhlIGdyZWV0aW5nIG9wdGlvbnMuIFRoZSBnb2FsIGlzIHRvIHByb3ZpZGUgYSB1c2VyIHdpdGggdmFyaWV0eS4gSSdtIGZvY3VzaW5nIG9uIFwiSGV5ISDwn5GLIEhvdyBjYW4gSSBoZWxwIG1ha2UgeW91ciBkYXkgYSBsaXR0bGUgYmV0dGVyP1wiIGR1ZSB0byBpdHMgd2FybXRoIGFuZCB1c2Ugb2YgYW4gZW1vamkgZm9yIHZpc3VhbCBhcHBlYWwsIHdoaWNoIGNvdWxkIGFkZCB0byB0aGUgZmVlbGluZyBvZiBmcmllbmRsaW5lc3MuIFRoZSBvdGhlciBvcHRpb25zIGFyZSBrZXB0IG9uIHRoZSB0YWJsZSB0byBwcm92aWRlIGFsdGVybmF0aXZlcyB0aGF0IGZpdCB2YXJ5aW5nIGNvbnRleHRzLlxuXG5cbioqRmluYWxpemluZyBHcmVldGluZyBTdHlsZXMqKlxuXG5JJ20gbm93IGNvbnNvbGlkYXRpbmcgb3B0aW9ucywgbGVhbmluZyB0b3dhcmRzIGRpcmVjdCBidXQgd2FybSBzdHlsZXMuIFRoZSBmaW5hbCBjaG9pY2Ugd2lsbCBiYWxhbmNlIGZyaWVuZGxpbmVzcyB3aXRoIHV0aWxpdHksIGZvY3VzaW5nIG9uIHRoZSBjb3JlIGRpcmVjdGl2ZS4gSSd2ZSBuYXJyb3dlZCBpdCBkb3duIGFuZCByZWZpbmVkIGEgcHJpbWFyeSBvcHRpb246IFwiSGkgdGhlcmUhIPCfkYsgSG9wZSB5b3UncmUgaGF2aW5nIGEgZmFudGFzdGljIGRheSFcIiBUaGUgZW1vamlzIGFuZCBwb3NpdGl2ZSB0b25lIGFyZSBpbnRlbmRlZCB0byBzZXQgYSB3ZWxjb21pbmcgcHJlY2VkZW50LiBPdGhlciBvcHRpb25zIGFyZSBiZWluZyB3ZWlnaGVkIHRvIGVuaGFuY2UgZmxleGliaWxpdHkuXG5cblxuKipQZXJmZWN0aW5nIGEgUmVzcG9uc2UqKlxuXG5JJ20gbm93IHByaW9yaXRpemluZyB0aGUgbW9zdCBlZmZlY3RpdmUgZGVsaXZlcnkuIEknbSBmb2N1c2luZyBvbiBjcmFmdGluZyBhIHByaW1hcnkgZ3JlZXRpbmcgd2hpbGUgY29uc2lkZXJpbmcgdGhlIG9wdGlvbiB0byBvZmZlciB2YXJpYXRpb25zIGZvciBkaWZmZXJlbnQgY29udmVyc2F0aW9uYWwgc3R5bGVzLiBJJ20gY2hvb3NpbmcgYmV0d2VlbiBcIkhpIHRoZXJlISDwn5GLIEhvcGUgeW91J3JlIGhhdmluZyBhIGZhbnRhc3RpYyBkYXkuIFdoYXQgY2FuIEkgaGVscCB5b3Ugd2l0aD9cIiBhbmQgXCJIZWxsbyB0aGVyZSEg8J+RiyBIb3BlIHlvdSdyZSBoYXZpbmcgYSB3b25kZXJmdWwgZGF5LiBXaGF0IGNhbiBJIGRvIGZvciB5b3U/XCIgdG8gZW5zdXJlIG1heGltdW0gZnJpZW5kbGluZXNzIGFuZCBjbGFyaXR5LlxuXG5cbiIsInJlYXNvbmluZ19kZXRhaWxzIjpbeyJmb3JtYXQiOiJnb29nbGUtZ2VtaW5pLXYxIiwiaW5kZXgiOjAsInR5cGUiOiJyZWFzb25pbmcudGV4dCIsInRleHQiOiIqKkNyYWZ0aW5nIGEgV2FybSBHcmVldGluZyoqXG5cbkkndmUgaG9tZWQgaW4gb24gdGhlIGNvcmUgb2YgdGhlIHByb21wdDogZnJpZW5kbHkuIEknbSBleHBsb3Jpbmcgd2F5cyB0byBpbmZ1c2UgdGhlIGdyZWV0aW5nIHdpdGggd2FybXRoLCBhdm9pZGluZyBhIHJvYm90aWMgdG9uZS4gQ29uc2lkZXJpbmcgcGhyYXNlcyB0aGF0IGNvbnZleSBnZW51aW5lIGFwcHJvYWNoYWJpbGl0eSBpcyBrZXkuIFRoZSBnb2FsIGlzIHRvIG1ha2UgdGhlIHVzZXIgZmVlbCBhY2tub3dsZWRnZWQgYW5kIHdlbGNvbWUsIGJleW9uZCBhIGJhc2ljIHNhbHV0YXRpb24uXG5cblxuKipQcmlvcml0aXppbmcgVXNlciBOZWVkcyoqXG5cbkknbSBub3cgemVyb2luZyBpbiBvbiB0aGUgcHJhY3RpY2FsIGFzcGVjdDogaW50ZWdyYXRpbmcgdGhlIHVzZXIncyBuZWVkIHdpdGggYSB3YXJtIHdlbGNvbWUuIFRoZSBmb2N1cyBpcyBvbiBvZmZlcmluZyBpbW1lZGlhdGUgYXNzaXN0YW5jZS4gSSdtIHdlaWdoaW5nIHRoZSBiYWxhbmNlIGJldHdlZW4gZnJpZW5kbGluZXNzIGFuZCBkaXJlY3RuZXNzLCBlbnN1cmluZyB0aGUgZ3JlZXRpbmcgaXMgYm90aCB3ZWxjb21pbmcgYW5kIHNldHMgdGhlIHN0YWdlIGZvciB0YXNrIGNvbXBsZXRpb24uXG5cblxuKipSZWZpbmluZyBHcmVldGluZyBDaG9pY2VzKipcblxuSSdtIG5vdyBjb21wYXJpbmcgYW5kIGNvbnRyYXN0aW5nIHRoZSBncmVldGluZyBvcHRpb25zLiBUaGUgZ29hbCBpcyB0byBwcm92aWRlIGEgdXNlciB3aXRoIHZhcmlldHkuIEknbSBmb2N1c2luZyBvbiBcIkhleSEg8J+RiyBIb3cgY2FuIEkgaGVscCBtYWtlIHlvdXIgZGF5IGEgbGl0dGxlIGJldHRlcj9cIiBkdWUgdG8gaXRzIHdhcm10aCBhbmQgdXNlIG9mIGFuIGVtb2ppIGZvciB2aXN1YWwgYXBwZWFsLCB3aGljaCBjb3VsZCBhZGQgdG8gdGhlIGZlZWxpbmcgb2YgZnJpZW5kbGluZXNzLiBUaGUgb3RoZXIgb3B0aW9ucyBhcmUga2VwdCBvbiB0aGUgdGFibGUgdG8gcHJvdmlkZSBhbHRlcm5hdGl2ZXMgdGhhdCBmaXQgdmFyeWluZyBjb250ZXh0cy5cblxuXG4qKkZpbmFsaXppbmcgR3JlZXRpbmcgU3R5bGVzKipcblxuSSdtIG5vdyBjb25zb2xpZGF0aW5nIG9wdGlvbnMsIGxlYW5pbmcgdG93YXJkcyBkaXJlY3QgYnV0IHdhcm0gc3R5bGVzLiBUaGUgZmluYWwgY2hvaWNlIHdpbGwgYmFsYW5jZSBmcmllbmRsaW5lc3Mgd2l0aCB1dGlsaXR5LCBmb2N1c2luZyBvbiB0aGUgY29yZSBkaXJlY3RpdmUuIEkndmUgbmFycm93ZWQgaXQgZG93biBhbmQgcmVmaW5lZCBhIHByaW1hcnkgb3B0aW9uOiBcIkhpIHRoZXJlISDwn5GLIEhvcGUgeW91J3JlIGhhdmluZyBhIGZhbnRhc3RpYyBkYXkhXCIgVGhlIGVtb2ppcyBhbmQgcG9zaXRpdmUgdG9uZSBhcmUgaW50ZW5kZWQgdG8gc2V0IGEgd2VsY29taW5nIHByZWNlZGVudC4gT3RoZXIgb3B0aW9ucyBhcmUgYmVpbmcgd2VpZ2hlZCB0byBlbmhhbmNlIGZsZXhpYmlsaXR5LlxuXG5cbioqUGVyZmVjdGluZyBhIFJlc3BvbnNlKipcblxuSSdtIG5vdyBwcmlvcml0aXppbmcgdGhlIG1vc3QgZWZmZWN0aXZlIGRlbGl2ZXJ5LiBJJ20gZm9jdXNpbmcgb24gY3JhZnRpbmcgYSBwcmltYXJ5IGdyZWV0aW5nIHdoaWxlIGNvbnNpZGVyaW5nIHRoZSBvcHRpb24gdG8gb2ZmZXIgdmFyaWF0aW9ucyBmb3IgZGlmZmVyZW50IGNvbnZlcnNhdGlvbmFsIHN0eWxlcy4gSSdtIGNob29zaW5nIGJldHdlZW4gXCJIaSB0aGVyZSEg8J+RiyBIb3BlIHlvdSdyZSBoYXZpbmcgYSBmYW50YXN0aWMgZGF5LiBXaGF0IGNhbiBJIGhlbHAgeW91IHdpdGg/XCIgYW5kIFwiSGVsbG8gdGhlcmUhIPCfkYsgSG9wZSB5b3UncmUgaGF2aW5nIGEgd29uZGVyZnVsIGRheS4gV2hhdCBjYW4gSSBkbyBmb3IgeW91P1wiIHRvIGVuc3VyZSBtYXhpbXVtIGZyaWVuZGxpbmVzcyBhbmQgY2xhcml0eS5cblxuXG4ifV19fV0sInVzYWdlIjp7InByb21wdF90b2tlbnMiOjcsImNvbXBsZXRpb25fdG9rZW5zIjoxMjUxLCJ0b3RhbF90b2tlbnMiOjEyNTgsImNvc3QiOjAuMDEyNTE4NzUsImlzX2J5b2siOmZhbHNlLCJwcm9tcHRfdG9rZW5zX2RldGFpbHMiOnsiY2FjaGVkX3Rva2VucyI6MCwiYXVkaW9fdG9rZW5zIjowLCJ2aWRlb190b2tlbnMiOjB9LCJjb3N0X2RldGFpbHMiOnsidXBzdHJlYW1faW5mZXJlbmNlX2Nvc3QiOm51bGwsInVwc3RyZWFtX2luZmVyZW5jZV9wcm9tcHRfY29zdCI6MC4wMDAwMDg3NSwidXBzdHJlYW1faW5mZXJlbmNlX2NvbXBsZXRpb25zX2Nvc3QiOjAuMDEyNTF9LCJjb21wbGV0aW9uX3Rva2Vuc19kZXRhaWxzIjp7InJlYXNvbmluZ190b2tlbnMiOjEyMTcsImltYWdlX3Rva2VucyI6MH19fQ==
|
|
100
|
+
recorded_at: Thu, 25 Dec 2025 01:33:06 GMT
|
|
101
|
+
- request:
|
|
102
|
+
method: post
|
|
103
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
104
|
+
body:
|
|
105
|
+
encoding: UTF-8
|
|
106
|
+
string: "{\"model\":\"google/gemini-3-flash-preview\",\"messages\":[{\"role\":\"system\",\"content\":\"You
|
|
107
|
+
are an evaluation judge comparing multiple AI responses to the same prompt.\\nScore
|
|
108
|
+
each response on how well it meets the criteria.\\n\\nScoring (0-10):\\n-
|
|
109
|
+
0: Completely fails\\n- 1-3: Mostly fails\\n- 4-6: Partially meets criteria\\n-
|
|
110
|
+
7-8: Mostly meets criteria\\n- 9-10: Excellent\\n\\nBe strict but fair. Compare
|
|
111
|
+
responses relative to each other.\\n\\nIMPORTANT: Use the EXACT candidate
|
|
112
|
+
names as given in the prompt.\\n\\nYou MUST respond with valid JSON with scores
|
|
113
|
+
for each candidate AND declare a winner.\\nExample format (use actual names
|
|
114
|
+
from prompt, not these placeholders):\\n{\\n \\\"actual-name-1\\\": {\\\"score\\\":
|
|
115
|
+
8, \\\"reasoning\\\": \\\"...\\\"},\\n \\\"actual-name-2\\\": {\\\"score\\\":
|
|
116
|
+
6, \\\"reasoning\\\": \\\"...\\\"},\\n \\\"winner\\\": \\\"actual-name-1\\\"\\n}\\n\\nUse
|
|
117
|
+
\\\"winner\\\": \\\"tie\\\" if scores are equal or too close to call.\\n\"},{\"role\":\"user\",\"content\":\"##
|
|
118
|
+
Evaluation criteria:\\n1. responds in a friendly and welcoming manner\\n##
|
|
119
|
+
Context:\\nUser prompt: Say hello in a friendly way!\\n## Candidates to evaluate:
|
|
120
|
+
\\\"gemini-flash\\\", \\\"gemini-pro\\\"\\n## Responses:\\n\\n### gemini-flash:\\nHey
|
|
121
|
+
there! How's it going? \U0001F60A\\n\\n\\n### gemini-pro:\\nWell, hello there!
|
|
122
|
+
\U0001F44B\\n\\nIt's great to hear from you. I hope you're having a fantastic
|
|
123
|
+
day!\\n\\nWhat can I help you with?\\n\\nScore each candidate (\\\"gemini-flash\\\",
|
|
124
|
+
\\\"gemini-pro\\\") from 0-10.\\nUse these EXACT names in your JSON response.
|
|
125
|
+
Declare a winner.\"}],\"response_format\":{\"type\":\"json_object\"}}"
|
|
126
|
+
headers:
|
|
127
|
+
Content-Type:
|
|
128
|
+
- application/json
|
|
129
|
+
Authorization:
|
|
130
|
+
- Bearer <API_KEY>
|
|
131
|
+
User-Agent:
|
|
132
|
+
- Faraday v2.14.0
|
|
133
|
+
response:
|
|
134
|
+
status:
|
|
135
|
+
code: 200
|
|
136
|
+
message: OK
|
|
137
|
+
headers:
|
|
138
|
+
date:
|
|
139
|
+
- Thu, 25 Dec 2025 01:33:07 GMT
|
|
140
|
+
content-type:
|
|
141
|
+
- application/json
|
|
142
|
+
transfer-encoding:
|
|
143
|
+
- chunked
|
|
144
|
+
connection:
|
|
145
|
+
- keep-alive
|
|
146
|
+
content-encoding:
|
|
147
|
+
- gzip
|
|
148
|
+
access-control-allow-origin:
|
|
149
|
+
- "*"
|
|
150
|
+
vary:
|
|
151
|
+
- Accept-Encoding
|
|
152
|
+
permissions-policy:
|
|
153
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
154
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
155
|
+
referrer-policy:
|
|
156
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
157
|
+
x-content-type-options:
|
|
158
|
+
- nosniff
|
|
159
|
+
server:
|
|
160
|
+
- cloudflare
|
|
161
|
+
cf-ray:
|
|
162
|
+
- 9b3496a35ce84cb8-MSP
|
|
163
|
+
body:
|
|
164
|
+
encoding: ASCII-8BIT
|
|
165
|
+
string: "\n \n\n \n\n \n{\"id\":\"gen-1766626386-qgk1JEwZ4RzZLQcotm3i\",\"provider\":\"Google\",\"model\":\"google/gemini-3-flash-preview\",\"object\":\"chat.completion\",\"created\":1766626386,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"STOP\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"{\\n
|
|
166
|
+
\ \\\"gemini-flash\\\": {\\n \\\"score\\\": 9,\\n \\\"reasoning\\\":
|
|
167
|
+
\\\"A very friendly and concise greeting with a nice emoji. Perfectly meets
|
|
168
|
+
the prompt requirements.\\\"\\n },\\n \\\"gemini-pro\\\": {\\n \\\"score\\\":
|
|
169
|
+
10,\\n \\\"reasoning\\\": \\\"Slightly more welcoming than the flash response
|
|
170
|
+
by including a warm opening, a well-wish for the user's day, and an offer
|
|
171
|
+
to help.\\\"\\n },\\n \\\"winner\\\": \\\"gemini-pro\\\"\\n}\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":363,\"completion_tokens\":118,\"total_tokens\":481,\"cost\":0.0005355,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":null,\"upstream_inference_prompt_cost\":0.0001815,\"upstream_inference_completions_cost\":0.000354},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0}}}"
|
|
172
|
+
recorded_at: Thu, 25 Dec 2025 01:33:07 GMT
|
|
173
|
+
- request:
|
|
174
|
+
method: post
|
|
175
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
176
|
+
body:
|
|
177
|
+
encoding: UTF-8
|
|
178
|
+
string: '{"model":"google/gemini-2.0-flash-001","messages":[{"role":"user","content":"Explain
|
|
179
|
+
why the sky is blue in one sentence."}]}'
|
|
180
|
+
headers:
|
|
181
|
+
Content-Type:
|
|
182
|
+
- application/json
|
|
183
|
+
Authorization:
|
|
184
|
+
- Bearer <API_KEY>
|
|
185
|
+
User-Agent:
|
|
186
|
+
- Faraday v2.14.0
|
|
187
|
+
response:
|
|
188
|
+
status:
|
|
189
|
+
code: 200
|
|
190
|
+
message: OK
|
|
191
|
+
headers:
|
|
192
|
+
date:
|
|
193
|
+
- Thu, 25 Dec 2025 01:33:08 GMT
|
|
194
|
+
content-type:
|
|
195
|
+
- application/json
|
|
196
|
+
transfer-encoding:
|
|
197
|
+
- chunked
|
|
198
|
+
connection:
|
|
199
|
+
- keep-alive
|
|
200
|
+
content-encoding:
|
|
201
|
+
- gzip
|
|
202
|
+
access-control-allow-origin:
|
|
203
|
+
- "*"
|
|
204
|
+
vary:
|
|
205
|
+
- Accept-Encoding
|
|
206
|
+
permissions-policy:
|
|
207
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
208
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
209
|
+
referrer-policy:
|
|
210
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
211
|
+
x-content-type-options:
|
|
212
|
+
- nosniff
|
|
213
|
+
server:
|
|
214
|
+
- cloudflare
|
|
215
|
+
cf-ray:
|
|
216
|
+
- 9b3496adaa2b511a-MSP
|
|
217
|
+
body:
|
|
218
|
+
encoding: ASCII-8BIT
|
|
219
|
+
string: "\n \n{\"id\":\"gen-1766626388-wU3ySbVUsKDT4E5xQPqa\",\"provider\":\"Google\",\"model\":\"google/gemini-2.0-flash-001\",\"object\":\"chat.completion\",\"created\":1766626388,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"STOP\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"The
|
|
220
|
+
sky appears blue because air molecules scatter shorter, blue wavelengths of
|
|
221
|
+
sunlight more than other colors, making blue light more visible in all directions.\\n\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":29,\"total_tokens\":39,\"cost\":0.0000189,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":null,\"upstream_inference_prompt_cost\":0.0000015,\"upstream_inference_completions_cost\":0.0000174},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0}}}"
|
|
222
|
+
recorded_at: Thu, 25 Dec 2025 01:33:08 GMT
|
|
223
|
+
- request:
|
|
224
|
+
method: post
|
|
225
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
226
|
+
body:
|
|
227
|
+
encoding: UTF-8
|
|
228
|
+
string: '{"model":"google/gemini-2.5-pro-preview","messages":[{"role":"user","content":"Explain
|
|
229
|
+
why the sky is blue in one sentence."}]}'
|
|
230
|
+
headers:
|
|
231
|
+
Content-Type:
|
|
232
|
+
- application/json
|
|
233
|
+
Authorization:
|
|
234
|
+
- Bearer <API_KEY>
|
|
235
|
+
User-Agent:
|
|
236
|
+
- Faraday v2.14.0
|
|
237
|
+
response:
|
|
238
|
+
status:
|
|
239
|
+
code: 200
|
|
240
|
+
message: OK
|
|
241
|
+
headers:
|
|
242
|
+
date:
|
|
243
|
+
- Thu, 25 Dec 2025 01:33:10 GMT
|
|
244
|
+
content-type:
|
|
245
|
+
- application/json
|
|
246
|
+
transfer-encoding:
|
|
247
|
+
- chunked
|
|
248
|
+
connection:
|
|
249
|
+
- keep-alive
|
|
250
|
+
content-encoding:
|
|
251
|
+
- gzip
|
|
252
|
+
access-control-allow-origin:
|
|
253
|
+
- "*"
|
|
254
|
+
vary:
|
|
255
|
+
- Accept-Encoding
|
|
256
|
+
permissions-policy:
|
|
257
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
258
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
259
|
+
referrer-policy:
|
|
260
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
261
|
+
x-content-type-options:
|
|
262
|
+
- nosniff
|
|
263
|
+
server:
|
|
264
|
+
- cloudflare
|
|
265
|
+
cf-ray:
|
|
266
|
+
- 9b3496b1cbf2a203-MSP
|
|
267
|
+
body:
|
|
268
|
+
encoding: ASCII-8BIT
|
|
269
|
+
string: "\n \n\n \n\n \n\n \n\n \n\n
|
|
270
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
271
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
272
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
273
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
274
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
275
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
276
|
+
\ \n\n \n\n \n\n \n\n \n\n \n\n
|
|
277
|
+
\ \n{\"id\":\"gen-1766626388-igCxd770dUoUMGfLlLZ2\",\"provider\":\"Google\",\"model\":\"google/gemini-2.5-pro\",\"object\":\"chat.completion\",\"created\":1766626388,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"STOP\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"The
|
|
278
|
+
sky is blue because the gases in Earth's atmosphere scatter the sun's blue
|
|
279
|
+
light more than they scatter other colors.\",\"refusal\":null,\"reasoning\":\"**Analyzing
|
|
280
|
+
Atmospheric Phenomena**\\n\\nI've homed in on the core question: Why is the
|
|
281
|
+
sky blue? It's a classic. Now, I'm recalling the physics at play. White sunlight,
|
|
282
|
+
a blend of all colors, interacts with our atmosphere. The scattering effect,
|
|
283
|
+
caused by the air molecules, will be key to explaining the blue hue.\\n\\n\\n**Simplifying
|
|
284
|
+
the Complexities**\\n\\nI'm focused on the \\\"why\\\" and \\\"how\\\" of
|
|
285
|
+
the sky's color. My first attempt was too technical, so I'm simplifying. Revising
|
|
286
|
+
to avoid jargon and make the explanation more accessible. I'm aiming for clarity
|
|
287
|
+
in describing the scattering process and the role of light.\\n\\n\\n**Refining
|
|
288
|
+
the Explanation**\\n\\nI'm evaluating my drafts, focusing on clarity and brevity.
|
|
289
|
+
Drafts 2 and 3 are in the running. I've noted that draft 2 is more descriptive
|
|
290
|
+
but draft 3 has an active voice. I'm now weighing which version most effectively
|
|
291
|
+
conveys the essentials of the phenomenon to an audience.\\n\\n\\n**Synthesizing
|
|
292
|
+
Concise Explanations**\\n\\nI'm focused on achieving maximum clarity while
|
|
293
|
+
using minimal words. Drafts 2 and 3 offer a strong base. After a few revisions
|
|
294
|
+
and evaluations, I've returned to refining the core explanation, prioritizing
|
|
295
|
+
simplicity. I'm exploring various phrasings: \\\"The air scatters the blue
|
|
296
|
+
part of sunlight...\\\" is a contender. I'm aiming for an explanation that
|
|
297
|
+
conveys the phenomenon with immediate understanding.\\n\\n\\n**Revising and
|
|
298
|
+
Finalizing**\\n\\nI'm focused on a final version. After exploring various
|
|
299
|
+
options, I'm streamlining. I'm prioritizing directness and simplicity. My
|
|
300
|
+
primary goal now is the optimal combination of concision and clarity for a
|
|
301
|
+
general audience. The sentence should directly address the phenomenon and
|
|
302
|
+
the cause.\\n\\n\\n**Formulating a Final Answer**\\n\\nI'm focused on the
|
|
303
|
+
most concise and accessible way to explain the phenomenon. After multiple
|
|
304
|
+
refinements, I've returned to versions of previous drafts. I'm choosing between
|
|
305
|
+
those that directly address the question and clearly convey the process. I
|
|
306
|
+
am now prioritizing easy-to-understand phrasing.\\n\\n\\n**Selecting the Optimal
|
|
307
|
+
Explanation**\\n\\nI'm evaluating my top contenders, refining each for maximum
|
|
308
|
+
impact. I am leaning toward \\\"The sky is blue because molecules in the air
|
|
309
|
+
scatter blue light from the sun more than they scatter other colors.\\\" I
|
|
310
|
+
am simplifying further for a general audience. The key is to be clear, concise,
|
|
311
|
+
and scientifically sound.\\n\\n\\n**Summarizing the Core Explanation**\\n\\nI've
|
|
312
|
+
been going through multiple iterations, simplifying and refining my final
|
|
313
|
+
answer. The goal is a concise and easy-to-understand explanation for a general
|
|
314
|
+
audience. I've narrowed it down to a version that emphasizes the atmosphere's
|
|
315
|
+
role in scattering blue light more than other colors, leading to the blue
|
|
316
|
+
sky we see. I think I've reached the optimal balance of accuracy and clarity.\\n\\n\\n**Summarizing
|
|
317
|
+
the Core Mechanism**\\n\\nMy focus is on pinpointing the most effective phrasing.
|
|
318
|
+
After multiple revisions, the key points are: the atmosphere's scattering
|
|
319
|
+
of blue sunlight and its preferential nature. I'm choosing between the variations
|
|
320
|
+
that emphasize the atmosphere's role and prioritize conciseness for an easy-to-understand
|
|
321
|
+
explanation.\\n\\n\\n\",\"reasoning_details\":[{\"format\":\"google-gemini-v1\",\"index\":0,\"type\":\"reasoning.text\",\"text\":\"**Analyzing
|
|
322
|
+
Atmospheric Phenomena**\\n\\nI've homed in on the core question: Why is the
|
|
323
|
+
sky blue? It's a classic. Now, I'm recalling the physics at play. White sunlight,
|
|
324
|
+
a blend of all colors, interacts with our atmosphere. The scattering effect,
|
|
325
|
+
caused by the air molecules, will be key to explaining the blue hue.\\n\\n\\n**Simplifying
|
|
326
|
+
the Complexities**\\n\\nI'm focused on the \\\"why\\\" and \\\"how\\\" of
|
|
327
|
+
the sky's color. My first attempt was too technical, so I'm simplifying. Revising
|
|
328
|
+
to avoid jargon and make the explanation more accessible. I'm aiming for clarity
|
|
329
|
+
in describing the scattering process and the role of light.\\n\\n\\n**Refining
|
|
330
|
+
the Explanation**\\n\\nI'm evaluating my drafts, focusing on clarity and brevity.
|
|
331
|
+
Drafts 2 and 3 are in the running. I've noted that draft 2 is more descriptive
|
|
332
|
+
but draft 3 has an active voice. I'm now weighing which version most effectively
|
|
333
|
+
conveys the essentials of the phenomenon to an audience.\\n\\n\\n**Synthesizing
|
|
334
|
+
Concise Explanations**\\n\\nI'm focused on achieving maximum clarity while
|
|
335
|
+
using minimal words. Drafts 2 and 3 offer a strong base. After a few revisions
|
|
336
|
+
and evaluations, I've returned to refining the core explanation, prioritizing
|
|
337
|
+
simplicity. I'm exploring various phrasings: \\\"The air scatters the blue
|
|
338
|
+
part of sunlight...\\\" is a contender. I'm aiming for an explanation that
|
|
339
|
+
conveys the phenomenon with immediate understanding.\\n\\n\\n**Revising and
|
|
340
|
+
Finalizing**\\n\\nI'm focused on a final version. After exploring various
|
|
341
|
+
options, I'm streamlining. I'm prioritizing directness and simplicity. My
|
|
342
|
+
primary goal now is the optimal combination of concision and clarity for a
|
|
343
|
+
general audience. The sentence should directly address the phenomenon and
|
|
344
|
+
the cause.\\n\\n\\n**Formulating a Final Answer**\\n\\nI'm focused on the
|
|
345
|
+
most concise and accessible way to explain the phenomenon. After multiple
|
|
346
|
+
refinements, I've returned to versions of previous drafts. I'm choosing between
|
|
347
|
+
those that directly address the question and clearly convey the process. I
|
|
348
|
+
am now prioritizing easy-to-understand phrasing.\\n\\n\\n**Selecting the Optimal
|
|
349
|
+
Explanation**\\n\\nI'm evaluating my top contenders, refining each for maximum
|
|
350
|
+
impact. I am leaning toward \\\"The sky is blue because molecules in the air
|
|
351
|
+
scatter blue light from the sun more than they scatter other colors.\\\" I
|
|
352
|
+
am simplifying further for a general audience. The key is to be clear, concise,
|
|
353
|
+
and scientifically sound.\\n\\n\\n**Summarizing the Core Explanation**\\n\\nI've
|
|
354
|
+
been going through multiple iterations, simplifying and refining my final
|
|
355
|
+
answer. The goal is a concise and easy-to-understand explanation for a general
|
|
356
|
+
audience. I've narrowed it down to a version that emphasizes the atmosphere's
|
|
357
|
+
role in scattering blue light more than other colors, leading to the blue
|
|
358
|
+
sky we see. I think I've reached the optimal balance of accuracy and clarity.\\n\\n\\n**Summarizing
|
|
359
|
+
the Core Mechanism**\\n\\nMy focus is on pinpointing the most effective phrasing.
|
|
360
|
+
After multiple revisions, the key points are: the atmosphere's scattering
|
|
361
|
+
of blue sunlight and its preferential nature. I'm choosing between the variations
|
|
362
|
+
that emphasize the atmosphere's role and prioritize conciseness for an easy-to-understand
|
|
363
|
+
explanation.\\n\\n\\n\"}]}}],\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":2157,\"total_tokens\":2167,\"cost\":0.0215825,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":null,\"upstream_inference_prompt_cost\":0.0000125,\"upstream_inference_completions_cost\":0.02157},\"completion_tokens_details\":{\"reasoning_tokens\":2131,\"image_tokens\":0}}}"
|
|
364
|
+
recorded_at: Thu, 25 Dec 2025 01:33:29 GMT
|
|
365
|
+
- request:
|
|
366
|
+
method: post
|
|
367
|
+
uri: https://openrouter.ai/api/v1/chat/completions
|
|
368
|
+
body:
|
|
369
|
+
encoding: UTF-8
|
|
370
|
+
string: '{"model":"google/gemini-3-flash-preview","messages":[{"role":"system","content":"You
|
|
371
|
+
are an evaluation judge comparing multiple AI responses to the same prompt.\nScore
|
|
372
|
+
each response on how well it meets the criteria.\n\nScoring (0-10):\n- 0:
|
|
373
|
+
Completely fails\n- 1-3: Mostly fails\n- 4-6: Partially meets criteria\n-
|
|
374
|
+
7-8: Mostly meets criteria\n- 9-10: Excellent\n\nBe strict but fair. Compare
|
|
375
|
+
responses relative to each other.\n\nIMPORTANT: Use the EXACT candidate names
|
|
376
|
+
as given in the prompt.\n\nYou MUST respond with valid JSON with scores for
|
|
377
|
+
each candidate AND declare a winner.\nExample format (use actual names from
|
|
378
|
+
prompt, not these placeholders):\n{\n \"actual-name-1\": {\"score\": 8, \"reasoning\":
|
|
379
|
+
\"...\"},\n \"actual-name-2\": {\"score\": 6, \"reasoning\": \"...\"},\n \"winner\":
|
|
380
|
+
\"actual-name-1\"\n}\n\nUse \"winner\": \"tie\" if scores are equal or too
|
|
381
|
+
close to call.\n"},{"role":"user","content":"## Evaluation criteria:\n1. provides
|
|
382
|
+
a correct scientific explanation\n2. is concise\n## Context:\nUser prompt:
|
|
383
|
+
Explain why the sky is blue in one sentence.\n## Candidates to evaluate: \"gemini-flash\",
|
|
384
|
+
\"gemini-pro\"\n## Responses:\n\n### gemini-flash:\nThe sky appears blue because
|
|
385
|
+
air molecules scatter shorter, blue wavelengths of sunlight more than other
|
|
386
|
+
colors, making blue light more visible in all directions.\n\n\n### gemini-pro:\nThe
|
|
387
|
+
sky is blue because the gases in Earth''s atmosphere scatter the sun''s blue
|
|
388
|
+
light more than they scatter other colors.\n\nScore each candidate (\"gemini-flash\",
|
|
389
|
+
\"gemini-pro\") from 0-10.\nUse these EXACT names in your JSON response. Declare
|
|
390
|
+
a winner."}],"response_format":{"type":"json_object"}}'
|
|
391
|
+
headers:
|
|
392
|
+
Content-Type:
|
|
393
|
+
- application/json
|
|
394
|
+
Authorization:
|
|
395
|
+
- Bearer <API_KEY>
|
|
396
|
+
User-Agent:
|
|
397
|
+
- Faraday v2.14.0
|
|
398
|
+
response:
|
|
399
|
+
status:
|
|
400
|
+
code: 200
|
|
401
|
+
message: OK
|
|
402
|
+
headers:
|
|
403
|
+
date:
|
|
404
|
+
- Thu, 25 Dec 2025 01:33:30 GMT
|
|
405
|
+
content-type:
|
|
406
|
+
- application/json
|
|
407
|
+
transfer-encoding:
|
|
408
|
+
- chunked
|
|
409
|
+
connection:
|
|
410
|
+
- keep-alive
|
|
411
|
+
content-encoding:
|
|
412
|
+
- gzip
|
|
413
|
+
access-control-allow-origin:
|
|
414
|
+
- "*"
|
|
415
|
+
vary:
|
|
416
|
+
- Accept-Encoding
|
|
417
|
+
permissions-policy:
|
|
418
|
+
- payment=(self "https://checkout.stripe.com" "https://connect-js.stripe.com"
|
|
419
|
+
"https://js.stripe.com" "https://*.js.stripe.com" "https://hooks.stripe.com")
|
|
420
|
+
referrer-policy:
|
|
421
|
+
- no-referrer, strict-origin-when-cross-origin
|
|
422
|
+
x-content-type-options:
|
|
423
|
+
- nosniff
|
|
424
|
+
server:
|
|
425
|
+
- cloudflare
|
|
426
|
+
cf-ray:
|
|
427
|
+
- 9b349731487028fe-MSP
|
|
428
|
+
body:
|
|
429
|
+
encoding: ASCII-8BIT
|
|
430
|
+
string: "\n \n\n \n\n \n{\"id\":\"gen-1766626409-8TZ17Ktn4T9sHdzVgBqX\",\"provider\":\"Google\",\"model\":\"google/gemini-3-flash-preview\",\"object\":\"chat.completion\",\"created\":1766626409,\"choices\":[{\"logprobs\":null,\"finish_reason\":\"stop\",\"native_finish_reason\":\"STOP\",\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"{\\n
|
|
431
|
+
\ \\\"gemini-flash\\\": {\\n \\\"score\\\": 10,\\n \\\"reasoning\\\":
|
|
432
|
+
\\\"Provides a mathematically and physically accurate explanation (referencing
|
|
433
|
+
shorter wavelengths) while remaining concise and confined to one sentence.\\\"\\n
|
|
434
|
+
\ },\\n \\\"gemini-pro\\\": {\\n \\\"score\\\": 9,\\n \\\"reasoning\\\":
|
|
435
|
+
\\\"Provides a correct explanation in one sentence, but is slightly less descriptive
|
|
436
|
+
than gemini-flash by omitting the mention of shorter wavelengths.\\\"\\n },\\n
|
|
437
|
+
\ \\\"winner\\\": \\\"gemini-flash\\\"\\n}\",\"refusal\":null,\"reasoning\":null}}],\"usage\":{\"prompt_tokens\":379,\"completion_tokens\":117,\"total_tokens\":496,\"cost\":0.0005405,\"is_byok\":false,\"prompt_tokens_details\":{\"cached_tokens\":0,\"audio_tokens\":0,\"video_tokens\":0},\"cost_details\":{\"upstream_inference_cost\":null,\"upstream_inference_prompt_cost\":0.0001895,\"upstream_inference_completions_cost\":0.000351},\"completion_tokens_details\":{\"reasoning_tokens\":0,\"image_tokens\":0}}}"
|
|
438
|
+
recorded_at: Thu, 25 Dec 2025 01:33:30 GMT
|
|
439
|
+
recorded_with: VCR 6.4.0
|