benchgecko 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (6) hide show
  1. checksums.yaml +4 -4
  2. data/lib/benchgecko.rb +28 -246
  3. metadata +10 -15
  4. data/CHANGELOG.md +0 -9
  5. data/LICENSE.txt +0 -21
  6. data/README.md +0 -120
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e3c24fda70edf90737896315311a7fd3dadf39bdc5b72c0f80fa8d70aa794d5
4
- data.tar.gz: 63f534363f7724060ff2f430a65cda55b1d4d432ec3d566e7aa38527a8e5a14e
3
+ metadata.gz: 6a126fb765dd87b64cb48087748871672d902c2aee7913f1b3a2bc5eec495933
4
+ data.tar.gz: ee0aeaeec203627d07af67928e495f95c6828a3729020eea75a942e902e36e04
5
5
  SHA512:
6
- metadata.gz: 11141fb93c9345a50e484fe6597b89c205fb2cba4a81451929e19af888c6de94e1a71a8b7413073c8c990fb7a211a14ca498dcdce2b58b49743cfe6d35aa0867
7
- data.tar.gz: 4b6ef5a4f8d420710122b3a903e4e3d185298463bd8983b8911d1c00f9aede1c1a77a9cb1f13bdffc349448ab5e615b087b929c67cf8c5f55164ab006e09b5c5
6
+ metadata.gz: 02d105ade04b5cff84f970c70c926ec961343763da3a47f15da4cc325d30a1ba98e57d377330345cce5306925f832b9f8847c1da530d2a04b43d7334e90a6375
7
+ data.tar.gz: 0fe0f60f0f26b53bb717c415da31b3c167a148f9c0dcc9ed4315c8c1aad7e0e61f3ef6d98c88c07c42accfd0058e0eec80fed5eab71d5a88c9b3ed402799af38
data/lib/benchgecko.rb CHANGED
@@ -1,261 +1,43 @@
1
- # frozen_string_literal: true
2
-
3
- # BenchGecko - The CoinGecko for AI
4
- # Data platform for AI model benchmarks, pricing, and agent comparison
5
- # https://benchgecko.ai
1
+ require 'net/http'
2
+ require 'json'
6
3
 
7
4
  module BenchGecko
8
- VERSION = "0.1.1"
9
-
10
- # Represents an AI model with its benchmark scores, pricing, and metadata.
11
- class Model
12
- attr_reader :id, :name, :provider, :parameters, :context_window,
13
- :input_price, :output_price, :benchmarks, :metadata
14
-
15
- def initialize(attrs = {})
16
- @id = attrs[:id] || attrs["id"]
17
- @name = attrs[:name] || attrs["name"]
18
- @provider = attrs[:provider] || attrs["provider"]
19
- @parameters = attrs[:parameters] || attrs["parameters"]
20
- @context_window = attrs[:context_window] || attrs["context_window"]
21
- @input_price = attrs[:input_price] || attrs["input_price"]
22
- @output_price = attrs[:output_price] || attrs["output_price"]
23
- @benchmarks = attrs[:benchmarks] || attrs["benchmarks"] || {}
24
- @metadata = attrs[:metadata] || attrs["metadata"] || {}
25
- end
26
-
27
- # Cost per million tokens (input + output averaged)
28
- def cost_per_million
29
- return nil unless input_price && output_price
30
- ((input_price + output_price) / 2.0).round(4)
31
- end
32
-
33
- # Returns the score for a specific benchmark
34
- def score(benchmark_name)
35
- benchmarks[benchmark_name.to_s] || benchmarks[benchmark_name.to_sym]
36
- end
37
-
38
- # Returns a hash summary suitable for comparison tables
39
- def to_summary
40
- {
41
- name: name,
42
- provider: provider,
43
- parameters: parameters,
44
- context_window: context_window,
45
- cost_per_million: cost_per_million
46
- }
47
- end
5
+ VERSION = '0.2.1'
6
+ BASE_URL = 'https://benchgecko.ai/api/v1'
48
7
 
49
- def to_s
50
- "#{name} (#{provider}) - #{parameters}B params"
51
- end
8
+ def self.models(params = {})
9
+ get('/models', params)
52
10
  end
53
11
 
54
- # Represents an AI agent with capabilities and scores.
55
- class Agent
56
- attr_reader :id, :name, :category, :provider, :models_used,
57
- :scores, :capabilities, :metadata
58
-
59
- def initialize(attrs = {})
60
- @id = attrs[:id] || attrs["id"]
61
- @name = attrs[:name] || attrs["name"]
62
- @category = attrs[:category] || attrs["category"]
63
- @provider = attrs[:provider] || attrs["provider"]
64
- @models_used = attrs[:models_used] || attrs["models_used"] || []
65
- @scores = attrs[:scores] || attrs["scores"] || {}
66
- @capabilities = attrs[:capabilities] || attrs["capabilities"] || []
67
- @metadata = attrs[:metadata] || attrs["metadata"] || {}
68
- end
69
-
70
- def supports?(capability)
71
- capabilities.include?(capability.to_s)
72
- end
73
-
74
- def to_s
75
- "#{name} (#{category}) by #{provider}"
76
- end
12
+ def self.model(slug)
13
+ get("/models/#{slug}")
77
14
  end
78
15
 
79
- # Benchmark categories tracked by BenchGecko
80
- BENCHMARK_CATEGORIES = {
81
- reasoning: {
82
- name: "Reasoning",
83
- benchmarks: %w[MMLU MMLU-Pro ARC-Challenge HellaSwag WinoGrande GPQA],
84
- description: "Logical reasoning, knowledge, and common sense"
85
- },
86
- coding: {
87
- name: "Coding",
88
- benchmarks: %w[HumanEval MBPP SWE-bench LiveCodeBench BigCodeBench],
89
- description: "Code generation, debugging, and software engineering"
90
- },
91
- math: {
92
- name: "Mathematics",
93
- benchmarks: %w[GSM8K MATH AIME AMC Competition-Math],
94
- description: "Mathematical problem solving from arithmetic to olympiad"
95
- },
96
- instruction: {
97
- name: "Instruction Following",
98
- benchmarks: %w[IFEval MT-Bench AlpacaEval Chatbot-Arena],
99
- description: "Following complex instructions and conversational ability"
100
- },
101
- safety: {
102
- name: "Safety",
103
- benchmarks: %w[TruthfulQA BBQ ToxiGen BOLD],
104
- description: "Truthfulness, bias, and safety alignment"
105
- },
106
- multimodal: {
107
- name: "Multimodal",
108
- benchmarks: %w[MMMU MathVista VQAv2 TextVQA DocVQA],
109
- description: "Vision, document understanding, and cross-modal reasoning"
110
- },
111
- multilingual: {
112
- name: "Multilingual",
113
- benchmarks: %w[MGSM XL-Sum FLORES],
114
- description: "Performance across languages and translation"
115
- },
116
- long_context: {
117
- name: "Long Context",
118
- benchmarks: %w[RULER NIAH InfiniteBench LongBench],
119
- description: "Retrieval and reasoning over long documents"
120
- }
121
- }.freeze
122
-
123
- # Built-in model catalog with real benchmark data and pricing
124
- MODELS = {
125
- "gpt-4o" => {
126
- name: "GPT-4o", provider: "OpenAI", parameters: 200,
127
- context_window: 128_000, input_price: 2.50, output_price: 10.00,
128
- benchmarks: { "MMLU" => 88.7, "HumanEval" => 90.2, "GSM8K" => 95.8, "GPQA" => 53.6 }
129
- },
130
- "claude-3.5-sonnet" => {
131
- name: "Claude 3.5 Sonnet", provider: "Anthropic", parameters: nil,
132
- context_window: 200_000, input_price: 3.00, output_price: 15.00,
133
- benchmarks: { "MMLU" => 88.7, "HumanEval" => 92.0, "GSM8K" => 96.4, "GPQA" => 59.4 }
134
- },
135
- "gemini-2.0-flash" => {
136
- name: "Gemini 2.0 Flash", provider: "Google", parameters: nil,
137
- context_window: 1_000_000, input_price: 0.10, output_price: 0.40,
138
- benchmarks: { "MMLU" => 85.2, "HumanEval" => 84.0, "GSM8K" => 92.1 }
139
- },
140
- "llama-3.1-405b" => {
141
- name: "Llama 3.1 405B", provider: "Meta", parameters: 405,
142
- context_window: 128_000, input_price: 3.00, output_price: 3.00,
143
- benchmarks: { "MMLU" => 88.6, "HumanEval" => 89.0, "GSM8K" => 96.8, "GPQA" => 50.7 }
144
- },
145
- "mistral-large" => {
146
- name: "Mistral Large", provider: "Mistral", parameters: 123,
147
- context_window: 128_000, input_price: 2.00, output_price: 6.00,
148
- benchmarks: { "MMLU" => 84.0, "HumanEval" => 82.0, "GSM8K" => 91.2 }
149
- },
150
- "deepseek-v3" => {
151
- name: "DeepSeek V3", provider: "DeepSeek", parameters: 671,
152
- context_window: 128_000, input_price: 0.27, output_price: 1.10,
153
- benchmarks: { "MMLU" => 87.1, "HumanEval" => 82.6, "GSM8K" => 89.3, "GPQA" => 59.1 }
154
- }
155
- }.freeze
156
-
157
- class << self
158
- # Retrieve a model by its identifier
159
- #
160
- # model = BenchGecko.get_model("gpt-4o")
161
- # model.name #=> "GPT-4o"
162
- # model.provider #=> "OpenAI"
163
- # model.score("MMLU") #=> 88.7
164
- #
165
- def get_model(model_id)
166
- data = MODELS[model_id.to_s]
167
- return nil unless data
168
- Model.new(data.merge(id: model_id.to_s))
169
- end
170
-
171
- # List all available model identifiers
172
- def list_models
173
- MODELS.keys
174
- end
175
-
176
- # Compare two models side by side across benchmarks and pricing
177
- #
178
- # result = BenchGecko.compare_models("gpt-4o", "claude-3.5-sonnet")
179
- # result[:benchmark_diff] #=> {"MMLU" => 0.0, "HumanEval" => -1.8, ...}
180
- # result[:cheaper] #=> "gpt-4o"
181
- #
182
- def compare_models(model_a_id, model_b_id)
183
- a = get_model(model_a_id)
184
- b = get_model(model_b_id)
185
- return nil unless a && b
186
-
187
- all_benchmarks = (a.benchmarks.keys + b.benchmarks.keys).uniq
188
- benchmark_diff = {}
189
- all_benchmarks.each do |bench|
190
- score_a = a.score(bench)
191
- score_b = b.score(bench)
192
- benchmark_diff[bench] = (score_a && score_b) ? (score_a - score_b).round(2) : nil
193
- end
194
-
195
- cost_a = a.cost_per_million
196
- cost_b = b.cost_per_million
197
- cheaper = if cost_a && cost_b
198
- cost_a <= cost_b ? model_a_id : model_b_id
199
- end
200
-
201
- {
202
- model_a: a.to_summary,
203
- model_b: b.to_summary,
204
- benchmark_diff: benchmark_diff,
205
- cheaper: cheaper,
206
- cost_ratio: (cost_a && cost_b && cost_b > 0) ? (cost_a / cost_b).round(2) : nil
207
- }
208
- end
16
+ def self.benchmarks
17
+ get('/benchmarks')
18
+ end
209
19
 
210
- # Estimate cost for a given number of tokens
211
- #
212
- # BenchGecko.estimate_cost("gpt-4o", input_tokens: 1_000_000, output_tokens: 500_000)
213
- # #=> { input_cost: 2.50, output_cost: 5.00, total: 7.50 }
214
- #
215
- def estimate_cost(model_id, input_tokens:, output_tokens: 0)
216
- model = get_model(model_id)
217
- return nil unless model&.input_price && model&.output_price
20
+ def self.compare(*slugs)
21
+ get('/compare', models: slugs.join(','))
22
+ end
218
23
 
219
- input_cost = (model.input_price * input_tokens / 1_000_000.0).round(4)
220
- output_cost = (model.output_price * output_tokens / 1_000_000.0).round(4)
24
+ def self.pricing(slug = nil)
25
+ slug ? get("/pricing/#{slug}") : get('/pricing')
26
+ end
221
27
 
222
- {
223
- model: model.name,
224
- input_tokens: input_tokens,
225
- output_tokens: output_tokens,
226
- input_cost: input_cost,
227
- output_cost: output_cost,
228
- total: (input_cost + output_cost).round(4)
229
- }
230
- end
28
+ def self.providers
29
+ get('/providers')
30
+ end
231
31
 
232
- # List all benchmark categories
233
- def benchmark_categories
234
- BENCHMARK_CATEGORIES
235
- end
32
+ def self.agents
33
+ get('/agents')
34
+ end
236
35
 
237
- # Find models that score above a threshold on a given benchmark
238
- #
239
- # BenchGecko.top_models("MMLU", min_score: 87.0)
240
- # #=> [Model, Model, ...]
241
- #
242
- def top_models(benchmark, min_score: 0)
243
- MODELS.filter_map do |id, data|
244
- score = data[:benchmarks][benchmark]
245
- next unless score && score >= min_score
246
- get_model(id)
247
- end.sort_by { |m| -m.score(benchmark) }
248
- end
36
+ private
249
37
 
250
- # Find the cheapest model that meets a minimum score on a benchmark
251
- #
252
- # BenchGecko.cheapest_above("MMLU", 85.0)
253
- # #=> Model (Gemini 2.0 Flash)
254
- #
255
- def cheapest_above(benchmark, min_score)
256
- top_models(benchmark, min_score: min_score)
257
- .select(&:cost_per_million)
258
- .min_by(&:cost_per_million)
259
- end
38
+ def self.get(path, params = {})
39
+ uri = URI("#{BASE_URL}#{path}")
40
+ uri.query = URI.encode_www_form(params) unless params.empty?
41
+ JSON.parse(Net::HTTP.get(uri))
260
42
  end
261
43
  end
metadata CHANGED
@@ -1,28 +1,23 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: benchgecko
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - BenchGecko
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-03-31 00:00:00.000000000 Z
11
+ date: 2026-04-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: BenchGecko is the CoinGecko for AI. This gem provides a Ruby interface
14
- for accessing AI model benchmarks, comparing language models, estimating inference
15
- costs, and discovering AI agents. Query structured data on 300+ models across 50+
16
- providers with real benchmark scores, latency metrics, and transparent pricing.
17
- email:
18
- - hello@benchgecko.ai
13
+ description: Query AI model data, benchmark scores, and run side-by-side comparisons.
14
+ BenchGecko tracks every major AI model, benchmark, and provider with cross-provider
15
+ pricing.
16
+ email: hello@benchgecko.ai
19
17
  executables: []
20
18
  extensions: []
21
19
  extra_rdoc_files: []
22
20
  files:
23
- - CHANGELOG.md
24
- - LICENSE.txt
25
- - README.md
26
21
  - lib/benchgecko.rb
27
22
  homepage: https://benchgecko.ai
28
23
  licenses:
@@ -30,7 +25,8 @@ licenses:
30
25
  metadata:
31
26
  homepage_uri: https://benchgecko.ai
32
27
  source_code_uri: https://github.com/BenchGecko/benchgecko-ruby
33
- changelog_uri: https://github.com/BenchGecko/benchgecko-ruby/blob/main/CHANGELOG.md
28
+ documentation_uri: https://benchgecko.ai/api-docs
29
+ changelog_uri: https://benchgecko.ai/changelog
34
30
  post_install_message:
35
31
  rdoc_options: []
36
32
  require_paths:
@@ -39,7 +35,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
39
35
  requirements:
40
36
  - - ">="
41
37
  - !ruby/object:Gem::Version
42
- version: 2.7.0
38
+ version: '0'
43
39
  required_rubygems_version: !ruby/object:Gem::Requirement
44
40
  requirements:
45
41
  - - ">="
@@ -49,6 +45,5 @@ requirements: []
49
45
  rubygems_version: 3.0.3.1
50
46
  signing_key:
51
47
  specification_version: 4
52
- summary: Ruby client for BenchGecko - the data platform for AI model benchmarks, pricing,
53
- and agent comparison.
48
+ summary: Ruby SDK for BenchGecko AI model data platform
54
49
  test_files: []
data/CHANGELOG.md DELETED
@@ -1,9 +0,0 @@
1
- # Changelog
2
-
3
- ## 0.1.0 (2026-03-30)
4
-
5
- - Initial release
6
- - Model lookup, comparison, and cost estimation
7
- - Built-in catalog: GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 Flash, Llama 3.1 405B, Mistral Large, DeepSeek V3
8
- - Benchmark categories: reasoning, coding, math, instruction, safety, multimodal, multilingual, long context
9
- - Top models filtering and cheapest-above-threshold finder
data/LICENSE.txt DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2026 BenchGecko
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
data/README.md DELETED
@@ -1,120 +0,0 @@
1
- # BenchGecko for Ruby
2
-
3
- **The CoinGecko for AI.** Ruby client for accessing AI model benchmarks, comparing language models, estimating inference costs, and discovering AI agents.
4
-
5
- BenchGecko tracks 300+ AI models across 50+ providers with real benchmark scores, latency metrics, and transparent pricing. This gem gives you structured access to that data directly in your Ruby applications -- no API key required for the built-in catalog.
6
-
7
- ## Installation
8
-
9
- Add to your Gemfile:
10
-
11
- ```ruby
12
- gem "benchgecko"
13
- ```
14
-
15
- Or install directly:
16
-
17
- ```bash
18
- gem install benchgecko
19
- ```
20
-
21
- ## Quick Start
22
-
23
- ```ruby
24
- require "benchgecko"
25
-
26
- # Look up any model
27
- model = BenchGecko.get_model("claude-3.5-sonnet")
28
- puts model.name #=> "Claude 3.5 Sonnet"
29
- puts model.provider #=> "Anthropic"
30
- puts model.score("MMLU") #=> 88.7
31
-
32
- # List all tracked models
33
- BenchGecko.list_models.each { |id| puts id }
34
- ```
35
-
36
- ## Comparing Models
37
-
38
- The comparison engine surfaces benchmark differences and pricing ratios, making it straightforward to evaluate tradeoffs between models:
39
-
40
- ```ruby
41
- result = BenchGecko.compare_models("gpt-4o", "claude-3.5-sonnet")
42
-
43
- puts result[:cheaper] #=> "gpt-4o"
44
- puts result[:cost_ratio] #=> 0.69
45
- puts result[:benchmark_diff] #=> {"MMLU" => 0.0, "HumanEval" => -1.8, ...}
46
-
47
- # Positive diff means model_a scores higher
48
- result[:benchmark_diff].each do |bench, diff|
49
- next unless diff
50
- winner = diff >= 0 ? "GPT-4o" : "Claude 3.5 Sonnet"
51
- puts "#{bench}: #{winner} wins by #{diff.abs} points"
52
- end
53
- ```
54
-
55
- ## Cost Estimation
56
-
57
- Estimate inference costs before committing to a provider. Prices are per million tokens:
58
-
59
- ```ruby
60
- cost = BenchGecko.estimate_cost("gpt-4o",
61
- input_tokens: 2_000_000,
62
- output_tokens: 500_000
63
- )
64
-
65
- puts cost[:input_cost] #=> 5.0
66
- puts cost[:output_cost] #=> 5.0
67
- puts cost[:total] #=> 10.0
68
- ```
69
-
70
- ## Finding the Right Model
71
-
72
- Filter models by benchmark performance to find the best fit for your workload:
73
-
74
- ```ruby
75
- # All models scoring 87+ on MMLU
76
- strong_reasoners = BenchGecko.top_models("MMLU", min_score: 87.0)
77
- strong_reasoners.each { |m| puts "#{m.name}: #{m.score('MMLU')}" }
78
-
79
- # Cheapest model above a quality threshold
80
- budget_pick = BenchGecko.cheapest_above("MMLU", 85.0)
81
- puts "#{budget_pick.name} at $#{budget_pick.cost_per_million}/M tokens"
82
- ```
83
-
84
- ## Benchmark Categories
85
-
86
- BenchGecko organizes benchmarks into categories covering reasoning, coding, math, instruction following, safety, multimodal, multilingual, and long context evaluation:
87
-
88
- ```ruby
89
- BenchGecko.benchmark_categories.each do |key, info|
90
- puts "#{info[:name]}: #{info[:benchmarks].join(', ')}"
91
- puts " #{info[:description]}"
92
- end
93
- ```
94
-
95
- ## Built-in Model Catalog
96
-
97
- The gem ships with a curated catalog of major models from OpenAI, Anthropic, Google, Meta, Mistral, and DeepSeek. Each entry includes benchmark scores, parameter counts, context window sizes, and per-token pricing.
98
-
99
- ```ruby
100
- model = BenchGecko.get_model("deepseek-v3")
101
- puts model.parameters #=> 671
102
- puts model.context_window #=> 128000
103
- puts model.cost_per_million #=> 0.685
104
- ```
105
-
106
- ## Use Cases
107
-
108
- - **Model selection pipelines** -- programmatically pick the cheapest model that meets your quality bar
109
- - **Cost monitoring** -- estimate monthly spend across different model configurations
110
- - **Benchmark dashboards** -- pull structured scores into internal reporting tools
111
- - **Agent evaluation** -- compare AI agents across capability dimensions
112
-
113
- ## Resources
114
-
115
- - [BenchGecko](https://benchgecko.ai) -- Full platform with interactive comparisons
116
- - [Source Code](https://github.com/BenchGecko/benchgecko-ruby) -- Contributions welcome
117
-
118
- ## License
119
-
120
- MIT License. See [LICENSE.txt](LICENSE.txt) for details.