active_genie 0.0.10 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -57
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/basic.rb +75 -68
- data/lib/active_genie/battle.rb +4 -0
- data/lib/active_genie/clients/anthropic_client.rb +110 -0
- data/lib/active_genie/clients/google_client.rb +158 -0
- data/lib/active_genie/clients/helpers/retry.rb +29 -0
- data/lib/active_genie/clients/openai_client.rb +58 -38
- data/lib/active_genie/clients/unified_client.rb +5 -5
- data/lib/active_genie/concerns/loggable.rb +44 -0
- data/lib/active_genie/configuration/log_config.rb +1 -1
- data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
- data/lib/active_genie/configuration/providers/base_config.rb +85 -0
- data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
- data/lib/active_genie/configuration/providers/google_config.rb +56 -0
- data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
- data/lib/active_genie/configuration/providers_config.rb +7 -4
- data/lib/active_genie/configuration/runtime_config.rb +35 -0
- data/lib/active_genie/configuration.rb +18 -4
- data/lib/active_genie/data_extractor/README.md +0 -1
- data/lib/active_genie/data_extractor/basic.rb +22 -19
- data/lib/active_genie/data_extractor/from_informal.rb +4 -15
- data/lib/active_genie/data_extractor.rb +4 -0
- data/lib/active_genie/logger.rb +60 -14
- data/lib/active_genie/{league → ranking}/README.md +7 -7
- data/lib/active_genie/ranking/elo_round.rb +134 -0
- data/lib/active_genie/ranking/free_for_all.rb +93 -0
- data/lib/active_genie/ranking/player.rb +92 -0
- data/lib/active_genie/{league → ranking}/players_collection.rb +19 -12
- data/lib/active_genie/ranking/ranking.rb +153 -0
- data/lib/active_genie/ranking/ranking_scoring.rb +71 -0
- data/lib/active_genie/ranking.rb +12 -0
- data/lib/active_genie/scoring/README.md +1 -1
- data/lib/active_genie/scoring/basic.rb +93 -49
- data/lib/active_genie/scoring/{recommended_reviews.rb → recommended_reviewers.rb} +18 -7
- data/lib/active_genie/scoring.rb +6 -3
- data/lib/active_genie.rb +1 -1
- data/lib/tasks/benchmark.rake +27 -0
- metadata +100 -100
- data/lib/active_genie/configuration/openai_config.rb +0 -56
- data/lib/active_genie/league/elo_ranking.rb +0 -121
- data/lib/active_genie/league/free_for_all.rb +0 -62
- data/lib/active_genie/league/league.rb +0 -120
- data/lib/active_genie/league/player.rb +0 -59
- data/lib/active_genie/league.rb +0 -12
@@ -0,0 +1,27 @@
|
|
1
|
+
|
2
|
+
# Rake::TestTask is not loaded by `require 'rake'` alone; load it here so the
# task does not depend on the host Rakefile having required it.
require 'rake/testtask'

namespace :active_genie do
  desc "Run benchmarks, optionally for a specific module (e.g., rake active_genie:benchmark[data_extractor])"
  # Runs the benchmark test files found under benchmark/test_cases.
  #
  # args[:module_name] - optional sub-directory of benchmark/test_cases;
  #                      when given, only that directory's *_test.rb files run.
  task :benchmark, [:module_name] do |_, args|
    module_name = args[:module_name]

    Rake::TestTask.new(:run_benchmarks) do |t|
      t.libs << "benchmark"

      if module_name
        module_path = "benchmark/test_cases/#{module_name}/"
        puts "Running benchmarks for module: #{module_name}"
      else
        module_path = "benchmark/test_cases/"
        puts "Running all benchmarks"
      end

      t.test_files = FileList["#{module_path}**/*_test.rb"]
      t.warning = false
    end

    begin
      Rake::Task[:run_benchmarks].invoke
    rescue => e
      # A failing run is printed but not re-raised, so this task itself still
      # finishes without an error.
      puts e
    end
  end
end
|
metadata
CHANGED
@@ -1,114 +1,103 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_genie
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Radamés Roriz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02
|
11
|
+
date: 2025-04-02 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: "# ActiveGenie \U0001F9DE♂️\n>
|
14
|
-
|
15
|
-
is a Ruby gem that provides
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
Multi-reviewer evaluation system with automatic expert selection\n- \U0001F4AD **Leaderboard**:
|
21
|
-
Consistent rank items based on custom criteria, using multiple tecniques of ranking\n\n##
|
22
|
-
Installation\n\n1. Add to your Gemfile:\n```ruby\ngem 'active_genie'\n```\n\n2.
|
13
|
+
description: "# ActiveGenie \U0001F9DE♂️\n> The lodash for GenAI, stop reinventing
|
14
|
+
the wheel\n\n[](https://badge.fury.io/rb/active_genie)\n[](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml)\n\nActiveGenie
|
15
|
+
is a Ruby gem that provides valuable solutions powered by Generative AI (GenAI)
|
16
|
+
models. Just like Lodash or ActiveStorage, ActiveGenie brings a set of modules to reach
|
17
|
+
real value quickly and reliably.\nActiveGenie is backed by a custom benchmarking system
|
18
|
+
that ensures consistent quality and performance across different models and providers
|
19
|
+
in every release.\n\n## Installation\n\n1. Add to your Gemfile:\n```ruby\ngem 'active_genie'\n```\n\n2.
|
23
20
|
Install the gem:\n```shell\nbundle install\n```\n\n3. Generate the configuration:\n```shell\necho
|
24
21
|
\"ActiveGenie.load_tasks\" >> Rakefile\nrails g active_genie:install\n```\n\n4.
|
25
22
|
Configure your credentials in `config/initializers/active_genie.rb`:\n```ruby\nActiveGenie.configure
|
26
23
|
do |config|\n config.openai.api_key = ENV['OPENAI_API_KEY']\nend\n```\n\n## Quick
|
27
|
-
Start\n\n### Data Extractor\nExtract structured data from text using AI-powered
|
24
|
+
Start\n\n### Data Extractor\n\nExtract structured data from text using AI-powered
|
28
25
|
analysis, handling informal language and complex expressions.\n\n```ruby\ntext =
|
29
26
|
\"Nike Air Max 90 - Size 42 - $199.99\"\nschema = {\n brand: { \n type: 'string',\n
|
30
27
|
\ enum: [\"Nike\", \"Adidas\", \"Puma\"]\n },\n price: { \n type: 'number',\n
|
31
|
-
\ minimum: 0\n },\n size: {\n type: '
|
32
|
-
46\n }\n}\n\nresult = ActiveGenie::DataExtractor.call(text
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
\
|
49
|
-
|
50
|
-
|
51
|
-
|
28
|
+
\ minimum: 0\n },\n size: {\n type: 'number',\n minimum: 35,\n maximum:
|
29
|
+
46\n }\n}\n\nresult = ActiveGenie::DataExtractor.call(\n text,\n schema,\n config:
|
30
|
+
{ provider: :openai, model: 'gpt-4o-mini' } # optional\n)\n# => { \n# brand:
|
31
|
+
\"Nike\", \n# brand_explanation: \"Brand name found at start of text\",\n#
|
32
|
+
\ price: 199.99,\n# price_explanation: \"Price found in USD format at end\",\n#
|
33
|
+
\ size: 42,\n# size_explanation: \"Size explicitly stated in the middle\"\n#
|
34
|
+
\ }\n```\n\n*Recommended model*: `gpt-4o-mini`\n\nFeatures:\n- Structured data
|
35
|
+
extraction with type validation\n- Schema-based extraction with custom constraints\n-
|
36
|
+
Informal text analysis (litotes, hedging)\n- Detailed explanations for extracted
|
37
|
+
values\n\nSee the [Data Extractor README](lib/active_genie/data_extractor/README.md)
|
38
|
+
for informal text processing, advanced schemas, and detailed interface documentation.\n\n###
|
39
|
+
Scoring\nText evaluation system that provides detailed scoring and feedback using
|
40
|
+
multiple expert reviewers. Get balanced scoring through AI-powered expert reviewers
|
41
|
+
that automatically adapt to your content.\n\n```ruby\ntext = \"The code implements
|
42
|
+
a binary search algorithm with O(log n) complexity\"\ncriteria = \"Evaluate technical
|
43
|
+
accuracy and clarity\"\n\nresult = ActiveGenie::Scoring.basic(\n text,\n criteria,\n
|
44
|
+
\ config: { provider: :anthropic, model: 'claude-3-5-haiku-20241022' } # optional\n)\n#
|
45
|
+
=> {\n# algorithm_expert_score: 95,\n# algorithm_expert_reasoning: \"Accurately
|
46
|
+
describes binary search and its complexity\",\n# technical_writer_score: 90,\n#
|
47
|
+
\ technical_writer_reasoning: \"Clear and concise explanation of the algorithm\",\n#
|
48
|
+
\ final_score: 92.5\n# }\n```\n\n*Recommended model*: `claude-3-5-haiku-20241022`\n\nFeatures:\n-
|
49
|
+
Multi-reviewer evaluation with automatic expert selection\n- Detailed feedback with
|
50
|
+
scoring reasoning\n- Customizable reviewer weights\n- Flexible evaluation criteria\n\nSee
|
51
|
+
the [Scoring README](lib/active_genie/scoring/README.md) for advanced usage, custom
|
52
|
+
reviewers, and detailed interface documentation.\n\n### Battle\nAI-powered battle
|
53
|
+
evaluation system that determines winners between two players based on specified
|
54
|
+
criteria.\n\n```ruby\nrequire 'active_genie'\n\nplayer_1 = \"Implementation uses
|
55
|
+
dependency injection for better testability\"\nplayer_2 = \"Code has high test coverage
|
56
|
+
but tightly coupled components\"\ncriteria = \"Evaluate code quality and maintainability\"\n\nresult
|
57
|
+
= ActiveGenie::Battle.call(\n player_1,\n player_2,\n criteria,\n config: {
|
58
|
+
provider: :google, model: 'gemini-2.0-flash-lite' } # optional\n)\n# => {\n# winner_player:
|
59
|
+
\"Implementation uses dependency injection for better testability\",\n# reasoning:
|
60
|
+
\"Player 1 implementation demonstrates better maintainability through dependency
|
61
|
+
injection, \n# which allows for easier testing and component replacement.
|
62
|
+
While Player 2 has good test coverage, \n# the tight coupling makes
|
63
|
+
the code harder to maintain and modify.\",\n# what_could_be_changed_to_avoid_draw:
|
64
|
+
\"Focus on specific architectural patterns and design principles\"\n# }\n```\n\n*Recommended
|
65
|
+
model*: `gemini-2.0-flash-lite`\n\nFeatures:\n- Multi-reviewer evaluation with automatic
|
66
|
+
expert selection\n- Detailed feedback with scoring reasoning\n- Customizable reviewer
|
67
|
+
weights\n- Flexible evaluation criteria\n\nSee the [Battle README](lib/active_genie/battle/README.md)
|
52
68
|
for advanced usage, custom reviewers, and detailed interface documentation.\n\n###
|
53
|
-
|
54
|
-
|
55
|
-
= \"
|
56
|
-
= \
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
demonstrates better maintainability through dependency injection, \n# which
|
61
|
-
allows for easier testing and component replacement. While Player B has good test
|
62
|
-
coverage, \n# the tight coupling makes the code harder to maintain
|
63
|
-
and modify.\",\n# what_could_be_changed_to_avoid_draw: \"Focus on specific
|
64
|
-
architectural patterns and design principles\"\n# }\n```\n\nFeatures:\n- Multi-reviewer
|
65
|
-
evaluation with automatic expert selection\n- Detailed feedback with scoring reasoning\n-
|
66
|
-
Customizable reviewer weights\n- Flexible evaluation criteria\n\nSee the [Battle
|
67
|
-
README](lib/active_genie/battle/README.md) for advanced usage, custom reviewers,
|
68
|
-
and detailed interface documentation.\n\n### League\nThe League module provides
|
69
|
-
competitive ranking through multi-stage evaluation:\n\n\n```ruby\nrequire 'active_genie'\n\nplayers
|
70
|
-
= ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API', 'Websocket API']\ncriteria
|
71
|
-
= \"Best one to be used into a high changing environment\"\n\nresult = ActiveGenie::League.call(players,
|
72
|
-
criteria)\n# => {\n# winner_player: \"gRPC API\",\n# reasoning: \"gRPC
|
73
|
-
API is the best one to be used into a high changing environment\",\n# }\n```\n\n-
|
69
|
+
Ranking\nThe Ranking module provides competitive ranking through multi-stage evaluation:\n\n```ruby\nrequire
|
70
|
+
'active_genie'\n\nplayers = ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API',
|
71
|
+
'Websocket API']\ncriteria = \"Best one to be used into a high changing environment\"\n\nresult
|
72
|
+
= ActiveGenie::Ranking.call(\n players,\n criteria,\n config: { provider: :google,
|
73
|
+
model: 'gemini-2.0-flash-lite' } # optional\n)\n# => {\n# winner_player: \"gRPC
|
74
|
+
API\",\n# reasoning: \"gRPC API is the best one to be used into a high changing
|
75
|
+
environment\",\n# }\n```\n\n*Recommended model*: `gemini-2.0-flash-lite`\n\n-
|
74
76
|
**Multi-phase ranking system** combining expert scoring and ELO algorithms\n- **Automatic
|
75
77
|
elimination** of inconsistent performers using statistical analysis\n- **Dynamic
|
76
78
|
ranking adjustments** based on simulated pairwise battles, from bottom to top\n\nSee
|
77
|
-
the [
|
78
|
-
configuration, and advanced ranking strategies.\n\n### Summarizer (
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
to make the sentiment analysis as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext
|
100
|
-
= \"Example text to be analyzed\"\nsentiment = ActiveGenie::SentimentAnalyzer.call(text)\nputs
|
101
|
-
sentiment # => \"positive\"\n```\n\n## Configuration\n\n| Config | Description |
|
102
|
-
Default |\n|--------|-------------|---------|\n| `provider` | LLM provider (openai,
|
103
|
-
anthropic, etc) | `nil` |\n| `model` | Model to use | `nil` |\n| `api_key` | Provider
|
104
|
-
API key | `nil` |\n| `timeout` | Request timeout in seconds | `5` |\n| `max_retries`
|
105
|
-
| Maximum retry attempts | `3` |\n\n> **Note:** Each module can append its own set
|
106
|
-
of configuration, see the individual module documentation for details.\n\n## Contributing\n\n1.
|
107
|
-
Fork the repository\n2. Create your feature branch (`git checkout -b feature/amazing-feature`)\n3.
|
108
|
-
Commit your changes (`git commit -m 'Add amazing feature'`)\n4. Push to the branch
|
109
|
-
(`git push origin feature/amazing-feature`)\n5. Open a Pull Request\n## License\n\nThis
|
110
|
-
project is licensed under the MIT License - see the [LICENSE](LICENSE) file for
|
111
|
-
details.\n"
|
79
|
+
the [Ranking README](lib/active_genie/ranking/README.md) for implementation details,
|
80
|
+
configuration, and advanced ranking strategies.\n\n### Text Summarizer (Future)\n###
|
81
|
+
Categorizer (Future)\n### Language detector (Future)\n### Translator (Future)\n###
|
82
|
+
Sentiment analyzer (Future)\n\n## Benchmarking \U0001F9EA\n\nActiveGenie includes
|
83
|
+
a comprehensive benchmarking system to ensure consistent, high-quality outputs across
|
84
|
+
different LLM models and providers.\n\n```ruby\n# Run all benchmarks\nbundle exec
|
85
|
+
rake active_genie:benchmark\n\n# Run benchmarks for a specific module\nbundle exec
|
86
|
+
rake active_genie:benchmark[data_extractor]\n```\n\n### Latest Results\n\n| Model
|
87
|
+
| Overall Precision |\n|-------|-------------------|\n| claude-3-5-haiku-20241022
|
88
|
+
| 92.25% |\n| gemini-2.0-flash-lite | 84.25% |\n| gpt-4o-mini | 62.75% |\n| deepseek-chat
|
89
|
+
| 57.25% |\n\nSee the [Benchmark README](benchmark/README.md) for detailed results,
|
90
|
+
methodology, and how to contribute to our test suite.\n\n## Configuration\n\n| Config
|
91
|
+
| Description | Default |\n|--------|-------------|---------|\n| `provider` | LLM
|
92
|
+
provider (openai, anthropic, etc) | `nil` |\n| `model` | Model to use | `nil` |\n|
|
93
|
+
`api_key` | Provider API key | `nil` |\n| `timeout` | Request timeout in seconds
|
94
|
+
| `5` |\n| `max_retries` | Maximum retry attempts | `3` |\n\n> **Note:** Each module
|
95
|
+
can append its own set of configuration, see the individual module documentation
|
96
|
+
for details.\n\n## Contributing\n\n1. Fork the repository\n2. Create your feature
|
97
|
+
branch (`git checkout -b feature/amazing-feature`)\n3. Commit your changes (`git
|
98
|
+
commit -m 'Add amazing feature'`)\n4. Push to the branch (`git push origin feature/amazing-feature`)\n5.
|
99
|
+
Open a Pull Request\n\n## License\n\nThis project is licensed under the Apache License
|
100
|
+
2.0 - see the [LICENSE](LICENSE) file for details.\n"
|
112
101
|
email:
|
113
102
|
- radames@roriz.dev
|
114
103
|
executables: []
|
@@ -122,28 +111,39 @@ files:
|
|
122
111
|
- lib/active_genie/battle.rb
|
123
112
|
- lib/active_genie/battle/README.md
|
124
113
|
- lib/active_genie/battle/basic.rb
|
114
|
+
- lib/active_genie/clients/anthropic_client.rb
|
115
|
+
- lib/active_genie/clients/google_client.rb
|
116
|
+
- lib/active_genie/clients/helpers/retry.rb
|
125
117
|
- lib/active_genie/clients/openai_client.rb
|
126
118
|
- lib/active_genie/clients/unified_client.rb
|
119
|
+
- lib/active_genie/concerns/loggable.rb
|
127
120
|
- lib/active_genie/configuration.rb
|
128
121
|
- lib/active_genie/configuration/log_config.rb
|
129
|
-
- lib/active_genie/configuration/
|
122
|
+
- lib/active_genie/configuration/providers/anthropic_config.rb
|
123
|
+
- lib/active_genie/configuration/providers/base_config.rb
|
124
|
+
- lib/active_genie/configuration/providers/deepseek_config.rb
|
125
|
+
- lib/active_genie/configuration/providers/google_config.rb
|
126
|
+
- lib/active_genie/configuration/providers/openai_config.rb
|
130
127
|
- lib/active_genie/configuration/providers_config.rb
|
128
|
+
- lib/active_genie/configuration/runtime_config.rb
|
131
129
|
- lib/active_genie/data_extractor.rb
|
132
130
|
- lib/active_genie/data_extractor/README.md
|
133
131
|
- lib/active_genie/data_extractor/basic.rb
|
134
132
|
- lib/active_genie/data_extractor/from_informal.rb
|
135
|
-
- lib/active_genie/league.rb
|
136
|
-
- lib/active_genie/league/README.md
|
137
|
-
- lib/active_genie/league/elo_ranking.rb
|
138
|
-
- lib/active_genie/league/free_for_all.rb
|
139
|
-
- lib/active_genie/league/league.rb
|
140
|
-
- lib/active_genie/league/player.rb
|
141
|
-
- lib/active_genie/league/players_collection.rb
|
142
133
|
- lib/active_genie/logger.rb
|
134
|
+
- lib/active_genie/ranking.rb
|
135
|
+
- lib/active_genie/ranking/README.md
|
136
|
+
- lib/active_genie/ranking/elo_round.rb
|
137
|
+
- lib/active_genie/ranking/free_for_all.rb
|
138
|
+
- lib/active_genie/ranking/player.rb
|
139
|
+
- lib/active_genie/ranking/players_collection.rb
|
140
|
+
- lib/active_genie/ranking/ranking.rb
|
141
|
+
- lib/active_genie/ranking/ranking_scoring.rb
|
143
142
|
- lib/active_genie/scoring.rb
|
144
143
|
- lib/active_genie/scoring/README.md
|
145
144
|
- lib/active_genie/scoring/basic.rb
|
146
|
-
- lib/active_genie/scoring/
|
145
|
+
- lib/active_genie/scoring/recommended_reviewers.rb
|
146
|
+
- lib/tasks/benchmark.rake
|
147
147
|
- lib/tasks/install.rake
|
148
148
|
- lib/tasks/templates/active_genie.rb
|
149
149
|
homepage: https://github.com/Roriz/active_genie
|
@@ -163,7 +163,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
163
163
|
requirements:
|
164
164
|
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version:
|
166
|
+
version: 3.0.0
|
167
167
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
168
168
|
requirements:
|
169
169
|
- - ">="
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require_relative '../clients/openai_client'
|
2
|
-
|
3
|
-
module ActiveGenie::Configuration
  # Settings for the OpenAI provider. Each reader returns the value assigned
  # through its writer when one is set, otherwise an environment variable or a
  # built-in default.
  class OpenaiConfig
    attr_writer :api_key, :organization, :api_url, :client,
                :lower_tier_model, :middle_tier_model, :upper_tier_model

    # Assigned key, else ENV['OPENAI_API_KEY'].
    def api_key
      return @api_key if @api_key

      ENV['OPENAI_API_KEY']
    end

    # Assigned organization, else ENV['OPENAI_ORGANIZATION'].
    def organization
      return @organization if @organization

      ENV['OPENAI_ORGANIZATION']
    end

    # Model name used for the lower tier.
    def lower_tier_model
      return @lower_tier_model if @lower_tier_model

      'gpt-4o-mini'
    end

    # Model name used for the middle tier.
    def middle_tier_model
      return @middle_tier_model if @middle_tier_model

      'gpt-4o'
    end

    # Model name used for the upper tier.
    def upper_tier_model
      return @upper_tier_model if @upper_tier_model

      'o1-preview'
    end

    # Maps a tier name (:lower_tier, :middle_tier, :upper_tier, or anything
    # whose #to_sym gives one of those) to its model name.
    # Returns nil for nil or for an unknown tier.
    def tier_to_model(tier)
      case tier&.to_sym
      when :lower_tier then lower_tier_model
      when :middle_tier then middle_tier_model
      when :upper_tier then upper_tier_model
      end
    end

    # Base URL of the API; defaults to the public OpenAI endpoint.
    def api_url
      return @api_url if @api_url

      'https://api.openai.com/v1'
    end

    # Memoized client built from this configuration unless one was assigned.
    def client
      @client = ::ActiveGenie::Clients::OpenaiClient.new(self) unless @client
      @client
    end

    # Hash of the current settings; entries in +config+ are merged last and
    # therefore override the settings with the same key.
    def to_h(config = {})
      settings = {
        api_key: api_key,
        organization: organization,
        api_url: api_url,
        lower_tier_model: lower_tier_model,
        middle_tier_model: middle_tier_model,
        upper_tier_model: upper_tier_model
      }

      { **settings, **config }
    end
  end
end
|
@@ -1,121 +0,0 @@
|
|
1
|
-
require_relative '../battle/basic'
|
2
|
-
|
3
|
-
module ActiveGenie::Leaderboard
  # Refines player ratings with repeated Elo rounds.
  #
  # While more than MINIMAL_PLAYERS_TO_BATTLE players are eligible, every
  # player of the relegation tier battles defenders taken from the defense
  # tier, ratings are updated after each battle, and the whole relegation tier
  # is then eliminated.
  #
  # The players collection must respond to #each, #eligible_size,
  # #tier_relegation and #tier_defense; each player must respond to
  # #generate_elo_by_score, #id, #elo, #elo= and #eliminated=.
  class EloRanking
    # Number of distinct defenders each relegation player faces per round.
    MATCHS_PER_PLAYER = 3
    # Extra points removed from the loser on top of the Elo adjustment.
    LOSE_PENALTY = 15
    # Rounds run only while more than this many players are eligible.
    MINIMAL_PLAYERS_TO_BATTLE = 10
    # Elo K-factor: scales how far a single battle moves a rating.
    K = 32

    # Convenience entry point: builds an instance and runs it.
    def self.call(players, criteria, config: {})
      new(players, criteria, config:).call
    end

    # @param players  collection of players (see class comment for the interface)
    # @param criteria criteria forwarded to every battle
    # @param config [Hash] forwarded to every battle; config[:log] is merged
    #   into each log entry
    def initialize(players, criteria, config: {})
      @players = players
      @criteria = criteria
      @config = config
      @start_time = Time.now
    end

    # Runs the rounds and returns the (mutated) players collection.
    def call
      @players.each(&:generate_elo_by_score)

      round_count = 0
      while @players.eligible_size > MINIMAL_PLAYERS_TO_BATTLE
        round = create_round(@players.tier_relegation, @players.tier_defense)

        round.each do |player_a, player_b|
          winner, loser = battle(player_a, player_b) # This can take a while, can be parallelized
          update_elo(winner, loser)
          # A battle may produce no winner/loser (update_elo accepts nil), so
          # the trace must not dereference them unconditionally.
          ActiveGenie::Logger.trace({
            **log,
            step: :elo_battle,
            winner_id: winner&.id,
            loser_id: loser&.id,
            winner_elo: winner&.elo,
            loser_elo: loser&.elo
          })
        end

        eliminate_all_relegation_players
        round_count += 1
      end

      ActiveGenie::Logger.info({ **log, step: :elo_end, round_count:, eligible_size: @players.eligible_size })
      @players
    end

    private

    # Builds the matches of one round: every relegation player meets up to
    # MATCHS_PER_PLAYER distinct (by id) randomly chosen defense players.
    # Each match is a two-element array in random order.
    #
    # Sampling without replacement cannot loop forever when fewer than
    # MATCHS_PER_PLAYER distinct defenders exist; the player then simply gets
    # fewer matches.
    def create_round(relegation_players, defense_players)
      candidates = defense_players.uniq(&:id)

      relegation_players.flat_map do |player_a|
        candidates.sample(MATCHS_PER_PLAYER).map { |defender| [player_a, defender].shuffle }
      end
    end

    # Runs one battle and returns [winner, loser] as read from the result's
    # 'winner' and 'loser' keys (either may be nil).
    def battle(player_a, player_b)
      ActiveGenie::Battle.basic(
        player_a,
        player_b,
        @criteria,
        config:
      ).values_at('winner', 'loser')
    end

    # Applies the Elo adjustment to both players. The winner is capped at the
    # highest rating of the defense tier; the loser additionally loses
    # LOSE_PENALTY but never drops below the lowest relegation-tier rating.
    # Does nothing when there is no winner or no loser.
    def update_elo(winner, loser)
      return if winner.nil? || loser.nil?

      new_winner_elo, new_loser_elo = calculate_new_elo(winner.elo, loser.elo)

      winner.elo = [new_winner_elo, max_defense_elo].min
      loser.elo = [new_loser_elo - LOSE_PENALTY, min_relegation_elo].max
    end

    def max_defense_elo
      @players.tier_defense.max_by(&:elo).elo
    end

    def min_relegation_elo
      @players.tier_relegation.min_by(&:elo).elo
    end

    # Standard Elo update, see https://en.wikipedia.org/wiki/Elo_rating_system
    #
    #   expected = 1 / (1 + 10^((opponent - own) / 400))
    #   new      = own + K * (actual - expected)   # actual: 1 win, 0 loss
    #
    # Float arithmetic is required: with integer ratings the integer division
    # collapsed the expected score to 0. The loser's actual score is 0, so the
    # loser's rating goes down.
    #
    # @return [Array(Float, Float)] new winner rating, new loser rating
    def calculate_new_elo(winner_elo, loser_elo)
      expected_winner = 1.0 / (1.0 + (10.0**((loser_elo - winner_elo) / 400.0)))
      expected_loser = 1.0 - expected_winner

      [
        winner_elo + (K * (1 - expected_winner)),
        loser_elo + (K * (0 - expected_loser))
      ]
    end

    # Marks every relegation-tier player as eliminated and traces the count.
    def eliminate_all_relegation_players
      eliminations = @players.tier_relegation.size
      @players.tier_relegation.each { |player| player.eliminated = 'tier_relegation' }
      ActiveGenie::Logger.trace({ **log, step: :elo_round, eligible_size: @players.eligible_size, eliminations: })
    end

    # Shallow copy of the configuration passed to each battle.
    def config
      { **@config }
    end

    # Base fields for every log entry: caller-provided config[:log] plus the
    # time elapsed since this instance was created.
    def log
      {
        **(@config.dig(:log) || {}),
        duration: Time.now - @start_time
      }
    end
  end
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require_relative '../battle/basic'
|
2
|
-
|
3
|
-
module ActiveGenie::Leaderboard
  # Plays every pair of eligible players against each other once and records
  # the outcome (win / lose / draw) in each player's free_for_all counters.
  class FreeForAll
    # Convenience entry point: builds an instance and runs it.
    def self.call(players, criteria, config: {})
      new(players, criteria, config:).call
    end

    # @param players  collection responding to #eligible; each player responds
    #   to #id, #free_for_all (counter hash) and #free_for_all_score
    # @param criteria criteria forwarded to every battle
    # @param config [Hash] forwarded to every battle; config[:log] is merged
    #   into each log entry
    def initialize(players, criteria, config: {})
      @start_time = Time.now
      @players = players
      @criteria = criteria
      @config = config
    end

    # Runs all pairings and returns the (mutated) players collection.
    def call
      pairings.each { |pair| play(*pair) }

      @players
    end

    private

    # Battles one pair, updates the counters and traces the result.
    def play(player_a, player_b)
      outcome = ActiveGenie::Battle.basic(player_a, player_b, @criteria, config: { **@config })
      winner, loser = outcome.values_at('winner', 'loser')

      if !winner.nil? && !loser.nil?
        winner.free_for_all[:win] += 1
        loser.free_for_all[:lose] += 1
      else
        # No winner or no loser reported: both sides get a draw.
        player_a.free_for_all[:draw] += 1
        player_b.free_for_all[:draw] += 1
      end

      ActiveGenie::Logger.trace(
        {
          **log_base,
          step: :free_for_all_battle,
          winner_id: winner&.id,
          player_a_id: player_a.id,
          player_a_free_for_all_score: player_a.free_for_all_score,
          player_b_id: player_b.id,
          player_b_free_for_all_score: player_b.free_for_all_score
        }
      )
    end

    # TODO: reduce the number of matches based on transitivity.
    # For example, if A is better than B, and B is better than C, then A should clearly be better than C
    def pairings
      eligible_players = @players.eligible
      eligible_players.combination(2).to_a
    end

    # Base fields for every log entry: caller-provided config[:log] plus the
    # time elapsed since this instance was created.
    def log_base
      inherited = @config.dig(:log) || {}
      { **inherited, duration: Time.now - @start_time }
    end
  end
end
|
@@ -1,120 +0,0 @@
|
|
1
|
-
require 'securerandom'
|
2
|
-
|
3
|
-
require_relative './players_collection'
|
4
|
-
require_relative './free_for_all'
|
5
|
-
require_relative './elo_ranking'
|
6
|
-
require_relative '../scoring/recommended_reviews'
|
7
|
-
|
8
|
-
# Orchestrates player ranking through several evaluation stages:
#   1. Scores every player that has no score yet
#   2. Eliminates the last eligible player repeatedly while the score
#      variation is too high
#   3. Runs Elo ranking rounds (only when more than 10 players are eligible)
#   4. Runs free-for-all matches
#
# @example Basic usage
#   League.call(players, criteria)
#
# @param param_players [Array] Collection of players to evaluate
#   Example: ["Circle", "Triangle", "Square"]
#   or
#   [
#     { content: "Circle", score: 10 },
#     { content: "Triangle", score: 7 },
#     { content: "Square", score: 5 }
#   ]
# @param criteria [String] Evaluation criteria
#   Example: "What is more similar to the letter 'O'?"
# @param config [Hash] Additional configuration
#   Example: { model: "gpt-4o", api_key: ENV['OPENAI_API_KEY'] }
# @return the result of PlayersCollection#to_h for the ranked players
module ActiveGenie::League
  class League
    # Elimination continues while the players' coefficient of variation is at
    # least this value.
    SCORE_VARIATION_THRESHOLD = 10

    # Convenience entry point: builds an instance and runs it.
    def self.call(param_players, criteria, config: {})
      new(param_players, criteria, config:).call
    end

    def initialize(param_players, criteria, config: {})
      @param_players = param_players
      @criteria = criteria
      @config = config
      @league_id = SecureRandom.uuid
      @start_time = Time.now
    end

    # Runs all stages in order and returns players.to_h.
    def call
      set_initial_score_players
      eliminate_obvious_bad_players
      run_elo_ranking if players.eligible_size > 10
      run_free_for_all

      ActiveGenie::Logger.info({ **log, step: :league_end, top5: players.first(5).map(&:id) })
      players.to_h
    end

    private

    # Generates a score for every player that does not have one yet.
    def set_initial_score_players
      players_without_score = players.reject(&:score)
      players_without_score.each do |player|
        player.score = generate_score(player.content) # This can take a while, can be parallelized
        ActiveGenie::Logger.trace({ **log, step: :player_score, player_id: player.id, score: player.score })
      end

      ActiveGenie::Logger.info({ **log, step: :initial_score, evaluated_players: players_without_score.size })
    end

    # Scores one content with the recommended reviewers and returns the
    # 'final_score' entry of the scoring result.
    def generate_score(content)
      ActiveGenie::Scoring::Basic.call(content, @criteria, reviewers, config:)['final_score']
    end

    # Eliminates the last eligible player, one at a time, until the score
    # variation drops below SCORE_VARIATION_THRESHOLD.
    def eliminate_obvious_bad_players
      eliminated_count = 0
      while players.coefficient_of_variation >= SCORE_VARIATION_THRESHOLD
        players.eligible.last.eliminated = 'variation_too_high'
        eliminated_count += 1
      end

      ActiveGenie::Logger.info({ **log, step: :eliminate_obvious_bad_players, eliminated_count: })
    end

    # EloRanking and FreeForAll are defined under ActiveGenie::Leaderboard by
    # the files required above, so they are referenced by their full name; an
    # unqualified constant is not resolvable from ActiveGenie::League.
    def run_elo_ranking
      ::ActiveGenie::Leaderboard::EloRanking.call(players, @criteria, config:)
    end

    def run_free_for_all
      ::ActiveGenie::Leaderboard::FreeForAll.call(players, @criteria, config:)
    end

    # The three reviewers suggested for this league's content and criteria.
    def reviewers
      %w[reviewer1 reviewer2 reviewer3].map { |key| recommended_reviews[key] }
    end

    # Memoized reviewer recommendation based on two randomly sampled players.
    def recommended_reviews
      @recommended_reviews ||= ActiveGenie::Scoring::RecommendedReviews.call(
        [players.sample.content, players.sample.content].join("\n\n"),
        @criteria,
        config:
      )
    end

    def players
      @players ||= PlayersCollection.new(@param_players)
    end

    # Configuration forwarded to the sub-steps. The enriched log (which already
    # contains any caller-provided :log entries) is written last so that a
    # caller-provided :log does not drop league_id and the timing fields.
    def config
      { **@config, log: }
    end

    # Base fields for every log entry: caller-provided config[:log] plus the
    # league id, its start time and the time elapsed so far.
    def log
      {
        **(@config.dig(:log) || {}),
        league_id: @league_id,
        league_start_time: @start_time,
        duration: Time.now - @start_time
      }
    end
  end
end
|