active_genie 0.0.10 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -57
  3. data/VERSION +1 -1
  4. data/lib/active_genie/battle/README.md +7 -7
  5. data/lib/active_genie/battle/basic.rb +75 -68
  6. data/lib/active_genie/battle.rb +4 -0
  7. data/lib/active_genie/clients/anthropic_client.rb +110 -0
  8. data/lib/active_genie/clients/google_client.rb +158 -0
  9. data/lib/active_genie/clients/helpers/retry.rb +29 -0
  10. data/lib/active_genie/clients/openai_client.rb +58 -38
  11. data/lib/active_genie/clients/unified_client.rb +5 -5
  12. data/lib/active_genie/concerns/loggable.rb +44 -0
  13. data/lib/active_genie/configuration/log_config.rb +1 -1
  14. data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
  15. data/lib/active_genie/configuration/providers/base_config.rb +85 -0
  16. data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
  17. data/lib/active_genie/configuration/providers/google_config.rb +56 -0
  18. data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
  19. data/lib/active_genie/configuration/providers_config.rb +7 -4
  20. data/lib/active_genie/configuration/runtime_config.rb +35 -0
  21. data/lib/active_genie/configuration.rb +18 -4
  22. data/lib/active_genie/data_extractor/README.md +0 -1
  23. data/lib/active_genie/data_extractor/basic.rb +22 -19
  24. data/lib/active_genie/data_extractor/from_informal.rb +4 -15
  25. data/lib/active_genie/data_extractor.rb +4 -0
  26. data/lib/active_genie/logger.rb +60 -14
  27. data/lib/active_genie/{league → ranking}/README.md +7 -7
  28. data/lib/active_genie/ranking/elo_round.rb +134 -0
  29. data/lib/active_genie/ranking/free_for_all.rb +93 -0
  30. data/lib/active_genie/ranking/player.rb +92 -0
  31. data/lib/active_genie/{league → ranking}/players_collection.rb +19 -12
  32. data/lib/active_genie/ranking/ranking.rb +153 -0
  33. data/lib/active_genie/ranking/ranking_scoring.rb +71 -0
  34. data/lib/active_genie/ranking.rb +12 -0
  35. data/lib/active_genie/scoring/README.md +1 -1
  36. data/lib/active_genie/scoring/basic.rb +93 -49
  37. data/lib/active_genie/scoring/{recommended_reviews.rb → recommended_reviewers.rb} +18 -7
  38. data/lib/active_genie/scoring.rb +6 -3
  39. data/lib/active_genie.rb +1 -1
  40. data/lib/tasks/benchmark.rake +27 -0
  41. metadata +100 -100
  42. data/lib/active_genie/configuration/openai_config.rb +0 -56
  43. data/lib/active_genie/league/elo_ranking.rb +0 -121
  44. data/lib/active_genie/league/free_for_all.rb +0 -62
  45. data/lib/active_genie/league/league.rb +0 -120
  46. data/lib/active_genie/league/player.rb +0 -59
  47. data/lib/active_genie/league.rb +0 -12
@@ -0,0 +1,27 @@
1
+
2
+ namespace :active_genie do
3
+ desc "Run benchmarks, optionally for a specific module (e.g., rake active_genie:benchmark[data_extractor])"
4
+ task :benchmark, [:module_name] do |_, args|
5
+ Rake::TestTask.new(:run_benchmarks) do |t|
6
+ t.libs << "benchmark"
7
+
8
+ if args[:module_name]
9
+ module_name = args[:module_name]
10
+ module_path = "benchmark/test_cases/#{module_name}/"
11
+ t.test_files = FileList["#{module_path}**/*_test.rb"]
12
+ puts "Running benchmarks for module: #{module_name}"
13
+ else
14
+ t.test_files = FileList["benchmark/test_cases/**/*_test.rb"]
15
+ puts "Running all benchmarks"
16
+ end
17
+
18
+ t.warning = false
19
+ end
20
+
21
+ begin
22
+ Rake::Task[:run_benchmarks].invoke
23
+ rescue => e
24
+ puts e
25
+ end
26
+ end
27
+ end
metadata CHANGED
@@ -1,114 +1,103 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_genie
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Radamés Roriz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-10 00:00:00.000000000 Z
11
+ date: 2025-04-02 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: "# ActiveGenie \U0001F9DE‍♂️\n> Transform your Ruby application with
14
- powerful, production-ready GenAI features\n\n[![Gem Version](https://badge.fury.io/rb/active_genie.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/active_genie)\n[![Ruby](https://github.com/roriz/active_genie/actions/workflows/ruby.yml/badge.svg)](https://github.com/roriz/active_genie/actions/workflows/ruby.yml)\n\nActiveGenie
15
- is a Ruby gem that provides a polished, production-ready interface for working with
16
- Generative AI (GenAI) models. Just like ActiveStorage simplifies file handling in
17
- Rails, ActiveGenie makes it effortless to integrate GenAI capabilities into your
18
- Ruby applications.\n\n## Features\n\n- \U0001F3AF **Data Extraction**: Extract structured
19
- data from unstructured text with type validation\n- \U0001F4CA **Smart Scoring**:
20
- Multi-reviewer evaluation system with automatic expert selection\n- \U0001F4AD **Leaderboard**:
21
- Consistent rank items based on custom criteria, using multiple tecniques of ranking\n\n##
22
- Installation\n\n1. Add to your Gemfile:\n```ruby\ngem 'active_genie'\n```\n\n2.
13
+ description: "# ActiveGenie \U0001F9DE‍♂️\n> The lodash for GenAI, stop reinventing
14
+ the wheel\n\n[![Gem Version](https://badge.fury.io/rb/active_genie.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/active_genie)\n[![Ruby](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml/badge.svg)](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml)\n\nActiveGenie
15
+ is a Ruby gem that provides valuable solutions powered by Generative AI (GenAI)
16
+ models. Just like Lodash or ActiveStorage, ActiveGenie brings a set of modules that
17
+ deliver real value quickly and reliably.\nActiveGenie is backed by a custom benchmarking system
18
+ that ensures consistent quality and performance across different models and providers
19
+ in every release.\n\n## Installation\n\n1. Add to your Gemfile:\n```ruby\ngem 'active_genie'\n```\n\n2.
23
20
  Install the gem:\n```shell\nbundle install\n```\n\n3. Generate the configuration:\n```shell\necho
24
21
  \"ActiveGenie.load_tasks\" >> Rakefile\nrails g active_genie:install\n```\n\n4.
25
22
  Configure your credentials in `config/initializers/active_genie.rb`:\n```ruby\nActiveGenie.configure
26
23
  do |config|\n config.openai.api_key = ENV['OPENAI_API_KEY']\nend\n```\n\n## Quick
27
- Start\n\n### Data Extractor\nExtract structured data from text using AI-powered
24
+ Start\n\n### Data Extractor\n\nExtract structured data from text using AI-powered
28
25
  analysis, handling informal language and complex expressions.\n\n```ruby\ntext =
29
26
  \"Nike Air Max 90 - Size 42 - $199.99\"\nschema = {\n brand: { \n type: 'string',\n
30
27
  \ enum: [\"Nike\", \"Adidas\", \"Puma\"]\n },\n price: { \n type: 'number',\n
31
- \ minimum: 0\n },\n size: {\n type: 'integer',\n minimum: 35,\n maximum:
32
- 46\n }\n}\n\nresult = ActiveGenie::DataExtractor.call(text, schema)\n# => { \n#
33
- \ brand: \"Nike\", \n# brand_explanation: \"Brand name found at start of
34
- text\",\n# price: 199.99,\n# price_explanation: \"Price found in USD format
35
- at end\",\n# size: 42,\n# size_explanation: \"Size explicitly stated in
36
- the middle\"\n# }\n```\n\nFeatures:\n- Structured data extraction with type validation\n-
37
- Schema-based extraction with custom constraints\n- Informal text analysis (litotes,
38
- hedging)\n- Detailed explanations for extracted values\n\nSee the [Data Extractor
39
- README](lib/active_genie/data_extractor/README.md) for informal text processing,
40
- advanced schemas, and detailed interface documentation.\n\n### Scoring\nText evaluation
41
- system that provides detailed scoring and feedback using multiple expert reviewers.
42
- Get balanced scoring through AI-powered expert reviewers that automatically adapt
43
- to your content.\n\n```ruby\ntext = \"The code implements a binary search algorithm
44
- with O(log n) complexity\"\ncriteria = \"Evaluate technical accuracy and clarity\"\n\nresult
45
- = ActiveGenie::Scoring.basic(text, criteria)\n# => {\n# algorithm_expert_score:
46
- 95,\n# algorithm_expert_reasoning: \"Accurately describes binary search and
47
- its complexity\",\n# technical_writer_score: 90,\n# technical_writer_reasoning:
48
- \"Clear and concise explanation of the algorithm\",\n# final_score: 92.5\n#
49
- \ }\n```\n\nFeatures:\n- Multi-reviewer evaluation with automatic expert selection\n-
50
- Detailed feedback with scoring reasoning\n- Customizable reviewer weights\n- Flexible
51
- evaluation criteria\n\nSee the [Scoring README](lib/active_genie/scoring/README.md)
28
+ \ minimum: 0\n },\n size: {\n type: 'number',\n minimum: 35,\n maximum:
29
+ 46\n }\n}\n\nresult = ActiveGenie::DataExtractor.call(\n text,\n schema,\n config:
30
+ { provider: :openai, model: 'gpt-4o-mini' } # optional\n)\n# => { \n# brand:
31
+ \"Nike\", \n# brand_explanation: \"Brand name found at start of text\",\n#
32
+ \ price: 199.99,\n# price_explanation: \"Price found in USD format at end\",\n#
33
+ \ size: 42,\n# size_explanation: \"Size explicitly stated in the middle\"\n#
34
+ \ }\n```\n\n*Recommended model*: `gpt-4o-mini`\n\nFeatures:\n- Structured data
35
+ extraction with type validation\n- Schema-based extraction with custom constraints\n-
36
+ Informal text analysis (litotes, hedging)\n- Detailed explanations for extracted
37
+ values\n\nSee the [Data Extractor README](lib/active_genie/data_extractor/README.md)
38
+ for informal text processing, advanced schemas, and detailed interface documentation.\n\n###
39
+ Scoring\nText evaluation system that provides detailed scoring and feedback using
40
+ multiple expert reviewers. Get balanced scoring through AI-powered expert reviewers
41
+ that automatically adapt to your content.\n\n```ruby\ntext = \"The code implements
42
+ a binary search algorithm with O(log n) complexity\"\ncriteria = \"Evaluate technical
43
+ accuracy and clarity\"\n\nresult = ActiveGenie::Scoring.basic(\n text,\n criteria,\n
44
+ \ config: { provider: :anthropic, model: 'claude-3-5-haiku-20241022' } # optional\n)\n#
45
+ => {\n# algorithm_expert_score: 95,\n# algorithm_expert_reasoning: \"Accurately
46
+ describes binary search and its complexity\",\n# technical_writer_score: 90,\n#
47
+ \ technical_writer_reasoning: \"Clear and concise explanation of the algorithm\",\n#
48
+ \ final_score: 92.5\n# }\n```\n\n*Recommended model*: `claude-3-5-haiku-20241022`\n\nFeatures:\n-
49
+ Multi-reviewer evaluation with automatic expert selection\n- Detailed feedback with
50
+ scoring reasoning\n- Customizable reviewer weights\n- Flexible evaluation criteria\n\nSee
51
+ the [Scoring README](lib/active_genie/scoring/README.md) for advanced usage, custom
52
+ reviewers, and detailed interface documentation.\n\n### Battle\nAI-powered battle
53
+ evaluation system that determines winners between two players based on specified
54
+ criteria.\n\n```ruby\nrequire 'active_genie'\n\nplayer_1 = \"Implementation uses
55
+ dependency injection for better testability\"\nplayer_2 = \"Code has high test coverage
56
+ but tightly coupled components\"\ncriteria = \"Evaluate code quality and maintainability\"\n\nresult
57
+ = ActiveGenie::Battle.call(\n player_1,\n player_2,\n criteria,\n config: {
58
+ provider: :google, model: 'gemini-2.0-flash-lite' } # optional\n)\n# => {\n# winner_player:
59
+ \"Implementation uses dependency injection for better testability\",\n# reasoning:
60
+ \"Player 1 implementation demonstrates better maintainability through dependency
61
+ injection, \n# which allows for easier testing and component replacement.
62
+ While Player 2 has good test coverage, \n# the tight coupling makes
63
+ the code harder to maintain and modify.\",\n# what_could_be_changed_to_avoid_draw:
64
+ \"Focus on specific architectural patterns and design principles\"\n# }\n```\n\n*Recommended
65
+ model*: `gemini-2.0-flash-lite`\n\nFeatures:\n- Multi-reviewer evaluation with automatic
66
+ expert selection\n- Detailed feedback with scoring reasoning\n- Customizable reviewer
67
+ weights\n- Flexible evaluation criteria\n\nSee the [Battle README](lib/active_genie/battle/README.md)
52
68
  for advanced usage, custom reviewers, and detailed interface documentation.\n\n###
53
- Battle\nAI-powered battle evaluation system that determines winners between two
54
- players based on specified criteria.\n\n```ruby\nrequire 'active_genie'\n\nplayer_a
55
- = \"Implementation uses dependency injection for better testability\"\nplayer_b
56
- = \"Code has high test coverage but tightly coupled components\"\ncriteria = \"Evaluate
57
- code quality and maintainability\"\n\nresult = ActiveGenie::Battle.call(player_a,
58
- player_b, criteria)\n# => {\n# winner_player: \"Implementation uses dependency
59
- injection for better testability\",\n# reasoning: \"Player A's implementation
60
- demonstrates better maintainability through dependency injection, \n# which
61
- allows for easier testing and component replacement. While Player B has good test
62
- coverage, \n# the tight coupling makes the code harder to maintain
63
- and modify.\",\n# what_could_be_changed_to_avoid_draw: \"Focus on specific
64
- architectural patterns and design principles\"\n# }\n```\n\nFeatures:\n- Multi-reviewer
65
- evaluation with automatic expert selection\n- Detailed feedback with scoring reasoning\n-
66
- Customizable reviewer weights\n- Flexible evaluation criteria\n\nSee the [Battle
67
- README](lib/active_genie/battle/README.md) for advanced usage, custom reviewers,
68
- and detailed interface documentation.\n\n### League\nThe League module provides
69
- competitive ranking through multi-stage evaluation:\n\n\n```ruby\nrequire 'active_genie'\n\nplayers
70
- = ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API', 'Websocket API']\ncriteria
71
- = \"Best one to be used into a high changing environment\"\n\nresult = ActiveGenie::League.call(players,
72
- criteria)\n# => {\n# winner_player: \"gRPC API\",\n# reasoning: \"gRPC
73
- API is the best one to be used into a high changing environment\",\n# }\n```\n\n-
69
+ Ranking\nThe Ranking module provides competitive ranking through multi-stage evaluation:\n\n```ruby\nrequire
70
+ 'active_genie'\n\nplayers = ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API',
71
+ 'Websocket API']\ncriteria = \"Best one to be used into a high changing environment\"\n\nresult
72
+ = ActiveGenie::Ranking.call(\n players,\n criteria,\n config: { provider: :google,
73
+ model: 'gemini-2.0-flash-lite' } # optional\n)\n# => {\n# winner_player: \"gRPC
74
+ API\",\n# reasoning: \"gRPC API is the best one to be used into a high changing
75
+ environment\",\n# }\n```\n\n*Recommended model*: `gemini-2.0-flash-lite`\n\n-
74
76
  **Multi-phase ranking system** combining expert scoring and ELO algorithms\n- **Automatic
75
77
  elimination** of inconsistent performers using statistical analysis\n- **Dynamic
76
78
  ranking adjustments** based on simulated pairwise battles, from bottom to top\n\nSee
77
- the [League README](lib/active_genie/league/README.md) for implementation details,
78
- configuration, and advanced ranking strategies.\n\n### Summarizer (WIP)\nThe summarizer
79
- is a tool that can be used to summarize a given text. It uses a set of rules to
80
- summarize the text out of the box. Uses the best practices of prompt engineering
81
- and engineering to make the summarization as accurate as possible.\n\n```ruby\nrequire
82
- 'active_genie'\n\ntext = \"Example text to be summarized. The fox jumps over the
83
- dog\"\nsummarized_text = ActiveGenie::Summarizer.call(text)\nputs summarized_text
84
- # => \"The fox jumps over the dog\"\n```\n\n### Language detector (WIP)\nThe language
85
- detector is a tool that can be used to detect the language of a given text. It uses
86
- a set of rules to detect the language of the text out of the box. Uses the best
87
- practices of prompt engineering and engineering to make the language detection as
88
- accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext = \"Example text
89
- to be detected\"\nlanguage = ActiveGenie::LanguageDetector.call(text)\nputs language
90
- # => \"en\"\n```\n\n### Translator (WIP)\nThe translator is a tool that can be used
91
- to translate a given text. It uses a set of rules to translate the text out of the
92
- box. Uses the best practices of prompt engineering and engineering to make the translation
93
- as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext = \"Example
94
- text to be translated\"\ntranslated_text = ActiveGenie::Translator.call(text, from:
95
- 'en', to: 'pt')\nputs translated_text # => \"Exemplo de texto a ser traduzido\"\n```\n\n###
96
- Sentiment analyzer (WIP)\nThe sentiment analyzer is a tool that can be used to analyze
97
- the sentiment of a given text. It uses a set of rules to analyze the sentiment of
98
- the text out of the box. Uses the best practices of prompt engineering and engineering
99
- to make the sentiment analysis as accurate as possible.\n\n```ruby\nrequire 'active_genie'\n\ntext
100
- = \"Example text to be analyzed\"\nsentiment = ActiveGenie::SentimentAnalyzer.call(text)\nputs
101
- sentiment # => \"positive\"\n```\n\n## Configuration\n\n| Config | Description |
102
- Default |\n|--------|-------------|---------|\n| `provider` | LLM provider (openai,
103
- anthropic, etc) | `nil` |\n| `model` | Model to use | `nil` |\n| `api_key` | Provider
104
- API key | `nil` |\n| `timeout` | Request timeout in seconds | `5` |\n| `max_retries`
105
- | Maximum retry attempts | `3` |\n\n> **Note:** Each module can append its own set
106
- of configuration, see the individual module documentation for details.\n\n## Contributing\n\n1.
107
- Fork the repository\n2. Create your feature branch (`git checkout -b feature/amazing-feature`)\n3.
108
- Commit your changes (`git commit -m 'Add amazing feature'`)\n4. Push to the branch
109
- (`git push origin feature/amazing-feature`)\n5. Open a Pull Request\n## License\n\nThis
110
- project is licensed under the MIT License - see the [LICENSE](LICENSE) file for
111
- details.\n"
79
+ the [Ranking README](lib/active_genie/ranking/README.md) for implementation details,
80
+ configuration, and advanced ranking strategies.\n\n### Text Summarizer (Future)\n###
81
+ Categorizer (Future)\n### Language detector (Future)\n### Translator (Future)\n###
82
+ Sentiment analyzer (Future)\n\n## Benchmarking \U0001F9EA\n\nActiveGenie includes
83
+ a comprehensive benchmarking system to ensure consistent, high-quality outputs across
84
+ different LLM models and providers.\n\n```ruby\n# Run all benchmarks\nbundle exec
85
+ rake active_genie:benchmark\n\n# Run benchmarks for a specific module\nbundle exec
86
+ rake active_genie:benchmark[data_extractor]\n```\n\n### Latest Results\n\n| Model
87
+ | Overall Precision |\n|-------|-------------------|\n| claude-3-5-haiku-20241022
88
+ | 92.25% |\n| gemini-2.0-flash-lite | 84.25% |\n| gpt-4o-mini | 62.75% |\n| deepseek-chat
89
+ | 57.25% |\n\nSee the [Benchmark README](benchmark/README.md) for detailed results,
90
+ methodology, and how to contribute to our test suite.\n\n## Configuration\n\n| Config
91
+ | Description | Default |\n|--------|-------------|---------|\n| `provider` | LLM
92
+ provider (openai, anthropic, etc) | `nil` |\n| `model` | Model to use | `nil` |\n|
93
+ `api_key` | Provider API key | `nil` |\n| `timeout` | Request timeout in seconds
94
+ | `5` |\n| `max_retries` | Maximum retry attempts | `3` |\n\n> **Note:** Each module
95
+ can append its own set of configuration, see the individual module documentation
96
+ for details.\n\n## Contributing\n\n1. Fork the repository\n2. Create your feature
97
+ branch (`git checkout -b feature/amazing-feature`)\n3. Commit your changes (`git
98
+ commit -m 'Add amazing feature'`)\n4. Push to the branch (`git push origin feature/amazing-feature`)\n5.
99
+ Open a Pull Request\n\n## License\n\nThis project is licensed under the Apache License
100
+ 2.0 - see the [LICENSE](LICENSE) file for details.\n"
112
101
  email:
113
102
  - radames@roriz.dev
114
103
  executables: []
@@ -122,28 +111,39 @@ files:
122
111
  - lib/active_genie/battle.rb
123
112
  - lib/active_genie/battle/README.md
124
113
  - lib/active_genie/battle/basic.rb
114
+ - lib/active_genie/clients/anthropic_client.rb
115
+ - lib/active_genie/clients/google_client.rb
116
+ - lib/active_genie/clients/helpers/retry.rb
125
117
  - lib/active_genie/clients/openai_client.rb
126
118
  - lib/active_genie/clients/unified_client.rb
119
+ - lib/active_genie/concerns/loggable.rb
127
120
  - lib/active_genie/configuration.rb
128
121
  - lib/active_genie/configuration/log_config.rb
129
- - lib/active_genie/configuration/openai_config.rb
122
+ - lib/active_genie/configuration/providers/anthropic_config.rb
123
+ - lib/active_genie/configuration/providers/base_config.rb
124
+ - lib/active_genie/configuration/providers/deepseek_config.rb
125
+ - lib/active_genie/configuration/providers/google_config.rb
126
+ - lib/active_genie/configuration/providers/openai_config.rb
130
127
  - lib/active_genie/configuration/providers_config.rb
128
+ - lib/active_genie/configuration/runtime_config.rb
131
129
  - lib/active_genie/data_extractor.rb
132
130
  - lib/active_genie/data_extractor/README.md
133
131
  - lib/active_genie/data_extractor/basic.rb
134
132
  - lib/active_genie/data_extractor/from_informal.rb
135
- - lib/active_genie/league.rb
136
- - lib/active_genie/league/README.md
137
- - lib/active_genie/league/elo_ranking.rb
138
- - lib/active_genie/league/free_for_all.rb
139
- - lib/active_genie/league/league.rb
140
- - lib/active_genie/league/player.rb
141
- - lib/active_genie/league/players_collection.rb
142
133
  - lib/active_genie/logger.rb
134
+ - lib/active_genie/ranking.rb
135
+ - lib/active_genie/ranking/README.md
136
+ - lib/active_genie/ranking/elo_round.rb
137
+ - lib/active_genie/ranking/free_for_all.rb
138
+ - lib/active_genie/ranking/player.rb
139
+ - lib/active_genie/ranking/players_collection.rb
140
+ - lib/active_genie/ranking/ranking.rb
141
+ - lib/active_genie/ranking/ranking_scoring.rb
143
142
  - lib/active_genie/scoring.rb
144
143
  - lib/active_genie/scoring/README.md
145
144
  - lib/active_genie/scoring/basic.rb
146
- - lib/active_genie/scoring/recommended_reviews.rb
145
+ - lib/active_genie/scoring/recommended_reviewers.rb
146
+ - lib/tasks/benchmark.rake
147
147
  - lib/tasks/install.rake
148
148
  - lib/tasks/templates/active_genie.rb
149
149
  homepage: https://github.com/Roriz/active_genie
@@ -163,7 +163,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
163
163
  requirements:
164
164
  - - ">="
165
165
  - !ruby/object:Gem::Version
166
- version: 2.0.0
166
+ version: 3.0.0
167
167
  required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  requirements:
169
169
  - - ">="
@@ -1,56 +0,0 @@
1
- require_relative '../clients/openai_client'
2
-
3
- module ActiveGenie::Configuration
4
- class OpenaiConfig
5
- attr_writer :api_key, :organization, :api_url, :client,
6
- :lower_tier_model, :middle_tier_model, :upper_tier_model
7
-
8
- def api_key
9
- @api_key || ENV['OPENAI_API_KEY']
10
- end
11
-
12
- def organization
13
- @organization || ENV['OPENAI_ORGANIZATION']
14
- end
15
-
16
- def lower_tier_model
17
- @lower_tier_model || 'gpt-4o-mini'
18
- end
19
-
20
- def middle_tier_model
21
- @middle_tier_model || 'gpt-4o'
22
- end
23
-
24
- def upper_tier_model
25
- @upper_tier_model || 'o1-preview'
26
- end
27
-
28
- def tier_to_model(tier)
29
- {
30
- lower_tier: lower_tier_model,
31
- middle_tier: middle_tier_model,
32
- upper_tier: upper_tier_model
33
- }[tier&.to_sym]
34
- end
35
-
36
- def api_url
37
- @api_url || 'https://api.openai.com/v1'
38
- end
39
-
40
- def client
41
- @client ||= ::ActiveGenie::Clients::OpenaiClient.new(self)
42
- end
43
-
44
- def to_h(config = {})
45
- {
46
- api_key:,
47
- organization:,
48
- api_url:,
49
- lower_tier_model:,
50
- middle_tier_model:,
51
- upper_tier_model:,
52
- **config
53
- }
54
- end
55
- end
56
- end
@@ -1,121 +0,0 @@
1
- require_relative '../battle/basic'
2
-
3
- module ActiveGenie::Leaderboard
4
- class EloRanking
5
- def self.call(players, criteria, config: {})
6
- new(players, criteria, config:).call
7
- end
8
-
9
- def initialize(players, criteria, config: {})
10
- @players = players
11
- @criteria = criteria
12
- @config = config
13
- @start_time = Time.now
14
- end
15
-
16
- def call
17
- @players.each(&:generate_elo_by_score)
18
-
19
- round_count = 0
20
- while @players.eligible_size > MINIMAL_PLAYERS_TO_BATTLE
21
- round = create_round(@players.tier_relegation, @players.tier_defense)
22
-
23
- round.each do |player_a, player_b|
24
- winner, loser = battle(player_a, player_b) # This can take a while, can be parallelized
25
- update_elo(winner, loser)
26
- ActiveGenie::Logger.trace({ **log, step: :elo_battle, winner_id: winner.id, loser_id: loser.id, winner_elo: winner.elo, loser_elo: loser.elo })
27
- end
28
-
29
- eliminate_all_relegation_players
30
- round_count += 1
31
- end
32
-
33
- ActiveGenie::Logger.info({ **log, step: :elo_end, round_count:, eligible_size: @players.eligible_size })
34
- @players
35
- end
36
-
37
- private
38
-
39
- MATCHS_PER_PLAYER = 3
40
- LOSE_PENALTY = 15
41
- MINIMAL_PLAYERS_TO_BATTLE = 10
42
- K = 32
43
-
44
- # Create a round of matches
45
- # each round is exactly 1 regation player vs 3 defense players for all regation players
46
- # each match is unique (player vs player)
47
- # each defense player is battle exactly 3 times
48
- def create_round(relegation_players, defense_players)
49
- matches = []
50
-
51
- relegation_players.each do |player_a|
52
- player_enemies = []
53
- MATCHS_PER_PLAYER.times do
54
- defender = nil
55
- while defender.nil? || player_enemies.include?(defender.id)
56
- defender = defense_players.sample
57
- end
58
-
59
- matches << [player_a, defender].shuffle
60
- player_enemies << defender.id
61
- end
62
- end
63
-
64
- matches
65
- end
66
-
67
- def battle(player_a, player_b)
68
- ActiveGenie::Battle.basic(
69
- player_a,
70
- player_b,
71
- @criteria,
72
- config:
73
- ).values_at('winner', 'loser')
74
- end
75
-
76
- def update_elo(winner, loser)
77
- return if winner.nil? || loser.nil?
78
-
79
- new_winner_elo, new_loser_elo = calculate_new_elo(winner.elo, loser.elo)
80
-
81
- winner.elo = [new_winner_elo, max_defense_elo].min
82
- loser.elo = [new_loser_elo - LOSE_PENALTY, min_relegation_elo].max
83
- end
84
-
85
- def max_defense_elo
86
- @players.tier_defense.max_by(&:elo).elo
87
- end
88
-
89
- def min_relegation_elo
90
- @players.tier_relegation.min_by(&:elo).elo
91
- end
92
-
93
- # Read more about the formula on https://en.wikipedia.org/wiki/Elo_rating_system
94
- def calculate_new_elo(winner_elo, loser_elo)
95
- expected_score_a = 1 / (1 + 10**((loser_elo - winner_elo) / 400))
96
- expected_score_b = 1 - expected_score_a
97
-
98
- new_elo_winner = winner_elo + K * (1 - expected_score_a)
99
- new_elo_loser = loser_elo + K * (1 - expected_score_b)
100
-
101
- [new_elo_winner, new_elo_loser]
102
- end
103
-
104
- def eliminate_all_relegation_players
105
- eliminations = @players.tier_relegation.size
106
- @players.tier_relegation.each { |player| player.eliminated = 'tier_relegation' }
107
- ActiveGenie::Logger.trace({ **log, step: :elo_round, eligible_size: @players.eligible_size, eliminations: })
108
- end
109
-
110
- def config
111
- { **@config }
112
- end
113
-
114
- def log
115
- {
116
- **(@config.dig(:log) || {}),
117
- duration: Time.now - @start_time
118
- }
119
- end
120
- end
121
- end
@@ -1,62 +0,0 @@
1
- require_relative '../battle/basic'
2
-
3
- module ActiveGenie::Leaderboard
4
- class FreeForAll
5
- def self.call(players, criteria, config: {})
6
- new(players, criteria, config:).call
7
- end
8
-
9
- def initialize(players, criteria, config: {})
10
- @players = players
11
- @criteria = criteria
12
- @config = config
13
- @start_time = Time.now
14
- end
15
-
16
- def call
17
- matches.each do |player_a, player_b|
18
- winner, loser = battle(player_a, player_b)
19
-
20
- if winner.nil? || loser.nil?
21
- player_a.free_for_all[:draw] += 1
22
- player_b.free_for_all[:draw] += 1
23
- else
24
- winner.free_for_all[:win] += 1
25
- loser.free_for_all[:lose] += 1
26
- end
27
-
28
- ActiveGenie::Logger.trace({**log, step: :free_for_all_battle, winner_id: winner&.id, player_a_id: player_a.id, player_a_free_for_all_score: player_a.free_for_all_score, player_b_id: player_b.id, player_b_free_for_all_score: player_b.free_for_all_score })
29
- end
30
-
31
- @players
32
- end
33
-
34
- private
35
-
36
- # TODO: reduce the number of matches based on transitivity.
37
- # For example, if A is better than B, and B is better than C, then A should clearly be better than C
38
- def matches
39
- @players.eligible.combination(2).to_a
40
- end
41
-
42
- def battle(player_a, player_b)
43
- result = ActiveGenie::Battle.basic(
44
- player_a,
45
- player_b,
46
- @criteria,
47
- config:
48
- )
49
-
50
-
51
- result.values_at('winner', 'loser')
52
- end
53
-
54
- def config
55
- { **@config }
56
- end
57
-
58
- def log
59
- { **(@config.dig(:log) || {}), duration: Time.now - @start_time }
60
- end
61
- end
62
- end
@@ -1,120 +0,0 @@
1
- require 'securerandom'
2
-
3
- require_relative './players_collection'
4
- require_relative './free_for_all'
5
- require_relative './elo_ranking'
6
- require_relative '../scoring/recommended_reviews'
7
-
8
- # This class orchestrates player ranking through multiple evaluation stages
9
- # using Elo ranking and free-for-all match simulations.
10
- # 1. Sets initial scores
11
- # 2. Eliminates low performers
12
- # 3. Runs Elo ranking (for large groups)
13
- # 4. Conducts free-for-all matches
14
- #
15
- # @example Basic usage
16
- # League.call(players, criteria)
17
- #
18
- # @param param_players [Array] Collection of player objects to evaluate
19
- # Example: ["Circle", "Triangle", "Square"]
20
- # or
21
- # [
22
- # { content: "Circle", score: 10 },
23
- # { content: "Triangle", score: 7 },
24
- # { content: "Square", score: 5 }
25
- # ]
26
- # @param criteria [String] Evaluation criteria configuration
27
- # Example: "What is more similar to the letter 'O'?"
28
- # @param config [Hash] Additional configuration config
29
- # Example: { model: "gpt-4o", api_key: ENV['OPENAI_API_KEY'] }
30
- # @return [Hash] Final ranked player results
31
- module ActiveGenie::League
32
- class League
33
- def self.call(param_players, criteria, config: {})
34
- new(param_players, criteria, config:).call
35
- end
36
-
37
- def initialize(param_players, criteria, config: {})
38
- @param_players = param_players
39
- @criteria = criteria
40
- @config = config
41
- @league_id = SecureRandom.uuid
42
- @start_time = Time.now
43
- end
44
-
45
- def call
46
- set_initial_score_players
47
- eliminate_obvious_bad_players
48
- run_elo_ranking if players.eligible_size > 10
49
- run_free_for_all
50
-
51
- ActiveGenie::Logger.info({ **log, step: :league_end, top5: players.first(5).map(&:id) })
52
- players.to_h
53
- end
54
-
55
- private
56
-
57
- SCORE_VARIATION_THRESHOLD = 10
58
-
59
- def set_initial_score_players
60
- players_without_score = players.reject { |player| player.score }
61
- players_without_score.each do |player|
62
- player.score = generate_score(player.content) # This can take a while, can be parallelized
63
- ActiveGenie::Logger.trace({ **log, step: :player_score, player_id: player.id, score: player.score })
64
- end
65
-
66
- ActiveGenie::Logger.info({ **log, step: :initial_score, evaluated_players: players_without_score.size })
67
- end
68
-
69
- def generate_score(content)
70
- ActiveGenie::Scoring::Basic.call(content, @criteria, reviewers, config:)['final_score']
71
- end
72
-
73
- def eliminate_obvious_bad_players
74
- eliminated_count = 0
75
- while players.coefficient_of_variation >= SCORE_VARIATION_THRESHOLD
76
- players.eligible.last.eliminated = 'variation_too_high'
77
- eliminated_count += 1
78
- end
79
-
80
- ActiveGenie::Logger.info({ **log, step: :eliminate_obvious_bad_players, eliminated_count: })
81
- end
82
-
83
- def run_elo_ranking
84
- EloRanking.call(players, @criteria, config:)
85
- end
86
-
87
- def run_free_for_all
88
- FreeForAll.call(players, @criteria, config:)
89
- end
90
-
91
- def reviewers
92
- [recommended_reviews['reviewer1'], recommended_reviews['reviewer2'], recommended_reviews['reviewer3']]
93
- end
94
-
95
- def recommended_reviews
96
- @recommended_reviews ||= ActiveGenie::Scoring::RecommendedReviews.call(
97
- [players.sample.content, players.sample.content].join("\n\n"),
98
- @criteria,
99
- config:
100
- )
101
- end
102
-
103
- def players
104
- @players ||= PlayersCollection.new(@param_players)
105
- end
106
-
107
- def config
108
- { log:, **@config }
109
- end
110
-
111
- def log
112
- {
113
- **(@config.dig(:log) || {}),
114
- league_id: @league_id,
115
- league_start_time: @start_time,
116
- duration: Time.now - @start_time
117
- }
118
- end
119
- end
120
- end