active_genie 0.0.10 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +63 -57
  3. data/VERSION +1 -1
  4. data/lib/active_genie/battle/README.md +7 -7
  5. data/lib/active_genie/battle/basic.rb +75 -68
  6. data/lib/active_genie/battle.rb +4 -0
  7. data/lib/active_genie/clients/anthropic_client.rb +110 -0
  8. data/lib/active_genie/clients/google_client.rb +158 -0
  9. data/lib/active_genie/clients/helpers/retry.rb +29 -0
  10. data/lib/active_genie/clients/openai_client.rb +58 -38
  11. data/lib/active_genie/clients/unified_client.rb +5 -5
  12. data/lib/active_genie/concerns/loggable.rb +44 -0
  13. data/lib/active_genie/configuration/log_config.rb +1 -1
  14. data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
  15. data/lib/active_genie/configuration/providers/base_config.rb +85 -0
  16. data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
  17. data/lib/active_genie/configuration/providers/google_config.rb +56 -0
  18. data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
  19. data/lib/active_genie/configuration/providers_config.rb +7 -4
  20. data/lib/active_genie/configuration/runtime_config.rb +35 -0
  21. data/lib/active_genie/configuration.rb +18 -4
  22. data/lib/active_genie/data_extractor/README.md +0 -1
  23. data/lib/active_genie/data_extractor/basic.rb +22 -19
  24. data/lib/active_genie/data_extractor/from_informal.rb +4 -15
  25. data/lib/active_genie/data_extractor.rb +4 -0
  26. data/lib/active_genie/logger.rb +60 -14
  27. data/lib/active_genie/{league → ranking}/README.md +7 -7
  28. data/lib/active_genie/ranking/elo_round.rb +134 -0
  29. data/lib/active_genie/ranking/free_for_all.rb +93 -0
  30. data/lib/active_genie/ranking/player.rb +92 -0
  31. data/lib/active_genie/{league → ranking}/players_collection.rb +19 -12
  32. data/lib/active_genie/ranking/ranking.rb +153 -0
  33. data/lib/active_genie/ranking/ranking_scoring.rb +71 -0
  34. data/lib/active_genie/ranking.rb +12 -0
  35. data/lib/active_genie/scoring/README.md +1 -1
  36. data/lib/active_genie/scoring/basic.rb +93 -49
  37. data/lib/active_genie/scoring/{recommended_reviews.rb → recommended_reviewers.rb} +18 -7
  38. data/lib/active_genie/scoring.rb +6 -3
  39. data/lib/active_genie.rb +1 -1
  40. data/lib/tasks/benchmark.rake +27 -0
  41. metadata +100 -100
  42. data/lib/active_genie/configuration/openai_config.rb +0 -56
  43. data/lib/active_genie/league/elo_ranking.rb +0 -121
  44. data/lib/active_genie/league/free_for_all.rb +0 -62
  45. data/lib/active_genie/league/league.rb +0 -120
  46. data/lib/active_genie/league/player.rb +0 -59
  47. data/lib/active_genie/league.rb +0 -12
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9d0424a39ba21d821cb2419730387e1b026c35b5e2e5dff9f6d615f3ec54e6a3
4
- data.tar.gz: 17b460ccd1a689d0f8709af2b84f3cde65aa0075b76104ee1b4bb8b3b0ffc182
3
+ metadata.gz: 81b6b3ccf366bdeb07e1dfc1942749e4a1d48da74735c48a95cb9d53afb61b33
4
+ data.tar.gz: df2d1ee4ac8bbcfa031b261bedd228ed5c3a8772c055e312360d6a4ad2f699fa
5
5
  SHA512:
6
- metadata.gz: ad98b2d5d063d0d1c4009e1a9f92d6d326ed948cbdee71317c94b6a4a0ee57042609c1f405ca7ce00d4beb321bc1e98ad722646349076929bcdc0a28da8b6b8b
7
- data.tar.gz: d2cc39b77757619c5235041f12d5182778b4237444f3c2246982ebcf54c0542af0da194783cea57fbe4fbde0985ef635802c648796b4f5c4d7a5d4f42c6519c7
6
+ metadata.gz: d3a2ff8342483f8b475f0e60d91fa839ba57b0853e82e637ba4e761fd9ae749917e5ae134803200bfe3fd4bab658b297c1888c88d6c433d4f2c0a0694face6aa
7
+ data.tar.gz: a4b37bd1e6ba7a3a4b6edea20bd37f7e2dd11142182b65b8e62707e993d9c42d44486c2247e342ab8a81b01778456dd5bf15616261e01d6c0dd556757646da18
data/README.md CHANGED
@@ -1,16 +1,11 @@
1
1
  # ActiveGenie 🧞‍♂️
2
- > Transform your Ruby application with powerful, production-ready GenAI features
2
+ > The lodash for GenAI, stop reinventing the wheel
3
3
 
4
4
  [![Gem Version](https://badge.fury.io/rb/active_genie.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/active_genie)
5
- [![Ruby](https://github.com/roriz/active_genie/actions/workflows/ruby.yml/badge.svg)](https://github.com/roriz/active_genie/actions/workflows/ruby.yml)
5
+ [![Ruby](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml/badge.svg)](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml)
6
6
 
7
- ActiveGenie is a Ruby gem that provides a polished, production-ready interface for working with Generative AI (GenAI) models. Just like ActiveStorage simplifies file handling in Rails, ActiveGenie makes it effortless to integrate GenAI capabilities into your Ruby applications.
8
-
9
- ## Features
10
-
11
- - 🎯 **Data Extraction**: Extract structured data from unstructured text with type validation
12
- - 📊 **Smart Scoring**: Multi-reviewer evaluation system with automatic expert selection
13
- - 💭 **Leaderboard**: Consistent rank items based on custom criteria, using multiple tecniques of ranking
7
+ ActiveGenie is a Ruby gem that provides valuable solutions powered by Generative AI (GenAI) models. Just like Lodash or ActiveStorage, ActiveGenie brings a set of modules that deliver real value quickly and reliably.
8
+ ActiveGenie is backed by a custom benchmarking system that ensures consistent quality and performance across different models and providers in every release.
14
9
 
15
10
  ## Installation
16
11
 
@@ -40,6 +35,7 @@ end
40
35
  ## Quick Start
41
36
 
42
37
  ### Data Extractor
38
+
43
39
  Extract structured data from text using AI-powered analysis, handling informal language and complex expressions.
44
40
 
45
41
  ```ruby
@@ -54,13 +50,17 @@ schema = {
54
50
  minimum: 0
55
51
  },
56
52
  size: {
57
- type: 'integer',
53
+ type: 'number',
58
54
  minimum: 35,
59
55
  maximum: 46
60
56
  }
61
57
  }
62
58
 
63
- result = ActiveGenie::DataExtractor.call(text, schema)
59
+ result = ActiveGenie::DataExtractor.call(
60
+ text,
61
+ schema,
62
+ config: { provider: :openai, model: 'gpt-4o-mini' } # optional
63
+ )
64
64
  # => {
65
65
  # brand: "Nike",
66
66
  # brand_explanation: "Brand name found at start of text",
@@ -71,6 +71,8 @@ result = ActiveGenie::DataExtractor.call(text, schema)
71
71
  # }
72
72
  ```
73
73
 
74
+ *Recommended model*: `gpt-4o-mini`
75
+
74
76
  Features:
75
77
  - Structured data extraction with type validation
76
78
  - Schema-based extraction with custom constraints
@@ -86,7 +88,11 @@ Text evaluation system that provides detailed scoring and feedback using multipl
86
88
  text = "The code implements a binary search algorithm with O(log n) complexity"
87
89
  criteria = "Evaluate technical accuracy and clarity"
88
90
 
89
- result = ActiveGenie::Scoring.basic(text, criteria)
91
+ result = ActiveGenie::Scoring.basic(
92
+ text,
93
+ criteria,
94
+ config: { provider: :anthropic, model: 'claude-3-5-haiku-20241022' } # optional
95
+ )
90
96
  # => {
91
97
  # algorithm_expert_score: 95,
92
98
  # algorithm_expert_reasoning: "Accurately describes binary search and its complexity",
@@ -96,6 +102,8 @@ result = ActiveGenie::Scoring.basic(text, criteria)
96
102
  # }
97
103
  ```
98
104
 
105
+ *Recommended model*: `claude-3-5-haiku-20241022`
106
+
99
107
  Features:
100
108
  - Multi-reviewer evaluation with automatic expert selection
101
109
  - Detailed feedback with scoring reasoning
@@ -110,20 +118,27 @@ AI-powered battle evaluation system that determines winners between two players
110
118
  ```ruby
111
119
  require 'active_genie'
112
120
 
113
- player_a = "Implementation uses dependency injection for better testability"
114
- player_b = "Code has high test coverage but tightly coupled components"
121
+ player_1 = "Implementation uses dependency injection for better testability"
122
+ player_2 = "Code has high test coverage but tightly coupled components"
115
123
  criteria = "Evaluate code quality and maintainability"
116
124
 
117
- result = ActiveGenie::Battle.call(player_a, player_b, criteria)
125
+ result = ActiveGenie::Battle.call(
126
+ player_1,
127
+ player_2,
128
+ criteria,
129
+ config: { provider: :google, model: 'gemini-2.0-flash-lite' } # optional
130
+ )
118
131
  # => {
119
132
  # winner_player: "Implementation uses dependency injection for better testability",
120
- # reasoning: "Player A's implementation demonstrates better maintainability through dependency injection,
121
- # which allows for easier testing and component replacement. While Player B has good test coverage,
133
+ # reasoning: "Player 1 implementation demonstrates better maintainability through dependency injection,
134
+ # which allows for easier testing and component replacement. While Player 2 has good test coverage,
122
135
  # the tight coupling makes the code harder to maintain and modify.",
123
136
  # what_could_be_changed_to_avoid_draw: "Focus on specific architectural patterns and design principles"
124
137
  # }
125
138
  ```
126
139
 
140
+ *Recommended model*: `gemini-2.0-flash-lite`
141
+
127
142
  Features:
128
143
  - Multi-reviewer evaluation with automatic expert selection
129
144
  - Detailed feedback with scoring reasoning
@@ -132,9 +147,8 @@ Features:
132
147
 
133
148
  See the [Battle README](lib/active_genie/battle/README.md) for advanced usage, custom reviewers, and detailed interface documentation.
134
149
 
135
- ### League
136
- The League module provides competitive ranking through multi-stage evaluation:
137
-
150
+ ### Ranking
151
+ The Ranking module provides competitive ranking through multi-stage evaluation:
138
152
 
139
153
  ```ruby
140
154
  require 'active_genie'
@@ -142,62 +156,53 @@ require 'active_genie'
142
156
  players = ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API', 'Websocket API']
143
157
  criteria = "Best one to be used into a high changing environment"
144
158
 
145
- result = ActiveGenie::League.call(players, criteria)
159
+ result = ActiveGenie::Ranking.call(
160
+ players,
161
+ criteria,
162
+ config: { provider: :google, model: 'gemini-2.0-flash-lite' } # optional
163
+ )
146
164
  # => {
147
165
  # winner_player: "gRPC API",
148
166
  # reasoning: "gRPC API is the best one to be used into a high changing environment",
149
167
  # }
150
168
  ```
151
169
 
170
+ *Recommended model*: `gemini-2.0-flash-lite`
171
+
152
172
  - **Multi-phase ranking system** combining expert scoring and ELO algorithms
153
173
  - **Automatic elimination** of inconsistent performers using statistical analysis
154
174
  - **Dynamic ranking adjustments** based on simulated pairwise battles, from bottom to top
155
175
 
156
- See the [League README](lib/active_genie/league/README.md) for implementation details, configuration, and advanced ranking strategies.
157
-
158
- ### Summarizer (WIP)
159
- The summarizer is a tool that can be used to summarize a given text. It uses a set of rules to summarize the text out of the box. Uses the best practices of prompt engineering and engineering to make the summarization as accurate as possible.
160
-
161
- ```ruby
162
- require 'active_genie'
163
-
164
- text = "Example text to be summarized. The fox jumps over the dog"
165
- summarized_text = ActiveGenie::Summarizer.call(text)
166
- puts summarized_text # => "The fox jumps over the dog"
167
- ```
168
-
169
- ### Language detector (WIP)
170
- The language detector is a tool that can be used to detect the language of a given text. It uses a set of rules to detect the language of the text out of the box. Uses the best practices of prompt engineering and engineering to make the language detection as accurate as possible.
176
+ See the [Ranking README](lib/active_genie/ranking/README.md) for implementation details, configuration, and advanced ranking strategies.
171
177
 
172
- ```ruby
173
- require 'active_genie'
178
+ ### Text Summarizer (Future)
179
+ ### Categorizer (Future)
180
+ ### Language detector (Future)
181
+ ### Translator (Future)
182
+ ### Sentiment analyzer (Future)
174
183
 
175
- text = "Example text to be detected"
176
- language = ActiveGenie::LanguageDetector.call(text)
177
- puts language # => "en"
178
- ```
184
+ ## Benchmarking 🧪
179
185
 
180
- ### Translator (WIP)
181
- The translator is a tool that can be used to translate a given text. It uses a set of rules to translate the text out of the box. Uses the best practices of prompt engineering and engineering to make the translation as accurate as possible.
186
+ ActiveGenie includes a comprehensive benchmarking system to ensure consistent, high-quality outputs across different LLM models and providers.
182
187
 
183
188
  ```ruby
184
- require 'active_genie'
189
+ # Run all benchmarks
190
+ bundle exec rake active_genie:benchmark
185
191
 
186
- text = "Example text to be translated"
187
- translated_text = ActiveGenie::Translator.call(text, from: 'en', to: 'pt')
188
- puts translated_text # => "Exemplo de texto a ser traduzido"
192
+ # Run benchmarks for a specific module
193
+ bundle exec rake active_genie:benchmark[data_extractor]
189
194
  ```
190
195
 
191
- ### Sentiment analyzer (WIP)
192
- The sentiment analyzer is a tool that can be used to analyze the sentiment of a given text. It uses a set of rules to analyze the sentiment of the text out of the box. Uses the best practices of prompt engineering and engineering to make the sentiment analysis as accurate as possible.
196
+ ### Latest Results
193
197
 
194
- ```ruby
195
- require 'active_genie'
198
+ | Model | Overall Precision |
199
+ |-------|-------------------|
200
+ | claude-3-5-haiku-20241022 | 92.25% |
201
+ | gemini-2.0-flash-lite | 84.25% |
202
+ | gpt-4o-mini | 62.75% |
203
+ | deepseek-chat | 57.25% |
196
204
 
197
- text = "Example text to be analyzed"
198
- sentiment = ActiveGenie::SentimentAnalyzer.call(text)
199
- puts sentiment # => "positive"
200
- ```
205
+ See the [Benchmark README](benchmark/README.md) for detailed results, methodology, and how to contribute to our test suite.
201
206
 
202
207
  ## Configuration
203
208
 
@@ -218,6 +223,7 @@ puts sentiment # => "positive"
218
223
  3. Commit your changes (`git commit -m 'Add amazing feature'`)
219
224
  4. Push to the branch (`git push origin feature/amazing-feature`)
220
225
  5. Open a Pull Request
226
+
221
227
  ## License
222
228
 
223
- This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
229
+ This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.18
@@ -12,11 +12,11 @@ AI-powered battle evaluation system that determines winners between two players
12
12
  Evaluate a battle between two players with simple text content:
13
13
 
14
14
  ```ruby
15
- player_a = "Implementation uses dependency injection for better testability"
16
- player_b = "Code has high test coverage but tightly coupled components"
15
+ player_1 = "Implementation uses dependency injection for better testability"
16
+ player_2 = "Code has high test coverage but tightly coupled components"
17
17
  criteria = "Evaluate code quality and maintainability"
18
18
 
19
- result = ActiveGenie::Battle::Basic.call(player_a, player_b, criteria)
19
+ result = ActiveGenie::Battle::Basic.call(player_1, player_2, criteria)
20
20
  # => {
21
21
  # winner_player: "Implementation uses dependency injection for better testability",
22
22
  # reasoning: "Player A's implementation demonstrates better maintainability through dependency injection,
@@ -27,13 +27,13 @@ result = ActiveGenie::Battle::Basic.call(player_a, player_b, criteria)
27
27
  ```
28
28
 
29
29
  ## Interface
30
- ### Basic.call(player_a, player_b, criteria, config: {})
31
- - `player_a` [String, Hash] - The content or submission from the first player
32
- - `player_b` [String, Hash] - The content or submission from the second player
30
+ ### Basic.call(player_1, player_2, criteria, config: {})
31
+ - `player_1` [String, Hash] - The content or submission from the first player
32
+ - `player_2` [String, Hash] - The content or submission from the second player
33
33
  - `criteria` [String] - The evaluation criteria or rules to assess against
34
34
  - `config` [Hash] - Additional configuration options that modify the battle evaluation behavior
35
35
 
36
36
  Returns a Hash containing:
37
- - `winner_player` [String, Hash] - The winning player's content (either player_a or player_b)
37
+ - `winner_player` [String, Hash] - The winning player's content (either player_1 or player_2)
38
38
  - `reasoning` [String] - Detailed explanation of why the winner was chosen
39
39
  - `what_could_be_changed_to_avoid_draw` [String] - A suggestion on how to avoid a draw
@@ -14,117 +14,124 @@ module ActiveGenie::Battle
14
14
  # Basic.call("Player A content", "Player B content", "Evaluate keyword usage and pattern matching")
15
15
  #
16
16
  class Basic
17
- def self.call(player_a, player_b, criteria, config: {})
18
- new(player_a, player_b, criteria, config:).call
17
+ def self.call(...)
18
+ new(...).call
19
19
  end
20
20
 
21
- # @param player_a [String] The content or submission from the first player
22
- # @param player_b [String] The content or submission from the second player
21
+ # @param player_1 [String] The content or submission from the first player
22
+ # @param player_2 [String] The content or submission from the second player
23
23
  # @param criteria [String] The evaluation criteria or rules to assess against
24
- # @param config [Hash] Additional configuration config that modify the battle evaluation behavior
24
+ # @param config [Hash] Additional configuration options that modify the battle evaluation behavior
25
25
  # @return [Hash] The evaluation result containing the winner and reasoning
26
- # @return [String] :winner The @param player_a or player_b
26
+ # @return [String] :winner The winner, either player_1 or player_2
27
27
  # @return [String] :reasoning Detailed explanation of why the winner was chosen
28
28
  # @return [String] :what_could_be_changed_to_avoid_draw A suggestion on how to avoid a draw
29
- def initialize(player_a, player_b, criteria, config: {})
30
- @player_a = player_a
31
- @player_b = player_b
29
+ def initialize(player_1, player_2, criteria, config: {})
30
+ @player_1 = player_1
31
+ @player_2 = player_2
32
32
  @criteria = criteria
33
- @config = config
34
- @response = nil
33
+ @config = ActiveGenie::Configuration.to_h(config)
35
34
  end
36
35
 
37
36
  def call
38
37
  messages = [
39
38
  { role: 'system', content: PROMPT },
40
39
  { role: 'user', content: "criteria: #{@criteria}" },
41
- { role: 'user', content: "player_a: #{player_content(@player_a)}" },
42
- { role: 'user', content: "player_b: #{player_content(@player_b)}" },
40
+ { role: 'user', content: "player_1: #{@player_1}" },
41
+ { role: 'user', content: "player_2: #{@player_2}" },
43
42
  ]
44
43
 
45
- @response = ::ActiveGenie::Clients::UnifiedClient.function_calling(messages, FUNCTION, config:)
46
-
47
- response_formatted
44
+ response = ::ActiveGenie::Clients::UnifiedClient.function_calling(
45
+ messages,
46
+ FUNCTION,
47
+ model_tier: 'lower_tier',
48
+ config: @config
49
+ )
50
+
51
+ ActiveGenie::Logger.debug({
52
+ code: :battle,
53
+ player_1: @player_1[0..30],
54
+ player_2: @player_2[0..30],
55
+ criteria: @criteria[0..30],
56
+ winner: response['impartial_judge_winner'],
57
+ reasoning: response['impartial_judge_winner_reasoning']
58
+ })
59
+
60
+ response_formatted(response)
48
61
  end
49
62
 
50
63
  private
51
64
 
52
- def player_content(player)
53
- return player.dig('content') if player.is_a?(Hash)
65
+ def response_formatted(response)
66
+ winner = response['impartial_judge_winner']
67
+ loser = case response['impartial_judge_winner']
68
+ when 'player_1' then 'player_2'
69
+ when 'player_2' then 'player_1'
70
+ end
54
71
 
55
- player
56
- end
57
-
58
- def response_formatted
59
- winner = case @response['winner']
60
- when 'player_a' then @player_a
61
- when 'player_b' then @player_b
62
- end
63
-
64
- @response.merge!('winner' => winner, 'loser' => winner ? (winner == @player_a ? @player_b : @player_a) : nil)
72
+ { 'winner' => winner, 'loser' => loser, 'reasoning' => response['impartial_judge_winner_reasoning'] }
65
73
  end
66
74
 
67
75
  PROMPT = <<~PROMPT
68
- Evaluate a battle between player_a and player_b using predefined criteria and identify the winner.
69
-
70
- Consider rules, keywords, and patterns as the criteria for evaluation. Analyze the content from both players objectively, focusing on who meets the criteria most effectively. Explain your decision clearly, with specific reasoning on how the chosen player fulfilled the criteria better than the other. Avoid selecting a draw unless absolutely necessary.
76
+ Based on two players, player_1 and player_2, they will battle against each other based on criteria. Criteria are vital as they provide a clear metric to compare the players. Follow these criteria strictly.
71
77
 
72
78
  # Steps
73
- 1. **Review Predefined Criteria**: Understand the specific rules, keywords, and patterns that serve as the basis for evaluation.
74
- 2. **Analyze Content**: Examine the contributions of both player_a and player_b. Look for how each player meets or fails to meet the criteria.
75
- 3. **Comparison**: Compare both players against each criterion to determine who aligns better with the standards set.
76
- 4. **Decision-Making**: Based on the analysis, determine the player who meets the most or all criteria effectively.
77
- 5. **Provide Justification**: Offer a clear and concise reason for your choice detailing how the winner outperformed the other.
79
+ 1. player_1 presents their strengths and how they meet the criteria. Max of 100 words.
80
+ 2. player_2 presents their strengths and how they meet the criteria. Max of 100 words.
81
+ 3. player_1 argues why they should be the winner compared to player_2. Max of 100 words.
82
+ 4. player_2 counter-argues why they should be the winner compared to player_1. Max of 100 words.
83
+ 5. The impartial judge chooses the winner.
78
84
 
79
- # Examples
80
- - **Example 1**:
81
- - Input: Player A uses keyword X, follows rule Y, Player B uses keyword Z, breaks rule Y.
82
- - Output: winner: player_a
83
- - Justification: Player A successfully used keyword X and followed rule Y, whereas Player B broke rule Y.
84
-
85
- - **Example 2**:
86
- - Input: Player A matches pattern P, Player B matches pattern P, uses keyword Q.
87
- - Output: winner: player_b
88
- - Justification: Both matched pattern P, but Player B also used keyword Q, meeting more criteria.
85
+ # Output Format
86
+ - The impartial judge chooses this player as the winner.
89
87
 
90
88
  # Notes
91
- - Avoid drawing if a clear winner can be discerned.
89
+ - Avoid resulting in a draw. Use reasoning or make fair assumptions if needed.
92
90
  - Critically assess each player's adherence to the criteria.
93
91
  - Clearly communicate the reasoning behind your decision.
94
92
  PROMPT
95
93
 
96
94
  FUNCTION = {
97
95
  name: 'battle_evaluation',
98
- description: 'Evaluate a battle between player_a and player_b using predefined criteria and identify the winner.',
96
+ description: 'Evaluate a battle between player_1 and player_2 using predefined criteria and identify the winner.',
99
97
  schema: {
100
98
  type: "object",
101
99
  properties: {
102
- winner: {
100
+ player_1_sell_himself: {
103
101
  type: 'string',
104
- description: 'The player who won the battle based on the criteria.',
105
- enum: ['player_a', 'player_b', 'draw']
102
+ description: 'player_1 presents their strengths and how they meet the criteria. Max of 100 words.',
106
103
  },
107
- reasoning_of_winner: {
104
+ player_2_sell_himself: {
108
105
  type: 'string',
109
- description: 'The detailed reasoning about why the winner won based on the criteria.',
106
+ description: 'player_2 presents their strengths and how they meet the criteria. Max of 100 words.',
110
107
  },
111
- what_could_be_changed_to_avoid_draw: {
108
+ player_1_arguments: {
112
109
  type: 'string',
113
- description: 'Suggestions on how to avoid a draw based on the criteria. Be as objective and short as possible. Can be empty.',
114
- }
115
- }
116
- }
117
- }
118
-
119
- def config
120
- {
121
- all_providers: { model_tier: 'lower_tier' },
122
- log: {
123
- **(@config.dig(:log) || {}),
124
- trace: self.class.name,
110
+ description: 'player_1 arguments for why they should be the winner compared to player_2. Max of 100 words.',
111
+ },
112
+ player_2_counter: {
113
+ type: 'string',
114
+ description: 'player_2 counter arguments for why they should be the winner compared to player_1. Max of 100 words.',
115
+ },
116
+ impartial_judge_winner_reasoning: {
117
+ type: 'string',
118
+ description: 'The detailed reasoning about why the impartial judge chose the winner. Max of 100 words.',
119
+ },
120
+ impartial_judge_winner: {
121
+ type: 'string',
122
+ description: 'Who is the winner based on the impartial judge reasoning?',
123
+ enum: ['player_1', 'player_2']
124
+ },
125
125
  },
126
- **@config
126
+ required: [
127
+ 'player_1_sell_himself',
128
+ 'player_2_sell_himself',
129
+ 'player_1_arguments',
130
+ 'player_2_counter',
131
+ 'impartial_judge_winner_reasoning',
132
+ 'impartial_judge_winner'
133
+ ]
127
134
  }
128
- end
135
+ }
129
136
  end
130
137
  end
@@ -9,5 +9,9 @@ module ActiveGenie
9
9
  def basic(...)
10
10
  Basic.call(...)
11
11
  end
12
+
13
+ def call(...)
14
+ Basic.call(...)
15
+ end
12
16
  end
13
17
  end
@@ -0,0 +1,110 @@
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'uri'
4
+ require_relative './helpers/retry'
5
+
6
+ module ActiveGenie
7
+ module Clients
8
+ # Client for interacting with the Anthropic (Claude) API with json response
9
+ class AnthropicClient
10
+ class AnthropicError < StandardError; end
11
+ class RateLimitError < AnthropicError; end
12
+
13
+ def initialize(config)
14
+ @app_config = config
15
+ end
16
+
17
+ # Requests structured JSON output from the Anthropic Claude model based on a schema.
18
+ #
19
+ # @param messages [Array<Hash>] A list of messages representing the conversation history.
20
+ # Each hash should have :role ('user', 'assistant', or 'system') and :content (String).
21
+ # Claude uses 'user', 'assistant', and 'system' roles.
22
+ # @param function [Hash] A JSON schema definition describing the desired output format.
23
+ # @param model_tier [Symbol, nil] A symbolic representation of the model quality/size tier.
24
+ # @param config [Hash] Optional configuration overrides:
25
+ # - :api_key [String] Override the default API key.
26
+ # - :model [String] Override the model name directly.
27
+ # - :max_retries [Integer] Max retries for the request.
28
+ # - :retry_delay [Integer] Initial delay for retries.
29
+ # - :anthropic_version [String] Override the default Anthropic API version.
30
+ # @return [Hash, nil] The parsed JSON object matching the schema, or nil if parsing fails or content is empty.
31
+ def function_calling(messages, function, model_tier: nil, config: {})
32
+ model = config[:runtime][:model] || @app_config.tier_to_model(model_tier)
33
+
34
+ system_message = messages.find { |m| m[:role] == 'system' }&.dig(:content) || ''
35
+ user_messages = messages.select { |m| m[:role] == 'user' || m[:role] == 'assistant' }
36
+ .map { |m| { role: m[:role], content: m[:content] } }
37
+
38
+ anthropic_function = function
39
+ anthropic_function[:input_schema] = function[:schema]
40
+ anthropic_function.delete(:schema)
41
+
42
+ payload = {
43
+ model:,
44
+ system: system_message,
45
+ messages: user_messages,
46
+ tools: [anthropic_function],
47
+ tool_choice: { name: anthropic_function[:name], type: 'tool' },
48
+ max_tokens: config[:runtime][:max_tokens],
49
+ temperature: config[:runtime][:temperature] || 0,
50
+ }
51
+
52
+ api_key = config[:runtime][:api_key] || @app_config.api_key
53
+ headers = DEFAULT_HEADERS.merge(
54
+ 'x-api-key': api_key,
55
+ 'anthropic-version': config[:anthropic_version] || ANTHROPIC_VERSION
56
+ ).compact
57
+
58
+ retry_with_backoff(config:) do
59
+ response = request(payload, headers, config:)
60
+ content = response.dig('content', 0, 'input')
61
+
62
+ ActiveGenie::Logger.trace({code: :function_calling, payload:, parsed_response: content})
63
+
64
+ content
65
+ end
66
+ end
67
+
68
+ private
69
+
70
+ DEFAULT_HEADERS = {
71
+ 'Content-Type': 'application/json',
72
+ }
73
+ ANTHROPIC_VERSION = '2023-06-01'
74
+
75
+ def request(payload, headers, config:)
76
+ start_time = Time.now
77
+
78
+ retry_with_backoff(config:) do
79
+ response = Net::HTTP.post(
80
+ URI("#{@app_config.api_url}/v1/messages"),
81
+ payload.to_json,
82
+ headers
83
+ )
84
+
85
+ if response.is_a?(Net::HTTPTooManyRequests)
86
+ raise RateLimitError, "Anthropic API rate limit exceeded: #{response.body}"
87
+ end
88
+
89
+ raise AnthropicError, response.body unless response.is_a?(Net::HTTPSuccess)
90
+
91
+ return nil if response.body.empty?
92
+
93
+ parsed_body = JSON.parse(response.body)
94
+
95
+ ActiveGenie::Logger.trace({
96
+ code: :llm_usage,
97
+ input_tokens: parsed_body.dig('usage', 'input_tokens'),
98
+ output_tokens: parsed_body.dig('usage', 'output_tokens'),
99
+ total_tokens: parsed_body.dig('usage', 'input_tokens') + parsed_body.dig('usage', 'output_tokens'),
100
+ model: payload[:model],
101
+ duration: Time.now - start_time,
102
+ usage: parsed_body.dig('usage')
103
+ })
104
+
105
+ parsed_body
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end