active_genie 0.0.10 → 0.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +63 -57
- data/VERSION +1 -1
- data/lib/active_genie/battle/README.md +7 -7
- data/lib/active_genie/battle/basic.rb +75 -68
- data/lib/active_genie/battle.rb +4 -0
- data/lib/active_genie/clients/anthropic_client.rb +110 -0
- data/lib/active_genie/clients/google_client.rb +158 -0
- data/lib/active_genie/clients/helpers/retry.rb +29 -0
- data/lib/active_genie/clients/openai_client.rb +58 -38
- data/lib/active_genie/clients/unified_client.rb +5 -5
- data/lib/active_genie/concerns/loggable.rb +44 -0
- data/lib/active_genie/configuration/log_config.rb +1 -1
- data/lib/active_genie/configuration/providers/anthropic_config.rb +54 -0
- data/lib/active_genie/configuration/providers/base_config.rb +85 -0
- data/lib/active_genie/configuration/providers/deepseek_config.rb +54 -0
- data/lib/active_genie/configuration/providers/google_config.rb +56 -0
- data/lib/active_genie/configuration/providers/openai_config.rb +54 -0
- data/lib/active_genie/configuration/providers_config.rb +7 -4
- data/lib/active_genie/configuration/runtime_config.rb +35 -0
- data/lib/active_genie/configuration.rb +18 -4
- data/lib/active_genie/data_extractor/README.md +0 -1
- data/lib/active_genie/data_extractor/basic.rb +22 -19
- data/lib/active_genie/data_extractor/from_informal.rb +4 -15
- data/lib/active_genie/data_extractor.rb +4 -0
- data/lib/active_genie/logger.rb +60 -14
- data/lib/active_genie/{league → ranking}/README.md +7 -7
- data/lib/active_genie/ranking/elo_round.rb +134 -0
- data/lib/active_genie/ranking/free_for_all.rb +93 -0
- data/lib/active_genie/ranking/player.rb +92 -0
- data/lib/active_genie/{league → ranking}/players_collection.rb +19 -12
- data/lib/active_genie/ranking/ranking.rb +153 -0
- data/lib/active_genie/ranking/ranking_scoring.rb +71 -0
- data/lib/active_genie/ranking.rb +12 -0
- data/lib/active_genie/scoring/README.md +1 -1
- data/lib/active_genie/scoring/basic.rb +93 -49
- data/lib/active_genie/scoring/{recommended_reviews.rb → recommended_reviewers.rb} +18 -7
- data/lib/active_genie/scoring.rb +6 -3
- data/lib/active_genie.rb +1 -1
- data/lib/tasks/benchmark.rake +27 -0
- metadata +100 -100
- data/lib/active_genie/configuration/openai_config.rb +0 -56
- data/lib/active_genie/league/elo_ranking.rb +0 -121
- data/lib/active_genie/league/free_for_all.rb +0 -62
- data/lib/active_genie/league/league.rb +0 -120
- data/lib/active_genie/league/player.rb +0 -59
- data/lib/active_genie/league.rb +0 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81b6b3ccf366bdeb07e1dfc1942749e4a1d48da74735c48a95cb9d53afb61b33
|
4
|
+
data.tar.gz: df2d1ee4ac8bbcfa031b261bedd228ed5c3a8772c055e312360d6a4ad2f699fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d3a2ff8342483f8b475f0e60d91fa839ba57b0853e82e637ba4e761fd9ae749917e5ae134803200bfe3fd4bab658b297c1888c88d6c433d4f2c0a0694face6aa
|
7
|
+
data.tar.gz: a4b37bd1e6ba7a3a4b6edea20bd37f7e2dd11142182b65b8e62707e993d9c42d44486c2247e342ab8a81b01778456dd5bf15616261e01d6c0dd556757646da18
|
data/README.md
CHANGED
@@ -1,16 +1,11 @@
|
|
1
1
|
# ActiveGenie 🧞♂️
|
2
|
-
>
|
2
|
+
> The lodash for GenAI, stop reinventing the wheel
|
3
3
|
|
4
4
|
[](https://badge.fury.io/rb/active_genie)
|
5
|
-
[](https://github.com/roriz/active_genie/actions/workflows/benchmark.yml)
|
6
6
|
|
7
|
-
ActiveGenie is a Ruby gem that provides
|
8
|
-
|
9
|
-
## Features
|
10
|
-
|
11
|
-
- 🎯 **Data Extraction**: Extract structured data from unstructured text with type validation
|
12
|
-
- 📊 **Smart Scoring**: Multi-reviewer evaluation system with automatic expert selection
|
13
|
-
- 💭 **Leaderboard**: Consistent rank items based on custom criteria, using multiple tecniques of ranking
|
7
|
+
ActiveGenie is a Ruby gem that provides valuable solutions powered by Generative AI (GenAI) models. Just like Lodash or ActiveStorage, ActiveGenie brings a set of modules to reach real value fast and reliably.
|
8
|
+
ActiveGenie is backed by a custom benchmarking system that ensures consistent quality and performance across different models and providers in every release.
|
14
9
|
|
15
10
|
## Installation
|
16
11
|
|
@@ -40,6 +35,7 @@ end
|
|
40
35
|
## Quick Start
|
41
36
|
|
42
37
|
### Data Extractor
|
38
|
+
|
43
39
|
Extract structured data from text using AI-powered analysis, handling informal language and complex expressions.
|
44
40
|
|
45
41
|
```ruby
|
@@ -54,13 +50,17 @@ schema = {
|
|
54
50
|
minimum: 0
|
55
51
|
},
|
56
52
|
size: {
|
57
|
-
type: '
|
53
|
+
type: 'number',
|
58
54
|
minimum: 35,
|
59
55
|
maximum: 46
|
60
56
|
}
|
61
57
|
}
|
62
58
|
|
63
|
-
result = ActiveGenie::DataExtractor.call(
|
59
|
+
result = ActiveGenie::DataExtractor.call(
|
60
|
+
text,
|
61
|
+
schema,
|
62
|
+
config: { provider: :openai, model: 'gpt-4o-mini' } # optional
|
63
|
+
)
|
64
64
|
# => {
|
65
65
|
# brand: "Nike",
|
66
66
|
# brand_explanation: "Brand name found at start of text",
|
@@ -71,6 +71,8 @@ result = ActiveGenie::DataExtractor.call(text, schema)
|
|
71
71
|
# }
|
72
72
|
```
|
73
73
|
|
74
|
+
*Recommended model*: `gpt-4o-mini`
|
75
|
+
|
74
76
|
Features:
|
75
77
|
- Structured data extraction with type validation
|
76
78
|
- Schema-based extraction with custom constraints
|
@@ -86,7 +88,11 @@ Text evaluation system that provides detailed scoring and feedback using multipl
|
|
86
88
|
text = "The code implements a binary search algorithm with O(log n) complexity"
|
87
89
|
criteria = "Evaluate technical accuracy and clarity"
|
88
90
|
|
89
|
-
result = ActiveGenie::Scoring.basic(
|
91
|
+
result = ActiveGenie::Scoring.basic(
|
92
|
+
text,
|
93
|
+
criteria,
|
94
|
+
config: { provider: :anthropic, model: 'claude-3-5-haiku-20241022' } # optional
|
95
|
+
)
|
90
96
|
# => {
|
91
97
|
# algorithm_expert_score: 95,
|
92
98
|
# algorithm_expert_reasoning: "Accurately describes binary search and its complexity",
|
@@ -96,6 +102,8 @@ result = ActiveGenie::Scoring.basic(text, criteria)
|
|
96
102
|
# }
|
97
103
|
```
|
98
104
|
|
105
|
+
*Recommended model*: `claude-3-5-haiku-20241022`
|
106
|
+
|
99
107
|
Features:
|
100
108
|
- Multi-reviewer evaluation with automatic expert selection
|
101
109
|
- Detailed feedback with scoring reasoning
|
@@ -110,20 +118,27 @@ AI-powered battle evaluation system that determines winners between two players
|
|
110
118
|
```ruby
|
111
119
|
require 'active_genie'
|
112
120
|
|
113
|
-
|
114
|
-
|
121
|
+
player_1 = "Implementation uses dependency injection for better testability"
|
122
|
+
player_2 = "Code has high test coverage but tightly coupled components"
|
115
123
|
criteria = "Evaluate code quality and maintainability"
|
116
124
|
|
117
|
-
result = ActiveGenie::Battle.call(
|
125
|
+
result = ActiveGenie::Battle.call(
|
126
|
+
player_1,
|
127
|
+
player_2,
|
128
|
+
criteria,
|
129
|
+
config: { provider: :google, model: 'gemini-2.0-flash-lite' } # optional
|
130
|
+
)
|
118
131
|
# => {
|
119
132
|
# winner_player: "Implementation uses dependency injection for better testability",
|
120
|
-
# reasoning: "Player
|
121
|
-
# which allows for easier testing and component replacement. While Player
|
133
|
+
# reasoning: "Player 1 implementation demonstrates better maintainability through dependency injection,
|
134
|
+
# which allows for easier testing and component replacement. While Player 2 has good test coverage,
|
122
135
|
# the tight coupling makes the code harder to maintain and modify.",
|
123
136
|
# what_could_be_changed_to_avoid_draw: "Focus on specific architectural patterns and design principles"
|
124
137
|
# }
|
125
138
|
```
|
126
139
|
|
140
|
+
*Recommended model*: `gemini-2.0-flash-lite`
|
141
|
+
|
127
142
|
Features:
|
128
143
|
- Multi-reviewer evaluation with automatic expert selection
|
129
144
|
- Detailed feedback with scoring reasoning
|
@@ -132,9 +147,8 @@ Features:
|
|
132
147
|
|
133
148
|
See the [Battle README](lib/active_genie/battle/README.md) for advanced usage, custom reviewers, and detailed interface documentation.
|
134
149
|
|
135
|
-
###
|
136
|
-
The
|
137
|
-
|
150
|
+
### Ranking
|
151
|
+
The Ranking module provides competitive ranking through multi-stage evaluation:
|
138
152
|
|
139
153
|
```ruby
|
140
154
|
require 'active_genie'
|
@@ -142,62 +156,53 @@ require 'active_genie'
|
|
142
156
|
players = ['REST API', 'GraphQL API', 'SOAP API', 'gRPC API', 'Websocket API']
|
143
157
|
criteria = "Best one to be used into a high changing environment"
|
144
158
|
|
145
|
-
result = ActiveGenie::
|
159
|
+
result = ActiveGenie::Ranking.call(
|
160
|
+
players,
|
161
|
+
criteria,
|
162
|
+
config: { provider: :google, model: 'gemini-2.0-flash-lite' } # optional
|
163
|
+
)
|
146
164
|
# => {
|
147
165
|
# winner_player: "gRPC API",
|
148
166
|
# reasoning: "gRPC API is the best one to be used into a high changing environment",
|
149
167
|
# }
|
150
168
|
```
|
151
169
|
|
170
|
+
*Recommended model*: `gemini-2.0-flash-lite`
|
171
|
+
|
152
172
|
- **Multi-phase ranking system** combining expert scoring and ELO algorithms
|
153
173
|
- **Automatic elimination** of inconsistent performers using statistical analysis
|
154
174
|
- **Dynamic ranking adjustments** based on simulated pairwise battles, from bottom to top
|
155
175
|
|
156
|
-
See the [
|
157
|
-
|
158
|
-
### Summarizer (WIP)
|
159
|
-
The summarizer is a tool that can be used to summarize a given text. It uses a set of rules to summarize the text out of the box. Uses the best practices of prompt engineering and engineering to make the summarization as accurate as possible.
|
160
|
-
|
161
|
-
```ruby
|
162
|
-
require 'active_genie'
|
163
|
-
|
164
|
-
text = "Example text to be summarized. The fox jumps over the dog"
|
165
|
-
summarized_text = ActiveGenie::Summarizer.call(text)
|
166
|
-
puts summarized_text # => "The fox jumps over the dog"
|
167
|
-
```
|
168
|
-
|
169
|
-
### Language detector (WIP)
|
170
|
-
The language detector is a tool that can be used to detect the language of a given text. It uses a set of rules to detect the language of the text out of the box. Uses the best practices of prompt engineering and engineering to make the language detection as accurate as possible.
|
176
|
+
See the [Ranking README](lib/active_genie/ranking/README.md) for implementation details, configuration, and advanced ranking strategies.
|
171
177
|
|
172
|
-
|
173
|
-
|
178
|
+
### Text Summarizer (Future)
|
179
|
+
### Categorizer (Future)
|
180
|
+
### Language detector (Future)
|
181
|
+
### Translator (Future)
|
182
|
+
### Sentiment analyzer (Future)
|
174
183
|
|
175
|
-
|
176
|
-
language = ActiveGenie::LanguageDetector.call(text)
|
177
|
-
puts language # => "en"
|
178
|
-
```
|
184
|
+
## Benchmarking 🧪
|
179
185
|
|
180
|
-
|
181
|
-
The translator is a tool that can be used to translate a given text. It uses a set of rules to translate the text out of the box. Uses the best practices of prompt engineering and engineering to make the translation as accurate as possible.
|
186
|
+
ActiveGenie includes a comprehensive benchmarking system to ensure consistent, high-quality outputs across different LLM models and providers.
|
182
187
|
|
183
188
|
```ruby
|
184
|
-
|
189
|
+
# Run all benchmarks
|
190
|
+
bundle exec rake active_genie:benchmark
|
185
191
|
|
186
|
-
|
187
|
-
|
188
|
-
puts translated_text # => "Exemplo de texto a ser traduzido"
|
192
|
+
# Run benchmarks for a specific module
|
193
|
+
bundle exec rake active_genie:benchmark[data_extractor]
|
189
194
|
```
|
190
195
|
|
191
|
-
###
|
192
|
-
The sentiment analyzer is a tool that can be used to analyze the sentiment of a given text. It uses a set of rules to analyze the sentiment of the text out of the box. Uses the best practices of prompt engineering and engineering to make the sentiment analysis as accurate as possible.
|
196
|
+
### Latest Results
|
193
197
|
|
194
|
-
|
195
|
-
|
198
|
+
| Model | Overall Precision |
|
199
|
+
|-------|-------------------|
|
200
|
+
| claude-3-5-haiku-20241022 | 92.25% |
|
201
|
+
| gemini-2.0-flash-lite | 84.25% |
|
202
|
+
| gpt-4o-mini | 62.75% |
|
203
|
+
| deepseek-chat | 57.25% |
|
196
204
|
|
197
|
-
|
198
|
-
sentiment = ActiveGenie::SentimentAnalyzer.call(text)
|
199
|
-
puts sentiment # => "positive"
|
200
|
-
```
|
205
|
+
See the [Benchmark README](benchmark/README.md) for detailed results, methodology, and how to contribute to our test suite.
|
201
206
|
|
202
207
|
## Configuration
|
203
208
|
|
@@ -218,6 +223,7 @@ puts sentiment # => "positive"
|
|
218
223
|
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
219
224
|
4. Push to the branch (`git push origin feature/amazing-feature`)
|
220
225
|
5. Open a Pull Request
|
226
|
+
|
221
227
|
## License
|
222
228
|
|
223
|
-
This project is licensed under the
|
229
|
+
This project is licensed under the Apache License 2.0 - see the [LICENSE](LICENSE) file for details.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.18
|
@@ -12,11 +12,11 @@ AI-powered battle evaluation system that determines winners between two players
|
|
12
12
|
Evaluate a battle between two players with simple text content:
|
13
13
|
|
14
14
|
```ruby
|
15
|
-
|
16
|
-
|
15
|
+
player_1 = "Implementation uses dependency injection for better testability"
|
16
|
+
player_2 = "Code has high test coverage but tightly coupled components"
|
17
17
|
criteria = "Evaluate code quality and maintainability"
|
18
18
|
|
19
|
-
result = ActiveGenie::Battle::Basic.call(
|
19
|
+
result = ActiveGenie::Battle::Basic.call(player_1, player_2, criteria)
|
20
20
|
# => {
|
21
21
|
# winner_player: "Implementation uses dependency injection for better testability",
|
22
22
|
# reasoning: "Player 1's implementation demonstrates better maintainability through dependency injection,
|
@@ -27,13 +27,13 @@ result = ActiveGenie::Battle::Basic.call(player_a, player_b, criteria)
|
|
27
27
|
```
|
28
28
|
|
29
29
|
## Interface
|
30
|
-
### Basic.call(
|
31
|
-
- `
|
32
|
-
- `
|
30
|
+
### Basic.call(player_1, player_2, criteria, config: {})
|
31
|
+
- `player_1` [String, Hash] - The content or submission from the first player
|
32
|
+
- `player_2` [String, Hash] - The content or submission from the second player
|
33
33
|
- `criteria` [String] - The evaluation criteria or rules to assess against
|
34
34
|
- `config` [Hash] - Additional configuration options that modify the battle evaluation behavior
|
35
35
|
|
36
36
|
Returns a Hash containing:
|
37
|
-
- `winner_player` [String, Hash] - The winning player's content (either
|
37
|
+
- `winner_player` [String, Hash] - The winning player's content (either player_1 or player_2)
|
38
38
|
- `reasoning` [String] - Detailed explanation of why the winner was chosen
|
39
39
|
- `what_could_be_changed_to_avoid_draw` [String] - A suggestion on how to avoid a draw
|
@@ -14,117 +14,124 @@ module ActiveGenie::Battle
|
|
14
14
|
# Basic.call("Player A content", "Player B content", "Evaluate keyword usage and pattern matching")
|
15
15
|
#
|
16
16
|
class Basic
|
17
|
-
def self.call(
|
18
|
-
new(
|
17
|
+
def self.call(...)
|
18
|
+
new(...).call
|
19
19
|
end
|
20
20
|
|
21
|
-
# @param
|
22
|
-
# @param
|
21
|
+
# @param player_1 [String] The content or submission from the first player
|
22
|
+
# @param player_2 [String] The content or submission from the second player
|
23
23
|
# @param criteria [String] The evaluation criteria or rules to assess against
|
24
|
-
# @param config [Hash] Additional configuration
|
24
|
+
# @param config [Hash] Additional configuration options that modify the battle evaluation behavior
|
25
25
|
# @return [Hash] The evaluation result containing the winner and reasoning
|
26
|
-
# @return [String] :winner The
|
26
|
+
# @return [String] :winner The winner, either player_1 or player_2
|
27
27
|
# @return [String] :reasoning Detailed explanation of why the winner was chosen
|
28
28
|
# @return [String] :what_could_be_changed_to_avoid_draw A suggestion on how to avoid a draw
|
29
|
-
def initialize(
|
30
|
-
@
|
31
|
-
@
|
29
|
+
def initialize(player_1, player_2, criteria, config: {})
|
30
|
+
@player_1 = player_1
|
31
|
+
@player_2 = player_2
|
32
32
|
@criteria = criteria
|
33
|
-
@config = config
|
34
|
-
@response = nil
|
33
|
+
@config = ActiveGenie::Configuration.to_h(config)
|
35
34
|
end
|
36
35
|
|
37
36
|
def call
|
38
37
|
messages = [
|
39
38
|
{ role: 'system', content: PROMPT },
|
40
39
|
{ role: 'user', content: "criteria: #{@criteria}" },
|
41
|
-
{ role: 'user', content: "
|
42
|
-
{ role: 'user', content: "
|
40
|
+
{ role: 'user', content: "player_1: #{@player_1}" },
|
41
|
+
{ role: 'user', content: "player_2: #{@player_2}" },
|
43
42
|
]
|
44
43
|
|
45
|
-
|
46
|
-
|
47
|
-
|
44
|
+
response = ::ActiveGenie::Clients::UnifiedClient.function_calling(
|
45
|
+
messages,
|
46
|
+
FUNCTION,
|
47
|
+
model_tier: 'lower_tier',
|
48
|
+
config: @config
|
49
|
+
)
|
50
|
+
|
51
|
+
ActiveGenie::Logger.debug({
|
52
|
+
code: :battle,
|
53
|
+
player_1: @player_1[0..30],
|
54
|
+
player_2: @player_2[0..30],
|
55
|
+
criteria: @criteria[0..30],
|
56
|
+
winner: response['impartial_judge_winner'],
|
57
|
+
reasoning: response['impartial_judge_winner_reasoning']
|
58
|
+
})
|
59
|
+
|
60
|
+
response_formatted(response)
|
48
61
|
end
|
49
62
|
|
50
63
|
private
|
51
64
|
|
52
|
-
def
|
53
|
-
|
65
|
+
def response_formatted(response)
|
66
|
+
winner = response['impartial_judge_winner']
|
67
|
+
loser = case response['impartial_judge_winner']
|
68
|
+
when 'player_1' then 'player_2'
|
69
|
+
when 'player_2' then 'player_1'
|
70
|
+
end
|
54
71
|
|
55
|
-
|
56
|
-
end
|
57
|
-
|
58
|
-
def response_formatted
|
59
|
-
winner = case @response['winner']
|
60
|
-
when 'player_a' then @player_a
|
61
|
-
when 'player_b' then @player_b
|
62
|
-
end
|
63
|
-
|
64
|
-
@response.merge!('winner' => winner, 'loser' => winner ? (winner == @player_a ? @player_b : @player_a) : nil)
|
72
|
+
{ 'winner' => winner, 'loser' => loser, 'reasoning' => response['impartial_judge_winner_reasoning'] }
|
65
73
|
end
|
66
74
|
|
67
75
|
PROMPT = <<~PROMPT
|
68
|
-
|
69
|
-
|
70
|
-
Consider rules, keywords, and patterns as the criteria for evaluation. Analyze the content from both players objectively, focusing on who meets the criteria most effectively. Explain your decision clearly, with specific reasoning on how the chosen player fulfilled the criteria better than the other. Avoid selecting a draw unless absolutely necessary.
|
76
|
+
Based on two players, player_1 and player_2, they will battle against each other based on criteria. Criteria are vital as they provide a clear metric to compare the players. Follow these criteria strictly.
|
71
77
|
|
72
78
|
# Steps
|
73
|
-
1.
|
74
|
-
2.
|
75
|
-
3.
|
76
|
-
4.
|
77
|
-
5.
|
79
|
+
1. player_1 presents their strengths and how they meet the criteria. Max of 100 words.
|
80
|
+
2. player_2 presents their strengths and how they meet the criteria. Max of 100 words.
|
81
|
+
3. player_1 argues why they should be the winner compared to player_2. Max of 100 words.
|
82
|
+
4. player_2 counter-argues why they should be the winner compared to player_1. Max of 100 words.
|
83
|
+
5. The impartial judge chooses the winner.
|
78
84
|
|
79
|
-
#
|
80
|
-
-
|
81
|
-
- Input: Player A uses keyword X, follows rule Y, Player B uses keyword Z, breaks rule Y.
|
82
|
-
- Output: winner: player_a
|
83
|
-
- Justification: Player A successfully used keyword X and followed rule Y, whereas Player B broke rule Y.
|
84
|
-
|
85
|
-
- **Example 2**:
|
86
|
-
- Input: Player A matches pattern P, Player B matches pattern P, uses keyword Q.
|
87
|
-
- Output: winner: player_b
|
88
|
-
- Justification: Both matched pattern P, but Player B also used keyword Q, meeting more criteria.
|
85
|
+
# Output Format
|
86
|
+
- The impartial judge chooses this player as the winner.
|
89
87
|
|
90
88
|
# Notes
|
91
|
-
- Avoid
|
89
|
+
- Avoid resulting in a draw. Use reasoning or make fair assumptions if needed.
|
92
90
|
- Critically assess each player's adherence to the criteria.
|
93
91
|
- Clearly communicate the reasoning behind your decision.
|
94
92
|
PROMPT
|
95
93
|
|
96
94
|
FUNCTION = {
|
97
95
|
name: 'battle_evaluation',
|
98
|
-
description: 'Evaluate a battle between
|
96
|
+
description: 'Evaluate a battle between player_1 and player_2 using predefined criteria and identify the winner.',
|
99
97
|
schema: {
|
100
98
|
type: "object",
|
101
99
|
properties: {
|
102
|
-
|
100
|
+
player_1_sell_himself: {
|
103
101
|
type: 'string',
|
104
|
-
description: '
|
105
|
-
enum: ['player_a', 'player_b', 'draw']
|
102
|
+
description: 'player_1 presents their strengths and how they meet the criteria. Max of 100 words.',
|
106
103
|
},
|
107
|
-
|
104
|
+
player_2_sell_himself: {
|
108
105
|
type: 'string',
|
109
|
-
description: '
|
106
|
+
description: 'player_2 presents their strengths and how they meet the criteria. Max of 100 words.',
|
110
107
|
},
|
111
|
-
|
108
|
+
player_1_arguments: {
|
112
109
|
type: 'string',
|
113
|
-
description: '
|
114
|
-
}
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
110
|
+
description: 'player_1 arguments for why they should be the winner compared to player_2. Max of 100 words.',
|
111
|
+
},
|
112
|
+
player_2_counter: {
|
113
|
+
type: 'string',
|
114
|
+
description: 'player_2 counter arguments for why they should be the winner compared to player_1. Max of 100 words.',
|
115
|
+
},
|
116
|
+
impartial_judge_winner_reasoning: {
|
117
|
+
type: 'string',
|
118
|
+
description: 'The detailed reasoning about why the impartial judge chose the winner. Max of 100 words.',
|
119
|
+
},
|
120
|
+
impartial_judge_winner: {
|
121
|
+
type: 'string',
|
122
|
+
description: 'Who is the winner based on the impartial judge reasoning?',
|
123
|
+
enum: ['player_1', 'player_2']
|
124
|
+
},
|
125
125
|
},
|
126
|
-
|
126
|
+
required: [
|
127
|
+
'player_1_sell_himself',
|
128
|
+
'player_2_sell_himself',
|
129
|
+
'player_1_arguments',
|
130
|
+
'player_2_counter',
|
131
|
+
'impartial_judge_winner_reasoning',
|
132
|
+
'impartial_judge_winner'
|
133
|
+
]
|
127
134
|
}
|
128
|
-
|
135
|
+
}
|
129
136
|
end
|
130
137
|
end
|
data/lib/active_genie/battle.rb
CHANGED
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
require_relative './helpers/retry'
|
5
|
+
|
6
|
+
module ActiveGenie
|
7
|
+
module Clients
|
8
|
+
# Client for interacting with the Anthropic (Claude) API with json response
|
9
|
+
class AnthropicClient
|
10
|
+
class AnthropicError < StandardError; end
|
11
|
+
class RateLimitError < AnthropicError; end
|
12
|
+
|
13
|
+
def initialize(config)
|
14
|
+
@app_config = config
|
15
|
+
end
|
16
|
+
|
17
|
+
# Requests structured JSON output from the Anthropic Claude model based on a schema.
|
18
|
+
#
|
19
|
+
# @param messages [Array<Hash>] A list of messages representing the conversation history.
|
20
|
+
# Each hash should have :role ('user', 'assistant', or 'system') and :content (String).
|
21
|
+
# Claude uses 'user', 'assistant', and 'system' roles.
|
22
|
+
# @param function [Hash] A JSON schema definition describing the desired output format.
|
23
|
+
# @param model_tier [Symbol, nil] A symbolic representation of the model quality/size tier.
|
24
|
+
# @param config [Hash] Optional configuration overrides:
|
25
|
+
# - :api_key [String] Override the default API key.
|
26
|
+
# - :model [String] Override the model name directly.
|
27
|
+
# - :max_retries [Integer] Max retries for the request.
|
28
|
+
# - :retry_delay [Integer] Initial delay for retries.
|
29
|
+
# - :anthropic_version [String] Override the default Anthropic API version.
|
30
|
+
# @return [Hash, nil] The parsed JSON object matching the schema, or nil if parsing fails or content is empty.
|
31
|
+
def function_calling(messages, function, model_tier: nil, config: {})
|
32
|
+
model = config[:runtime][:model] || @app_config.tier_to_model(model_tier)
|
33
|
+
|
34
|
+
system_message = messages.find { |m| m[:role] == 'system' }&.dig(:content) || ''
|
35
|
+
user_messages = messages.select { |m| m[:role] == 'user' || m[:role] == 'assistant' }
|
36
|
+
.map { |m| { role: m[:role], content: m[:content] } }
|
37
|
+
|
38
|
+
anthropic_function = function
|
39
|
+
anthropic_function[:input_schema] = function[:schema]
|
40
|
+
anthropic_function.delete(:schema)
|
41
|
+
|
42
|
+
payload = {
|
43
|
+
model:,
|
44
|
+
system: system_message,
|
45
|
+
messages: user_messages,
|
46
|
+
tools: [anthropic_function],
|
47
|
+
tool_choice: { name: anthropic_function[:name], type: 'tool' },
|
48
|
+
max_tokens: config[:runtime][:max_tokens],
|
49
|
+
temperature: config[:runtime][:temperature] || 0,
|
50
|
+
}
|
51
|
+
|
52
|
+
api_key = config[:runtime][:api_key] || @app_config.api_key
|
53
|
+
headers = DEFAULT_HEADERS.merge(
|
54
|
+
'x-api-key': api_key,
|
55
|
+
'anthropic-version': config[:anthropic_version] || ANTHROPIC_VERSION
|
56
|
+
).compact
|
57
|
+
|
58
|
+
retry_with_backoff(config:) do
|
59
|
+
response = request(payload, headers, config:)
|
60
|
+
content = response.dig('content', 0, 'input')
|
61
|
+
|
62
|
+
ActiveGenie::Logger.trace({code: :function_calling, payload:, parsed_response: content})
|
63
|
+
|
64
|
+
content
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
DEFAULT_HEADERS = {
|
71
|
+
'Content-Type': 'application/json',
|
72
|
+
}
|
73
|
+
ANTHROPIC_VERSION = '2023-06-01'
|
74
|
+
|
75
|
+
def request(payload, headers, config:)
|
76
|
+
start_time = Time.now
|
77
|
+
|
78
|
+
retry_with_backoff(config:) do
|
79
|
+
response = Net::HTTP.post(
|
80
|
+
URI("#{@app_config.api_url}/v1/messages"),
|
81
|
+
payload.to_json,
|
82
|
+
headers
|
83
|
+
)
|
84
|
+
|
85
|
+
if response.is_a?(Net::HTTPTooManyRequests)
|
86
|
+
raise RateLimitError, "Anthropic API rate limit exceeded: #{response.body}"
|
87
|
+
end
|
88
|
+
|
89
|
+
raise AnthropicError, response.body unless response.is_a?(Net::HTTPSuccess)
|
90
|
+
|
91
|
+
return nil if response.body.empty?
|
92
|
+
|
93
|
+
parsed_body = JSON.parse(response.body)
|
94
|
+
|
95
|
+
ActiveGenie::Logger.trace({
|
96
|
+
code: :llm_usage,
|
97
|
+
input_tokens: parsed_body.dig('usage', 'input_tokens'),
|
98
|
+
output_tokens: parsed_body.dig('usage', 'output_tokens'),
|
99
|
+
total_tokens: parsed_body.dig('usage', 'input_tokens') + parsed_body.dig('usage', 'output_tokens'),
|
100
|
+
model: payload[:model],
|
101
|
+
duration: Time.now - start_time,
|
102
|
+
usage: parsed_body.dig('usage')
|
103
|
+
})
|
104
|
+
|
105
|
+
parsed_body
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|