active_genie 0.0.24 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +35 -50
  3. data/VERSION +1 -1
  4. data/lib/active_genie/battle/README.md +5 -5
  5. data/lib/active_genie/battle/generalist.rb +132 -0
  6. data/lib/active_genie/battle.rb +6 -5
  7. data/lib/active_genie/clients/providers/anthropic_client.rb +77 -0
  8. data/lib/active_genie/clients/{base_client.rb → providers/base_client.rb} +74 -100
  9. data/lib/active_genie/clients/providers/deepseek_client.rb +91 -0
  10. data/lib/active_genie/clients/providers/google_client.rb +132 -0
  11. data/lib/active_genie/clients/providers/openai_client.rb +96 -0
  12. data/lib/active_genie/clients/unified_client.rb +42 -12
  13. data/lib/active_genie/concerns/loggable.rb +11 -23
  14. data/lib/active_genie/config/battle_config.rb +8 -0
  15. data/lib/active_genie/config/data_extractor_config.rb +23 -0
  16. data/lib/active_genie/config/llm_config.rb +36 -0
  17. data/lib/active_genie/config/log_config.rb +44 -0
  18. data/lib/active_genie/config/providers/anthropic_config.rb +57 -0
  19. data/lib/active_genie/config/providers/deepseek_config.rb +50 -0
  20. data/lib/active_genie/config/providers/google_config.rb +52 -0
  21. data/lib/active_genie/config/providers/openai_config.rb +50 -0
  22. data/lib/active_genie/config/providers/provider_base.rb +89 -0
  23. data/lib/active_genie/config/providers_config.rb +62 -0
  24. data/lib/active_genie/config/ranking_config.rb +21 -0
  25. data/lib/active_genie/config/scoring_config.rb +8 -0
  26. data/lib/active_genie/configuration.rb +51 -28
  27. data/lib/active_genie/data_extractor/README.md +13 -13
  28. data/lib/active_genie/data_extractor/from_informal.rb +54 -48
  29. data/lib/active_genie/data_extractor/generalist.md +12 -0
  30. data/lib/active_genie/data_extractor/generalist.rb +125 -0
  31. data/lib/active_genie/data_extractor.rb +7 -5
  32. data/lib/active_genie/errors/invalid_provider_error.rb +41 -0
  33. data/lib/active_genie/logger.rb +17 -66
  34. data/lib/active_genie/ranking/README.md +31 -1
  35. data/lib/active_genie/ranking/elo_round.rb +107 -104
  36. data/lib/active_genie/ranking/free_for_all.rb +78 -74
  37. data/lib/active_genie/ranking/player.rb +79 -71
  38. data/lib/active_genie/ranking/players_collection.rb +83 -71
  39. data/lib/active_genie/ranking/ranking.rb +71 -94
  40. data/lib/active_genie/ranking/ranking_scoring.rb +71 -50
  41. data/lib/active_genie/ranking.rb +2 -0
  42. data/lib/active_genie/scoring/README.md +4 -4
  43. data/lib/active_genie/scoring/generalist.rb +171 -0
  44. data/lib/active_genie/scoring/recommended_reviewers.rb +70 -71
  45. data/lib/active_genie/scoring.rb +8 -5
  46. data/lib/active_genie.rb +23 -1
  47. data/lib/tasks/benchmark.rake +10 -9
  48. data/lib/tasks/install.rake +3 -1
  49. data/lib/tasks/templates/active_genie.rb +11 -6
  50. metadata +31 -22
  51. data/lib/active_genie/battle/basic.rb +0 -129
  52. data/lib/active_genie/clients/anthropic_client.rb +0 -84
  53. data/lib/active_genie/clients/google_client.rb +0 -135
  54. data/lib/active_genie/clients/helpers/retry.rb +0 -29
  55. data/lib/active_genie/clients/openai_client.rb +0 -98
  56. data/lib/active_genie/configuration/log_config.rb +0 -14
  57. data/lib/active_genie/configuration/providers/anthropic_config.rb +0 -54
  58. data/lib/active_genie/configuration/providers/base_config.rb +0 -85
  59. data/lib/active_genie/configuration/providers/deepseek_config.rb +0 -54
  60. data/lib/active_genie/configuration/providers/google_config.rb +0 -56
  61. data/lib/active_genie/configuration/providers/internal_company_api_config.rb +0 -54
  62. data/lib/active_genie/configuration/providers/openai_config.rb +0 -54
  63. data/lib/active_genie/configuration/providers_config.rb +0 -40
  64. data/lib/active_genie/configuration/runtime_config.rb +0 -35
  65. data/lib/active_genie/data_extractor/basic.rb +0 -101
  66. data/lib/active_genie/scoring/basic.rb +0 -170
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 88cd13341dc47e212e3d0cafdf530dfe6f6360c482d15e999330e231a03fbdd3
4
- data.tar.gz: 44465211f6da6fa22815073156a4c2f99630272db3976a893c4b3653b83bd4fd
3
+ metadata.gz: 5197c43d9b0e25e91a23602e0c694eda8fe9cac3db52c5f98a93410925eb581f
4
+ data.tar.gz: d44255a41f1de2c2738486e0852cc3c5d3362d979bc6ac00e40e0bd08caed786
5
5
  SHA512:
6
- metadata.gz: 3a8f1bee6522930c86120feb3ec5a4fd0c1e38f0afae287bb5892bfbf949449500b4e186f74048d2a06563f61254f66cf1eee26bd73d0b7876d02be923dc7213
7
- data.tar.gz: f2eafe8adb628044264436b197c54e09c4a8b27af210bf62c57071bfb83e5631144bc32ac61647f1e714249b181ede1acfa4f9315516da44099ce964ace1e72a
6
+ metadata.gz: 26011be1a41293f5fb5968f345f19a32bf52f74fdb27080f07d30fac3036a6b5ac31b6bc4933f5f9a780bd6f16c529157fc4ea210cdf884f7b093e99851c641b
7
+ data.tar.gz: 1a31a9253ba70e727d54af1f1d531f90ecaee21405ed7b2989aafb972c52329e71293fc1af9b975627767da25a40a06fc928af598d8603a84c6ce1828a225e5d
data/README.md CHANGED
@@ -28,7 +28,7 @@ rails g active_genie:install
28
28
  4. Configure your credentials in `config/initializers/active_genie.rb`:
29
29
  ```ruby
30
30
  ActiveGenie.configure do |config|
31
- config.openai.api_key = ENV['OPENAI_API_KEY']
31
+ config.providers.openai.api_key = ENV['OPENAI_API_KEY']
32
32
  end
33
33
  ```
34
34
 
@@ -41,11 +41,11 @@ Extract structured data from text using AI-powered analysis, handling informal l
41
41
  ```ruby
42
42
  text = "Nike Air Max 90 - Size 42 - $199.99"
43
43
  schema = {
44
- brand: {
44
+ brand: {
45
45
  type: 'string',
46
46
  enum: ["Nike", "Adidas", "Puma"]
47
47
  },
48
- price: {
48
+ price: {
49
49
  type: 'number',
50
50
  minimum: 0
51
51
  },
@@ -59,10 +59,10 @@ schema = {
59
59
  result = ActiveGenie::DataExtractor.call(
60
60
  text,
61
61
  schema,
62
- config: { provider: :openai, model: 'gpt-4o-mini' } # optional
62
+ config: { provider: :openai, model: 'gpt-4.1-mini' } # optional
63
63
  )
64
- # => {
65
- # brand: "Nike",
64
+ # => {
65
+ # brand: "Nike",
66
66
  # brand_explanation: "Brand name found at start of text",
67
67
  # price: 199.99,
68
68
  # price_explanation: "Price found in USD format at end",
@@ -88,7 +88,7 @@ Text evaluation system that provides detailed scoring and feedback using multipl
88
88
  text = "The code implements a binary search algorithm with O(log n) complexity"
89
89
  criteria = "Evaluate technical accuracy and clarity"
90
90
 
91
- result = ActiveGenie::Scoring.basic(
91
+ result = ActiveGenie::Scoring.call(
92
92
  text,
93
93
  criteria,
94
94
  config: { provider: :anthropic, model: 'claude-3-5-haiku-20241022' } # optional
@@ -130,8 +130,8 @@ result = ActiveGenie::Battle.call(
130
130
  )
131
131
  # => {
132
132
  # winner_player: "Implementation uses dependency injection for better testability",
133
- # reasoning: "Player 1 implementation demonstrates better maintainability through dependency injection,
134
- # which allows for easier testing and component replacement. While Player 2 has good test coverage,
133
+ # reasoning: "Player 1 implementation demonstrates better maintainability through dependency injection,
134
+ # which allows for easier testing and component replacement. While Player 2 has good test coverage,
135
135
  # the tight coupling makes the code harder to maintain and modify.",
136
136
  # what_could_be_changed_to_avoid_draw: "Focus on specific architectural patterns and design principles"
137
137
  # }
@@ -221,56 +221,41 @@ See the [Benchmark README](benchmark/README.md) for detailed results, methodolog
221
221
  ActiveGenie supports adding custom providers to integrate with different LLM services. To create a new provider:
222
222
 
223
223
  1. Create a client class for your provider that implements `function_calling(messages, function)`:
224
+ 2. Register your client
224
225
 
225
226
  ```ruby
226
- # Example: lib/active_genie/configuration/providers/internal_company_api_config.rb
227
- module ActiveGenie
228
- module Configuration::Providers
229
- class InternalCompanyApiConfig < BaseConfig
230
- NAME = :internal_company_api
231
-
232
- # API key accessor with environment variable fallback
233
- def api_key
234
- @api_key || ENV['INTERNAL_COMPANY_API_KEY']
235
- end
236
-
237
- # Base API URL
238
- def api_url
239
- @api_url || 'https://api.internal-company.com/v1'
240
- end
241
-
242
- # Client instantiation
243
- def client
244
- @client ||= ::ActiveGenie::Clients::InternalCompanyApiClient.new(self)
245
- end
246
-
247
- # Model tier definitions
248
- def lower_tier_model
249
- @lower_tier_model || 'internal-basic'
250
- end
251
-
252
- def middle_tier_model
253
- @middle_tier_model || 'internal-standard'
254
- end
255
-
256
- def upper_tier_model
257
- @upper_tier_model || 'internal-premium'
258
- end
259
- end
227
+ class InternalCompanyApi
228
+ # @param messages [Array<Hash>] A list of messages representing the conversation history.
229
+ # Each hash should have :role ('user', 'assistant', or 'system') and :content (String).
230
+ # @param function [Hash] A JSON schema definition describing the desired output format.
231
+ # @return [Hash, nil] The parsed JSON object matching the schema, or nil if parsing fails or content is empty.
232
+ def function_calling(messages, function)
233
+ # ...
260
234
  end
261
235
  end
236
+
237
+ ActiveGenie.configure do |config|
238
+ config.llm.client = InternalCompanyApi
239
+ end
240
+ # or
241
+ ActiveGenie::Battle.call('player_1', 'player_2', 'criteria', { client: InternalCompanyApi })
262
242
  ```
263
243
 
264
- 2. Register your provider in your configuration:
244
+ ## Observability
245
+ Fundamental to managing any production system, observability is crucial for GenAI features. At a minimum, track these key metrics:
246
+
247
+ - Usage Rate (e.g., uses_per_minute): Detect anomalies like sudden traffic spikes (potential DDoS) or drops (feature outage or declining usage).
248
+ - Failure/Retry Rate (e.g., retry_count, fail_count): Monitor the frequency of errors. Exceeding a defined threshold should trigger downtime or degradation alerts.
249
+ - Token Consumption (e.g., tokens_used): Track usage to monitor costs. Set alerts if tokens_used * price_per_token exceeds budget thresholds.
265
250
 
266
251
  ```ruby
267
- # In config/initializers/active_genie.rb
268
252
  ActiveGenie.configure do |config|
269
- # Register your custom provider
270
- config.providers.register(InternalCompanyApi::Configuration)
271
-
272
- # Configure your provider
273
- config.internal_company_api.api_key = ENV['INTERNAL_COMPANY_API_KEY']
253
+ config.log.add_observer(scope: { code: :llm_usage }) do |log|
254
+ puts "LLM Usage: #{log[:model]} - #{log[:total_tokens]} tokens"
255
+ end
256
+ config.log.add_observer(scope: { code: :retry_attempt }) do |log|
257
+ puts "Retry Attempt: #{log[:attempt]} of #{log[:max_retries]}"
258
+ end
274
259
  end
275
260
  ```
276
261
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.24
1
+ 0.0.25
@@ -16,18 +16,18 @@ player_1 = "Implementation uses dependency injection for better testability"
16
16
  player_2 = "Code has high test coverage but tightly coupled components"
17
17
  criteria = "Evaluate code quality and maintainability"
18
18
 
19
- result = ActiveGenie::Battle::Basic.call(player_1, player_2, criteria)
19
+ result = ActiveGenie::Battle.call(player_1, player_2, criteria)
20
20
  # => {
21
21
  # winner_player: "Implementation uses dependency injection for better testability",
22
- # reasoning: "Player A's implementation demonstrates better maintainability through dependency injection,
23
- # which allows for easier testing and component replacement. While Player B has good test coverage,
22
+ # reasoning: "Player A's implementation demonstrates better maintainability through dependency injection,
23
+ # which allows for easier testing and component replacement. While Player B has good test coverage,
24
24
  # the tight coupling makes the code harder to maintain and modify.",
25
25
  # what_could_be_changed_to_avoid_draw: "Focus on specific architectural patterns and design principles"
26
26
  # }
27
27
  ```
28
28
 
29
29
  ## Interface
30
- ### Basic.call(player_1, player_2, criteria, config: {})
30
+ ### .call(player_1, player_2, criteria, config: {})
31
31
  - `player_1` [String, Hash] - The content or submission from the first player
32
32
  - `player_2` [String, Hash] - The content or submission from the second player
33
33
  - `criteria` [String] - The evaluation criteria or rules to assess against
@@ -36,4 +36,4 @@ result = ActiveGenie::Battle::Basic.call(player_1, player_2, criteria)
36
36
  Returns a Hash containing:
37
37
  - `winner_player` [String, Hash] - The winning player's content (either player_1 or player_2)
38
38
  - `reasoning` [String] - Detailed explanation of why the winner was chosen
39
- - `what_could_be_changed_to_avoid_draw` [String] - A suggestion on how to avoid a draw
39
+ - `what_could_be_changed_to_avoid_draw` [String] - A suggestion on how to avoid a draw
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../clients/unified_client'
4
+
5
+ module ActiveGenie
6
+ module Battle
7
+ # The Generalist class provides a foundation for evaluating battles between two players
8
+ # using AI-powered evaluation. It determines a winner based on specified criteria,
9
+ # analyzing how well each player meets the requirements.
10
+ #
11
+ # The battle evaluation process compares two players' content against given criteria
12
+ # and returns detailed feedback including the winner and reasoning for the decision.
13
+ #
14
+ # @example Generalist usage with two players and criteria
15
+ # Generalist.call("Player A content", "Player B content", "Evaluate keyword usage and pattern matching")
16
+ #
17
+ class Generalist
18
+ def self.call(...)
19
+ new(...).call
20
+ end
21
+
22
+ # @param player_1 [String] The content or submission from the first player
23
+ # @param player_2 [String] The content or submission from the second player
24
+ # @param criteria [String] The evaluation criteria or rules to assess against
25
+ # @param config [Hash] Additional configuration options that modify the battle evaluation behavior
26
+ # @return [Hash] The evaluation result (String keys) containing the winner, loser and reasoning
27
+ # @return [String] 'winner' The winner, either 'player_1' or 'player_2'
28
+ # @return [String] 'reasoning' Detailed explanation of why the winner was chosen
29
+ # @return [String, nil] 'loser' The other player; nil if the winner is neither 'player_1' nor 'player_2'
30
+ def initialize(player_1, player_2, criteria, config: {})
31
+ @player_1 = player_1
32
+ @player_2 = player_2
33
+ @criteria = criteria
34
+ @config = ActiveGenie.configuration.merge(config)
35
+ end
36
+
37
+ def call
38
+ messages = [
39
+ { role: 'system', content: PROMPT },
40
+ { role: 'user', content: "criteria: #{@criteria}" },
41
+ { role: 'user', content: "player_1: #{@player_1}" },
42
+ { role: 'user', content: "player_2: #{@player_2}" }
43
+ ]
44
+
45
+ response = ::ActiveGenie::Clients::UnifiedClient.function_calling(
46
+ messages,
47
+ FUNCTION,
48
+ config: @config
49
+ )
50
+
51
+ ActiveGenie::Logger.call({
52
+ code: :battle,
53
+ player_1: @player_1[0..30],
54
+ player_2: @player_2[0..30],
55
+ criteria: @criteria[0..30],
56
+ winner: response['impartial_judge_winner'],
57
+ reasoning: response['impartial_judge_winner_reasoning']
58
+ })
59
+
60
+ response_formatted(response)
61
+ end
62
+
63
+ private
64
+
65
+ def response_formatted(response)
66
+ winner = response['impartial_judge_winner']
67
+ loser = case response['impartial_judge_winner']
68
+ when 'player_1' then 'player_2'
69
+ when 'player_2' then 'player_1'
70
+ end
71
+
72
+ { 'winner' => winner, 'loser' => loser, 'reasoning' => response['impartial_judge_winner_reasoning'] }
73
+ end
74
+
75
+ PROMPT = <<~PROMPT
76
+ Based on two players, player_1 and player_2, they will battle against each other based on criteria. Criteria are vital as they provide a clear metric to compare the players. Follow these criteria strictly.
77
+
78
+ # Steps
79
+ 1. player_1 presents their strengths and how they meet the criteria. Max of 100 words.
80
+ 2. player_2 presents their strengths and how they meet the criteria. Max of 100 words.
81
+ 3. player_1 argues why they should be the winner compared to player_2. Max of 100 words.
82
+ 4. player_2 counter-argues why they should be the winner compared to player_1. Max of 100 words.
83
+ 5. The impartial judge chooses the winner.
84
+
85
+ # Output Format
86
+ - The impartial judge chooses this player as the winner.
87
+
88
+ # Notes
89
+ - Avoid resulting in a draw. Use reasoning or make fair assumptions if needed.
90
+ - Critically assess each player's adherence to the criteria.
91
+ - Clearly communicate the reasoning behind your decision.
92
+ PROMPT
93
+
94
+ FUNCTION = {
95
+ name: 'battle_evaluation',
96
+ description: 'Evaluate a battle between player_1 and player_2 using predefined criteria and identify the winner.',
97
+ parameters: {
98
+ type: 'object',
99
+ properties: {
100
+ player_1_sell_himself: {
101
+ type: 'string',
102
+ description: 'player_1 presents their strengths and how they meet the criteria. Max of 100 words.'
103
+ },
104
+ player_2_sell_himself: {
105
+ type: 'string',
106
+ description: 'player_2 presents their strengths and how they meet the criteria. Max of 100 words.'
107
+ },
108
+ player_1_arguments: {
109
+ type: 'string',
110
+ description: 'player_1 arguments for why they should be the winner compared to player_2. Max of 100 words.'
111
+ },
112
+ player_2_counter: {
113
+ type: 'string',
114
+ description: 'player_2 counter arguments for why they should be the winner compared to player_1. Max of 100 words.'
115
+ },
116
+ impartial_judge_winner_reasoning: {
117
+ type: 'string',
118
+ description: 'The detailed reasoning about why the impartial judge chose the winner. Max of 100 words.'
119
+ },
120
+ impartial_judge_winner: {
121
+ type: 'string',
122
+ description: 'Who is the winner based on the impartial judge reasoning?',
123
+ enum: %w[player_1 player_2]
124
+ }
125
+ },
126
+ required: %w[player_1_sell_himself player_2_sell_himself player_1_arguments player_2_counter
127
+ impartial_judge_winner_reasoning impartial_judge_winner]
128
+ }
129
+ }.freeze
130
+ end
131
+ end
132
+ end
@@ -1,17 +1,18 @@
1
+ # frozen_string_literal: true
1
2
 
2
- require_relative 'battle/basic'
3
+ require_relative 'battle/generalist'
3
4
 
4
5
  module ActiveGenie
5
6
  # See the [Battle README](lib/active_genie/battle/README.md) for more information.
6
7
  module Battle
7
8
  module_function
8
9
 
9
- def basic(...)
10
- Basic.call(...)
10
+ def call(...)
11
+ Generalist.call(...)
11
12
  end
12
13
 
13
- def call(...)
14
- Basic.call(...)
14
+ def generalist(...)
15
+ Generalist.call(...)
15
16
  end
16
17
  end
17
18
  end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'json'
4
+ require 'net/http'
5
+ require 'uri'
6
+ require_relative './base_client'
7
+
8
+ module ActiveGenie
9
+ module Clients
10
+ # Client for the Anthropic (Claude) Messages API that returns structured JSON responses
11
+ class AnthropicClient < BaseClient
12
+ class AnthropicError < ClientError; end
13
+ class RateLimitError < AnthropicError; end
14
+
15
+ ANTHROPIC_ENDPOINT = '/v1/messages'
16
+
17
+ # Requests structured JSON output from the Anthropic Claude model based on a schema.
18
+ #
19
+ # @param messages [Array<Hash>] A list of messages representing the conversation history.
20
+ # Each hash should have :role ('user', 'assistant', or 'system') and :content (String).
21
+ # The first 'system' message is sent as the top-level `system` field; 'user' and 'assistant' messages are forwarded as-is.
22
+ # @param function [Hash] A JSON schema definition describing the desired output format.
23
+ # @return [Hash, nil] The parsed JSON object matching the schema, or nil if parsing fails or content is empty.
24
+ def function_calling(messages, function)
25
+ model = @config.llm.model || @config.providers.anthropic.tier_to_model(@config.llm.model_tier)
26
+
27
+ system_message = messages.find { |m| m[:role] == 'system' }&.dig(:content) || ''
28
+ user_messages = messages.select { |m| %w[user assistant].include?(m[:role]) }
29
+ .map { |m| { role: m[:role], content: m[:content] } }
30
+
31
+ anthropic_function = function.dup
32
+ anthropic_function[:input_schema] = function[:parameters]
33
+ anthropic_function.delete(:parameters)
34
+
35
+ payload = {
36
+ model:,
37
+ system: system_message,
38
+ messages: user_messages,
39
+ tools: [anthropic_function],
40
+ tool_choice: { name: anthropic_function[:name], type: 'tool' },
41
+ max_tokens: @config.llm.max_tokens,
42
+ temperature: @config.llm.temperature || 0
43
+ }
44
+
45
+ headers = {
46
+ 'x-api-key': @config.providers.anthropic.api_key,
47
+ 'anthropic-version': @config.providers.anthropic.anthropic_version
48
+ }.compact
49
+
50
+ retry_with_backoff do
51
+ start_time = Time.now
52
+ url = "#{@config.providers.anthropic.api_url}#{ANTHROPIC_ENDPOINT}"
53
+
54
+ response = post(url, payload, headers: headers)
55
+
56
+ content = response.dig('content', 0, 'input')
57
+
58
+ ActiveGenie::Logger.call({
59
+ code: :llm_usage,
60
+ input_tokens: response.dig('usage', 'input_tokens'),
61
+ output_tokens: response.dig('usage', 'output_tokens'),
62
+ total_tokens: response.dig('usage',
63
+ 'input_tokens') + response.dig('usage',
64
+ 'output_tokens'),
65
+ model: payload[:model],
66
+ duration: Time.now - start_time,
67
+ usage: response['usage']
68
+ })
69
+
70
+ ActiveGenie::Logger.call({ code: :function_calling, payload:, parsed_response: content })
71
+
72
+ content
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end