hyrum 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,12 @@
1
+ <% if validation_result && show_scores -%>
2
+ # Quality Score: <%= validation_result.score %>/100
3
+ # - Semantic similarity: <%= validation_result.semantic_similarity %>% (variations preserve meaning)
4
+ # - Lexical diversity: <%= validation_result.lexical_diversity %>% (variation in wording)
5
+ <% validation_result.warnings.each do |warning| -%>
6
+ # Warning: <%= warning %>
7
+ <% end -%>
8
+ #
9
+ <% end -%>
1
10
  # frozen_string_literal: true
2
11
 
3
12
  module Messages
@@ -1,3 +1,12 @@
1
+ <% if validation_result && show_scores -%>
2
+ Quality Score: <%= validation_result.score %>/100
3
+ - Semantic similarity: <%= validation_result.semantic_similarity %>% (variations preserve meaning)
4
+ - Lexical diversity: <%= validation_result.lexical_diversity %>% (variation in wording)
5
+ <% validation_result.warnings.each do |warning| -%>
6
+ Warning: <%= warning %>
7
+ <% end -%>
8
+
9
+ <% end -%>
1
10
  <% messages.each do |key, values| -%>
2
11
  Messages for <%= key %>:
3
12
  <% values.each do |msg| -%>
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby_llm'
4
+
5
+ module Hyrum
6
+ module Generators
7
+ class AiGenerator
8
+ API_KEY_ENV_VARS = {
9
+ openai: 'OPENAI_API_KEY',
10
+ anthropic: 'ANTHROPIC_API_KEY',
11
+ gemini: 'GEMINI_API_KEY',
12
+ ollama: 'OLLAMA_API_BASE',
13
+ vertexai: 'GOOGLE_CLOUD_PROJECT',
14
+ bedrock: 'AWS_ACCESS_KEY_ID',
15
+ deepseek: 'DEEPSEEK_API_KEY',
16
+ mistral: 'MISTRAL_API_KEY',
17
+ perplexity: 'PERPLEXITY_API_KEY',
18
+ openrouter: 'OPENROUTER_API_KEY',
19
+ gpustack: 'GPUSTACK_API_KEY'
20
+ }.freeze
21
+
22
+ attr_reader :options
23
+
24
+ def initialize(options)
25
+ @options = options
26
+ end
27
+
28
+ def generate
29
+ response = chat.ask(prompt)
30
+ puts "AI response: #{response.inspect}" if options[:verbose]
31
+
32
+ # Prepend the original message to the generated variations
33
+ # RubyLLM returns string keys, but our options use symbols
34
+ result = response.content.dup
35
+ key_str = options[:key].to_s
36
+ if result[key_str].is_a?(Array)
37
+ result[key_str] = [options[:message]] + result[key_str]
38
+ end
39
+
40
+ # Convert string keys to symbols for consistency with the rest of hyrum
41
+ result.transform_keys(&:to_sym)
42
+ rescue RubyLLM::ConfigurationError => e
43
+ handle_configuration_error(e)
44
+ rescue RubyLLM::Error => e
45
+ handle_general_error(e)
46
+ end
47
+
48
+ private
49
+
50
+ def chat
51
+ @chat ||= RubyLLM.chat(
52
+ model: options[:ai_model].to_s,
53
+ provider: options[:ai_service]
54
+ ).with_schema(response_schema)
55
+ end
56
+
57
+ def prompt
58
+ <<~PROMPT
59
+ Please provide #{options[:number]} alternative status messages for the following message:
60
+ "#{options[:message]}"
61
+
62
+ The messages should be unique and informative.
63
+ PROMPT
64
+ end
65
+
66
+ # rubocop:disable Metrics/MethodLength
67
+ def response_schema
68
+ {
69
+ type: 'object',
70
+ properties: {
71
+ options[:key] => {
72
+ type: 'array',
73
+ items: { type: 'string' },
74
+ minItems: options[:number],
75
+ maxItems: options[:number]
76
+ }
77
+ },
78
+ required: [options[:key].to_s],
79
+ additionalProperties: false
80
+ }
81
+ end
82
+ # rubocop:enable Metrics/MethodLength
83
+
84
+ def handle_configuration_error(error)
85
+ puts "Configuration Error: #{error.message}"
86
+ puts "Please set the required API key for #{options[:ai_service]}."
87
+ puts "Example: export #{api_key_env_var_name}=your-key-here"
88
+ exit 1
89
+ end
90
+
91
+ def handle_general_error(error)
92
+ puts "Error: #{error.message}"
93
+ puts 'Please check your configuration and try again.'
94
+ exit 1
95
+ end
96
+
97
+ def api_key_env_var_name
98
+ API_KEY_ENV_VARS.fetch(options[:ai_service], "#{options[:ai_service].to_s.upcase}_API_KEY")
99
+ end
100
+ end
101
+ end
102
+ end
@@ -3,48 +3,36 @@
3
3
  module Hyrum
4
4
  module Generators
5
5
  class FakeGenerator
6
- FAKE_MESSAGES = %(
7
- {
8
- "e404": [
9
- "We couldn't locate the resource you were looking for.",
10
- "The resource you requested is not available at this time.",
11
- "Unfortunately, we were unable to find the specified resource.",
12
- "It seems the resource you're searching for does not exist.",
13
- "The item you are trying to access is currently missing."
14
- ],
15
- "e418": [
16
- "I'm a teapot",
17
- "The server refuses the attempt to brew coffee with a teapot",
18
- "Coffee brewing denied: a teapot is not suitable for this operation.",
19
- "Request failed: the server cannot process coffee with a teapot.",
20
- "Brewing error: teapots are incompatible with coffee preparation.",
21
- "Action halted: using a teapot to brew coffee is not permitted.",
22
- "Invalid request: please use a coffee maker instead of a teapot."
23
- ],
24
- "e500": [
25
- "Internal Server Error",
26
- "An unexpected condition was encountered"
27
- ],
28
- "e503": [
29
- "Service Unavailable",
30
- "The server is currently unavailable"
31
- ],
32
- "e504": [
33
- "Gateway Timeout",
34
- "The server is currently unavailable"
35
- ]
36
- }
37
- )
6
+ DATA_FILE = File.expand_path('../data/fake_messages.json', __dir__)
38
7
 
39
8
  attr_reader :options
40
9
 
41
10
  def initialize(options)
42
11
  @options = options
43
- # @ai_service = options[:ai_service]
44
12
  end
45
13
 
46
14
  def generate
47
- JSON.parse(FAKE_MESSAGES)
15
+ messages = load_messages
16
+ key = options[:key]&.to_s&.downcase
17
+ number = (options[:number] || 1).to_i
18
+
19
+ return messages unless key
20
+
21
+ key_with_prefix = key.start_with?('e') ? key : "e#{key}"
22
+ available_messages = messages[key_with_prefix] || []
23
+ selected_messages = available_messages.sample([number, available_messages.length].min)
24
+
25
+ # Prepend the original message if provided
26
+ selected_messages = [options[:message]] + selected_messages if options[:message]
27
+
28
+ # Return as a hash to match expected format
29
+ { options[:key] => selected_messages }
30
+ end
31
+
32
+ private
33
+
34
+ def load_messages
35
+ JSON.parse(File.read(DATA_FILE))
48
36
  end
49
37
  end
50
38
  end
@@ -2,27 +2,37 @@
2
2
 
3
3
  module Hyrum
4
4
  module Generators
5
- AI_SERVICES = %i[openai ollama fake].freeze
5
+ AI_SERVICES = %i[
6
+ openai anthropic gemini ollama mistral deepseek
7
+ perplexity openrouter vertexai bedrock gpustack fake
8
+ ].freeze
6
9
 
7
10
  AI_MODEL_DEFAULTS = {
8
11
  openai: :'gpt-4o-mini',
12
+ anthropic: :'claude-haiku-20250514',
13
+ gemini: :'gemini-2.0-flash-exp',
9
14
  ollama: :llama3,
15
+ mistral: :'mistral-small-latest',
16
+ deepseek: :'deepseek-chat',
17
+ perplexity: :'llama-3.1-sonar-small-128k-online',
18
+ openrouter: :'openai/gpt-4o-mini',
19
+ vertexai: :'gemini-2.0-flash-exp',
20
+ bedrock: :'anthropic.claude-3-haiku-20240307-v1:0',
21
+ gpustack: :llama3,
10
22
  fake: :fake
11
23
  }.freeze
12
24
 
13
25
  GENERATOR_CLASSES = {
14
- openai: OpenaiGenerator,
15
- ollama: OpenaiGenerator,
16
26
  fake: FakeGenerator
27
+ # All other providers default to AiGenerator
17
28
  }.freeze
18
29
 
19
30
  class MessageGenerator
20
31
  def self.create(options)
21
- unless GENERATOR_CLASSES.key?(options[:ai_service].to_sym)
22
- raise ArgumentError, "Invalid AI service: #{options[:ai_service]}"
23
- end
32
+ service = options[:ai_service].to_sym
24
33
 
25
- generator_class = GENERATOR_CLASSES[options[:ai_service].to_sym]
34
+ # Get generator class, defaulting to AiGenerator for unlisted services
35
+ generator_class = GENERATOR_CLASSES.fetch(service, AiGenerator)
26
36
  generator_class.new(options)
27
37
  end
28
38
  end
@@ -11,10 +11,17 @@ module Hyrum
11
11
  attr_reader :options
12
12
 
13
13
  def initialize(args)
14
- @options = {}
14
+ @options = {
15
+ message: nil,
16
+ validate: false,
17
+ min_quality: 70,
18
+ strict: false,
19
+ show_scores: false
20
+ }
15
21
  @args = args
16
22
  end
17
23
 
24
+ # rubocop:disable Metrics/MethodLength
18
25
  def parse
19
26
  OptionParser.new do |parser|
20
27
  define_options(parser)
@@ -24,12 +31,13 @@ module Hyrum
24
31
  set_dynamic_defaults
25
32
  options
26
33
  rescue OptionParser::InvalidOption => e
27
- raise ScriptOptionsError.new("Invalid option: #{e.message}")
34
+ raise ScriptOptionsError, "Invalid option: #{e.message}"
28
35
  rescue OptionParser::MissingArgument => e
29
- raise ScriptOptionsError.new("Missing argument for option: #{e.message}")
36
+ raise ScriptOptionsError, "Missing argument for option: #{e.message}"
30
37
  rescue OptionParser::InvalidArgument => e
31
- raise ScriptOptionsError.new("Invalid argument for option: #{e.message}")
38
+ raise ScriptOptionsError, "Invalid argument for option: #{e.message}"
32
39
  end
40
+ # rubocop:enable Metrics/MethodLength
33
41
 
34
42
  private
35
43
 
@@ -39,10 +47,10 @@ module Hyrum
39
47
  end
40
48
 
41
49
  def enforce_mandatory_options
42
- missing = MANDATORY_OPTIONS.select { |param| options[param].nil? }
43
- return if missing.empty?
50
+ return unless options[:ai_service] != :fake
44
51
 
45
- raise OptionParser::MissingArgument, missing.join(', ')
52
+ missing = MANDATORY_OPTIONS.select { |param| options[param].nil? }
53
+ raise OptionParser::MissingArgument, missing.join(', ') unless missing.empty?
46
54
  end
47
55
 
48
56
  def define_options(parser)
@@ -52,7 +60,9 @@ module Hyrum
52
60
  format_options(parser)
53
61
  message_options(parser)
54
62
  message_key_options(parser)
63
+ number_options(parser)
55
64
  ai_service_options(parser)
65
+ validation_options(parser)
56
66
  on_tail_options(parser)
57
67
  end
58
68
 
@@ -83,21 +93,19 @@ module Hyrum
83
93
  end
84
94
 
85
95
  def message_key_options(parser)
86
- options[:key] = :status
87
-
88
96
  parser.on('-k KEY', '--key KEY', 'Message key (default: status)') do |key|
89
97
  options[:key] = key.to_sym
90
98
  end
91
99
  end
92
100
 
93
101
  def message_options(parser)
94
- parser.on('-m MESSAGE', '--message MESSAGE', 'Status message (required)') do |message|
102
+ parser.on('-m MESSAGE', '--message MESSAGE', 'Status message (required unless fake)') do |message|
95
103
  options[:message] = message
96
104
  end
105
+ end
97
106
 
98
- options[:number] = 5
99
-
100
- parser.on('-n NUMBER', '--number NUMBER', Integer, 'Number of messages to generate (default: 5)',) do |number|
107
+ def number_options(parser)
108
+ parser.on('-n NUMBER', '--number NUMBER', Integer, 'Number of messages to generate (default: 5)') do |number|
101
109
  options[:number] = number.to_i
102
110
  end
103
111
  end
@@ -109,14 +117,30 @@ module Hyrum
109
117
  end
110
118
 
111
119
  def format_options(parser)
112
- options[:format] = :text
113
-
114
120
  formats = Formats::FORMATS
115
121
  description = 'Output format. Supported formats are:'
116
122
  supported = formats.join(', ')
117
- parser.on('-f FORMAT', '--format FORMAT', formats, description, supported, "(default: text)") do |format|
123
+ parser.on('-f FORMAT', '--format FORMAT', formats, description, supported, '(default: text)') do |format|
118
124
  options[:format] = format
119
125
  end
120
126
  end
127
+
128
+ def validation_options(parser)
129
+ parser.on('--validate', 'Enable quality validation (default: off)') do
130
+ options[:validate] = true
131
+ end
132
+
133
+ parser.on('--min-quality SCORE', Integer, 'Minimum quality score 0-100 (default: 70)') do |score|
134
+ options[:min_quality] = score
135
+ end
136
+
137
+ parser.on('--strict', 'Fail on quality issues instead of warning (default: false)') do
138
+ options[:strict] = true
139
+ end
140
+
141
+ parser.on('--show-scores', 'Include quality metrics in output (default: false)') do
142
+ options[:show_scores] = true
143
+ end
144
+ end
121
145
  end
122
146
  end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ module Hyrum
6
+ module Validators
7
+ class LexicalDiversity
8
+ attr_reader :variations
9
+
10
+ def initialize(variations)
11
+ @variations = variations
12
+ end
13
+
14
+ def calculate
15
+ return 0.0 if variations.empty? || variations.size == 1
16
+
17
+ # Calculate average pairwise Jaccard distance
18
+ distances = []
19
+ variations.combination(2).each do |var1, var2|
20
+ distances << jaccard_distance(tokenize(var1), tokenize(var2))
21
+ end
22
+
23
+ # Convert to percentage (0-100)
24
+ (distances.sum / distances.size * 100).round(2)
25
+ end
26
+
27
+ private
28
+
29
+ def tokenize(text)
30
+ # Convert to lowercase and split into words, removing punctuation
31
+ text.downcase.scan(/\w+/).to_set
32
+ end
33
+
34
+ def jaccard_distance(set1, set2)
35
+ # Jaccard distance = 1 - Jaccard similarity
36
+ # Jaccard similarity = intersection / union
37
+ return 1.0 if set1.empty? && set2.empty?
38
+ return 1.0 if set1.union(set2).empty?
39
+
40
+ intersection = set1.intersection(set2).size.to_f
41
+ union = set1.union(set2).size.to_f
42
+ 1.0 - (intersection / union)
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,107 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Hyrum
4
+ module Validators
5
+ class QualityValidator
6
+ DIVERSITY_WEIGHT = 0.5
7
+ SIMILARITY_WEIGHT = 0.5
8
+ MIN_DIVERSITY_THRESHOLD = 30.0
9
+ MIN_SIMILARITY_THRESHOLD = 85.0
10
+
11
+ attr_reader :original_message, :messages, :options
12
+
13
+ def initialize(original_message, messages, options)
14
+ @original_message = original_message
15
+ @messages = messages
16
+ @options = options
17
+ end
18
+
19
+ def validate
20
+ return empty_result if messages.empty?
21
+
22
+ all_variations = messages.values.flatten
23
+ return single_variation_result if all_variations.size <= 1
24
+
25
+ semantic_score = calculate_semantic_similarity(all_variations)
26
+ lexical_score = calculate_lexical_diversity(all_variations)
27
+
28
+ overall_score = (semantic_score * SIMILARITY_WEIGHT) + (lexical_score * DIVERSITY_WEIGHT)
29
+
30
+ passed = overall_score >= options[:min_quality] &&
31
+ lexical_score >= MIN_DIVERSITY_THRESHOLD &&
32
+ semantic_score >= MIN_SIMILARITY_THRESHOLD
33
+
34
+ warnings = build_warnings(semantic_score, lexical_score)
35
+
36
+ ValidationResult.new(
37
+ score: overall_score.round(2),
38
+ semantic_similarity: semantic_score.round(2),
39
+ lexical_diversity: lexical_score.round(2),
40
+ passed: passed,
41
+ details: {
42
+ min_quality_threshold: options[:min_quality],
43
+ variation_count: all_variations.size
44
+ },
45
+ warnings: warnings
46
+ )
47
+ end
48
+
49
+ private
50
+
51
+ def calculate_semantic_similarity(variations)
52
+ calculator = SemanticSimilarity.new(
53
+ original_message,
54
+ variations,
55
+ options[:ai_service],
56
+ options[:ai_model]
57
+ )
58
+ calculator.calculate
59
+ rescue StandardError => e
60
+ # Fall back to 100% on error (assume semantic similarity is good)
61
+ warn "Semantic similarity calculation failed: #{e.message}"
62
+ 100.0
63
+ end
64
+
65
+ def calculate_lexical_diversity(variations)
66
+ calculator = LexicalDiversity.new(variations)
67
+ calculator.calculate
68
+ end
69
+
70
+ def build_warnings(semantic_score, lexical_score)
71
+ warnings = []
72
+
73
+ if lexical_score < MIN_DIVERSITY_THRESHOLD
74
+ warnings << "Low lexical diversity (#{lexical_score.round(2)}%). Variations may be too similar."
75
+ end
76
+
77
+ if semantic_score < MIN_SIMILARITY_THRESHOLD
78
+ warnings << "Low semantic similarity (#{semantic_score.round(2)}%). Variations may have different meanings."
79
+ end
80
+
81
+ warnings
82
+ end
83
+
84
+ def empty_result
85
+ ValidationResult.new(
86
+ score: 0.0,
87
+ semantic_similarity: 0.0,
88
+ lexical_diversity: 0.0,
89
+ passed: true,
90
+ details: { variation_count: 0 },
91
+ warnings: ['No variations to validate']
92
+ )
93
+ end
94
+
95
+ def single_variation_result
96
+ ValidationResult.new(
97
+ score: 0.0,
98
+ semantic_similarity: 0.0,
99
+ lexical_diversity: 0.0,
100
+ passed: true,
101
+ details: { variation_count: messages.values.flatten.size },
102
+ warnings: ['Only one variation - nothing to compare']
103
+ )
104
+ end
105
+ end
106
+ end
107
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'matrix'
4
+ require 'set'
5
+
6
+ module Hyrum
7
+ module Validators
8
+ class SemanticSimilarity
9
+ attr_reader :original_message, :variations, :ai_service, :ai_model
10
+
11
+ def initialize(original_message, variations, ai_service, ai_model)
12
+ @original_message = original_message
13
+ @variations = variations
14
+ @ai_service = ai_service
15
+ @ai_model = ai_model
16
+ end
17
+
18
+ def calculate
19
+ return 100.0 if variations.empty?
20
+
21
+ if supports_embeddings?
22
+ calculate_with_embeddings
23
+ else
24
+ calculate_with_fallback
25
+ end
26
+ end
27
+
28
+ def supports_embeddings?
29
+ # Check if RubyLLM has any embedding models available in the current registry
30
+ # User is responsible for calling RubyLLM.models.refresh! if needed
31
+ RubyLLM.models.embedding_models.any?
32
+ rescue StandardError
33
+ # If we can't check the registry, assume embeddings aren't available
34
+ false
35
+ end
36
+
37
+ private
38
+
39
+ def calculate_with_embeddings
40
+ # Batch all texts together for efficient API call
41
+ all_texts = [original_message] + variations
42
+ all_embeddings = get_embeddings(all_texts)
43
+
44
+ # First embedding is the original, rest are variations
45
+ original_embedding = all_embeddings.first
46
+ variation_embeddings = all_embeddings[1..]
47
+
48
+ # Compare each variation to the original message
49
+ similarities = variation_embeddings.map do |var_embedding|
50
+ cosine_similarity(original_embedding, var_embedding)
51
+ end
52
+
53
+ # Convert to percentage (0-100)
54
+ (similarities.sum / similarities.size * 100).round(2)
55
+ end
56
+
57
+ def calculate_with_fallback
58
+ # Simple word overlap heuristic when embeddings not available
59
+ original_words = original_message.downcase.scan(/\w+/).to_set
60
+
61
+ # Compare each variation to the original message
62
+ similarities = variations.map do |variation|
63
+ var_words = variation.downcase.scan(/\w+/).to_set
64
+ intersection = original_words.intersection(var_words).size.to_f
65
+ union = original_words.union(var_words).size.to_f
66
+ union.zero? ? 1.0 : intersection / union
67
+ end
68
+
69
+ (similarities.sum / similarities.size * 100).round(2)
70
+ end
71
+
72
+ def get_embeddings(texts)
73
+ # Use RubyLLM.embed with user's configured default embedding model
74
+ # Works with any provider (OpenAI, Google, Anthropic, etc.)
75
+ result = RubyLLM.embed(texts)
76
+
77
+ # RubyLLM.embed returns a single result with vectors array
78
+ result.vectors
79
+ rescue RubyLLM::Error => e
80
+ # Fall back to heuristic if embedding fails
81
+ warn "Embedding API failed: #{e.message}. Using fallback heuristic."
82
+ raise # Re-raise to trigger fallback in calculate method
83
+ end
84
+
85
+ def cosine_similarity(vec1, vec2)
86
+ # Calculate cosine similarity between two vectors
87
+ v1 = Vector.elements(vec1)
88
+ v2 = Vector.elements(vec2)
89
+
90
+ dot_product = v1.inner_product(v2)
91
+ magnitude1 = Math.sqrt(v1.inner_product(v1))
92
+ magnitude2 = Math.sqrt(v2.inner_product(v2))
93
+
94
+ return 0.0 if magnitude1.zero? || magnitude2.zero?
95
+
96
+ dot_product / (magnitude1 * magnitude2)
97
+ end
98
+ end
99
+ end
100
+ end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Hyrum
4
+ module Validators
5
+ class ValidationResult < Dry::Struct
6
+ attribute :score, Types::Coercible::Float
7
+ attribute :semantic_similarity, Types::Coercible::Float
8
+ attribute :lexical_diversity, Types::Coercible::Float
9
+ attribute :passed, Types::Bool
10
+ attribute :details, Types::Hash.default({}.freeze)
11
+ attribute :warnings, Types::Array.of(Types::String).default([].freeze)
12
+
13
+ def passed?
14
+ passed
15
+ end
16
+
17
+ def failed?
18
+ !passed
19
+ end
20
+ end
21
+ end
22
+ end
data/lib/hyrum/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Hyrum
4
- VERSION = '0.1.0'
5
- end
4
+ VERSION = '0.2.0'
5
+ end