RubyGems - hyrum - Versions diffs - 0.0.2 → 0.2.0 - Mend

hyrum 0.0.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +92 -3
data/README.md +133 -14
data/bin/hyrum +1 -2
data/lib/hyrum/data/fake_messages.json +282 -0
data/lib/hyrum/formats/formatter.rb +8 -2
data/lib/hyrum/formats/templates/java.erb +9 -0
data/lib/hyrum/formats/templates/javascript.erb +9 -0
data/lib/hyrum/formats/templates/json.erb +13 -0
data/lib/hyrum/formats/templates/python.erb +9 -0
data/lib/hyrum/formats/templates/ruby.erb +9 -0
data/lib/hyrum/formats/templates/text.erb +9 -0
data/lib/hyrum/generators/ai_generator.rb +102 -0
data/lib/hyrum/generators/fake_generator.rb +22 -34
data/lib/hyrum/generators/message_generator.rb +17 -5
data/lib/hyrum/script_options.rb +47 -20
data/lib/hyrum/validators/lexical_diversity.rb +46 -0
data/lib/hyrum/validators/quality_validator.rb +107 -0
data/lib/hyrum/validators/semantic_similarity.rb +100 -0
data/lib/hyrum/validators/validation_result.rb +22 -0
data/lib/hyrum/version.rb +1 -1
data/lib/hyrum.rb +158 -7
metadata +13 -11
data/lib/hyrum/generators/openai_generator.rb +0 -77

data/lib/hyrum/formats/templates/python.erb CHANGED Viewed

@@ -1,3 +1,12 @@
+<% if validation_result && show_scores -%>
+# Quality Score: <%= validation_result.score %>/100
+# - Semantic similarity: <%= validation_result.semantic_similarity %>% (variations preserve meaning)
+# - Lexical diversity: <%= validation_result.lexical_diversity %>% (variation in wording)
+<% validation_result.warnings.each do |warning| -%>
+# Warning: <%= warning %>
+<% end -%>
+#
+<% end -%>
 import random
 import sys

data/lib/hyrum/formats/templates/ruby.erb CHANGED Viewed

@@ -1,3 +1,12 @@
+<% if validation_result && show_scores -%>
+# Quality Score: <%= validation_result.score %>/100
+# - Semantic similarity: <%= validation_result.semantic_similarity %>% (variations preserve meaning)
+# - Lexical diversity: <%= validation_result.lexical_diversity %>% (variation in wording)
+<% validation_result.warnings.each do |warning| -%>
+# Warning: <%= warning %>
+<% end -%>
+#
+<% end -%>
 # frozen_string_literal: true
 module Messages

data/lib/hyrum/formats/templates/text.erb CHANGED Viewed

@@ -1,3 +1,12 @@
+<% if validation_result && show_scores -%>
+Quality Score: <%= validation_result.score %>/100
+- Semantic similarity: <%= validation_result.semantic_similarity %>% (variations preserve meaning)
+- Lexical diversity: <%= validation_result.lexical_diversity %>% (variation in wording)
+<% validation_result.warnings.each do |warning| -%>
+Warning: <%= warning %>
+<% end -%>
+<% end -%>
 <% messages.each do |key, values| -%>
 Messages for <%= key %>:
   <% values.each do |msg| -%>

data/lib/hyrum/generators/ai_generator.rb ADDED Viewed

@@ -0,0 +1,102 @@
+# frozen_string_literal: true
+require 'ruby_llm'
+module Hyrum
+  module Generators
+    # Generates alternative status messages by prompting an LLM through RubyLLM
+    # and constraining the reply with a JSON schema.
+    class AiGenerator
+      # Environment variable to suggest, per provider, when RubyLLM reports a
+      # configuration error. Providers not listed here fall back to
+      # "<SERVICE>_API_KEY" (see #api_key_env_var_name).
+      API_KEY_ENV_VARS = {
+        openai: 'OPENAI_API_KEY',
+        anthropic: 'ANTHROPIC_API_KEY',
+        gemini: 'GEMINI_API_KEY',
+        ollama: 'OLLAMA_API_BASE',
+        vertexai: 'GOOGLE_CLOUD_PROJECT',
+        bedrock: 'AWS_ACCESS_KEY_ID',
+        deepseek: 'DEEPSEEK_API_KEY',
+        mistral: 'MISTRAL_API_KEY',
+        perplexity: 'PERPLEXITY_API_KEY',
+        openrouter: 'OPENROUTER_API_KEY',
+        gpustack: 'GPUSTACK_API_KEY'
+      }.freeze
+      attr_reader :options
+      # @param options [Hash] reads :ai_service, :ai_model, :key, :message,
+      #   :number and :verbose
+      def initialize(options)
+        @options = options
+      end
+      # Asks the model for variations and returns them keyed by symbol.
+      #
+      # @return [Hash] the response content with symbol keys; when the value
+      #   under options[:key] is an Array, options[:message] is prepended to it
+      # @note On RubyLLM::ConfigurationError or RubyLLM::Error this prints a
+      #   message and terminates the process with exit status 1.
+      def generate
+        response = chat.ask(prompt)
+        puts "AI response: #{response.inspect}" if options[:verbose]
+        # Prepend the original message to the generated variations
+        # RubyLLM returns string keys, but our options use symbols
+        result = response.content.dup
+        key_str = options[:key].to_s
+        if result[key_str].is_a?(Array)
+          result[key_str] = [options[:message]] + result[key_str]
+        end
+        # Convert string keys to symbols for consistency with the rest of hyrum
+        result.transform_keys(&:to_sym)
+      rescue RubyLLM::ConfigurationError => e
+        handle_configuration_error(e)
+      rescue RubyLLM::Error => e
+        handle_general_error(e)
+      end
+      private
+      # Memoized RubyLLM chat for the configured model/provider, with the
+      # response schema attached.
+      def chat
+        @chat ||= RubyLLM.chat(
+          model: options[:ai_model].to_s,
+          provider: options[:ai_service]
+        ).with_schema(response_schema)
+      end
+      # Prompt text asking for options[:number] alternatives to options[:message].
+      def prompt
+        <<~PROMPT
+          Please provide #{options[:number]} alternative status messages for the following message:
+          "#{options[:message]}"
+          The messages should be unique and informative.
+        PROMPT
+      end
+      # rubocop:disable Metrics/MethodLength
+      # JSON schema requiring an object with a single array-of-strings property
+      # named after options[:key], holding exactly options[:number] items.
+      def response_schema
+        # Use the same String for the property name and the `required` entry so
+        # the two can never disagree (and so it matches the lookup in #generate).
+        key_str = options[:key].to_s
+        {
+          type: 'object',
+          properties: {
+            key_str => {
+              type: 'array',
+              items: { type: 'string' },
+              minItems: options[:number],
+              maxItems: options[:number]
+            }
+          },
+          required: [key_str],
+          additionalProperties: false
+        }
+      end
+      # rubocop:enable Metrics/MethodLength
+      # Reports a configuration problem with a hint about the environment
+      # variable to set, then exits with status 1.
+      def handle_configuration_error(error)
+        puts "Configuration Error: #{error.message}"
+        puts "Please set the required API key for #{options[:ai_service]}."
+        puts "Example: export #{api_key_env_var_name}=your-key-here"
+        exit 1
+      end
+      # Reports any other RubyLLM error, then exits with status 1.
+      def handle_general_error(error)
+        puts "Error: #{error.message}"
+        puts 'Please check your configuration and try again.'
+        exit 1
+      end
+      # Environment variable name to show in the configuration hint.
+      # The service is normalized to a Symbol so that a String-valued
+      # options[:ai_service] still hits the lookup table.
+      def api_key_env_var_name
+        service = options[:ai_service].to_s
+        API_KEY_ENV_VARS.fetch(service.to_sym, "#{service.upcase}_API_KEY")
+      end
+    end
+  end
+end

data/lib/hyrum/generators/fake_generator.rb CHANGED Viewed

@@ -3,48 +3,36 @@
 module Hyrum
   module Generators
     class FakeGenerator
-      FAKE_MESSAGES = %(
-        {
-          "e404": [
-            "We couldn't locate the resource you were looking for.",
-            "The resource you requested is not available at this time.",
-            "Unfortunately, we were unable to find the specified resource.",
-            "It seems the resource you're searching for does not exist.",
-            "The item you are trying to access is currently missing."
-          ],
-          "e418": [
-            "I'm a teapot",
-            "The server refuses the attempt to brew coffee with a teapot",
-            "Coffee brewing denied: a teapot is not suitable for this operation.",
-            "Request failed: the server cannot process coffee with a teapot.",
-            "Brewing error: teapots are incompatible with coffee preparation.",
-            "Action halted: using a teapot to brew coffee is not permitted.",
-            "Invalid request: please use a coffee maker instead of a teapot."
-          ],
-          "e500": [
-            "Internal Server Error",
-            "An unexpected condition was encountered"
-          ],
-          "e503": [
-            "Service Unavailable",
-            "The server is currently unavailable"
-          ],
-          "e504": [
-            "Gateway Timeout",
-            "The server is currently unavailable"
-          ]
-        }
-      )
+      DATA_FILE = File.expand_path('../data/fake_messages.json', __dir__)
       attr_reader :options
       def initialize(options)
         @options = options
-        @ai_service = options[:ai_service]
       end
       def generate
-        JSON.parse(FAKE_MESSAGES)
+        messages = load_messages
+        key = options[:key]&.to_s&.downcase
+        number = (options[:number] || 1).to_i
+        return messages unless key
+        key_with_prefix = key.start_with?('e') ? key : "e#{key}"
+        available_messages = messages[key_with_prefix] || []
+        selected_messages = available_messages.sample([number, available_messages.length].min)
+        # Prepend the original message if provided
+        selected_messages = [options[:message]] + selected_messages if options[:message]
+        # Return as a hash to match expected format
+        { options[:key] => selected_messages }
+      end
+      private
+      def load_messages
+        JSON.parse(File.read(DATA_FILE))
       end
     end
   end

data/lib/hyrum/generators/message_generator.rb CHANGED Viewed

@@ -2,25 +2,37 @@
 module Hyrum
   module Generators
-    AI_SERVICES = %i[openai ollama fake].freeze
+    AI_SERVICES = %i[
+      openai anthropic gemini ollama mistral deepseek
+      perplexity openrouter vertexai bedrock gpustack fake
+    ].freeze
     AI_MODEL_DEFAULTS = {
       openai: :'gpt-4o-mini',
+      anthropic: :'claude-haiku-20250514',
+      gemini: :'gemini-2.0-flash-exp',
       ollama: :llama3,
+      mistral: :'mistral-small-latest',
+      deepseek: :'deepseek-chat',
+      perplexity: :'llama-3.1-sonar-small-128k-online',
+      openrouter: :'openai/gpt-4o-mini',
+      vertexai: :'gemini-2.0-flash-exp',
+      bedrock: :'anthropic.claude-3-haiku-20240307-v1:0',
+      gpustack: :llama3,
       fake: :fake
     }.freeze
     GENERATOR_CLASSES = {
-      openai: OpenaiGenerator,
-      ollama: OpenaiGenerator,
       fake: FakeGenerator
+      # All other providers default to AiGenerator
     }.freeze
     class MessageGenerator
       def self.create(options)
-        generator_class = GENERATOR_CLASSES[options[:ai_service].to_sym]
+        service = options[:ai_service].to_sym
-        # Add error handling for invalid format
+        # Get generator class, defaulting to AiGenerator for unlisted services
+        generator_class = GENERATOR_CLASSES.fetch(service, AiGenerator)
         generator_class.new(options)
       end
     end

data/lib/hyrum/script_options.rb CHANGED Viewed

@@ -3,16 +3,25 @@
 require 'optparse'
 module Hyrum
+  class ScriptOptionsError < StandardError; end
   class ScriptOptions
     MANDATORY_OPTIONS = %i[message].freeze
     attr_reader :options
     def initialize(args)
-      @options = {}
+      @options = {
+        message: nil,
+        validate: false,
+        min_quality: 70,
+        strict: false,
+        show_scores: false
+      }
       @args = args
     end
+    # rubocop:disable Metrics/MethodLength
     def parse
       OptionParser.new do |parser|
         define_options(parser)
@@ -22,17 +31,13 @@ module Hyrum
       set_dynamic_defaults
       options
     rescue OptionParser::InvalidOption => e
-      err = "Invalid option: #{e.message}"
+      raise ScriptOptionsError, "Invalid option: #{e.message}"
     rescue OptionParser::MissingArgument => e
-      err = "Missing argument for option: #{e.message}"
+      raise ScriptOptionsError, "Missing argument for option: #{e.message}"
     rescue OptionParser::InvalidArgument => e
-      err = "Invalid argument for option: #{e.message}"
-    ensure
-      if err
-        puts err
-        exit
-      end
+      raise ScriptOptionsError, "Invalid argument for option: #{e.message}"
     end
+    # rubocop:enable Metrics/MethodLength
     private
@@ -42,10 +47,10 @@ module Hyrum
     end
     def enforce_mandatory_options
-      missing = MANDATORY_OPTIONS.select { |param| options[param].nil? }
-      return if missing.empty?
+      return unless options[:ai_service] != :fake
-      raise OptionParser::MissingArgument, missing.join(', ')
+      missing = MANDATORY_OPTIONS.select { |param| options[param].nil? }
+      raise OptionParser::MissingArgument, missing.join(', ') unless missing.empty?
     end
     def define_options(parser)
@@ -55,7 +60,9 @@ module Hyrum
       format_options(parser)
       message_options(parser)
       message_key_options(parser)
+      number_options(parser)
       ai_service_options(parser)
+      validation_options(parser)
       on_tail_options(parser)
     end
@@ -74,7 +81,7 @@ module Hyrum
     def ai_service_options(parser)
       options[:ai_service] = :fake
-      description = "AI service: one of #{Generators::AI_SERVICES.join(', ')}"
+      description = "AI service: one of #{Generators::AI_SERVICES.join(', ')} (default: fake)"
       parser.on('-s SERVICE', '--service SERVICE', Generators::AI_SERVICES, description) do |service|
         options[:ai_service] = service.to_sym
       end
@@ -86,19 +93,23 @@ module Hyrum
     end
     def message_key_options(parser)
-      options[:key] = :status
-      parser.on('-k KEY', '--key KEY', 'Message key') do |key|
+      parser.on('-k KEY', '--key KEY', 'Message key (default: status)') do |key|
         options[:key] = key.to_sym
       end
     end
     def message_options(parser)
-      parser.on('-m MESSAGE', '--message MESSAGE', 'Status message') do |message|
+      parser.on('-m MESSAGE', '--message MESSAGE', 'Status message (required unless fake)') do |message|
         options[:message] = message
       end
     end
+    def number_options(parser)
+      parser.on('-n NUMBER', '--number NUMBER', Integer, 'Number of messages to generate (default: 5)') do |number|
+        options[:number] = number.to_i
+      end
+    end
     def verbosity_options(parser)
       parser.on('-v', '--[no-]verbose', 'Run verbosely') do |v|
         options[:verbose] = v
@@ -106,14 +117,30 @@ module Hyrum
     end
     def format_options(parser)
-      options[:format] = :text
       formats = Formats::FORMATS
       description = 'Output format. Supported formats are:'
       supported   = formats.join(', ')
-      parser.on('-f FORMAT', '--format FORMAT', formats, description, supported) do |format|
+      parser.on('-f FORMAT', '--format FORMAT', formats, description, supported, '(default: text)') do |format|
         options[:format] = format
       end
     end
+    def validation_options(parser)
+      parser.on('--validate', 'Enable quality validation (default: off)') do
+        options[:validate] = true
+      end
+      parser.on('--min-quality SCORE', Integer, 'Minimum quality score 0-100 (default: 70)') do |score|
+        options[:min_quality] = score
+      end
+      parser.on('--strict', 'Fail on quality issues instead of warning (default: false)') do
+        options[:strict] = true
+      end
+      parser.on('--show-scores', 'Include quality metrics in output (default: false)') do
+        options[:show_scores] = true
+      end
+    end
   end
 end

data/lib/hyrum/validators/lexical_diversity.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require 'set'
+module Hyrum
+  module Validators
+    # Scores how differently a set of message variations is worded, as the
+    # average pairwise Jaccard distance between their word sets.
+    class LexicalDiversity
+      attr_reader :variations
+      # @param variations [Array<String>] the messages to compare
+      def initialize(variations)
+        @variations = variations
+      end
+      # @return [Float] average pairwise distance as a percentage (0.0-100.0),
+      #   rounded to two decimals; 0.0 when fewer than two variations are given
+      def calculate
+        return 0.0 if variations.size < 2
+        # Tokenize each variation once instead of once per pair
+        token_sets = variations.map { |text| tokenize(text) }
+        # Calculate average pairwise Jaccard distance
+        distances = token_sets.combination(2).map do |set1, set2|
+          jaccard_distance(set1, set2)
+        end
+        # Convert to percentage (0-100)
+        (distances.sum / distances.size * 100).round(2)
+      end
+      private
+      # Lowercased set of \w+ tokens in +text+ (punctuation is dropped).
+      def tokenize(text)
+        text.downcase.scan(/\w+/).to_set
+      end
+      # Jaccard distance = 1 - (|intersection| / |union|).
+      def jaccard_distance(set1, set2)
+        union_size = set1.union(set2).size
+        # Two empty token sets are identical, so they add no diversity.
+        # (Returning 1.0 here would score blank variations as maximally diverse.)
+        return 0.0 if union_size.zero?
+        1.0 - (set1.intersection(set2).size.to_f / union_size)
+      end
+    end
+  end
+end

data/lib/hyrum/validators/quality_validator.rb ADDED Viewed

@@ -0,0 +1,107 @@
+# frozen_string_literal: true
+module Hyrum
+  module Validators
+    # Combines a semantic-similarity score and a lexical-diversity score into
+    # one weighted quality score for a set of generated messages.
+    class QualityValidator
+      # Weights applied to each metric when computing the overall score.
+      DIVERSITY_WEIGHT = 0.5
+      SIMILARITY_WEIGHT = 0.5
+      # Per-metric minimums (percent); falling below either one fails
+      # validation and adds a warning.
+      MIN_DIVERSITY_THRESHOLD = 30.0
+      MIN_SIMILARITY_THRESHOLD = 85.0
+      attr_reader :original_message, :messages, :options
+      # @param original_message the message the variations were derived from
+      # @param messages a collection responding to #empty? and #values
+      #   (presumably a Hash of key => Array of Strings; verify against caller)
+      # @param options [Hash] reads :min_quality, :ai_service and :ai_model
+      def initialize(original_message, messages, options)
+        @original_message = original_message
+        @messages = messages
+        @options = options
+      end
+      # Scores all variations and decides whether they pass.
+      #
+      # @return [ValidationResult] a placeholder result when there are no
+      #   messages or at most one variation; otherwise the weighted score, both
+      #   metric scores, the pass flag and any warnings
+      def validate
+        return empty_result if messages.empty?
+        all_variations = messages.values.flatten
+        return single_variation_result if all_variations.size <= 1
+        semantic_score = calculate_semantic_similarity(all_variations)
+        lexical_score = calculate_lexical_diversity(all_variations)
+        overall_score = (semantic_score * SIMILARITY_WEIGHT) + (lexical_score * DIVERSITY_WEIGHT)
+        # Passing needs the overall score AND each individual metric to clear its minimum.
+        # NOTE(review): assumes options[:min_quality] is numeric; a nil value
+        # would make the >= comparison raise - confirm callers always set it.
+        passed = overall_score >= options[:min_quality] &&
+                 lexical_score >= MIN_DIVERSITY_THRESHOLD &&
+                 semantic_score >= MIN_SIMILARITY_THRESHOLD
+        warnings = build_warnings(semantic_score, lexical_score)
+        ValidationResult.new(
+          score: overall_score.round(2),
+          semantic_similarity: semantic_score.round(2),
+          lexical_diversity: lexical_score.round(2),
+          passed: passed,
+          details: {
+            min_quality_threshold: options[:min_quality],
+            variation_count: all_variations.size
+          },
+          warnings: warnings
+        )
+      end
+      private
+      # Delegates to SemanticSimilarity; any StandardError is reported with
+      # +warn+ and replaced by a score of 100.0.
+      def calculate_semantic_similarity(variations)
+        calculator = SemanticSimilarity.new(
+          original_message,
+          variations,
+          options[:ai_service],
+          options[:ai_model]
+        )
+        calculator.calculate
+      rescue StandardError => e
+        # Fall back to 100% on error (assume semantic similarity is good)
+        warn "Semantic similarity calculation failed: #{e.message}"
+        100.0
+      end
+      # Delegates to LexicalDiversity.
+      def calculate_lexical_diversity(variations)
+        calculator = LexicalDiversity.new(variations)
+        calculator.calculate
+      end
+      # Builds one warning string per metric that is below its minimum threshold.
+      def build_warnings(semantic_score, lexical_score)
+        warnings = []
+        if lexical_score < MIN_DIVERSITY_THRESHOLD
+          warnings << "Low lexical diversity (#{lexical_score.round(2)}%). Variations may be too similar."
+        end
+        if semantic_score < MIN_SIMILARITY_THRESHOLD
+          warnings << "Low semantic similarity (#{semantic_score.round(2)}%). Variations may have different meanings."
+        end
+        warnings
+      end
+      # Result used when +messages+ is empty: all scores 0.0, marked as passed,
+      # with an explanatory warning.
+      def empty_result
+        ValidationResult.new(
+          score: 0.0,
+          semantic_similarity: 0.0,
+          lexical_diversity: 0.0,
+          passed: true,
+          details: { variation_count: 0 },
+          warnings: ['No variations to validate']
+        )
+      end
+      # Result used when there is at most one variation: all scores 0.0,
+      # marked as passed, with an explanatory warning.
+      def single_variation_result
+        ValidationResult.new(
+          score: 0.0,
+          semantic_similarity: 0.0,
+          lexical_diversity: 0.0,
+          passed: true,
+          details: { variation_count: messages.values.flatten.size },
+          warnings: ['Only one variation - nothing to compare']
+        )
+      end
+    end
+  end
+end

data/lib/hyrum/validators/semantic_similarity.rb ADDED Viewed

@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+require 'matrix'
+require 'set'
+module Hyrum
+  module Validators
+    # Estimates how closely each variation matches the original message, using
+    # embedding vectors when RubyLLM offers them and a word-overlap heuristic
+    # otherwise.
+    class SemanticSimilarity
+      attr_reader :original_message, :variations, :ai_service, :ai_model
+      # @param original_message [String] the message the variations derive from
+      # @param variations [Array<String>] the messages to compare against it
+      # @param ai_service stored only; not read by this class
+      # @param ai_model stored only; not read by this class
+      def initialize(original_message, variations, ai_service, ai_model)
+        @original_message = original_message
+        @variations = variations
+        @ai_service = ai_service
+        @ai_model = ai_model
+      end
+      # @return [Float] mean similarity of the variations to the original as a
+      #   percentage rounded to two decimals; 100.0 when there are no variations
+      def calculate
+        return 100.0 if variations.empty?
+        return calculate_with_fallback unless supports_embeddings?
+        begin
+          calculate_with_embeddings
+        rescue RubyLLM::Error
+          # get_embeddings has already warned that the heuristic will be used;
+          # actually use it instead of letting the error escape to the caller.
+          calculate_with_fallback
+        end
+      end
+      # @return [Boolean] true when RubyLLM's model registry lists at least one
+      #   embedding model; false if the registry cannot be queried
+      def supports_embeddings?
+        # Check if RubyLLM has any embedding models available in the current registry
+        # User is responsible for calling RubyLLM.models.refresh! if needed
+        RubyLLM.models.embedding_models.any?
+      rescue StandardError
+        # If we can't check the registry, assume embeddings aren't available
+        false
+      end
+      private
+      # Mean cosine similarity between the original's embedding and each
+      # variation's embedding, as a percentage.
+      def calculate_with_embeddings
+        # Batch all texts together for efficient API call
+        all_texts = [original_message] + variations
+        all_embeddings = get_embeddings(all_texts)
+        # First embedding is the original, rest are variations
+        original_embedding = all_embeddings.first
+        variation_embeddings = all_embeddings[1..]
+        # Compare each variation to the original message
+        similarities = variation_embeddings.map do |var_embedding|
+          cosine_similarity(original_embedding, var_embedding)
+        end
+        # Convert to percentage (0-100)
+        (similarities.sum / similarities.size * 100).round(2)
+      end
+      # Mean Jaccard similarity of word sets (original vs. each variation), as
+      # a percentage. Used when embeddings are unavailable or fail.
+      def calculate_with_fallback
+        # Simple word overlap heuristic when embeddings not available
+        original_words = original_message.downcase.scan(/\w+/).to_set
+        # Compare each variation to the original message
+        similarities = variations.map do |variation|
+          var_words = variation.downcase.scan(/\w+/).to_set
+          intersection = original_words.intersection(var_words).size.to_f
+          union = original_words.union(var_words).size.to_f
+          union.zero? ? 1.0 : intersection / union
+        end
+        (similarities.sum / similarities.size * 100).round(2)
+      end
+      # Embeds all +texts+ in one RubyLLM.embed call and returns its vectors.
+      # Warns and re-raises on RubyLLM::Error.
+      def get_embeddings(texts)
+        # Use RubyLLM.embed with user's configured default embedding model
+        result = RubyLLM.embed(texts)
+        # RubyLLM.embed returns a single result with vectors array
+        result.vectors
+      rescue RubyLLM::Error => e
+        warn "Embedding API failed: #{e.message}. Using fallback heuristic."
+        raise # Re-raised so #calculate can switch to the heuristic
+      end
+      # Cosine similarity of two numeric vectors; 0.0 if either has zero magnitude.
+      def cosine_similarity(vec1, vec2)
+        v1 = Vector.elements(vec1)
+        v2 = Vector.elements(vec2)
+        dot_product = v1.inner_product(v2)
+        magnitude1 = Math.sqrt(v1.inner_product(v1))
+        magnitude2 = Math.sqrt(v2.inner_product(v2))
+        return 0.0 if magnitude1.zero? || magnitude2.zero?
+        dot_product / (magnitude1 * magnitude2)
+      end
+    end
+  end
+end

data/lib/hyrum/validators/validation_result.rb ADDED Viewed

@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+module Hyrum
+  module Validators
+    # Value object (Dry::Struct) holding the outcome of a quality validation.
+    class ValidationResult < Dry::Struct
+      # Overall score and the two metric scores; each is coerced to Float.
+      attribute :score, Types::Coercible::Float
+      attribute :semantic_similarity, Types::Coercible::Float
+      attribute :lexical_diversity, Types::Coercible::Float
+      # Whether validation passed.
+      attribute :passed, Types::Bool
+      # Extra information about the run; defaults to an empty frozen Hash.
+      attribute :details, Types::Hash.default({}.freeze)
+      # Warning strings; defaults to an empty frozen Array.
+      attribute :warnings, Types::Array.of(Types::String).default([].freeze)
+      # @return [Boolean] the +passed+ attribute
+      def passed?
+        passed
+      end
+      # @return [Boolean] the negation of +passed+
+      def failed?
+        !passed
+      end
+    end
+  end
+end

data/lib/hyrum/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Hyrum
-  VERSION = '0.0.2'
+  VERSION = '0.2.0'
 end