RubyGems - langsmithrb_rails - Versions diffs - 0.1.1 → 0.3.0 - Mend

langsmithrb_rails 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/.rspec_status +158 -79
data/CHANGELOG.md +13 -0
data/Gemfile.lock +1 -1
data/README.md +153 -0
data/langsmithrb_rails-0.1.1.gem +0 -0
data/lib/langsmithrb_rails/client.rb +217 -2
data/lib/langsmithrb_rails/config.rb +143 -46
data/lib/langsmithrb_rails/evaluation/evaluator.rb +178 -0
data/lib/langsmithrb_rails/evaluation/llm_evaluator.rb +154 -0
data/lib/langsmithrb_rails/evaluation/string_evaluator.rb +158 -0
data/lib/langsmithrb_rails/evaluation.rb +76 -0
data/lib/langsmithrb_rails/otel/exporter.rb +120 -0
data/lib/langsmithrb_rails/otel.rb +135 -0
data/lib/langsmithrb_rails/run_trees.rb +157 -0
data/lib/langsmithrb_rails/version.rb +1 -1
data/lib/langsmithrb_rails/wrappers/anthropic.rb +146 -0
data/lib/langsmithrb_rails/wrappers/base.rb +81 -0
data/lib/langsmithrb_rails/wrappers/llm.rb +151 -0
data/lib/langsmithrb_rails/wrappers/openai.rb +193 -0
data/lib/langsmithrb_rails/wrappers.rb +41 -0
data/lib/langsmithrb_rails.rb +121 -1
data/pkg/langsmithrb_rails-0.3.0.gem +0 -0
metadata +16 -2

data/lib/langsmithrb_rails/evaluation/llm_evaluator.rb ADDED Viewed

@@ -0,0 +1,154 @@
+# frozen_string_literal: true
+require_relative "evaluator"
+module LangsmithrbRails
+  module Evaluation
+    # Evaluator that asks an LLM to grade a response against free-form criteria.
+    # The LLM is prompted to answer with "Score: <0.0-1.0>" and "Feedback: <text>",
+    # and that answer is parsed into a numeric score plus feedback metadata.
+    class LLMEvaluator < Evaluator
+      # Initialize a new LLM evaluator
+      # @param llm [Object] Object responding to `call`, `generate`, `complete` or `chat`
+      # @param criteria [String] Evaluation criteria; a generic default is used when nil
+      # @param client [LangsmithrbRails::Client] LangSmith client (forwarded to Evaluator)
+      # @param project_name [String] Optional project name for evaluations
+      # @param tags [Array<String>] Optional tags for evaluations
+      def initialize(llm:, criteria: nil, client: nil, project_name: nil, tags: [])
+        super(client: client, project_name: project_name, tags: tags)
+        @llm = llm
+        @criteria = criteria || "Evaluate the response for accuracy, relevance, and completeness."
+      end
+      # Evaluate a prediction, optionally against a reference and the original input
+      # @param prediction [String, Hash] The prediction to evaluate
+      # @param reference [String, Hash] The reference to compare against
+      # @param input [Hash] Optional input that generated the prediction
+      # @return [Hash] `{ score:, metadata: }`; when the LLM call or parsing raises,
+      #   score is nil and metadata carries an :error message instead
+      def evaluate(prediction, reference = nil, input = nil)
+        prediction_str = extract_string(prediction)
+        reference_str = extract_string(reference)
+        input_str = input.is_a?(Hash) ? input.to_json : input.to_s if input
+        prompt = create_evaluation_prompt(prediction_str, reference_str, input_str)
+        begin
+          evaluation = get_llm_evaluation(prompt)
+          score, feedback = parse_evaluation(evaluation)
+          {
+            score: score,
+            metadata: {
+              feedback: feedback,
+              criteria: @criteria,
+              evaluation: evaluation
+            }
+          }
+        rescue => e
+          # Best effort: report the failure in the result rather than raising.
+          {
+            score: nil,
+            metadata: {
+              error: "Evaluation failed: #{e.message}"
+            }
+          }
+        end
+      end
+      private
+      # Extract a string from an object
+      # @param obj [Object] The object to extract a string from
+      # @return [String, nil] The extracted string, or nil when obj is nil
+      def extract_string(obj)
+        case obj
+        when nil
+          nil
+        when String
+          obj
+        when Hash
+          # Try common output keys, accepting both String and Symbol keys so that
+          # symbol-keyed hashes are not dumped wholesale as JSON.
+          %w[output response result text completion answer].each do |key|
+            [key, key.to_sym].each do |candidate|
+              return obj[candidate].to_s if obj.key?(candidate)
+            end
+          end
+          obj.to_json
+        else
+          obj.to_s
+        end
+      end
+      # Create an evaluation prompt for the LLM
+      # @param prediction [String] The prediction string
+      # @param reference [String, nil] The reference string (section omitted when nil)
+      # @param input [String, nil] The input string (section omitted when nil)
+      # @return [String] The evaluation prompt
+      def create_evaluation_prompt(prediction, reference, input)
+        sections = ["You are an impartial evaluator. Your task is to evaluate the quality of a response.\n\n"]
+        sections << "Input:\n#{input}\n\n" if input
+        sections << "Response to evaluate:\n#{prediction}\n\n"
+        sections << "Reference (correct) response:\n#{reference}\n\n" if reference
+        sections << "Evaluation criteria:\n#{@criteria}\n\n"
+        sections << "Please provide a score between 0.0 and 1.0, where 1.0 is perfect, and detailed feedback.\n"
+        sections << "Format your response as:\nScore: [score between 0.0 and 1.0]\nFeedback: [your detailed feedback]"
+        sections.join
+      end
+      # Get evaluation from LLM
+      # @param prompt [String] The evaluation prompt
+      # @return [Object] The LLM's evaluation: the content/text/completion entry of a
+      #   Hash response (returned as-is), otherwise the response converted with to_s
+      # @raise [RuntimeError] If the LLM interface is not supported
+      def get_llm_evaluation(prompt)
+        result = if @llm.respond_to?(:call)
+          @llm.call(prompt)
+        elsif @llm.respond_to?(:generate)
+          @llm.generate(prompt)
+        elsif @llm.respond_to?(:complete)
+          @llm.complete(prompt)
+        elsif @llm.respond_to?(:chat)
+          @llm.chat(messages: [{ role: "user", content: prompt }])
+        else
+          raise "Unsupported LLM interface"
+        end
+        return result.to_s unless result.is_a?(Hash)
+        # Symbol key first, then String key, in order of preference.
+        %i[content text completion].each do |key|
+          value = result[key] || result[key.to_s]
+          return value if value
+        end
+        result.to_s
+      end
+      # Parse the LLM's evaluation
+      # @param evaluation [String] The LLM's evaluation
+      # @return [Array] Score (Float) and feedback (String)
+      def parse_evaluation(evaluation)
+        # The prompt shows the format as "Score: [score ...]", so tolerate a reply
+        # that keeps the opening bracket (e.g. "Score: [0.8]").
+        score_match = evaluation.match(/Score:\s*\[?\s*(\d+(?:\.\d+)?)/)
+        score = score_match ? score_match[1].to_f : nil
+        # Missing or out-of-range scores fall back to the middle score.
+        score = 0.5 if score.nil? || score < 0.0 || score > 1.0
+        # Everything after "Feedback:" (across lines); the whole reply when absent.
+        feedback_match = evaluation.match(/Feedback:\s*(.+)/m)
+        feedback = feedback_match ? feedback_match[1].strip : evaluation
+        [score, feedback]
+      end
+    end
+  end
+end

data/lib/langsmithrb_rails/evaluation/string_evaluator.rb ADDED Viewed

@@ -0,0 +1,158 @@
+# frozen_string_literal: true
+require_relative "evaluator"
+module LangsmithrbRails
+  module Evaluation
+    # Evaluator for string comparisons
+    class StringEvaluator < Evaluator
+      # Initialize a new string evaluator
+      # @param match_type [Symbol] Type of string matching (:exact, :contains, :regex)
+      # @param case_sensitive [Boolean] Whether to perform case-sensitive matching
+      # @param client [LangsmithrbRails::Client] LangSmith client
+      # @param project_name [String] Optional project name for evaluations
+      # @param tags [Array<String>] Optional tags for evaluations
+      def initialize(match_type: :exact, case_sensitive: true, client: nil, project_name: nil, tags: [])
+        super(client: client, project_name: project_name, tags: tags)
+        @match_type = match_type
+        @case_sensitive = case_sensitive
+      end
+      # Evaluate a prediction against a reference
+      # @param prediction [String] The prediction to evaluate
+      # @param reference [String] The reference to compare against
+      # @param input [Hash] Optional input that generated the prediction
+      # @return [Hash] Evaluation result with score and metadata
+      def evaluate(prediction, reference = nil, input = nil)
+        # Extract strings from prediction and reference
+        prediction_str = extract_string(prediction)
+        reference_str = extract_string(reference)
+        # If reference is nil, we can't evaluate
+        if reference_str.nil?
+          return {
+            score: nil,
+            metadata: {
+              error: "No reference provided for evaluation"
+            }
+          }
+        end
+        # Prepare strings for comparison
+        unless @case_sensitive
+          prediction_str = prediction_str.downcase
+          reference_str = reference_str.downcase
+        end
+        # Perform comparison based on match type
+        score, metadata = case @match_type
+                          when :exact
+                            exact_match(prediction_str, reference_str)
+                          when :contains
+                            contains_match(prediction_str, reference_str)
+                          when :regex
+                            regex_match(prediction_str, reference_str)
+                          else
+                            [0.0, { error: "Unknown match type: #{@match_type}" }]
+                          end
+        {
+          score: score,
+          metadata: metadata
+        }
+      end
+      private
+      # Extract a string from an object
+      # @param obj [Object] The object to extract a string from
+      # @return [String] The extracted string
+      def extract_string(obj)
+        return nil if obj.nil?
+        case obj
+        when String
+          obj
+        when Hash
+          # Try common output keys
+          %w[output response result text completion answer].each do |key|
+            return obj[key].to_s if obj.key?(key) || obj.key?(key.to_sym)
+          end
+          # Try to find common keys between prediction and reference
+          if obj.key?(:prediction) && obj.key?(:reference)
+            return obj[:prediction].to_s
+          end
+          # If it's a simple hash with a single value, use that
+          return obj.values.first.to_s if obj.size == 1
+          # Otherwise convert the whole hash to a string
+          obj.to_s
+        else
+          obj.to_s
+        end
+      end
+      # Perform exact string matching
+      # @param prediction [String] The prediction string
+      # @param reference [String] The reference string
+      # @return [Array] Score and metadata
+      def exact_match(prediction, reference)
+        match = prediction == reference
+        [
+          match ? 1.0 : 0.0,
+          {
+            match: match,
+            match_type: "exact",
+            case_sensitive: @case_sensitive
+          }
+        ]
+      end
+      # Perform contains string matching
+      # @param prediction [String] The prediction string
+      # @param reference [String] The reference string
+      # @return [Array] Score and metadata
+      def contains_match(prediction, reference)
+        match = prediction.include?(reference)
+        [
+          match ? 1.0 : 0.0,
+          {
+            match: match,
+            match_type: "contains",
+            case_sensitive: @case_sensitive
+          }
+        ]
+      end
+      # Perform regex string matching
+      # @param prediction [String] The prediction string
+      # @param reference [String] The reference string (as regex pattern)
+      # @return [Array] Score and metadata
+      def regex_match(prediction, reference)
+        begin
+          regex = Regexp.new(reference, @case_sensitive ? nil : Regexp::IGNORECASE)
+          match = regex.match?(prediction)
+          [
+            match ? 1.0 : 0.0,
+            {
+              match: match,
+              match_type: "regex",
+              case_sensitive: @case_sensitive
+            }
+          ]
+        rescue RegexpError => e
+          [
+            0.0,
+            {
+              error: "Invalid regex pattern: #{e.message}",
+              match_type: "regex",
+              case_sensitive: @case_sensitive
+            }
+          ]
+        end
+      end
+    end
+  end
+end

data/lib/langsmithrb_rails/evaluation.rb ADDED Viewed

@@ -0,0 +1,76 @@
+# frozen_string_literal: true
+require_relative "evaluation/evaluator"
+require_relative "evaluation/string_evaluator"
+require_relative "evaluation/llm_evaluator"
+module LangsmithrbRails
+  # Evaluation framework for LangSmith
+  module Evaluation
+    # Create a new evaluator
+    # @param type [Symbol] Type of evaluator (:string, :llm)
+    # @param options [Hash] Options for the evaluator
+    # @return [Evaluator] The evaluator instance
+    def self.create(type, **options)
+      case type
+      when :string
+        StringEvaluator.new(**options)
+      when :llm
+        unless options[:llm]
+          raise ArgumentError, "LLM evaluator requires an :llm option"
+        end
+        LLMEvaluator.new(**options)
+      else
+        raise ArgumentError, "Unknown evaluator type: #{type}"
+      end
+    end
+    # Run a batch evaluation on a dataset
+    # @param dataset_id [String] The dataset ID to evaluate
+    # @param evaluators [Array<Evaluator>] The evaluators to use
+    # @param experiment_name [String] Name for the experiment
+    # @param target_llm [Object] Optional LLM to use for generating predictions
+    # @return [Hash] Evaluation results
+    def self.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil)
+      experiment_name ||= "Evaluation #{Time.now.utc.iso8601}"
+      results = {
+        experiment_name: experiment_name,
+        dataset_id: dataset_id,
+        evaluators: evaluators.map { |e| e.class.name },
+        results: []
+      }
+      evaluators.each do |evaluator|
+        evaluator_results = evaluator.evaluate_dataset(dataset_id, experiment_name, target_llm)
+        results[:results] << evaluator_results
+      end
+      results
+    end
+    # Run a batch evaluation on runs
+    # @param run_ids [Array<String>] The run IDs to evaluate
+    # @param evaluators [Array<Evaluator>] The evaluators to use
+    # @param references [Hash<String, Object>] Map of run IDs to references
+    # @return [Hash] Evaluation results
+    def self.evaluate_runs(run_ids, evaluators, references = {})
+      results = {
+        run_ids: run_ids,
+        evaluators: evaluators.map { |e| e.class.name },
+        results: {}
+      }
+      run_ids.each do |run_id|
+        results[:results][run_id] = {}
+        evaluators.each do |evaluator|
+          reference = references[run_id]
+          results[:results][run_id][evaluator.class.name] = evaluator.evaluate_run(run_id, reference)
+        end
+      end
+      results
+    end
+  end
+end

data/lib/langsmithrb_rails/otel/exporter.rb ADDED Viewed

@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+module LangsmithrbRails
+  module OTEL
+    # OpenTelemetry exporter for LangSmith
+    class Exporter
+      # Initialize a new OpenTelemetry exporter
+      # @param api_key [String] LangSmith API key
+      # @param api_url [String] LangSmith API URL
+      def initialize(api_key: nil, api_url: nil)
+        @api_key = api_key || Config[:api_key]
+        @api_url = api_url || Config[:api_url]
+        @client = LangsmithrbRails::Client.new(api_key: @api_key, api_url: @api_url)
+      end
+      # Export spans to LangSmith
+      # @param spans [Array<OpenTelemetry::SDK::Trace::SpanData>] Spans to export
+      # @return [Integer] Export result (success, failure, etc.)
+      def export(spans)
+        return :success if spans.empty?
+        spans.each do |span|
+          export_span(span)
+        end
+        :success
+      rescue => e
+        LangsmithrbRails.logger.error("Failed to export spans: #{e.message}")
+        :failure
+      end
+      # Shutdown the exporter
+      # @param timeout [Integer] Timeout in seconds
+      # @return [Boolean] True if shutdown was successful
+      def shutdown(timeout = 0)
+        true
+      end
+      private
+      # Export a single span to LangSmith
+      # @param span [OpenTelemetry::SDK::Trace::SpanData] Span to export
+      def export_span(span)
+        # Convert span to LangSmith run
+        run_data = convert_span_to_run(span)
+        # Create or update the run in LangSmith
+        if span.parent_span_id.nil?
+          @client.create_run(run_data)
+        else
+          @client.update_run(run_data[:id], run_data)
+        end
+      end
+      # Convert an OpenTelemetry span to a LangSmith run
+      # @param span [OpenTelemetry::SDK::Trace::SpanData] Span to convert
+      # @return [Hash] LangSmith run data
+      def convert_span_to_run(span)
+        # Extract span attributes
+        attributes = span.attributes.to_h
+        # Basic run data
+        run_data = {
+          id: span.span_id.to_s,
+          name: span.name,
+          start_time: span.start_timestamp.to_time.utc.iso8601,
+          end_time: span.end_timestamp.to_time.utc.iso8601,
+          status: span.status.code == 0 ? "success" : "error"
+        }
+        # Add parent run ID if present
+        if span.parent_span_id
+          run_data[:parent_run_id] = span.parent_span_id.to_s
+        end
+        # Add error if present
+        if span.status.code != 0
+          run_data[:error] = span.status.description || "Unknown error"
+        end
+        # Add inputs and outputs from attributes
+        if attributes[:inputs]
+          run_data[:inputs] = parse_json_attribute(attributes[:inputs])
+        end
+        if attributes[:outputs]
+          run_data[:outputs] = parse_json_attribute(attributes[:outputs])
+        end
+        # Add run type
+        run_data[:run_type] = attributes[:run_type] || "chain"
+        # Add project name if present
+        if attributes[:project_name]
+          run_data[:session_name] = attributes[:project_name]
+        end
+        # Add tags if present
+        if attributes[:tags]
+          run_data[:tags] = parse_json_attribute(attributes[:tags])
+        end
+        run_data
+      end
+      # Parse a JSON attribute
+      # @param value [String] JSON string
+      # @return [Hash, Array, String] Parsed JSON or original string
+      def parse_json_attribute(value)
+        return value unless value.is_a?(String)
+        begin
+          JSON.parse(value)
+        rescue JSON::ParserError
+          value
+        end
+      end
+    end
+  end
+end

data/lib/langsmithrb_rails/otel.rb ADDED Viewed

@@ -0,0 +1,135 @@
+# frozen_string_literal: true
+require_relative "otel/exporter"
+module LangsmithrbRails
+  # OpenTelemetry integration for LangSmith
+  module OTEL
+    class << self
+      # Initialize OpenTelemetry integration
+      # @param api_key [String] LangSmith API key
+      # @param api_url [String] LangSmith API URL
+      # @param service_name [String] Service name for OpenTelemetry
+      # @param service_version [String] Service version for OpenTelemetry
+      # @return [Boolean] True if initialization was successful
+      def init(api_key: nil, api_url: nil, service_name: "langsmithrb-rails", service_version: LangsmithrbRails::VERSION)
+        return false unless otel_available?
+        require "opentelemetry/sdk"
+        require "opentelemetry/exporter/otlp"
+        require "opentelemetry/instrumentation/all"
+        # Create LangSmith exporter
+        langsmith_exporter = Exporter.new(api_key: api_key, api_url: api_url)
+        # Configure OpenTelemetry
+        OpenTelemetry::SDK.configure do |c|
+          c.service_name = service_name
+          c.service_version = service_version
+          # Add LangSmith exporter
+          c.add_span_processor(
+            OpenTelemetry::SDK::Trace::Export::SimpleSpanProcessor.new(langsmith_exporter)
+          )
+          # Use batch processor for better performance
+          c.add_span_processor(
+            OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
+              langsmith_exporter,
+              max_queue_size: 1000,
+              max_export_batch_size: 100,
+              schedule_delay_millis: 5000
+            )
+          )
+          # Install all available instrumentation
+          c.use_all()
+        end
+        true
+      rescue => e
+        LangsmithrbRails.logger.error("Failed to initialize OpenTelemetry: #{e.message}")
+        false
+      end
+      # Create a traced span
+      # @param name [String] Span name
+      # @param attributes [Hash] Span attributes
+      # @param kind [Symbol] Span kind
+      # @yield Block to execute within the span
+      # @return [Object] Result of the block
+      def trace(name, attributes: {}, kind: :internal, &block)
+        return yield unless otel_available? && otel_enabled?
+        tracer = OpenTelemetry.tracer_provider.tracer("langsmithrb_rails", LangsmithrbRails::VERSION)
+        tracer.in_span(name, attributes: attributes, kind: kind, &block)
+      end
+      # Create a traced span for an LLM operation
+      # @param name [String] Operation name
+      # @param inputs [Hash] Input data
+      # @param run_type [String] Type of run (e.g., "llm", "chain")
+      # @param project_name [String] Optional project name
+      # @param tags [Array<String>] Optional tags
+      # @yield Block to execute within the span
+      # @return [Object] Result of the block
+      def trace_llm(name, inputs:, run_type: "llm", project_name: nil, tags: [], &block)
+        return yield unless otel_available? && otel_enabled?
+        attributes = {
+          inputs: inputs.to_json,
+          run_type: run_type
+        }
+        if project_name
+          attributes[:project_name] = project_name
+        end
+        if tags.any?
+          attributes[:tags] = tags.to_json
+        end
+        trace(name, attributes: attributes) do |span|
+          begin
+            result = yield
+            # Add outputs to span
+            if result
+              span.add_attributes(outputs: { result: result }.to_json)
+            end
+            result
+          rescue => e
+            # Add error to span
+            if span.respond_to?(:record_exception)
+              span.record_exception(e)
+              if defined?(OpenTelemetry::Trace::Status)
+                span.status = OpenTelemetry::Trace::Status.error(e.message)
+              end
+            end
+            raise e
+          end
+        end
+      end
+      # Check if OpenTelemetry is available
+      # @return [Boolean] True if OpenTelemetry is available
+      def otel_available?
+        @otel_available ||= begin
+          require "opentelemetry"
+          require "opentelemetry/sdk"
+          true
+        rescue LoadError
+          false
+        end
+      end
+      # Check if OpenTelemetry is enabled
+      # @return [Boolean] True if OpenTelemetry is enabled
+      def otel_enabled?
+        Config[:otel_enabled]
+      end
+    end
+  end
+end