langsmithrb_rails 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluator"
4
+
5
+ module LangsmithrbRails
6
+ module Evaluation
7
+ # Evaluator that uses an LLM to evaluate responses
8
+ class LLMEvaluator < Evaluator
9
# Build an evaluator that delegates grading to an LLM.
# @param llm [Object] The LLM object asked to grade responses
# @param criteria [String, nil] Grading criteria; a generic
#   accuracy/relevance/completeness rubric is used when omitted
# @param client [LangsmithrbRails::Client] LangSmith client (forwarded to the parent class)
# @param project_name [String] Optional project name (forwarded to the parent class)
# @param tags [Array<String>] Optional tags (forwarded to the parent class)
def initialize(llm:, criteria: nil, client: nil, project_name: nil, tags: [])
  super(client: client, project_name: project_name, tags: tags)

  @criteria =
    if criteria
      criteria
    else
      "Evaluate the response for accuracy, relevance, and completeness."
    end
  @llm = llm
end
20
+
21
# Grade a prediction by prompting the configured LLM.
# @param prediction [String, Hash] The prediction to evaluate
# @param reference [String, Hash] Optional reference answer to compare against
# @param input [Hash] Optional input that produced the prediction
# @return [Hash] +{ score:, metadata: }+; when the LLM call or the parsing
#   raises, score is nil and metadata carries an :error message
def evaluate(prediction, reference = nil, input = nil)
  prediction_text = extract_string(prediction)
  reference_text = extract_string(reference)

  # Hashes are serialised as JSON, anything else via #to_s; a missing input stays nil.
  input_text = case input
               when nil, false then nil
               when Hash then input.to_json
               else input.to_s
               end

  prompt = create_evaluation_prompt(prediction_text, reference_text, input_text)

  # Only the LLM round-trip and the parsing are guarded; prompt building is not.
  begin
    raw_evaluation = get_llm_evaluation(prompt)
    score, feedback = parse_evaluation(raw_evaluation)

    details = { feedback: feedback, criteria: @criteria, evaluation: raw_evaluation }
    { score: score, metadata: details }
  rescue => e
    { score: nil, metadata: { error: "Evaluation failed: #{e.message}" } }
  end
end
59
+
60
+ private
61
+
62
# Reduce a prediction/reference object to the text that should be graded.
#
# Strings are returned as-is. For hashes the first of the common output
# keys (output, response, result, text, completion, answer) that is present
# wins; each key is looked up as a String first and then as a Symbol, so
# both +{ "output" => ... }+ and +{ output: ... }+ are understood. Hashes
# without any of those keys are serialised as JSON; every other object is
# converted with #to_s.
#
# @param obj [Object] The object to extract a string from
# @return [String, nil] The extracted string, or nil when obj is nil
def extract_string(obj)
  case obj
  when nil
    nil
  when String
    obj
  when Hash
    %w[output response result text completion answer].each do |key|
      return obj[key].to_s if obj.key?(key)

      # Symbol-keyed hashes are just as common in Ruby callers.
      symbol_key = key.to_sym
      return obj[symbol_key].to_s if obj.key?(symbol_key)
    end
    obj.to_json
  else
    obj.to_s
  end
end
81
+
82
# Assemble the grading prompt sent to the LLM.
#
# The "Input" and "Reference" sections are only included when the
# corresponding argument is truthy; the criteria come from @criteria.
#
# @param prediction [String] The prediction string
# @param reference [String, nil] The reference string
# @param input [String, nil] The input string
# @return [String] The evaluation prompt
def create_evaluation_prompt(prediction, reference, input)
  sections = ["You are an impartial evaluator. Your task is to evaluate the quality of a response.\n\n"]

  sections << "Input:\n#{input}\n\n" if input
  sections << "Response to evaluate:\n#{prediction}\n\n"
  sections << "Reference (correct) response:\n#{reference}\n\n" if reference

  sections << "Evaluation criteria:\n#{@criteria}\n\n"
  sections << "Please provide a score between 0.0 and 1.0, where 1.0 is perfect, and detailed feedback.\n"
  sections << "Format your response as:\nScore: [score between 0.0 and 1.0]\nFeedback: [your detailed feedback]"

  sections.join
end
106
+
107
# Send the prompt to the LLM and return its answer.
#
# The LLM is probed for #call, #generate, #complete (each invoked with the
# prompt) and finally #chat (invoked with a single user message), in that
# order. Hash responses are unwrapped via their content/text/completion
# entry (symbol key before string key); other responses go through #to_s.
#
# @param prompt [String] The evaluation prompt
# @return [String] The LLM's evaluation
# @raise [RuntimeError] If the LLM interface is not supported
def get_llm_evaluation(prompt)
  entry_point = %i[call generate complete].find { |name| @llm.respond_to?(name) }

  response =
    if entry_point
      @llm.public_send(entry_point, prompt)
    elsif @llm.respond_to?(:chat)
      @llm.chat(messages: [{ role: "user", content: prompt }])
    else
      raise "Unsupported LLM interface"
    end

  return response.to_s unless response.is_a?(Hash)

  %i[content text completion].each do |key|
    value = response[key] || response[key.to_s]
    return value if value
  end

  # No recognised key: fall back to the hash's string form.
  response.to_s
end
132
+
133
# Pull the numeric score and the feedback text out of the LLM's answer.
#
# The score is the first "Score: <number>" occurrence; when it is missing
# or outside 0.0..1.0 the neutral value 0.5 is used instead. The feedback
# is everything after "Feedback:" (stripped), or the whole answer when no
# such marker exists.
#
# @param evaluation [String] The LLM's evaluation
# @return [Array] Score and feedback
def parse_evaluation(evaluation)
  score_capture = evaluation.match(/Score:\s*(\d+(\.\d+)?)/)
  score = score_capture && score_capture[1].to_f
  score = 0.5 unless score && score.between?(0.0, 1.0) # Default to middle score if invalid

  feedback_capture = evaluation.match(/Feedback:\s*(.+)/m)
  feedback = feedback_capture ? feedback_capture[1].strip : evaluation

  [score, feedback]
end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluator"
4
+
5
+ module LangsmithrbRails
6
+ module Evaluation
7
+ # Evaluator for string comparisons
8
+ class StringEvaluator < Evaluator
9
# Build an evaluator that scores predictions by string comparison.
# @param match_type [Symbol] Comparison strategy (:exact, :contains, :regex)
# @param case_sensitive [Boolean] Whether the comparison is case-sensitive
# @param client [LangsmithrbRails::Client] LangSmith client (forwarded to the parent class)
# @param project_name [String] Optional project name (forwarded to the parent class)
# @param tags [Array<String>] Optional tags (forwarded to the parent class)
def initialize(match_type: :exact, case_sensitive: true, client: nil, project_name: nil, tags: [])
  super(client: client, project_name: project_name, tags: tags)

  @case_sensitive = case_sensitive
  @match_type = match_type
end
20
+
21
# Score a prediction against a reference using the configured match type.
#
# A missing reference yields a nil score; a missing prediction yields a
# 0.0 score with an explanatory :error instead of raising. For
# case-insensitive :exact and :contains comparisons both strings are
# downcased first. :regex comparisons are never downcased: the pattern is
# handed over untouched (downcasing would turn escapes such as \D, \S or \W
# into their opposites) and case-insensitivity is left to #regex_match.
#
# @param prediction [String, Hash] The prediction to evaluate
# @param reference [String, Hash] The reference (or regex pattern) to compare against
# @param input [Hash] Optional input that generated the prediction (unused here)
# @return [Hash] Evaluation result with :score and :metadata
def evaluate(prediction, reference = nil, input = nil)
  prediction_str = extract_string(prediction)
  reference_str = extract_string(reference)

  # Without a reference there is nothing to compare against.
  if reference_str.nil?
    return { score: nil, metadata: { error: "No reference provided for evaluation" } }
  end

  # A nil prediction can never match; report it rather than calling
  # String methods on nil further down.
  if prediction_str.nil?
    return {
      score: 0.0,
      metadata: {
        match: false,
        match_type: @match_type.to_s,
        case_sensitive: @case_sensitive,
        error: "No prediction provided for evaluation"
      }
    }
  end

  # Case folding is only valid for literal comparisons (see method docs).
  if !@case_sensitive && @match_type != :regex
    prediction_str = prediction_str.downcase
    reference_str = reference_str.downcase
  end

  score, metadata = case @match_type
                    when :exact
                      exact_match(prediction_str, reference_str)
                    when :contains
                      contains_match(prediction_str, reference_str)
                    when :regex
                      regex_match(prediction_str, reference_str)
                    else
                      [0.0, { error: "Unknown match type: #{@match_type}" }]
                    end

  { score: score, metadata: metadata }
end
64
+
65
+ private
66
+
67
# Reduce a prediction/reference object to the string that gets compared.
#
# Strings are returned as-is. For hashes the lookup order is:
# 1. the first common output key (output, response, result, text,
#    completion, answer) that is present, as a String key or a Symbol key;
# 2. the :prediction entry when both :prediction and :reference exist;
# 3. the only value of a single-entry hash;
# 4. the hash's #to_s form.
# Any other object is converted with #to_s.
#
# @param obj [Object] The object to extract a string from
# @return [String, nil] The extracted string, or nil when obj is nil
def extract_string(obj)
  case obj
  when nil
    nil
  when String
    obj
  when Hash
    %w[output response result text completion answer].each do |key|
      return obj[key].to_s if obj.key?(key)

      # Read the value through the key that actually exists; looking up the
      # String key for a Symbol-keyed hash would always yield "".
      symbol_key = key.to_sym
      return obj[symbol_key].to_s if obj.key?(symbol_key)
    end

    return obj[:prediction].to_s if obj.key?(:prediction) && obj.key?(:reference)
    return obj.values.first.to_s if obj.size == 1

    obj.to_s
  else
    obj.to_s
  end
end
96
+
97
# Score 1.0 when prediction and reference are equal, 0.0 otherwise.
# @param prediction [String] The prediction string
# @param reference [String] The reference string
# @return [Array] Score and metadata (:match, :match_type, :case_sensitive)
def exact_match(prediction, reference)
  matched = (prediction == reference)
  details = { match: matched, match_type: "exact", case_sensitive: @case_sensitive }

  [matched ? 1.0 : 0.0, details]
end
112
+
113
# Score 1.0 when the prediction includes the reference as a substring,
# 0.0 otherwise.
# @param prediction [String] The prediction string
# @param reference [String] The reference string searched for
# @return [Array] Score and metadata (:match, :match_type, :case_sensitive)
def contains_match(prediction, reference)
  found = prediction.include?(reference)
  details = { match: found, match_type: "contains", case_sensitive: @case_sensitive }

  [found ? 1.0 : 0.0, details]
end
128
+
129
# Score 1.0 when the prediction matches the reference interpreted as a
# regular expression, 0.0 otherwise. The pattern is compiled with
# Regexp::IGNORECASE when the evaluator is case-insensitive. An invalid
# pattern does not raise: it yields a 0.0 score with an :error entry.
# @param prediction [String] The prediction string
# @param reference [String] The reference string (as regex pattern)
# @return [Array] Score and metadata
def regex_match(prediction, reference)
  flags = @case_sensitive ? nil : Regexp::IGNORECASE
  matched = Regexp.new(reference, flags).match?(prediction)

  [matched ? 1.0 : 0.0, { match: matched, match_type: "regex", case_sensitive: @case_sensitive }]
rescue RegexpError => error
  failure = {
    error: "Invalid regex pattern: #{error.message}",
    match_type: "regex",
    case_sensitive: @case_sensitive
  }
  [0.0, failure]
end
156
+ end
157
+ end
158
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluation/evaluator"
4
+ require_relative "evaluation/string_evaluator"
5
+ require_relative "evaluation/llm_evaluator"
6
+
7
+ module LangsmithrbRails
8
+ # Evaluation framework for LangSmith
9
+ module Evaluation
10
# Factory for the built-in evaluators.
# @param type [Symbol] Type of evaluator (:string, :llm)
# @param options [Hash] Keyword options forwarded to the evaluator's constructor
# @return [Evaluator] The evaluator instance
# @raise [ArgumentError] If the type is unknown, or :llm is requested
#   without a truthy :llm option
def self.create(type, **options)
  return StringEvaluator.new(**options) if type == :string

  raise ArgumentError, "Unknown evaluator type: #{type}" unless type == :llm
  raise ArgumentError, "LLM evaluator requires an :llm option" unless options[:llm]

  LLMEvaluator.new(**options)
end
27
+
28
# Run every evaluator over a dataset and collect the results.
# @param dataset_id [String] The dataset ID to evaluate
# @param evaluators [Array<Evaluator>] The evaluators to use
# @param experiment_name [String] Name for the experiment; defaults to
#   "Evaluation <current UTC time in ISO 8601>"
# @param target_llm [Object] Optional LLM passed through to each evaluator
# @return [Hash] :experiment_name, :dataset_id, the evaluator class names
#   under :evaluators and one entry per evaluator under :results
def self.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil)
  run_label = experiment_name || "Evaluation #{Time.now.utc.iso8601}"

  {
    experiment_name: run_label,
    dataset_id: dataset_id,
    evaluators: evaluators.map { |evaluator| evaluator.class.name },
    results: evaluators.map { |evaluator| evaluator.evaluate_dataset(dataset_id, run_label, target_llm) }
  }
end
51
+
52
# Evaluate each run with each evaluator.
# @param run_ids [Array<String>] The run IDs to evaluate
# @param evaluators [Array<Evaluator>] The evaluators to use
# @param references [Hash<String, Object>] Map of run IDs to references;
#   runs without an entry are evaluated with a nil reference
# @return [Hash] :run_ids, the evaluator class names under :evaluators and,
#   under :results, a hash of run ID => { evaluator class name => result }
def self.evaluate_runs(run_ids, evaluators, references = {})
  evaluator_names = evaluators.map { |evaluator| evaluator.class.name }

  per_run = run_ids.each_with_object({}) do |run_id, collected|
    collected[run_id] = evaluators.each_with_object({}) do |evaluator, outcomes|
      outcomes[evaluator.class.name] = evaluator.evaluate_run(run_id, references[run_id])
    end
  end

  { run_ids: run_ids, evaluators: evaluator_names, results: per_run }
end
75
+ end
76
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LangsmithrbRails
4
+ module OTEL
5
+ # OpenTelemetry exporter for LangSmith
6
+ class Exporter
7
# Set up the exporter and the LangSmith client it sends runs through.
# Explicit arguments win; otherwise the values come from Config.
# @param api_key [String] LangSmith API key
# @param api_url [String] LangSmith API URL
def initialize(api_key: nil, api_url: nil)
  @api_key = api_key || Config[:api_key]
  @api_url = api_url || Config[:api_url]

  connection = { api_key: @api_key, api_url: @api_url }
  @client = LangsmithrbRails::Client.new(**connection)
end
15
+
16
# Export spans to LangSmith.
#
# The signature accepts the +timeout:+ keyword that the OpenTelemetry SDK
# span processors pass to exporters (the value is currently not enforced),
# so the exporter can be driven by a BatchSpanProcessor without raising
# ArgumentError. When the SDK's export result constants are loaded the
# method answers with them (SUCCESS / FAILURE integers, which is what the
# processors compare against); otherwise it keeps returning the
# :success / :failure symbols.
#
# @param spans [Array<OpenTelemetry::SDK::Trace::SpanData>] Spans to export
# @param timeout [Numeric, nil] Optional export timeout supplied by the SDK (ignored)
# @return [Integer, Symbol] Export result (success or failure)
def export(spans, timeout: nil)
  result_codes = defined?(OpenTelemetry::SDK::Trace::Export::SUCCESS) ? OpenTelemetry::SDK::Trace::Export : nil

  spans.each { |span| export_span(span) }

  result_codes ? result_codes::SUCCESS : :success
rescue => e
  # Exporting is best-effort: log and report failure instead of raising
  # into the span processor.
  LangsmithrbRails.logger.error("Failed to export spans: #{e.message}")
  result_codes ? result_codes::FAILURE : :failure
end
31
+
32
# Shut the exporter down. Nothing is released or flushed here: the method
# ignores its argument and always reports success.
# @param timeout [Integer] Timeout in seconds (unused)
# @return [Boolean] Always true
def shutdown(timeout = 0)
  true
end
38
+
39
+ private
40
+
41
# Export a single span to LangSmith.
#
# Root spans are created as new runs; spans with a parent are sent through
# update_run. A span counts as a root when its parent span ID is nil or
# consists only of zero bytes - the OpenTelemetry SDK reports "no parent"
# as an all-zero span ID rather than nil, so a plain nil check would treat
# every span as a child.
#
# NOTE(review): child spans are exported once, when they end, so it looks
# like they may need create_run (with parent_run_id) rather than
# update_run - confirm against the LangSmith client before changing.
#
# @param span [OpenTelemetry::SDK::Trace::SpanData] Span to export
def export_span(span)
  run_data = convert_span_to_run(span)

  parent_id = span.parent_span_id
  root_span = parent_id.nil? || parent_id.to_s.each_byte.all?(&:zero?)

  if root_span
    @client.create_run(run_data)
  else
    @client.update_run(run_data[:id], run_data)
  end
end
54
+
55
# Convert an OpenTelemetry span to a LangSmith run hash.
#
# Compared with a naive field copy this method is deliberately tolerant:
# * attribute keys are looked up as Symbols and as Strings;
# * timestamps may be Integer nanoseconds since the epoch (what the SDK's
#   SpanData carries) or any object responding to #to_time;
# * only the ERROR status code marks the run as failed - the SDK's default
#   UNSET status is a normal, successful span;
# * an all-zero parent span ID means "no parent" and is not emitted;
# * hex span IDs are used when the span offers them (raw IDs are binary).
#
# NOTE(review): LangSmith run IDs are presumably expected to be UUIDs;
# span IDs are not - confirm how the API treats them.
#
# @param span [OpenTelemetry::SDK::Trace::SpanData] Span to convert
# @return [Hash] LangSmith run data
def convert_span_to_run(span)
  attributes = span.attributes.to_h
  attribute = ->(key) { attributes[key] || attributes[key.to_s] }

  to_iso8601 = lambda do |timestamp|
    time = timestamp.is_a?(Integer) ? Time.at(Rational(timestamp, 1_000_000_000)) : timestamp.to_time
    time.getutc.iso8601
  end

  # OpenTelemetry::Trace::Status: OK = 0, UNSET = 1, ERROR = 2.
  error_code = defined?(OpenTelemetry::Trace::Status::ERROR) ? OpenTelemetry::Trace::Status::ERROR : 2
  failed = span.status.code == error_code

  run_data = {
    id: span.respond_to?(:hex_span_id) ? span.hex_span_id : span.span_id.to_s,
    name: span.name,
    start_time: to_iso8601.call(span.start_timestamp),
    end_time: to_iso8601.call(span.end_timestamp),
    status: failed ? "error" : "success"
  }

  # Add parent run ID if present (an all-zero ID is the SDK's "no parent").
  parent_id = span.parent_span_id
  if parent_id && !parent_id.to_s.each_byte.all?(&:zero?)
    run_data[:parent_run_id] =
      span.respond_to?(:hex_parent_span_id) ? span.hex_parent_span_id : parent_id.to_s
  end

  # Add error if present; an empty description falls back to a placeholder.
  if failed
    description = span.status.description.to_s
    run_data[:error] = description.empty? ? "Unknown error" : description
  end

  # Add inputs and outputs from attributes
  inputs = attribute.call(:inputs)
  run_data[:inputs] = parse_json_attribute(inputs) if inputs

  outputs = attribute.call(:outputs)
  run_data[:outputs] = parse_json_attribute(outputs) if outputs

  # Add run type
  run_data[:run_type] = attribute.call(:run_type) || "chain"

  # Add project name if present
  project_name = attribute.call(:project_name)
  run_data[:session_name] = project_name if project_name

  # Add tags if present
  tags = attribute.call(:tags)
  run_data[:tags] = parse_json_attribute(tags) if tags

  run_data
end
105
+
106
# Decode a span attribute that may hold JSON.
# Non-string values are returned untouched; strings that are not valid
# JSON are returned as they are.
# @param value [String] JSON string
# @return [Hash, Array, String] Parsed JSON or original string
def parse_json_attribute(value)
  return value unless value.is_a?(String)

  JSON.parse(value)
rescue JSON::ParserError
  value
end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,135 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "otel/exporter"
4
+
5
+ module LangsmithrbRails
6
+ # OpenTelemetry integration for LangSmith
7
+ module OTEL
8
+ class << self
9
# Initialize OpenTelemetry integration.
#
# Configures the OpenTelemetry SDK with the LangSmith exporter behind a
# single batch span processor and installs all available instrumentation.
# The exporter is registered once only: attaching it to both a simple and
# a batch processor would export every span twice. The batch schedule
# delay is left at the SDK default instead of being passed explicitly,
# because the keyword name for it differs between SDK versions.
#
# Any failure - including a LoadError from one of the optional
# OpenTelemetry gems - is logged and reported as false instead of raised.
#
# @param api_key [String] LangSmith API key
# @param api_url [String] LangSmith API URL
# @param service_name [String] Service name for OpenTelemetry
# @param service_version [String] Service version for OpenTelemetry
# @return [Boolean] True if initialization was successful
def init(api_key: nil, api_url: nil, service_name: "langsmithrb-rails", service_version: LangsmithrbRails::VERSION)
  return false unless otel_available?

  require "opentelemetry/sdk"
  require "opentelemetry/exporter/otlp"
  require "opentelemetry/instrumentation/all"

  # Create LangSmith exporter
  langsmith_exporter = Exporter.new(api_key: api_key, api_url: api_url)

  # Configure OpenTelemetry
  OpenTelemetry::SDK.configure do |c|
    c.service_name = service_name
    c.service_version = service_version

    # Export through a batch processor so span completion does not block
    # on a LangSmith round-trip.
    c.add_span_processor(
      OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
        langsmith_exporter,
        max_queue_size: 1000,
        max_export_batch_size: 100
      )
    )

    # Install all available instrumentation
    c.use_all
  end

  true
rescue StandardError, LoadError => e
  # LoadError is not a StandardError, so it has to be listed explicitly.
  LangsmithrbRails.logger.error("Failed to initialize OpenTelemetry: #{e.message}")
  false
end
54
+
55
# Run a block inside an OpenTelemetry span.
# When OpenTelemetry is unavailable or disabled the block is simply
# yielded to (without a span argument) and its result returned.
# @param name [String] Span name
# @param attributes [Hash] Span attributes
# @param kind [Symbol] Span kind
# @yield Block to execute within the span
# @return [Object] Result of the block
def trace(name, attributes: {}, kind: :internal, &block)
  tracing_active = otel_available? && otel_enabled?
  return yield unless tracing_active

  OpenTelemetry.tracer_provider
               .tracer("langsmithrb_rails", LangsmithrbRails::VERSION)
               .in_span(name, attributes: attributes, kind: kind, &block)
end
68
+
69
# Run an LLM operation inside a span that carries LangSmith metadata.
#
# Inputs, run type, project name and tags are stored as span attributes
# (inputs and tags JSON-encoded). A truthy block result is recorded as the
# span's outputs. Exceptions are recorded on the span when it supports
# that, and are always re-raised. When OpenTelemetry is unavailable or
# disabled the block is simply yielded to.
#
# @param name [String] Operation name
# @param inputs [Hash] Input data
# @param run_type [String] Type of run (e.g., "llm", "chain")
# @param project_name [String] Optional project name
# @param tags [Array<String>] Optional tags
# @yield Block to execute within the span
# @return [Object] Result of the block
def trace_llm(name, inputs:, run_type: "llm", project_name: nil, tags: [], &block)
  tracing_active = otel_available? && otel_enabled?
  return yield unless tracing_active

  span_attributes = { inputs: inputs.to_json, run_type: run_type }
  span_attributes[:project_name] = project_name if project_name
  span_attributes[:tags] = tags.to_json if tags.any?

  trace(name, attributes: span_attributes) do |span|
    begin
      outcome = yield

      # Add outputs to span
      span.add_attributes(outputs: { result: outcome }.to_json) if outcome

      outcome
    rescue => error
      # Add error to span
      if span.respond_to?(:record_exception)
        span.record_exception(error)
        span.status = OpenTelemetry::Trace::Status.error(error.message) if defined?(OpenTelemetry::Trace::Status)
      end
      raise error
    end
  end
end
115
+
116
# Check if OpenTelemetry is available.
#
# The outcome of the first probe is memoized, including a negative one:
# `||=` would re-run the failing requires on every call because it never
# caches false, so the cache is keyed on whether the ivar is defined.
#
# @return [Boolean] True if the opentelemetry and opentelemetry/sdk
#   libraries could be required
def otel_available?
  return @otel_available if defined?(@otel_available)

  @otel_available = begin
    require "opentelemetry"
    require "opentelemetry/sdk"
    true
  rescue LoadError
    false
  end
end
127
+
128
# Report whether OpenTelemetry tracing is switched on in the configuration.
# The value stored under Config[:otel_enabled] is returned as it is.
# @return [Boolean] True if OpenTelemetry is enabled
def otel_enabled?
  Config[:otel_enabled]
end
133
+ end
134
+ end
135
+ end