langsmithrb_rails 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/.rspec_status +161 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +20 -0
- data/Gemfile.lock +321 -0
- data/LICENSE +21 -0
- data/README.md +421 -0
- data/Rakefile +10 -0
- data/langsmithrb_rails-0.1.0.gem +0 -0
- data/langsmithrb_rails-0.1.1.gem +0 -0
- data/langsmithrb_rails.gemspec +45 -0
- data/lib/generators/langsmithrb_rails/buffer/buffer_generator.rb +94 -0
- data/lib/generators/langsmithrb_rails/buffer/templates/create_langsmith_run_buffers.rb +29 -0
- data/lib/generators/langsmithrb_rails/buffer/templates/flush_buffer_job.rb +40 -0
- data/lib/generators/langsmithrb_rails/buffer/templates/langsmith.rake +71 -0
- data/lib/generators/langsmithrb_rails/buffer/templates/langsmith_run_buffer.rb +70 -0
- data/lib/generators/langsmithrb_rails/buffer/templates/migration.rb +28 -0
- data/lib/generators/langsmithrb_rails/ci/ci_generator.rb +37 -0
- data/lib/generators/langsmithrb_rails/ci/templates/langsmith-evals.yml +85 -0
- data/lib/generators/langsmithrb_rails/ci/templates/langsmith_export_summary.rb +81 -0
- data/lib/generators/langsmithrb_rails/demo/demo_generator.rb +81 -0
- data/lib/generators/langsmithrb_rails/demo/templates/chat_controller.js +88 -0
- data/lib/generators/langsmithrb_rails/demo/templates/chat_controller.rb +58 -0
- data/lib/generators/langsmithrb_rails/demo/templates/chat_message.rb +24 -0
- data/lib/generators/langsmithrb_rails/demo/templates/create_chat_messages.rb +19 -0
- data/lib/generators/langsmithrb_rails/demo/templates/index.html.erb +180 -0
- data/lib/generators/langsmithrb_rails/demo/templates/llm_service.rb +165 -0
- data/lib/generators/langsmithrb_rails/evals/evals_generator.rb +52 -0
- data/lib/generators/langsmithrb_rails/evals/templates/checks/correctness.rb +71 -0
- data/lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb +137 -0
- data/lib/generators/langsmithrb_rails/evals/templates/datasets/sample.yml +60 -0
- data/lib/generators/langsmithrb_rails/evals/templates/langsmith_evals.rake +255 -0
- data/lib/generators/langsmithrb_rails/evals/templates/targets/http.rb +120 -0
- data/lib/generators/langsmithrb_rails/evals/templates/targets/ruby.rb +136 -0
- data/lib/generators/langsmithrb_rails/install/install_generator.rb +35 -0
- data/lib/generators/langsmithrb_rails/install/templates/config.yml +45 -0
- data/lib/generators/langsmithrb_rails/install/templates/initializer.rb +34 -0
- data/lib/generators/langsmithrb_rails/privacy/privacy_generator.rb +39 -0
- data/lib/generators/langsmithrb_rails/privacy/templates/custom_redactor.rb +132 -0
- data/lib/generators/langsmithrb_rails/privacy/templates/privacy.yml +88 -0
- data/lib/generators/langsmithrb_rails/privacy/templates/privacy_initializer.rb +41 -0
- data/lib/generators/langsmithrb_rails/tracing/templates/langsmith_traced.rb +146 -0
- data/lib/generators/langsmithrb_rails/tracing/templates/langsmith_traced_job.rb +151 -0
- data/lib/generators/langsmithrb_rails/tracing/templates/request_tracing.rb +117 -0
- data/lib/generators/langsmithrb_rails/tracing/tracing_generator.rb +78 -0
- data/lib/langsmithrb_rails/client.rb +292 -0
- data/lib/langsmithrb_rails/config.rb +169 -0
- data/lib/langsmithrb_rails/evaluation/evaluator.rb +178 -0
- data/lib/langsmithrb_rails/evaluation/llm_evaluator.rb +154 -0
- data/lib/langsmithrb_rails/evaluation/string_evaluator.rb +158 -0
- data/lib/langsmithrb_rails/evaluation.rb +76 -0
- data/lib/langsmithrb_rails/generators/langsmithrb_rails/langsmith_generator.rb +61 -0
- data/lib/langsmithrb_rails/generators/langsmithrb_rails/templates/langsmith_initializer.rb +22 -0
- data/lib/langsmithrb_rails/langsmith.rb +35 -0
- data/lib/langsmithrb_rails/otel/exporter.rb +120 -0
- data/lib/langsmithrb_rails/otel.rb +135 -0
- data/lib/langsmithrb_rails/railtie.rb +33 -0
- data/lib/langsmithrb_rails/redactor.rb +76 -0
- data/lib/langsmithrb_rails/run_trees.rb +157 -0
- data/lib/langsmithrb_rails/version.rb +5 -0
- data/lib/langsmithrb_rails/wrappers/anthropic.rb +146 -0
- data/lib/langsmithrb_rails/wrappers/base.rb +81 -0
- data/lib/langsmithrb_rails/wrappers/llm.rb +151 -0
- data/lib/langsmithrb_rails/wrappers/openai.rb +193 -0
- data/lib/langsmithrb_rails/wrappers.rb +41 -0
- data/lib/langsmithrb_rails.rb +151 -0
- data/pkg/langsmithrb_rails-0.3.0.gem +0 -0
- metadata +74 -7
@@ -0,0 +1,178 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LangsmithrbRails
|
4
|
+
module Evaluation
|
5
|
+
# Base evaluator class
|
6
|
+
class Evaluator
  attr_reader :client, :project_name, :tags

  # Initialize a new evaluator
  # @param client [LangsmithrbRails::Client] LangSmith client (a new one is built when omitted)
  # @param project_name [String] Optional project name for evaluations
  # @param tags [Array<String>] Optional tags for evaluations
  def initialize(client: nil, project_name: nil, tags: [])
    @client = client || LangsmithrbRails::Client.new
    @project_name = project_name
    @tags = tags
  end

  # Evaluate a prediction against a reference. Abstract: subclasses override this.
  # @param prediction [String, Hash] The prediction to evaluate
  # @param reference [String, Hash] The reference to compare against
  # @param input [Hash] Optional input that generated the prediction
  # @return [Hash] Evaluation result with :score and :metadata
  # @raise [NotImplementedError] Always, in this base class
  def evaluate(prediction, reference = nil, input = nil)
    raise NotImplementedError, "Subclasses must implement evaluate method"
  end

  # Fetch a run, evaluate its output and record the result as feedback on the run.
  # @param run_id [String] The ID of the run to evaluate
  # @param reference [String, Hash] The reference to compare against
  # @return [Hash] Evaluation result with score and metadata
  # @raise [RuntimeError] If the run could not be fetched
  def evaluate_run(run_id, reference = nil)
    response = client.get_run(run_id)

    unless successful_response?(response)
      raise "Failed to get run: #{response[:error] || response[:body]}"
    end

    run = response[:body]
    prediction = extract_prediction_from_run(run)

    result = evaluate(prediction, reference, run["inputs"])
    create_feedback(run_id, result)

    result
  end

  # Evaluate multiple runs
  # @param run_ids [Array<String>] The IDs of the runs to evaluate
  # @param references [Hash<String, Object>] Map of run IDs to references
  # @return [Hash<String, Hash>] Map of run IDs to evaluation results
  def evaluate_runs(run_ids, references = {})
    run_ids.each_with_object({}) do |run_id, results|
      results[run_id] = evaluate_run(run_id, references[run_id])
    end
  end

  # Evaluate every example of a dataset
  # @param dataset_id [String] The ID of the dataset to evaluate
  # @param experiment_name [String] Name for the experiment
  # @param target_llm [Object] Optional LLM to use for generating predictions
  # @return [Hash] Evaluation results for the dataset
  # @raise [RuntimeError] If the dataset examples could not be fetched
  def evaluate_dataset(dataset_id, experiment_name, target_llm = nil)
    response = client.list_examples(dataset_id)

    unless successful_response?(response)
      raise "Failed to get dataset examples: #{response[:error] || response[:body]}"
    end

    scored = response[:body].map do |example|
      reference = example["outputs"]
      # Without a target LLM the example's own outputs double as the prediction,
      # so the example is effectively compared with itself.
      prediction = target_llm ? generate_prediction(target_llm, example["inputs"]) : reference

      result = evaluate(prediction, reference, example["inputs"])

      {
        example_id: example["id"],
        score: result[:score],
        metadata: result[:metadata]
      }
    end

    {
      experiment_name: experiment_name,
      dataset_id: dataset_id,
      evaluator_name: self.class.name,
      results: scored
    }
  end

  private

  # Whether a client response carries a 2xx status.
  # A response without a numeric :status (for example one that only has :error)
  # counts as a failure instead of blowing up on `nil >= 200`.
  # @param response [Hash] Client response
  # @return [Boolean]
  def successful_response?(response)
    status = response[:status]
    status.is_a?(Numeric) && status >= 200 && status < 300
  end

  # Extract prediction from run outputs
  # @param run [Hash] The run data
  # @return [Object] The value under the first common output key, else all outputs
  def extract_prediction_from_run(run)
    outputs = run["outputs"] || {}

    found = %w[output response result text completion answer].find { |key| outputs.key?(key) }
    found ? outputs[found] : outputs
  end

  # Generate a prediction using an LLM
  # @param llm [Object] Anything responding to #call, #generate or #complete
  # @param input [Hash] The input to generate from
  # @return [Object] The generated prediction
  # @raise [RuntimeError] If the LLM exposes none of the supported methods
  def generate_prediction(llm, input)
    if llm.respond_to?(:call)
      llm.call(input)
    elsif llm.respond_to?(:generate)
      llm.generate(input)
    elsif llm.respond_to?(:complete)
      llm.complete(input)
    else
      raise "Unsupported LLM interface"
    end
  end

  # Create feedback for a run. A failed request is logged, not raised.
  # @param run_id [String] The ID of the run
  # @param result [Hash] The evaluation result
  # @return [Object] The body of the feedback response
  def create_feedback(run_id, result)
    # Feedback key is the downcased, un-namespaced class name;
    # anonymous classes (name is nil) fall back to "evaluator".
    key = (self.class.name || "evaluator").split("::").last.downcase

    response = client.create_feedback(
      run_id,
      key,
      result[:score],
      value: result[:metadata],
      comment: result[:comment]
    )

    unless successful_response?(response)
      LangsmithrbRails.logger.error("Failed to create feedback: #{response[:error] || response[:body]}")
    end

    response[:body]
  end
end
|
177
|
+
end
|
178
|
+
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "evaluator"
|
4
|
+
|
5
|
+
module LangsmithrbRails
|
6
|
+
module Evaluation
|
7
|
+
# Evaluator that uses an LLM to evaluate responses
|
8
|
+
class LLMEvaluator < Evaluator
|
9
|
+
# Initialize a new LLM evaluator
|
10
|
+
# @param llm [Object] The LLM to use for evaluation
|
11
|
+
# @param criteria [String] Evaluation criteria
|
12
|
+
# @param client [LangsmithrbRails::Client] LangSmith client
|
13
|
+
# @param project_name [String] Optional project name for evaluations
|
14
|
+
# @param tags [Array<String>] Optional tags for evaluations
|
15
|
+
def initialize(llm:, criteria: nil, client: nil, project_name: nil, tags: [])
  # Client, project name and tags are handed to the parent initializer.
  super(client: client, project_name: project_name, tags: tags)

  # Use a generic rubric when the caller supplies no criteria.
  @criteria = criteria || "Evaluate the response for accuracy, relevance, and completeness."
  @llm = llm
end
|
20
|
+
|
21
|
+
# Evaluate a prediction against a reference
|
22
|
+
# @param prediction [String, Hash] The prediction to evaluate
|
23
|
+
# @param reference [String, Hash] The reference to compare against
|
24
|
+
# @param input [Hash] Optional input that generated the prediction
|
25
|
+
# @return [Hash] Evaluation result with score and metadata
|
26
|
+
def evaluate(prediction, reference = nil, input = nil)
  # Reduce all three pieces to plain strings (or nil) for the prompt
  prediction_str = extract_string(prediction)
  reference_str = extract_string(reference)
  input_str =
    case input
    when nil, false then nil
    when Hash then input.to_json
    else input.to_s
    end

  prompt = create_evaluation_prompt(prediction_str, reference_str, input_str)

  # Any failure while asking the LLM or parsing its answer becomes a nil score
  # with the error message in the metadata.
  begin
    evaluation = get_llm_evaluation(prompt)
    score, feedback = parse_evaluation(evaluation)

    details = { feedback: feedback, criteria: @criteria, evaluation: evaluation }
    { score: score, metadata: details }
  rescue => e
    { score: nil, metadata: { error: "Evaluation failed: #{e.message}" } }
  end
end
|
59
|
+
|
60
|
+
private
|
61
|
+
|
62
|
+
# Extract a string from an object
|
63
|
+
# @param obj [Object] The object to extract a string from
|
64
|
+
# @return [String] The extracted string
|
65
|
+
def extract_string(obj)
  case obj
  when nil
    nil
  when String
    obj
  when Hash
    common_keys = %w[output response result text completion answer]

    # String keys keep priority; symbol keys are tried only when no string
    # key matched, so hashes like { output: "..." } are no longer dumped as JSON.
    found = common_keys.find { |key| obj.key?(key) } ||
            common_keys.map(&:to_sym).find { |key| obj.key?(key) }
    return obj[found].to_s if found

    # No recognised key: serialise the whole hash
    obj.to_json
  else
    obj.to_s
  end
end
|
81
|
+
|
82
|
+
# Create an evaluation prompt for the LLM
|
83
|
+
# @param prediction [String] The prediction string
|
84
|
+
# @param reference [String] The reference string
|
85
|
+
# @param input [String] The input string
|
86
|
+
# @return [String] The evaluation prompt
|
87
|
+
def create_evaluation_prompt(prediction, reference, input)
  # Collect the prompt sections in order; the input and reference sections
  # are included only when a value was given.
  sections = []
  sections << "You are an impartial evaluator. Your task is to evaluate the quality of a response.\n\n"
  sections << "Input:\n#{input}\n\n" if input
  sections << "Response to evaluate:\n#{prediction}\n\n"
  sections << "Reference (correct) response:\n#{reference}\n\n" if reference
  sections << "Evaluation criteria:\n#{@criteria}\n\n"
  sections << "Please provide a score between 0.0 and 1.0, where 1.0 is perfect, and detailed feedback.\n"
  sections << "Format your response as:\nScore: [score between 0.0 and 1.0]\nFeedback: [your detailed feedback]"

  sections.join
end
|
106
|
+
|
107
|
+
# Get evaluation from LLM
|
108
|
+
# @param prompt [String] The evaluation prompt
|
109
|
+
# @return [String] The LLM's evaluation
|
110
|
+
# @raise [RuntimeError] If the LLM interface is not supported
|
111
|
+
def get_llm_evaluation(prompt)
  # Pick the first supported entry point, in priority order
  entry_point = %i[call generate complete chat].find { |name| @llm.respond_to?(name) }
  raise "Unsupported LLM interface" unless entry_point

  raw =
    if entry_point == :chat
      # Chat-style clients take a message list rather than a bare prompt
      @llm.chat(messages: [{ role: "user", content: prompt }])
    else
      @llm.public_send(entry_point, prompt)
    end

  return raw.to_s unless raw.is_a?(Hash)

  # Hash responses: take the first truthy field, symbol key before string key
  %i[content text completion].each do |field|
    value = raw[field] || raw[field.to_s]
    return value if value
  end

  raw.to_s
end
|
132
|
+
|
133
|
+
# Parse the LLM's evaluation
|
134
|
+
# @param evaluation [String] The LLM's evaluation
|
135
|
+
# @return [Array] Score and feedback
|
136
|
+
def parse_evaluation(evaluation)
  # Pull the number that follows "Score:", if any
  matched_score = evaluation.match(/Score:\s*(\d+(\.\d+)?)/)
  score = matched_score && matched_score[1].to_f

  # A missing or out-of-range score falls back to the midpoint
  score = 0.5 unless score && score.between?(0.0, 1.0)

  # Everything after "Feedback:" is the feedback; without that marker the
  # whole evaluation text is used.
  matched_feedback = evaluation.match(/Feedback:\s*(.+)/m)
  feedback = matched_feedback.nil? ? evaluation : matched_feedback[1].strip

  [score, feedback]
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
end
|
@@ -0,0 +1,158 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "evaluator"
|
4
|
+
|
5
|
+
module LangsmithrbRails
|
6
|
+
module Evaluation
|
7
|
+
# Evaluator for string comparisons
|
8
|
+
class StringEvaluator < Evaluator
|
9
|
+
# Initialize a new string evaluator
|
10
|
+
# @param match_type [Symbol] Type of string matching (:exact, :contains, :regex)
|
11
|
+
# @param case_sensitive [Boolean] Whether to perform case-sensitive matching
|
12
|
+
# @param client [LangsmithrbRails::Client] LangSmith client
|
13
|
+
# @param project_name [String] Optional project name for evaluations
|
14
|
+
# @param tags [Array<String>] Optional tags for evaluations
|
15
|
+
def initialize(match_type: :exact, case_sensitive: true, client: nil, project_name: nil, tags: [])
  # Client, project name and tags are handed to the parent initializer.
  super(client: client, project_name: project_name, tags: tags)

  # Comparison settings consulted when evaluating
  @case_sensitive = case_sensitive
  @match_type = match_type
end
|
20
|
+
|
21
|
+
# Evaluate a prediction against a reference
|
22
|
+
# @param prediction [String] The prediction to evaluate
|
23
|
+
# @param reference [String] The reference to compare against
|
24
|
+
# @param input [Hash] Optional input that generated the prediction
|
25
|
+
# @return [Hash] Evaluation result with score and metadata
|
26
|
+
def evaluate(prediction, reference = nil, input = nil)
  # Extract strings from prediction and reference
  prediction_str = extract_string(prediction)
  reference_str = extract_string(reference)

  # Without a reference there is nothing to compare against
  if reference_str.nil?
    return { score: nil, metadata: { error: "No reference provided for evaluation" } }
  end

  # Case-fold only for literal comparisons. A regex reference is a pattern:
  # downcasing it would turn \D, \W, \S, \A into \d, \w, \s, \a and change
  # its meaning. regex_match applies Regexp::IGNORECASE itself.
  if !@case_sensitive && @match_type != :regex
    prediction_str = prediction_str.downcase
    reference_str = reference_str.downcase
  end

  # Perform comparison based on match type
  score, metadata =
    case @match_type
    when :exact
      exact_match(prediction_str, reference_str)
    when :contains
      contains_match(prediction_str, reference_str)
    when :regex
      regex_match(prediction_str, reference_str)
    else
      [0.0, { error: "Unknown match type: #{@match_type}" }]
    end

  { score: score, metadata: metadata }
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
# Extract a string from an object
|
68
|
+
# @param obj [Object] The object to extract a string from
|
69
|
+
# @return [String] The extracted string
|
70
|
+
def extract_string(obj)
  return nil if obj.nil?

  case obj
  when String
    obj
  when Hash
    # Try common output keys, as string or symbol. The value is read with
    # whichever form of the key is actually present.
    %w[output response result text completion answer].each do |key|
      return obj[key].to_s if obj.key?(key)
      return obj[key.to_sym].to_s if obj.key?(key.to_sym)
    end

    # A hash carrying both :prediction and :reference yields its prediction
    return obj[:prediction].to_s if obj.key?(:prediction) && obj.key?(:reference)

    # If it's a simple hash with a single value, use that
    return obj.values.first.to_s if obj.size == 1

    # Otherwise convert the whole hash to a string
    obj.to_s
  else
    obj.to_s
  end
end
|
96
|
+
|
97
|
+
# Perform exact string matching
|
98
|
+
# @param prediction [String] The prediction string
|
99
|
+
# @param reference [String] The reference string
|
100
|
+
# @return [Array] Score and metadata
|
101
|
+
def exact_match(prediction, reference)
  # Full-string equality; the score is 1.0 on a match and 0.0 otherwise
  identical = (prediction == reference)
  details = { match: identical, match_type: "exact", case_sensitive: @case_sensitive }
  score = identical ? 1.0 : 0.0

  [score, details]
end
|
112
|
+
|
113
|
+
# Perform contains string matching
|
114
|
+
# @param prediction [String] The prediction string
|
115
|
+
# @param reference [String] The reference string
|
116
|
+
# @return [Array] Score and metadata
|
117
|
+
def contains_match(prediction, reference)
  # Substring test: does the prediction contain the reference?
  found = prediction.include?(reference)
  details = { match: found, match_type: "contains", case_sensitive: @case_sensitive }
  score = found ? 1.0 : 0.0

  [score, details]
end
|
128
|
+
|
129
|
+
# Perform regex string matching
|
130
|
+
# @param prediction [String] The prediction string
|
131
|
+
# @param reference [String] The reference string (as regex pattern)
|
132
|
+
# @return [Array] Score and metadata
|
133
|
+
def regex_match(prediction, reference)
|
134
|
+
begin
|
135
|
+
regex = Regexp.new(reference, @case_sensitive ? nil : Regexp::IGNORECASE)
|
136
|
+
match = regex.match?(prediction)
|
137
|
+
[
|
138
|
+
match ? 1.0 : 0.0,
|
139
|
+
{
|
140
|
+
match: match,
|
141
|
+
match_type: "regex",
|
142
|
+
case_sensitive: @case_sensitive
|
143
|
+
}
|
144
|
+
]
|
145
|
+
rescue RegexpError => e
|
146
|
+
[
|
147
|
+
0.0,
|
148
|
+
{
|
149
|
+
error: "Invalid regex pattern: #{e.message}",
|
150
|
+
match_type: "regex",
|
151
|
+
case_sensitive: @case_sensitive
|
152
|
+
}
|
153
|
+
]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "evaluation/evaluator"
|
4
|
+
require_relative "evaluation/string_evaluator"
|
5
|
+
require_relative "evaluation/llm_evaluator"
|
6
|
+
|
7
|
+
module LangsmithrbRails
|
8
|
+
# Evaluation framework for LangSmith
|
9
|
+
module Evaluation
|
10
|
+
# Create a new evaluator
|
11
|
+
# @param type [Symbol] Type of evaluator (:string, :llm)
|
12
|
+
# @param options [Hash] Options for the evaluator
|
13
|
+
# @return [Evaluator] The evaluator instance
|
14
|
+
def self.create(type, **options)
  # Dispatch on the evaluator type; unknown types are rejected
  case type
  when :llm
    # The LLM evaluator cannot work without a model to call
    raise ArgumentError, "LLM evaluator requires an :llm option" unless options[:llm]

    LLMEvaluator.new(**options)
  when :string
    StringEvaluator.new(**options)
  else
    raise ArgumentError, "Unknown evaluator type: #{type}"
  end
end
|
27
|
+
|
28
|
+
# Run a batch evaluation on a dataset
|
29
|
+
# @param dataset_id [String] The dataset ID to evaluate
|
30
|
+
# @param evaluators [Array<Evaluator>] The evaluators to use
|
31
|
+
# @param experiment_name [String] Name for the experiment
|
32
|
+
# @param target_llm [Object] Optional LLM to use for generating predictions
|
33
|
+
# @return [Hash] Evaluation results
|
34
|
+
def self.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil)
  # Default experiment name carries a UTC ISO 8601 timestamp. It is built with
  # strftime (core Ruby) because Time#iso8601 needs `require "time"` on
  # Ruby versions before 3.4 and this file does not require it.
  timestamp = Time.now.utc.strftime("%Y-%m-%dT%H:%M:%SZ")
  experiment_name ||= "Evaluation #{timestamp}"

  evaluator_names = evaluators.map { |e| e.class.name }

  # Each evaluator scores the whole dataset; results keep the evaluator order
  per_evaluator = evaluators.map do |evaluator|
    evaluator.evaluate_dataset(dataset_id, experiment_name, target_llm)
  end

  {
    experiment_name: experiment_name,
    dataset_id: dataset_id,
    evaluators: evaluator_names,
    results: per_evaluator
  }
end
|
51
|
+
|
52
|
+
# Run a batch evaluation on runs
|
53
|
+
# @param run_ids [Array<String>] The run IDs to evaluate
|
54
|
+
# @param evaluators [Array<Evaluator>] The evaluators to use
|
55
|
+
# @param references [Hash<String, Object>] Map of run IDs to references
|
56
|
+
# @return [Hash] Evaluation results
|
57
|
+
def self.evaluate_runs(run_ids, evaluators, references = {})
  evaluator_names = evaluators.map { |e| e.class.name }

  # Nested map: run ID => evaluator class name => that evaluator's result
  per_run = run_ids.each_with_object({}) do |run_id, by_run|
    by_run[run_id] = evaluators.each_with_object({}) do |evaluator, by_evaluator|
      by_evaluator[evaluator.class.name] = evaluator.evaluate_run(run_id, references[run_id])
    end
  end

  { run_ids: run_ids, evaluators: evaluator_names, results: per_run }
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rails/generators"
|
4
|
+
|
5
|
+
module LangsmithrbRails
|
6
|
+
module Generators
|
7
|
+
# Generator for adding LangSmith support to a Rails application
|
8
|
+
class LangsmithGenerator < Rails::Generators::Base
|
9
|
+
source_root File.expand_path("templates", __dir__)
|
10
|
+
desc "Adds LangSmith support to your Rails application"
|
11
|
+
|
12
|
+
def create_initializer
  # Render the bundled initializer template to this path in the target app
  initializer_path = "config/initializers/langsmith.rb"
  template "langsmith_initializer.rb", initializer_path
end
|
15
|
+
|
16
|
+
def update_env_example
  # Thor actions write relative to destination_root, so the existence and
  # content checks use the same location rather than the process's
  # working directory.
  env_path = File.join(destination_root, ".env.example")
  create_file ".env.example" unless File.exist?(env_path)

  # Check if LangSmith config is already in the file
  # (the file may still be absent, e.g. when the create step did not write it)
  env_content = File.exist?(env_path) ? File.read(env_path) : ""
  return if env_content.include?("LANGSMITH_API_KEY")

  # A separating newline is needed only after existing content that does not
  # already end with one; an empty file gets no leading blank line.
  separator = env_content.empty? || env_content.end_with?("\n") ? "" : "\n"

  append_to_file ".env.example" do
    separator + <<~ENV
      # LangSmith configuration
      LANGSMITH_API_KEY=
      LANGSMITH_PROJECT=
    ENV
  end
end
|
33
|
+
|
34
|
+
def update_gitignore
  # Thor actions write relative to destination_root, so the existence and
  # content checks use the same location rather than the process's
  # working directory.
  gitignore_path = File.join(destination_root, ".gitignore")
  create_file ".gitignore" unless File.exist?(gitignore_path)

  # Check if .env is already in gitignore (a line consisting of ".env" only)
  gitignore_content = File.exist?(gitignore_path) ? File.read(gitignore_path) : ""
  return if gitignore_content.match?(/^\.env\s*$/)

  # A separating newline is needed only after existing content that does not
  # already end with one; an empty file gets no leading blank line.
  separator = gitignore_content.empty? || gitignore_content.end_with?("\n") ? "" : "\n"

  append_to_file ".gitignore" do
    separator + <<~GITIGNORE
      # LangSmith
      .env
    GITIGNORE
  end
end
|
50
|
+
|
51
|
+
def display_post_install_message
  # Green headline first, then the setup instructions in yellow
  say "\nš LangSmith support has been added to your Rails application!", :green

  [
    "\nTo enable LangSmith tracing, add the following to your .env file:",
    "LANGSMITH_API_KEY=your_api_key",
    "LANGSMITH_PROJECT=your_project_name (optional)",
    "\nYou can get your API key from https://smith.langchain.com/",
    "\nRestart your Rails server to apply the changes."
  ].each { |line| say line, :yellow }
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Configure LangSmith using the langsmithrb gem
|
4
|
+
|
5
|
+
# First, configure the langsmithrb gem directly
|
6
|
+
Langsmithrb.configure do |gem_config|
  gem_config.api_key = ENV.fetch("LANGSMITH_API_KEY", nil)
  gem_config.project_name = ENV.fetch("LANGSMITH_PROJECT", nil)
  # Tracing is on only when an API key is present
  gem_config.tracing_enabled = ENV.fetch("LANGSMITH_API_KEY", nil).present?
end

# Then, configure our Rails integration
LangsmithrbRails.configure do |rails_config|
  # Enable LangSmith tracing (only when an API key is present)
  rails_config.enabled = ENV.fetch("LANGSMITH_API_KEY", nil).present?

  # Your LangSmith API key from https://smith.langchain.com/
  rails_config.api_key = ENV.fetch("LANGSMITH_API_KEY", nil)

  # Optional: The project name to use for LangSmith traces
  rails_config.project_name = ENV.fetch("LANGSMITH_PROJECT", nil)
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "langsmithrb"
|
4
|
+
|
5
|
+
module LangsmithrbRails
|
6
|
+
# LangSmith integration for Rails applications using the langsmithrb gem
|
7
|
+
module LangSmith
  # Configure LangSmith tracing by forwarding the settings to Langsmithrb.configure
  # @param api_key [String] LangSmith API key
  # @param project_name [String] Optional project name; left untouched when not given
  # @param tracing [Boolean] Whether to enable tracing (default: true)
  def self.configure(api_key:, project_name: nil, tracing: true)
    Langsmithrb.configure do |settings|
      settings.api_key = api_key
      settings.project_name = project_name if project_name
      settings.tracing_enabled = tracing
    end
  end

  # Check if LangSmith tracing is enabled
  # @return [Boolean] Truthy when tracing is switched on and an API key is set
  def self.enabled?
    tracing = Langsmithrb.config.tracing_enabled
    tracing && !Langsmithrb.config.api_key.nil?
  end

  # Get the current LangSmith project name
  # @return [String, nil] The current LangSmith project name or nil if not set
  def self.project_name
    Langsmithrb.config.project_name
  end
end
|
35
|
+
end
|