langsmithrb_rails 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec +3 -0
  3. data/.rspec_status +161 -0
  4. data/CHANGELOG.md +38 -0
  5. data/Gemfile +20 -0
  6. data/Gemfile.lock +321 -0
  7. data/LICENSE +21 -0
  8. data/README.md +421 -0
  9. data/Rakefile +10 -0
  10. data/langsmithrb_rails-0.1.0.gem +0 -0
  11. data/langsmithrb_rails-0.1.1.gem +0 -0
  12. data/langsmithrb_rails.gemspec +45 -0
  13. data/lib/generators/langsmithrb_rails/buffer/buffer_generator.rb +94 -0
  14. data/lib/generators/langsmithrb_rails/buffer/templates/create_langsmith_run_buffers.rb +29 -0
  15. data/lib/generators/langsmithrb_rails/buffer/templates/flush_buffer_job.rb +40 -0
  16. data/lib/generators/langsmithrb_rails/buffer/templates/langsmith.rake +71 -0
  17. data/lib/generators/langsmithrb_rails/buffer/templates/langsmith_run_buffer.rb +70 -0
  18. data/lib/generators/langsmithrb_rails/buffer/templates/migration.rb +28 -0
  19. data/lib/generators/langsmithrb_rails/ci/ci_generator.rb +37 -0
  20. data/lib/generators/langsmithrb_rails/ci/templates/langsmith-evals.yml +85 -0
  21. data/lib/generators/langsmithrb_rails/ci/templates/langsmith_export_summary.rb +81 -0
  22. data/lib/generators/langsmithrb_rails/demo/demo_generator.rb +81 -0
  23. data/lib/generators/langsmithrb_rails/demo/templates/chat_controller.js +88 -0
  24. data/lib/generators/langsmithrb_rails/demo/templates/chat_controller.rb +58 -0
  25. data/lib/generators/langsmithrb_rails/demo/templates/chat_message.rb +24 -0
  26. data/lib/generators/langsmithrb_rails/demo/templates/create_chat_messages.rb +19 -0
  27. data/lib/generators/langsmithrb_rails/demo/templates/index.html.erb +180 -0
  28. data/lib/generators/langsmithrb_rails/demo/templates/llm_service.rb +165 -0
  29. data/lib/generators/langsmithrb_rails/evals/evals_generator.rb +52 -0
  30. data/lib/generators/langsmithrb_rails/evals/templates/checks/correctness.rb +71 -0
  31. data/lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb +137 -0
  32. data/lib/generators/langsmithrb_rails/evals/templates/datasets/sample.yml +60 -0
  33. data/lib/generators/langsmithrb_rails/evals/templates/langsmith_evals.rake +255 -0
  34. data/lib/generators/langsmithrb_rails/evals/templates/targets/http.rb +120 -0
  35. data/lib/generators/langsmithrb_rails/evals/templates/targets/ruby.rb +136 -0
  36. data/lib/generators/langsmithrb_rails/install/install_generator.rb +35 -0
  37. data/lib/generators/langsmithrb_rails/install/templates/config.yml +45 -0
  38. data/lib/generators/langsmithrb_rails/install/templates/initializer.rb +34 -0
  39. data/lib/generators/langsmithrb_rails/privacy/privacy_generator.rb +39 -0
  40. data/lib/generators/langsmithrb_rails/privacy/templates/custom_redactor.rb +132 -0
  41. data/lib/generators/langsmithrb_rails/privacy/templates/privacy.yml +88 -0
  42. data/lib/generators/langsmithrb_rails/privacy/templates/privacy_initializer.rb +41 -0
  43. data/lib/generators/langsmithrb_rails/tracing/templates/langsmith_traced.rb +146 -0
  44. data/lib/generators/langsmithrb_rails/tracing/templates/langsmith_traced_job.rb +151 -0
  45. data/lib/generators/langsmithrb_rails/tracing/templates/request_tracing.rb +117 -0
  46. data/lib/generators/langsmithrb_rails/tracing/tracing_generator.rb +78 -0
  47. data/lib/langsmithrb_rails/client.rb +292 -0
  48. data/lib/langsmithrb_rails/config.rb +169 -0
  49. data/lib/langsmithrb_rails/evaluation/evaluator.rb +178 -0
  50. data/lib/langsmithrb_rails/evaluation/llm_evaluator.rb +154 -0
  51. data/lib/langsmithrb_rails/evaluation/string_evaluator.rb +158 -0
  52. data/lib/langsmithrb_rails/evaluation.rb +76 -0
  53. data/lib/langsmithrb_rails/generators/langsmithrb_rails/langsmith_generator.rb +61 -0
  54. data/lib/langsmithrb_rails/generators/langsmithrb_rails/templates/langsmith_initializer.rb +22 -0
  55. data/lib/langsmithrb_rails/langsmith.rb +35 -0
  56. data/lib/langsmithrb_rails/otel/exporter.rb +120 -0
  57. data/lib/langsmithrb_rails/otel.rb +135 -0
  58. data/lib/langsmithrb_rails/railtie.rb +33 -0
  59. data/lib/langsmithrb_rails/redactor.rb +76 -0
  60. data/lib/langsmithrb_rails/run_trees.rb +157 -0
  61. data/lib/langsmithrb_rails/version.rb +5 -0
  62. data/lib/langsmithrb_rails/wrappers/anthropic.rb +146 -0
  63. data/lib/langsmithrb_rails/wrappers/base.rb +81 -0
  64. data/lib/langsmithrb_rails/wrappers/llm.rb +151 -0
  65. data/lib/langsmithrb_rails/wrappers/openai.rb +193 -0
  66. data/lib/langsmithrb_rails/wrappers.rb +41 -0
  67. data/lib/langsmithrb_rails.rb +151 -0
  68. data/pkg/langsmithrb_rails-0.3.0.gem +0 -0
  69. metadata +74 -7
@@ -0,0 +1,178 @@
1
+ # frozen_string_literal: true
2
+
3
module LangsmithrbRails
  module Evaluation
    # Abstract base class for evaluators.
    #
    # Subclasses implement {#evaluate}. This class supplies the plumbing around
    # it: fetching a run or a dataset's examples through the client, handing
    # them to {#evaluate}, and (for runs) posting the result back as feedback.
    #
    # Client responses are read as hashes with +:status+, +:body+ and +:error+
    # entries; run and example payloads are read with string keys
    # ("inputs", "outputs", "id").
    class Evaluator
      # Keys probed, in this order, when pulling a prediction out of a run's outputs.
      PREDICTION_KEYS = %w[output response result text completion answer].freeze

      # Methods probed, in this order, when asking a target LLM for a prediction.
      LLM_ENTRY_POINTS = %i[call generate complete].freeze

      attr_reader :client, :project_name, :tags

      # Initialize a new evaluator
      # @param client [LangsmithrbRails::Client, nil] LangSmith client; a new
      #   LangsmithrbRails::Client is built when omitted
      # @param project_name [String, nil] Optional project name for evaluations
      # @param tags [Array<String>] Optional tags for evaluations
      def initialize(client: nil, project_name: nil, tags: [])
        @client = client || LangsmithrbRails::Client.new
        @project_name = project_name
        @tags = tags
      end

      # Evaluate a prediction against a reference. Must be overridden.
      # @param prediction [String, Hash] The prediction to evaluate
      # @param reference [String, Hash] The reference to compare against
      # @param input [Hash] Optional input that generated the prediction
      # @return [Hash] Evaluation result; callers in this class read +:score+,
      #   +:metadata+ and +:comment+ from it
      # @raise [NotImplementedError] always, in this base class
      def evaluate(prediction, reference = nil, input = nil)
        raise NotImplementedError, "Subclasses must implement evaluate method"
      end

      # Evaluate a single run and record the result as feedback on that run.
      # @param run_id [String] The ID of the run to evaluate
      # @param reference [String, Hash] The reference to compare against
      # @return [Hash] Evaluation result as returned by {#evaluate}
      # @raise [RuntimeError] if the run cannot be fetched
      def evaluate_run(run_id, reference = nil)
        response = client.get_run(run_id)
        unless successful_response?(response)
          raise "Failed to get run: #{response[:error] || response[:body]}"
        end

        run = response[:body]
        result = evaluate(extract_prediction_from_run(run), reference, run["inputs"])

        # Feedback is posted for every result, including ones with a nil score.
        create_feedback(run_id, result)

        result
      end

      # Evaluate multiple runs, one after another.
      # @param run_ids [Array<String>] The IDs of the runs to evaluate
      # @param references [Hash<String, Object>] Map of run IDs to references
      # @return [Hash<String, Hash>] Map of run IDs to evaluation results
      def evaluate_runs(run_ids, references = {})
        run_ids.each_with_object({}) do |run_id, results|
          results[run_id] = evaluate_run(run_id, references[run_id])
        end
      end

      # Evaluate every example of a dataset.
      #
      # Each example's "outputs" serve as the reference. When no +target_llm+
      # is given they are also used as the prediction.
      # @param dataset_id [String] The ID of the dataset to evaluate
      # @param experiment_name [String] Name for the experiment
      # @param target_llm [Object] Optional LLM to use for generating predictions
      # @return [Hash] +:experiment_name+, +:dataset_id+, +:evaluator_name+ and
      #   +:results+ (one +{example_id:, score:, metadata:}+ hash per example)
      # @raise [RuntimeError] if the examples cannot be fetched
      def evaluate_dataset(dataset_id, experiment_name, target_llm = nil)
        response = client.list_examples(dataset_id)
        unless successful_response?(response)
          raise "Failed to get dataset examples: #{response[:error] || response[:body]}"
        end

        # A successful response without a body is treated as "no examples".
        example_results = (response[:body] || []).map do |example|
          reference = example["outputs"]
          prediction = target_llm ? generate_prediction(target_llm, example["inputs"]) : reference
          result = evaluate(prediction, reference, example["inputs"])

          { example_id: example["id"], score: result[:score], metadata: result[:metadata] }
        end

        {
          experiment_name: experiment_name,
          dataset_id: dataset_id,
          evaluator_name: self.class.name,
          results: example_results
        }
      end

      private

      # Whether a client response carries a 2xx status.
      #
      # A missing or non-numeric status counts as a failure so that callers
      # report their own "Failed to ..." message instead of crashing on the
      # comparison.
      # @param response [Hash] The client response
      # @return [Boolean]
      def successful_response?(response)
        status = response[:status]
        status.is_a?(Numeric) && status >= 200 && status < 300
      end

      # Extract prediction from run outputs
      # @param run [Hash] The run data
      # @return [Object] The value under the first of PREDICTION_KEYS present
      #   in the run's "outputs", or the whole outputs hash when none is
      def extract_prediction_from_run(run)
        outputs = run["outputs"] || {}
        key = PREDICTION_KEYS.find { |candidate| outputs.key?(candidate) }
        key ? outputs[key] : outputs
      end

      # Generate a prediction using an LLM
      # @param llm [Object] The LLM; must respond to +call+, +generate+ or +complete+
      # @param input [Hash] The input to generate from
      # @return [Object] The generated prediction
      # @raise [RuntimeError] if the LLM responds to none of the supported methods
      def generate_prediction(llm, input)
        entry_point = LLM_ENTRY_POINTS.find { |name| llm.respond_to?(name) }
        raise "Unsupported LLM interface" unless entry_point

        llm.public_send(entry_point, input)
      end

      # Create feedback for a run
      #
      # The feedback key is the evaluator's unqualified class name, downcased.
      # A failed request is logged, not raised.
      # @param run_id [String] The ID of the run
      # @param result [Hash] The evaluation result
      # @return [Object] The body of the client's response
      def create_feedback(run_id, result)
        # Anonymous classes have no name; fall back to a generic key.
        feedback_key = (self.class.name || "evaluator").split("::").last.downcase

        response = client.create_feedback(
          run_id,
          feedback_key,
          result[:score],
          value: result[:metadata],
          comment: result[:comment]
        )

        unless successful_response?(response)
          LangsmithrbRails.logger.error("Failed to create feedback: #{response[:error] || response[:body]}")
        end

        response[:body]
      end
    end
  end
end
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluator"
4
+
5
module LangsmithrbRails
  module Evaluation
    # Evaluator that uses an LLM to evaluate responses.
    #
    # A prompt is built from the prediction, the optional reference and input,
    # and the configured criteria; the LLM's reply is then parsed for a
    # "Score:" line and a "Feedback:" section.
    class LLMEvaluator < Evaluator
      # Keys probed, in this order, when reducing a Hash to a single string.
      OUTPUT_KEYS = %w[output response result text completion answer].freeze

      # Keys probed, in this order, when the LLM returns a Hash.
      RESPONSE_KEYS = %w[content text completion].freeze

      # Initialize a new LLM evaluator
      # @param llm [Object] The LLM to use for evaluation; must respond to
      #   +call+, +generate+, +complete+ or +chat+
      # @param criteria [String] Evaluation criteria; a generic
      #   accuracy/relevance/completeness criterion is used when omitted
      # @param client [LangsmithrbRails::Client] LangSmith client
      # @param project_name [String] Optional project name for evaluations
      # @param tags [Array<String>] Optional tags for evaluations
      def initialize(llm:, criteria: nil, client: nil, project_name: nil, tags: [])
        super(client: client, project_name: project_name, tags: tags)
        @llm = llm
        @criteria = criteria || "Evaluate the response for accuracy, relevance, and completeness."
      end

      # Evaluate a prediction against a reference
      #
      # Errors raised while calling the LLM or parsing its reply are not
      # propagated; they are reported as a nil score with an +:error+ entry.
      # @param prediction [String, Hash] The prediction to evaluate
      # @param reference [String, Hash] The reference to compare against
      # @param input [Hash] Optional input that generated the prediction
      # @return [Hash] +{score:, metadata: {feedback:, criteria:, evaluation:}}+
      #   on success, +{score: nil, metadata: {error:}}+ on failure
      def evaluate(prediction, reference = nil, input = nil)
        prediction_str = extract_string(prediction)
        reference_str = extract_string(reference)
        input_str = input.is_a?(Hash) ? input.to_json : input.to_s if input

        prompt = create_evaluation_prompt(prediction_str, reference_str, input_str)

        begin
          evaluation = get_llm_evaluation(prompt)
          score, feedback = parse_evaluation(evaluation)

          {
            score: score,
            metadata: {
              feedback: feedback,
              criteria: @criteria,
              evaluation: evaluation
            }
          }
        rescue StandardError => e
          {
            score: nil,
            metadata: {
              error: "Evaluation failed: #{e.message}"
            }
          }
        end
      end

      private

      # Extract a string from an object
      #
      # For a Hash, the first of OUTPUT_KEYS present wins. String keys are
      # searched first, then the same names as symbols, so symbol-keyed hashes
      # yield their answer text instead of a JSON dump of the whole hash.
      # @param obj [Object] The object to extract a string from
      # @return [String, nil] The extracted string, or nil when +obj+ is nil
      def extract_string(obj)
        case obj
        when nil
          nil
        when String
          obj
        when Hash
          key = OUTPUT_KEYS.find { |candidate| obj.key?(candidate) } ||
                OUTPUT_KEYS.map(&:to_sym).find { |candidate| obj.key?(candidate) }
          key ? obj[key].to_s : obj.to_json
        else
          obj.to_s
        end
      end

      # Create an evaluation prompt for the LLM
      # @param prediction [String] The prediction string
      # @param reference [String, nil] The reference string; section omitted when nil
      # @param input [String, nil] The input string; section omitted when nil
      # @return [String] The evaluation prompt
      def create_evaluation_prompt(prediction, reference, input)
        [
          "You are an impartial evaluator. Your task is to evaluate the quality of a response.\n\n",
          (input ? "Input:\n#{input}\n\n" : nil),
          "Response to evaluate:\n#{prediction}\n\n",
          (reference ? "Reference (correct) response:\n#{reference}\n\n" : nil),
          "Evaluation criteria:\n#{@criteria}\n\n",
          "Please provide a score between 0.0 and 1.0, where 1.0 is perfect, and detailed feedback.\n",
          "Format your response as:\nScore: [score between 0.0 and 1.0]\nFeedback: [your detailed feedback]"
        ].compact.join
      end

      # Get evaluation from LLM
      #
      # The first interface the LLM responds to is used, in the order
      # +call+, +generate+, +complete+, +chat+. A Hash reply is reduced to the
      # first truthy value among RESPONSE_KEYS (symbol key before string key).
      # @param prompt [String] The evaluation prompt
      # @return [String] The LLM's evaluation
      # @raise [RuntimeError] If the LLM interface is not supported
      def get_llm_evaluation(prompt)
        reply =
          if @llm.respond_to?(:call)
            @llm.call(prompt)
          elsif @llm.respond_to?(:generate)
            @llm.generate(prompt)
          elsif @llm.respond_to?(:complete)
            @llm.complete(prompt)
          elsif @llm.respond_to?(:chat)
            @llm.chat(messages: [{ role: "user", content: prompt }])
          else
            raise "Unsupported LLM interface"
          end

        return reply.to_s unless reply.is_a?(Hash)

        RESPONSE_KEYS.each do |key|
          content = reply[key.to_sym] || reply[key]
          return content if content
        end
        reply.to_s
      end

      # Parse the LLM's evaluation
      #
      # The labels are matched case-insensitively and may be wrapped in
      # markdown emphasis ("**Score:** 0.8"). A missing or out-of-range score
      # falls back to 0.5; missing feedback falls back to the whole evaluation.
      # @param evaluation [String] The LLM's evaluation
      # @return [Array] Score and feedback
      def parse_evaluation(evaluation)
        score_match = evaluation.match(/Score\*{0,2}:\*{0,2}\s*(\d+(?:\.\d+)?)/i)
        score = score_match ? score_match[1].to_f : nil
        score = 0.5 if score.nil? || !score.between?(0.0, 1.0)

        feedback_match = evaluation.match(/Feedback\*{0,2}:\*{0,2}\s*(.+)/mi)
        feedback = feedback_match ? feedback_match[1].strip : evaluation

        [score, feedback]
      end
    end
  end
end
@@ -0,0 +1,158 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluator"
4
+
5
module LangsmithrbRails
  module Evaluation
    # Evaluator for string comparisons (:exact, :contains or :regex).
    class StringEvaluator < Evaluator
      # Keys probed, in this order, when reducing a Hash to a single string.
      OUTPUT_KEYS = %w[output response result text completion answer].freeze

      # Initialize a new string evaluator
      # @param match_type [Symbol] Type of string matching (:exact, :contains, :regex)
      # @param case_sensitive [Boolean] Whether to perform case-sensitive matching
      # @param client [LangsmithrbRails::Client] LangSmith client
      # @param project_name [String] Optional project name for evaluations
      # @param tags [Array<String>] Optional tags for evaluations
      def initialize(match_type: :exact, case_sensitive: true, client: nil, project_name: nil, tags: [])
        super(client: client, project_name: project_name, tags: tags)
        @match_type = match_type
        @case_sensitive = case_sensitive
      end

      # Evaluate a prediction against a reference
      # @param prediction [String, Hash, nil] The prediction to evaluate; a nil
      #   prediction never matches
      # @param reference [String, Hash, nil] The reference to compare against
      #   (the pattern source for :regex)
      # @param input [Hash] Optional input that generated the prediction (unused)
      # @return [Hash] +{score:, metadata:}+; score is 1.0 or 0.0, or nil when
      #   no reference was given
      def evaluate(prediction, reference = nil, input = nil)
        prediction_str = extract_string(prediction)
        reference_str = extract_string(reference)

        if reference_str.nil?
          return {
            score: nil,
            metadata: {
              error: "No reference provided for evaluation"
            }
          }
        end

        # Case folding is done here only for literal comparisons. For :regex
        # the pattern must stay untouched (downcasing would turn \D, \S, \W
        # into \d, \s, \w); regex_match applies Regexp::IGNORECASE instead.
        if !@case_sensitive && @match_type != :regex
          prediction_str = prediction_str&.downcase
          reference_str = reference_str.downcase
        end

        score, metadata =
          case @match_type
          when :exact
            exact_match(prediction_str, reference_str)
          when :contains
            contains_match(prediction_str, reference_str)
          when :regex
            regex_match(prediction_str, reference_str)
          else
            [0.0, { error: "Unknown match type: #{@match_type}" }]
          end

        { score: score, metadata: metadata }
      end

      private

      # Extract a string from an object
      #
      # For a Hash the lookup order is: the first of OUTPUT_KEYS present as a
      # string or symbol key; then +:prediction+ when both +:prediction+ and
      # +:reference+ are present; then the only value of a one-entry hash;
      # finally the hash's +to_s+.
      # @param obj [Object] The object to extract a string from
      # @return [String, nil] The extracted string, or nil when +obj+ is nil
      def extract_string(obj)
        case obj
        when nil
          nil
        when String
          obj
        when Hash
          OUTPUT_KEYS.each do |key|
            # Read the value through whichever form of the key is present.
            return obj[key].to_s if obj.key?(key)
            return obj[key.to_sym].to_s if obj.key?(key.to_sym)
          end

          return obj[:prediction].to_s if obj.key?(:prediction) && obj.key?(:reference)
          return obj.values.first.to_s if obj.size == 1

          obj.to_s
        else
          obj.to_s
        end
      end

      # Perform exact string matching
      # @param prediction [String, nil] The prediction string
      # @param reference [String] The reference string
      # @return [Array] Score and metadata
      def exact_match(prediction, reference)
        matched = prediction == reference
        [matched ? 1.0 : 0.0, match_metadata("exact", match: matched)]
      end

      # Perform contains string matching
      # @param prediction [String, nil] The prediction string
      # @param reference [String] The reference string
      # @return [Array] Score and metadata
      def contains_match(prediction, reference)
        matched = !prediction.nil? && prediction.include?(reference)
        [matched ? 1.0 : 0.0, match_metadata("contains", match: matched)]
      end

      # Perform regex string matching
      # @param prediction [String, nil] The prediction string
      # @param reference [String] The reference string (as regex pattern)
      # @return [Array] Score and metadata; an invalid pattern scores 0.0 and
      #   reports the error in the metadata
      def regex_match(prediction, reference)
        pattern = Regexp.new(reference, @case_sensitive ? nil : Regexp::IGNORECASE)
        matched = pattern.match?(prediction)
        [matched ? 1.0 : 0.0, match_metadata("regex", match: matched)]
      rescue RegexpError => e
        [0.0, match_metadata("regex", error: "Invalid regex pattern: #{e.message}")]
      end

      # Build the metadata hash shared by all match types.
      # @param match_type [String] Name of the match type
      # @param leading [Hash] Entry placed first (+match:+ or +error:+)
      # @return [Hash]
      def match_metadata(match_type, **leading)
        leading.merge(match_type: match_type, case_sensitive: @case_sensitive)
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "evaluation/evaluator"
4
+ require_relative "evaluation/string_evaluator"
5
+ require_relative "evaluation/llm_evaluator"
6
+
7
module LangsmithrbRails
  # Evaluation framework for LangSmith
  module Evaluation
    # Create a new evaluator
    # @param type [Symbol] Type of evaluator (:string, :llm)
    # @param options [Hash] Options forwarded to the evaluator's constructor
    # @return [Evaluator] The evaluator instance
    # @raise [ArgumentError] for an unknown type, or for :llm without an :llm option
    def self.create(type, **options)
      case type
      when :string
        StringEvaluator.new(**options)
      when :llm
        raise ArgumentError, "LLM evaluator requires an :llm option" unless options[:llm]

        LLMEvaluator.new(**options)
      else
        raise ArgumentError, "Unknown evaluator type: #{type}"
      end
    end

    # Run a batch evaluation on a dataset
    # @param dataset_id [String] The dataset ID to evaluate
    # @param evaluators [Array<Evaluator>] The evaluators to use
    # @param experiment_name [String] Name for the experiment; defaults to
    #   "Evaluation <current UTC time in ISO 8601>"
    # @param target_llm [Object] Optional LLM to use for generating predictions
    # @return [Hash] +:experiment_name+, +:dataset_id+, +:evaluators+ (class
    #   names) and +:results+ (one entry per evaluator, in order)
    def self.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil)
      # strftime yields the same text as Time#iso8601 for a UTC time, without
      # depending on the "time" stdlib extension having been required.
      experiment_name ||= "Evaluation #{Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')}"

      {
        experiment_name: experiment_name,
        dataset_id: dataset_id,
        evaluators: evaluators.map { |evaluator| evaluator.class.name },
        results: evaluators.map do |evaluator|
          evaluator.evaluate_dataset(dataset_id, experiment_name, target_llm)
        end
      }
    end

    # Run a batch evaluation on runs
    #
    # Results are keyed by run ID, then by evaluator class name, so two
    # evaluators of the same class share one slot (the later one wins).
    # @param run_ids [Array<String>] The run IDs to evaluate
    # @param evaluators [Array<Evaluator>] The evaluators to use
    # @param references [Hash<String, Object>] Map of run IDs to references
    # @return [Hash] +:run_ids+, +:evaluators+ (class names) and +:results+
    def self.evaluate_runs(run_ids, evaluators, references = {})
      evaluator_names = evaluators.map { |evaluator| evaluator.class.name }

      per_run = run_ids.each_with_object({}) do |run_id, by_run|
        reference = references[run_id]
        by_run[run_id] = evaluators.each_with_object({}) do |evaluator, by_evaluator|
          by_evaluator[evaluator.class.name] = evaluator.evaluate_run(run_id, reference)
        end
      end

      { run_ids: run_ids, evaluators: evaluator_names, results: per_run }
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+
5
module LangsmithrbRails
  module Generators
    # Generator for adding LangSmith support to a Rails application.
    #
    # Public methods run as generator steps: write the initializer, add the
    # LangSmith variables to .env.example, make sure .env is git-ignored, then
    # print follow-up instructions.
    class LangsmithGenerator < Rails::Generators::Base
      source_root File.expand_path("templates", __dir__)
      desc "Adds LangSmith support to your Rails application"

      # Copy the initializer template into config/initializers.
      def create_initializer
        template "langsmith_initializer.rb", "config/initializers/langsmith.rb"
      end

      # Add the LangSmith variables to .env.example (creating the file if
      # needed) unless it already mentions LANGSMITH_API_KEY.
      def update_env_example
        section = <<~ENV
          # LangSmith configuration
          LANGSMITH_API_KEY=
          LANGSMITH_PROJECT=
        ENV

        append_section(".env.example", section) { |existing| existing.include?("LANGSMITH_API_KEY") }
      end

      # Add .env to .gitignore (creating the file if needed) unless a line
      # consisting of ".env" is already present.
      def update_gitignore
        section = <<~GITIGNORE
          # LangSmith
          .env
        GITIGNORE

        # ^ and $ are deliberate: the check is per line of the file.
        append_section(".gitignore", section) { |existing| existing.match?(/^\.env\s*$/) }
      end

      # Print the post-install instructions.
      def display_post_install_message
        say "\n🎉 LangSmith support has been added to your Rails application!", :green
        say "\nTo enable LangSmith tracing, add the following to your .env file:", :yellow
        say "LANGSMITH_API_KEY=your_api_key", :yellow
        say "LANGSMITH_PROJECT=your_project_name (optional)", :yellow
        say "\nYou can get your API key from https://smith.langchain.com/", :yellow
        say "\nRestart your Rails server to apply the changes.", :yellow
      end

      private

      # Append +section+ to a file, creating the file first when missing.
      #
      # The file is checked and read at the same location the generator
      # actions write to (relative to +destination_root+), not the process's
      # working directory. A newline is inserted first only when the existing
      # content is non-empty and does not already end with one.
      # @param relative_path [String] Path relative to the destination root
      # @param section [String] Text to append
      # @yieldparam existing [String] Current file content ("" when unreadable)
      # @yieldreturn [Boolean] true when the section is already present
      # @return [void]
      def append_section(relative_path, section)
        absolute_path = File.expand_path(relative_path, destination_root)
        create_file relative_path unless File.exist?(absolute_path)

        # The file can still be absent here (e.g. when the create was skipped).
        existing = File.exist?(absolute_path) ? File.read(absolute_path) : ""
        return if yield(existing)

        separator = (existing.empty? || existing.end_with?("\n")) ? "" : "\n"
        append_to_file relative_path, separator + section
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
# Configure LangSmith using the langsmithrb gem.
#
# Both configuration blocks below are driven by the same two environment
# variables. Tracing is switched on only when LANGSMITH_API_KEY is present.
langsmith_api_key = ENV["LANGSMITH_API_KEY"]
langsmith_project = ENV["LANGSMITH_PROJECT"]

# First, configure the langsmithrb gem directly
Langsmithrb.configure do |gem_config|
  gem_config.api_key = langsmith_api_key
  gem_config.project_name = langsmith_project
  gem_config.tracing_enabled = langsmith_api_key.present?
end

# Then, configure our Rails integration
LangsmithrbRails.configure do |rails_config|
  # Enable LangSmith tracing
  rails_config.enabled = langsmith_api_key.present?

  # Your LangSmith API key from https://smith.langchain.com/
  rails_config.api_key = langsmith_api_key

  # Optional: The project name to use for LangSmith traces
  rails_config.project_name = langsmith_project
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "langsmithrb"
4
+
5
module LangsmithrbRails
  # LangSmith integration for Rails applications using the langsmithrb gem.
  #
  # A thin facade over Langsmithrb's configuration object.
  module LangSmith
    # Configure LangSmith tracing through the langsmithrb gem.
    # @param api_key [String] LangSmith API key
    # @param project_name [String] Optional project name for LangSmith traces;
    #   the currently configured name is left alone when this is omitted
    # @param tracing [Boolean] Whether to enable tracing (default: true)
    def self.configure(api_key:, project_name: nil, tracing: true)
      Langsmithrb.configure do |settings|
        settings.api_key = api_key
        if project_name
          settings.project_name = project_name
        end
        settings.tracing_enabled = tracing
      end
    end

    # Check if LangSmith tracing is enabled
    # @return [Boolean, nil] the falsy +tracing_enabled+ setting as-is when
    #   tracing is off, otherwise whether an API key is set
    def self.enabled?
      tracing = Langsmithrb.config.tracing_enabled
      return tracing unless tracing

      !Langsmithrb.config.api_key.nil?
    end

    # Get the current LangSmith project name
    # @return [String, nil] The current LangSmith project name or nil if not set
    def self.project_name
      Langsmithrb.config.project_name
    end
  end
end