langfuse-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
1
+ require 'faraday'
2
+ require 'faraday/net_http'
3
+ require 'json'
4
+ require 'base64'
5
+ require 'concurrent'
6
+
7
+ module Langfuse
8
+ class Client
9
+ attr_reader :public_key, :secret_key, :host, :debug, :timeout, :retries
10
+
11
+ def initialize(public_key: nil, secret_key: nil, host: nil, debug: false, timeout: 30, retries: 3)
12
+ @public_key = public_key || ENV['LANGFUSE_PUBLIC_KEY'] || Langfuse.configuration.public_key
13
+ @secret_key = secret_key || ENV['LANGFUSE_SECRET_KEY'] || Langfuse.configuration.secret_key
14
+ @host = host || ENV['LANGFUSE_HOST'] || Langfuse.configuration.host
15
+ @debug = debug || Langfuse.configuration.debug
16
+ @timeout = timeout || Langfuse.configuration.timeout
17
+ @retries = retries || Langfuse.configuration.retries
18
+
19
+ raise AuthenticationError, "Public key is required" unless @public_key
20
+ raise AuthenticationError, "Secret key is required" unless @secret_key
21
+
22
+ @connection = build_connection
23
+ @event_queue = Concurrent::Array.new
24
+ @flush_thread = start_flush_thread
25
+ end
26
+
27
+ # Trace operations
28
+ def trace(id: nil, name: nil, user_id: nil, session_id: nil, version: nil, release: nil,
29
+ input: nil, output: nil, metadata: nil, tags: nil, timestamp: nil, **kwargs)
30
+ Trace.new(
31
+ client: self,
32
+ id: id || Utils.generate_id,
33
+ name: name,
34
+ user_id: user_id,
35
+ session_id: session_id,
36
+ version: version,
37
+ release: release,
38
+ input: input,
39
+ output: output,
40
+ metadata: metadata,
41
+ tags: tags,
42
+ timestamp: timestamp || Utils.current_timestamp,
43
+ **kwargs
44
+ )
45
+ end
46
+
47
+ # Span operations
48
+ def span(trace_id:, name: nil, start_time: nil, end_time: nil, input: nil, output: nil,
49
+ metadata: nil, level: nil, status_message: nil, parent_observation_id: nil,
50
+ version: nil, **kwargs)
51
+ Span.new(
52
+ client: self,
53
+ trace_id: trace_id,
54
+ name: name,
55
+ start_time: start_time || Utils.current_timestamp,
56
+ end_time: end_time,
57
+ input: input,
58
+ output: output,
59
+ metadata: metadata,
60
+ level: level,
61
+ status_message: status_message,
62
+ parent_observation_id: parent_observation_id,
63
+ version: version,
64
+ **kwargs
65
+ )
66
+ end
67
+
68
+ # Generation operations
69
+ def generation(trace_id:, name: nil, start_time: nil, end_time: nil, completion_start_time: nil,
70
+ model: nil, model_parameters: nil, input: nil, output: nil, usage: nil,
71
+ metadata: nil, level: nil, status_message: nil, parent_observation_id: nil,
72
+ version: nil, **kwargs)
73
+ Generation.new(
74
+ client: self,
75
+ trace_id: trace_id,
76
+ name: name,
77
+ start_time: start_time || Utils.current_timestamp,
78
+ end_time: end_time,
79
+ completion_start_time: completion_start_time,
80
+ model: model,
81
+ model_parameters: model_parameters,
82
+ input: input,
83
+ output: output,
84
+ usage: usage,
85
+ metadata: metadata,
86
+ level: level,
87
+ status_message: status_message,
88
+ parent_observation_id: parent_observation_id,
89
+ version: version,
90
+ **kwargs
91
+ )
92
+ end
93
+
94
+ # Prompt operations
95
+ def get_prompt(name, version: nil, label: nil, cache_ttl_seconds: 60)
96
+ cache_key = "prompt:#{name}:#{version}:#{label}"
97
+
98
+ if cached_prompt = @prompt_cache&.dig(cache_key)
99
+ return cached_prompt[:prompt] if Time.now - cached_prompt[:cached_at] < cache_ttl_seconds
100
+ end
101
+
102
+ path = "/api/public/prompts/#{name}"
103
+ params = {}
104
+ params[:version] = version if version
105
+ params[:label] = label if label
106
+
107
+ response = get(path, params)
108
+ prompt = Prompt.new(response.body)
109
+
110
+ # Cache the prompt
111
+ @prompt_cache ||= {}
112
+ @prompt_cache[cache_key] = { prompt: prompt, cached_at: Time.now }
113
+
114
+ prompt
115
+ end
116
+
117
+ def create_prompt(name:, prompt:, labels: [], config: {}, **kwargs)
118
+ data = {
119
+ name: name,
120
+ prompt: prompt,
121
+ labels: labels,
122
+ config: config,
123
+ **kwargs
124
+ }
125
+
126
+ response = post("/api/public/prompts", data)
127
+ Prompt.new(response.body)
128
+ end
129
+
130
+ # Score/Evaluation operations
131
+ def score(trace_id: nil, observation_id: nil, name:, value:, data_type: nil, comment: nil, **kwargs)
132
+ data = {
133
+ name: name,
134
+ value: value,
135
+ data_type: data_type,
136
+ comment: comment,
137
+ **kwargs
138
+ }
139
+
140
+ data[:trace_id] = trace_id if trace_id
141
+ data[:observation_id] = observation_id if observation_id
142
+
143
+ enqueue_event('score-create', data)
144
+ end
145
+
146
+ # HTTP methods
147
+ def get(path, params = {})
148
+ request(:get, path, params: params)
149
+ end
150
+
151
+ def post(path, data = {})
152
+ request(:post, path, json: data)
153
+ end
154
+
155
+ def put(path, data = {})
156
+ request(:put, path, json: data)
157
+ end
158
+
159
+ def delete(path, params = {})
160
+ request(:delete, path, params: params)
161
+ end
162
+
163
+ def patch(path, data = {})
164
+ request(:patch, path, json: data)
165
+ end
166
+
167
+ # Event queue management
168
+ def enqueue_event(type, body)
169
+ event = {
170
+ id: Utils.generate_id,
171
+ type: type,
172
+ timestamp: Utils.current_timestamp,
173
+ body: Utils.deep_stringify_keys(body)
174
+ }
175
+
176
+ @event_queue << event
177
+ puts "Enqueued event: #{type}" if @debug
178
+ end
179
+
180
    # Drains the queue and sends everything as a single ingestion batch.
    # On failure the drained events are pushed back onto the queue so a
    # later flush can retry them, then the error is re-raised.
    #
    # NOTE(review): between `empty?`/`length` and `shift` another thread
    # may append events; those simply stay queued for the next cycle.
    def flush
      return if @event_queue.empty?

      # Remove everything currently queued (Concurrent::Array).
      events = @event_queue.shift(@event_queue.length)
      return if events.empty?

      batch_data = {
        batch: events,
        metadata: {
          batch_size: events.length,
          sdk_name: "langfuse-ruby",
          sdk_version: Langfuse::VERSION
        }
      }

      begin
        response = post("/api/public/ingestion", batch_data)
        puts "Flushed #{events.length} events" if @debug
      rescue => e
        puts "Failed to flush events: #{e.message}" if @debug
        # Re-queue events on failure so they are not lost.
        events.each { |event| @event_queue << event }
        raise
      end
    end
205
+
206
    # Stops the background flush thread, then pushes any events still
    # queued in one final flush.
    #
    # NOTE(review): Thread#kill is abrupt — if a flush is mid-flight its
    # drained events are only re-queued by #flush's own rescue path;
    # confirm this is the intended shutdown semantics.
    def shutdown
      @flush_thread&.kill
      flush unless @event_queue.empty?
    end
210
+
211
+ private
212
+
213
+ def build_connection
214
+ Faraday.new(url: @host) do |conn|
215
+ conn.request :authorization, :basic, @public_key, @secret_key
216
+ conn.request :json
217
+ conn.response :json, content_type: /\bjson$/
218
+ conn.adapter Faraday.default_adapter
219
+ conn.options.timeout = @timeout
220
+ end
221
+ end
222
+
223
+ def request(method, path, params: {}, json: nil)
224
+ retries_left = @retries
225
+
226
+ begin
227
+ response = @connection.send(method) do |req|
228
+ req.url path
229
+ req.params = params if params.any?
230
+ req.body = json if json
231
+ end
232
+
233
+ handle_response(response)
234
+ rescue Faraday::TimeoutError => e
235
+ raise TimeoutError, "Request timed out: #{e.message}"
236
+ rescue Faraday::ConnectionFailed => e
237
+ if retries_left > 0
238
+ retries_left -= 1
239
+ sleep(2 ** (@retries - retries_left))
240
+ retry
241
+ end
242
+ raise NetworkError, "Connection failed: #{e.message}"
243
+ rescue => e
244
+ raise APIError, "Request failed: #{e.message}"
245
+ end
246
+ end
247
+
248
+ def handle_response(response)
249
+ case response.status
250
+ when 200..299
251
+ response
252
+ when 401
253
+ raise AuthenticationError, "Authentication failed: #{response.body}"
254
+ when 429
255
+ raise RateLimitError, "Rate limit exceeded: #{response.body}"
256
+ when 400..499
257
+ raise ValidationError, "Client error: #{response.body}"
258
+ when 500..599
259
+ raise APIError, "Server error: #{response.body}"
260
+ else
261
+ raise APIError, "Unexpected response: #{response.status} #{response.body}"
262
+ end
263
+ end
264
+
265
    # Starts the background thread that flushes queued events every
    # 5 seconds. Errors from #flush are reported (in debug mode) and
    # swallowed so the loop keeps running.
    def start_flush_thread
      Thread.new do
        loop do
          sleep(5) # Flush every 5 seconds
          begin
            flush unless @event_queue.empty?
          rescue => e
            # Never let a flush failure kill the background thread.
            puts "Error in flush thread: #{e.message}" if @debug
          end
        end
      end
    end
277
+ end
278
+ end
@@ -0,0 +1,9 @@
1
module Langfuse
  # Base class for every error raised by this gem; rescue this to catch
  # anything Langfuse-specific.
  class Error < StandardError; end

  # Missing/rejected API credentials (raised for HTTP 401 and when keys
  # are absent at client construction).
  class AuthenticationError < Error; end

  # Server-side or unexpected API failure (HTTP 5xx / unknown status).
  class APIError < Error; end

  # Connection-level failure after retries were exhausted.
  class NetworkError < Error; end

  # Client-side request error (HTTP 4xx other than 401/429).
  class ValidationError < Error; end

  # Rate limit exceeded (HTTP 429).
  class RateLimitError < Error; end

  # Request exceeded the configured timeout.
  class TimeoutError < Error; end
end
@@ -0,0 +1,267 @@
1
+ module Langfuse
2
+ class Evaluation
3
+ attr_reader :id, :name, :value, :data_type, :comment, :trace_id, :observation_id, :created_at
4
+
5
+ def initialize(data)
6
+ @data = data.is_a?(Hash) ? Utils.deep_symbolize_keys(data) : data
7
+
8
+ @id = @data[:id]
9
+ @name = @data[:name]
10
+ @value = @data[:value]
11
+ @data_type = @data[:data_type]
12
+ @comment = @data[:comment]
13
+ @trace_id = @data[:trace_id]
14
+ @observation_id = @data[:observation_id]
15
+ @created_at = @data[:created_at]
16
+ end
17
+
18
+ def to_dict
19
+ {
20
+ id: @id,
21
+ name: @name,
22
+ value: @value,
23
+ data_type: @data_type,
24
+ comment: @comment,
25
+ trace_id: @trace_id,
26
+ observation_id: @observation_id,
27
+ created_at: @created_at
28
+ }.compact
29
+ end
30
+ end
31
+
32
+ class Score
33
+ attr_reader :id, :name, :value, :data_type, :comment, :trace_id, :observation_id, :created_at
34
+
35
+ def initialize(data)
36
+ @data = data.is_a?(Hash) ? Utils.deep_symbolize_keys(data) : data
37
+
38
+ @id = @data[:id]
39
+ @name = @data[:name]
40
+ @value = @data[:value]
41
+ @data_type = @data[:data_type]
42
+ @comment = @data[:comment]
43
+ @trace_id = @data[:trace_id]
44
+ @observation_id = @data[:observation_id]
45
+ @created_at = @data[:created_at]
46
+ end
47
+
48
+ def to_dict
49
+ {
50
+ id: @id,
51
+ name: @name,
52
+ value: @value,
53
+ data_type: @data_type,
54
+ comment: @comment,
55
+ trace_id: @trace_id,
56
+ observation_id: @observation_id,
57
+ created_at: @created_at
58
+ }.compact
59
+ end
60
+ end
61
+
62
+ module Evaluators
63
+ class BaseEvaluator
64
+ def initialize(name:, description: nil)
65
+ @name = name
66
+ @description = description
67
+ end
68
+
69
+ def evaluate(input, output, expected: nil, context: nil)
70
+ raise NotImplementedError, "Subclasses must implement evaluate method"
71
+ end
72
+
73
+ protected
74
+
75
+ def create_score(value:, data_type: 'NUMERIC', comment: nil)
76
+ {
77
+ name: @name,
78
+ value: value,
79
+ data_type: data_type,
80
+ comment: comment
81
+ }
82
+ end
83
+ end
84
+
85
+ class ExactMatchEvaluator < BaseEvaluator
86
+ def initialize(name: 'exact_match', description: 'Exact match evaluator')
87
+ super(name: name, description: description)
88
+ end
89
+
90
+ def evaluate(input, output, expected: nil, context: nil)
91
+ return create_score(value: 0, comment: 'No expected value provided') unless expected
92
+
93
+ score = output.to_s.strip == expected.to_s.strip ? 1 : 0
94
+ create_score(
95
+ value: score,
96
+ comment: score == 1 ? 'Exact match' : 'No match'
97
+ )
98
+ end
99
+ end
100
+
101
+ class ContainsEvaluator < BaseEvaluator
102
+ def initialize(name: 'contains', description: 'Contains evaluator', case_sensitive: false)
103
+ super(name: name, description: description)
104
+ @case_sensitive = case_sensitive
105
+ end
106
+
107
+ def evaluate(input, output, expected: nil, context: nil)
108
+ return create_score(value: 0, comment: 'No expected value provided') unless expected
109
+
110
+ output_str = @case_sensitive ? output.to_s : output.to_s.downcase
111
+ expected_str = @case_sensitive ? expected.to_s : expected.to_s.downcase
112
+
113
+ score = output_str.include?(expected_str) ? 1 : 0
114
+ create_score(
115
+ value: score,
116
+ comment: score == 1 ? 'Contains expected text' : 'Does not contain expected text'
117
+ )
118
+ end
119
+ end
120
+
121
    # Scores output length. With min and/or max bounds the score is
    # binary (1 = within bounds, 0 = outside); with no bounds at all the
    # raw character count itself becomes the score.
    class LengthEvaluator < BaseEvaluator
      def initialize(name: 'length', description: 'Length evaluator', min_length: nil, max_length: nil)
        super(name: name, description: description)
        @min_length = min_length
        @max_length = max_length
      end

      def evaluate(input, output, expected: nil, context: nil)
        length = output.to_s.length

        if @min_length && @max_length
          # Both bounds: inclusive range check.
          score = (length >= @min_length && length <= @max_length) ? 1 : 0
          comment = score == 1 ? "Length #{length} within range" : "Length #{length} outside range #{@min_length}-#{@max_length}"
        elsif @min_length
          score = length >= @min_length ? 1 : 0
          comment = score == 1 ? "Length #{length} meets minimum" : "Length #{length} below minimum #{@min_length}"
        elsif @max_length
          score = length <= @max_length ? 1 : 0
          comment = score == 1 ? "Length #{length} within maximum" : "Length #{length} exceeds maximum #{@max_length}"
        else
          # NOTE(review): with no bounds the score is the raw length, not
          # a 0/1 value — confirm downstream consumers expect this.
          score = length
          comment = "Length: #{length}"
        end

        create_score(
          value: score,
          data_type: 'NUMERIC',
          comment: comment
        )
      end
    end
152
+
153
+ class RegexEvaluator < BaseEvaluator
154
+ def initialize(name: 'regex', description: 'Regex evaluator', pattern:)
155
+ super(name: name, description: description)
156
+ @pattern = pattern.is_a?(Regexp) ? pattern : Regexp.new(pattern)
157
+ end
158
+
159
+ def evaluate(input, output, expected: nil, context: nil)
160
+ match = @pattern.match(output.to_s)
161
+ score = match ? 1 : 0
162
+
163
+ create_score(
164
+ value: score,
165
+ comment: score == 1 ? 'Regex pattern matched' : 'Regex pattern not matched'
166
+ )
167
+ end
168
+ end
169
+
170
    # Scores similarity between output and expected in 0.0..1.0 using a
    # normalized Levenshtein (edit) distance over characters.
    class SimilarityEvaluator < BaseEvaluator
      def initialize(name: 'similarity', description: 'Similarity evaluator')
        super(name: name, description: description)
      end

      def evaluate(input, output, expected: nil, context: nil)
        return create_score(value: 0, comment: 'No expected value provided') unless expected

        # Simple character-based similarity (Levenshtein distance)
        similarity = calculate_similarity(output.to_s, expected.to_s)

        create_score(
          value: similarity,
          data_type: 'NUMERIC',
          comment: "Similarity: #{(similarity * 100).round(2)}%"
        )
      end

      private

      # 1.0 for identical strings, 0.0 if either is empty, otherwise
      # 1 - distance / max_length.
      def calculate_similarity(str1, str2)
        return 1.0 if str1 == str2
        return 0.0 if str1.empty? || str2.empty?

        distance = levenshtein_distance(str1, str2)
        max_length = [str1.length, str2.length].max

        1.0 - (distance.to_f / max_length)
      end

      # Classic full-matrix dynamic-programming edit distance:
      # O(len1 * len2) time and space.
      def levenshtein_distance(str1, str2)
        matrix = Array.new(str1.length + 1) { Array.new(str2.length + 1) }

        # Base cases: distance from the empty prefix is the length.
        (0..str1.length).each { |i| matrix[i][0] = i }
        (0..str2.length).each { |j| matrix[0][j] = j }

        (1..str1.length).each do |i|
          (1..str2.length).each do |j|
            cost = str1[i-1] == str2[j-1] ? 0 : 1
            matrix[i][j] = [
              matrix[i-1][j] + 1, # deletion
              matrix[i][j-1] + 1, # insertion
              matrix[i-1][j-1] + cost # substitution
            ].min
          end
        end

        matrix[str1.length][str2.length]
      end
    end
220
+
221
    # Placeholder for an LLM-as-judge evaluator. It interpolates the
    # prompt template but does NOT call a model yet: the score is drawn
    # from rand(0.0..1.0), so results are non-deterministic.
    class LLMEvaluator < BaseEvaluator
      # client: presumably an LLM API client — currently stored but
      # never used (TODO confirm intended wiring).
      def initialize(name: 'llm_evaluator', description: 'LLM-based evaluator', client:, model: 'gpt-3.5-turbo', prompt_template: nil)
        super(name: name, description: description)
        @client = client
        @model = model
        @prompt_template = prompt_template || default_prompt_template
      end

      def evaluate(input, output, expected: nil, context: nil)
        # This is a placeholder for LLM-based evaluation
        # In a real implementation, you would call an LLM API here
        prompt = @prompt_template.gsub('{input}', input.to_s)
                                 .gsub('{output}', output.to_s)
                                 .gsub('{expected}', expected.to_s)
                                 .gsub('{context}', context.to_s)

        # Simulate LLM response (in real implementation, call actual LLM)
        score = rand(0.0..1.0).round(2)

        create_score(
          value: score,
          data_type: 'NUMERIC',
          comment: "LLM evaluation score: #{score}"
        )
      end

      private

      # Default judging instructions; {input}/{output}/{expected}/{context}
      # are replaced by #evaluate via gsub.
      def default_prompt_template
        <<~PROMPT
          Please evaluate the following response:

          Input: {input}
          Output: {output}
          Expected: {expected}
          Context: {context}

          Rate the quality of the output on a scale from 0 to 1, where:
          - 0 = Poor quality, incorrect or irrelevant
          - 1 = Excellent quality, accurate and relevant

          Provide only the numeric score.
        PROMPT
      end
    end
266
+ end
267
+ end