langfuse-rb 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +27 -2
- data/README.md +28 -73
- data/lib/langfuse/api_client.rb +72 -0
- data/lib/langfuse/cache_warmer.rb +1 -1
- data/lib/langfuse/client.rb +70 -7
- data/lib/langfuse/config.rb +59 -6
- data/lib/langfuse/observations.rb +24 -19
- data/lib/langfuse/otel_setup.rb +140 -80
- data/lib/langfuse/sampling.rb +20 -0
- data/lib/langfuse/score_client.rb +43 -18
- data/lib/langfuse/span_filter.rb +81 -0
- data/lib/langfuse/span_processor.rb +37 -36
- data/lib/langfuse/trace_id.rb +88 -0
- data/lib/langfuse/version.rb +1 -1
- data/lib/langfuse.rb +140 -44
- metadata +9 -3
data/lib/langfuse/otel_setup.rb
CHANGED
|
@@ -2,104 +2,78 @@
|
|
|
2
2
|
|
|
3
3
|
require "opentelemetry/sdk"
|
|
4
4
|
require "opentelemetry/exporter/otlp"
|
|
5
|
-
require "opentelemetry/trace/propagation/trace_context"
|
|
6
5
|
require "base64"
|
|
7
6
|
|
|
8
7
|
module Langfuse
|
|
9
|
-
# OpenTelemetry initialization and setup
|
|
10
|
-
#
|
|
11
|
-
# Handles configuration of the OTel SDK with Langfuse OTLP exporter
|
|
12
|
-
# when tracing is enabled.
|
|
13
|
-
#
|
|
8
|
+
# OpenTelemetry initialization and setup for Langfuse tracing.
|
|
9
|
+
# rubocop:disable Metrics/ModuleLength
|
|
14
10
|
module OtelSetup
|
|
11
|
+
TRACING_CONFIG_FIELDS = %i[
|
|
12
|
+
public_key
|
|
13
|
+
secret_key
|
|
14
|
+
base_url
|
|
15
|
+
environment
|
|
16
|
+
release
|
|
17
|
+
sample_rate
|
|
18
|
+
should_export_span
|
|
19
|
+
tracing_async
|
|
20
|
+
batch_size
|
|
21
|
+
flush_interval
|
|
22
|
+
].freeze
|
|
23
|
+
private_constant(:TRACING_CONFIG_FIELDS)
|
|
24
|
+
|
|
15
25
|
class << self
|
|
16
|
-
# @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured tracer provider
|
|
26
|
+
# @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured internal tracer provider
|
|
17
27
|
attr_reader :tracer_provider
|
|
18
28
|
|
|
19
|
-
# Initialize
|
|
29
|
+
# Initialize Langfuse's internal tracer provider without mutating global OpenTelemetry state.
|
|
20
30
|
#
|
|
21
31
|
# @param config [Langfuse::Config] The Langfuse configuration
|
|
22
|
-
# @return [
|
|
23
|
-
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
32
|
+
# @return [OpenTelemetry::SDK::Trace::TracerProvider]
|
|
24
33
|
def setup(config)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# Async: BatchSpanProcessor batches and sends in background
|
|
37
|
-
OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
|
|
38
|
-
exporter,
|
|
39
|
-
max_queue_size: config.batch_size * 2, # Buffer more than batch_size
|
|
40
|
-
schedule_delay: config.flush_interval * 1000, # Convert seconds to milliseconds
|
|
41
|
-
max_export_batch_size: config.batch_size
|
|
42
|
-
)
|
|
43
|
-
else
|
|
44
|
-
# Sync: BatchSpanProcessor with minimal delay (flushes on force_flush)
|
|
45
|
-
# This collects spans from the same trace and exports them together,
|
|
46
|
-
# which is critical for correct parent_observation_id calculation
|
|
47
|
-
OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
|
|
48
|
-
exporter,
|
|
49
|
-
max_queue_size: config.batch_size * 2,
|
|
50
|
-
schedule_delay: 60_000, # 60 seconds (relies on explicit force_flush)
|
|
51
|
-
max_export_batch_size: config.batch_size
|
|
52
|
-
)
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Create TracerProvider with processor
|
|
56
|
-
@tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new
|
|
57
|
-
@tracer_provider.add_span_processor(processor)
|
|
58
|
-
|
|
59
|
-
# Add span processor for propagated attributes and env/release defaults
|
|
60
|
-
# This must be added AFTER the BatchSpanProcessor so it runs before export and can
|
|
61
|
-
# apply all attributes (propagated IDs, environment, release) to the spans being sent
|
|
62
|
-
span_processor = SpanProcessor.new(config: config)
|
|
63
|
-
@tracer_provider.add_span_processor(span_processor)
|
|
64
|
-
|
|
65
|
-
# Set as global tracer provider
|
|
66
|
-
OpenTelemetry.tracer_provider = @tracer_provider
|
|
67
|
-
|
|
68
|
-
# Configure W3C TraceContext propagator if not already set
|
|
69
|
-
if OpenTelemetry.propagation.is_a?(OpenTelemetry::Context::Propagation::NoopTextMapPropagator)
|
|
70
|
-
OpenTelemetry.propagation = OpenTelemetry::Trace::Propagation::TraceContext::TextMapPropagator.new
|
|
71
|
-
config.logger.debug("Langfuse: Configured W3C TraceContext propagator")
|
|
72
|
-
else
|
|
73
|
-
config.logger.debug("Langfuse: Using existing propagator: #{OpenTelemetry.propagation.class}")
|
|
34
|
+
validate_tracing_config!(config)
|
|
35
|
+
return existing_provider_for(config) if initialized?
|
|
36
|
+
|
|
37
|
+
candidate_provider = nil
|
|
38
|
+
provider = nil
|
|
39
|
+
created = false
|
|
40
|
+
candidate_provider = build_tracer_provider(config)
|
|
41
|
+
provider, created = publish_provider(candidate_provider, tracing_config_snapshot(config))
|
|
42
|
+
unless created
|
|
43
|
+
candidate_provider.shutdown(timeout: 30)
|
|
44
|
+
return existing_provider_for(config)
|
|
74
45
|
end
|
|
75
46
|
|
|
76
|
-
|
|
77
|
-
|
|
47
|
+
log_initialized(config)
|
|
48
|
+
provider
|
|
49
|
+
rescue StandardError
|
|
50
|
+
rollback_provider(provider) if created
|
|
51
|
+
raise
|
|
78
52
|
end
|
|
79
|
-
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
|
80
53
|
|
|
81
|
-
# Shutdown the tracer provider and flush any pending spans
|
|
54
|
+
# Shutdown the internal tracer provider and flush any pending spans.
|
|
82
55
|
#
|
|
83
56
|
# @param timeout [Integer] Timeout in seconds
|
|
84
57
|
# @return [void]
|
|
85
58
|
def shutdown(timeout: 30)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
59
|
+
provider = nil
|
|
60
|
+
setup_mutex.synchronize do
|
|
61
|
+
provider = @tracer_provider
|
|
62
|
+
@tracer_provider = nil
|
|
63
|
+
@config_snapshot = nil
|
|
64
|
+
end
|
|
65
|
+
provider&.shutdown(timeout: timeout)
|
|
90
66
|
end
|
|
91
67
|
|
|
92
|
-
# Force flush all pending spans
|
|
68
|
+
# Force flush all pending spans on the internal tracer provider.
|
|
93
69
|
#
|
|
94
70
|
# @param timeout [Integer] Timeout in seconds
|
|
95
71
|
# @return [void]
|
|
96
72
|
def force_flush(timeout: 30)
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
@tracer_provider.force_flush(timeout: timeout)
|
|
73
|
+
@tracer_provider&.force_flush(timeout: timeout)
|
|
100
74
|
end
|
|
101
75
|
|
|
102
|
-
# Check if
|
|
76
|
+
# Check if Langfuse tracing has been initialized.
|
|
103
77
|
#
|
|
104
78
|
# @return [Boolean]
|
|
105
79
|
def initialized?
|
|
@@ -108,18 +82,104 @@ module Langfuse
|
|
|
108
82
|
|
|
109
83
|
private
|
|
110
84
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
85
|
+
def existing_provider_for(config)
|
|
86
|
+
snapshot = tracing_config_snapshot(config)
|
|
87
|
+
if @config_snapshot == snapshot
|
|
88
|
+
config.logger.debug("Langfuse tracing already initialized; reusing existing tracer provider")
|
|
89
|
+
else
|
|
90
|
+
config.logger.warn(
|
|
91
|
+
"Langfuse tracing is already initialized. Changes to #{TRACING_CONFIG_FIELDS.join(', ')} " \
|
|
92
|
+
"require Langfuse.reset! before they take effect."
|
|
93
|
+
)
|
|
94
|
+
end
|
|
95
|
+
@tracer_provider
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
def publish_provider(provider, snapshot)
|
|
99
|
+
created = false
|
|
100
|
+
current = nil
|
|
101
|
+
|
|
102
|
+
# This mutex only guards publication so setup never exposes a half-built provider.
|
|
103
|
+
setup_mutex.synchronize do
|
|
104
|
+
if @tracer_provider
|
|
105
|
+
current = @tracer_provider
|
|
106
|
+
else
|
|
107
|
+
@tracer_provider = provider
|
|
108
|
+
@config_snapshot = snapshot
|
|
109
|
+
current = provider
|
|
110
|
+
created = true
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
[current, created]
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def rollback_provider(provider)
|
|
118
|
+
setup_mutex.synchronize do
|
|
119
|
+
return unless @tracer_provider.equal?(provider)
|
|
120
|
+
|
|
121
|
+
@tracer_provider = nil
|
|
122
|
+
@config_snapshot = nil
|
|
123
|
+
end
|
|
124
|
+
provider.shutdown(timeout: 1)
|
|
125
|
+
rescue StandardError
|
|
126
|
+
nil
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
def build_tracer_provider(config)
|
|
130
|
+
provider = OpenTelemetry::SDK::Trace::TracerProvider.new(
|
|
131
|
+
sampler: build_sampler(config.sample_rate)
|
|
132
|
+
)
|
|
133
|
+
provider.add_span_processor(
|
|
134
|
+
SpanProcessor.new(config: config, exporter: build_exporter(config))
|
|
135
|
+
)
|
|
136
|
+
provider
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def build_exporter(config)
|
|
140
|
+
OpenTelemetry::Exporter::OTLP::Exporter.new(
|
|
141
|
+
endpoint: "#{config.base_url}/api/public/otel/v1/traces",
|
|
142
|
+
headers: build_headers(config.public_key, config.secret_key),
|
|
143
|
+
compression: "gzip"
|
|
144
|
+
)
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def log_initialized(config)
|
|
148
|
+
mode = config.tracing_async ? "async" : "sync"
|
|
149
|
+
config.logger.info("Langfuse tracing initialized with OpenTelemetry (#{mode} mode)")
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
def validate_tracing_config!(config)
|
|
153
|
+
raise ConfigurationError, "public_key is required" if blank?(config.public_key)
|
|
154
|
+
raise ConfigurationError, "secret_key is required" if blank?(config.secret_key)
|
|
155
|
+
raise ConfigurationError, "base_url cannot be empty" if blank?(config.base_url)
|
|
156
|
+
return if config.should_export_span.nil? || config.should_export_span.respond_to?(:call)
|
|
157
|
+
|
|
158
|
+
raise ConfigurationError, "should_export_span must respond to #call"
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
def tracing_config_snapshot(config)
|
|
162
|
+
TRACING_CONFIG_FIELDS.to_h { |field| [field, config.public_send(field)] }.freeze
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
def setup_mutex
|
|
166
|
+
@setup_mutex ||= Mutex.new
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
def blank?(value)
|
|
170
|
+
value.nil? || value.empty?
|
|
171
|
+
end
|
|
172
|
+
|
|
116
173
|
def build_headers(public_key, secret_key)
|
|
117
174
|
credentials = "#{public_key}:#{secret_key}"
|
|
118
175
|
encoded = Base64.strict_encode64(credentials)
|
|
119
|
-
{
|
|
120
|
-
|
|
121
|
-
|
|
176
|
+
{ "Authorization" => "Basic #{encoded}" }
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def build_sampler(sample_rate)
|
|
180
|
+
Sampling.build_sampler(sample_rate) || OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
|
|
122
181
|
end
|
|
123
182
|
end
|
|
124
183
|
end
|
|
184
|
+
# rubocop:enable Metrics/ModuleLength
|
|
125
185
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Langfuse
|
|
4
|
+
# Shared sampling helpers for trace and score emission.
|
|
5
|
+
#
|
|
6
|
+
# @api private
|
|
7
|
+
module Sampling
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Build the sampler used by both trace export and trace-linked score emission.
|
|
11
|
+
#
|
|
12
|
+
# @param sample_rate [Float] Sampling rate from 0.0 to 1.0
|
|
13
|
+
# @return [OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased, nil]
|
|
14
|
+
def build_sampler(sample_rate)
|
|
15
|
+
return nil if sample_rate >= 1.0
|
|
16
|
+
|
|
17
|
+
OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased.new(sample_rate)
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -31,6 +31,8 @@ module Langfuse
|
|
|
31
31
|
# @return [Logger] Logger instance
|
|
32
32
|
attr_reader :logger
|
|
33
33
|
|
|
34
|
+
HEX_TRACE_ID_PATTERN = /\A[0-9a-f]{32}\z/
|
|
35
|
+
|
|
34
36
|
# Initialize a new ScoreClient
|
|
35
37
|
#
|
|
36
38
|
# @param api_client [ApiClient] The API client for sending batches
|
|
@@ -43,6 +45,9 @@ module Langfuse
|
|
|
43
45
|
@mutex = Mutex.new
|
|
44
46
|
@flush_thread = nil
|
|
45
47
|
@shutdown = false
|
|
48
|
+
# Match the immutable tracing setup contract: once this client exists, later config
|
|
49
|
+
# mutations must not change score sampling without rebuilding the client.
|
|
50
|
+
@score_sampler = Sampling.build_sampler(config.sample_rate)
|
|
46
51
|
|
|
47
52
|
start_flush_timer
|
|
48
53
|
end
|
|
@@ -76,28 +81,19 @@ module Langfuse
|
|
|
76
81
|
def create(name:, value:, id: nil, trace_id: nil, session_id: nil, observation_id: nil, comment: nil,
|
|
77
82
|
metadata: nil, environment: nil, data_type: :numeric, dataset_run_id: nil, config_id: nil)
|
|
78
83
|
validate_name(name)
|
|
79
|
-
# Keep identifier policy server-side to preserve cross-SDK parity and avoid blocking valid future payloads.
|
|
80
84
|
normalized_value = normalize_value(value, data_type)
|
|
81
85
|
data_type_str = Types::SCORE_DATA_TYPES[data_type] || raise(ArgumentError, "Invalid data_type: #{data_type}")
|
|
82
86
|
|
|
87
|
+
return unless enqueue_trace_linked_score?(trace_id)
|
|
88
|
+
|
|
83
89
|
event = build_score_event(
|
|
84
|
-
name: name,
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
session_id: session_id,
|
|
89
|
-
observation_id: observation_id,
|
|
90
|
-
comment: comment,
|
|
91
|
-
metadata: metadata,
|
|
92
|
-
environment: environment,
|
|
93
|
-
data_type: data_type_str,
|
|
94
|
-
dataset_run_id: dataset_run_id,
|
|
95
|
-
config_id: config_id
|
|
90
|
+
name: name, value: normalized_value, id: id, trace_id: trace_id,
|
|
91
|
+
session_id: session_id, observation_id: observation_id, comment: comment,
|
|
92
|
+
metadata: metadata, environment: environment, data_type: data_type_str,
|
|
93
|
+
dataset_run_id: dataset_run_id, config_id: config_id
|
|
96
94
|
)
|
|
97
95
|
|
|
98
96
|
@queue << event
|
|
99
|
-
|
|
100
|
-
# Trigger flush if batch size reached
|
|
101
97
|
flush if @queue.size >= config.batch_size
|
|
102
98
|
rescue StandardError => e
|
|
103
99
|
logger.error("Langfuse score creation failed: #{e.message}")
|
|
@@ -294,14 +290,43 @@ module Langfuse
|
|
|
294
290
|
# @return [Hash] Hash with :trace_id and :observation_id (may be nil)
|
|
295
291
|
def extract_ids_from_active_span
|
|
296
292
|
span = OpenTelemetry::Trace.current_span
|
|
297
|
-
|
|
293
|
+
span_context = span&.context
|
|
294
|
+
return { trace_id: nil, observation_id: nil } unless span_context&.valid?
|
|
298
295
|
|
|
299
296
|
{
|
|
300
|
-
trace_id:
|
|
301
|
-
observation_id:
|
|
297
|
+
trace_id: span_context.trace_id.unpack1("H*"),
|
|
298
|
+
observation_id: span_context.span_id.unpack1("H*")
|
|
302
299
|
}
|
|
303
300
|
end
|
|
304
301
|
|
|
302
|
+
# Score sampling is decided purely by the configured sampler on the trace_id hash,
|
|
303
|
+
# matching langfuse-python. Non-hex trace ids and session/dataset-only scores bypass sampling.
|
|
304
|
+
def enqueue_trace_linked_score?(trace_id)
|
|
305
|
+
return true if trace_id.nil?
|
|
306
|
+
return true unless HEX_TRACE_ID_PATTERN.match?(trace_id)
|
|
307
|
+
|
|
308
|
+
sampler = score_sampler
|
|
309
|
+
return true if sampler.nil?
|
|
310
|
+
return true unless sampler.respond_to?(:should_sample?)
|
|
311
|
+
|
|
312
|
+
sample_result = sampler.should_sample?(
|
|
313
|
+
trace_id: [trace_id].pack("H*"),
|
|
314
|
+
parent_context: nil,
|
|
315
|
+
links: [],
|
|
316
|
+
name: "score",
|
|
317
|
+
kind: OpenTelemetry::Trace::SpanKind::INTERNAL,
|
|
318
|
+
attributes: {}
|
|
319
|
+
)
|
|
320
|
+
sample_result.sampled?
|
|
321
|
+
rescue StandardError => e
|
|
322
|
+
logger.warn("Langfuse score sampling fallback for trace_id=#{trace_id}: #{e.message}")
|
|
323
|
+
true
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
# Sampler is pinned at ScoreClient construction to match the "sample_rate requires reset!"
|
|
327
|
+
# contract and to keep each client's sampling scoped to its own config.
|
|
328
|
+
attr_reader :score_sampler
|
|
329
|
+
|
|
305
330
|
# Send a batch of events to the API
|
|
306
331
|
#
|
|
307
332
|
# @param events [Array<Hash>] Array of event hashes
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Langfuse
|
|
4
|
+
# Instrumentation scope name used by module-level Langfuse tracing.
|
|
5
|
+
LANGFUSE_TRACER_NAME = "langfuse-rb"
|
|
6
|
+
|
|
7
|
+
# Conservative allowlist of instrumentation scope prefixes that clearly belong to LLM workflows.
|
|
8
|
+
KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES = [
|
|
9
|
+
LANGFUSE_TRACER_NAME,
|
|
10
|
+
"agent_framework",
|
|
11
|
+
"ai",
|
|
12
|
+
"haystack",
|
|
13
|
+
"langsmith",
|
|
14
|
+
"litellm",
|
|
15
|
+
"openinference",
|
|
16
|
+
"opentelemetry.instrumentation.anthropic",
|
|
17
|
+
"strands-agents",
|
|
18
|
+
"vllm"
|
|
19
|
+
].freeze
|
|
20
|
+
|
|
21
|
+
# Matched per span in the export path, so avoid allocating the dotted form each call.
|
|
22
|
+
KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES =
|
|
23
|
+
KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.map { |prefix| "#{prefix}." }.freeze
|
|
24
|
+
private_constant :KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES
|
|
25
|
+
|
|
26
|
+
class << self
|
|
27
|
+
# Return whether the span was created by Langfuse's tracer.
|
|
28
|
+
#
|
|
29
|
+
# @param span [#instrumentation_scope] Span or span data to inspect
|
|
30
|
+
# @return [Boolean]
|
|
31
|
+
def langfuse_span?(span)
|
|
32
|
+
instrumentation_scope_name(span) == LANGFUSE_TRACER_NAME
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Return whether the span contains `gen_ai.*` attributes.
|
|
36
|
+
#
|
|
37
|
+
# @param span [#attributes] Span or span data to inspect
|
|
38
|
+
# @return [Boolean]
|
|
39
|
+
def genai_span?(span)
|
|
40
|
+
attributes = span.attributes
|
|
41
|
+
return false unless attributes
|
|
42
|
+
|
|
43
|
+
attributes.keys.any? { |key| key.is_a?(String) && key.start_with?("gen_ai.") }
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Return whether the span came from a known LLM instrumentation scope.
|
|
47
|
+
#
|
|
48
|
+
# @param span [#instrumentation_scope] Span or span data to inspect
|
|
49
|
+
# @return [Boolean]
|
|
50
|
+
def known_llm_instrumentor?(span)
|
|
51
|
+
scope_name = instrumentation_scope_name(span)
|
|
52
|
+
return false unless scope_name
|
|
53
|
+
|
|
54
|
+
return true if KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.include?(scope_name)
|
|
55
|
+
|
|
56
|
+
KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES.any? do |dotted_prefix|
|
|
57
|
+
scope_name.start_with?(dotted_prefix)
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Return whether a span should be exported when no custom filter is configured.
|
|
62
|
+
#
|
|
63
|
+
# @param span [#instrumentation_scope, #attributes] Span or span data to inspect
|
|
64
|
+
# @return [Boolean]
|
|
65
|
+
def default_export_span?(span)
|
|
66
|
+
langfuse_span?(span) || genai_span?(span) || known_llm_instrumentor?(span)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
# Cross-SDK parity keeps the `is_*` names public for compatibility.
|
|
70
|
+
alias is_langfuse_span langfuse_span?
|
|
71
|
+
alias is_genai_span genai_span?
|
|
72
|
+
alias is_known_llm_instrumentor known_llm_instrumentor?
|
|
73
|
+
alias is_default_export_span default_export_span?
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
def instrumentation_scope_name(span)
|
|
78
|
+
span.instrumentation_scope&.name
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -3,22 +3,26 @@
|
|
|
3
3
|
require "opentelemetry/sdk"
|
|
4
4
|
|
|
5
5
|
module Langfuse
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
# On span start, this processor first applies configured trace defaults
|
|
9
|
-
# (environment/release), then overlays attributes propagated in OpenTelemetry
|
|
10
|
-
# context (user/session/metadata/tags/version). This ensures consistent
|
|
11
|
-
# trace dimensions while still honoring per-request propagation.
|
|
6
|
+
# Batch span processor that owns Langfuse's enrichment and export filtering.
|
|
12
7
|
#
|
|
13
8
|
# @api private
|
|
14
|
-
class SpanProcessor < OpenTelemetry::SDK::Trace::
|
|
15
|
-
# @param config [Langfuse::Config
|
|
16
|
-
|
|
9
|
+
class SpanProcessor < OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor
|
|
10
|
+
# @param config [Langfuse::Config] SDK configuration used for defaults and filtering
|
|
11
|
+
# @param exporter [#export, #force_flush, #shutdown] Span exporter used by the batch processor
|
|
12
|
+
def initialize(config:, exporter:)
|
|
13
|
+
@logger = config.logger
|
|
17
14
|
@default_trace_attributes = build_default_trace_attributes(config).freeze
|
|
18
|
-
|
|
15
|
+
@should_export_span = config.should_export_span || Langfuse.method(:default_export_span?)
|
|
16
|
+
|
|
17
|
+
super(
|
|
18
|
+
exporter,
|
|
19
|
+
max_queue_size: config.batch_size * 2,
|
|
20
|
+
schedule_delay: schedule_delay_for(config),
|
|
21
|
+
max_export_batch_size: config.batch_size
|
|
22
|
+
)
|
|
19
23
|
end
|
|
20
24
|
|
|
21
|
-
#
|
|
25
|
+
# Apply Langfuse trace defaults and propagated attributes before a span records work.
|
|
22
26
|
#
|
|
23
27
|
# @param span [OpenTelemetry::SDK::Trace::Span] The span that started
|
|
24
28
|
# @param parent_context [OpenTelemetry::Context] The parent context
|
|
@@ -30,41 +34,28 @@ module Langfuse
|
|
|
30
34
|
apply_attributes(span, propagated_attributes(parent_context))
|
|
31
35
|
end
|
|
32
36
|
|
|
33
|
-
#
|
|
37
|
+
# Drop spans when the export filter rejects them or raises.
|
|
34
38
|
#
|
|
35
39
|
# @param span [OpenTelemetry::SDK::Trace::Span] The span that ended
|
|
36
40
|
# @return [void]
|
|
37
41
|
def on_finish(span)
|
|
38
|
-
|
|
39
|
-
end
|
|
42
|
+
return unless should_export_span?(span)
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
#
|
|
43
|
-
# @param timeout [Integer, nil] Timeout in seconds (unused for this processor)
|
|
44
|
-
# @return [Integer] Always returns 0 (no timeout needed for no-op)
|
|
45
|
-
def shutdown(timeout: nil)
|
|
46
|
-
# No-op - nothing to clean up
|
|
47
|
-
# Return 0 to match OpenTelemetry SDK expectation (it finds max timeout from processors)
|
|
48
|
-
_ = timeout # Suppress unused argument warning
|
|
49
|
-
0
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
# Force flush (no-op for this processor)
|
|
53
|
-
#
|
|
54
|
-
# @param timeout [Integer, nil] Timeout in seconds (unused for this processor)
|
|
55
|
-
# @return [Integer] Always returns 0 (no timeout needed for no-op)
|
|
56
|
-
def force_flush(timeout: nil)
|
|
57
|
-
# No-op - nothing to flush
|
|
58
|
-
# Return 0 to match OpenTelemetry SDK expectation (it finds max timeout from processors)
|
|
59
|
-
_ = timeout # Suppress unused argument warning
|
|
60
|
-
0
|
|
44
|
+
super
|
|
61
45
|
end
|
|
62
46
|
|
|
63
47
|
private
|
|
64
48
|
|
|
65
|
-
|
|
66
|
-
|
|
49
|
+
# Sync mode relies on explicit `force_flush` calls, so keep the background flush
|
|
50
|
+
# interval long enough that it rarely fires on its own.
|
|
51
|
+
SYNC_SCHEDULE_DELAY_MS = 60_000
|
|
52
|
+
private_constant :SYNC_SCHEDULE_DELAY_MS
|
|
53
|
+
|
|
54
|
+
def schedule_delay_for(config)
|
|
55
|
+
config.tracing_async ? config.flush_interval * 1000 : SYNC_SCHEDULE_DELAY_MS
|
|
56
|
+
end
|
|
67
57
|
|
|
58
|
+
def build_default_trace_attributes(config)
|
|
68
59
|
OtelAttributes.create_trace_attributes(
|
|
69
60
|
{ environment: config.environment, release: config.release }
|
|
70
61
|
)
|
|
@@ -79,5 +70,15 @@ module Langfuse
|
|
|
79
70
|
def apply_attributes(span, attributes)
|
|
80
71
|
attributes.each { |key, value| span.set_attribute(key, value) }
|
|
81
72
|
end
|
|
73
|
+
|
|
74
|
+
def should_export_span?(span)
|
|
75
|
+
@should_export_span.call(span)
|
|
76
|
+
rescue StandardError => e
|
|
77
|
+
@logger.error(
|
|
78
|
+
"Langfuse tracing dropped span '#{span.name}' because should_export_span raised: " \
|
|
79
|
+
"#{e.class}: #{e.message}"
|
|
80
|
+
)
|
|
81
|
+
false
|
|
82
|
+
end
|
|
82
83
|
end
|
|
83
84
|
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "digest"
|
|
4
|
+
|
|
5
|
+
module Langfuse
|
|
6
|
+
# Deterministic and random trace/observation ID generation.
|
|
7
|
+
#
|
|
8
|
+
# Mirrors the Python and JS SDK helpers so the same seed produces the same
|
|
9
|
+
# trace ID across all three SDKs. This lets callers correlate Langfuse traces
|
|
10
|
+
# with external system identifiers (database primary keys, request IDs, etc.)
|
|
11
|
+
# and score or reference traces later without having to persist the generated
|
|
12
|
+
# Langfuse ID.
|
|
13
|
+
#
|
|
14
|
+
# @example Deterministic from an external ID
|
|
15
|
+
# trace_id = Langfuse::TraceId.create(seed: "order-12345")
|
|
16
|
+
# Langfuse.observe("process-order", trace_id: trace_id) { |span| ... }
|
|
17
|
+
# Langfuse.create_score(name: "quality", value: 0.9, trace_id: trace_id)
|
|
18
|
+
#
|
|
19
|
+
# @example Random (no seed)
|
|
20
|
+
# trace_id = Langfuse::TraceId.create
|
|
21
|
+
module TraceId
|
|
22
|
+
TRACE_ID_PATTERN = /\A[0-9a-f]{32}\z/
|
|
23
|
+
INVALID_TRACE_ID = ("0" * 32)
|
|
24
|
+
|
|
25
|
+
private_constant :TRACE_ID_PATTERN, :INVALID_TRACE_ID
|
|
26
|
+
|
|
27
|
+
class << self
|
|
28
|
+
# Generate a W3C trace ID (32 lowercase hex chars).
|
|
29
|
+
#
|
|
30
|
+
# With no seed, delegates to OpenTelemetry's random trace ID generator.
|
|
31
|
+
# With a seed, takes the first 16 bytes of SHA-256(seed) so the same
|
|
32
|
+
# input always produces the same trace ID.
|
|
33
|
+
#
|
|
34
|
+
# @note Avoid passing PII, secrets, or credentials as seeds — the raw seed
|
|
35
|
+
# value appears in application code and may leak through logs/backtraces.
|
|
36
|
+
# Use stable external identifiers (database PKs, UUIDs, request IDs).
|
|
37
|
+
# @param seed [String, nil] Optional seed for deterministic generation.
|
|
38
|
+
# Must be a String if provided; non-String values raise ArgumentError
|
|
39
|
+
# for cross-SDK parity (Python/JS both reject non-strings).
|
|
40
|
+
# @return [String] 32-character lowercase hex trace ID
|
|
41
|
+
# @raise [ArgumentError] if seed is not nil and not a String
|
|
42
|
+
def create(seed: nil)
|
|
43
|
+
return OpenTelemetry::Trace.generate_trace_id.unpack1("H*") if seed.nil?
|
|
44
|
+
|
|
45
|
+
Digest::SHA256.digest(validate_seed!(seed))[0, 16].unpack1("H*")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
private
|
|
49
|
+
|
|
50
|
+
# @api private
|
|
51
|
+
def validate_seed!(seed)
|
|
52
|
+
raise ArgumentError, "seed must be a String, got #{seed.class}" unless seed.is_a?(String)
|
|
53
|
+
|
|
54
|
+
# ASCII-8BIT strings (binary) often already hold valid UTF-8 bytes
|
|
55
|
+
# but can't be transcoded — re-tag them instead.
|
|
56
|
+
return seed.dup.force_encoding("UTF-8") if seed.encoding == Encoding::ASCII_8BIT
|
|
57
|
+
|
|
58
|
+
seed.encode("UTF-8")
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# @api private
|
|
62
|
+
def valid?(trace_id)
|
|
63
|
+
return false unless trace_id.is_a?(String) && TRACE_ID_PATTERN.match?(trace_id)
|
|
64
|
+
|
|
65
|
+
trace_id != INVALID_TRACE_ID
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Build a sampled OpenTelemetry SpanContext carrying the given hex trace ID.
|
|
69
|
+
#
|
|
70
|
+
# A random span_id is generated as a placeholder — only the trace_id is
|
|
71
|
+
# consumed by the child span that gets created.
|
|
72
|
+
#
|
|
73
|
+
# @api private
|
|
74
|
+
def to_span_context(trace_id)
|
|
75
|
+
raise ArgumentError, "Invalid trace_id: #{trace_id.inspect}" unless valid?(trace_id)
|
|
76
|
+
|
|
77
|
+
OpenTelemetry::Trace::SpanContext.new(
|
|
78
|
+
trace_id: [trace_id].pack("H*"),
|
|
79
|
+
span_id: OpenTelemetry::Trace.generate_span_id,
|
|
80
|
+
trace_flags: OpenTelemetry::Trace::TraceFlags::SAMPLED,
|
|
81
|
+
# Cross-SDK parity: Python uses is_remote=False (_create_remote_parent_span).
|
|
82
|
+
# Changing this would alter ParentBased sampler behavior across SDKs.
|
|
83
|
+
remote: false
|
|
84
|
+
)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
data/lib/langfuse/version.rb
CHANGED