langfuse-rb 0.7.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ module Langfuse
18
18
  # c.secret_key = "sk_..."
19
19
  # end
20
20
  #
21
+ # rubocop:disable Metrics/ClassLength
21
22
  class Config
22
23
  # @return [String, nil] Langfuse public API key
23
24
  attr_accessor :public_key
@@ -74,6 +75,12 @@ module Langfuse
74
75
  # @return [String, nil] Default release identifier applied to new traces/observations
75
76
  attr_accessor :release
76
77
 
78
+ # @return [Float] Trace sampling rate from 0.0 to 1.0
79
+ attr_reader :sample_rate
80
+
81
+ # @return [#call, nil] Callback that decides whether an ended span should export to Langfuse.
82
+ attr_accessor :should_export_span
83
+
77
84
  # @return [#call, nil] Mask callable applied to input, output, and metadata before serialization.
78
85
  # Receives `data:` keyword argument. nil disables masking.
79
86
  attr_accessor :mask
@@ -114,6 +121,9 @@ module Langfuse
114
121
  # @return [Symbol] Default ActiveJob queue name
115
122
  DEFAULT_JOB_QUEUE = :default
116
123
 
124
+ # @return [Float] Default trace sampling rate (sample all traces)
125
+ DEFAULT_SAMPLE_RATE = 1.0
126
+
117
127
  # @return [Integer] Number of seconds representing indefinite cache duration (~1000 years)
118
128
  INDEFINITE_SECONDS = 1000 * 365 * 24 * 60 * 60
119
129
 
@@ -136,7 +146,6 @@ module Langfuse
136
146
  # @yield [config] Optional block for configuration
137
147
  # @yieldparam config [Config] The config instance
138
148
  # @return [Config] a new Config instance
139
- # rubocop:disable Metrics/AbcSize
140
149
  def initialize
141
150
  @public_key = ENV.fetch("LANGFUSE_PUBLIC_KEY", nil)
142
151
  @secret_key = ENV.fetch("LANGFUSE_SECRET_KEY", nil)
@@ -153,14 +162,11 @@ module Langfuse
153
162
  @batch_size = DEFAULT_BATCH_SIZE
154
163
  @flush_interval = DEFAULT_FLUSH_INTERVAL
155
164
  @job_queue = DEFAULT_JOB_QUEUE
156
- @environment = env_value("LANGFUSE_TRACING_ENVIRONMENT")
157
- @release = env_value("LANGFUSE_RELEASE") || detect_release_from_ci_env
158
- @mask = nil
165
+ initialize_tracing_defaults
159
166
  @logger = default_logger
160
167
 
161
168
  yield(self) if block_given?
162
169
  end
163
- # rubocop:enable Metrics/AbcSize
164
170
 
165
171
  # Validate the configuration
166
172
  #
@@ -183,7 +189,8 @@ module Langfuse
183
189
  validate_swr_config!
184
190
 
185
191
  validate_cache_backend!
186
-
192
+ validate_sample_rate!
193
+ validate_should_export_span!
187
194
  validate_mask!
188
195
  end
189
196
  # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
@@ -205,6 +212,15 @@ module Langfuse
205
212
  cache_stale_ttl == :indefinite ? INDEFINITE_SECONDS : cache_stale_ttl
206
213
  end
207
214
 
215
+ # Set trace sampling rate.
216
+ #
217
+ # @param value [Numeric, String] Sampling rate from 0.0 to 1.0
218
+ # @raise [ConfigurationError] if value is non-numeric or outside 0.0..1.0
219
+ # @return [Float]
220
+ def sample_rate=(value)
221
+ @sample_rate = coerce_sample_rate(value)
222
+ end
223
+
208
224
  private
209
225
 
210
226
  def default_logger
@@ -215,6 +231,14 @@ module Langfuse
215
231
  end
216
232
  end
217
233
 
234
+ def initialize_tracing_defaults
235
+ @environment = env_value("LANGFUSE_TRACING_ENVIRONMENT")
236
+ @release = env_value("LANGFUSE_RELEASE") || detect_release_from_ci_env
237
+ self.sample_rate = env_value("LANGFUSE_SAMPLE_RATE") || DEFAULT_SAMPLE_RATE
238
+ @should_export_span = nil
239
+ @mask = nil
240
+ end
241
+
218
242
  def validate_cache_backend!
219
243
  valid_backends = %i[memory rails]
220
244
  return if valid_backends.include?(cache_backend)
@@ -255,12 +279,24 @@ module Langfuse
255
279
  raise ConfigurationError, "cache_refresh_threads must be positive"
256
280
  end
257
281
 
282
+ def validate_sample_rate!
283
+ return if sample_rate.is_a?(Numeric) && sample_rate.between?(0.0, 1.0)
284
+
285
+ raise ConfigurationError, "sample_rate must be between 0.0 and 1.0"
286
+ end
287
+
258
288
  def validate_mask!
259
289
  return if mask.nil? || mask.respond_to?(:call)
260
290
 
261
291
  raise ConfigurationError, "mask must respond to #call"
262
292
  end
263
293
 
294
+ def validate_should_export_span!
295
+ return if should_export_span.nil? || should_export_span.respond_to?(:call)
296
+
297
+ raise ConfigurationError, "should_export_span must respond to #call"
298
+ end
299
+
264
300
  def detect_release_from_ci_env
265
301
  COMMON_RELEASE_ENV_KEYS.each do |key|
266
302
  value = env_value(key)
@@ -276,5 +312,22 @@ module Langfuse
276
312
 
277
313
  value
278
314
  end
315
+
316
+ def coerce_sample_rate(value)
317
+ numeric_value = if value.is_a?(Numeric)
318
+ value.to_f
319
+ elsif value.is_a?(String)
320
+ Float(value)
321
+ else
322
+ raise ConfigurationError, "sample_rate must be numeric"
323
+ end
324
+
325
+ return numeric_value if numeric_value.between?(0.0, 1.0)
326
+
327
+ raise ConfigurationError, "sample_rate must be between 0.0 and 1.0"
328
+ rescue ArgumentError, TypeError
329
+ raise ConfigurationError, "sample_rate must be numeric"
330
+ end
279
331
  end
332
+ # rubocop:enable Metrics/ClassLength
280
333
  end
@@ -302,11 +302,9 @@ module Langfuse
302
302
  # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
303
303
  def self.add_prompt_attributes(otel_attributes, prompt)
304
304
  return unless prompt
305
+ return if fallback_prompt?(prompt)
305
306
 
306
- # Handle hash-like prompts
307
307
  if prompt.is_a?(Hash) || prompt.respond_to?(:[])
308
- return if prompt[:is_fallback] || prompt["is_fallback"]
309
-
310
308
  otel_attributes[OBSERVATION_PROMPT_NAME] = prompt[:name] || prompt["name"]
311
309
  otel_attributes[OBSERVATION_PROMPT_VERSION] = prompt[:version] || prompt["version"]
312
310
  # Handle objects with name/version methods (already converted in Trace#generation)
@@ -315,6 +313,16 @@ module Langfuse
315
313
  otel_attributes[OBSERVATION_PROMPT_VERSION] = prompt.version
316
314
  end
317
315
  end
316
+ # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
317
+
318
+ # @api private
319
+ def self.fallback_prompt?(prompt)
320
+ return true if prompt.respond_to?(:is_fallback) && prompt.is_fallback
321
+ return false unless prompt.is_a?(Hash)
322
+
323
+ !!get_hash_value(prompt, :is_fallback)
324
+ end
325
+ private_class_method :fallback_prompt?
318
326
  end
319
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/ModuleLength
327
+ # rubocop:enable Metrics/ModuleLength
320
328
  end
@@ -2,104 +2,78 @@
2
2
 
3
3
  require "opentelemetry/sdk"
4
4
  require "opentelemetry/exporter/otlp"
5
- require "opentelemetry/trace/propagation/trace_context"
6
5
  require "base64"
7
6
 
8
7
  module Langfuse
9
- # OpenTelemetry initialization and setup
10
- #
11
- # Handles configuration of the OTel SDK with Langfuse OTLP exporter
12
- # when tracing is enabled.
13
- #
8
+ # OpenTelemetry initialization and setup for Langfuse tracing.
9
+ # rubocop:disable Metrics/ModuleLength
14
10
  module OtelSetup
11
+ TRACING_CONFIG_FIELDS = %i[
12
+ public_key
13
+ secret_key
14
+ base_url
15
+ environment
16
+ release
17
+ sample_rate
18
+ should_export_span
19
+ tracing_async
20
+ batch_size
21
+ flush_interval
22
+ ].freeze
23
+ private_constant(:TRACING_CONFIG_FIELDS)
24
+
15
25
  class << self
16
- # @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured tracer provider
26
+ # @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured internal tracer provider
17
27
  attr_reader :tracer_provider
18
28
 
19
- # Initialize OpenTelemetry with Langfuse OTLP exporter
29
+ # Initialize Langfuse's internal tracer provider without mutating global OpenTelemetry state.
20
30
  #
21
31
  # @param config [Langfuse::Config] The Langfuse configuration
22
- # @return [void]
23
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
32
+ # @return [OpenTelemetry::SDK::Trace::TracerProvider]
24
33
  def setup(config)
25
- # Create OTLP exporter configured for Langfuse
26
- exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
27
- endpoint: "#{config.base_url}/api/public/otel/v1/traces",
28
- headers: build_headers(config.public_key, config.secret_key),
29
- compression: "gzip"
30
- )
31
-
32
- # Create processor based on async configuration
33
- # IMPORTANT: Always use BatchSpanProcessor (even in sync mode) to ensure spans
34
- # are exported together, which allows proper parent-child relationship detection
35
- processor = if config.tracing_async
36
- # Async: BatchSpanProcessor batches and sends in background
37
- OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
38
- exporter,
39
- max_queue_size: config.batch_size * 2, # Buffer more than batch_size
40
- schedule_delay: config.flush_interval * 1000, # Convert seconds to milliseconds
41
- max_export_batch_size: config.batch_size
42
- )
43
- else
44
- # Sync: BatchSpanProcessor with minimal delay (flushes on force_flush)
45
- # This collects spans from the same trace and exports them together,
46
- # which is critical for correct parent_observation_id calculation
47
- OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
48
- exporter,
49
- max_queue_size: config.batch_size * 2,
50
- schedule_delay: 60_000, # 60 seconds (relies on explicit force_flush)
51
- max_export_batch_size: config.batch_size
52
- )
53
- end
54
-
55
- # Create TracerProvider with processor
56
- @tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new
57
- @tracer_provider.add_span_processor(processor)
58
-
59
- # Add span processor for propagated attributes and env/release defaults
60
- # This must be added AFTER the BatchSpanProcessor so it runs before export and can
61
- # apply all attributes (propagated IDs, environment, release) to the spans being sent
62
- span_processor = SpanProcessor.new(config: config)
63
- @tracer_provider.add_span_processor(span_processor)
64
-
65
- # Set as global tracer provider
66
- OpenTelemetry.tracer_provider = @tracer_provider
67
-
68
- # Configure W3C TraceContext propagator if not already set
69
- if OpenTelemetry.propagation.is_a?(OpenTelemetry::Context::Propagation::NoopTextMapPropagator)
70
- OpenTelemetry.propagation = OpenTelemetry::Trace::Propagation::TraceContext::TextMapPropagator.new
71
- config.logger.debug("Langfuse: Configured W3C TraceContext propagator")
72
- else
73
- config.logger.debug("Langfuse: Using existing propagator: #{OpenTelemetry.propagation.class}")
34
+ validate_tracing_config!(config)
35
+ return existing_provider_for(config) if initialized?
36
+
37
+ candidate_provider = nil
38
+ provider = nil
39
+ created = false
40
+ candidate_provider = build_tracer_provider(config)
41
+ provider, created = publish_provider(candidate_provider, tracing_config_snapshot(config))
42
+ unless created
43
+ candidate_provider.shutdown(timeout: 30)
44
+ return existing_provider_for(config)
74
45
  end
75
46
 
76
- mode = config.tracing_async ? "async" : "sync"
77
- config.logger.info("Langfuse tracing initialized with OpenTelemetry (#{mode} mode)")
47
+ log_initialized(config)
48
+ provider
49
+ rescue StandardError
50
+ rollback_provider(provider) if created
51
+ raise
78
52
  end
79
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
80
53
 
81
- # Shutdown the tracer provider and flush any pending spans
54
+ # Shutdown the internal tracer provider and flush any pending spans.
82
55
  #
83
56
  # @param timeout [Integer] Timeout in seconds
84
57
  # @return [void]
85
58
  def shutdown(timeout: 30)
86
- return unless @tracer_provider
87
-
88
- @tracer_provider.shutdown(timeout: timeout)
89
- @tracer_provider = nil
59
+ provider = nil
60
+ setup_mutex.synchronize do
61
+ provider = @tracer_provider
62
+ @tracer_provider = nil
63
+ @config_snapshot = nil
64
+ end
65
+ provider&.shutdown(timeout: timeout)
90
66
  end
91
67
 
92
- # Force flush all pending spans
68
+ # Force flush all pending spans on the internal tracer provider.
93
69
  #
94
70
  # @param timeout [Integer] Timeout in seconds
95
71
  # @return [void]
96
72
  def force_flush(timeout: 30)
97
- return unless @tracer_provider
98
-
99
- @tracer_provider.force_flush(timeout: timeout)
73
+ @tracer_provider&.force_flush(timeout: timeout)
100
74
  end
101
75
 
102
- # Check if OTel is initialized
76
+ # Check if Langfuse tracing has been initialized.
103
77
  #
104
78
  # @return [Boolean]
105
79
  def initialized?
@@ -108,18 +82,104 @@ module Langfuse
108
82
 
109
83
  private
110
84
 
111
- # Build HTTP headers for Langfuse OTLP endpoint
112
- #
113
- # @param public_key [String] Langfuse public API key
114
- # @param secret_key [String] Langfuse secret API key
115
- # @return [Hash] HTTP headers with Basic Auth
85
+ def existing_provider_for(config)
86
+ snapshot = tracing_config_snapshot(config)
87
+ if @config_snapshot == snapshot
88
+ config.logger.debug("Langfuse tracing already initialized; reusing existing tracer provider")
89
+ else
90
+ config.logger.warn(
91
+ "Langfuse tracing is already initialized. Changes to #{TRACING_CONFIG_FIELDS.join(', ')} " \
92
+ "require Langfuse.reset! before they take effect."
93
+ )
94
+ end
95
+ @tracer_provider
96
+ end
97
+
98
+ def publish_provider(provider, snapshot)
99
+ created = false
100
+ current = nil
101
+
102
+ # This mutex only guards publication so setup never exposes a half-built provider.
103
+ setup_mutex.synchronize do
104
+ if @tracer_provider
105
+ current = @tracer_provider
106
+ else
107
+ @tracer_provider = provider
108
+ @config_snapshot = snapshot
109
+ current = provider
110
+ created = true
111
+ end
112
+ end
113
+
114
+ [current, created]
115
+ end
116
+
117
+ def rollback_provider(provider)
118
+ setup_mutex.synchronize do
119
+ return unless @tracer_provider.equal?(provider)
120
+
121
+ @tracer_provider = nil
122
+ @config_snapshot = nil
123
+ end
124
+ provider.shutdown(timeout: 1)
125
+ rescue StandardError
126
+ nil
127
+ end
128
+
129
+ def build_tracer_provider(config)
130
+ provider = OpenTelemetry::SDK::Trace::TracerProvider.new(
131
+ sampler: build_sampler(config.sample_rate)
132
+ )
133
+ provider.add_span_processor(
134
+ SpanProcessor.new(config: config, exporter: build_exporter(config))
135
+ )
136
+ provider
137
+ end
138
+
139
+ def build_exporter(config)
140
+ OpenTelemetry::Exporter::OTLP::Exporter.new(
141
+ endpoint: "#{config.base_url}/api/public/otel/v1/traces",
142
+ headers: build_headers(config.public_key, config.secret_key),
143
+ compression: "gzip"
144
+ )
145
+ end
146
+
147
+ def log_initialized(config)
148
+ mode = config.tracing_async ? "async" : "sync"
149
+ config.logger.info("Langfuse tracing initialized with OpenTelemetry (#{mode} mode)")
150
+ end
151
+
152
+ def validate_tracing_config!(config)
153
+ raise ConfigurationError, "public_key is required" if blank?(config.public_key)
154
+ raise ConfigurationError, "secret_key is required" if blank?(config.secret_key)
155
+ raise ConfigurationError, "base_url cannot be empty" if blank?(config.base_url)
156
+ return if config.should_export_span.nil? || config.should_export_span.respond_to?(:call)
157
+
158
+ raise ConfigurationError, "should_export_span must respond to #call"
159
+ end
160
+
161
+ def tracing_config_snapshot(config)
162
+ TRACING_CONFIG_FIELDS.to_h { |field| [field, config.public_send(field)] }.freeze
163
+ end
164
+
165
+ def setup_mutex
166
+ @setup_mutex ||= Mutex.new
167
+ end
168
+
169
+ def blank?(value)
170
+ value.nil? || value.empty?
171
+ end
172
+
116
173
  def build_headers(public_key, secret_key)
117
174
  credentials = "#{public_key}:#{secret_key}"
118
175
  encoded = Base64.strict_encode64(credentials)
119
- {
120
- "Authorization" => "Basic #{encoded}"
121
- }
176
+ { "Authorization" => "Basic #{encoded}" }
177
+ end
178
+
179
+ def build_sampler(sample_rate)
180
+ Sampling.build_sampler(sample_rate) || OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
122
181
  end
123
182
  end
124
183
  end
184
+ # rubocop:enable Metrics/ModuleLength
125
185
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mustache"
4
+
5
+ module Langfuse
6
+ # Renders prompt templates with Langfuse SDK-compatible variable semantics.
7
+ #
8
+ # @api private
9
+ class PromptRenderer < Mustache
10
+ # Langfuse variables are model input, not browser output; JS/Python SDKs substitute raw values.
11
+ #
12
+ # @param value [Object] Value to insert into the prompt
13
+ # @return [String] Raw string representation
14
+ def escape(value)
15
+ value.to_s
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langfuse
4
+ # Shared sampling helpers for trace and score emission.
5
+ #
6
+ # @api private
7
+ module Sampling
8
+ module_function
9
+
10
+ # Build the sampler used by both trace export and trace-linked score emission.
11
+ #
12
+ # @param sample_rate [Float] Sampling rate from 0.0 to 1.0
13
+ # @return [OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased, nil]
14
+ def build_sampler(sample_rate)
15
+ return nil if sample_rate >= 1.0
16
+
17
+ OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased.new(sample_rate)
18
+ end
19
+ end
20
+ end
@@ -31,6 +31,8 @@ module Langfuse
31
31
  # @return [Logger] Logger instance
32
32
  attr_reader :logger
33
33
 
34
+ HEX_TRACE_ID_PATTERN = /\A[0-9a-f]{32}\z/
35
+
34
36
  # Initialize a new ScoreClient
35
37
  #
36
38
  # @param api_client [ApiClient] The API client for sending batches
@@ -43,6 +45,9 @@ module Langfuse
43
45
  @mutex = Mutex.new
44
46
  @flush_thread = nil
45
47
  @shutdown = false
48
+ # Match the immutable tracing setup contract: once this client exists, later config
49
+ # mutations must not change score sampling without rebuilding the client.
50
+ @score_sampler = Sampling.build_sampler(config.sample_rate)
46
51
 
47
52
  start_flush_timer
48
53
  end
@@ -76,28 +81,19 @@ module Langfuse
76
81
  def create(name:, value:, id: nil, trace_id: nil, session_id: nil, observation_id: nil, comment: nil,
77
82
  metadata: nil, environment: nil, data_type: :numeric, dataset_run_id: nil, config_id: nil)
78
83
  validate_name(name)
79
- # Keep identifier policy server-side to preserve cross-SDK parity and avoid blocking valid future payloads.
80
84
  normalized_value = normalize_value(value, data_type)
81
85
  data_type_str = Types::SCORE_DATA_TYPES[data_type] || raise(ArgumentError, "Invalid data_type: #{data_type}")
82
86
 
87
+ return unless enqueue_trace_linked_score?(trace_id)
88
+
83
89
  event = build_score_event(
84
- name: name,
85
- value: normalized_value,
86
- id: id,
87
- trace_id: trace_id,
88
- session_id: session_id,
89
- observation_id: observation_id,
90
- comment: comment,
91
- metadata: metadata,
92
- environment: environment,
93
- data_type: data_type_str,
94
- dataset_run_id: dataset_run_id,
95
- config_id: config_id
90
+ name: name, value: normalized_value, id: id, trace_id: trace_id,
91
+ session_id: session_id, observation_id: observation_id, comment: comment,
92
+ metadata: metadata, environment: environment, data_type: data_type_str,
93
+ dataset_run_id: dataset_run_id, config_id: config_id
96
94
  )
97
95
 
98
96
  @queue << event
99
-
100
- # Trigger flush if batch size reached
101
97
  flush if @queue.size >= config.batch_size
102
98
  rescue StandardError => e
103
99
  logger.error("Langfuse score creation failed: #{e.message}")
@@ -294,14 +290,43 @@ module Langfuse
294
290
  # @return [Hash] Hash with :trace_id and :observation_id (may be nil)
295
291
  def extract_ids_from_active_span
296
292
  span = OpenTelemetry::Trace.current_span
297
- return { trace_id: nil, observation_id: nil } unless span&.recording?
293
+ span_context = span&.context
294
+ return { trace_id: nil, observation_id: nil } unless span_context&.valid?
298
295
 
299
296
  {
300
- trace_id: span.context.trace_id.unpack1("H*"),
301
- observation_id: span.context.span_id.unpack1("H*")
297
+ trace_id: span_context.trace_id.unpack1("H*"),
298
+ observation_id: span_context.span_id.unpack1("H*")
302
299
  }
303
300
  end
304
301
 
302
+ # Score sampling is decided purely by the configured sampler on the trace_id hash,
303
+ # matching langfuse-python. Non-hex trace ids and session/dataset-only scores bypass sampling.
304
+ def enqueue_trace_linked_score?(trace_id)
305
+ return true if trace_id.nil?
306
+ return true unless HEX_TRACE_ID_PATTERN.match?(trace_id)
307
+
308
+ sampler = score_sampler
309
+ return true if sampler.nil?
310
+ return true unless sampler.respond_to?(:should_sample?)
311
+
312
+ sample_result = sampler.should_sample?(
313
+ trace_id: [trace_id].pack("H*"),
314
+ parent_context: nil,
315
+ links: [],
316
+ name: "score",
317
+ kind: OpenTelemetry::Trace::SpanKind::INTERNAL,
318
+ attributes: {}
319
+ )
320
+ sample_result.sampled?
321
+ rescue StandardError => e
322
+ logger.warn("Langfuse score sampling fallback for trace_id=#{trace_id}: #{e.message}")
323
+ true
324
+ end
325
+
326
+ # Sampler is pinned at ScoreClient construction to match the "sample_rate requires reset!"
327
+ # contract and to keep each client's sampling scoped to its own config.
328
+ attr_reader :score_sampler
329
+
305
330
  # Send a batch of events to the API
306
331
  #
307
332
  # @param events [Array<Hash>] Array of event hashes
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langfuse
4
+ # Instrumentation scope name used by module-level Langfuse tracing.
5
+ LANGFUSE_TRACER_NAME = "langfuse-rb"
6
+
7
+ # Conservative allowlist of instrumentation scope prefixes that clearly belong to LLM workflows.
8
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES = [
9
+ LANGFUSE_TRACER_NAME,
10
+ "agent_framework",
11
+ "ai",
12
+ "haystack",
13
+ "langsmith",
14
+ "litellm",
15
+ "openinference",
16
+ "opentelemetry.instrumentation.anthropic",
17
+ "strands-agents",
18
+ "vllm"
19
+ ].freeze
20
+
21
+ # Matched per span in the export path, so avoid allocating the dotted form each call.
22
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES =
23
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.map { |prefix| "#{prefix}." }.freeze
24
+ private_constant :KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES
25
+
26
+ class << self
27
+ # Return whether the span was created by Langfuse's tracer.
28
+ #
29
+ # @param span [#instrumentation_scope] Span or span data to inspect
30
+ # @return [Boolean]
31
+ def langfuse_span?(span)
32
+ instrumentation_scope_name(span) == LANGFUSE_TRACER_NAME
33
+ end
34
+
35
+ # Return whether the span contains `gen_ai.*` attributes.
36
+ #
37
+ # @param span [#attributes] Span or span data to inspect
38
+ # @return [Boolean]
39
+ def genai_span?(span)
40
+ attributes = span.attributes
41
+ return false unless attributes
42
+
43
+ attributes.keys.any? { |key| key.is_a?(String) && key.start_with?("gen_ai.") }
44
+ end
45
+
46
+ # Return whether the span came from a known LLM instrumentation scope.
47
+ #
48
+ # @param span [#instrumentation_scope] Span or span data to inspect
49
+ # @return [Boolean]
50
+ def known_llm_instrumentor?(span)
51
+ scope_name = instrumentation_scope_name(span)
52
+ return false unless scope_name
53
+
54
+ return true if KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.include?(scope_name)
55
+
56
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES.any? do |dotted_prefix|
57
+ scope_name.start_with?(dotted_prefix)
58
+ end
59
+ end
60
+
61
+ # Return whether a span should be exported when no custom filter is configured.
62
+ #
63
+ # @param span [#instrumentation_scope, #attributes] Span or span data to inspect
64
+ # @return [Boolean]
65
+ def default_export_span?(span)
66
+ langfuse_span?(span) || genai_span?(span) || known_llm_instrumentor?(span)
67
+ end
68
+
69
+ # Cross-SDK parity keeps the `is_*` names public for compatibility.
70
+ alias is_langfuse_span langfuse_span?
71
+ alias is_genai_span genai_span?
72
+ alias is_known_llm_instrumentor known_llm_instrumentor?
73
+ alias is_default_export_span default_export_span?
74
+
75
+ private
76
+
77
+ def instrumentation_scope_name(span)
78
+ span.instrumentation_scope&.name
79
+ end
80
+ end
81
+ end