langfuse-rb 0.6.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,104 +2,78 @@
2
2
 
3
3
  require "opentelemetry/sdk"
4
4
  require "opentelemetry/exporter/otlp"
5
- require "opentelemetry/trace/propagation/trace_context"
6
5
  require "base64"
7
6
 
8
7
  module Langfuse
9
- # OpenTelemetry initialization and setup
10
- #
11
- # Handles configuration of the OTel SDK with Langfuse OTLP exporter
12
- # when tracing is enabled.
13
- #
8
+ # OpenTelemetry initialization and setup for Langfuse tracing.
9
+ # rubocop:disable Metrics/ModuleLength
14
10
  module OtelSetup
11
+ TRACING_CONFIG_FIELDS = %i[
12
+ public_key
13
+ secret_key
14
+ base_url
15
+ environment
16
+ release
17
+ sample_rate
18
+ should_export_span
19
+ tracing_async
20
+ batch_size
21
+ flush_interval
22
+ ].freeze
23
+ private_constant(:TRACING_CONFIG_FIELDS)
24
+
15
25
  class << self
16
- # @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured tracer provider
26
+ # @return [OpenTelemetry::SDK::Trace::TracerProvider, nil] The configured internal tracer provider
17
27
  attr_reader :tracer_provider
18
28
 
19
- # Initialize OpenTelemetry with Langfuse OTLP exporter
29
+ # Initialize Langfuse's internal tracer provider without mutating global OpenTelemetry state.
20
30
  #
21
31
  # @param config [Langfuse::Config] The Langfuse configuration
22
- # @return [void]
23
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
32
+ # @return [OpenTelemetry::SDK::Trace::TracerProvider]
24
33
  def setup(config)
25
- # Create OTLP exporter configured for Langfuse
26
- exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
27
- endpoint: "#{config.base_url}/api/public/otel/v1/traces",
28
- headers: build_headers(config.public_key, config.secret_key),
29
- compression: "gzip"
30
- )
31
-
32
- # Create processor based on async configuration
33
- # IMPORTANT: Always use BatchSpanProcessor (even in sync mode) to ensure spans
34
- # are exported together, which allows proper parent-child relationship detection
35
- processor = if config.tracing_async
36
- # Async: BatchSpanProcessor batches and sends in background
37
- OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
38
- exporter,
39
- max_queue_size: config.batch_size * 2, # Buffer more than batch_size
40
- schedule_delay: config.flush_interval * 1000, # Convert seconds to milliseconds
41
- max_export_batch_size: config.batch_size
42
- )
43
- else
44
- # Sync: BatchSpanProcessor with minimal delay (flushes on force_flush)
45
- # This collects spans from the same trace and exports them together,
46
- # which is critical for correct parent_observation_id calculation
47
- OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor.new(
48
- exporter,
49
- max_queue_size: config.batch_size * 2,
50
- schedule_delay: 60_000, # 60 seconds (relies on explicit force_flush)
51
- max_export_batch_size: config.batch_size
52
- )
53
- end
54
-
55
- # Create TracerProvider with processor
56
- @tracer_provider = OpenTelemetry::SDK::Trace::TracerProvider.new
57
- @tracer_provider.add_span_processor(processor)
58
-
59
- # Add span processor for propagated attributes and env/release defaults
60
- # This must be added AFTER the BatchSpanProcessor so it runs before export and can
61
- # apply all attributes (propagated IDs, environment, release) to the spans being sent
62
- span_processor = SpanProcessor.new(config: config)
63
- @tracer_provider.add_span_processor(span_processor)
64
-
65
- # Set as global tracer provider
66
- OpenTelemetry.tracer_provider = @tracer_provider
67
-
68
- # Configure W3C TraceContext propagator if not already set
69
- if OpenTelemetry.propagation.is_a?(OpenTelemetry::Context::Propagation::NoopTextMapPropagator)
70
- OpenTelemetry.propagation = OpenTelemetry::Trace::Propagation::TraceContext::TextMapPropagator.new
71
- config.logger.debug("Langfuse: Configured W3C TraceContext propagator")
72
- else
73
- config.logger.debug("Langfuse: Using existing propagator: #{OpenTelemetry.propagation.class}")
34
+ validate_tracing_config!(config)
35
+ return existing_provider_for(config) if initialized?
36
+
37
+ candidate_provider = nil
38
+ provider = nil
39
+ created = false
40
+ candidate_provider = build_tracer_provider(config)
41
+ provider, created = publish_provider(candidate_provider, tracing_config_snapshot(config))
42
+ unless created
43
+ candidate_provider.shutdown(timeout: 30)
44
+ return existing_provider_for(config)
74
45
  end
75
46
 
76
- mode = config.tracing_async ? "async" : "sync"
77
- config.logger.info("Langfuse tracing initialized with OpenTelemetry (#{mode} mode)")
47
+ log_initialized(config)
48
+ provider
49
+ rescue StandardError
50
+ rollback_provider(provider) if created
51
+ raise
78
52
  end
79
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
80
53
 
81
- # Shutdown the tracer provider and flush any pending spans
54
+ # Shutdown the internal tracer provider and flush any pending spans.
82
55
  #
83
56
  # @param timeout [Integer] Timeout in seconds
84
57
  # @return [void]
85
58
  def shutdown(timeout: 30)
86
- return unless @tracer_provider
87
-
88
- @tracer_provider.shutdown(timeout: timeout)
89
- @tracer_provider = nil
59
+ provider = nil
60
+ setup_mutex.synchronize do
61
+ provider = @tracer_provider
62
+ @tracer_provider = nil
63
+ @config_snapshot = nil
64
+ end
65
+ provider&.shutdown(timeout: timeout)
90
66
  end
91
67
 
92
- # Force flush all pending spans
68
+ # Force flush all pending spans on the internal tracer provider.
93
69
  #
94
70
  # @param timeout [Integer] Timeout in seconds
95
71
  # @return [void]
96
72
  def force_flush(timeout: 30)
97
- return unless @tracer_provider
98
-
99
- @tracer_provider.force_flush(timeout: timeout)
73
+ @tracer_provider&.force_flush(timeout: timeout)
100
74
  end
101
75
 
102
- # Check if OTel is initialized
76
+ # Check if Langfuse tracing has been initialized.
103
77
  #
104
78
  # @return [Boolean]
105
79
  def initialized?
@@ -108,18 +82,104 @@ module Langfuse
108
82
 
109
83
  private
110
84
 
111
- # Build HTTP headers for Langfuse OTLP endpoint
112
- #
113
- # @param public_key [String] Langfuse public API key
114
- # @param secret_key [String] Langfuse secret API key
115
- # @return [Hash] HTTP headers with Basic Auth
85
+ def existing_provider_for(config)
86
+ snapshot = tracing_config_snapshot(config)
87
+ if @config_snapshot == snapshot
88
+ config.logger.debug("Langfuse tracing already initialized; reusing existing tracer provider")
89
+ else
90
+ config.logger.warn(
91
+ "Langfuse tracing is already initialized. Changes to #{TRACING_CONFIG_FIELDS.join(', ')} " \
92
+ "require Langfuse.reset! before they take effect."
93
+ )
94
+ end
95
+ @tracer_provider
96
+ end
97
+
98
+ def publish_provider(provider, snapshot)
99
+ created = false
100
+ current = nil
101
+
102
+ # This mutex only guards publication so setup never exposes a half-built provider.
103
+ setup_mutex.synchronize do
104
+ if @tracer_provider
105
+ current = @tracer_provider
106
+ else
107
+ @tracer_provider = provider
108
+ @config_snapshot = snapshot
109
+ current = provider
110
+ created = true
111
+ end
112
+ end
113
+
114
+ [current, created]
115
+ end
116
+
117
+ def rollback_provider(provider)
118
+ setup_mutex.synchronize do
119
+ return unless @tracer_provider.equal?(provider)
120
+
121
+ @tracer_provider = nil
122
+ @config_snapshot = nil
123
+ end
124
+ provider.shutdown(timeout: 1)
125
+ rescue StandardError
126
+ nil
127
+ end
128
+
129
+ def build_tracer_provider(config)
130
+ provider = OpenTelemetry::SDK::Trace::TracerProvider.new(
131
+ sampler: build_sampler(config.sample_rate)
132
+ )
133
+ provider.add_span_processor(
134
+ SpanProcessor.new(config: config, exporter: build_exporter(config))
135
+ )
136
+ provider
137
+ end
138
+
139
+ def build_exporter(config)
140
+ OpenTelemetry::Exporter::OTLP::Exporter.new(
141
+ endpoint: "#{config.base_url}/api/public/otel/v1/traces",
142
+ headers: build_headers(config.public_key, config.secret_key),
143
+ compression: "gzip"
144
+ )
145
+ end
146
+
147
+ def log_initialized(config)
148
+ mode = config.tracing_async ? "async" : "sync"
149
+ config.logger.info("Langfuse tracing initialized with OpenTelemetry (#{mode} mode)")
150
+ end
151
+
152
+ def validate_tracing_config!(config)
153
+ raise ConfigurationError, "public_key is required" if blank?(config.public_key)
154
+ raise ConfigurationError, "secret_key is required" if blank?(config.secret_key)
155
+ raise ConfigurationError, "base_url cannot be empty" if blank?(config.base_url)
156
+ return if config.should_export_span.nil? || config.should_export_span.respond_to?(:call)
157
+
158
+ raise ConfigurationError, "should_export_span must respond to #call"
159
+ end
160
+
161
+ def tracing_config_snapshot(config)
162
+ TRACING_CONFIG_FIELDS.to_h { |field| [field, config.public_send(field)] }.freeze
163
+ end
164
+
165
+ def setup_mutex
166
+ @setup_mutex ||= Mutex.new
167
+ end
168
+
169
+ def blank?(value)
170
+ value.nil? || value.empty?
171
+ end
172
+
116
173
  def build_headers(public_key, secret_key)
117
174
  credentials = "#{public_key}:#{secret_key}"
118
175
  encoded = Base64.strict_encode64(credentials)
119
- {
120
- "Authorization" => "Basic #{encoded}"
121
- }
176
+ { "Authorization" => "Basic #{encoded}" }
177
+ end
178
+
179
+ def build_sampler(sample_rate)
180
+ Sampling.build_sampler(sample_rate) || OpenTelemetry::SDK::Trace::Samplers::ALWAYS_ON
122
181
  end
123
182
  end
124
183
  end
184
+ # rubocop:enable Metrics/ModuleLength
125
185
  end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langfuse
4
+ # Shared sampling helpers for trace and score emission.
5
+ #
6
+ # @api private
7
+ module Sampling
8
+ module_function
9
+
10
+ # Build the sampler used by both trace export and trace-linked score emission.
11
+ #
12
+ # @param sample_rate [Float] Sampling rate from 0.0 to 1.0
13
+ # @return [OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased, nil]
14
+ def build_sampler(sample_rate)
15
+ return nil if sample_rate >= 1.0
16
+
17
+ OpenTelemetry::SDK::Trace::Samplers::TraceIdRatioBased.new(sample_rate)
18
+ end
19
+ end
20
+ end
@@ -31,6 +31,8 @@ module Langfuse
31
31
  # @return [Logger] Logger instance
32
32
  attr_reader :logger
33
33
 
34
+ HEX_TRACE_ID_PATTERN = /\A[0-9a-f]{32}\z/
35
+
34
36
  # Initialize a new ScoreClient
35
37
  #
36
38
  # @param api_client [ApiClient] The API client for sending batches
@@ -43,6 +45,9 @@ module Langfuse
43
45
  @mutex = Mutex.new
44
46
  @flush_thread = nil
45
47
  @shutdown = false
48
+ # Match the immutable tracing setup contract: once this client exists, later config
49
+ # mutations must not change score sampling without rebuilding the client.
50
+ @score_sampler = Sampling.build_sampler(config.sample_rate)
46
51
 
47
52
  start_flush_timer
48
53
  end
@@ -76,28 +81,19 @@ module Langfuse
76
81
  def create(name:, value:, id: nil, trace_id: nil, session_id: nil, observation_id: nil, comment: nil,
77
82
  metadata: nil, environment: nil, data_type: :numeric, dataset_run_id: nil, config_id: nil)
78
83
  validate_name(name)
79
- # Keep identifier policy server-side to preserve cross-SDK parity and avoid blocking valid future payloads.
80
84
  normalized_value = normalize_value(value, data_type)
81
85
  data_type_str = Types::SCORE_DATA_TYPES[data_type] || raise(ArgumentError, "Invalid data_type: #{data_type}")
82
86
 
87
+ return unless enqueue_trace_linked_score?(trace_id)
88
+
83
89
  event = build_score_event(
84
- name: name,
85
- value: normalized_value,
86
- id: id,
87
- trace_id: trace_id,
88
- session_id: session_id,
89
- observation_id: observation_id,
90
- comment: comment,
91
- metadata: metadata,
92
- environment: environment,
93
- data_type: data_type_str,
94
- dataset_run_id: dataset_run_id,
95
- config_id: config_id
90
+ name: name, value: normalized_value, id: id, trace_id: trace_id,
91
+ session_id: session_id, observation_id: observation_id, comment: comment,
92
+ metadata: metadata, environment: environment, data_type: data_type_str,
93
+ dataset_run_id: dataset_run_id, config_id: config_id
96
94
  )
97
95
 
98
96
  @queue << event
99
-
100
- # Trigger flush if batch size reached
101
97
  flush if @queue.size >= config.batch_size
102
98
  rescue StandardError => e
103
99
  logger.error("Langfuse score creation failed: #{e.message}")
@@ -294,14 +290,43 @@ module Langfuse
294
290
  # @return [Hash] Hash with :trace_id and :observation_id (may be nil)
295
291
  def extract_ids_from_active_span
296
292
  span = OpenTelemetry::Trace.current_span
297
- return { trace_id: nil, observation_id: nil } unless span&.recording?
293
+ span_context = span&.context
294
+ return { trace_id: nil, observation_id: nil } unless span_context&.valid?
298
295
 
299
296
  {
300
- trace_id: span.context.trace_id.unpack1("H*"),
301
- observation_id: span.context.span_id.unpack1("H*")
297
+ trace_id: span_context.trace_id.unpack1("H*"),
298
+ observation_id: span_context.span_id.unpack1("H*")
302
299
  }
303
300
  end
304
301
 
302
+ # Score sampling is decided purely by the configured sampler on the trace_id hash,
303
+ # matching langfuse-python. Non-hex trace ids and session/dataset-only scores bypass sampling.
304
+ def enqueue_trace_linked_score?(trace_id)
305
+ return true if trace_id.nil?
306
+ return true unless HEX_TRACE_ID_PATTERN.match?(trace_id)
307
+
308
+ sampler = score_sampler
309
+ return true if sampler.nil?
310
+ return true unless sampler.respond_to?(:should_sample?)
311
+
312
+ sample_result = sampler.should_sample?(
313
+ trace_id: [trace_id].pack("H*"),
314
+ parent_context: nil,
315
+ links: [],
316
+ name: "score",
317
+ kind: OpenTelemetry::Trace::SpanKind::INTERNAL,
318
+ attributes: {}
319
+ )
320
+ sample_result.sampled?
321
+ rescue StandardError => e
322
+ logger.warn("Langfuse score sampling fallback for trace_id=#{trace_id}: #{e.message}")
323
+ true
324
+ end
325
+
326
+ # Sampler is pinned at ScoreClient construction to match the "sample_rate requires reset!"
327
+ # contract and to keep each client's sampling scoped to its own config.
328
+ attr_reader :score_sampler
329
+
305
330
  # Send a batch of events to the API
306
331
  #
307
332
  # @param events [Array<Hash>] Array of event hashes
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langfuse
4
+ # Instrumentation scope name used by module-level Langfuse tracing.
5
+ LANGFUSE_TRACER_NAME = "langfuse-rb"
6
+
7
+ # Conservative allowlist of instrumentation scope prefixes that clearly belong to LLM workflows.
8
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES = [
9
+ LANGFUSE_TRACER_NAME,
10
+ "agent_framework",
11
+ "ai",
12
+ "haystack",
13
+ "langsmith",
14
+ "litellm",
15
+ "openinference",
16
+ "opentelemetry.instrumentation.anthropic",
17
+ "strands-agents",
18
+ "vllm"
19
+ ].freeze
20
+
21
+ # Matched per span in the export path, so avoid allocating the dotted form each call.
22
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES =
23
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.map { |prefix| "#{prefix}." }.freeze
24
+ private_constant :KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES
25
+
26
+ class << self
27
+ # Return whether the span was created by Langfuse's tracer.
28
+ #
29
+ # @param span [#instrumentation_scope] Span or span data to inspect
30
+ # @return [Boolean]
31
+ def langfuse_span?(span)
32
+ instrumentation_scope_name(span) == LANGFUSE_TRACER_NAME
33
+ end
34
+
35
+ # Return whether the span contains `gen_ai.*` attributes.
36
+ #
37
+ # @param span [#attributes] Span or span data to inspect
38
+ # @return [Boolean]
39
+ def genai_span?(span)
40
+ attributes = span.attributes
41
+ return false unless attributes
42
+
43
+ attributes.keys.any? { |key| key.is_a?(String) && key.start_with?("gen_ai.") }
44
+ end
45
+
46
+ # Return whether the span came from a known LLM instrumentation scope.
47
+ #
48
+ # @param span [#instrumentation_scope] Span or span data to inspect
49
+ # @return [Boolean]
50
+ def known_llm_instrumentor?(span)
51
+ scope_name = instrumentation_scope_name(span)
52
+ return false unless scope_name
53
+
54
+ return true if KNOWN_LLM_INSTRUMENTATION_SCOPE_PREFIXES.include?(scope_name)
55
+
56
+ KNOWN_LLM_INSTRUMENTATION_SCOPE_DOTTED_PREFIXES.any? do |dotted_prefix|
57
+ scope_name.start_with?(dotted_prefix)
58
+ end
59
+ end
60
+
61
+ # Return whether a span should be exported when no custom filter is configured.
62
+ #
63
+ # @param span [#instrumentation_scope, #attributes] Span or span data to inspect
64
+ # @return [Boolean]
65
+ def default_export_span?(span)
66
+ langfuse_span?(span) || genai_span?(span) || known_llm_instrumentor?(span)
67
+ end
68
+
69
+ # Cross-SDK parity keeps the `is_*` names public for compatibility.
70
+ alias is_langfuse_span langfuse_span?
71
+ alias is_genai_span genai_span?
72
+ alias is_known_llm_instrumentor known_llm_instrumentor?
73
+ alias is_default_export_span default_export_span?
74
+
75
+ private
76
+
77
+ def instrumentation_scope_name(span)
78
+ span.instrumentation_scope&.name
79
+ end
80
+ end
81
+ end
@@ -3,22 +3,26 @@
3
3
  require "opentelemetry/sdk"
4
4
 
5
5
  module Langfuse
6
- # Span processor that applies default and propagated trace attributes on new spans.
7
- #
8
- # On span start, this processor first applies configured trace defaults
9
- # (environment/release), then overlays attributes propagated in OpenTelemetry
10
- # context (user/session/metadata/tags/version). This ensures consistent
11
- # trace dimensions while still honoring per-request propagation.
6
+ # Batch span processor that owns Langfuse's enrichment and export filtering.
12
7
  #
13
8
  # @api private
14
- class SpanProcessor < OpenTelemetry::SDK::Trace::SpanProcessor
15
- # @param config [Langfuse::Config, nil] SDK configuration used to build trace defaults
16
- def initialize(config: Langfuse.configuration)
9
+ class SpanProcessor < OpenTelemetry::SDK::Trace::Export::BatchSpanProcessor
10
+ # @param config [Langfuse::Config] SDK configuration used for defaults and filtering
11
+ # @param exporter [#export, #force_flush, #shutdown] Span exporter used by the batch processor
12
+ def initialize(config:, exporter:)
13
+ @logger = config.logger
17
14
  @default_trace_attributes = build_default_trace_attributes(config).freeze
18
- super()
15
+ @should_export_span = config.should_export_span || Langfuse.method(:default_export_span?)
16
+
17
+ super(
18
+ exporter,
19
+ max_queue_size: config.batch_size * 2,
20
+ schedule_delay: schedule_delay_for(config),
21
+ max_export_batch_size: config.batch_size
22
+ )
19
23
  end
20
24
 
21
- # Called when a span starts
25
+ # Apply Langfuse trace defaults and propagated attributes before a span records work.
22
26
  #
23
27
  # @param span [OpenTelemetry::SDK::Trace::Span] The span that started
24
28
  # @param parent_context [OpenTelemetry::Context] The parent context
@@ -30,41 +34,28 @@ module Langfuse
30
34
  apply_attributes(span, propagated_attributes(parent_context))
31
35
  end
32
36
 
33
- # Called when a span ends
37
+ # Drop spans when the export filter rejects them or raises.
34
38
  #
35
39
  # @param span [OpenTelemetry::SDK::Trace::Span] The span that ended
36
40
  # @return [void]
37
41
  def on_finish(span)
38
- # No-op - we don't need to do anything when spans finish
39
- end
42
+ return unless should_export_span?(span)
40
43
 
41
- # Shutdown the processor
42
- #
43
- # @param timeout [Integer, nil] Timeout in seconds (unused for this processor)
44
- # @return [Integer] Always returns 0 (no timeout needed for no-op)
45
- def shutdown(timeout: nil)
46
- # No-op - nothing to clean up
47
- # Return 0 to match OpenTelemetry SDK expectation (it finds max timeout from processors)
48
- _ = timeout # Suppress unused argument warning
49
- 0
50
- end
51
-
52
- # Force flush (no-op for this processor)
53
- #
54
- # @param timeout [Integer, nil] Timeout in seconds (unused for this processor)
55
- # @return [Integer] Always returns 0 (no timeout needed for no-op)
56
- def force_flush(timeout: nil)
57
- # No-op - nothing to flush
58
- # Return 0 to match OpenTelemetry SDK expectation (it finds max timeout from processors)
59
- _ = timeout # Suppress unused argument warning
60
- 0
44
+ super
61
45
  end
62
46
 
63
47
  private
64
48
 
65
- def build_default_trace_attributes(config)
66
- return {} unless config
49
+ # Sync mode relies on explicit `force_flush` calls, so keep the background flush
50
+ # interval long enough that it rarely fires on its own.
51
+ SYNC_SCHEDULE_DELAY_MS = 60_000
52
+ private_constant :SYNC_SCHEDULE_DELAY_MS
53
+
54
+ def schedule_delay_for(config)
55
+ config.tracing_async ? config.flush_interval * 1000 : SYNC_SCHEDULE_DELAY_MS
56
+ end
67
57
 
58
+ def build_default_trace_attributes(config)
68
59
  OtelAttributes.create_trace_attributes(
69
60
  { environment: config.environment, release: config.release }
70
61
  )
@@ -79,5 +70,15 @@ module Langfuse
79
70
  def apply_attributes(span, attributes)
80
71
  attributes.each { |key, value| span.set_attribute(key, value) }
81
72
  end
73
+
74
+ def should_export_span?(span)
75
+ @should_export_span.call(span)
76
+ rescue StandardError => e
77
+ @logger.error(
78
+ "Langfuse tracing dropped span '#{span.name}' because should_export_span raised: " \
79
+ "#{e.class}: #{e.message}"
80
+ )
81
+ false
82
+ end
82
83
  end
83
84
  end
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "digest"
4
+
5
+ module Langfuse
6
+ # Deterministic and random trace/observation ID generation.
7
+ #
8
+ # Mirrors the Python and JS SDK helpers so the same seed produces the same
9
+ # trace ID across all three SDKs. This lets callers correlate Langfuse traces
10
+ # with external system identifiers (database primary keys, request IDs, etc.)
11
+ # and score or reference traces later without having to persist the generated
12
+ # Langfuse ID.
13
+ #
14
+ # @example Deterministic from an external ID
15
+ # trace_id = Langfuse::TraceId.create(seed: "order-12345")
16
+ # Langfuse.observe("process-order", trace_id: trace_id) { |span| ... }
17
+ # Langfuse.create_score(name: "quality", value: 0.9, trace_id: trace_id)
18
+ #
19
+ # @example Random (no seed)
20
+ # trace_id = Langfuse::TraceId.create
21
+ module TraceId
22
+ TRACE_ID_PATTERN = /\A[0-9a-f]{32}\z/
23
+ INVALID_TRACE_ID = ("0" * 32)
24
+
25
+ private_constant :TRACE_ID_PATTERN, :INVALID_TRACE_ID
26
+
27
+ class << self
28
+ # Generate a W3C trace ID (32 lowercase hex chars).
29
+ #
30
+ # With no seed, delegates to OpenTelemetry's random trace ID generator.
31
+ # With a seed, takes the first 16 bytes of SHA-256(seed) so the same
32
+ # input always produces the same trace ID.
33
+ #
34
+ # @note Avoid passing PII, secrets, or credentials as seeds — the raw seed
35
+ # value appears in application code and may leak through logs/backtraces.
36
+ # Use stable external identifiers (database PKs, UUIDs, request IDs).
37
+ # @param seed [String, nil] Optional seed for deterministic generation.
38
+ # Must be a String if provided; non-String values raise ArgumentError
39
+ # for cross-SDK parity (Python/JS both reject non-strings).
40
+ # @return [String] 32-character lowercase hex trace ID
41
+ # @raise [ArgumentError] if seed is not nil and not a String
42
+ def create(seed: nil)
43
+ return OpenTelemetry::Trace.generate_trace_id.unpack1("H*") if seed.nil?
44
+
45
+ Digest::SHA256.digest(validate_seed!(seed))[0, 16].unpack1("H*")
46
+ end
47
+
48
+ private
49
+
50
+ # @api private
51
+ def validate_seed!(seed)
52
+ raise ArgumentError, "seed must be a String, got #{seed.class}" unless seed.is_a?(String)
53
+
54
+ # ASCII-8BIT strings (binary) often already hold valid UTF-8 bytes
55
+ # but can't be transcoded — re-tag them instead.
56
+ return seed.dup.force_encoding("UTF-8") if seed.encoding == Encoding::ASCII_8BIT
57
+
58
+ seed.encode("UTF-8")
59
+ end
60
+
61
+ # @api private
62
+ def valid?(trace_id)
63
+ return false unless trace_id.is_a?(String) && TRACE_ID_PATTERN.match?(trace_id)
64
+
65
+ trace_id != INVALID_TRACE_ID
66
+ end
67
+
68
+ # Build a sampled OpenTelemetry SpanContext carrying the given hex trace ID.
69
+ #
70
+ # A random span_id is generated as a placeholder — only the trace_id is
71
+ # consumed by the child span that gets created.
72
+ #
73
+ # @api private
74
+ def to_span_context(trace_id)
75
+ raise ArgumentError, "Invalid trace_id: #{trace_id.inspect}" unless valid?(trace_id)
76
+
77
+ OpenTelemetry::Trace::SpanContext.new(
78
+ trace_id: [trace_id].pack("H*"),
79
+ span_id: OpenTelemetry::Trace.generate_span_id,
80
+ trace_flags: OpenTelemetry::Trace::TraceFlags::SAMPLED,
81
+ # Cross-SDK parity: Python uses is_remote=False (_create_remote_parent_span).
82
+ # Changing this would alter ParentBased sampler behavior across SDKs.
83
+ remote: false
84
+ )
85
+ end
86
+ end
87
+ end
88
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langfuse
4
- VERSION = "0.6.0"
4
+ VERSION = "0.8.0"
5
5
  end