datadog 2.22.0 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +59 -2
- data/ext/LIBDATADOG_DEVELOPMENT.md +1 -58
- data/ext/datadog_profiling_native_extension/collectors_stack.c +4 -0
- data/ext/datadog_profiling_native_extension/datadog_ruby_common.h +1 -1
- data/ext/datadog_profiling_native_extension/extconf.rb +6 -4
- data/ext/datadog_profiling_native_extension/heap_recorder.c +1 -1
- data/ext/libdatadog_api/datadog_ruby_common.h +1 -1
- data/ext/libdatadog_api/feature_flags.c +554 -0
- data/ext/libdatadog_api/feature_flags.h +5 -0
- data/ext/libdatadog_api/init.c +2 -0
- data/ext/libdatadog_api/library_config.c +12 -11
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/appsec/api_security/route_extractor.rb +23 -6
- data/lib/datadog/appsec/api_security/sampler.rb +7 -4
- data/lib/datadog/appsec/assets/blocked.html +8 -0
- data/lib/datadog/appsec/assets/blocked.json +1 -1
- data/lib/datadog/appsec/assets/blocked.text +3 -1
- data/lib/datadog/appsec/assets.rb +1 -1
- data/lib/datadog/appsec/remote.rb +4 -0
- data/lib/datadog/appsec/response.rb +18 -4
- data/lib/datadog/core/configuration/components.rb +30 -3
- data/lib/datadog/core/configuration/config_helper.rb +1 -1
- data/lib/datadog/core/configuration/settings.rb +14 -0
- data/lib/datadog/core/configuration/supported_configurations.rb +330 -301
- data/lib/datadog/core/ddsketch.rb +0 -2
- data/lib/datadog/core/environment/ext.rb +6 -0
- data/lib/datadog/core/environment/process.rb +79 -0
- data/lib/datadog/core/feature_flags.rb +61 -0
- data/lib/datadog/core/remote/client/capabilities.rb +7 -0
- data/lib/datadog/core/remote/transport/config.rb +2 -10
- data/lib/datadog/core/remote/transport/http/config.rb +9 -9
- data/lib/datadog/core/remote/transport/http/negotiation.rb +17 -8
- data/lib/datadog/core/remote/transport/http.rb +2 -0
- data/lib/datadog/core/remote/transport/negotiation.rb +2 -18
- data/lib/datadog/core/remote/worker.rb +25 -37
- data/lib/datadog/core/tag_builder.rb +0 -4
- data/lib/datadog/core/tag_normalizer.rb +84 -0
- data/lib/datadog/core/telemetry/component.rb +7 -3
- data/lib/datadog/core/telemetry/event/app_started.rb +52 -49
- data/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +1 -1
- data/lib/datadog/core/telemetry/logger.rb +2 -2
- data/lib/datadog/core/telemetry/logging.rb +2 -8
- data/lib/datadog/core/telemetry/transport/http/telemetry.rb +5 -6
- data/lib/datadog/core/telemetry/transport/telemetry.rb +1 -2
- data/lib/datadog/core/transport/http/client.rb +69 -0
- data/lib/datadog/core/utils/array.rb +29 -0
- data/lib/datadog/{appsec/api_security → core/utils}/lru_cache.rb +10 -21
- data/lib/datadog/core/utils/network.rb +3 -1
- data/lib/datadog/core/utils/only_once_successful.rb +6 -2
- data/lib/datadog/core/utils.rb +2 -0
- data/lib/datadog/data_streams/configuration/settings.rb +49 -0
- data/lib/datadog/data_streams/configuration.rb +11 -0
- data/lib/datadog/data_streams/ext.rb +11 -0
- data/lib/datadog/data_streams/extensions.rb +16 -0
- data/lib/datadog/data_streams/pathway_context.rb +169 -0
- data/lib/datadog/data_streams/processor.rb +509 -0
- data/lib/datadog/data_streams/transport/http/api.rb +33 -0
- data/lib/datadog/data_streams/transport/http/client.rb +21 -0
- data/lib/datadog/data_streams/transport/http/stats.rb +87 -0
- data/lib/datadog/data_streams/transport/http.rb +41 -0
- data/lib/datadog/data_streams/transport/stats.rb +60 -0
- data/lib/datadog/data_streams.rb +100 -0
- data/lib/datadog/di/component.rb +0 -16
- data/lib/datadog/di/el/evaluator.rb +1 -1
- data/lib/datadog/di/error.rb +4 -0
- data/lib/datadog/di/instrumenter.rb +76 -30
- data/lib/datadog/di/probe.rb +20 -0
- data/lib/datadog/di/probe_manager.rb +10 -2
- data/lib/datadog/di/probe_notification_builder.rb +62 -23
- data/lib/datadog/di/proc_responder.rb +32 -0
- data/lib/datadog/di/transport/diagnostics.rb +2 -2
- data/lib/datadog/di/transport/http/diagnostics.rb +2 -4
- data/lib/datadog/di/transport/http/input.rb +2 -4
- data/lib/datadog/di/transport/http.rb +6 -2
- data/lib/datadog/di/transport/input.rb +64 -4
- data/lib/datadog/open_feature/component.rb +60 -0
- data/lib/datadog/open_feature/configuration.rb +27 -0
- data/lib/datadog/open_feature/evaluation_engine.rb +69 -0
- data/lib/datadog/open_feature/exposures/batch_builder.rb +32 -0
- data/lib/datadog/open_feature/exposures/buffer.rb +43 -0
- data/lib/datadog/open_feature/exposures/deduplicator.rb +30 -0
- data/lib/datadog/open_feature/exposures/event.rb +60 -0
- data/lib/datadog/open_feature/exposures/reporter.rb +40 -0
- data/lib/datadog/open_feature/exposures/worker.rb +116 -0
- data/lib/datadog/open_feature/ext.rb +14 -0
- data/lib/datadog/open_feature/native_evaluator.rb +38 -0
- data/lib/datadog/open_feature/noop_evaluator.rb +26 -0
- data/lib/datadog/open_feature/provider.rb +141 -0
- data/lib/datadog/open_feature/remote.rb +74 -0
- data/lib/datadog/open_feature/resolution_details.rb +35 -0
- data/lib/datadog/open_feature/transport.rb +72 -0
- data/lib/datadog/open_feature.rb +19 -0
- data/lib/datadog/opentelemetry/configuration/settings.rb +159 -0
- data/lib/datadog/opentelemetry/metrics.rb +110 -0
- data/lib/datadog/opentelemetry/sdk/configurator.rb +25 -1
- data/lib/datadog/opentelemetry/sdk/metrics_exporter.rb +38 -0
- data/lib/datadog/opentelemetry.rb +3 -0
- data/lib/datadog/profiling/collectors/code_provenance.rb +15 -6
- data/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +1 -1
- data/lib/datadog/profiling/collectors/idle_sampling_helper.rb +1 -1
- data/lib/datadog/profiling/profiler.rb +4 -0
- data/lib/datadog/profiling/tag_builder.rb +36 -3
- data/lib/datadog/profiling.rb +1 -2
- data/lib/datadog/single_step_instrument.rb +1 -1
- data/lib/datadog/tracing/configuration/ext.rb +9 -0
- data/lib/datadog/tracing/configuration/settings.rb +74 -0
- data/lib/datadog/tracing/contrib/action_pack/action_controller/instrumentation.rb +4 -4
- data/lib/datadog/tracing/contrib/action_pack/utils.rb +1 -2
- data/lib/datadog/tracing/contrib/active_job/log_injection.rb +21 -7
- data/lib/datadog/tracing/contrib/active_job/patcher.rb +5 -1
- data/lib/datadog/tracing/contrib/aws/instrumentation.rb +4 -2
- data/lib/datadog/tracing/contrib/ethon/easy_patch.rb +4 -1
- data/lib/datadog/tracing/contrib/excon/configuration/settings.rb +11 -3
- data/lib/datadog/tracing/contrib/faraday/configuration/settings.rb +11 -7
- data/lib/datadog/tracing/contrib/grape/configuration/settings.rb +7 -3
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +22 -17
- data/lib/datadog/tracing/contrib/http/configuration/settings.rb +11 -3
- data/lib/datadog/tracing/contrib/httpclient/configuration/settings.rb +11 -3
- data/lib/datadog/tracing/contrib/httprb/configuration/settings.rb +11 -3
- data/lib/datadog/tracing/contrib/kafka/instrumentation/consumer.rb +66 -0
- data/lib/datadog/tracing/contrib/kafka/instrumentation/producer.rb +66 -0
- data/lib/datadog/tracing/contrib/kafka/patcher.rb +14 -0
- data/lib/datadog/tracing/contrib/karafka/framework.rb +30 -0
- data/lib/datadog/tracing/contrib/karafka/monitor.rb +11 -0
- data/lib/datadog/tracing/contrib/karafka/patcher.rb +32 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +59 -27
- data/lib/datadog/tracing/contrib/rack/route_inference.rb +53 -0
- data/lib/datadog/tracing/contrib/rails/middlewares.rb +2 -2
- data/lib/datadog/tracing/contrib/rest_client/request_patch.rb +4 -1
- data/lib/datadog/tracing/contrib/roda/instrumentation.rb +3 -1
- data/lib/datadog/tracing/contrib/sinatra/tracer_middleware.rb +3 -1
- data/lib/datadog/tracing/contrib/status_range_matcher.rb +7 -0
- data/lib/datadog/tracing/contrib/waterdrop/configuration/settings.rb +27 -0
- data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +48 -0
- data/lib/datadog/tracing/contrib/waterdrop/ext.rb +17 -0
- data/lib/datadog/tracing/contrib/waterdrop/integration.rb +43 -0
- data/lib/datadog/tracing/contrib/waterdrop/middleware.rb +46 -0
- data/lib/datadog/tracing/contrib/waterdrop/patcher.rb +46 -0
- data/lib/datadog/tracing/contrib/waterdrop/producer.rb +50 -0
- data/lib/datadog/tracing/contrib/waterdrop.rb +37 -0
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/metadata/ext.rb +1 -1
- data/lib/datadog/tracing/transport/http/client.rb +12 -26
- data/lib/datadog/tracing/transport/trace_formatter.rb +11 -0
- data/lib/datadog/tracing/transport/traces.rb +3 -5
- data/lib/datadog/version.rb +2 -2
- data/lib/datadog.rb +2 -0
- metadata +78 -15
- data/lib/datadog/core/remote/transport/http/client.rb +0 -49
- data/lib/datadog/core/telemetry/transport/http/client.rb +0 -49
- data/lib/datadog/di/transport/http/client.rb +0 -47
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'stringio'
|
|
4
|
+
require_relative '../core/utils/base64'
|
|
5
|
+
|
|
6
|
+
module Datadog
  module DataStreams
    # Represents a pathway context for data streams monitoring.
    #
    # Wire format (shared across Datadog tracers):
    #   - 8 bytes: pathway hash, explicitly little-endian
    #   - unsigned LEB128 varint: pathway start time (ms since epoch)
    #   - unsigned LEB128 varint: current edge start time (ms since epoch)
    class PathwayContext
      # The current pathway hash value (result of FNV-1a hash function)
      attr_accessor :hash
      # When the pathway started
      attr_accessor :pathway_start
      # When the current edge started
      attr_accessor :current_edge_start
      # The hash value of the parent checkpoint
      attr_accessor :parent_hash
      # The direction tag of the previous checkpoint (e.g., 'direction:in', 'direction:out'), or nil if none
      attr_accessor :previous_direction
      # Hash value of the closest checkpoint in opposite direction (used for loop detection)
      attr_accessor :closest_opposite_direction_hash
      # Edge start time of the closest opposite direction checkpoint
      attr_accessor :closest_opposite_direction_edge_start

      # @param hash_value [Integer] initial pathway hash (0 marks a brand-new pathway)
      # @param pathway_start [Time] when the pathway started
      # @param current_edge_start [Time] when the current edge started
      def initialize(hash_value:, pathway_start:, current_edge_start:)
        @hash = hash_value
        @pathway_start = pathway_start
        @current_edge_start = current_edge_start
        @parent_hash = nil

        # Loop-detection bookkeeping starts out neutral.
        @previous_direction = nil
        @closest_opposite_direction_hash = 0
        @closest_opposite_direction_edge_start = current_edge_start
      end

      # @return [String] strict base64 encoding of the binary wire format
      def encode_b64
        Core::Utils::Base64.strict_encode64(encode)
      end

      # Decode pathway context from base64 encoded string
      # @param encoded_ctx [String, nil] strict-base64 payload
      # @return [PathwayContext, nil] nil for blank input or undecodable data
      def self.decode_b64(encoded_ctx)
        return nil unless encoded_ctx && !encoded_ctx.empty?

        begin
          binary_data = Core::Utils::Base64.strict_decode64(encoded_ctx)
          decode(binary_data)
        rescue ArgumentError => e
          # Invalid base64 encoding - may indicate version mismatch or corruption
          Datadog.logger.debug("Failed to decode DSM pathway context: #{e.message}")
          nil
        end
      end

      private

      def encode
        # Format:
        # - 8 bytes: hash value (little-endian)
        # - VarInt: pathway start time (milliseconds)
        # - VarInt: current edge start time (milliseconds)
        #
        # 'Q<' pins little-endian explicitly; plain 'Q' is native-endian and would
        # emit an incompatible context on big-endian hosts.
        [@hash].pack('Q<') <<
          encode_var_int_64(time_to_ms(@pathway_start)) <<
          encode_var_int_64(time_to_ms(@current_edge_start))
      end

      # Decode pathway context from binary data
      def self.decode(binary_data)
        return nil unless binary_data && binary_data.bytesize >= 8

        reader = StringIO.new(binary_data)

        # Extract 8-byte hash (little-endian)
        hash_bytes = reader.read(8)
        return nil unless hash_bytes

        hash_value = hash_bytes.unpack1('Q<') # : Integer

        # Extract pathway start time (VarInt milliseconds)
        pathway_start_ms = decode_varint(reader)
        return nil unless pathway_start_ms

        # Extract current edge start time (VarInt milliseconds)
        current_edge_start_ms = decode_varint(reader)
        return nil unless current_edge_start_ms

        # Convert milliseconds to Time objects
        pathway_start = ms_to_time(pathway_start_ms)
        current_edge_start = ms_to_time(current_edge_start_ms)

        new(
          hash_value: hash_value,
          pathway_start: pathway_start,
          current_edge_start: current_edge_start
        )
      rescue EOFError
        # Not enough data in binary stream
        nil
      end
      private_class_method :decode

      # Encode an unsigned 64-bit integer using LEB128 variable-length encoding.
      #
      # This implements unsigned LEB128 (Little Endian Base 128) encoding as specified
      # in DWARF5 standard section 7.6:
      # https://dwarfstd.org/doc/DWARF5.pdf#page=301
      #
      # Each byte uses 7 bits for data and 1 bit to indicate continuation.
      # The high bit is set if more bytes follow, clear for the final byte.
      #
      # @param value [Integer] Unsigned integer value to encode
      # @return [String] Binary string of encoded bytes
      def encode_var_int_64(value)
        bytes = []
        while value >= 0x80
          bytes << ((value & 0x7F) | 0x80)
          value >>= 7
        end
        bytes << value
        bytes.pack('C*')
      end

      # Decode an unsigned LEB128 variable-length integer from IO stream.
      #
      # This implements unsigned LEB128 (Little Endian Base 128) decoding as specified
      # in DWARF5 standard section 7.6:
      # https://dwarfstd.org/doc/DWARF5.pdf#page=301
      #
      # VarInt format: Each byte uses 7 bits for data, 1 bit for continuation
      # - High bit set = more bytes follow
      # - High bit clear = final byte
      # - Data bits accumulated in little-endian order
      #
      # @param io [StringIO] IO stream to read from
      # @return [Integer, nil] Decoded unsigned integer, or nil if malformed
      def self.decode_varint(io)
        value = 0
        shift = 0

        loop do
          byte = io.readbyte

          # Add this byte's 7 data bits to our value
          value |= (byte & 0x7F) << shift

          # If high bit is clear, we're done
          return value unless (byte & 0x80).nonzero?

          shift += 7

          # Safety: prevent infinite decoding
          return nil if shift >= 64
        end
      rescue EOFError
        # Stream ended unexpectedly - malformed data
        nil
      end
      private_class_method :decode_varint

      def self.ms_to_time(milliseconds)
        ::Time.at(milliseconds / 1000.0)
      end
      private_class_method :ms_to_time

      def time_to_ms(time)
        (time.to_f * 1000).to_i
      end
    end
  end
end
|
|
@@ -0,0 +1,509 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'zlib'
|
|
4
|
+
require_relative 'pathway_context'
|
|
5
|
+
require_relative 'transport/http'
|
|
6
|
+
require_relative '../version'
|
|
7
|
+
require_relative '../core/worker'
|
|
8
|
+
require_relative '../core/workers/polling'
|
|
9
|
+
require_relative '../core/ddsketch'
|
|
10
|
+
require_relative '../core/buffer/cruby'
|
|
11
|
+
require_relative '../core/utils/time'
|
|
12
|
+
|
|
13
|
+
module Datadog
|
|
14
|
+
module DataStreams
|
|
15
|
+
# Raised when Data Streams Monitoring cannot be initialized due to missing dependencies
|
|
16
|
+
class UnsupportedError < StandardError; end
|
|
17
|
+
|
|
18
|
+
# Processor for Data Streams Monitoring
|
|
19
|
+
# This class is responsible for collecting and reporting pathway stats
|
|
20
|
+
# Periodically (every interval, 10 seconds by default) flushes stats to the Datadog agent.
|
|
21
|
+
class Processor < Core::Worker
|
|
22
|
+
include Core::Workers::Polling
|
|
23
|
+
|
|
24
|
+
PROPAGATION_KEY = 'dd-pathway-ctx-base64'
|
|
25
|
+
|
|
26
|
+
# Default buffer size for lock-free event queue
|
|
27
|
+
# Set to handle high-throughput scenarios (e.g., 10k events/sec for 10s interval)
|
|
28
|
+
DEFAULT_BUFFER_SIZE = 100_000
|
|
29
|
+
|
|
30
|
+
attr_reader :pathway_context, :buckets, :bucket_size_ns
|
|
31
|
+
|
|
32
|
+
# Initialize the Data Streams Monitoring processor
|
|
33
|
+
#
|
|
34
|
+
# @param interval [Float] Flush interval in seconds (e.g., 10.0 for 10 seconds)
|
|
35
|
+
# @param logger [Datadog::Core::Logger] Logger instance for debugging
|
|
36
|
+
# @param settings [Datadog::Core::Configuration::Settings] Global configuration settings
|
|
37
|
+
# @param agent_settings [Datadog::Core::Configuration::AgentSettings] Agent connection settings
|
|
38
|
+
# @param buffer_size [Integer] Size of the lock-free event buffer for async stat collection
|
|
39
|
+
# (default: DEFAULT_BUFFER_SIZE). Higher values support more throughput but use more memory.
|
|
40
|
+
# @raise [UnsupportedError] if DDSketch is not available on this platform
|
|
41
|
+
def initialize(interval:, logger:, settings:, agent_settings:, buffer_size: DEFAULT_BUFFER_SIZE)
  # DSM sketches require the native DDSketch implementation; fail fast when absent.
  raise UnsupportedError, 'DDSketch is not supported' unless Datadog::Core::DDSketch.supported?

  @settings = settings
  @agent_settings = agent_settings
  @logger = logger

  # Root pathway context: hash 0 denotes "no parent checkpoint yet".
  now = Core::Utils::Time.now
  @pathway_context = PathwayContext.new(
    hash_value: 0,
    pathway_start: now,
    current_edge_start: now
  )
  # Stats aggregate into fixed-width time buckets keyed by aligned start time (ns);
  # bucket width equals the flush interval.
  @bucket_size_ns = (interval * 1e9).to_i
  @buckets = {}
  @consumer_stats = []
  # Guards @buckets/@consumer_stats (written in #process_events, read in #flush_stats).
  @stats_mutex = Mutex.new
  # Hot-path callers push events here; the worker drains it off the critical path.
  @event_buffer = Core::Buffer::CRuby.new(buffer_size)

  super()
  self.loop_base_interval = interval

  # NOTE(review): looks like this starts the Core::Workers::Polling loop rather than
  # running a single synchronous flush — confirm against Polling#perform semantics.
  perform
end
|
|
65
|
+
|
|
66
|
+
# Track Kafka produce offset for lag monitoring
|
|
67
|
+
# @param topic [String] The Kafka topic name
|
|
68
|
+
# @param partition [Integer] The partition number
|
|
69
|
+
# @param offset [Integer] The offset of the produced message
|
|
70
|
+
# @param now [Time] Timestamp
|
|
71
|
+
# @return [Boolean] true if tracking succeeded
|
|
72
|
+
# Queue a produced-offset observation for asynchronous aggregation.
#
# @param topic [String] Kafka topic name
# @param partition [Integer] partition number
# @param offset [Integer] offset of the produced message
# @param now [Time] observation timestamp
# @return [true]
def track_kafka_produce(topic, partition, offset, now)
  event = {
    type: :kafka_produce,
    topic: topic,
    partition: partition,
    offset: offset,
    timestamp_ns: (now.to_f * 1e9).to_i
  }
  @event_buffer.push(event)
  true
end
|
|
84
|
+
|
|
85
|
+
# Track Kafka message consumption for consumer lag monitoring
|
|
86
|
+
# @param topic [String] The Kafka topic name
|
|
87
|
+
# @param partition [Integer] The partition number
|
|
88
|
+
# @param offset [Integer] The offset of the consumed message
|
|
89
|
+
# @param now [Time] Timestamp
|
|
90
|
+
# @return [Boolean] true if tracking succeeded
|
|
91
|
+
# Queue a consumed-offset observation for asynchronous aggregation.
#
# @param topic [String] Kafka topic name
# @param partition [Integer] partition number
# @param offset [Integer] offset of the consumed message
# @param now [Time] observation timestamp (stored as-is; converted by the worker)
# @return [true]
def track_kafka_consume(topic, partition, offset, now)
  event = {
    type: :kafka_consume,
    topic: topic,
    partition: partition,
    offset: offset,
    timestamp: now
  }
  @event_buffer.push(event)
  true
end
|
|
103
|
+
|
|
104
|
+
# Set a produce checkpoint
|
|
105
|
+
# @param type [String] The type of the checkpoint (e.g., 'kafka', 'kinesis', 'sns')
|
|
106
|
+
# @param destination [String] The destination (e.g., topic, exchange, stream name)
|
|
107
|
+
# @param manual_checkpoint [Boolean] Whether this checkpoint was manually set (default: true)
|
|
108
|
+
# @param tags [Hash] Additional tags to include
|
|
109
|
+
# @yield [key, value] Block to inject context into carrier
|
|
110
|
+
# @return [String] Base64 encoded pathway context
|
|
111
|
+
# Record an outbound (produce) checkpoint and propagate the updated context.
#
# @param type [String] checkpoint type (e.g. 'kafka', 'kinesis', 'sns')
# @param destination [String] destination topic/exchange/stream
# @param manual_checkpoint [Boolean] whether the checkpoint was set manually
# @param tags [Hash] extra tags, rendered as "key:value"
# @yield [key, value] injects the encoded context into the carrier
# @return [String] base64-encoded pathway context
def set_produce_checkpoint(type:, destination:, manual_checkpoint: true, tags: {}, &block)
  edge_tags = ["type:#{type}", "topic:#{destination}", 'direction:out']
  edge_tags.push('manual_checkpoint:true') if manual_checkpoint
  tags.each { |key, value| edge_tags << "#{key}:#{value}" }

  encoded = set_checkpoint(tags: edge_tags, span: Datadog::Tracing.active_span)

  # Inject into the carrier only when we have both a context and an injector.
  block.call(PROPAGATION_KEY, encoded) if encoded && block

  encoded
end
|
|
123
|
+
|
|
124
|
+
# Set a consume checkpoint
|
|
125
|
+
# @param type [String] The type of the checkpoint (e.g., 'kafka', 'kinesis', 'sns')
|
|
126
|
+
# @param source [String] The source (e.g., topic, exchange, stream name)
|
|
127
|
+
# @param manual_checkpoint [Boolean] Whether this checkpoint was manually set (default: true)
|
|
128
|
+
# @param tags [Hash] Additional tags to include
|
|
129
|
+
# @yield [key] Block to extract context from carrier
|
|
130
|
+
# @return [String] Base64 encoded pathway context
|
|
131
|
+
# Record an inbound (consume) checkpoint, first restoring any upstream context
# extracted from the carrier.
#
# @param type [String] checkpoint type (e.g. 'kafka', 'kinesis', 'sns')
# @param source [String] source topic/exchange/stream
# @param manual_checkpoint [Boolean] whether the checkpoint was set manually
# @param tags [Hash] extra tags, rendered as "key:value"
# @yield [key] extracts the encoded context from the carrier
# @return [String] base64-encoded pathway context
def set_consume_checkpoint(type:, source:, manual_checkpoint: true, tags: {}, &block)
  if block
    raw_ctx = block.call(PROPAGATION_KEY)
    # Adopt the upstream pathway context when the carrier provided one.
    set_pathway_context(decode_pathway_b64(raw_ctx)) if raw_ctx
  end

  edge_tags = ["type:#{type}", "topic:#{source}", 'direction:in']
  edge_tags.push('manual_checkpoint:true') if manual_checkpoint
  tags.each { |key, value| edge_tags << "#{key}:#{value}" }

  set_checkpoint(tags: edge_tags, span: Datadog::Tracing.active_span)
end
|
|
147
|
+
|
|
148
|
+
# Called periodically by the worker to flush stats to the agent
|
|
149
|
+
def perform
  # Drain hot-path events into the aggregation buckets first...
  process_events
  # ...then ship any accumulated stats to the agent.
  flush_stats
  # Always report success to the polling worker.
  true
end
|
|
154
|
+
|
|
155
|
+
private
|
|
156
|
+
|
|
157
|
+
# Drain event buffer and apply updates to shared data structures
|
|
158
|
+
# This runs in the background worker thread, not the critical path
|
|
159
|
+
# Drain the lock-free event buffer and fold each event into the shared
# aggregation structures under @stats_mutex. Runs on the worker thread;
# hot-path callers only push.
def process_events
  drained = @event_buffer.pop
  return if drained.empty?

  @stats_mutex.synchronize do
    drained.each do |raw_event|
      # Buffer stores Objects; we know they're hashes with symbol keys
      event = raw_event # : ::Hash[::Symbol, untyped]

      case event[:type]
      when :kafka_produce then process_kafka_produce_event(event)
      when :kafka_consume then process_kafka_consume_event(event)
      when :checkpoint    then process_checkpoint_event(event)
      end
    end
  end
end
|
|
178
|
+
|
|
179
|
+
# Fold a :kafka_produce event into its time bucket, keeping the highest
# produced offset seen per "topic:partition" key.
def process_kafka_produce_event(event)
  partition_key = "#{event[:topic]}:#{event[:partition]}"
  aligned_ns = event[:timestamp_ns] - (event[:timestamp_ns] % @bucket_size_ns)
  bucket = (@buckets[aligned_ns] ||= create_bucket)

  offsets = bucket[:latest_produce_offsets]
  known = offsets[partition_key] || 0
  offsets[partition_key] = known > event[:offset] ? known : event[:offset]
end
|
|
189
|
+
|
|
190
|
+
# Fold a single :kafka_consume event into consumer stats and offset-gap tracking.
# Runs on the worker thread under @stats_mutex (see #process_events).
def process_kafka_consume_event(event)
  @consumer_stats << {
    topic: event[:topic],
    partition: event[:partition],
    offset: event[:offset],
    timestamp: event[:timestamp],
    timestamp_sec: event[:timestamp].to_f
  }

  # Ensure a bucket exists for this event's time window.
  timestamp_ns = (event[:timestamp].to_f * 1e9).to_i
  bucket_time_ns = timestamp_ns - (timestamp_ns % @bucket_size_ns)
  @buckets[bucket_time_ns] ||= create_bucket

  # Track offset gaps for lag detection
  partition_key = "#{event[:topic]}:#{event[:partition]}"
  @latest_consumer_offsets ||= {}
  previous_offset = @latest_consumer_offsets[partition_key] || 0

  # A jump past previous_offset + 1 means messages were skipped (or consumed elsewhere).
  if event[:offset] > previous_offset + 1
    # NOTE(review): @consumer_lag_events is appended to here but never drained in
    # this file's visible code — verify it is consumed/cleared elsewhere, otherwise
    # it grows without bound.
    @consumer_lag_events ||= []
    @consumer_lag_events << {
      topic: event[:topic],
      partition: event[:partition],
      expected_offset: previous_offset + 1,
      actual_offset: event[:offset],
      gap_size: event[:offset] - previous_offset - 1,
      timestamp_sec: event[:timestamp].to_f
    }
  end

  # Never move the high-water mark backwards on out-of-order delivery.
  @latest_consumer_offsets[partition_key] = [event[:offset], previous_offset].max
end
|
|
222
|
+
|
|
223
|
+
# Fold a :checkpoint event into its time bucket: latencies are added to the
# DDSketches aggregated per (edge tags, hash, parent hash) key.
def process_checkpoint_event(event)
  ts_ns = (event[:timestamp_sec] * 1e9).to_i
  aligned_ns = ts_ns - (ts_ns % @bucket_size_ns)
  bucket = (@buckets[aligned_ns] ||= create_bucket)

  aggregation_key = [event[:tags].join(','), event[:hash], event[:parent_hash]]
  stats = (bucket[:pathway_stats][aggregation_key] ||= create_pathway_stats)

  stats[:edge_latency].add(event[:edge_latency_sec])
  stats[:full_pathway_latency].add(event[:full_pathway_latency_sec])
end
|
|
234
|
+
|
|
235
|
+
# Serialize the processor's current pathway context as strict base64 for propagation.
def encode_pathway_context
  @pathway_context.encode_b64
end
|
|
238
|
+
|
|
239
|
+
# Record a checkpoint on the current pathway: advances the hash chain, queues
# latency stats for aggregation, and returns the updated encoded context.
#
# @param tags [Array<String>] edge tags (sorted below so hashes are order-stable)
# @param now [Time, nil] checkpoint time (defaults to the current time)
# @param payload_size [Integer] payload size to record with the checkpoint
# @param span [Object, nil] active APM span to tag with the pathway hash
# @return [String] base64-encoded updated pathway context
def set_checkpoint(tags:, now: nil, payload_size: 0, span: nil)
  now ||= Core::Utils::Time.now

  current_context = get_current_context
  # Sorting makes the hash independent of the caller's tag ordering.
  tags = tags.sort

  # Find the first 'direction:*' tag, if any.
  direction = nil
  tags.each do |tag|
    if tag.start_with?('direction:')
      direction = tag
      break
    end
  end

  # Loop detection: consecutive same-direction checkpoints reuse the opposite direction's hash
  if direction && direction == current_context.previous_direction
    current_context.hash = current_context.closest_opposite_direction_hash
    if current_context.hash == 0
      # No opposite-direction checkpoint yet: restart the pathway clock.
      current_context.current_edge_start = now
      current_context.pathway_start = now
    else
      current_context.current_edge_start = current_context.closest_opposite_direction_edge_start
    end
  else
    # Direction changed (or none): remember this point for future loop detection.
    current_context.previous_direction = direction
    current_context.closest_opposite_direction_hash = current_context.hash
    current_context.closest_opposite_direction_edge_start = current_context.current_edge_start
  end

  parent_hash = current_context.hash
  new_hash = compute_pathway_hash(parent_hash, tags)

  # Tag the APM span with the pathway hash to link DSM and APM
  span&.set_tag('pathway.hash', new_hash.to_s)

  # Clamp to zero to guard against clock skew between checkpoints.
  edge_latency_sec = [now - current_context.current_edge_start, 0.0].max
  full_pathway_latency_sec = [now - current_context.pathway_start, 0.0].max

  record_checkpoint_stats(
    hash: new_hash,
    parent_hash: parent_hash,
    edge_latency_sec: edge_latency_sec,
    full_pathway_latency_sec: full_pathway_latency_sec,
    payload_size: payload_size,
    tags: tags,
    timestamp_sec: now.to_f
  )

  # Advance the chain: this checkpoint becomes the parent of the next one.
  current_context.parent_hash = current_context.hash
  current_context.hash = new_hash
  current_context.current_edge_start = now

  current_context.encode_b64
end
|
|
293
|
+
|
|
294
|
+
# Decode a base64-encoded pathway context into a PathwayContext (nil on failure).
# NOTE(review): identical to #decode_pathway_b64 — candidates for consolidation.
def decode_pathway_context(encoded_ctx)
  PathwayContext.decode_b64(encoded_ctx)
end
|
|
297
|
+
|
|
298
|
+
# Decode a base64-encoded pathway context into a PathwayContext (nil on failure).
# NOTE(review): duplicate of #decode_pathway_context, presumably kept for naming
# symmetry with PathwayContext#encode_b64 — consider consolidating.
def decode_pathway_b64(encoded_ctx)
  PathwayContext.decode_b64(encoded_ctx)
end
|
|
301
|
+
|
|
302
|
+
# Serialize accumulated buckets and consumer stats, then ship them to the agent.
# Payload construction happens under @stats_mutex; the network call does not.
def flush_stats
  payload = nil # : ::Hash[::String, untyped]?

  @stats_mutex.synchronize do
    # Nothing to report this cycle.
    return if @buckets.empty? && @consumer_stats.empty?

    stats_buckets = serialize_buckets

    payload = {
      'Service' => @settings.service,
      'TracerVersion' => Datadog::VERSION::STRING,
      'Lang' => 'ruby',
      'Stats' => stats_buckets,
      'Hostname' => hostname # NOTE(review): hostname helper is defined outside this view
    }

    # Clear consumer stats even if sending fails to prevent unbounded memory growth
    # Must be done inside mutex before we release it
    @consumer_stats.clear
  end

  # Send to agent outside mutex to avoid blocking customer code if agent is slow/hung
  send_stats_to_agent(payload) if payload
rescue => e
  # Flushing is best-effort: never let a transport error escape into the worker loop.
  @logger.debug("Failed to flush DSM stats to agent: #{e.class}: #{e}")
end
|
|
328
|
+
|
|
329
|
+
# Alias-style accessor for the current pathway context.
def get_current_pathway
  get_current_context
end
|
|
332
|
+
|
|
333
|
+
# Return the live pathway context, lazily creating a root context
# (hash 0, both clocks set to "now") on first use.
def get_current_context
  return @pathway_context if @pathway_context

  now = Core::Utils::Time.now
  @pathway_context = PathwayContext.new(
    hash_value: 0,
    pathway_start: now,
    current_edge_start: now
  )
end
|
|
343
|
+
|
|
344
|
+
# Install a pathway context decoded from upstream, resetting loop-detection
# bookkeeping so the next checkpoint starts from a clean slate.
#
# @param ctx [PathwayContext, nil] decoded upstream context; nil is a no-op
def set_pathway_context(ctx)
  return unless ctx

  @pathway_context = ctx
  ctx.previous_direction = nil
  ctx.closest_opposite_direction_hash = 0
  ctx.closest_opposite_direction_edge_start = ctx.current_edge_start
end
|
|
352
|
+
|
|
353
|
+
# Extract and install an upstream pathway context from message headers.
#
# @param headers [Hash, nil] carrier headers (e.g. Kafka message headers)
# @return [void]
def decode_and_set_pathway_context(headers)
  # Use the shared PROPAGATION_KEY constant rather than duplicating the
  # 'dd-pathway-ctx-base64' literal (keeps the key in one place).
  return unless headers && headers[PROPAGATION_KEY]

  pathway_ctx = decode_pathway_context(headers[PROPAGATION_KEY])
  set_pathway_context(pathway_ctx) if pathway_ctx
end
|
|
359
|
+
|
|
360
|
+
# Compute new pathway hash using FNV-1a algorithm.
|
|
361
|
+
# Combines service, env, tags, and parent hash to create unique pathway identifier.
|
|
362
|
+
# Compute the next pathway hash in the FNV chain.
#
# The node hash covers service + env + sorted edge tags; it is then combined
# with the parent checkpoint's hash so each checkpoint along a pathway gets a
# unique, stable identifier.
#
# @param current_hash [Integer] hash of the parent checkpoint (0 for pathway root)
# @param tags [Array<String>] sorted edge tags
# @return [Integer] unsigned 64-bit pathway hash
def compute_pathway_hash(current_hash, tags)
  service = @settings.service || 'ruby-service'
  env = @settings.env || 'none'

  # Build the node byte array in place: Array#concat avoids the quadratic
  # reallocation of the previous `bytes += tag.bytes` per-tag copies.
  bytes = service.bytes
  bytes.concat(env.bytes)
  tags.each { |tag| bytes.concat(tag.bytes) }
  byte_string = bytes.pack('C*')

  node_hash = fnv1_64(byte_string)
  # 'Q<' pins little-endian byte order so hashes agree across architectures
  # (native-endian 'QQ' would flip on big-endian hosts).
  combined_bytes = [node_hash, current_hash].pack('Q<Q<')
  fnv1_64(combined_bytes)
end
|
|
374
|
+
|
|
375
|
+
# FNV-1a 64-bit hash function.
|
|
376
|
+
# 64-bit FNV hash used for pathway identifiers.
#
# NOTE(review): despite the fnv1 name, the xor-before-multiply order below is
# the FNV-1a variant — verify this matches what the DSM backend expects.
#
# @param data [String] bytes to hash
# @return [Integer] unsigned 64-bit hash value
def fnv1_64(data)
  mask = 0xFFFFFFFFFFFFFFFF
  prime = 1_099_511_628_211
  offset_basis = 14_695_981_039_346_656_037

  data.each_byte.reduce(offset_basis) do |acc, byte|
    ((acc ^ byte) * prime) & mask
  end
end
|
|
387
|
+
|
|
388
|
+
# Queue a checkpoint's latency measurements for asynchronous aggregation,
# keeping DDSketch updates off the instrumented hot path.
# @return [true]
def record_checkpoint_stats(
  hash:, parent_hash:, edge_latency_sec:, full_pathway_latency_sec:, payload_size:, tags:,
  timestamp_sec:
)
  event = {
    type: :checkpoint,
    hash: hash,
    parent_hash: parent_hash,
    edge_latency_sec: edge_latency_sec,
    full_pathway_latency_sec: full_pathway_latency_sec,
    payload_size: payload_size,
    tags: tags,
    timestamp_sec: timestamp_sec
  }
  @event_buffer.push(event)
  true
end
|
|
406
|
+
|
|
407
|
+
# Intentional no-op kept for API compatibility: consumer offsets reach the
# aggregation buffer through track_kafka_consume instead, so all keyword
# arguments here are accepted and ignored.
def record_consumer_stats(topic:, partition:, offset:, timestamp:)
  # Already handled by track_kafka_consume pushing to buffer
  # This method kept for API compatibility but does nothing
end
|
|
411
|
+
|
|
412
|
+
# Ships a serialized stats payload to the agent over the HTTP transport
# and logs the outcome.
# NOTE(review): failures are only surfaced at debug log level — there is
# no retry or raise here; confirm best-effort delivery is intended.
def send_stats_to_agent(payload)
  result = transport.send_stats(payload)
  @logger.debug("DSM stats sent to agent: ok=#{result.ok?}")
end
|
|
416
|
+
|
|
417
|
+
# Lazily builds, then caches, the HTTP transport used to deliver stats
# payloads to the agent.
def transport
  unless @transport
    @transport = Transport::HTTP.default(
      agent_settings: @agent_settings,
      logger: @logger
    )
  end
  @transport
end
|
|
423
|
+
|
|
424
|
+
# Converts every accumulated aggregation bucket into the wire format expected
# by the agent's pipeline-stats endpoint, then drains those buckets from
# @buckets. Each serialized bucket carries its pathway stats (with encoded
# DDSketch latencies) and the Kafka produce/commit offset backlogs.
#
# NOTE(review): ALL buckets are serialized and deleted, including the one for
# the current (possibly still-open) time window — confirm partial buckets are
# meant to be flushed.
# NOTE(review): payload_size_sum/payload_size_count tracked per pathway are
# not emitted here — confirm the omission is intentional.
#
# @return [Array<Hash>] serialized buckets ('Start', 'Duration', 'Stats', 'Backlogs')
def serialize_buckets
  serialized_buckets = []
  # Keys are collected first and deleted after iteration, so @buckets is not
  # mutated while being traversed.
  bucket_keys_to_clear = []

  @buckets.each do |bucket_time_ns, bucket|
    bucket_keys_to_clear << bucket_time_ns

    bucket_stats = []
    bucket[:pathway_stats].each do |aggr_key, stats|
      # Aggregation key is [comma-joined edge tags, pathway hash, parent hash].
      edge_tags_str, hash_value, parent_hash = aggr_key
      edge_tags_array = edge_tags_str.split(',')

      bucket_stats << {
        'EdgeTags' => edge_tags_array,
        'Hash' => hash_value,
        'ParentHash' => parent_hash,
        'PathwayLatency' => stats[:full_pathway_latency].encode,
        'EdgeLatency' => stats[:edge_latency].encode,
      }
    end

    backlogs = []
    # Produce offsets are keyed "topic:partition".
    bucket[:latest_produce_offsets].each do |key, offset|
      topic, partition = key.split(':', 2)
      backlogs << {
        'Tags' => ['type:kafka_produce', "topic:#{topic}", "partition:#{partition}"],
        'Value' => offset
      }
    end
    # Commit offsets are keyed "group:topic:partition".
    bucket[:latest_commit_offsets].each do |key, offset|
      group, topic, partition = key.split(':', 3)
      backlogs << {
        'Tags' => ['type:kafka_commit', "consumer_group:#{group}", "topic:#{topic}", "partition:#{partition}"],
        'Value' => offset
      }
    end

    serialized_buckets << {
      'Start' => bucket_time_ns,
      'Duration' => @bucket_size_ns,
      'Stats' => bucket_stats,
      # Per-bucket backlogs are merged with the standalone consumer backlogs.
      'Backlogs' => backlogs + serialize_consumer_backlogs
    }
  end

  # Drain: serialized buckets are removed so they are not reported twice.
  bucket_keys_to_clear.each { |key| @buckets.delete(key) }

  serialized_buckets
end
|
|
473
|
+
|
|
474
|
+
# Serializes the standalone consumer stats into backlog entries for the
# agent payload, one per recorded stat.
# NOTE(review): these 'type:kafka_commit' entries carry no consumer_group
# tag, unlike the per-bucket commit backlogs — confirm that is intended.
#
# @return [Array<Hash>] backlog entries ('Tags', 'Value')
def serialize_consumer_backlogs
  @consumer_stats.each_with_object([]) do |stat, backlogs|
    tags = [
      'type:kafka_commit',
      "topic:#{stat[:topic]}",
      "partition:#{stat[:partition]}"
    ]
    backlogs << { 'Tags' => tags, 'Value' => stat[:offset] }
  end
end
|
|
486
|
+
|
|
487
|
+
# Host name reported alongside the stats payload; delegates to the shared
# core environment helper (presumably the local machine's hostname —
# resolution details live in Core::Environment::Socket).
def hostname
  Core::Environment::Socket.hostname
end
|
|
490
|
+
|
|
491
|
+
# Builds a fresh, empty aggregation bucket: per-pathway stats plus the
# latest Kafka produce/commit offsets observed in the bucket's window.
# Each call returns brand-new inner hashes (no sharing between buckets).
def create_bucket
  bucket = {}
  bucket[:pathway_stats] = {}
  bucket[:latest_produce_offsets] = {}
  bucket[:latest_commit_offsets] = {}
  bucket
end
|
|
498
|
+
|
|
499
|
+
# Builds a fresh per-pathway stats record: two empty DDSketches for edge and
# full-pathway latencies plus zeroed payload-size accumulators. Key insertion
# order matches the original literal.
def create_pathway_stats
  stats = {}
  stats[:edge_latency] = Datadog::Core::DDSketch.new
  stats[:full_pathway_latency] = Datadog::Core::DDSketch.new
  stats[:payload_size_sum] = 0
  stats[:payload_size_count] = 0
  stats
end
|
|
507
|
+
end
|
|
508
|
+
end
|
|
509
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../../../core/transport/http/api/map'
|
|
4
|
+
require_relative '../../../core/transport/http/api/instance'
|
|
5
|
+
require_relative '../../../core/transport/http/api/spec'
|
|
6
|
+
require_relative '../../../core/transport/http/api/endpoint'
|
|
7
|
+
require_relative 'stats'
|
|
8
|
+
|
|
9
|
+
module Datadog
  module DataStreams
    module Transport
      module HTTP
        # HTTP API definitions for the data streams transport.
        module API
          # Version identifier for the pipeline-stats endpoint family.
          V01 = 'v0.1'

          module_function

          # Builds the default map of API version => endpoint spec used to
          # deliver data streams stats payloads to the agent.
          def defaults
            spec = Stats::API::Spec.new do |api_spec|
              api_spec.stats = Stats::API::Endpoint.new('/v0.1/pipeline_stats')
            end
            Core::Transport::HTTP::API::Map[V01 => spec]
          end
        end
      end
    end
  end
end
|