cw-datadog 2.23.0.4 → 2.23.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/libdatadog_api/feature_flags.c +554 -0
- data/ext/libdatadog_api/feature_flags.h +5 -0
- data/ext/libdatadog_api/init.c +2 -0
- data/lib/datadog/core/cloudwise/client.rb +139 -34
- data/lib/datadog/core/cloudwise/component.rb +89 -46
- data/lib/datadog/core/cloudwise/docc_heartbeat_worker.rb +0 -1
- data/lib/datadog/core/cloudwise/docc_operation_worker.rb +16 -11
- data/lib/datadog/core/cloudwise/docc_registration_worker.rb +0 -1
- data/lib/datadog/core/cloudwise/license_worker.rb +7 -0
- data/lib/datadog/core/cloudwise/probe_state.rb +9 -11
- data/lib/datadog/core/cloudwise/time_sync_worker.rb +196 -0
- data/lib/datadog/core/configuration/components.rb +1 -3
- data/lib/datadog/core/configuration/settings.rb +37 -31
- data/lib/datadog/core/configuration/supported_configurations.rb +14 -0
- data/lib/datadog/core/environment/agent_info.rb +18 -0
- data/lib/datadog/core/environment/ext.rb +6 -0
- data/lib/datadog/core/environment/process.rb +79 -0
- data/lib/datadog/core/feature_flags.rb +61 -0
- data/lib/datadog/core/remote/negotiation.rb +14 -0
- data/lib/datadog/core/tag_normalizer.rb +84 -0
- data/lib/datadog/core/transport/http/adapters/net.rb +10 -0
- data/lib/datadog/core/utils/array.rb +29 -0
- data/lib/datadog/core/utils.rb +2 -0
- data/lib/datadog/data_streams/processor.rb +1 -1
- data/lib/datadog/di/transport/http.rb +6 -2
- data/lib/datadog/di/transport/input.rb +62 -2
- data/lib/datadog/open_feature/evaluation_engine.rb +19 -9
- data/lib/datadog/open_feature/ext.rb +1 -0
- data/lib/datadog/open_feature/native_evaluator.rb +38 -0
- data/lib/datadog/open_feature/noop_evaluator.rb +3 -3
- data/lib/datadog/open_feature/provider.rb +15 -8
- data/lib/datadog/open_feature/remote.rb +1 -1
- data/lib/datadog/opentelemetry/configuration/settings.rb +159 -0
- data/lib/datadog/opentelemetry/metrics.rb +110 -0
- data/lib/datadog/opentelemetry/sdk/configurator.rb +25 -1
- data/lib/datadog/opentelemetry/sdk/metrics_exporter.rb +38 -0
- data/lib/datadog/opentelemetry.rb +3 -0
- data/lib/datadog/tracing/configuration/ext.rb +1 -0
- data/lib/datadog/tracing/contrib/cloudwise/propagation.rb +124 -80
- data/lib/datadog/tracing/contrib/grape/endpoint.rb +137 -0
- data/lib/datadog/tracing/contrib/kafka/instrumentation/consumer.rb +26 -44
- data/lib/datadog/tracing/contrib/kafka/instrumentation/producer.rb +20 -26
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +85 -134
- data/lib/datadog/tracing/diagnostics/environment_logger.rb +1 -1
- data/lib/datadog/tracing/tracer.rb +10 -33
- data/lib/datadog/tracing/transport/http/api.rb +2 -4
- data/lib/datadog/tracing/transport/http/traces.rb +2 -2
- data/lib/datadog/tracing/transport/serializable_trace.rb +8 -1
- data/lib/datadog/tracing/transport/trace_formatter.rb +36 -22
- data/lib/datadog/tracing/transport/traces.rb +3 -5
- data/lib/datadog/tracing.rb +3 -3
- data/lib/datadog/version.rb +2 -2
- metadata +29 -4
|
@@ -9,10 +9,11 @@ module Datadog
|
|
|
9
9
|
module Core
|
|
10
10
|
module Cloudwise
|
|
11
11
|
# HTTP client for Cloudwise API calls
|
|
12
|
+
# rubocop:disable CustomCops/EnvUsageCop
|
|
12
13
|
class Client
|
|
13
14
|
attr_reader :base_url, :server_name, :license_key, :logger, :account_id,
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
:integrated_mode, :token, :token_account_id, :token_user_id,
|
|
16
|
+
:agent_instance_id
|
|
16
17
|
|
|
17
18
|
# Response codes
|
|
18
19
|
CODE_SUCCESS = 1000
|
|
@@ -99,20 +100,20 @@ module Datadog
|
|
|
99
100
|
@account_id = register_response[:data]['account_id'].to_i
|
|
100
101
|
|
|
101
102
|
# 设置到环境变量 (转为字符串存储)
|
|
103
|
+
# NOTE: 设置环境变量必须使用 ENV,DATADOG_ENV 只支持读取
|
|
102
104
|
ENV['CLOUDWISE_ACCOUNT_ID'] = @account_id.to_s
|
|
103
105
|
ENV['CLOUDWISE_HOST_ID'] = @host_id.to_s
|
|
104
106
|
ENV['CLOUDWISE_AGENT_ID'] = generate_agent_id(host_ip).to_s
|
|
105
|
-
ENV['CLOUDWISE_HOST_NAME'] =
|
|
107
|
+
ENV['CLOUDWISE_HOST_NAME'] = safe_gethostname
|
|
106
108
|
ENV['CLOUDWISE_INSTANCE_ID'] = generate_service_instance_id(host_ip).to_s
|
|
107
109
|
|
|
108
110
|
Cloudwise.log_debug { "Cloudwise: Registered Host, account_id: #{@account_id}, host_id: #{@host_id}" }
|
|
109
|
-
# 返回 register_host 的响应(包含 account_id)
|
|
110
|
-
return register_response
|
|
111
111
|
else
|
|
112
112
|
error_msg = register_response[:error] || "code=#{register_response[:code]}"
|
|
113
113
|
Cloudwise.log_error { "Cloudwise: Failed to register host: #{error_msg}" }
|
|
114
|
-
return register_response
|
|
115
114
|
end
|
|
115
|
+
# 返回 register_host 的响应(包含 account_id 或错误信息)
|
|
116
|
+
return register_response
|
|
116
117
|
else
|
|
117
118
|
error_msg = response[:error] || "No hostId in response"
|
|
118
119
|
Cloudwise.log_error { "Cloudwise: Failed to generate host ID: #{error_msg}" }
|
|
@@ -125,11 +126,11 @@ module Datadog
|
|
|
125
126
|
def register_host(host_ip)
|
|
126
127
|
unless @host_id
|
|
127
128
|
Cloudwise.log_error { 'Cloudwise: Cannot register host without host_id' }
|
|
128
|
-
return {
|
|
129
|
+
return {success: false, error: 'host_id not generated', code: nil}
|
|
129
130
|
end
|
|
130
131
|
|
|
131
132
|
# 获取系统信息
|
|
132
|
-
host_name =
|
|
133
|
+
host_name = safe_gethostname
|
|
133
134
|
sys_version = RUBY_VERSION
|
|
134
135
|
|
|
135
136
|
post('/v2/app/registerHost', {
|
|
@@ -146,7 +147,7 @@ module Datadog
|
|
|
146
147
|
# 心跳接口
|
|
147
148
|
def heartbeat
|
|
148
149
|
host_ip = get_local_ip
|
|
149
|
-
host_name =
|
|
150
|
+
host_name = safe_gethostname
|
|
150
151
|
|
|
151
152
|
post('/api/v1/agent/heartbeat', {
|
|
152
153
|
version: Datadog::VERSION::STRING,
|
|
@@ -157,7 +158,7 @@ module Datadog
|
|
|
157
158
|
imagePath: get_agent_path,
|
|
158
159
|
ip: host_ip,
|
|
159
160
|
agentId: generate_agent_id(host_ip),
|
|
160
|
-
timestamp:
|
|
161
|
+
timestamp: get_adjusted_timestamp_seconds,
|
|
161
162
|
routingKey: 'HeartBeat',
|
|
162
163
|
classPath: '',
|
|
163
164
|
isFirst: @is_first_heartbeat ? 1 : 0,
|
|
@@ -191,7 +192,7 @@ module Datadog
|
|
|
191
192
|
host_ip = get_local_ip
|
|
192
193
|
|
|
193
194
|
post('/v2/licence/verification', {
|
|
194
|
-
account_id:
|
|
195
|
+
account_id: @account_id.to_s,
|
|
195
196
|
agent_id: generate_agent_id(host_ip),
|
|
196
197
|
service_type: 'RUBY',
|
|
197
198
|
version: Datadog::VERSION::STRING,
|
|
@@ -232,10 +233,10 @@ module Datadog
|
|
|
232
233
|
codeType: 1012,
|
|
233
234
|
app_type: 1,
|
|
234
235
|
vpc: '',
|
|
235
|
-
env_tag:
|
|
236
|
-
service_tag:
|
|
237
|
-
version_tag:'default',
|
|
238
|
-
business_tag: ''
|
|
236
|
+
env_tag: 'default',
|
|
237
|
+
service_tag: 'default',
|
|
238
|
+
version_tag: 'default',
|
|
239
|
+
business_tag: '',
|
|
239
240
|
sys: '',
|
|
240
241
|
host_tag: '{}',
|
|
241
242
|
tags: {},
|
|
@@ -279,7 +280,7 @@ module Datadog
|
|
|
279
280
|
sample: calculate_sample_rate,
|
|
280
281
|
physical_ip: detect_container? ? get_host_real_ip : '',
|
|
281
282
|
container_id: detect_container? ? get_container_id : '',
|
|
282
|
-
sys:
|
|
283
|
+
sys: get_cloudwise_sys,
|
|
283
284
|
env_tag: Datadog.configuration.env || 'default'
|
|
284
285
|
}
|
|
285
286
|
}
|
|
@@ -325,6 +326,60 @@ module Datadog
|
|
|
325
326
|
post_docc("/api/ext/gaia/daemon/report/#{@agent_instance_id}", data)
|
|
326
327
|
end
|
|
327
328
|
|
|
329
|
+
# 获取服务器时间戳接口
|
|
330
|
+
# @return [Hash] { success: true/false, server_timestamp: Integer (毫秒), offset_ms: Integer }
|
|
331
|
+
def fetch_server_timestamp
|
|
332
|
+
path = apply_api_prefix('/api/v70/timestamp')
|
|
333
|
+
uri = URI.join(base_url, path)
|
|
334
|
+
|
|
335
|
+
Cloudwise.log_debug { "Cloudwise: Fetching server timestamp from #{uri}" }
|
|
336
|
+
|
|
337
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
338
|
+
http.use_ssl = (uri.scheme == 'https')
|
|
339
|
+
http.open_timeout = 5
|
|
340
|
+
http.read_timeout = 5
|
|
341
|
+
|
|
342
|
+
request = Net::HTTP::Get.new(uri.path)
|
|
343
|
+
request['User-Agent'] = "Datadog-Ruby-Agent/#{Datadog::VERSION::STRING}"
|
|
344
|
+
request['DD-Internal-Untraced-Request'] = '1'
|
|
345
|
+
|
|
346
|
+
# 记录请求前的本地时间
|
|
347
|
+
local_before = (Time.now.to_f * 1000).to_i
|
|
348
|
+
|
|
349
|
+
response = http.request(request)
|
|
350
|
+
|
|
351
|
+
# 记录请求后的本地时间
|
|
352
|
+
local_after = (Time.now.to_f * 1000).to_i
|
|
353
|
+
|
|
354
|
+
if response.code.to_i == 200
|
|
355
|
+
server_timestamp = response.body.to_s.strip.to_i
|
|
356
|
+
|
|
357
|
+
# 使用请求前后的中间时间作为本地参考时间
|
|
358
|
+
local_timestamp = (local_before + local_after) / 2
|
|
359
|
+
|
|
360
|
+
# 计算时间偏移(服务器时间 - 本地时间)
|
|
361
|
+
# 正值表示服务器时间比本地快,负值表示服务器时间比本地慢
|
|
362
|
+
offset_ms = server_timestamp - local_timestamp
|
|
363
|
+
|
|
364
|
+
Cloudwise.log_debug do
|
|
365
|
+
"Cloudwise: Server timestamp sync - server=#{server_timestamp}, local=#{local_timestamp}, offset=#{offset_ms}ms"
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
{
|
|
369
|
+
success: true,
|
|
370
|
+
server_timestamp: server_timestamp,
|
|
371
|
+
local_timestamp: local_timestamp,
|
|
372
|
+
offset_ms: offset_ms
|
|
373
|
+
}
|
|
374
|
+
else
|
|
375
|
+
Cloudwise.log_warn { "Cloudwise: path: #{path} Failed to fetch server timestamp: #{response.code}" }
|
|
376
|
+
{success: false, error: "HTTP #{response.code}"}
|
|
377
|
+
end
|
|
378
|
+
rescue => e
|
|
379
|
+
Cloudwise.log_error { "Cloudwise: Error fetching server timestamp: #{e.message}" }
|
|
380
|
+
{success: false, error: e.message}
|
|
381
|
+
end
|
|
382
|
+
|
|
328
383
|
private
|
|
329
384
|
|
|
330
385
|
def post(path, data)
|
|
@@ -344,19 +399,23 @@ module Datadog
|
|
|
344
399
|
# 这可以避免在 tracer 未初始化时触发 NoMethodError
|
|
345
400
|
request['DD-Internal-Untraced-Request'] = '1'
|
|
346
401
|
request.body = data.to_json
|
|
347
|
-
Cloudwise.log_debug { "Cloudwise API request:#{request.method} #{uri
|
|
402
|
+
Cloudwise.log_debug { "Cloudwise API request:#{request.method} #{uri} with data: #{data.inspect}" }
|
|
348
403
|
|
|
349
404
|
response = http.request(request)
|
|
350
405
|
handle_response(response, path)
|
|
351
406
|
rescue => e
|
|
352
407
|
Cloudwise.log_error { "Cloudwise API error for #{path}: #{e.class.name} #{e.message}" }
|
|
353
|
-
{
|
|
408
|
+
{success: false, error: e.message, code: nil}
|
|
354
409
|
end
|
|
355
410
|
|
|
356
411
|
def handle_response(response, path)
|
|
357
412
|
case response.code.to_i
|
|
358
413
|
when 200..299
|
|
359
|
-
body =
|
|
414
|
+
body = begin
|
|
415
|
+
JSON.parse(response.body)
|
|
416
|
+
rescue
|
|
417
|
+
{}
|
|
418
|
+
end
|
|
360
419
|
code = body['code'] || body['status_code']
|
|
361
420
|
|
|
362
421
|
Cloudwise.log_debug { "Cloudwise API response for #{path}: code=#{code}, body=#{body.inspect}" }
|
|
@@ -369,16 +428,40 @@ module Datadog
|
|
|
369
428
|
}
|
|
370
429
|
when 400..499
|
|
371
430
|
Cloudwise.log_warn { "Cloudwise API client error for #{path}: #{response.code} #{response.body}" }
|
|
372
|
-
{
|
|
431
|
+
{success: false, error: "Client error: #{response.code}", code: nil}
|
|
373
432
|
when 500..599
|
|
374
433
|
Cloudwise.log_error { "Cloudwise API server error for #{path}: #{response.code} #{response.body}" }
|
|
375
|
-
{
|
|
434
|
+
{success: false, error: "Server error: #{response.code}", code: nil}
|
|
376
435
|
else
|
|
377
436
|
Cloudwise.log_warn { "Cloudwise API unexpected response for #{path}: #{response.code}" }
|
|
378
|
-
{
|
|
437
|
+
{success: false, error: "Unexpected response: #{response.code}", code: nil}
|
|
438
|
+
end
|
|
439
|
+
end
|
|
440
|
+
|
|
441
|
+
# 获取校正后的秒级时间戳
|
|
442
|
+
# 如果启用了时间同步,会应用偏移量校正
|
|
443
|
+
# @return [Integer] 校正后的秒级时间戳
|
|
444
|
+
def get_adjusted_timestamp_seconds
|
|
445
|
+
local_time_s = Time.now.to_i
|
|
446
|
+
if defined?(TimeSyncWorker) && TimeSyncWorker.sync_enabled?
|
|
447
|
+
# offset_ms 是毫秒,转换为秒
|
|
448
|
+
local_time_s + (TimeSyncWorker.offset_ms / 1000)
|
|
449
|
+
else
|
|
450
|
+
local_time_s
|
|
379
451
|
end
|
|
380
452
|
end
|
|
381
453
|
|
|
454
|
+
# 获取校正后的毫秒级时间戳
|
|
455
|
+
# 如果启用了时间同步,会应用偏移量校正
|
|
456
|
+
# @return [Integer] 校正后的毫秒级时间戳
|
|
457
|
+
def get_adjusted_timestamp_ms
|
|
458
|
+
local_time_ms = (Time.now.to_f * 1000).to_i
|
|
459
|
+
if defined?(TimeSyncWorker) && TimeSyncWorker.sync_enabled?
|
|
460
|
+
local_time_ms + TimeSyncWorker.offset_ms
|
|
461
|
+
else
|
|
462
|
+
local_time_ms
|
|
463
|
+
end
|
|
464
|
+
end
|
|
382
465
|
|
|
383
466
|
# 获取本机 IP 地址
|
|
384
467
|
# 优先获取外网 IP(通过创建 UDP 连接,不实际发送数据)
|
|
@@ -432,7 +515,6 @@ module Datadog
|
|
|
432
515
|
rescue => e
|
|
433
516
|
Cloudwise.log_debug { "Cloudwise: Failed to get IP via interfaces: #{e.message}" }
|
|
434
517
|
end
|
|
435
|
-
|
|
436
518
|
end
|
|
437
519
|
|
|
438
520
|
# 获取 Agent 路径
|
|
@@ -528,16 +610,17 @@ module Datadog
|
|
|
528
610
|
|
|
529
611
|
# 1. 尝试从 Datadog 配置读取(0.0-1.0 的浮点数)
|
|
530
612
|
if defined?(Datadog.configuration) &&
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
613
|
+
Datadog.configuration.respond_to?(:tracing) &&
|
|
614
|
+
Datadog.configuration.tracing.respond_to?(:sampling) &&
|
|
615
|
+
Datadog.configuration.tracing.sampling.respond_to?(:default_rate)
|
|
534
616
|
configured_rate = Datadog.configuration.tracing.sampling.default_rate
|
|
535
617
|
rate = configured_rate if configured_rate && configured_rate >= 0.0 && configured_rate <= 1.0
|
|
536
618
|
end
|
|
537
619
|
|
|
538
620
|
# 2. 尝试从环境变量读取(DD_TRACE_SAMPLE_RATE,0.0-1.0 的浮点数)
|
|
539
|
-
|
|
540
|
-
|
|
621
|
+
env_rate_str = DATADOG_ENV['DD_TRACE_SAMPLE_RATE']
|
|
622
|
+
if rate.nil? && env_rate_str
|
|
623
|
+
env_rate = env_rate_str.to_f
|
|
541
624
|
rate = env_rate if env_rate >= 0.0 && env_rate <= 1.0
|
|
542
625
|
end
|
|
543
626
|
|
|
@@ -623,7 +706,8 @@ module Datadog
|
|
|
623
706
|
'/api/v1/agent/heartbeat',
|
|
624
707
|
'/api/v1/agent/fuseHeart',
|
|
625
708
|
'/v2/app/create',
|
|
626
|
-
'/v2/licence/verification'
|
|
709
|
+
'/v2/licence/verification',
|
|
710
|
+
'/api/v70/timestamp'
|
|
627
711
|
]
|
|
628
712
|
|
|
629
713
|
# 检查是否是需要添加前缀的路径
|
|
@@ -689,7 +773,7 @@ module Datadog
|
|
|
689
773
|
@account_id = @token_account_id
|
|
690
774
|
Cloudwise.log_debug { "Cloudwise: Token parsed - account_id: #{@token_account_id}, user_id: #{@token_user_id}" }
|
|
691
775
|
|
|
692
|
-
#account_id
|
|
776
|
+
# account_id
|
|
693
777
|
ENV['CLOUDWISE_ACCOUNT_ID'] = @token_account_id.to_s
|
|
694
778
|
else
|
|
695
779
|
Cloudwise.log_warn { 'Cloudwise: Invalid token format (expected base64 of account_id@user_id)' }
|
|
@@ -703,6 +787,21 @@ module Datadog
|
|
|
703
787
|
end
|
|
704
788
|
end
|
|
705
789
|
|
|
790
|
+
# 获取 Cloudwise sys 配置值
|
|
791
|
+
# 优先级: Datadog.configuration.cloudwise.sys > CW_SYS 环境变量 > 默认值 'default'
|
|
792
|
+
# @return [String] sys 值
|
|
793
|
+
def get_cloudwise_sys
|
|
794
|
+
# 优先从配置读取
|
|
795
|
+
if defined?(Datadog.configuration) &&
|
|
796
|
+
Datadog.configuration.respond_to?(:cloudwise) &&
|
|
797
|
+
Datadog.configuration.cloudwise.respond_to?(:sys)
|
|
798
|
+
sys = Datadog.configuration.cloudwise.sys
|
|
799
|
+
return sys if sys && !sys.empty? && sys != 'default'
|
|
800
|
+
end
|
|
801
|
+
|
|
802
|
+
# 其次从环境变量读取
|
|
803
|
+
ENV['CW_SYS'] || 'default'
|
|
804
|
+
end
|
|
706
805
|
|
|
707
806
|
# Get current run user
|
|
708
807
|
def get_run_user
|
|
@@ -722,7 +821,6 @@ module Datadog
|
|
|
722
821
|
''
|
|
723
822
|
end
|
|
724
823
|
|
|
725
|
-
|
|
726
824
|
# Get system UUID
|
|
727
825
|
# Try multiple methods to get a unique system identifier
|
|
728
826
|
def get_system_uuid
|
|
@@ -739,7 +837,7 @@ module Datadog
|
|
|
739
837
|
end
|
|
740
838
|
|
|
741
839
|
# Method 3: Try macOS system_profiler
|
|
742
|
-
if RUBY_PLATFORM
|
|
840
|
+
if RUBY_PLATFORM.match?(/darwin/)
|
|
743
841
|
uuid = `system_profiler SPHardwareDataType 2>/dev/null | awk '/UUID/ { print $3; }'`.strip
|
|
744
842
|
return uuid unless uuid.empty?
|
|
745
843
|
end
|
|
@@ -775,6 +873,13 @@ module Datadog
|
|
|
775
873
|
'unknown'
|
|
776
874
|
end
|
|
777
875
|
|
|
876
|
+
# Safe gethostname with rescue
|
|
877
|
+
def safe_gethostname
|
|
878
|
+
Socket.gethostname
|
|
879
|
+
rescue
|
|
880
|
+
'unknown'
|
|
881
|
+
end
|
|
882
|
+
|
|
778
883
|
# Get MAC address
|
|
779
884
|
def get_mac_address
|
|
780
885
|
# Try to get MAC address from network interfaces
|
|
@@ -868,10 +973,10 @@ module Datadog
|
|
|
868
973
|
handle_response(response, path)
|
|
869
974
|
rescue => e
|
|
870
975
|
Cloudwise.log_error { "Cloudwise DOCC API error for #{path}: #{e.class.name} #{e.message}" }
|
|
871
|
-
{
|
|
976
|
+
{success: false, error: e.message, code: nil}
|
|
872
977
|
end
|
|
873
|
-
|
|
874
978
|
end
|
|
979
|
+
# rubocop:enable CustomCops/EnvUsageCop
|
|
875
980
|
end
|
|
876
981
|
end
|
|
877
982
|
end
|
|
@@ -9,6 +9,9 @@ require_relative 'app_registration_worker'
|
|
|
9
9
|
require_relative 'docc_registration_worker'
|
|
10
10
|
require_relative 'docc_heartbeat_worker'
|
|
11
11
|
require_relative 'docc_operation_worker'
|
|
12
|
+
require_relative 'time_sync_worker'
|
|
13
|
+
|
|
14
|
+
require_relative '../../core/environment/variable_helpers'
|
|
12
15
|
|
|
13
16
|
module Datadog
|
|
14
17
|
module Core
|
|
@@ -18,7 +21,7 @@ module Datadog
|
|
|
18
21
|
class << self
|
|
19
22
|
# Check if debug logging is enabled
|
|
20
23
|
def debug_enabled?
|
|
21
|
-
@debug_enabled ||=
|
|
24
|
+
@debug_enabled ||= (DATADOG_ENV['DD_TRACE_DEBUG'] || 'false').downcase == 'true'
|
|
22
25
|
end
|
|
23
26
|
|
|
24
27
|
# Log debug message (only when DD_TRACE_DEBUG=true)
|
|
@@ -45,8 +48,9 @@ module Datadog
|
|
|
45
48
|
# Main component that manages Cloudwise workers with proper initialization order
|
|
46
49
|
class Component
|
|
47
50
|
attr_reader :client, :probe_state, :host_id_worker, :heartbeat_worker,
|
|
48
|
-
|
|
49
|
-
|
|
51
|
+
:license_worker, :app_registration_worker, :logger,
|
|
52
|
+
:docc_registration_worker, :docc_heartbeat_worker, :docc_operation_worker,
|
|
53
|
+
:time_sync_worker
|
|
50
54
|
|
|
51
55
|
# 类级别的单例锁,确保全局只初始化一次
|
|
52
56
|
@initialization_mutex = Mutex.new
|
|
@@ -101,46 +105,43 @@ module Datadog
|
|
|
101
105
|
if self.class.initialization_started
|
|
102
106
|
cloudwise_already_started = true
|
|
103
107
|
Cloudwise.log_debug { 'Cloudwise Component already initialized globally' }
|
|
104
|
-
|
|
108
|
+
elsif self.class.singleton_instance && instance_already_started?
|
|
105
109
|
# 检查是否有已存在的单例实例
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
self.class.singleton_instance = self
|
|
112
|
-
end
|
|
110
|
+
cloudwise_already_started = true
|
|
111
|
+
Cloudwise.log_debug { 'Initializing Datadog components for this Components instance...' }
|
|
112
|
+
else
|
|
113
|
+
self.class.initialization_started = true
|
|
114
|
+
self.class.singleton_instance = self
|
|
113
115
|
end
|
|
114
116
|
end
|
|
115
117
|
|
|
116
118
|
# 如果 Cloudwise 已经启动,只需要为这个新的 Components 实例初始化 Datadog 组件
|
|
117
119
|
if cloudwise_already_started
|
|
118
|
-
|
|
119
|
-
block.call # 立即初始化这个实例的 Datadog 组件
|
|
120
|
-
end
|
|
120
|
+
block&.call # 立即初始化这个实例的 Datadog 组件
|
|
121
121
|
return true
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
124
|
+
# 立即初始化 Datadog 组件(不等待 Cloudwise 验证)
|
|
125
|
+
# NOTE: 这里不能使用 block&.call 因为需要在调用前后执行日志
|
|
126
|
+
if block # rubocop:disable Style/SafeNavigation
|
|
127
|
+
Cloudwise.log_debug { 'Initializing Datadog components immediately...' }
|
|
128
|
+
block.call
|
|
129
|
+
Cloudwise.log_debug { 'Datadog components initialized (data collection pending ProbeState)' }
|
|
130
|
+
end
|
|
130
131
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
132
|
+
# 根据模式启动不同的 workers
|
|
133
|
+
if @client.use_integrated_mode?
|
|
134
|
+
start_docc_workers_when_ready
|
|
135
|
+
else
|
|
136
|
+
# Start Host ID Worker in background (will retry until success)
|
|
137
|
+
Cloudwise.log_debug { 'Starting Host ID generation worker (async, infinite retry)...' }
|
|
138
|
+
@host_id_worker.perform
|
|
138
139
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
140
|
+
# Start other Cloudwise workers in background
|
|
141
|
+
start_cloudwise_workers_when_ready
|
|
142
|
+
end
|
|
142
143
|
|
|
143
|
-
|
|
144
|
+
Cloudwise.log_debug { 'Cloudwise Component initialization started (async)' }
|
|
144
145
|
|
|
145
146
|
true
|
|
146
147
|
end
|
|
@@ -185,6 +186,9 @@ module Datadog
|
|
|
185
186
|
def stop
|
|
186
187
|
return unless @enabled
|
|
187
188
|
|
|
189
|
+
# 停止时间同步 worker(两种模式都需要)
|
|
190
|
+
@time_sync_worker&.stop(true)
|
|
191
|
+
|
|
188
192
|
if @client.use_integrated_mode?
|
|
189
193
|
@docc_heartbeat_worker&.stop(true)
|
|
190
194
|
@docc_registration_worker&.stop(true)
|
|
@@ -213,9 +217,16 @@ module Datadog
|
|
|
213
217
|
|
|
214
218
|
# Get component status
|
|
215
219
|
def status
|
|
216
|
-
return {
|
|
220
|
+
return {enabled: false} unless @enabled
|
|
217
221
|
|
|
218
222
|
probe_status = probe_state.status
|
|
223
|
+
# 获取时间同步状态
|
|
224
|
+
time_sync_status = if defined?(Datadog::Core::Cloudwise::TimeSyncWorker)
|
|
225
|
+
Datadog::Core::Cloudwise::TimeSyncWorker.status
|
|
226
|
+
else
|
|
227
|
+
{enabled: false}
|
|
228
|
+
end
|
|
229
|
+
|
|
219
230
|
base_status = {
|
|
220
231
|
enabled: true,
|
|
221
232
|
integrated_mode: @client.use_integrated_mode?,
|
|
@@ -223,7 +234,10 @@ module Datadog
|
|
|
223
234
|
probe_active: probe_state.active?,
|
|
224
235
|
probe_suspended: probe_state.suspended?,
|
|
225
236
|
license_valid: probe_status[:license_valid],
|
|
226
|
-
license_running: license_worker&.running
|
|
237
|
+
license_running: license_worker&.running?,
|
|
238
|
+
time_sync_enabled: time_sync_status[:enabled],
|
|
239
|
+
time_sync_offset_ms: time_sync_status[:offset_ms],
|
|
240
|
+
time_sync_running: time_sync_worker&.running?
|
|
227
241
|
}
|
|
228
242
|
|
|
229
243
|
if @client.use_integrated_mode?
|
|
@@ -282,6 +296,13 @@ module Datadog
|
|
|
282
296
|
probe_state: probe_state,
|
|
283
297
|
interval: settings.cloudwise.license_check_interval
|
|
284
298
|
)
|
|
299
|
+
|
|
300
|
+
# 5. Time Sync Worker (3 min interval, depends on License)
|
|
301
|
+
@time_sync_worker = TimeSyncWorker.new(
|
|
302
|
+
client: client,
|
|
303
|
+
logger: logger,
|
|
304
|
+
probe_state: probe_state
|
|
305
|
+
)
|
|
285
306
|
end
|
|
286
307
|
|
|
287
308
|
# 初始化 DOCC 模式的 workers
|
|
@@ -315,6 +336,13 @@ module Datadog
|
|
|
315
336
|
probe_state: probe_state,
|
|
316
337
|
interval: settings.cloudwise.license_check_interval
|
|
317
338
|
)
|
|
339
|
+
|
|
340
|
+
# 5. Time Sync Worker (3 min interval, depends on License)
|
|
341
|
+
@time_sync_worker = TimeSyncWorker.new(
|
|
342
|
+
client: client,
|
|
343
|
+
logger: logger,
|
|
344
|
+
probe_state: probe_state
|
|
345
|
+
)
|
|
318
346
|
end
|
|
319
347
|
|
|
320
348
|
# 检查实例是否已经启动
|
|
@@ -335,7 +363,8 @@ module Datadog
|
|
|
335
363
|
# 3. Start Heartbeat worker
|
|
336
364
|
# 4. Start App Registration worker (non-blocking)
|
|
337
365
|
# 5. Start License worker
|
|
338
|
-
# 6.
|
|
366
|
+
# 6. Start Time Sync worker (3 min interval)
|
|
367
|
+
# 7. ProbeState controls whether data collection is active
|
|
339
368
|
def start_cloudwise_workers_when_ready
|
|
340
369
|
Thread.new do
|
|
341
370
|
Thread.current.name = 'Cloudwise-Initializer'
|
|
@@ -368,7 +397,6 @@ module Datadog
|
|
|
368
397
|
# ============================================================
|
|
369
398
|
# STEP 4: Start License worker
|
|
370
399
|
# ============================================================
|
|
371
|
-
#
|
|
372
400
|
@license_worker.start
|
|
373
401
|
|
|
374
402
|
# Wait for first successful license validation
|
|
@@ -376,6 +404,12 @@ module Datadog
|
|
|
376
404
|
sleep(1)
|
|
377
405
|
end
|
|
378
406
|
|
|
407
|
+
# ============================================================
|
|
408
|
+
# STEP 5: Start Time Sync worker (3 min interval)
|
|
409
|
+
# ============================================================
|
|
410
|
+
Cloudwise.log_debug { 'Cloudwise: Starting Time Sync worker (3 min interval)...' }
|
|
411
|
+
@time_sync_worker.start
|
|
412
|
+
|
|
379
413
|
# ============================================================
|
|
380
414
|
# All validations passed - Probe is now ACTIVE
|
|
381
415
|
# ============================================================
|
|
@@ -392,7 +426,8 @@ module Datadog
|
|
|
392
426
|
# 2. 等待第一次心跳成功
|
|
393
427
|
# 3. 启动 DOCC 注册 worker
|
|
394
428
|
# 4. 启动 License worker
|
|
395
|
-
# 5. 启动
|
|
429
|
+
# 5. 启动 Time Sync worker (3 min interval)
|
|
430
|
+
# 6. 启动 DOCC 操作 worker
|
|
396
431
|
def start_docc_workers_when_ready
|
|
397
432
|
Thread.new do
|
|
398
433
|
Thread.current.name = 'Cloudwise-DOCC-Initializer'
|
|
@@ -400,11 +435,11 @@ module Datadog
|
|
|
400
435
|
# ============================================================
|
|
401
436
|
# STEP 1: Start DOCC Heartbeat worker
|
|
402
437
|
# ============================================================
|
|
403
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
438
|
+
Cloudwise.log_debug { 'Cloudwise docc: Starting heartbeat worker...' }
|
|
404
439
|
@docc_heartbeat_worker.start
|
|
405
440
|
|
|
406
441
|
# Wait for first successful heartbeat
|
|
407
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
442
|
+
Cloudwise.log_debug { 'Cloudwise docc: Waiting for first successful heartbeat...' }
|
|
408
443
|
until probe_state.docc_heartbeat_active?
|
|
409
444
|
sleep(1)
|
|
410
445
|
end
|
|
@@ -412,13 +447,13 @@ module Datadog
|
|
|
412
447
|
# ============================================================
|
|
413
448
|
# STEP 2: Start DOCC Registration worker
|
|
414
449
|
# ============================================================
|
|
415
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
450
|
+
Cloudwise.log_debug { 'Cloudwise docc: Starting registration worker...' }
|
|
416
451
|
@docc_registration_worker.start
|
|
417
452
|
|
|
418
453
|
# ============================================================
|
|
419
454
|
# STEP 3: Start License worker
|
|
420
455
|
# ============================================================
|
|
421
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
456
|
+
Cloudwise.log_debug { 'Cloudwise docc: Starting license worker...' }
|
|
422
457
|
@license_worker.start
|
|
423
458
|
|
|
424
459
|
# Wait for first successful license validation
|
|
@@ -427,17 +462,23 @@ module Datadog
|
|
|
427
462
|
end
|
|
428
463
|
|
|
429
464
|
# ============================================================
|
|
430
|
-
# STEP 4: Start
|
|
465
|
+
# STEP 4: Start Time Sync worker (3 min interval)
|
|
431
466
|
# ============================================================
|
|
432
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
467
|
+
Cloudwise.log_debug { 'Cloudwise docc: Starting Time Sync worker (3 min interval)...' }
|
|
468
|
+
@time_sync_worker.start
|
|
469
|
+
|
|
470
|
+
# ============================================================
|
|
471
|
+
# STEP 5: Start DOCC Operation worker
|
|
472
|
+
# ============================================================
|
|
473
|
+
Cloudwise.log_debug { 'Cloudwise docc: Starting operation worker...' }
|
|
433
474
|
@docc_operation_worker.start
|
|
434
475
|
|
|
435
476
|
# ============================================================
|
|
436
477
|
# All validations passed - Probe is now ACTIVE
|
|
437
478
|
# ============================================================
|
|
438
|
-
Cloudwise.log_debug { 'Cloudwise
|
|
479
|
+
Cloudwise.log_debug { 'Cloudwise docc: Data collection and reporting now enabled.' }
|
|
439
480
|
rescue => e
|
|
440
|
-
Cloudwise.log_error { "Cloudwise
|
|
481
|
+
Cloudwise.log_error { "Cloudwise docc: Error in background initializer: #{e.class.name} #{e.message}" }
|
|
441
482
|
Cloudwise.log_error { e.backtrace.join("\n") }
|
|
442
483
|
end
|
|
443
484
|
end
|
|
@@ -462,7 +503,10 @@ module Datadog
|
|
|
462
503
|
probe_state.mark_host_id_ready!
|
|
463
504
|
|
|
464
505
|
# Set environment variable
|
|
465
|
-
ENV
|
|
506
|
+
# NOTE: 设置环境变量必须使用 ENV,DATADOG_ENV 只支持读取
|
|
507
|
+
# rubocop:disable CustomCops/EnvUsageCop
|
|
508
|
+
ENV['CLOUDWISE_ACCOUNT_ID'] = client.account_id.to_s
|
|
509
|
+
# rubocop:enable CustomCops/EnvUsageCop
|
|
466
510
|
|
|
467
511
|
Cloudwise.log_debug { "Cloudwise: Host ID generated successfully on attempt #{retry_count}" }
|
|
468
512
|
Cloudwise.log_debug { "Cloudwise: account_id = #{client.account_id}" }
|
|
@@ -476,7 +520,6 @@ module Datadog
|
|
|
476
520
|
Cloudwise.log_warn { "Cloudwise: Retrying in #{retry_interval} seconds..." }
|
|
477
521
|
|
|
478
522
|
probe_state.mark_host_id_failed!
|
|
479
|
-
|
|
480
523
|
rescue => e
|
|
481
524
|
# Exception occurred - log and retry
|
|
482
525
|
Cloudwise.log_error { "Cloudwise: Host ID generation error (attempt #{retry_count}): #{e.class.name} #{e.message}" }
|