cw-datadog 2.23.0.2 → 2.23.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/datadog_profiling_native_extension/extconf.rb +4 -2
- data/ext/libdatadog_api/library_config.c +12 -11
- data/ext/libdatadog_extconf_helpers.rb +1 -1
- data/lib/datadog/appsec/api_security/route_extractor.rb +20 -5
- data/lib/datadog/appsec/api_security/sampler.rb +3 -1
- data/lib/datadog/appsec/assets/blocked.html +8 -0
- data/lib/datadog/appsec/assets/blocked.json +1 -1
- data/lib/datadog/appsec/assets/blocked.text +3 -1
- data/lib/datadog/appsec/assets.rb +1 -1
- data/lib/datadog/appsec/remote.rb +4 -0
- data/lib/datadog/appsec/response.rb +18 -4
- data/lib/datadog/core/cloudwise/client.rb +412 -25
- data/lib/datadog/core/cloudwise/component.rb +195 -52
- data/lib/datadog/core/cloudwise/docc_heartbeat_worker.rb +105 -0
- data/lib/datadog/core/cloudwise/docc_operation_worker.rb +191 -0
- data/lib/datadog/core/cloudwise/docc_registration_worker.rb +89 -0
- data/lib/datadog/core/cloudwise/license_worker.rb +90 -4
- data/lib/datadog/core/cloudwise/probe_state.rb +134 -12
- data/lib/datadog/core/configuration/components.rb +10 -9
- data/lib/datadog/core/configuration/settings.rb +43 -0
- data/lib/datadog/core/configuration/supported_configurations.rb +6 -2
- data/lib/datadog/core/remote/client/capabilities.rb +7 -0
- data/lib/datadog/core/remote/component.rb +2 -2
- data/lib/datadog/core/remote/transport/config.rb +2 -10
- data/lib/datadog/core/remote/transport/http/config.rb +9 -9
- data/lib/datadog/core/remote/transport/http/negotiation.rb +17 -8
- data/lib/datadog/core/remote/transport/http.rb +2 -0
- data/lib/datadog/core/remote/transport/negotiation.rb +2 -18
- data/lib/datadog/core/remote/worker.rb +23 -35
- data/lib/datadog/core/telemetry/component.rb +26 -13
- data/lib/datadog/core/telemetry/event/app_started.rb +67 -49
- data/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +27 -4
- data/lib/datadog/core/telemetry/transport/http/telemetry.rb +5 -6
- data/lib/datadog/core/telemetry/transport/telemetry.rb +1 -2
- data/lib/datadog/core/telemetry/worker.rb +51 -6
- data/lib/datadog/core/transport/http/adapters/net.rb +2 -0
- data/lib/datadog/core/transport/http/client.rb +69 -0
- data/lib/datadog/core/utils/only_once_successful.rb +6 -2
- data/lib/datadog/data_streams/transport/http/client.rb +4 -32
- data/lib/datadog/data_streams/transport/stats.rb +1 -1
- data/lib/datadog/di/probe_notification_builder.rb +35 -13
- data/lib/datadog/di/transport/diagnostics.rb +2 -2
- data/lib/datadog/di/transport/http/diagnostics.rb +2 -4
- data/lib/datadog/di/transport/http/input.rb +2 -4
- data/lib/datadog/di/transport/input.rb +2 -2
- data/lib/datadog/open_feature/component.rb +60 -0
- data/lib/datadog/open_feature/configuration.rb +27 -0
- data/lib/datadog/open_feature/evaluation_engine.rb +59 -0
- data/lib/datadog/open_feature/exposures/batch_builder.rb +32 -0
- data/lib/datadog/open_feature/exposures/buffer.rb +43 -0
- data/lib/datadog/open_feature/exposures/deduplicator.rb +30 -0
- data/lib/datadog/open_feature/exposures/event.rb +60 -0
- data/lib/datadog/open_feature/exposures/reporter.rb +40 -0
- data/lib/datadog/open_feature/exposures/worker.rb +116 -0
- data/lib/datadog/open_feature/ext.rb +13 -0
- data/lib/datadog/open_feature/noop_evaluator.rb +26 -0
- data/lib/datadog/open_feature/provider.rb +134 -0
- data/lib/datadog/open_feature/remote.rb +74 -0
- data/lib/datadog/open_feature/resolution_details.rb +35 -0
- data/lib/datadog/open_feature/transport.rb +72 -0
- data/lib/datadog/open_feature.rb +19 -0
- data/lib/datadog/profiling/component.rb +6 -0
- data/lib/datadog/profiling/profiler.rb +4 -0
- data/lib/datadog/profiling.rb +1 -2
- data/lib/datadog/single_step_instrument.rb +1 -1
- data/lib/datadog/tracing/contrib/cloudwise/propagation.rb +164 -7
- data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +22 -17
- data/lib/datadog/tracing/contrib/karafka/framework.rb +30 -0
- data/lib/datadog/tracing/contrib/karafka/patcher.rb +14 -0
- data/lib/datadog/tracing/contrib/rack/middlewares.rb +6 -2
- data/lib/datadog/tracing/contrib/waterdrop/configuration/settings.rb +27 -0
- data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +48 -0
- data/lib/datadog/tracing/contrib/waterdrop/ext.rb +17 -0
- data/lib/datadog/tracing/contrib/waterdrop/integration.rb +43 -0
- data/lib/datadog/tracing/contrib/waterdrop/middleware.rb +46 -0
- data/lib/datadog/tracing/contrib/waterdrop/patcher.rb +46 -0
- data/lib/datadog/tracing/contrib/waterdrop/producer.rb +50 -0
- data/lib/datadog/tracing/contrib/waterdrop.rb +37 -0
- data/lib/datadog/tracing/contrib.rb +1 -0
- data/lib/datadog/tracing/transport/http/api.rb +73 -1
- data/lib/datadog/tracing/transport/http/client.rb +12 -26
- data/lib/datadog/tracing/transport/http/traces.rb +4 -2
- data/lib/datadog/tracing/transport/trace_formatter.rb +16 -0
- data/lib/datadog/version.rb +2 -2
- data/lib/datadog.rb +1 -0
- metadata +38 -15
- data/lib/datadog/core/cloudwise/IMPLEMENTATION_V2.md +0 -517
- data/lib/datadog/core/cloudwise/QUICKSTART.md +0 -398
- data/lib/datadog/core/cloudwise/README.md +0 -722
- data/lib/datadog/core/remote/transport/http/client.rb +0 -49
- data/lib/datadog/core/telemetry/transport/http/client.rb +0 -49
- data/lib/datadog/di/transport/http/client.rb +0 -47
|
@@ -6,6 +6,9 @@ require_relative 'host_id_worker'
|
|
|
6
6
|
require_relative 'heartbeat_worker'
|
|
7
7
|
require_relative 'license_worker'
|
|
8
8
|
require_relative 'app_registration_worker'
|
|
9
|
+
require_relative 'docc_registration_worker'
|
|
10
|
+
require_relative 'docc_heartbeat_worker'
|
|
11
|
+
require_relative 'docc_operation_worker'
|
|
9
12
|
|
|
10
13
|
module Datadog
|
|
11
14
|
module Core
|
|
@@ -42,7 +45,8 @@ module Datadog
|
|
|
42
45
|
# Main component that manages Cloudwise workers with proper initialization order
|
|
43
46
|
class Component
|
|
44
47
|
attr_reader :client, :probe_state, :host_id_worker, :heartbeat_worker,
|
|
45
|
-
:license_worker, :app_registration_worker, :logger
|
|
48
|
+
:license_worker, :app_registration_worker, :logger,
|
|
49
|
+
:docc_registration_worker, :docc_heartbeat_worker, :docc_operation_worker
|
|
46
50
|
|
|
47
51
|
# 类级别的单例锁,确保全局只初始化一次
|
|
48
52
|
@initialization_mutex = Mutex.new
|
|
@@ -69,36 +73,15 @@ module Datadog
|
|
|
69
73
|
# Probe state manager (shared across workers)
|
|
70
74
|
@probe_state = ProbeState.new(logger: logger)
|
|
71
75
|
|
|
72
|
-
#
|
|
73
|
-
@
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
client: client,
|
|
82
|
-
logger: logger,
|
|
83
|
-
probe_state: probe_state,
|
|
84
|
-
interval: settings.cloudwise.heartbeat_interval
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
# 3. Application Registration Worker (3 min interval, depends on Host ID)
|
|
88
|
-
@app_registration_worker = AppRegistrationWorker.new(
|
|
89
|
-
client: client,
|
|
90
|
-
logger: logger,
|
|
91
|
-
probe_state: probe_state,
|
|
92
|
-
interval: settings.cloudwise.app_registration_interval
|
|
93
|
-
)
|
|
94
|
-
|
|
95
|
-
# 4. License Worker (5 min interval, depends on Host ID)
|
|
96
|
-
@license_worker = LicenseWorker.new(
|
|
97
|
-
client: client,
|
|
98
|
-
logger: logger,
|
|
99
|
-
probe_state: probe_state,
|
|
100
|
-
interval: settings.cloudwise.license_check_interval
|
|
101
|
-
)
|
|
76
|
+
# 判断是否使用融合模式(DOCC)
|
|
77
|
+
if @client.use_integrated_mode?
|
|
78
|
+
Cloudwise.log_info { 'Cloudwise: Initializing in DOCC integrated mode' }
|
|
79
|
+
@probe_state.enable_docc_mode!
|
|
80
|
+
initialize_docc_workers(settings)
|
|
81
|
+
else
|
|
82
|
+
Cloudwise.log_info { 'Cloudwise: Initializing in traditional mode' }
|
|
83
|
+
initialize_traditional_workers(settings)
|
|
84
|
+
end
|
|
102
85
|
end
|
|
103
86
|
|
|
104
87
|
# Initialize Cloudwise asynchronously
|
|
@@ -120,7 +103,7 @@ module Datadog
|
|
|
120
103
|
Cloudwise.log_debug { 'Cloudwise Component already initialized globally' }
|
|
121
104
|
else
|
|
122
105
|
# 检查是否有已存在的单例实例
|
|
123
|
-
if self.class.singleton_instance &&
|
|
106
|
+
if self.class.singleton_instance && instance_already_started?
|
|
124
107
|
cloudwise_already_started = true
|
|
125
108
|
Cloudwise.log_debug { 'Initializing Datadog components for this Components instance...' }
|
|
126
109
|
else
|
|
@@ -145,12 +128,17 @@ module Datadog
|
|
|
145
128
|
Cloudwise.log_debug { 'Datadog components initialized (data collection pending ProbeState)' }
|
|
146
129
|
end
|
|
147
130
|
|
|
148
|
-
#
|
|
149
|
-
|
|
150
|
-
|
|
131
|
+
# 根据模式启动不同的 workers
|
|
132
|
+
if @client.use_integrated_mode?
|
|
133
|
+
start_docc_workers_when_ready
|
|
134
|
+
else
|
|
135
|
+
# Start Host ID Worker in background (will retry until success)
|
|
136
|
+
Cloudwise.log_debug { 'Starting Host ID generation worker (async, infinite retry)...' }
|
|
137
|
+
@host_id_worker.perform
|
|
151
138
|
|
|
152
|
-
|
|
153
|
-
|
|
139
|
+
# Start other Cloudwise workers in background
|
|
140
|
+
start_cloudwise_workers_when_ready
|
|
141
|
+
end
|
|
154
142
|
|
|
155
143
|
Cloudwise.log_debug { 'Cloudwise Component initialization started (async)' }
|
|
156
144
|
|
|
@@ -196,12 +184,20 @@ module Datadog
|
|
|
196
184
|
# Stop all workers
|
|
197
185
|
def stop
|
|
198
186
|
return unless @enabled
|
|
199
|
-
@host_id_worker&.stop(true)
|
|
200
|
-
@heartbeat_worker&.stop(true)
|
|
201
|
-
@license_worker&.stop(true)
|
|
202
|
-
@app_registration_worker&.stop(true)
|
|
203
187
|
|
|
204
|
-
|
|
188
|
+
if @client.use_integrated_mode?
|
|
189
|
+
@docc_heartbeat_worker&.stop(true)
|
|
190
|
+
@docc_registration_worker&.stop(true)
|
|
191
|
+
@docc_operation_worker&.stop(true)
|
|
192
|
+
@license_worker&.stop(true)
|
|
193
|
+
Cloudwise.log_debug { 'Cloudwise DOCC component stopped' }
|
|
194
|
+
else
|
|
195
|
+
@host_id_worker&.stop(true)
|
|
196
|
+
@heartbeat_worker&.stop(true)
|
|
197
|
+
@license_worker&.stop(true)
|
|
198
|
+
@app_registration_worker&.stop(true)
|
|
199
|
+
Cloudwise.log_debug { 'Cloudwise component stopped' }
|
|
200
|
+
end
|
|
205
201
|
end
|
|
206
202
|
|
|
207
203
|
def enabled?
|
|
@@ -220,25 +216,116 @@ module Datadog
|
|
|
220
216
|
return { enabled: false } unless @enabled
|
|
221
217
|
|
|
222
218
|
probe_status = probe_state.status
|
|
223
|
-
{
|
|
219
|
+
base_status = {
|
|
224
220
|
enabled: true,
|
|
225
|
-
|
|
226
|
-
host_id_generated: host_id_worker.host_id_generated?,
|
|
227
|
-
host_id_ready: probe_status[:host_id_ready],
|
|
228
|
-
heartbeat_active: probe_status[:heartbeat_active],
|
|
229
|
-
license_valid: probe_status[:license_valid],
|
|
230
|
-
app_registered: probe_status[:app_registered],
|
|
221
|
+
integrated_mode: @client.use_integrated_mode?,
|
|
231
222
|
can_collect_data: probe_status[:can_collect_data],
|
|
232
223
|
probe_active: probe_state.active?,
|
|
233
224
|
probe_suspended: probe_state.suspended?,
|
|
234
|
-
|
|
235
|
-
license_running: license_worker
|
|
236
|
-
app_registration_running: app_registration_worker.running?
|
|
225
|
+
license_valid: probe_status[:license_valid],
|
|
226
|
+
license_running: license_worker&.running?
|
|
237
227
|
}
|
|
228
|
+
|
|
229
|
+
if @client.use_integrated_mode?
|
|
230
|
+
base_status.merge(
|
|
231
|
+
docc_registered: probe_status[:docc_registered],
|
|
232
|
+
docc_heartbeat_active: probe_status[:docc_heartbeat_active],
|
|
233
|
+
docc_operation_active: probe_status[:docc_operation_active],
|
|
234
|
+
docc_heartbeat_running: docc_heartbeat_worker&.running?,
|
|
235
|
+
docc_registration_running: docc_registration_worker&.running?,
|
|
236
|
+
docc_operation_running: docc_operation_worker&.running?
|
|
237
|
+
)
|
|
238
|
+
else
|
|
239
|
+
base_status.merge(
|
|
240
|
+
account_id: client.account_id,
|
|
241
|
+
host_id_generated: host_id_worker&.host_id_generated?,
|
|
242
|
+
host_id_ready: probe_status[:host_id_ready],
|
|
243
|
+
heartbeat_active: probe_status[:heartbeat_active],
|
|
244
|
+
app_registered: probe_status[:app_registered],
|
|
245
|
+
heartbeat_running: heartbeat_worker&.running?,
|
|
246
|
+
app_registration_running: app_registration_worker&.running?
|
|
247
|
+
)
|
|
248
|
+
end
|
|
238
249
|
end
|
|
239
250
|
|
|
240
251
|
private
|
|
241
252
|
|
|
253
|
+
# 初始化传统模式的 workers
|
|
254
|
+
def initialize_traditional_workers(settings)
|
|
255
|
+
# 1. Host ID Worker (must run first, 30s retry)
|
|
256
|
+
@host_id_worker = HostIdWorker.new(
|
|
257
|
+
client: client,
|
|
258
|
+
logger: logger,
|
|
259
|
+
probe_state: probe_state
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# 2. Heartbeat Worker (60s interval, depends on Host ID)
|
|
263
|
+
@heartbeat_worker = HeartbeatWorker.new(
|
|
264
|
+
client: client,
|
|
265
|
+
logger: logger,
|
|
266
|
+
probe_state: probe_state,
|
|
267
|
+
interval: settings.cloudwise.heartbeat_interval
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# 3. Application Registration Worker (3 min interval, depends on Host ID)
|
|
271
|
+
@app_registration_worker = AppRegistrationWorker.new(
|
|
272
|
+
client: client,
|
|
273
|
+
logger: logger,
|
|
274
|
+
probe_state: probe_state,
|
|
275
|
+
interval: settings.cloudwise.app_registration_interval
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
# 4. License Worker (5 min interval, depends on Host ID)
|
|
279
|
+
@license_worker = LicenseWorker.new(
|
|
280
|
+
client: client,
|
|
281
|
+
logger: logger,
|
|
282
|
+
probe_state: probe_state,
|
|
283
|
+
interval: settings.cloudwise.license_check_interval
|
|
284
|
+
)
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
# 初始化 DOCC 模式的 workers
|
|
288
|
+
def initialize_docc_workers(settings)
|
|
289
|
+
# 1. DOCC Registration Worker (10 min interval)
|
|
290
|
+
@docc_registration_worker = DOCCRegistrationWorker.new(
|
|
291
|
+
client: client,
|
|
292
|
+
logger: logger,
|
|
293
|
+
probe_state: probe_state
|
|
294
|
+
)
|
|
295
|
+
|
|
296
|
+
# 2. DOCC Heartbeat Worker (30s interval)
|
|
297
|
+
@docc_heartbeat_worker = DOCCHeartbeatWorker.new(
|
|
298
|
+
client: client,
|
|
299
|
+
logger: logger,
|
|
300
|
+
probe_state: probe_state,
|
|
301
|
+
interval: settings.cloudwise.heartbeat_interval
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
# 3. DOCC Operation Worker (30s interval)
|
|
305
|
+
@docc_operation_worker = DOCCOperationWorker.new(
|
|
306
|
+
client: client,
|
|
307
|
+
logger: logger,
|
|
308
|
+
probe_state: probe_state
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
# 4. License Worker (5 min interval) - License 校验逻辑保持不变
|
|
312
|
+
@license_worker = LicenseWorker.new(
|
|
313
|
+
client: client,
|
|
314
|
+
logger: logger,
|
|
315
|
+
probe_state: probe_state,
|
|
316
|
+
interval: settings.cloudwise.license_check_interval
|
|
317
|
+
)
|
|
318
|
+
end
|
|
319
|
+
|
|
320
|
+
# 检查实例是否已经启动
|
|
321
|
+
def instance_already_started?
|
|
322
|
+
if @client.use_integrated_mode?
|
|
323
|
+
self.class.singleton_instance&.docc_heartbeat_worker&.started?
|
|
324
|
+
else
|
|
325
|
+
self.class.singleton_instance&.host_id_worker&.started?
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
242
329
|
# Start a background thread that waits for Host ID to be ready
|
|
243
330
|
# Then starts other Cloudwise workers
|
|
244
331
|
#
|
|
@@ -299,6 +386,62 @@ module Datadog
|
|
|
299
386
|
end
|
|
300
387
|
end
|
|
301
388
|
|
|
389
|
+
# Start DOCC workers in background
|
|
390
|
+
# DOCC 模式启动流程:
|
|
391
|
+
# 1. 启动 DOCC 心跳 worker
|
|
392
|
+
# 2. 等待第一次心跳成功
|
|
393
|
+
# 3. 启动 DOCC 注册 worker
|
|
394
|
+
# 4. 启动 License worker
|
|
395
|
+
# 5. 启动 DOCC 操作 worker
|
|
396
|
+
def start_docc_workers_when_ready
|
|
397
|
+
Thread.new do
|
|
398
|
+
Thread.current.name = 'Cloudwise-DOCC-Initializer'
|
|
399
|
+
|
|
400
|
+
# ============================================================
|
|
401
|
+
# STEP 1: Start DOCC Heartbeat worker
|
|
402
|
+
# ============================================================
|
|
403
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Starting heartbeat worker...' }
|
|
404
|
+
@docc_heartbeat_worker.start
|
|
405
|
+
|
|
406
|
+
# Wait for first successful heartbeat
|
|
407
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Waiting for first successful heartbeat...' }
|
|
408
|
+
until probe_state.docc_heartbeat_active?
|
|
409
|
+
sleep(1)
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
# ============================================================
|
|
413
|
+
# STEP 2: Start DOCC Registration worker
|
|
414
|
+
# ============================================================
|
|
415
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Starting registration worker...' }
|
|
416
|
+
@docc_registration_worker.start
|
|
417
|
+
|
|
418
|
+
# ============================================================
|
|
419
|
+
# STEP 3: Start License worker
|
|
420
|
+
# ============================================================
|
|
421
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Starting license worker...' }
|
|
422
|
+
@license_worker.start
|
|
423
|
+
|
|
424
|
+
# Wait for first successful license validation
|
|
425
|
+
until probe_state.license_valid?
|
|
426
|
+
sleep(1)
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
# ============================================================
|
|
430
|
+
# STEP 4: Start DOCC Operation worker
|
|
431
|
+
# ============================================================
|
|
432
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Starting operation worker...' }
|
|
433
|
+
@docc_operation_worker.start
|
|
434
|
+
|
|
435
|
+
# ============================================================
|
|
436
|
+
# All validations passed - Probe is now ACTIVE
|
|
437
|
+
# ============================================================
|
|
438
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Data collection and reporting now enabled.' }
|
|
439
|
+
rescue => e
|
|
440
|
+
Cloudwise.log_error { "Cloudwise DOCC: Error in background initializer: #{e.class.name} #{e.message}" }
|
|
441
|
+
Cloudwise.log_error { e.backtrace.join("\n") }
|
|
442
|
+
end
|
|
443
|
+
end
|
|
444
|
+
|
|
302
445
|
# Synchronously generate Host ID with infinite retry (used by HostIdWorker)
|
|
303
446
|
# This method will BLOCK indefinitely until Host ID is successfully generated
|
|
304
447
|
# Never returns false - only returns when successful
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../worker'
|
|
4
|
+
require_relative '../workers/async'
|
|
5
|
+
require_relative '../workers/polling'
|
|
6
|
+
|
|
7
|
+
module Datadog
|
|
8
|
+
module Core
|
|
9
|
+
module Cloudwise
|
|
10
|
+
# Worker that sends heartbeat to DOCC every 30 seconds with retry mechanism
|
|
11
|
+
# DOCC 心跳纳管 Worker
|
|
12
|
+
# 心跳失败 3 次后标记为无心跳,数据不采集
|
|
13
|
+
class DOCCHeartbeatWorker < Worker
|
|
14
|
+
include Workers::Polling
|
|
15
|
+
|
|
16
|
+
# 30 seconds interval
|
|
17
|
+
DEFAULT_INTERVAL = 30
|
|
18
|
+
# Max retries: 3 times
|
|
19
|
+
MAX_RETRIES = 3
|
|
20
|
+
# 成功的状态码
|
|
21
|
+
DOCC_CODE_SUCCESS = 100000
|
|
22
|
+
|
|
23
|
+
attr_reader :client
|
|
24
|
+
|
|
25
|
+
def initialize(client:, logger:, probe_state:, **options)
|
|
26
|
+
@client = client
|
|
27
|
+
@logger = logger
|
|
28
|
+
@probe_state = probe_state
|
|
29
|
+
@failure_count = 0
|
|
30
|
+
|
|
31
|
+
# Workers::Async::Thread settings
|
|
32
|
+
self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)
|
|
33
|
+
|
|
34
|
+
# Workers::IntervalLoop settings
|
|
35
|
+
self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)
|
|
36
|
+
|
|
37
|
+
self.enabled = options.fetch(:enabled, true)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def perform
|
|
41
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Sending heartbeat' }
|
|
42
|
+
|
|
43
|
+
result = client.docc_heartbeat_ext
|
|
44
|
+
|
|
45
|
+
process_heartbeat_result(result)
|
|
46
|
+
|
|
47
|
+
true
|
|
48
|
+
rescue => e
|
|
49
|
+
Cloudwise.log_error { "Cloudwise DOCC: Heartbeat worker error: #{e.class.name} #{e.message}" }
|
|
50
|
+
handle_heartbeat_failure
|
|
51
|
+
true # Continue running
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Public method to start the worker
|
|
55
|
+
def start
|
|
56
|
+
return false if !enabled? || started?
|
|
57
|
+
|
|
58
|
+
# Start the async worker thread
|
|
59
|
+
perform
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
private
|
|
63
|
+
|
|
64
|
+
attr_reader :logger, :probe_state
|
|
65
|
+
|
|
66
|
+
def process_heartbeat_result(result)
|
|
67
|
+
# 接口请求失败
|
|
68
|
+
unless result[:success]
|
|
69
|
+
handle_heartbeat_failure
|
|
70
|
+
return
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
code = result[:code]
|
|
74
|
+
|
|
75
|
+
# 接口请求成功,但 code 不是 100000
|
|
76
|
+
if code != DOCC_CODE_SUCCESS
|
|
77
|
+
Cloudwise.log_warn { "Cloudwise DOCC: Heartbeat returned code #{code} (expected #{DOCC_CODE_SUCCESS})" }
|
|
78
|
+
Cloudwise.log_warn { "Cloudwise DOCC: Data collection suspended due to invalid heartbeat code" }
|
|
79
|
+
probe_state.mark_docc_heartbeat_inactive!
|
|
80
|
+
@failure_count = 0 # 重置失败计数
|
|
81
|
+
return
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# 成功且 code == 100000
|
|
85
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Heartbeat successful' }
|
|
86
|
+
probe_state.mark_docc_heartbeat_active!
|
|
87
|
+
@failure_count = 0 # 重置失败计数
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def handle_heartbeat_failure
|
|
91
|
+
@failure_count += 1
|
|
92
|
+
|
|
93
|
+
Cloudwise.log_warn { "Cloudwise DOCC: Heartbeat failed (#{@failure_count}/#{MAX_RETRIES})" }
|
|
94
|
+
|
|
95
|
+
if @failure_count >= MAX_RETRIES
|
|
96
|
+
Cloudwise.log_error { "Cloudwise DOCC: Heartbeat failed #{@failure_count} times, marking as inactive" }
|
|
97
|
+
Cloudwise.log_error { "Cloudwise DOCC: Data collection suspended due to heartbeat failure" }
|
|
98
|
+
probe_state.mark_docc_heartbeat_inactive!
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../worker'
|
|
4
|
+
require_relative '../workers/async'
|
|
5
|
+
require_relative '../workers/polling'
|
|
6
|
+
|
|
7
|
+
module Datadog
|
|
8
|
+
module Core
|
|
9
|
+
module Cloudwise
|
|
10
|
+
# Worker that fetches and executes operations from DOCC every 30 seconds
|
|
11
|
+
# DOCC 操作纳管 Worker(操作拉取和结果上报)
|
|
12
|
+
class DOCCOperationWorker < Worker
|
|
13
|
+
include Workers::Polling
|
|
14
|
+
|
|
15
|
+
# 30 seconds interval
|
|
16
|
+
DEFAULT_INTERVAL = 30
|
|
17
|
+
# 成功的状态码
|
|
18
|
+
DOCC_CODE_SUCCESS = 100000
|
|
19
|
+
|
|
20
|
+
# Operation types
|
|
21
|
+
OPERATION_AGENT_START = 'agent_start'
|
|
22
|
+
OPERATION_AGENT_STOP = 'agent_stop'
|
|
23
|
+
|
|
24
|
+
attr_reader :client
|
|
25
|
+
|
|
26
|
+
def initialize(client:, logger:, probe_state:, **options)
|
|
27
|
+
@client = client
|
|
28
|
+
@logger = logger
|
|
29
|
+
@probe_state = probe_state
|
|
30
|
+
|
|
31
|
+
# Workers::Async::Thread settings
|
|
32
|
+
self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)
|
|
33
|
+
|
|
34
|
+
# Workers::IntervalLoop settings
|
|
35
|
+
self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)
|
|
36
|
+
|
|
37
|
+
self.enabled = options.fetch(:enabled, true)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def perform
|
|
41
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Fetching operations' }
|
|
42
|
+
|
|
43
|
+
result = client.docc_fetch_operation
|
|
44
|
+
|
|
45
|
+
process_operation_result(result)
|
|
46
|
+
|
|
47
|
+
true
|
|
48
|
+
rescue => e
|
|
49
|
+
Cloudwise.log_error { "Cloudwise DOCC: Operation worker error: #{e.class.name} #{e.message}" }
|
|
50
|
+
true # Continue running
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Public method to start the worker
|
|
54
|
+
def start
|
|
55
|
+
return false if !enabled? || started?
|
|
56
|
+
|
|
57
|
+
# Start the async worker thread
|
|
58
|
+
perform
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
private
|
|
62
|
+
|
|
63
|
+
attr_reader :logger, :probe_state
|
|
64
|
+
|
|
65
|
+
def process_operation_result(result)
|
|
66
|
+
# 接口请求失败
|
|
67
|
+
unless result[:success]
|
|
68
|
+
Cloudwise.log_error { "Cloudwise DOCC: Failed to fetch operations: #{result[:error]}" }
|
|
69
|
+
return
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
code = result[:code]
|
|
73
|
+
|
|
74
|
+
# 检查响应码
|
|
75
|
+
if code != DOCC_CODE_SUCCESS
|
|
76
|
+
Cloudwise.log_warn { "Cloudwise DOCC: Fetch operation returned code #{code} (expected #{DOCC_CODE_SUCCESS})" }
|
|
77
|
+
return
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# 获取任务数据(data 是一个数组)
|
|
81
|
+
tasks = result[:data]
|
|
82
|
+
return unless tasks && tasks.is_a?(Array)
|
|
83
|
+
|
|
84
|
+
# 如果没有任务,直接返回
|
|
85
|
+
if tasks.empty?
|
|
86
|
+
Cloudwise.log_debug { "Cloudwise DOCC: No operations to execute" }
|
|
87
|
+
return
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# 收集所有任务的执行结果
|
|
91
|
+
results = []
|
|
92
|
+
|
|
93
|
+
# 处理所有任务
|
|
94
|
+
tasks.each do |task_data|
|
|
95
|
+
task_result = process_single_task(task_data)
|
|
96
|
+
results << task_result if task_result
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# 批量上报结果
|
|
100
|
+
report_operation_results(results) unless results.empty?
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
def process_single_task(task_data)
|
|
104
|
+
return nil unless task_data.is_a?(Hash)
|
|
105
|
+
|
|
106
|
+
task_id = task_data['taskId']
|
|
107
|
+
operation = task_data['operate']
|
|
108
|
+
agent_instance_id = task_data['agentInstanceId']
|
|
109
|
+
|
|
110
|
+
Cloudwise.log_debug { "Cloudwise DOCC: Received operation - taskId: #{task_id}, operation: #{operation}" }
|
|
111
|
+
|
|
112
|
+
# 执行操作并返回结果
|
|
113
|
+
execute_operation(task_id, operation, agent_instance_id)
|
|
114
|
+
rescue => e
|
|
115
|
+
Cloudwise.log_error { "Cloudwise DOCC: Error processing task #{task_id}: #{e.class.name} #{e.message}" }
|
|
116
|
+
# 返回失败结果
|
|
117
|
+
create_operation_result(task_id, 'failed', "Error processing task: #{e.message}")
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def execute_operation(task_id, operation, agent_instance_id)
|
|
121
|
+
case operation
|
|
122
|
+
when OPERATION_AGENT_START
|
|
123
|
+
handle_agent_start(task_id)
|
|
124
|
+
when OPERATION_AGENT_STOP
|
|
125
|
+
handle_agent_stop(task_id)
|
|
126
|
+
else
|
|
127
|
+
Cloudwise.log_warn { "Cloudwise DOCC: Unknown operation type: #{operation}" }
|
|
128
|
+
create_operation_result(task_id, 'failed', "Unknown operation type: #{operation}")
|
|
129
|
+
end
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def handle_agent_start(task_id)
|
|
133
|
+
Cloudwise.log_info { 'Cloudwise DOCC: Executing agent_start operation' }
|
|
134
|
+
|
|
135
|
+
begin
|
|
136
|
+
# 启用数据采集和上报
|
|
137
|
+
probe_state.mark_docc_operation_active!
|
|
138
|
+
|
|
139
|
+
# 返回成功结果
|
|
140
|
+
create_operation_result(task_id, 'success', 'Agent started successfully')
|
|
141
|
+
rescue => e
|
|
142
|
+
Cloudwise.log_error { "Cloudwise DOCC: Failed to execute agent_start: #{e.message}" }
|
|
143
|
+
create_operation_result(task_id, 'failed', "Failed to start agent: #{e.message}")
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
def handle_agent_stop(task_id)
|
|
148
|
+
Cloudwise.log_info { 'Cloudwise DOCC: Executing agent_stop operation' }
|
|
149
|
+
|
|
150
|
+
begin
|
|
151
|
+
# 禁用数据采集和上报
|
|
152
|
+
probe_state.mark_docc_operation_inactive!
|
|
153
|
+
|
|
154
|
+
# 返回成功结果
|
|
155
|
+
create_operation_result(task_id, 'success', 'Agent stopped successfully')
|
|
156
|
+
rescue => e
|
|
157
|
+
Cloudwise.log_error { "Cloudwise DOCC: Failed to execute agent_stop: #{e.message}" }
|
|
158
|
+
create_operation_result(task_id, 'failed', "Failed to stop agent: #{e.message}")
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
# 创建单个操作结果
|
|
163
|
+
def create_operation_result(task_id, status, msg)
|
|
164
|
+
{
|
|
165
|
+
code: DOCC_CODE_SUCCESS,
|
|
166
|
+
taskId: task_id,
|
|
167
|
+
detail: {
|
|
168
|
+
msg: msg,
|
|
169
|
+
agent_instance_id: client.agent_instance_id,
|
|
170
|
+
agent_id: 'rubyagent'
|
|
171
|
+
},
|
|
172
|
+
status: status,
|
|
173
|
+
timestamp: Time.now.to_i * 1000
|
|
174
|
+
}
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# 批量上报操作结果
|
|
178
|
+
def report_operation_results(results)
|
|
179
|
+
result = client.docc_report_operation(results)
|
|
180
|
+
|
|
181
|
+
if result[:success] && result[:code] == DOCC_CODE_SUCCESS
|
|
182
|
+
Cloudwise.log_debug { "Cloudwise DOCC: Operation results reported successfully - #{results.size} task(s)" }
|
|
183
|
+
else
|
|
184
|
+
Cloudwise.log_error { "Cloudwise DOCC: Failed to report operation results - #{results.size} task(s)" }
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
end
|
|
188
|
+
end
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../worker'
|
|
4
|
+
require_relative '../workers/async'
|
|
5
|
+
require_relative '../workers/polling'
|
|
6
|
+
|
|
7
|
+
module Datadog
|
|
8
|
+
module Core
|
|
9
|
+
module Cloudwise
|
|
10
|
+
# Worker that registers extension to DOCC every 10 minutes
|
|
11
|
+
# DOCC 注册纳管 Worker
|
|
12
|
+
class DOCCRegistrationWorker < Worker
|
|
13
|
+
include Workers::Polling
|
|
14
|
+
|
|
15
|
+
# 10 minutes interval (600 seconds)
|
|
16
|
+
DEFAULT_INTERVAL = 600
|
|
17
|
+
# 成功的状态码
|
|
18
|
+
DOCC_CODE_SUCCESS = 100000
|
|
19
|
+
|
|
20
|
+
attr_reader :client
|
|
21
|
+
|
|
22
|
+
def initialize(client:, logger:, probe_state:, **options)
|
|
23
|
+
@client = client
|
|
24
|
+
@logger = logger
|
|
25
|
+
@probe_state = probe_state
|
|
26
|
+
|
|
27
|
+
# Workers::Async::Thread settings
|
|
28
|
+
self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)
|
|
29
|
+
|
|
30
|
+
# Workers::IntervalLoop settings
|
|
31
|
+
self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)
|
|
32
|
+
|
|
33
|
+
self.enabled = options.fetch(:enabled, true)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def perform
|
|
37
|
+
Cloudwise.log_debug { 'Cloudwise DOCC: Registering extension' }
|
|
38
|
+
|
|
39
|
+
result = client.docc_register_ext
|
|
40
|
+
|
|
41
|
+
process_registration_result(result)
|
|
42
|
+
|
|
43
|
+
true
|
|
44
|
+
rescue => e
|
|
45
|
+
Cloudwise.log_error { "Cloudwise DOCC: Registration worker error: #{e.class.name} #{e.message}" }
|
|
46
|
+
true # Continue running
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Public method to start the worker
|
|
50
|
+
def start
|
|
51
|
+
return false if !enabled? || started?
|
|
52
|
+
|
|
53
|
+
# Start the async worker thread
|
|
54
|
+
perform
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
private
|
|
58
|
+
|
|
59
|
+
attr_reader :logger, :probe_state
|
|
60
|
+
|
|
61
|
+
def process_registration_result(result)
|
|
62
|
+
# 接口请求失败
|
|
63
|
+
unless result[:success]
|
|
64
|
+
Cloudwise.log_error { "Cloudwise DOCC: Registration failed: #{result[:error]}" }
|
|
65
|
+
return
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
code = result[:code]
|
|
69
|
+
|
|
70
|
+
# 注册成功 (code == 100000)
|
|
71
|
+
if code == DOCC_CODE_SUCCESS
|
|
72
|
+
instance_id = result.dig(:data, 'instanceId')
|
|
73
|
+
status = result.dig(:data, 'status')
|
|
74
|
+
|
|
75
|
+
Cloudwise.log_debug { "Cloudwise DOCC: Registration successful - instanceId: #{instance_id}, status: #{status}" }
|
|
76
|
+
|
|
77
|
+
# 标记注册成功
|
|
78
|
+
probe_state.mark_docc_registered!
|
|
79
|
+
return
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# 注册失败 (code != 100000)
|
|
83
|
+
Cloudwise.log_error { "Cloudwise DOCC: Registration failed with code #{code}" }
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
|