cw-datadog 2.23.0.2 → 2.23.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93):
  1. checksums.yaml +4 -4
  2. data/ext/datadog_profiling_native_extension/extconf.rb +4 -2
  3. data/ext/libdatadog_api/library_config.c +12 -11
  4. data/ext/libdatadog_extconf_helpers.rb +1 -1
  5. data/lib/datadog/appsec/api_security/route_extractor.rb +20 -5
  6. data/lib/datadog/appsec/api_security/sampler.rb +3 -1
  7. data/lib/datadog/appsec/assets/blocked.html +8 -0
  8. data/lib/datadog/appsec/assets/blocked.json +1 -1
  9. data/lib/datadog/appsec/assets/blocked.text +3 -1
  10. data/lib/datadog/appsec/assets.rb +1 -1
  11. data/lib/datadog/appsec/remote.rb +4 -0
  12. data/lib/datadog/appsec/response.rb +18 -4
  13. data/lib/datadog/core/cloudwise/client.rb +364 -25
  14. data/lib/datadog/core/cloudwise/component.rb +197 -52
  15. data/lib/datadog/core/cloudwise/docc_heartbeat_worker.rb +105 -0
  16. data/lib/datadog/core/cloudwise/docc_operation_worker.rb +191 -0
  17. data/lib/datadog/core/cloudwise/docc_registration_worker.rb +89 -0
  18. data/lib/datadog/core/cloudwise/license_worker.rb +3 -1
  19. data/lib/datadog/core/cloudwise/probe_state.rb +134 -12
  20. data/lib/datadog/core/configuration/components.rb +10 -9
  21. data/lib/datadog/core/configuration/settings.rb +28 -0
  22. data/lib/datadog/core/configuration/supported_configurations.rb +5 -2
  23. data/lib/datadog/core/remote/client/capabilities.rb +7 -0
  24. data/lib/datadog/core/remote/component.rb +2 -2
  25. data/lib/datadog/core/remote/transport/config.rb +2 -10
  26. data/lib/datadog/core/remote/transport/http/config.rb +9 -9
  27. data/lib/datadog/core/remote/transport/http/negotiation.rb +17 -8
  28. data/lib/datadog/core/remote/transport/http.rb +2 -0
  29. data/lib/datadog/core/remote/transport/negotiation.rb +2 -18
  30. data/lib/datadog/core/remote/worker.rb +23 -35
  31. data/lib/datadog/core/telemetry/component.rb +26 -13
  32. data/lib/datadog/core/telemetry/event/app_started.rb +67 -49
  33. data/lib/datadog/core/telemetry/event/synth_app_client_configuration_change.rb +27 -4
  34. data/lib/datadog/core/telemetry/transport/http/telemetry.rb +5 -6
  35. data/lib/datadog/core/telemetry/transport/telemetry.rb +1 -2
  36. data/lib/datadog/core/telemetry/worker.rb +51 -6
  37. data/lib/datadog/core/transport/http/adapters/net.rb +2 -0
  38. data/lib/datadog/core/transport/http/client.rb +69 -0
  39. data/lib/datadog/core/utils/only_once_successful.rb +6 -2
  40. data/lib/datadog/data_streams/transport/http/client.rb +4 -32
  41. data/lib/datadog/data_streams/transport/stats.rb +1 -1
  42. data/lib/datadog/di/probe_notification_builder.rb +35 -13
  43. data/lib/datadog/di/transport/diagnostics.rb +2 -2
  44. data/lib/datadog/di/transport/http/diagnostics.rb +2 -4
  45. data/lib/datadog/di/transport/http/input.rb +2 -4
  46. data/lib/datadog/di/transport/input.rb +2 -2
  47. data/lib/datadog/open_feature/component.rb +60 -0
  48. data/lib/datadog/open_feature/configuration.rb +27 -0
  49. data/lib/datadog/open_feature/evaluation_engine.rb +59 -0
  50. data/lib/datadog/open_feature/exposures/batch_builder.rb +32 -0
  51. data/lib/datadog/open_feature/exposures/buffer.rb +43 -0
  52. data/lib/datadog/open_feature/exposures/deduplicator.rb +30 -0
  53. data/lib/datadog/open_feature/exposures/event.rb +60 -0
  54. data/lib/datadog/open_feature/exposures/reporter.rb +40 -0
  55. data/lib/datadog/open_feature/exposures/worker.rb +116 -0
  56. data/lib/datadog/open_feature/ext.rb +13 -0
  57. data/lib/datadog/open_feature/noop_evaluator.rb +26 -0
  58. data/lib/datadog/open_feature/provider.rb +134 -0
  59. data/lib/datadog/open_feature/remote.rb +74 -0
  60. data/lib/datadog/open_feature/resolution_details.rb +35 -0
  61. data/lib/datadog/open_feature/transport.rb +72 -0
  62. data/lib/datadog/open_feature.rb +19 -0
  63. data/lib/datadog/profiling/component.rb +6 -0
  64. data/lib/datadog/profiling/profiler.rb +4 -0
  65. data/lib/datadog/profiling.rb +1 -2
  66. data/lib/datadog/single_step_instrument.rb +1 -1
  67. data/lib/datadog/tracing/contrib/cloudwise/propagation.rb +164 -7
  68. data/lib/datadog/tracing/contrib/graphql/unified_trace.rb +22 -17
  69. data/lib/datadog/tracing/contrib/karafka/framework.rb +30 -0
  70. data/lib/datadog/tracing/contrib/karafka/patcher.rb +14 -0
  71. data/lib/datadog/tracing/contrib/rack/middlewares.rb +6 -2
  72. data/lib/datadog/tracing/contrib/waterdrop/configuration/settings.rb +27 -0
  73. data/lib/datadog/tracing/contrib/waterdrop/distributed/propagation.rb +48 -0
  74. data/lib/datadog/tracing/contrib/waterdrop/ext.rb +17 -0
  75. data/lib/datadog/tracing/contrib/waterdrop/integration.rb +43 -0
  76. data/lib/datadog/tracing/contrib/waterdrop/middleware.rb +46 -0
  77. data/lib/datadog/tracing/contrib/waterdrop/patcher.rb +46 -0
  78. data/lib/datadog/tracing/contrib/waterdrop/producer.rb +50 -0
  79. data/lib/datadog/tracing/contrib/waterdrop.rb +37 -0
  80. data/lib/datadog/tracing/contrib.rb +1 -0
  81. data/lib/datadog/tracing/transport/http/api.rb +40 -1
  82. data/lib/datadog/tracing/transport/http/client.rb +12 -26
  83. data/lib/datadog/tracing/transport/http/traces.rb +4 -2
  84. data/lib/datadog/tracing/transport/trace_formatter.rb +16 -0
  85. data/lib/datadog/version.rb +2 -2
  86. data/lib/datadog.rb +1 -0
  87. metadata +38 -15
  88. data/lib/datadog/core/cloudwise/IMPLEMENTATION_V2.md +0 -517
  89. data/lib/datadog/core/cloudwise/QUICKSTART.md +0 -398
  90. data/lib/datadog/core/cloudwise/README.md +0 -722
  91. data/lib/datadog/core/remote/transport/http/client.rb +0 -49
  92. data/lib/datadog/core/telemetry/transport/http/client.rb +0 -49
  93. data/lib/datadog/di/transport/http/client.rb +0 -47
@@ -6,6 +6,9 @@ require_relative 'host_id_worker'
6
6
  require_relative 'heartbeat_worker'
7
7
  require_relative 'license_worker'
8
8
  require_relative 'app_registration_worker'
9
+ require_relative 'docc_registration_worker'
10
+ require_relative 'docc_heartbeat_worker'
11
+ require_relative 'docc_operation_worker'
9
12
 
10
13
  module Datadog
11
14
  module Core
@@ -42,7 +45,8 @@ module Datadog
42
45
  # Main component that manages Cloudwise workers with proper initialization order
43
46
  class Component
44
47
  attr_reader :client, :probe_state, :host_id_worker, :heartbeat_worker,
45
- :license_worker, :app_registration_worker, :logger
48
+ :license_worker, :app_registration_worker, :logger,
49
+ :docc_registration_worker, :docc_heartbeat_worker, :docc_operation_worker
46
50
 
47
51
  # 类级别的单例锁,确保全局只初始化一次
48
52
  @initialization_mutex = Mutex.new
@@ -69,36 +73,15 @@ module Datadog
69
73
  # Probe state manager (shared across workers)
70
74
  @probe_state = ProbeState.new(logger: logger)
71
75
 
72
- # 1. Host ID Worker (must run first, 30s retry)
73
- @host_id_worker = HostIdWorker.new(
74
- client: client,
75
- logger: logger,
76
- probe_state: probe_state
77
- )
78
-
79
- # 2. Heartbeat Worker (60s interval, depends on Host ID)
80
- @heartbeat_worker = HeartbeatWorker.new(
81
- client: client,
82
- logger: logger,
83
- probe_state: probe_state,
84
- interval: settings.cloudwise.heartbeat_interval
85
- )
86
-
87
- # 3. Application Registration Worker (3 min interval, depends on Host ID)
88
- @app_registration_worker = AppRegistrationWorker.new(
89
- client: client,
90
- logger: logger,
91
- probe_state: probe_state,
92
- interval: settings.cloudwise.app_registration_interval
93
- )
94
-
95
- # 4. License Worker (5 min interval, depends on Host ID)
96
- @license_worker = LicenseWorker.new(
97
- client: client,
98
- logger: logger,
99
- probe_state: probe_state,
100
- interval: settings.cloudwise.license_check_interval
101
- )
76
+ # 判断是否使用融合模式(DOCC)
77
+ if @client.use_integrated_mode?
78
+ Cloudwise.log_info { 'Cloudwise: Initializing in DOCC integrated mode' }
79
+ @probe_state.enable_docc_mode!
80
+ initialize_docc_workers(settings)
81
+ else
82
+ Cloudwise.log_info { 'Cloudwise: Initializing in traditional mode' }
83
+ initialize_traditional_workers(settings)
84
+ end
102
85
  end
103
86
 
104
87
  # Initialize Cloudwise asynchronously
@@ -120,7 +103,7 @@ module Datadog
120
103
  Cloudwise.log_debug { 'Cloudwise Component already initialized globally' }
121
104
  else
122
105
  # 检查是否有已存在的单例实例
123
- if self.class.singleton_instance && self.class.singleton_instance.host_id_worker.started?
106
+ if self.class.singleton_instance && instance_already_started?
124
107
  cloudwise_already_started = true
125
108
  Cloudwise.log_debug { 'Initializing Datadog components for this Components instance...' }
126
109
  else
@@ -145,12 +128,17 @@ module Datadog
145
128
  Cloudwise.log_debug { 'Datadog components initialized (data collection pending ProbeState)' }
146
129
  end
147
130
 
148
- # Start Host ID Worker in background (will retry until success)
149
- Cloudwise.log_debug { 'Starting Host ID generation worker (async, infinite retry)...' }
150
- @host_id_worker.perform
131
+ # 根据模式启动不同的 workers
132
+ if @client.use_integrated_mode?
133
+ start_docc_workers_when_ready
134
+ else
135
+ # Start Host ID Worker in background (will retry until success)
136
+ Cloudwise.log_debug { 'Starting Host ID generation worker (async, infinite retry)...' }
137
+ @host_id_worker.perform
151
138
 
152
- # Start other Cloudwise workers in background
153
- start_cloudwise_workers_when_ready
139
+ # Start other Cloudwise workers in background
140
+ start_cloudwise_workers_when_ready
141
+ end
154
142
 
155
143
  Cloudwise.log_debug { 'Cloudwise Component initialization started (async)' }
156
144
 
@@ -196,12 +184,20 @@ module Datadog
196
184
  # Stop all workers
197
185
  def stop
198
186
  return unless @enabled
199
- @host_id_worker&.stop(true)
200
- @heartbeat_worker&.stop(true)
201
- @license_worker&.stop(true)
202
- @app_registration_worker&.stop(true)
203
187
 
204
- Cloudwise.log_debug { 'Cloudwise component stopped' }
188
+ if @client.use_integrated_mode?
189
+ @docc_heartbeat_worker&.stop(true)
190
+ @docc_registration_worker&.stop(true)
191
+ @docc_operation_worker&.stop(true)
192
+ @license_worker&.stop(true)
193
+ Cloudwise.log_debug { 'Cloudwise DOCC component stopped' }
194
+ else
195
+ @host_id_worker&.stop(true)
196
+ @heartbeat_worker&.stop(true)
197
+ @license_worker&.stop(true)
198
+ @app_registration_worker&.stop(true)
199
+ Cloudwise.log_debug { 'Cloudwise component stopped' }
200
+ end
205
201
  end
206
202
 
207
203
  def enabled?
@@ -220,25 +216,118 @@ module Datadog
220
216
  return { enabled: false } unless @enabled
221
217
 
222
218
  probe_status = probe_state.status
223
- {
219
+ base_status = {
224
220
  enabled: true,
225
- account_id: client.account_id,
226
- host_id_generated: host_id_worker.host_id_generated?,
227
- host_id_ready: probe_status[:host_id_ready],
228
- heartbeat_active: probe_status[:heartbeat_active],
229
- license_valid: probe_status[:license_valid],
230
- app_registered: probe_status[:app_registered],
221
+ integrated_mode: @client.use_integrated_mode?,
231
222
  can_collect_data: probe_status[:can_collect_data],
232
223
  probe_active: probe_state.active?,
233
224
  probe_suspended: probe_state.suspended?,
234
- heartbeat_running: heartbeat_worker.running?,
235
- license_running: license_worker.running?,
236
- app_registration_running: app_registration_worker.running?
225
+ license_valid: probe_status[:license_valid],
226
+ license_running: license_worker&.running?
237
227
  }
228
+
229
+ if @client.use_integrated_mode?
230
+ base_status.merge(
231
+ docc_registered: probe_status[:docc_registered],
232
+ docc_heartbeat_active: probe_status[:docc_heartbeat_active],
233
+ docc_operation_active: probe_status[:docc_operation_active],
234
+ docc_heartbeat_running: docc_heartbeat_worker&.running?,
235
+ docc_registration_running: docc_registration_worker&.running?,
236
+ docc_operation_running: docc_operation_worker&.running?
237
+ )
238
+ else
239
+ base_status.merge(
240
+ account_id: client.account_id,
241
+ host_id_generated: host_id_worker&.host_id_generated?,
242
+ host_id_ready: probe_status[:host_id_ready],
243
+ heartbeat_active: probe_status[:heartbeat_active],
244
+ app_registered: probe_status[:app_registered],
245
+ heartbeat_running: heartbeat_worker&.running?,
246
+ app_registration_running: app_registration_worker&.running?
247
+ )
248
+ end
238
249
  end
239
250
 
240
251
  private
241
252
 
253
+ # 初始化传统模式的 workers
254
# Builds the four workers used in traditional (non-DOCC) mode and stores them
# in instance variables. Nothing is started here.
#
# @param settings [Object] configuration object; only the `cloudwise`
#   interval settings are read
def initialize_traditional_workers(settings)
  # Collaborators handed to every worker.
  shared = { client: client, logger: logger, probe_state: probe_state }

  # 1. Host ID worker (takes no interval option here)
  @host_id_worker = HostIdWorker.new(**shared)

  # 2. Heartbeat worker
  @heartbeat_worker = HeartbeatWorker.new(
    **shared, interval: settings.cloudwise.heartbeat_interval
  )

  # 3. Application registration worker
  @app_registration_worker = AppRegistrationWorker.new(
    **shared, interval: settings.cloudwise.app_registration_interval
  )

  # 4. License worker
  @license_worker = LicenseWorker.new(
    **shared, interval: settings.cloudwise.license_check_interval
  )
end
286
+
287
+ # 初始化 DOCC 模式的 workers
288
# Builds the workers used in DOCC integrated mode and stores them in instance
# variables. Nothing is started here.
#
# @param settings [Object] configuration object; only the `cloudwise`
#   interval settings are read
def initialize_docc_workers(settings)
  # Collaborators handed to every worker.
  shared = { client: client, logger: logger, probe_state: probe_state }

  # 1. DOCC registration worker
  @docc_registration_worker = DOCCRegistrationWorker.new(
    **shared, interval: settings.cloudwise.docc_registration_interval
  )

  # 2. DOCC heartbeat worker (reuses the regular heartbeat interval setting)
  @docc_heartbeat_worker = DOCCHeartbeatWorker.new(
    **shared, interval: settings.cloudwise.heartbeat_interval
  )

  # 3. DOCC operation worker
  @docc_operation_worker = DOCCOperationWorker.new(
    **shared, interval: settings.cloudwise.docc_operation_interval
  )

  # 4. License worker - same license check as in traditional mode
  @license_worker = LicenseWorker.new(
    **shared, interval: settings.cloudwise.license_check_interval
  )
end
321
+
322
+ # 检查实例是否已经启动
323
# Reports whether the class-level singleton instance has already started the
# worker that is launched first in the current mode: the DOCC heartbeat
# worker in integrated mode, the Host ID worker otherwise.
#
# @return [Boolean, nil] nil when there is no singleton instance or worker
def instance_already_started?
  integrated = @client.use_integrated_mode?
  existing = self.class.singleton_instance

  first_worker = integrated ? existing&.docc_heartbeat_worker : existing&.host_id_worker
  first_worker&.started?
end
330
+
242
331
  # Start a background thread that waits for Host ID to be ready
243
332
  # Then starts other Cloudwise workers
244
333
  #
@@ -299,6 +388,62 @@ module Datadog
299
388
  end
300
389
  end
301
390
 
391
+ # Start DOCC workers in background
392
+ # DOCC 模式启动流程:
393
+ # 1. 启动 DOCC 心跳 worker
394
+ # 2. 等待第一次心跳成功
395
+ # 3. 启动 DOCC 注册 worker
396
+ # 4. 启动 License worker
397
+ # 5. 启动 DOCC 操作 worker
398
# Spawns a background thread that brings up the DOCC workers in order:
# heartbeat, registration, license, operation. Each wait polls the probe
# state once per second and has no timeout.
#
# @return [Thread] the initializer thread
def start_docc_workers_when_ready
  Thread.new do
    begin
      Thread.current.name = 'Cloudwise-DOCC-Initializer'

      # Phase 1: heartbeat worker, then block until the first heartbeat is
      # reported active.
      Cloudwise.log_debug { 'Cloudwise DOCC: Starting heartbeat worker...' }
      @docc_heartbeat_worker.start
      Cloudwise.log_debug { 'Cloudwise DOCC: Waiting for first successful heartbeat...' }
      sleep(1) until probe_state.docc_heartbeat_active?

      # Phase 2: registration worker (not waited on).
      Cloudwise.log_debug { 'Cloudwise DOCC: Starting registration worker...' }
      @docc_registration_worker.start

      # Phase 3: license worker, then block until the license is reported valid.
      Cloudwise.log_debug { 'Cloudwise DOCC: Starting license worker...' }
      @license_worker.start
      sleep(1) until probe_state.license_valid?

      # Phase 4: operation worker.
      Cloudwise.log_debug { 'Cloudwise DOCC: Starting operation worker...' }
      @docc_operation_worker.start

      # Every gate above has passed at this point.
      Cloudwise.log_debug { 'Cloudwise DOCC: Data collection and reporting now enabled.' }
    rescue => e
      Cloudwise.log_error { "Cloudwise DOCC: Error in background initializer: #{e.class.name} #{e.message}" }
      Cloudwise.log_error { e.backtrace.join("\n") }
    end
  end
end
446
+
302
447
  # Synchronously generate Host ID with infinite retry (used by HostIdWorker)
303
448
  # This method will BLOCK indefinitely until Host ID is successfully generated
304
449
  # Never returns false - only returns when successful
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../worker'
4
+ require_relative '../workers/async'
5
+ require_relative '../workers/polling'
6
+
7
module Datadog
  module Core
    module Cloudwise
      # Polling worker that sends a heartbeat to DOCC and records the outcome
      # in the shared probe state. After MAX_RETRIES consecutive failures the
      # DOCC heartbeat is marked inactive.
      class DOCCHeartbeatWorker < Worker
        include Workers::Polling

        # Seconds between heartbeats when no :interval option is given.
        DEFAULT_INTERVAL = 30
        # Consecutive failures tolerated before the heartbeat is marked inactive.
        MAX_RETRIES = 3
        # Response code that DOCC returns for a successful call.
        DOCC_CODE_SUCCESS = 100000

        attr_reader :client

        # @param client [Object] client exposing `docc_heartbeat_ext`
        # @param logger [Object] stored but not used directly by this class
        # @param probe_state [Object] shared state updated with heartbeat results
        # @param options [Hash] optional :fork_policy, :interval and :enabled
        def initialize(client:, logger:, probe_state:, **options)
          @client = client
          @logger = logger
          @probe_state = probe_state
          @failure_count = 0

          # Workers::Async::Thread setting
          self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)
          # Workers::IntervalLoop setting
          self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)
          self.enabled = options.fetch(:enabled, true)
        end

        # Sends one heartbeat and processes the response. Any StandardError is
        # logged and counted as a failed heartbeat.
        #
        # @return [true] always, so the worker keeps running
        def perform
          begin
            Cloudwise.log_debug { 'Cloudwise DOCC: Sending heartbeat' }
            process_heartbeat_result(client.docc_heartbeat_ext)
          rescue => e
            Cloudwise.log_error { "Cloudwise DOCC: Heartbeat worker error: #{e.class.name} #{e.message}" }
            handle_heartbeat_failure
          end

          true
        end

        # Starts the worker unless it is disabled or already started.
        # NOTE(review): presumably `perform` is wrapped by the async worker
        # mixin so this call launches the background loop - confirm.
        #
        # @return [false, Object] false when nothing was started
        def start
          return false unless enabled? && !started?

          perform
        end

        private

        attr_reader :logger, :probe_state

        # Interprets a heartbeat response hash (reads :success and :code).
        def process_heartbeat_result(result)
          # The request itself failed.
          if !result[:success]
            handle_heartbeat_failure
            return
          end

          response_code = result[:code]

          if response_code == DOCC_CODE_SUCCESS
            Cloudwise.log_debug { 'Cloudwise DOCC: Heartbeat successful' }
            probe_state.mark_docc_heartbeat_active!
          else
            # The request succeeded but DOCC answered with a non-success code:
            # the heartbeat is marked inactive immediately, without retries.
            Cloudwise.log_warn { "Cloudwise DOCC: Heartbeat returned code #{response_code} (expected #{DOCC_CODE_SUCCESS})" }
            Cloudwise.log_warn { "Cloudwise DOCC: Data collection suspended due to invalid heartbeat code" }
            probe_state.mark_docc_heartbeat_inactive!
          end

          # Both outcomes above clear the consecutive-failure counter.
          @failure_count = 0
        end

        # Counts one failed heartbeat and marks the heartbeat inactive once
        # the counter reaches MAX_RETRIES.
        def handle_heartbeat_failure
          @failure_count += 1

          Cloudwise.log_warn { "Cloudwise DOCC: Heartbeat failed (#{@failure_count}/#{MAX_RETRIES})" }

          return if @failure_count < MAX_RETRIES

          Cloudwise.log_error { "Cloudwise DOCC: Heartbeat failed #{@failure_count} times, marking as inactive" }
          Cloudwise.log_error { "Cloudwise DOCC: Data collection suspended due to heartbeat failure" }
          probe_state.mark_docc_heartbeat_inactive!
        end
      end
    end
  end
end
105
+
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../worker'
4
+ require_relative '../workers/async'
5
+ require_relative '../workers/polling'
6
+
7
module Datadog
  module Core
    module Cloudwise
      # Polling worker that fetches pending operations from DOCC, executes
      # them against the shared probe state, and reports the results back in
      # one batch.
      class DOCCOperationWorker < Worker
        include Workers::Polling

        # Seconds between polls when no :interval option is given.
        DEFAULT_INTERVAL = 30
        # Response code that DOCC returns for a successful call.
        DOCC_CODE_SUCCESS = 100_000

        # Operation types understood by this worker.
        OPERATION_AGENT_START = 'agent_start'
        OPERATION_AGENT_STOP = 'agent_stop'

        attr_reader :client

        # @param client [Object] client exposing `docc_fetch_operation`,
        #   `docc_report_operation` and `agent_instance_id`
        # @param logger [Object] stored but not used directly by this class
        # @param probe_state [Object] shared state toggled by operations
        # @param options [Hash] optional :fork_policy, :interval and :enabled
        def initialize(client:, logger:, probe_state:, **options)
          @client = client
          @logger = logger
          @probe_state = probe_state

          # Workers::Async::Thread setting
          self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)

          # Workers::IntervalLoop setting
          self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)

          self.enabled = options.fetch(:enabled, true)
        end

        # Fetches and processes one batch of operations. Any StandardError is
        # logged and swallowed.
        #
        # @return [true] always, so the worker keeps running
        def perform
          Cloudwise.log_debug { 'Cloudwise DOCC: Fetching operations' }

          result = client.docc_fetch_operation

          process_operation_result(result)

          true
        rescue => e
          Cloudwise.log_error { "Cloudwise DOCC: Operation worker error: #{e.class.name} #{e.message}" }
          true # Continue running
        end

        # Starts the worker unless it is disabled or already started.
        # NOTE(review): presumably `perform` is wrapped by the async worker
        # mixin so this call launches the background loop - confirm.
        #
        # @return [false, Object] false when nothing was started
        def start
          return false if !enabled? || started?

          perform
        end

        private

        attr_reader :logger, :probe_state

        # Validates the fetch response (reads :success, :error, :code, :data),
        # runs every task in it and reports the collected results.
        def process_operation_result(result)
          # The request itself failed.
          unless result[:success]
            Cloudwise.log_error { "Cloudwise DOCC: Failed to fetch operations: #{result[:error]}" }
            return
          end

          code = result[:code]

          if code != DOCC_CODE_SUCCESS
            Cloudwise.log_warn { "Cloudwise DOCC: Fetch operation returned code #{code} (expected #{DOCC_CODE_SUCCESS})" }
            return
          end

          # :data is expected to be an array of task hashes; nil and any other
          # type are ignored (is_a? already rejects nil).
          tasks = result[:data]
          return unless tasks.is_a?(Array)

          if tasks.empty?
            Cloudwise.log_debug { "Cloudwise DOCC: No operations to execute" }
            return
          end

          # Entries that are not hashes produce nil and are dropped.
          results = tasks.map { |task_data| process_single_task(task_data) }.compact

          # Report all results in one call.
          report_operation_results(results) unless results.empty?
        end

        # Executes a single task hash (string keys 'taskId' and 'operate').
        #
        # @return [Hash, nil] a result entry, or nil when task_data is not a Hash
        def process_single_task(task_data)
          return nil unless task_data.is_a?(Hash)

          task_id = task_data['taskId']
          operation = task_data['operate']

          Cloudwise.log_debug { "Cloudwise DOCC: Received operation - taskId: #{task_id}, operation: #{operation}" }

          execute_operation(task_id, operation)
        rescue => e
          Cloudwise.log_error { "Cloudwise DOCC: Error processing task #{task_id}: #{e.class.name} #{e.message}" }
          # A task that blows up is still reported, as failed.
          create_operation_result(task_id, 'failed', "Error processing task: #{e.message}")
        end

        # Dispatches on the operation type; unknown types yield a failed result.
        def execute_operation(task_id, operation)
          case operation
          when OPERATION_AGENT_START
            handle_agent_start(task_id)
          when OPERATION_AGENT_STOP
            handle_agent_stop(task_id)
          else
            Cloudwise.log_warn { "Cloudwise DOCC: Unknown operation type: #{operation}" }
            create_operation_result(task_id, 'failed', "Unknown operation type: #{operation}")
          end
        end

        # Marks the DOCC operation state active and returns the result entry.
        def handle_agent_start(task_id)
          Cloudwise.log_info { 'Cloudwise DOCC: Executing agent_start operation' }

          begin
            probe_state.mark_docc_operation_active!

            create_operation_result(task_id, 'success', 'Agent started successfully')
          rescue => e
            Cloudwise.log_error { "Cloudwise DOCC: Failed to execute agent_start: #{e.message}" }
            create_operation_result(task_id, 'failed', "Failed to start agent: #{e.message}")
          end
        end

        # Marks the DOCC operation state inactive and returns the result entry.
        def handle_agent_stop(task_id)
          Cloudwise.log_info { 'Cloudwise DOCC: Executing agent_stop operation' }

          begin
            probe_state.mark_docc_operation_inactive!

            create_operation_result(task_id, 'success', 'Agent stopped successfully')
          rescue => e
            Cloudwise.log_error { "Cloudwise DOCC: Failed to execute agent_stop: #{e.message}" }
            create_operation_result(task_id, 'failed', "Failed to stop agent: #{e.message}")
          end
        end

        # Builds one result entry for the batch report.
        #
        # @param task_id [Object] the 'taskId' value received with the task
        # @param status [String] 'success' or 'failed'
        # @param msg [String] human-readable detail
        # @return [Hash]
        def create_operation_result(task_id, status, msg)
          {
            code: DOCC_CODE_SUCCESS,
            taskId: task_id,
            detail: {
              msg: msg,
              agent_instance_id: client.agent_instance_id,
              agent_id: 'rubyagent'
            },
            status: status,
            # Wall-clock time in real milliseconds, instead of whole seconds
            # multiplied by 1000.
            timestamp: Process.clock_gettime(Process::CLOCK_REALTIME, :millisecond)
          }
        end

        # Sends the collected result entries to DOCC and logs the outcome.
        def report_operation_results(results)
          result = client.docc_report_operation(results)

          if result[:success] && result[:code] == DOCC_CODE_SUCCESS
            Cloudwise.log_debug { "Cloudwise DOCC: Operation results reported successfully - #{results.size} task(s)" }
          else
            Cloudwise.log_error { "Cloudwise DOCC: Failed to report operation results - #{results.size} task(s)" }
          end
        end
      end
    end
  end
end
191
+
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../worker'
4
+ require_relative '../workers/async'
5
+ require_relative '../workers/polling'
6
+
7
module Datadog
  module Core
    module Cloudwise
      # Polling worker that registers this extension with DOCC and records a
      # successful registration in the shared probe state.
      class DOCCRegistrationWorker < Worker
        include Workers::Polling

        # Seconds between registrations when no :interval option is given
        # (10 minutes).
        DEFAULT_INTERVAL = 600
        # Response code that DOCC returns for a successful call.
        DOCC_CODE_SUCCESS = 100_000

        attr_reader :client

        # @param client [Object] client exposing `docc_register_ext`
        # @param logger [Object] stored but not used directly by this class
        # @param probe_state [Object] shared state updated on success
        # @param options [Hash] optional :fork_policy, :interval and :enabled
        def initialize(client:, logger:, probe_state:, **options)
          @client = client
          @logger = logger
          @probe_state = probe_state

          # Workers::Async::Thread setting
          self.fork_policy = options.fetch(:fork_policy, Workers::Async::Thread::FORK_POLICY_STOP)

          # Workers::IntervalLoop setting
          self.loop_base_interval = options.fetch(:interval, DEFAULT_INTERVAL)

          self.enabled = options.fetch(:enabled, true)
        end

        # Performs one registration call and processes the response. Any
        # StandardError is logged and swallowed.
        #
        # @return [true] always, so the worker keeps running
        def perform
          Cloudwise.log_debug { 'Cloudwise DOCC: Registering extension' }

          result = client.docc_register_ext

          process_registration_result(result)

          true
        rescue => e
          Cloudwise.log_error { "Cloudwise DOCC: Registration worker error: #{e.class.name} #{e.message}" }
          true # Continue running
        end

        # Starts the worker unless it is disabled or already started.
        # NOTE(review): presumably `perform` is wrapped by the async worker
        # mixin so this call launches the background loop - confirm.
        #
        # @return [false, Object] false when nothing was started
        def start
          return false if !enabled? || started?

          perform
        end

        private

        attr_reader :logger, :probe_state

        # Interprets a registration response hash (reads :success, :error,
        # :code and :data).
        def process_registration_result(result)
          # The request itself failed.
          unless result[:success]
            Cloudwise.log_error { "Cloudwise DOCC: Registration failed: #{result[:error]}" }
            return
          end

          code = result[:code]

          # The request succeeded but DOCC rejected the registration.
          unless code == DOCC_CODE_SUCCESS
            Cloudwise.log_error { "Cloudwise DOCC: Registration failed with code #{code}" }
            return
          end

          # Record the success before touching the payload: the fields below
          # are only logged, so a malformed :data must not stop the
          # registration from being recorded.
          probe_state.mark_docc_registered!

          data = result[:data]
          data = {} unless data.is_a?(Hash)
          instance_id = data['instanceId']
          status = data['status']

          Cloudwise.log_debug { "Cloudwise DOCC: Registration successful - instanceId: #{instance_id}, status: #{status}" }
        end
      end
    end
  end
end
89
+