drtrace 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +74 -4
  2. package/agents/CONTRIBUTING.md +296 -0
  3. package/agents/README.md +174 -0
  4. package/agents/daemon-method-selection.md +370 -0
  5. package/agents/integration-guides/cpp-best-practices.md +218 -0
  6. package/agents/integration-guides/cpp-ros-integration.md +88 -0
  7. package/agents/log-analysis.md +218 -0
  8. package/agents/log-help.md +226 -0
  9. package/agents/log-init.md +933 -0
  10. package/agents/log-it.md +1126 -0
  11. package/bin/init.js +4 -4
  12. package/dist/bin/init.js +31 -0
  13. package/dist/browser.d.ts +28 -0
  14. package/dist/browser.js +91 -0
  15. package/dist/config-schema.d.ts +2 -2
  16. package/dist/index.d.ts +1 -1
  17. package/dist/index.js +2 -2
  18. package/dist/init.d.ts +44 -2
  19. package/dist/init.js +460 -30
  20. package/dist/logger.d.ts +7 -0
  21. package/dist/logger.js +30 -4
  22. package/dist/node.d.ts +13 -0
  23. package/dist/node.js +67 -0
  24. package/dist/resources/agents/CONTRIBUTING.md +296 -0
  25. package/dist/resources/agents/README.md +174 -0
  26. package/dist/resources/agents/daemon-method-selection.md +370 -0
  27. package/dist/resources/agents/integration-guides/cpp-best-practices.md +218 -0
  28. package/dist/resources/agents/integration-guides/cpp-ros-integration.md +88 -0
  29. package/dist/resources/agents/log-analysis.md +218 -0
  30. package/dist/resources/agents/log-help.md +226 -0
  31. package/dist/resources/agents/log-init.md +933 -0
  32. package/dist/resources/agents/log-it.md +1126 -0
  33. package/dist/resources/cpp/drtrace_sink.hpp +1249 -0
  34. package/dist/transport.js +5 -1
  35. package/dist/types.d.ts +8 -2
  36. package/package.json +28 -4
  37. package/.eslintrc.js +0 -20
  38. package/jest.config.js +0 -11
  39. package/src/client.ts +0 -68
  40. package/src/config-schema.ts +0 -115
  41. package/src/config.ts +0 -326
  42. package/src/index.ts +0 -3
  43. package/src/init.ts +0 -451
  44. package/src/logger.ts +0 -56
  45. package/src/queue.ts +0 -105
  46. package/src/transport.ts +0 -60
  47. package/src/types.ts +0 -20
  48. package/tests/client.test.ts +0 -66
  49. package/tests/config-schema.test.ts +0 -198
  50. package/tests/config.test.ts +0 -456
  51. package/tests/queue.test.ts +0 -72
  52. package/tests/transport.test.ts +0 -52
  53. package/tsconfig.json +0 -18
@@ -0,0 +1,1249 @@
1
+ /**
2
+ * DrTrace C++ Client Integration
3
+ *
4
+ * A spdlog sink that enriches log records and sends them to the DrTrace daemon
5
+ * via HTTP POST, matching the unified schema from Story 4.1.
6
+ */
7
+
8
+ #pragma once
9
+ #define DRTRACE_VERSION "0.4.0"
10
+
11
// Standard library includes required for header-only implementation
#include <atomic>
#include <cctype>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
27
+
28
+ // libcurl for HTTP transport
29
+ #include <curl/curl.h>
30
+
31
+ // spdlog includes (optional - only needed for spdlog adapter)
32
+ // By default, try to detect spdlog availability (if headers are present)
33
+ // Users can explicitly disable with DRTRACE_DISABLE_SPDLOG
34
+ #ifndef DRTRACE_DISABLE_SPDLOG
35
+ #if __has_include(<spdlog/spdlog.h>)
36
+ #include <spdlog/details/log_msg.h>
37
+ #include <spdlog/sinks/base_sink.h>
38
+ #include <spdlog/spdlog.h>
39
+ #define DRTRACE_SPDLOG_AVAILABLE 1
40
+ #else
41
+ #define DRTRACE_SPDLOG_AVAILABLE 0
42
+ #endif
43
+ #else
44
+ // Explicitly disabled
45
+ #define DRTRACE_SPDLOG_AVAILABLE 0
46
+ #endif
47
+
48
+ namespace drtrace {
49
+
50
+ // Log level enum - defined here for use in DrtraceConfig
51
+ // Also available via core::LogLevel (defined later as alias)
52
namespace core {
/// Severity of a log record; numeric values increase with severity.
enum class LogLevel {
  DEBUG = 0,
  INFO = 1,
  WARN = 2,
  ERROR = 3,
  CRITICAL = 4
};
}  // namespace core

/**
 * Translate a textual level name into a core::LogLevel.
 *
 * Matching ignores letter case. Recognised names are "debug", "info",
 * "warn" / "warning", "error" and "critical".
 *
 * @param str NUL-terminated level name; may be nullptr.
 * @return The matching level, or DEBUG when str is nullptr, empty, or not a
 *         recognised name (kept for backward compatibility).
 */
inline core::LogLevel parse_log_level(const char* str) {
  if (str == nullptr || *str == '\0') {
    return core::LogLevel::DEBUG;
  }

  // Build a lowercase copy; the unsigned char cast keeps std::tolower defined
  // for bytes outside the 7-bit ASCII range.
  std::string lowered;
  for (const char* cursor = str; *cursor != '\0'; ++cursor) {
    lowered.push_back(
        static_cast<char>(std::tolower(static_cast<unsigned char>(*cursor))));
  }

  struct NameToLevel {
    const char* name;
    core::LogLevel level;
  };
  static const NameToLevel kKnownNames[] = {
      {"debug", core::LogLevel::DEBUG},
      {"info", core::LogLevel::INFO},
      {"warn", core::LogLevel::WARN},
      {"warning", core::LogLevel::WARN},
      {"error", core::LogLevel::ERROR},
      {"critical", core::LogLevel::CRITICAL},
  };

  for (const NameToLevel& entry : kKnownNames) {
    if (lowered == entry.name) {
      return entry.level;
    }
  }

  return core::LogLevel::DEBUG;  // Unrecognised name
}
83
+
84
+ /**
85
+ * Configuration for the DrTrace C++ client.
86
+ */
87
+ struct DrtraceConfig {
88
+ std::string application_id;
89
+ std::string daemon_url = "http://localhost:8001/logs/ingest";
90
+ std::string service_name;
91
+ bool enabled = true;
92
+ size_t batch_size = 10;
93
+ std::chrono::milliseconds flush_interval{5000}; // 5 seconds
94
+ std::chrono::milliseconds circuit_reset_interval{30000}; // 30 seconds - circuit breaker cooldown
95
+
96
+ /**
97
+ * Maximum number of log records to buffer.
98
+ * When exceeded, oldest logs are dropped (backpressure).
99
+ * Set to 0 for unlimited (not recommended for production).
100
+ * Default: 10000
101
+ */
102
+ size_t max_buffer_size = 10000;
103
+
104
+ /**
105
+ * Minimum log level to send to daemon.
106
+ * Logs below this level are filtered at the client (not sent).
107
+ * Default: DEBUG (send everything - backward compatible)
108
+ */
109
+ core::LogLevel min_level = core::LogLevel::DEBUG;
110
+
111
+ /**
112
+ * HTTP request timeout in milliseconds.
113
+ * Default: 1000 (1 second)
114
+ */
115
+ std::chrono::milliseconds http_timeout{1000};
116
+
117
+ /**
118
+ * Base backoff time for retry attempts.
119
+ * Actual backoff = base_backoff * attempt_number
120
+ * Default: 100ms
121
+ */
122
+ std::chrono::milliseconds retry_backoff{100};
123
+
124
+ /**
125
+ * Maximum retry attempts for failed requests.
126
+ * Default: 3
127
+ */
128
+ int max_retries = 3;
129
+
130
+ /**
131
+ * Load configuration from environment variables, with fallback to config file.
132
+ *
133
+ * Priority (highest to lowest):
134
+ * 1. DRTRACE_APPLICATION_ID environment variable
135
+ * 2. _drtrace/config.json file (application_id field)
136
+ *
137
+ * Required:
138
+ * - DRTRACE_APPLICATION_ID (env var) OR application_id in _drtrace/config.json
139
+ *
140
+ * Optional:
141
+ * - DRTRACE_DAEMON_URL (default: http://localhost:8001/logs/ingest)
142
+ * - DRTRACE_SERVICE_NAME
143
+ * - DRTRACE_ENABLED (default: true, set to "false" to disable)
144
+ */
145
+ static DrtraceConfig from_env();
146
+ };
147
+
148
+ namespace detail {
149
+ // Reference counter for curl_global_init
150
+ // curl_global_init is idempotent (safe to call multiple times)
151
+ // We use reference counting to ensure it's initialized, but never call
152
+ // curl_global_cleanup (should only be called at program termination)
153
+ inline std::atomic<int>& curl_init_ref_count() {
154
+ static std::atomic<int> count{0};
155
+ return count;
156
+ }
157
+
158
+ inline std::mutex& curl_init_mutex() {
159
+ static std::mutex mtx;
160
+ return mtx;
161
+ }
162
+
163
+ // Initialize curl once (thread-safe, idempotent)
164
+ inline void ensure_curl_initialized() {
165
+ std::lock_guard<std::mutex> lock(curl_init_mutex());
166
+ if (curl_init_ref_count().fetch_add(1) == 0) {
167
+ curl_global_init(CURL_GLOBAL_DEFAULT);
168
+ }
169
+ }
170
+ }
171
+
172
+ /**
173
+ * HTTP transport for sending log batches to the daemon.
174
+ *
175
+ * Uses libcurl for HTTP POST requests. Handles retries and errors
176
+ * gracefully without throwing exceptions.
177
+ */
178
+ class HttpTransport {
179
+ public:
180
+ inline explicit HttpTransport(const DrtraceConfig& config);
181
+ inline ~HttpTransport();
182
+
183
+ // Non-copyable
184
+ HttpTransport(const HttpTransport&) = delete;
185
+ HttpTransport& operator=(const HttpTransport&) = delete;
186
+
187
+ /**
188
+ * Send a batch of log records to the daemon.
189
+ *
190
+ * This method is thread-safe and handles network errors gracefully.
191
+ * Returns true if the batch was sent successfully, false otherwise.
192
+ *
193
+ * Circuit Breaker Behavior:
194
+ * - When daemon is unavailable, circuit opens and fast-fails (< 1µs)
195
+ * - After circuit_reset_interval, one probe request is allowed
196
+ * - On success, circuit closes; on failure, circuit stays open
197
+ */
198
+ inline bool send_batch(const std::vector<std::string>& log_records);
199
+
200
+ /**
201
+ * Check if circuit breaker is open (for testing).
202
+ */
203
+ bool is_circuit_open_for_test() const {
204
+ return is_circuit_open();
205
+ }
206
+
207
+ private:
208
+ std::string endpoint_;
209
+ std::string application_id_;
210
+ int max_retries_;
211
+ std::chrono::milliseconds base_backoff_ms_;
212
+ std::chrono::milliseconds http_timeout_;
213
+
214
+ void* curl_handle_ = nullptr; // CURL* handle
215
+
216
+ // Thread safety: protect curl_handle_ access
217
+ std::mutex curl_mutex_;
218
+ std::atomic<bool> shutdown_flag_{false};
219
+
220
+ // Circuit breaker state - atomic for thread safety
221
+ // States: CLOSED (normal) -> OPEN (fast-fail) -> HALF-OPEN (probe) -> CLOSED/OPEN
222
+ std::atomic<bool> circuit_open_{false};
223
+ std::atomic<int64_t> circuit_open_until_ms_{0};
224
+ std::chrono::milliseconds circuit_reset_interval_{30000};
225
+
226
+ /**
227
+ * Get current time in milliseconds since epoch.
228
+ */
229
+ int64_t now_ms() const {
230
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
231
+ std::chrono::steady_clock::now().time_since_epoch()
232
+ ).count();
233
+ }
234
+
235
+ /**
236
+ * Check if circuit is open (should fast-fail).
237
+ * Returns false if circuit is closed or cooldown has expired (half-open).
238
+ */
239
+ bool is_circuit_open() const {
240
+ if (!circuit_open_.load(std::memory_order_acquire)) {
241
+ return false; // Fast path - circuit closed
242
+ }
243
+ // Check if cooldown expired (half-open state - allow probe request)
244
+ if (now_ms() >= circuit_open_until_ms_.load(std::memory_order_acquire)) {
245
+ return false; // Allow probe request
246
+ }
247
+ return true; // Fast-fail
248
+ }
249
+
250
+ /**
251
+ * Open the circuit (daemon unavailable).
252
+ * Sets cooldown timer to circuit_reset_interval from now.
253
+ */
254
+ void open_circuit() {
255
+ circuit_open_until_ms_.store(
256
+ now_ms() + circuit_reset_interval_.count(),
257
+ std::memory_order_release
258
+ );
259
+ circuit_open_.store(true, std::memory_order_release);
260
+ }
261
+
262
+ /**
263
+ * Close the circuit (daemon is available).
264
+ */
265
+ void close_circuit() {
266
+ circuit_open_.store(false, std::memory_order_release);
267
+ }
268
+
269
+ // Wait for any in-flight operations to complete
270
+ inline void wait_for_operations();
271
+ };
272
+
273
+ // =========================
274
+ // Core Components (spdlog-independent)
275
+ // =========================
276
+
277
+ namespace core {
278
+
279
+ // LogLevel enum is defined at the top of the drtrace namespace
280
+ // (before DrtraceConfig, so it can be used in config)
281
+ // core::LogLevel is available from there
282
+
283
+ /**
284
+ * Source location information (optional).
285
+ */
286
+ struct SourceLocation {
287
+ std::string filename; // Use std::string for memory safety (copies strings)
288
+ int line = 0;
289
+ std::string function; // Use std::string for memory safety (copies strings)
290
+ };
291
+
292
+ /**
293
+ * Log record structure (spdlog-independent).
294
+ */
295
+ struct LogRecord {
296
+ LogLevel level;
297
+ std::string message;
298
+ std::string logger_name;
299
+ std::chrono::system_clock::time_point timestamp;
300
+ SourceLocation source;
301
+
302
+ // Additional context (optional)
303
+ std::map<std::string, std::string> context;
304
+ };
305
+
306
+ /**
307
+ * Core DrTrace logging engine (spdlog-independent).
308
+ *
309
+ * Handles:
310
+ * - Serialization of LogRecord to JSON
311
+ * - Batching records
312
+ * - Flushing batches via HttpTransport
313
+ * - Thread-safe operations
314
+ */
315
+ class DrtraceCore {
316
+ public:
317
+ explicit DrtraceCore(const DrtraceConfig& config);
318
+ ~DrtraceCore();
319
+
320
+ // Non-copyable
321
+ DrtraceCore(const DrtraceCore&) = delete;
322
+ DrtraceCore& operator=(const DrtraceCore&) = delete;
323
+
324
+ /**
325
+ * Log a record (thread-safe).
326
+ */
327
+ void log(const LogRecord& record);
328
+
329
+ /**
330
+ * Flush pending records immediately.
331
+ */
332
+ void flush();
333
+
334
+ /**
335
+ * Check if enabled.
336
+ */
337
+ bool is_enabled() const { return config_.enabled; }
338
+
339
+ private:
340
+ const DrtraceConfig& config_;
341
+ std::unique_ptr<HttpTransport> transport_;
342
+ std::vector<std::string> batch_; // JSON strings
343
+ std::mutex batch_mutex_;
344
+
345
+ // Flush thread management
346
+ std::thread flush_thread_;
347
+ std::mutex flush_mutex_;
348
+ std::condition_variable flush_cv_;
349
+ bool should_stop_ = false;
350
+ bool flush_thread_running_ = false;
351
+
352
+ /**
353
+ * Serialize LogRecord to JSON string (unified schema).
354
+ */
355
+ std::string serialize_record(const LogRecord& record);
356
+
357
+ /**
358
+ * Escape JSON string.
359
+ */
360
+ std::string escape_json(const std::string& str);
361
+
362
+ /**
363
+ * Flush batch to daemon (internal, thread-safe).
364
+ */
365
+ void flush_internal();
366
+
367
+ /**
368
+ * Start background flush thread.
369
+ */
370
+ void start_flush_thread();
371
+
372
+ /**
373
+ * Stop background flush thread.
374
+ */
375
+ void stop_flush_thread();
376
+
377
+ /**
378
+ * Flush thread function.
379
+ */
380
+ void flush_thread_func();
381
+ };
382
+
383
+ } // namespace core
384
+
385
+ // =========================
386
+ // spdlog Adapter (Optional - requires spdlog)
387
+ // =========================
388
+
389
+ #if DRTRACE_SPDLOG_AVAILABLE
390
+
391
+ /**
392
+ * spdlog sink adapter for DrTrace.
393
+ *
394
+ * This sink converts spdlog log messages to core::LogRecord
395
+ * and forwards them to DrtraceCore.
396
+ *
397
+ * API surface unchanged from previous implementation (for consistency).
398
+ */
399
+ template <typename Mutex>
400
+ class DrtraceSink : public spdlog::sinks::base_sink<Mutex> {
401
+ public:
402
+ explicit DrtraceSink(const DrtraceConfig& config)
403
+ : config_(config), core_(std::make_unique<core::DrtraceCore>(config)) {
404
+ }
405
+
406
+ ~DrtraceSink() {
407
+ // Flush any remaining records
408
+ this->flush_();
409
+ }
410
+
411
+ protected:
412
+ void sink_it_(const spdlog::details::log_msg& msg) override {
413
+ if (!core_ || !core_->is_enabled()) {
414
+ return;
415
+ }
416
+
417
+ // Convert spdlog log_msg to core::LogRecord
418
+ core::LogRecord record = convert_to_log_record(msg);
419
+
420
+ // Delegate to core (thread-safe)
421
+ core_->log(record);
422
+ }
423
+
424
+ void flush_() override {
425
+ if (core_) {
426
+ core_->flush();
427
+ }
428
+ }
429
+
430
+ protected:
431
+ /**
432
+ * Convert spdlog log_msg to core::LogRecord.
433
+ * Protected for testing purposes.
434
+ */
435
+ core::LogRecord convert_to_log_record(const spdlog::details::log_msg& msg) {
436
+ core::LogRecord record;
437
+
438
+ // Map spdlog level to core::LogLevel
439
+ switch (msg.level) {
440
+ case spdlog::level::trace:
441
+ case spdlog::level::debug:
442
+ record.level = core::LogLevel::DEBUG;
443
+ break;
444
+ case spdlog::level::info:
445
+ record.level = core::LogLevel::INFO;
446
+ break;
447
+ case spdlog::level::warn:
448
+ record.level = core::LogLevel::WARN;
449
+ break;
450
+ case spdlog::level::err:
451
+ record.level = core::LogLevel::ERROR;
452
+ break;
453
+ case spdlog::level::critical:
454
+ record.level = core::LogLevel::CRITICAL;
455
+ break;
456
+ default:
457
+ record.level = core::LogLevel::INFO;
458
+ break;
459
+ }
460
+
461
+ // Copy message and logger name
462
+ record.message = std::string(msg.payload.data(), msg.payload.size());
463
+ record.logger_name = std::string(msg.logger_name.data(), msg.logger_name.size());
464
+
465
+ // Set timestamp
466
+ record.timestamp = msg.time;
467
+
468
+ // Set source location (copy strings from spdlog string views for memory safety)
469
+ if (msg.source.filename) {
470
+ record.source.filename = std::string(msg.source.filename);
471
+ }
472
+ record.source.line = msg.source.line;
473
+ if (msg.source.funcname) {
474
+ record.source.function = std::string(msg.source.funcname);
475
+ }
476
+
477
+ // Add thread ID to context
478
+ std::ostringstream thread_id_str;
479
+ thread_id_str << std::this_thread::get_id();
480
+ record.context["thread_id"] = thread_id_str.str();
481
+
482
+ return record;
483
+ }
484
+
485
+ private:
486
+ DrtraceConfig config_;
487
+ std::unique_ptr<core::DrtraceCore> core_;
488
+ };
489
+
490
+ // Convenience type aliases
491
+ using DrtraceSink_mt = DrtraceSink<std::mutex>; // Multi-threaded
492
+ using DrtraceSink_st = DrtraceSink<spdlog::details::null_mutex>; // Single-threaded
493
+
494
+ /**
495
+ * Setup DrTrace integration for an existing spdlog logger.
496
+ *
497
+ * This adds a DrtraceSink to the logger without removing existing sinks.
498
+ */
499
+ inline void setup_drtrace(std::shared_ptr<spdlog::logger> logger,
500
+ const DrtraceConfig& config);
501
+
502
+ /**
503
+ * Create a new spdlog logger with DrTrace integration enabled.
504
+ */
505
+ inline std::shared_ptr<spdlog::logger> create_drtrace_logger(
506
+ const std::string& logger_name, const DrtraceConfig& config);
507
+
508
+ #endif // DRTRACE_SPDLOG_AVAILABLE
509
+
510
+ // =========================
511
+ // Direct API (No spdlog required)
512
+ // =========================
513
+
514
+ /**
515
+ * Direct DrTrace client API (no spdlog required).
516
+ *
517
+ * Usage:
518
+ * drtrace::DrtraceClient client(config);
519
+ * client.info("Application started");
520
+ * client.error("Something went wrong", __FILE__, __LINE__);
521
+ */
522
+ class DrtraceClient {
523
+ public:
524
+ explicit DrtraceClient(const DrtraceConfig& config,
525
+ const std::string& logger_name = "default")
526
+ : config_(config), logger_name_(logger_name),
527
+ core_(std::make_unique<core::DrtraceCore>(config)) {
528
+ }
529
+
530
+ ~DrtraceClient() {
531
+ // Flush any remaining records
532
+ if (core_) {
533
+ core_->flush();
534
+ }
535
+ }
536
+
537
+ // Non-copyable
538
+ DrtraceClient(const DrtraceClient&) = delete;
539
+ DrtraceClient& operator=(const DrtraceClient&) = delete;
540
+
541
+ /**
542
+ * Log a message.
543
+ *
544
+ * @param level Log level
545
+ * @param message Log message
546
+ * @param filename Optional source filename (for __FILE__)
547
+ * @param line Optional source line (for __LINE__)
548
+ * @param function Optional function name (for __FUNCTION__)
549
+ */
550
+ void log(core::LogLevel level,
551
+ const std::string& message,
552
+ const char* filename = nullptr,
553
+ int line = 0,
554
+ const char* function = nullptr) {
555
+ if (!core_ || !core_->is_enabled()) {
556
+ return;
557
+ }
558
+
559
+ core::LogRecord record;
560
+ record.level = level;
561
+ record.message = message;
562
+ record.logger_name = logger_name_;
563
+ record.timestamp = std::chrono::system_clock::now();
564
+ // Copy strings for memory safety (filename/function may be temporary)
565
+ if (filename) {
566
+ record.source.filename = filename;
567
+ }
568
+ record.source.line = line;
569
+ if (function) {
570
+ record.source.function = function;
571
+ }
572
+
573
+ // Add thread ID to context
574
+ std::ostringstream thread_id_str;
575
+ thread_id_str << std::this_thread::get_id();
576
+ record.context["thread_id"] = thread_id_str.str();
577
+
578
+ core_->log(record);
579
+ }
580
+
581
+ /**
582
+ * Convenience methods for each log level.
583
+ */
584
+ void debug(const std::string& message,
585
+ const char* filename = nullptr,
586
+ int line = 0,
587
+ const char* function = nullptr) {
588
+ log(core::LogLevel::DEBUG, message, filename, line, function);
589
+ }
590
+
591
+ void info(const std::string& message,
592
+ const char* filename = nullptr,
593
+ int line = 0,
594
+ const char* function = nullptr) {
595
+ log(core::LogLevel::INFO, message, filename, line, function);
596
+ }
597
+
598
+ void warn(const std::string& message,
599
+ const char* filename = nullptr,
600
+ int line = 0,
601
+ const char* function = nullptr) {
602
+ log(core::LogLevel::WARN, message, filename, line, function);
603
+ }
604
+
605
+ void error(const std::string& message,
606
+ const char* filename = nullptr,
607
+ int line = 0,
608
+ const char* function = nullptr) {
609
+ log(core::LogLevel::ERROR, message, filename, line, function);
610
+ }
611
+
612
+ void critical(const std::string& message,
613
+ const char* filename = nullptr,
614
+ int line = 0,
615
+ const char* function = nullptr) {
616
+ log(core::LogLevel::CRITICAL, message, filename, line, function);
617
+ }
618
+
619
+ /**
620
+ * Flush pending logs.
621
+ */
622
+ void flush() {
623
+ if (core_) {
624
+ core_->flush();
625
+ }
626
+ }
627
+
628
+ /**
629
+ * Check if enabled.
630
+ */
631
+ bool is_enabled() const {
632
+ return core_ && core_->is_enabled();
633
+ }
634
+
635
+ private:
636
+ DrtraceConfig config_;
637
+ std::string logger_name_;
638
+ std::unique_ptr<core::DrtraceCore> core_;
639
+ };
640
+
641
+ // =========================
642
+ // Inline Implementations
643
+ // =========================
644
+
645
+ namespace detail {
646
+
647
// Buffer that collects the response body delivered by libcurl
struct WriteData {
  std::string data;
};

/**
 * Write callback with the signature libcurl expects for response data.
 *
 * Appends the size * nmemb bytes starting at contents to the WriteData
 * object that userp points to.
 *
 * @return The number of bytes consumed, which is always size * nmemb.
 */
inline size_t WriteCallback(void* contents, size_t size, size_t nmemb,
                            void* userp) {
  const size_t byte_count = size * nmemb;
  auto* sink = static_cast<WriteData*>(userp);
  const char* bytes = static_cast<const char*>(contents);
  sink->data.append(bytes, byte_count);
  return byte_count;
}
659
+
660
/**
 * Read application_id from _drtrace/config.json file.
 *
 * This is not a JSON parser: it runs a regular expression over the file
 * contents and returns the value of the first "application_id" or
 * "applicationId" string field it finds, wherever that field appears. The
 * nested {"drtrace": {"applicationId": "..."}} layout is therefore covered
 * by the camelCase alternative and needs no separate search.
 *
 * @param config_path Path of the config file to read.
 * @return The extracted identifier, or an empty string if the file cannot be
 *         opened or no such field with a non-empty value is present.
 */
inline std::string read_application_id_from_config(const std::string& config_path) {
  std::ifstream file(config_path);
  if (!file.is_open()) {
    return "";
  }

  // Read entire file into a string
  const std::string content((std::istreambuf_iterator<char>(file)),
                            std::istreambuf_iterator<char>());

  // Compiled once; matches "application_id": "value" or "applicationId": "value"
  static const std::regex kIdPattern(
      R"delim("application_id"\s*:\s*"([^"]+)"|"applicationId"\s*:\s*"([^"]+)")delim");

  std::smatch match;
  if (!std::regex_search(content, match, kIdPattern)) {
    return "";
  }

  // Exactly one of the two capture groups took part in the match
  return match[1].matched ? match[1].str() : match[2].str();
}
697
+
698
+ } // namespace detail
699
+
700
+ // DrtraceConfig::from_env inline implementation
701
+ inline DrtraceConfig DrtraceConfig::from_env() {
702
+ DrtraceConfig config;
703
+
704
+ // Priority 1: Try environment variable first
705
+ const char* app_id = std::getenv("DRTRACE_APPLICATION_ID");
706
+
707
+ // Priority 2: Fall back to _drtrace/config.json if env var not set
708
+ if (!app_id) {
709
+ // Try to find config file relative to current working directory
710
+ // Look for _drtrace/config.json in current directory
711
+ std::string config_path = "_drtrace/config.json";
712
+ std::string app_id_from_config =
713
+ detail::read_application_id_from_config(config_path);
714
+
715
+ if (!app_id_from_config.empty()) {
716
+ config.application_id = app_id_from_config;
717
+ } else {
718
+ // Priority 3: Final fallback to default value (ensures application never crashes)
719
+ // CRITICAL: Must use same default value as Python and JavaScript: "my-app"
720
+ config.application_id = "my-app";
721
+ // Optional: std::cerr << "Warning: Using default application_id 'my-app'. "
722
+ // << "Set DRTRACE_APPLICATION_ID or _drtrace/config.json to customize." << std::endl;
723
+ }
724
+ } else {
725
+ config.application_id = app_id;
726
+ }
727
+
728
+ const char* daemon_url = std::getenv("DRTRACE_DAEMON_URL");
729
+ if (daemon_url) {
730
+ config.daemon_url = daemon_url;
731
+ }
732
+
733
+ const char* service_name = std::getenv("DRTRACE_SERVICE_NAME");
734
+ if (service_name) {
735
+ config.service_name = service_name;
736
+ }
737
+
738
+ const char* enabled = std::getenv("DRTRACE_ENABLED");
739
+ if (enabled && std::string(enabled) == "false") {
740
+ config.enabled = false;
741
+ }
742
+
743
+ // Circuit breaker reset interval (milliseconds)
744
+ const char* circuit_reset_ms = std::getenv("DRTRACE_CIRCUIT_RESET_MS");
745
+ if (circuit_reset_ms) {
746
+ try {
747
+ long ms = std::stol(circuit_reset_ms);
748
+ if (ms > 0) {
749
+ config.circuit_reset_interval = std::chrono::milliseconds(ms);
750
+ }
751
+ } catch (...) {
752
+ // Invalid value, use default
753
+ }
754
+ }
755
+
756
+ // Maximum buffer size (backpressure)
757
+ const char* max_buffer = std::getenv("DRTRACE_MAX_BUFFER_SIZE");
758
+ if (max_buffer) {
759
+ try {
760
+ long size = std::stol(max_buffer);
761
+ if (size >= 0) {
762
+ config.max_buffer_size = static_cast<size_t>(size);
763
+ }
764
+ } catch (...) {
765
+ // Invalid value, use default
766
+ }
767
+ }
768
+
769
+ // Minimum log level (filtering)
770
+ const char* min_level = std::getenv("DRTRACE_MIN_LEVEL");
771
+ if (min_level) {
772
+ config.min_level = parse_log_level(min_level);
773
+ }
774
+
775
+ // HTTP timeout (milliseconds)
776
+ const char* http_timeout = std::getenv("DRTRACE_HTTP_TIMEOUT_MS");
777
+ if (http_timeout) {
778
+ try {
779
+ long ms = std::stol(http_timeout);
780
+ if (ms > 0) {
781
+ config.http_timeout = std::chrono::milliseconds(ms);
782
+ }
783
+ } catch (...) {
784
+ // Invalid value, use default
785
+ }
786
+ }
787
+
788
+ // Retry backoff (milliseconds)
789
+ const char* retry_backoff = std::getenv("DRTRACE_RETRY_BACKOFF_MS");
790
+ if (retry_backoff) {
791
+ try {
792
+ long ms = std::stol(retry_backoff);
793
+ if (ms > 0) {
794
+ config.retry_backoff = std::chrono::milliseconds(ms);
795
+ }
796
+ } catch (...) {
797
+ // Invalid value, use default
798
+ }
799
+ }
800
+
801
+ // Max retries
802
+ const char* max_retries = std::getenv("DRTRACE_MAX_RETRIES");
803
+ if (max_retries) {
804
+ try {
805
+ int retries = std::stoi(max_retries);
806
+ if (retries >= 0) {
807
+ config.max_retries = retries;
808
+ }
809
+ } catch (...) {
810
+ // Invalid value, use default
811
+ }
812
+ }
813
+
814
+ return config;
815
+ }
816
+
817
+ // HttpTransport inline implementations
818
+ inline HttpTransport::HttpTransport(const DrtraceConfig& config)
819
+ : endpoint_(config.daemon_url),
820
+ application_id_(config.application_id),
821
+ max_retries_(config.max_retries),
822
+ base_backoff_ms_(config.retry_backoff),
823
+ http_timeout_(config.http_timeout),
824
+ circuit_reset_interval_(config.circuit_reset_interval) {
825
+ // Ensure curl is initialized (thread-safe, idempotent)
826
+ detail::ensure_curl_initialized();
827
+
828
+ curl_handle_ = curl_easy_init();
829
+ if (!curl_handle_) {
830
+ std::cerr << "Warning: Failed to initialize libcurl for drtrace transport"
831
+ << std::endl;
832
+ }
833
+ }
834
+
835
+ inline void HttpTransport::wait_for_operations() {
836
+ // Wait for any in-flight send_batch() operations to complete.
837
+ // We use a short timeout and retry mechanism to avoid hanging
838
+ // if a network operation is blocked.
839
+ //
840
+ // IMPORTANT: Even if this function times out, the destructor's lock_guard
841
+ // will still block until the mutex is available. This ensures that:
842
+ // 1. If operations complete quickly, we return early (optimization)
843
+ // 2. If operations are slow, we don't wait forever (timeout protection)
844
+ // 3. The destructor's lock_guard ensures operations complete before cleanup
845
+ //
846
+ // This design provides both performance (early return) and safety
847
+ // (guaranteed wait via lock_guard).
848
+ auto start = std::chrono::steady_clock::now();
849
+ auto timeout = start + std::chrono::milliseconds(500); // 500ms timeout
850
+
851
+ while (std::chrono::steady_clock::now() < timeout) {
852
+ // Try to acquire lock - if successful, no operations are in progress
853
+ if (curl_mutex_.try_lock()) {
854
+ curl_mutex_.unlock();
855
+ return; // No operations in progress
856
+ }
857
+ // Lock is held by send_batch(), wait a bit
858
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
859
+ }
860
+
861
+ // Timeout reached - operations may still be in progress, but we proceed anyway.
862
+ // The destructor's lock_guard will still block until operations complete,
863
+ // ensuring thread safety even if this timeout is reached.
864
+ }
865
+
866
+ inline HttpTransport::~HttpTransport() {
867
+ // Shutdown sequence:
868
+ // 1. Set shutdown flag to prevent new operations from starting
869
+ shutdown_flag_.store(true);
870
+
871
+ // 2. Wait for any in-flight send_batch() operations to complete
872
+ // (with timeout to prevent hanging if network I/O is blocked)
873
+ wait_for_operations();
874
+
875
+ // 3. Acquire mutex before cleanup. This lock_guard will block until
876
+ // any remaining operations release the mutex, ensuring curl_handle_
877
+ // is never accessed after cleanup begins.
878
+ std::lock_guard<std::mutex> lock(curl_mutex_);
879
+
880
+ // 4. Now safe to cleanup curl_handle_ (no operations can be using it)
881
+ if (curl_handle_) {
882
+ curl_easy_cleanup(curl_handle_);
883
+ curl_handle_ = nullptr;
884
+ }
885
+
886
+ // Decrement reference count
887
+ // Note: We don't call curl_global_cleanup here - it should only be called
888
+ // at program termination, not in destructors. curl_global_init is idempotent
889
+ // and safe to call multiple times, so leaving it initialized is fine.
890
+ detail::curl_init_ref_count().fetch_sub(1);
891
+ }
892
+
893
+ inline bool HttpTransport::send_batch(
894
+ const std::vector<std::string>& log_records) {
895
+ // Shutdown flag check sequence:
896
+ // 1. Check BEFORE acquiring lock (fast path - avoids lock acquisition if shutdown)
897
+ if (shutdown_flag_.load()) {
898
+ return false;
899
+ }
900
+
901
+ if (log_records.empty()) {
902
+ return false;
903
+ }
904
+
905
+ // Circuit breaker fast-fail check (< 1 microsecond)
906
+ // This is checked BEFORE any network operations to provide fast-fail behavior
907
+ if (is_circuit_open()) {
908
+ return false; // Fast-fail - daemon known to be unavailable
909
+ }
910
+
911
+ // Build JSON payload matching Python client format
912
+ std::ostringstream payload;
913
+ payload << "{\"application_id\":\"" << application_id_ << "\",\"logs\":[";
914
+ for (size_t i = 0; i < log_records.size(); ++i) {
915
+ if (i > 0) {
916
+ payload << ",";
917
+ }
918
+ payload << log_records[i];
919
+ }
920
+ payload << "]}";
921
+
922
+ std::string payload_str = payload.str();
923
+
924
+ // 2. Acquire lock to check and use curl_handle_
925
+ // This ensures only one thread accesses curl_handle_ at a time
926
+ std::lock_guard<std::mutex> lock(curl_mutex_);
927
+
928
+ // 3. Check AGAIN after acquiring lock (shutdown may have happened between checks)
929
+ // This double-check pattern prevents race conditions where shutdown happens
930
+ // between the first check and lock acquisition
931
+ if (shutdown_flag_.load() || !curl_handle_) {
932
+ return false;
933
+ }
934
+
935
+ // Setup curl request (all operations protected by mutex)
936
+ curl_easy_reset(curl_handle_);
937
+ curl_easy_setopt(curl_handle_, CURLOPT_URL, endpoint_.c_str());
938
+ curl_easy_setopt(curl_handle_, CURLOPT_POSTFIELDS, payload_str.c_str());
939
+ curl_easy_setopt(curl_handle_, CURLOPT_POSTFIELDSIZE, payload_str.length());
940
+
941
+ struct curl_slist* headers = nullptr;
942
+ headers = curl_slist_append(headers, "Content-Type: application/json");
943
+ curl_easy_setopt(curl_handle_, CURLOPT_HTTPHEADER, headers);
944
+
945
+ // Use configurable timeout (milliseconds)
946
+ curl_easy_setopt(curl_handle_, CURLOPT_TIMEOUT_MS,
947
+ static_cast<long>(http_timeout_.count()));
948
+ curl_easy_setopt(curl_handle_, CURLOPT_WRITEFUNCTION,
949
+ detail::WriteCallback);
950
+
951
+ detail::WriteData write_data;
952
+ curl_easy_setopt(curl_handle_, CURLOPT_WRITEDATA, &write_data);
953
+
954
+ // Retry loop
955
+ for (int attempt = 1; attempt <= max_retries_; ++attempt) {
956
+ // 4. Check shutdown flag before each retry attempt
957
+ // This allows long-running operations to exit early if shutdown occurs
958
+ // during network I/O (e.g., if curl_easy_perform() is slow)
959
+ if (shutdown_flag_.load()) {
960
+ curl_slist_free_all(headers);
961
+ return false;
962
+ }
963
+
964
+ CURLcode res = curl_easy_perform(curl_handle_);
965
+ if (res == CURLE_OK) {
966
+ long response_code;
967
+ curl_easy_getinfo(curl_handle_, CURLINFO_RESPONSE_CODE, &response_code);
968
+ if (response_code >= 200 && response_code < 300) {
969
+ // Success - close circuit (daemon is available)
970
+ close_circuit();
971
+ curl_slist_free_all(headers);
972
+ return true;
973
+ }
974
+ // Non-2xx response - will retry or fail
975
+ }
976
+
977
+ if (attempt < max_retries_) {
978
+ std::this_thread::sleep_for(base_backoff_ms_ * attempt);
979
+ }
980
+ }
981
+
982
+ // All retries failed - open circuit (daemon unavailable)
983
+ open_circuit();
984
+
985
+ curl_slist_free_all(headers);
986
+ return false;
987
+ }
988
+
989
+ #if DRTRACE_SPDLOG_AVAILABLE
990
+
991
+ // setup_drtrace and create_drtrace_logger inline implementations
992
+ inline void setup_drtrace(std::shared_ptr<spdlog::logger> logger,
993
+ const DrtraceConfig& config) {
994
+ if (!config.enabled) {
995
+ return;
996
+ }
997
+
998
+ auto sink = std::make_shared<DrtraceSink_mt>(config);
999
+ logger->sinks().push_back(sink);
1000
+ }
1001
+
1002
+ inline std::shared_ptr<spdlog::logger> create_drtrace_logger(
1003
+ const std::string& logger_name, const DrtraceConfig& config) {
1004
+ auto logger = spdlog::get(logger_name);
1005
+ if (logger) {
1006
+ return logger;
1007
+ }
1008
+
1009
+ logger = std::make_shared<spdlog::logger>(logger_name);
1010
+ if (config.enabled) {
1011
+ auto sink = std::make_shared<DrtraceSink_mt>(config);
1012
+ logger->sinks().push_back(sink);
1013
+ }
1014
+ spdlog::register_logger(logger);
1015
+ return logger;
1016
+ }
1017
+
1018
+ #endif // DRTRACE_SPDLOG_AVAILABLE
1019
+
1020
+ // =========================
1021
+ // Core Components Inline Implementations
1022
+ // =========================
1023
+
1024
+ namespace core {
1025
+
1026
+ // DrtraceCore inline implementations
1027
+ inline DrtraceCore::DrtraceCore(const DrtraceConfig& config)
1028
+ : config_(config), transport_(std::make_unique<HttpTransport>(config)), flush_thread_running_(false) {
1029
+ if (config_.enabled) {
1030
+ start_flush_thread();
1031
+ }
1032
+ }
1033
+
1034
+ inline DrtraceCore::~DrtraceCore() {
1035
+ // Stop flush thread first (before flushing to avoid race conditions)
1036
+ if (flush_thread_running_) {
1037
+ stop_flush_thread();
1038
+ }
1039
+ // Flush any remaining records (after thread is stopped)
1040
+ flush();
1041
+ }
1042
+
1043
+ inline void DrtraceCore::log(const LogRecord& record) {
1044
+ if (!config_.enabled) {
1045
+ return;
1046
+ }
1047
+
1048
+ // Level filtering: skip logs below min_level
1049
+ if (record.level < config_.min_level) {
1050
+ return;
1051
+ }
1052
+
1053
+ bool should_flush = false;
1054
+ {
1055
+ std::lock_guard<std::mutex> lock(batch_mutex_);
1056
+
1057
+ // Backpressure: drop oldest log if buffer is full
1058
+ // This prevents OOM when daemon is unavailable or slow
1059
+ if (config_.max_buffer_size > 0 && batch_.size() >= config_.max_buffer_size) {
1060
+ batch_.erase(batch_.begin()); // Drop oldest
1061
+ }
1062
+
1063
+ std::string json_record = serialize_record(record);
1064
+ batch_.push_back(std::move(json_record));
1065
+
1066
+ // Check if batch size reached (flush outside lock)
1067
+ should_flush = (batch_.size() >= config_.batch_size);
1068
+ }
1069
+
1070
+ // Flush outside the lock to avoid holding lock during network I/O
1071
+ if (should_flush) {
1072
+ flush_internal();
1073
+ }
1074
+ }
1075
+
1076
+ inline void DrtraceCore::flush() {
1077
+ // flush_internal() manages its own locking - don't hold lock here
1078
+ flush_internal();
1079
+ }
1080
+
1081
+ inline std::string DrtraceCore::escape_json(const std::string& str) {
1082
+ std::ostringstream escaped;
1083
+ for (char c : str) {
1084
+ switch (c) {
1085
+ case '"':
1086
+ escaped << "\\\"";
1087
+ break;
1088
+ case '\\':
1089
+ escaped << "\\\\";
1090
+ break;
1091
+ case '\b':
1092
+ escaped << "\\b";
1093
+ break;
1094
+ case '\f':
1095
+ escaped << "\\f";
1096
+ break;
1097
+ case '\n':
1098
+ escaped << "\\n";
1099
+ break;
1100
+ case '\r':
1101
+ escaped << "\\r";
1102
+ break;
1103
+ case '\t':
1104
+ escaped << "\\t";
1105
+ break;
1106
+ default:
1107
+ if (static_cast<unsigned char>(c) < 0x20) {
1108
+ escaped << "\\u" << std::hex << std::setw(4) << std::setfill('0')
1109
+ << static_cast<int>(c);
1110
+ } else {
1111
+ escaped << c;
1112
+ }
1113
+ break;
1114
+ }
1115
+ }
1116
+ return escaped.str();
1117
+ }
1118
+
1119
+ inline std::string DrtraceCore::serialize_record(const LogRecord& record) {
1120
+ std::ostringstream json;
1121
+ // Set precision to preserve fractional seconds (6 decimal places = microsecond precision)
1122
+ json << std::fixed << std::setprecision(6);
1123
+
1124
+ // Get timestamp as Unix timestamp (seconds since epoch, with fractional seconds)
1125
+ auto ts_duration = record.timestamp.time_since_epoch();
1126
+ auto ts_seconds = std::chrono::duration_cast<std::chrono::seconds>(ts_duration);
1127
+ auto ts_fractional = std::chrono::duration_cast<std::chrono::milliseconds>(
1128
+ ts_duration - ts_seconds);
1129
+ double ts = ts_seconds.count() + (ts_fractional.count() / 1000.0);
1130
+
1131
+ // Map LogLevel to string
1132
+ std::string level_str;
1133
+ switch (record.level) {
1134
+ case LogLevel::DEBUG:
1135
+ level_str = "debug";
1136
+ break;
1137
+ case LogLevel::INFO:
1138
+ level_str = "info";
1139
+ break;
1140
+ case LogLevel::WARN:
1141
+ level_str = "warn";
1142
+ break;
1143
+ case LogLevel::ERROR:
1144
+ level_str = "error";
1145
+ break;
1146
+ case LogLevel::CRITICAL:
1147
+ level_str = "critical";
1148
+ break;
1149
+ }
1150
+
1151
+ json << "{"
1152
+ << "\"ts\":" << ts << ","
1153
+ << "\"level\":\"" << escape_json(level_str) << "\","
1154
+ << "\"message\":\"" << escape_json(record.message) << "\","
1155
+ << "\"application_id\":\"" << escape_json(config_.application_id) << "\","
1156
+ << "\"module_name\":\"" << escape_json(record.logger_name) << "\"";
1157
+
1158
+ // Optional service_name
1159
+ if (!config_.service_name.empty()) {
1160
+ json << ",\"service_name\":\"" << escape_json(config_.service_name) << "\"";
1161
+ }
1162
+
1163
+ // Optional file_path and line_no (check for empty strings instead of null pointers)
1164
+ if (!record.source.filename.empty()) {
1165
+ json << ",\"file_path\":\"" << escape_json(record.source.filename) << "\"";
1166
+ }
1167
+ if (record.source.line > 0) {
1168
+ json << ",\"line_no\":" << record.source.line;
1169
+ }
1170
+
1171
+ // Context field
1172
+ json << ",\"context\":{"
1173
+ << "\"language\":\"cpp\"";
1174
+ // Add thread ID
1175
+ json << ",\"thread_id\":\"" << std::this_thread::get_id() << "\"";
1176
+ // Add any additional context
1177
+ for (const auto& [key, value] : record.context) {
1178
+ json << ",\"" << escape_json(key) << "\":\"" << escape_json(value) << "\"";
1179
+ }
1180
+ json << "}";
1181
+
1182
+ json << "}";
1183
+ return json.str();
1184
+ }
1185
+
1186
+ inline void DrtraceCore::flush_internal() {
1187
+ // RAII locking - manages its own lock, callers should NOT hold batch_mutex_
1188
+ std::vector<std::string> to_send;
1189
+ {
1190
+ std::lock_guard<std::mutex> lock(batch_mutex_);
1191
+ if (batch_.empty()) {
1192
+ return;
1193
+ }
1194
+ to_send.swap(batch_);
1195
+ }
1196
+ // Lock released here - safe to do network I/O without blocking other threads
1197
+
1198
+ // Send batch (transport handles errors internally)
1199
+ if (transport_) {
1200
+ transport_->send_batch(to_send);
1201
+ }
1202
+ }
1203
+
1204
+ inline void DrtraceCore::start_flush_thread() {
1205
+ flush_thread_running_ = true;
1206
+ flush_thread_ = std::thread([this]() {
1207
+ while (true) {
1208
+ std::unique_lock<std::mutex> lock(flush_mutex_);
1209
+ if (flush_cv_.wait_for(lock, config_.flush_interval,
1210
+ [this] { return should_stop_; })) {
1211
+ break; // Stop requested
1212
+ }
1213
+ lock.unlock(); // Release flush_mutex_ before calling flush_internal
1214
+
1215
+ // flush_internal() manages batch_mutex_ internally - don't hold it here
1216
+ flush_internal();
1217
+ }
1218
+ });
1219
+ }
1220
+
1221
+ inline void DrtraceCore::stop_flush_thread() {
1222
+ // Set stop flag
1223
+ {
1224
+ std::lock_guard<std::mutex> lock(flush_mutex_);
1225
+ should_stop_ = true;
1226
+ }
1227
+ // Notify flush thread to wake up and check stop flag
1228
+ flush_cv_.notify_one();
1229
+
1230
+ // Always join - never detach to avoid use-after-free
1231
+ // The flush thread checks should_stop_ in its wait condition, so it will
1232
+ // exit promptly. The only delay is if send_batch() is in progress, which
1233
+ // is bounded by curl timeout and circuit breaker fast-fail.
1234
+ if (flush_thread_.joinable()) {
1235
+ flush_thread_.join();
1236
+ }
1237
+
1238
+ flush_thread_running_ = false;
1239
+ }
1240
+
1241
+ inline void DrtraceCore::flush_thread_func() {
1242
+ // This is handled by the lambda in start_flush_thread
1243
+ // Kept for consistency with architecture
1244
+ }
1245
+
1246
+ } // namespace core
1247
+
1248
+ } // namespace drtrace
1249
+