drtrace 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +29 -0
  2. package/agents/CONTRIBUTING.md +296 -0
  3. package/agents/README.md +174 -0
  4. package/agents/daemon-method-selection.md +298 -0
  5. package/agents/integration-guides/cpp-best-practices.md +218 -0
  6. package/agents/integration-guides/cpp-ros-integration.md +88 -0
  7. package/agents/log-analysis.md +217 -0
  8. package/agents/log-help.md +226 -0
  9. package/agents/log-init.md +933 -0
  10. package/agents/log-it.md +1126 -0
  11. package/bin/init.js +4 -4
  12. package/dist/bin/init.js +31 -0
  13. package/dist/config-schema.d.ts +2 -2
  14. package/dist/init.d.ts +49 -4
  15. package/dist/init.js +494 -35
  16. package/dist/resources/agents/CONTRIBUTING.md +296 -0
  17. package/dist/resources/agents/README.md +174 -0
  18. package/dist/resources/agents/daemon-method-selection.md +298 -0
  19. package/dist/resources/agents/integration-guides/cpp-best-practices.md +218 -0
  20. package/dist/resources/agents/integration-guides/cpp-ros-integration.md +88 -0
  21. package/dist/resources/agents/log-analysis.md +217 -0
  22. package/dist/resources/agents/log-help.md +226 -0
  23. package/dist/resources/agents/log-init.md +933 -0
  24. package/dist/resources/agents/log-it.md +1126 -0
  25. package/dist/resources/cpp/drtrace_sink.hpp +1248 -0
  26. package/package.json +9 -2
  27. package/.eslintrc.js +0 -20
  28. package/jest.config.js +0 -11
  29. package/src/client.ts +0 -68
  30. package/src/config-schema.ts +0 -115
  31. package/src/config.ts +0 -326
  32. package/src/index.ts +0 -3
  33. package/src/init.ts +0 -410
  34. package/src/logger.ts +0 -56
  35. package/src/queue.ts +0 -105
  36. package/src/transport.ts +0 -60
  37. package/src/types.ts +0 -20
  38. package/tests/client.test.ts +0 -66
  39. package/tests/config-schema.test.ts +0 -198
  40. package/tests/config.test.ts +0 -456
  41. package/tests/queue.test.ts +0 -72
  42. package/tests/transport.test.ts +0 -52
  43. package/tsconfig.json +0 -18
@@ -0,0 +1,1248 @@
1
+ /**
2
+ * DrTrace C++ Client Integration
3
+ *
4
+ * A spdlog sink that enriches log records and sends them to the DrTrace daemon
5
+ * via HTTP POST, matching the unified schema from Story 4.1.
6
+ */
7
+
8
+ #pragma once
9
+
10
+ // Standard library includes required for header-only implementation
11
+ #include <atomic>
12
+ #include <chrono>
13
+ #include <condition_variable>
14
+ #include <cstdlib>
15
+ #include <fstream>
16
+ #include <iomanip>
17
+ #include <iostream>
18
+ #include <map>
19
+ #include <memory>
20
+ #include <mutex>
21
+ #include <regex>
22
+ #include <sstream>
23
+ #include <string>
24
+ #include <thread>
25
+ #include <vector>
26
+
27
+ // libcurl for HTTP transport
28
+ #include <curl/curl.h>
29
+
30
+ // spdlog includes (optional - only needed for spdlog adapter)
31
+ // By default, try to detect spdlog availability (if headers are present)
32
+ // Users can explicitly disable with DRTRACE_DISABLE_SPDLOG
33
+ #ifndef DRTRACE_DISABLE_SPDLOG
34
+ #if __has_include(<spdlog/spdlog.h>)
35
+ #include <spdlog/details/log_msg.h>
36
+ #include <spdlog/sinks/base_sink.h>
37
+ #include <spdlog/spdlog.h>
38
+ #define DRTRACE_SPDLOG_AVAILABLE 1
39
+ #else
40
+ #define DRTRACE_SPDLOG_AVAILABLE 0
41
+ #endif
42
+ #else
43
+ // Explicitly disabled
44
+ #define DRTRACE_SPDLOG_AVAILABLE 0
45
+ #endif
46
+
47
+ namespace drtrace {
48
+
49
+ // Log level enum - defined here for use in DrtraceConfig
50
+ // Also available via core::LogLevel (defined later as alias)
51
namespace core {
/// Severity levels used by the client; numeric values increase with severity.
enum class LogLevel {
    DEBUG = 0,
    INFO = 1,
    WARN = 2,
    ERROR = 3,
    CRITICAL = 4
};
}  // namespace core

/**
 * Translate a textual level name into a core::LogLevel.
 *
 * Matching ignores ASCII case. Recognised names are "debug", "info",
 * "warn"/"warning", "error" and "critical".
 *
 * @param str NUL-terminated level name; may be null.
 * @return The matching level, or DEBUG when str is null, empty or unrecognised.
 */
inline core::LogLevel parse_log_level(const char* str) {
    if (str == nullptr || *str == '\0') {
        return core::LogLevel::DEBUG;
    }

    // Build a lowercase copy; the unsigned char cast keeps std::tolower well-defined
    // for bytes outside the 7-bit range.
    std::string lowered;
    for (const char* cursor = str; *cursor != '\0'; ++cursor) {
        lowered.push_back(
            static_cast<char>(std::tolower(static_cast<unsigned char>(*cursor))));
    }

    struct NamedLevel {
        const char* name;
        core::LogLevel level;
    };
    static const NamedLevel kKnownLevels[] = {
        {"debug", core::LogLevel::DEBUG},
        {"info", core::LogLevel::INFO},
        {"warn", core::LogLevel::WARN},
        {"warning", core::LogLevel::WARN},
        {"error", core::LogLevel::ERROR},
        {"critical", core::LogLevel::CRITICAL},
    };

    for (const NamedLevel& candidate : kKnownLevels) {
        if (lowered == candidate.name) {
            return candidate.level;
        }
    }

    // Unknown names fall back to DEBUG.
    return core::LogLevel::DEBUG;
}
82
+
83
+ /**
84
+ * Configuration for the DrTrace C++ client.
85
+ */
86
+ struct DrtraceConfig {
87
+ std::string application_id;
88
+ std::string daemon_url = "http://localhost:8001/logs/ingest";
89
+ std::string service_name;
90
+ bool enabled = true;
91
+ size_t batch_size = 10;
92
+ std::chrono::milliseconds flush_interval{5000}; // 5 seconds
93
+ std::chrono::milliseconds circuit_reset_interval{30000}; // 30 seconds - circuit breaker cooldown
94
+
95
+ /**
96
+ * Maximum number of log records to buffer.
97
+ * When exceeded, oldest logs are dropped (backpressure).
98
+ * Set to 0 for unlimited (not recommended for production).
99
+ * Default: 10000
100
+ */
101
+ size_t max_buffer_size = 10000;
102
+
103
+ /**
104
+ * Minimum log level to send to daemon.
105
+ * Logs below this level are filtered at the client (not sent).
106
+ * Default: DEBUG (send everything - backward compatible)
107
+ */
108
+ core::LogLevel min_level = core::LogLevel::DEBUG;
109
+
110
+ /**
111
+ * HTTP request timeout in milliseconds.
112
+ * Default: 1000 (1 second)
113
+ */
114
+ std::chrono::milliseconds http_timeout{1000};
115
+
116
+ /**
117
+ * Base backoff time for retry attempts.
118
+ * Actual backoff = base_backoff * attempt_number
119
+ * Default: 100ms
120
+ */
121
+ std::chrono::milliseconds retry_backoff{100};
122
+
123
+ /**
124
+ * Maximum retry attempts for failed requests.
125
+ * Default: 3
126
+ */
127
+ int max_retries = 3;
128
+
129
+ /**
130
+ * Load configuration from environment variables, with fallback to config file.
131
+ *
132
+ * Priority (highest to lowest):
133
+ * 1. DRTRACE_APPLICATION_ID environment variable
134
+ * 2. _drtrace/config.json file (application_id field)
135
+ *
136
+ * Required:
137
+ * - DRTRACE_APPLICATION_ID (env var) OR application_id in _drtrace/config.json
138
+ *
139
+ * Optional:
140
+ * - DRTRACE_DAEMON_URL (default: http://localhost:8001/logs/ingest)
141
+ * - DRTRACE_SERVICE_NAME
142
+ * - DRTRACE_ENABLED (default: true, set to "false" to disable)
143
+ */
144
+ static DrtraceConfig from_env();
145
+ };
146
+
147
+ namespace detail {
148
+ // Reference counter for curl_global_init
149
+ // curl_global_init is idempotent (safe to call multiple times)
150
+ // We use reference counting to ensure it's initialized, but never call
151
+ // curl_global_cleanup (should only be called at program termination)
152
+ inline std::atomic<int>& curl_init_ref_count() {
153
+ static std::atomic<int> count{0};
154
+ return count;
155
+ }
156
+
157
+ inline std::mutex& curl_init_mutex() {
158
+ static std::mutex mtx;
159
+ return mtx;
160
+ }
161
+
162
+ // Initialize curl once (thread-safe, idempotent)
163
+ inline void ensure_curl_initialized() {
164
+ std::lock_guard<std::mutex> lock(curl_init_mutex());
165
+ if (curl_init_ref_count().fetch_add(1) == 0) {
166
+ curl_global_init(CURL_GLOBAL_DEFAULT);
167
+ }
168
+ }
169
+ }
170
+
171
+ /**
172
+ * HTTP transport for sending log batches to the daemon.
173
+ *
174
+ * Uses libcurl for HTTP POST requests. Handles retries and errors
175
+ * gracefully without throwing exceptions.
176
+ */
177
+ class HttpTransport {
178
+ public:
179
+ inline explicit HttpTransport(const DrtraceConfig& config);
180
+ inline ~HttpTransport();
181
+
182
+ // Non-copyable
183
+ HttpTransport(const HttpTransport&) = delete;
184
+ HttpTransport& operator=(const HttpTransport&) = delete;
185
+
186
+ /**
187
+ * Send a batch of log records to the daemon.
188
+ *
189
+ * This method is thread-safe and handles network errors gracefully.
190
+ * Returns true if the batch was sent successfully, false otherwise.
191
+ *
192
+ * Circuit Breaker Behavior:
193
+ * - When daemon is unavailable, circuit opens and fast-fails (< 1µs)
194
+ * - After circuit_reset_interval, one probe request is allowed
195
+ * - On success, circuit closes; on failure, circuit stays open
196
+ */
197
+ inline bool send_batch(const std::vector<std::string>& log_records);
198
+
199
+ /**
200
+ * Check if circuit breaker is open (for testing).
201
+ */
202
+ bool is_circuit_open_for_test() const {
203
+ return is_circuit_open();
204
+ }
205
+
206
+ private:
207
+ std::string endpoint_;
208
+ std::string application_id_;
209
+ int max_retries_;
210
+ std::chrono::milliseconds base_backoff_ms_;
211
+ std::chrono::milliseconds http_timeout_;
212
+
213
+ void* curl_handle_ = nullptr; // CURL* handle
214
+
215
+ // Thread safety: protect curl_handle_ access
216
+ std::mutex curl_mutex_;
217
+ std::atomic<bool> shutdown_flag_{false};
218
+
219
+ // Circuit breaker state - atomic for thread safety
220
+ // States: CLOSED (normal) -> OPEN (fast-fail) -> HALF-OPEN (probe) -> CLOSED/OPEN
221
+ std::atomic<bool> circuit_open_{false};
222
+ std::atomic<int64_t> circuit_open_until_ms_{0};
223
+ std::chrono::milliseconds circuit_reset_interval_{30000};
224
+
225
+ /**
226
+ * Get current time in milliseconds since epoch.
227
+ */
228
+ int64_t now_ms() const {
229
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
230
+ std::chrono::steady_clock::now().time_since_epoch()
231
+ ).count();
232
+ }
233
+
234
+ /**
235
+ * Check if circuit is open (should fast-fail).
236
+ * Returns false if circuit is closed or cooldown has expired (half-open).
237
+ */
238
+ bool is_circuit_open() const {
239
+ if (!circuit_open_.load(std::memory_order_acquire)) {
240
+ return false; // Fast path - circuit closed
241
+ }
242
+ // Check if cooldown expired (half-open state - allow probe request)
243
+ if (now_ms() >= circuit_open_until_ms_.load(std::memory_order_acquire)) {
244
+ return false; // Allow probe request
245
+ }
246
+ return true; // Fast-fail
247
+ }
248
+
249
+ /**
250
+ * Open the circuit (daemon unavailable).
251
+ * Sets cooldown timer to circuit_reset_interval from now.
252
+ */
253
+ void open_circuit() {
254
+ circuit_open_until_ms_.store(
255
+ now_ms() + circuit_reset_interval_.count(),
256
+ std::memory_order_release
257
+ );
258
+ circuit_open_.store(true, std::memory_order_release);
259
+ }
260
+
261
+ /**
262
+ * Close the circuit (daemon is available).
263
+ */
264
+ void close_circuit() {
265
+ circuit_open_.store(false, std::memory_order_release);
266
+ }
267
+
268
+ // Wait for any in-flight operations to complete
269
+ inline void wait_for_operations();
270
+ };
271
+
272
+ // =========================
273
+ // Core Components (spdlog-independent)
274
+ // =========================
275
+
276
+ namespace core {
277
+
278
+ // LogLevel enum is defined at the top of the drtrace namespace
279
+ // (before DrtraceConfig, so it can be used in config)
280
+ // core::LogLevel is available from there
281
+
282
+ /**
283
+ * Source location information (optional).
284
+ */
285
+ struct SourceLocation {
286
+ std::string filename; // Use std::string for memory safety (copies strings)
287
+ int line = 0;
288
+ std::string function; // Use std::string for memory safety (copies strings)
289
+ };
290
+
291
+ /**
292
+ * Log record structure (spdlog-independent).
293
+ */
294
+ struct LogRecord {
295
+ LogLevel level;
296
+ std::string message;
297
+ std::string logger_name;
298
+ std::chrono::system_clock::time_point timestamp;
299
+ SourceLocation source;
300
+
301
+ // Additional context (optional)
302
+ std::map<std::string, std::string> context;
303
+ };
304
+
305
+ /**
306
+ * Core DrTrace logging engine (spdlog-independent).
307
+ *
308
+ * Handles:
309
+ * - Serialization of LogRecord to JSON
310
+ * - Batching records
311
+ * - Flushing batches via HttpTransport
312
+ * - Thread-safe operations
313
+ */
314
+ class DrtraceCore {
315
+ public:
316
+ explicit DrtraceCore(const DrtraceConfig& config);
317
+ ~DrtraceCore();
318
+
319
+ // Non-copyable
320
+ DrtraceCore(const DrtraceCore&) = delete;
321
+ DrtraceCore& operator=(const DrtraceCore&) = delete;
322
+
323
+ /**
324
+ * Log a record (thread-safe).
325
+ */
326
+ void log(const LogRecord& record);
327
+
328
+ /**
329
+ * Flush pending records immediately.
330
+ */
331
+ void flush();
332
+
333
+ /**
334
+ * Check if enabled.
335
+ */
336
+ bool is_enabled() const { return config_.enabled; }
337
+
338
+ private:
339
+ const DrtraceConfig& config_;
340
+ std::unique_ptr<HttpTransport> transport_;
341
+ std::vector<std::string> batch_; // JSON strings
342
+ std::mutex batch_mutex_;
343
+
344
+ // Flush thread management
345
+ std::thread flush_thread_;
346
+ std::mutex flush_mutex_;
347
+ std::condition_variable flush_cv_;
348
+ bool should_stop_ = false;
349
+ bool flush_thread_running_ = false;
350
+
351
+ /**
352
+ * Serialize LogRecord to JSON string (unified schema).
353
+ */
354
+ std::string serialize_record(const LogRecord& record);
355
+
356
+ /**
357
+ * Escape JSON string.
358
+ */
359
+ std::string escape_json(const std::string& str);
360
+
361
+ /**
362
+ * Flush batch to daemon (internal, thread-safe).
363
+ */
364
+ void flush_internal();
365
+
366
+ /**
367
+ * Start background flush thread.
368
+ */
369
+ void start_flush_thread();
370
+
371
+ /**
372
+ * Stop background flush thread.
373
+ */
374
+ void stop_flush_thread();
375
+
376
+ /**
377
+ * Flush thread function.
378
+ */
379
+ void flush_thread_func();
380
+ };
381
+
382
+ } // namespace core
383
+
384
+ // =========================
385
+ // spdlog Adapter (Optional - requires spdlog)
386
+ // =========================
387
+
388
+ #if DRTRACE_SPDLOG_AVAILABLE
389
+
390
+ /**
391
+ * spdlog sink adapter for DrTrace.
392
+ *
393
+ * This sink converts spdlog log messages to core::LogRecord
394
+ * and forwards them to DrtraceCore.
395
+ *
396
+ * API surface unchanged from previous implementation (for consistency).
397
+ */
398
+ template <typename Mutex>
399
+ class DrtraceSink : public spdlog::sinks::base_sink<Mutex> {
400
+ public:
401
+ explicit DrtraceSink(const DrtraceConfig& config)
402
+ : config_(config), core_(std::make_unique<core::DrtraceCore>(config)) {
403
+ }
404
+
405
+ ~DrtraceSink() {
406
+ // Flush any remaining records
407
+ this->flush_();
408
+ }
409
+
410
+ protected:
411
+ void sink_it_(const spdlog::details::log_msg& msg) override {
412
+ if (!core_ || !core_->is_enabled()) {
413
+ return;
414
+ }
415
+
416
+ // Convert spdlog log_msg to core::LogRecord
417
+ core::LogRecord record = convert_to_log_record(msg);
418
+
419
+ // Delegate to core (thread-safe)
420
+ core_->log(record);
421
+ }
422
+
423
+ void flush_() override {
424
+ if (core_) {
425
+ core_->flush();
426
+ }
427
+ }
428
+
429
+ protected:
430
+ /**
431
+ * Convert spdlog log_msg to core::LogRecord.
432
+ * Protected for testing purposes.
433
+ */
434
+ core::LogRecord convert_to_log_record(const spdlog::details::log_msg& msg) {
435
+ core::LogRecord record;
436
+
437
+ // Map spdlog level to core::LogLevel
438
+ switch (msg.level) {
439
+ case spdlog::level::trace:
440
+ case spdlog::level::debug:
441
+ record.level = core::LogLevel::DEBUG;
442
+ break;
443
+ case spdlog::level::info:
444
+ record.level = core::LogLevel::INFO;
445
+ break;
446
+ case spdlog::level::warn:
447
+ record.level = core::LogLevel::WARN;
448
+ break;
449
+ case spdlog::level::err:
450
+ record.level = core::LogLevel::ERROR;
451
+ break;
452
+ case spdlog::level::critical:
453
+ record.level = core::LogLevel::CRITICAL;
454
+ break;
455
+ default:
456
+ record.level = core::LogLevel::INFO;
457
+ break;
458
+ }
459
+
460
+ // Copy message and logger name
461
+ record.message = std::string(msg.payload.data(), msg.payload.size());
462
+ record.logger_name = std::string(msg.logger_name.data(), msg.logger_name.size());
463
+
464
+ // Set timestamp
465
+ record.timestamp = msg.time;
466
+
467
+ // Set source location (copy strings from spdlog string views for memory safety)
468
+ if (msg.source.filename) {
469
+ record.source.filename = std::string(msg.source.filename);
470
+ }
471
+ record.source.line = msg.source.line;
472
+ if (msg.source.funcname) {
473
+ record.source.function = std::string(msg.source.funcname);
474
+ }
475
+
476
+ // Add thread ID to context
477
+ std::ostringstream thread_id_str;
478
+ thread_id_str << std::this_thread::get_id();
479
+ record.context["thread_id"] = thread_id_str.str();
480
+
481
+ return record;
482
+ }
483
+
484
+ private:
485
+ DrtraceConfig config_;
486
+ std::unique_ptr<core::DrtraceCore> core_;
487
+ };
488
+
489
+ // Convenience type aliases
490
+ using DrtraceSink_mt = DrtraceSink<std::mutex>; // Multi-threaded
491
+ using DrtraceSink_st = DrtraceSink<spdlog::details::null_mutex>; // Single-threaded
492
+
493
+ /**
494
+ * Setup DrTrace integration for an existing spdlog logger.
495
+ *
496
+ * This adds a DrtraceSink to the logger without removing existing sinks.
497
+ */
498
+ inline void setup_drtrace(std::shared_ptr<spdlog::logger> logger,
499
+ const DrtraceConfig& config);
500
+
501
+ /**
502
+ * Create a new spdlog logger with DrTrace integration enabled.
503
+ */
504
+ inline std::shared_ptr<spdlog::logger> create_drtrace_logger(
505
+ const std::string& logger_name, const DrtraceConfig& config);
506
+
507
+ #endif // DRTRACE_SPDLOG_AVAILABLE
508
+
509
+ // =========================
510
+ // Direct API (No spdlog required)
511
+ // =========================
512
+
513
+ /**
514
+ * Direct DrTrace client API (no spdlog required).
515
+ *
516
+ * Usage:
517
+ * drtrace::DrtraceClient client(config);
518
+ * client.info("Application started");
519
+ * client.error("Something went wrong", __FILE__, __LINE__);
520
+ */
521
+ class DrtraceClient {
522
+ public:
523
+ explicit DrtraceClient(const DrtraceConfig& config,
524
+ const std::string& logger_name = "default")
525
+ : config_(config), logger_name_(logger_name),
526
+ core_(std::make_unique<core::DrtraceCore>(config)) {
527
+ }
528
+
529
+ ~DrtraceClient() {
530
+ // Flush any remaining records
531
+ if (core_) {
532
+ core_->flush();
533
+ }
534
+ }
535
+
536
+ // Non-copyable
537
+ DrtraceClient(const DrtraceClient&) = delete;
538
+ DrtraceClient& operator=(const DrtraceClient&) = delete;
539
+
540
+ /**
541
+ * Log a message.
542
+ *
543
+ * @param level Log level
544
+ * @param message Log message
545
+ * @param filename Optional source filename (for __FILE__)
546
+ * @param line Optional source line (for __LINE__)
547
+ * @param function Optional function name (for __FUNCTION__)
548
+ */
549
+ void log(core::LogLevel level,
550
+ const std::string& message,
551
+ const char* filename = nullptr,
552
+ int line = 0,
553
+ const char* function = nullptr) {
554
+ if (!core_ || !core_->is_enabled()) {
555
+ return;
556
+ }
557
+
558
+ core::LogRecord record;
559
+ record.level = level;
560
+ record.message = message;
561
+ record.logger_name = logger_name_;
562
+ record.timestamp = std::chrono::system_clock::now();
563
+ // Copy strings for memory safety (filename/function may be temporary)
564
+ if (filename) {
565
+ record.source.filename = filename;
566
+ }
567
+ record.source.line = line;
568
+ if (function) {
569
+ record.source.function = function;
570
+ }
571
+
572
+ // Add thread ID to context
573
+ std::ostringstream thread_id_str;
574
+ thread_id_str << std::this_thread::get_id();
575
+ record.context["thread_id"] = thread_id_str.str();
576
+
577
+ core_->log(record);
578
+ }
579
+
580
+ /**
581
+ * Convenience methods for each log level.
582
+ */
583
+ void debug(const std::string& message,
584
+ const char* filename = nullptr,
585
+ int line = 0,
586
+ const char* function = nullptr) {
587
+ log(core::LogLevel::DEBUG, message, filename, line, function);
588
+ }
589
+
590
+ void info(const std::string& message,
591
+ const char* filename = nullptr,
592
+ int line = 0,
593
+ const char* function = nullptr) {
594
+ log(core::LogLevel::INFO, message, filename, line, function);
595
+ }
596
+
597
+ void warn(const std::string& message,
598
+ const char* filename = nullptr,
599
+ int line = 0,
600
+ const char* function = nullptr) {
601
+ log(core::LogLevel::WARN, message, filename, line, function);
602
+ }
603
+
604
+ void error(const std::string& message,
605
+ const char* filename = nullptr,
606
+ int line = 0,
607
+ const char* function = nullptr) {
608
+ log(core::LogLevel::ERROR, message, filename, line, function);
609
+ }
610
+
611
+ void critical(const std::string& message,
612
+ const char* filename = nullptr,
613
+ int line = 0,
614
+ const char* function = nullptr) {
615
+ log(core::LogLevel::CRITICAL, message, filename, line, function);
616
+ }
617
+
618
+ /**
619
+ * Flush pending logs.
620
+ */
621
+ void flush() {
622
+ if (core_) {
623
+ core_->flush();
624
+ }
625
+ }
626
+
627
+ /**
628
+ * Check if enabled.
629
+ */
630
+ bool is_enabled() const {
631
+ return core_ && core_->is_enabled();
632
+ }
633
+
634
+ private:
635
+ DrtraceConfig config_;
636
+ std::string logger_name_;
637
+ std::unique_ptr<core::DrtraceCore> core_;
638
+ };
639
+
640
+ // =========================
641
+ // Inline Implementations
642
+ // =========================
643
+
644
+ namespace detail {
645
+
646
// Accumulator handed to libcurl as the write-callback user pointer.
struct WriteData {
    std::string data;
};

/**
 * Write callback: appends the received chunk to the WriteData behind userp.
 *
 * @param contents Start of the received bytes.
 * @param size     Size of one element.
 * @param nmemb    Number of elements.
 * @param userp    Pointer to the WriteData that collects the response.
 * @return Number of bytes consumed (size * nmemb).
 */
inline size_t WriteCallback(void* contents, size_t size, size_t nmemb,
                            void* userp) {
    const size_t byte_count = size * nmemb;
    const char* chunk = static_cast<char*>(contents);
    static_cast<WriteData*>(userp)->data.append(chunk, byte_count);
    return byte_count;
}
658
+
659
/**
 * Extract the application id from a DrTrace JSON config file.
 *
 * The file is not parsed as JSON; it is searched with regular expressions for
 * "application_id": "value" or "applicationId": "value", and then for an
 * "applicationId" nested inside a "drtrace" object.
 *
 * @param config_path Path of the config file to read.
 * @return The first matching value, or an empty string when the file cannot
 *         be opened or no pattern matches.
 */
inline std::string read_application_id_from_config(const std::string& config_path) {
    std::ifstream input(config_path);
    if (!input.is_open()) {
        return "";
    }

    // Slurp the whole file, then release the handle before searching.
    std::string text;
    text.assign(std::istreambuf_iterator<char>(input),
                std::istreambuf_iterator<char>());
    input.close();

    std::smatch hit;

    // snake_case key is capture group 1, camelCase key is capture group 2.
    const std::regex flat_key(
        R"delim("application_id"\s*:\s*"([^"]+)"|"applicationId"\s*:\s*"([^"]+)")delim");
    if (std::regex_search(text, hit, flat_key)) {
        if (hit[1].matched) {
            return hit[1].str();
        }
        return hit[2].str();
    }

    // Fallback: applicationId inside a "drtrace" object.
    const std::regex nested_key(
        R"delim("drtrace"\s*:\s*\{[^}]*"applicationId"\s*:\s*"([^"]+)")delim");
    if (std::regex_search(text, hit, nested_key)) {
        return hit[1].str();
    }

    return "";
}
696
+
697
+ } // namespace detail
698
+
699
+ // DrtraceConfig::from_env inline implementation
700
+ inline DrtraceConfig DrtraceConfig::from_env() {
701
+ DrtraceConfig config;
702
+
703
+ // Priority 1: Try environment variable first
704
+ const char* app_id = std::getenv("DRTRACE_APPLICATION_ID");
705
+
706
+ // Priority 2: Fall back to _drtrace/config.json if env var not set
707
+ if (!app_id) {
708
+ // Try to find config file relative to current working directory
709
+ // Look for _drtrace/config.json in current directory
710
+ std::string config_path = "_drtrace/config.json";
711
+ std::string app_id_from_config =
712
+ detail::read_application_id_from_config(config_path);
713
+
714
+ if (!app_id_from_config.empty()) {
715
+ config.application_id = app_id_from_config;
716
+ } else {
717
+ // Priority 3: Final fallback to default value (ensures application never crashes)
718
+ // CRITICAL: Must use same default value as Python and JavaScript: "my-app"
719
+ config.application_id = "my-app";
720
+ // Optional: std::cerr << "Warning: Using default application_id 'my-app'. "
721
+ // << "Set DRTRACE_APPLICATION_ID or _drtrace/config.json to customize." << std::endl;
722
+ }
723
+ } else {
724
+ config.application_id = app_id;
725
+ }
726
+
727
+ const char* daemon_url = std::getenv("DRTRACE_DAEMON_URL");
728
+ if (daemon_url) {
729
+ config.daemon_url = daemon_url;
730
+ }
731
+
732
+ const char* service_name = std::getenv("DRTRACE_SERVICE_NAME");
733
+ if (service_name) {
734
+ config.service_name = service_name;
735
+ }
736
+
737
+ const char* enabled = std::getenv("DRTRACE_ENABLED");
738
+ if (enabled && std::string(enabled) == "false") {
739
+ config.enabled = false;
740
+ }
741
+
742
+ // Circuit breaker reset interval (milliseconds)
743
+ const char* circuit_reset_ms = std::getenv("DRTRACE_CIRCUIT_RESET_MS");
744
+ if (circuit_reset_ms) {
745
+ try {
746
+ long ms = std::stol(circuit_reset_ms);
747
+ if (ms > 0) {
748
+ config.circuit_reset_interval = std::chrono::milliseconds(ms);
749
+ }
750
+ } catch (...) {
751
+ // Invalid value, use default
752
+ }
753
+ }
754
+
755
+ // Maximum buffer size (backpressure)
756
+ const char* max_buffer = std::getenv("DRTRACE_MAX_BUFFER_SIZE");
757
+ if (max_buffer) {
758
+ try {
759
+ long size = std::stol(max_buffer);
760
+ if (size >= 0) {
761
+ config.max_buffer_size = static_cast<size_t>(size);
762
+ }
763
+ } catch (...) {
764
+ // Invalid value, use default
765
+ }
766
+ }
767
+
768
+ // Minimum log level (filtering)
769
+ const char* min_level = std::getenv("DRTRACE_MIN_LEVEL");
770
+ if (min_level) {
771
+ config.min_level = parse_log_level(min_level);
772
+ }
773
+
774
+ // HTTP timeout (milliseconds)
775
+ const char* http_timeout = std::getenv("DRTRACE_HTTP_TIMEOUT_MS");
776
+ if (http_timeout) {
777
+ try {
778
+ long ms = std::stol(http_timeout);
779
+ if (ms > 0) {
780
+ config.http_timeout = std::chrono::milliseconds(ms);
781
+ }
782
+ } catch (...) {
783
+ // Invalid value, use default
784
+ }
785
+ }
786
+
787
+ // Retry backoff (milliseconds)
788
+ const char* retry_backoff = std::getenv("DRTRACE_RETRY_BACKOFF_MS");
789
+ if (retry_backoff) {
790
+ try {
791
+ long ms = std::stol(retry_backoff);
792
+ if (ms > 0) {
793
+ config.retry_backoff = std::chrono::milliseconds(ms);
794
+ }
795
+ } catch (...) {
796
+ // Invalid value, use default
797
+ }
798
+ }
799
+
800
+ // Max retries
801
+ const char* max_retries = std::getenv("DRTRACE_MAX_RETRIES");
802
+ if (max_retries) {
803
+ try {
804
+ int retries = std::stoi(max_retries);
805
+ if (retries >= 0) {
806
+ config.max_retries = retries;
807
+ }
808
+ } catch (...) {
809
+ // Invalid value, use default
810
+ }
811
+ }
812
+
813
+ return config;
814
+ }
815
+
816
+ // HttpTransport inline implementations
817
+ inline HttpTransport::HttpTransport(const DrtraceConfig& config)
818
+ : endpoint_(config.daemon_url),
819
+ application_id_(config.application_id),
820
+ max_retries_(config.max_retries),
821
+ base_backoff_ms_(config.retry_backoff),
822
+ http_timeout_(config.http_timeout),
823
+ circuit_reset_interval_(config.circuit_reset_interval) {
824
+ // Ensure curl is initialized (thread-safe, idempotent)
825
+ detail::ensure_curl_initialized();
826
+
827
+ curl_handle_ = curl_easy_init();
828
+ if (!curl_handle_) {
829
+ std::cerr << "Warning: Failed to initialize libcurl for drtrace transport"
830
+ << std::endl;
831
+ }
832
+ }
833
+
834
+ inline void HttpTransport::wait_for_operations() {
835
+ // Wait for any in-flight send_batch() operations to complete.
836
+ // We use a short timeout and retry mechanism to avoid hanging
837
+ // if a network operation is blocked.
838
+ //
839
+ // IMPORTANT: Even if this function times out, the destructor's lock_guard
840
+ // will still block until the mutex is available. This ensures that:
841
+ // 1. If operations complete quickly, we return early (optimization)
842
+ // 2. If operations are slow, we don't wait forever (timeout protection)
843
+ // 3. The destructor's lock_guard ensures operations complete before cleanup
844
+ //
845
+ // This design provides both performance (early return) and safety
846
+ // (guaranteed wait via lock_guard).
847
+ auto start = std::chrono::steady_clock::now();
848
+ auto timeout = start + std::chrono::milliseconds(500); // 500ms timeout
849
+
850
+ while (std::chrono::steady_clock::now() < timeout) {
851
+ // Try to acquire lock - if successful, no operations are in progress
852
+ if (curl_mutex_.try_lock()) {
853
+ curl_mutex_.unlock();
854
+ return; // No operations in progress
855
+ }
856
+ // Lock is held by send_batch(), wait a bit
857
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
858
+ }
859
+
860
+ // Timeout reached - operations may still be in progress, but we proceed anyway.
861
+ // The destructor's lock_guard will still block until operations complete,
862
+ // ensuring thread safety even if this timeout is reached.
863
+ }
864
+
865
+ inline HttpTransport::~HttpTransport() {
866
+ // Shutdown sequence:
867
+ // 1. Set shutdown flag to prevent new operations from starting
868
+ shutdown_flag_.store(true);
869
+
870
+ // 2. Wait for any in-flight send_batch() operations to complete
871
+ // (with timeout to prevent hanging if network I/O is blocked)
872
+ wait_for_operations();
873
+
874
+ // 3. Acquire mutex before cleanup. This lock_guard will block until
875
+ // any remaining operations release the mutex, ensuring curl_handle_
876
+ // is never accessed after cleanup begins.
877
+ std::lock_guard<std::mutex> lock(curl_mutex_);
878
+
879
+ // 4. Now safe to cleanup curl_handle_ (no operations can be using it)
880
+ if (curl_handle_) {
881
+ curl_easy_cleanup(curl_handle_);
882
+ curl_handle_ = nullptr;
883
+ }
884
+
885
+ // Decrement reference count
886
+ // Note: We don't call curl_global_cleanup here - it should only be called
887
+ // at program termination, not in destructors. curl_global_init is idempotent
888
+ // and safe to call multiple times, so leaving it initialized is fine.
889
+ detail::curl_init_ref_count().fetch_sub(1);
890
+ }
891
+
892
+ inline bool HttpTransport::send_batch(
893
+ const std::vector<std::string>& log_records) {
894
+ // Shutdown flag check sequence:
895
+ // 1. Check BEFORE acquiring lock (fast path - avoids lock acquisition if shutdown)
896
+ if (shutdown_flag_.load()) {
897
+ return false;
898
+ }
899
+
900
+ if (log_records.empty()) {
901
+ return false;
902
+ }
903
+
904
+ // Circuit breaker fast-fail check (< 1 microsecond)
905
+ // This is checked BEFORE any network operations to provide fast-fail behavior
906
+ if (is_circuit_open()) {
907
+ return false; // Fast-fail - daemon known to be unavailable
908
+ }
909
+
910
+ // Build JSON payload matching Python client format
911
+ std::ostringstream payload;
912
+ payload << "{\"application_id\":\"" << application_id_ << "\",\"logs\":[";
913
+ for (size_t i = 0; i < log_records.size(); ++i) {
914
+ if (i > 0) {
915
+ payload << ",";
916
+ }
917
+ payload << log_records[i];
918
+ }
919
+ payload << "]}";
920
+
921
+ std::string payload_str = payload.str();
922
+
923
+ // 2. Acquire lock to check and use curl_handle_
924
+ // This ensures only one thread accesses curl_handle_ at a time
925
+ std::lock_guard<std::mutex> lock(curl_mutex_);
926
+
927
+ // 3. Check AGAIN after acquiring lock (shutdown may have happened between checks)
928
+ // This double-check pattern prevents race conditions where shutdown happens
929
+ // between the first check and lock acquisition
930
+ if (shutdown_flag_.load() || !curl_handle_) {
931
+ return false;
932
+ }
933
+
934
+ // Setup curl request (all operations protected by mutex)
935
+ curl_easy_reset(curl_handle_);
936
+ curl_easy_setopt(curl_handle_, CURLOPT_URL, endpoint_.c_str());
937
+ curl_easy_setopt(curl_handle_, CURLOPT_POSTFIELDS, payload_str.c_str());
938
+ curl_easy_setopt(curl_handle_, CURLOPT_POSTFIELDSIZE, payload_str.length());
939
+
940
+ struct curl_slist* headers = nullptr;
941
+ headers = curl_slist_append(headers, "Content-Type: application/json");
942
+ curl_easy_setopt(curl_handle_, CURLOPT_HTTPHEADER, headers);
943
+
944
+ // Use configurable timeout (milliseconds)
945
+ curl_easy_setopt(curl_handle_, CURLOPT_TIMEOUT_MS,
946
+ static_cast<long>(http_timeout_.count()));
947
+ curl_easy_setopt(curl_handle_, CURLOPT_WRITEFUNCTION,
948
+ detail::WriteCallback);
949
+
950
+ detail::WriteData write_data;
951
+ curl_easy_setopt(curl_handle_, CURLOPT_WRITEDATA, &write_data);
952
+
953
+ // Retry loop
954
+ for (int attempt = 1; attempt <= max_retries_; ++attempt) {
955
+ // 4. Check shutdown flag before each retry attempt
956
+ // This allows long-running operations to exit early if shutdown occurs
957
+ // during network I/O (e.g., if curl_easy_perform() is slow)
958
+ if (shutdown_flag_.load()) {
959
+ curl_slist_free_all(headers);
960
+ return false;
961
+ }
962
+
963
+ CURLcode res = curl_easy_perform(curl_handle_);
964
+ if (res == CURLE_OK) {
965
+ long response_code;
966
+ curl_easy_getinfo(curl_handle_, CURLINFO_RESPONSE_CODE, &response_code);
967
+ if (response_code >= 200 && response_code < 300) {
968
+ // Success - close circuit (daemon is available)
969
+ close_circuit();
970
+ curl_slist_free_all(headers);
971
+ return true;
972
+ }
973
+ // Non-2xx response - will retry or fail
974
+ }
975
+
976
+ if (attempt < max_retries_) {
977
+ std::this_thread::sleep_for(base_backoff_ms_ * attempt);
978
+ }
979
+ }
980
+
981
+ // All retries failed - open circuit (daemon unavailable)
982
+ open_circuit();
983
+
984
+ curl_slist_free_all(headers);
985
+ return false;
986
+ }
987
+
988
+ #if DRTRACE_SPDLOG_AVAILABLE
989
+
990
+ // setup_drtrace and create_drtrace_logger inline implementations
991
+ inline void setup_drtrace(std::shared_ptr<spdlog::logger> logger,
992
+ const DrtraceConfig& config) {
993
+ if (!config.enabled) {
994
+ return;
995
+ }
996
+
997
+ auto sink = std::make_shared<DrtraceSink_mt>(config);
998
+ logger->sinks().push_back(sink);
999
+ }
1000
+
1001
+ inline std::shared_ptr<spdlog::logger> create_drtrace_logger(
1002
+ const std::string& logger_name, const DrtraceConfig& config) {
1003
+ auto logger = spdlog::get(logger_name);
1004
+ if (logger) {
1005
+ return logger;
1006
+ }
1007
+
1008
+ logger = std::make_shared<spdlog::logger>(logger_name);
1009
+ if (config.enabled) {
1010
+ auto sink = std::make_shared<DrtraceSink_mt>(config);
1011
+ logger->sinks().push_back(sink);
1012
+ }
1013
+ spdlog::register_logger(logger);
1014
+ return logger;
1015
+ }
1016
+
1017
+ #endif // DRTRACE_SPDLOG_AVAILABLE
1018
+
1019
+ // =========================
1020
+ // Core Components Inline Implementations
1021
+ // =========================
1022
+
1023
+ namespace core {
1024
+
1025
+ // DrtraceCore inline implementations
1026
+ inline DrtraceCore::DrtraceCore(const DrtraceConfig& config)
1027
+ : config_(config), transport_(std::make_unique<HttpTransport>(config)), flush_thread_running_(false) {
1028
+ if (config_.enabled) {
1029
+ start_flush_thread();
1030
+ }
1031
+ }
1032
+
1033
+ inline DrtraceCore::~DrtraceCore() {
1034
+ // Stop flush thread first (before flushing to avoid race conditions)
1035
+ if (flush_thread_running_) {
1036
+ stop_flush_thread();
1037
+ }
1038
+ // Flush any remaining records (after thread is stopped)
1039
+ flush();
1040
+ }
1041
+
1042
+ inline void DrtraceCore::log(const LogRecord& record) {
1043
+ if (!config_.enabled) {
1044
+ return;
1045
+ }
1046
+
1047
+ // Level filtering: skip logs below min_level
1048
+ if (record.level < config_.min_level) {
1049
+ return;
1050
+ }
1051
+
1052
+ bool should_flush = false;
1053
+ {
1054
+ std::lock_guard<std::mutex> lock(batch_mutex_);
1055
+
1056
+ // Backpressure: drop oldest log if buffer is full
1057
+ // This prevents OOM when daemon is unavailable or slow
1058
+ if (config_.max_buffer_size > 0 && batch_.size() >= config_.max_buffer_size) {
1059
+ batch_.erase(batch_.begin()); // Drop oldest
1060
+ }
1061
+
1062
+ std::string json_record = serialize_record(record);
1063
+ batch_.push_back(std::move(json_record));
1064
+
1065
+ // Check if batch size reached (flush outside lock)
1066
+ should_flush = (batch_.size() >= config_.batch_size);
1067
+ }
1068
+
1069
+ // Flush outside the lock to avoid holding lock during network I/O
1070
+ if (should_flush) {
1071
+ flush_internal();
1072
+ }
1073
+ }
1074
+
1075
+ inline void DrtraceCore::flush() {
1076
+ // flush_internal() manages its own locking - don't hold lock here
1077
+ flush_internal();
1078
+ }
1079
+
1080
+ inline std::string DrtraceCore::escape_json(const std::string& str) {
1081
+ std::ostringstream escaped;
1082
+ for (char c : str) {
1083
+ switch (c) {
1084
+ case '"':
1085
+ escaped << "\\\"";
1086
+ break;
1087
+ case '\\':
1088
+ escaped << "\\\\";
1089
+ break;
1090
+ case '\b':
1091
+ escaped << "\\b";
1092
+ break;
1093
+ case '\f':
1094
+ escaped << "\\f";
1095
+ break;
1096
+ case '\n':
1097
+ escaped << "\\n";
1098
+ break;
1099
+ case '\r':
1100
+ escaped << "\\r";
1101
+ break;
1102
+ case '\t':
1103
+ escaped << "\\t";
1104
+ break;
1105
+ default:
1106
+ if (static_cast<unsigned char>(c) < 0x20) {
1107
+ escaped << "\\u" << std::hex << std::setw(4) << std::setfill('0')
1108
+ << static_cast<int>(c);
1109
+ } else {
1110
+ escaped << c;
1111
+ }
1112
+ break;
1113
+ }
1114
+ }
1115
+ return escaped.str();
1116
+ }
1117
+
1118
+ inline std::string DrtraceCore::serialize_record(const LogRecord& record) {
1119
+ std::ostringstream json;
1120
+ // Set precision to preserve fractional seconds (6 decimal places = microsecond precision)
1121
+ json << std::fixed << std::setprecision(6);
1122
+
1123
+ // Get timestamp as Unix timestamp (seconds since epoch, with fractional seconds)
1124
+ auto ts_duration = record.timestamp.time_since_epoch();
1125
+ auto ts_seconds = std::chrono::duration_cast<std::chrono::seconds>(ts_duration);
1126
+ auto ts_fractional = std::chrono::duration_cast<std::chrono::milliseconds>(
1127
+ ts_duration - ts_seconds);
1128
+ double ts = ts_seconds.count() + (ts_fractional.count() / 1000.0);
1129
+
1130
+ // Map LogLevel to string
1131
+ std::string level_str;
1132
+ switch (record.level) {
1133
+ case LogLevel::DEBUG:
1134
+ level_str = "debug";
1135
+ break;
1136
+ case LogLevel::INFO:
1137
+ level_str = "info";
1138
+ break;
1139
+ case LogLevel::WARN:
1140
+ level_str = "warn";
1141
+ break;
1142
+ case LogLevel::ERROR:
1143
+ level_str = "error";
1144
+ break;
1145
+ case LogLevel::CRITICAL:
1146
+ level_str = "critical";
1147
+ break;
1148
+ }
1149
+
1150
+ json << "{"
1151
+ << "\"ts\":" << ts << ","
1152
+ << "\"level\":\"" << escape_json(level_str) << "\","
1153
+ << "\"message\":\"" << escape_json(record.message) << "\","
1154
+ << "\"application_id\":\"" << escape_json(config_.application_id) << "\","
1155
+ << "\"module_name\":\"" << escape_json(record.logger_name) << "\"";
1156
+
1157
+ // Optional service_name
1158
+ if (!config_.service_name.empty()) {
1159
+ json << ",\"service_name\":\"" << escape_json(config_.service_name) << "\"";
1160
+ }
1161
+
1162
+ // Optional file_path and line_no (check for empty strings instead of null pointers)
1163
+ if (!record.source.filename.empty()) {
1164
+ json << ",\"file_path\":\"" << escape_json(record.source.filename) << "\"";
1165
+ }
1166
+ if (record.source.line > 0) {
1167
+ json << ",\"line_no\":" << record.source.line;
1168
+ }
1169
+
1170
+ // Context field
1171
+ json << ",\"context\":{"
1172
+ << "\"language\":\"cpp\"";
1173
+ // Add thread ID
1174
+ json << ",\"thread_id\":\"" << std::this_thread::get_id() << "\"";
1175
+ // Add any additional context
1176
+ for (const auto& [key, value] : record.context) {
1177
+ json << ",\"" << escape_json(key) << "\":\"" << escape_json(value) << "\"";
1178
+ }
1179
+ json << "}";
1180
+
1181
+ json << "}";
1182
+ return json.str();
1183
+ }
1184
+
1185
+ inline void DrtraceCore::flush_internal() {
1186
+ // RAII locking - manages its own lock, callers should NOT hold batch_mutex_
1187
+ std::vector<std::string> to_send;
1188
+ {
1189
+ std::lock_guard<std::mutex> lock(batch_mutex_);
1190
+ if (batch_.empty()) {
1191
+ return;
1192
+ }
1193
+ to_send.swap(batch_);
1194
+ }
1195
+ // Lock released here - safe to do network I/O without blocking other threads
1196
+
1197
+ // Send batch (transport handles errors internally)
1198
+ if (transport_) {
1199
+ transport_->send_batch(to_send);
1200
+ }
1201
+ }
1202
+
1203
+ inline void DrtraceCore::start_flush_thread() {
1204
+ flush_thread_running_ = true;
1205
+ flush_thread_ = std::thread([this]() {
1206
+ while (true) {
1207
+ std::unique_lock<std::mutex> lock(flush_mutex_);
1208
+ if (flush_cv_.wait_for(lock, config_.flush_interval,
1209
+ [this] { return should_stop_; })) {
1210
+ break; // Stop requested
1211
+ }
1212
+ lock.unlock(); // Release flush_mutex_ before calling flush_internal
1213
+
1214
+ // flush_internal() manages batch_mutex_ internally - don't hold it here
1215
+ flush_internal();
1216
+ }
1217
+ });
1218
+ }
1219
+
1220
+ inline void DrtraceCore::stop_flush_thread() {
1221
+ // Set stop flag
1222
+ {
1223
+ std::lock_guard<std::mutex> lock(flush_mutex_);
1224
+ should_stop_ = true;
1225
+ }
1226
+ // Notify flush thread to wake up and check stop flag
1227
+ flush_cv_.notify_one();
1228
+
1229
+ // Always join - never detach to avoid use-after-free
1230
+ // The flush thread checks should_stop_ in its wait condition, so it will
1231
+ // exit promptly. The only delay is if send_batch() is in progress, which
1232
+ // is bounded by curl timeout and circuit breaker fast-fail.
1233
+ if (flush_thread_.joinable()) {
1234
+ flush_thread_.join();
1235
+ }
1236
+
1237
+ flush_thread_running_ = false;
1238
+ }
1239
+
1240
+ inline void DrtraceCore::flush_thread_func() {
1241
+ // This is handled by the lambda in start_flush_thread
1242
+ // Kept for consistency with architecture
1243
+ }
1244
+
1245
+ } // namespace core
1246
+
1247
+ } // namespace drtrace
1248
+