observability-toolkit 1.8.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/README.md +126 -5
  2. package/dist/backends/index.d.ts +163 -0
  3. package/dist/backends/index.d.ts.map +1 -1
  4. package/dist/backends/index.js +57 -0
  5. package/dist/backends/index.js.map +1 -1
  6. package/dist/backends/index.test.js +55 -1
  7. package/dist/backends/index.test.js.map +1 -1
  8. package/dist/backends/local-jsonl.d.ts +30 -0
  9. package/dist/backends/local-jsonl.d.ts.map +1 -1
  10. package/dist/backends/local-jsonl.js +912 -550
  11. package/dist/backends/local-jsonl.js.map +1 -1
  12. package/dist/backends/signoz-api-rate-limiter.test.js +2 -1
  13. package/dist/backends/signoz-api-rate-limiter.test.js.map +1 -1
  14. package/dist/backends/signoz-api.d.ts +16 -2
  15. package/dist/backends/signoz-api.d.ts.map +1 -1
  16. package/dist/backends/signoz-api.js +650 -534
  17. package/dist/backends/signoz-api.js.map +1 -1
  18. package/dist/backends/signoz-api.test.js +6 -5
  19. package/dist/backends/signoz-api.test.js.map +1 -1
  20. package/dist/lib/agent-as-judge.d.ts +388 -0
  21. package/dist/lib/agent-as-judge.d.ts.map +1 -0
  22. package/dist/lib/agent-as-judge.js +740 -0
  23. package/dist/lib/agent-as-judge.js.map +1 -0
  24. package/dist/lib/agent-as-judge.test.d.ts +5 -0
  25. package/dist/lib/agent-as-judge.test.d.ts.map +1 -0
  26. package/dist/lib/agent-as-judge.test.js +816 -0
  27. package/dist/lib/agent-as-judge.test.js.map +1 -0
  28. package/dist/lib/cache.d.ts +15 -2
  29. package/dist/lib/cache.d.ts.map +1 -1
  30. package/dist/lib/cache.js +16 -2
  31. package/dist/lib/cache.js.map +1 -1
  32. package/dist/lib/circuit-breaker.d.ts +18 -0
  33. package/dist/lib/circuit-breaker.d.ts.map +1 -1
  34. package/dist/lib/circuit-breaker.js +41 -8
  35. package/dist/lib/circuit-breaker.js.map +1 -1
  36. package/dist/lib/confident-export.d.ts +101 -0
  37. package/dist/lib/confident-export.d.ts.map +1 -0
  38. package/dist/lib/confident-export.js +393 -0
  39. package/dist/lib/confident-export.js.map +1 -0
  40. package/dist/lib/confident-export.test.d.ts +7 -0
  41. package/dist/lib/confident-export.test.d.ts.map +1 -0
  42. package/dist/lib/confident-export.test.js +835 -0
  43. package/dist/lib/confident-export.test.js.map +1 -0
  44. package/dist/lib/constants.d.ts +75 -0
  45. package/dist/lib/constants.d.ts.map +1 -1
  46. package/dist/lib/constants.js +104 -1
  47. package/dist/lib/constants.js.map +1 -1
  48. package/dist/lib/datadog-export.d.ts +156 -0
  49. package/dist/lib/datadog-export.d.ts.map +1 -0
  50. package/dist/lib/datadog-export.js +464 -0
  51. package/dist/lib/datadog-export.js.map +1 -0
  52. package/dist/lib/datadog-export.test.d.ts +14 -0
  53. package/dist/lib/datadog-export.test.d.ts.map +1 -0
  54. package/dist/lib/datadog-export.test.js +890 -0
  55. package/dist/lib/datadog-export.test.js.map +1 -0
  56. package/dist/lib/evaluation-hooks.d.ts +49 -0
  57. package/dist/lib/evaluation-hooks.d.ts.map +1 -0
  58. package/dist/lib/evaluation-hooks.js +488 -0
  59. package/dist/lib/evaluation-hooks.js.map +1 -0
  60. package/dist/lib/evaluation-hooks.test.d.ts +8 -0
  61. package/dist/lib/evaluation-hooks.test.d.ts.map +1 -0
  62. package/dist/lib/evaluation-hooks.test.js +624 -0
  63. package/dist/lib/evaluation-hooks.test.js.map +1 -0
  64. package/dist/lib/export-utils.d.ts +99 -0
  65. package/dist/lib/export-utils.d.ts.map +1 -0
  66. package/dist/lib/export-utils.js +238 -0
  67. package/dist/lib/export-utils.js.map +1 -0
  68. package/dist/lib/export-utils.test.d.ts +5 -0
  69. package/dist/lib/export-utils.test.d.ts.map +1 -0
  70. package/dist/lib/export-utils.test.js +193 -0
  71. package/dist/lib/export-utils.test.js.map +1 -0
  72. package/dist/lib/file-utils.d.ts +17 -2
  73. package/dist/lib/file-utils.d.ts.map +1 -1
  74. package/dist/lib/file-utils.js +24 -5
  75. package/dist/lib/file-utils.js.map +1 -1
  76. package/dist/lib/file-utils.test.js +30 -0
  77. package/dist/lib/file-utils.test.js.map +1 -1
  78. package/dist/lib/histogram.d.ts +119 -0
  79. package/dist/lib/histogram.d.ts.map +1 -0
  80. package/dist/lib/histogram.js +202 -0
  81. package/dist/lib/histogram.js.map +1 -0
  82. package/dist/lib/histogram.test.d.ts +5 -0
  83. package/dist/lib/histogram.test.d.ts.map +1 -0
  84. package/dist/lib/histogram.test.js +381 -0
  85. package/dist/lib/histogram.test.js.map +1 -0
  86. package/dist/lib/instrumentation.d.ts +153 -0
  87. package/dist/lib/instrumentation.d.ts.map +1 -0
  88. package/dist/lib/instrumentation.integration.test.d.ts +2 -0
  89. package/dist/lib/instrumentation.integration.test.d.ts.map +1 -0
  90. package/dist/lib/instrumentation.integration.test.js +589 -0
  91. package/dist/lib/instrumentation.integration.test.js.map +1 -0
  92. package/dist/lib/instrumentation.js +520 -0
  93. package/dist/lib/instrumentation.js.map +1 -0
  94. package/dist/lib/instrumentation.test.d.ts +2 -0
  95. package/dist/lib/instrumentation.test.d.ts.map +1 -0
  96. package/dist/lib/instrumentation.test.js +821 -0
  97. package/dist/lib/instrumentation.test.js.map +1 -0
  98. package/dist/lib/langfuse-export.d.ts +125 -0
  99. package/dist/lib/langfuse-export.d.ts.map +1 -0
  100. package/dist/lib/langfuse-export.js +367 -0
  101. package/dist/lib/langfuse-export.js.map +1 -0
  102. package/dist/lib/langfuse-export.test.d.ts +7 -0
  103. package/dist/lib/langfuse-export.test.d.ts.map +1 -0
  104. package/dist/lib/langfuse-export.test.js +1007 -0
  105. package/dist/lib/langfuse-export.test.js.map +1 -0
  106. package/dist/lib/llm-as-judge.d.ts +657 -0
  107. package/dist/lib/llm-as-judge.d.ts.map +1 -0
  108. package/dist/lib/llm-as-judge.js +1397 -0
  109. package/dist/lib/llm-as-judge.js.map +1 -0
  110. package/dist/lib/llm-as-judge.test.d.ts +2 -0
  111. package/dist/lib/llm-as-judge.test.d.ts.map +1 -0
  112. package/dist/lib/llm-as-judge.test.js +2409 -0
  113. package/dist/lib/llm-as-judge.test.js.map +1 -0
  114. package/dist/lib/logger.d.ts +1 -1
  115. package/dist/lib/logger.d.ts.map +1 -1
  116. package/dist/lib/logger.js.map +1 -1
  117. package/dist/lib/metrics.d.ts +62 -0
  118. package/dist/lib/metrics.d.ts.map +1 -0
  119. package/dist/lib/metrics.js +166 -0
  120. package/dist/lib/metrics.js.map +1 -0
  121. package/dist/lib/metrics.test.d.ts +5 -0
  122. package/dist/lib/metrics.test.d.ts.map +1 -0
  123. package/dist/lib/metrics.test.js +189 -0
  124. package/dist/lib/metrics.test.js.map +1 -0
  125. package/dist/lib/parse-stats.d.ts +119 -0
  126. package/dist/lib/parse-stats.d.ts.map +1 -0
  127. package/dist/lib/parse-stats.js +206 -0
  128. package/dist/lib/parse-stats.js.map +1 -0
  129. package/dist/lib/parse-stats.test.d.ts +5 -0
  130. package/dist/lib/parse-stats.test.d.ts.map +1 -0
  131. package/dist/lib/parse-stats.test.js +283 -0
  132. package/dist/lib/parse-stats.test.js.map +1 -0
  133. package/dist/lib/phoenix-export.d.ts +109 -0
  134. package/dist/lib/phoenix-export.d.ts.map +1 -0
  135. package/dist/lib/phoenix-export.js +429 -0
  136. package/dist/lib/phoenix-export.js.map +1 -0
  137. package/dist/lib/phoenix-export.test.d.ts +11 -0
  138. package/dist/lib/phoenix-export.test.d.ts.map +1 -0
  139. package/dist/lib/phoenix-export.test.js +725 -0
  140. package/dist/lib/phoenix-export.test.js.map +1 -0
  141. package/dist/lib/server-utils.d.ts +6 -1
  142. package/dist/lib/server-utils.d.ts.map +1 -1
  143. package/dist/lib/server-utils.js +9 -1
  144. package/dist/lib/server-utils.js.map +1 -1
  145. package/dist/lib/shared-schemas.d.ts +6 -0
  146. package/dist/lib/shared-schemas.d.ts.map +1 -1
  147. package/dist/lib/shared-schemas.js +11 -4
  148. package/dist/lib/shared-schemas.js.map +1 -1
  149. package/dist/lib/verification-events.d.ts +100 -0
  150. package/dist/lib/verification-events.d.ts.map +1 -0
  151. package/dist/lib/verification-events.js +162 -0
  152. package/dist/lib/verification-events.js.map +1 -0
  153. package/dist/lib/verification-events.test.d.ts +5 -0
  154. package/dist/lib/verification-events.test.d.ts.map +1 -0
  155. package/dist/lib/verification-events.test.js +193 -0
  156. package/dist/lib/verification-events.test.js.map +1 -0
  157. package/dist/server.d.ts +5 -0
  158. package/dist/server.d.ts.map +1 -1
  159. package/dist/server.js +77 -21
  160. package/dist/server.js.map +1 -1
  161. package/dist/tools/context-stats.d.ts.map +1 -1
  162. package/dist/tools/context-stats.js +6 -8
  163. package/dist/tools/context-stats.js.map +1 -1
  164. package/dist/tools/export-confident.d.ts +145 -0
  165. package/dist/tools/export-confident.d.ts.map +1 -0
  166. package/dist/tools/export-confident.js +134 -0
  167. package/dist/tools/export-confident.js.map +1 -0
  168. package/dist/tools/export-confident.test.d.ts +7 -0
  169. package/dist/tools/export-confident.test.d.ts.map +1 -0
  170. package/dist/tools/export-confident.test.js +332 -0
  171. package/dist/tools/export-confident.test.js.map +1 -0
  172. package/dist/tools/export-datadog.d.ts +160 -0
  173. package/dist/tools/export-datadog.d.ts.map +1 -0
  174. package/dist/tools/export-datadog.js +160 -0
  175. package/dist/tools/export-datadog.js.map +1 -0
  176. package/dist/tools/export-datadog.test.d.ts +8 -0
  177. package/dist/tools/export-datadog.test.d.ts.map +1 -0
  178. package/dist/tools/export-datadog.test.js +419 -0
  179. package/dist/tools/export-datadog.test.js.map +1 -0
  180. package/dist/tools/export-langfuse.d.ts +137 -0
  181. package/dist/tools/export-langfuse.d.ts.map +1 -0
  182. package/dist/tools/export-langfuse.js +131 -0
  183. package/dist/tools/export-langfuse.js.map +1 -0
  184. package/dist/tools/export-langfuse.test.d.ts +7 -0
  185. package/dist/tools/export-langfuse.test.d.ts.map +1 -0
  186. package/dist/tools/export-langfuse.test.js +303 -0
  187. package/dist/tools/export-langfuse.test.js.map +1 -0
  188. package/dist/tools/export-phoenix.d.ts +145 -0
  189. package/dist/tools/export-phoenix.d.ts.map +1 -0
  190. package/dist/tools/export-phoenix.js +135 -0
  191. package/dist/tools/export-phoenix.js.map +1 -0
  192. package/dist/tools/export-phoenix.test.d.ts +7 -0
  193. package/dist/tools/export-phoenix.test.d.ts.map +1 -0
  194. package/dist/tools/export-phoenix.test.js +316 -0
  195. package/dist/tools/export-phoenix.test.js.map +1 -0
  196. package/dist/tools/health-check.d.ts +26 -0
  197. package/dist/tools/health-check.d.ts.map +1 -1
  198. package/dist/tools/health-check.js +36 -7
  199. package/dist/tools/health-check.js.map +1 -1
  200. package/dist/tools/index.d.ts +6 -0
  201. package/dist/tools/index.d.ts.map +1 -1
  202. package/dist/tools/index.js +6 -0
  203. package/dist/tools/index.js.map +1 -1
  204. package/dist/tools/inject-evaluations.d.ts +1315 -0
  205. package/dist/tools/inject-evaluations.d.ts.map +1 -0
  206. package/dist/tools/inject-evaluations.js +121 -0
  207. package/dist/tools/inject-evaluations.js.map +1 -0
  208. package/dist/tools/inject-evaluations.test.d.ts +5 -0
  209. package/dist/tools/inject-evaluations.test.d.ts.map +1 -0
  210. package/dist/tools/inject-evaluations.test.js +359 -0
  211. package/dist/tools/inject-evaluations.test.js.map +1 -0
  212. package/dist/tools/query-evaluations.d.ts +25 -4
  213. package/dist/tools/query-evaluations.d.ts.map +1 -1
  214. package/dist/tools/query-evaluations.js +10 -0
  215. package/dist/tools/query-evaluations.js.map +1 -1
  216. package/dist/tools/query-llm-events.js +2 -2
  217. package/dist/tools/query-llm-events.js.map +1 -1
  218. package/dist/tools/query-logs.d.ts +8 -8
  219. package/dist/tools/query-logs.js +3 -3
  220. package/dist/tools/query-logs.js.map +1 -1
  221. package/dist/tools/query-metrics.d.ts +4 -4
  222. package/dist/tools/query-metrics.js +2 -2
  223. package/dist/tools/query-metrics.js.map +1 -1
  224. package/dist/tools/query-traces.d.ts +8 -8
  225. package/dist/tools/query-verifications.d.ts +111 -0
  226. package/dist/tools/query-verifications.d.ts.map +1 -0
  227. package/dist/tools/query-verifications.js +101 -0
  228. package/dist/tools/query-verifications.js.map +1 -0
  229. package/dist/tools/query-verifications.test.d.ts +5 -0
  230. package/dist/tools/query-verifications.test.d.ts.map +1 -0
  231. package/dist/tools/query-verifications.test.js +156 -0
  232. package/dist/tools/query-verifications.test.js.map +1 -0
  233. package/dist/types/evaluation-hooks.d.ts +176 -0
  234. package/dist/types/evaluation-hooks.d.ts.map +1 -0
  235. package/dist/types/evaluation-hooks.js +49 -0
  236. package/dist/types/evaluation-hooks.js.map +1 -0
  237. package/package.json +10 -2
@@ -0,0 +1,890 @@
1
+ /**
2
+ * Tests for datadog-export.ts
3
+ *
4
+ * Covers config validation, API endpoint mapping, auth headers, metric type
5
+ * inference, span/eval metric conversion, and batched export functionality.
6
+ *
7
+ * Datadog differs from OTLP-based integrations:
8
+ * - Uses proprietary HTTP API (not OTLP)
9
+ * - Two-phase export: spans first, then eval metrics
10
+ * - Fixed endpoints per site (no SSRF validation needed)
11
+ * - Three metric types: boolean, score, categorical
12
+ */
13
+ import { describe, it, beforeEach, afterEach, mock } from 'node:test';
14
+ import assert from 'node:assert';
15
+ import { validateDatadogApiKey, validateDatadogConfig, createDatadogAuthHeader, getDatadogEndpoints, inferMetricType, sanitizeDatadogString, evaluationsToDatadogSpans, evaluationsToDatadogEvalMetrics, exportToDatadog, } from './datadog-export.js';
16
+ import { DD_ROOT_SPAN_PARENT_ID, DD_SPAN_DURATION_NS, DD_MIN_BATCH_SIZE, DD_MAX_BATCH_SIZE, DD_MIN_TIMEOUT_MS, DD_MAX_TIMEOUT_MS, DD_API_KEY_MIN_LENGTH, DD_API_KEY_MAX_LENGTH, DD_MAX_STRING_LENGTH_DEFAULT, } from './constants.js';
17
+ describe('datadog-export', () => {
18
+ describe('validateDatadogApiKey', () => {
19
+ it('accepts valid minimum-length hex API key', () => {
20
+ assert.doesNotThrow(() => {
21
+ validateDatadogApiKey('a'.repeat(DD_API_KEY_MIN_LENGTH));
22
+ });
23
+ });
24
+ it('accepts valid maximum-length hex API key', () => {
25
+ assert.doesNotThrow(() => {
26
+ validateDatadogApiKey('a'.repeat(DD_API_KEY_MAX_LENGTH));
27
+ });
28
+ });
29
+ it('accepts mixed case hex characters', () => {
30
+ assert.doesNotThrow(() => {
31
+ validateDatadogApiKey('AbCdEf0123456789'.repeat(2));
32
+ });
33
+ });
34
+ it('throws for empty API key', () => {
35
+ assert.throws(() => validateDatadogApiKey(''), /Datadog API key not configured/);
36
+ });
37
+ it('throws for too short API key', () => {
38
+ assert.throws(() => validateDatadogApiKey('a'.repeat(DD_API_KEY_MIN_LENGTH - 1)), /Invalid Datadog API key format/);
39
+ });
40
+ it('throws for too long API key', () => {
41
+ assert.throws(() => validateDatadogApiKey('a'.repeat(DD_API_KEY_MAX_LENGTH + 1)), /Invalid Datadog API key format/);
42
+ });
43
+ it('throws for non-hex characters', () => {
44
+ assert.throws(() => validateDatadogApiKey('g'.repeat(DD_API_KEY_MIN_LENGTH)), /Invalid Datadog API key format/);
45
+ });
46
+ it('throws for API key with special characters', () => {
47
+ assert.throws(() => validateDatadogApiKey('abcd-efgh-1234-5678'.repeat(2)), /Invalid Datadog API key format/);
48
+ });
49
+ });
50
+ describe('sanitizeDatadogString', () => {
51
+ it('returns empty string for undefined', () => {
52
+ assert.strictEqual(sanitizeDatadogString(undefined), '');
53
+ });
54
+ it('returns empty string for empty input', () => {
55
+ assert.strictEqual(sanitizeDatadogString(''), '');
56
+ });
57
+ it('preserves normal strings', () => {
58
+ assert.strictEqual(sanitizeDatadogString('hello world'), 'hello world');
59
+ });
60
+ it('removes control characters (ASCII 0-31)', () => {
61
+ assert.strictEqual(sanitizeDatadogString('hello\x00world'), 'helloworld');
62
+ assert.strictEqual(sanitizeDatadogString('test\x1Fvalue'), 'testvalue');
63
+ assert.strictEqual(sanitizeDatadogString('\x00\x01\x02clean'), 'clean');
64
+ });
65
+ it('removes control characters (ASCII 127-159)', () => {
66
+ assert.strictEqual(sanitizeDatadogString('hello\x7Fworld'), 'helloworld');
67
+ assert.strictEqual(sanitizeDatadogString('test\x9Fvalue'), 'testvalue');
68
+ });
69
+ it('preserves newlines by removing them (they are control chars)', () => {
70
+ assert.strictEqual(sanitizeDatadogString('line1\nline2'), 'line1line2');
71
+ assert.strictEqual(sanitizeDatadogString('line1\r\nline2'), 'line1line2');
72
+ });
73
+ it('preserves tabs by removing them (they are control chars)', () => {
74
+ assert.strictEqual(sanitizeDatadogString('col1\tcol2'), 'col1col2');
75
+ });
76
+ it('limits string to maxLength', () => {
77
+ const longString = 'a'.repeat(100);
78
+ assert.strictEqual(sanitizeDatadogString(longString, 50).length, 50);
79
+ });
80
+ it('uses default maxLength', () => {
81
+ const longString = 'a'.repeat(DD_MAX_STRING_LENGTH_DEFAULT * 2);
82
+ assert.strictEqual(sanitizeDatadogString(longString).length, DD_MAX_STRING_LENGTH_DEFAULT);
83
+ });
84
+ it('handles mixed content (control chars and length limit)', () => {
85
+ const input = '\x00' + 'a'.repeat(100) + '\x00';
86
+ const result = sanitizeDatadogString(input, 50);
87
+ assert.strictEqual(result.length, 50);
88
+ assert.ok(!result.includes('\x00'));
89
+ });
90
+ it('preserves unicode characters', () => {
91
+ assert.strictEqual(sanitizeDatadogString('héllo wörld 日本語'), 'héllo wörld 日本語');
92
+ });
93
+ it('preserves emojis', () => {
94
+ assert.strictEqual(sanitizeDatadogString('test 🚀 emoji'), 'test 🚀 emoji');
95
+ });
96
+ });
97
+ describe('getDatadogEndpoints', () => {
98
+ const sites = [
99
+ 'datadoghq.com',
100
+ 'us3.datadoghq.com',
101
+ 'us5.datadoghq.com',
102
+ 'datadoghq.eu',
103
+ 'ap1.datadoghq.com',
104
+ ];
105
+ for (const site of sites) {
106
+ describe(`site: ${site}`, () => {
107
+ it('returns spans endpoint', () => {
108
+ const endpoints = getDatadogEndpoints(site);
109
+ assert.ok(endpoints.spans.includes('/api/intake/llm-obs/v1/trace/spans'));
110
+ assert.ok(endpoints.spans.startsWith('https://'));
111
+ });
112
+ it('returns evals endpoint', () => {
113
+ const endpoints = getDatadogEndpoints(site);
114
+ assert.ok(endpoints.evals.includes('/api/intake/llm-obs/v2/eval-metric'));
115
+ assert.ok(endpoints.evals.startsWith('https://'));
116
+ });
117
+ it('endpoints match site domain', () => {
118
+ const endpoints = getDatadogEndpoints(site);
119
+ // Extract expected domain from site
120
+ const expectedDomain = site === 'datadoghq.com'
121
+ ? 'api.datadoghq.com'
122
+ : site === 'datadoghq.eu'
123
+ ? 'api.datadoghq.eu'
124
+ : `api.${site}`;
125
+ assert.ok(endpoints.spans.includes(expectedDomain));
126
+ assert.ok(endpoints.evals.includes(expectedDomain));
127
+ });
128
+ });
129
+ }
130
+ });
131
+ describe('validateDatadogConfig', () => {
132
+ const validApiKey = 'a'.repeat(32);
133
+ const validEnv = {
134
+ DD_API_KEY: validApiKey,
135
+ DD_SITE: 'datadoghq.com',
136
+ DD_LLMOBS_ML_APP: 'test-app',
137
+ };
138
+ describe('with valid overrides', () => {
139
+ it('returns config with all overrides', () => {
140
+ const config = validateDatadogConfig({
141
+ apiKey: validApiKey,
142
+ site: 'datadoghq.eu',
143
+ mlApp: 'my-app',
144
+ batchSize: 50,
145
+ timeoutMs: 15000,
146
+ exportSpans: true,
147
+ exportEvals: false,
148
+ });
149
+ assert.strictEqual(config.apiKey, validApiKey);
150
+ assert.strictEqual(config.site, 'datadoghq.eu');
151
+ assert.strictEqual(config.mlApp, 'my-app');
152
+ assert.strictEqual(config.batchSize, 50);
153
+ assert.strictEqual(config.timeoutMs, 15000);
154
+ assert.strictEqual(config.exportSpans, true);
155
+ assert.strictEqual(config.exportEvals, false);
156
+ });
157
+ it('defaults exportSpans to true', () => {
158
+ const config = validateDatadogConfig({
159
+ apiKey: validApiKey,
160
+ mlApp: 'test-app',
161
+ });
162
+ assert.strictEqual(config.exportSpans, true);
163
+ });
164
+ it('defaults exportEvals to true', () => {
165
+ const config = validateDatadogConfig({
166
+ apiKey: validApiKey,
167
+ mlApp: 'test-app',
168
+ });
169
+ assert.strictEqual(config.exportEvals, true);
170
+ });
171
+ it('defaults metricType to auto', () => {
172
+ const config = validateDatadogConfig({
173
+ apiKey: validApiKey,
174
+ mlApp: 'test-app',
175
+ });
176
+ assert.strictEqual(config.metricType, 'auto');
177
+ });
178
+ });
179
+ describe('site validation', () => {
180
+ it('accepts all valid sites', () => {
181
+ const sites = [
182
+ 'datadoghq.com',
183
+ 'us3.datadoghq.com',
184
+ 'us5.datadoghq.com',
185
+ 'datadoghq.eu',
186
+ 'ap1.datadoghq.com',
187
+ ];
188
+ for (const site of sites) {
189
+ const config = validateDatadogConfig({
190
+ apiKey: validApiKey,
191
+ site,
192
+ mlApp: 'test-app',
193
+ });
194
+ assert.strictEqual(config.site, site);
195
+ }
196
+ });
197
+ it('throws for invalid site', () => {
198
+ assert.throws(() => validateDatadogConfig({
199
+ apiKey: validApiKey,
200
+ site: 'invalid.datadoghq.com',
201
+ mlApp: 'test-app',
202
+ }), /Invalid Datadog site/);
203
+ });
204
+ });
205
+ describe('mlApp validation', () => {
206
+ it('throws when mlApp is missing', () => {
207
+ assert.throws(() => validateDatadogConfig({
208
+ apiKey: validApiKey,
209
+ }), /ML app name not configured/);
210
+ });
211
+ it('throws when mlApp is empty string', () => {
212
+ assert.throws(() => validateDatadogConfig({
213
+ apiKey: validApiKey,
214
+ mlApp: '',
215
+ }), /ML app name not configured/);
216
+ });
217
+ });
218
+ describe('batchSize validation', () => {
219
+ it('accepts minimum batch size', () => {
220
+ const config = validateDatadogConfig({
221
+ apiKey: validApiKey,
222
+ mlApp: 'test-app',
223
+ batchSize: DD_MIN_BATCH_SIZE,
224
+ });
225
+ assert.strictEqual(config.batchSize, DD_MIN_BATCH_SIZE);
226
+ });
227
+ it('accepts maximum batch size', () => {
228
+ const config = validateDatadogConfig({
229
+ apiKey: validApiKey,
230
+ mlApp: 'test-app',
231
+ batchSize: DD_MAX_BATCH_SIZE,
232
+ });
233
+ assert.strictEqual(config.batchSize, DD_MAX_BATCH_SIZE);
234
+ });
235
+ it('throws for batch size below minimum', () => {
236
+ assert.throws(() => validateDatadogConfig({
237
+ apiKey: validApiKey,
238
+ mlApp: 'test-app',
239
+ batchSize: DD_MIN_BATCH_SIZE - 1,
240
+ }), /batch size must be between/);
241
+ });
242
+ it('throws for batch size above maximum', () => {
243
+ assert.throws(() => validateDatadogConfig({
244
+ apiKey: validApiKey,
245
+ mlApp: 'test-app',
246
+ batchSize: DD_MAX_BATCH_SIZE + 1,
247
+ }), /batch size must be between/);
248
+ });
249
+ });
250
+ describe('timeoutMs validation', () => {
251
+ it('accepts minimum timeout', () => {
252
+ const config = validateDatadogConfig({
253
+ apiKey: validApiKey,
254
+ mlApp: 'test-app',
255
+ timeoutMs: DD_MIN_TIMEOUT_MS,
256
+ });
257
+ assert.strictEqual(config.timeoutMs, DD_MIN_TIMEOUT_MS);
258
+ });
259
+ it('accepts maximum timeout', () => {
260
+ const config = validateDatadogConfig({
261
+ apiKey: validApiKey,
262
+ mlApp: 'test-app',
263
+ timeoutMs: DD_MAX_TIMEOUT_MS,
264
+ });
265
+ assert.strictEqual(config.timeoutMs, DD_MAX_TIMEOUT_MS);
266
+ });
267
+ it('throws for timeout below minimum', () => {
268
+ assert.throws(() => validateDatadogConfig({
269
+ apiKey: validApiKey,
270
+ mlApp: 'test-app',
271
+ timeoutMs: DD_MIN_TIMEOUT_MS - 1,
272
+ }), /timeout must be between/);
273
+ });
274
+ it('throws for timeout above maximum', () => {
275
+ assert.throws(() => validateDatadogConfig({
276
+ apiKey: validApiKey,
277
+ mlApp: 'test-app',
278
+ timeoutMs: DD_MAX_TIMEOUT_MS + 1,
279
+ }), /timeout must be between/);
280
+ });
281
+ });
282
+ });
283
+ describe('createDatadogAuthHeader', () => {
284
+ it('returns API key as-is', () => {
285
+ const apiKey = 'test-api-key-value';
286
+ const header = createDatadogAuthHeader(apiKey);
287
+ assert.strictEqual(header, apiKey);
288
+ });
289
+ it('preserves hex API key', () => {
290
+ const apiKey = 'abcdef1234567890'.repeat(2);
291
+ const header = createDatadogAuthHeader(apiKey);
292
+ assert.strictEqual(header, apiKey);
293
+ });
294
+ });
295
+ describe('inferMetricType', () => {
296
+ describe('boolean detection', () => {
297
+ it('returns boolean for score = 0', () => {
298
+ const evaluation = {
299
+ timestamp: new Date().toISOString(),
300
+ evaluationName: 'test',
301
+ scoreValue: 0,
302
+ };
303
+ assert.strictEqual(inferMetricType(evaluation), 'boolean');
304
+ });
305
+ it('returns boolean for score = 1', () => {
306
+ const evaluation = {
307
+ timestamp: new Date().toISOString(),
308
+ evaluationName: 'test',
309
+ scoreValue: 1,
310
+ };
311
+ assert.strictEqual(inferMetricType(evaluation), 'boolean');
312
+ });
313
+ });
314
+ describe('score detection', () => {
315
+ it('returns score for 0.5', () => {
316
+ const evaluation = {
317
+ timestamp: new Date().toISOString(),
318
+ evaluationName: 'test',
319
+ scoreValue: 0.5,
320
+ };
321
+ assert.strictEqual(inferMetricType(evaluation), 'score');
322
+ });
323
+ it('returns score for 0.01', () => {
324
+ const evaluation = {
325
+ timestamp: new Date().toISOString(),
326
+ evaluationName: 'test',
327
+ scoreValue: 0.01,
328
+ };
329
+ assert.strictEqual(inferMetricType(evaluation), 'score');
330
+ });
331
+ it('returns score for 0.99', () => {
332
+ const evaluation = {
333
+ timestamp: new Date().toISOString(),
334
+ evaluationName: 'test',
335
+ scoreValue: 0.99,
336
+ };
337
+ assert.strictEqual(inferMetricType(evaluation), 'score');
338
+ });
339
+ });
340
+ describe('categorical detection', () => {
341
+ it('returns categorical for label only', () => {
342
+ const evaluation = {
343
+ timestamp: new Date().toISOString(),
344
+ evaluationName: 'test',
345
+ scoreLabel: 'good',
346
+ };
347
+ assert.strictEqual(inferMetricType(evaluation), 'categorical');
348
+ });
349
+ it('returns categorical for label with out-of-range score', () => {
350
+ const evaluation = {
351
+ timestamp: new Date().toISOString(),
352
+ evaluationName: 'test',
353
+ scoreValue: 5,
354
+ scoreLabel: 'five-stars',
355
+ };
356
+ assert.strictEqual(inferMetricType(evaluation), 'categorical');
357
+ });
358
+ });
359
+ describe('default fallback', () => {
360
+ it('returns score for undefined scoreValue', () => {
361
+ const evaluation = {
362
+ timestamp: new Date().toISOString(),
363
+ evaluationName: 'test',
364
+ };
365
+ assert.strictEqual(inferMetricType(evaluation), 'score');
366
+ });
367
+ it('returns score for negative scoreValue', () => {
368
+ const evaluation = {
369
+ timestamp: new Date().toISOString(),
370
+ evaluationName: 'test',
371
+ scoreValue: -0.5,
372
+ };
373
+ assert.strictEqual(inferMetricType(evaluation), 'score');
374
+ });
375
+ it('returns score for scoreValue > 1', () => {
376
+ const evaluation = {
377
+ timestamp: new Date().toISOString(),
378
+ evaluationName: 'test',
379
+ scoreValue: 1.5,
380
+ };
381
+ assert.strictEqual(inferMetricType(evaluation), 'score');
382
+ });
383
+ });
384
+ });
385
+ describe('evaluationsToDatadogSpans', () => {
386
+ const baseConfig = {
387
+ apiKey: 'a'.repeat(32),
388
+ site: 'datadoghq.com',
389
+ mlApp: 'test-app',
390
+ batchSize: 100,
391
+ timeoutMs: 30000,
392
+ exportSpans: true,
393
+ exportEvals: true,
394
+ metricType: 'auto',
395
+ };
396
+ it('converts single evaluation to span', () => {
397
+ const evaluations = [{
398
+ timestamp: '2024-01-15T10:00:00.000Z',
399
+ evaluationName: 'relevance',
400
+ scoreValue: 0.85,
401
+ traceId: 'abc123',
402
+ }];
403
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
404
+ assert.strictEqual(spans.length, 1);
405
+ assert.strictEqual(spans[0].name, 'evaluation.relevance');
406
+ assert.strictEqual(spans[0].trace_id, 'abc123');
407
+ assert.ok(spans[0].span_id);
408
+ assert.strictEqual(spans[0].parent_id, DD_ROOT_SPAN_PARENT_ID);
409
+ assert.strictEqual(spans[0].duration, DD_SPAN_DURATION_NS);
410
+ assert.strictEqual(spans[0].meta.ml_app, 'test-app');
411
+ assert.strictEqual(spans[0].meta['evaluation.name'], 'relevance');
412
+ assert.strictEqual(spans[0].meta['evaluation.score'], '0.85');
413
+ });
414
+ it('generates trace ID when missing', () => {
415
+ const evaluations = [{
416
+ timestamp: '2024-01-15T10:00:00.000Z',
417
+ evaluationName: 'test',
418
+ }];
419
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
420
+ assert.ok(spans[0].trace_id);
421
+ assert.strictEqual(spans[0].trace_id.length, 32);
422
+ });
423
+ it('includes session ID when present', () => {
424
+ const evaluations = [{
425
+ timestamp: '2024-01-15T10:00:00.000Z',
426
+ evaluationName: 'test',
427
+ sessionId: 'session-123',
428
+ }];
429
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
430
+ assert.strictEqual(spans[0].meta['session.id'], 'session-123');
431
+ });
432
+ it('includes evaluation label', () => {
433
+ const evaluations = [{
434
+ timestamp: '2024-01-15T10:00:00.000Z',
435
+ evaluationName: 'quality',
436
+ scoreLabel: 'excellent',
437
+ }];
438
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
439
+ assert.strictEqual(spans[0].meta['evaluation.label'], 'excellent');
440
+ });
441
+ it('includes explanation as output', () => {
442
+ const evaluations = [{
443
+ timestamp: '2024-01-15T10:00:00.000Z',
444
+ evaluationName: 'test',
445
+ explanation: 'Response was accurate and helpful',
446
+ }];
447
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
448
+ assert.deepStrictEqual(spans[0].meta.output, {
449
+ value: 'Response was accurate and helpful',
450
+ });
451
+ });
452
+ it('includes evaluator as model_name', () => {
453
+ const evaluations = [{
454
+ timestamp: '2024-01-15T10:00:00.000Z',
455
+ evaluationName: 'test',
456
+ evaluator: 'claude-3-opus',
457
+ }];
458
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
459
+ assert.strictEqual(spans[0].meta.model_name, 'claude-3-opus');
460
+ });
461
+ describe('evaluator type to span kind mapping', () => {
462
+ it('maps llm evaluator to llm kind', () => {
463
+ const evaluations = [{
464
+ timestamp: '2024-01-15T10:00:00.000Z',
465
+ evaluationName: 'test',
466
+ evaluatorType: 'llm',
467
+ }];
468
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
469
+ assert.strictEqual(spans[0].meta.kind, 'llm');
470
+ });
471
+ it('maps human evaluator to tool kind', () => {
472
+ const evaluations = [{
473
+ timestamp: '2024-01-15T10:00:00.000Z',
474
+ evaluationName: 'test',
475
+ evaluatorType: 'human',
476
+ }];
477
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
478
+ assert.strictEqual(spans[0].meta.kind, 'tool');
479
+ });
480
+ it('maps rule evaluator to tool kind', () => {
481
+ const evaluations = [{
482
+ timestamp: '2024-01-15T10:00:00.000Z',
483
+ evaluationName: 'test',
484
+ evaluatorType: 'rule',
485
+ }];
486
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
487
+ assert.strictEqual(spans[0].meta.kind, 'tool');
488
+ });
489
+ it('maps classifier evaluator to tool kind', () => {
490
+ const evaluations = [{
491
+ timestamp: '2024-01-15T10:00:00.000Z',
492
+ evaluationName: 'test',
493
+ evaluatorType: 'classifier',
494
+ }];
495
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
496
+ assert.strictEqual(spans[0].meta.kind, 'tool');
497
+ });
498
+ it('defaults to workflow kind for unknown evaluator', () => {
499
+ const evaluations = [{
500
+ timestamp: '2024-01-15T10:00:00.000Z',
501
+ evaluationName: 'test',
502
+ // Deliberately unrecognized evaluatorType value; exercises the fallback to the 'workflow' kind
503
+ evaluatorType: 'unknown',
504
+ }];
505
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
506
+ assert.strictEqual(spans[0].meta.kind, 'workflow');
507
+ });
508
+ it('defaults to workflow kind for undefined evaluator', () => {
509
+ const evaluations = [{
510
+ timestamp: '2024-01-15T10:00:00.000Z',
511
+ evaluationName: 'test',
512
+ }];
513
+ const spans = evaluationsToDatadogSpans(evaluations, baseConfig);
514
+ assert.strictEqual(spans[0].meta.kind, 'workflow');
515
+ });
516
+ });
517
// Three evaluations in, three spans out, each named after its evaluation and in input order.
it('converts multiple evaluations', () => {
    const relevance = { timestamp: '2024-01-15T10:00:00.000Z', evaluationName: 'relevance', scoreValue: 0.9 };
    const accuracy = { timestamp: '2024-01-15T10:00:01.000Z', evaluationName: 'accuracy', scoreValue: 0.8 };
    const safety = { timestamp: '2024-01-15T10:00:02.000Z', evaluationName: 'safety', scoreValue: 1 };
    const expectedSpanNames = ['evaluation.relevance', 'evaluation.accuracy', 'evaluation.safety'];
    const spans = evaluationsToDatadogSpans([relevance, accuracy, safety], baseConfig);
    assert.strictEqual(spans.length, 3);
    expectedSpanNames.forEach((expectedName, index) => {
        assert.strictEqual(spans[index].name, expectedName);
    });
});
529
// An unparseable timestamp must not break conversion.
it('handles invalid timestamp gracefully', () => {
    const unparseable = {
        timestamp: 'invalid-date',
        evaluationName: 'test',
    };
    const [firstSpan] = evaluationsToDatadogSpans([unparseable], baseConfig);
    // Only positivity is checked here; the original note says the converter
    // should substitute the current time when the timestamp is invalid.
    assert.ok(firstSpan.start_ns > 0);
});
538
+ });
539
// Covers conversion of evaluations into eval-metric payloads: type detection,
// pass/fail assessment, forced metric types, reasoning and timestamp handling.
describe('evaluationsToDatadogEvalMetrics', () => {
    const baseConfig = {
        apiKey: 'a'.repeat(32),
        site: 'datadoghq.com',
        mlApp: 'test-app',
        batchSize: 100,
        timeoutMs: 30000,
        exportSpans: true,
        exportEvals: true,
        metricType: 'auto',
    };
    const FIXED_TIMESTAMP = '2024-01-15T10:00:00.000Z';
    /** Builds an evaluation carrying the shared timestamp first, then the given fields. */
    const makeEvaluation = (fields) => ({ timestamp: FIXED_TIMESTAMP, ...fields });
    /** Converts a single evaluation and returns the first metric that comes back. */
    const firstMetric = (fields, config = baseConfig) => {
        const metrics = evaluationsToDatadogEvalMetrics([makeEvaluation(fields)], config);
        return metrics[0];
    };
    /** Copy of the base config with the metric type pinned instead of 'auto'. */
    const withMetricType = (metricType) => ({ ...baseConfig, metricType });

    it('converts evaluation to eval metric', () => {
        const input = [makeEvaluation({
            evaluationName: 'relevance',
            scoreValue: 0.85,
            traceId: 'abc123',
        })];
        const metrics = evaluationsToDatadogEvalMetrics(input, baseConfig);
        assert.strictEqual(metrics.length, 1);
        const metric = metrics[0];
        assert.strictEqual(metric.label, 'relevance');
        assert.strictEqual(metric.ml_app, 'test-app');
        assert.strictEqual(metric.metric_type, 'score');
        assert.strictEqual(metric.score_value, 0.85);
        assert.strictEqual(metric.assessment, 'pass');
        assert.ok(metric.join_on.span);
    });

    describe('boolean metrics', () => {
        it('converts score=1 to boolean true with pass', () => {
            const metric = firstMetric({ evaluationName: 'is_safe', scoreValue: 1 });
            assert.strictEqual(metric.metric_type, 'boolean');
            assert.strictEqual(metric.boolean_value, true);
            assert.strictEqual(metric.assessment, 'pass');
        });
        it('converts score=0 to boolean false with fail', () => {
            const metric = firstMetric({ evaluationName: 'is_safe', scoreValue: 0 });
            assert.strictEqual(metric.metric_type, 'boolean');
            assert.strictEqual(metric.boolean_value, false);
            assert.strictEqual(metric.assessment, 'fail');
        });
    });

    describe('score metrics', () => {
        it('sets pass for score >= 0.5', () => {
            const metric = firstMetric({ evaluationName: 'quality', scoreValue: 0.75 });
            assert.strictEqual(metric.metric_type, 'score');
            assert.strictEqual(metric.score_value, 0.75);
            assert.strictEqual(metric.assessment, 'pass');
        });
        it('sets fail for score < 0.5', () => {
            const metric = firstMetric({ evaluationName: 'quality', scoreValue: 0.3 });
            assert.strictEqual(metric.metric_type, 'score');
            assert.strictEqual(metric.score_value, 0.3);
            assert.strictEqual(metric.assessment, 'fail');
        });
        it('defaults to 0 for undefined score', () => {
            // The type is forced to 'score' and the evaluation carries no scoreValue.
            const metric = firstMetric({ evaluationName: 'quality' }, withMetricType('score'));
            assert.strictEqual(metric.score_value, 0);
        });
    });

    describe('categorical metrics', () => {
        it('uses scoreLabel for categorical value', () => {
            const metric = firstMetric({ evaluationName: 'sentiment', scoreLabel: 'positive' });
            assert.strictEqual(metric.metric_type, 'categorical');
            assert.strictEqual(metric.categorical_value, 'positive');
        });
        it('defaults to unknown for missing label', () => {
            // The type is forced to 'categorical' and the evaluation carries no scoreLabel.
            const metric = firstMetric({ evaluationName: 'sentiment' }, withMetricType('categorical'));
            assert.strictEqual(metric.categorical_value, 'unknown');
        });
    });

    describe('forced metric type', () => {
        // Each row: [forced metric type, scoreValue supplied with the evaluation].
        const forcedCases = [
            ['boolean', 0.5], // Would normally be 'score'
            ['score', 1], // Would normally be 'boolean'
            ['categorical', 0.5],
        ];
        for (const [forcedType, scoreValue] of forcedCases) {
            it(`respects forced ${forcedType} type`, () => {
                const metric = firstMetric({ evaluationName: 'test', scoreValue }, withMetricType(forcedType));
                assert.strictEqual(metric.metric_type, forcedType);
            });
        }
    });

    it('includes reasoning from explanation', () => {
        const metric = firstMetric({
            evaluationName: 'test',
            scoreValue: 0.9,
            explanation: 'The response was comprehensive and accurate',
        });
        assert.strictEqual(metric.reasoning, 'The response was comprehensive and accurate');
    });

    it('converts multiple evaluations', () => {
        const mixedInput = [
            { timestamp: '2024-01-15T10:00:00.000Z', evaluationName: 'relevance', scoreValue: 0.9 },
            { timestamp: '2024-01-15T10:00:01.000Z', evaluationName: 'is_safe', scoreValue: 1 },
            { timestamp: '2024-01-15T10:00:02.000Z', evaluationName: 'sentiment', scoreLabel: 'neutral' },
        ];
        const expectedTypes = ['score', 'boolean', 'categorical'];
        const metrics = evaluationsToDatadogEvalMetrics(mixedInput, baseConfig);
        assert.strictEqual(metrics.length, 3);
        expectedTypes.forEach((expectedType, index) => {
            assert.strictEqual(metrics[index].metric_type, expectedType);
        });
    });

    it('handles invalid timestamp gracefully', () => {
        const metric = firstMetric({
            timestamp: 'invalid-date',
            evaluationName: 'test',
            scoreValue: 0.5,
        });
        // Only positivity is checked; the original note says the converter
        // should substitute the current time when the timestamp is invalid.
        assert.ok(metric.timestamp_ms > 0);
    });
});
709
// End-to-end tests for exportToDatadog with globalThis.fetch replaced by a stub.
// Every test installs its own stub; the hooks below put the real fetch back afterwards.
describe('exportToDatadog', () => {
    const baseConfig = {
        apiKey: 'a'.repeat(32),
        site: 'datadoghq.com',
        mlApp: 'test-app',
        batchSize: 100,
        timeoutMs: 30000,
        exportSpans: true,
        exportEvals: true,
        metricType: 'auto',
    };
    /** Returns a fresh one-element evaluation list so tests never share objects. */
    const singleEvaluation = () => [{
        timestamp: '2024-01-15T10:00:00.000Z',
        evaluationName: 'test',
        scoreValue: 0.9,
    }];
    /** Returns a new 200 response; a Response body can only be consumed once, so never reuse one. */
    const okResponse = () => new Response(JSON.stringify({ status: 'ok' }), { status: 200 });

    let originalFetch;
    beforeEach(() => {
        originalFetch = globalThis.fetch;
    });
    afterEach(() => {
        globalThis.fetch = originalFetch;
    });

    it('returns success for empty evaluations', async () => {
        const result = await exportToDatadog([], baseConfig);
        assert.strictEqual(result.success, true);
        assert.strictEqual(result.evaluationsExported, 0);
        assert.strictEqual(result.spansExported, 0);
        assert.strictEqual(result.evalMetricsExported, 0);
        assert.strictEqual(result.batches, 0);
    });

    it('exports spans and evals in two phases', async () => {
        const fetchCalls = [];
        globalThis.fetch = mock.fn(async (url, init) => {
            fetchCalls.push({
                url: url.toString(),
                body: init?.body,
            });
            return okResponse();
        });
        const result = await exportToDatadog(singleEvaluation(), baseConfig);
        assert.strictEqual(result.success, true);
        assert.strictEqual(fetchCalls.length, 2);
        // Spans are expected to be sent before the evaluation metrics.
        assert.ok(fetchCalls[0].url.includes('/trace/spans'));
        assert.ok(fetchCalls[1].url.includes('/eval-metric'));
    });

    it('only exports spans when exportEvals is false', async () => {
        const fetchCalls = [];
        globalThis.fetch = mock.fn(async (url) => {
            fetchCalls.push(url.toString());
            return okResponse();
        });
        const config = { ...baseConfig, exportEvals: false };
        const result = await exportToDatadog(singleEvaluation(), config);
        assert.strictEqual(result.success, true);
        assert.strictEqual(fetchCalls.length, 1);
        assert.ok(fetchCalls[0].includes('/trace/spans'));
    });

    it('only exports evals when exportSpans is false', async () => {
        const fetchCalls = [];
        globalThis.fetch = mock.fn(async (url) => {
            fetchCalls.push(url.toString());
            return okResponse();
        });
        const config = { ...baseConfig, exportSpans: false };
        const result = await exportToDatadog(singleEvaluation(), config);
        assert.strictEqual(result.success, true);
        assert.strictEqual(fetchCalls.length, 1);
        assert.ok(fetchCalls[0].includes('/eval-metric'));
    });

    it('uses DD-API-KEY header', async () => {
        let capturedHeaders = new Headers();
        globalThis.fetch = mock.fn(async (_url, init) => {
            // Normalize through Headers so the check works whether the exporter passes
            // a plain object, an entries array or a Headers instance, and so the
            // lookup is case-insensitive like real HTTP header names.
            capturedHeaders = new Headers(init?.headers ?? {});
            return okResponse();
        });
        await exportToDatadog(singleEvaluation(), baseConfig);
        assert.strictEqual(capturedHeaders.get('DD-API-KEY'), baseConfig.apiKey);
    });

    it('handles HTTP errors gracefully', async () => {
        globalThis.fetch = mock.fn(async () => {
            return new Response('Internal Server Error', { status: 500 });
        });
        const result = await exportToDatadog(singleEvaluation(), baseConfig);
        assert.strictEqual(result.success, false);
        assert.ok(result.errors);
        // At least one reported error must mention the HTTP status code.
        assert.ok(result.errors.some(e => e.includes('500')));
    });

    it('handles network errors gracefully', async () => {
        globalThis.fetch = mock.fn(async () => {
            throw new Error('Network error');
        });
        // The rejection from fetch must surface as a failed result, not as a thrown error.
        const result = await exportToDatadog(singleEvaluation(), baseConfig);
        assert.strictEqual(result.success, false);
        assert.ok(result.errors);
    });

    it('batches large exports', async () => {
        let batchCount = 0;
        globalThis.fetch = mock.fn(async () => {
            batchCount++;
            return okResponse();
        });
        const config = { ...baseConfig, batchSize: 10 };
        const evaluations = Array.from({ length: 25 }, (_, i) => ({
            timestamp: '2024-01-15T10:00:00.000Z',
            evaluationName: `test-${i}`,
            scoreValue: 0.9,
        }));
        const result = await exportToDatadog(evaluations, config);
        // 25 evals / 10 batch = 3 batches * 2 phases (spans + evals) = 6 calls
        assert.strictEqual(batchCount, 6);
        assert.strictEqual(result.batches, 3);
    });

    it('reports duration in milliseconds', async () => {
        globalThis.fetch = mock.fn(async () => {
            return okResponse();
        });
        const result = await exportToDatadog(singleEvaluation(), baseConfig);
        assert.ok(result.durationMs >= 0);
        assert.strictEqual(typeof result.durationMs, 'number');
    });

    describe('site-specific endpoints', () => {
        const sites = [
            'datadoghq.com',
            'us3.datadoghq.com',
            'us5.datadoghq.com',
            'datadoghq.eu',
            'ap1.datadoghq.com',
        ];
        // One generated test per site: both endpoint URLs for that site must be requested.
        for (const site of sites) {
            it(`uses correct endpoints for ${site}`, async () => {
                const usedUrls = [];
                globalThis.fetch = mock.fn(async (url) => {
                    usedUrls.push(url.toString());
                    return okResponse();
                });
                const config = { ...baseConfig, site };
                await exportToDatadog(singleEvaluation(), config);
                const expectedEndpoints = getDatadogEndpoints(site);
                assert.ok(usedUrls.includes(expectedEndpoints.spans));
                assert.ok(usedUrls.includes(expectedEndpoints.evals));
            });
        }
    });
});
889
+ });
890
+ //# sourceMappingURL=datadog-export.test.js.map