observability-toolkit 1.8.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/README.md +126 -5
  2. package/dist/backends/index.d.ts +163 -0
  3. package/dist/backends/index.d.ts.map +1 -1
  4. package/dist/backends/index.js +57 -0
  5. package/dist/backends/index.js.map +1 -1
  6. package/dist/backends/index.test.js +55 -1
  7. package/dist/backends/index.test.js.map +1 -1
  8. package/dist/backends/local-jsonl.d.ts +30 -0
  9. package/dist/backends/local-jsonl.d.ts.map +1 -1
  10. package/dist/backends/local-jsonl.js +912 -550
  11. package/dist/backends/local-jsonl.js.map +1 -1
  12. package/dist/backends/signoz-api-rate-limiter.test.js +2 -1
  13. package/dist/backends/signoz-api-rate-limiter.test.js.map +1 -1
  14. package/dist/backends/signoz-api.d.ts +16 -2
  15. package/dist/backends/signoz-api.d.ts.map +1 -1
  16. package/dist/backends/signoz-api.js +650 -534
  17. package/dist/backends/signoz-api.js.map +1 -1
  18. package/dist/backends/signoz-api.test.js +6 -5
  19. package/dist/backends/signoz-api.test.js.map +1 -1
  20. package/dist/lib/agent-as-judge.d.ts +388 -0
  21. package/dist/lib/agent-as-judge.d.ts.map +1 -0
  22. package/dist/lib/agent-as-judge.js +740 -0
  23. package/dist/lib/agent-as-judge.js.map +1 -0
  24. package/dist/lib/agent-as-judge.test.d.ts +5 -0
  25. package/dist/lib/agent-as-judge.test.d.ts.map +1 -0
  26. package/dist/lib/agent-as-judge.test.js +816 -0
  27. package/dist/lib/agent-as-judge.test.js.map +1 -0
  28. package/dist/lib/cache.d.ts +15 -2
  29. package/dist/lib/cache.d.ts.map +1 -1
  30. package/dist/lib/cache.js +16 -2
  31. package/dist/lib/cache.js.map +1 -1
  32. package/dist/lib/circuit-breaker.d.ts +18 -0
  33. package/dist/lib/circuit-breaker.d.ts.map +1 -1
  34. package/dist/lib/circuit-breaker.js +41 -8
  35. package/dist/lib/circuit-breaker.js.map +1 -1
  36. package/dist/lib/confident-export.d.ts +101 -0
  37. package/dist/lib/confident-export.d.ts.map +1 -0
  38. package/dist/lib/confident-export.js +393 -0
  39. package/dist/lib/confident-export.js.map +1 -0
  40. package/dist/lib/confident-export.test.d.ts +7 -0
  41. package/dist/lib/confident-export.test.d.ts.map +1 -0
  42. package/dist/lib/confident-export.test.js +835 -0
  43. package/dist/lib/confident-export.test.js.map +1 -0
  44. package/dist/lib/constants.d.ts +75 -0
  45. package/dist/lib/constants.d.ts.map +1 -1
  46. package/dist/lib/constants.js +104 -1
  47. package/dist/lib/constants.js.map +1 -1
  48. package/dist/lib/datadog-export.d.ts +156 -0
  49. package/dist/lib/datadog-export.d.ts.map +1 -0
  50. package/dist/lib/datadog-export.js +464 -0
  51. package/dist/lib/datadog-export.js.map +1 -0
  52. package/dist/lib/datadog-export.test.d.ts +14 -0
  53. package/dist/lib/datadog-export.test.d.ts.map +1 -0
  54. package/dist/lib/datadog-export.test.js +890 -0
  55. package/dist/lib/datadog-export.test.js.map +1 -0
  56. package/dist/lib/evaluation-hooks.d.ts +49 -0
  57. package/dist/lib/evaluation-hooks.d.ts.map +1 -0
  58. package/dist/lib/evaluation-hooks.js +488 -0
  59. package/dist/lib/evaluation-hooks.js.map +1 -0
  60. package/dist/lib/evaluation-hooks.test.d.ts +8 -0
  61. package/dist/lib/evaluation-hooks.test.d.ts.map +1 -0
  62. package/dist/lib/evaluation-hooks.test.js +624 -0
  63. package/dist/lib/evaluation-hooks.test.js.map +1 -0
  64. package/dist/lib/export-utils.d.ts +99 -0
  65. package/dist/lib/export-utils.d.ts.map +1 -0
  66. package/dist/lib/export-utils.js +238 -0
  67. package/dist/lib/export-utils.js.map +1 -0
  68. package/dist/lib/export-utils.test.d.ts +5 -0
  69. package/dist/lib/export-utils.test.d.ts.map +1 -0
  70. package/dist/lib/export-utils.test.js +193 -0
  71. package/dist/lib/export-utils.test.js.map +1 -0
  72. package/dist/lib/file-utils.d.ts +17 -2
  73. package/dist/lib/file-utils.d.ts.map +1 -1
  74. package/dist/lib/file-utils.js +24 -5
  75. package/dist/lib/file-utils.js.map +1 -1
  76. package/dist/lib/file-utils.test.js +30 -0
  77. package/dist/lib/file-utils.test.js.map +1 -1
  78. package/dist/lib/histogram.d.ts +119 -0
  79. package/dist/lib/histogram.d.ts.map +1 -0
  80. package/dist/lib/histogram.js +202 -0
  81. package/dist/lib/histogram.js.map +1 -0
  82. package/dist/lib/histogram.test.d.ts +5 -0
  83. package/dist/lib/histogram.test.d.ts.map +1 -0
  84. package/dist/lib/histogram.test.js +381 -0
  85. package/dist/lib/histogram.test.js.map +1 -0
  86. package/dist/lib/instrumentation.d.ts +153 -0
  87. package/dist/lib/instrumentation.d.ts.map +1 -0
  88. package/dist/lib/instrumentation.integration.test.d.ts +2 -0
  89. package/dist/lib/instrumentation.integration.test.d.ts.map +1 -0
  90. package/dist/lib/instrumentation.integration.test.js +589 -0
  91. package/dist/lib/instrumentation.integration.test.js.map +1 -0
  92. package/dist/lib/instrumentation.js +520 -0
  93. package/dist/lib/instrumentation.js.map +1 -0
  94. package/dist/lib/instrumentation.test.d.ts +2 -0
  95. package/dist/lib/instrumentation.test.d.ts.map +1 -0
  96. package/dist/lib/instrumentation.test.js +821 -0
  97. package/dist/lib/instrumentation.test.js.map +1 -0
  98. package/dist/lib/langfuse-export.d.ts +125 -0
  99. package/dist/lib/langfuse-export.d.ts.map +1 -0
  100. package/dist/lib/langfuse-export.js +367 -0
  101. package/dist/lib/langfuse-export.js.map +1 -0
  102. package/dist/lib/langfuse-export.test.d.ts +7 -0
  103. package/dist/lib/langfuse-export.test.d.ts.map +1 -0
  104. package/dist/lib/langfuse-export.test.js +1007 -0
  105. package/dist/lib/langfuse-export.test.js.map +1 -0
  106. package/dist/lib/llm-as-judge.d.ts +657 -0
  107. package/dist/lib/llm-as-judge.d.ts.map +1 -0
  108. package/dist/lib/llm-as-judge.js +1397 -0
  109. package/dist/lib/llm-as-judge.js.map +1 -0
  110. package/dist/lib/llm-as-judge.test.d.ts +2 -0
  111. package/dist/lib/llm-as-judge.test.d.ts.map +1 -0
  112. package/dist/lib/llm-as-judge.test.js +2409 -0
  113. package/dist/lib/llm-as-judge.test.js.map +1 -0
  114. package/dist/lib/logger.d.ts +1 -1
  115. package/dist/lib/logger.d.ts.map +1 -1
  116. package/dist/lib/logger.js.map +1 -1
  117. package/dist/lib/metrics.d.ts +62 -0
  118. package/dist/lib/metrics.d.ts.map +1 -0
  119. package/dist/lib/metrics.js +166 -0
  120. package/dist/lib/metrics.js.map +1 -0
  121. package/dist/lib/metrics.test.d.ts +5 -0
  122. package/dist/lib/metrics.test.d.ts.map +1 -0
  123. package/dist/lib/metrics.test.js +189 -0
  124. package/dist/lib/metrics.test.js.map +1 -0
  125. package/dist/lib/parse-stats.d.ts +119 -0
  126. package/dist/lib/parse-stats.d.ts.map +1 -0
  127. package/dist/lib/parse-stats.js +206 -0
  128. package/dist/lib/parse-stats.js.map +1 -0
  129. package/dist/lib/parse-stats.test.d.ts +5 -0
  130. package/dist/lib/parse-stats.test.d.ts.map +1 -0
  131. package/dist/lib/parse-stats.test.js +283 -0
  132. package/dist/lib/parse-stats.test.js.map +1 -0
  133. package/dist/lib/phoenix-export.d.ts +109 -0
  134. package/dist/lib/phoenix-export.d.ts.map +1 -0
  135. package/dist/lib/phoenix-export.js +429 -0
  136. package/dist/lib/phoenix-export.js.map +1 -0
  137. package/dist/lib/phoenix-export.test.d.ts +11 -0
  138. package/dist/lib/phoenix-export.test.d.ts.map +1 -0
  139. package/dist/lib/phoenix-export.test.js +725 -0
  140. package/dist/lib/phoenix-export.test.js.map +1 -0
  141. package/dist/lib/server-utils.d.ts +6 -1
  142. package/dist/lib/server-utils.d.ts.map +1 -1
  143. package/dist/lib/server-utils.js +9 -1
  144. package/dist/lib/server-utils.js.map +1 -1
  145. package/dist/lib/shared-schemas.d.ts +6 -0
  146. package/dist/lib/shared-schemas.d.ts.map +1 -1
  147. package/dist/lib/shared-schemas.js +11 -4
  148. package/dist/lib/shared-schemas.js.map +1 -1
  149. package/dist/lib/verification-events.d.ts +100 -0
  150. package/dist/lib/verification-events.d.ts.map +1 -0
  151. package/dist/lib/verification-events.js +162 -0
  152. package/dist/lib/verification-events.js.map +1 -0
  153. package/dist/lib/verification-events.test.d.ts +5 -0
  154. package/dist/lib/verification-events.test.d.ts.map +1 -0
  155. package/dist/lib/verification-events.test.js +193 -0
  156. package/dist/lib/verification-events.test.js.map +1 -0
  157. package/dist/server.d.ts +5 -0
  158. package/dist/server.d.ts.map +1 -1
  159. package/dist/server.js +77 -21
  160. package/dist/server.js.map +1 -1
  161. package/dist/tools/context-stats.d.ts.map +1 -1
  162. package/dist/tools/context-stats.js +6 -8
  163. package/dist/tools/context-stats.js.map +1 -1
  164. package/dist/tools/export-confident.d.ts +145 -0
  165. package/dist/tools/export-confident.d.ts.map +1 -0
  166. package/dist/tools/export-confident.js +134 -0
  167. package/dist/tools/export-confident.js.map +1 -0
  168. package/dist/tools/export-confident.test.d.ts +7 -0
  169. package/dist/tools/export-confident.test.d.ts.map +1 -0
  170. package/dist/tools/export-confident.test.js +332 -0
  171. package/dist/tools/export-confident.test.js.map +1 -0
  172. package/dist/tools/export-datadog.d.ts +160 -0
  173. package/dist/tools/export-datadog.d.ts.map +1 -0
  174. package/dist/tools/export-datadog.js +160 -0
  175. package/dist/tools/export-datadog.js.map +1 -0
  176. package/dist/tools/export-datadog.test.d.ts +8 -0
  177. package/dist/tools/export-datadog.test.d.ts.map +1 -0
  178. package/dist/tools/export-datadog.test.js +419 -0
  179. package/dist/tools/export-datadog.test.js.map +1 -0
  180. package/dist/tools/export-langfuse.d.ts +137 -0
  181. package/dist/tools/export-langfuse.d.ts.map +1 -0
  182. package/dist/tools/export-langfuse.js +131 -0
  183. package/dist/tools/export-langfuse.js.map +1 -0
  184. package/dist/tools/export-langfuse.test.d.ts +7 -0
  185. package/dist/tools/export-langfuse.test.d.ts.map +1 -0
  186. package/dist/tools/export-langfuse.test.js +303 -0
  187. package/dist/tools/export-langfuse.test.js.map +1 -0
  188. package/dist/tools/export-phoenix.d.ts +145 -0
  189. package/dist/tools/export-phoenix.d.ts.map +1 -0
  190. package/dist/tools/export-phoenix.js +135 -0
  191. package/dist/tools/export-phoenix.js.map +1 -0
  192. package/dist/tools/export-phoenix.test.d.ts +7 -0
  193. package/dist/tools/export-phoenix.test.d.ts.map +1 -0
  194. package/dist/tools/export-phoenix.test.js +316 -0
  195. package/dist/tools/export-phoenix.test.js.map +1 -0
  196. package/dist/tools/health-check.d.ts +26 -0
  197. package/dist/tools/health-check.d.ts.map +1 -1
  198. package/dist/tools/health-check.js +36 -7
  199. package/dist/tools/health-check.js.map +1 -1
  200. package/dist/tools/index.d.ts +6 -0
  201. package/dist/tools/index.d.ts.map +1 -1
  202. package/dist/tools/index.js +6 -0
  203. package/dist/tools/index.js.map +1 -1
  204. package/dist/tools/inject-evaluations.d.ts +1315 -0
  205. package/dist/tools/inject-evaluations.d.ts.map +1 -0
  206. package/dist/tools/inject-evaluations.js +121 -0
  207. package/dist/tools/inject-evaluations.js.map +1 -0
  208. package/dist/tools/inject-evaluations.test.d.ts +5 -0
  209. package/dist/tools/inject-evaluations.test.d.ts.map +1 -0
  210. package/dist/tools/inject-evaluations.test.js +359 -0
  211. package/dist/tools/inject-evaluations.test.js.map +1 -0
  212. package/dist/tools/query-evaluations.d.ts +25 -4
  213. package/dist/tools/query-evaluations.d.ts.map +1 -1
  214. package/dist/tools/query-evaluations.js +10 -0
  215. package/dist/tools/query-evaluations.js.map +1 -1
  216. package/dist/tools/query-llm-events.js +2 -2
  217. package/dist/tools/query-llm-events.js.map +1 -1
  218. package/dist/tools/query-logs.d.ts +8 -8
  219. package/dist/tools/query-logs.js +3 -3
  220. package/dist/tools/query-logs.js.map +1 -1
  221. package/dist/tools/query-metrics.d.ts +4 -4
  222. package/dist/tools/query-metrics.js +2 -2
  223. package/dist/tools/query-metrics.js.map +1 -1
  224. package/dist/tools/query-traces.d.ts +8 -8
  225. package/dist/tools/query-verifications.d.ts +111 -0
  226. package/dist/tools/query-verifications.d.ts.map +1 -0
  227. package/dist/tools/query-verifications.js +101 -0
  228. package/dist/tools/query-verifications.js.map +1 -0
  229. package/dist/tools/query-verifications.test.d.ts +5 -0
  230. package/dist/tools/query-verifications.test.d.ts.map +1 -0
  231. package/dist/tools/query-verifications.test.js +156 -0
  232. package/dist/tools/query-verifications.test.js.map +1 -0
  233. package/dist/types/evaluation-hooks.d.ts +176 -0
  234. package/dist/types/evaluation-hooks.d.ts.map +1 -0
  235. package/dist/types/evaluation-hooks.js +49 -0
  236. package/dist/types/evaluation-hooks.js.map +1 -0
  237. package/package.json +10 -2
@@ -0,0 +1,1007 @@
1
+ /**
2
+ * Tests for langfuse-export.ts
3
+ *
4
+ * Covers config validation, auth header creation, OTLP conversion, and batching.
5
+ */
6
+ import { describe, it, beforeEach, afterEach, mock } from 'node:test';
7
+ import assert from 'node:assert';
8
+ import { validateLangfuseUrl, validateLangfuseConfig, createLangfuseAuthHeader, evaluationsToOTLPTraces, exportToLangfuse, } from './langfuse-export.js';
9
+ import { HttpStatus } from './constants.js';
10
+ describe('langfuse-export', () => {
11
+ describe('validateLangfuseUrl', () => {
12
+ describe('valid URLs', () => {
13
+ it('accepts valid HTTPS URL', () => {
14
+ const url = 'https://cloud.langfuse.com';
15
+ assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com');
16
+ });
17
+ it('accepts HTTPS URL with port', () => {
18
+ const url = 'https://langfuse.example.com:8443';
19
+ assert.strictEqual(validateLangfuseUrl(url), 'https://langfuse.example.com:8443');
20
+ });
21
+ it('preserves path for self-hosted Langfuse on subpath', () => {
22
+ // M4 FIX: Support Langfuse hosted on subpaths like /langfuse or /api
23
+ const url = 'https://example.com/langfuse';
24
+ assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
25
+ });
26
+ it('preserves API path prefix', () => {
27
+ const url = 'https://cloud.langfuse.com/api/v1/traces';
28
+ assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com/api/v1/traces');
29
+ });
30
+ it('strips trailing slash from path', () => {
31
+ const url = 'https://example.com/langfuse/';
32
+ assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
33
+ });
34
+ it('strips query params from URL', () => {
35
+ const url = 'https://cloud.langfuse.com?key=value';
36
+ assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com');
37
+ });
38
+ it('strips query params but preserves path', () => {
39
+ const url = 'https://example.com/langfuse?key=value';
40
+ assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
41
+ });
42
+ });
43
+ describe('SSRF protection - rejects localhost', () => {
44
+ it('rejects localhost', () => {
45
+ assert.strictEqual(validateLangfuseUrl('https://localhost'), '');
46
+ });
47
+ it('rejects 127.0.0.1', () => {
48
+ assert.strictEqual(validateLangfuseUrl('https://127.0.0.1'), '');
49
+ });
50
+ it('rejects 127.x.x.x variants', () => {
51
+ assert.strictEqual(validateLangfuseUrl('https://127.100.200.1'), '');
52
+ });
53
+ it('rejects ::1 IPv6 localhost', () => {
54
+ assert.strictEqual(validateLangfuseUrl('https://[::1]'), '');
55
+ });
56
+ it('rejects 0.0.0.0', () => {
57
+ assert.strictEqual(validateLangfuseUrl('https://0.0.0.0'), '');
58
+ });
59
+ it('rejects .localhost TLD', () => {
60
+ assert.strictEqual(validateLangfuseUrl('https://app.localhost'), '');
61
+ });
62
+ });
63
+ describe('SSRF protection - rejects cloud metadata', () => {
64
+ it('rejects AWS/GCP/Azure metadata IP', () => {
65
+ assert.strictEqual(validateLangfuseUrl('https://169.254.169.254'), '');
66
+ });
67
+ it('rejects link-local range', () => {
68
+ assert.strictEqual(validateLangfuseUrl('https://169.254.0.1'), '');
69
+ });
70
+ });
71
+ describe('SSRF protection - rejects private networks', () => {
72
+ it('rejects 10.x.x.x', () => {
73
+ assert.strictEqual(validateLangfuseUrl('https://10.0.0.1'), '');
74
+ });
75
+ it('rejects 192.168.x.x', () => {
76
+ assert.strictEqual(validateLangfuseUrl('https://192.168.1.1'), '');
77
+ });
78
+ it('rejects 172.16-31.x.x', () => {
79
+ assert.strictEqual(validateLangfuseUrl('https://172.16.0.1'), '');
80
+ assert.strictEqual(validateLangfuseUrl('https://172.31.255.255'), '');
81
+ });
82
+ it('rejects fc/fd IPv6 ULA', () => {
83
+ assert.strictEqual(validateLangfuseUrl('https://[fc00::1]'), '');
84
+ assert.strictEqual(validateLangfuseUrl('https://[fd00::1]'), '');
85
+ });
86
+ it('rejects fe80 IPv6 link-local', () => {
87
+ assert.strictEqual(validateLangfuseUrl('https://[fe80::1]'), '');
88
+ });
89
+ });
90
+ describe('SSRF protection - rejects reserved domains', () => {
91
+ it('rejects .local TLD', () => {
92
+ assert.strictEqual(validateLangfuseUrl('https://server.local'), '');
93
+ });
94
+ it('rejects .internal TLD', () => {
95
+ assert.strictEqual(validateLangfuseUrl('https://api.internal'), '');
96
+ });
97
+ it('rejects .localdomain TLD', () => {
98
+ assert.strictEqual(validateLangfuseUrl('https://host.localdomain'), '');
99
+ });
100
+ it('rejects .home.arpa TLD', () => {
101
+ assert.strictEqual(validateLangfuseUrl('https://server.home.arpa'), '');
102
+ });
103
+ });
104
+ describe('SSRF protection - rejects HTTP', () => {
105
+ it('rejects HTTP protocol', () => {
106
+ assert.strictEqual(validateLangfuseUrl('http://cloud.langfuse.com'), '');
107
+ });
108
+ it('rejects file protocol', () => {
109
+ assert.strictEqual(validateLangfuseUrl('file:///etc/passwd'), '');
110
+ });
111
+ });
112
+ describe('edge cases', () => {
113
+ it('returns empty for empty string', () => {
114
+ assert.strictEqual(validateLangfuseUrl(''), '');
115
+ });
116
+ it('returns empty for invalid URL', () => {
117
+ assert.strictEqual(validateLangfuseUrl('not-a-url'), '');
118
+ });
119
+ });
120
+ });
121
+ describe('validateLangfuseConfig', () => {
122
+ const originalEnv = { ...process.env };
123
+ beforeEach(() => {
124
+ delete process.env.LANGFUSE_ENDPOINT;
125
+ delete process.env.LANGFUSE_PUBLIC_KEY;
126
+ delete process.env.LANGFUSE_SECRET_KEY;
127
+ delete process.env.LANGFUSE_BATCH_SIZE;
128
+ delete process.env.LANGFUSE_TIMEOUT_MS;
129
+ });
130
+ afterEach(() => {
131
+ Object.assign(process.env, originalEnv);
132
+ });
133
+ it('throws when endpoint not configured', () => {
134
+ assert.throws(() => validateLangfuseConfig(), /endpoint not configured/);
135
+ });
136
+ it('throws when endpoint is invalid', () => {
137
+ assert.throws(() => validateLangfuseConfig({ endpoint: 'http://localhost' }), /endpoint not configured or invalid/);
138
+ });
139
+ it('throws when public key not configured', () => {
140
+ assert.throws(() => validateLangfuseConfig({ endpoint: 'https://cloud.langfuse.com' }), /public key not configured/);
141
+ });
142
+ it('throws when secret key not configured', () => {
143
+ assert.throws(() => validateLangfuseConfig({
144
+ endpoint: 'https://cloud.langfuse.com',
145
+ publicKey: 'pk-test',
146
+ }), /secret key not configured/);
147
+ });
148
+ it('returns valid config with all required fields', () => {
149
+ const config = validateLangfuseConfig({
150
+ endpoint: 'https://cloud.langfuse.com',
151
+ publicKey: 'pk-test',
152
+ secretKey: 'sk-test',
153
+ });
154
+ assert.strictEqual(config.endpoint, 'https://cloud.langfuse.com');
155
+ assert.strictEqual(config.publicKey, 'pk-test');
156
+ assert.strictEqual(config.secretKey, 'sk-test');
157
+ assert.strictEqual(config.batchSize, 100); // default
158
+ assert.strictEqual(config.timeoutMs, 30000); // default
159
+ });
160
+ it('uses custom batch size', () => {
161
+ const config = validateLangfuseConfig({
162
+ endpoint: 'https://cloud.langfuse.com',
163
+ publicKey: 'pk-test',
164
+ secretKey: 'sk-test',
165
+ batchSize: 50,
166
+ });
167
+ assert.strictEqual(config.batchSize, 50);
168
+ });
169
+ it('throws when batch size too small', () => {
170
+ assert.throws(() => validateLangfuseConfig({
171
+ endpoint: 'https://cloud.langfuse.com',
172
+ publicKey: 'pk-test',
173
+ secretKey: 'sk-test',
174
+ batchSize: 0,
175
+ }), /batch size must be between/);
176
+ });
177
+ it('throws when batch size too large', () => {
178
+ assert.throws(() => validateLangfuseConfig({
179
+ endpoint: 'https://cloud.langfuse.com',
180
+ publicKey: 'pk-test',
181
+ secretKey: 'sk-test',
182
+ batchSize: 1001,
183
+ }), /batch size must be between/);
184
+ });
185
+ it('throws when timeout too small', () => {
186
+ assert.throws(() => validateLangfuseConfig({
187
+ endpoint: 'https://cloud.langfuse.com',
188
+ publicKey: 'pk-test',
189
+ secretKey: 'sk-test',
190
+ timeoutMs: 500,
191
+ }), /timeout must be between/);
192
+ });
193
+ it('throws when timeout too large', () => {
194
+ assert.throws(() => validateLangfuseConfig({
195
+ endpoint: 'https://cloud.langfuse.com',
196
+ publicKey: 'pk-test',
197
+ secretKey: 'sk-test',
198
+ timeoutMs: 200000,
199
+ }), /timeout must be between/);
200
+ });
201
+ });
202
+ describe('createLangfuseAuthHeader', () => {
203
+ it('creates Basic auth header with base64 encoding', () => {
204
+ const header = createLangfuseAuthHeader('pk-test', 'sk-test');
205
+ assert.ok(header.startsWith('Basic '));
206
+ // Decode and verify
207
+ const encoded = header.slice(6);
208
+ const decoded = Buffer.from(encoded, 'base64').toString('utf-8');
209
+ assert.strictEqual(decoded, 'pk-test:sk-test');
210
+ });
211
+ it('handles special characters in keys', () => {
212
+ const header = createLangfuseAuthHeader('pk-test+special', 'sk-test=value');
213
+ const encoded = header.slice(6);
214
+ const decoded = Buffer.from(encoded, 'base64').toString('utf-8');
215
+ assert.strictEqual(decoded, 'pk-test+special:sk-test=value');
216
+ });
217
+ });
218
+ describe('evaluationsToOTLPTraces', () => {
219
+ const baseEvaluation = {
220
+ timestamp: '2024-01-15T10:00:00Z',
221
+ evaluationName: 'relevance',
222
+ scoreValue: 0.85,
223
+ scoreLabel: 'good',
224
+ scoreUnit: 'ratio_0_1',
225
+ explanation: 'The response was relevant',
226
+ evaluator: 'gpt-4',
227
+ evaluatorType: 'llm',
228
+ responseId: 'resp-123',
229
+ traceId: 'abc123def456',
230
+ sessionId: 'session-1',
231
+ };
232
+ it('converts single evaluation to OTLP trace', () => {
233
+ const result = evaluationsToOTLPTraces([baseEvaluation]);
234
+ assert.strictEqual(result.resourceSpans.length, 1);
235
+ assert.strictEqual(result.resourceSpans[0].scopeSpans.length, 1);
236
+ assert.strictEqual(result.resourceSpans[0].scopeSpans[0].spans.length, 1);
237
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
238
+ assert.strictEqual(span.traceId, 'abc123def456');
239
+ assert.strictEqual(span.name, 'evaluation_export');
240
+ assert.ok(span.events);
241
+ assert.strictEqual(span.events.length, 1);
242
+ const event = span.events[0];
243
+ assert.strictEqual(event.name, 'gen_ai.evaluation.result');
244
+ assert.ok(event.attributes);
245
+ // Check evaluation attributes
246
+ const attrs = Object.fromEntries(event.attributes.map(a => [a.key, a.value.stringValue ?? a.value.doubleValue]));
247
+ assert.strictEqual(attrs['gen_ai.evaluation.name'], 'relevance');
248
+ assert.strictEqual(attrs['gen_ai.evaluation.score.value'], 0.85);
249
+ assert.strictEqual(attrs['gen_ai.evaluation.score.label'], 'good');
250
+ assert.strictEqual(attrs['gen_ai.evaluation.explanation'], 'The response was relevant');
251
+ });
252
+ it('groups multiple evaluations by traceId', () => {
253
+ const evaluations = [
254
+ { ...baseEvaluation, traceId: 'trace-1' },
255
+ { ...baseEvaluation, traceId: 'trace-1', evaluationName: 'quality' },
256
+ { ...baseEvaluation, traceId: 'trace-2' },
257
+ ];
258
+ const result = evaluationsToOTLPTraces(evaluations);
259
+ // Should have 2 spans (one per traceId)
260
+ const spans = result.resourceSpans[0].scopeSpans[0].spans;
261
+ assert.strictEqual(spans.length, 2);
262
+ // Find the trace-1 span
263
+ const trace1Span = spans.find(s => s.traceId === 'trace-1');
264
+ assert.ok(trace1Span);
265
+ assert.strictEqual(trace1Span.events?.length, 2); // Two evaluations for trace-1
266
+ // Find the trace-2 span
267
+ const trace2Span = spans.find(s => s.traceId === 'trace-2');
268
+ assert.ok(trace2Span);
269
+ assert.strictEqual(trace2Span.events?.length, 1);
270
+ });
271
+ it('generates traceId when not provided', () => {
272
+ const evaluation = { ...baseEvaluation };
273
+ delete evaluation.traceId;
274
+ const result = evaluationsToOTLPTraces([evaluation]);
275
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
276
+ assert.ok(span.traceId);
277
+ assert.strictEqual(span.traceId.length, 32); // 32 hex chars
278
+ });
279
+ it('handles evaluations with missing optional fields', () => {
280
+ const minimalEvaluation = {
281
+ timestamp: '2024-01-15T10:00:00Z',
282
+ evaluationName: 'test',
283
+ };
284
+ const result = evaluationsToOTLPTraces([minimalEvaluation]);
285
+ const event = result.resourceSpans[0].scopeSpans[0].spans[0].events?.[0];
286
+ assert.ok(event);
287
+ assert.strictEqual(event.name, 'gen_ai.evaluation.result');
288
+ // The evaluation name attribute must be present; this test does not check which other attributes exist
289
+ const evalNameAttr = event.attributes?.find(a => a.key === 'gen_ai.evaluation.name');
290
+ assert.ok(evalNameAttr);
291
+ assert.strictEqual(evalNameAttr.value.stringValue, 'test');
292
+ });
293
+ it('includes resource attributes', () => {
294
+ const result = evaluationsToOTLPTraces([baseEvaluation]);
295
+ const resourceAttrs = result.resourceSpans[0].resource.attributes;
296
+ const serviceNameAttr = resourceAttrs.find(a => a.key === 'service.name');
297
+ assert.ok(serviceNameAttr);
298
+ assert.strictEqual(serviceNameAttr.value.stringValue, 'observability-toolkit');
299
+ });
300
+ it('handles empty evaluations array', () => {
301
+ const result = evaluationsToOTLPTraces([]);
302
+ assert.strictEqual(result.resourceSpans[0].scopeSpans[0].spans.length, 0);
303
+ });
304
+ // R4 BACKLOG: Extreme timestamp validation (year 3000+)
305
+ describe('extreme timestamp handling', () => {
306
+ it('accepts exactly year 2000 timestamp (minimum boundary)', () => {
307
+ // Year 2000 is exactly EARLIEST_VALID_TIMESTAMP_MS - should be VALID
308
+ const boundaryEval = {
309
+ timestamp: '2000-01-01T00:00:00.000Z',
310
+ evaluationName: 'test-boundary-min',
311
+ };
312
+ const result = evaluationsToOTLPTraces([boundaryEval]);
313
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
314
+ // Year 2000 = 946684800000 ms from epoch
315
+ const startNano = BigInt(span.startTimeUnixNano);
316
+ const expectedMs = new Date('2000-01-01T00:00:00.000Z').getTime();
317
+ const expectedNs = BigInt(expectedMs) * BigInt(1_000_000);
318
+ assert.strictEqual(startNano, expectedNs, 'Year 2000 boundary should be precise, not fallback');
319
+ });
320
+ it('accepts exactly year 3000 timestamp (maximum boundary)', () => {
321
+ // Year 3000 is exactly LATEST_VALID_TIMESTAMP_MS - should be VALID
322
+ const extremeEval = {
323
+ timestamp: '3000-01-01T00:00:00Z',
324
+ evaluationName: 'test-boundary-max',
325
+ };
326
+ const result = evaluationsToOTLPTraces([extremeEval]);
327
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
328
+ const event = span.events?.[0];
329
+ // Verify the conversion succeeds
330
+ assert.ok(span.startTimeUnixNano, 'startTimeUnixNano should exist');
331
+ assert.ok(event?.timeUnixNano, 'event timeUnixNano should exist');
332
+ // Year 3000 = ~32503680000000 ms from epoch
333
+ const startNano = BigInt(span.startTimeUnixNano);
334
+ const expectedMs = new Date('3000-01-01T00:00:00Z').getTime();
335
+ const expectedNs = BigInt(expectedMs) * BigInt(1_000_000);
336
+ assert.strictEqual(startNano, expectedNs, 'Year 3000 boundary should be precise, not fallback');
337
+ });
338
+ it('uses fallback for year 9999 timestamps (R4 validation)', () => {
339
+ // Year 9999 exceeds MAX_TIMESTAMP_MS (year 3000) - should use fallback
340
+ const maxEval = {
341
+ timestamp: '9999-12-31T23:59:59Z',
342
+ evaluationName: 'test-max-date',
343
+ };
344
+ const result = evaluationsToOTLPTraces([maxEval]);
345
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
346
+ // Should use fallback time (Date.now), not the extreme timestamp
347
+ assert.ok(span.startTimeUnixNano);
348
+ const startNano = BigInt(span.startTimeUnixNano);
349
+ // Year 9999 would be ~253402300799000ms; fallback should be much smaller (near now)
350
+ const year9999Ms = new Date('9999-12-31T23:59:59Z').getTime();
351
+ const year9999Ns = BigInt(year9999Ms) * BigInt(1_000_000);
352
+ // Verify we did NOT use the extreme timestamp
353
+ assert.ok(startNano < year9999Ns, 'Should use fallback time, not year 9999 timestamp');
354
+ // Fallback should be within 1 day of now
355
+ const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
356
+ const diff = startNano > nowNs ? startNano - nowNs : nowNs - startNano;
357
+ assert.ok(diff < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
358
+ });
359
+ it('uses fallback for invalid date strings', () => {
360
+ const invalidEval = {
361
+ timestamp: 'not-a-valid-date',
362
+ evaluationName: 'test-invalid',
363
+ };
364
+ const result = evaluationsToOTLPTraces([invalidEval]);
365
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
366
+ const event = span.events?.[0];
367
+ // Should fall back to current time, not crash
368
+ assert.ok(span.startTimeUnixNano);
369
+ assert.ok(event?.timeUnixNano);
370
+ // Fallback time should be reasonable (within last day)
371
+ const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
372
+ const startNano = BigInt(span.startTimeUnixNano);
373
+ const diff = startNano - nowNs;
374
+ assert.ok(diff < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
375
+ });
376
+ it('uses fallback for timestamps before year 2000 (R4 validation)', () => {
377
+ // Year 1999 is before MIN_TIMESTAMP_MS (year 2000) - should use fallback
378
+ const oldEval = {
379
+ timestamp: '1999-12-31T23:59:59Z',
380
+ evaluationName: 'test-old-date',
381
+ };
382
+ const result = evaluationsToOTLPTraces([oldEval]);
383
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
384
+ // Should use fallback time (Date.now), not the old timestamp
385
+ assert.ok(span.startTimeUnixNano);
386
+ const startNano = BigInt(span.startTimeUnixNano);
387
+ // 1999-12-31T23:59:59Z is 946684799000 ms since epoch (1 s before year 2000); fallback should be larger (near now)
388
+ const year1999Ms = new Date('1999-12-31T23:59:59Z').getTime();
389
+ const year1999Ns = BigInt(year1999Ms) * BigInt(1_000_000);
390
+ // Verify we did NOT use the old timestamp
391
+ assert.ok(startNano > year1999Ns, 'Should use fallback time, not year 1999 timestamp');
392
+ // Fallback should be within 1 day of now
393
+ const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
394
+ const diff = startNano > nowNs ? startNano - nowNs : nowNs - startNano;
395
+ assert.ok(diff < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
396
+ });
397
+ });
398
+ // H3 FIX: BigInt timestamp handling to prevent integer overflow
399
+ describe('BigInt timestamp handling', () => {
400
+ it('handles timestamps near MAX_SAFE_INTEGER without precision loss', () => {
401
+ // Far future timestamp: year 2100 = ~4102444800000 ms
402
+ const farFutureEval = {
403
+ timestamp: '2100-01-01T00:00:00Z',
404
+ evaluationName: 'test-future',
405
+ };
406
+ const result = evaluationsToOTLPTraces([farFutureEval]);
407
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
408
+ const event = span.events?.[0];
409
+ // Verify nanosecond timestamps are valid strings
410
+ assert.ok(span.startTimeUnixNano, 'startTimeUnixNano should exist');
411
+ assert.ok(span.endTimeUnixNano, 'endTimeUnixNano should exist');
412
+ assert.ok(event?.timeUnixNano, 'event timeUnixNano should exist');
413
+ // Parse and verify precision (should be 19+ digits for ns timestamps)
414
+ const startNano = BigInt(span.startTimeUnixNano);
415
+ const eventNano = BigInt(event.timeUnixNano);
416
+ // Year 2100 in nanoseconds should be ~4.1e18 - well beyond MAX_SAFE_INTEGER
417
+ assert.ok(startNano > BigInt(Number.MAX_SAFE_INTEGER), 'Timestamp should exceed MAX_SAFE_INTEGER');
418
+ // Verify no precision loss by checking string representation
419
+ // Without BigInt, the result would have trailing zeros or incorrect digits
420
+ const expectedNs = BigInt(new Date('2100-01-01T00:00:00Z').getTime()) * BigInt(1_000_000);
421
+ assert.strictEqual(startNano, expectedNs, 'Nanosecond value should match expected');
422
+ assert.strictEqual(eventNano, expectedNs, 'Event timestamp should match');
423
+ });
424
+ it('handles current timestamps correctly', () => {
425
+ const currentEval = {
426
+ timestamp: '2026-02-01T10:00:00Z',
427
+ evaluationName: 'test-current',
428
+ };
429
+ const result = evaluationsToOTLPTraces([currentEval]);
430
+ const span = result.resourceSpans[0].scopeSpans[0].spans[0];
431
+ // Current timestamps also exceed MAX_SAFE_INTEGER when converted to ns
432
+ const startNano = BigInt(span.startTimeUnixNano);
433
+ const expectedMs = new Date('2026-02-01T10:00:00Z').getTime();
434
+ const expectedNs = BigInt(expectedMs) * BigInt(1_000_000);
435
+ assert.strictEqual(startNano, expectedNs, 'Current timestamp should be precise');
436
+ });
437
+ });
438
+ });
439
+ describe('exportToLangfuse', () => {
440
+ const mockConfig = { // Baseline options handed to exportToLangfuse; tests spread this object and override single fields.
441
+ endpoint: 'https://cloud.langfuse.com',
442
+ publicKey: 'pk-test',
443
+ secretKey: 'sk-test',
444
+ batchSize: 2, // with the three fixtures below, the tests expect two batches
445
+ timeoutMs: 5000,
446
+ };
447
+ const testEvaluations = [ // Three evaluation fixtures, one second apart, named eval1..eval3.
448
+ { timestamp: '2024-01-15T10:00:00Z', evaluationName: 'eval1', scoreValue: 0.8 },
449
+ { timestamp: '2024-01-15T10:00:01Z', evaluationName: 'eval2', scoreValue: 0.9 },
450
+ { timestamp: '2024-01-15T10:00:02Z', evaluationName: 'eval3', scoreValue: 0.7 },
451
+ ];
452
+ it('batches evaluations correctly', async () => {
453
+ const fetchCalls = [];
454
+ // Mock global fetch
455
+ const originalFetch = globalThis.fetch;
456
+ globalThis.fetch = mock.fn(async (url, opts) => {
457
+ fetchCalls.push({ url: url.toString(), body: opts?.body });
458
+ return new Response('{}', { status: HttpStatus.OK });
459
+ });
460
+ try {
461
+ const result = await exportToLangfuse(testEvaluations, mockConfig);
462
+ assert.strictEqual(result.success, true);
463
+ assert.strictEqual(result.evaluationsExported, 3);
464
+ assert.strictEqual(result.batches, 2); // 3 evals / batch size 2 = 2 batches
465
+ assert.strictEqual(result.failed, 0);
466
+ assert.ok(result.durationMs >= 0);
467
+ // Verify fetch was called twice (2 batches)
468
+ assert.strictEqual(fetchCalls.length, 2);
469
+ assert.ok(fetchCalls[0].url.includes('/v1/traces'));
470
+ }
471
+ finally {
472
+ globalThis.fetch = originalFetch;
473
+ }
474
+ });
475
+ it('handles HTTP errors', async () => {
476
+ const originalFetch = globalThis.fetch;
477
+ globalThis.fetch = mock.fn(async () => {
478
+ return new Response('Unauthorized', { status: HttpStatus.UNAUTHORIZED });
479
+ });
480
+ try {
481
+ const result = await exportToLangfuse([testEvaluations[0]], mockConfig);
482
+ assert.strictEqual(result.success, false);
483
+ assert.strictEqual(result.evaluationsExported, 0);
484
+ assert.strictEqual(result.failed, 1);
485
+ assert.ok(result.errors);
486
+ assert.ok(result.errors[0].includes('HTTP 401'));
487
+ }
488
+ finally {
489
+ globalThis.fetch = originalFetch;
490
+ }
491
+ });
492
+ it('handles network errors', async () => {
493
+ const originalFetch = globalThis.fetch;
494
+ globalThis.fetch = mock.fn(async () => {
495
+ throw new Error('Network error');
496
+ });
497
+ try {
498
+ const result = await exportToLangfuse([testEvaluations[0]], mockConfig);
499
+ assert.strictEqual(result.success, false);
500
+ assert.strictEqual(result.failed, 1);
501
+ assert.ok(result.errors);
502
+ assert.ok(result.errors[0].includes('Network error'));
503
+ }
504
+ finally {
505
+ globalThis.fetch = originalFetch;
506
+ }
507
+ });
508
+ it('handles empty evaluations array', async () => {
509
+ const result = await exportToLangfuse([], mockConfig);
510
+ assert.strictEqual(result.success, true);
511
+ assert.strictEqual(result.evaluationsExported, 0);
512
+ assert.strictEqual(result.batches, 0);
513
+ assert.strictEqual(result.failed, 0);
514
+ });
515
+ it('includes correct authorization header', async () => {
516
+ let capturedHeaders = {};
517
+ const originalFetch = globalThis.fetch;
518
+ globalThis.fetch = mock.fn(async (_url, opts) => {
519
+ capturedHeaders = Object.fromEntries(Object.entries(opts?.headers || {}));
520
+ return new Response('{}', { status: HttpStatus.OK });
521
+ });
522
+ try {
523
+ await exportToLangfuse([testEvaluations[0]], mockConfig);
524
+ assert.ok(capturedHeaders['Authorization']);
525
+ assert.ok(capturedHeaders['Authorization'].startsWith('Basic '));
526
+ assert.strictEqual(capturedHeaders['Content-Type'], 'application/json');
527
+ }
528
+ finally {
529
+ globalThis.fetch = originalFetch;
530
+ }
531
+ });
532
+ it('handles partial batch failures', async () => {
533
+ let callCount = 0;
534
+ const originalFetch = globalThis.fetch;
535
+ globalThis.fetch = mock.fn(async () => {
536
+ callCount++;
537
+ if (callCount === 1) {
538
+ return new Response('{}', { status: HttpStatus.OK });
539
+ }
540
+ return new Response('Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR });
541
+ });
542
+ try {
543
+ const result = await exportToLangfuse(testEvaluations, {
544
+ ...mockConfig,
545
+ batchSize: 2,
546
+ });
547
+ // First batch of 2 succeeds, second batch of 1 fails
548
+ assert.strictEqual(result.success, false);
549
+ assert.strictEqual(result.evaluationsExported, 2);
550
+ assert.strictEqual(result.failed, 1);
551
+ assert.strictEqual(result.batches, 2);
552
+ }
553
+ finally {
554
+ globalThis.fetch = originalFetch;
555
+ }
556
+ });
557
+ // C2 FIX: Credential sanitization tests
558
+ describe('credential sanitization in error responses', () => {
559
+ it('sanitizes Basic auth tokens in error text', async () => {
560
+ const originalFetch = globalThis.fetch;
561
+ const originalConsoleError = console.error;
562
+ let loggedMessage = '';
563
+ console.error = (msg) => { loggedMessage = msg; };
564
+ globalThis.fetch = mock.fn(async () => {
565
+ return new Response('Invalid credentials: Basic cGstdGVzdDpzay10ZXN0', { status: HttpStatus.UNAUTHORIZED });
566
+ });
567
+ try {
568
+ await exportToLangfuse([testEvaluations[0]], mockConfig);
569
+ assert.ok(!loggedMessage.includes('cGstdGVzdDpzay10ZXN0'), 'Base64 credentials should be redacted');
570
+ assert.ok(loggedMessage.includes('[REDACTED]'), 'Should contain [REDACTED]');
571
+ }
572
+ finally {
573
+ globalThis.fetch = originalFetch;
574
+ console.error = originalConsoleError;
575
+ }
576
+ });
577
+ it('sanitizes pk-xxx:sk-xxx patterns in error text', async () => {
578
+ const originalFetch = globalThis.fetch;
579
+ const originalConsoleError = console.error;
580
+ let loggedMessage = '';
581
+ console.error = (msg) => { loggedMessage = msg; };
582
+ globalThis.fetch = mock.fn(async () => {
583
+ return new Response('Bad key: pk-mykey123:sk-mysecret456', { status: HttpStatus.UNAUTHORIZED });
584
+ });
585
+ try {
586
+ await exportToLangfuse([testEvaluations[0]], mockConfig);
587
+ assert.ok(!loggedMessage.includes('pk-mykey123'), 'Public key should be redacted');
588
+ assert.ok(!loggedMessage.includes('sk-mysecret456'), 'Secret key should be redacted');
589
+ assert.ok(loggedMessage.includes('[REDACTED]'), 'Should contain [REDACTED]');
590
+ }
591
+ finally {
592
+ globalThis.fetch = originalFetch;
593
+ console.error = originalConsoleError;
594
+ }
595
+ });
596
+ it('sanitizes Bearer tokens in error text', async () => {
597
+ const originalFetch = globalThis.fetch;
598
+ const originalConsoleError = console.error;
599
+ let loggedMessage = '';
600
+ console.error = (msg) => { loggedMessage = msg; };
601
+ globalThis.fetch = mock.fn(async () => {
602
+ return new Response('Invalid Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9', { status: HttpStatus.UNAUTHORIZED });
603
+ });
604
+ try {
605
+ await exportToLangfuse([testEvaluations[0]], mockConfig);
606
+ assert.ok(!loggedMessage.includes('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'), 'JWT should be redacted');
607
+ assert.ok(loggedMessage.includes('[REDACTED]'), 'Should contain [REDACTED]');
608
+ }
609
+ finally {
610
+ globalThis.fetch = originalFetch;
611
+ console.error = originalConsoleError;
612
+ }
613
+ });
614
+ });
615
+ // BACKLOG: Concurrent exports test
616
+ describe('concurrent exports', () => {
617
+ it('handles multiple simultaneous exports without interference', async () => {
618
+ const fetchCalls = [];
619
+ let batchCounter = 0;
620
+ const originalFetch = globalThis.fetch;
621
+ globalThis.fetch = mock.fn(async () => {
622
+ const batch = ++batchCounter;
623
+ fetchCalls.push({ batch, timestamp: Date.now() });
624
+ // Simulate network latency
625
+ await new Promise(resolve => setTimeout(resolve, 10));
626
+ return new Response('{}', { status: HttpStatus.OK });
627
+ });
628
+ try {
629
+ // Launch 3 concurrent exports
630
+ const [result1, result2, result3] = await Promise.all([
631
+ exportToLangfuse([testEvaluations[0]], { ...mockConfig, batchSize: 1 }),
632
+ exportToLangfuse([testEvaluations[1]], { ...mockConfig, batchSize: 1 }),
633
+ exportToLangfuse([testEvaluations[2]], { ...mockConfig, batchSize: 1 }),
634
+ ]);
635
+ // All exports should succeed
636
+ assert.strictEqual(result1.success, true);
637
+ assert.strictEqual(result2.success, true);
638
+ assert.strictEqual(result3.success, true);
639
+ // Total should be 3 fetches
640
+ assert.strictEqual(fetchCalls.length, 3);
641
+ // Combined exports should total 3 evaluations
642
+ const totalExported = result1.evaluationsExported + result2.evaluationsExported + result3.evaluationsExported;
643
+ assert.strictEqual(totalExported, 3);
644
+ }
645
+ finally {
646
+ globalThis.fetch = originalFetch;
647
+ }
648
+ });
649
+ it('prevents cascade failures in concurrent exports', async () => {
650
+ const originalFetch = globalThis.fetch;
651
+ globalThis.fetch = mock.fn(async (_url, options) => {
652
+ // Parse the body to identify which export this is by evaluation name
653
+ const body = options?.body;
654
+ const isSecondExport = body?.includes('eval2');
655
+ // Second export fails with 400 (client error, not retried)
656
+ if (isSecondExport) {
657
+ return new Response('Bad Request', { status: HttpStatus.BAD_REQUEST });
658
+ }
659
+ return new Response('{}', { status: HttpStatus.OK });
660
+ });
661
+ try {
662
+ const [result1, result2, result3] = await Promise.all([
663
+ exportToLangfuse([testEvaluations[0]], { ...mockConfig, batchSize: 1, timeoutMs: 1000 }),
664
+ exportToLangfuse([testEvaluations[1]], { ...mockConfig, batchSize: 1, timeoutMs: 1000 }),
665
+ exportToLangfuse([testEvaluations[2]], { ...mockConfig, batchSize: 1, timeoutMs: 1000 }),
666
+ ]);
667
+ // First and third exports should succeed, second should fail
668
+ // This verifies failures don't cascade to other concurrent exports
669
+ assert.ok(result1.success, 'First export should succeed');
670
+ assert.ok(!result2.success, 'Second export should fail (400 client error)');
671
+ assert.ok(result3.success, 'Third export should succeed');
672
+ }
673
+ finally {
674
+ globalThis.fetch = originalFetch;
675
+ }
676
+ });
677
+ });
678
+ // BACKLOG: Memory threshold behavior test
679
+ describe('memory protection', () => {
680
+ it('aborts export when memory exceeds threshold', async () => {
681
+ // Mock process.memoryUsage to return high heap usage
682
+ const originalMemoryUsage = process.memoryUsage;
683
+ let memoryCallCount = 0;
684
+ process.memoryUsage = (() => {
685
+ memoryCallCount++;
686
+ // After first batch, simulate high memory (700MB)
687
+ const heapUsed = memoryCallCount > 1 ? 700 * 1024 * 1024 : 100 * 1024 * 1024;
688
+ return {
689
+ heapUsed,
690
+ heapTotal: 1024 * 1024 * 1024,
691
+ external: 0,
692
+ arrayBuffers: 0,
693
+ rss: 0,
694
+ };
695
+ });
696
+ const originalFetch = globalThis.fetch;
697
+ globalThis.fetch = mock.fn(async () => {
698
+ return new Response('{}', { status: HttpStatus.OK });
699
+ });
700
+ try {
701
+ const manyEvaluations = Array.from({ length: 100 }, (_, i) => ({
702
+ timestamp: '2024-01-15T10:00:00Z',
703
+ evaluationName: `eval-${i}`,
704
+ scoreValue: 0.5,
705
+ }));
706
+ const result = await exportToLangfuse(manyEvaluations, {
707
+ ...mockConfig,
708
+ batchSize: 10, // 10 batches needed
709
+ });
710
+ // Should abort partway through
711
+ assert.strictEqual(result.success, false);
712
+ assert.ok(result.failed > 0, 'Should have some failed evaluations');
713
+ assert.ok(result.errors?.some(e => e.includes('Memory limit exceeded')));
714
+ // Should not export all 100
715
+ assert.ok(result.evaluationsExported < 100, 'Should abort before exporting all');
716
+ }
717
+ finally {
718
+ process.memoryUsage = originalMemoryUsage;
719
+ globalThis.fetch = originalFetch;
720
+ }
721
+ });
722
+ it('logs warning when memory is high but continues', async () => {
723
+ const warnings = [];
724
+ const originalWarn = console.warn;
725
+ console.warn = (msg) => { warnings.push(msg); };
726
+ // Mock process.memoryUsage to return moderately high usage (450MB)
727
+ const originalMemoryUsage = process.memoryUsage;
728
+ process.memoryUsage = (() => ({
729
+ heapUsed: 450 * 1024 * 1024, // Above 400MB warning threshold
730
+ heapTotal: 1024 * 1024 * 1024,
731
+ external: 0,
732
+ arrayBuffers: 0,
733
+ rss: 0,
734
+ }));
735
+ const originalFetch = globalThis.fetch;
736
+ globalThis.fetch = mock.fn(async () => {
737
+ return new Response('{}', { status: HttpStatus.OK });
738
+ });
739
+ try {
740
+ const evaluations = Array.from({ length: 10 }, (_, i) => ({
741
+ timestamp: '2024-01-15T10:00:00Z',
742
+ evaluationName: `eval-${i}`,
743
+ scoreValue: 0.5,
744
+ }));
745
+ const result = await exportToLangfuse(evaluations, {
746
+ ...mockConfig,
747
+ batchSize: 10,
748
+ });
749
+ // Should succeed despite high memory (below abort threshold)
750
+ assert.strictEqual(result.success, true);
751
+ assert.strictEqual(result.evaluationsExported, 10);
752
+ // Should have logged a memory warning
753
+ assert.ok(warnings.some(w => w.includes('High memory usage')), 'Should warn about high memory usage');
754
+ }
755
+ finally {
756
+ console.warn = originalWarn;
757
+ process.memoryUsage = originalMemoryUsage;
758
+ globalThis.fetch = originalFetch;
759
+ }
760
+ });
761
+ });
762
+ // C1 FIX: DNS rebinding protection tests
763
+ describe('DNS rebinding protection', () => {
764
+ it('re-validates endpoint before each batch', async () => {
765
+ let fetchCallCount = 0;
766
+ const originalFetch = globalThis.fetch;
767
+ globalThis.fetch = mock.fn(async () => {
768
+ fetchCallCount++;
769
+ return new Response('{}', { status: HttpStatus.OK });
770
+ });
771
+ try {
772
+ // With batch size 1 and 3 evaluations, should call fetch 3 times
773
+ // Each call should re-validate the URL
774
+ const result = await exportToLangfuse(testEvaluations, {
775
+ ...mockConfig,
776
+ batchSize: 1,
777
+ });
778
+ assert.strictEqual(result.success, true);
779
+ assert.strictEqual(result.batches, 3);
780
+ assert.strictEqual(fetchCallCount, 3);
781
+ }
782
+ finally {
783
+ globalThis.fetch = originalFetch;
784
+ }
785
+ });
786
+ it('fails batch if endpoint becomes invalid mid-export', async () => {
787
+ // This tests the DNS rebinding protection: if the endpoint
788
+ // was modified to point to a blocked address between batches,
789
+ // subsequent batches should fail validation
790
+ let fetchCallCount = 0;
791
+ const originalFetch = globalThis.fetch;
792
+ globalThis.fetch = mock.fn(async () => {
793
+ fetchCallCount++;
794
+ return new Response('{}', { status: HttpStatus.OK });
795
+ });
796
+ try {
797
+ // Use an invalid endpoint that will fail re-validation
798
+ const result = await exportToLangfuse(testEvaluations, {
799
+ ...mockConfig,
800
+ endpoint: 'http://localhost', // HTTP not allowed, will fail validation
801
+ batchSize: 1,
802
+ });
803
+ // All batches should fail due to invalid endpoint
804
+ assert.strictEqual(result.success, false);
805
+ assert.strictEqual(result.failed, 3);
806
+ assert.strictEqual(fetchCallCount, 0); // No fetch calls made
807
+ assert.ok(result.errors?.some(e => e.includes('Endpoint validation failed')));
808
+ }
809
+ finally {
810
+ globalThis.fetch = originalFetch;
811
+ }
812
+ });
813
+ it('continues processing valid batches after endpoint re-validation', async () => {
814
+ let fetchCallCount = 0;
815
+ const originalFetch = globalThis.fetch;
816
+ globalThis.fetch = mock.fn(async () => {
817
+ fetchCallCount++;
818
+ return new Response('{}', { status: HttpStatus.OK });
819
+ });
820
+ try {
821
+ const result = await exportToLangfuse(testEvaluations, {
822
+ ...mockConfig,
823
+ batchSize: 2,
824
+ });
825
+ assert.strictEqual(result.success, true);
826
+ assert.strictEqual(result.batches, 2); // 3 evals / batch 2 = 2 batches
827
+ assert.strictEqual(fetchCallCount, 2);
828
+ }
829
+ finally {
830
+ globalThis.fetch = originalFetch;
831
+ }
832
+ });
833
+ });
834
+ // H1 FIX: Retry logic tests
835
+ describe('retry logic for transient failures', () => {
836
+ it('retries on 429 rate limit response', async () => {
837
+ let fetchCallCount = 0;
838
+ const originalFetch = globalThis.fetch;
839
+ globalThis.fetch = mock.fn(async () => {
840
+ fetchCallCount++;
841
+ // First 2 calls return 429, third succeeds
842
+ if (fetchCallCount <= 2) {
843
+ return new Response('Rate limited', { status: HttpStatus.TOO_MANY_REQUESTS });
844
+ }
845
+ return new Response('{}', { status: HttpStatus.OK });
846
+ });
847
+ try {
848
+ const result = await exportToLangfuse([testEvaluations[0]], {
849
+ ...mockConfig,
850
+ timeoutMs: 100, // Short timeout for faster test
851
+ });
852
+ // Should succeed after retries
853
+ assert.strictEqual(result.success, true);
854
+ assert.strictEqual(result.evaluationsExported, 1);
855
+ // Retried 2 times + 1 successful = 3 calls
856
+ assert.strictEqual(fetchCallCount, 3);
857
+ }
858
+ finally {
859
+ globalThis.fetch = originalFetch;
860
+ }
861
+ });
862
+ it('retries on 500 server error', async () => {
863
+ let fetchCallCount = 0;
864
+ const originalFetch = globalThis.fetch;
865
+ globalThis.fetch = mock.fn(async () => {
866
+ fetchCallCount++;
867
+ // First call returns 500, second succeeds
868
+ if (fetchCallCount === 1) {
869
+ return new Response('Internal Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR });
870
+ }
871
+ return new Response('{}', { status: HttpStatus.OK });
872
+ });
873
+ try {
874
+ const result = await exportToLangfuse([testEvaluations[0]], {
875
+ ...mockConfig,
876
+ timeoutMs: 100,
877
+ });
878
+ assert.strictEqual(result.success, true);
879
+ assert.strictEqual(result.evaluationsExported, 1);
880
+ assert.strictEqual(fetchCallCount, 2);
881
+ }
882
+ finally {
883
+ globalThis.fetch = originalFetch;
884
+ }
885
+ });
886
+ it('retries on 503 service unavailable', async () => {
887
+ let fetchCallCount = 0;
888
+ const originalFetch = globalThis.fetch;
889
+ globalThis.fetch = mock.fn(async () => {
890
+ fetchCallCount++;
891
+ if (fetchCallCount === 1) {
892
+ return new Response('Service Unavailable', { status: HttpStatus.SERVICE_UNAVAILABLE });
893
+ }
894
+ return new Response('{}', { status: HttpStatus.OK });
895
+ });
896
+ try {
897
+ const result = await exportToLangfuse([testEvaluations[0]], {
898
+ ...mockConfig,
899
+ timeoutMs: 100,
900
+ });
901
+ assert.strictEqual(result.success, true);
902
+ assert.strictEqual(fetchCallCount, 2);
903
+ }
904
+ finally {
905
+ globalThis.fetch = originalFetch;
906
+ }
907
+ });
908
+ it('does not retry on 400 bad request', async () => {
909
+ let fetchCallCount = 0;
910
+ const originalFetch = globalThis.fetch;
911
+ globalThis.fetch = mock.fn(async () => {
912
+ fetchCallCount++;
913
+ return new Response('Bad Request', { status: HttpStatus.BAD_REQUEST });
914
+ });
915
+ try {
916
+ const result = await exportToLangfuse([testEvaluations[0]], {
917
+ ...mockConfig,
918
+ timeoutMs: 100,
919
+ });
920
+ // Should fail without retry on 4xx (except 429)
921
+ assert.strictEqual(result.success, false);
922
+ assert.strictEqual(result.failed, 1);
923
+ assert.strictEqual(fetchCallCount, 1);
924
+ }
925
+ finally {
926
+ globalThis.fetch = originalFetch;
927
+ }
928
+ });
929
+ it('gives up after max retries on persistent 5xx', async () => {
930
+ let fetchCallCount = 0;
931
+ const originalFetch = globalThis.fetch;
932
+ globalThis.fetch = mock.fn(async () => {
933
+ fetchCallCount++;
934
+ return new Response('Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR });
935
+ });
936
+ try {
937
+ const result = await exportToLangfuse([testEvaluations[0]], {
938
+ ...mockConfig,
939
+ timeoutMs: 100,
940
+ });
941
+ // Should fail after 4 attempts (initial + 3 retries)
942
+ assert.strictEqual(result.success, false);
943
+ assert.strictEqual(result.failed, 1);
944
+ assert.strictEqual(fetchCallCount, 4); // 1 initial + 3 retries
945
+ assert.ok(result.errors?.[0].includes('HTTP 500'));
946
+ }
947
+ finally {
948
+ globalThis.fetch = originalFetch;
949
+ }
950
+ });
951
+ // CRITICAL FIX: Test that network errors are retried (not just AbortError)
952
+ it('retries on network errors (DNS failure, connection reset)', async () => {
953
+ let fetchCallCount = 0;
954
+ const originalFetch = globalThis.fetch;
955
+ globalThis.fetch = mock.fn(async () => {
956
+ fetchCallCount++;
957
+ // First 2 calls throw network error, third succeeds
958
+ if (fetchCallCount <= 2) {
959
+ const error = new TypeError('fetch failed');
960
+ error.cause = new Error('getaddrinfo ENOTFOUND invalid-host.example.com');
961
+ throw error;
962
+ }
963
+ return new Response('{}', { status: HttpStatus.OK });
964
+ });
965
+ try {
966
+ const result = await exportToLangfuse([testEvaluations[0]], {
967
+ ...mockConfig,
968
+ timeoutMs: 100,
969
+ });
970
+ // Should succeed after retrying network errors
971
+ assert.strictEqual(result.success, true);
972
+ assert.strictEqual(result.evaluationsExported, 1);
973
+ // Retried 2 times + 1 successful = 3 calls
974
+ assert.strictEqual(fetchCallCount, 3);
975
+ }
976
+ finally {
977
+ globalThis.fetch = originalFetch;
978
+ }
979
+ });
980
+ it('gives up after max retries on persistent network errors', async () => {
981
+ let fetchCallCount = 0;
982
+ const originalFetch = globalThis.fetch;
983
+ globalThis.fetch = mock.fn(async () => {
984
+ fetchCallCount++;
985
+ const error = new TypeError('fetch failed');
986
+ error.cause = new Error('ECONNREFUSED');
987
+ throw error;
988
+ });
989
+ try {
990
+ const result = await exportToLangfuse([testEvaluations[0]], {
991
+ ...mockConfig,
992
+ timeoutMs: 100,
993
+ });
994
+ // Should fail after 4 attempts (initial + 3 retries)
995
+ assert.strictEqual(result.success, false);
996
+ assert.strictEqual(result.failed, 1);
997
+ assert.strictEqual(fetchCallCount, 4); // 1 initial + 3 retries
998
+ assert.ok(result.errors?.[0].includes('retries exhausted'));
999
+ }
1000
+ finally {
1001
+ globalThis.fetch = originalFetch;
1002
+ }
1003
+ });
1004
+ });
1005
+ });
1006
+ });
1007
+ //# sourceMappingURL=langfuse-export.test.js.map