observability-toolkit 1.8.5 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +126 -5
- package/dist/backends/index.d.ts +163 -0
- package/dist/backends/index.d.ts.map +1 -1
- package/dist/backends/index.js +57 -0
- package/dist/backends/index.js.map +1 -1
- package/dist/backends/index.test.js +55 -1
- package/dist/backends/index.test.js.map +1 -1
- package/dist/backends/local-jsonl.d.ts +30 -0
- package/dist/backends/local-jsonl.d.ts.map +1 -1
- package/dist/backends/local-jsonl.js +912 -550
- package/dist/backends/local-jsonl.js.map +1 -1
- package/dist/backends/signoz-api-rate-limiter.test.js +2 -1
- package/dist/backends/signoz-api-rate-limiter.test.js.map +1 -1
- package/dist/backends/signoz-api.d.ts +16 -2
- package/dist/backends/signoz-api.d.ts.map +1 -1
- package/dist/backends/signoz-api.js +650 -534
- package/dist/backends/signoz-api.js.map +1 -1
- package/dist/backends/signoz-api.test.js +6 -5
- package/dist/backends/signoz-api.test.js.map +1 -1
- package/dist/lib/agent-as-judge.d.ts +388 -0
- package/dist/lib/agent-as-judge.d.ts.map +1 -0
- package/dist/lib/agent-as-judge.js +740 -0
- package/dist/lib/agent-as-judge.js.map +1 -0
- package/dist/lib/agent-as-judge.test.d.ts +5 -0
- package/dist/lib/agent-as-judge.test.d.ts.map +1 -0
- package/dist/lib/agent-as-judge.test.js +816 -0
- package/dist/lib/agent-as-judge.test.js.map +1 -0
- package/dist/lib/cache.d.ts +15 -2
- package/dist/lib/cache.d.ts.map +1 -1
- package/dist/lib/cache.js +16 -2
- package/dist/lib/cache.js.map +1 -1
- package/dist/lib/circuit-breaker.d.ts +18 -0
- package/dist/lib/circuit-breaker.d.ts.map +1 -1
- package/dist/lib/circuit-breaker.js +41 -8
- package/dist/lib/circuit-breaker.js.map +1 -1
- package/dist/lib/confident-export.d.ts +101 -0
- package/dist/lib/confident-export.d.ts.map +1 -0
- package/dist/lib/confident-export.js +393 -0
- package/dist/lib/confident-export.js.map +1 -0
- package/dist/lib/confident-export.test.d.ts +7 -0
- package/dist/lib/confident-export.test.d.ts.map +1 -0
- package/dist/lib/confident-export.test.js +835 -0
- package/dist/lib/confident-export.test.js.map +1 -0
- package/dist/lib/constants.d.ts +75 -0
- package/dist/lib/constants.d.ts.map +1 -1
- package/dist/lib/constants.js +104 -1
- package/dist/lib/constants.js.map +1 -1
- package/dist/lib/datadog-export.d.ts +156 -0
- package/dist/lib/datadog-export.d.ts.map +1 -0
- package/dist/lib/datadog-export.js +464 -0
- package/dist/lib/datadog-export.js.map +1 -0
- package/dist/lib/datadog-export.test.d.ts +14 -0
- package/dist/lib/datadog-export.test.d.ts.map +1 -0
- package/dist/lib/datadog-export.test.js +890 -0
- package/dist/lib/datadog-export.test.js.map +1 -0
- package/dist/lib/evaluation-hooks.d.ts +49 -0
- package/dist/lib/evaluation-hooks.d.ts.map +1 -0
- package/dist/lib/evaluation-hooks.js +488 -0
- package/dist/lib/evaluation-hooks.js.map +1 -0
- package/dist/lib/evaluation-hooks.test.d.ts +8 -0
- package/dist/lib/evaluation-hooks.test.d.ts.map +1 -0
- package/dist/lib/evaluation-hooks.test.js +624 -0
- package/dist/lib/evaluation-hooks.test.js.map +1 -0
- package/dist/lib/export-utils.d.ts +99 -0
- package/dist/lib/export-utils.d.ts.map +1 -0
- package/dist/lib/export-utils.js +238 -0
- package/dist/lib/export-utils.js.map +1 -0
- package/dist/lib/export-utils.test.d.ts +5 -0
- package/dist/lib/export-utils.test.d.ts.map +1 -0
- package/dist/lib/export-utils.test.js +193 -0
- package/dist/lib/export-utils.test.js.map +1 -0
- package/dist/lib/file-utils.d.ts +17 -2
- package/dist/lib/file-utils.d.ts.map +1 -1
- package/dist/lib/file-utils.js +24 -5
- package/dist/lib/file-utils.js.map +1 -1
- package/dist/lib/file-utils.test.js +30 -0
- package/dist/lib/file-utils.test.js.map +1 -1
- package/dist/lib/histogram.d.ts +119 -0
- package/dist/lib/histogram.d.ts.map +1 -0
- package/dist/lib/histogram.js +202 -0
- package/dist/lib/histogram.js.map +1 -0
- package/dist/lib/histogram.test.d.ts +5 -0
- package/dist/lib/histogram.test.d.ts.map +1 -0
- package/dist/lib/histogram.test.js +381 -0
- package/dist/lib/histogram.test.js.map +1 -0
- package/dist/lib/instrumentation.d.ts +153 -0
- package/dist/lib/instrumentation.d.ts.map +1 -0
- package/dist/lib/instrumentation.integration.test.d.ts +2 -0
- package/dist/lib/instrumentation.integration.test.d.ts.map +1 -0
- package/dist/lib/instrumentation.integration.test.js +589 -0
- package/dist/lib/instrumentation.integration.test.js.map +1 -0
- package/dist/lib/instrumentation.js +520 -0
- package/dist/lib/instrumentation.js.map +1 -0
- package/dist/lib/instrumentation.test.d.ts +2 -0
- package/dist/lib/instrumentation.test.d.ts.map +1 -0
- package/dist/lib/instrumentation.test.js +821 -0
- package/dist/lib/instrumentation.test.js.map +1 -0
- package/dist/lib/langfuse-export.d.ts +125 -0
- package/dist/lib/langfuse-export.d.ts.map +1 -0
- package/dist/lib/langfuse-export.js +367 -0
- package/dist/lib/langfuse-export.js.map +1 -0
- package/dist/lib/langfuse-export.test.d.ts +7 -0
- package/dist/lib/langfuse-export.test.d.ts.map +1 -0
- package/dist/lib/langfuse-export.test.js +1007 -0
- package/dist/lib/langfuse-export.test.js.map +1 -0
- package/dist/lib/llm-as-judge.d.ts +657 -0
- package/dist/lib/llm-as-judge.d.ts.map +1 -0
- package/dist/lib/llm-as-judge.js +1397 -0
- package/dist/lib/llm-as-judge.js.map +1 -0
- package/dist/lib/llm-as-judge.test.d.ts +2 -0
- package/dist/lib/llm-as-judge.test.d.ts.map +1 -0
- package/dist/lib/llm-as-judge.test.js +2409 -0
- package/dist/lib/llm-as-judge.test.js.map +1 -0
- package/dist/lib/logger.d.ts +1 -1
- package/dist/lib/logger.d.ts.map +1 -1
- package/dist/lib/logger.js.map +1 -1
- package/dist/lib/metrics.d.ts +62 -0
- package/dist/lib/metrics.d.ts.map +1 -0
- package/dist/lib/metrics.js +166 -0
- package/dist/lib/metrics.js.map +1 -0
- package/dist/lib/metrics.test.d.ts +5 -0
- package/dist/lib/metrics.test.d.ts.map +1 -0
- package/dist/lib/metrics.test.js +189 -0
- package/dist/lib/metrics.test.js.map +1 -0
- package/dist/lib/parse-stats.d.ts +119 -0
- package/dist/lib/parse-stats.d.ts.map +1 -0
- package/dist/lib/parse-stats.js +206 -0
- package/dist/lib/parse-stats.js.map +1 -0
- package/dist/lib/parse-stats.test.d.ts +5 -0
- package/dist/lib/parse-stats.test.d.ts.map +1 -0
- package/dist/lib/parse-stats.test.js +283 -0
- package/dist/lib/parse-stats.test.js.map +1 -0
- package/dist/lib/phoenix-export.d.ts +109 -0
- package/dist/lib/phoenix-export.d.ts.map +1 -0
- package/dist/lib/phoenix-export.js +429 -0
- package/dist/lib/phoenix-export.js.map +1 -0
- package/dist/lib/phoenix-export.test.d.ts +11 -0
- package/dist/lib/phoenix-export.test.d.ts.map +1 -0
- package/dist/lib/phoenix-export.test.js +725 -0
- package/dist/lib/phoenix-export.test.js.map +1 -0
- package/dist/lib/server-utils.d.ts +6 -1
- package/dist/lib/server-utils.d.ts.map +1 -1
- package/dist/lib/server-utils.js +9 -1
- package/dist/lib/server-utils.js.map +1 -1
- package/dist/lib/shared-schemas.d.ts +6 -0
- package/dist/lib/shared-schemas.d.ts.map +1 -1
- package/dist/lib/shared-schemas.js +11 -4
- package/dist/lib/shared-schemas.js.map +1 -1
- package/dist/lib/verification-events.d.ts +100 -0
- package/dist/lib/verification-events.d.ts.map +1 -0
- package/dist/lib/verification-events.js +162 -0
- package/dist/lib/verification-events.js.map +1 -0
- package/dist/lib/verification-events.test.d.ts +5 -0
- package/dist/lib/verification-events.test.d.ts.map +1 -0
- package/dist/lib/verification-events.test.js +193 -0
- package/dist/lib/verification-events.test.js.map +1 -0
- package/dist/server.d.ts +5 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +77 -21
- package/dist/server.js.map +1 -1
- package/dist/tools/context-stats.d.ts.map +1 -1
- package/dist/tools/context-stats.js +6 -8
- package/dist/tools/context-stats.js.map +1 -1
- package/dist/tools/export-confident.d.ts +145 -0
- package/dist/tools/export-confident.d.ts.map +1 -0
- package/dist/tools/export-confident.js +134 -0
- package/dist/tools/export-confident.js.map +1 -0
- package/dist/tools/export-confident.test.d.ts +7 -0
- package/dist/tools/export-confident.test.d.ts.map +1 -0
- package/dist/tools/export-confident.test.js +332 -0
- package/dist/tools/export-confident.test.js.map +1 -0
- package/dist/tools/export-datadog.d.ts +160 -0
- package/dist/tools/export-datadog.d.ts.map +1 -0
- package/dist/tools/export-datadog.js +160 -0
- package/dist/tools/export-datadog.js.map +1 -0
- package/dist/tools/export-datadog.test.d.ts +8 -0
- package/dist/tools/export-datadog.test.d.ts.map +1 -0
- package/dist/tools/export-datadog.test.js +419 -0
- package/dist/tools/export-datadog.test.js.map +1 -0
- package/dist/tools/export-langfuse.d.ts +137 -0
- package/dist/tools/export-langfuse.d.ts.map +1 -0
- package/dist/tools/export-langfuse.js +131 -0
- package/dist/tools/export-langfuse.js.map +1 -0
- package/dist/tools/export-langfuse.test.d.ts +7 -0
- package/dist/tools/export-langfuse.test.d.ts.map +1 -0
- package/dist/tools/export-langfuse.test.js +303 -0
- package/dist/tools/export-langfuse.test.js.map +1 -0
- package/dist/tools/export-phoenix.d.ts +145 -0
- package/dist/tools/export-phoenix.d.ts.map +1 -0
- package/dist/tools/export-phoenix.js +135 -0
- package/dist/tools/export-phoenix.js.map +1 -0
- package/dist/tools/export-phoenix.test.d.ts +7 -0
- package/dist/tools/export-phoenix.test.d.ts.map +1 -0
- package/dist/tools/export-phoenix.test.js +316 -0
- package/dist/tools/export-phoenix.test.js.map +1 -0
- package/dist/tools/health-check.d.ts +26 -0
- package/dist/tools/health-check.d.ts.map +1 -1
- package/dist/tools/health-check.js +36 -7
- package/dist/tools/health-check.js.map +1 -1
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +6 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/inject-evaluations.d.ts +1315 -0
- package/dist/tools/inject-evaluations.d.ts.map +1 -0
- package/dist/tools/inject-evaluations.js +121 -0
- package/dist/tools/inject-evaluations.js.map +1 -0
- package/dist/tools/inject-evaluations.test.d.ts +5 -0
- package/dist/tools/inject-evaluations.test.d.ts.map +1 -0
- package/dist/tools/inject-evaluations.test.js +359 -0
- package/dist/tools/inject-evaluations.test.js.map +1 -0
- package/dist/tools/query-evaluations.d.ts +25 -4
- package/dist/tools/query-evaluations.d.ts.map +1 -1
- package/dist/tools/query-evaluations.js +10 -0
- package/dist/tools/query-evaluations.js.map +1 -1
- package/dist/tools/query-llm-events.js +2 -2
- package/dist/tools/query-llm-events.js.map +1 -1
- package/dist/tools/query-logs.d.ts +8 -8
- package/dist/tools/query-logs.js +3 -3
- package/dist/tools/query-logs.js.map +1 -1
- package/dist/tools/query-metrics.d.ts +4 -4
- package/dist/tools/query-metrics.js +2 -2
- package/dist/tools/query-metrics.js.map +1 -1
- package/dist/tools/query-traces.d.ts +8 -8
- package/dist/tools/query-verifications.d.ts +111 -0
- package/dist/tools/query-verifications.d.ts.map +1 -0
- package/dist/tools/query-verifications.js +101 -0
- package/dist/tools/query-verifications.js.map +1 -0
- package/dist/tools/query-verifications.test.d.ts +5 -0
- package/dist/tools/query-verifications.test.d.ts.map +1 -0
- package/dist/tools/query-verifications.test.js +156 -0
- package/dist/tools/query-verifications.test.js.map +1 -0
- package/dist/types/evaluation-hooks.d.ts +176 -0
- package/dist/types/evaluation-hooks.d.ts.map +1 -0
- package/dist/types/evaluation-hooks.js +49 -0
- package/dist/types/evaluation-hooks.js.map +1 -0
- package/package.json +10 -2
|
@@ -0,0 +1,1007 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for langfuse-export.ts
|
|
3
|
+
*
|
|
4
|
+
* Covers config validation, auth header creation, OTLP conversion, and batching.
|
|
5
|
+
*/
|
|
6
|
+
import { describe, it, beforeEach, afterEach, mock } from 'node:test';
|
|
7
|
+
import assert from 'node:assert';
|
|
8
|
+
import { validateLangfuseUrl, validateLangfuseConfig, createLangfuseAuthHeader, evaluationsToOTLPTraces, exportToLangfuse, } from './langfuse-export.js';
|
|
9
|
+
import { HttpStatus } from './constants.js';
|
|
10
|
+
describe('langfuse-export', () => {
|
|
11
|
+
describe('validateLangfuseUrl', () => {
|
|
12
|
+
describe('valid URLs', () => {
|
|
13
|
+
it('accepts valid HTTPS URL', () => {
|
|
14
|
+
const url = 'https://cloud.langfuse.com';
|
|
15
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com');
|
|
16
|
+
});
|
|
17
|
+
it('accepts HTTPS URL with port', () => {
|
|
18
|
+
const url = 'https://langfuse.example.com:8443';
|
|
19
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://langfuse.example.com:8443');
|
|
20
|
+
});
|
|
21
|
+
it('preserves path for self-hosted Langfuse on subpath', () => {
|
|
22
|
+
// M4 FIX: Support Langfuse hosted on subpaths like /langfuse or /api
|
|
23
|
+
const url = 'https://example.com/langfuse';
|
|
24
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
|
|
25
|
+
});
|
|
26
|
+
it('preserves API path prefix', () => {
|
|
27
|
+
const url = 'https://cloud.langfuse.com/api/v1/traces';
|
|
28
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com/api/v1/traces');
|
|
29
|
+
});
|
|
30
|
+
it('strips trailing slash from path', () => {
|
|
31
|
+
const url = 'https://example.com/langfuse/';
|
|
32
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
|
|
33
|
+
});
|
|
34
|
+
it('strips query params from URL', () => {
|
|
35
|
+
const url = 'https://cloud.langfuse.com?key=value';
|
|
36
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://cloud.langfuse.com');
|
|
37
|
+
});
|
|
38
|
+
it('strips query params but preserves path', () => {
|
|
39
|
+
const url = 'https://example.com/langfuse?key=value';
|
|
40
|
+
assert.strictEqual(validateLangfuseUrl(url), 'https://example.com/langfuse');
|
|
41
|
+
});
|
|
42
|
+
});
|
|
43
|
+
describe('SSRF protection - rejects localhost', () => {
|
|
44
|
+
it('rejects localhost', () => {
|
|
45
|
+
assert.strictEqual(validateLangfuseUrl('https://localhost'), '');
|
|
46
|
+
});
|
|
47
|
+
it('rejects 127.0.0.1', () => {
|
|
48
|
+
assert.strictEqual(validateLangfuseUrl('https://127.0.0.1'), '');
|
|
49
|
+
});
|
|
50
|
+
it('rejects 127.x.x.x variants', () => {
|
|
51
|
+
assert.strictEqual(validateLangfuseUrl('https://127.100.200.1'), '');
|
|
52
|
+
});
|
|
53
|
+
it('rejects ::1 IPv6 localhost', () => {
|
|
54
|
+
assert.strictEqual(validateLangfuseUrl('https://[::1]'), '');
|
|
55
|
+
});
|
|
56
|
+
it('rejects 0.0.0.0', () => {
|
|
57
|
+
assert.strictEqual(validateLangfuseUrl('https://0.0.0.0'), '');
|
|
58
|
+
});
|
|
59
|
+
it('rejects .localhost TLD', () => {
|
|
60
|
+
assert.strictEqual(validateLangfuseUrl('https://app.localhost'), '');
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
describe('SSRF protection - rejects cloud metadata', () => {
|
|
64
|
+
it('rejects AWS/GCP/Azure metadata IP', () => {
|
|
65
|
+
assert.strictEqual(validateLangfuseUrl('https://169.254.169.254'), '');
|
|
66
|
+
});
|
|
67
|
+
it('rejects link-local range', () => {
|
|
68
|
+
assert.strictEqual(validateLangfuseUrl('https://169.254.0.1'), '');
|
|
69
|
+
});
|
|
70
|
+
});
|
|
71
|
+
describe('SSRF protection - rejects private networks', () => {
|
|
72
|
+
it('rejects 10.x.x.x', () => {
|
|
73
|
+
assert.strictEqual(validateLangfuseUrl('https://10.0.0.1'), '');
|
|
74
|
+
});
|
|
75
|
+
it('rejects 192.168.x.x', () => {
|
|
76
|
+
assert.strictEqual(validateLangfuseUrl('https://192.168.1.1'), '');
|
|
77
|
+
});
|
|
78
|
+
it('rejects 172.16-31.x.x', () => {
|
|
79
|
+
assert.strictEqual(validateLangfuseUrl('https://172.16.0.1'), '');
|
|
80
|
+
assert.strictEqual(validateLangfuseUrl('https://172.31.255.255'), '');
|
|
81
|
+
});
|
|
82
|
+
it('rejects fc/fd IPv6 ULA', () => {
|
|
83
|
+
assert.strictEqual(validateLangfuseUrl('https://[fc00::1]'), '');
|
|
84
|
+
assert.strictEqual(validateLangfuseUrl('https://[fd00::1]'), '');
|
|
85
|
+
});
|
|
86
|
+
it('rejects fe80 IPv6 link-local', () => {
|
|
87
|
+
assert.strictEqual(validateLangfuseUrl('https://[fe80::1]'), '');
|
|
88
|
+
});
|
|
89
|
+
});
|
|
90
|
+
describe('SSRF protection - rejects reserved domains', () => {
|
|
91
|
+
it('rejects .local TLD', () => {
|
|
92
|
+
assert.strictEqual(validateLangfuseUrl('https://server.local'), '');
|
|
93
|
+
});
|
|
94
|
+
it('rejects .internal TLD', () => {
|
|
95
|
+
assert.strictEqual(validateLangfuseUrl('https://api.internal'), '');
|
|
96
|
+
});
|
|
97
|
+
it('rejects .localdomain TLD', () => {
|
|
98
|
+
assert.strictEqual(validateLangfuseUrl('https://host.localdomain'), '');
|
|
99
|
+
});
|
|
100
|
+
it('rejects .home.arpa TLD', () => {
|
|
101
|
+
assert.strictEqual(validateLangfuseUrl('https://server.home.arpa'), '');
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
describe('SSRF protection - rejects HTTP', () => {
|
|
105
|
+
it('rejects HTTP protocol', () => {
|
|
106
|
+
assert.strictEqual(validateLangfuseUrl('http://cloud.langfuse.com'), '');
|
|
107
|
+
});
|
|
108
|
+
it('rejects file protocol', () => {
|
|
109
|
+
assert.strictEqual(validateLangfuseUrl('file:///etc/passwd'), '');
|
|
110
|
+
});
|
|
111
|
+
});
|
|
112
|
+
describe('edge cases', () => {
|
|
113
|
+
it('returns empty for empty string', () => {
|
|
114
|
+
assert.strictEqual(validateLangfuseUrl(''), '');
|
|
115
|
+
});
|
|
116
|
+
it('returns empty for invalid URL', () => {
|
|
117
|
+
assert.strictEqual(validateLangfuseUrl('not-a-url'), '');
|
|
118
|
+
});
|
|
119
|
+
});
|
|
120
|
+
});
|
|
121
|
+
describe('validateLangfuseConfig', () => {
|
|
122
|
+
const originalEnv = { ...process.env };
|
|
123
|
+
beforeEach(() => {
|
|
124
|
+
delete process.env.LANGFUSE_ENDPOINT;
|
|
125
|
+
delete process.env.LANGFUSE_PUBLIC_KEY;
|
|
126
|
+
delete process.env.LANGFUSE_SECRET_KEY;
|
|
127
|
+
delete process.env.LANGFUSE_BATCH_SIZE;
|
|
128
|
+
delete process.env.LANGFUSE_TIMEOUT_MS;
|
|
129
|
+
});
|
|
130
|
+
afterEach(() => {
|
|
131
|
+
Object.assign(process.env, originalEnv);
|
|
132
|
+
});
|
|
133
|
+
it('throws when endpoint not configured', () => {
|
|
134
|
+
assert.throws(() => validateLangfuseConfig(), /endpoint not configured/);
|
|
135
|
+
});
|
|
136
|
+
it('throws when endpoint is invalid', () => {
|
|
137
|
+
assert.throws(() => validateLangfuseConfig({ endpoint: 'http://localhost' }), /endpoint not configured or invalid/);
|
|
138
|
+
});
|
|
139
|
+
it('throws when public key not configured', () => {
|
|
140
|
+
assert.throws(() => validateLangfuseConfig({ endpoint: 'https://cloud.langfuse.com' }), /public key not configured/);
|
|
141
|
+
});
|
|
142
|
+
it('throws when secret key not configured', () => {
|
|
143
|
+
assert.throws(() => validateLangfuseConfig({
|
|
144
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
145
|
+
publicKey: 'pk-test',
|
|
146
|
+
}), /secret key not configured/);
|
|
147
|
+
});
|
|
148
|
+
it('returns valid config with all required fields', () => {
|
|
149
|
+
const config = validateLangfuseConfig({
|
|
150
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
151
|
+
publicKey: 'pk-test',
|
|
152
|
+
secretKey: 'sk-test',
|
|
153
|
+
});
|
|
154
|
+
assert.strictEqual(config.endpoint, 'https://cloud.langfuse.com');
|
|
155
|
+
assert.strictEqual(config.publicKey, 'pk-test');
|
|
156
|
+
assert.strictEqual(config.secretKey, 'sk-test');
|
|
157
|
+
assert.strictEqual(config.batchSize, 100); // default
|
|
158
|
+
assert.strictEqual(config.timeoutMs, 30000); // default
|
|
159
|
+
});
|
|
160
|
+
it('uses custom batch size', () => {
|
|
161
|
+
const config = validateLangfuseConfig({
|
|
162
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
163
|
+
publicKey: 'pk-test',
|
|
164
|
+
secretKey: 'sk-test',
|
|
165
|
+
batchSize: 50,
|
|
166
|
+
});
|
|
167
|
+
assert.strictEqual(config.batchSize, 50);
|
|
168
|
+
});
|
|
169
|
+
it('throws when batch size too small', () => {
|
|
170
|
+
assert.throws(() => validateLangfuseConfig({
|
|
171
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
172
|
+
publicKey: 'pk-test',
|
|
173
|
+
secretKey: 'sk-test',
|
|
174
|
+
batchSize: 0,
|
|
175
|
+
}), /batch size must be between/);
|
|
176
|
+
});
|
|
177
|
+
it('throws when batch size too large', () => {
|
|
178
|
+
assert.throws(() => validateLangfuseConfig({
|
|
179
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
180
|
+
publicKey: 'pk-test',
|
|
181
|
+
secretKey: 'sk-test',
|
|
182
|
+
batchSize: 1001,
|
|
183
|
+
}), /batch size must be between/);
|
|
184
|
+
});
|
|
185
|
+
it('throws when timeout too small', () => {
|
|
186
|
+
assert.throws(() => validateLangfuseConfig({
|
|
187
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
188
|
+
publicKey: 'pk-test',
|
|
189
|
+
secretKey: 'sk-test',
|
|
190
|
+
timeoutMs: 500,
|
|
191
|
+
}), /timeout must be between/);
|
|
192
|
+
});
|
|
193
|
+
it('throws when timeout too large', () => {
|
|
194
|
+
assert.throws(() => validateLangfuseConfig({
|
|
195
|
+
endpoint: 'https://cloud.langfuse.com',
|
|
196
|
+
publicKey: 'pk-test',
|
|
197
|
+
secretKey: 'sk-test',
|
|
198
|
+
timeoutMs: 200000,
|
|
199
|
+
}), /timeout must be between/);
|
|
200
|
+
});
|
|
201
|
+
});
|
|
202
|
+
describe('createLangfuseAuthHeader', () => {
|
|
203
|
+
it('creates Basic auth header with base64 encoding', () => {
|
|
204
|
+
const header = createLangfuseAuthHeader('pk-test', 'sk-test');
|
|
205
|
+
assert.ok(header.startsWith('Basic '));
|
|
206
|
+
// Decode and verify
|
|
207
|
+
const encoded = header.slice(6);
|
|
208
|
+
const decoded = Buffer.from(encoded, 'base64').toString('utf-8');
|
|
209
|
+
assert.strictEqual(decoded, 'pk-test:sk-test');
|
|
210
|
+
});
|
|
211
|
+
it('handles special characters in keys', () => {
|
|
212
|
+
const header = createLangfuseAuthHeader('pk-test+special', 'sk-test=value');
|
|
213
|
+
const encoded = header.slice(6);
|
|
214
|
+
const decoded = Buffer.from(encoded, 'base64').toString('utf-8');
|
|
215
|
+
assert.strictEqual(decoded, 'pk-test+special:sk-test=value');
|
|
216
|
+
});
|
|
217
|
+
});
|
|
218
|
+
describe('evaluationsToOTLPTraces', () => {
|
|
219
|
+
const baseEvaluation = {
|
|
220
|
+
timestamp: '2024-01-15T10:00:00Z',
|
|
221
|
+
evaluationName: 'relevance',
|
|
222
|
+
scoreValue: 0.85,
|
|
223
|
+
scoreLabel: 'good',
|
|
224
|
+
scoreUnit: 'ratio_0_1',
|
|
225
|
+
explanation: 'The response was relevant',
|
|
226
|
+
evaluator: 'gpt-4',
|
|
227
|
+
evaluatorType: 'llm',
|
|
228
|
+
responseId: 'resp-123',
|
|
229
|
+
traceId: 'abc123def456',
|
|
230
|
+
sessionId: 'session-1',
|
|
231
|
+
};
|
|
232
|
+
it('converts single evaluation to OTLP trace', () => {
|
|
233
|
+
const result = evaluationsToOTLPTraces([baseEvaluation]);
|
|
234
|
+
assert.strictEqual(result.resourceSpans.length, 1);
|
|
235
|
+
assert.strictEqual(result.resourceSpans[0].scopeSpans.length, 1);
|
|
236
|
+
assert.strictEqual(result.resourceSpans[0].scopeSpans[0].spans.length, 1);
|
|
237
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
238
|
+
assert.strictEqual(span.traceId, 'abc123def456');
|
|
239
|
+
assert.strictEqual(span.name, 'evaluation_export');
|
|
240
|
+
assert.ok(span.events);
|
|
241
|
+
assert.strictEqual(span.events.length, 1);
|
|
242
|
+
const event = span.events[0];
|
|
243
|
+
assert.strictEqual(event.name, 'gen_ai.evaluation.result');
|
|
244
|
+
assert.ok(event.attributes);
|
|
245
|
+
// Check evaluation attributes
|
|
246
|
+
const attrs = Object.fromEntries(event.attributes.map(a => [a.key, a.value.stringValue ?? a.value.doubleValue]));
|
|
247
|
+
assert.strictEqual(attrs['gen_ai.evaluation.name'], 'relevance');
|
|
248
|
+
assert.strictEqual(attrs['gen_ai.evaluation.score.value'], 0.85);
|
|
249
|
+
assert.strictEqual(attrs['gen_ai.evaluation.score.label'], 'good');
|
|
250
|
+
assert.strictEqual(attrs['gen_ai.evaluation.explanation'], 'The response was relevant');
|
|
251
|
+
});
|
|
252
|
+
it('groups multiple evaluations by traceId', () => {
|
|
253
|
+
const evaluations = [
|
|
254
|
+
{ ...baseEvaluation, traceId: 'trace-1' },
|
|
255
|
+
{ ...baseEvaluation, traceId: 'trace-1', evaluationName: 'quality' },
|
|
256
|
+
{ ...baseEvaluation, traceId: 'trace-2' },
|
|
257
|
+
];
|
|
258
|
+
const result = evaluationsToOTLPTraces(evaluations);
|
|
259
|
+
// Should have 2 spans (one per traceId)
|
|
260
|
+
const spans = result.resourceSpans[0].scopeSpans[0].spans;
|
|
261
|
+
assert.strictEqual(spans.length, 2);
|
|
262
|
+
// Find the trace-1 span
|
|
263
|
+
const trace1Span = spans.find(s => s.traceId === 'trace-1');
|
|
264
|
+
assert.ok(trace1Span);
|
|
265
|
+
assert.strictEqual(trace1Span.events?.length, 2); // Two evaluations for trace-1
|
|
266
|
+
// Find the trace-2 span
|
|
267
|
+
const trace2Span = spans.find(s => s.traceId === 'trace-2');
|
|
268
|
+
assert.ok(trace2Span);
|
|
269
|
+
assert.strictEqual(trace2Span.events?.length, 1);
|
|
270
|
+
});
|
|
271
|
+
it('generates traceId when not provided', () => {
|
|
272
|
+
const evaluation = { ...baseEvaluation };
|
|
273
|
+
delete evaluation.traceId;
|
|
274
|
+
const result = evaluationsToOTLPTraces([evaluation]);
|
|
275
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
276
|
+
assert.ok(span.traceId);
|
|
277
|
+
assert.strictEqual(span.traceId.length, 32); // 32 hex chars
|
|
278
|
+
});
|
|
279
|
+
it('handles evaluations with missing optional fields', () => {
|
|
280
|
+
const minimalEvaluation = {
|
|
281
|
+
timestamp: '2024-01-15T10:00:00Z',
|
|
282
|
+
evaluationName: 'test',
|
|
283
|
+
};
|
|
284
|
+
const result = evaluationsToOTLPTraces([minimalEvaluation]);
|
|
285
|
+
const event = result.resourceSpans[0].scopeSpans[0].spans[0].events?.[0];
|
|
286
|
+
assert.ok(event);
|
|
287
|
+
assert.strictEqual(event.name, 'gen_ai.evaluation.result');
|
|
288
|
+
// Should only have the evaluation name attribute
|
|
289
|
+
const evalNameAttr = event.attributes?.find(a => a.key === 'gen_ai.evaluation.name');
|
|
290
|
+
assert.ok(evalNameAttr);
|
|
291
|
+
assert.strictEqual(evalNameAttr.value.stringValue, 'test');
|
|
292
|
+
});
|
|
293
|
+
it('includes resource attributes', () => {
|
|
294
|
+
const result = evaluationsToOTLPTraces([baseEvaluation]);
|
|
295
|
+
const resourceAttrs = result.resourceSpans[0].resource.attributes;
|
|
296
|
+
const serviceNameAttr = resourceAttrs.find(a => a.key === 'service.name');
|
|
297
|
+
assert.ok(serviceNameAttr);
|
|
298
|
+
assert.strictEqual(serviceNameAttr.value.stringValue, 'observability-toolkit');
|
|
299
|
+
});
|
|
300
|
+
it('handles empty evaluations array', () => {
|
|
301
|
+
const result = evaluationsToOTLPTraces([]);
|
|
302
|
+
assert.strictEqual(result.resourceSpans[0].scopeSpans[0].spans.length, 0);
|
|
303
|
+
});
|
|
304
|
+
// R4 BACKLOG: Extreme timestamp validation (year 3000+)
|
|
305
|
+
describe('extreme timestamp handling', () => {
|
|
306
|
+
it('accepts exactly year 2000 timestamp (minimum boundary)', () => {
|
|
307
|
+
// Year 2000 is exactly EARLIEST_VALID_TIMESTAMP_MS - should be VALID
|
|
308
|
+
const boundaryEval = {
|
|
309
|
+
timestamp: '2000-01-01T00:00:00.000Z',
|
|
310
|
+
evaluationName: 'test-boundary-min',
|
|
311
|
+
};
|
|
312
|
+
const result = evaluationsToOTLPTraces([boundaryEval]);
|
|
313
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
314
|
+
// Year 2000 = 946684800000 ms from epoch
|
|
315
|
+
const startNano = BigInt(span.startTimeUnixNano);
|
|
316
|
+
const expectedMs = new Date('2000-01-01T00:00:00.000Z').getTime();
|
|
317
|
+
const expectedNs = BigInt(expectedMs) * BigInt(1_000_000);
|
|
318
|
+
assert.strictEqual(startNano, expectedNs, 'Year 2000 boundary should be precise, not fallback');
|
|
319
|
+
});
|
|
320
|
+
it('accepts exactly year 3000 timestamp (maximum boundary)', () => {
|
|
321
|
+
// Year 3000 is exactly LATEST_VALID_TIMESTAMP_MS - should be VALID
|
|
322
|
+
const extremeEval = {
|
|
323
|
+
timestamp: '3000-01-01T00:00:00Z',
|
|
324
|
+
evaluationName: 'test-boundary-max',
|
|
325
|
+
};
|
|
326
|
+
const result = evaluationsToOTLPTraces([extremeEval]);
|
|
327
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
328
|
+
const event = span.events?.[0];
|
|
329
|
+
// Verify the conversion succeeds
|
|
330
|
+
assert.ok(span.startTimeUnixNano, 'startTimeUnixNano should exist');
|
|
331
|
+
assert.ok(event?.timeUnixNano, 'event timeUnixNano should exist');
|
|
332
|
+
// Year 3000 = ~32503680000000 ms from epoch
|
|
333
|
+
const startNano = BigInt(span.startTimeUnixNano);
|
|
334
|
+
const expectedMs = new Date('3000-01-01T00:00:00Z').getTime();
|
|
335
|
+
const expectedNs = BigInt(expectedMs) * BigInt(1_000_000);
|
|
336
|
+
assert.strictEqual(startNano, expectedNs, 'Year 3000 boundary should be precise, not fallback');
|
|
337
|
+
});
|
|
338
|
+
it('uses fallback for year 9999 timestamps (R4 validation)', () => {
|
|
339
|
+
// Year 9999 exceeds MAX_TIMESTAMP_MS (year 3000) - should use fallback
|
|
340
|
+
const maxEval = {
|
|
341
|
+
timestamp: '9999-12-31T23:59:59Z',
|
|
342
|
+
evaluationName: 'test-max-date',
|
|
343
|
+
};
|
|
344
|
+
const result = evaluationsToOTLPTraces([maxEval]);
|
|
345
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
346
|
+
// Should use fallback time (Date.now), not the extreme timestamp
|
|
347
|
+
assert.ok(span.startTimeUnixNano);
|
|
348
|
+
const startNano = BigInt(span.startTimeUnixNano);
|
|
349
|
+
// Year 9999 would be ~253402300799000ms; fallback should be much smaller (near now)
|
|
350
|
+
const year9999Ms = new Date('9999-12-31T23:59:59Z').getTime();
|
|
351
|
+
const year9999Ns = BigInt(year9999Ms) * BigInt(1_000_000);
|
|
352
|
+
// Verify we did NOT use the extreme timestamp
|
|
353
|
+
assert.ok(startNano < year9999Ns, 'Should use fallback time, not year 9999 timestamp');
|
|
354
|
+
// Fallback should be within 1 day of now
|
|
355
|
+
const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
|
|
356
|
+
const diff = startNano > nowNs ? startNano - nowNs : nowNs - startNano;
|
|
357
|
+
assert.ok(diff < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
|
|
358
|
+
});
|
|
359
|
+
it('uses fallback for invalid date strings', () => {
|
|
360
|
+
const invalidEval = {
|
|
361
|
+
timestamp: 'not-a-valid-date',
|
|
362
|
+
evaluationName: 'test-invalid',
|
|
363
|
+
};
|
|
364
|
+
const result = evaluationsToOTLPTraces([invalidEval]);
|
|
365
|
+
const span = result.resourceSpans[0].scopeSpans[0].spans[0];
|
|
366
|
+
const event = span.events?.[0];
|
|
367
|
+
// Should fall back to current time, not crash
|
|
368
|
+
assert.ok(span.startTimeUnixNano);
|
|
369
|
+
assert.ok(event?.timeUnixNano);
|
|
370
|
+
// Fallback time should be reasonable (within last day)
|
|
371
|
+
const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
|
|
372
|
+
const startNano = BigInt(span.startTimeUnixNano);
|
|
373
|
+
const diff = startNano - nowNs;
|
|
374
|
+
assert.ok(diff < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
|
|
375
|
+
});
|
|
376
|
+
it('uses fallback for timestamps before year 2000 (R4 validation)', () => {
    // Timestamps earlier than MIN_TIMESTAMP_MS (year 2000) are rejected by
    // R4 validation, so the converter substitutes the current time instead.
    const preY2kEvaluation = {
        timestamp: '1999-12-31T23:59:59Z',
        evaluationName: 'test-old-date',
    };
    const traces = evaluationsToOTLPTraces([preY2kEvaluation]);
    const span = traces.resourceSpans[0].scopeSpans[0].spans[0];
    assert.ok(span.startTimeUnixNano);
    const actualStartNs = BigInt(span.startTimeUnixNano);
    // The rejected input would map to ~946598399000 ms; a Date.now()-based
    // fallback is necessarily far larger than that.
    const rejectedMs = new Date('1999-12-31T23:59:59Z').getTime();
    const rejectedNs = BigInt(rejectedMs) * BigInt(1_000_000);
    assert.ok(actualStartNs > rejectedNs, 'Should use fallback time, not year 1999 timestamp');
    // The fallback must also sit close to "now" (within one day either way).
    const nowNs = BigInt(Date.now()) * BigInt(1_000_000);
    const delta = actualStartNs > nowNs ? actualStartNs - nowNs : nowNs - actualStartNs;
    assert.ok(delta < BigInt(86400_000_000_000), 'Fallback should be within 1 day of now');
});
|
|
397
|
+
});
|
|
398
|
+
// H3 FIX: BigInt timestamp handling to prevent integer overflow
describe('BigInt timestamp handling', () => {
    it('handles timestamps near MAX_SAFE_INTEGER without precision loss', () => {
        // Year 2100 (~4102444800000 ms) becomes ~4.1e18 ns — beyond what a
        // double can represent exactly, so BigInt math is required.
        const farFutureEval = {
            timestamp: '2100-01-01T00:00:00Z',
            evaluationName: 'test-future',
        };
        const traces = evaluationsToOTLPTraces([farFutureEval]);
        const span = traces.resourceSpans[0].scopeSpans[0].spans[0];
        const event = span.events?.[0];
        // All three nanosecond fields must be present.
        assert.ok(span.startTimeUnixNano, 'startTimeUnixNano should exist');
        assert.ok(span.endTimeUnixNano, 'endTimeUnixNano should exist');
        assert.ok(event?.timeUnixNano, 'event timeUnixNano should exist');
        const actualStartNs = BigInt(span.startTimeUnixNano);
        const actualEventNs = BigInt(event.timeUnixNano);
        // The value genuinely exceeds Number.MAX_SAFE_INTEGER...
        assert.ok(actualStartNs > BigInt(Number.MAX_SAFE_INTEGER), 'Timestamp should exceed MAX_SAFE_INTEGER');
        // ...and matches the exact expected nanosecond count — without
        // BigInt the result would carry trailing zeros or wrong digits.
        const expectedNs = BigInt(new Date('2100-01-01T00:00:00Z').getTime()) * BigInt(1_000_000);
        assert.strictEqual(actualStartNs, expectedNs, 'Nanosecond value should match expected');
        assert.strictEqual(actualEventNs, expectedNs, 'Event timestamp should match');
    });
    it('handles current timestamps correctly', () => {
        // Even present-day timestamps exceed MAX_SAFE_INTEGER once scaled
        // to nanoseconds.
        const currentEval = {
            timestamp: '2026-02-01T10:00:00Z',
            evaluationName: 'test-current',
        };
        const traces = evaluationsToOTLPTraces([currentEval]);
        const span = traces.resourceSpans[0].scopeSpans[0].spans[0];
        const expectedNs = BigInt(new Date('2026-02-01T10:00:00Z').getTime()) * BigInt(1_000_000);
        assert.strictEqual(BigInt(span.startTimeUnixNano), expectedNs, 'Current timestamp should be precise');
    });
});
|
|
438
|
+
});
|
|
439
|
+
describe('exportToLangfuse', () => {
|
|
440
|
+
// Shared fixtures: a Langfuse config with a small batch size (so batching
// paths get exercised) and three evaluations with distinct names and scores.
const mockConfig = {
    endpoint: 'https://cloud.langfuse.com',
    publicKey: 'pk-test',
    secretKey: 'sk-test',
    batchSize: 2,
    timeoutMs: 5000,
};
// Built programmatically: evalN at 10:00:0(N-1)Z with the matching score.
const testEvaluations = [0.8, 0.9, 0.7].map((scoreValue, index) => ({
    timestamp: `2024-01-15T10:00:0${index}Z`,
    evaluationName: `eval${index + 1}`,
    scoreValue,
}));
|
|
452
|
+
it('batches evaluations correctly', async () => {
    // Record every outgoing request so the batch count can be verified.
    const recordedRequests = [];
    const originalFetch = globalThis.fetch;
    globalThis.fetch = mock.fn(async (url, opts) => {
        recordedRequests.push({ url: url.toString(), body: opts?.body });
        return new Response('{}', { status: HttpStatus.OK });
    });
    try {
        const result = await exportToLangfuse(testEvaluations, mockConfig);
        assert.strictEqual(result.success, true);
        assert.strictEqual(result.evaluationsExported, 3);
        // batchSize 2 splits 3 evaluations into 2 requests.
        assert.strictEqual(result.batches, 2);
        assert.strictEqual(result.failed, 0);
        assert.ok(result.durationMs >= 0);
        assert.strictEqual(recordedRequests.length, 2);
        assert.ok(recordedRequests[0].url.includes('/v1/traces'));
    } finally {
        globalThis.fetch = originalFetch;
    }
});
|
|
475
|
+
it('handles HTTP errors', async () => {
    // A 401 from the API should surface as a failed result, not a throw.
    const originalFetch = globalThis.fetch;
    globalThis.fetch = mock.fn(async () => new Response('Unauthorized', { status: HttpStatus.UNAUTHORIZED }));
    try {
        const result = await exportToLangfuse([testEvaluations[0]], mockConfig);
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.evaluationsExported, 0);
        assert.strictEqual(result.failed, 1);
        assert.ok(result.errors);
        assert.ok(result.errors[0].includes('HTTP 401'));
    } finally {
        globalThis.fetch = originalFetch;
    }
});
|
|
492
|
+
it('handles network errors', async () => {
    // A thrown fetch error should be caught and reported, not propagated.
    const originalFetch = globalThis.fetch;
    globalThis.fetch = mock.fn(async () => {
        throw new Error('Network error');
    });
    try {
        const result = await exportToLangfuse([testEvaluations[0]], mockConfig);
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.failed, 1);
        assert.ok(result.errors);
        assert.ok(result.errors[0].includes('Network error'));
    } finally {
        globalThis.fetch = originalFetch;
    }
});
|
|
508
|
+
it('handles empty evaluations array', async () => {
    // No work to do: the export succeeds immediately with zeroed counters.
    const { success, evaluationsExported, batches, failed } = await exportToLangfuse([], mockConfig);
    assert.strictEqual(success, true);
    assert.strictEqual(evaluationsExported, 0);
    assert.strictEqual(batches, 0);
    assert.strictEqual(failed, 0);
});
|
|
515
|
+
it('includes correct authorization header', async () => {
    // Capture outgoing request headers to verify Basic auth is attached.
    let capturedHeaders = {};
    const originalFetch = globalThis.fetch;
    globalThis.fetch = mock.fn(async (_url, opts) => {
        capturedHeaders = Object.fromEntries(Object.entries(opts?.headers || {}));
        return new Response('{}', { status: HttpStatus.OK });
    });
    try {
        await exportToLangfuse([testEvaluations[0]], mockConfig);
        // Langfuse auth is HTTP Basic built from publicKey:secretKey.
        assert.ok(capturedHeaders['Authorization']);
        assert.ok(capturedHeaders['Authorization'].startsWith('Basic '));
        assert.strictEqual(capturedHeaders['Content-Type'], 'application/json');
    } finally {
        globalThis.fetch = originalFetch;
    }
});
|
|
532
|
+
it('handles partial batch failures', async () => {
    // The first request succeeds and every later one fails with a 500, so
    // with batchSize 2 the three evaluations split into one good batch (2
    // evals) and one failing batch (1 eval).
    let requestCount = 0;
    const originalFetch = globalThis.fetch;
    globalThis.fetch = mock.fn(async () => {
        requestCount += 1;
        return requestCount === 1
            ? new Response('{}', { status: HttpStatus.OK })
            : new Response('Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR });
    });
    try {
        const result = await exportToLangfuse(testEvaluations, { ...mockConfig, batchSize: 2 });
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.evaluationsExported, 2);
        assert.strictEqual(result.failed, 1);
        assert.strictEqual(result.batches, 2);
    } finally {
        globalThis.fetch = originalFetch;
    }
});
|
|
557
|
+
// C2 FIX: Credential sanitization tests
describe('credential sanitization in error responses', () => {
    /**
     * Runs a single-evaluation export against a mocked 401 response whose
     * body contains `leakText`, capturing what gets logged via console.error.
     * All patched globals are restored before returning.
     * @param {string} leakText - response body containing fake credentials
     * @returns {Promise<string>} the last message passed to console.error
     */
    async function captureErrorLogFor(leakText) {
        const originalFetch = globalThis.fetch;
        const originalConsoleError = console.error;
        let loggedMessage = '';
        console.error = (msg) => { loggedMessage = msg; };
        globalThis.fetch = mock.fn(async () => new Response(leakText, { status: HttpStatus.UNAUTHORIZED }));
        try {
            await exportToLangfuse([testEvaluations[0]], mockConfig);
        } finally {
            globalThis.fetch = originalFetch;
            console.error = originalConsoleError;
        }
        return loggedMessage;
    }
    it('sanitizes Basic auth tokens in error text', async () => {
        const logged = await captureErrorLogFor('Invalid credentials: Basic cGstdGVzdDpzay10ZXN0');
        assert.ok(!logged.includes('cGstdGVzdDpzay10ZXN0'), 'Base64 credentials should be redacted');
        assert.ok(logged.includes('[REDACTED]'), 'Should contain [REDACTED]');
    });
    it('sanitizes pk-xxx:sk-xxx patterns in error text', async () => {
        const logged = await captureErrorLogFor('Bad key: pk-mykey123:sk-mysecret456');
        assert.ok(!logged.includes('pk-mykey123'), 'Public key should be redacted');
        assert.ok(!logged.includes('sk-mysecret456'), 'Secret key should be redacted');
        assert.ok(logged.includes('[REDACTED]'), 'Should contain [REDACTED]');
    });
    it('sanitizes Bearer tokens in error text', async () => {
        const logged = await captureErrorLogFor('Invalid Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9');
        assert.ok(!logged.includes('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'), 'JWT should be redacted');
        assert.ok(logged.includes('[REDACTED]'), 'Should contain [REDACTED]');
    });
});
|
|
615
|
+
// BACKLOG: Concurrent exports test
describe('concurrent exports', () => {
    it('handles multiple simultaneous exports without interference', async () => {
        const fetchCalls = [];
        let batchCounter = 0;
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => {
            fetchCalls.push({ batch: ++batchCounter, timestamp: Date.now() });
            // Simulated network latency makes the three exports overlap.
            await new Promise((resolve) => setTimeout(resolve, 10));
            return new Response('{}', { status: HttpStatus.OK });
        });
        try {
            // Launch 3 concurrent single-evaluation exports.
            const singleEvalConfig = { ...mockConfig, batchSize: 1 };
            const results = await Promise.all([
                exportToLangfuse([testEvaluations[0]], singleEvalConfig),
                exportToLangfuse([testEvaluations[1]], singleEvalConfig),
                exportToLangfuse([testEvaluations[2]], singleEvalConfig),
            ]);
            // Every export succeeds independently.
            for (const result of results) {
                assert.strictEqual(result.success, true);
            }
            // Three exports → three fetches, three evaluations in total.
            assert.strictEqual(fetchCalls.length, 3);
            const totalExported = results.reduce((sum, r) => sum + r.evaluationsExported, 0);
            assert.strictEqual(totalExported, 3);
        } finally {
            globalThis.fetch = originalFetch;
        }
    });
    it('prevents cascade failures in concurrent exports', async () => {
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async (_url, options) => {
            // Identify the export carrying eval2 by inspecting the payload;
            // fail only that one with a non-retried 400 client error.
            if (options?.body?.includes('eval2')) {
                return new Response('Bad Request', { status: HttpStatus.BAD_REQUEST });
            }
            return new Response('{}', { status: HttpStatus.OK });
        });
        try {
            const config = { ...mockConfig, batchSize: 1, timeoutMs: 1000 };
            const [result1, result2, result3] = await Promise.all([
                exportToLangfuse([testEvaluations[0]], config),
                exportToLangfuse([testEvaluations[1]], config),
                exportToLangfuse([testEvaluations[2]], config),
            ]);
            // A failure in one export must not leak into its siblings.
            assert.ok(result1.success, 'First export should succeed');
            assert.ok(!result2.success, 'Second export should fail (400 client error)');
            assert.ok(result3.success, 'Third export should succeed');
        } finally {
            globalThis.fetch = originalFetch;
        }
    });
});
|
|
678
|
+
// BACKLOG: Memory threshold behavior test
describe('memory protection', () => {
    /** Builds `count` trivial evaluations with distinct names. */
    const buildEvaluations = (count) =>
        Array.from({ length: count }, (_, i) => ({
            timestamp: '2024-01-15T10:00:00Z',
            evaluationName: `eval-${i}`,
            scoreValue: 0.5,
        }));
    it('aborts export when memory exceeds threshold', async () => {
        // First memory probe reports 100MB; every later probe reports 700MB,
        // which is above the abort threshold.
        const originalMemoryUsage = process.memoryUsage;
        let probeCount = 0;
        process.memoryUsage = (() => {
            probeCount += 1;
            return {
                heapUsed: (probeCount > 1 ? 700 : 100) * 1024 * 1024,
                heapTotal: 1024 * 1024 * 1024,
                external: 0,
                arrayBuffers: 0,
                rss: 0,
            };
        });
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => new Response('{}', { status: HttpStatus.OK }));
        try {
            const result = await exportToLangfuse(buildEvaluations(100), {
                ...mockConfig,
                batchSize: 10, // 10 batches needed
            });
            // The export should stop partway through rather than exhaust memory.
            assert.strictEqual(result.success, false);
            assert.ok(result.failed > 0, 'Should have some failed evaluations');
            assert.ok(result.errors?.some((e) => e.includes('Memory limit exceeded')));
            assert.ok(result.evaluationsExported < 100, 'Should abort before exporting all');
        } finally {
            process.memoryUsage = originalMemoryUsage;
            globalThis.fetch = originalFetch;
        }
    });
    it('logs warning when memory is high but continues', async () => {
        const warnings = [];
        const originalWarn = console.warn;
        console.warn = (msg) => { warnings.push(msg); };
        // 450MB heap: above the 400MB warning threshold, below the abort limit.
        const originalMemoryUsage = process.memoryUsage;
        process.memoryUsage = (() => ({
            heapUsed: 450 * 1024 * 1024,
            heapTotal: 1024 * 1024 * 1024,
            external: 0,
            arrayBuffers: 0,
            rss: 0,
        }));
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => new Response('{}', { status: HttpStatus.OK }));
        try {
            const result = await exportToLangfuse(buildEvaluations(10), {
                ...mockConfig,
                batchSize: 10,
            });
            // High-but-not-critical memory is a warning, not a failure.
            assert.strictEqual(result.success, true);
            assert.strictEqual(result.evaluationsExported, 10);
            assert.ok(warnings.some((w) => w.includes('High memory usage')), 'Should warn about high memory usage');
        } finally {
            console.warn = originalWarn;
            process.memoryUsage = originalMemoryUsage;
            globalThis.fetch = originalFetch;
        }
    });
});
|
|
762
|
+
// C1 FIX: DNS rebinding protection tests
describe('DNS rebinding protection', () => {
    it('re-validates endpoint before each batch', async () => {
        let requestCount = 0;
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => {
            requestCount += 1;
            return new Response('{}', { status: HttpStatus.OK });
        });
        try {
            // batchSize 1 with 3 evaluations → 3 batches, each of which must
            // pass URL re-validation before its fetch is issued.
            const result = await exportToLangfuse(testEvaluations, { ...mockConfig, batchSize: 1 });
            assert.strictEqual(result.success, true);
            assert.strictEqual(result.batches, 3);
            assert.strictEqual(requestCount, 3);
        } finally {
            globalThis.fetch = originalFetch;
        }
    });
    it('fails batch if endpoint becomes invalid mid-export', async () => {
        // DNS rebinding defense: if the endpoint were swapped to a blocked
        // address between batches, each batch must fail validation before
        // any request reaches the network.
        let requestCount = 0;
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => {
            requestCount += 1;
            return new Response('{}', { status: HttpStatus.OK });
        });
        try {
            const result = await exportToLangfuse(testEvaluations, {
                ...mockConfig,
                endpoint: 'http://localhost', // HTTP not allowed, will fail validation
                batchSize: 1,
            });
            assert.strictEqual(result.success, false);
            assert.strictEqual(result.failed, 3);
            assert.strictEqual(requestCount, 0); // No fetch calls made
            assert.ok(result.errors?.some((e) => e.includes('Endpoint validation failed')));
        } finally {
            globalThis.fetch = originalFetch;
        }
    });
    it('continues processing valid batches after endpoint re-validation', async () => {
        let requestCount = 0;
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => {
            requestCount += 1;
            return new Response('{}', { status: HttpStatus.OK });
        });
        try {
            const result = await exportToLangfuse(testEvaluations, { ...mockConfig, batchSize: 2 });
            assert.strictEqual(result.success, true);
            assert.strictEqual(result.batches, 2); // 3 evals / batch 2 = 2 batches
            assert.strictEqual(requestCount, 2);
        } finally {
            globalThis.fetch = originalFetch;
        }
    });
});
|
|
834
|
+
// H1 FIX: Retry logic tests
describe('retry logic for transient failures', () => {
    /**
     * Installs `responder` (called with the 1-based attempt number; may
     * return a Response or throw) as the fetch mock, exports one evaluation
     * with a short timeout, restores fetch, and reports the export result
     * plus how many fetch attempts were made.
     */
    async function runSingleExport(responder) {
        let attempt = 0;
        const originalFetch = globalThis.fetch;
        globalThis.fetch = mock.fn(async () => responder(++attempt));
        try {
            const result = await exportToLangfuse([testEvaluations[0]], {
                ...mockConfig,
                timeoutMs: 100, // Short timeout for faster test
            });
            return { result, attempts: attempt };
        } finally {
            globalThis.fetch = originalFetch;
        }
    }
    it('retries on 429 rate limit response', async () => {
        // First 2 attempts are rate-limited, the third succeeds.
        const { result, attempts } = await runSingleExport((attempt) =>
            attempt <= 2
                ? new Response('Rate limited', { status: HttpStatus.TOO_MANY_REQUESTS })
                : new Response('{}', { status: HttpStatus.OK }));
        assert.strictEqual(result.success, true);
        assert.strictEqual(result.evaluationsExported, 1);
        assert.strictEqual(attempts, 3); // 2 retried failures + 1 success
    });
    it('retries on 500 server error', async () => {
        const { result, attempts } = await runSingleExport((attempt) =>
            attempt === 1
                ? new Response('Internal Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR })
                : new Response('{}', { status: HttpStatus.OK }));
        assert.strictEqual(result.success, true);
        assert.strictEqual(result.evaluationsExported, 1);
        assert.strictEqual(attempts, 2);
    });
    it('retries on 503 service unavailable', async () => {
        const { result, attempts } = await runSingleExport((attempt) =>
            attempt === 1
                ? new Response('Service Unavailable', { status: HttpStatus.SERVICE_UNAVAILABLE })
                : new Response('{}', { status: HttpStatus.OK }));
        assert.strictEqual(result.success, true);
        assert.strictEqual(attempts, 2);
    });
    it('does not retry on 400 bad request', async () => {
        // 4xx (other than 429) is a client error and must not be retried.
        const { result, attempts } = await runSingleExport(() =>
            new Response('Bad Request', { status: HttpStatus.BAD_REQUEST }));
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.failed, 1);
        assert.strictEqual(attempts, 1);
    });
    it('gives up after max retries on persistent 5xx', async () => {
        const { result, attempts } = await runSingleExport(() =>
            new Response('Server Error', { status: HttpStatus.INTERNAL_SERVER_ERROR }));
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.failed, 1);
        assert.strictEqual(attempts, 4); // 1 initial + 3 retries
        assert.ok(result.errors?.[0].includes('HTTP 500'));
    });
    // CRITICAL FIX: Test that network errors are retried (not just AbortError)
    it('retries on network errors (DNS failure, connection reset)', async () => {
        const { result, attempts } = await runSingleExport((attempt) => {
            // First 2 attempts fail at the network layer, the third succeeds.
            if (attempt <= 2) {
                const error = new TypeError('fetch failed');
                error.cause = new Error('getaddrinfo ENOTFOUND invalid-host.example.com');
                throw error;
            }
            return new Response('{}', { status: HttpStatus.OK });
        });
        assert.strictEqual(result.success, true);
        assert.strictEqual(result.evaluationsExported, 1);
        assert.strictEqual(attempts, 3); // 2 retried failures + 1 success
    });
    it('gives up after max retries on persistent network errors', async () => {
        const { result, attempts } = await runSingleExport(() => {
            const error = new TypeError('fetch failed');
            error.cause = new Error('ECONNREFUSED');
            throw error;
        });
        assert.strictEqual(result.success, false);
        assert.strictEqual(result.failed, 1);
        assert.strictEqual(attempts, 4); // 1 initial + 3 retries
        assert.ok(result.errors?.[0].includes('retries exhausted'));
    });
});
|
|
1005
|
+
});
|
|
1006
|
+
});
|
|
1007
|
+
//# sourceMappingURL=langfuse-export.test.js.map
|