langwatch 0.1.7 → 0.3.0-prerelease.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.editorconfig +16 -0
- package/LICENSE +7 -0
- package/README.md +268 -1
- package/copy-types.sh +19 -8
- package/examples/langchain/.env.example +2 -0
- package/examples/langchain/README.md +42 -0
- package/examples/langchain/package-lock.json +2930 -0
- package/examples/langchain/package.json +27 -0
- package/examples/langchain/src/cli-markdown.d.ts +137 -0
- package/examples/langchain/src/index.ts +109 -0
- package/examples/langchain/tsconfig.json +25 -0
- package/examples/langgraph/.env.example +2 -0
- package/examples/langgraph/README.md +42 -0
- package/examples/langgraph/package-lock.json +3031 -0
- package/examples/langgraph/package.json +28 -0
- package/examples/langgraph/src/cli-markdown.d.ts +137 -0
- package/examples/langgraph/src/index.ts +196 -0
- package/examples/langgraph/tsconfig.json +25 -0
- package/examples/mastra/.env.example +2 -0
- package/examples/mastra/README.md +57 -0
- package/examples/mastra/package-lock.json +5296 -0
- package/examples/mastra/package.json +32 -0
- package/examples/mastra/src/cli-markdown.d.ts +137 -0
- package/examples/mastra/src/index.ts +120 -0
- package/examples/mastra/src/mastra/agents/weather-agent.ts +30 -0
- package/examples/mastra/src/mastra/index.ts +21 -0
- package/examples/mastra/src/mastra/tools/weather-tool.ts +102 -0
- package/examples/mastra/tsconfig.json +25 -0
- package/examples/vercel-ai/.env.example +2 -0
- package/examples/vercel-ai/README.md +38 -0
- package/examples/vercel-ai/package-lock.json +2571 -0
- package/examples/vercel-ai/package.json +27 -0
- package/examples/vercel-ai/src/cli-markdown.d.ts +137 -0
- package/examples/vercel-ai/src/index.ts +110 -0
- package/examples/vercel-ai/src/instrumentation.ts +9 -0
- package/examples/vercel-ai/tsconfig.json +25 -0
- package/package.json +80 -33
- package/src/__tests__/client-browser.test.ts +92 -0
- package/src/__tests__/client-node.test.ts +76 -0
- package/src/__tests__/client.test.ts +71 -0
- package/src/__tests__/integration/client-browser.test.ts +46 -0
- package/src/__tests__/integration/client-node.test.ts +46 -0
- package/src/client-browser.ts +70 -0
- package/src/client-node.ts +82 -0
- package/src/client-shared.ts +72 -0
- package/src/client.ts +119 -0
- package/src/evaluation/__tests__/record-evaluation.test.ts +112 -0
- package/src/evaluation/__tests__/run-evaluation.test.ts +171 -0
- package/src/evaluation/index.ts +2 -0
- package/src/evaluation/record-evaluation.ts +101 -0
- package/src/evaluation/run-evaluation.ts +133 -0
- package/src/evaluation/tracer.ts +3 -0
- package/src/evaluation/types.ts +23 -0
- package/src/index.ts +10 -591
- package/src/internal/api/__tests__/errors.test.ts +98 -0
- package/src/internal/api/client.ts +30 -0
- package/src/internal/api/errors.ts +32 -0
- package/src/internal/generated/types/.gitkeep +0 -0
- package/src/observability/__tests__/integration/base.test.ts +74 -0
- package/src/observability/__tests__/integration/browser-setup-ordering.test.ts +60 -0
- package/src/observability/__tests__/integration/complex-nested-spans.test.ts +29 -0
- package/src/observability/__tests__/integration/error-handling.test.ts +24 -0
- package/src/observability/__tests__/integration/langwatch-disabled-otel.test.ts +24 -0
- package/src/observability/__tests__/integration/langwatch-first-then-vercel.test.ts +24 -0
- package/src/observability/__tests__/integration/multiple-setup-attempts.test.ts +27 -0
- package/src/observability/__tests__/integration/otel-ordering.test.ts +27 -0
- package/src/observability/__tests__/integration/vercel-configurations.test.ts +20 -0
- package/src/observability/__tests__/integration/vercel-first-then-langwatch.test.ts +27 -0
- package/src/observability/__tests__/span.test.ts +214 -0
- package/src/observability/__tests__/trace.test.ts +180 -0
- package/src/observability/exporters/index.ts +1 -0
- package/src/observability/exporters/langwatch-exporter.ts +53 -0
- package/src/observability/index.ts +4 -0
- package/src/observability/instrumentation/langchain/__tests__/integration/langchain-chatbot.test.ts +112 -0
- package/src/observability/instrumentation/langchain/__tests__/langchain.test.ts +284 -0
- package/src/observability/instrumentation/langchain/index.ts +624 -0
- package/src/observability/processors/__tests__/filterable-batch-span-exporter.test.ts +98 -0
- package/src/observability/processors/filterable-batch-span-processor.ts +99 -0
- package/src/observability/processors/index.ts +1 -0
- package/src/observability/semconv/attributes.ts +185 -0
- package/src/observability/semconv/events.ts +42 -0
- package/src/observability/semconv/index.ts +16 -0
- package/src/observability/semconv/values.ts +159 -0
- package/src/observability/span.ts +728 -0
- package/src/observability/trace.ts +301 -0
- package/src/prompt/__tests__/prompt.test.ts +139 -0
- package/src/prompt/get-prompt-version.ts +49 -0
- package/src/prompt/get-prompt.ts +44 -0
- package/src/prompt/index.ts +3 -0
- package/src/prompt/prompt.ts +133 -0
- package/src/prompt/service.ts +221 -0
- package/src/prompt/tracer.ts +3 -0
- package/src/prompt/types.ts +0 -0
- package/ts-to-zod.config.js +11 -0
- package/tsconfig.json +3 -9
- package/tsup.config.ts +11 -1
- package/vitest.config.ts +1 -0
- package/dist/chunk-FWBCQQYZ.mjs +0 -711
- package/dist/chunk-FWBCQQYZ.mjs.map +0 -1
- package/dist/index.d.mts +0 -1010
- package/dist/index.d.ts +0 -1010
- package/dist/index.js +0 -27294
- package/dist/index.js.map +0 -1
- package/dist/index.mjs +0 -959
- package/dist/index.mjs.map +0 -1
- package/dist/utils-B0pgWcps.d.mts +0 -303
- package/dist/utils-B0pgWcps.d.ts +0 -303
- package/dist/utils.d.mts +0 -2
- package/dist/utils.d.ts +0 -2
- package/dist/utils.js +0 -703
- package/dist/utils.js.map +0 -1
- package/dist/utils.mjs +0 -11
- package/dist/utils.mjs.map +0 -1
- package/example/.env.example +0 -12
- package/example/.eslintrc.json +0 -26
- package/example/LICENSE +0 -13
- package/example/README.md +0 -12
- package/example/app/(chat)/chat/[id]/page.tsx +0 -60
- package/example/app/(chat)/layout.tsx +0 -14
- package/example/app/(chat)/page.tsx +0 -27
- package/example/app/actions.ts +0 -156
- package/example/app/globals.css +0 -76
- package/example/app/guardrails/page.tsx +0 -26
- package/example/app/langchain/page.tsx +0 -27
- package/example/app/langchain-rag/page.tsx +0 -28
- package/example/app/late-update/page.tsx +0 -27
- package/example/app/layout.tsx +0 -64
- package/example/app/login/actions.ts +0 -71
- package/example/app/login/page.tsx +0 -18
- package/example/app/manual/page.tsx +0 -27
- package/example/app/new/page.tsx +0 -5
- package/example/app/opengraph-image.png +0 -0
- package/example/app/share/[id]/page.tsx +0 -58
- package/example/app/signup/actions.ts +0 -111
- package/example/app/signup/page.tsx +0 -18
- package/example/app/twitter-image.png +0 -0
- package/example/auth.config.ts +0 -42
- package/example/auth.ts +0 -45
- package/example/components/button-scroll-to-bottom.tsx +0 -36
- package/example/components/chat-history.tsx +0 -49
- package/example/components/chat-list.tsx +0 -52
- package/example/components/chat-message-actions.tsx +0 -40
- package/example/components/chat-message.tsx +0 -80
- package/example/components/chat-panel.tsx +0 -139
- package/example/components/chat-share-dialog.tsx +0 -95
- package/example/components/chat.tsx +0 -84
- package/example/components/clear-history.tsx +0 -75
- package/example/components/empty-screen.tsx +0 -38
- package/example/components/external-link.tsx +0 -29
- package/example/components/footer.tsx +0 -19
- package/example/components/header.tsx +0 -114
- package/example/components/login-button.tsx +0 -42
- package/example/components/login-form.tsx +0 -97
- package/example/components/markdown.tsx +0 -9
- package/example/components/prompt-form.tsx +0 -115
- package/example/components/providers.tsx +0 -17
- package/example/components/sidebar-actions.tsx +0 -125
- package/example/components/sidebar-desktop.tsx +0 -19
- package/example/components/sidebar-footer.tsx +0 -16
- package/example/components/sidebar-item.tsx +0 -124
- package/example/components/sidebar-items.tsx +0 -42
- package/example/components/sidebar-list.tsx +0 -38
- package/example/components/sidebar-mobile.tsx +0 -31
- package/example/components/sidebar-toggle.tsx +0 -24
- package/example/components/sidebar.tsx +0 -21
- package/example/components/signup-form.tsx +0 -95
- package/example/components/stocks/events-skeleton.tsx +0 -31
- package/example/components/stocks/events.tsx +0 -30
- package/example/components/stocks/index.tsx +0 -36
- package/example/components/stocks/message.tsx +0 -134
- package/example/components/stocks/spinner.tsx +0 -16
- package/example/components/stocks/stock-purchase.tsx +0 -146
- package/example/components/stocks/stock-skeleton.tsx +0 -22
- package/example/components/stocks/stock.tsx +0 -210
- package/example/components/stocks/stocks-skeleton.tsx +0 -9
- package/example/components/stocks/stocks.tsx +0 -67
- package/example/components/tailwind-indicator.tsx +0 -14
- package/example/components/theme-toggle.tsx +0 -31
- package/example/components/ui/alert-dialog.tsx +0 -141
- package/example/components/ui/badge.tsx +0 -36
- package/example/components/ui/button.tsx +0 -57
- package/example/components/ui/codeblock.tsx +0 -148
- package/example/components/ui/dialog.tsx +0 -122
- package/example/components/ui/dropdown-menu.tsx +0 -205
- package/example/components/ui/icons.tsx +0 -507
- package/example/components/ui/input.tsx +0 -25
- package/example/components/ui/label.tsx +0 -26
- package/example/components/ui/select.tsx +0 -164
- package/example/components/ui/separator.tsx +0 -31
- package/example/components/ui/sheet.tsx +0 -140
- package/example/components/ui/sonner.tsx +0 -31
- package/example/components/ui/switch.tsx +0 -29
- package/example/components/ui/textarea.tsx +0 -24
- package/example/components/ui/tooltip.tsx +0 -30
- package/example/components/user-menu.tsx +0 -53
- package/example/components.json +0 -17
- package/example/instrumentation.ts +0 -11
- package/example/lib/chat/guardrails.tsx +0 -181
- package/example/lib/chat/langchain-rag.tsx +0 -191
- package/example/lib/chat/langchain.tsx +0 -112
- package/example/lib/chat/late-update.tsx +0 -208
- package/example/lib/chat/manual.tsx +0 -605
- package/example/lib/chat/vercel-ai.tsx +0 -576
- package/example/lib/hooks/use-copy-to-clipboard.tsx +0 -33
- package/example/lib/hooks/use-enter-submit.tsx +0 -23
- package/example/lib/hooks/use-local-storage.ts +0 -24
- package/example/lib/hooks/use-scroll-anchor.tsx +0 -86
- package/example/lib/hooks/use-sidebar.tsx +0 -60
- package/example/lib/hooks/use-streamable-text.ts +0 -25
- package/example/lib/types.ts +0 -41
- package/example/lib/utils.ts +0 -89
- package/example/middleware.ts +0 -8
- package/example/next-env.d.ts +0 -5
- package/example/next.config.js +0 -16
- package/example/package-lock.json +0 -9990
- package/example/package.json +0 -84
- package/example/pnpm-lock.yaml +0 -5712
- package/example/postcss.config.js +0 -6
- package/example/prettier.config.cjs +0 -34
- package/example/public/apple-touch-icon.png +0 -0
- package/example/public/favicon-16x16.png +0 -0
- package/example/public/favicon.ico +0 -0
- package/example/public/next.svg +0 -1
- package/example/public/thirteen.svg +0 -1
- package/example/public/vercel.svg +0 -1
- package/example/tailwind.config.ts +0 -81
- package/example/tsconfig.json +0 -35
- package/src/LangWatchExporter.ts +0 -91
- package/src/evaluations.ts +0 -219
- package/src/index.test.ts +0 -402
- package/src/langchain.ts +0 -557
- package/src/typeUtils.ts +0 -89
- package/src/types.ts +0 -79
- package/src/utils.ts +0 -205
- /package/src/{server/types → internal/generated/openapi}/.gitkeep +0 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// --- Mock setup ---
// Vitest lifts the vi.hoisted and vi.mock calls above this module's imports,
// so the tracer double exists before '../run-evaluation' is first evaluated.
const { mockStartActiveSpan } = vi.hoisted(() => {
  // Produces a fresh span double for every startActiveSpan invocation.
  const createDefaultSpan = () => ({
    setType: vi.fn(),
    setInput: vi.fn(),
    setMetrics: vi.fn(),
    setStatus: vi.fn(),
    setOutputEvaluation: vi.fn(),
    recordException: vi.fn(),
    end: vi.fn(),
    spanContext: () => ({ traceId: 'trace', spanId: 'span' }),
  });

  return {
    // Invokes the traced callback right away with a span double.
    mockStartActiveSpan: vi.fn((name, fn) => fn(createDefaultSpan())),
  };
});

vi.mock('../tracer', () => ({
  tracer: { startActiveSpan: mockStartActiveSpan },
}));

// All HTTP traffic in this file goes through this stub.
const mockFetch = vi.fn();
globalThis.fetch = mockFetch;

// Default client configuration; individual tests may override it with vi.doMock.
vi.mock('../../client', () => ({
  canAutomaticallyCaptureInput: () => true,
  getApiKey: () => 'test-key',
  getEndpoint: () => 'https://api',
}));
|
|
25
|
+
|
|
26
|
+
// --- Imports (Vitest hoists the vi.mock/vi.hoisted calls above these regardless of source order) ---
|
|
27
|
+
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
|
28
|
+
import { runEvaluation } from '../run-evaluation';
|
|
29
|
+
import { LangWatchApiError } from '../../internal/api/errors';
|
|
30
|
+
|
|
31
|
+
// Canned evaluator responses, one per status exercised by the tests below.
const baseProcessed = {
  status: 'processed',
  passed: true,
  score: 1,
  details: 'ok',
  label: 'label',
  cost: { currency: 'USD', amount: 0.1 },
};

const baseSkipped = {
  status: 'skipped',
  details: 'skipped',
};

const baseError = {
  status: 'error',
  details: 'fail',
  error_type: 'EvalError',
  traceback: ['trace'],
};

// Evaluation request shared by every test case.
const details = {
  name: 'test',
  data: { input: 'foo', output: 'bar' },
  evaluator: 'test-eval',
};
|
|
47
|
+
|
|
48
|
+
describe('runEvaluation', () => {
  /**
   * Builds the span double handed to the code under test.
   *
   * It includes `setStatus` because runEvaluation's catch block calls it; a
   * double without it would turn any failure inside the traced callback into
   * a "setStatus is not a function" TypeError that hides the real error.
   */
  const createSpanDouble = () => ({
    setType: vi.fn(),
    setInput: vi.fn(),
    setMetrics: vi.fn(),
    setStatus: vi.fn(),
    setOutputEvaluation: vi.fn(),
    recordException: vi.fn(),
    end: vi.fn(),
    spanContext: () => ({ traceId: 'trace', spanId: 'span' }),
  });

  /**
   * Re-imports runEvaluation against a client mock whose
   * canAutomaticallyCaptureInput returns `captureInput`, runs one processed
   * evaluation, and returns the span double that was used.
   */
  const runWithInputCapture = async (captureInput: boolean) => {
    // Drop the module cache so the dynamic import below picks up the doMock.
    vi.resetModules();
    vi.doMock('../../client', () => ({
      canAutomaticallyCaptureInput: () => captureInput,
      getApiKey: () => 'test-key',
      getEndpoint: () => 'https://api',
    }));
    const span = createSpanDouble();
    mockStartActiveSpan.mockImplementationOnce((name, fn) => fn(span));
    mockFetch.mockResolvedValueOnce({ ok: true, json: async () => ({ ...baseProcessed }) });
    const { runEvaluation: runEval } = await import('../run-evaluation.js');
    await runEval(details as any);
    return span;
  };

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns processed result', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ ...baseProcessed }),
    });
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('processed');
    if (result.status === 'processed') {
      expect(result.passed).toBe(true);
      expect(result.score).toBe(1);
      expect(result.details).toBe('ok');
      expect(result.label).toBe('label');
      expect(result.cost).toEqual({ currency: 'USD', amount: 0.1 });
    } else {
      throw new Error('Expected processed result');
    }
    expect(mockFetch).toHaveBeenCalledWith(
      expect.stringContaining('/api/evaluations/test-eval/evaluate'),
      expect.objectContaining({ method: 'POST' })
    );
  });

  it('returns skipped result', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ ...baseSkipped }),
    });
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('skipped');
    expect(result.details).toBe('skipped');
  });

  it('returns error result', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ ...baseError }),
    });
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('error');
    if (result.status === 'error') {
      expect(result.details).toBe('fail');
      expect(result.error_type).toBe('EvalError');
      expect(result.traceback).toEqual(['trace']);
    } else {
      throw new Error('Expected error result');
    }
  });

  it('returns unknown status as error', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ status: 'weird' }),
    });
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('error');
    if (result.status === 'error') {
      expect(result.error_type).toBe('UnknownStatus');
      expect(result.details).toContain('Unknown evaluation status');
    } else {
      throw new Error('Expected error result');
    }
  });

  it('throws LangWatchApiError on non-ok response', async () => {
    mockFetch.mockResolvedValueOnce({ ok: false, json: async () => ({}), status: 400, statusText: 'Bad', headers: { get: () => 'application/json' } });
    await expect(runEvaluation(details as any)).rejects.toBeInstanceOf(LangWatchApiError);
  });

  it('propagates fetch errors', async () => {
    mockFetch.mockRejectedValueOnce(new Error('network fail'));
    await expect(runEvaluation(details as any)).rejects.toThrow('network fail');
  });

  it('calls setInput if canAutomaticallyCaptureInput is true', async () => {
    const span = await runWithInputCapture(true);
    expect(span.setInput).toHaveBeenCalledWith(expect.objectContaining({ trace_id: 'trace' }));
  });

  it('does not call setInput if canAutomaticallyCaptureInput is false', async () => {
    const span = await runWithInputCapture(false);
    expect(span.setInput).not.toHaveBeenCalled();
  });
});
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { EvaluationRESTResult } from "../internal/generated/types/evaluations";
|
|
2
|
+
import * as intSemconv from "../observability/semconv";
|
|
3
|
+
import { Attributes, SpanStatusCode } from "@opentelemetry/api";
|
|
4
|
+
import { generate } from "xksuid";
|
|
5
|
+
import { tracer } from "./tracer";
|
|
6
|
+
|
|
7
|
+
/**
 * Outcome of an evaluation computed by the caller, as accepted by
 * `recordEvaluation`.
 */
export interface RecordedEvaluationDetails {
  /** Evaluation identifier; `recordEvaluation` generates an `eval_`-prefixed one when omitted. */
  evaluationId?: string;
  /** Evaluation name, forwarded in the recorded event. */
  name: string;
  /** Evaluation type, forwarded as-is in the recorded event. */
  type?: string;
  /** When truthy, the span is typed "guardrail" instead of "evaluation". */
  isGuardrail?: boolean;
  /** Outcome status; `recordEvaluation` treats a missing status as "processed". */
  status?: "processed" | "skipped" | "error";
  /** Pass/fail verdict, used for "processed" results. */
  passed?: boolean;
  /** Numeric score, used for "processed" results. */
  score?: number;
  /** Label, used for "processed" results. */
  label?: string;
  /** Free-text details; for "error" results it takes precedence over the error message. */
  details?: string;
  /** Cost; a bare number is interpreted by `recordEvaluation` as an amount in USD. */
  cost?: number | { currency: string; amount: number };
  /** Error behind an "error" status; its name becomes the result's error type. */
  error?: Error;
  /** Start and finish times; presumably Unix epoch milliseconds, going by the field names. */
  timestamps?: {
    startedAtUnixMs: number;
    finishedAtUnixMs: number;
  };
}
|
|
24
|
+
|
|
25
|
+
/**
 * Copies an Error into a plain object so it survives JSON.stringify.
 *
 * Error instances expose `message` and `stack` as non-enumerable properties,
 * so stringifying the Error itself yields `{}`.
 *
 * NOTE(review): the has_error/message/stacktrace field names follow the error
 * shape I believe the LangWatch collector expects — confirm against the
 * server schema.
 */
function toSerializableError(error: Error) {
  return {
    has_error: true,
    message: error.message,
    stacktrace: error.stack ? error.stack.split("\n") : [],
  };
}

/**
 * Records an evaluation that was computed by the caller.
 *
 * Opens a "record evaluation" span, attaches the evaluation as a span event
 * plus the normalized result as the span output, and ends the span. Failures
 * while writing to the span are recorded on the span and not rethrown.
 *
 * @param details - Evaluation outcome; a missing status is treated as "processed".
 * @param attributes - Optional extra attributes set on the span.
 */
export function recordEvaluation(
  details: RecordedEvaluationDetails,
  attributes?: Attributes,
) {
  let result: EvaluationRESTResult;
  const status = details.status || "processed";

  // Normalize the cost once. The nullish check (instead of truthiness) keeps
  // an explicit cost of 0; a bare number is interpreted as USD.
  const cost =
    details.cost == null
      ? undefined
      : typeof details.cost === "number"
        ? { currency: "USD", amount: details.cost }
        : details.cost;

  if (status === "skipped") {
    result = {
      status: "skipped",
      details: details.details,
    };
  } else if (status === "error") {
    result = {
      status: "error",
      error_type: details.error?.name || "Unknown",
      details: details.details || details.error?.message || "Unknown error",
    };
  } else {
    result = {
      status: "processed",
      passed: details.passed,
      score: details.score,
      label: details.label,
      details: details.details,
      // Only processed results carry a cost, and only when one was supplied.
      ...(cost ? { cost } : {}),
    };
  }

  tracer.startActiveSpan("record evaluation", (span) => {
    try {
      span.setType(details.isGuardrail ? "guardrail" : "evaluation");
      span.addEvent(intSemconv.ATTR_LANGWATCH_EVALUATION_CUSTOM, {
        json_encoded_event: JSON.stringify({
          evaluation_id: details.evaluationId ?? `eval_${generate()}`,
          name: details.name,
          type: details.type,
          is_guardrail: details.isGuardrail,
          status: result.status,
          passed: details.passed,
          score: details.score,
          label: details.label,
          details: details.details,
          cost: details.cost,
          // Serialize explicitly; a raw Error would be emitted as `{}`.
          error: details.error ? toSerializableError(details.error) : undefined,
          timestamps: details.timestamps,
        }),
      });

      span.setOutput(result);

      if (attributes) {
        span.setAttributes(attributes);
      }
      if (cost) {
        span.setMetrics({ cost: cost.amount });
      }
    } catch (error) {
      // Best effort: a failure to record must not break the caller.
      span.recordException(error as Error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error)?.message });
    } finally {
      span.end();
    }

    return;
  });
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { LangWatchApiError } from "../internal/api/errors";
|
|
2
|
+
import { canAutomaticallyCaptureInput, getApiKey, getEndpoint } from "../client";
|
|
3
|
+
import { Conversation } from "../internal/generated/types/evaluations";
|
|
4
|
+
import {
|
|
5
|
+
Evaluators,
|
|
6
|
+
EvaluatorTypes,
|
|
7
|
+
SingleEvaluationResult,
|
|
8
|
+
} from "../internal/generated/types/evaluators.generated";
|
|
9
|
+
import { RAGChunk } from "../internal/generated/types/tracer";
|
|
10
|
+
import { tracer } from "./tracer";
|
|
11
|
+
import { EvaluationResultModel } from "./types";
|
|
12
|
+
import { SpanStatusCode } from "@opentelemetry/api";
|
|
13
|
+
|
|
14
|
+
/**
 * Well-known fields an evaluation payload may carry. Every field is optional;
 * `EvaluationDetailsBase.data` also accepts an arbitrary record instead.
 */
export interface BasicEvaluationData {
  input?: string;
  output?: string;
  expected_output?: unknown;
  /** Either RAG chunks or plain strings. */
  contexts?: Array<RAGChunk> | Array<string>;
  /** Either RAG chunks or plain strings. */
  expected_contexts?: Array<RAGChunk> | Array<string>;
  conversation?: Conversation;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Fields shared by every kind of evaluation request passed to `runEvaluation`.
 */
export interface EvaluationDetailsBase {
  /** Optional name, sent with the request. */
  name?: string;
  /** Payload to evaluate, sent as the request's `data`. */
  data: BasicEvaluationData | Record<string, unknown>;
  /**
   * NOTE(review): `runEvaluation` builds its request from `data`, `name`,
   * `settings` and `asGuardrail` only; this field is not forwarded — confirm
   * whether that is intended.
   */
  contexts?: Array<RAGChunk> | Array<string>;
  /** NOTE(review): not forwarded by `runEvaluation` either — confirm. */
  conversation?: Conversation;
  /** When truthy, the span is typed "guardrail" and `as_guardrail` is sent as true. */
  asGuardrail?: boolean;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Evaluation request addressed by `slug`; `runEvaluation` uses the slug as the
 * evaluator id in the request path.
 */
export interface SavedEvaluationDetails extends EvaluationDetailsBase {
  slug: string;
  /** Settings sent with the request, untyped for this variant. */
  settings?: Record<string, unknown>;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Evaluation request addressed by evaluator type; `runEvaluation` uses
 * `evaluator` as the evaluator id in the request path.
 *
 * @typeParam T - Evaluator type; it selects the matching settings shape.
 */
export interface LangEvalsEvaluationDetails<T extends EvaluatorTypes>
  extends EvaluationDetailsBase {
  evaluator: T;
  /** Settings typed according to the chosen evaluator. */
  settings?: Evaluators[T]["settings"];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Any request `runEvaluation` accepts. The presence of `slug` is what
 * `runEvaluation` checks to tell the two variants apart.
 */
export type EvaluationDetails =
  | SavedEvaluationDetails
  | LangEvalsEvaluationDetails<EvaluatorTypes>;
|
|
45
|
+
|
|
46
|
+
/**
 * Runs an evaluation through the LangWatch API inside a "run evaluation" span.
 *
 * The request is POSTed to `/api/evaluations/{evaluatorId}/evaluate` on the
 * configured endpoint, where the evaluator id is `slug` when present and
 * `evaluator` otherwise. The response is normalized into a
 * `SingleEvaluationResult`; a status other than "processed", "skipped" or
 * "error" is reported as an error result with `error_type` "UnknownStatus".
 *
 * @param details - What to evaluate and with which evaluator.
 * @returns The normalized evaluation result.
 * @throws LangWatchApiError when the API answers with a non-ok response.
 *   Any other failure (for example a rejected fetch) is rethrown after being
 *   recorded on the span.
 */
export async function runEvaluation(
  details: EvaluationDetails,
): Promise<SingleEvaluationResult> {
  return await tracer.startActiveSpan("run evaluation", async (span) => {
    const spanType = details.asGuardrail ? "guardrail" : "evaluation";
    span.setType(spanType);

    try {
      // The id is interpolated verbatim into the path, so ids containing "/"
      // become additional path segments.
      const evaluatorId = "slug" in details ? details.slug : details.evaluator;

      const payload = {
        trace_id: span.spanContext().traceId,
        span_id: span.spanContext().spanId,
        data: details.data,
        name: details.name,
        settings: details.settings,
        as_guardrail: details.asGuardrail,
      };

      if (canAutomaticallyCaptureInput()) {
        span.setInput(payload);
      }

      const target = new URL(
        `/api/evaluations/${evaluatorId}/evaluate`,
        getEndpoint(),
      ).toString();

      const response = await fetch(target, {
        method: "POST",
        headers: {
          "X-Auth-Token": getApiKey(),
          "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
      });

      if (!response.ok) {
        const apiError = new LangWatchApiError("Unable to run evaluation", response);
        // Attach whatever the response body yields before surfacing the error.
        await apiError.safeParseBody(response);

        throw apiError;
      }

      const result: EvaluationResultModel = await response.json();

      span.setMetrics({ cost: result.cost?.amount });
      span.setOutputEvaluation(details.asGuardrail ?? false, result);

      switch (result.status) {
        case "processed":
          return {
            status: "processed",
            passed: result.passed,
            score: result.score,
            details: result.details,
            label: result.label,
            cost: result.cost,
          } as SingleEvaluationResult;

        case "skipped":
          return {
            status: "skipped",
            details: result.details,
          } as SingleEvaluationResult;

        case "error": {
          // Typed view over the error-only fields the model does not declare.
          const failure = result as EvaluationResultModel & {
            error_type?: string;
            traceback?: string[];
          };
          return {
            status: "error",
            error_type: failure.error_type || "Unknown",
            details: result.details || "Unknown error",
            traceback: failure.traceback || [],
          } as SingleEvaluationResult;
        }

        default:
          // The response is unvalidated JSON, so other statuses can still
          // arrive at runtime despite the declared union.
          return {
            status: "error",
            error_type: "UnknownStatus",
            details: `Unknown evaluation status: ${result.status}`,
            traceback: [],
          } as SingleEvaluationResult;
      }
    } catch (error) {
      span.recordException(error as Error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error)?.message });
      throw error;
    } finally {
      span.end();
    }
  });
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
 * Error that carries an HTTP status code and a body alongside its message.
 */
export class EvaluationError extends Error {
  /**
   * @param message - Human-readable error message.
   * @param httpStatus - HTTP status code stored on the error.
   * @param body - Body stored on the error, kept as-is.
   */
  constructor(
    message: string,
    readonly httpStatus: number,
    readonly body: unknown,
  ) {
    super(message);
    this.name = "EvaluationError";
  }
}
|
|
12
|
+
|
|
13
|
+
/**
 * Shape of the JSON body returned by the evaluate endpoint, as consumed by
 * `runEvaluation`.
 */
export interface EvaluationResultModel {
  status: "processed" | "skipped" | "error";
  passed?: boolean;
  score?: number;
  details?: string;
  label?: string;
  cost?: {
    currency: string;
    amount: number;
  };
  /** Error type reported with "error" results. */
  error_type?: string;
  /** Traceback lines reported with "error" results. */
  traceback?: string[];
}
|