langwatch 0.1.7 → 0.3.0-prerelease.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/.editorconfig +16 -0
  2. package/LICENSE +7 -0
  3. package/README.md +268 -1
  4. package/copy-types.sh +19 -8
  5. package/examples/langchain/.env.example +2 -0
  6. package/examples/langchain/README.md +42 -0
  7. package/examples/langchain/package-lock.json +2930 -0
  8. package/examples/langchain/package.json +27 -0
  9. package/examples/langchain/src/cli-markdown.d.ts +137 -0
  10. package/examples/langchain/src/index.ts +109 -0
  11. package/examples/langchain/tsconfig.json +25 -0
  12. package/examples/langgraph/.env.example +2 -0
  13. package/examples/langgraph/README.md +42 -0
  14. package/examples/langgraph/package-lock.json +3031 -0
  15. package/examples/langgraph/package.json +28 -0
  16. package/examples/langgraph/src/cli-markdown.d.ts +137 -0
  17. package/examples/langgraph/src/index.ts +196 -0
  18. package/examples/langgraph/tsconfig.json +25 -0
  19. package/examples/mastra/.env.example +2 -0
  20. package/examples/mastra/README.md +57 -0
  21. package/examples/mastra/package-lock.json +5296 -0
  22. package/examples/mastra/package.json +32 -0
  23. package/examples/mastra/src/cli-markdown.d.ts +137 -0
  24. package/examples/mastra/src/index.ts +120 -0
  25. package/examples/mastra/src/mastra/agents/weather-agent.ts +30 -0
  26. package/examples/mastra/src/mastra/index.ts +21 -0
  27. package/examples/mastra/src/mastra/tools/weather-tool.ts +102 -0
  28. package/examples/mastra/tsconfig.json +25 -0
  29. package/examples/vercel-ai/.env.example +2 -0
  30. package/examples/vercel-ai/README.md +38 -0
  31. package/examples/vercel-ai/package-lock.json +2571 -0
  32. package/examples/vercel-ai/package.json +27 -0
  33. package/examples/vercel-ai/src/cli-markdown.d.ts +137 -0
  34. package/examples/vercel-ai/src/index.ts +110 -0
  35. package/examples/vercel-ai/src/instrumentation.ts +9 -0
  36. package/examples/vercel-ai/tsconfig.json +25 -0
  37. package/package.json +80 -33
  38. package/src/__tests__/client-browser.test.ts +92 -0
  39. package/src/__tests__/client-node.test.ts +76 -0
  40. package/src/__tests__/client.test.ts +71 -0
  41. package/src/__tests__/integration/client-browser.test.ts +46 -0
  42. package/src/__tests__/integration/client-node.test.ts +46 -0
  43. package/src/client-browser.ts +70 -0
  44. package/src/client-node.ts +82 -0
  45. package/src/client-shared.ts +72 -0
  46. package/src/client.ts +119 -0
  47. package/src/evaluation/__tests__/record-evaluation.test.ts +112 -0
  48. package/src/evaluation/__tests__/run-evaluation.test.ts +171 -0
  49. package/src/evaluation/index.ts +2 -0
  50. package/src/evaluation/record-evaluation.ts +101 -0
  51. package/src/evaluation/run-evaluation.ts +133 -0
  52. package/src/evaluation/tracer.ts +3 -0
  53. package/src/evaluation/types.ts +23 -0
  54. package/src/index.ts +10 -591
  55. package/src/internal/api/__tests__/errors.test.ts +98 -0
  56. package/src/internal/api/client.ts +30 -0
  57. package/src/internal/api/errors.ts +32 -0
  58. package/src/internal/generated/types/.gitkeep +0 -0
  59. package/src/observability/__tests__/integration/base.test.ts +74 -0
  60. package/src/observability/__tests__/integration/browser-setup-ordering.test.ts +60 -0
  61. package/src/observability/__tests__/integration/complex-nested-spans.test.ts +29 -0
  62. package/src/observability/__tests__/integration/error-handling.test.ts +24 -0
  63. package/src/observability/__tests__/integration/langwatch-disabled-otel.test.ts +24 -0
  64. package/src/observability/__tests__/integration/langwatch-first-then-vercel.test.ts +24 -0
  65. package/src/observability/__tests__/integration/multiple-setup-attempts.test.ts +27 -0
  66. package/src/observability/__tests__/integration/otel-ordering.test.ts +27 -0
  67. package/src/observability/__tests__/integration/vercel-configurations.test.ts +20 -0
  68. package/src/observability/__tests__/integration/vercel-first-then-langwatch.test.ts +27 -0
  69. package/src/observability/__tests__/span.test.ts +214 -0
  70. package/src/observability/__tests__/trace.test.ts +180 -0
  71. package/src/observability/exporters/index.ts +1 -0
  72. package/src/observability/exporters/langwatch-exporter.ts +53 -0
  73. package/src/observability/index.ts +4 -0
  74. package/src/observability/instrumentation/langchain/__tests__/integration/langchain-chatbot.test.ts +112 -0
  75. package/src/observability/instrumentation/langchain/__tests__/langchain.test.ts +284 -0
  76. package/src/observability/instrumentation/langchain/index.ts +624 -0
  77. package/src/observability/processors/__tests__/filterable-batch-span-exporter.test.ts +98 -0
  78. package/src/observability/processors/filterable-batch-span-processor.ts +99 -0
  79. package/src/observability/processors/index.ts +1 -0
  80. package/src/observability/semconv/attributes.ts +185 -0
  81. package/src/observability/semconv/events.ts +42 -0
  82. package/src/observability/semconv/index.ts +16 -0
  83. package/src/observability/semconv/values.ts +159 -0
  84. package/src/observability/span.ts +728 -0
  85. package/src/observability/trace.ts +301 -0
  86. package/src/prompt/__tests__/prompt.test.ts +139 -0
  87. package/src/prompt/get-prompt-version.ts +49 -0
  88. package/src/prompt/get-prompt.ts +44 -0
  89. package/src/prompt/index.ts +3 -0
  90. package/src/prompt/prompt.ts +133 -0
  91. package/src/prompt/service.ts +221 -0
  92. package/src/prompt/tracer.ts +3 -0
  93. package/src/prompt/types.ts +0 -0
  94. package/ts-to-zod.config.js +11 -0
  95. package/tsconfig.json +3 -9
  96. package/tsup.config.ts +11 -1
  97. package/vitest.config.ts +1 -0
  98. package/dist/chunk-FWBCQQYZ.mjs +0 -711
  99. package/dist/chunk-FWBCQQYZ.mjs.map +0 -1
  100. package/dist/index.d.mts +0 -1010
  101. package/dist/index.d.ts +0 -1010
  102. package/dist/index.js +0 -27294
  103. package/dist/index.js.map +0 -1
  104. package/dist/index.mjs +0 -959
  105. package/dist/index.mjs.map +0 -1
  106. package/dist/utils-B0pgWcps.d.mts +0 -303
  107. package/dist/utils-B0pgWcps.d.ts +0 -303
  108. package/dist/utils.d.mts +0 -2
  109. package/dist/utils.d.ts +0 -2
  110. package/dist/utils.js +0 -703
  111. package/dist/utils.js.map +0 -1
  112. package/dist/utils.mjs +0 -11
  113. package/dist/utils.mjs.map +0 -1
  114. package/example/.env.example +0 -12
  115. package/example/.eslintrc.json +0 -26
  116. package/example/LICENSE +0 -13
  117. package/example/README.md +0 -12
  118. package/example/app/(chat)/chat/[id]/page.tsx +0 -60
  119. package/example/app/(chat)/layout.tsx +0 -14
  120. package/example/app/(chat)/page.tsx +0 -27
  121. package/example/app/actions.ts +0 -156
  122. package/example/app/globals.css +0 -76
  123. package/example/app/guardrails/page.tsx +0 -26
  124. package/example/app/langchain/page.tsx +0 -27
  125. package/example/app/langchain-rag/page.tsx +0 -28
  126. package/example/app/late-update/page.tsx +0 -27
  127. package/example/app/layout.tsx +0 -64
  128. package/example/app/login/actions.ts +0 -71
  129. package/example/app/login/page.tsx +0 -18
  130. package/example/app/manual/page.tsx +0 -27
  131. package/example/app/new/page.tsx +0 -5
  132. package/example/app/opengraph-image.png +0 -0
  133. package/example/app/share/[id]/page.tsx +0 -58
  134. package/example/app/signup/actions.ts +0 -111
  135. package/example/app/signup/page.tsx +0 -18
  136. package/example/app/twitter-image.png +0 -0
  137. package/example/auth.config.ts +0 -42
  138. package/example/auth.ts +0 -45
  139. package/example/components/button-scroll-to-bottom.tsx +0 -36
  140. package/example/components/chat-history.tsx +0 -49
  141. package/example/components/chat-list.tsx +0 -52
  142. package/example/components/chat-message-actions.tsx +0 -40
  143. package/example/components/chat-message.tsx +0 -80
  144. package/example/components/chat-panel.tsx +0 -139
  145. package/example/components/chat-share-dialog.tsx +0 -95
  146. package/example/components/chat.tsx +0 -84
  147. package/example/components/clear-history.tsx +0 -75
  148. package/example/components/empty-screen.tsx +0 -38
  149. package/example/components/external-link.tsx +0 -29
  150. package/example/components/footer.tsx +0 -19
  151. package/example/components/header.tsx +0 -114
  152. package/example/components/login-button.tsx +0 -42
  153. package/example/components/login-form.tsx +0 -97
  154. package/example/components/markdown.tsx +0 -9
  155. package/example/components/prompt-form.tsx +0 -115
  156. package/example/components/providers.tsx +0 -17
  157. package/example/components/sidebar-actions.tsx +0 -125
  158. package/example/components/sidebar-desktop.tsx +0 -19
  159. package/example/components/sidebar-footer.tsx +0 -16
  160. package/example/components/sidebar-item.tsx +0 -124
  161. package/example/components/sidebar-items.tsx +0 -42
  162. package/example/components/sidebar-list.tsx +0 -38
  163. package/example/components/sidebar-mobile.tsx +0 -31
  164. package/example/components/sidebar-toggle.tsx +0 -24
  165. package/example/components/sidebar.tsx +0 -21
  166. package/example/components/signup-form.tsx +0 -95
  167. package/example/components/stocks/events-skeleton.tsx +0 -31
  168. package/example/components/stocks/events.tsx +0 -30
  169. package/example/components/stocks/index.tsx +0 -36
  170. package/example/components/stocks/message.tsx +0 -134
  171. package/example/components/stocks/spinner.tsx +0 -16
  172. package/example/components/stocks/stock-purchase.tsx +0 -146
  173. package/example/components/stocks/stock-skeleton.tsx +0 -22
  174. package/example/components/stocks/stock.tsx +0 -210
  175. package/example/components/stocks/stocks-skeleton.tsx +0 -9
  176. package/example/components/stocks/stocks.tsx +0 -67
  177. package/example/components/tailwind-indicator.tsx +0 -14
  178. package/example/components/theme-toggle.tsx +0 -31
  179. package/example/components/ui/alert-dialog.tsx +0 -141
  180. package/example/components/ui/badge.tsx +0 -36
  181. package/example/components/ui/button.tsx +0 -57
  182. package/example/components/ui/codeblock.tsx +0 -148
  183. package/example/components/ui/dialog.tsx +0 -122
  184. package/example/components/ui/dropdown-menu.tsx +0 -205
  185. package/example/components/ui/icons.tsx +0 -507
  186. package/example/components/ui/input.tsx +0 -25
  187. package/example/components/ui/label.tsx +0 -26
  188. package/example/components/ui/select.tsx +0 -164
  189. package/example/components/ui/separator.tsx +0 -31
  190. package/example/components/ui/sheet.tsx +0 -140
  191. package/example/components/ui/sonner.tsx +0 -31
  192. package/example/components/ui/switch.tsx +0 -29
  193. package/example/components/ui/textarea.tsx +0 -24
  194. package/example/components/ui/tooltip.tsx +0 -30
  195. package/example/components/user-menu.tsx +0 -53
  196. package/example/components.json +0 -17
  197. package/example/instrumentation.ts +0 -11
  198. package/example/lib/chat/guardrails.tsx +0 -181
  199. package/example/lib/chat/langchain-rag.tsx +0 -191
  200. package/example/lib/chat/langchain.tsx +0 -112
  201. package/example/lib/chat/late-update.tsx +0 -208
  202. package/example/lib/chat/manual.tsx +0 -605
  203. package/example/lib/chat/vercel-ai.tsx +0 -576
  204. package/example/lib/hooks/use-copy-to-clipboard.tsx +0 -33
  205. package/example/lib/hooks/use-enter-submit.tsx +0 -23
  206. package/example/lib/hooks/use-local-storage.ts +0 -24
  207. package/example/lib/hooks/use-scroll-anchor.tsx +0 -86
  208. package/example/lib/hooks/use-sidebar.tsx +0 -60
  209. package/example/lib/hooks/use-streamable-text.ts +0 -25
  210. package/example/lib/types.ts +0 -41
  211. package/example/lib/utils.ts +0 -89
  212. package/example/middleware.ts +0 -8
  213. package/example/next-env.d.ts +0 -5
  214. package/example/next.config.js +0 -16
  215. package/example/package-lock.json +0 -9990
  216. package/example/package.json +0 -84
  217. package/example/pnpm-lock.yaml +0 -5712
  218. package/example/postcss.config.js +0 -6
  219. package/example/prettier.config.cjs +0 -34
  220. package/example/public/apple-touch-icon.png +0 -0
  221. package/example/public/favicon-16x16.png +0 -0
  222. package/example/public/favicon.ico +0 -0
  223. package/example/public/next.svg +0 -1
  224. package/example/public/thirteen.svg +0 -1
  225. package/example/public/vercel.svg +0 -1
  226. package/example/tailwind.config.ts +0 -81
  227. package/example/tsconfig.json +0 -35
  228. package/src/LangWatchExporter.ts +0 -91
  229. package/src/evaluations.ts +0 -219
  230. package/src/index.test.ts +0 -402
  231. package/src/langchain.ts +0 -557
  232. package/src/typeUtils.ts +0 -89
  233. package/src/types.ts +0 -79
  234. package/src/utils.ts +0 -205
  235. package/src/{server/types → internal/generated/openapi}/.gitkeep +0 -0
@@ -0,0 +1,171 @@
1
// --- Mock setup (stays above the imports: Vitest hoists vi.hoisted / vi.mock) ---
const { mockStartActiveSpan } = vi.hoisted(() => {
  // Stub span handed to the traced callback whenever the mocked tracer is used.
  const buildSpanStub = () => ({
    setType: vi.fn(),
    setInput: vi.fn(),
    setMetrics: vi.fn(),
    setStatus: vi.fn(),
    setOutputEvaluation: vi.fn(),
    recordException: vi.fn(),
    end: vi.fn(),
    spanContext: () => ({ traceId: 'trace', spanId: 'span' }),
  });

  return {
    // Invokes the traced callback immediately with a fresh stub span.
    mockStartActiveSpan: vi.fn((_name, fn) => fn(buildSpanStub())),
  };
});

// The evaluation module's tracer is replaced by the stub above.
vi.mock('../tracer', () => {
  return { tracer: { startActiveSpan: mockStartActiveSpan } };
});

// Global fetch is swapped for a mock so each test can queue its own response.
const mockFetch = vi.fn();
globalThis.fetch = mockFetch;

// Fixed client configuration: input capture enabled, dummy key and endpoint.
vi.mock('../../client', () => {
  return {
    canAutomaticallyCaptureInput: () => true,
    getApiKey: () => 'test-key',
    getEndpoint: () => 'https://api',
  };
});
25
+
26
+ // --- Imports (must be after mocks for Vitest hoisting) ---
27
+ import { describe, it, expect, vi, beforeEach } from 'vitest';
28
+ import { runEvaluation } from '../run-evaluation';
29
+ import { LangWatchApiError } from '../../internal/api/errors';
30
+
31
// Canned evaluator response bodies, one per status exercised by the tests below.

// Response body for a "processed" evaluation, including a cost.
const baseProcessed = {
  status: 'processed',
  passed: true,
  score: 1,
  details: 'ok',
  label: 'label',
  cost: { currency: 'USD', amount: 0.1 },
};

// Response body for a "skipped" evaluation.
const baseSkipped = {
  status: 'skipped',
  details: 'skipped',
};

// Response body for an "error" evaluation, with error type and traceback.
const baseError = {
  status: 'error',
  details: 'fail',
  error_type: 'EvalError',
  traceback: ['trace'],
};

// Evaluation request passed to runEvaluation in every test (cast to any at the call site).
const details = {
  name: 'test',
  data: { input: 'foo', output: 'bar' },
  evaluator: 'test-eval',
};
47
+
48
describe('runEvaluation', () => {
  /** Queues one successful HTTP response whose JSON body is a shallow copy of `body`. */
  const queueOkResponse = (body: Record<string, unknown>) => {
    mockFetch.mockResolvedValueOnce({
      ok: true,
      json: async () => ({ ...body }),
    });
  };

  /**
   * Builds a standalone span stub whose methods a test can assert on directly.
   *
   * `setStatus` is included because runEvaluation calls it in its catch block;
   * without it, any failure inside the traced callback would be reported as a
   * TypeError from the stub instead of the real error.
   */
  const createSpanStub = () => ({
    setType: vi.fn(),
    setInput: vi.fn(),
    setMetrics: vi.fn(),
    setStatus: vi.fn(),
    setOutputEvaluation: vi.fn(),
    recordException: vi.fn(),
    end: vi.fn(),
    spanContext: () => ({ traceId: 'trace', spanId: 'span' }),
  });

  /**
   * Clears the module cache and re-mocks `../../client` so that the next
   * dynamic import of run-evaluation sees the requested input-capture setting.
   */
  const mockClientWithInputCapture = (enabled: boolean) => {
    vi.resetModules();
    vi.doMock('../../client', () => ({
      canAutomaticallyCaptureInput: () => enabled,
      getApiKey: () => 'test-key',
      getEndpoint: () => 'https://api',
    }));
  };

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('returns processed result', async () => {
    queueOkResponse(baseProcessed);
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('processed');
    // The status check also narrows the result type for the field assertions.
    if (result.status === 'processed') {
      expect(result.passed).toBe(true);
      expect(result.score).toBe(1);
      expect(result.details).toBe('ok');
      expect(result.label).toBe('label');
      expect(result.cost).toEqual({ currency: 'USD', amount: 0.1 });
    } else {
      throw new Error('Expected processed result');
    }
    expect(mockFetch).toHaveBeenCalledWith(
      expect.stringContaining('/api/evaluations/test-eval/evaluate'),
      expect.objectContaining({ method: 'POST' })
    );
  });

  it('returns skipped result', async () => {
    queueOkResponse(baseSkipped);
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('skipped');
    expect(result.details).toBe('skipped');
  });

  it('returns error result', async () => {
    queueOkResponse(baseError);
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('error');
    if (result.status === 'error') {
      expect(result.details).toBe('fail');
      expect(result.error_type).toBe('EvalError');
      expect(result.traceback).toEqual(['trace']);
    } else {
      throw new Error('Expected error result');
    }
  });

  it('returns unknown status as error', async () => {
    queueOkResponse({ status: 'weird' });
    const result = await runEvaluation(details as any);
    expect(result.status).toBe('error');
    if (result.status === 'error') {
      expect(result.error_type).toBe('UnknownStatus');
      expect(result.details).toContain('Unknown evaluation status');
    } else {
      throw new Error('Expected error result');
    }
  });

  it('throws LangWatchApiError on non-ok response', async () => {
    mockFetch.mockResolvedValueOnce({
      ok: false,
      json: async () => ({}),
      status: 400,
      statusText: 'Bad',
      headers: { get: () => 'application/json' },
    });
    await expect(runEvaluation(details as any)).rejects.toBeInstanceOf(LangWatchApiError);
  });

  it('propagates fetch errors', async () => {
    mockFetch.mockRejectedValueOnce(new Error('network fail'));
    await expect(runEvaluation(details as any)).rejects.toThrow('network fail');
  });

  it('calls setInput if canAutomaticallyCaptureInput is true', async () => {
    mockClientWithInputCapture(true);
    const span = createSpanStub();
    mockStartActiveSpan.mockImplementationOnce((name, fn) => fn(span));
    queueOkResponse(baseProcessed);
    // Re-import after resetModules so the module picks up the doMock above.
    const { runEvaluation: runEval } = await import('../run-evaluation.js');
    await runEval(details as any);
    expect(span.setInput).toHaveBeenCalledWith(expect.objectContaining({ trace_id: 'trace' }));
  });

  it('does not call setInput if canAutomaticallyCaptureInput is false', async () => {
    mockClientWithInputCapture(false);
    const span = createSpanStub();
    mockStartActiveSpan.mockImplementationOnce((name, fn) => fn(span));
    queueOkResponse(baseProcessed);
    // Re-import after resetModules so the module picks up the doMock above.
    const { runEvaluation: runEval } = await import('../run-evaluation.js');
    await runEval(details as any);
    expect(span.setInput).not.toHaveBeenCalled();
  });
});
@@ -0,0 +1,2 @@
1
+ export * from "./run-evaluation";
2
+ export * from "./record-evaluation";
@@ -0,0 +1,101 @@
1
+ import { EvaluationRESTResult } from "../internal/generated/types/evaluations";
2
+ import * as intSemconv from "../observability/semconv";
3
+ import { Attributes, SpanStatusCode } from "@opentelemetry/api";
4
+ import { generate } from "xksuid";
5
+ import { tracer } from "./tracer";
6
+
7
/**
 * Describes an evaluation outcome to be recorded by `recordEvaluation`.
 * Only `name` is required.
 */
export interface RecordedEvaluationDetails {
  /** Identifier of the evaluation; `recordEvaluation` generates an `eval_`-prefixed one when omitted. */
  evaluationId?: string;
  /** Name of the evaluation. */
  name: string;
  /** Free-form evaluation type, forwarded as-is in the recorded event. */
  type?: string;
  /** When true the span is typed "guardrail" instead of "evaluation". */
  isGuardrail?: boolean;
  /** Outcome status; `recordEvaluation` treats a missing status as "processed". */
  status?: "processed" | "skipped" | "error";
  /** Whether the evaluation passed. */
  passed?: boolean;
  /** Numeric score of the evaluation. */
  score?: number;
  /** Label produced by the evaluation. */
  label?: string;
  /** Free-text details about the outcome. */
  details?: string;
  /** Cost of the evaluation; a bare number is treated as an amount in USD. */
  cost?: number | { currency: string; amount: number };
  /** Error supplying `error_type` and fallback details when status is "error". */
  error?: Error;
  /** Start and finish times of the evaluation, in Unix milliseconds per the field names. */
  timestamps?: {
    startedAtUnixMs: number;
    finishedAtUnixMs: number;
  };
}
24
+
25
/**
 * Records an already-computed evaluation outcome on a new "record evaluation" span.
 *
 * The span is typed "guardrail" or "evaluation", receives a custom-evaluation
 * event carrying the JSON-encoded details, has the normalized result set as its
 * output, and — when a cost is given — has that cost set as a metric.
 * Failures while populating the span are recorded on the span and are not
 * rethrown; the span is always ended.
 *
 * @param details - The evaluation outcome to record. A missing `status` is
 *   treated as "processed".
 * @param attributes - Optional extra attributes to set on the span.
 */
export function recordEvaluation(
  details: RecordedEvaluationDetails,
  attributes?: Attributes,
) {
  const status = details.status ?? "processed";

  // A bare number is shorthand for an amount in USD. Compare against
  // null/undefined rather than truthiness so a legitimate cost of 0 is kept.
  const cost =
    details.cost == null
      ? undefined
      : typeof details.cost === "number"
        ? { currency: "USD", amount: details.cost }
        : details.cost;

  // Error instances have no enumerable own properties, so JSON.stringify would
  // emit `{}` for them; copy the useful fields into a plain object instead.
  // NOTE(review): confirm the receiving side accepts this error shape.
  const serializedError = details.error
    ? {
        name: details.error.name,
        message: details.error.message,
        stack: details.error.stack,
      }
    : undefined;

  let result: EvaluationRESTResult;
  if (status === "skipped") {
    result = {
      status: "skipped",
      details: details.details,
    };
  } else if (status === "error") {
    result = {
      status: "error",
      error_type: details.error?.name || "Unknown",
      details: details.details || details.error?.message || "Unknown error",
    };
  } else {
    result = {
      status: "processed",
      passed: details.passed,
      score: details.score,
      label: details.label,
      details: details.details,
    };
    if (cost) {
      // Attached after construction, as the original code did.
      Object.assign(result, { cost });
    }
  }

  tracer.startActiveSpan("record evaluation", (span) => {
    try {
      span.setType(details.isGuardrail ? "guardrail" : "evaluation");
      span.addEvent(intSemconv.ATTR_LANGWATCH_EVALUATION_CUSTOM, {
        json_encoded_event: JSON.stringify({
          evaluation_id: details.evaluationId ?? `eval_${generate()}`,
          name: details.name,
          type: details.type,
          is_guardrail: details.isGuardrail,
          status: result.status,
          passed: details.passed,
          score: details.score,
          label: details.label,
          details: details.details,
          cost: details.cost,
          error: serializedError,
          timestamps: details.timestamps,
        }),
      });

      span.setOutput(result);

      if (attributes) {
        span.setAttributes(attributes);
      }
      if (cost) {
        span.setMetrics({ cost: cost.amount });
      }
    } catch (error) {
      span.recordException(error as Error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error)?.message });
    } finally {
      span.end();
    }
  });
}
@@ -0,0 +1,133 @@
1
+ import { LangWatchApiError } from "../internal/api/errors";
2
+ import { canAutomaticallyCaptureInput, getApiKey, getEndpoint } from "../client";
3
+ import { Conversation } from "../internal/generated/types/evaluations";
4
+ import {
5
+ Evaluators,
6
+ EvaluatorTypes,
7
+ SingleEvaluationResult,
8
+ } from "../internal/generated/types/evaluators.generated";
9
+ import { RAGChunk } from "../internal/generated/types/tracer";
10
+ import { tracer } from "./tracer";
11
+ import { EvaluationResultModel } from "./types";
12
+ import { SpanStatusCode } from "@opentelemetry/api";
13
+
14
/**
 * Conventional data fields for an evaluation request. Every field is optional;
 * this is one of the two shapes accepted for `EvaluationDetailsBase.data`.
 */
export interface BasicEvaluationData {
  /** Input text. */
  input?: string;
  /** Output text. */
  output?: string;
  /** Expected output, of any shape. */
  expected_output?: unknown;
  /** Contexts, as RAG chunks or plain strings. */
  contexts?: Array<RAGChunk> | Array<string>;
  /** Expected contexts, as RAG chunks or plain strings. */
  expected_contexts?: Array<RAGChunk> | Array<string>;
  /** Conversation associated with the evaluation. */
  conversation?: Conversation;
}
22
+
23
/**
 * Fields shared by every evaluation request passed to `runEvaluation`.
 */
export interface EvaluationDetailsBase {
  /** Optional name, sent as `name` in the request body. */
  name?: string;
  /** Data to evaluate, sent as `data` in the request body. */
  data: BasicEvaluationData | Record<string, unknown>;
  /** Contexts as RAG chunks or plain strings. NOTE(review): `runEvaluation` does not read this field. */
  contexts?: Array<RAGChunk> | Array<string>;
  /** Conversation. NOTE(review): `runEvaluation` does not read this field. */
  conversation?: Conversation;
  /** When true the span is typed "guardrail"; also sent as `as_guardrail`. */
  asGuardrail?: boolean;
}
30
+
31
/**
 * Evaluation request identified by a `slug`; `runEvaluation` uses the slug as
 * the evaluator id in the request URL.
 */
export interface SavedEvaluationDetails extends EvaluationDetailsBase {
  /** Identifier placed in the evaluate URL path. */
  slug: string;
  /** Optional untyped settings, sent as `settings` in the request body. */
  settings?: Record<string, unknown>;
}
35
+
36
/**
 * Evaluation request identified by an evaluator type `T`; `runEvaluation` uses
 * `evaluator` as the evaluator id in the request URL.
 */
export interface LangEvalsEvaluationDetails<T extends EvaluatorTypes>
  extends EvaluationDetailsBase {
  /** Evaluator type placed in the evaluate URL path. */
  evaluator: T;
  /** Optional settings, typed by the chosen evaluator's `settings` entry. */
  settings?: Evaluators[T]["settings"];
}
41
+
42
/**
 * Any request accepted by `runEvaluation`. The two variants are told apart by
 * the presence of a `slug` property.
 */
export type EvaluationDetails =
  | LangEvalsEvaluationDetails<EvaluatorTypes>
  | SavedEvaluationDetails;
45
+
46
/**
 * Runs an evaluation through the HTTP API inside a "run evaluation" span.
 *
 * POSTs the request to `/api/evaluations/{evaluatorId}/evaluate` on the
 * configured endpoint, authenticating with the `X-Auth-Token` header, and maps
 * the response onto a `SingleEvaluationResult`. A response with an
 * unrecognized status is returned as an "error" result of type "UnknownStatus".
 *
 * @param details - The evaluation to run; the evaluator id is `slug` when
 *   present, otherwise `evaluator`.
 * @returns The processed, skipped or error result.
 * @throws LangWatchApiError when the HTTP response is not ok. Any other error
 *   (e.g. from `fetch`) is recorded on the span and rethrown.
 */
export async function runEvaluation(
  details: EvaluationDetails,
): Promise<SingleEvaluationResult> {
  return tracer.startActiveSpan("run evaluation", async (span) => {
    span.setType(details.asGuardrail ? "guardrail" : "evaluation");

    try {
      const evaluatorId =
        "slug" in details ? details.slug : details.evaluator;
      const { traceId, spanId } = span.spanContext();
      const payload = {
        trace_id: traceId,
        span_id: spanId,
        data: details.data,
        name: details.name,
        settings: details.settings,
        as_guardrail: details.asGuardrail,
      };

      // Input capture is opt-out: the request is attached only when allowed.
      if (canAutomaticallyCaptureInput()) {
        span.setInput(payload);
      }

      const target = new URL(
        `/api/evaluations/${evaluatorId}/evaluate`,
        getEndpoint(),
      ).toString();

      const response = await fetch(target, {
        method: "POST",
        headers: {
          "X-Auth-Token": getApiKey(),
          "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
      });

      if (!response.ok) {
        const apiError = new LangWatchApiError("Unable to run evaluation", response);
        await apiError.safeParseBody(response);

        throw apiError;
      }

      const result: EvaluationResultModel = await response.json();

      span.setMetrics({ cost: result.cost?.amount });
      span.setOutputEvaluation(details.asGuardrail ?? false, result);

      switch (result.status) {
        case "processed":
          return {
            status: "processed",
            passed: result.passed,
            score: result.score,
            details: result.details,
            label: result.label,
            cost: result.cost,
          } as SingleEvaluationResult;

        case "skipped":
          return {
            status: "skipped",
            details: result.details,
          } as SingleEvaluationResult;

        case "error":
          // error_type and traceback are not declared on EvaluationResultModel.
          return {
            status: "error",
            error_type: (result as any).error_type || "Unknown",
            details: result.details || "Unknown error",
            traceback: (result as any).traceback || [],
          } as SingleEvaluationResult;

        default:
          // The response is not validated, so other statuses can reach here.
          return {
            status: "error",
            error_type: "UnknownStatus",
            details: `Unknown evaluation status: ${result.status}`,
            traceback: [],
          } as SingleEvaluationResult;
      }
    } catch (error) {
      span.recordException(error as Error);
      span.setStatus({ code: SpanStatusCode.ERROR, message: (error as Error)?.message });
      throw error;
    } finally {
      span.end();
    }
  });
}
@@ -0,0 +1,3 @@
1
+ import { getLangWatchTracer } from "../observability/trace";
2
+
3
// Name under which the evaluation tracer is obtained.
const EVALUATION_TRACER_NAME = "langwatch.evaluation";

/** Tracer obtained from `getLangWatchTracer` under the "langwatch.evaluation" name. */
export const tracer = getLangWatchTracer(EVALUATION_TRACER_NAME);
@@ -0,0 +1,23 @@
1
/**
 * Error that carries an HTTP status code and a response body alongside its
 * message. Its `name` is always "EvaluationError".
 */
export class EvaluationError extends Error {
  /**
   * @param message - Human-readable error message.
   * @param httpStatus - HTTP status code associated with the error.
   * @param body - Body associated with the error, of any shape.
   */
  constructor(
    message: string,
    readonly httpStatus: number,
    readonly body: unknown,
  ) {
    super(message);
    this.name = "EvaluationError";
  }
}
12
+
13
/**
 * Shape of an evaluation result. Only `status` is required.
 */
export interface EvaluationResultModel {
  /** Outcome status of the evaluation. */
  status: "processed" | "skipped" | "error";
  /** Whether the evaluation passed. */
  passed?: boolean;
  /** Numeric score. */
  score?: number;
  /** Free-text details. */
  details?: string;
  /** Label produced by the evaluation. */
  label?: string;
  /** Cost of the evaluation as a currency/amount pair. */
  cost?: {
    currency: string;
    amount: number;
  };
}