npm - langwatch - Versions diffs - 0.1.1 → 0.1.3 - Mend

langwatch 0.1.1 → 0.1.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (32)

package/copy-types.sh +17 -0
package/dist/{chunk-OVS4NSDE.mjs → chunk-2I4YLOQY.mjs} +184 -115
package/dist/chunk-2I4YLOQY.mjs.map +1 -0
package/dist/index.d.mts +310 -4
package/dist/index.d.ts +310 -4
package/dist/index.js +1429 -1121
package/dist/index.js.map +1 -1
package/dist/index.mjs +264 -26
package/dist/index.mjs.map +1 -1
package/dist/{utils-K-jSEpnZ.d.mts → utils-CFtM8VVg.d.mts} +107 -31
package/dist/{utils-K-jSEpnZ.d.ts → utils-CFtM8VVg.d.ts} +107 -31
package/dist/utils.d.mts +1 -2
package/dist/utils.d.ts +1 -2
package/dist/utils.js +181 -114
package/dist/utils.js.map +1 -1
package/dist/utils.mjs +1 -1
package/example/app/guardrails/page.tsx +26 -0
package/example/components/header.tsx +4 -0
package/example/lib/chat/guardrails.tsx +181 -0
package/example/lib/chat/langchain-rag.tsx +1 -1
package/example/lib/chat/langchain.tsx +1 -1
package/example/lib/chat/vercel-ai.tsx +1 -1
package/example/package-lock.json +4 -3
package/package.json +3 -4
package/src/evaluations.ts +219 -0
package/src/index.test.ts +5 -0
package/src/index.ts +182 -8
package/src/typeUtils.ts +20 -2
package/src/types.ts +6 -2
package/src/utils.ts +4 -8
package/ts-to-zod.config.js +2 -0
package/dist/chunk-OVS4NSDE.mjs.map +0 -1

package/src/evaluations.ts ADDED Viewed

@@ -0,0 +1,219 @@
+import { type LangWatchSpan, type LangWatchTrace } from "./index";
+import { type Conversation } from "./server/types/evaluations";
+import {
+  type Evaluators,
+  type EvaluatorTypes,
+} from "./server/types/evaluators.generated";
+import {
+  type RAGChunk,
+  type SpanTypes,
+  type TypedValueEvaluationResult,
+  type TypedValueGuardrailResult,
+  type TypedValueJson,
+} from "./server/types/tracer";
+type Money = {
+  currency: string;
+  amount: number;
+};
+export type EvaluationResultModel = {
+  status: "processed" | "skipped" | "error";
+  passed?: boolean;
+  score?: number;
+  details?: string;
+  label?: string;
+  cost?: Money;
+};
+export type CommonEvaluationParams = {
+  name?: string;
+  input?: string;
+  output?: string;
+  expectedOutput?: string;
+  contexts?: RAGChunk[] | string[];
+  conversation?: Conversation;
+  asGuardrail?: boolean;
+  trace?: LangWatchTrace;
+  span?: LangWatchSpan;
+};
+export type SavedEvaluationParams = {
+  slug: string;
+  settings?: Record<string, unknown>;
+} & CommonEvaluationParams;
+export type LangEvalsEvaluationParams<T extends EvaluatorTypes> = {
+  evaluator: T;
+  settings?: Evaluators[T]["settings"];
+} & CommonEvaluationParams;
+export type EvaluationParams =
+  | SavedEvaluationParams
+  | LangEvalsEvaluationParams<EvaluatorTypes>;
+export const evaluate = async (
+  params: EvaluationParams
+): Promise<EvaluationResultModel> => {
+  const slug = "slug" in params ? params.slug : params.evaluator;
+  const span = optionalCreateSpan({
+    trace: params.trace,
+    span: params.span,
+    name: params.name ? params.name : slug,
+    type: params.asGuardrail ? "guardrail" : "evaluation",
+  });
+  try {
+    const requestParams = prepareData({
+      ...params,
+      slug,
+      traceId: span?.trace.traceId,
+      spanId: span?.spanId,
+      span,
+    });
+    const response = await fetch(requestParams.url, {
+      method: "POST",
+      headers: requestParams.headers,
+      body: JSON.stringify(requestParams.json),
+    });
+    if (!response.ok) {
+      throw new Error(`HTTP error! status: ${response.status}`);
+    }
+    const result = await response.json();
+    return handleResponse(result, span, params.asGuardrail);
+  } catch (e) {
+    return handleException(e as Error, span, params.asGuardrail);
+  }
+};
+const optionalCreateSpan = ({
+  trace,
+  span,
+  name,
+  type,
+}: {
+  trace?: LangWatchTrace;
+  span?: LangWatchSpan;
+  name: string;
+  type: SpanTypes;
+}): LangWatchSpan | undefined => {
+  if (span) {
+    return span.startSpan({ name, type });
+  } else if (trace) {
+    return trace.startSpan({ name, type });
+  }
+  return undefined;
+};
+const prepareData = (params: {
+  slug: string;
+  name?: string;
+  input?: string;
+  output?: string;
+  expectedOutput?: string;
+  contexts?: RAGChunk[] | string[];
+  conversation?: Conversation;
+  settings?: Record<string, unknown>;
+  traceId?: string;
+  spanId?: string;
+  span?: LangWatchSpan;
+  asGuardrail?: boolean;
+}) => {
+  const data: Record<string, unknown> = {};
+  if (params.input) data.input = params.input;
+  if (params.output) data.output = params.output;
+  if (params.expectedOutput) data.expected_output = params.expectedOutput;
+  if (params.contexts && params.contexts.length > 0)
+    data.contexts = params.contexts;
+  if (params.conversation && params.conversation.length > 0)
+    data.conversation = params.conversation;
+  if (params.span) {
+    params.span.update({
+      input: { type: "json", value: data } as TypedValueJson,
+      params: params.settings,
+    });
+  }
+  return {
+    url: `${process.env.LANGWATCH_ENDPOINT}/api/evaluations/${params.slug}/evaluate`,
+    json: {
+      trace_id: params.traceId,
+      span_id: params.spanId,
+      name: params.name,
+      data,
+      settings: params.settings,
+      as_guardrail: params.asGuardrail,
+    },
+    headers: {
+      "X-Auth-Token": process.env.LANGWATCH_API_KEY ?? "",
+      "Content-Type": "application/json",
+    },
+  };
+};
+const handleResponse = (
+  response: EvaluationResultModel,
+  span?: LangWatchSpan,
+  asGuardrail = false
+): EvaluationResultModel => {
+  if (response.status === "error") {
+    response.details = response.details ?? "";
+  }
+  for (const key of Object.keys(response)) {
+    if (
+      response[key as keyof EvaluationResultModel] === null ||
+      response[key as keyof EvaluationResultModel] === undefined
+    ) {
+      delete response[key as keyof EvaluationResultModel];
+    }
+  }
+  if (span) {
+    const output: TypedValueGuardrailResult | TypedValueEvaluationResult =
+      asGuardrail
+        ? {
+            type: "guardrail_result",
+            value: response,
+          }
+        : {
+            type: "evaluation_result",
+            value: response,
+          };
+    span.update({ output });
+    if (response.cost) {
+      span.update({
+        metrics: {
+          cost: response.cost.amount,
+        },
+      });
+    }
+    span.end();
+  }
+  return response;
+};
+const handleException = (
+  e: Error,
+  span?: LangWatchSpan,
+  asGuardrail = false
+): EvaluationResultModel => {
+  const response: EvaluationResultModel = {
+    status: "error",
+    details: e.toString(),
+  };
+  if (asGuardrail) {
+    response.passed = true;
+  }
+  return handleResponse(response, span, asGuardrail);
+};

package/src/index.test.ts CHANGED Viewed

@@ -11,6 +11,7 @@ import {
 import { openai } from "@ai-sdk/openai";
 import { generateText, type CoreMessage } from "ai";
 import "dotenv/config";
+import { version } from "../package.json";
 describe("LangWatch tracer", () => {
   let mockFetch: SpyInstanceFn;
@@ -65,6 +66,8 @@ describe("LangWatch tracer", () => {
       threadId: "123",
       userId: "456",
       labels: ["foo", "bar"],
+      sdkLanguage: "typescript",
+      sdkVersion: version,
     });
     expect(span.timestamps.startedAt).toBeDefined();
     expect(span.timestamps.finishedAt).toBeDefined();
@@ -133,6 +136,8 @@ describe("LangWatch tracer", () => {
       thread_id: "123",
       user_id: "456",
       labels: ["foo", "bar"],
+      sdk_language: "typescript",
+      sdk_version: version,
     });
     expect(requestBody.spans.length).toBe(3);
   });

package/src/index.ts CHANGED Viewed

@@ -2,11 +2,19 @@ import EventEmitter from "events";
 import { nanoid } from "nanoid";
 import { ZodError } from "zod";
 import { fromZodError } from "zod-validation-error";
-import { camelToSnakeCaseNested, type Strict } from "./typeUtils";
+import { version } from "../package.json";
+import {
+  evaluate,
+  type EvaluationParams,
+  type EvaluationResultModel,
+} from "./evaluations";
+import { LangWatchCallbackHandler } from "./langchain";
 import {
   type CollectorRESTParams,
+  type EvaluationResult,
   type Span as ServerSpan,
   type SpanTypes,
+  type TypedValueEvaluationResult,
 } from "./server/types/tracer";
 import {
   collectorRESTParamsSchema,
@@ -22,10 +30,15 @@ import {
   type PendingLLMSpan,
   type PendingRAGSpan,
   type RAGSpan,
+  type RESTEvaluation,
   type SpanInputOutput,
 } from "./types";
-import { autoconvertTypedValues, captureError, convertFromVercelAIMessages } from "./utils";
-import { LangWatchCallbackHandler } from "./langchain";
+import { camelToSnakeCaseNested, type Strict } from "./typeUtils";
+import {
+  autoconvertTypedValues,
+  captureError,
+  convertFromVercelAIMessages,
+} from "./utils";
 export type {
   BaseSpan,
@@ -40,7 +53,7 @@ export type {
   SpanInputOutput,
 };
-export { convertFromVercelAIMessages, captureError, autoconvertTypedValues };
+export { autoconvertTypedValues, captureError, convertFromVercelAIMessages };
 export class LangWatch extends EventEmitter {
   apiKey: string | undefined;
@@ -133,13 +146,35 @@ export class LangWatch extends EventEmitter {
   }
 }
+type CurrentSpan = {
+  current: LangWatchSpan;
+  previous?: CurrentSpan;
+};
+type AddEvaluationParams = {
+  evaluationId?: string;
+  span?: LangWatchSpan;
+  name: string;
+  type?: string;
+  isGuardrail?: boolean;
+  status?: "processed" | "skipped" | "error";
+  passed?: boolean;
+  score?: number;
+  label?: string;
+  details?: string;
+  error?: Error;
+  timestamps?: RESTEvaluation["timestamps"];
+};
 export class LangWatchTrace {
   client: LangWatch;
   traceId: string;
   metadata?: Metadata;
   finishedSpans: Record<string, ServerSpan> = {};
-  timeoutRef?: NodeJS.Timeout;
   langchainCallback?: LangWatchCallbackHandler;
+  evaluations: RESTEvaluation[] = [];
+  private currentSpan?: CurrentSpan;
+  private timeoutRef?: NodeJS.Timeout;
   constructor({
     client,
@@ -152,7 +187,11 @@ export class LangWatchTrace {
   }) {
     this.client = client;
     this.traceId = traceId;
-    this.metadata = metadata;
+    this.metadata = {
+      ...metadata,
+      sdkVersion: version,
+      sdkLanguage: "typescript",
+    };
   }
   update({ metadata }: { metadata: Metadata }) {
@@ -160,16 +199,37 @@ export class LangWatchTrace {
       ...this.metadata,
       ...metadata,
       ...(typeof metadata.labels !== "undefined"
-        ? { labels: [...(this.metadata?.labels ?? []), ...metadata.labels] }
+        ? {
+            labels: [
+              ...(this.metadata?.labels ?? []),
+              ...(metadata.labels ?? []),
+            ],
+          }
         : {}),
     };
   }
+  setCurrentSpan(span: LangWatchSpan) {
+    this.currentSpan = {
+      current: span,
+      previous: this.currentSpan,
+    };
+  }
+  getCurrentSpan() {
+    return this.currentSpan?.current;
+  }
+  resetCurrentSpan() {
+    this.currentSpan = this.currentSpan?.previous;
+  }
   startSpan(params: Omit<Partial<PendingBaseSpan>, "parentId">) {
     const span = new LangWatchSpan({
       trace: this,
       ...params,
     });
+    this.setCurrentSpan(span);
     return span;
   }
@@ -178,6 +238,7 @@ export class LangWatchTrace {
       trace: this,
       ...params,
     });
+    this.setCurrentSpan(span);
     return span;
   }
@@ -186,9 +247,103 @@ export class LangWatchTrace {
       trace: this,
       ...params,
     });
+    this.setCurrentSpan(span);
     return span;
   }
+  addEvaluation = ({
+    evaluationId,
+    span,
+    name,
+    type,
+    isGuardrail,
+    status = "processed",
+    passed,
+    score,
+    label,
+    details,
+    error,
+    timestamps,
+  }: AddEvaluationParams): void => {
+    const currentEvaluationIndex = this.evaluations.findIndex(
+      (e) =>
+        evaluationId && "evaluationId" in e && e.evaluationId === evaluationId
+    );
+    const currentEvaluation =
+      currentEvaluationIndex !== -1
+        ? this.evaluations[currentEvaluationIndex]
+        : undefined;
+    const evaluationResult: EvaluationResult = {
+      status,
+      ...(passed !== undefined && { passed }),
+      ...(score !== undefined && { score }),
+      ...(label !== undefined && { label }),
+      ...(details !== undefined && { details }),
+    };
+    let span_ = span;
+    if (!span_) {
+      span_ = this.startSpan({
+        type: "evaluation",
+      });
+    }
+    if (span_.type !== "evaluation") {
+      span_ = span_.startSpan({ type: "evaluation" });
+    }
+    span_.update({
+      name,
+      output: {
+        type: "evaluation_result",
+        value: evaluationResult,
+      } as TypedValueEvaluationResult,
+      error,
+      timestamps: timestamps
+        ? {
+            startedAt: timestamps.startedAt ?? span_.timestamps.startedAt,
+            finishedAt: timestamps.finishedAt ?? undefined,
+          }
+        : undefined,
+    });
+    span_.end();
+    const evaluation: RESTEvaluation = {
+      evaluationId: evaluationId ?? `eval_${nanoid()}`,
+      spanId: span_.spanId,
+      name,
+      type,
+      isGuardrail,
+      status,
+      passed,
+      score,
+      label,
+      details,
+      error: error ? captureError(error) : undefined,
+      timestamps: timestamps ?? {
+        startedAt: span_.timestamps.startedAt,
+        finishedAt: span_.timestamps.finishedAt,
+      },
+    };
+    if (currentEvaluation && currentEvaluationIndex !== -1) {
+      this.evaluations[currentEvaluationIndex] = {
+        ...currentEvaluation,
+        ...evaluation,
+      };
+    } else {
+      this.evaluations.push(evaluation);
+    }
+  };
+  async evaluate(params: EvaluationParams): Promise<EvaluationResultModel> {
+    return evaluate({
+      trace: this,
+      ...params,
+    });
+  }
   getLangChainCallback() {
     if (!this.langchainCallback) {
       this.langchainCallback = new LangWatchCallbackHandler({ trace: this });
@@ -198,6 +353,7 @@ export class LangWatchTrace {
   onEnd(span: ServerSpan) {
     this.finishedSpans[span.span_id] = span;
+    this.resetCurrentSpan();
     this.delayedSendSpans();
   }
@@ -215,8 +371,9 @@ export class LangWatchTrace {
     try {
       trace = collectorRESTParamsSchema.parse({
         trace_id: this.traceId,
-        metadata: camelToSnakeCaseNested(this.metadata),
+        metadata: camelToSnakeCaseNested(this.metadata, "metadata"),
         spans: Object.values(this.finishedSpans),
+        evaluations: camelToSnakeCaseNested(this.evaluations),
       } as Strict<CollectorRESTParams>);
     } catch (error) {
       if (error instanceof ZodError) {
@@ -309,6 +466,7 @@ export class LangWatchSpan implements PendingBaseSpan {
       parentId: this.spanId,
       ...params,
     });
+    this.trace.setCurrentSpan(span);
     return span;
   }
@@ -318,6 +476,7 @@ export class LangWatchSpan implements PendingBaseSpan {
       parentId: this.spanId,
       ...params,
     });
+    this.trace.setCurrentSpan(span);
     return span;
   }
@@ -327,9 +486,24 @@ export class LangWatchSpan implements PendingBaseSpan {
       parentId: this.spanId,
       ...params,
     });
+    this.trace.setCurrentSpan(span);
     return span;
   }
+  addEvaluation(params: AddEvaluationParams) {
+    this.trace.addEvaluation({
+      ...params,
+      span: this,
+    });
+  }
+  async evaluate(params: EvaluationParams): Promise<EvaluationResultModel> {
+    return evaluate({
+      span: this,
+      ...params,
+    });
+  }
   end(params?: Partial<Omit<PendingBaseSpan, "spanId" | "parentId">>) {
     this.timestamps.finishedAt = Date.now();
     if (params) {

package/src/typeUtils.ts CHANGED Viewed

@@ -1,3 +1,8 @@
+import {
+  reservedSpanParamsSchema,
+  reservedTraceMetadataSchema
+} from "./server/types/tracer.generated";
 export type Strict<T> = T & { [K in Exclude<keyof any, keyof T>]: never };
 type SnakeToCamelCase<S extends string> = S extends `${infer T}_${infer U}`
@@ -44,7 +49,10 @@ function camelToSnakeCase(str: string): string {
   return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
 }
-export function camelToSnakeCaseNested<T>(obj: T): CamelToSnakeCaseNested<T> {
+export function camelToSnakeCaseNested<T>(
+  obj: T,
+  parentKey?: string
+): CamelToSnakeCaseNested<T> {
   if (Array.isArray(obj)) {
     return obj.map((item) =>
       camelToSnakeCaseNested(item)
@@ -54,7 +62,17 @@ export function camelToSnakeCaseNested<T>(obj: T): CamelToSnakeCaseNested<T> {
     for (const key in obj) {
       if (obj.hasOwnProperty(key)) {
         const newKey = camelToSnakeCase(key);
-        newObj[newKey] = camelToSnakeCaseNested((obj as any)[key]);
+        // Keep arbitrary keys the same
+        if (
+          (parentKey === "metadata" &&
+            !Object.keys(reservedTraceMetadataSchema.shape).includes(newKey)) ||
+          (parentKey === "params" &&
+            !Object.keys(reservedSpanParamsSchema.shape).includes(newKey))
+        ) {
+          newObj[key] = (obj as any)[key];
+        } else {
+          newObj[newKey] = camelToSnakeCaseNested((obj as any)[key], newKey);
+        }
       }
     }
     return newObj as CamelToSnakeCaseNested<T>;

package/src/types.ts CHANGED Viewed

@@ -1,4 +1,3 @@
-import type modelPrices from "llm-cost/model_prices_and_context_window.json";
 import type { OpenAI } from "openai";
 import { type SnakeToCamelCaseNested } from "./typeUtils";
 import {
@@ -10,6 +9,7 @@ import {
   type SpanInputOutput as ServerSpanInputOutput,
   type TypedValueChatMessages,
   type Trace,
+  type RESTEvaluation as ServerRESTEvaluation,
 } from "./server/types/tracer";
 export type Metadata = SnakeToCamelCaseNested<Trace["metadata"]>;
@@ -63,8 +63,12 @@ export type PendingBaseSpan = PendingSpan<BaseSpan>;
 // vendor is deprecated, and we try to force the available models here
 export type LLMSpan = ConvertServerSpan<
   Omit<ServerLLMSpan, "vendor" | "model">
-> & { model: keyof typeof modelPrices | (string & NonNullable<unknown>) };
+> & { model: string };
 export type PendingLLMSpan = PendingSpan<LLMSpan>;
 export type RAGSpan = ConvertServerSpan<ServerRAGSpan>;
 export type PendingRAGSpan = PendingSpan<RAGSpan>;
+export type RESTEvaluation = SnakeToCamelCaseNested<
+  Omit<ServerRESTEvaluation, "error">
+> & { error?: ServerRESTEvaluation["error"] };

package/src/utils.ts CHANGED Viewed

@@ -1,13 +1,9 @@
 import { convertUint8ArrayToBase64 } from "@ai-sdk/provider-utils";
-import { type ImagePart, type CoreMessage } from "ai";
-import { type ChatMessage, type SpanInputOutput } from "./types";
-import { type ErrorCapture } from "./server/types/tracer";
-import {
-  chatMessageSchema,
-  spanInputOutputSchema,
-  typedValueChatMessagesSchema,
-} from "./server/types/tracer.generated";
+import { type CoreMessage, type ImagePart } from "ai";
 import { z } from "zod";
+import { type ErrorCapture } from "./server/types/tracer";
+import { chatMessageSchema } from "./server/types/tracer.generated";
+import { type ChatMessage, type SpanInputOutput } from "./types";
 const convertImageToUrl = (
   image: ImagePart["image"],

package/ts-to-zod.config.js CHANGED Viewed

@@ -14,5 +14,7 @@ module.exports = {
       "TraceCheckJob",
       "AnalyticsMetric",
       "NewDatasetEntries",
+      "EvaluationRESTParams",
+      "EvaluationRESTResult",
     ].includes(name),
 };