@arizeai/phoenix-evals 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ ---
2
+ title: "Classification"
3
+ description: "Classification helpers in @arizeai/phoenix-evals"
4
+ ---
5
+
6
+ Use the classification helpers when you want an LLM to choose from a fixed set of labels and return a structured explanation.
7
+
8
+ ## Create A Classifier Function
9
+
10
+ ```ts
11
+ import { openai } from "@ai-sdk/openai";
12
+ import { createClassifierFn } from "@arizeai/phoenix-evals";
13
+
14
+ const classify = createClassifierFn({
15
+ model: openai("gpt-4o-mini"),
16
+ choices: { relevant: 1, irrelevant: 0 },
17
+ promptTemplate:
18
+ "Question: {{input}}\nContext: {{context}}\nAnswer: {{output}}\nLabel as relevant or irrelevant.",
19
+ });
20
+
21
+ const result = await classify({
22
+ input: "What is Phoenix?",
23
+ context: "Phoenix is an AI observability platform.",
24
+ output: "Phoenix helps teams inspect traces and experiments.",
25
+ });
26
+ ```
27
+
28
+ ## Lower-Level API
29
+
30
+ Use `generateClassification` directly when you already have a rendered prompt and only need structured label generation.
31
+
32
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
33
+ <h2>Source Map</h2>
34
+ <ul>
35
+ <li><code>src/llm/createClassifierFn.ts</code></li>
36
+ <li><code>src/llm/createClassificationEvaluator.ts</code></li>
37
+ <li><code>src/llm/generateClassification.ts</code></li>
38
+ <li><code>src/types/evals.ts</code></li>
39
+ </ul>
40
+ </section>
@@ -0,0 +1,86 @@
1
+ ---
2
+ title: "Create Evaluator"
3
+ description: "Build custom evaluators with @arizeai/phoenix-evals"
4
+ ---
5
+
6
+ Use `createEvaluator` when your evaluation logic is plain TypeScript and you want a reusable evaluator object with consistent metadata and telemetry.
7
+
8
+ <section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
9
+ <h2>Relevant Source Files</h2>
10
+ <ul>
11
+ <li><code>src/helpers/createEvaluator.ts</code></li>
12
+ </ul>
13
+ </section>
14
+
15
+ ## Example
16
+
17
+ ```ts
18
+ import { createEvaluator } from "@arizeai/phoenix-evals";
19
+
20
+ const exactMatch = createEvaluator(
21
+ ({ output, expected }) => ({
22
+ score: output === expected ? 1 : 0,
23
+ label: output === expected ? "match" : "mismatch",
24
+ }),
25
+ {
26
+ name: "exact-match",
27
+ kind: "CODE",
28
+ }
29
+ );
30
+
31
+ const result = await exactMatch.evaluate({
32
+ output: "Paris",
33
+ expected: "Paris",
34
+ });
35
+ ```
36
+
37
+ ## What You Get
38
+
39
+ - an evaluator name
40
+ - evaluator kind such as `CODE` or `LLM`
41
+ - optimization direction metadata
42
+ - optional OpenTelemetry spans around execution
43
+
44
+ ## When To Use Code Evaluators
45
+
46
+ Code evaluators are the right fit when the scoring logic should stay deterministic, cheap, and fully under your control:
47
+
48
+ - regex or exact-match checks
49
+ - JSON structure validation
50
+ - latency and cost thresholds
51
+ - post-processing checks on existing model output
52
+
53
+ ```ts
54
+ import { createEvaluator } from "@arizeai/phoenix-evals";
55
+
56
+ const lengthCheck = createEvaluator(
57
+ ({ output }) => {
58
+ const score = typeof output === "string" && output.length < 280 ? 1 : 0;
59
+ return {
60
+ score,
61
+ label: score ? "fits-limit" : "too-long",
62
+ };
63
+ },
64
+ {
65
+ name: "response-length",
66
+ kind: "CODE",
67
+ }
68
+ );
69
+ ```
70
+
71
+ ## Related Helpers
72
+
73
+ - `asEvaluatorFn`
74
+ - `toEvaluationResult`
75
+
76
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
77
+ <h2>Source Map</h2>
78
+ <ul>
79
+ <li><code>src/helpers/createEvaluator.ts</code></li>
80
+ <li><code>src/helpers/asEvaluatorFn.ts</code></li>
81
+ <li><code>src/helpers/toEvaluationResult.ts</code></li>
82
+ <li><code>src/core/FunctionEvaluator.ts</code></li>
83
+ <li><code>src/core/EvaluatorBase.ts</code></li>
84
+ <li><code>src/types/evals.ts</code></li>
85
+ </ul>
86
+ </section>
@@ -0,0 +1,66 @@
1
+ ---
2
+ title: "LLM Evaluators"
3
+ description: "Use LLM-backed evaluators in @arizeai/phoenix-evals"
4
+ ---
5
+
6
+ The `llm` entrypoint provides reusable evaluator factories that call an AI SDK model and return structured evaluation results.
7
+
8
+ <section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
9
+ <h2>Relevant Source Files</h2>
10
+ <ul>
11
+ <li><code>src/llm/index.ts</code></li>
12
+ </ul>
13
+ </section>
14
+
15
+ ## Example
16
+
17
+ ```ts
18
+ import { openai } from "@ai-sdk/openai";
19
+ import { createFaithfulnessEvaluator } from "@arizeai/phoenix-evals";
20
+
21
+ const faithfulness = createFaithfulnessEvaluator({
22
+ model: openai("gpt-4o-mini"),
23
+ });
24
+
25
+ const result = await faithfulness.evaluate({
26
+ input: "What is the capital of France?",
27
+ context: "France is a country in Europe. Paris is its capital city.",
28
+ output: "The capital of France is Paris.",
29
+ });
30
+ ```
31
+
32
+ ## Built-In Evaluator Factories
33
+
34
+ - `createConcisenessEvaluator`
35
+ - `createCorrectnessEvaluator`
36
+ - `createDocumentRelevanceEvaluator`
37
+ - `createFaithfulnessEvaluator`
38
+ - `createRefusalEvaluator`
39
+ - `createClassificationEvaluator`
40
+ - `createToolSelectionEvaluator`
41
+ - `createToolInvocationEvaluator`
42
+ - `createToolResponseHandlingEvaluator`
43
+
44
+ ```ts
45
+ import { openai } from "@ai-sdk/openai";
46
+ import {
47
+ createCorrectnessEvaluator,
48
+ createRefusalEvaluator,
49
+ } from "@arizeai/phoenix-evals";
50
+
51
+ const model = openai("gpt-4o-mini");
52
+
53
+ const correctness = createCorrectnessEvaluator({ model });
54
+ const refusal = createRefusalEvaluator({ model });
55
+ ```
56
+
57
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
58
+ <h2>Source Map</h2>
59
+ <ul>
60
+ <li><code>src/llm/createClassificationEvaluator.ts</code></li>
61
+ <li><code>src/llm/ClassificationEvaluator.ts</code></li>
62
+ <li><code>src/llm/LLMEvaluator.ts</code></li>
63
+ <li><code>src/llm/createFaithfulnessEvaluator.ts</code></li>
64
+ <li><code>src/types/evals.ts</code></li>
65
+ </ul>
66
+ </section>
@@ -0,0 +1,90 @@
1
+ ---
2
+ title: "Overview"
3
+ description: "Bundled docs for @arizeai/phoenix-evals"
4
+ ---
5
+
6
+ `@arizeai/phoenix-evals` provides evaluator building blocks for TypeScript workflows. It includes LLM-based evaluators, code-based evaluators, prompt templating helpers, and compatibility points for Phoenix experiments.
7
+
8
+ ## Install
9
+
10
+ `@arizeai/phoenix-evals` depends on model adapters from the AI SDK ecosystem. Install the package plus at least one provider adapter for the models you plan to use.
11
+
12
+ ```bash
13
+ npm install @arizeai/phoenix-evals
14
+ ```
15
+
16
+ ### Common Setups
17
+
18
+ ```bash
19
+ npm install @arizeai/phoenix-evals @ai-sdk/openai
20
+ ```
21
+
22
+ ```bash
23
+ npm install @arizeai/phoenix-evals @ai-sdk/google
24
+ ```
25
+
26
+ You can also pair it with Phoenix experiments:
27
+
28
+ ```bash
29
+ npm install @arizeai/phoenix-evals @arizeai/phoenix-client @ai-sdk/openai
30
+ ```
31
+
32
+ ### Runtime Expectations
33
+
34
+ - Node.js 18+
35
+ - an AI SDK provider package such as `@ai-sdk/openai`
36
+ - credentials required by your chosen provider
37
+
38
+ ## Minimal Example
39
+
40
+ ```ts
41
+ import { openai } from "@ai-sdk/openai";
42
+ import { createFaithfulnessEvaluator } from "@arizeai/phoenix-evals";
43
+
44
+ const faithfulness = createFaithfulnessEvaluator({
45
+ model: openai("gpt-4o-mini"),
46
+ });
47
+
48
+ const result = await faithfulness.evaluate({
49
+ input: "What is Phoenix?",
50
+ context: "Phoenix is an open-source AI observability platform from Arize.",
51
+ output: "Phoenix is an open-source AI observability platform from Arize.",
52
+ });
53
+ ```
54
+
55
+ ## Docs And Source In `node_modules`
56
+
57
+ After installation, a coding agent can inspect the installed package directly:
58
+
59
+ ```text
60
+ node_modules/@arizeai/phoenix-evals/docs/
61
+ node_modules/@arizeai/phoenix-evals/src/
62
+ ```
63
+
64
+ The bundled docs cover evaluator creation, LLM evaluators, templates, classification, and Phoenix integration.
65
+
66
+ ## Where To Start
67
+
68
+ - [Create evaluator](./create-evaluator) for custom and code-based evaluator flows
69
+ - [LLM evaluators](./llm-evaluators) and [Classification](./classification) for model-backed evaluation
70
+ - [Templates](./templates) and [Phoenix integration](./phoenix-integration) for prompt helpers and experiment wiring
71
+
72
+ ## Source Layout
73
+
74
+ - `src/index.ts` re-exports the package surface you usually import from `@arizeai/phoenix-evals`
75
+ - `src/llm/` contains classification helpers and built-in LLM evaluator factories
76
+ - `src/helpers/` contains `createEvaluator` and evaluation-result helpers
77
+ - `src/template/` contains `formatTemplate` and `getTemplateVariables`
78
+ - `src/types/` contains shared evaluator and prompt types
79
+
80
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
81
+ <h2>Source Map</h2>
82
+ <ul>
83
+ <li><code>src/index.ts</code></li>
84
+ <li><code>src/llm/</code></li>
85
+ <li><code>src/helpers/</code></li>
86
+ <li><code>src/template/</code></li>
87
+ <li><code>src/core/</code></li>
88
+ <li><code>src/types/</code></li>
89
+ </ul>
90
+ </section>
@@ -0,0 +1,80 @@
1
+ ---
2
+ title: "Phoenix Integration"
3
+ description: "Use @arizeai/phoenix-evals with Phoenix experiments"
4
+ ---
5
+
6
+ `@arizeai/phoenix-evals` pairs with `@arizeai/phoenix-client` when you want to run evaluator-backed experiments and store both task traces and evaluation results in Phoenix.
7
+
8
+ <section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
9
+ <h2>Relevant Source Files</h2>
10
+ <ul>
11
+ <li><code>src/index.ts</code> for the root evaluator exports</li>
12
+ <li><code>companion package: @arizeai/phoenix-client/datasets</code></li>
13
+ <li><code>companion package: @arizeai/phoenix-client/experiments</code></li>
14
+ </ul>
15
+ </section>
16
+
17
+ ## Example
18
+
19
+ ```ts
20
+ import { openai } from "@ai-sdk/openai";
21
+ import { createFaithfulnessEvaluator } from "@arizeai/phoenix-evals";
22
+ import { createOrGetDataset } from "@arizeai/phoenix-client/datasets";
23
+ import {
24
+ asExperimentEvaluator,
25
+ runExperiment,
26
+ } from "@arizeai/phoenix-client/experiments";
27
+
28
+ await createOrGetDataset({
29
+ name: "support-eval",
30
+ examples: [
31
+ {
32
+ input: {
33
+ question: "Is Phoenix open source?",
34
+ context: "Phoenix is open source.",
35
+ },
36
+ output: {
37
+ answer: "Phoenix is open source.",
38
+ },
39
+ },
40
+ ],
41
+ });
42
+
43
+ const faithfulness = createFaithfulnessEvaluator({
44
+ model: openai("gpt-4o-mini"),
45
+ });
46
+
47
+ await runExperiment({
48
+ dataset: { datasetName: "support-eval" },
49
+ task: async ({ question, context }) =>
50
+ `${question} Answer using only this context: ${context}`,
51
+ evaluators: [
52
+ asExperimentEvaluator({
53
+ name: "faithfulness",
54
+ kind: "LLM",
55
+ evaluate: async ({ input, output }) =>
56
+ faithfulness.evaluate({
57
+ input: String(input.question ?? ""),
58
+ context: String(input.context ?? ""),
59
+ output: String(output ?? ""),
60
+ }),
61
+ }),
62
+ ],
63
+ });
64
+ ```
65
+
66
+ ## What Each Package Does
67
+
68
+ - `@arizeai/phoenix-evals` builds evaluator logic
69
+ - `@arizeai/phoenix-client` handles experiment execution and persistence
70
+ - combined usage produces evaluator traces and experiment results in Phoenix
71
+
72
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
73
+ <h2>Source Map</h2>
74
+ <ul>
75
+ <li><code>src/index.ts</code></li>
76
+ <li><code>src/llm/</code></li>
77
+ <li><code>companion package: @arizeai/phoenix-client/datasets</code></li>
78
+ <li><code>companion package: @arizeai/phoenix-client/experiments</code></li>
79
+ </ul>
80
+ </section>
@@ -0,0 +1,52 @@
1
+ ---
2
+ title: "Templates"
3
+ description: "Template helpers in @arizeai/phoenix-evals"
4
+ ---
5
+
6
+ The template helpers make it easier to manage Mustache-style prompt templates separately from evaluator execution.
7
+
8
+ <section className="hidden" data-agent-context="relevant-source-files" aria-label="Relevant source files">
9
+ <h2>Relevant Source Files</h2>
10
+ <ul>
11
+ <li><code>src/template/applyTemplate.ts</code></li>
12
+ <li><code>src/template/getTemplateVariables.ts</code></li>
13
+ </ul>
14
+ </section>
15
+
16
+ ## Render A Template
17
+
18
+ ```ts
19
+ import { formatTemplate } from "@arizeai/phoenix-evals";
20
+
21
+ const prompt = formatTemplate({
22
+ template: [
23
+ {
24
+ role: "user",
25
+ content: "Rate the answer to {{question}}",
26
+ },
27
+ ],
28
+ variables: {
29
+ question: "What is retrieval-augmented generation?",
30
+ },
31
+ });
32
+ ```
33
+
34
+ ## Discover Variables
35
+
36
+ ```ts
37
+ import { getTemplateVariables } from "@arizeai/phoenix-evals";
38
+
39
+ const variables = getTemplateVariables({
40
+ template: "Answer {{question}} using {{context}}",
41
+ });
42
+ ```
43
+
44
+ <section className="hidden" data-agent-context="source-map" aria-label="Source map">
45
+ <h2>Source Map</h2>
46
+ <ul>
47
+ <li><code>src/template/applyTemplate.ts</code></li>
48
+ <li><code>src/template/getTemplateVariables.ts</code></li>
49
+ <li><code>src/template/createTemplateVariablesProxy.ts</code></li>
50
+ <li><code>src/types/templating.ts</code></li>
51
+ </ul>
52
+ </section>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-evals",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "A library for running evaluations for AI use cases",
5
5
  "keywords": [
6
6
  "arize",
@@ -19,8 +19,12 @@
19
19
  "type": "git",
20
20
  "url": "git+https://github.com/Arize-ai/phoenix.git"
21
21
  },
22
+ "directories": {
23
+ "doc": "./docs"
24
+ },
22
25
  "files": [
23
26
  "dist",
27
+ "docs",
24
28
  "src",
25
29
  "package.json"
26
30
  ],
@@ -72,7 +76,7 @@
72
76
  "nock": "^14.0.5",
73
77
  "tsx": "^4.19.3",
74
78
  "typedoc": "^0.28.17",
75
- "vitest": "^4.0.10"
79
+ "vitest": "^4.1.0"
76
80
  },
77
81
  "engines": {
78
82
  "node": ">=18"
@@ -2,6 +2,7 @@ import { withSpan } from "@arizeai/openinference-core";
2
2
 
3
3
  import type { EvaluatorBase } from "../core/EvaluatorBase";
4
4
  import { FunctionEvaluator } from "../core/FunctionEvaluator";
5
+ import { tracer as defaultTracer } from "../telemetry";
5
6
  import type {
6
7
  EvaluationKind,
7
8
  OptimizationDirection,
@@ -168,7 +169,7 @@ export function createEvaluator<
168
169
  // Add OpenTelemetry span wrapping if telemetry is enabled
169
170
  if (telemetry && telemetry.isEnabled) {
170
171
  evaluateFn = withSpan(evaluateFn, {
171
- tracer: telemetry.tracer,
172
+ tracer: telemetry.tracer ?? defaultTracer,
172
173
  name: evaluatorName,
173
174
  kind: "EVALUATOR",
174
175
  });
@@ -1,3 +1,24 @@
1
- import { trace } from "@opentelemetry/api";
1
+ import { trace, type Tracer } from "@opentelemetry/api";
2
2
 
3
- export const tracer = trace.getTracer("phoenix-evals");
3
+ const DEFAULT_TRACER_NAME = "phoenix-evals";
4
+
5
+ /**
6
+ * Returns a lazy tracer that resolves from `trace.getTracer()` on every call,
7
+ * so evaluator spans follow whichever provider is currently mounted as global.
8
+ *
9
+ * Cast to `Tracer` is necessary because `startActiveSpan` has multiple
10
+ * overload signatures that cannot be satisfied by a single implementation.
11
+ */
12
+ export function getTracer(name: string = DEFAULT_TRACER_NAME): Tracer {
13
+ return {
14
+ startSpan(spanName, options, context) {
15
+ return trace.getTracer(name).startSpan(spanName, options, context);
16
+ },
17
+ startActiveSpan(...args: unknown[]) {
18
+ const tracer = trace.getTracer(name);
19
+ return Reflect.apply(tracer.startActiveSpan, tracer, args);
20
+ },
21
+ } as Tracer;
22
+ }
23
+
24
+ export const tracer = getTracer();