@fallom/trace 0.2.21 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-KFD5AQ7V.mjs +308 -0
- package/dist/{chunk-GZ6TE7G4.mjs → chunk-NNVWIZN5.mjs} +101 -10
- package/dist/{core-DUG2SP2V.mjs → core-3MHBKYBC.mjs} +1 -1
- package/dist/index.d.mts +46 -14
- package/dist/index.d.ts +46 -14
- package/dist/index.js +108 -9
- package/dist/index.mjs +6 -2
- package/dist/models-SEFDGZU2.mjs +8 -0
- package/package.json +1 -1
- package/dist/chunk-XBZ3ESNV.mjs +0 -824
- package/dist/core-JLHYFVYS.mjs +0 -21
package/dist/index.d.ts
CHANGED
@@ -568,22 +568,36 @@ interface GEvalScore {
     score: number;
     reasoning: string;
 }
+/**
+ * Options for runGEval function.
+ */
+interface RunGEvalOptions {
+    /** Built-in metric name or custom metric config */
+    metric: string | {
+        name: string;
+        criteria: string;
+        steps: string[];
+    };
+    /** The user's input/query */
+    inputText: string;
+    /** The LLM's response to evaluate */
+    outputText: string;
+    /** Optional system message for context */
+    systemMessage?: string;
+    /** The model to use as judge (OpenRouter format, e.g., "openai/gpt-4o-mini") */
+    judgeModel: string;
+    /** OpenRouter API key (defaults to OPENROUTER_API_KEY env var) */
+    openrouterKey?: string;
+    /** Optional Fallom API key to enable tracing of the judge LLM call */
+    fallomApiKey?: string;
+}
 /**
  * Run G-Eval for a single metric using OpenRouter.
  * This is the low-level function used by both the SDK and backend workers.
  *
- *
- * @param inputText - The user's input/query
- * @param outputText - The LLM's response
- * @param systemMessage - Optional system message
- * @param judgeModel - The model to use as judge (OpenRouter format)
- * @param openrouterKey - OpenRouter API key (defaults to env var)
+ * If `fallomApiKey` is provided, the judge LLM call will be traced to Fallom.
  */
-declare function runGEval(metric: string | {
-    name: string;
-    criteria: string;
-    steps: string[];
-}, inputText: string, outputText: string, systemMessage: string | undefined, judgeModel: string, openrouterKey?: string): Promise<GEvalScore>;
+declare function runGEval(options: RunGEvalOptions): Promise<GEvalScore>;
 /**
  * Calculate aggregate scores from a list of results.
  */
@@ -614,12 +628,22 @@ declare function detectRegression(currentScores: Record<string, {
 };
 
 /**
- * Core evaluation functions.
+ * Core evaluation functions for Fallom Evals.
+ *
+ * Provides the main API for running LLM evaluations using G-Eval methodology.
  */
 
+/** Default judge model (via OpenRouter) */
 declare const DEFAULT_JUDGE_MODEL = "openai/gpt-4o-mini";
 /**
  * Initialize Fallom evals.
+ *
+ * @example
+ * ```typescript
+ * import fallom from "@fallom/trace";
+ *
+ * fallom.evals.init({ apiKey: "your-api-key" });
+ * ```
  */
 declare function init$1(options?: InitOptions$1): void;
 /**
@@ -627,6 +651,13 @@ declare function init$1(options?: InitOptions$1): void;
  *
  * Results are automatically uploaded to Fallom dashboard.
  *
+ * @example
+ * ```typescript
+ * const results = await fallom.evals.evaluate({
+ *   dataset: [{ input: "What is 2+2?", output: "4" }],
+ *   metrics: ["answer_relevancy", "faithfulness"],
+ * });
+ * ```
  */
 declare function evaluate(options: EvaluateOptions): Promise<EvalResult[]>;
 /**
@@ -848,6 +879,7 @@ type evals_MetricName = MetricName;
 type evals_Model = Model;
 type evals_ModelCallable = ModelCallable;
 type evals_ModelResponse = ModelResponse;
+type evals_RunGEvalOptions = RunGEvalOptions;
 declare const evals_buildGEvalPrompt: typeof buildGEvalPrompt;
 declare const evals_calculateAggregateScores: typeof calculateAggregateScores;
 declare const evals_compareModels: typeof compareModels;
@@ -863,7 +895,7 @@ declare const evals_getMetricName: typeof getMetricName;
 declare const evals_isCustomMetric: typeof isCustomMetric;
 declare const evals_runGEval: typeof runGEval;
 declare namespace evals {
-  export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, evals_EvaluationDataset as EvaluationDataset, type evals_GEvalScore as GEvalScore, type evals_Golden as Golden, type InitOptions$1 as InitOptions, type evals_LLMTestCase as LLMTestCase, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, evals_buildGEvalPrompt as buildGEvalPrompt, evals_calculateAggregateScores as calculateAggregateScores, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_detectRegression as detectRegression, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, evals_runGEval as runGEval, uploadResultsPublic as uploadResults };
+  export { evals_AVAILABLE_METRICS as AVAILABLE_METRICS, type evals_CompareModelsOptions as CompareModelsOptions, type evals_CustomMetric as CustomMetric, evals_DEFAULT_JUDGE_MODEL as DEFAULT_JUDGE_MODEL, type evals_DatasetInput as DatasetInput, type evals_DatasetItem as DatasetItem, type evals_EvalResult as EvalResult, type evals_EvaluateOptions as EvaluateOptions, evals_EvaluationDataset as EvaluationDataset, type evals_GEvalScore as GEvalScore, type evals_Golden as Golden, type InitOptions$1 as InitOptions, type evals_LLMTestCase as LLMTestCase, evals_METRIC_PROMPTS as METRIC_PROMPTS, type evals_Message as Message, type evals_MetricInput as MetricInput, type evals_MetricName as MetricName, type evals_Model as Model, type evals_ModelCallable as ModelCallable, type evals_ModelResponse as ModelResponse, type evals_RunGEvalOptions as RunGEvalOptions, evals_buildGEvalPrompt as buildGEvalPrompt, evals_calculateAggregateScores as calculateAggregateScores, evals_compareModels as compareModels, evals_createCustomModel as createCustomModel, evals_createModelFromCallable as createModelFromCallable, evals_createOpenAIModel as createOpenAIModel, evals_customMetric as customMetric, evals_datasetFromFallom as datasetFromFallom, evals_datasetFromTraces as datasetFromTraces, evals_detectRegression as detectRegression, evals_evaluate as evaluate, evals_getMetricName as getMetricName, init$1 as init, evals_isCustomMetric as isCustomMetric, evals_runGEval as runGEval, uploadResultsPublic as uploadResults };
 }
 
 /**
@@ -1072,4 +1104,4 @@ declare const _default: {
     session: typeof session;
 };
 
-export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, clearMastraPrompt, _default as default, evals, init, models, prompts, session, setMastraPrompt, setMastraPromptAB, trace };
+export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
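The signature change above is the core of this release: `runGEval` now takes a single `RunGEvalOptions` object instead of six positional arguments. A minimal migration sketch based on these declarations (the metric name and judge model are illustrative values):

```typescript
import { runGEval } from "@fallom/trace";

// 0.2.21 (old, positional):
// await runGEval("faithfulness", inputText, outputText, undefined, "openai/gpt-4o-mini");

// 0.2.23 (new, options object):
const result = await runGEval({
  metric: "faithfulness", // or { name, criteria, steps } for a custom metric
  inputText: "What is 2+2?",
  outputText: "4",
  judgeModel: "openai/gpt-4o-mini",
  // openrouterKey falls back to process.env.OPENROUTER_API_KEY
});

console.log(result.score, result.reasoning); // GEvalScore
```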
package/dist/index.js
CHANGED
@@ -378,7 +378,16 @@ Respond in JSON format:
   "score": 0.85
 }`;
 }
-async function runGEval(metric, inputText, outputText, systemMessage, judgeModel, openrouterKey) {
+async function runGEval(options) {
+  const {
+    metric,
+    inputText,
+    outputText,
+    systemMessage,
+    judgeModel,
+    openrouterKey,
+    fallomApiKey
+  } = options;
   const apiKey4 = openrouterKey || process.env.OPENROUTER_API_KEY;
   if (!apiKey4) {
     throw new Error(
@@ -389,6 +398,7 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
   if (!config) {
     throw new Error(`Unknown metric: ${metric}`);
   }
+  const metricName = typeof metric === "object" ? metric.name : metric;
   const prompt = buildGEvalPrompt(
     config.criteria,
     config.steps,
@@ -396,6 +406,7 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
     inputText,
     outputText
   );
+  const startTime = Date.now();
   const response = await fetch(
     "https://openrouter.ai/api/v1/chat/completions",
     {
@@ -416,17 +427,89 @@ async function runGEval(metric, inputText, outputText, systemMessage, judgeModel
     throw new Error(`G-Eval API error: ${response.statusText}`);
   }
   const data = await response.json();
+  const endTime = Date.now();
   try {
     const result = JSON.parse(data.choices[0].message.content);
-
-
-
-
-
+    const score = Math.max(0, Math.min(1, result.score));
+    const reasoning = result.overall_reasoning || "";
+    if (fallomApiKey) {
+      sendGEvalTrace({
+        fallomApiKey,
+        metricName,
+        judgeModel,
+        prompt,
+        response: data.choices[0].message.content,
+        score,
+        reasoning,
+        startTime,
+        endTime,
+        usage: data.usage
+      }).catch(() => {
+      });
+    }
+    return { score, reasoning };
   } catch {
     throw new Error("Failed to parse G-Eval response");
   }
 }
+async function sendGEvalTrace(options) {
+  const {
+    fallomApiKey,
+    metricName,
+    judgeModel,
+    prompt,
+    response,
+    score,
+    reasoning,
+    startTime,
+    endTime,
+    usage
+  } = options;
+  const traceUrl = process.env.FALLOM_TRACES_URL || "https://traces.fallom.com";
+  const traceData = {
+    config_key: "eval-worker",
+    session_id: `geval-${Date.now()}`,
+    trace_id: generateHexId2(32),
+    span_id: generateHexId2(16),
+    name: `geval.${metricName}`,
+    kind: "llm",
+    model: judgeModel,
+    start_time: new Date(startTime).toISOString(),
+    end_time: new Date(endTime).toISOString(),
+    duration_ms: endTime - startTime,
+    status: "OK",
+    metadata: {
+      metric: metricName,
+      score
+    },
+    tags: ["eval-worker", "geval", metricName],
+    attributes: {
+      "fallom.sdk_version": "2",
+      "fallom.method": "runGEval",
+      "geval.metric": metricName,
+      "geval.score": score,
+      "geval.reasoning": reasoning,
+      "gen_ai.prompt.0.role": "user",
+      "gen_ai.prompt.0.content": prompt,
+      "gen_ai.completion.0.content": response,
+      "gen_ai.usage.prompt_tokens": usage?.prompt_tokens,
+      "gen_ai.usage.completion_tokens": usage?.completion_tokens
+    }
+  };
+  await fetch(`${traceUrl}/v1/traces`, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${fallomApiKey}`,
+      "Content-Type": "application/json"
+    },
+    body: JSON.stringify(traceData)
+  });
+}
+function generateHexId2(length) {
+  const bytes = new Uint8Array(length / 2);
+  crypto.getRandomValues(bytes);
+  return Array.from(bytes).map((b) => b.toString(16).padStart(2, "0")).join("");
+}
 function calculateAggregateScores(results) {
   const aggregates = {};
   for (const result of results) {
@@ -894,7 +977,13 @@ function init4(options = {}) {
 }
 async function runGEval2(metric, inputText, outputText, systemMessage, judgeModel) {
   const metricArg = isCustomMetric(metric) ? { name: metric.name, criteria: metric.criteria, steps: metric.steps } : metric;
-  return runGEval(metricArg, inputText, outputText, systemMessage, judgeModel);
+  return runGEval({
+    metric: metricArg,
+    inputText,
+    outputText,
+    systemMessage,
+    judgeModel
+  });
 }
 async function resolveDataset(datasetInput) {
   if (typeof datasetInput === "string") {
@@ -966,7 +1055,9 @@ async function evaluate(options) {
   for (const m of metrics) {
     if (typeof m === "string" && !AVAILABLE_METRICS.includes(m)) {
       throw new Error(
-        `Invalid metric: ${m}. Available: ${AVAILABLE_METRICS.join(", ")}. Or use CustomMetric for custom metrics.`
+        `Invalid metric: ${m}. Available: ${AVAILABLE_METRICS.join(
+          ", "
+        )}. Or use CustomMetric for custom metrics.`
       );
     }
   }
@@ -1263,12 +1354,16 @@ var index_exports = {};
 __export(index_exports, {
   FallomExporter: () => FallomExporter,
   FallomSession: () => FallomSession,
+  buildGEvalPrompt: () => buildGEvalPrompt,
+  calculateAggregateScores: () => calculateAggregateScores,
   clearMastraPrompt: () => clearMastraPrompt,
   default: () => index_default,
+  detectRegression: () => detectRegression,
   evals: () => evals_exports,
   init: () => init5,
   models: () => models_exports,
   prompts: () => prompts_exports,
+  runGEval: () => runGEval,
   session: () => session,
   setMastraPrompt: () => setMastraPrompt,
   setMastraPromptAB: () => setMastraPromptAB,
@@ -1293,7 +1388,7 @@ var import_exporter_trace_otlp_http = require("@opentelemetry/exporter-trace-otl
 // node_modules/@opentelemetry/resources/build/esm/Resource.js
 var import_api = require("@opentelemetry/api");
 
-// node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
+// node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
 var SemanticResourceAttributes = {
   /**
    * Name of the cloud provider.
@@ -4289,11 +4384,15 @@ var index_default = {
 0 && (module.exports = {
   FallomExporter,
   FallomSession,
+  buildGEvalPrompt,
+  calculateAggregateScores,
   clearMastraPrompt,
+  detectRegression,
   evals,
   init,
   models,
   prompts,
+  runGEval,
   session,
   setMastraPrompt,
   setMastraPromptAB,
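The new `fallomApiKey` path is fire-and-forget: `sendGEvalTrace` POSTs a span for the judge call to `FALLOM_TRACES_URL` (default `https://traces.fallom.com`), and the trailing `.catch(() => {})` ensures a failed upload never fails the evaluation itself. A hedged sketch of opting in (the env-var name holding the key is an assumption):

```typescript
import { runGEval } from "@fallom/trace";

// Passing fallomApiKey traces the judge LLM call to Fallom;
// omitting it keeps runGEval side-effect free, as before.
const result = await runGEval({
  metric: "answer_relevancy",
  inputText: "What is 2+2?",
  outputText: "4",
  judgeModel: "openai/gpt-4o-mini",
  fallomApiKey: process.env.FALLOM_API_KEY, // assumed env-var name
});
```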
package/dist/index.mjs
CHANGED
@@ -23,7 +23,7 @@ import {
   isCustomMetric,
   runGEval,
   uploadResultsPublic
-} from "./chunk-GZ6TE7G4.mjs";
+} from "./chunk-NNVWIZN5.mjs";
 import {
   __export
 } from "./chunk-7P6ASYW6.mjs";
@@ -45,7 +45,7 @@ import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
 // node_modules/@opentelemetry/resources/build/esm/Resource.js
 import { diag } from "@opentelemetry/api";
 
-// node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
+// node_modules/@opentelemetry/resources/node_modules/@opentelemetry/semantic-conventions/build/esm/resource/SemanticResourceAttributes.js
 var SemanticResourceAttributes = {
   /**
    * Name of the cloud provider.
@@ -3031,12 +3031,16 @@ var index_default = {
 export {
   FallomExporter,
   FallomSession,
+  buildGEvalPrompt,
+  calculateAggregateScores,
   clearMastraPrompt,
   index_default as default,
+  detectRegression,
   evals_exports as evals,
   init5 as init,
   models_exports as models,
   prompts_exports as prompts,
+  runGEval,
   session,
   setMastraPrompt,
   setMastraPromptAB,
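Both bundles now re-export the low-level eval helpers at the package root, so they no longer have to be reached through the `evals` namespace. A sketch mirroring the export lists above:

```typescript
// Previously only available as fallom.evals.*; now also top-level exports.
import {
  buildGEvalPrompt,
  calculateAggregateScores,
  detectRegression,
  runGEval,
} from "@fallom/trace";
```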
package/package.json
CHANGED