npm - @fallom/trace - Versions diffs - 0.2.25 → 0.2.28 - Mend

@fallom/trace 0.2.25 → 0.2.28

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/chunk-2NGJF2JZ.mjs +661 -0
package/dist/chunk-3HBKT4HK.mjs +827 -0
package/dist/{chunk-3VWF2OJX.mjs → chunk-FTZVXPQN.mjs} +25 -8
package/dist/chunk-GZ6TE7G4.mjs +923 -0
package/dist/chunk-MSI4HGK6.mjs +1051 -0
package/dist/chunk-TNNLTWRG.mjs +1045 -0
package/dist/chunk-XBZ3ESNV.mjs +824 -0
package/dist/{core-Q3IHBEHB.mjs → core-46Z4Q54J.mjs} +1 -1
package/dist/core-4L56QWI7.mjs +21 -0
package/dist/core-5BF6KLNO.mjs +21 -0
package/dist/core-DUG2SP2V.mjs +21 -0
package/dist/core-JLHYFVYS.mjs +21 -0
package/dist/core-NTEI2B5Z.mjs +21 -0
package/dist/core-SL7FAAJN.mjs +21 -0
package/dist/index.d.mts +125 -3
package/dist/index.d.ts +125 -3
package/dist/index.js +180 -21
package/dist/index.mjs +131 -4
package/package.json +2 -2

package/dist/{core-Q3IHBEHB.mjs → core-46Z4Q54J.mjs} RENAMED Viewed

@@ -7,7 +7,7 @@ import {
   evaluate,
   init,
   uploadResultsPublic
-} from "./chunk-3VWF2OJX.mjs";
+} from "./chunk-2NGJF2JZ.mjs";
 import "./chunk-7P6ASYW6.mjs";
 export {
   DEFAULT_JUDGE_MODEL,

package/dist/core-4L56QWI7.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-3HBKT4HK.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/core-5BF6KLNO.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-MSI4HGK6.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/core-DUG2SP2V.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-GZ6TE7G4.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/core-JLHYFVYS.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-XBZ3ESNV.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/core-NTEI2B5Z.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-FTZVXPQN.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/core-SL7FAAJN.mjs ADDED Viewed

@@ -0,0 +1,21 @@
+import {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+} from "./chunk-TNNLTWRG.mjs";
+import "./chunk-7P6ASYW6.mjs";
+export {
+  DEFAULT_JUDGE_MODEL,
+  _apiKey,
+  _baseUrl,
+  _initialized,
+  compareModels,
+  evaluate,
+  init,
+  uploadResultsPublic
+};

package/dist/index.d.mts CHANGED Viewed

@@ -130,6 +130,92 @@ declare function init$4(options?: {
  */
 declare function shutdown(): Promise<void>;
+/**
+ * FallomSpan - Manual span for custom operations.
+ *
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
+ *
+ * @example
+ * ```typescript
+ * const session = fallom.session({ configKey: "my-agent", sessionId });
+ *
+ * // Create a manual span
+ * const span = session.span("rag.retrieve");
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
+ *
+ * const docs = await retrieveDocuments(userQuery);
+ * span.set({ "rag.documents.count": docs.length });
+ *
+ * span.end(); // Sends the span
+ * ```
+ */
+interface SpanOptions {
+    /** Parent span ID for nested spans */
+    parentSpanId?: string;
+    /** Trace ID to continue an existing trace */
+    traceId?: string;
+    /** Span kind (defaults to "custom") */
+    kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
+}
+declare class FallomSpan {
+    private name;
+    private ctx;
+    private attrs;
+    private startTime;
+    private ended;
+    private _status;
+    private _errorMessage?;
+    readonly spanId: string;
+    readonly traceId: string;
+    readonly parentSpanId?: string;
+    readonly kind: string;
+    constructor(name: string, ctx: SessionContext, options?: SpanOptions);
+    /**
+     * Set attributes on the span.
+     * Can be called multiple times - attributes are merged.
+     */
+    set(attributes: Record<string, unknown>): this;
+    /**
+     * Mark the span as errored.
+     */
+    setError(error: Error | string): this;
+    /**
+     * Get span context for creating child spans.
+     */
+    context(): {
+        traceId: string;
+        spanId: string;
+    };
+    /**
+     * End the span and send it.
+     * Must be called for the span to be recorded.
+     */
+    end(): void;
+}
+/**
+ * Wrap a function to automatically create a span around it.
+ * Similar to Braintrust's wrapTraced().
+ *
+ * @example
+ * ```typescript
+ * const fetchDocuments = wrapTraced(
+ *   session,
+ *   "rag.fetch",
+ *   async (query: string) => {
+ *     const docs = await vectorDb.search(query);
+ *     return docs;
+ *   }
+ * );
+ *
+ * // Function input/output automatically captured
+ * const docs = await fetchDocuments("user query");
+ * ```
+ */
+declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
+    span: (name: string, options?: SpanOptions) => FallomSpan;
+}, name: string, fn: T, options?: SpanOptions): T;
 /**
  * FallomSession - Session-scoped tracing for concurrent-safe operations.
  */
@@ -162,6 +248,24 @@ declare class FallomSession {
     constructor(options: SessionOptions);
     /** Get the session context. */
     getContext(): SessionContext;
+    /**
+     * Create a manual span for custom operations.
+     *
+     * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
+     * The span uses the session's context (configKey, sessionId, etc.).
+     *
+     * @example
+     * ```typescript
+     * const span = session.span("rag.retrieve");
+     * span.set({ "rag.query": userQuery, "rag.topK": 5 });
+     *
+     * const docs = await retrieveDocuments(userQuery);
+     * span.set({ "rag.documents.count": docs.length });
+     *
+     * span.end(); // Must call to send the span
+     * ```
+     */
+    span(name: string, options?: SpanOptions): FallomSpan;
     /**
      * Get model assignment for this session (A/B testing).
      */
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
 type trace_FallomSession = FallomSession;
 declare const trace_FallomSession: typeof FallomSession;
+type trace_FallomSpan = FallomSpan;
+declare const trace_FallomSpan: typeof FallomSpan;
 type trace_SessionContext = SessionContext;
 type trace_SessionOptions = SessionOptions;
+type trace_SpanOptions = SpanOptions;
 type trace_TraceContext = TraceContext;
 type trace_TraceData = TraceData;
 type trace_WrapAISDKOptions = WrapAISDKOptions;
 declare const trace_session: typeof session;
 declare const trace_shutdown: typeof shutdown;
+declare const trace_wrapTraced: typeof wrapTraced;
 declare namespace trace {
-  export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
+  export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
 }
 /**
@@ -445,6 +553,12 @@ interface EvalResult {
     input: string;
     output: string;
     systemMessage?: string;
+    /** Expected/golden output for comparison (if provided) */
+    expectedOutput?: string;
+    /** Retrieved documents/context for RAG evaluation */
+    context?: string[];
+    /** Additional metadata */
+    metadata?: Record<string, unknown>;
     model: string;
     isProduction: boolean;
     answerRelevancy?: number;
@@ -525,6 +639,12 @@ interface EvaluateOptions {
     /** List of metrics to run (built-in or custom). Default: all built-in metrics */
     metrics?: MetricInput[];
     judgeModel?: string;
+    /**
+     * Context to provide the LLM judge about the product/domain being evaluated.
+     * This helps the judge make better evaluations by understanding what features
+     * or capabilities are valid (e.g., won't mark valid features as hallucinations).
+     */
+    judgeContext?: string;
     name?: string;
     description?: string;
     verbose?: boolean;
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
 /**
  * Build the G-Eval prompt for the LLM judge.
  */
-declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
+declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
 /**
  * Result of running G-Eval on a single metric.
  */
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
     traceSessionId?: string;
     /** Optional customer ID for tracing (e.g., organization ID) */
     traceCustomerId?: string;
+    /** Optional context to provide the judge about the product/domain being evaluated */
+    judgeContext?: string;
 }
 /**
  * Run G-Eval for a single metric using OpenRouter.
@@ -1108,4 +1230,4 @@ declare const _default: {
     session: typeof session;
 };
-export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
+export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };

package/dist/index.d.ts CHANGED Viewed

@@ -130,6 +130,92 @@ declare function init$4(options?: {
  */
 declare function shutdown(): Promise<void>;
+/**
+ * FallomSpan - Manual span for custom operations.
+ *
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
+ *
+ * @example
+ * ```typescript
+ * const session = fallom.session({ configKey: "my-agent", sessionId });
+ *
+ * // Create a manual span
+ * const span = session.span("rag.retrieve");
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
+ *
+ * const docs = await retrieveDocuments(userQuery);
+ * span.set({ "rag.documents.count": docs.length });
+ *
+ * span.end(); // Sends the span
+ * ```
+ */
+interface SpanOptions {
+    /** Parent span ID for nested spans */
+    parentSpanId?: string;
+    /** Trace ID to continue an existing trace */
+    traceId?: string;
+    /** Span kind (defaults to "custom") */
+    kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
+}
+declare class FallomSpan {
+    private name;
+    private ctx;
+    private attrs;
+    private startTime;
+    private ended;
+    private _status;
+    private _errorMessage?;
+    readonly spanId: string;
+    readonly traceId: string;
+    readonly parentSpanId?: string;
+    readonly kind: string;
+    constructor(name: string, ctx: SessionContext, options?: SpanOptions);
+    /**
+     * Set attributes on the span.
+     * Can be called multiple times - attributes are merged.
+     */
+    set(attributes: Record<string, unknown>): this;
+    /**
+     * Mark the span as errored.
+     */
+    setError(error: Error | string): this;
+    /**
+     * Get span context for creating child spans.
+     */
+    context(): {
+        traceId: string;
+        spanId: string;
+    };
+    /**
+     * End the span and send it.
+     * Must be called for the span to be recorded.
+     */
+    end(): void;
+}
+/**
+ * Wrap a function to automatically create a span around it.
+ * Similar to Braintrust's wrapTraced().
+ *
+ * @example
+ * ```typescript
+ * const fetchDocuments = wrapTraced(
+ *   session,
+ *   "rag.fetch",
+ *   async (query: string) => {
+ *     const docs = await vectorDb.search(query);
+ *     return docs;
+ *   }
+ * );
+ *
+ * // Function input/output automatically captured
+ * const docs = await fetchDocuments("user query");
+ * ```
+ */
+declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
+    span: (name: string, options?: SpanOptions) => FallomSpan;
+}, name: string, fn: T, options?: SpanOptions): T;
 /**
  * FallomSession - Session-scoped tracing for concurrent-safe operations.
  */
@@ -162,6 +248,24 @@ declare class FallomSession {
     constructor(options: SessionOptions);
     /** Get the session context. */
     getContext(): SessionContext;
+    /**
+     * Create a manual span for custom operations.
+     *
+     * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
+     * The span uses the session's context (configKey, sessionId, etc.).
+     *
+     * @example
+     * ```typescript
+     * const span = session.span("rag.retrieve");
+     * span.set({ "rag.query": userQuery, "rag.topK": 5 });
+     *
+     * const docs = await retrieveDocuments(userQuery);
+     * span.set({ "rag.documents.count": docs.length });
+     *
+     * span.end(); // Must call to send the span
+     * ```
+     */
+    span(name: string, options?: SpanOptions): FallomSpan;
     /**
      * Get model assignment for this session (A/B testing).
      */
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
 type trace_FallomSession = FallomSession;
 declare const trace_FallomSession: typeof FallomSession;
+type trace_FallomSpan = FallomSpan;
+declare const trace_FallomSpan: typeof FallomSpan;
 type trace_SessionContext = SessionContext;
 type trace_SessionOptions = SessionOptions;
+type trace_SpanOptions = SpanOptions;
 type trace_TraceContext = TraceContext;
 type trace_TraceData = TraceData;
 type trace_WrapAISDKOptions = WrapAISDKOptions;
 declare const trace_session: typeof session;
 declare const trace_shutdown: typeof shutdown;
+declare const trace_wrapTraced: typeof wrapTraced;
 declare namespace trace {
-  export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
+  export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
 }
 /**
@@ -445,6 +553,12 @@ interface EvalResult {
     input: string;
     output: string;
     systemMessage?: string;
+    /** Expected/golden output for comparison (if provided) */
+    expectedOutput?: string;
+    /** Retrieved documents/context for RAG evaluation */
+    context?: string[];
+    /** Additional metadata */
+    metadata?: Record<string, unknown>;
     model: string;
     isProduction: boolean;
     answerRelevancy?: number;
@@ -525,6 +639,12 @@ interface EvaluateOptions {
     /** List of metrics to run (built-in or custom). Default: all built-in metrics */
     metrics?: MetricInput[];
     judgeModel?: string;
+    /**
+     * Context to provide the LLM judge about the product/domain being evaluated.
+     * This helps the judge make better evaluations by understanding what features
+     * or capabilities are valid (e.g., won't mark valid features as hallucinations).
+     */
+    judgeContext?: string;
     name?: string;
     description?: string;
     verbose?: boolean;
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
 /**
  * Build the G-Eval prompt for the LLM judge.
  */
-declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
+declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
 /**
  * Result of running G-Eval on a single metric.
  */
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
     traceSessionId?: string;
     /** Optional customer ID for tracing (e.g., organization ID) */
     traceCustomerId?: string;
+    /** Optional context to provide the judge about the product/domain being evaluated */
+    judgeContext?: string;
 }
 /**
  * Run G-Eval for a single metric using OpenRouter.
@@ -1108,4 +1230,4 @@ declare const _default: {
     session: typeof session;
 };
-export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
+export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };