@fallom/trace 0.2.25 → 0.2.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import {
7
7
  evaluate,
8
8
  init,
9
9
  uploadResultsPublic
10
- } from "./chunk-3VWF2OJX.mjs";
10
+ } from "./chunk-2NGJF2JZ.mjs";
11
11
  import "./chunk-7P6ASYW6.mjs";
12
12
  export {
13
13
  DEFAULT_JUDGE_MODEL,
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-3HBKT4HK.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-MSI4HGK6.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-GZ6TE7G4.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-XBZ3ESNV.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-FTZVXPQN.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
@@ -0,0 +1,21 @@
1
+ import {
2
+ DEFAULT_JUDGE_MODEL,
3
+ _apiKey,
4
+ _baseUrl,
5
+ _initialized,
6
+ compareModels,
7
+ evaluate,
8
+ init,
9
+ uploadResultsPublic
10
+ } from "./chunk-TNNLTWRG.mjs";
11
+ import "./chunk-7P6ASYW6.mjs";
12
+ export {
13
+ DEFAULT_JUDGE_MODEL,
14
+ _apiKey,
15
+ _baseUrl,
16
+ _initialized,
17
+ compareModels,
18
+ evaluate,
19
+ init,
20
+ uploadResultsPublic
21
+ };
package/dist/index.d.mts CHANGED
@@ -130,6 +130,92 @@ declare function init$4(options?: {
130
130
  */
131
131
  declare function shutdown(): Promise<void>;
132
132
 
133
+ /**
134
+ * FallomSpan - Manual span for custom operations.
135
+ *
136
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
137
+ *
138
+ * @example
139
+ * ```typescript
140
+ * const session = fallom.session({ configKey: "my-agent", sessionId });
141
+ *
142
+ * // Create a manual span
143
+ * const span = session.span("rag.retrieve");
144
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
145
+ *
146
+ * const docs = await retrieveDocuments(userQuery);
147
+ * span.set({ "rag.documents.count": docs.length });
148
+ *
149
+ * span.end(); // Sends the span
150
+ * ```
151
+ */
152
+
153
+ interface SpanOptions {
154
+ /** Parent span ID for nested spans */
155
+ parentSpanId?: string;
156
+ /** Trace ID to continue an existing trace */
157
+ traceId?: string;
158
+ /** Span kind (defaults to "custom") */
159
+ kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
160
+ }
161
+ declare class FallomSpan {
162
+ private name;
163
+ private ctx;
164
+ private attrs;
165
+ private startTime;
166
+ private ended;
167
+ private _status;
168
+ private _errorMessage?;
169
+ readonly spanId: string;
170
+ readonly traceId: string;
171
+ readonly parentSpanId?: string;
172
+ readonly kind: string;
173
+ constructor(name: string, ctx: SessionContext, options?: SpanOptions);
174
+ /**
175
+ * Set attributes on the span.
176
+ * Can be called multiple times - attributes are merged.
177
+ */
178
+ set(attributes: Record<string, unknown>): this;
179
+ /**
180
+ * Mark the span as errored.
181
+ */
182
+ setError(error: Error | string): this;
183
+ /**
184
+ * Get span context for creating child spans.
185
+ */
186
+ context(): {
187
+ traceId: string;
188
+ spanId: string;
189
+ };
190
+ /**
191
+ * End the span and send it.
192
+ * Must be called for the span to be recorded.
193
+ */
194
+ end(): void;
195
+ }
196
+ /**
197
+ * Wrap a function to automatically create a span around it.
198
+ * Similar to Braintrust's wrapTraced().
199
+ *
200
+ * @example
201
+ * ```typescript
202
+ * const fetchDocuments = wrapTraced(
203
+ * session,
204
+ * "rag.fetch",
205
+ * async (query: string) => {
206
+ * const docs = await vectorDb.search(query);
207
+ * return docs;
208
+ * }
209
+ * );
210
+ *
211
+ * // Function input/output automatically captured
212
+ * const docs = await fetchDocuments("user query");
213
+ * ```
214
+ */
215
+ declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
216
+ span: (name: string, options?: SpanOptions) => FallomSpan;
217
+ }, name: string, fn: T, options?: SpanOptions): T;
218
+
133
219
  /**
134
220
  * FallomSession - Session-scoped tracing for concurrent-safe operations.
135
221
  */
@@ -162,6 +248,24 @@ declare class FallomSession {
162
248
  constructor(options: SessionOptions);
163
249
  /** Get the session context. */
164
250
  getContext(): SessionContext;
251
+ /**
252
+ * Create a manual span for custom operations.
253
+ *
254
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
255
+ * The span uses the session's context (configKey, sessionId, etc.).
256
+ *
257
+ * @example
258
+ * ```typescript
259
+ * const span = session.span("rag.retrieve");
260
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
261
+ *
262
+ * const docs = await retrieveDocuments(userQuery);
263
+ * span.set({ "rag.documents.count": docs.length });
264
+ *
265
+ * span.end(); // Must call to send the span
266
+ * ```
267
+ */
268
+ span(name: string, options?: SpanOptions): FallomSpan;
165
269
  /**
166
270
  * Get model assignment for this session (A/B testing).
167
271
  */
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
233
337
 
234
338
  type trace_FallomSession = FallomSession;
235
339
  declare const trace_FallomSession: typeof FallomSession;
340
+ type trace_FallomSpan = FallomSpan;
341
+ declare const trace_FallomSpan: typeof FallomSpan;
236
342
  type trace_SessionContext = SessionContext;
237
343
  type trace_SessionOptions = SessionOptions;
344
+ type trace_SpanOptions = SpanOptions;
238
345
  type trace_TraceContext = TraceContext;
239
346
  type trace_TraceData = TraceData;
240
347
  type trace_WrapAISDKOptions = WrapAISDKOptions;
241
348
  declare const trace_session: typeof session;
242
349
  declare const trace_shutdown: typeof shutdown;
350
+ declare const trace_wrapTraced: typeof wrapTraced;
243
351
  declare namespace trace {
244
- export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
352
+ export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
245
353
  }
246
354
 
247
355
  /**
@@ -445,6 +553,12 @@ interface EvalResult {
445
553
  input: string;
446
554
  output: string;
447
555
  systemMessage?: string;
556
+ /** Expected/golden output for comparison (if provided) */
557
+ expectedOutput?: string;
558
+ /** Retrieved documents/context for RAG evaluation */
559
+ context?: string[];
560
+ /** Additional metadata */
561
+ metadata?: Record<string, unknown>;
448
562
  model: string;
449
563
  isProduction: boolean;
450
564
  answerRelevancy?: number;
@@ -525,6 +639,12 @@ interface EvaluateOptions {
525
639
  /** List of metrics to run (built-in or custom). Default: all built-in metrics */
526
640
  metrics?: MetricInput[];
527
641
  judgeModel?: string;
642
+ /**
643
+ * Context to provide the LLM judge about the product/domain being evaluated.
644
+ * This helps the judge make better evaluations by understanding what features
645
+ * or capabilities are valid (e.g., won't mark valid features as hallucinations).
646
+ */
647
+ judgeContext?: string;
528
648
  name?: string;
529
649
  description?: string;
530
650
  verbose?: boolean;
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
560
680
  /**
561
681
  * Build the G-Eval prompt for the LLM judge.
562
682
  */
563
- declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
683
+ declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
564
684
  /**
565
685
  * Result of running G-Eval on a single metric.
566
686
  */
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
594
714
  traceSessionId?: string;
595
715
  /** Optional customer ID for tracing (e.g., organization ID) */
596
716
  traceCustomerId?: string;
717
+ /** Optional context to provide the judge about the product/domain being evaluated */
718
+ judgeContext?: string;
597
719
  }
598
720
  /**
599
721
  * Run G-Eval for a single metric using OpenRouter.
@@ -1108,4 +1230,4 @@ declare const _default: {
1108
1230
  session: typeof session;
1109
1231
  };
1110
1232
 
1111
- export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
1233
+ export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };
package/dist/index.d.ts CHANGED
@@ -130,6 +130,92 @@ declare function init$4(options?: {
130
130
  */
131
131
  declare function shutdown(): Promise<void>;
132
132
 
133
+ /**
134
+ * FallomSpan - Manual span for custom operations.
135
+ *
136
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
137
+ *
138
+ * @example
139
+ * ```typescript
140
+ * const session = fallom.session({ configKey: "my-agent", sessionId });
141
+ *
142
+ * // Create a manual span
143
+ * const span = session.span("rag.retrieve");
144
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
145
+ *
146
+ * const docs = await retrieveDocuments(userQuery);
147
+ * span.set({ "rag.documents.count": docs.length });
148
+ *
149
+ * span.end(); // Sends the span
150
+ * ```
151
+ */
152
+
153
+ interface SpanOptions {
154
+ /** Parent span ID for nested spans */
155
+ parentSpanId?: string;
156
+ /** Trace ID to continue an existing trace */
157
+ traceId?: string;
158
+ /** Span kind (defaults to "custom") */
159
+ kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
160
+ }
161
+ declare class FallomSpan {
162
+ private name;
163
+ private ctx;
164
+ private attrs;
165
+ private startTime;
166
+ private ended;
167
+ private _status;
168
+ private _errorMessage?;
169
+ readonly spanId: string;
170
+ readonly traceId: string;
171
+ readonly parentSpanId?: string;
172
+ readonly kind: string;
173
+ constructor(name: string, ctx: SessionContext, options?: SpanOptions);
174
+ /**
175
+ * Set attributes on the span.
176
+ * Can be called multiple times - attributes are merged.
177
+ */
178
+ set(attributes: Record<string, unknown>): this;
179
+ /**
180
+ * Mark the span as errored.
181
+ */
182
+ setError(error: Error | string): this;
183
+ /**
184
+ * Get span context for creating child spans.
185
+ */
186
+ context(): {
187
+ traceId: string;
188
+ spanId: string;
189
+ };
190
+ /**
191
+ * End the span and send it.
192
+ * Must be called for the span to be recorded.
193
+ */
194
+ end(): void;
195
+ }
196
+ /**
197
+ * Wrap a function to automatically create a span around it.
198
+ * Similar to Braintrust's wrapTraced().
199
+ *
200
+ * @example
201
+ * ```typescript
202
+ * const fetchDocuments = wrapTraced(
203
+ * session,
204
+ * "rag.fetch",
205
+ * async (query: string) => {
206
+ * const docs = await vectorDb.search(query);
207
+ * return docs;
208
+ * }
209
+ * );
210
+ *
211
+ * // Function input/output automatically captured
212
+ * const docs = await fetchDocuments("user query");
213
+ * ```
214
+ */
215
+ declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
216
+ span: (name: string, options?: SpanOptions) => FallomSpan;
217
+ }, name: string, fn: T, options?: SpanOptions): T;
218
+
133
219
  /**
134
220
  * FallomSession - Session-scoped tracing for concurrent-safe operations.
135
221
  */
@@ -162,6 +248,24 @@ declare class FallomSession {
162
248
  constructor(options: SessionOptions);
163
249
  /** Get the session context. */
164
250
  getContext(): SessionContext;
251
+ /**
252
+ * Create a manual span for custom operations.
253
+ *
254
+ * Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
255
+ * The span uses the session's context (configKey, sessionId, etc.).
256
+ *
257
+ * @example
258
+ * ```typescript
259
+ * const span = session.span("rag.retrieve");
260
+ * span.set({ "rag.query": userQuery, "rag.topK": 5 });
261
+ *
262
+ * const docs = await retrieveDocuments(userQuery);
263
+ * span.set({ "rag.documents.count": docs.length });
264
+ *
265
+ * span.end(); // Must call to send the span
266
+ * ```
267
+ */
268
+ span(name: string, options?: SpanOptions): FallomSpan;
165
269
  /**
166
270
  * Get model assignment for this session (A/B testing).
167
271
  */
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
233
337
 
234
338
  type trace_FallomSession = FallomSession;
235
339
  declare const trace_FallomSession: typeof FallomSession;
340
+ type trace_FallomSpan = FallomSpan;
341
+ declare const trace_FallomSpan: typeof FallomSpan;
236
342
  type trace_SessionContext = SessionContext;
237
343
  type trace_SessionOptions = SessionOptions;
344
+ type trace_SpanOptions = SpanOptions;
238
345
  type trace_TraceContext = TraceContext;
239
346
  type trace_TraceData = TraceData;
240
347
  type trace_WrapAISDKOptions = WrapAISDKOptions;
241
348
  declare const trace_session: typeof session;
242
349
  declare const trace_shutdown: typeof shutdown;
350
+ declare const trace_wrapTraced: typeof wrapTraced;
243
351
  declare namespace trace {
244
- export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
352
+ export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
245
353
  }
246
354
 
247
355
  /**
@@ -445,6 +553,12 @@ interface EvalResult {
445
553
  input: string;
446
554
  output: string;
447
555
  systemMessage?: string;
556
+ /** Expected/golden output for comparison (if provided) */
557
+ expectedOutput?: string;
558
+ /** Retrieved documents/context for RAG evaluation */
559
+ context?: string[];
560
+ /** Additional metadata */
561
+ metadata?: Record<string, unknown>;
448
562
  model: string;
449
563
  isProduction: boolean;
450
564
  answerRelevancy?: number;
@@ -525,6 +639,12 @@ interface EvaluateOptions {
525
639
  /** List of metrics to run (built-in or custom). Default: all built-in metrics */
526
640
  metrics?: MetricInput[];
527
641
  judgeModel?: string;
642
+ /**
643
+ * Context to provide the LLM judge about the product/domain being evaluated.
644
+ * This helps the judge make better evaluations by understanding what features
645
+ * or capabilities are valid (e.g., won't mark valid features as hallucinations).
646
+ */
647
+ judgeContext?: string;
528
648
  name?: string;
529
649
  description?: string;
530
650
  verbose?: boolean;
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
560
680
  /**
561
681
  * Build the G-Eval prompt for the LLM judge.
562
682
  */
563
- declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
683
+ declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
564
684
  /**
565
685
  * Result of running G-Eval on a single metric.
566
686
  */
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
594
714
  traceSessionId?: string;
595
715
  /** Optional customer ID for tracing (e.g., organization ID) */
596
716
  traceCustomerId?: string;
717
+ /** Optional context to provide the judge about the product/domain being evaluated */
718
+ judgeContext?: string;
597
719
  }
598
720
  /**
599
721
  * Run G-Eval for a single metric using OpenRouter.
@@ -1108,4 +1230,4 @@ declare const _default: {
1108
1230
  session: typeof session;
1109
1231
  };
1110
1232
 
1111
- export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
1233
+ export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };