@fallom/trace 0.2.25 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2NGJF2JZ.mjs +661 -0
- package/dist/chunk-3HBKT4HK.mjs +827 -0
- package/dist/{chunk-3VWF2OJX.mjs → chunk-FTZVXPQN.mjs} +25 -8
- package/dist/chunk-GZ6TE7G4.mjs +923 -0
- package/dist/chunk-MSI4HGK6.mjs +1051 -0
- package/dist/chunk-TNNLTWRG.mjs +1045 -0
- package/dist/chunk-XBZ3ESNV.mjs +824 -0
- package/dist/{core-Q3IHBEHB.mjs → core-46Z4Q54J.mjs} +1 -1
- package/dist/core-4L56QWI7.mjs +21 -0
- package/dist/core-5BF6KLNO.mjs +21 -0
- package/dist/core-DUG2SP2V.mjs +21 -0
- package/dist/core-JLHYFVYS.mjs +21 -0
- package/dist/core-NTEI2B5Z.mjs +21 -0
- package/dist/core-SL7FAAJN.mjs +21 -0
- package/dist/index.d.mts +125 -3
- package/dist/index.d.ts +125 -3
- package/dist/index.js +180 -21
- package/dist/index.mjs +131 -4
- package/package.json +2 -2
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-3HBKT4HK.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-MSI4HGK6.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-GZ6TE7G4.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-XBZ3ESNV.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-FTZVXPQN.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_JUDGE_MODEL,
|
|
3
|
+
_apiKey,
|
|
4
|
+
_baseUrl,
|
|
5
|
+
_initialized,
|
|
6
|
+
compareModels,
|
|
7
|
+
evaluate,
|
|
8
|
+
init,
|
|
9
|
+
uploadResultsPublic
|
|
10
|
+
} from "./chunk-TNNLTWRG.mjs";
|
|
11
|
+
import "./chunk-7P6ASYW6.mjs";
|
|
12
|
+
export {
|
|
13
|
+
DEFAULT_JUDGE_MODEL,
|
|
14
|
+
_apiKey,
|
|
15
|
+
_baseUrl,
|
|
16
|
+
_initialized,
|
|
17
|
+
compareModels,
|
|
18
|
+
evaluate,
|
|
19
|
+
init,
|
|
20
|
+
uploadResultsPublic
|
|
21
|
+
};
|
package/dist/index.d.mts
CHANGED
|
@@ -130,6 +130,92 @@ declare function init$4(options?: {
|
|
|
130
130
|
*/
|
|
131
131
|
declare function shutdown(): Promise<void>;
|
|
132
132
|
|
|
133
|
+
/**
|
|
134
|
+
* FallomSpan - Manual span for custom operations.
|
|
135
|
+
*
|
|
136
|
+
* Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
|
|
137
|
+
*
|
|
138
|
+
* @example
|
|
139
|
+
* ```typescript
|
|
140
|
+
* const session = fallom.session({ configKey: "my-agent", sessionId });
|
|
141
|
+
*
|
|
142
|
+
* // Create a manual span
|
|
143
|
+
* const span = session.span("rag.retrieve");
|
|
144
|
+
* span.set({ "rag.query": userQuery, "rag.topK": 5 });
|
|
145
|
+
*
|
|
146
|
+
* const docs = await retrieveDocuments(userQuery);
|
|
147
|
+
* span.set({ "rag.documents.count": docs.length });
|
|
148
|
+
*
|
|
149
|
+
* span.end(); // Sends the span
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
|
|
153
|
+
interface SpanOptions {
|
|
154
|
+
/** Parent span ID for nested spans */
|
|
155
|
+
parentSpanId?: string;
|
|
156
|
+
/** Trace ID to continue an existing trace */
|
|
157
|
+
traceId?: string;
|
|
158
|
+
/** Span kind (defaults to "custom") */
|
|
159
|
+
kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
|
|
160
|
+
}
|
|
161
|
+
declare class FallomSpan {
|
|
162
|
+
private name;
|
|
163
|
+
private ctx;
|
|
164
|
+
private attrs;
|
|
165
|
+
private startTime;
|
|
166
|
+
private ended;
|
|
167
|
+
private _status;
|
|
168
|
+
private _errorMessage?;
|
|
169
|
+
readonly spanId: string;
|
|
170
|
+
readonly traceId: string;
|
|
171
|
+
readonly parentSpanId?: string;
|
|
172
|
+
readonly kind: string;
|
|
173
|
+
constructor(name: string, ctx: SessionContext, options?: SpanOptions);
|
|
174
|
+
/**
|
|
175
|
+
* Set attributes on the span.
|
|
176
|
+
* Can be called multiple times - attributes are merged.
|
|
177
|
+
*/
|
|
178
|
+
set(attributes: Record<string, unknown>): this;
|
|
179
|
+
/**
|
|
180
|
+
* Mark the span as errored.
|
|
181
|
+
*/
|
|
182
|
+
setError(error: Error | string): this;
|
|
183
|
+
/**
|
|
184
|
+
* Get span context for creating child spans.
|
|
185
|
+
*/
|
|
186
|
+
context(): {
|
|
187
|
+
traceId: string;
|
|
188
|
+
spanId: string;
|
|
189
|
+
};
|
|
190
|
+
/**
|
|
191
|
+
* End the span and send it.
|
|
192
|
+
* Must be called for the span to be recorded.
|
|
193
|
+
*/
|
|
194
|
+
end(): void;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Wrap a function to automatically create a span around it.
|
|
198
|
+
* Similar to Braintrust's wrapTraced().
|
|
199
|
+
*
|
|
200
|
+
* @example
|
|
201
|
+
* ```typescript
|
|
202
|
+
* const fetchDocuments = wrapTraced(
|
|
203
|
+
* session,
|
|
204
|
+
* "rag.fetch",
|
|
205
|
+
* async (query: string) => {
|
|
206
|
+
* const docs = await vectorDb.search(query);
|
|
207
|
+
* return docs;
|
|
208
|
+
* }
|
|
209
|
+
* );
|
|
210
|
+
*
|
|
211
|
+
* // Function input/output automatically captured
|
|
212
|
+
* const docs = await fetchDocuments("user query");
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
|
|
216
|
+
span: (name: string, options?: SpanOptions) => FallomSpan;
|
|
217
|
+
}, name: string, fn: T, options?: SpanOptions): T;
|
|
218
|
+
|
|
133
219
|
/**
|
|
134
220
|
* FallomSession - Session-scoped tracing for concurrent-safe operations.
|
|
135
221
|
*/
|
|
@@ -162,6 +248,24 @@ declare class FallomSession {
|
|
|
162
248
|
constructor(options: SessionOptions);
|
|
163
249
|
/** Get the session context. */
|
|
164
250
|
getContext(): SessionContext;
|
|
251
|
+
/**
|
|
252
|
+
* Create a manual span for custom operations.
|
|
253
|
+
*
|
|
254
|
+
* Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
|
|
255
|
+
* The span uses the session's context (configKey, sessionId, etc.).
|
|
256
|
+
*
|
|
257
|
+
* @example
|
|
258
|
+
* ```typescript
|
|
259
|
+
* const span = session.span("rag.retrieve");
|
|
260
|
+
* span.set({ "rag.query": userQuery, "rag.topK": 5 });
|
|
261
|
+
*
|
|
262
|
+
* const docs = await retrieveDocuments(userQuery);
|
|
263
|
+
* span.set({ "rag.documents.count": docs.length });
|
|
264
|
+
*
|
|
265
|
+
* span.end(); // Must call to send the span
|
|
266
|
+
* ```
|
|
267
|
+
*/
|
|
268
|
+
span(name: string, options?: SpanOptions): FallomSpan;
|
|
165
269
|
/**
|
|
166
270
|
* Get model assignment for this session (A/B testing).
|
|
167
271
|
*/
|
|
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
|
|
|
233
337
|
|
|
234
338
|
type trace_FallomSession = FallomSession;
|
|
235
339
|
declare const trace_FallomSession: typeof FallomSession;
|
|
340
|
+
type trace_FallomSpan = FallomSpan;
|
|
341
|
+
declare const trace_FallomSpan: typeof FallomSpan;
|
|
236
342
|
type trace_SessionContext = SessionContext;
|
|
237
343
|
type trace_SessionOptions = SessionOptions;
|
|
344
|
+
type trace_SpanOptions = SpanOptions;
|
|
238
345
|
type trace_TraceContext = TraceContext;
|
|
239
346
|
type trace_TraceData = TraceData;
|
|
240
347
|
type trace_WrapAISDKOptions = WrapAISDKOptions;
|
|
241
348
|
declare const trace_session: typeof session;
|
|
242
349
|
declare const trace_shutdown: typeof shutdown;
|
|
350
|
+
declare const trace_wrapTraced: typeof wrapTraced;
|
|
243
351
|
declare namespace trace {
|
|
244
|
-
export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
|
|
352
|
+
export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
|
|
245
353
|
}
|
|
246
354
|
|
|
247
355
|
/**
|
|
@@ -445,6 +553,12 @@ interface EvalResult {
|
|
|
445
553
|
input: string;
|
|
446
554
|
output: string;
|
|
447
555
|
systemMessage?: string;
|
|
556
|
+
/** Expected/golden output for comparison (if provided) */
|
|
557
|
+
expectedOutput?: string;
|
|
558
|
+
/** Retrieved documents/context for RAG evaluation */
|
|
559
|
+
context?: string[];
|
|
560
|
+
/** Additional metadata */
|
|
561
|
+
metadata?: Record<string, unknown>;
|
|
448
562
|
model: string;
|
|
449
563
|
isProduction: boolean;
|
|
450
564
|
answerRelevancy?: number;
|
|
@@ -525,6 +639,12 @@ interface EvaluateOptions {
|
|
|
525
639
|
/** List of metrics to run (built-in or custom). Default: all built-in metrics */
|
|
526
640
|
metrics?: MetricInput[];
|
|
527
641
|
judgeModel?: string;
|
|
642
|
+
/**
|
|
643
|
+
* Context to provide the LLM judge about the product/domain being evaluated.
|
|
644
|
+
* This helps the judge make better evaluations by understanding what features
|
|
645
|
+
* or capabilities are valid (e.g., won't mark valid features as hallucinations).
|
|
646
|
+
*/
|
|
647
|
+
judgeContext?: string;
|
|
528
648
|
name?: string;
|
|
529
649
|
description?: string;
|
|
530
650
|
verbose?: boolean;
|
|
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
|
|
|
560
680
|
/**
|
|
561
681
|
* Build the G-Eval prompt for the LLM judge.
|
|
562
682
|
*/
|
|
563
|
-
declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
|
|
683
|
+
declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
|
|
564
684
|
/**
|
|
565
685
|
* Result of running G-Eval on a single metric.
|
|
566
686
|
*/
|
|
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
|
|
|
594
714
|
traceSessionId?: string;
|
|
595
715
|
/** Optional customer ID for tracing (e.g., organization ID) */
|
|
596
716
|
traceCustomerId?: string;
|
|
717
|
+
/** Optional context to provide the judge about the product/domain being evaluated */
|
|
718
|
+
judgeContext?: string;
|
|
597
719
|
}
|
|
598
720
|
/**
|
|
599
721
|
* Run G-Eval for a single metric using OpenRouter.
|
|
@@ -1108,4 +1230,4 @@ declare const _default: {
|
|
|
1108
1230
|
session: typeof session;
|
|
1109
1231
|
};
|
|
1110
1232
|
|
|
1111
|
-
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
|
|
1233
|
+
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };
|
package/dist/index.d.ts
CHANGED
|
@@ -130,6 +130,92 @@ declare function init$4(options?: {
|
|
|
130
130
|
*/
|
|
131
131
|
declare function shutdown(): Promise<void>;
|
|
132
132
|
|
|
133
|
+
/**
|
|
134
|
+
* FallomSpan - Manual span for custom operations.
|
|
135
|
+
*
|
|
136
|
+
* Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
|
|
137
|
+
*
|
|
138
|
+
* @example
|
|
139
|
+
* ```typescript
|
|
140
|
+
* const session = fallom.session({ configKey: "my-agent", sessionId });
|
|
141
|
+
*
|
|
142
|
+
* // Create a manual span
|
|
143
|
+
* const span = session.span("rag.retrieve");
|
|
144
|
+
* span.set({ "rag.query": userQuery, "rag.topK": 5 });
|
|
145
|
+
*
|
|
146
|
+
* const docs = await retrieveDocuments(userQuery);
|
|
147
|
+
* span.set({ "rag.documents.count": docs.length });
|
|
148
|
+
*
|
|
149
|
+
* span.end(); // Sends the span
|
|
150
|
+
* ```
|
|
151
|
+
*/
|
|
152
|
+
|
|
153
|
+
interface SpanOptions {
|
|
154
|
+
/** Parent span ID for nested spans */
|
|
155
|
+
parentSpanId?: string;
|
|
156
|
+
/** Trace ID to continue an existing trace */
|
|
157
|
+
traceId?: string;
|
|
158
|
+
/** Span kind (defaults to "custom") */
|
|
159
|
+
kind?: "custom" | "tool" | "retrieval" | "preprocessing" | "postprocessing";
|
|
160
|
+
}
|
|
161
|
+
declare class FallomSpan {
|
|
162
|
+
private name;
|
|
163
|
+
private ctx;
|
|
164
|
+
private attrs;
|
|
165
|
+
private startTime;
|
|
166
|
+
private ended;
|
|
167
|
+
private _status;
|
|
168
|
+
private _errorMessage?;
|
|
169
|
+
readonly spanId: string;
|
|
170
|
+
readonly traceId: string;
|
|
171
|
+
readonly parentSpanId?: string;
|
|
172
|
+
readonly kind: string;
|
|
173
|
+
constructor(name: string, ctx: SessionContext, options?: SpanOptions);
|
|
174
|
+
/**
|
|
175
|
+
* Set attributes on the span.
|
|
176
|
+
* Can be called multiple times - attributes are merged.
|
|
177
|
+
*/
|
|
178
|
+
set(attributes: Record<string, unknown>): this;
|
|
179
|
+
/**
|
|
180
|
+
* Mark the span as errored.
|
|
181
|
+
*/
|
|
182
|
+
setError(error: Error | string): this;
|
|
183
|
+
/**
|
|
184
|
+
* Get span context for creating child spans.
|
|
185
|
+
*/
|
|
186
|
+
context(): {
|
|
187
|
+
traceId: string;
|
|
188
|
+
spanId: string;
|
|
189
|
+
};
|
|
190
|
+
/**
|
|
191
|
+
* End the span and send it.
|
|
192
|
+
* Must be called for the span to be recorded.
|
|
193
|
+
*/
|
|
194
|
+
end(): void;
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Wrap a function to automatically create a span around it.
|
|
198
|
+
* Similar to Braintrust's wrapTraced().
|
|
199
|
+
*
|
|
200
|
+
* @example
|
|
201
|
+
* ```typescript
|
|
202
|
+
* const fetchDocuments = wrapTraced(
|
|
203
|
+
* session,
|
|
204
|
+
* "rag.fetch",
|
|
205
|
+
* async (query: string) => {
|
|
206
|
+
* const docs = await vectorDb.search(query);
|
|
207
|
+
* return docs;
|
|
208
|
+
* }
|
|
209
|
+
* );
|
|
210
|
+
*
|
|
211
|
+
* // Function input/output automatically captured
|
|
212
|
+
* const docs = await fetchDocuments("user query");
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
declare function wrapTraced<T extends (...args: any[]) => Promise<any>>(session: {
|
|
216
|
+
span: (name: string, options?: SpanOptions) => FallomSpan;
|
|
217
|
+
}, name: string, fn: T, options?: SpanOptions): T;
|
|
218
|
+
|
|
133
219
|
/**
|
|
134
220
|
* FallomSession - Session-scoped tracing for concurrent-safe operations.
|
|
135
221
|
*/
|
|
@@ -162,6 +248,24 @@ declare class FallomSession {
|
|
|
162
248
|
constructor(options: SessionOptions);
|
|
163
249
|
/** Get the session context. */
|
|
164
250
|
getContext(): SessionContext;
|
|
251
|
+
/**
|
|
252
|
+
* Create a manual span for custom operations.
|
|
253
|
+
*
|
|
254
|
+
* Use for non-LLM operations like RAG retrieval, preprocessing, tool execution, etc.
|
|
255
|
+
* The span uses the session's context (configKey, sessionId, etc.).
|
|
256
|
+
*
|
|
257
|
+
* @example
|
|
258
|
+
* ```typescript
|
|
259
|
+
* const span = session.span("rag.retrieve");
|
|
260
|
+
* span.set({ "rag.query": userQuery, "rag.topK": 5 });
|
|
261
|
+
*
|
|
262
|
+
* const docs = await retrieveDocuments(userQuery);
|
|
263
|
+
* span.set({ "rag.documents.count": docs.length });
|
|
264
|
+
*
|
|
265
|
+
* span.end(); // Must call to send the span
|
|
266
|
+
* ```
|
|
267
|
+
*/
|
|
268
|
+
span(name: string, options?: SpanOptions): FallomSpan;
|
|
165
269
|
/**
|
|
166
270
|
* Get model assignment for this session (A/B testing).
|
|
167
271
|
*/
|
|
@@ -233,15 +337,19 @@ declare function session(options: SessionOptions): FallomSession;
|
|
|
233
337
|
|
|
234
338
|
type trace_FallomSession = FallomSession;
|
|
235
339
|
declare const trace_FallomSession: typeof FallomSession;
|
|
340
|
+
type trace_FallomSpan = FallomSpan;
|
|
341
|
+
declare const trace_FallomSpan: typeof FallomSpan;
|
|
236
342
|
type trace_SessionContext = SessionContext;
|
|
237
343
|
type trace_SessionOptions = SessionOptions;
|
|
344
|
+
type trace_SpanOptions = SpanOptions;
|
|
238
345
|
type trace_TraceContext = TraceContext;
|
|
239
346
|
type trace_TraceData = TraceData;
|
|
240
347
|
type trace_WrapAISDKOptions = WrapAISDKOptions;
|
|
241
348
|
declare const trace_session: typeof session;
|
|
242
349
|
declare const trace_shutdown: typeof shutdown;
|
|
350
|
+
declare const trace_wrapTraced: typeof wrapTraced;
|
|
243
351
|
declare namespace trace {
|
|
244
|
-
export { trace_FallomSession as FallomSession, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown };
|
|
352
|
+
export { trace_FallomSession as FallomSession, trace_FallomSpan as FallomSpan, type trace_SessionContext as SessionContext, type trace_SessionOptions as SessionOptions, type trace_SpanOptions as SpanOptions, type trace_TraceContext as TraceContext, type trace_TraceData as TraceData, type trace_WrapAISDKOptions as WrapAISDKOptions, init$4 as init, trace_session as session, trace_shutdown as shutdown, trace_wrapTraced as wrapTraced };
|
|
245
353
|
}
|
|
246
354
|
|
|
247
355
|
/**
|
|
@@ -445,6 +553,12 @@ interface EvalResult {
|
|
|
445
553
|
input: string;
|
|
446
554
|
output: string;
|
|
447
555
|
systemMessage?: string;
|
|
556
|
+
/** Expected/golden output for comparison (if provided) */
|
|
557
|
+
expectedOutput?: string;
|
|
558
|
+
/** Retrieved documents/context for RAG evaluation */
|
|
559
|
+
context?: string[];
|
|
560
|
+
/** Additional metadata */
|
|
561
|
+
metadata?: Record<string, unknown>;
|
|
448
562
|
model: string;
|
|
449
563
|
isProduction: boolean;
|
|
450
564
|
answerRelevancy?: number;
|
|
@@ -525,6 +639,12 @@ interface EvaluateOptions {
|
|
|
525
639
|
/** List of metrics to run (built-in or custom). Default: all built-in metrics */
|
|
526
640
|
metrics?: MetricInput[];
|
|
527
641
|
judgeModel?: string;
|
|
642
|
+
/**
|
|
643
|
+
* Context to provide the LLM judge about the product/domain being evaluated.
|
|
644
|
+
* This helps the judge make better evaluations by understanding what features
|
|
645
|
+
* or capabilities are valid (e.g., won't mark valid features as hallucinations).
|
|
646
|
+
*/
|
|
647
|
+
judgeContext?: string;
|
|
528
648
|
name?: string;
|
|
529
649
|
description?: string;
|
|
530
650
|
verbose?: boolean;
|
|
@@ -560,7 +680,7 @@ declare const METRIC_PROMPTS: Record<MetricName, {
|
|
|
560
680
|
/**
|
|
561
681
|
* Build the G-Eval prompt for the LLM judge.
|
|
562
682
|
*/
|
|
563
|
-
declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string): string;
|
|
683
|
+
declare function buildGEvalPrompt(criteria: string, steps: string[], systemMessage: string | undefined, inputText: string, outputText: string, judgeContext?: string): string;
|
|
564
684
|
/**
|
|
565
685
|
* Result of running G-Eval on a single metric.
|
|
566
686
|
*/
|
|
@@ -594,6 +714,8 @@ interface RunGEvalOptions {
|
|
|
594
714
|
traceSessionId?: string;
|
|
595
715
|
/** Optional customer ID for tracing (e.g., organization ID) */
|
|
596
716
|
traceCustomerId?: string;
|
|
717
|
+
/** Optional context to provide the judge about the product/domain being evaluated */
|
|
718
|
+
judgeContext?: string;
|
|
597
719
|
}
|
|
598
720
|
/**
|
|
599
721
|
* Run G-Eval for a single metric using OpenRouter.
|
|
@@ -1108,4 +1230,4 @@ declare const _default: {
|
|
|
1108
1230
|
session: typeof session;
|
|
1109
1231
|
};
|
|
1110
1232
|
|
|
1111
|
-
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace };
|
|
1233
|
+
export { type CompareModelsOptions, type DatasetItem, type EvalResult, type EvaluateOptions, FallomExporter, type FallomExporterOptions, FallomSession, FallomSpan, type GEvalScore, type InitOptions, type MetricName, type PromptResult, type SessionContext, type SessionOptions, type SpanOptions, buildGEvalPrompt, calculateAggregateScores, clearMastraPrompt, _default as default, detectRegression, evals, init, models, prompts, runGEval, session, setMastraPrompt, setMastraPromptAB, trace, wrapTraced };
|