braintrust 0.0.175 → 0.0.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser.js +15 -0
- package/dist/browser.mjs +15 -0
- package/dist/cli.js +100 -21
- package/dist/index.d.mts +50 -51
- package/dist/index.d.ts +50 -51
- package/dist/index.js +136 -55
- package/dist/index.mjs +135 -53
- package/package.json +2 -2
package/dist/browser.js
CHANGED
|
@@ -3423,6 +3423,21 @@ function renderMessage(render, message) {
|
|
|
3423
3423
|
return _exhaustiveCheck;
|
|
3424
3424
|
}
|
|
3425
3425
|
})
|
|
3426
|
+
} : {},
|
|
3427
|
+
..."tool_calls" in message ? {
|
|
3428
|
+
tool_calls: isEmpty(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
3429
|
+
return {
|
|
3430
|
+
type: t.type,
|
|
3431
|
+
id: render(t.id),
|
|
3432
|
+
function: {
|
|
3433
|
+
name: render(t.function.name),
|
|
3434
|
+
arguments: render(t.function.arguments)
|
|
3435
|
+
}
|
|
3436
|
+
};
|
|
3437
|
+
})
|
|
3438
|
+
} : {},
|
|
3439
|
+
..."tool_call_id" in message ? {
|
|
3440
|
+
tool_call_id: render(message.tool_call_id)
|
|
3426
3441
|
} : {}
|
|
3427
3442
|
};
|
|
3428
3443
|
}
|
package/dist/browser.mjs
CHANGED
|
@@ -3373,6 +3373,21 @@ function renderMessage(render, message) {
|
|
|
3373
3373
|
return _exhaustiveCheck;
|
|
3374
3374
|
}
|
|
3375
3375
|
})
|
|
3376
|
+
} : {},
|
|
3377
|
+
..."tool_calls" in message ? {
|
|
3378
|
+
tool_calls: isEmpty(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
3379
|
+
return {
|
|
3380
|
+
type: t.type,
|
|
3381
|
+
id: render(t.id),
|
|
3382
|
+
function: {
|
|
3383
|
+
name: render(t.function.name),
|
|
3384
|
+
arguments: render(t.function.arguments)
|
|
3385
|
+
}
|
|
3386
|
+
};
|
|
3387
|
+
})
|
|
3388
|
+
} : {},
|
|
3389
|
+
..."tool_call_id" in message ? {
|
|
3390
|
+
tool_call_id: render(message.tool_call_id)
|
|
3376
3391
|
} : {}
|
|
3377
3392
|
};
|
|
3378
3393
|
}
|
package/dist/cli.js
CHANGED
|
@@ -1236,7 +1236,7 @@ var require_package = __commonJS({
|
|
|
1236
1236
|
"package.json"(exports2, module2) {
|
|
1237
1237
|
module2.exports = {
|
|
1238
1238
|
name: "braintrust",
|
|
1239
|
-
version: "0.0.175",
|
|
1239
|
+
version: "0.0.177",
|
|
1240
1240
|
description: "SDK for integrating Braintrust",
|
|
1241
1241
|
repository: {
|
|
1242
1242
|
type: "git",
|
|
@@ -1311,7 +1311,7 @@ var require_package = __commonJS({
|
|
|
1311
1311
|
},
|
|
1312
1312
|
dependencies: {
|
|
1313
1313
|
"@ai-sdk/provider": "^1.0.1",
|
|
1314
|
-
"@braintrust/core": "0.0.
|
|
1314
|
+
"@braintrust/core": "0.0.71",
|
|
1315
1315
|
"@next/env": "^14.2.3",
|
|
1316
1316
|
"@vercel/functions": "^1.0.2",
|
|
1317
1317
|
ai: "^3.2.16",
|
|
@@ -4392,6 +4392,21 @@ function renderMessage(render, message) {
|
|
|
4392
4392
|
return _exhaustiveCheck;
|
|
4393
4393
|
}
|
|
4394
4394
|
})
|
|
4395
|
+
} : {},
|
|
4396
|
+
..."tool_calls" in message ? {
|
|
4397
|
+
tool_calls: isEmpty(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
4398
|
+
return {
|
|
4399
|
+
type: t.type,
|
|
4400
|
+
id: render(t.id),
|
|
4401
|
+
function: {
|
|
4402
|
+
name: render(t.function.name),
|
|
4403
|
+
arguments: render(t.function.arguments)
|
|
4404
|
+
}
|
|
4405
|
+
};
|
|
4406
|
+
})
|
|
4407
|
+
} : {},
|
|
4408
|
+
..."tool_call_id" in message ? {
|
|
4409
|
+
tool_call_id: render(message.tool_call_id)
|
|
4395
4410
|
} : {}
|
|
4396
4411
|
};
|
|
4397
4412
|
}
|
|
@@ -4574,9 +4589,7 @@ var BarProgressReporter = class {
|
|
|
4574
4589
|
};
|
|
4575
4590
|
|
|
4576
4591
|
// src/framework.ts
|
|
4577
|
-
var import_chalk = __toESM(require("chalk"));
|
|
4578
4592
|
var import_core2 = require("@braintrust/core");
|
|
4579
|
-
var import_pluralize = __toESM(require("pluralize"));
|
|
4580
4593
|
|
|
4581
4594
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
4582
4595
|
function initialParams(fn) {
|
|
@@ -5682,6 +5695,8 @@ function waterfall(tasks, callback) {
|
|
|
5682
5695
|
var waterfall$1 = awaitify(waterfall);
|
|
5683
5696
|
|
|
5684
5697
|
// src/framework.ts
|
|
5698
|
+
var import_chalk = __toESM(require("chalk"));
|
|
5699
|
+
var import_pluralize = __toESM(require("pluralize"));
|
|
5685
5700
|
var EvalResultWithSummary = class {
|
|
5686
5701
|
constructor(summary, results) {
|
|
5687
5702
|
this.summary = summary;
|
|
@@ -5858,7 +5873,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5858
5873
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5859
5874
|
await rootSpan.traced(
|
|
5860
5875
|
async (span) => {
|
|
5861
|
-
const outputResult = evaluator.task(datum.input, {
|
|
5876
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5877
|
+
meta,
|
|
5878
|
+
metadata,
|
|
5879
|
+
span
|
|
5880
|
+
});
|
|
5862
5881
|
if (outputResult instanceof Promise) {
|
|
5863
5882
|
output = await outputResult;
|
|
5864
5883
|
} else {
|
|
@@ -6720,6 +6739,7 @@ async function uploadHandleBundles({
|
|
|
6720
6739
|
function_data: {
|
|
6721
6740
|
type: "prompt"
|
|
6722
6741
|
},
|
|
6742
|
+
function_type: prompt.functionType,
|
|
6723
6743
|
prompt_data,
|
|
6724
6744
|
if_exists: prompt.ifExists
|
|
6725
6745
|
});
|
|
@@ -7045,8 +7065,8 @@ async function bundleCommand(args) {
|
|
|
7045
7065
|
}
|
|
7046
7066
|
|
|
7047
7067
|
// src/cli-util/pull.ts
|
|
7048
|
-
var
|
|
7049
|
-
var
|
|
7068
|
+
var import_typespecs5 = require("@braintrust/core/typespecs");
|
|
7069
|
+
var import_zod4 = require("zod");
|
|
7050
7070
|
var import_promises = __toESM(require("fs/promises"));
|
|
7051
7071
|
var import_util4 = __toESM(require("util"));
|
|
7052
7072
|
var import_slugify3 = __toESM(require("slugify"));
|
|
@@ -7056,7 +7076,7 @@ var import_core4 = require("@braintrust/core");
|
|
|
7056
7076
|
// src/framework2.ts
|
|
7057
7077
|
var import_path4 = __toESM(require("path"));
|
|
7058
7078
|
var import_slugify2 = __toESM(require("slugify"));
|
|
7059
|
-
var
|
|
7079
|
+
var import_typespecs4 = require("@braintrust/core/typespecs");
|
|
7060
7080
|
var ProjectBuilder = class {
|
|
7061
7081
|
create(opts) {
|
|
7062
7082
|
return new Project(opts);
|
|
@@ -7068,12 +7088,14 @@ var Project = class {
|
|
|
7068
7088
|
id;
|
|
7069
7089
|
tools;
|
|
7070
7090
|
prompts;
|
|
7091
|
+
scorers;
|
|
7071
7092
|
constructor(args) {
|
|
7072
7093
|
_initializeSpanContext();
|
|
7073
7094
|
this.name = "name" in args ? args.name : void 0;
|
|
7074
7095
|
this.id = "id" in args ? args.id : void 0;
|
|
7075
7096
|
this.tools = new ToolBuilder(this);
|
|
7076
7097
|
this.prompts = new PromptBuilder(this);
|
|
7098
|
+
this.scorers = new ScorerBuilder(this);
|
|
7077
7099
|
}
|
|
7078
7100
|
};
|
|
7079
7101
|
var ToolBuilder = class {
|
|
@@ -7107,6 +7129,70 @@ var ToolBuilder = class {
|
|
|
7107
7129
|
return tool;
|
|
7108
7130
|
}
|
|
7109
7131
|
};
|
|
7132
|
+
var ScorerBuilder = class {
|
|
7133
|
+
constructor(project) {
|
|
7134
|
+
this.project = project;
|
|
7135
|
+
}
|
|
7136
|
+
taskCounter = 0;
|
|
7137
|
+
create(opts) {
|
|
7138
|
+
this.taskCounter++;
|
|
7139
|
+
let resolvedName = opts.name;
|
|
7140
|
+
if (!resolvedName && "handler" in opts) {
|
|
7141
|
+
resolvedName = opts.handler.name;
|
|
7142
|
+
}
|
|
7143
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
7144
|
+
resolvedName = `Scorer ${import_path4.default.basename(__filename)} ${this.taskCounter}`;
|
|
7145
|
+
}
|
|
7146
|
+
const slug = opts.slug ?? (0, import_slugify2.default)(resolvedName, { lower: true, strict: true });
|
|
7147
|
+
if ("handler" in opts) {
|
|
7148
|
+
const scorer = new CodeFunction(this.project, {
|
|
7149
|
+
...opts,
|
|
7150
|
+
name: resolvedName,
|
|
7151
|
+
slug,
|
|
7152
|
+
type: "scorer"
|
|
7153
|
+
});
|
|
7154
|
+
if (globalThis._lazy_load) {
|
|
7155
|
+
globalThis._evals.functions.push(
|
|
7156
|
+
scorer
|
|
7157
|
+
);
|
|
7158
|
+
}
|
|
7159
|
+
} else {
|
|
7160
|
+
const promptBlock = "messages" in opts ? {
|
|
7161
|
+
type: "chat",
|
|
7162
|
+
messages: opts.messages
|
|
7163
|
+
} : {
|
|
7164
|
+
type: "completion",
|
|
7165
|
+
content: opts.prompt
|
|
7166
|
+
};
|
|
7167
|
+
const promptData = {
|
|
7168
|
+
prompt: promptBlock,
|
|
7169
|
+
options: {
|
|
7170
|
+
model: opts.model,
|
|
7171
|
+
params: opts.params
|
|
7172
|
+
},
|
|
7173
|
+
parser: {
|
|
7174
|
+
type: "llm_classifier",
|
|
7175
|
+
use_cot: opts.useCot,
|
|
7176
|
+
choice_scores: opts.choiceScores
|
|
7177
|
+
}
|
|
7178
|
+
};
|
|
7179
|
+
const codePrompt = new CodePrompt(
|
|
7180
|
+
this.project,
|
|
7181
|
+
promptData,
|
|
7182
|
+
[],
|
|
7183
|
+
{
|
|
7184
|
+
...opts,
|
|
7185
|
+
name: resolvedName,
|
|
7186
|
+
slug
|
|
7187
|
+
},
|
|
7188
|
+
"scorer"
|
|
7189
|
+
);
|
|
7190
|
+
if (globalThis._lazy_load) {
|
|
7191
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
7192
|
+
}
|
|
7193
|
+
}
|
|
7194
|
+
}
|
|
7195
|
+
};
|
|
7110
7196
|
var CodeFunction = class {
|
|
7111
7197
|
constructor(project, opts) {
|
|
7112
7198
|
this.project = project;
|
|
@@ -7146,8 +7232,9 @@ var CodePrompt = class {
|
|
|
7146
7232
|
ifExists;
|
|
7147
7233
|
description;
|
|
7148
7234
|
id;
|
|
7235
|
+
functionType;
|
|
7149
7236
|
toolFunctions;
|
|
7150
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
7237
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
7151
7238
|
this.project = project;
|
|
7152
7239
|
this.name = opts.name;
|
|
7153
7240
|
this.slug = opts.slug;
|
|
@@ -7156,17 +7243,9 @@ var CodePrompt = class {
|
|
|
7156
7243
|
this.ifExists = opts.ifExists;
|
|
7157
7244
|
this.description = opts.description;
|
|
7158
7245
|
this.id = opts.id;
|
|
7246
|
+
this.functionType = functionType;
|
|
7159
7247
|
}
|
|
7160
7248
|
};
|
|
7161
|
-
var toolFunctionDefinitionSchema = import_zod4.z.object({
|
|
7162
|
-
type: import_zod4.z.literal("function"),
|
|
7163
|
-
function: import_zod4.z.object({
|
|
7164
|
-
name: import_zod4.z.string(),
|
|
7165
|
-
description: import_zod4.z.string().optional(),
|
|
7166
|
-
parameters: import_zod4.z.record(import_zod4.z.unknown()).optional(),
|
|
7167
|
-
strict: import_zod4.z.boolean().optional()
|
|
7168
|
-
})
|
|
7169
|
-
});
|
|
7170
7249
|
var PromptBuilder = class {
|
|
7171
7250
|
constructor(project) {
|
|
7172
7251
|
this.project = project;
|
|
@@ -7235,11 +7314,11 @@ async function pullCommand(args) {
|
|
|
7235
7314
|
...args.id ? { ids: [args.id] } : {},
|
|
7236
7315
|
...args.version ? { version: (0, import_core4.loadPrettyXact)(args.version) } : {}
|
|
7237
7316
|
});
|
|
7238
|
-
const functionObjects =
|
|
7317
|
+
const functionObjects = import_zod4.z.object({ objects: import_zod4.z.array(import_zod4.z.unknown()) }).parse(functions);
|
|
7239
7318
|
const projectNameToFunctions = {};
|
|
7240
7319
|
const projectNameIdMap = new ProjectNameIdMap();
|
|
7241
7320
|
for (const rawFunc of functionObjects.objects) {
|
|
7242
|
-
const parsedFunc =
|
|
7321
|
+
const parsedFunc = import_typespecs5.functionSchema.safeParse(rawFunc);
|
|
7243
7322
|
if (!parsedFunc.success) {
|
|
7244
7323
|
const id = typeof rawFunc === "object" && rawFunc && "id" in rawFunc ? ` ${rawFunc.id}` : "";
|
|
7245
7324
|
console.warn(
|
|
@@ -7388,7 +7467,7 @@ function makeFunctionDefinition({
|
|
|
7388
7467
|
const objectType = "prompt";
|
|
7389
7468
|
const prompt = func.prompt_data.prompt;
|
|
7390
7469
|
const promptContents = prompt.type === "completion" ? `prompt: ${doubleQuote(prompt.content)}` : `messages: ${import_util4.default.inspect(prompt.messages, { depth: null }).trimStart()}`;
|
|
7391
|
-
const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ?
|
|
7470
|
+
const rawToolsParsed = prompt.type === "chat" && prompt.tools && prompt.tools.length > 0 ? import_zod4.z.array(import_typespecs4.toolFunctionDefinitionSchema).safeParse(JSON.parse(prompt.tools)) : void 0;
|
|
7392
7471
|
if (rawToolsParsed && !rawToolsParsed.success) {
|
|
7393
7472
|
console.warn(
|
|
7394
7473
|
warning(
|
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { LogFeedbackFullArgs, ExperimentEvent, BackgroundLogEvent, ExperimentLogFullArgs, ExperimentLogPartialArgs, IdField, SpanType, SpanComponentsV3, DEFAULT_IS_LEGACY_DATASET, TRANSACTION_ID_FIELD, TransactionId, SpanObjectTypeV3, DatasetRecord, Score, CommentEvent, InputField, LogCommentFullArgs, OtherExperimentLogFields, ParentExperimentIds, ParentProjectLogIds } from '@braintrust/core';
|
|
2
2
|
export { CommentEvent, DatasetRecord, ExperimentLogFullArgs, ExperimentLogPartialArgs, IdField, InputField, LogCommentFullArgs, LogFeedbackFullArgs, OtherExperimentLogFields, ParentExperimentIds, ParentProjectLogIds } from '@braintrust/core';
|
|
3
|
-
import { GitMetadataSettings, AttachmentReference, AttachmentStatus, RepoInfo, PromptData, OpenAIMessage, Tools, AnyModelParam, Message, Prompt as Prompt$1, PromptSessionEvent, StreamingMode, FunctionType, IfExists, SavedFunctionId, ModelParams } from '@braintrust/core/typespecs';
|
|
3
|
+
import { GitMetadataSettings, AttachmentReference, AttachmentStatus, RepoInfo, PromptData, OpenAIMessage, Tools, AnyModelParam, Message, Prompt as Prompt$1, PromptSessionEvent, StreamingMode, FunctionType, IfExists, SavedFunctionId, ModelParams, ToolFunctionDefinition, toolFunctionDefinitionSchema } from '@braintrust/core/typespecs';
|
|
4
|
+
export { ToolFunctionDefinition, toolFunctionDefinitionSchema } from '@braintrust/core/typespecs';
|
|
4
5
|
import { z } from 'zod';
|
|
5
6
|
|
|
6
7
|
interface IsoAsyncLocalStorage<T> {
|
|
@@ -1572,13 +1573,20 @@ declare class Project {
|
|
|
1572
1573
|
readonly id?: string;
|
|
1573
1574
|
tools: ToolBuilder;
|
|
1574
1575
|
prompts: PromptBuilder;
|
|
1576
|
+
scorers: ScorerBuilder;
|
|
1575
1577
|
constructor(args: CreateProjectOpts);
|
|
1576
1578
|
}
|
|
1577
1579
|
declare class ToolBuilder {
|
|
1578
1580
|
private readonly project;
|
|
1579
1581
|
private taskCounter;
|
|
1580
1582
|
constructor(project: Project);
|
|
1581
|
-
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1583
|
+
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1584
|
+
}
|
|
1585
|
+
declare class ScorerBuilder {
|
|
1586
|
+
private readonly project;
|
|
1587
|
+
private taskCounter;
|
|
1588
|
+
constructor(project: Project);
|
|
1589
|
+
create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
|
|
1582
1590
|
}
|
|
1583
1591
|
type Schema<Input, Output> = Partial<{
|
|
1584
1592
|
parameters: z.ZodSchema<Input>;
|
|
@@ -1590,9 +1598,21 @@ interface BaseFnOpts {
|
|
|
1590
1598
|
description: string;
|
|
1591
1599
|
ifExists: IfExists;
|
|
1592
1600
|
}
|
|
1593
|
-
type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1601
|
+
type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1594
1602
|
handler: Fn;
|
|
1595
1603
|
} & Schema<Params, Returns>;
|
|
1604
|
+
type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
|
|
1605
|
+
useCot: boolean;
|
|
1606
|
+
choiceScores: Record<string, number>;
|
|
1607
|
+
};
|
|
1608
|
+
type ScorerArgs<Output, Input> = {
|
|
1609
|
+
output: Output;
|
|
1610
|
+
expected?: Output;
|
|
1611
|
+
input?: Input;
|
|
1612
|
+
metadata?: Record<string, unknown>;
|
|
1613
|
+
};
|
|
1614
|
+
type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
|
|
1615
|
+
type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
|
|
1596
1616
|
declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
|
|
1597
1617
|
readonly project: Project;
|
|
1598
1618
|
readonly handler: Fn;
|
|
@@ -1603,7 +1623,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
|
|
|
1603
1623
|
readonly parameters?: z.ZodSchema<Input>;
|
|
1604
1624
|
readonly returns?: z.ZodSchema<Output>;
|
|
1605
1625
|
readonly ifExists?: IfExists;
|
|
1606
|
-
constructor(project: Project, opts: Omit<ToolOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1626
|
+
constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1607
1627
|
name: string;
|
|
1608
1628
|
slug: string;
|
|
1609
1629
|
type: FunctionType;
|
|
@@ -1619,66 +1639,35 @@ declare class CodePrompt {
|
|
|
1619
1639
|
readonly ifExists?: IfExists;
|
|
1620
1640
|
readonly description?: string;
|
|
1621
1641
|
readonly id?: string;
|
|
1642
|
+
readonly functionType?: FunctionType;
|
|
1622
1643
|
readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
|
|
1623
|
-
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
|
|
1644
|
+
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
|
|
1624
1645
|
name: string;
|
|
1625
1646
|
slug: string;
|
|
1626
|
-
});
|
|
1647
|
+
}, functionType?: FunctionType);
|
|
1627
1648
|
}
|
|
1628
|
-
declare const toolFunctionDefinitionSchema: z.ZodObject<{
|
|
1629
|
-
type: z.ZodLiteral<"function">;
|
|
1630
|
-
function: z.ZodObject<{
|
|
1631
|
-
name: z.ZodString;
|
|
1632
|
-
description: z.ZodOptional<z.ZodString>;
|
|
1633
|
-
parameters: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
1634
|
-
strict: z.ZodOptional<z.ZodBoolean>;
|
|
1635
|
-
}, "strip", z.ZodTypeAny, {
|
|
1636
|
-
name: string;
|
|
1637
|
-
description?: string | undefined;
|
|
1638
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1639
|
-
strict?: boolean | undefined;
|
|
1640
|
-
}, {
|
|
1641
|
-
name: string;
|
|
1642
|
-
description?: string | undefined;
|
|
1643
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1644
|
-
strict?: boolean | undefined;
|
|
1645
|
-
}>;
|
|
1646
|
-
}, "strip", z.ZodTypeAny, {
|
|
1647
|
-
function: {
|
|
1648
|
-
name: string;
|
|
1649
|
-
description?: string | undefined;
|
|
1650
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1651
|
-
strict?: boolean | undefined;
|
|
1652
|
-
};
|
|
1653
|
-
type: "function";
|
|
1654
|
-
}, {
|
|
1655
|
-
function: {
|
|
1656
|
-
name: string;
|
|
1657
|
-
description?: string | undefined;
|
|
1658
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1659
|
-
strict?: boolean | undefined;
|
|
1660
|
-
};
|
|
1661
|
-
type: "function";
|
|
1662
|
-
}>;
|
|
1663
|
-
type ToolFunctionDefinition = z.infer<typeof toolFunctionDefinitionSchema>;
|
|
1664
1649
|
interface PromptId {
|
|
1665
1650
|
id: string;
|
|
1666
1651
|
}
|
|
1667
1652
|
interface PromptVersion {
|
|
1668
1653
|
version: TransactionId;
|
|
1669
1654
|
}
|
|
1655
|
+
interface PromptTools {
|
|
1656
|
+
tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1657
|
+
}
|
|
1658
|
+
interface PromptNoTrace {
|
|
1659
|
+
noTrace: boolean;
|
|
1660
|
+
}
|
|
1670
1661
|
type PromptContents = {
|
|
1671
1662
|
prompt: string;
|
|
1672
1663
|
} | {
|
|
1673
1664
|
messages: Message[];
|
|
1674
1665
|
};
|
|
1675
|
-
type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1666
|
+
type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1676
1667
|
name: string;
|
|
1677
|
-
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
|
|
1668
|
+
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
|
|
1678
1669
|
model: string;
|
|
1679
1670
|
params?: ModelParams;
|
|
1680
|
-
tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1681
|
-
noTrace?: boolean;
|
|
1682
1671
|
};
|
|
1683
1672
|
declare class PromptBuilder {
|
|
1684
1673
|
private readonly project;
|
|
@@ -1707,7 +1696,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
|
|
|
1707
1696
|
type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
|
|
1708
1697
|
type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
1709
1698
|
interface EvalHooks {
|
|
1699
|
+
/**
|
|
1700
|
+
* @deprecated Use `metadata` instead.
|
|
1701
|
+
*/
|
|
1710
1702
|
meta: (info: Record<string, unknown>) => void;
|
|
1703
|
+
/**
|
|
1704
|
+
* The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
|
|
1705
|
+
*/
|
|
1706
|
+
metadata: Record<string, unknown>;
|
|
1711
1707
|
span: Span;
|
|
1712
1708
|
}
|
|
1713
1709
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
@@ -1923,6 +1919,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
|
|
|
1923
1919
|
type braintrust_ChatPrompt = ChatPrompt;
|
|
1924
1920
|
type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
|
|
1925
1921
|
declare const braintrust_CodeFunction: typeof CodeFunction;
|
|
1922
|
+
type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
|
|
1926
1923
|
type braintrust_CodePrompt = CodePrompt;
|
|
1927
1924
|
declare const braintrust_CodePrompt: typeof CodePrompt;
|
|
1928
1925
|
declare const braintrust_CommentEvent: typeof CommentEvent;
|
|
@@ -1986,7 +1983,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
|
|
|
1986
1983
|
declare const braintrust_Prompt: typeof Prompt;
|
|
1987
1984
|
type braintrust_PromptBuilder = PromptBuilder;
|
|
1988
1985
|
declare const braintrust_PromptBuilder: typeof PromptBuilder;
|
|
1989
|
-
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
|
|
1986
|
+
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
|
|
1990
1987
|
type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
|
|
1991
1988
|
type braintrust_ReadonlyAttachment = ReadonlyAttachment;
|
|
1992
1989
|
declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
|
|
@@ -1995,6 +1992,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
|
|
|
1995
1992
|
declare const braintrust_Reporter: typeof Reporter;
|
|
1996
1993
|
type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
|
|
1997
1994
|
type braintrust_ScoreSummary = ScoreSummary;
|
|
1995
|
+
type braintrust_ScorerBuilder = ScorerBuilder;
|
|
1996
|
+
declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
|
|
1997
|
+
type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
|
|
1998
1998
|
type braintrust_SerializedBraintrustState = SerializedBraintrustState;
|
|
1999
1999
|
type braintrust_SetCurrentArg = SetCurrentArg;
|
|
2000
2000
|
type braintrust_Span = Span;
|
|
@@ -2004,8 +2004,7 @@ declare const braintrust_SpanImpl: typeof SpanImpl;
|
|
|
2004
2004
|
type braintrust_StartSpanArgs = StartSpanArgs;
|
|
2005
2005
|
type braintrust_ToolBuilder = ToolBuilder;
|
|
2006
2006
|
declare const braintrust_ToolBuilder: typeof ToolBuilder;
|
|
2007
|
-
|
|
2008
|
-
type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
|
|
2007
|
+
declare const braintrust_ToolFunctionDefinition: typeof ToolFunctionDefinition;
|
|
2009
2008
|
type braintrust_WithTransactionId<R> = WithTransactionId<R>;
|
|
2010
2009
|
declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
|
|
2011
2010
|
declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
|
|
@@ -2053,7 +2052,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
|
|
|
2053
2052
|
declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
|
|
2054
2053
|
declare const braintrust_wrapTraced: typeof wrapTraced;
|
|
2055
2054
|
declare namespace braintrust {
|
|
2056
|
-
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type 
braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder,
|
|
2055
|
+
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, 
braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, 
braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
|
|
2057
2056
|
}
|
|
2058
2057
|
|
|
2059
2058
|
/**
|
|
@@ -2109,4 +2108,4 @@ declare namespace braintrust {
|
|
|
2109
2108
|
* @module braintrust
|
|
2110
2109
|
*/
|
|
2111
2110
|
|
|
2112
|
-
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type
|
|
2111
|
+
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { LogFeedbackFullArgs, ExperimentEvent, BackgroundLogEvent, ExperimentLogFullArgs, ExperimentLogPartialArgs, IdField, SpanType, SpanComponentsV3, DEFAULT_IS_LEGACY_DATASET, TRANSACTION_ID_FIELD, TransactionId, SpanObjectTypeV3, DatasetRecord, Score, CommentEvent, InputField, LogCommentFullArgs, OtherExperimentLogFields, ParentExperimentIds, ParentProjectLogIds } from '@braintrust/core';
|
|
2
2
|
export { CommentEvent, DatasetRecord, ExperimentLogFullArgs, ExperimentLogPartialArgs, IdField, InputField, LogCommentFullArgs, LogFeedbackFullArgs, OtherExperimentLogFields, ParentExperimentIds, ParentProjectLogIds } from '@braintrust/core';
|
|
3
|
-
import { GitMetadataSettings, AttachmentReference, AttachmentStatus, RepoInfo, PromptData, OpenAIMessage, Tools, AnyModelParam, Message, Prompt as Prompt$1, PromptSessionEvent, StreamingMode, FunctionType, IfExists, SavedFunctionId, ModelParams } from '@braintrust/core/typespecs';
|
|
3
|
+
import { GitMetadataSettings, AttachmentReference, AttachmentStatus, RepoInfo, PromptData, OpenAIMessage, Tools, AnyModelParam, Message, Prompt as Prompt$1, PromptSessionEvent, StreamingMode, FunctionType, IfExists, SavedFunctionId, ModelParams, ToolFunctionDefinition, toolFunctionDefinitionSchema } from '@braintrust/core/typespecs';
|
|
4
|
+
export { ToolFunctionDefinition, toolFunctionDefinitionSchema } from '@braintrust/core/typespecs';
|
|
4
5
|
import { z } from 'zod';
|
|
5
6
|
|
|
6
7
|
interface IsoAsyncLocalStorage<T> {
|
|
@@ -1572,13 +1573,20 @@ declare class Project {
|
|
|
1572
1573
|
readonly id?: string;
|
|
1573
1574
|
tools: ToolBuilder;
|
|
1574
1575
|
prompts: PromptBuilder;
|
|
1576
|
+
scorers: ScorerBuilder;
|
|
1575
1577
|
constructor(args: CreateProjectOpts);
|
|
1576
1578
|
}
|
|
1577
1579
|
declare class ToolBuilder {
|
|
1578
1580
|
private readonly project;
|
|
1579
1581
|
private taskCounter;
|
|
1580
1582
|
constructor(project: Project);
|
|
1581
|
-
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: ToolOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1583
|
+
create<Input, Output, Fn extends GenericFunction<Input, Output>>(opts: CodeOpts<Input, Output, Fn>): CodeFunction<Input, Output, Fn>;
|
|
1584
|
+
}
|
|
1585
|
+
declare class ScorerBuilder {
|
|
1586
|
+
private readonly project;
|
|
1587
|
+
private taskCounter;
|
|
1588
|
+
constructor(project: Project);
|
|
1589
|
+
create<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>>(opts: ScorerOpts<Output, Input, Params, Returns, Fn>): void;
|
|
1582
1590
|
}
|
|
1583
1591
|
type Schema<Input, Output> = Partial<{
|
|
1584
1592
|
parameters: z.ZodSchema<Input>;
|
|
@@ -1590,9 +1598,21 @@ interface BaseFnOpts {
|
|
|
1590
1598
|
description: string;
|
|
1591
1599
|
ifExists: IfExists;
|
|
1592
1600
|
}
|
|
1593
|
-
type ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1601
|
+
type CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = Partial<BaseFnOpts> & {
|
|
1594
1602
|
handler: Fn;
|
|
1595
1603
|
} & Schema<Params, Returns>;
|
|
1604
|
+
type ScorerPromptOpts = Partial<BaseFnOpts> & PromptOpts<false, false, false, false> & {
|
|
1605
|
+
useCot: boolean;
|
|
1606
|
+
choiceScores: Record<string, number>;
|
|
1607
|
+
};
|
|
1608
|
+
type ScorerArgs<Output, Input> = {
|
|
1609
|
+
output: Output;
|
|
1610
|
+
expected?: Output;
|
|
1611
|
+
input?: Input;
|
|
1612
|
+
metadata?: Record<string, unknown>;
|
|
1613
|
+
};
|
|
1614
|
+
type Exact<T, Shape> = T extends Shape ? Exclude<keyof T, keyof Shape> extends never ? T : never : never;
|
|
1615
|
+
type ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = CodeOpts<Exact<Params, ScorerArgs<Output, Input>>, Returns, Fn> | ScorerPromptOpts;
|
|
1596
1616
|
declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> {
|
|
1597
1617
|
readonly project: Project;
|
|
1598
1618
|
readonly handler: Fn;
|
|
@@ -1603,7 +1623,7 @@ declare class CodeFunction<Input, Output, Fn extends GenericFunction<Input, Outp
|
|
|
1603
1623
|
readonly parameters?: z.ZodSchema<Input>;
|
|
1604
1624
|
readonly returns?: z.ZodSchema<Output>;
|
|
1605
1625
|
readonly ifExists?: IfExists;
|
|
1606
|
-
constructor(project: Project, opts: Omit<ToolOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1626
|
+
constructor(project: Project, opts: Omit<CodeOpts<Input, Output, Fn>, "name" | "slug"> & {
|
|
1607
1627
|
name: string;
|
|
1608
1628
|
slug: string;
|
|
1609
1629
|
type: FunctionType;
|
|
@@ -1619,66 +1639,35 @@ declare class CodePrompt {
|
|
|
1619
1639
|
readonly ifExists?: IfExists;
|
|
1620
1640
|
readonly description?: string;
|
|
1621
1641
|
readonly id?: string;
|
|
1642
|
+
readonly functionType?: FunctionType;
|
|
1622
1643
|
readonly toolFunctions: (SavedFunctionId | GenericCodeFunction)[];
|
|
1623
|
-
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false>, "name" | "slug"> & {
|
|
1644
|
+
constructor(project: Project, prompt: PromptData, toolFunctions: (SavedFunctionId | GenericCodeFunction)[], opts: Omit<PromptOpts<false, false, false, false>, "name" | "slug"> & {
|
|
1624
1645
|
name: string;
|
|
1625
1646
|
slug: string;
|
|
1626
|
-
});
|
|
1647
|
+
}, functionType?: FunctionType);
|
|
1627
1648
|
}
|
|
1628
|
-
declare const toolFunctionDefinitionSchema: z.ZodObject<{
|
|
1629
|
-
type: z.ZodLiteral<"function">;
|
|
1630
|
-
function: z.ZodObject<{
|
|
1631
|
-
name: z.ZodString;
|
|
1632
|
-
description: z.ZodOptional<z.ZodString>;
|
|
1633
|
-
parameters: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
1634
|
-
strict: z.ZodOptional<z.ZodBoolean>;
|
|
1635
|
-
}, "strip", z.ZodTypeAny, {
|
|
1636
|
-
name: string;
|
|
1637
|
-
description?: string | undefined;
|
|
1638
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1639
|
-
strict?: boolean | undefined;
|
|
1640
|
-
}, {
|
|
1641
|
-
name: string;
|
|
1642
|
-
description?: string | undefined;
|
|
1643
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1644
|
-
strict?: boolean | undefined;
|
|
1645
|
-
}>;
|
|
1646
|
-
}, "strip", z.ZodTypeAny, {
|
|
1647
|
-
function: {
|
|
1648
|
-
name: string;
|
|
1649
|
-
description?: string | undefined;
|
|
1650
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1651
|
-
strict?: boolean | undefined;
|
|
1652
|
-
};
|
|
1653
|
-
type: "function";
|
|
1654
|
-
}, {
|
|
1655
|
-
function: {
|
|
1656
|
-
name: string;
|
|
1657
|
-
description?: string | undefined;
|
|
1658
|
-
parameters?: Record<string, unknown> | undefined;
|
|
1659
|
-
strict?: boolean | undefined;
|
|
1660
|
-
};
|
|
1661
|
-
type: "function";
|
|
1662
|
-
}>;
|
|
1663
|
-
type ToolFunctionDefinition = z.infer<typeof toolFunctionDefinitionSchema>;
|
|
1664
1649
|
interface PromptId {
|
|
1665
1650
|
id: string;
|
|
1666
1651
|
}
|
|
1667
1652
|
interface PromptVersion {
|
|
1668
1653
|
version: TransactionId;
|
|
1669
1654
|
}
|
|
1655
|
+
interface PromptTools {
|
|
1656
|
+
tools: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1657
|
+
}
|
|
1658
|
+
interface PromptNoTrace {
|
|
1659
|
+
noTrace: boolean;
|
|
1660
|
+
}
|
|
1670
1661
|
type PromptContents = {
|
|
1671
1662
|
prompt: string;
|
|
1672
1663
|
} | {
|
|
1673
1664
|
messages: Message[];
|
|
1674
1665
|
};
|
|
1675
|
-
type PromptOpts<HasId extends boolean, HasVersion extends boolean> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1666
|
+
type PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = (Partial<Omit<BaseFnOpts, "name">> & {
|
|
1676
1667
|
name: string;
|
|
1677
|
-
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & PromptContents & {
|
|
1668
|
+
}) & (HasId extends true ? PromptId : Partial<PromptId>) & (HasVersion extends true ? PromptVersion : Partial<PromptVersion>) & (HasTools extends true ? Partial<PromptTools> : {}) & (HasNoTrace extends true ? Partial<PromptNoTrace> : {}) & PromptContents & {
|
|
1678
1669
|
model: string;
|
|
1679
1670
|
params?: ModelParams;
|
|
1680
|
-
tools?: (GenericCodeFunction | SavedFunctionId | ToolFunctionDefinition)[];
|
|
1681
|
-
noTrace?: boolean;
|
|
1682
1671
|
};
|
|
1683
1672
|
declare class PromptBuilder {
|
|
1684
1673
|
private readonly project;
|
|
@@ -1707,7 +1696,14 @@ declare function BaseExperiment<Input = unknown, Expected = unknown, Metadata ex
|
|
|
1707
1696
|
type EvalData<Input, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata>[] | (() => EvalCase<Input, Expected, Metadata>[]) | Promise<EvalCase<Input, Expected, Metadata>[]> | (() => Promise<EvalCase<Input, Expected, Metadata>[]>) | AsyncGenerator<EvalCase<Input, Expected, Metadata>> | AsyncIterable<EvalCase<Input, Expected, Metadata>> | BaseExperiment<Input, Expected, Metadata> | (() => BaseExperiment<Input, Expected, Metadata>);
|
|
1708
1697
|
type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
|
|
1709
1698
|
interface EvalHooks {
|
|
1699
|
+
/**
|
|
1700
|
+
* @deprecated Use `metadata` instead.
|
|
1701
|
+
*/
|
|
1710
1702
|
meta: (info: Record<string, unknown>) => void;
|
|
1703
|
+
/**
|
|
1704
|
+
* The metadata object for the current evaluation. You can mutate this object to add or remove metadata.
|
|
1705
|
+
*/
|
|
1706
|
+
metadata: Record<string, unknown>;
|
|
1711
1707
|
span: Span;
|
|
1712
1708
|
}
|
|
1713
1709
|
type EvalScorerArgs<Input, Output, Expected, Metadata extends BaseMetadata = DefaultMetadataType> = EvalCase<Input, Expected, Metadata> & {
|
|
@@ -1923,6 +1919,7 @@ type braintrust_BraintrustStreamChunk = BraintrustStreamChunk;
|
|
|
1923
1919
|
type braintrust_ChatPrompt = ChatPrompt;
|
|
1924
1920
|
type braintrust_CodeFunction<Input, Output, Fn extends GenericFunction<Input, Output>> = CodeFunction<Input, Output, Fn>;
|
|
1925
1921
|
declare const braintrust_CodeFunction: typeof CodeFunction;
|
|
1922
|
+
type braintrust_CodeOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = CodeOpts<Params, Returns, Fn>;
|
|
1926
1923
|
type braintrust_CodePrompt = CodePrompt;
|
|
1927
1924
|
declare const braintrust_CodePrompt: typeof CodePrompt;
|
|
1928
1925
|
declare const braintrust_CommentEvent: typeof CommentEvent;
|
|
@@ -1986,7 +1983,7 @@ type braintrust_Prompt<HasId extends boolean = true, HasVersion extends boolean
|
|
|
1986
1983
|
declare const braintrust_Prompt: typeof Prompt;
|
|
1987
1984
|
type braintrust_PromptBuilder = PromptBuilder;
|
|
1988
1985
|
declare const braintrust_PromptBuilder: typeof PromptBuilder;
|
|
1989
|
-
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean> = PromptOpts<HasId, HasVersion>;
|
|
1986
|
+
type braintrust_PromptOpts<HasId extends boolean, HasVersion extends boolean, HasTools extends boolean = true, HasNoTrace extends boolean = true> = PromptOpts<HasId, HasVersion, HasTools, HasNoTrace>;
|
|
1990
1987
|
type braintrust_PromptRowWithId<HasId extends boolean = true, HasVersion extends boolean = true> = PromptRowWithId<HasId, HasVersion>;
|
|
1991
1988
|
type braintrust_ReadonlyAttachment = ReadonlyAttachment;
|
|
1992
1989
|
declare const braintrust_ReadonlyAttachment: typeof ReadonlyAttachment;
|
|
@@ -1995,6 +1992,9 @@ declare const braintrust_ReadonlyExperiment: typeof ReadonlyExperiment;
|
|
|
1995
1992
|
declare const braintrust_Reporter: typeof Reporter;
|
|
1996
1993
|
type braintrust_ReporterBody<EvalReport> = ReporterBody<EvalReport>;
|
|
1997
1994
|
type braintrust_ScoreSummary = ScoreSummary;
|
|
1995
|
+
type braintrust_ScorerBuilder = ScorerBuilder;
|
|
1996
|
+
declare const braintrust_ScorerBuilder: typeof ScorerBuilder;
|
|
1997
|
+
type braintrust_ScorerOpts<Output, Input, Params, Returns, Fn extends GenericFunction<Exact<Params, ScorerArgs<Output, Input>>, Returns>> = ScorerOpts<Output, Input, Params, Returns, Fn>;
|
|
1998
1998
|
type braintrust_SerializedBraintrustState = SerializedBraintrustState;
|
|
1999
1999
|
type braintrust_SetCurrentArg = SetCurrentArg;
|
|
2000
2000
|
type braintrust_Span = Span;
|
|
@@ -2004,8 +2004,7 @@ declare const braintrust_SpanImpl: typeof SpanImpl;
|
|
|
2004
2004
|
type braintrust_StartSpanArgs = StartSpanArgs;
|
|
2005
2005
|
type braintrust_ToolBuilder = ToolBuilder;
|
|
2006
2006
|
declare const braintrust_ToolBuilder: typeof ToolBuilder;
|
|
2007
|
-
|
|
2008
|
-
type braintrust_ToolOpts<Params, Returns, Fn extends GenericFunction<Params, Returns>> = ToolOpts<Params, Returns, Fn>;
|
|
2007
|
+
declare const braintrust_ToolFunctionDefinition: typeof ToolFunctionDefinition;
|
|
2009
2008
|
type braintrust_WithTransactionId<R> = WithTransactionId<R>;
|
|
2010
2009
|
declare const braintrust_X_CACHED_HEADER: typeof X_CACHED_HEADER;
|
|
2011
2010
|
declare const braintrust__exportsForTestingOnly: typeof _exportsForTestingOnly;
|
|
@@ -2053,7 +2052,7 @@ declare const braintrust_wrapOpenAI: typeof wrapOpenAI;
|
|
|
2053
2052
|
declare const braintrust_wrapOpenAIv4: typeof wrapOpenAIv4;
|
|
2054
2053
|
declare const braintrust_wrapTraced: typeof wrapTraced;
|
|
2055
2054
|
declare namespace braintrust {
|
|
2056
|
-
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, braintrust_IdField as IdField, type 
braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder,
|
|
2055
|
+
export { type braintrust_AnyDataset as AnyDataset, braintrust_Attachment as Attachment, type braintrust_AttachmentParams as AttachmentParams, type braintrust_BackgroundLoggerOpts as BackgroundLoggerOpts, braintrust_BaseExperiment as BaseExperiment, type braintrust_BaseMetadata as BaseMetadata, braintrust_BraintrustState as BraintrustState, braintrust_BraintrustStream as BraintrustStream, type braintrust_BraintrustStreamChunk as BraintrustStreamChunk, type braintrust_ChatPrompt as ChatPrompt, braintrust_CodeFunction as CodeFunction, type braintrust_CodeOpts as CodeOpts, braintrust_CodePrompt as CodePrompt, braintrust_CommentEvent as CommentEvent, type braintrust_CompiledPrompt as CompiledPrompt, type braintrust_CompiledPromptParams as CompiledPromptParams, type braintrust_CompletionPrompt as CompletionPrompt, type braintrust_CreateProjectOpts as CreateProjectOpts, type braintrust_DataSummary as DataSummary, braintrust_Dataset as Dataset, braintrust_DatasetRecord as DatasetRecord, type braintrust_DatasetSummary as DatasetSummary, type braintrust_DefaultMetadataType as DefaultMetadataType, type braintrust_DefaultPromptArgs as DefaultPromptArgs, type braintrust_EndSpanArgs as EndSpanArgs, braintrust_Eval as Eval, type braintrust_EvalCase as EvalCase, type braintrust_EvalResult as EvalResult, type braintrust_EvalScorer as EvalScorer, type braintrust_EvalScorerArgs as EvalScorerArgs, type braintrust_EvalTask as EvalTask, type braintrust_Evaluator as Evaluator, type braintrust_EvaluatorDef as EvaluatorDef, type braintrust_EvaluatorFile as EvaluatorFile, braintrust_Experiment as Experiment, braintrust_ExperimentLogFullArgs as ExperimentLogFullArgs, braintrust_ExperimentLogPartialArgs as ExperimentLogPartialArgs, type braintrust_ExperimentSummary as ExperimentSummary, type braintrust_Exportable as Exportable, braintrust_FailedHTTPResponse as FailedHTTPResponse, type braintrust_FullInitOptions as FullInitOptions, type braintrust_FullLoginOptions as FullLoginOptions, 
braintrust_IdField as IdField, type braintrust_InitOptions as InitOptions, braintrust_InputField as InputField, type braintrust_InvokeFunctionArgs as InvokeFunctionArgs, type braintrust_InvokeReturn as InvokeReturn, braintrust_LEGACY_CACHED_HEADER as LEGACY_CACHED_HEADER, braintrust_LazyValue as LazyValue, braintrust_LogCommentFullArgs as LogCommentFullArgs, braintrust_LogFeedbackFullArgs as LogFeedbackFullArgs, type braintrust_LogOptions as LogOptions, braintrust_Logger as Logger, type braintrust_LoginOptions as LoginOptions, type braintrust_MetricSummary as MetricSummary, braintrust_NOOP_SPAN as NOOP_SPAN, braintrust_NoopSpan as NoopSpan, type braintrust_ObjectMetadata as ObjectMetadata, braintrust_OtherExperimentLogFields as OtherExperimentLogFields, braintrust_ParentExperimentIds as ParentExperimentIds, braintrust_ParentProjectLogIds as ParentProjectLogIds, braintrust_Project as Project, type braintrust_PromiseUnless as PromiseUnless, braintrust_Prompt as Prompt, braintrust_PromptBuilder as PromptBuilder, type braintrust_PromptOpts as PromptOpts, type braintrust_PromptRowWithId as PromptRowWithId, braintrust_ReadonlyAttachment as ReadonlyAttachment, braintrust_ReadonlyExperiment as ReadonlyExperiment, braintrust_Reporter as Reporter, type braintrust_ReporterBody as ReporterBody, type braintrust_ScoreSummary as ScoreSummary, braintrust_ScorerBuilder as ScorerBuilder, type braintrust_ScorerOpts as ScorerOpts, type braintrust_SerializedBraintrustState as SerializedBraintrustState, type braintrust_SetCurrentArg as SetCurrentArg, type braintrust_Span as Span, type braintrust_SpanContext as SpanContext, braintrust_SpanImpl as SpanImpl, type braintrust_StartSpanArgs as StartSpanArgs, braintrust_ToolBuilder as ToolBuilder, braintrust_ToolFunctionDefinition as ToolFunctionDefinition, type braintrust_WithTransactionId as WithTransactionId, braintrust_X_CACHED_HEADER as X_CACHED_HEADER, braintrust__exportsForTestingOnly as _exportsForTestingOnly, 
braintrust__internalGetGlobalState as _internalGetGlobalState, braintrust__internalSetInitialState as _internalSetInitialState, braintrust_braintrustStreamChunkSchema as braintrustStreamChunkSchema, braintrust_buildLocalSummary as buildLocalSummary, braintrust_createFinalValuePassThroughStream as createFinalValuePassThroughStream, braintrust_currentExperiment as currentExperiment, braintrust_currentLogger as currentLogger, braintrust_currentSpan as currentSpan, braintrust_devNullWritableStream as devNullWritableStream, braintrust_flush as flush, braintrust_getSpanParentObject as getSpanParentObject, braintrust_init as init, braintrust_initDataset as initDataset, braintrust_initExperiment as initExperiment, braintrust_initLogger as initLogger, braintrust_invoke as invoke, braintrust_loadPrompt as loadPrompt, braintrust_log as log, braintrust_logError as logError, braintrust_login as login, braintrust_loginToState as loginToState, braintrust_newId as newId, braintrust_parseCachedHeader as parseCachedHeader, braintrust_permalink as permalink, braintrust_projects as projects, braintrust_renderMessage as renderMessage, braintrust_reportFailures as reportFailures, braintrust_setFetch as setFetch, braintrust_spanComponentsToObjectId as spanComponentsToObjectId, braintrust_startSpan as startSpan, braintrust_summarize as summarize, braintrust_toolFunctionDefinitionSchema as toolFunctionDefinitionSchema, braintrust_traceable as traceable, braintrust_traced as traced, braintrust_updateSpan as updateSpan, braintrust_withCurrent as withCurrent, braintrust_withDataset as withDataset, braintrust_withExperiment as withExperiment, braintrust_withLogger as withLogger, braintrust_wrapAISDKModel as wrapAISDKModel, braintrust_wrapOpenAI as wrapOpenAI, braintrust_wrapOpenAIv4 as wrapOpenAIv4, braintrust_wrapTraced as wrapTraced };
|
|
2057
2056
|
}
|
|
2058
2057
|
|
|
2059
2058
|
/**
|
|
@@ -2109,4 +2108,4 @@ declare namespace braintrust {
|
|
|
2109
2108
|
* @module braintrust
|
|
2110
2109
|
*/
|
|
2111
2110
|
|
|
2112
|
-
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type
|
|
2111
|
+
export { type AnyDataset, Attachment, type AttachmentParams, type BackgroundLoggerOpts, BaseExperiment, type BaseMetadata, BraintrustState, BraintrustStream, type BraintrustStreamChunk, type ChatPrompt, CodeFunction, type CodeOpts, CodePrompt, type CompiledPrompt, type CompiledPromptParams, type CompletionPrompt, type CreateProjectOpts, type DataSummary, Dataset, type DatasetSummary, type DefaultMetadataType, type DefaultPromptArgs, type EndSpanArgs, Eval, type EvalCase, type EvalResult, type EvalScorer, type EvalScorerArgs, type EvalTask, type Evaluator, type EvaluatorDef, type EvaluatorFile, Experiment, type ExperimentSummary, type Exportable, FailedHTTPResponse, type FullInitOptions, type FullLoginOptions, type InitOptions, type InvokeFunctionArgs, type InvokeReturn, LEGACY_CACHED_HEADER, LazyValue, type LogOptions, Logger, type LoginOptions, type MetricSummary, NOOP_SPAN, NoopSpan, type ObjectMetadata, Project, type PromiseUnless, Prompt, PromptBuilder, type PromptOpts, type PromptRowWithId, ReadonlyAttachment, ReadonlyExperiment, Reporter, type ReporterBody, type ScoreSummary, ScorerBuilder, type ScorerOpts, type SerializedBraintrustState, type SetCurrentArg, type Span, type SpanContext, SpanImpl, type StartSpanArgs, ToolBuilder, type WithTransactionId, X_CACHED_HEADER, _exportsForTestingOnly, _internalGetGlobalState, _internalSetInitialState, braintrustStreamChunkSchema, buildLocalSummary, createFinalValuePassThroughStream, currentExperiment, currentLogger, currentSpan, braintrust as default, devNullWritableStream, flush, getSpanParentObject, init, initDataset, initExperiment, initLogger, invoke, loadPrompt, log, logError, login, loginToState, newId, parseCachedHeader, permalink, projects, renderMessage, reportFailures, setFetch, spanComponentsToObjectId, startSpan, summarize, traceable, traced, updateSpan, withCurrent, withDataset, withExperiment, withLogger, wrapAISDKModel, wrapOpenAI, wrapOpenAIv4, wrapTraced };
|
package/dist/index.js
CHANGED
|
@@ -51,6 +51,7 @@ __export(src_exports, {
|
|
|
51
51
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
52
52
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
53
53
|
Reporter: () => Reporter,
|
|
54
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
54
55
|
SpanImpl: () => SpanImpl,
|
|
55
56
|
ToolBuilder: () => ToolBuilder,
|
|
56
57
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -87,7 +88,7 @@ __export(src_exports, {
|
|
|
87
88
|
spanComponentsToObjectId: () => spanComponentsToObjectId,
|
|
88
89
|
startSpan: () => startSpan,
|
|
89
90
|
summarize: () => summarize,
|
|
90
|
-
toolFunctionDefinitionSchema: () => toolFunctionDefinitionSchema,
|
|
91
|
+
toolFunctionDefinitionSchema: () => import_typespecs4.toolFunctionDefinitionSchema,
|
|
91
92
|
traceable: () => traceable,
|
|
92
93
|
traced: () => traced,
|
|
93
94
|
updateSpan: () => updateSpan,
|
|
@@ -3656,6 +3657,21 @@ function renderMessage(render, message) {
|
|
|
3656
3657
|
return _exhaustiveCheck;
|
|
3657
3658
|
}
|
|
3658
3659
|
})
|
|
3660
|
+
} : {},
|
|
3661
|
+
..."tool_calls" in message ? {
|
|
3662
|
+
tool_calls: isEmpty(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
3663
|
+
return {
|
|
3664
|
+
type: t.type,
|
|
3665
|
+
id: render(t.id),
|
|
3666
|
+
function: {
|
|
3667
|
+
name: render(t.function.name),
|
|
3668
|
+
arguments: render(t.function.arguments)
|
|
3669
|
+
}
|
|
3670
|
+
};
|
|
3671
|
+
})
|
|
3672
|
+
} : {},
|
|
3673
|
+
..."tool_call_id" in message ? {
|
|
3674
|
+
tool_call_id: render(message.tool_call_id)
|
|
3659
3675
|
} : {}
|
|
3660
3676
|
};
|
|
3661
3677
|
}
|
|
@@ -3834,6 +3850,7 @@ __export(exports_node_exports, {
|
|
|
3834
3850
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
3835
3851
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
3836
3852
|
Reporter: () => Reporter,
|
|
3853
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
3837
3854
|
SpanImpl: () => SpanImpl,
|
|
3838
3855
|
ToolBuilder: () => ToolBuilder,
|
|
3839
3856
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -3869,7 +3886,7 @@ __export(exports_node_exports, {
|
|
|
3869
3886
|
spanComponentsToObjectId: () => spanComponentsToObjectId,
|
|
3870
3887
|
startSpan: () => startSpan,
|
|
3871
3888
|
summarize: () => summarize,
|
|
3872
|
-
toolFunctionDefinitionSchema: () => toolFunctionDefinitionSchema,
|
|
3889
|
+
toolFunctionDefinitionSchema: () => import_typespecs4.toolFunctionDefinitionSchema,
|
|
3873
3890
|
traceable: () => traceable,
|
|
3874
3891
|
traced: () => traced,
|
|
3875
3892
|
updateSpan: () => updateSpan,
|
|
@@ -3948,49 +3965,8 @@ async function invoke(args) {
|
|
|
3948
3965
|
}
|
|
3949
3966
|
|
|
3950
3967
|
// src/framework.ts
|
|
3951
|
-
var import_chalk = __toESM(require("chalk"));
|
|
3952
3968
|
var import_core2 = require("@braintrust/core");
|
|
3953
3969
|
|
|
3954
|
-
// src/progress.ts
|
|
3955
|
-
var cliProgress = __toESM(require("cli-progress"));
|
|
3956
|
-
var MAX_NAME_LENGTH = 40;
|
|
3957
|
-
function fitNameToSpaces(name, length) {
|
|
3958
|
-
const padded = name.padEnd(length);
|
|
3959
|
-
if (padded.length <= length) {
|
|
3960
|
-
return padded;
|
|
3961
|
-
}
|
|
3962
|
-
return padded.substring(0, length - 3) + "...";
|
|
3963
|
-
}
|
|
3964
|
-
var BarProgressReporter = class {
|
|
3965
|
-
multiBar;
|
|
3966
|
-
bars = {};
|
|
3967
|
-
constructor() {
|
|
3968
|
-
this.multiBar = new cliProgress.MultiBar(
|
|
3969
|
-
{
|
|
3970
|
-
clearOnComplete: false,
|
|
3971
|
-
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
3972
|
-
autopadding: true
|
|
3973
|
-
},
|
|
3974
|
-
cliProgress.Presets.shades_grey
|
|
3975
|
-
);
|
|
3976
|
-
}
|
|
3977
|
-
start(name, total) {
|
|
3978
|
-
const bar = this.multiBar.create(total, 0);
|
|
3979
|
-
this.bars[name] = bar;
|
|
3980
|
-
}
|
|
3981
|
-
stop() {
|
|
3982
|
-
this.multiBar.stop();
|
|
3983
|
-
}
|
|
3984
|
-
increment(name) {
|
|
3985
|
-
this.bars[name].increment({
|
|
3986
|
-
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
3987
|
-
});
|
|
3988
|
-
}
|
|
3989
|
-
};
|
|
3990
|
-
|
|
3991
|
-
// src/framework.ts
|
|
3992
|
-
var import_pluralize = __toESM(require("pluralize"));
|
|
3993
|
-
|
|
3994
3970
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
3995
3971
|
function initialParams(fn) {
|
|
3996
3972
|
return function(...args) {
|
|
@@ -5094,6 +5070,47 @@ function waterfall(tasks, callback) {
|
|
|
5094
5070
|
}
|
|
5095
5071
|
var waterfall$1 = awaitify(waterfall);
|
|
5096
5072
|
|
|
5073
|
+
// src/framework.ts
|
|
5074
|
+
var import_chalk = __toESM(require("chalk"));
|
|
5075
|
+
var import_pluralize = __toESM(require("pluralize"));
|
|
5076
|
+
|
|
5077
|
+
// src/progress.ts
|
|
5078
|
+
var cliProgress = __toESM(require("cli-progress"));
|
|
5079
|
+
var MAX_NAME_LENGTH = 40;
|
|
5080
|
+
function fitNameToSpaces(name, length) {
|
|
5081
|
+
const padded = name.padEnd(length);
|
|
5082
|
+
if (padded.length <= length) {
|
|
5083
|
+
return padded;
|
|
5084
|
+
}
|
|
5085
|
+
return padded.substring(0, length - 3) + "...";
|
|
5086
|
+
}
|
|
5087
|
+
var BarProgressReporter = class {
|
|
5088
|
+
multiBar;
|
|
5089
|
+
bars = {};
|
|
5090
|
+
constructor() {
|
|
5091
|
+
this.multiBar = new cliProgress.MultiBar(
|
|
5092
|
+
{
|
|
5093
|
+
clearOnComplete: false,
|
|
5094
|
+
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
5095
|
+
autopadding: true
|
|
5096
|
+
},
|
|
5097
|
+
cliProgress.Presets.shades_grey
|
|
5098
|
+
);
|
|
5099
|
+
}
|
|
5100
|
+
start(name, total) {
|
|
5101
|
+
const bar = this.multiBar.create(total, 0);
|
|
5102
|
+
this.bars[name] = bar;
|
|
5103
|
+
}
|
|
5104
|
+
stop() {
|
|
5105
|
+
this.multiBar.stop();
|
|
5106
|
+
}
|
|
5107
|
+
increment(name) {
|
|
5108
|
+
this.bars[name].increment({
|
|
5109
|
+
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
5110
|
+
});
|
|
5111
|
+
}
|
|
5112
|
+
};
|
|
5113
|
+
|
|
5097
5114
|
// src/framework.ts
|
|
5098
5115
|
function BaseExperiment(options = {}) {
|
|
5099
5116
|
return { _type: "BaseExperiment", ...options };
|
|
@@ -5339,7 +5356,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5339
5356
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5340
5357
|
await rootSpan.traced(
|
|
5341
5358
|
async (span) => {
|
|
5342
|
-
const outputResult = evaluator.task(datum.input, {
|
|
5359
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5360
|
+
meta,
|
|
5361
|
+
metadata,
|
|
5362
|
+
span
|
|
5363
|
+
});
|
|
5343
5364
|
if (outputResult instanceof Promise) {
|
|
5344
5365
|
output = await outputResult;
|
|
5345
5366
|
} else {
|
|
@@ -5622,7 +5643,7 @@ function formatMetricSummary(summary, longestMetricName) {
|
|
|
5622
5643
|
// src/framework2.ts
|
|
5623
5644
|
var import_path = __toESM(require("path"));
|
|
5624
5645
|
var import_slugify = __toESM(require("slugify"));
|
|
5625
|
-
var import_zod3 = require("zod");
|
|
5646
|
+
var import_typespecs4 = require("@braintrust/core/typespecs");
|
|
5626
5647
|
var ProjectBuilder = class {
|
|
5627
5648
|
create(opts) {
|
|
5628
5649
|
return new Project(opts);
|
|
@@ -5634,12 +5655,14 @@ var Project = class {
|
|
|
5634
5655
|
id;
|
|
5635
5656
|
tools;
|
|
5636
5657
|
prompts;
|
|
5658
|
+
scorers;
|
|
5637
5659
|
constructor(args) {
|
|
5638
5660
|
_initializeSpanContext();
|
|
5639
5661
|
this.name = "name" in args ? args.name : void 0;
|
|
5640
5662
|
this.id = "id" in args ? args.id : void 0;
|
|
5641
5663
|
this.tools = new ToolBuilder(this);
|
|
5642
5664
|
this.prompts = new PromptBuilder(this);
|
|
5665
|
+
this.scorers = new ScorerBuilder(this);
|
|
5643
5666
|
}
|
|
5644
5667
|
};
|
|
5645
5668
|
var ToolBuilder = class {
|
|
@@ -5673,6 +5696,70 @@ var ToolBuilder = class {
|
|
|
5673
5696
|
return tool;
|
|
5674
5697
|
}
|
|
5675
5698
|
};
|
|
5699
|
+
var ScorerBuilder = class {
|
|
5700
|
+
constructor(project) {
|
|
5701
|
+
this.project = project;
|
|
5702
|
+
}
|
|
5703
|
+
taskCounter = 0;
|
|
5704
|
+
create(opts) {
|
|
5705
|
+
this.taskCounter++;
|
|
5706
|
+
let resolvedName = opts.name;
|
|
5707
|
+
if (!resolvedName && "handler" in opts) {
|
|
5708
|
+
resolvedName = opts.handler.name;
|
|
5709
|
+
}
|
|
5710
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
5711
|
+
resolvedName = `Scorer ${import_path.default.basename(__filename)} ${this.taskCounter}`;
|
|
5712
|
+
}
|
|
5713
|
+
const slug = opts.slug ?? (0, import_slugify.default)(resolvedName, { lower: true, strict: true });
|
|
5714
|
+
if ("handler" in opts) {
|
|
5715
|
+
const scorer = new CodeFunction(this.project, {
|
|
5716
|
+
...opts,
|
|
5717
|
+
name: resolvedName,
|
|
5718
|
+
slug,
|
|
5719
|
+
type: "scorer"
|
|
5720
|
+
});
|
|
5721
|
+
if (globalThis._lazy_load) {
|
|
5722
|
+
globalThis._evals.functions.push(
|
|
5723
|
+
scorer
|
|
5724
|
+
);
|
|
5725
|
+
}
|
|
5726
|
+
} else {
|
|
5727
|
+
const promptBlock = "messages" in opts ? {
|
|
5728
|
+
type: "chat",
|
|
5729
|
+
messages: opts.messages
|
|
5730
|
+
} : {
|
|
5731
|
+
type: "completion",
|
|
5732
|
+
content: opts.prompt
|
|
5733
|
+
};
|
|
5734
|
+
const promptData = {
|
|
5735
|
+
prompt: promptBlock,
|
|
5736
|
+
options: {
|
|
5737
|
+
model: opts.model,
|
|
5738
|
+
params: opts.params
|
|
5739
|
+
},
|
|
5740
|
+
parser: {
|
|
5741
|
+
type: "llm_classifier",
|
|
5742
|
+
use_cot: opts.useCot,
|
|
5743
|
+
choice_scores: opts.choiceScores
|
|
5744
|
+
}
|
|
5745
|
+
};
|
|
5746
|
+
const codePrompt = new CodePrompt(
|
|
5747
|
+
this.project,
|
|
5748
|
+
promptData,
|
|
5749
|
+
[],
|
|
5750
|
+
{
|
|
5751
|
+
...opts,
|
|
5752
|
+
name: resolvedName,
|
|
5753
|
+
slug
|
|
5754
|
+
},
|
|
5755
|
+
"scorer"
|
|
5756
|
+
);
|
|
5757
|
+
if (globalThis._lazy_load) {
|
|
5758
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
5759
|
+
}
|
|
5760
|
+
}
|
|
5761
|
+
}
|
|
5762
|
+
};
|
|
5676
5763
|
var CodeFunction = class {
|
|
5677
5764
|
constructor(project, opts) {
|
|
5678
5765
|
this.project = project;
|
|
@@ -5712,8 +5799,9 @@ var CodePrompt = class {
|
|
|
5712
5799
|
ifExists;
|
|
5713
5800
|
description;
|
|
5714
5801
|
id;
|
|
5802
|
+
functionType;
|
|
5715
5803
|
toolFunctions;
|
|
5716
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
5804
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
5717
5805
|
this.project = project;
|
|
5718
5806
|
this.name = opts.name;
|
|
5719
5807
|
this.slug = opts.slug;
|
|
@@ -5722,17 +5810,9 @@ var CodePrompt = class {
|
|
|
5722
5810
|
this.ifExists = opts.ifExists;
|
|
5723
5811
|
this.description = opts.description;
|
|
5724
5812
|
this.id = opts.id;
|
|
5813
|
+
this.functionType = functionType;
|
|
5725
5814
|
}
|
|
5726
5815
|
};
|
|
5727
|
-
var toolFunctionDefinitionSchema = import_zod3.z.object({
|
|
5728
|
-
type: import_zod3.z.literal("function"),
|
|
5729
|
-
function: import_zod3.z.object({
|
|
5730
|
-
name: import_zod3.z.string(),
|
|
5731
|
-
description: import_zod3.z.string().optional(),
|
|
5732
|
-
parameters: import_zod3.z.record(import_zod3.z.unknown()).optional(),
|
|
5733
|
-
strict: import_zod3.z.boolean().optional()
|
|
5734
|
-
})
|
|
5735
|
-
});
|
|
5736
5816
|
var PromptBuilder = class {
|
|
5737
5817
|
constructor(project) {
|
|
5738
5818
|
this.project = project;
|
|
@@ -6456,6 +6536,7 @@ var src_default = exports_node_exports;
|
|
|
6456
6536
|
ReadonlyAttachment,
|
|
6457
6537
|
ReadonlyExperiment,
|
|
6458
6538
|
Reporter,
|
|
6539
|
+
ScorerBuilder,
|
|
6459
6540
|
SpanImpl,
|
|
6460
6541
|
ToolBuilder,
|
|
6461
6542
|
X_CACHED_HEADER,
|
package/dist/index.mjs
CHANGED
|
@@ -3592,6 +3592,21 @@ function renderMessage(render, message) {
|
|
|
3592
3592
|
return _exhaustiveCheck;
|
|
3593
3593
|
}
|
|
3594
3594
|
})
|
|
3595
|
+
} : {},
|
|
3596
|
+
..."tool_calls" in message ? {
|
|
3597
|
+
tool_calls: isEmpty(message.tool_calls) ? void 0 : message.tool_calls.map((t) => {
|
|
3598
|
+
return {
|
|
3599
|
+
type: t.type,
|
|
3600
|
+
id: render(t.id),
|
|
3601
|
+
function: {
|
|
3602
|
+
name: render(t.function.name),
|
|
3603
|
+
arguments: render(t.function.arguments)
|
|
3604
|
+
}
|
|
3605
|
+
};
|
|
3606
|
+
})
|
|
3607
|
+
} : {},
|
|
3608
|
+
..."tool_call_id" in message ? {
|
|
3609
|
+
tool_call_id: render(message.tool_call_id)
|
|
3595
3610
|
} : {}
|
|
3596
3611
|
};
|
|
3597
3612
|
}
|
|
@@ -3770,6 +3785,7 @@ __export(exports_node_exports, {
|
|
|
3770
3785
|
ReadonlyAttachment: () => ReadonlyAttachment,
|
|
3771
3786
|
ReadonlyExperiment: () => ReadonlyExperiment,
|
|
3772
3787
|
Reporter: () => Reporter,
|
|
3788
|
+
ScorerBuilder: () => ScorerBuilder,
|
|
3773
3789
|
SpanImpl: () => SpanImpl,
|
|
3774
3790
|
ToolBuilder: () => ToolBuilder,
|
|
3775
3791
|
X_CACHED_HEADER: () => X_CACHED_HEADER,
|
|
@@ -3886,49 +3902,8 @@ async function invoke(args) {
|
|
|
3886
3902
|
}
|
|
3887
3903
|
|
|
3888
3904
|
// src/framework.ts
|
|
3889
|
-
import chalk from "chalk";
|
|
3890
3905
|
import { SpanTypeAttribute as SpanTypeAttribute2, mergeDicts as mergeDicts2 } from "@braintrust/core";
|
|
3891
3906
|
|
|
3892
|
-
// src/progress.ts
|
|
3893
|
-
import * as cliProgress from "cli-progress";
|
|
3894
|
-
var MAX_NAME_LENGTH = 40;
|
|
3895
|
-
function fitNameToSpaces(name, length) {
|
|
3896
|
-
const padded = name.padEnd(length);
|
|
3897
|
-
if (padded.length <= length) {
|
|
3898
|
-
return padded;
|
|
3899
|
-
}
|
|
3900
|
-
return padded.substring(0, length - 3) + "...";
|
|
3901
|
-
}
|
|
3902
|
-
var BarProgressReporter = class {
|
|
3903
|
-
multiBar;
|
|
3904
|
-
bars = {};
|
|
3905
|
-
constructor() {
|
|
3906
|
-
this.multiBar = new cliProgress.MultiBar(
|
|
3907
|
-
{
|
|
3908
|
-
clearOnComplete: false,
|
|
3909
|
-
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
3910
|
-
autopadding: true
|
|
3911
|
-
},
|
|
3912
|
-
cliProgress.Presets.shades_grey
|
|
3913
|
-
);
|
|
3914
|
-
}
|
|
3915
|
-
start(name, total) {
|
|
3916
|
-
const bar = this.multiBar.create(total, 0);
|
|
3917
|
-
this.bars[name] = bar;
|
|
3918
|
-
}
|
|
3919
|
-
stop() {
|
|
3920
|
-
this.multiBar.stop();
|
|
3921
|
-
}
|
|
3922
|
-
increment(name) {
|
|
3923
|
-
this.bars[name].increment({
|
|
3924
|
-
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
3925
|
-
});
|
|
3926
|
-
}
|
|
3927
|
-
};
|
|
3928
|
-
|
|
3929
|
-
// src/framework.ts
|
|
3930
|
-
import pluralize from "pluralize";
|
|
3931
|
-
|
|
3932
3907
|
// ../../node_modules/.pnpm/async@3.2.5/node_modules/async/dist/async.mjs
|
|
3933
3908
|
function initialParams(fn) {
|
|
3934
3909
|
return function(...args) {
|
|
@@ -5032,6 +5007,47 @@ function waterfall(tasks, callback) {
|
|
|
5032
5007
|
}
|
|
5033
5008
|
var waterfall$1 = awaitify(waterfall);
|
|
5034
5009
|
|
|
5010
|
+
// src/framework.ts
|
|
5011
|
+
import chalk from "chalk";
|
|
5012
|
+
import pluralize from "pluralize";
|
|
5013
|
+
|
|
5014
|
+
// src/progress.ts
|
|
5015
|
+
import * as cliProgress from "cli-progress";
|
|
5016
|
+
var MAX_NAME_LENGTH = 40;
|
|
5017
|
+
function fitNameToSpaces(name, length) {
|
|
5018
|
+
const padded = name.padEnd(length);
|
|
5019
|
+
if (padded.length <= length) {
|
|
5020
|
+
return padded;
|
|
5021
|
+
}
|
|
5022
|
+
return padded.substring(0, length - 3) + "...";
|
|
5023
|
+
}
|
|
5024
|
+
var BarProgressReporter = class {
|
|
5025
|
+
multiBar;
|
|
5026
|
+
bars = {};
|
|
5027
|
+
constructor() {
|
|
5028
|
+
this.multiBar = new cliProgress.MultiBar(
|
|
5029
|
+
{
|
|
5030
|
+
clearOnComplete: false,
|
|
5031
|
+
format: " {bar} | {evaluator} | {percentage}% | {value}/{total} datapoints",
|
|
5032
|
+
autopadding: true
|
|
5033
|
+
},
|
|
5034
|
+
cliProgress.Presets.shades_grey
|
|
5035
|
+
);
|
|
5036
|
+
}
|
|
5037
|
+
start(name, total) {
|
|
5038
|
+
const bar = this.multiBar.create(total, 0);
|
|
5039
|
+
this.bars[name] = bar;
|
|
5040
|
+
}
|
|
5041
|
+
stop() {
|
|
5042
|
+
this.multiBar.stop();
|
|
5043
|
+
}
|
|
5044
|
+
increment(name) {
|
|
5045
|
+
this.bars[name].increment({
|
|
5046
|
+
evaluator: fitNameToSpaces(name, MAX_NAME_LENGTH)
|
|
5047
|
+
});
|
|
5048
|
+
}
|
|
5049
|
+
};
|
|
5050
|
+
|
|
5035
5051
|
// src/framework.ts
|
|
5036
5052
|
function BaseExperiment(options = {}) {
|
|
5037
5053
|
return { _type: "BaseExperiment", ...options };
|
|
@@ -5277,7 +5293,11 @@ async function runEvaluatorInternal(experiment, evaluator, progressReporter, fil
|
|
|
5277
5293
|
const meta = (o) => metadata = { ...metadata, ...o };
|
|
5278
5294
|
await rootSpan.traced(
|
|
5279
5295
|
async (span) => {
|
|
5280
|
-
const outputResult = evaluator.task(datum.input, {
|
|
5296
|
+
const outputResult = evaluator.task(datum.input, {
|
|
5297
|
+
meta,
|
|
5298
|
+
metadata,
|
|
5299
|
+
span
|
|
5300
|
+
});
|
|
5281
5301
|
if (outputResult instanceof Promise) {
|
|
5282
5302
|
output = await outputResult;
|
|
5283
5303
|
} else {
|
|
@@ -5560,7 +5580,9 @@ function formatMetricSummary(summary, longestMetricName) {
|
|
|
5560
5580
|
// src/framework2.ts
|
|
5561
5581
|
import path2 from "path";
|
|
5562
5582
|
import slugifyLib from "slugify";
|
|
5563
|
-
import { z as z3 } from "zod";
|
|
5583
|
+
import {
|
|
5584
|
+
toolFunctionDefinitionSchema
|
|
5585
|
+
} from "@braintrust/core/typespecs";
|
|
5564
5586
|
var ProjectBuilder = class {
|
|
5565
5587
|
create(opts) {
|
|
5566
5588
|
return new Project(opts);
|
|
@@ -5572,12 +5594,14 @@ var Project = class {
|
|
|
5572
5594
|
id;
|
|
5573
5595
|
tools;
|
|
5574
5596
|
prompts;
|
|
5597
|
+
scorers;
|
|
5575
5598
|
constructor(args) {
|
|
5576
5599
|
_initializeSpanContext();
|
|
5577
5600
|
this.name = "name" in args ? args.name : void 0;
|
|
5578
5601
|
this.id = "id" in args ? args.id : void 0;
|
|
5579
5602
|
this.tools = new ToolBuilder(this);
|
|
5580
5603
|
this.prompts = new PromptBuilder(this);
|
|
5604
|
+
this.scorers = new ScorerBuilder(this);
|
|
5581
5605
|
}
|
|
5582
5606
|
};
|
|
5583
5607
|
var ToolBuilder = class {
|
|
@@ -5611,6 +5635,70 @@ var ToolBuilder = class {
|
|
|
5611
5635
|
return tool;
|
|
5612
5636
|
}
|
|
5613
5637
|
};
|
|
5638
|
+
var ScorerBuilder = class {
|
|
5639
|
+
constructor(project) {
|
|
5640
|
+
this.project = project;
|
|
5641
|
+
}
|
|
5642
|
+
taskCounter = 0;
|
|
5643
|
+
create(opts) {
|
|
5644
|
+
this.taskCounter++;
|
|
5645
|
+
let resolvedName = opts.name;
|
|
5646
|
+
if (!resolvedName && "handler" in opts) {
|
|
5647
|
+
resolvedName = opts.handler.name;
|
|
5648
|
+
}
|
|
5649
|
+
if (!resolvedName || resolvedName.trim().length === 0) {
|
|
5650
|
+
resolvedName = `Scorer ${path2.basename(__filename)} ${this.taskCounter}`;
|
|
5651
|
+
}
|
|
5652
|
+
const slug = opts.slug ?? slugifyLib(resolvedName, { lower: true, strict: true });
|
|
5653
|
+
if ("handler" in opts) {
|
|
5654
|
+
const scorer = new CodeFunction(this.project, {
|
|
5655
|
+
...opts,
|
|
5656
|
+
name: resolvedName,
|
|
5657
|
+
slug,
|
|
5658
|
+
type: "scorer"
|
|
5659
|
+
});
|
|
5660
|
+
if (globalThis._lazy_load) {
|
|
5661
|
+
globalThis._evals.functions.push(
|
|
5662
|
+
scorer
|
|
5663
|
+
);
|
|
5664
|
+
}
|
|
5665
|
+
} else {
|
|
5666
|
+
const promptBlock = "messages" in opts ? {
|
|
5667
|
+
type: "chat",
|
|
5668
|
+
messages: opts.messages
|
|
5669
|
+
} : {
|
|
5670
|
+
type: "completion",
|
|
5671
|
+
content: opts.prompt
|
|
5672
|
+
};
|
|
5673
|
+
const promptData = {
|
|
5674
|
+
prompt: promptBlock,
|
|
5675
|
+
options: {
|
|
5676
|
+
model: opts.model,
|
|
5677
|
+
params: opts.params
|
|
5678
|
+
},
|
|
5679
|
+
parser: {
|
|
5680
|
+
type: "llm_classifier",
|
|
5681
|
+
use_cot: opts.useCot,
|
|
5682
|
+
choice_scores: opts.choiceScores
|
|
5683
|
+
}
|
|
5684
|
+
};
|
|
5685
|
+
const codePrompt = new CodePrompt(
|
|
5686
|
+
this.project,
|
|
5687
|
+
promptData,
|
|
5688
|
+
[],
|
|
5689
|
+
{
|
|
5690
|
+
...opts,
|
|
5691
|
+
name: resolvedName,
|
|
5692
|
+
slug
|
|
5693
|
+
},
|
|
5694
|
+
"scorer"
|
|
5695
|
+
);
|
|
5696
|
+
if (globalThis._lazy_load) {
|
|
5697
|
+
globalThis._evals.prompts.push(codePrompt);
|
|
5698
|
+
}
|
|
5699
|
+
}
|
|
5700
|
+
}
|
|
5701
|
+
};
|
|
5614
5702
|
var CodeFunction = class {
|
|
5615
5703
|
constructor(project, opts) {
|
|
5616
5704
|
this.project = project;
|
|
@@ -5650,8 +5738,9 @@ var CodePrompt = class {
|
|
|
5650
5738
|
ifExists;
|
|
5651
5739
|
description;
|
|
5652
5740
|
id;
|
|
5741
|
+
functionType;
|
|
5653
5742
|
toolFunctions;
|
|
5654
|
-
constructor(project, prompt, toolFunctions, opts) {
|
|
5743
|
+
constructor(project, prompt, toolFunctions, opts, functionType) {
|
|
5655
5744
|
this.project = project;
|
|
5656
5745
|
this.name = opts.name;
|
|
5657
5746
|
this.slug = opts.slug;
|
|
@@ -5660,17 +5749,9 @@ var CodePrompt = class {
|
|
|
5660
5749
|
this.ifExists = opts.ifExists;
|
|
5661
5750
|
this.description = opts.description;
|
|
5662
5751
|
this.id = opts.id;
|
|
5752
|
+
this.functionType = functionType;
|
|
5663
5753
|
}
|
|
5664
5754
|
};
|
|
5665
|
-
var toolFunctionDefinitionSchema = z3.object({
|
|
5666
|
-
type: z3.literal("function"),
|
|
5667
|
-
function: z3.object({
|
|
5668
|
-
name: z3.string(),
|
|
5669
|
-
description: z3.string().optional(),
|
|
5670
|
-
parameters: z3.record(z3.unknown()).optional(),
|
|
5671
|
-
strict: z3.boolean().optional()
|
|
5672
|
-
})
|
|
5673
|
-
});
|
|
5674
5755
|
var PromptBuilder = class {
|
|
5675
5756
|
constructor(project) {
|
|
5676
5757
|
this.project = project;
|
|
@@ -6393,6 +6474,7 @@ export {
|
|
|
6393
6474
|
ReadonlyAttachment,
|
|
6394
6475
|
ReadonlyExperiment,
|
|
6395
6476
|
Reporter,
|
|
6477
|
+
ScorerBuilder,
|
|
6396
6478
|
SpanImpl,
|
|
6397
6479
|
ToolBuilder,
|
|
6398
6480
|
X_CACHED_HEADER,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "braintrust",
|
|
3
|
-
"version": "0.0.175",
|
|
3
|
+
"version": "0.0.177",
|
|
4
4
|
"description": "SDK for integrating Braintrust",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -75,7 +75,7 @@
|
|
|
75
75
|
},
|
|
76
76
|
"dependencies": {
|
|
77
77
|
"@ai-sdk/provider": "^1.0.1",
|
|
78
|
-
"@braintrust/core": "0.0.
|
|
78
|
+
"@braintrust/core": "0.0.71",
|
|
79
79
|
"@next/env": "^14.2.3",
|
|
80
80
|
"@vercel/functions": "^1.0.2",
|
|
81
81
|
"ai": "^3.2.16",
|