braintrust 0.0.21 → 0.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/cli.js ADDED
@@ -0,0 +1,348 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
+ if (k2 === undefined) k2 = k;
5
+ var desc = Object.getOwnPropertyDescriptor(m, k);
6
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
+ desc = { enumerable: true, get: function() { return m[k]; } };
8
+ }
9
+ Object.defineProperty(o, k2, desc);
10
+ }) : (function(o, m, k, k2) {
11
+ if (k2 === undefined) k2 = k;
12
+ o[k2] = m[k];
13
+ }));
14
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
15
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
16
+ }) : function(o, v) {
17
+ o["default"] = v;
18
+ });
19
+ var __importStar = (this && this.__importStar) || function (mod) {
20
+ if (mod && mod.__esModule) return mod;
21
+ var result = {};
22
+ if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
23
+ __setModuleDefault(result, mod);
24
+ return result;
25
+ };
26
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
27
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
28
+ return new (P || (P = Promise))(function (resolve, reject) {
29
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
30
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
31
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
32
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
33
+ });
34
+ };
35
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ const esbuild = __importStar(require("esbuild"));
40
+ const chalk_1 = __importDefault(require("chalk"));
41
+ const fs_1 = __importDefault(require("fs"));
42
+ const os_1 = __importDefault(require("os"));
43
+ const path_1 = __importDefault(require("path"));
44
+ const util_1 = __importDefault(require("util"));
45
+ const fsWalk = __importStar(require("@nodelib/fs.walk"));
46
+ const minimatch_1 = require("minimatch");
47
+ const argparse_1 = require("argparse");
48
+ const uuid_1 = require("uuid");
49
+ const pluralize_1 = __importDefault(require("pluralize"));
50
+ const logger_1 = require("./logger");
51
+ // Re-use the module resolution logic from Jest
52
+ const nodeModulesPaths_1 = __importDefault(require("./jest/nodeModulesPaths"));
53
+ const framework_1 = require("./framework");
54
+ // This requires require
55
+ // https://stackoverflow.com/questions/50822310/how-to-import-package-json-in-typescript
56
+ const { version } = require("../package.json");
57
+ // TODO: This could be loaded from configuration
58
+ const INCLUDE = ["**/*.eval.ts", "**/*.eval.js"];
59
+ const EXCLUDE = ["**/node_modules/**", "**/dist/**", "**/build/**"];
60
+ const OUT_EXT = "js";
61
+ const error = chalk_1.default.bold.red;
62
+ const warning = chalk_1.default.hex("#FFA500"); // Orange color
63
/**
 * Report an error on stderr.
 *
 * @param {*} e - The value to report (typically an Error).
 * @param {boolean} verbose - When truthy the value is handed to console.error
 *   untouched; otherwise only its string form is printed.
 */
function logError(e, verbose) {
    // The string conversion only happens on the terse path.
    const payload = verbose ? e : `${e}`;
    console.error(payload);
}
71
/**
 * Run `evalFn` while this module's lookup paths temporarily point at the
 * node_modules chain computed for the directory containing `inFile`.
 *
 * @param {string} inFile - Source file whose directory seeds the lookup paths.
 * @param {Function} evalFn - Zero-argument callback to invoke.
 * @returns {*} Whatever `evalFn` returns.
 */
function evalWithModuleContext(inFile, evalFn) {
    // Keep a snapshot so the previous paths come back even if evalFn throws.
    const savedPaths = module.paths.slice();
    try {
        const sourceDir = path_1.default.dirname(inFile);
        module.paths = (0, nodeModulesPaths_1.default)(sourceDir, {});
        return evalFn();
    }
    finally {
        module.paths = savedPaths;
    }
}
81
/**
 * Create an esbuild context for one eval file and wrap it in a handle.
 *
 * @param {string} inFile - Path of the eval source file.
 * @param {string} outFile - Output path passed through to the build options.
 * @returns {Promise<object>} Handle with `inFile`, `outFile`, `rebuild()` and
 *   `destroy()`. `rebuild()` never rejects: it resolves to either
 *   `{ type: "success", result, evaluator, sourceFile }` or
 *   `{ type: "failure", error, sourceFile }`.
 */
function initFile(inFile, outFile) {
    return __awaiter(this, void 0, void 0, function* () {
        const ctx = yield esbuild.context(buildOpts(inFile, outFile));
        const failure = (error) => ({ type: "failure", error, sourceFile: inFile });
        // Evaluate the bundled text and snapshot whatever it registered in
        // globalThis._evals.
        // NOTE(review): this is a direct eval, so the bundle runs with this
        // module's scope visible; presumably that is how bundled code reaches
        // `require` -- confirm before replacing it with anything else.
        const loadEvaluators = (moduleText) => evalWithModuleContext(inFile, () => {
            globalThis._evals = {};
            eval(moduleText);
            return Object.assign({}, globalThis._evals);
        });
        const rebuild = () => __awaiter(this, void 0, void 0, function* () {
            try {
                const result = yield ctx.rebuild();
                if (!result.outputFiles) {
                    return failure(new Error("No output file generated"));
                }
                const evaluator = loadEvaluators(result.outputFiles[0].text);
                return { type: "success", result, evaluator, sourceFile: inFile };
            }
            catch (e) {
                // Build errors and errors thrown while evaluating both land here.
                return failure(e);
            }
        });
        const destroy = () => __awaiter(this, void 0, void 0, function* () {
            yield ctx.dispose();
        });
        return { inFile, outFile, rebuild, destroy };
    });
}
115
/**
 * Merge the evaluators produced by a batch of build results into `evaluators`.
 *
 * Failed builds are reported with console.warn and skipped. When a name is
 * already registered from a different source file (or with a different
 * evaluator object), a warning is printed and the earlier registration is
 * kept, matching the "Will skip" wording of the warning.
 *
 * @param {object} evaluators - Map of evaluator name to
 *   `{ sourceFile, evaluator }`; mutated in place.
 * @param {Array<object>} buildResults - Results as returned by a handle's
 *   `rebuild()` (`type` is "success" or "failure").
 * @param {object} opts - Only `opts.verbose` is read here.
 */
function updateEvaluators(evaluators, buildResults, opts) {
    for (const result of buildResults) {
        if (result.type === "failure") {
            if (opts.verbose) {
                console.warn(`Failed to compile ${result.sourceFile}`);
                console.warn(result.error);
            }
            else {
                console.warn(`Failed to compile ${result.sourceFile}: ${result.error.message}`);
            }
            continue;
        }
        for (const [name, evaluator] of Object.entries(result.evaluator)) {
            // Own-property check so names such as "toString" or "constructor"
            // are not mistaken for existing registrations on a plain object.
            const existing = Object.prototype.hasOwnProperty.call(evaluators, name)
                ? evaluators[name]
                : undefined;
            if (existing &&
                (existing.sourceFile !== result.sourceFile ||
                    existing.evaluator !== evaluator)) {
                console.warn(warning(`Evaluator ${name} already exists (in ${existing.sourceFile} and ${result.sourceFile}). Will skip ${name} in ${result.sourceFile}.`));
                // Actually skip the duplicate instead of overwriting the first one.
                continue;
            }
            evaluators[name] = {
                sourceFile: result.sourceFile,
                evaluator,
            };
        }
    }
}
140
/**
 * Build every handle once, log in, run all discovered evaluators and print
 * either each evaluator's summary or its failures.
 *
 * @param {object} handles - Map of source file to handle (each exposing `rebuild()`).
 * @param {object} opts - `verbose`, `apiKey`, `orgName` and `apiUrl` are read.
 * @returns {Promise<void>}
 */
function runOnce(handles, opts) {
    return __awaiter(this, void 0, void 0, function* () {
        const buildResults = yield Promise.all(Object.values(handles).map((handle) => handle.rebuild()));
        const evaluators = {};
        updateEvaluators(evaluators, buildResults, opts);
        // TODO: We should probably take login arguments here
        const { apiKey, orgName, apiUrl } = opts;
        yield (0, logger_1.login)({ apiKey, orgName, apiUrl });
        const names = Object.keys(evaluators);
        // TODO: For now, use the eval name as the project. However, we need to evolve
        // the definition of a project and create a new concept called run, so that we
        // can name the experiment/evaluation within the run the evaluator's name.
        const allEvalsResults = yield Promise.all(names.map((name) => __awaiter(this, void 0, void 0, function* () {
            const definition = evaluators[name].evaluator;
            const logger = yield (0, logger_1.init)(definition.name);
            return yield (0, framework_1.runEvaluator)(logger, definition);
        })));
        // Results come back in the same order as `names`.
        for (const [idx, evaluator] of names.entries()) {
            const { results, summary } = allEvalsResults[idx];
            const failingResults = results.filter((r) => r.error !== undefined);
            if (failingResults.length === 0) {
                console.log(summary);
                continue;
            }
            // TODO: We may want to support a non-strict mode (and make this the "strict" behavior), so that
            // users can still log imperfect evaluations. In the meantime, they should handle these cases inside
            // of their tasks.
            const errorCount = (0, pluralize_1.default)("error", failingResults.length, true);
            const hint = !opts.verbose ? " (add --verbose to see the full error)" : "";
            console.warn(warning(`Evaluator ${evaluator} failed with ${errorCount}${hint}. This evaluation ("${evaluator}") will not be fully logged.`));
            for (const result of failingResults) {
                logError(result.error, opts.verbose);
            }
        }
    });
}
181
/**
 * Decide whether a path passes the include/exclude glob filters.
 *
 * The path is resolved to an absolute path before matching.
 *
 * @param {string} pathInput - Path to test.
 * @param {string[]|null} include_patterns - When not null, at least one
 *   pattern must match.
 * @param {string[]|null} exclude_patterns - When not null, no pattern may match.
 * @returns {boolean} True when the path should be kept.
 */
function checkMatch(pathInput, include_patterns, exclude_patterns) {
    const resolved = path_1.default.resolve(pathInput);
    const matchesAny = (patterns) => patterns.some((pattern) => (0, minimatch_1.minimatch)(resolved, pattern));
    if (include_patterns !== null && !matchesAny(include_patterns)) {
        return false;
    }
    if (exclude_patterns !== null && matchesAny(exclude_patterns)) {
        return false;
    }
    return true;
}
207
/**
 * Expand one CLI path argument into the list of eval files it refers to.
 *
 * A non-directory path is returned on its own when it passes the INCLUDE and
 * EXCLUDE filters; a directory is walked and every matching file is returned.
 * If the path cannot be stat'ed, an error is printed and process.exit(1) is
 * called.
 *
 * @param {string} inputPath - File or directory path.
 * @returns {Promise<string[]>} Matching file paths.
 */
function collectFiles(inputPath) {
    return __awaiter(this, void 0, void 0, function* () {
        let pathStat = null;
        try {
            pathStat = fs_1.default.lstatSync(inputPath);
        }
        catch (e) {
            console.error(error(`Error reading ${inputPath}: ${e}`));
            process.exit(1);
        }
        if (!pathStat.isDirectory()) {
            return checkMatch(inputPath, INCLUDE, EXCLUDE) ? [inputPath] : [];
        }
        const walk = util_1.default.promisify(fsWalk.walk);
        const walked = yield walk(inputPath, {
            // Directories are only checked against the exclude list.
            deepFilter: (entry) => checkMatch(entry.path, null, EXCLUDE),
            // Only regular files that pass both filters are reported.
            entryFilter: (entry) => entry.dirent.isFile() && checkMatch(entry.path, INCLUDE, EXCLUDE),
        });
        return walked.map((entry) => entry.path);
    });
}
237
/**
 * Assemble the esbuild options used to bundle one eval file.
 *
 * @param {string} fileName - Entry point to bundle.
 * @param {string} outFile - Value for the `outfile` option.
 * @returns {object} Options object for esbuild.
 */
function buildOpts(fileName, outFile) {
    // Remove the leading "v" from process.version
    const nodeVersion = process.version.slice(1);
    const options = {
        entryPoints: [fileName],
        bundle: true,
        outfile: outFile,
        platform: "node",
        packages: "external",
        write: false,
    };
    options.target = `node${nodeVersion}`;
    return options;
}
249
/**
 * Create an esbuild context for a single entry point.
 *
 * @param {string} fileName - Entry point to bundle.
 * @param {string} outFile - Value for the `outfile` option.
 * @returns {Promise<object>} The context returned by esbuild.context().
 */
function buildContext(fileName, outFile) {
    return __awaiter(this, void 0, void 0, function* () {
        const options = buildOpts(fileName, outFile);
        const context = yield esbuild.context(options);
        return context;
    });
}
254
/**
 * Discover the eval files named by the CLI arguments and create one build
 * handle per distinct file.
 *
 * @param {object} args - Parsed CLI arguments; `args.files` lists the paths
 *   to scan, and the current directory is used when it is empty.
 * @returns {Promise<object>} Map of resolved source path to its handle.
 */
function initializeHandles(args) {
    return __awaiter(this, void 0, void 0, function* () {
        const inputPaths = args.files.length > 0 ? args.files : ["."];
        // Resolve to absolute paths so the same file reached twice is kept once.
        const sourceFiles = new Set();
        for (const inputPath of inputPaths) {
            const found = yield collectFiles(inputPath);
            for (const file of found) {
                sourceFiles.add(path_1.default.resolve(file));
            }
        }
        // XXX We can probably not even create this dir?
        const tmpDir = path_1.default.join(os_1.default.tmpdir(), `btevals-${(0, uuid_1.v4)().slice(0, 8)}`);
        const initPromises = Array.from(sourceFiles, (file) => {
            const stem = path_1.default.basename(file, path_1.default.extname(file));
            const outFile = path_1.default.join(tmpDir, `${stem}-${(0, uuid_1.v4)().slice(0, 8)}.${OUT_EXT}`);
            return initFile(file, outFile);
        });
        const handles = {};
        const initResults = yield Promise.all(initPromises);
        initResults.forEach((handle) => {
            handles[handle.inFile] = handle;
        });
        return handles;
    });
}
280
/**
 * Entry point for the `run` subcommand: build handles for the requested
 * files, run every evaluator once, and always dispose the build handles.
 *
 * @param {object} args - Parsed CLI arguments (`files`, `watch`, `verbose`,
 *   `api_key`, `org_name`, `api_url`).
 * @returns {Promise<void>} Rejects if watch mode is requested (not
 *   implemented) or if the evaluation run fails.
 */
function run(args) {
    return __awaiter(this, void 0, void 0, function* () {
        const handles = yield initializeHandles(args);
        try {
            if (args.watch) {
                throw new Error("Unimplemented");
            }
            // Wait for the run to finish: without this the finally block below
            // would dispose the handles while evaluation is still in progress,
            // and a failed run would surface as an unhandled rejection instead
            // of reaching the caller.
            yield runOnce(handles, {
                verbose: args.verbose,
                apiKey: args.api_key,
                orgName: args.org_name,
                apiUrl: args.api_url,
            });
        }
        finally {
            // ESBuild can freeze up if you do not clean up the handles properly
            for (const handle of Object.values(handles)) {
                yield handle.destroy();
            }
        }
    });
}
304
/**
 * CLI entry point: define the argument parser, parse the command line and
 * dispatch to the selected subcommand's handler.
 *
 * Any error thrown by the handler is printed with logError and the process
 * exits with status 1.
 *
 * @returns {Promise<void>}
 */
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        const parser = new argparse_1.ArgumentParser({
            description: "Braintrust CLI",
        });
        parser.add_argument("-v", "--version", { action: "version", version });
        // Options shared by subcommands live on a help-less parent parser.
        const parentParser = new argparse_1.ArgumentParser({ add_help: false });
        parentParser.add_argument("--verbose", { action: "store_true" });
        const subparser = parser.add_subparsers({
            required: true,
        });
        const parser_run = subparser.add_parser("run", {
            help: "Run evals locally",
            parents: [parentParser],
        });
        parser_run.add_argument("--api-key", {
            help: "Specify a braintrust api key. If the parameter is not specified, the BRAINTRUST_API_KEY environment variable will be used.",
        });
        parser_run.add_argument("--org-name", {
            help: "The name of a specific organization to connect to. This is useful if you belong to multiple.",
        });
        parser_run.add_argument("--api-url", {
            help: "Specify a custom braintrust api url. Defaults to https://www.braintrustdata.com. This is only necessary if you are using an experimental version of BrainTrust",
        });
        parser_run.add_argument("--watch", {
            action: "store_true",
            help: "Watch files for changes and rerun evals when changes are detected",
        });
        parser_run.add_argument("files", {
            nargs: "*",
            help: "A list of files or directories to run. If no files are specified, the current directory is used.",
        });
        // The parsed namespace carries the handler to call as `func`.
        parser_run.set_defaults({ func: run });
        const parsed = parser.parse_args();
        try {
            yield parsed.func(parsed);
        }
        catch (e) {
            logError(e, parsed.verbose);
            process.exit(1);
        }
    });
}
348
+ main();
@@ -0,0 +1,42 @@
1
+ import { Experiment } from "./logger";
2
+ import { Score } from "autoevals";
3
+ export type Metadata = Record<string, unknown>;
4
+ export interface DatasetRecord<Input, Output> {
5
+ input: Input;
6
+ expected?: Output;
7
+ metadata?: Metadata;
8
+ }
9
+ export type EvalData<Input, Output> = string | (() => DatasetRecord<Input, Output>[]) | (() => Promise<DatasetRecord<Input, Output>[]>);
10
+ export type EvalTask<Input, Output> = ((input: Input, hooks: EvalHooks) => Promise<Output>) | ((input: Input, hooks: EvalHooks) => Output);
11
+ export interface EvalHooks {
12
+ meta: (info: Record<string, unknown>) => void;
13
+ }
14
+ export type EvalScorerArgs<Input, Output> = DatasetRecord<Input, Output> & {
15
+ output: Output;
16
+ };
17
+ export type EvalScorer<Input, Output> = ((args: EvalScorerArgs<Input, Output>) => Score) | ((args: EvalScorerArgs<Input, Output>) => Promise<Score>);
18
+ export interface Evaluator<Input, Output> {
19
+ task: EvalTask<Input, Output>;
20
+ scores: EvalScorer<Input, Output>[];
21
+ data: EvalData<Input, Output>;
22
+ }
23
+ export type EvaluatorDef<Input, Output> = {
24
+ name: string;
25
+ } & Evaluator<Input, Output>;
26
+ export type EvaluatorFile = {
27
+ [evaluator: string]: EvaluatorDef<any, any>;
28
+ };
29
+ declare global {
30
+ var _evals: EvaluatorFile;
31
+ }
32
+ export declare function Eval<Input, Output>(name: string, evaluator: Evaluator<Input, Output>): void;
33
+ export declare function getLoadedEvals(): EvaluatorFile;
34
+ export declare function runEvaluator(experiment: Experiment, evaluator: EvaluatorDef<unknown, unknown>): Promise<{
35
+ results: {
36
+ output: any;
37
+ metadata: Metadata;
38
+ scores: Record<string, number>;
39
+ error: unknown;
40
+ }[];
41
+ summary: import("./logger").ExperimentSummary;
42
+ }>;
@@ -0,0 +1,104 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.runEvaluator = exports.getLoadedEvals = exports.Eval = void 0;
13
+ globalThis._evals = {};
14
/**
 * Register an evaluator under `name` in the global `_evals` registry.
 *
 * @param {string} name - Registry key; also stored as the `name` property
 *   unless `evaluator` supplies its own.
 * @param {object} evaluator - Evaluator definition to register.
 * @throws {Error} If an evaluator is already registered under `name`.
 */
function Eval(name, evaluator) {
    const alreadyRegistered = Boolean(_evals[name]);
    if (alreadyRegistered) {
        throw new Error(`Evaluator ${name} already exists`);
    }
    const definition = Object.assign({ name }, evaluator);
    _evals[name] = definition;
}
20
+ exports.Eval = Eval;
21
/**
 * Return the global evaluator registry.
 *
 * @returns {object} The live `_evals` object itself, not a copy.
 */
function getLoadedEvals() {
    const registry = _evals;
    return registry;
}
24
+ exports.getLoadedEvals = getLoadedEvals;
25
/**
 * Run one evaluator over its dataset, log every record to `experiment`, and
 * return the per-record results plus the experiment summary.
 *
 * For each dataset record the task is run, then every scorer is run on the
 * task output. An error thrown by the task or a scorer is captured in that
 * record's `error` field rather than aborting the run.
 *
 * @param {object} experiment - Must provide `log(event)` and `summarize()`.
 * @param {object} evaluator - `{ name, data, task, scores }`. `data` must be
 *   a function returning the records or a promise of them.
 * @returns {Promise<{results: Array<object>, summary: *}>} Each result is
 *   `{ output, metadata, scores, error }`, where `metadata` is what was
 *   collected through `hooks.meta` (including score metadata under `scores`).
 * @throws {Error} If `evaluator.data` is a string (not implemented).
 */
function runEvaluator(experiment, evaluator) {
    return __awaiter(this, void 0, void 0, function* () {
        console.log(`Running evaluator ${evaluator.name}`);
        if (typeof evaluator.data === "string") {
            throw new Error("Unimplemented: string data paths");
        }
        // Yielding covers both the sync and the promise-returning form of data().
        const data = yield evaluator.data();
        const evals = data.map((datum) => __awaiter(this, void 0, void 0, function* () {
            let metadata = {};
            let output = undefined;
            let error = undefined;
            const scores = {};
            try {
                const meta = (o) => (metadata = Object.assign(Object.assign({}, metadata), o));
                output = yield evaluator.task(datum.input, { meta });
                const scoringArgs = Object.assign(Object.assign({}, datum), { output });
                const scoreResults = yield Promise.all(evaluator.scores.map((score) => __awaiter(this, void 0, void 0, function* () {
                    return yield score(scoringArgs);
                })));
                const scoreMetadata = {};
                for (const scoreResult of scoreResults) {
                    scores[scoreResult.name] = scoreResult.score;
                    const details = Object.assign({}, scoreResult.metadata);
                    // Only record an error that exists; an unconditional `error`
                    // key would make every score look like it has metadata.
                    if (scoreResult.error != null) {
                        details.error = scoreResult.error;
                    }
                    if (Object.keys(details).length > 0) {
                        scoreMetadata[scoreResult.name] = details;
                    }
                }
                if (Object.keys(scoreMetadata).length > 0) {
                    meta({ scores: scoreMetadata });
                }
            }
            catch (e) {
                error = e;
            }
            // Send metadata gathered via hooks.meta (and from scorers) along with
            // the record's own metadata; collected keys win on conflict. When
            // nothing was collected the record's metadata is passed unchanged.
            const loggedMetadata = Object.keys(metadata).length > 0
                ? Object.assign(Object.assign({}, datum.metadata), metadata)
                : datum.metadata;
            experiment.log({
                // TODO We should rename this from inputs -> input in the logger, etc.
                // https://github.com/braintrustdata/braintrust/issues/217
                inputs: datum.input,
                metadata: loggedMetadata,
                expected: datum.expected,
                output,
                scores,
            });
            return {
                output,
                metadata,
                scores,
                error,
            };
        }));
        const results = yield Promise.all(evals);
        const summary = yield experiment.summarize();
        return {
            results,
            summary,
        };
    });
}
104
+ exports.runEvaluator = runEvaluator;
package/dist/gitutil.js CHANGED
@@ -38,15 +38,22 @@ function getBaseBranch(remote = undefined) {
38
38
  // fail with a cryptic error message.
39
39
  throw new Error("No remote found");
40
40
  }
41
- const remoteInfo = yield git.remote(["show", remoteName]);
42
- if (!remoteInfo) {
43
- throw new Error(`Could not find remote ${remoteName}`);
41
+ let branch = null;
42
+ try {
43
+ const remoteInfo = yield git.remote(["show", remoteName]);
44
+ if (!remoteInfo) {
45
+ throw new Error(`Could not find remote ${remoteName}`);
46
+ }
47
+ const match = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
48
+ if (!match) {
49
+ throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
50
+ }
51
+ branch = match[1];
44
52
  }
45
- const match = remoteInfo.match(/\s*HEAD branch:\s*(.*)$/m);
46
- if (!match) {
47
- throw new Error(`Could not find HEAD branch in remote ${remoteName}`);
53
+ catch (_b) {
54
+ branch = "main";
48
55
  }
49
- _baseBranch = { remote: remoteName, branch: match[1] };
56
+ _baseBranch = { remote: remoteName, branch };
50
57
  }
51
58
  return _baseBranch;
52
59
  });