@crewhaus/eval-dataset 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -11
- package/src/loaders/csv.ts +5 -8
- package/src/loaders/http.ts +2 -4
- package/src/loaders/loaders.test.ts +290 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crewhaus/eval-dataset",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Eval dataset loaders (JSONL, CSV, YAML, HTTP) with a lazy iterator API",
|
|
6
6
|
"main": "src/index.ts",
|
|
@@ -12,15 +12,15 @@
|
|
|
12
12
|
"test": "bun test src"
|
|
13
13
|
},
|
|
14
14
|
"dependencies": {
|
|
15
|
-
"@crewhaus/errors": "0.1.
|
|
15
|
+
"@crewhaus/errors": "0.1.2",
|
|
16
16
|
"yaml": "^2.6.0",
|
|
17
17
|
"zod": "^3.23.8"
|
|
18
18
|
},
|
|
19
19
|
"license": "Apache-2.0",
|
|
20
20
|
"author": {
|
|
21
21
|
"name": "Max Meier",
|
|
22
|
-
"email": "max@
|
|
23
|
-
"url": "https://
|
|
22
|
+
"email": "max@crewhaus.ai",
|
|
23
|
+
"url": "https://crewhaus.ai"
|
|
24
24
|
},
|
|
25
25
|
"repository": {
|
|
26
26
|
"type": "git",
|
|
@@ -32,12 +32,7 @@
|
|
|
32
32
|
"url": "https://github.com/crewhaus/factory/issues"
|
|
33
33
|
},
|
|
34
34
|
"publishConfig": {
|
|
35
|
-
"access": "
|
|
35
|
+
"access": "public"
|
|
36
36
|
},
|
|
37
|
-
"files": [
|
|
38
|
-
"src",
|
|
39
|
-
"README.md",
|
|
40
|
-
"LICENSE",
|
|
41
|
-
"NOTICE"
|
|
42
|
-
]
|
|
37
|
+
"files": ["src", "README.md", "LICENSE", "NOTICE"]
|
|
43
38
|
}
|
package/src/loaders/csv.ts
CHANGED
|
@@ -8,18 +8,15 @@ export async function loadCsv(path: string): Promise<LoadedDataset> {
|
|
|
8
8
|
throw new DatasetLoadError(`file not found: ${path}`);
|
|
9
9
|
}
|
|
10
10
|
const text = await file.text();
|
|
11
|
-
const rows = parseCsv(text);
|
|
12
|
-
|
|
13
|
-
return { name: basename(path).replace(/\.csv$/i, ""), samples: emptyIterable() };
|
|
14
|
-
}
|
|
15
|
-
const header = rows[0];
|
|
11
|
+
const [header, ...dataRows] = parseCsv(text);
|
|
12
|
+
const name = basename(path).replace(/\.csv$/i, "");
|
|
16
13
|
if (!header) {
|
|
17
|
-
return { name
|
|
14
|
+
return { name, samples: emptyIterable() };
|
|
18
15
|
}
|
|
19
16
|
|
|
20
17
|
return {
|
|
21
|
-
name
|
|
22
|
-
samples: rowsToSamples(
|
|
18
|
+
name,
|
|
19
|
+
samples: rowsToSamples(dataRows, header, path),
|
|
23
20
|
};
|
|
24
21
|
}
|
|
25
22
|
|
package/src/loaders/http.ts
CHANGED
|
@@ -76,12 +76,10 @@ async function* parseJsonlText(text: string, source: string): AsyncIterable<Samp
|
|
|
76
76
|
}
|
|
77
77
|
|
|
78
78
|
async function* parseCsvText(text: string, source: string): AsyncIterable<Sample> {
|
|
79
|
-
const rows = parseCsv(text);
|
|
80
|
-
if (rows.length === 0) return;
|
|
81
|
-
const header = rows[0];
|
|
79
|
+
const [header, ...dataRows] = parseCsv(text);
|
|
82
80
|
if (!header) return;
|
|
83
81
|
let rowNo = 1;
|
|
84
|
-
for (const row of
|
|
82
|
+
for (const row of dataRows) {
|
|
85
83
|
rowNo += 1;
|
|
86
84
|
if (row.length === 1 && row[0] === "") continue;
|
|
87
85
|
const obj: Record<string, string | string[]> = {};
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
import { afterEach, describe, expect, spyOn, test } from "bun:test";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { DatasetLoadError } from "../errors";
|
|
5
|
+
import { loadCsv } from "./csv";
|
|
6
|
+
import { loadHttp } from "./http";
|
|
7
|
+
import { loadYaml } from "./yaml";
|
|
8
|
+
|
|
9
|
+
async function collect<T>(iter: AsyncIterable<T>): Promise<T[]> {
|
|
10
|
+
const out: T[] = [];
|
|
11
|
+
for await (const item of iter) out.push(item);
|
|
12
|
+
return out;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
let tmpCounter = 0;
|
|
16
|
+
async function withTmp(name: string, contents: string): Promise<string> {
|
|
17
|
+
const path = join(tmpdir(), `eval-dataset-${process.pid}-${tmpCounter++}-${name}`);
|
|
18
|
+
await Bun.write(path, contents);
|
|
19
|
+
return path;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
/** Build a minimal Response stub the http loader understands. */
|
|
23
|
+
function httpResponse(
|
|
24
|
+
body: string,
|
|
25
|
+
init: { status?: number; ok?: boolean; contentType?: string } = {},
|
|
26
|
+
): Response {
|
|
27
|
+
const status = init.status ?? 200;
|
|
28
|
+
const headers = new Headers();
|
|
29
|
+
if (init.contentType !== undefined) headers.set("content-type", init.contentType);
|
|
30
|
+
return {
|
|
31
|
+
ok: init.ok ?? (status >= 200 && status < 300),
|
|
32
|
+
status,
|
|
33
|
+
headers,
|
|
34
|
+
text: async () => body,
|
|
35
|
+
} as unknown as Response;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
describe("loadCsv — file-level branches", () => {
|
|
39
|
+
test("throws DatasetLoadError when the file does not exist", async () => {
|
|
40
|
+
await expect(loadCsv(join(tmpdir(), "eval-dataset-absent.csv"))).rejects.toThrow(
|
|
41
|
+
DatasetLoadError,
|
|
42
|
+
);
|
|
43
|
+
await expect(loadCsv(join(tmpdir(), "eval-dataset-absent.csv"))).rejects.toThrow(
|
|
44
|
+
/file not found/,
|
|
45
|
+
);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
test("empty CSV file yields zero samples (no header row)", async () => {
|
|
49
|
+
const path = await withTmp("empty.csv", "");
|
|
50
|
+
try {
|
|
51
|
+
const ds = await loadCsv(path);
|
|
52
|
+
expect(ds.name).toBe(`eval-dataset-${process.pid}-${tmpCounter - 1}-empty`);
|
|
53
|
+
expect(await collect(ds.samples)).toEqual([]);
|
|
54
|
+
} finally {
|
|
55
|
+
await Bun.file(path).delete();
|
|
56
|
+
}
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
test("skips blank in-body rows but parses real rows", async () => {
|
|
60
|
+
// A blank line between data rows parses to a single empty field `[""]`,
|
|
61
|
+
// which rowsToSamples must skip rather than validate.
|
|
62
|
+
const path = await withTmp("blank.csv", "id,input\nq1,hello\n\nq2,world\n");
|
|
63
|
+
try {
|
|
64
|
+
const samples = await collect((await loadCsv(path)).samples);
|
|
65
|
+
expect(samples).toEqual([
|
|
66
|
+
{ id: "q1", input: "hello" },
|
|
67
|
+
{ id: "q2", input: "world" },
|
|
68
|
+
]);
|
|
69
|
+
} finally {
|
|
70
|
+
await Bun.file(path).delete();
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test("rejects a row that fails schema validation with the row number", async () => {
|
|
75
|
+
// Missing `id` (empty cell) — `id` is required and min-length 1.
|
|
76
|
+
const path = await withTmp("bad.csv", "id,input\n,orphan\n");
|
|
77
|
+
try {
|
|
78
|
+
const ds = await loadCsv(path);
|
|
79
|
+
await expect(collect(ds.samples)).rejects.toThrow(DatasetLoadError);
|
|
80
|
+
await expect(collect((await loadCsv(path)).samples)).rejects.toThrow(
|
|
81
|
+
/invalid sample on row 2/,
|
|
82
|
+
);
|
|
83
|
+
} finally {
|
|
84
|
+
await Bun.file(path).delete();
|
|
85
|
+
}
|
|
86
|
+
});
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
describe("loadYaml — file-level and validation branches", () => {
|
|
90
|
+
test("throws DatasetLoadError when the file does not exist", async () => {
|
|
91
|
+
await expect(loadYaml(join(tmpdir(), "eval-dataset-absent.yaml"))).rejects.toThrow(
|
|
92
|
+
/file not found/,
|
|
93
|
+
);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
test("rejects malformed YAML", async () => {
|
|
97
|
+
// Unbalanced flow mapping is a parse error in the `yaml` library.
|
|
98
|
+
const path = await withTmp("broken.yaml", "name: x\nsamples: [a: 1, : :\n");
|
|
99
|
+
try {
|
|
100
|
+
await expect(loadYaml(path)).rejects.toThrow(/malformed YAML/);
|
|
101
|
+
} finally {
|
|
102
|
+
await Bun.file(path).delete();
|
|
103
|
+
}
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test("rejects a non-array document that fails the Dataset schema", async () => {
|
|
107
|
+
// Object shape, but `samples` is missing → DatasetSchema fails.
|
|
108
|
+
const path = await withTmp("notdataset.yaml", "name: only-a-name\n");
|
|
109
|
+
try {
|
|
110
|
+
await expect(loadYaml(path)).rejects.toThrow(/invalid dataset/);
|
|
111
|
+
} finally {
|
|
112
|
+
await Bun.file(path).delete();
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
test("rejects an invalid sample inside a bare array with its index", async () => {
|
|
117
|
+
const path = await withTmp("badsample.yaml", "- id: ok\n input: fine\n- input: no-id\n");
|
|
118
|
+
try {
|
|
119
|
+
const ds = await loadYaml(path);
|
|
120
|
+
await expect(collect(ds.samples)).rejects.toThrow(/invalid sample at index 1/);
|
|
121
|
+
} finally {
|
|
122
|
+
await Bun.file(path).delete();
|
|
123
|
+
}
|
|
124
|
+
});
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
describe("loadHttp — dispatch and parsing", () => {
|
|
128
|
+
afterEach(() => {
|
|
129
|
+
// Restore any fetch spy installed inside a test.
|
|
130
|
+
(globalThis.fetch as unknown as { mockRestore?: () => void }).mockRestore?.();
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
test("throws on non-OK responses", async () => {
|
|
134
|
+
spyOn(globalThis, "fetch").mockResolvedValue(httpResponse("nope", { status: 404, ok: false }));
|
|
135
|
+
await expect(loadHttp("https://example.test/data.jsonl")).rejects.toThrow(/HTTP 404/);
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
test("parses JSONL by extension", async () => {
|
|
139
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
140
|
+
httpResponse('{"id":"q1","input":"hi"}\n\n{"id":"q2","input":"yo"}\n'),
|
|
141
|
+
);
|
|
142
|
+
const ds = await loadHttp("https://example.test/path/remote.jsonl");
|
|
143
|
+
expect(ds.name).toBe("remote");
|
|
144
|
+
expect(await collect(ds.samples)).toEqual([
|
|
145
|
+
{ id: "q1", input: "hi" },
|
|
146
|
+
{ id: "q2", input: "yo" },
|
|
147
|
+
]);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
test("parses JSONL by content-type when extension is absent", async () => {
|
|
151
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
152
|
+
httpResponse('{"id":"q1","input":"hi"}\n', {
|
|
153
|
+
contentType: "application/x-ndjson; charset=utf-8",
|
|
154
|
+
}),
|
|
155
|
+
);
|
|
156
|
+
const ds = await loadHttp("https://example.test/download");
|
|
157
|
+
expect(await collect(ds.samples)).toEqual([{ id: "q1", input: "hi" }]);
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
test("rejects malformed JSON over HTTP with the line number", async () => {
|
|
161
|
+
spyOn(globalThis, "fetch").mockResolvedValue(httpResponse('{"id":"q1","input":"ok"}\nnope\n'));
|
|
162
|
+
const ds = await loadHttp("https://example.test/data.jsonl");
|
|
163
|
+
await expect(collect(ds.samples)).rejects.toThrow(/malformed JSON on line 2/);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
test("rejects an invalid JSONL sample over HTTP", async () => {
|
|
167
|
+
spyOn(globalThis, "fetch").mockResolvedValue(httpResponse('{"input":"no id"}\n'));
|
|
168
|
+
const ds = await loadHttp("https://example.test/data.jsonl");
|
|
169
|
+
await expect(collect(ds.samples)).rejects.toThrow(/invalid sample on line 1/);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test("parses CSV by content-type", async () => {
|
|
173
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
174
|
+
httpResponse('id,input,expected_tools\nq1,hi,\nq2,yo,"bash, read"\n', {
|
|
175
|
+
contentType: "text/csv",
|
|
176
|
+
}),
|
|
177
|
+
);
|
|
178
|
+
const ds = await loadHttp("https://example.test/data");
|
|
179
|
+
const samples = await collect(ds.samples);
|
|
180
|
+
expect(samples[0]).toEqual({ id: "q1", input: "hi" });
|
|
181
|
+
expect(samples[1]).toEqual({ id: "q2", input: "yo", expected_tools: ["bash", "read"] });
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
test("CSV with only a header (no data) yields zero samples", async () => {
|
|
185
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
186
|
+
httpResponse("id,input\n", { contentType: "text/csv" }),
|
|
187
|
+
);
|
|
188
|
+
const ds = await loadHttp("https://example.test/data.csv");
|
|
189
|
+
expect(await collect(ds.samples)).toEqual([]);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
test("empty CSV body yields zero samples (no header)", async () => {
|
|
193
|
+
spyOn(globalThis, "fetch").mockResolvedValue(httpResponse("", { contentType: "text/csv" }));
|
|
194
|
+
const ds = await loadHttp("https://example.test/data.csv");
|
|
195
|
+
expect(await collect(ds.samples)).toEqual([]);
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
test("skips blank in-body CSV rows over HTTP", async () => {
|
|
199
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
200
|
+
httpResponse("id,input\nq1,a\n\nq2,b\n", { contentType: "text/csv" }),
|
|
201
|
+
);
|
|
202
|
+
const ds = await loadHttp("https://example.test/data.csv");
|
|
203
|
+
expect(await collect(ds.samples)).toEqual([
|
|
204
|
+
{ id: "q1", input: "a" },
|
|
205
|
+
{ id: "q2", input: "b" },
|
|
206
|
+
]);
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
test("rejects an invalid CSV sample over HTTP with the row number", async () => {
|
|
210
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
211
|
+
httpResponse("id,input\n,missing-id\n", { contentType: "text/csv" }),
|
|
212
|
+
);
|
|
213
|
+
const ds = await loadHttp("https://example.test/data.csv");
|
|
214
|
+
await expect(collect(ds.samples)).rejects.toThrow(/invalid sample on row 2/);
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
test("parses YAML Dataset wrapper by extension", async () => {
|
|
218
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
219
|
+
httpResponse("name: remote-yaml\nsamples:\n - id: q1\n input: hi\n"),
|
|
220
|
+
);
|
|
221
|
+
const ds = await loadHttp("https://example.test/path/data.yaml");
|
|
222
|
+
expect(ds.name).toBe("remote-yaml");
|
|
223
|
+
expect(await collect(ds.samples)).toEqual([{ id: "q1", input: "hi" }]);
|
|
224
|
+
});
|
|
225
|
+
|
|
226
|
+
test("parses YAML bare array by content-type and derives name", async () => {
|
|
227
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
228
|
+
httpResponse("- id: q1\n input: hi\n", { contentType: "application/yaml" }),
|
|
229
|
+
);
|
|
230
|
+
const ds = await loadHttp("https://example.test/path/bare.yml?ref=main");
|
|
231
|
+
expect(ds.name).toBe("bare");
|
|
232
|
+
expect(await collect(ds.samples)).toEqual([{ id: "q1", input: "hi" }]);
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
test("rejects malformed YAML over HTTP", async () => {
|
|
236
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
237
|
+
httpResponse("name: x\nsamples: [a: 1, : :\n", { contentType: "text/yaml" }),
|
|
238
|
+
);
|
|
239
|
+
await expect(loadHttp("https://example.test/data.yaml")).rejects.toThrow(/malformed YAML/);
|
|
240
|
+
});
|
|
241
|
+
|
|
242
|
+
test("rejects a non-array YAML document that fails the Dataset schema", async () => {
|
|
243
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
244
|
+
httpResponse("name: only-a-name\n", { contentType: "application/yaml" }),
|
|
245
|
+
);
|
|
246
|
+
await expect(loadHttp("https://example.test/data.yaml")).rejects.toThrow(/invalid dataset/);
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
test("rejects an invalid sample inside an HTTP YAML payload", async () => {
|
|
250
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
251
|
+
httpResponse("- id: ok\n input: fine\n- input: no-id\n", { contentType: "text/yaml" }),
|
|
252
|
+
);
|
|
253
|
+
const ds = await loadHttp("https://example.test/data.yaml");
|
|
254
|
+
await expect(collect(ds.samples)).rejects.toThrow(/invalid sample at index 1/);
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
test("rejects an unrecognized HTTP dataset format", async () => {
|
|
258
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
259
|
+
httpResponse("plain", { contentType: "text/plain" }),
|
|
260
|
+
);
|
|
261
|
+
await expect(loadHttp("https://example.test/data")).rejects.toThrow(
|
|
262
|
+
/unrecognized HTTP dataset format/,
|
|
263
|
+
);
|
|
264
|
+
});
|
|
265
|
+
|
|
266
|
+
test("reports content-type 'unknown' when the header is absent", async () => {
|
|
267
|
+
spyOn(globalThis, "fetch").mockResolvedValue(httpResponse("plain"));
|
|
268
|
+
await expect(loadHttp("https://example.test/data")).rejects.toThrow(/content-type: unknown/);
|
|
269
|
+
});
|
|
270
|
+
|
|
271
|
+
test("deriveName falls back to 'remote-dataset' for a path with no segments", async () => {
|
|
272
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
273
|
+
httpResponse('{"id":"q1","input":"hi"}\n', { contentType: "application/x-jsonlines" }),
|
|
274
|
+
);
|
|
275
|
+
// Root path: no trailing segment to derive a name from.
|
|
276
|
+
const ds = await loadHttp("https://example.test/");
|
|
277
|
+
expect(ds.name).toBe("remote-dataset");
|
|
278
|
+
});
|
|
279
|
+
|
|
280
|
+
test("deriveName catch-branch returns 'remote-dataset' when URL parsing throws", async () => {
|
|
281
|
+
// fetch is stubbed, so an unparseable URL string still reaches deriveName,
|
|
282
|
+
// where `new URL(...)` throws and the catch returns the fallback name.
|
|
283
|
+
spyOn(globalThis, "fetch").mockResolvedValue(
|
|
284
|
+
httpResponse('{"id":"q1","input":"hi"}\n', { contentType: "application/x-ndjson" }),
|
|
285
|
+
);
|
|
286
|
+
const ds = await loadHttp("not a parseable url");
|
|
287
|
+
expect(ds.name).toBe("remote-dataset");
|
|
288
|
+
expect(await collect(ds.samples)).toEqual([{ id: "q1", input: "hi" }]);
|
|
289
|
+
});
|
|
290
|
+
});
|