@cephalization/phoenix-insight 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/README.md +195 -1
  2. package/dist/agent/index.js +9 -4
  3. package/dist/cli.js +172 -0
  4. package/dist/commands/index.js +1 -0
  5. package/dist/commands/report-tool.js +239 -0
  6. package/dist/config/schema.js +2 -2
  7. package/dist/modes/local.js +7 -0
  8. package/dist/modes/sandbox.js +8 -0
  9. package/dist/prompts/index.js +1 -1
  10. package/dist/prompts/system.js +10 -3
  11. package/dist/server/session.js +357 -0
  12. package/dist/server/ui.js +232 -0
  13. package/dist/server/websocket.js +212 -0
  14. package/dist/snapshot/spans.js +28 -4
  15. package/dist/tsconfig.esm.tsbuildinfo +1 -1
  16. package/dist/ui/assets/code-block-F6WJLWQG-BTdTzfvl.js +154 -0
  17. package/dist/ui/assets/code-block-F6WJLWQG-BTdTzfvl.js.map +1 -0
  18. package/dist/ui/assets/index-CX8aDatf.css +1 -0
  19. package/dist/ui/assets/index-DjZuAW6Y.js +63 -0
  20. package/dist/ui/assets/index-DjZuAW6Y.js.map +1 -0
  21. package/dist/ui/assets/vendor-data-r1ZEkUds.js +40 -0
  22. package/dist/ui/assets/vendor-data-r1ZEkUds.js.map +1 -0
  23. package/dist/ui/assets/vendor-react-Cgg2GOmP.js +2 -0
  24. package/dist/ui/assets/vendor-react-Cgg2GOmP.js.map +1 -0
  25. package/dist/ui/assets/vendor-render-DoMl5bum.js +381 -0
  26. package/dist/ui/assets/vendor-render-DoMl5bum.js.map +1 -0
  27. package/dist/ui/assets/vendor-ui-Cg-YC4hK.js +46 -0
  28. package/dist/ui/assets/vendor-ui-Cg-YC4hK.js.map +1 -0
  29. package/dist/ui/index.html +18 -0
  30. package/dist/ui/vite.svg +1 -0
  31. package/package.json +13 -14
  32. package/src/agent/index.ts +0 -323
  33. package/src/cli.ts +0 -854
  34. package/src/commands/index.ts +0 -8
  35. package/src/commands/px-fetch-more-spans.ts +0 -174
  36. package/src/commands/px-fetch-more-trace.ts +0 -183
  37. package/src/config/index.ts +0 -225
  38. package/src/config/loader.ts +0 -173
  39. package/src/config/schema.ts +0 -66
  40. package/src/index.ts +0 -1
  41. package/src/modes/index.ts +0 -21
  42. package/src/modes/local.ts +0 -163
  43. package/src/modes/sandbox.ts +0 -144
  44. package/src/modes/types.ts +0 -31
  45. package/src/observability/index.ts +0 -90
  46. package/src/progress.ts +0 -239
  47. package/src/prompts/index.ts +0 -1
  48. package/src/prompts/system.ts +0 -31
  49. package/src/snapshot/client.ts +0 -129
  50. package/src/snapshot/context.ts +0 -587
  51. package/src/snapshot/datasets.ts +0 -132
  52. package/src/snapshot/experiments.ts +0 -246
  53. package/src/snapshot/index.ts +0 -403
  54. package/src/snapshot/projects.ts +0 -58
  55. package/src/snapshot/prompts.ts +0 -267
  56. package/src/snapshot/spans.ts +0 -142
  57. package/src/snapshot/utils.ts +0 -140
@@ -1,587 +0,0 @@
1
- import type { ExecutionMode } from "../modes/types.js";
2
-
3
/**
 * Caller-supplied facts about the snapshot that `generateContext` summarises.
 */
interface ContextMetadata {
  /** Address of the Phoenix instance; printed in the "Snapshot" bullet. */
  phoenixUrl: string;
  /** Capture time; printed as a relative time in the "Snapshot" bullet. */
  snapshotTime: Date;
  /** Optional per-project span limit. Not read by the code visible in this file. */
  spansPerProject?: number;
}
8
-
9
/**
 * Per-project summary gathered by `collectSnapshotStats`.
 */
interface ProjectStats {
  /** Project name as recorded in `/phoenix/projects/index.jsonl`. */
  name: string;
  /** `spanCount` from the project's `spans/metadata.json`; 0 when unavailable. */
  spanCount: number;
  /** Optional flag; not populated by the code visible in this file. */
  hasErrors?: boolean;
  /** Optional counter; not populated by the code visible in this file. */
  recentSpans?: number;
}
15
-
16
/**
 * Per-dataset summary gathered by `collectSnapshotStats`.
 */
interface DatasetInfo {
  /** Dataset name as recorded in `/phoenix/datasets/index.jsonl`. */
  name: string;
  /** Line count of the dataset's `examples.jsonl`; 0 when it cannot be read. */
  exampleCount: number;
  /** The index record's `updated_at` value, when present. */
  updatedAt?: string;
}
21
-
22
/**
 * Per-experiment summary gathered by `collectSnapshotStats`.
 */
interface ExperimentInfo {
  /** Experiment identifier from the index record. */
  id: string;
  /** The record's `datasetName`, or "unknown" when that field is empty. */
  datasetName: string;
  /** The record's `project_name`, when present. */
  projectName?: string;
  /** Status derived from the run counters by `determineExperimentStatus`. */
  status: "completed" | "in_progress" | "failed";
  /** Run counters copied from the index record; each defaults to 0. */
  runCounts: {
    successful: number;
    failed: number;
    missing: number;
  };
  /** The record's `updated_at` value, when present. */
  updatedAt?: string;
}
34
-
35
/**
 * Per-prompt summary gathered by `collectSnapshotStats`.
 */
interface PromptInfo {
  /** Prompt name as recorded in `/phoenix/prompts/index.jsonl`. */
  name: string;
  /** Line count of the prompt's `versions/index.jsonl`; 0 when it cannot be read. */
  versionCount: number;
  /** Optional latest version label; not populated by the code visible in this file. */
  latestVersion?: string;
  /** The index record's `updated_at` value, when present. */
  updatedAt?: string;
}
41
-
42
- // =============================================================================
43
- // Static Section Templates
44
- // =============================================================================
45
-
46
- /**
47
- * Quick Start section for external agents - appears at the top for discoverability
48
- */
49
- const QUICK_START_SECTION = `## Quick Start for External Agents
50
-
51
- This is a **read-only snapshot** of Phoenix observability data. You cannot modify this data.
52
-
53
- ### Key Files to Start With
54
-
55
- | File | Description |
56
- |------|-------------|
57
- | \`/phoenix/projects/index.jsonl\` | List of all projects with traces |
58
- | \`/phoenix/datasets/index.jsonl\` | List of all datasets |
59
- | \`/phoenix/experiments/index.jsonl\` | List of all experiments |
60
- | \`/phoenix/prompts/index.jsonl\` | List of all prompts |
61
-
62
- ### How to Parse Each File Format
63
-
64
- **JSONL files** (\`.jsonl\`): One JSON object per line
65
- \`\`\`bash
66
- # Read all lines as a JSON array
67
- cat /phoenix/projects/index.jsonl | jq -s '.'
68
-
69
- # Process each line individually
70
- while read -r line; do echo "$line" | jq '.name'; done < /phoenix/projects/index.jsonl
71
-
72
- # Get first N items
73
- head -n 5 /phoenix/projects/index.jsonl | jq -s '.'
74
- \`\`\`
75
-
76
- **JSON files** (\`.json\`): Standard JSON format
77
- \`\`\`bash
78
- # Read and pretty-print
79
- cat /phoenix/projects/my-project/metadata.json | jq '.'
80
-
81
- # Extract specific field
82
- cat /phoenix/projects/my-project/metadata.json | jq '.name'
83
- \`\`\`
84
-
85
- **Markdown files** (\`.md\`): Plain text prompt templates
86
- \`\`\`bash
87
- # Read prompt template
88
- cat /phoenix/prompts/my-prompt/versions/v1.md
89
- \`\`\`
90
-
91
- ### Common Operations
92
-
93
- \`\`\`bash
94
- # List all project names
95
- cat /phoenix/projects/index.jsonl | jq -r '.name'
96
-
97
- # Count spans in a project
98
- wc -l < /phoenix/projects/my-project/spans/index.jsonl
99
-
100
- # Find spans with errors
101
- cat /phoenix/projects/my-project/spans/index.jsonl | jq 'select(.status_code == "ERROR")'
102
-
103
- # Get dataset examples
104
- cat /phoenix/datasets/my-dataset/examples.jsonl | jq -s '.' | head -n 100
105
-
106
- # Search across all files
107
- grep -r "error" /phoenix/
108
- \`\`\``;
109
-
110
- /**
111
- * Directory Structure section showing the snapshot layout
112
- */
113
- const DIRECTORY_STRUCTURE_SECTION = `## Directory Structure
114
-
115
- \`\`\`
116
- /phoenix/
117
- _context.md # This file - start here!
118
- /projects/
119
- index.jsonl # List of all projects
120
- /{project_name}/
121
- metadata.json # Project details
122
- /spans/
123
- index.jsonl # Span data (may be sampled)
124
- metadata.json # Span snapshot metadata
125
- /datasets/
126
- index.jsonl # List of all datasets
127
- /{dataset_name}/
128
- metadata.json # Dataset details
129
- examples.jsonl # Dataset examples
130
- /experiments/
131
- index.jsonl # List of all experiments
132
- /{experiment_id}/
133
- metadata.json # Experiment details
134
- runs.jsonl # Experiment runs
135
- /prompts/
136
- index.jsonl # List of all prompts
137
- /{prompt_name}/
138
- metadata.json # Prompt details
139
- /versions/
140
- index.jsonl # Version list
141
- /{version_id}.md # Version template
142
- /_meta/
143
- snapshot.json # Snapshot metadata
144
- \`\`\``;
145
-
146
- /**
147
- * What You Can Do section describing available operations
148
- */
149
- const WHAT_YOU_CAN_DO_SECTION = `## What You Can Do
150
-
151
- - **Explore**: ls, cat, grep, find, jq, awk, sed
152
- - **Fetch more data**: \`px-fetch-more spans --project <name> --limit 500\`
153
- - **Fetch specific trace**: \`px-fetch-more trace --trace-id <id>\``;
154
-
155
- /**
156
- * Data Freshness section with refresh instructions
157
- */
158
- const DATA_FRESHNESS_SECTION = `## Data Freshness
159
-
160
- This is a **read-only snapshot**. Data may have changed since capture.
161
- Run with \`--refresh\` to get latest data.`;
162
-
163
- // =============================================================================
164
- // Main Context Generation
165
- // =============================================================================
166
-
167
/**
 * Writes `/phoenix/_context.md`, a summary of the snapshot intended for both
 * people and agents.
 *
 * The document is the static section templates interleaved with two sections
 * computed from the snapshot contents ("What's Here" and "Recent Activity").
 *
 * @param mode - Execution mode used to inspect the snapshot and write the file.
 * @param metadata - Snapshot origin and capture time shown in the summary.
 */
export async function generateContext(
  mode: ExecutionMode,
  metadata: ContextMetadata
): Promise<void> {
  const snapshotStats = await collectSnapshotStats(mode);

  // The dynamic sections carry their own trailing newline (and "Recent
  // Activity" may be empty), so no explicit blank entry follows them.
  const document = [
    "# Phoenix Snapshot Context",
    "",
    QUICK_START_SECTION,
    "",
    buildWhatsHereSection(snapshotStats, metadata),
    buildRecentActivitySection(snapshotStats),
    DIRECTORY_STRUCTURE_SECTION,
    "",
    WHAT_YOU_CAN_DO_SECTION,
    "",
    DATA_FRESHNESS_SECTION,
  ].join("\n");

  await mode.writeFile("/phoenix/_context.md", document);
}
202
-
203
- // =============================================================================
204
- // Dynamic Section Builders
205
- // =============================================================================
206
-
207
/**
 * Renders the "What's Here" markdown section: one bullet each for projects,
 * datasets, experiments and prompts, followed by a bullet describing when and
 * from where the snapshot was taken.
 *
 * @param stats - Collected snapshot statistics.
 * @param metadata - Supplies the snapshot time and Phoenix URL.
 * @returns The section text, ending with a newline.
 */
function buildWhatsHereSection(
  stats: {
    projects: ProjectStats[];
    datasets: DatasetInfo[];
    experiments: ExperimentInfo[];
    prompts: PromptInfo[];
  },
  metadata: ContextMetadata
): string {
  const { projects, datasets, experiments, prompts } = stats;

  const projectsLine =
    projects.length > 0
      ? `- **${projects.length} projects**: ${projects
          .map((project) => `${project.name} (${project.spanCount} spans)`)
          .join(", ")}`
      : "- **No projects found**";

  const datasetsLine =
    datasets.length > 0
      ? `- **${datasets.length} datasets**: ${datasets
          .map((dataset) => dataset.name)
          .join(", ")}`
      : "- **No datasets found**";

  let experimentsLine = "- **No experiments found**";
  if (experiments.length > 0) {
    const countWithStatus = (status: ExperimentInfo["status"]): number =>
      experiments.filter((experiment) => experiment.status === status).length;

    // Fixed display order; statuses with a zero count are left out.
    const tallies: Array<[number, string]> = [
      [countWithStatus("completed"), "completed"],
      [countWithStatus("in_progress"), "in progress"],
      [countWithStatus("failed"), "failed"],
    ];
    const breakdown = tallies
      .filter(([count]) => count > 0)
      .map(([count, label]) => `${count} ${label}`)
      .join(", ");

    experimentsLine = `- **${experiments.length} experiments**: ${breakdown}`;
  }

  const promptsLine =
    prompts.length > 0
      ? `- **${prompts.length} prompts**: ${prompts
          .map((prompt) => prompt.name)
          .join(", ")}`
      : "- **No prompts found**";

  const snapshotLine = `- **Snapshot**: Created ${formatRelativeTime(metadata.snapshotTime)} from ${metadata.phoenixUrl}`;

  return [
    "## What's Here",
    "",
    projectsLine,
    datasetsLine,
    experimentsLine,
    promptsLine,
    snapshotLine,
    "",
  ].join("\n");
}
282
-
283
/**
 * Renders the "Recent Activity" markdown section as a bullet list.
 *
 * @param stats - Collected snapshot statistics.
 * @returns The section text ending with a newline, or an empty string when
 *   `getRecentActivity` reports nothing.
 */
function buildRecentActivitySection(stats: {
  projects: ProjectStats[];
  datasets: DatasetInfo[];
  experiments: ExperimentInfo[];
  prompts: PromptInfo[];
}): string {
  const highlights = getRecentActivity(stats);
  if (highlights.length === 0) {
    return "";
  }

  return [
    "## Recent Activity",
    "",
    ...highlights.map((highlight) => `- ${highlight}`),
    "",
  ].join("\n");
}
309
-
310
- // =============================================================================
311
- // Data Collection
312
- // =============================================================================
313
-
314
/**
 * Wraps a value in single quotes for safe interpolation into a POSIX shell
 * command, escaping any embedded single quote as `'\''`.
 */
function shellQuote(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`;
}

/**
 * Reads a JSONL file through `mode.exec` and returns the successfully parsed
 * records. A missing file, a failing command, blank lines and lines that are
 * not valid JSON all contribute nothing instead of throwing.
 *
 * `path` is interpolated as-is, so it must be a trusted constant.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- records are unvalidated JSON
async function readJsonlRecords(mode: ExecutionMode, path: string): Promise<any[]> {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see above
  const records: any[] = [];
  try {
    const { stdout } = await mode.exec(`cat ${path} 2>/dev/null || true`);
    if (!stdout) {
      return records;
    }
    for (const line of stdout.trim().split("\n")) {
      if (line.length === 0) {
        continue;
      }
      try {
        records.push(JSON.parse(line));
      } catch {
        // Skip lines that are not valid JSON.
      }
    }
  } catch {
    // Treat an unreadable index as empty.
  }
  return records;
}

/**
 * Returns `wc -l` for the given file, or 0 when the file cannot be read or the
 * output is not a number. The path is shell-quoted because it is built from
 * names stored in the snapshot.
 *
 * NOTE(review): `wc -l` counts newline characters, so a file whose last line
 * has no trailing newline is reported one short — confirm how the snapshot
 * writers terminate JSONL files.
 */
async function countLines(mode: ExecutionMode, path: string): Promise<number> {
  const { stdout } = await mode.exec(
    `wc -l < ${shellQuote(path)} 2>/dev/null || echo "0"`
  );
  return parseInt(stdout.trim(), 10) || 0;
}

/**
 * Collects statistics from the snapshot filesystem.
 *
 * Each index file under `/phoenix/{projects,datasets,experiments,prompts}` is
 * read best-effort: an unreadable index yields an empty list, and a record
 * that cannot be processed is skipped without affecting the others.
 *
 * Paths built from record names are shell-quoted so that names containing
 * spaces or shell metacharacters are looked up literally instead of being
 * interpreted by the shell.
 *
 * @param mode - Execution mode used to run shell commands against the snapshot.
 * @returns Summaries for projects, datasets, experiments and prompts.
 */
async function collectSnapshotStats(mode: ExecutionMode): Promise<{
  projects: ProjectStats[];
  datasets: DatasetInfo[];
  experiments: ExperimentInfo[];
  prompts: PromptInfo[];
}> {
  const result = {
    projects: [] as ProjectStats[],
    datasets: [] as DatasetInfo[],
    experiments: [] as ExperimentInfo[],
    prompts: [] as PromptInfo[],
  };

  // Projects: the span count comes from each project's spans/metadata.json.
  for (const project of await readJsonlRecords(
    mode,
    "/phoenix/projects/index.jsonl"
  )) {
    try {
      const stats: ProjectStats = {
        name: project.name,
        spanCount: 0,
      };

      const spansMetaPath = `/phoenix/projects/${project.name}/spans/metadata.json`;
      const spansMetaExec = await mode.exec(
        `cat ${shellQuote(spansMetaPath)} 2>/dev/null || echo "{}"`
      );
      if (spansMetaExec.stdout) {
        try {
          stats.spanCount = JSON.parse(spansMetaExec.stdout).spanCount || 0;
        } catch {
          // Unparseable metadata: keep the span count at 0.
        }
      }

      result.projects.push(stats);
    } catch {
      // Skip project records that cannot be processed.
    }
  }

  // Datasets: the example count is the line count of examples.jsonl.
  for (const dataset of await readJsonlRecords(
    mode,
    "/phoenix/datasets/index.jsonl"
  )) {
    try {
      const exampleCount = await countLines(
        mode,
        `/phoenix/datasets/${dataset.name}/examples.jsonl`
      );
      result.datasets.push({
        name: dataset.name,
        exampleCount,
        updatedAt: dataset.updated_at,
      });
    } catch {
      // Skip dataset records that cannot be processed.
    }
  }

  // Experiments: everything needed is already in the index record.
  for (const experiment of await readJsonlRecords(
    mode,
    "/phoenix/experiments/index.jsonl"
  )) {
    try {
      const status = determineExperimentStatus(experiment);
      result.experiments.push({
        id: experiment.id,
        datasetName: experiment.datasetName || "unknown",
        projectName: experiment.project_name,
        status,
        runCounts: {
          successful: experiment.successful_run_count || 0,
          failed: experiment.failed_run_count || 0,
          missing: experiment.missing_run_count || 0,
        },
        updatedAt: experiment.updated_at,
      });
    } catch {
      // Skip experiment records that cannot be processed.
    }
  }

  // Prompts: the version count is the line count of versions/index.jsonl.
  for (const prompt of await readJsonlRecords(
    mode,
    "/phoenix/prompts/index.jsonl"
  )) {
    try {
      const versionCount = await countLines(
        mode,
        `/phoenix/prompts/${prompt.name}/versions/index.jsonl`
      );
      result.prompts.push({
        name: prompt.name,
        versionCount,
        updatedAt: prompt.updated_at,
      });
    } catch {
      // Skip prompt records that cannot be processed.
    }
  }

  return result;
}
481
-
482
- // =============================================================================
483
- // Helper Functions
484
- // =============================================================================
485
-
486
/**
 * Derives an experiment's status from the run counters on its index record.
 *
 * Rules, applied in order:
 * 1. no successful or failed runs recorded yet → "in_progress";
 * 2. more failed runs than successful ones → "failed";
 * 3. recorded runs reach `example_count * repetitions` → "completed";
 * 4. otherwise → "in_progress".
 *
 * A missing `repetitions` is treated as 1, so a record that omits it can still
 * be reported as completed (previously the product was NaN and rule 3 could
 * never match). A missing `example_count` still leaves the expected total
 * unknown, so such an experiment stays "in_progress".
 *
 * @param experiment - Raw experiment record parsed from the experiments index.
 * @returns The derived status.
 */
function determineExperimentStatus(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated JSON record
  experiment: any
): "completed" | "in_progress" | "failed" {
  const successfulRuns = experiment.successful_run_count || 0;
  const failedRuns = experiment.failed_run_count || 0;
  const totalRuns = successfulRuns + failedRuns;

  if (totalRuns === 0) {
    return "in_progress";
  }

  if (failedRuns > successfulRuns) {
    return "failed";
  }

  const repetitions = experiment.repetitions ?? 1;
  // NaN when example_count is missing; the comparison below is then false.
  const totalExpected = experiment.example_count * repetitions;

  return totalRuns >= totalExpected ? "completed" : "in_progress";
}
513
-
514
/**
 * Produces up to three one-line highlights about items updated in the last
 * 24 hours.
 *
 * Experiments are listed first (at most two, newest first), then datasets
 * (at most two, newest first); the combined list is cut to three entries.
 * Items without an `updatedAt` value are ignored.
 *
 * @param stats - Collected snapshot statistics.
 * @returns Highlight strings without bullet markers.
 */
function getRecentActivity(stats: {
  projects: ProjectStats[];
  datasets: DatasetInfo[];
  experiments: ExperimentInfo[];
  prompts: PromptInfo[];
}): string[] {
  type Timestamped = { updatedAt?: string };

  const updatedRecently = (item: Timestamped) =>
    item.updatedAt && isRecent(new Date(item.updatedAt), 24);
  // Only applied after `updatedRecently`, so `updatedAt` is always set here.
  const newestFirst = (a: Timestamped, b: Timestamped): number =>
    new Date(b.updatedAt!).getTime() - new Date(a.updatedAt!).getTime();

  const experimentHighlights = stats.experiments
    .filter(updatedRecently)
    .sort(newestFirst)
    .slice(0, 2)
    .map((exp) => {
      const timeAgo = formatRelativeTime(new Date(exp.updatedAt!));
      return `${exp.projectName || exp.datasetName}: experiment "${exp.id.slice(0, 8)}..." ${exp.status} ${timeAgo}`;
    });

  const datasetHighlights = stats.datasets
    .filter(updatedRecently)
    .sort(newestFirst)
    .slice(0, 2)
    .map((dataset) => {
      const timeAgo = formatRelativeTime(new Date(dataset.updatedAt!));
      return `${dataset.name}: dataset updated ${timeAgo} (${dataset.exampleCount} examples)`;
    });

  return [...experimentHighlights, ...datasetHighlights].slice(0, 3);
}
557
-
558
/**
 * Reports whether `date` lies less than `hoursAgo` hours before the current
 * time. Dates in the future also count as recent; an invalid date does not.
 */
function isRecent(date: Date, hoursAgo: number): boolean {
  const MS_PER_HOUR = 60 * 60 * 1000;
  const elapsedMs = Date.now() - date.getTime();
  return elapsedMs < hoursAgo * MS_PER_HOUR;
}
566
-
567
/**
 * Describes how long ago `date` was, relative to the current time.
 *
 * Returns "just now" for anything under a minute (including future dates),
 * then whole minutes up to an hour, whole hours up to a day, and whole days
 * beyond that, e.g. "2 hours ago".
 */
function formatRelativeTime(date: Date): string {
  const MS_PER_MINUTE = 1000 * 60;
  const MS_PER_HOUR = MS_PER_MINUTE * 60;
  const MS_PER_DAY = MS_PER_HOUR * 24;

  const elapsedMs = Date.now() - date.getTime();
  const pluralSuffix = (count: number): string => (count !== 1 ? "s" : "");

  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 1) {
    return "just now";
  }
  if (minutes < 60) {
    return `${minutes} minute${pluralSuffix(minutes)} ago`;
  }

  const hours = Math.floor(elapsedMs / MS_PER_HOUR);
  if (hours < 24) {
    return `${hours} hour${pluralSuffix(hours)} ago`;
  }

  const days = Math.floor(elapsedMs / MS_PER_DAY);
  return `${days} day${pluralSuffix(days)} ago`;
}
@@ -1,132 +0,0 @@
1
- import type { PhoenixClient } from "@arizeai/phoenix-client";
2
- import type { ExecutionMode } from "../modes/types.js";
3
- import { withErrorHandling, extractData } from "./client.js";
4
-
5
/**
 * Dataset record as consumed by `fetchDatasets`: the fields copied from each
 * item of the `/v1/datasets` response into the dataset's `metadata.json`.
 */
interface Dataset {
  /** Identifier used to request the dataset's examples. */
  id: string;
  /** Name used as the dataset's directory under `/phoenix/datasets/`. */
  name: string;
  /** Free-text description, or `null` when none is set. */
  description: string | null;
  /** Arbitrary key/value metadata attached to the dataset. */
  metadata: Record<string, unknown>;
  /** Creation timestamp string. */
  created_at: string;
  /** Last-update timestamp string. */
  updated_at: string;
}
13
-
14
/**
 * Shape of a single dataset example.
 *
 * NOTE(review): not referenced by the code visible in this file; presumably
 * mirrors the items returned by the dataset examples endpoint — confirm.
 */
interface DatasetExample {
  /** Example identifier. */
  id: string;
  /** Example input payload. */
  input: Record<string, unknown>;
  /** Example output payload. */
  output: Record<string, unknown>;
  /** Arbitrary key/value metadata attached to the example. */
  metadata: Record<string, unknown>;
  /** Last-update timestamp string. */
  updated_at: string;
}
21
-
22
/**
 * Options accepted by `fetchDatasets`.
 */
interface FetchDatasetsOptions {
  /** Target number of datasets to fetch; `fetchDatasets` defaults it to 100. */
  limit?: number;
}
25
-
26
/**
 * Serialises items as JSON Lines: one `JSON.stringify` result per line,
 * separated by "\n" with no trailing newline. An empty array yields "".
 */
function toJSONL(items: unknown[]): string {
  const serialized: string[] = [];
  for (const item of items) {
    serialized.push(JSON.stringify(item));
  }
  return serialized.join("\n");
}
35
-
36
/**
 * Fetches datasets and their examples from Phoenix and writes them into the
 * snapshot under `/phoenix/datasets/`.
 *
 * Writes `index.jsonl` for the dataset list, then for each dataset a
 * `metadata.json`, an `examples.jsonl` and an `info.json` recording the
 * example count, version id and filtered splits.
 *
 * @param client - Phoenix API client.
 * @param mode - Execution mode used to write the snapshot files.
 * @param options - `limit` sets how many datasets to fetch (default 100).
 */
export async function fetchDatasets(
  client: PhoenixClient,
  mode: ExecutionMode,
  options: FetchDatasetsOptions = {}
): Promise<void> {
  const PAGE_SIZE = 100;
  const maxDatasets = options.limit === undefined ? 100 : options.limit;

  // Page through the dataset list until the limit is reached or the server
  // stops returning a cursor or data.
  const collected: Dataset[] = [];
  let nextCursor: string | null = null;

  while (collected.length < maxDatasets) {
    const pageLimit = Math.min(maxDatasets - collected.length, PAGE_SIZE);
    const query: Record<string, unknown> = nextCursor
      ? { limit: pageLimit, cursor: nextCursor }
      : { limit: pageLimit };

    const listResponse = await withErrorHandling(
      () => client.GET("/v1/datasets", { params: { query } }),
      "fetching datasets"
    );
    const page = extractData(listResponse);

    collected.push(...page.data);
    nextCursor = page.next_cursor;

    if (!nextCursor || page.data.length === 0) {
      break;
    }
  }

  await mode.writeFile("/phoenix/datasets/index.jsonl", toJSONL(collected));

  for (const dataset of collected) {
    const datasetDir = `/phoenix/datasets/${dataset.name}`;

    const metadataJson = JSON.stringify(
      {
        id: dataset.id,
        name: dataset.name,
        description: dataset.description,
        metadata: dataset.metadata,
        created_at: dataset.created_at,
        updated_at: dataset.updated_at,
        snapshot_timestamp: new Date().toISOString(),
      },
      null,
      2
    );
    await mode.writeFile(`${datasetDir}/metadata.json`, metadataJson);

    const examplesResponse = await withErrorHandling(
      () =>
        client.GET("/v1/datasets/{id}/examples", {
          params: {
            path: { id: dataset.id },
          },
        }),
      `fetching examples for dataset ${dataset.name}`
    );
    const examplesPayload = extractData(examplesResponse).data;
    const examples = examplesPayload.examples;

    await mode.writeFile(`${datasetDir}/examples.jsonl`, toJSONL(examples));

    const infoJson = JSON.stringify(
      {
        dataset_id: dataset.id,
        dataset_name: dataset.name,
        example_count: examples.length,
        version_id: examplesPayload.version_id,
        filtered_splits: examplesPayload.filtered_splits,
      },
      null,
      2
    );
    await mode.writeFile(`${datasetDir}/info.json`, infoJson);
  }
}