@cephalization/phoenix-insight 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +620 -0
  3. package/dist/agent/index.js +230 -0
  4. package/dist/cli.js +640 -0
  5. package/dist/commands/index.js +2 -0
  6. package/dist/commands/px-fetch-more-spans.js +98 -0
  7. package/dist/commands/px-fetch-more-trace.js +110 -0
  8. package/dist/config/index.js +165 -0
  9. package/dist/config/loader.js +141 -0
  10. package/dist/config/schema.js +53 -0
  11. package/dist/index.js +1 -0
  12. package/dist/modes/index.js +17 -0
  13. package/dist/modes/local.js +134 -0
  14. package/dist/modes/sandbox.js +121 -0
  15. package/dist/modes/types.js +1 -0
  16. package/dist/observability/index.js +65 -0
  17. package/dist/progress.js +209 -0
  18. package/dist/prompts/index.js +1 -0
  19. package/dist/prompts/system.js +30 -0
  20. package/dist/snapshot/client.js +74 -0
  21. package/dist/snapshot/context.js +332 -0
  22. package/dist/snapshot/datasets.js +68 -0
  23. package/dist/snapshot/experiments.js +135 -0
  24. package/dist/snapshot/index.js +262 -0
  25. package/dist/snapshot/projects.js +44 -0
  26. package/dist/snapshot/prompts.js +199 -0
  27. package/dist/snapshot/spans.js +80 -0
  28. package/dist/tsconfig.esm.tsbuildinfo +1 -0
  29. package/package.json +75 -0
  30. package/src/agent/index.ts +323 -0
  31. package/src/cli.ts +782 -0
  32. package/src/commands/index.ts +8 -0
  33. package/src/commands/px-fetch-more-spans.ts +174 -0
  34. package/src/commands/px-fetch-more-trace.ts +183 -0
  35. package/src/config/index.ts +225 -0
  36. package/src/config/loader.ts +173 -0
  37. package/src/config/schema.ts +66 -0
  38. package/src/index.ts +1 -0
  39. package/src/modes/index.ts +21 -0
  40. package/src/modes/local.ts +163 -0
  41. package/src/modes/sandbox.ts +144 -0
  42. package/src/modes/types.ts +31 -0
  43. package/src/observability/index.ts +90 -0
  44. package/src/progress.ts +239 -0
  45. package/src/prompts/index.ts +1 -0
  46. package/src/prompts/system.ts +31 -0
  47. package/src/snapshot/client.ts +129 -0
  48. package/src/snapshot/context.ts +462 -0
  49. package/src/snapshot/datasets.ts +132 -0
  50. package/src/snapshot/experiments.ts +246 -0
  51. package/src/snapshot/index.ts +403 -0
  52. package/src/snapshot/projects.ts +58 -0
  53. package/src/snapshot/prompts.ts +267 -0
  54. package/src/snapshot/spans.ts +142 -0
@@ -0,0 +1,462 @@
1
+ import type { ExecutionMode } from "../modes/types.js";
2
+
3
/**
 * Describes where and when a snapshot was taken.
 *
 * `generateContext` renders `phoenixUrl` and `snapshotTime` into the
 * "Snapshot" bullet of the context file.
 */
interface ContextMetadata {
  /** Base URL of the Phoenix instance the snapshot came from. */
  phoenixUrl: string;
  /** Moment the snapshot was captured; shown as a relative time. */
  snapshotTime: Date;
  /** Accepted for callers' convenience; not read by `generateContext`. */
  spansPerProject?: number;
}
8
+
9
/**
 * Per-project summary shown in the "What's Here" section.
 */
interface ProjectStats {
  /** Project name as listed in `/phoenix/projects/index.jsonl`. */
  name: string;
  /** `spanCount` read from the project's `spans/metadata.json` (0 if absent). */
  spanCount: number;
  /** Optional flag; not populated by `collectSnapshotStats`. */
  hasErrors?: boolean;
  /** Optional count; not populated by `collectSnapshotStats`. */
  recentSpans?: number;
}
15
+
16
/**
 * Per-dataset summary used for the overview and "Recent Activity".
 */
interface DatasetInfo {
  /** Dataset name as listed in `/phoenix/datasets/index.jsonl`. */
  name: string;
  /** Number of lines counted in the dataset's `examples.jsonl`. */
  exampleCount: number;
  /** Value of the index entry's `updated_at` field, when present. */
  updatedAt?: string;
}
21
+
22
/**
 * Per-experiment summary used for the overview and "Recent Activity".
 */
interface ExperimentInfo {
  /** Experiment identifier from the experiments index. */
  id: string;
  /** Dataset the experiment ran against ("unknown" when the index omits it). */
  datasetName: string;
  /** Value of the index entry's `project_name` field, when present. */
  projectName?: string;
  /** Status derived by `determineExperimentStatus` from the run counts. */
  status: "completed" | "in_progress" | "failed";
  /** Run tallies copied from the index entry (each defaults to 0). */
  runCounts: {
    successful: number;
    failed: number;
    missing: number;
  };
  /** Value of the index entry's `updated_at` field, when present. */
  updatedAt?: string;
}
34
+
35
/**
 * Per-prompt summary shown in the "What's Here" section.
 */
interface PromptInfo {
  /** Prompt name as listed in `/phoenix/prompts/index.jsonl`. */
  name: string;
  /** Number of lines counted in the prompt's `versions/index.jsonl`. */
  versionCount: number;
  /** Optional identifier; not populated by `collectSnapshotStats`. */
  latestVersion?: string;
  /** Value of the index entry's `updated_at` field, when present. */
  updatedAt?: string;
}
41
+
42
/**
 * Generates the `/phoenix/_context.md` summary file for a Phoenix snapshot.
 *
 * The file gives humans and agents an overview of what the snapshot holds:
 * counts of projects, datasets, experiments and prompts, recent activity,
 * available commands, freshness caveats, file formats and the directory
 * layout.
 *
 * @param mode - Execution mode used to read the snapshot (via
 *   `collectSnapshotStats`) and to write the resulting file.
 * @param metadata - Snapshot origin; `phoenixUrl` and `snapshotTime` are
 *   rendered into the "Snapshot" bullet.
 * @returns Resolves once `/phoenix/_context.md` has been written.
 */
export async function generateContext(
  mode: ExecutionMode,
  metadata: ContextMetadata
): Promise<void> {
  const lines: string[] = [];

  // Header
  lines.push("# Phoenix Snapshot Context");
  lines.push("");

  // Collect stats from the snapshot
  const stats = await collectSnapshotStats(mode);

  // What's Here section
  lines.push("## What's Here");

  // Projects summary
  if (stats.projects.length > 0) {
    const projectSummary = stats.projects
      .map((p) => `${p.name} (${p.spanCount} spans)`)
      .join(", ");
    lines.push(`- **${stats.projects.length} projects**: ${projectSummary}`);
  } else {
    lines.push("- **No projects found**");
  }

  // Datasets summary
  if (stats.datasets.length > 0) {
    const datasetNames = stats.datasets.map((d) => d.name).join(", ");
    lines.push(`- **${stats.datasets.length} datasets**: ${datasetNames}`);
  } else {
    lines.push("- **No datasets found**");
  }

  // Experiments summary
  if (stats.experiments.length > 0) {
    const countWithStatus = (status: ExperimentInfo["status"]): number =>
      stats.experiments.filter((e) => e.status === status).length;
    const completedCount = countWithStatus("completed");
    const inProgressCount = countWithStatus("in_progress");
    const failedCount = countWithStatus("failed");

    // Only mention statuses that actually occur.
    const parts: string[] = [];
    if (completedCount > 0) parts.push(`${completedCount} completed`);
    if (inProgressCount > 0) parts.push(`${inProgressCount} in progress`);
    if (failedCount > 0) parts.push(`${failedCount} failed`);

    lines.push(
      `- **${stats.experiments.length} experiments**: ${parts.join(", ")}`
    );
  } else {
    lines.push("- **No experiments found**");
  }

  // Prompts summary
  if (stats.prompts.length > 0) {
    const promptNames = stats.prompts.map((p) => p.name).join(", ");
    lines.push(`- **${stats.prompts.length} prompts**: ${promptNames}`);
  } else {
    lines.push("- **No prompts found**");
  }

  // Snapshot metadata
  lines.push(
    `- **Snapshot**: Created ${formatRelativeTime(metadata.snapshotTime)} from ${metadata.phoenixUrl}`
  );
  lines.push("");

  // Recent Activity section (omitted entirely when nothing is recent)
  const recentActivity = getRecentActivity(stats);
  if (recentActivity.length > 0) {
    lines.push("## Recent Activity");
    for (const activity of recentActivity) {
      lines.push(`- ${activity}`);
    }
    lines.push("");
  }

  // What You Can Do section
  lines.push("## What You Can Do");
  lines.push("- **Explore**: ls, cat, grep, find, jq, awk, sed");
  lines.push(
    "- **Fetch more data**: `px-fetch-more spans --project <name> --limit 500`"
  );
  lines.push(
    "- **Fetch specific trace**: `px-fetch-more trace --trace-id <id>`"
  );
  lines.push("");

  // Data Freshness section
  lines.push("## Data Freshness");
  lines.push(
    "This is a **read-only snapshot**. Data may have changed since capture."
  );
  lines.push("Run with `--refresh` to get latest data.");
  lines.push("");

  // File Formats section
  lines.push("## File Formats");
  lines.push(
    "- `.jsonl` files: One JSON object per line, use `jq -s` to parse as array"
  );
  lines.push("- `.json` files: Standard JSON");
  lines.push("- `.md` files: Markdown (prompt templates)");
  lines.push("");

  // Directory Structure section
  lines.push("## Directory Structure");
  lines.push("```");
  lines.push(...renderSnapshotDirectoryTree());
  lines.push("```");

  // Write the context file
  await mode.writeFile("/phoenix/_context.md", lines.join("\n"));
}

/**
 * Renders the snapshot directory layout as an indented tree, one entry per
 * line, with two spaces of indentation per nesting level and the notes
 * aligned in a single column so the hierarchy is readable.
 */
function renderSnapshotDirectoryTree(): string[] {
  const NOTE_COLUMN = 28;
  const entries: ReadonlyArray<readonly [number, string, string?]> = [
    [0, "/phoenix/"],
    [1, "_context.md", "This file"],
    [1, "/projects/"],
    [2, "index.jsonl", "List of all projects"],
    [2, "/{project_name}/"],
    [3, "metadata.json", "Project details"],
    [3, "/spans/"],
    [4, "index.jsonl", "Span data (may be sampled)"],
    [4, "metadata.json", "Span snapshot metadata"],
    [1, "/datasets/"],
    [2, "index.jsonl", "List of all datasets"],
    [2, "/{dataset_name}/"],
    [3, "metadata.json", "Dataset details"],
    [3, "info.json", "Example count and version info"],
    [3, "examples.jsonl", "Dataset examples"],
    [1, "/experiments/"],
    [2, "index.jsonl", "List of all experiments"],
    [2, "/{experiment_id}/"],
    [3, "metadata.json", "Experiment details"],
    [3, "runs.jsonl", "Experiment runs"],
    [1, "/prompts/"],
    [2, "index.jsonl", "List of all prompts"],
    [2, "/{prompt_name}/"],
    [3, "metadata.json", "Prompt details"],
    [3, "/versions/"],
    [4, "index.jsonl", "Version list"],
    [4, "/{version_id}.md", "Version template"],
    [1, "/_meta/"],
    [2, "snapshot.json", "Snapshot metadata"],
  ];

  return entries.map(([depth, name, note]) => {
    const label = "  ".repeat(depth) + name;
    return note === undefined
      ? label
      : `${label.padEnd(NOTE_COLUMN)}# ${note}`;
  });
}
192
+
193
/** A parsed JSONL entry whose fields have not been validated yet. */
type SnapshotRecord = Record<string, unknown>;

/**
 * Collects statistics from the snapshot filesystem.
 *
 * Reads the four `index.jsonl` files under `/phoenix/` and, per entry, the
 * auxiliary file that holds its size (span metadata, examples, versions).
 * Collection is best-effort: missing files, failed commands, unparsable
 * lines and entries without a usable name/id are skipped rather than
 * reported, so the result may be partial or empty.
 *
 * @param mode - Execution mode whose `exec` runs shell commands against the
 *   snapshot.
 * @returns Summaries for projects, datasets, experiments and prompts.
 */
async function collectSnapshotStats(mode: ExecutionMode): Promise<{
  projects: ProjectStats[];
  datasets: DatasetInfo[];
  experiments: ExperimentInfo[];
  prompts: PromptInfo[];
}> {
  const result = {
    projects: [] as ProjectStats[],
    datasets: [] as DatasetInfo[],
    experiments: [] as ExperimentInfo[],
    prompts: [] as PromptInfo[],
  };

  // Collect project stats
  for (const project of await readSnapshotJsonl(
    mode,
    "/phoenix/projects/index.jsonl"
  )) {
    const name = project.name;
    if (typeof name !== "string") continue; // unusable entry
    result.projects.push({
      name,
      spanCount: await readProjectSpanCount(mode, name),
    });
  }

  // Collect dataset stats
  for (const dataset of await readSnapshotJsonl(
    mode,
    "/phoenix/datasets/index.jsonl"
  )) {
    const name = dataset.name;
    if (typeof name !== "string") continue;
    result.datasets.push({
      name,
      exampleCount: await countSnapshotLines(
        mode,
        `/phoenix/datasets/${name}/examples.jsonl`
      ),
      updatedAt: optionalString(dataset.updated_at),
    });
  }

  // Collect experiment stats
  for (const experiment of await readSnapshotJsonl(
    mode,
    "/phoenix/experiments/index.jsonl"
  )) {
    const id = experiment.id;
    if (typeof id !== "string") continue; // later code slices the id
    result.experiments.push({
      id,
      datasetName: optionalString(experiment.datasetName) || "unknown",
      projectName: optionalString(experiment.project_name),
      status: determineExperimentStatus(experiment),
      runCounts: {
        successful: countOrZero(experiment.successful_run_count),
        failed: countOrZero(experiment.failed_run_count),
        missing: countOrZero(experiment.missing_run_count),
      },
      updatedAt: optionalString(experiment.updated_at),
    });
  }

  // Collect prompt stats
  for (const prompt of await readSnapshotJsonl(
    mode,
    "/phoenix/prompts/index.jsonl"
  )) {
    const name = prompt.name;
    if (typeof name !== "string") continue;
    result.prompts.push({
      name,
      versionCount: await countSnapshotLines(
        mode,
        `/phoenix/prompts/${name}/versions/index.jsonl`
      ),
      updatedAt: optionalString(prompt.updated_at),
    });
  }

  return result;
}

/**
 * Single-quotes a value so the shell treats it as one literal word, even
 * when it contains spaces or metacharacters (names come from Phoenix data).
 */
function shellQuote(value: string): string {
  return `'${value.replace(/'/g, "'\\''")}'`;
}

/** Narrows a parsed JSON value to a plain object record. */
function isSnapshotRecord(value: unknown): value is SnapshotRecord {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Returns the value when it is a finite number, otherwise 0. */
function countOrZero(value: unknown): number {
  return typeof value === "number" && Number.isFinite(value) ? value : 0;
}

/** Returns the value when it is a string, otherwise undefined. */
function optionalString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Reads a JSONL file from the snapshot and returns its object entries.
 * A missing file, a failed command or an unparsable line yields no entry.
 */
async function readSnapshotJsonl(
  mode: ExecutionMode,
  path: string
): Promise<SnapshotRecord[]> {
  let stdout = "";
  try {
    const execResult = await mode.exec(
      `cat ${shellQuote(path)} 2>/dev/null || true`
    );
    stdout = execResult.stdout || "";
  } catch {
    return []; // file unreadable: treat as empty
  }

  const records: SnapshotRecord[] = [];
  for (const line of stdout.trim().split("\n")) {
    if (line.length === 0) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      if (isSnapshotRecord(parsed)) records.push(parsed);
    } catch {
      // Skip invalid lines
    }
  }
  return records;
}

/**
 * Counts the non-empty lines of a snapshot file.
 *
 * Uses `grep -c .` rather than `wc -l`: `wc -l` counts newline characters
 * and therefore misses the last record of a file without a trailing newline.
 * When grep finds nothing it prints "0" and exits non-zero, so the fallback
 * `echo` may add a second "0"; parseInt reads only the first number.
 */
async function countSnapshotLines(
  mode: ExecutionMode,
  path: string
): Promise<number> {
  try {
    const execResult = await mode.exec(
      `grep -c . ${shellQuote(path)} 2>/dev/null || echo "0"`
    );
    return parseInt((execResult.stdout || "").trim(), 10) || 0;
  } catch {
    return 0;
  }
}

/** Reads `spanCount` from a project's span metadata, defaulting to 0. */
async function readProjectSpanCount(
  mode: ExecutionMode,
  projectName: string
): Promise<number> {
  const metadataPath = `/phoenix/projects/${projectName}/spans/metadata.json`;
  try {
    const execResult = await mode.exec(
      `cat ${shellQuote(metadataPath)} 2>/dev/null || echo "{}"`
    );
    if (!execResult.stdout) return 0;
    const spansMeta: unknown = JSON.parse(execResult.stdout);
    return isSnapshotRecord(spansMeta) ? countOrZero(spansMeta.spanCount) : 0;
  } catch {
    // Ignore command and parse errors
    return 0;
  }
}
360
+
361
/**
 * Determines the status of an experiment based on its run counts.
 *
 * Rules, in order:
 * 1. No successful or failed runs recorded yet: "in_progress".
 * 2. More failed than successful runs: "failed".
 * 3. Recorded runs reach `example_count * repetitions`: "completed".
 * 4. Otherwise: "in_progress".
 *
 * A missing or non-numeric `repetitions` is treated as 1 so that the
 * expected total does not become NaN (which would make rule 3 unreachable).
 * When `example_count` itself is unknown the expected total cannot be
 * computed and the experiment is reported as "in_progress".
 *
 * @param experiment - Parsed experiment index entry; reads
 *   `example_count`, `repetitions`, `successful_run_count` and
 *   `failed_run_count`.
 * @returns The derived status.
 */
function determineExperimentStatus(
  experiment: any
): "completed" | "in_progress" | "failed" {
  const finiteOr = (value: unknown, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) ? value : fallback;

  const successfulRuns = finiteOr(experiment?.successful_run_count, 0);
  const failedRuns = finiteOr(experiment?.failed_run_count, 0);
  const totalRuns = successfulRuns + failedRuns;

  if (totalRuns === 0) {
    return "in_progress";
  }

  // If most runs are failed, consider it failed
  if (failedRuns > successfulRuns) {
    return "failed";
  }

  const exampleCount = finiteOr(experiment?.example_count, NaN);
  if (Number.isNaN(exampleCount)) {
    // Expected total unknown: cannot claim completion.
    return "in_progress";
  }

  const totalExpected = exampleCount * finiteOr(experiment?.repetitions, 1);
  return totalRuns >= totalExpected ? "completed" : "in_progress";
}
388
+
389
/**
 * Builds the "Recent Activity" highlights.
 *
 * Considers experiments and datasets whose `updatedAt` falls within the last
 * 24 hours, newest first, taking at most two of each (experiments listed
 * before datasets) and returning no more than three lines overall.
 *
 * @param stats - Snapshot statistics; only `experiments` and `datasets` are
 *   inspected.
 * @returns Human-readable activity lines, possibly empty.
 */
function getRecentActivity(stats: {
  projects: ProjectStats[];
  datasets: DatasetInfo[];
  experiments: ExperimentInfo[];
  prompts: PromptInfo[];
}): string[] {
  type Timestamped = { updatedAt?: string };

  const updatedWithinDay = (item: Timestamped): boolean =>
    Boolean(item.updatedAt && isRecent(new Date(item.updatedAt), 24));
  const newestFirst = (a: Timestamped, b: Timestamped): number =>
    new Date(b.updatedAt!).getTime() - new Date(a.updatedAt!).getTime();

  // Up to two most recently updated experiments
  const experimentLines = stats.experiments
    .filter(updatedWithinDay)
    .sort(newestFirst)
    .slice(0, 2)
    .map((exp) => {
      const timeAgo = formatRelativeTime(new Date(exp.updatedAt!));
      return `${exp.projectName || exp.datasetName}: experiment "${exp.id.slice(0, 8)}..." ${exp.status} ${timeAgo}`;
    });

  // Up to two most recently updated datasets
  const datasetLines = stats.datasets
    .filter(updatedWithinDay)
    .sort(newestFirst)
    .slice(0, 2)
    .map((dataset) => {
      const timeAgo = formatRelativeTime(new Date(dataset.updatedAt!));
      return `${dataset.name}: dataset updated ${timeAgo} (${dataset.exampleCount} examples)`;
    });

  // Limit to 3 activities
  return [...experimentLines, ...datasetLines].slice(0, 3);
}
432
+
433
/**
 * Tells whether `date` lies less than `hoursAgo` hours before the current
 * time. Dates in the future count as recent; an invalid date does not.
 */
function isRecent(date: Date, hoursAgo: number): boolean {
  const MS_PER_HOUR = 60 * 60 * 1000;
  const elapsedMs = Date.now() - date.getTime();
  return elapsedMs < hoursAgo * MS_PER_HOUR;
}
441
+
442
/**
 * Formats a date as the time elapsed since it (e.g., "2 hours ago").
 *
 * Under one minute (including future dates) yields "just now"; after that
 * the largest fitting unit among minutes, hours and days is used, rounded
 * down.
 */
function formatRelativeTime(date: Date): string {
  const MS_PER_MINUTE = 1000 * 60;
  const MS_PER_HOUR = MS_PER_MINUTE * 60;
  const MS_PER_DAY = MS_PER_HOUR * 24;

  const elapsedMs = Date.now() - date.getTime();
  const describe = (amount: number, unit: string): string =>
    `${amount} ${unit}${amount !== 1 ? "s" : ""} ago`;

  const minutes = Math.floor(elapsedMs / MS_PER_MINUTE);
  if (minutes < 1) {
    return "just now";
  }
  if (minutes < 60) {
    return describe(minutes, "minute");
  }

  const hours = Math.floor(elapsedMs / MS_PER_HOUR);
  if (hours < 24) {
    return describe(hours, "hour");
  }

  return describe(Math.floor(elapsedMs / MS_PER_DAY), "day");
}
@@ -0,0 +1,132 @@
1
+ import type { PhoenixClient } from "@arizeai/phoenix-client";
2
+ import type { ExecutionMode } from "../modes/types.js";
3
+ import { withErrorHandling, extractData } from "./client.js";
4
+
5
/**
 * Dataset record as consumed by `fetchDatasets` from the `/v1/datasets`
 * listing; every field is copied into the dataset's `metadata.json`.
 */
interface Dataset {
  /** Identifier used to request the dataset's examples. */
  id: string;
  /** Name used as the dataset's directory name in the snapshot. */
  name: string;
  description: string | null;
  metadata: Record<string, unknown>;
  created_at: string;
  updated_at: string;
}
13
+
14
/**
 * Shape of a single dataset example.
 * NOTE(review): not referenced elsewhere in this module's visible code;
 * presumably mirrors the entries written to `examples.jsonl` — confirm.
 */
interface DatasetExample {
  id: string;
  input: Record<string, unknown>;
  output: Record<string, unknown>;
  metadata: Record<string, unknown>;
  updated_at: string;
}
21
+
22
/** Options accepted by `fetchDatasets`. */
interface FetchDatasetsOptions {
  /** Target number of datasets to fetch; `fetchDatasets` defaults it to 100. */
  limit?: number;
}
25
+
26
/**
 * Serializes each item with `JSON.stringify` and joins the results with
 * newlines (no trailing newline). An empty array yields an empty string.
 */
function toJSONL(items: unknown[]): string {
  const serialized: string[] = [];
  for (const item of items) {
    serialized.push(JSON.stringify(item));
  }
  return serialized.join("\n");
}
35
+
36
/**
 * Fetches datasets and their examples from Phoenix into the snapshot.
 *
 * Pages through `/v1/datasets` (at most 100 per request) until `limit`
 * datasets are collected or the server reports no further page, writes
 * `/phoenix/datasets/index.jsonl`, and then for every dataset writes
 * `metadata.json`, `examples.jsonl` and `info.json` under
 * `/phoenix/datasets/{name}/`.
 *
 * @param client - Phoenix API client used for the GET requests.
 * @param mode - Execution mode whose `writeFile` stores the snapshot files.
 * @param options - Optional settings; `limit` defaults to 100.
 * @returns Resolves when all files have been written.
 */
export async function fetchDatasets(
  client: PhoenixClient,
  mode: ExecutionMode,
  options: FetchDatasetsOptions = {}
): Promise<void> {
  const { limit: maxDatasets = 100 } = options;

  // Page through the dataset listing
  const collected: Dataset[] = [];
  let nextCursor: string | null = null;

  while (collected.length < maxDatasets) {
    const pageSize = Math.min(maxDatasets - collected.length, 100);
    const query: Record<string, unknown> = { limit: pageSize };
    if (nextCursor) {
      query.cursor = nextCursor;
    }

    const listResponse = await withErrorHandling(
      () => client.GET("/v1/datasets", { params: { query } }),
      "fetching datasets"
    );
    const page = extractData(listResponse);

    collected.push(...page.data);
    nextCursor = page.next_cursor;

    // No cursor or an empty page means the listing is exhausted
    if (!nextCursor || page.data.length === 0) {
      break;
    }
  }

  // Index of every dataset fetched
  await mode.writeFile("/phoenix/datasets/index.jsonl", toJSONL(collected));

  for (const dataset of collected) {
    const datasetDir = `/phoenix/datasets/${dataset.name}`;

    // Dataset details, stamped with the time of capture
    const details = {
      id: dataset.id,
      name: dataset.name,
      description: dataset.description,
      metadata: dataset.metadata,
      created_at: dataset.created_at,
      updated_at: dataset.updated_at,
      snapshot_timestamp: new Date().toISOString(),
    };
    await mode.writeFile(
      `${datasetDir}/metadata.json`,
      JSON.stringify(details, null, 2)
    );

    // Examples belonging to this dataset
    const examplesResponse = await withErrorHandling(
      () =>
        client.GET("/v1/datasets/{id}/examples", {
          params: {
            path: { id: dataset.id },
          },
        }),
      `fetching examples for dataset ${dataset.name}`
    );
    const examplesPayload = extractData(examplesResponse);
    const examples = examplesPayload.data.examples;

    await mode.writeFile(`${datasetDir}/examples.jsonl`, toJSONL(examples));

    // Summary with the example count and version details
    const info = {
      dataset_id: dataset.id,
      dataset_name: dataset.name,
      example_count: examples.length,
      version_id: examplesPayload.data.version_id,
      filtered_splits: examplesPayload.data.filtered_splits,
    };
    await mode.writeFile(
      `${datasetDir}/info.json`,
      JSON.stringify(info, null, 2)
    );
  }
}