@arthur-ai/buzz 2.1.554-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -0
- package/dist/chunk-DNHKKPJY.js +118 -0
- package/dist/index.js +2452 -0
- package/dist/prompts-NKUEI6PW.js +34 -0
- package/package.json +54 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2452 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
buzzSay,
|
|
4
|
+
confirm,
|
|
5
|
+
logError,
|
|
6
|
+
logInfo,
|
|
7
|
+
logSuccess,
|
|
8
|
+
logWarn,
|
|
9
|
+
note,
|
|
10
|
+
p,
|
|
11
|
+
password,
|
|
12
|
+
select,
|
|
13
|
+
text
|
|
14
|
+
} from "./chunk-DNHKKPJY.js";
|
|
15
|
+
|
|
16
|
+
// src/index.ts
|
|
17
|
+
import figlet from "figlet";
|
|
18
|
+
import chalk3 from "chalk";
|
|
19
|
+
import { intro, outro } from "@clack/prompts";
|
|
20
|
+
|
|
21
|
+
// src/workflow/steps/01-prereqs.ts
|
|
22
|
+
import { execa } from "execa";
|
|
23
|
+
import ora from "ora";
|
|
24
|
+
|
|
25
|
+
// src/errors.ts
|
|
26
|
+
/**
 * Domain error type for the Buzz CLI workflow.
 * `fatal` (default: true) tells the top-level handler whether the whole
 * run should be aborted when this error surfaces.
 */
var BuzzError = class extends Error {
  /** Whether this error should terminate the entire workflow. */
  fatal;
  constructor(message, fatal = true) {
    super(message);
    this.name = "BuzzError";
    this.fatal = fatal;
  }
};
|
|
34
|
+
|
|
35
|
+
// src/workflow/steps/01-prereqs.ts
|
|
36
|
+
/**
 * Step 1 — pre-flight checks before any work begins:
 *   1. the repo worktree is clean (staged-only changes are tolerated),
 *   2. the `claude` CLI is installed and actually responds,
 *   3. Claude Code is authenticated (prompting for an API key if not).
 * Throws BuzzError on any unmet prerequisite.
 */
async function step1_VerifyPrereqs(state) {
  const spinner = ora({ text: buzzSay("Running pre-flight checklist..."), color: "cyan" }).start();
  try {
    // `git status --porcelain` emits one line per changed path; empty = clean.
    const { stdout } = await execa("git", ["status", "--porcelain"], {
      cwd: state.repoPath
    });
    // Keep a line as "dirty" unless its worktree column (char 1) is clean (" ")
    // and it is not untracked ("?") — i.e. staged changes pass, while unstaged
    // edits and untracked files block the run.
    const dirtyLines = stdout.split("\n").filter((l) => l.length > 0).filter((l) => !(l[1] === " " && l[0] !== "?"));
    if (dirtyLines.length > 0) {
      spinner.stop();
      logError("Cannot proceed \u2014 uncommitted work in progress detected in your repository.");
      note(
        "Please commit or stash your unstaged changes and re-run Buzz.\n(Staged/added changes are fine.)",
        "git status is dirty"
      );
      throw new BuzzError("Git repository has uncommitted changes.");
    }
  } catch (err) {
    // Re-throw our own error; any other failure means git itself is unusable here.
    if (err instanceof BuzzError) throw err;
    spinner.stop();
    logError("This directory is not a git repository, or git is not installed.");
    throw new BuzzError("Not a git repository.");
  }
  spinner.text = buzzSay("Git status confirmed. Checking Claude Code...");
  // NOTE(review): `which` is POSIX-only — presumably this tool targets macOS/Linux
  // (the engine installer below is Mac-only); confirm if Windows support is intended.
  const claudeWhich = await execa("which", ["claude"]).catch(() => null);
  if (!claudeWhich) {
    spinner.stop();
    logError("Claude Code is not installed.");
    note(
      "Install Claude Code with:\n npm install -g @anthropic-ai/claude-code\nThen re-run Buzz.",
      "Missing dependency"
    );
    throw new BuzzError("Claude Code is not installed.");
  }
  // Confirm the binary actually runs, not merely that it exists on PATH.
  const claudeVersion = await execa("claude", ["--version"]).catch(() => null);
  if (!claudeVersion) {
    spinner.stop();
    logError("Claude Code binary is not responding.");
    throw new BuzzError("Claude Code binary is not working.");
  }
  spinner.text = buzzSay("Checking Claude Code authentication...");
  // Exit code 0 from `claude auth status` means already authenticated.
  const authStatus = await execa("claude", ["auth", "status"]).catch(() => null);
  let isAuthenticated = authStatus?.exitCode === 0;
  if (!isAuthenticated) {
    spinner.stop();
    logError("Claude Code is not authenticated.");
    const { password: password2 } = await import("./prompts-NKUEI6PW.js");
    const apiKey = await password2("Enter your ANTHROPIC_API_KEY to continue:");
    // Child processes spawned by execa inherit process.env, so the re-check
    // below sees the freshly entered key.
    process.env.ANTHROPIC_API_KEY = apiKey;
    spinner.start(buzzSay("Verifying API key..."));
    const authVerify = await execa("claude", ["auth", "status"]).catch(() => null);
    isAuthenticated = authVerify?.exitCode === 0;
    if (!isAuthenticated) {
      spinner.stop();
      logError("The provided ANTHROPIC_API_KEY does not appear to be valid.");
      throw new BuzzError("Claude Code is not authenticated.");
    }
  }
  spinner.stop();
  logSuccess("All systems go. Pre-flight checklist complete.");
  logSuccess(`Running in repo: ${state.repoPath}`);
}
|
|
97
|
+
|
|
98
|
+
// src/workflow/steps/02-arthur-engine.ts
|
|
99
|
+
import fs2 from "fs";
|
|
100
|
+
import path2 from "path";
|
|
101
|
+
import ora2 from "ora";
|
|
102
|
+
import { execa as execa2 } from "execa";
|
|
103
|
+
|
|
104
|
+
// src/config/env.ts
|
|
105
|
+
import fs from "fs";
|
|
106
|
+
import { createHash } from "crypto";
|
|
107
|
+
import path from "path";
|
|
108
|
+
import os from "os";
|
|
109
|
+
import * as dotenv from "dotenv";
|
|
110
|
+
/**
 * Compute the per-repository Buzz .env location under the user's home
 * directory. The leaf directory name combines the repo's basename with a
 * short SHA-1 of its absolute path, so two repos with the same basename
 * never collide.
 */
function getBuzzEnvPath(repoPath) {
  const absolutePath = path.resolve(repoPath);
  const digest = createHash("sha1").update(absolutePath).digest("hex");
  const repoKey = `${path.basename(absolutePath)}-${digest.slice(0, 12)}`;
  return path.join(
    os.homedir(),
    ".arthur-engine",
    "local-stack",
    "buzz",
    repoKey,
    ".env"
  );
}
|
|
116
|
+
// Root directory of the local GenAI Engine install
// (~/.arthur-engine/local-stack/genai-engine). Defined once so the .env path
// below cannot drift out of sync — the original duplicated every segment.
var GENAI_ENGINE_DIR = path.join(
  os.homedir(),
  ".arthur-engine",
  "local-stack",
  "genai-engine"
);
// The engine's .env file, derived from GENAI_ENGINE_DIR.
var GENAI_ENGINE_ENV_PATH = path.join(GENAI_ENGINE_DIR, ".env");
|
|
129
|
+
/**
 * Load the Buzz config file at `envPath` as a plain key/value object.
 * Returns {} when the file is missing or yields nothing. Passing an empty
 * `processEnv` keeps dotenv from mutating the real process environment.
 */
function readBuzzConfig(envPath) {
  if (!fs.existsSync(envPath)) {
    return {};
  }
  const { parsed } = dotenv.config({ path: envPath, override: false, processEnv: {} });
  return parsed ?? {};
}
|
|
134
|
+
/**
 * Merge `config2` into the existing Buzz config at `envPath` and write the
 * result back as KEY=value lines. New values win over existing ones, and
 * entries whose value is undefined are dropped. Creates the parent
 * directory if it does not exist yet.
 */
function writeBuzzConfig(config2, envPath) {
  fs.mkdirSync(path.dirname(envPath), { recursive: true });
  const merged = { ...readBuzzConfig(envPath), ...config2 };
  const lines = [];
  for (const [key, value] of Object.entries(merged)) {
    if (value !== void 0) {
      lines.push(`${key}=${value}`);
    }
  }
  fs.writeFileSync(envPath, lines.join("\n") + "\n", "utf-8");
}
|
|
141
|
+
/**
 * Read the local GenAI Engine's .env into a plain object without touching
 * process.env. Returns {} when the engine has never been installed locally.
 */
function readGenaiEngineConfig() {
  if (!fs.existsSync(GENAI_ENGINE_ENV_PATH)) {
    return {};
  }
  const { parsed } = dotenv.config({ path: GENAI_ENGINE_ENV_PATH, override: false, processEnv: {} });
  return parsed ?? {};
}
|
|
146
|
+
|
|
147
|
+
// src/arthur/client.ts
|
|
148
|
+
/**
 * Minimal HTTP client for the Arthur GenAI Engine REST API.
 * Every request sends a bearer token and aborts after a 10–15s timeout.
 * Read-style methods swallow failures and return empty/null results;
 * create-style methods return `{ error }` result objects instead of
 * throwing — except getTasks/createTask, which throw on non-OK responses.
 */
var ArthurEngineClient = class {
  constructor(baseUrl, apiKey) {
    this.apiKey = apiKey;
    // Strip one trailing slash so path concatenation below is uniform.
    this.baseUrl = baseUrl.replace(/\/$/, "");
  }
  apiKey;
  baseUrl;
  /** Common auth + JSON headers sent with every request. */
  get headers() {
    return {
      Authorization: `Bearer ${this.apiKey}`,
      "Content-Type": "application/json"
    };
  }
  /** Returns true if the engine is reachable (regardless of auth status). */
  async verifyConnection() {
    try {
      const res = await fetch(`${this.baseUrl}/api/v2/tasks?page=0&page_size=1`, {
        headers: this.headers,
        signal: AbortSignal.timeout(1e4)
      });
      // A 401 still proves the server is up; only network errors mean "down".
      return res.status === 200 || res.status === 401;
    } catch {
      return false;
    }
  }
  /** Returns true if the API key is valid. */
  async login() {
    try {
      const res = await fetch(`${this.baseUrl}/api/v2/tasks?page=0&page_size=1`, {
        headers: this.headers,
        signal: AbortSignal.timeout(1e4)
      });
      return res.ok;
    } catch {
      return false;
    }
  }
  /** List all tasks. Throws on a non-OK response. */
  async getTasks() {
    const res = await fetch(`${this.baseUrl}/api/v2/tasks`, {
      headers: this.headers,
      signal: AbortSignal.timeout(15e3)
    });
    if (!res.ok) throw new Error(`Failed to get tasks: HTTP ${res.status}`);
    const data = await res.json();
    // Response shape varies between API versions; accept `tasks` or `data`.
    return data.tasks ?? data.data ?? [];
  }
  /** Create a task with the given name. Throws (with response body) on failure. */
  async createTask(name) {
    const res = await fetch(`${this.baseUrl}/api/v2/tasks`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify({ name }),
      signal: AbortSignal.timeout(15e3)
    });
    if (!res.ok) {
      const body = await res.text().catch(() => "");
      throw new Error(`Failed to create task: HTTP ${res.status} ${body}`);
    }
    return await res.json();
  }
  /** Fetch up to 5 recent traces for a task. Never throws; errors are reported in the result. */
  async getTraces(taskId) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/traces?task_ids=${encodeURIComponent(taskId)}&page_size=5`,
        {
          headers: this.headers,
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const body = await res.text().catch(() => "");
        return { traces: [], error: `HTTP ${res.status}: ${body}` };
      }
      const data = await res.json();
      return { traces: data.traces ?? data.data ?? [] };
    } catch (err) {
      return { traces: [], error: err instanceof Error ? err.message : String(err) };
    }
  }
  /** Fetch a single trace by id, or null on any failure. */
  async getTraceDetail(traceId) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/traces/${encodeURIComponent(traceId)}`,
        {
          headers: this.headers,
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) return null;
      return await res.json();
    } catch {
      return null;
    }
  }
  /** List configured model providers; returns [] on any failure. */
  async getModelProviders() {
    try {
      const res = await fetch(`${this.baseUrl}/api/v1/model_providers`, {
        headers: this.headers,
        signal: AbortSignal.timeout(1e4)
      });
      if (!res.ok) return [];
      const data = await res.json();
      return data.providers ?? [];
    } catch {
      return [];
    }
  }
  /** Create an LLM eval under a task. Returns { eval } or { error }. */
  async createLlmEval(taskId, evalSlug, body) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/llm_evals/${encodeURIComponent(evalSlug)}`,
        {
          method: "POST",
          headers: this.headers,
          body: JSON.stringify(body),
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const text2 = await res.text().catch(() => "");
        return { error: `HTTP ${res.status}: ${text2}` };
      }
      return { eval: await res.json() };
    } catch (err) {
      return { error: err instanceof Error ? err.message : String(err) };
    }
  }
  /** Create a trace transform under a task. Returns { transform } or { error }. */
  async createTransform(taskId, body) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/traces/transforms`,
        {
          method: "POST",
          headers: this.headers,
          body: JSON.stringify(body),
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const text2 = await res.text().catch(() => "");
        return { error: `HTTP ${res.status}: ${text2}` };
      }
      return { transform: await res.json() };
    } catch (err) {
      return { error: err instanceof Error ? err.message : String(err) };
    }
  }
  /** Create a continuous eval under a task. Returns { continuousEval } or { error }. */
  async createContinuousEval(taskId, body) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/continuous_evals`,
        {
          method: "POST",
          headers: this.headers,
          body: JSON.stringify(body),
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const text2 = await res.text().catch(() => "");
        return { error: `HTTP ${res.status}: ${text2}` };
      }
      return { continuousEval: await res.json() };
    } catch (err) {
      return { error: err instanceof Error ? err.message : String(err) };
    }
  }
  /** List up to 100 LLM evals for a task; returns [] on any failure. */
  async getLlmEvals(taskId) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/llm_evals?page=0&page_size=100`,
        {
          headers: this.headers,
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) return [];
      const data = await res.json();
      // Response shape varies; accept `evals` or `items`.
      return data.evals ?? data.items ?? [];
    } catch {
      return [];
    }
  }
  /** List up to 100 continuous evals for a task; returns [] on any failure. */
  async getContinuousEvals(taskId) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/continuous_evals?page=0&page_size=100`,
        {
          headers: this.headers,
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) return [];
      const data = await res.json();
      return data.evals ?? [];
    } catch {
      return [];
    }
  }
  /** Upsert a model provider's API key. Returns { success } plus error detail on failure. */
  async configureModelProvider(provider, credentials) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/model_providers/${encodeURIComponent(provider)}`,
        {
          method: "PUT",
          headers: this.headers,
          body: JSON.stringify({ api_key: credentials.api_key }),
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const text2 = await res.text().catch(() => "");
        return { success: false, error: `HTTP ${res.status}: ${text2}` };
      }
      return { success: true };
    } catch (err) {
      return { success: false, error: err instanceof Error ? err.message : String(err) };
    }
  }
  /** Create a named prompt under a task. Returns { prompt } or { error }. */
  async createPrompt(taskId, promptName, body) {
    try {
      const res = await fetch(
        `${this.baseUrl}/api/v1/tasks/${encodeURIComponent(taskId)}/prompts/${encodeURIComponent(promptName)}`,
        {
          method: "POST",
          headers: this.headers,
          body: JSON.stringify(body),
          signal: AbortSignal.timeout(15e3)
        }
      );
      if (!res.ok) {
        const text2 = await res.text().catch(() => "");
        return { error: `HTTP ${res.status}: ${text2}` };
      }
      return { prompt: await res.json() };
    } catch (err) {
      return { error: err instanceof Error ? err.message : String(err) };
    }
  }
};
|
|
387
|
+
|
|
388
|
+
// src/workflow/steps/02-arthur-engine.ts
|
|
389
|
+
// Paths into the local GenAI Engine install laid down by the official installer.
var DOCKER_COMPOSE_PATH = path2.join(GENAI_ENGINE_DIR, "docker-compose.yml");
// NOTE(review): same file as GENAI_ENGINE_ENV_PATH from src/config/env.ts — consider consolidating.
var ENV_PATH = path2.join(GENAI_ENGINE_DIR, ".env");
// One-liner that pipes the hosted installer into bash (macOS only).
var INSTALL_SCRIPT = "bash <(curl -sSL https://get-genai-engine.arthur.ai/mac)";
|
|
392
|
+
/**
 * Locate the local engine's admin API key: prefer the engine's .env file,
 * then fall back to scraping docker-compose.yml. Returns "" when neither
 * source yields a key.
 */
function readLocalAdminKey() {
  const { GENAI_ENGINE_ADMIN_KEY } = readGenaiEngineConfig();
  if (GENAI_ENGINE_ADMIN_KEY) {
    return GENAI_ENGINE_ADMIN_KEY;
  }
  if (!fs2.existsSync(DOCKER_COMPOSE_PATH)) {
    return "";
  }
  // Match a compose environment entry like "  - GENAI_ENGINE_ADMIN_KEY=abc123".
  const compose = fs2.readFileSync(DOCKER_COMPOSE_PATH, "utf-8");
  const keyMatch = compose.match(/^\s*-\s*GENAI_ENGINE_ADMIN_KEY=(.+)$/m);
  return keyMatch ? keyMatch[1].trim() : "";
}
|
|
400
|
+
/**
 * Poll the engine over HTTP until it responds, showing a spinner with
 * elapsed time and (when tailing docker) the most recent log line.
 *
 * With `options.watchDocker: true` (and a compose file present) there is NO
 * overall timeout: the loop waits indefinitely while a container is still
 * running and only fails fast if every container has exited. Without it,
 * polling gives up after 120 seconds.
 *
 * Returns true once the engine is reachable, false on container-exit or timeout.
 */
async function waitForEngine(url, apiKey, options = {}) {
  const client = new ArthurEngineClient(url, apiKey);
  const start = Date.now();
  const watchDocker = options.watchDocker === true && fs2.existsSync(DOCKER_COMPOSE_PATH);
  if (watchDocker) {
    p.log.info(buzzSay("First-time startup may take several minutes while models are downloaded."));
    p.log.info(buzzSay("Subsequent startups will load them from the local cache on disk."));
  }
  const spinner = ora2({ text: buzzSay("Waiting for Arthur Engine to become ready..."), color: "cyan" }).start();
  let recentLogLine = "";
  // Tail docker-compose logs in the background purely to surface startup
  // progress in the spinner text; `reject: false` keeps kill() from throwing.
  const logsProcess = watchDocker ? execa2("docker", ["compose", "-f", DOCKER_COMPOSE_PATH, "logs", "--follow", "--tail", "10", "--no-color"], {
    all: true,
    reject: false
  }) : null;
  logsProcess?.all?.on("data", (chunk) => {
    const text2 = chunk.toString();
    const lines = text2.split("\n").filter((l) => l.trim() && !l.includes("Attaching to"));
    for (const line of lines) {
      // Strip the "service-1 | " prefix docker-compose adds to each log line.
      const stripped = line.replace(/^[\w-]+-\d+\s*\|\s*/, "").trim();
      if (stripped) recentLogLine = stripped.slice(0, 120);
    }
  });
  try {
    while (true) {
      if (await client.verifyConnection()) {
        spinner.stop();
        logSuccess("Arthur Engine is online.");
        return true;
      }
      if (watchDocker) {
        try {
          // If no container is still running, the engine crashed — stop waiting.
          const { stdout } = await execa2("docker", [
            "compose",
            "-f",
            DOCKER_COMPOSE_PATH,
            "ps",
            "--status",
            "running",
            "-q"
          ]);
          if (!stdout.trim()) {
            spinner.stop();
            logError("Arthur Engine container has exited unexpectedly.");
            return false;
          }
        } catch {
          // Best-effort: if `docker compose ps` itself fails, keep polling.
        }
      } else if (Date.now() - start >= 12e4) {
        // 120s timeout applies only when we are not watching docker.
        spinner.stop();
        logError("Engine did not become ready in time.");
        return false;
      }
      // Poll every 5 seconds.
      await new Promise((r) => setTimeout(r, 5e3));
      const elapsed = Math.round((Date.now() - start) / 1e3);
      const logHint = recentLogLine ? `
${recentLogLine}` : "";
      spinner.text = buzzSay(`Engine starting up... (${elapsed}s elapsed)${logHint}`);
    }
  } finally {
    // Always tear down the background log tail, whatever the outcome.
    logsProcess?.kill();
  }
}
|
|
462
|
+
/**
 * Detect the state of a locally-installed Arthur Engine.
 * Returns { status: "not-installed" } when the installer artifacts are absent,
 * { status: "down", url, apiKey } when installed but not responding, or
 * { status: "running", url, apiKey } when reachable over HTTP.
 */
async function detectLocalArthurEngine() {
  // Both files are written by the installer; missing either means never installed.
  if (!fs2.existsSync(DOCKER_COMPOSE_PATH) || !fs2.existsSync(ENV_PATH)) {
    return { status: "not-installed" };
  }
  const cfg = readGenaiEngineConfig();
  const url = cfg.GENAI_ENGINE_INGRESS_URI ?? "http://localhost:3030";
  const apiKey = readLocalAdminKey();
  try {
    // Any running compose container means the engine may be up; none means down.
    const { stdout } = await execa2("docker", ["compose", "-f", DOCKER_COMPOSE_PATH, "ps", "--status", "running", "-q"]);
    if (!stdout.trim()) return { status: "down", url, apiKey };
  } catch {
    // docker missing or daemon not running — treat the same as engine down.
    return { status: "down", url, apiKey };
  }
  // Container is up; confirm the HTTP API actually answers.
  const client = new ArthurEngineClient(url, apiKey);
  if (!await client.verifyConnection()) return { status: "down", url, apiKey };
  return { status: "running", url, apiKey };
}
|
|
479
|
+
/**
 * Assert that the engine at `url` is reachable and that `apiKey`
 * authenticates against it, with spinner progress along the way.
 * Throws BuzzError on either failure.
 */
async function verifyAndLogin(url, apiKey) {
  const engine = new ArthurEngineClient(url, apiKey);
  const spinner = ora2({ text: buzzSay("Verifying Arthur Engine connection..."), color: "cyan" }).start();
  if (!await engine.verifyConnection()) {
    spinner.stop();
    logError(`Arthur Engine at ${url} is not reachable.`);
    throw new BuzzError(`Engine at ${url} is not reachable.`);
  }
  spinner.text = buzzSay("Engine reachable. Verifying API key...");
  if (!await engine.login()) {
    spinner.stop();
    logError("API key is invalid. Cannot authenticate with Arthur Engine.");
    throw new BuzzError("Arthur Engine API key is invalid.");
  }
  spinner.stop();
  logSuccess("Arthur Engine connection and authentication verified.");
}
|
|
498
|
+
/**
 * Run the hosted Arthur GenAI Engine installer, wait for the engine to come
 * up, and persist its URL + admin key into the per-repo Buzz config.
 * NOTE(review): this pipes a remote script straight into bash — the user
 * confirms first, but the script content is not inspected or pinned.
 */
async function handleLocalInstall(state) {
  note(
    `Running the Arthur GenAI Engine install script:
  ${INSTALL_SCRIPT}

This will install Docker and start the local engine.`,
    "Installing Arthur GenAI Engine"
  );
  const proceed = await confirm("Ready to run the local install script?");
  if (!proceed) {
    throw new BuzzError("Installation cancelled by user.");
  }
  p.log.info(buzzSay("Initiating Arthur Engine launch sequence..."));
  try {
    // Inherit stdio so the installer's own prompts and output reach the user.
    await execa2("bash", ["-c", INSTALL_SCRIPT], { stdio: "inherit" });
  } catch (err) {
    logError("Install script failed.");
    throw new BuzzError(`Install script failed: ${err instanceof Error ? err.message : String(err)}`);
  }
  const cfg = readGenaiEngineConfig();
  const url = cfg.GENAI_ENGINE_INGRESS_URI ?? "http://localhost:3030";
  let apiKey = readLocalAdminKey();
  if (!apiKey) {
    // Installer layout changed or key missing — fall back to asking the user.
    logWarn("Could not find API key in engine config files.");
    apiKey = await password("Enter your Arthur Engine admin API key:");
  }
  // watchDocker: no time limit while the container is still running (first
  // boot can take several minutes while models download).
  const ready = await waitForEngine(url, apiKey, { watchDocker: true });
  if (!ready) {
    throw new BuzzError("Arthur Engine did not start in time. Check Docker logs.");
  }
  writeBuzzConfig({ ARTHUR_ENGINE_URL: url, ARTHUR_API_KEY: apiKey }, state.buzzEnvPath);
  state.engineUrl = url;
  state.apiKey = apiKey;
  logSuccess(`All systems nominal. Local Arthur Engine ready at ${url}`);
}
|
|
533
|
+
/**
 * Prompt for remote engine details (URL, API key, task ID), verify the
 * credentials, persist everything to the per-repo Buzz config, and record
 * the values on the workflow state.
 */
async function handleRemoteEngine(state) {
  note(
    "Enter the URL and API key for your remote Arthur GenAI Engine.\nExample URL: https://myengine.example.com",
    "Remote Arthur Engine"
  );
  const engineUrl = await text("Arthur Engine URL:", "https://myengine.example.com");
  const engineKey = await password("Arthur Engine API key:");
  // Fail fast on bad credentials before asking for a task ID.
  await verifyAndLogin(engineUrl, engineKey);
  const engineTaskId = await text("Arthur Engine task ID:", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
  const saved = {
    ARTHUR_ENGINE_URL: engineUrl,
    ARTHUR_API_KEY: engineKey,
    ARTHUR_TASK_ID: engineTaskId
  };
  writeBuzzConfig(saved, state.buzzEnvPath);
  state.engineUrl = engineUrl;
  state.apiKey = engineKey;
  state.taskId = engineTaskId;
  logSuccess(`All systems nominal. Remote Arthur Engine configured at ${engineUrl}`);
}
|
|
548
|
+
/**
 * Decide whether an Arthur Engine URL points at this machine.
 * Parses the URL and compares the hostname exactly, so hosts such as
 * "notlocalhost.example.com" no longer match (the old substring check did).
 * Falls back to the lenient substring test for strings that are not
 * parseable URLs, preserving prior behavior for bare host strings.
 */
function isLocalEngine(url) {
  try {
    const { hostname } = new URL(url);
    return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "[::1]";
  } catch {
    // Not a valid URL — keep the original lenient behavior.
    return url.includes("localhost") || url.includes("127.0.0.1");
  }
}
|
|
551
|
+
/**
 * Step 2 — make sure a reachable, authenticated Arthur Engine is configured.
 * Resolution order:
 *   1. saved per-repo config (with recovery flows if it is unreachable),
 *   2. an auto-detected local docker install (running or down),
 *   3. interactive choice: fresh local install (macOS only) or remote engine.
 * On success, state.engineUrl / state.apiKey are set (and persisted).
 */
async function step2_EnsureArthurEngine(state) {
  const buzzCfg = readBuzzConfig(state.buzzEnvPath);
  // --- Path 1: previously saved config for this repo ---
  if (buzzCfg.ARTHUR_ENGINE_URL && buzzCfg.ARTHUR_API_KEY) {
    const useExisting = await confirm(
      `Found Arthur Engine config at ${buzzCfg.ARTHUR_ENGINE_URL}. Use this?`
    );
    if (useExisting) {
      const url = buzzCfg.ARTHUR_ENGINE_URL;
      const apiKey = buzzCfg.ARTHUR_API_KEY;
      const spinner = ora2({ text: buzzSay("Verifying Arthur Engine connection..."), color: "cyan" }).start();
      const reachable = await new ArthurEngineClient(url, apiKey).verifyConnection();
      spinner.stop();
      if (reachable) {
        // NOTE(review): verifyAndLogin re-checks reachability — harmless double probe.
        await verifyAndLogin(url, apiKey);
        state.engineUrl = url;
        state.apiKey = apiKey;
        return;
      }
      logWarn(`Arthur Engine at ${url} is not reachable.`);
      if (isLocalEngine(url)) {
        // Local engine recovery: wait for restart, reinstall, or switch to remote.
        const recovery = await select(
          "What would you like to do?",
          [
            { value: "wait", label: "I'll bring it back up", hint: "Buzz will wait until the engine is ready" },
            { value: "install", label: "Rerun the Arthur GenAI Engine installer", hint: "Re-runs the install script" },
            { value: "remote", label: "Connect to a remote engine instead", hint: "Need URL + API key + task ID" }
          ]
        );
        if (recovery === "wait") {
          const ready = await waitForEngine(url, apiKey, { watchDocker: true });
          if (!ready) throw new BuzzError("Arthur Engine did not come back online in time. Check Docker logs.");
          await verifyAndLogin(url, apiKey);
          state.engineUrl = url;
          state.apiKey = apiKey;
          logSuccess(`All systems nominal. Arthur Engine back online at ${url}`);
          return;
        } else if (recovery === "install") {
          await handleLocalInstall(state);
          return;
        } else {
          await handleRemoteEngine(state);
          return;
        }
      } else {
        // Remote engine recovery: retry the same URL or enter a new one.
        const recovery = await select(
          "What would you like to do?",
          [
            { value: "retry", label: "Check again", hint: "Try reconnecting to the same URL" },
            { value: "remote", label: "Connect to a different remote engine", hint: "Need URL + API key" }
          ]
        );
        if (recovery === "retry") {
          // verifyAndLogin throws if the engine is still unreachable.
          await verifyAndLogin(url, apiKey);
          state.engineUrl = url;
          state.apiKey = apiKey;
          return;
        } else {
          await handleRemoteEngine(state);
          return;
        }
      }
    }
    // User declined the saved config — fall through to auto-detection.
  }
  // --- Path 2: auto-detect a local docker install ---
  const local = await detectLocalArthurEngine();
  if (local.status === "running") {
    const useLocal = await confirm(
      `Detected Arthur Engine running at ${local.url}. Use this?`
    );
    if (useLocal) {
      await verifyAndLogin(local.url, local.apiKey);
      writeBuzzConfig({ ARTHUR_ENGINE_URL: local.url, ARTHUR_API_KEY: local.apiKey }, state.buzzEnvPath);
      state.engineUrl = local.url;
      state.apiKey = local.apiKey;
      return;
    }
  } else if (local.status === "down") {
    logWarn(`Found a local Arthur Engine installation at ${local.url} but it is not reachable.`);
    const recovery = await select(
      "What would you like to do?",
      [
        { value: "restart", label: "I'll bring it back up", hint: "Buzz will wait until the engine is ready" },
        { value: "install", label: "Run a fresh install", hint: "Re-runs the install script" },
        { value: "remote", label: "Connect to a remote engine instead", hint: "Need URL + API key + task ID" }
      ]
    );
    if (recovery === "restart") {
      const ready = await waitForEngine(local.url, local.apiKey, { watchDocker: true });
      if (!ready) {
        throw new BuzzError("Arthur Engine did not come back online in time. Check Docker logs.");
      }
      await verifyAndLogin(local.url, local.apiKey);
      writeBuzzConfig({ ARTHUR_ENGINE_URL: local.url, ARTHUR_API_KEY: local.apiKey }, state.buzzEnvPath);
      state.engineUrl = local.url;
      state.apiKey = local.apiKey;
      logSuccess(`All systems nominal. Arthur Engine back online at ${local.url}`);
      return;
    } else if (recovery === "install") {
      await handleLocalInstall(state);
      return;
    } else {
      await handleRemoteEngine(state);
      return;
    }
  }
  // --- Path 3: nothing installed — fresh setup ---
  // Local install requires macOS; non-Mac users must re-run and pick remote.
  if (process.platform !== "darwin") {
    logError("This mission currently requires Mac OS hardware. Returning to base.");
    note(
      'Arthur Engine local installation is currently Mac-only.\nFor a remote engine, re-run Buzz and choose the "Remote" option.',
      "Mac only"
    );
    throw new BuzzError("Local Arthur Engine installation requires macOS.", true);
  }
  const choice = await select(
    "Where should we connect Arthur GenAI Engine?",
    [
      { value: "local", label: "Install on this machine (Mac)", hint: "Requires Docker" },
      { value: "remote", label: "Connect to a remote deployment", hint: "Need URL + API key" }
    ]
  );
  if (choice === "local") {
    await handleLocalInstall(state);
  } else {
    await handleRemoteEngine(state);
  }
  // Final sanity check on whatever handler populated state.
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  const loggedIn = await client.login();
  if (!loggedIn) {
    throw new BuzzError("Failed to authenticate with Arthur GenAI Engine.");
  }
  logSuccess("Login verified. Arthur Engine is go.");
}
|
|
682
|
+
|
|
683
|
+
// src/workflow/steps/03-task.ts
|
|
684
|
+
import ora3 from "ora";
|
|
685
|
+
/**
 * Step 3 — resolve which Arthur task this application reports to:
 *   1. confirm a previously saved ARTHUR_TASK_ID, else
 *   2. pick from the engine's existing tasks, else
 *   3. create a new task by name.
 * The chosen/created id is persisted to the Buzz config and set on state.
 * Throws BuzzError if tasks cannot be listed or created.
 */
async function step3_EnsureTaskId(state) {
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  const buzzCfg = readBuzzConfig(state.buzzEnvPath);
  // Fast path: a task id was saved on a previous run — just confirm it.
  if (buzzCfg.ARTHUR_TASK_ID) {
    const taskId = buzzCfg.ARTHUR_TASK_ID;
    note(`Existing task ID found:
${taskId}`, "Saved task ID");
    const confirmed = await confirm(`Is ${taskId} the correct task for this application?`);
    if (confirmed) {
      state.taskId = taskId;
      logSuccess(`Task ID confirmed: ${taskId}`);
      return;
    }
    // Declined — fall through to selection/creation.
  }
  const spinner = ora3({ text: buzzSay("Retrieving active tasks from Arthur Engine..."), color: "cyan" }).start();
  let tasks = [];
  try {
    tasks = await client.getTasks();
    spinner.stop();
    logSuccess(`Found ${tasks.length} active task(s).`);
  } catch (err) {
    spinner.stop();
    logError(`Failed to retrieve tasks: ${err instanceof Error ? err.message : String(err)}`);
    throw new BuzzError("Could not retrieve tasks from Arthur Engine.");
  }
  if (tasks.length > 0) {
    // Offer existing tasks plus a sentinel option to create a fresh one.
    const options = tasks.map((t) => ({
      value: t.id,
      label: t.name,
      hint: t.id
    }));
    options.push({ value: "__new__", label: "+ Create a new task", hint: "Give your application a name" });
    const selected = await select("Which Arthur task should we use for this application?", options);
    if (selected !== "__new__") {
      writeBuzzConfig({ ARTHUR_TASK_ID: selected }, state.buzzEnvPath);
      state.taskId = selected;
      logSuccess(`Task selected: ${selected}`);
      return;
    }
    // "__new__" chosen — fall through to creation below.
  } else {
    p.log.info(buzzSay("No active tasks found. Let's create one."));
  }
  const taskName = await text(
    'What should we call this task? (e.g., "My Customer Support Bot"):',
    "My Agentic Application"
  );
  const createSpinner = ora3({ text: buzzSay(`Creating task "${taskName}"...`), color: "cyan" }).start();
  try {
    const newTask = await client.createTask(taskName);
    createSpinner.stop();
    logSuccess(`Task created: "${newTask.name}" (${newTask.id})`);
    writeBuzzConfig({ ARTHUR_TASK_ID: newTask.id }, state.buzzEnvPath);
    state.taskId = newTask.id;
    note(`Task ID: ${newTask.id}
This has been saved to ${state.buzzEnvPath}`, "New task created");
  } catch (err) {
    createSpinner.stop();
    logError(`Failed to create task: ${err instanceof Error ? err.message : String(err)}`);
    throw new BuzzError("Could not create a task in Arthur Engine.");
  }
}
|
|
746
|
+
|
|
747
|
+
// src/claude-code/sdk.ts
|
|
748
|
+
import { query } from "@anthropic-ai/claude-agent-sdk";
|
|
749
|
+
import ora4 from "ora";
|
|
750
|
+
import chalk from "chalk";
|
|
751
|
+
// System prompts for the Claude agent, keyed by instrumentation type.
// The key must match the `type` field of the request passed to
// instrumentCodeWithClaude, which forwards the matching entry as the
// agent's systemPrompt.
var SYSTEM_PROMPTS = {
  "python-arthur-sdk": "You are an expert Python developer instrumenting AI/LLM applications with OpenTelemetry-based observability. You write clean, idiomatic Python code.",
  "mastra-arthur-exporter": "You are an expert TypeScript developer instrumenting Mastra AI agent applications with OpenTelemetry tracing. You write clean, idiomatic TypeScript.",
  openinference: "You are an expert developer instrumenting AI agent applications with OpenTelemetry and OpenInference. You adapt to the framework in use (Python or TypeScript)."
};
|
|
756
|
+
/**
 * Builds the full instrumentation prompt sent to the Claude agent.
 *
 * @param {object} req - Instrumentation request.
 * @param {string} req.type - One of "python-arthur-sdk", "mastra-arthur-exporter", "openinference".
 * @param {string} req.arthurEngineUrl - Base URL of the Arthur Engine; interpolated into the prompt.
 * @param {string} req.taskId - Arthur task ID; interpolated into the prompt.
 * @returns {string} The prompt text for the selected type; `undefined` if
 *   req.type is not one of the three known keys (callers are expected to pass
 *   a valid type).
 */
function buildPrompt(req) {
  // Shared tail appended to every prompt variant: Arthur connection details,
  // API-key/.env safety rules, completion criteria, and the final JSON output
  // format that parseResult() later scans for.
  const base = `
Arthur Engine URL: ${req.arthurEngineUrl}
Arthur Task ID: ${req.taskId}

IMPORTANT RULES:
- Do NOT hardcode API keys in source code. Always read from environment variables (ARTHUR_API_KEY).
- Add these entries to .env (create if it does not exist; ensure .env is in .gitignore):
ARTHUR_API_KEY=$ARTHUR_API_KEY
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_TASK_ID=${req.taskId}
- Also add placeholder entries to .env.example:
ARTHUR_API_KEY=your-api-key-here
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_TASK_ID=${req.taskId}
- Make the smallest possible changes \u2014 instrument, don't refactor.
- The task is complete only when:
1. All dependencies are installed (pip install or npm install).
2. An import/syntax check passes (python -c "import <module>" or tsc --noEmit).
3. The existing test suite runs (if one exists). Fix any NEW test failures you introduced.
4. You print a final JSON result block on the last line of your output.

FINAL OUTPUT FORMAT (print this exact JSON on the last line):
{"success":true,"testsPassed":true,"summary":"<one sentence>"}
or
{"success":false,"testsPassed":false,"summary":"<what went wrong>"}
`;
  // One detailed prompt per instrumentation type; each ends by embedding
  // `base` so the shared rules and output format always come last.
  const typePrompts = {
    "python-arthur-sdk": `
Instrument this Python agentic application with the Arthur Python Observability SDK.

Reference SDK: https://github.com/arthur-ai/arthur-engine/tree/main/arthur-observability-sdk

Plan ultrathink \u2014 carefully examine all files first, then implement:

STEP 1 \u2014 ANALYSIS:
- List all files to understand the project structure
- Read requirements.txt / pyproject.toml to see current dependencies and note the package manager (uv/pip/poetry)
- Find the application entry point (main.py, app.py, __main__.py, or similar)
- Identify the LLM framework used (openai, langchain, anthropic, crewai, etc.)
- Check if arthur_observability_sdk is already installed (skip if yes)

STEP 2 \u2014 IMPLEMENTATION (only if not already instrumented):

PART A \u2014 SDK SETUP (always required):
- Add "arthur-observability-sdk[<framework>]" to requirements.txt / pyproject.toml
where <framework> matches the detected LLM framework (e.g. langchain, openai, anthropic, crewai)
- In the entry point, add:
from arthur_observability_sdk import Arthur
import os
task_id = os.environ.get("ARTHUR_TASK_ID", "${req.taskId}")
arthur = Arthur(
api_key=os.environ.get("ARTHUR_API_KEY"),
base_url=os.environ.get("ARTHUR_BASE_URL", "${req.arthurEngineUrl}"),
task_id=task_id,
service_name="<app-name>",
resource_attributes={"arthur.task": task_id},
)
- Call arthur.instrument_<framework>() to auto-instrument all LLM API calls
- Add to .env (create if needed, ensure .env is in .gitignore):
ARTHUR_API_KEY=$ARTHUR_API_KEY
- Add to .env.example: ARTHUR_API_KEY=your-api-key-here

PART B \u2014 ROOT SPAN + SESSION ID (CRITICAL \u2014 without this, each LLM call is a SEPARATE trace):
The auto-instrumentor creates one span per LLM call. Without a parent span to connect them,
one user interaction emits multiple disconnected traces. A root CHAIN span fixes this.

Add these imports near the top of the file where arthur is initialised (or in a shared module):
from opentelemetry import trace
from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues
import json, uuid
tracer = trace.get_tracer(__name__)

Find the main request handler \u2014 the function/method that processes one complete user message
end-to-end (e.g. a Gradio/FastAPI/Flask chat handler, a process() / answer() / run() method).

Wrap its body with a root CHAIN span AND a session context manager:
# Derive session_id from app state: look for existing session key, conversation ID,
# user ID, or generate one: str(uuid.uuid4())
session_id = <session_id>
with arthur.session(session_id):
with tracer.start_as_current_span("<handler_name>") as root_span:
root_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.CHAIN.value)
root_span.set_attribute(SpanAttributes.INPUT_VALUE, <user_input_message>)
# all existing processing code runs here; every sub-span becomes a child
root_span.set_attribute(SpanAttributes.OUTPUT_VALUE, <final_response_text>)

IMPORTANT \u2014 streaming / generator handlers: Python async frameworks (Gradio, FastAPI, etc.)
may resume a generator in a different async task or thread, which resets the OTel context
between iterations. A context-manager-based span (with tracer.start_as_current_span) WILL
lose its parent context across yield points. Use explicit attach/detach instead:

from opentelemetry import context as otel_ctx
from opentelemetry.trace import set_span_in_context
from opentelemetry.context import set_value as otel_set_value

def handler(message, ...):
yield <initial_update> # yield BEFORE creating span (fine)

root_span = tracer.start_span("handler_name")
root_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.CHAIN.value)
root_span.set_attribute(SpanAttributes.INPUT_VALUE, message)

span_ctx = set_span_in_context(
root_span,
otel_set_value(SpanAttributes.SESSION_ID, session_id),
)
token = otel_ctx.attach(span_ctx)
try:
for chunk in <inner_generator>:
<process chunk>
otel_ctx.detach(token)
yield <chunk_to_caller>
token = otel_ctx.attach(span_ctx) # re-attach after each yield
root_span.set_attribute(SpanAttributes.OUTPUT_VALUE, <final_response>)
finally:
root_span.end()
otel_ctx.detach(token)

PART C \u2014 TOOL SPANS (for LLM tool-calling patterns):
If the application uses LLM tool calling \u2014 i.e. the LLM outputs tool-call parameters and
then Python code executes the actual function \u2014 wrap each tool execution with a TOOL span:
with tracer.start_as_current_span("<tool_name>") as tool_span:
tool_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.TOOL.value)
tool_span.set_attribute(SpanAttributes.TOOL_NAME, "<tool_name>")
tool_span.set_attribute(SpanAttributes.INPUT_VALUE, json.dumps(<tool_input_params>))
result = <execute_tool(tool_input_params)>
tool_span.set_attribute(SpanAttributes.OUTPUT_VALUE,
json.dumps(result) if not isinstance(result, str) else result)

PART D \u2014 RETRIEVAL SPANS (for RAG / search patterns):
If the application performs retrieval (vector search, semantic search, full-text search,
document lookup), wrap the retrieval call with a RETRIEVER span. You MUST set BOTH:
(a) per-document attributes so Arthur can index individual docs, AND
(b) output.value as a JSON list so Arthur Engine displays the retrieved context \u2014 without
output.value the retrieval output panel will appear empty in the UI.

with tracer.start_as_current_span("retrieval") as ret_span:
ret_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.RETRIEVER.value)
ret_span.set_attribute(SpanAttributes.INPUT_VALUE, <search_query_string>)
docs = <execute_retrieval(search_query)>
retrieved = []
for i, doc in enumerate(docs):
doc_text = <doc_text_content>
ret_span.set_attribute(f"retrieval.documents.{i}.document.content", doc_text)
entry = {"document_content": doc_text}
# Include score if the retrieval API returns one (float 0\u20131 preferred)
if <score_available>:
score = float(<doc_score>)
ret_span.set_attribute(f"retrieval.documents.{i}.document.score", score)
entry["score"] = score
retrieved.append(entry)
# REQUIRED: set output.value so the retrieval output is visible in Arthur Engine
ret_span.set_attribute(SpanAttributes.OUTPUT_VALUE, json.dumps(retrieved))

STEP 3 \u2014 VALIDATION:
- Run: pip install 'arthur-observability-sdk[<framework>]'
(or: uv sync if using uv)
- Run: python -c "from arthur_observability_sdk import Arthur; print('import OK')"
- Run the existing test suite if present (pytest, python -m pytest, or similar)
- Fix any new test failures you introduced
- Print the final JSON result

${base}`,
    "mastra-arthur-exporter": `
Instrument this Mastra TypeScript application with the Arthur observability exporter.

Reference: https://mastra.ai/docs/observability/tracing/exporters/arthur

Plan ultrathink \u2014 carefully examine all files first, then implement:

STEP 1 \u2014 ANALYSIS:
- List all files to understand the project structure
- Read package.json to see current dependencies
- Find the Mastra instance initialization file (usually src/mastra/index.ts)
- Check if @mastra/arthur is already installed and ArthurExporter is already registered (skip if yes)

STEP 2 \u2014 IMPLEMENTATION (only if not already instrumented):
Install the published Arthur exporter package:
npm install @mastra/arthur

Import and register in the Mastra instance initialization file:
import { Mastra } from '@mastra/core'
import { Observability } from '@mastra/observability'
import { ArthurExporter } from '@mastra/arthur'

export const mastra = new Mastra({
observability: new Observability({
configs: {
arthur: {
serviceName: '<app-name>',
exporters: [new ArthurExporter()],
},
},
}),
})

The ArthurExporter reads these env vars automatically (no constructor args needed):
ARTHUR_API_KEY \u2014 required
ARTHUR_BASE_URL \u2014 required (set to ${req.arthurEngineUrl})
ARTHUR_TASK_ID \u2014 optional (set to ${req.taskId})

Add to .env (create if needed, ensure .env is in .gitignore):
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_API_KEY=$ARTHUR_API_KEY
ARTHUR_TASK_ID=${req.taskId}
Add to .env.example:
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_API_KEY=your-api-key-here
ARTHUR_TASK_ID=${req.taskId}

STEP 3 \u2014 VALIDATION:
- Run: npm install (or yarn install / pnpm install)
- Run: npx tsc --noEmit
- Run the existing test suite if present (npm test, vitest run, or similar)
- Fix any new test failures you introduced
- Print the final JSON result

${base}`,
    openinference: `
Instrument this agentic application with OpenInference / OpenTelemetry for Arthur GenAI Engine.

Reference examples: https://github.com/arthur-ai/arthur-engine/tree/dev/genai-engine/examples/agents

Plan ultrathink \u2014 carefully examine all files first, then implement:

STEP 1 \u2014 ANALYSIS:
- List all files to understand project structure and language
- Read dependency manifests (requirements.txt, package.json, pyproject.toml)
- Find the entry point and identify the LLM framework (LangChain, OpenAI, CrewAI, etc.)
- Check if OpenInference / OpenTelemetry is already configured (skip if yes)

STEP 2 \u2014 IMPLEMENTATION (only if not already instrumented):

For Python:
PART A \u2014 OTel + framework instrumentor setup:
Add to requirements.txt:
opentelemetry-sdk
opentelemetry-exporter-otlp-proto-http
openinference-instrumentation-<framework> (e.g., openinference-instrumentation-langchain)
openinference-semantic-conventions

In the entry point:
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.<framework> import <Framework>Instrumentor
from openinference.semconv.trace import SpanAttributes, OpenInferenceSpanKindValues
import os, json, uuid

provider = TracerProvider()
exporter = OTLPSpanExporter(
endpoint="${req.arthurEngineUrl}/api/v1/traces",
headers={"Authorization": f"Bearer {os.environ.get('ARTHUR_API_KEY', '')}"},
)
provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(provider)
<Framework>Instrumentor().instrument()
tracer = trace.get_tracer(__name__)

PART B \u2014 ROOT SPAN + SESSION ID (CRITICAL \u2014 without this, each LLM call is a SEPARATE trace):
The auto-instrumentor creates one span per LLM call; without a parent span every call
becomes its own trace. A root CHAIN span fixes this.

Find the main request handler \u2014 the function/method that processes one complete user
message end-to-end (e.g. a Gradio/FastAPI/Flask chat handler, a process() / answer() /
run() / invoke() method).

Wrap its body with a root CHAIN span AND a session context manager. Use
using_session from openinference.instrumentation:
from openinference.instrumentation import using_session

session_id = <get from app state: session key, conversation ID, or str(uuid.uuid4())>
with using_session(session_id=session_id):
with tracer.start_as_current_span("<handler_name>") as root_span:
root_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.CHAIN.value)
root_span.set_attribute(SpanAttributes.INPUT_VALUE, <user_input_message>)
# all existing processing code runs here
root_span.set_attribute(SpanAttributes.OUTPUT_VALUE, <final_response_text>)

If the handler is a streaming/generator function (uses yield), the session + root span
must wrap the generator CONSUMPTION in the caller so spans stay open during streaming.

PART C \u2014 TOOL SPANS (for LLM tool-calling patterns):
If the application uses LLM tool calling (the LLM outputs tool-call params and then Python
code executes the actual function), wrap each tool execution:
with tracer.start_as_current_span("<tool_name>") as tool_span:
tool_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.TOOL.value)
tool_span.set_attribute(SpanAttributes.TOOL_NAME, "<tool_name>")
tool_span.set_attribute(SpanAttributes.INPUT_VALUE, json.dumps(<tool_input_params>))
result = <execute_tool(tool_input_params)>
tool_span.set_attribute(SpanAttributes.OUTPUT_VALUE,
json.dumps(result) if not isinstance(result, str) else result)

PART D \u2014 RETRIEVAL SPANS (for RAG / search patterns):
If the application performs retrieval (vector search, semantic search, document lookup),
wrap the retrieval call with a RETRIEVER span. You MUST set BOTH per-document attributes
AND output.value as a JSON list \u2014 without output.value, Arthur Engine shows empty output:
with tracer.start_as_current_span("retrieval") as ret_span:
ret_span.set_attribute(SpanAttributes.OPENINFERENCE_SPAN_KIND,
OpenInferenceSpanKindValues.RETRIEVER.value)
ret_span.set_attribute(SpanAttributes.INPUT_VALUE, <search_query_string>)
docs = <execute_retrieval(search_query)>
retrieved = []
for i, doc in enumerate(docs):
doc_text = <doc_text_content>
ret_span.set_attribute(f"retrieval.documents.{i}.document.content", doc_text)
entry = {"document_content": doc_text}
if <score_available>:
score = float(<doc_score>)
ret_span.set_attribute(f"retrieval.documents.{i}.document.score", score)
entry["score"] = score
retrieved.append(entry)
# REQUIRED: set output.value so retrieved docs are visible in Arthur Engine
ret_span.set_attribute(SpanAttributes.OUTPUT_VALUE, json.dumps(retrieved))

For TypeScript/JavaScript:
Follow the pattern from the customer-support-agent example, creating an OTLP exporter
pointing to ${req.arthurEngineUrl}/api/v1/traces with Bearer auth.
Also add a root span around the request handler (same concept as Python PART B above).

Add to .env (create if needed, ensure .env is in .gitignore):
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_API_KEY=$ARTHUR_API_KEY
ARTHUR_TASK_ID=${req.taskId}
Add to .env.example:
ARTHUR_BASE_URL=${req.arthurEngineUrl}
ARTHUR_API_KEY=your-api-key-here
ARTHUR_TASK_ID=${req.taskId}

STEP 3 \u2014 VALIDATION:
- Install new dependencies
- Run an import/syntax check
- Run the existing test suite if present and fix any new failures
- Print the final JSON result

${base}`
  };
  // NOTE(review): an unknown req.type yields undefined here — callers appear to
  // always pass one of the three known types; confirm before relying on it.
  return typePrompts[req.type];
}
|
|
1103
|
+
/**
 * Extracts the agent's structured result from its raw text output.
 *
 * Scans the output bottom-up for a JSON line containing a "success" key (the
 * format the prompt instructs the agent to print last). When none is found,
 * falls back to a keyword heuristic over the last 500 characters.
 *
 * @param {string} text2 - Full accumulated agent output.
 * @returns {{success: boolean, testsPassed?: boolean, summary?: string}}
 */
function parseResult(text2) {
  const allLines = text2.split("\n");
  // Walk from the last line upward so the most recent result block wins.
  for (let i = allLines.length - 1; i >= 0; i--) {
    const candidate = allLines[i].trim();
    if (!candidate.startsWith("{") || !candidate.includes('"success"')) {
      continue;
    }
    try {
      return JSON.parse(candidate);
    } catch {
      // Looked like a result block but wasn't valid JSON; keep scanning.
    }
  }
  // No structured result: infer an outcome from the tail of the output.
  const tail = text2.slice(-500);
  const failed = /error|fail|exception|traceback/i.test(tail);
  if (failed) {
    return {
      success: false,
      testsPassed: false,
      summary: "Instrumentation may have issues (no structured result returned)"
    };
  }
  return {
    success: true,
    testsPassed: true,
    summary: "Instrumentation applied"
  };
}
|
|
1121
|
+
/**
 * Runs a Claude agent over the target repository to apply observability
 * instrumentation, streaming its progress to the terminal.
 *
 * @param {object} req - Instrumentation request: repoPath, type (SYSTEM_PROMPTS key),
 *   arthurEngineUrl, taskId, apiKey.
 * @returns {Promise<{success: boolean, testsPassed: boolean, summary: string}>}
 *   Structured outcome parsed from the agent's final JSON line (see parseResult);
 *   never throws — SDK errors are converted into a failed result.
 */
async function instrumentCodeWithClaude(req) {
  const spinner = ora4({ text: buzzSay("Analyzing codebase and applying instrumentation..."), color: "cyan" }).start();
  // Echo the agent's latest output line beneath the spinner, with ANSI
  // escape sequences stripped so spinner redraws stay clean.
  const onProgress = (msg) => {
    const clean = msg.replace(/\x1B\[[0-9;]*m/g, "").trim();
    if (clean) {
      spinner.clear();
      process.stdout.write(chalk.dim(" \u203A ") + clean + "\n");
    }
  };
  let fullOutput = "";
  let finalResult = null;
  try {
    const stream = query({
      prompt: buildPrompt(req),
      options: {
        cwd: req.repoPath,
        // Full tool access so Claude can run tests, install deps, fix failures
        allowedTools: ["Read", "Glob", "Grep", "Edit", "Write", "Bash"],
        permissionMode: "acceptEdits",
        systemPrompt: SYSTEM_PROMPTS[req.type],
        env: {
          ...process.env,
          ARTHUR_API_KEY: req.apiKey,
          ARTHUR_BASE_URL: req.arthurEngineUrl,
          ARTHUR_TASK_ID: req.taskId
        }
      }
    });
    for await (const message of stream) {
      if (message.type === "assistant") {
        // Accumulate assistant text and surface its latest line as progress.
        const content = message.message?.content ?? [];
        for (const block of content) {
          if (block.type === "text" && block.text) {
            fullOutput += block.text;
            const lastLine = block.text.split("\n").filter(Boolean).at(-1);
            if (lastLine) {
              onProgress(lastLine);
            }
          }
        }
      } else if (message.type === "result") {
        const resultMsg = message;
        finalResult = parseResult(fullOutput + "\n" + (resultMsg.result ?? ""));
        if (resultMsg.subtype === "success") {
          spinner.succeed(buzzSay("Instrumentation task completed."));
        } else {
          spinner.fail(buzzSay(`Task ended: ${resultMsg.subtype}`));
          // BUGFIX: the stream ended abnormally (error, max turns, etc.), so
          // report the run as failed even if the parsed output text claimed
          // success. The previous code — `if (!finalResult.success)
          // { finalResult.success = false; }` — was a no-op.
          finalResult.success = false;
        }
      }
    }
  } catch (err) {
    spinner.fail(buzzSay("Instrumentation failed."));
    const errMsg = err instanceof Error ? err.message : String(err);
    return { success: false, testsPassed: false, summary: errMsg };
  }
  // A missing "result" message (stream ended early) falls back to parsing
  // whatever assistant text was accumulated.
  return finalResult ?? parseResult(fullOutput);
}
|
|
1181
|
+
|
|
1182
|
+
// src/workflow/steps/04-python.ts
|
|
1183
|
+
/**
 * Workflow step 4: instrument a Python repository with the Arthur SDK.
 *
 * Skips (returning false) when the repo is not Python; returns true once the
 * step has been handled — whether instrumentation was applied, already
 * present, or declined by the user.
 *
 * @param {object} state - Workflow state (analysis, repoPath, engineUrl, taskId, apiKey).
 * @returns {Promise<boolean>} false if this step does not apply; true otherwise.
 */
async function step4_InstrumentPython(state) {
  const { analysis } = state;

  // This step only handles Python repositories.
  if (analysis.language !== "python") {
    p.log.info(buzzSay(`Repository detected as ${analysis.language} \u2014 skipping Python instrumentation.`));
    return false;
  }
  logSuccess(`Python application detected. Framework: ${analysis.framework ?? "unknown"}`);

  // Nothing to do when instrumentation is already in place.
  if (analysis.isInstrumented) {
    if (analysis.instrumentationType === "arthur-sdk") {
      logSuccess("Arthur Python Observability SDK is already instrumented. All systems go.");
    } else {
      logSuccess(`Application is already instrumented (${analysis.instrumentationType}). Proceeding.`);
    }
    return true;
  }

  // Show the plan and get explicit consent before touching the repo.
  note(
    `Buzz will add the Arthur Python Observability SDK to your application.

What will change:
\u2022 arthur-observability-sdk added to requirements.txt / pyproject.toml
\u2022 Arthur initialization code added to your entry point
\u2022 ARTHUR_API_KEY added to .env (with actual key) and .env.example (placeholder)

Arthur Engine URL: ${state.engineUrl}
Task ID: ${state.taskId}`,
    "Instrumentation plan"
  );
  const approved = await confirm("May Buzz instrument your Python app with the Arthur SDK?");
  if (!approved) {
    logSuccess("Instrumentation skipped by user request.");
    return true;
  }

  p.log.info(buzzSay("Initiating Python instrumentation launch sequence..."));
  console.log();
  const request = {
    repoPath: state.repoPath,
    type: "python-arthur-sdk",
    arthurEngineUrl: state.engineUrl,
    taskId: state.taskId,
    apiKey: state.apiKey
  };
  const result = await instrumentCodeWithClaude(request);
  console.log();

  if (!result.success) {
    logWarn(`Instrumentation may be incomplete. ${result.summary}`);
    note(
      "Review the changes Claude made and verify manually before proceeding.",
      "Manual verification needed"
    );
  } else {
    logSuccess(`Instrumentation applied. ${result.summary}`);
  }
  // Succeeded overall but the repo's tests are red — flag it without failing.
  if (result.success && !result.testsPassed) {
    logWarn("Some tests are not passing. This may be pre-existing failures unrelated to Buzz.");
    note(result.summary, "Test results");
  }
  return true;
}
|
|
1242
|
+
|
|
1243
|
+
// src/workflow/steps/05-mastra.ts
|
|
1244
|
+
/**
 * Workflow step 5: wire the Arthur exporter into a Mastra TypeScript app.
 *
 * Skips (returning false) when the detected framework is not Mastra; returns
 * true once the step has been handled — applied, already present, or declined.
 *
 * @param {object} state - Workflow state (analysis, repoPath, engineUrl, taskId, apiKey).
 * @returns {Promise<boolean>} false if this step does not apply; true otherwise.
 */
async function step5_InstrumentMastra(state) {
  const { analysis } = state;

  // This step only handles Mastra projects.
  if (analysis.framework !== "mastra") {
    p.log.info(buzzSay(`Framework detected as "${analysis.framework ?? "unknown"}" \u2014 skipping Mastra instrumentation.`));
    return false;
  }
  logSuccess(`Mastra TypeScript application detected.`);

  // Nothing to do when instrumentation is already in place.
  if (analysis.isInstrumented) {
    if (analysis.instrumentationType === "mastra-arthur-exporter") {
      logSuccess("Mastra Arthur exporter is already configured. All systems go.");
    } else {
      logSuccess(`Application is already instrumented (${analysis.instrumentationType}). Proceeding.`);
    }
    return true;
  }

  // Show the plan and get explicit consent before touching the repo.
  note(
    `Buzz will add the Mastra Arthur exporter to your application.

What will change:
\u2022 @mastra/arthur package installed (npm install @mastra/arthur)
\u2022 ArthurExporter registered in your Mastra instance observability config
\u2022 ARTHUR_BASE_URL, ARTHUR_API_KEY, ARTHUR_TASK_ID added to .env (with actual values) and .env.example (placeholders)

Arthur Engine URL: ${state.engineUrl}
Task ID: ${state.taskId}`,
    "Instrumentation plan"
  );
  const approved = await confirm("May Buzz add the Mastra Arthur exporter to your application?");
  if (!approved) {
    logSuccess("Instrumentation skipped by user request.");
    return true;
  }

  p.log.info(buzzSay("Initiating Mastra Arthur exporter launch sequence..."));
  console.log();
  const request = {
    repoPath: state.repoPath,
    type: "mastra-arthur-exporter",
    arthurEngineUrl: state.engineUrl,
    taskId: state.taskId,
    apiKey: state.apiKey
  };
  const result = await instrumentCodeWithClaude(request);
  console.log();

  if (!result.success) {
    logWarn(`Instrumentation may be incomplete. ${result.summary}`);
    note(
      "Review the changes Claude made and verify manually before proceeding.",
      "Manual verification needed"
    );
  } else {
    logSuccess(`Mastra exporter configured. ${result.summary}`);
  }
  // Succeeded overall but the repo's tests are red — flag it without failing.
  if (result.success && !result.testsPassed) {
    logWarn("Some tests are not passing. This may be pre-existing failures.");
    note(result.summary, "Test results");
  }
  return true;
}
|
|
1303
|
+
|
|
1304
|
+
// src/workflow/steps/06-other.ts
|
|
1305
|
+
/**
 * Workflow step 6: fallback instrumentation via OpenInference/OpenTelemetry
 * for applications not covered by the Python-SDK or Mastra steps.
 *
 * Always applies (never returns false); returns true once the step has been
 * handled — applied, already present, or declined by the user.
 *
 * @param {object} state - Workflow state (analysis, repoPath, engineUrl, taskId, apiKey).
 * @returns {Promise<boolean>} true when the step completes.
 */
async function step6_InstrumentOther(state) {
  const { analysis } = state;
  p.log.info(
    buzzSay(
      `Detected: ${analysis.language} application${analysis.framework ? ` (${analysis.framework})` : ""}. Applying OpenInference instrumentation.`
    )
  );

  // Nothing to do when instrumentation is already in place.
  if (analysis.isInstrumented) {
    if (analysis.instrumentationType === "openinference") {
      logSuccess("OpenInference instrumentation is already configured. All systems go.");
    } else {
      logSuccess(`Application is already instrumented (${analysis.instrumentationType}). Proceeding.`);
    }
    return true;
  }

  // Show the plan and get explicit consent before touching the repo.
  note(
    `Buzz will instrument your application with OpenInference and OpenTelemetry for Arthur.

What will change:
\u2022 OpenTelemetry OTLP exporter configured to send traces to Arthur
\u2022 OpenInference instrumentor added for your detected LLM framework
\u2022 ARTHUR_BASE_URL, ARTHUR_API_KEY, ARTHUR_TASK_ID added to .env (with actual values) and .env.example (placeholders)

Arthur Engine URL: ${state.engineUrl}
Task ID: ${state.taskId}

Reference: github.com/arthur-ai/arthur-engine/tree/dev/genai-engine/examples/agents`,
    "Instrumentation plan (OpenInference)"
  );
  const approved = await confirm("May Buzz instrument your application with OpenInference for Arthur?");
  if (!approved) {
    logSuccess("Instrumentation skipped by user request.");
    return true;
  }

  p.log.info(buzzSay("Initiating OpenInference instrumentation launch sequence..."));
  console.log();
  const request = {
    repoPath: state.repoPath,
    type: "openinference",
    arthurEngineUrl: state.engineUrl,
    taskId: state.taskId,
    apiKey: state.apiKey
  };
  const result = await instrumentCodeWithClaude(request);
  console.log();

  if (!result.success) {
    logWarn(`Instrumentation may be incomplete. ${result.summary}`);
    note(
      "Review the changes Claude made and verify manually.\nYou can also consult the examples at:\ngithub.com/arthur-ai/arthur-engine/tree/dev/genai-engine/examples/agents",
      "Manual verification needed"
    );
  } else {
    logSuccess(`OpenInference instrumentation applied. ${result.summary}`);
  }
  // Succeeded overall but the repo's tests are red — flag it without failing.
  if (result.success && !result.testsPassed) {
    logWarn("Some tests are not passing. This may be pre-existing failures.");
    note(result.summary, "Test results");
  }
  return true;
}
|
|
1366
|
+
|
|
1367
|
+
// src/workflow/steps/07-prompts.ts
|
|
1368
|
+
import { query as query2 } from "@anthropic-ai/claude-agent-sdk";
|
|
1369
|
+
import ora5 from "ora";
|
|
1370
|
+
// Human-readable display names for the model providers Arthur Engine
// supports; used in the interactive messages of the prompt-registration
// step below.
var PROVIDER_LABELS = {
  openai: "OpenAI",
  anthropic: "Anthropic",
  gemini: "Google Gemini",
  bedrock: "AWS Bedrock",
  vertex_ai: "Google Vertex AI"
};
|
|
1377
|
+
// Default model per supported provider. This object doubles as the
// registry of providers Arthur Engine supports (see isSupportedProvider).
var MODEL_DEFAULTS = {
  openai: "gpt-4o",
  anthropic: "claude-3-5-haiku-20241022",
  gemini: "gemini-1.5-flash",
  bedrock: "anthropic.claude-3-haiku-20240307-v1:0",
  vertex_ai: "gemini-1.5-flash"
};
/**
 * Whether `provider` is a provider Arthur Engine supports.
 *
 * Fix: uses Object.hasOwn instead of the `in` operator. `in` also matches
 * inherited Object.prototype keys, so an LLM-extracted provider string
 * such as "toString" or "constructor" was previously reported as
 * "supported" and would later resolve MODEL_DEFAULTS[provider] to a
 * built-in function instead of a model name.
 *
 * @param {string} provider - provider identifier as detected/extracted.
 * @returns {boolean} true only for MODEL_DEFAULTS' own keys.
 */
function isSupportedProvider(provider) {
  return Object.hasOwn(MODEL_DEFAULTS, provider);
}
|
|
1387
|
+
var EXTRACTION_INSTRUCTIONS = `You are Buzz's prompt extraction module. Your job is to analyze an agentic
|
|
1388
|
+
application's repository and extract all prompt definitions \u2014 system prompts, user prompt templates,
|
|
1389
|
+
and agent instructions.
|
|
1390
|
+
|
|
1391
|
+
Use your tools to thoroughly examine the codebase:
|
|
1392
|
+
1. Glob to see all files
|
|
1393
|
+
2. Read manifests and key source files
|
|
1394
|
+
3. Grep for prompt patterns: system_prompt, SYSTEM_PROMPT, systemPrompt, messages, ChatCompletion, etc.
|
|
1395
|
+
|
|
1396
|
+
Look for:
|
|
1397
|
+
- System prompt strings assigned to variables (any language)
|
|
1398
|
+
- User prompt templates (strings with {variables} or {{variables}})
|
|
1399
|
+
- Multi-turn message arrays in OpenAI format ([{"role": "system", ...}, {"role": "user", ...}])
|
|
1400
|
+
- Prompt files (.txt, .md, .jinja2, .j2) that contain prompt templates
|
|
1401
|
+
- Agent instruction strings passed to agent/chain initialization
|
|
1402
|
+
|
|
1403
|
+
Also detect the LLM model and provider the application uses (e.g. from openai.ChatCompletion calls,
|
|
1404
|
+
from anthropic.Anthropic(), from model= parameters, from environment variable names like OPENAI_API_KEY).
|
|
1405
|
+
Report the provider exactly as detected (e.g. "azure" if the app uses Azure OpenAI).
|
|
1406
|
+
|
|
1407
|
+
Return ONLY a raw JSON object (no markdown, no explanation):
|
|
1408
|
+
{
|
|
1409
|
+
"prompts": [
|
|
1410
|
+
{
|
|
1411
|
+
"name": "kebab-case-prompt-name",
|
|
1412
|
+
"messages": [
|
|
1413
|
+
{"role": "system", "content": "..."},
|
|
1414
|
+
{"role": "user", "content": "..."}
|
|
1415
|
+
],
|
|
1416
|
+
"model_name": "gpt-4o" | null,
|
|
1417
|
+
"model_provider": "openai" | "anthropic" | "gemini" | "bedrock" | "vertex_ai" | "azure" | null
|
|
1418
|
+
}
|
|
1419
|
+
],
|
|
1420
|
+
"detected_model_name": "gpt-4o" | null,
|
|
1421
|
+
"detected_model_provider": "openai" | "anthropic" | "gemini" | "bedrock" | "vertex_ai" | "azure" | null
|
|
1422
|
+
}
|
|
1423
|
+
|
|
1424
|
+
Rules:
|
|
1425
|
+
- Only include prompts with actual substantive content (not empty strings or placeholders)
|
|
1426
|
+
- For user prompt templates with variables, convert ALL template variables to {{double_brace}} format regardless of how they appear in source (e.g. {text} \u2192 {{text}}, %(text)s \u2192 {{text}})
|
|
1427
|
+
- The "name" must be unique, lowercase, kebab-case, and descriptive (e.g. "customer-support-agent", "summarization-prompt")
|
|
1428
|
+
- If the same prompt appears multiple times (e.g. in tests), include it only once
|
|
1429
|
+
- model_name and model_provider at prompt level: use values detected for that specific prompt call if known, else null
|
|
1430
|
+
- detected_model_name/detected_model_provider: the primary model the app uses overall
|
|
1431
|
+
- If no prompts are found, return {"prompts": [], "detected_model_name": null, "detected_model_provider": null}`;
|
|
1432
|
+
/**
 * Canonicalize extracted prompt text: collapse runs of three or more
 * newlines down to one blank line, rewrite single-brace template
 * variables ({var}) to double-brace form ({{var}}) while leaving
 * already-doubled {{var}} untouched, then strip surrounding whitespace.
 */
function normalizeMessageContent(content) {
  const collapsed = content.replace(/\n{3,}/g, "\n\n");
  const doubled = collapsed.replace(/(?<!\{)\{([^{}]+)\}(?!\})/g, "{{$1}}");
  return doubled.trim();
}
|
|
1437
|
+
/**
 * Pull a JSON payload out of free-form model output. Preference order:
 * the body of a ``` / ```json fenced block, then the widest {...} span,
 * then the whole text trimmed.
 */
function extractJSON(text2) {
  const fenced = /```(?:json)?\s*([\s\S]+?)```/.exec(text2);
  if (fenced !== null) {
    return fenced[1].trim();
  }
  const braced = /\{[\s\S]+\}/.exec(text2);
  return braced !== null ? braced[0] : text2.trim();
}
|
|
1444
|
+
/**
 * Run a read-only Claude agent over the repository to extract prompt
 * definitions plus the detected LLM model/provider.
 *
 * Streams the agent's assistant text blocks into a single string, parses
 * the JSON payload out of it with extractJSON, and canonicalizes every
 * message's content via normalizeMessageContent.
 *
 * @param repoPath repository root; also used as the agent's cwd.
 * @returns extraction result ({ prompts, detected_model_name,
 *   detected_model_provider }); on ANY failure falls back to an empty
 *   result rather than throwing.
 */
async function extractPromptsWithClaude(repoPath) {
  try {
    const stream = query2({
      prompt: `Extract all prompts from the agentic application at: ${repoPath}

Use your tools to examine the files thoroughly, then return the JSON result.`,
      options: {
        cwd: repoPath,
        // Read-only tools: the extraction pass must not modify the repo.
        allowedTools: ["Read", "Glob", "Grep"],
        systemPrompt: EXTRACTION_INSTRUCTIONS,
        maxTurns: 5
      }
    });
    // Concatenate all assistant text blocks; the JSON may be split
    // across several messages.
    let fullOutput = "";
    for await (const message of stream) {
      if (message.type === "assistant") {
        const content = message.message?.content ?? [];
        for (const block of content) {
          if (block.type === "text" && block.text) {
            fullOutput += block.text;
          }
        }
      }
    }
    const result = JSON.parse(extractJSON(fullOutput));
    // Normalize every message's content in place (template variables,
    // blank-line runs, surrounding whitespace).
    result.prompts = result.prompts.map((prompt) => ({
      ...prompt,
      messages: prompt.messages.map((msg) => ({
        ...msg,
        content: normalizeMessageContent(msg.content)
      }))
    }));
    return result;
  } catch {
    // Deliberate best-effort: prompt extraction is optional, so any error
    // (agent failure, unparseable JSON) collapses to "no prompts found"
    // instead of aborting the workflow.
    return { prompts: [], detected_model_name: null, detected_model_provider: null };
  }
}
|
|
1481
|
+
/**
 * Make sure `provider` is enabled on the Arthur Engine before prompts are
 * registered against it. If it is not, offer to collect an API key and
 * configure it now; declining or a failed configuration only warns —
 * prompt registration proceeds either way.
 *
 * @param client ArthurEngineClient (project type).
 * @param provider supported provider id (key of PROVIDER_LABELS).
 * @param model model name to mention in the confirmation notes; may be null.
 */
async function ensureProviderConfiguredInEngine(client, provider, model) {
  const spinner = ora5({ text: buzzSay(`Checking if ${PROVIDER_LABELS[provider] ?? provider} is configured in Arthur Engine...`), color: "cyan" }).start();
  const providers = await client.getModelProviders();
  spinner.stop();
  const isConfigured = providers.some((p3) => p3.provider === provider && p3.enabled);
  if (isConfigured) {
    logSuccess(`${PROVIDER_LABELS[provider] ?? provider} is already configured in Arthur Engine.`);
    note(
      `Prompts will be registered with model: ${model ?? "default"} (${PROVIDER_LABELS[provider] ?? provider})`,
      "Prompt model provider"
    );
    return;
  }
  logWarn(`${PROVIDER_LABELS[provider] ?? provider} is not yet configured in Arthur Engine.`);
  note(
    "Arthur Engine needs the API key for this provider to be able to run the prompts.\nYou can skip this and configure it later in the Arthur Engine UI under Settings \u2192 Model Providers.",
    "Model provider required"
  );
  const wantsConfigure = await confirm(
    `Configure ${PROVIDER_LABELS[provider] ?? provider} in Arthur Engine now?`
  );
  if (!wantsConfigure) {
    // Registration still happens; the prompts just cannot run yet.
    logWarn("Skipping provider configuration. Prompts will be registered but cannot be run until a provider is configured.");
    return;
  }
  // Masked input — provider API keys must not be echoed to the terminal.
  const apiKey = await password(`Enter your ${PROVIDER_LABELS[provider] ?? provider} API key:`);
  const configSpinner = ora5({ text: buzzSay(`Configuring ${PROVIDER_LABELS[provider] ?? provider}...`), color: "cyan" }).start();
  const result = await client.configureModelProvider(provider, { api_key: apiKey });
  configSpinner.stop();
  if (!result.success) {
    logError(`Failed to configure ${PROVIDER_LABELS[provider] ?? provider}: ${result.error}`);
    logWarn("Prompts will still be registered. Configure the provider manually in Arthur Engine UI.");
  } else {
    logSuccess(`${PROVIDER_LABELS[provider] ?? provider} configured in Arthur Engine.`);
    note(
      `Prompts will be registered with model: ${model ?? "default"} (${PROVIDER_LABELS[provider] ?? provider})`,
      "Prompt model provider ready"
    );
  }
}
|
|
1521
|
+
/**
 * Step 7: extract prompts from the repository with Claude and register
 * them in Arthur Engine for version tracking.
 *
 * Flow: extract -> record detected model/provider on state -> bail if no
 * prompts -> list findings and confirm -> resolve a supported fallback
 * provider (detected, or user-selected when the detected one is
 * unsupported/absent) -> ensure the provider is configured on the engine
 * -> register each prompt individually -> summarize.
 *
 * @param state workflow state; reads repoPath/engineUrl/apiKey/taskId and
 *   writes promptModelProvider / promptModelName.
 */
async function step7_ExtractAndRegisterPrompts(state) {
  const spinner = ora5({ text: buzzSay("Scanning codebase for prompts..."), color: "cyan" }).start();
  const extraction = await extractPromptsWithClaude(state.repoPath);
  spinner.stop();
  // Remember the detected model/provider even if registration is skipped
  // later; other steps read these off state.
  if (extraction.detected_model_provider && isSupportedProvider(extraction.detected_model_provider)) {
    state.promptModelProvider = extraction.detected_model_provider;
    state.promptModelName = extraction.detected_model_name ?? MODEL_DEFAULTS[extraction.detected_model_provider] ?? null;
  }
  if (extraction.prompts.length === 0) {
    p.log.info(buzzSay("No prompts detected in the repository. Skipping prompt registration."));
    return;
  }
  logInfo(`Found ${extraction.prompts.length} prompt(s) in your codebase:`);
  for (const [i, prompt] of extraction.prompts.entries()) {
    // e.g. "system -> user" summary of the message roles.
    const rolesSummary = prompt.messages.map((m) => m.role).join(" \u2192 ");
    p.log.message(` ${i + 1}. ${prompt.name} (${rolesSummary})`);
  }
  const shouldRegister = await confirm(
    `Should Buzz register these ${extraction.prompts.length} prompt(s) in Arthur Engine for version tracking?`
  );
  if (!shouldRegister) {
    logInfo("Prompt registration skipped. You can add prompts manually in the Arthur Engine UI.");
    return;
  }
  // Resolve the fallback provider/model used for prompts that carry no
  // per-prompt detection.
  const detectedRaw = extraction.detected_model_provider;
  let fallbackProvider = null;
  let fallbackModel = extraction.detected_model_name;
  if (detectedRaw && isSupportedProvider(detectedRaw)) {
    fallbackProvider = detectedRaw;
  } else if (detectedRaw) {
    // Detected something (e.g. "azure") that the engine cannot run.
    logWarn(`Your app uses ${detectedRaw}, which isn't currently supported by Arthur Engine.`);
    note(
      "Arthur Engine supports: OpenAI, Anthropic, Google Gemini, AWS Bedrock, Google Vertex AI.\nTo register prompts you'll need to configure one of these providers.",
      "Unsupported provider detected"
    );
  }
  if (!fallbackProvider) {
    // Ask the user to pick a supported provider (or skip entirely).
    const choice = await select(
      "Which supported provider should Arthur Engine use to run your prompts?",
      [
        { value: "openai", label: "OpenAI", hint: "gpt-4o, gpt-4o-mini, ..." },
        { value: "anthropic", label: "Anthropic", hint: "claude-3-5-haiku, claude-3-5-sonnet, ..." },
        { value: "gemini", label: "Google Gemini", hint: "gemini-1.5-flash, gemini-1.5-pro, ..." },
        { value: "bedrock", label: "AWS Bedrock", hint: "anthropic.claude-3-haiku, ..." },
        { value: "vertex_ai", label: "Google Vertex AI", hint: "gemini-1.5-flash, ..." },
        { value: "skip", label: "Skip \u2014 I'll set it later", hint: "Prompts will not be registered until a provider is configured" }
      ]
    );
    if (choice !== "skip") {
      fallbackProvider = choice;
      fallbackModel = MODEL_DEFAULTS[choice] ?? null;
      state.promptModelProvider = fallbackProvider;
      state.promptModelName = fallbackModel;
    }
  }
  if (!fallbackProvider) {
    note(
      "Prompt registration requires a model provider.\nConfigure one in the Arthur Engine UI under Settings \u2192 Model Providers, then re-run Buzz to register your prompts.",
      "Prompt registration skipped"
    );
    return;
  }
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  await ensureProviderConfiguredInEngine(client, fallbackProvider, fallbackModel);
  let created = 0;
  for (const prompt of extraction.prompts) {
    // Per-prompt provider/model, falling back to the resolved defaults;
    // "openai"/"gpt-4o" are the last-resort hard defaults.
    const rawProvider = prompt.model_provider ?? fallbackProvider ?? "openai";
    const provider = isSupportedProvider(rawProvider) ? rawProvider : fallbackProvider ?? "openai";
    const model = prompt.model_name ?? fallbackModel ?? MODEL_DEFAULTS[provider] ?? "gpt-4o";
    const promptSpinner = ora5({
      text: buzzSay(`Registering prompt: ${prompt.name}...`),
      color: "cyan"
    }).start();
    const result = await client.createPrompt(state.taskId, prompt.name, {
      messages: prompt.messages,
      model_name: model,
      model_provider: provider
    });
    promptSpinner.stop();
    if (result.error) {
      // Per-prompt failure is non-fatal; continue with the rest.
      logError(`Failed to register "${prompt.name}": ${result.error}`);
    } else {
      logSuccess(`Registered prompt: ${prompt.name} (v${result.prompt?.version ?? 1})`);
      created++;
    }
  }
  if (created > 0) {
    logSuccess(`${created} prompt(s) registered in Arthur Engine.`);
    note(
      `View and manage your prompts in Arthur Engine:
${state.engineUrl}`,
      "Prompts registered"
    );
  } else {
    logWarn("No prompts were registered. You can add them manually in the Arthur Engine UI.");
  }
}
|
|
1618
|
+
|
|
1619
|
+
// src/workflow/steps/08-verify.ts
|
|
1620
|
+
import ora6 from "ora";
|
|
1621
|
+
// How often pollForTraces re-queries the engine (3 s).
var POLL_INTERVAL_MS = 3e3;
// Total polling budget: 120 s when BUZZ_CI=true (slower CI environments),
// 60 s otherwise.
var POLL_MAX_MS = process.env.BUZZ_CI === "true" ? 12e4 : 6e4;
|
|
1623
|
+
/**
 * Poll Arthur Engine until at least one trace exists for `taskId` or the
 * POLL_MAX_MS budget runs out, updating a spinner with elapsed time.
 *
 * @param client ArthurEngineClient (project type) exposing getTraces().
 * @param taskId task whose traces we are waiting for.
 * @returns { found: true } on success, otherwise { found: false,
 *   lastError } where lastError is the most recent engine error (if any).
 */
async function pollForTraces(client, taskId) {
  const spinner = ora6({ text: buzzSay("Checking for traces in Arthur..."), color: "cyan" }).start();
  const start = Date.now();
  let lastError;
  while (Date.now() - start < POLL_MAX_MS) {
    const result = await client.getTraces(taskId);
    if (result.traces.length > 0) {
      spinner.stop();
      logSuccess(`We have signal! ${result.traces.length} trace(s) detected.`);
      return { found: true };
    }
    if (result.error) {
      // Remember the latest error so the caller can surface it on timeout.
      lastError = result.error;
    }
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
    spinner.text = buzzSay(`Scanning for traces... (${Math.round((Date.now() - start) / 1e3)}s)`);
  }
  spinner.stop();
  // Fix: report the actual timeout instead of a hard-coded "60 seconds",
  // which was wrong whenever BUZZ_CI extends POLL_MAX_MS to 120 s.
  logWarn(`No traces detected after ${Math.round(POLL_MAX_MS / 1e3)} seconds.`);
  return { found: false, lastError };
}
|
|
1644
|
+
/**
 * Step 8: verify instrumentation end-to-end by asking the user to run
 * their agent, then polling the engine for traces (with one manual retry
 * round if nothing shows up).
 *
 * @param state workflow state; reads engineUrl, apiKey, taskId.
 */
async function step8_VerifyInstrumentation(state) {
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  // Local installs get an extra hint about where to find the admin key.
  const isLocal = state.engineUrl?.includes("localhost") || state.engineUrl?.includes("127.0.0.1");
  note(
    `To verify instrumentation, run your agentic application now so it sends traces to Arthur.

Make sure to set these environment variables before running:
ARTHUR_API_KEY=<your-api-key>
ARTHUR_BASE_URL=${state.engineUrl}
ARTHUR_TASK_ID=${state.taskId}`,
    "Run your agent"
  );
  const ready = await confirm("Press Enter / confirm when your agent has run at least once");
  if (!ready) {
    // User opted out of verification; nothing else to do in this step.
    return;
  }
  const { found: traced, lastError } = await pollForTraces(client, state.taskId);
  if (traced) {
    // GENAI_ENGINE_ENV_PATH is a constant defined elsewhere in this
    // bundle — presumably the local engine's .env path; verify there.
    const apiKeyHint = isLocal ? `

API key hint (local install):
cat ${GENAI_ENGINE_ENV_PATH} | grep GENAI_ENGINE_ADMIN_KEY` : "";
    note(
      `Login to Arthur GenAI Engine at:
${state.engineUrl}

Your Task ID: ${state.taskId}${apiKeyHint}`,
      "Arthur GenAI Engine is ready"
    );
    logSuccess("Instrumentation confirmed. Your traces are flowing to Arthur.");
  } else {
    logWarn("No traces detected. Instrumentation may not be sending data yet.");
    const errorHint = lastError ? `

Last error from engine:
${lastError}` : "";
    note(
      "Troubleshooting checklist:\n 1. Did your application run and make at least one LLM call?\n 2. Is ARTHUR_API_KEY set correctly in your environment?\n 3. Is ARTHUR_BASE_URL pointing to: " + state.engineUrl + "?\n 4. Is ARTHUR_TASK_ID set to: " + state.taskId + "?\n 5. Check your application logs for any OpenTelemetry export errors." + errorHint,
      "No traces detected"
    );
    // One manual retry round after the user has had a chance to fix things.
    const retry = await confirm("Would you like to check for traces again?");
    if (retry) {
      const { found: retried, lastError: retryError } = await pollForTraces(client, state.taskId);
      if (retried) {
        logSuccess("Instrumentation confirmed. Your traces are flowing to Arthur.");
        note(
          `Login at: ${state.engineUrl}
Task ID: ${state.taskId}`,
          "Arthur GenAI Engine is ready"
        );
      } else {
        logWarn("Still no traces. Please review the troubleshooting checklist above.");
        const retryErrorHint = retryError ? `

Last error from engine:
${retryError}` : "";
        note(
          `Arthur Engine URL: ${state.engineUrl}
Task ID: ${state.taskId}` + retryErrorHint,
          "Manual verification needed"
        );
      }
    }
  }
}
|
|
1709
|
+
|
|
1710
|
+
// src/workflow/steps/09-model-provider.ts
|
|
1711
|
+
import ora7 from "ora";
|
|
1712
|
+
// Providers Buzz prefers for running evals, most-preferred first.
var PROVIDER_PRIORITY = ["openai", "anthropic", "gemini", "bedrock", "vertex_ai"];
// Display names for the supported eval providers.
var PROVIDER_LABELS2 = {
  openai: "OpenAI",
  anthropic: "Anthropic",
  gemini: "Google Gemini (AI Studio)",
  bedrock: "AWS Bedrock",
  vertex_ai: "Google Vertex AI"
};
// Default eval model per provider.
var MODEL_DEFAULTS2 = {
  openai: "gpt-4o",
  anthropic: "claude-3-5-haiku-20241022",
  gemini: "gemini-1.5-flash",
  bedrock: "anthropic.claude-3-haiku-20240307-v1:0",
  vertex_ai: "gemini-1.5-flash"
};
/**
 * Choose the highest-priority provider that is both enabled on the engine
 * and has a known default model.
 *
 * @param providers engine provider records ({ provider, enabled, ... }).
 * @returns { provider, model } for the winner, or null when none qualify.
 */
function pickHighestPriorityProvider(providers) {
  const enabledNames = new Set(
    providers.filter((entry) => entry.enabled).map((entry) => entry.provider)
  );
  const winner = PROVIDER_PRIORITY.find(
    (name) => enabledNames.has(name) && MODEL_DEFAULTS2[name]
  );
  return winner ? { provider: winner, model: MODEL_DEFAULTS2[winner] } : null;
}
|
|
1737
|
+
/**
 * Interactively configure a new eval model provider on the Arthur Engine:
 * pick a provider, collect its API key (masked), and submit it.
 *
 * @param client ArthurEngineClient (project type).
 * @returns { provider, model } on success, or null when the user skips or
 *   the engine rejects the configuration.
 */
async function configureNewProvider(client) {
  const provider = await select(
    "Which LLM provider should Arthur use to run evaluations?",
    [
      { value: "openai", label: "OpenAI", hint: "gpt-4o, gpt-4o-mini, ..." },
      { value: "anthropic", label: "Anthropic", hint: "claude-3-5-haiku, claude-3-5-sonnet, ..." },
      { value: "gemini", label: "Google Gemini", hint: "gemini-1.5-flash, gemini-1.5-pro, ..." },
      { value: "bedrock", label: "AWS Bedrock", hint: "anthropic.claude-3-haiku, ..." },
      { value: "vertex_ai", label: "Google Vertex AI", hint: "gemini-1.5-flash, ..." },
      { value: "skip", label: "Skip for now", hint: "Configure manually in Arthur Engine UI \u2014 evals will be skipped" }
    ]
  );
  if (provider === "skip") {
    note(
      "You can configure a model provider in the Arthur Engine UI under Settings \u2192 Model Providers.",
      "Eval model provider skipped"
    );
    return null;
  }
  // Masked input — provider API keys must not be echoed to the terminal.
  const apiKey = await password(`Enter your ${PROVIDER_LABELS2[provider]} API key:`);
  const spinner = ora7({ text: buzzSay(`Configuring ${PROVIDER_LABELS2[provider]}...`), color: "cyan" }).start();
  const result = await client.configureModelProvider(provider, { api_key: apiKey });
  spinner.stop();
  if (!result.success) {
    logError(`Failed to configure ${PROVIDER_LABELS2[provider]}: ${result.error}`);
    note(
      "You can configure a model provider manually in the Arthur Engine UI under Settings \u2192 Model Providers.",
      "Configuration failed"
    );
    return null;
  }
  logSuccess(`${PROVIDER_LABELS2[provider]} configured successfully.`);
  return { provider, model: MODEL_DEFAULTS2[provider] };
}
|
|
1771
|
+
/**
 * Step 9: choose which model provider Arthur should use to run
 * evaluations, writing the choice to state.evalModelProvider /
 * state.evalModelName.
 *
 * Branches: exactly one supported provider enabled -> confirm it (or
 * configure another); several enabled -> pick from a list (or configure a
 * new one); none enabled -> offer to configure one from scratch. Every
 * "no provider" exit only warns — eval recommendations are skipped, the
 * workflow continues.
 */
async function step9_SelectEvalModelProvider(state) {
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  const spinner = ora7({ text: buzzSay("Checking available model providers for evals..."), color: "cyan" }).start();
  const providers = await client.getModelProviders();
  spinner.stop();
  // auto = best enabled+supported provider per PROVIDER_PRIORITY, or null.
  const auto = pickHighestPriorityProvider(providers);
  if (auto) {
    const enabledSupported = providers.filter((p3) => p3.enabled && MODEL_DEFAULTS2[p3.provider]);
    logInfo(`Model providers configured on this Arthur Engine:`);
    for (const mp of enabledSupported) {
      logInfo(` \u2022 ${PROVIDER_LABELS2[mp.provider] ?? mp.provider}`);
    }
    if (enabledSupported.length === 1) {
      // Single candidate: just confirm it.
      const useAuto = await confirm(
        `Use ${PROVIDER_LABELS2[auto.provider] ?? auto.provider} (${auto.model}) to run evaluations?`
      );
      if (useAuto) {
        state.evalModelProvider = auto.provider;
        state.evalModelName = auto.model;
        logSuccess(`Eval model set: ${auto.model} via ${PROVIDER_LABELS2[auto.provider] ?? auto.provider}`);
        return;
      }
      // User declined the only configured provider: offer to add another.
      const result2 = await configureNewProvider(client);
      if (result2) {
        state.evalModelProvider = result2.provider;
        state.evalModelName = result2.model;
        logSuccess(`Eval model set: ${result2.model} via ${PROVIDER_LABELS2[result2.provider] ?? result2.provider}`);
      } else {
        logWarn("No eval model provider selected. Eval recommendations will be skipped.");
      }
      return;
    }
    // Multiple candidates: let the user choose, with a "configure new"
    // escape hatch appended.
    const options = enabledSupported.map((p3) => ({
      value: p3.provider,
      label: PROVIDER_LABELS2[p3.provider] ?? p3.provider,
      hint: MODEL_DEFAULTS2[p3.provider]
    }));
    options.push({ value: "new", label: "Configure a different provider", hint: "Add API key for a new provider" });
    const chosen = await select("Which provider should run evaluations?", options);
    if (chosen === "new") {
      const result2 = await configureNewProvider(client);
      if (result2) {
        state.evalModelProvider = result2.provider;
        state.evalModelName = result2.model;
        logSuccess(`Eval model set: ${result2.model} via ${PROVIDER_LABELS2[result2.provider] ?? result2.provider}`);
      } else {
        logWarn("No eval model provider selected. Eval recommendations will be skipped.");
      }
    } else {
      state.evalModelProvider = chosen;
      state.evalModelName = MODEL_DEFAULTS2[chosen] ?? null;
      logSuccess(`Eval model set: ${state.evalModelName} via ${PROVIDER_LABELS2[chosen] ?? chosen}`);
    }
    return;
  }
  // Nothing configured on the engine yet.
  logWarn("No model providers are configured on this Arthur Engine yet.");
  const wantsSetup = await confirm(
    "Would you like to configure one now so Buzz can recommend evals?"
  );
  if (!wantsSetup) {
    note(
      "Eval recommendations require a model provider.\nConfigure one in the Arthur Engine UI under Settings \u2192 Model Providers, then re-run Buzz.",
      "No eval model provider"
    );
    return;
  }
  const result = await configureNewProvider(client);
  if (result) {
    state.evalModelProvider = result.provider;
    state.evalModelName = result.model;
    logSuccess(`Eval model set: ${result.model} via ${PROVIDER_LABELS2[result.provider] ?? result.provider}`);
  } else {
    logWarn("No eval model provider configured. Eval recommendations will be skipped.");
  }
}
|
|
1846
|
+
|
|
1847
|
+
// src/workflow/steps/10-evals.ts
|
|
1848
|
+
import ora8 from "ora";
|
|
1849
|
+
|
|
1850
|
+
// src/mastra/eval-recommender.ts
|
|
1851
|
+
import { query as query3 } from "@anthropic-ai/claude-agent-sdk";
|
|
1852
|
+
var INSTRUCTIONS = `You are an Arthur GenAI Engine eval analyst. Given a sample trace from a user's
|
|
1853
|
+
agentic application (input prompts + model outputs) and their framework info, recommend 2-4
|
|
1854
|
+
continuous eval configurations that provide the most meaningful quality monitoring.
|
|
1855
|
+
|
|
1856
|
+
Each eval uses a Jinja2 template where {{ input }} is the user's prompt and {{ output }} is the
|
|
1857
|
+
model's response. Instructions must end with a clear scoring directive asking the evaluator LLM
|
|
1858
|
+
to return a JSON object with "score" (0.0-1.0, where 1.0 = fully passes) and "reason" (brief explanation).
|
|
1859
|
+
|
|
1860
|
+
IMPORTANT \u2014 Hallucination / Faithfulness evals:
|
|
1861
|
+
Only recommend a hallucination or faithfulness eval if the prompt explicitly states
|
|
1862
|
+
"Retrieval context available: YES" AND the trace includes reference material (retrieved
|
|
1863
|
+
documents, ground truth passages, or source context) that the model output should be faithful to.
|
|
1864
|
+
If the trace only shows a user input and a model response with no reference material, do NOT
|
|
1865
|
+
recommend hallucination or faithfulness evals \u2014 they cannot be evaluated correctly without a
|
|
1866
|
+
reference context to compare against.
|
|
1867
|
+
|
|
1868
|
+
If the user already has evals covering the main quality dimensions, it is acceptable to return an empty recommendations array if no additional high-value evals remain.
|
|
1869
|
+
|
|
1870
|
+
Consider these common eval types and adapt them to the application's context:
|
|
1871
|
+
- Relevance: does the output address what the input asked?
|
|
1872
|
+
- Faithfulness / Hallucination: does the output contain unsupported claims? (ONLY if retrieval context is available)
|
|
1873
|
+
- Toxicity: does the output contain harmful or offensive content?
|
|
1874
|
+
- Conciseness: is the output appropriately concise?
|
|
1875
|
+
- Task completion: did the agent accomplish what was asked?
|
|
1876
|
+
|
|
1877
|
+
Choose evals that fit the actual use case evident from the trace. For example:
|
|
1878
|
+
- Customer support apps \u2192 relevance, tone, task completion
|
|
1879
|
+
- RAG / Q&A apps \u2192 faithfulness, relevance (only when retrieval context is confirmed present)
|
|
1880
|
+
- Code assistants \u2192 correctness, clarity
|
|
1881
|
+
- General chat \u2192 relevance, toxicity
|
|
1882
|
+
|
|
1883
|
+
Return ONLY a raw JSON object with no markdown fences, no preamble, no explanation:
|
|
1884
|
+
{
|
|
1885
|
+
"recommendations": [
|
|
1886
|
+
{
|
|
1887
|
+
"slug": "kebab-case-slug",
|
|
1888
|
+
"displayName": "Human Readable Name",
|
|
1889
|
+
"rationale": "One sentence explaining why this eval matters for this application",
|
|
1890
|
+
"instructions": "Evaluate the following LLM interaction...
|
|
1891
|
+
|
|
1892
|
+
Input: {{ input }}
|
|
1893
|
+
Output: {{ output }}
|
|
1894
|
+
|
|
1895
|
+
Return a JSON object: {"score": <0.0-1.0>, "reason": "<brief explanation>"}"
|
|
1896
|
+
}
|
|
1897
|
+
]
|
|
1898
|
+
}`;
|
|
1899
|
+
/**
 * Pull a JSON payload out of free-form model output: prefer the body of a
 * ``` / ```json fenced block, else the widest {...} span, else the whole
 * text trimmed. (Bundler-duplicated twin of extractJSON from the prompts
 * module.)
 */
function extractJSON2(text2) {
  const fencedBody = text2.match(/```(?:json)?\s*([\s\S]+?)```/)?.[1];
  if (fencedBody !== undefined) {
    return fencedBody.trim();
  }
  return text2.match(/\{[\s\S]+\}/)?.[0] ?? text2.trim();
}
|
|
1906
|
+
/**
 * Ask Claude (single turn, no tools) to recommend continuous eval
 * configurations for one sample trace.
 *
 * Builds a context prompt (framework/language, provider, span name,
 * whether retrieval context exists, any existing evals to avoid
 * duplicating), streams the assistant text, and parses the JSON reply.
 *
 * @param traceContent pre-built INPUT/OUTPUT(/retrieval) text for one trace.
 * @param spanName name of the span the content came from.
 * @param framework detected framework, or falsy when unknown.
 * @param language detected language.
 * @param modelProvider eval provider the engine will run evals with.
 * @param hasRetrievalContext gates hallucination/faithfulness evals.
 * @param existingEvals already-configured evals ({ name, instructions? }).
 * @returns { ok: true, recommendations } or { ok: false, reason } —
 *   never throws.
 */
async function recommendEvals(traceContent, spanName, framework, language, modelProvider, hasRetrievalContext, existingEvals = []) {
  const frameworkNote = framework ? `Framework: ${framework} (${language})` : `Language: ${language}`;
  // List existing evals (instructions truncated to 300 chars) so the
  // model does not recommend duplicates.
  const existingEvalsNote = existingEvals.length > 0 ? `
Existing evals already configured (DO NOT recommend these or any functionally equivalent eval):
` + existingEvals.map(
    (e) => e.instructions ? `- ${e.name}
Instructions: ${e.instructions.slice(0, 300)}` : `- ${e.name}`
  ).join("\n") + "\n" : "";
  const prompt = `Analyze this trace and recommend the most impactful continuous evals:

${frameworkNote}
Eval model provider available: ${modelProvider}
Span name: ${spanName}
Retrieval context available: ${hasRetrievalContext ? "YES" : "NO"}
${existingEvalsNote}
Trace content:
${traceContent}`;
  try {
    const stream = query3({
      prompt,
      options: {
        // Pure text completion: no tools, single turn.
        allowedTools: [],
        systemPrompt: INSTRUCTIONS,
        maxTurns: 1
      }
    });
    // Concatenate all assistant text blocks; the JSON may be split
    // across several messages.
    let fullOutput = "";
    for await (const message of stream) {
      if (message.type === "assistant") {
        const content = message.message?.content ?? [];
        for (const block of content) {
          if (block.type === "text" && block.text) {
            fullOutput += block.text;
          }
        }
      }
    }
    const jsonText = extractJSON2(fullOutput);
    const parsed = JSON.parse(jsonText);
    if (!Array.isArray(parsed.recommendations)) {
      return { ok: false, reason: "Claude returned no eval recommendations for this trace." };
    }
    return { ok: true, recommendations: parsed };
  } catch (err) {
    // Convert any failure (API error, unparseable JSON) into a result
    // object so the calling step can degrade gracefully.
    const message = err instanceof Error ? err.message : String(err);
    return { ok: false, reason: `Claude API error: ${message}` };
  }
}
|
|
1954
|
+
|
|
1955
|
+
// src/workflow/steps/10-evals.ts
// Case-insensitive keywords that mark a span as retrieval-related by name.
var RETRIEVAL_SPAN_PATTERN = /retriev|search|fetch|document|rag/i;
// A span counts as retrieval if its kind is RETRIEVER, or its (non-null) name
// matches one of the retrieval keywords.
function isRetrievalSpan(span) {
  if (span.span_kind === "RETRIEVER") {
    return true;
  }
  return span.span_name != null && RETRIEVAL_SPAN_PATTERN.test(span.span_name);
}
|
|
1960
|
+
// Flatten a span forest into a single depth-first, preorder list
// (each parent immediately precedes its descendants).
function flattenSpans(spans) {
  const flattened = [];
  const visit = (span) => {
    flattened.push(span);
    for (const child of span.children ?? []) {
      visit(child);
    }
  };
  for (const root of spans) {
    visit(root);
  }
  return flattened;
}
|
|
1970
|
+
// Return the first span (in depth-first preorder) that carries any input or
// output content, or null when the trace has no such span.
function extractBestSpan(trace) {
  for (const span of flattenSpans(trace.root_spans ?? [])) {
    if (span.input_content || span.output_content) {
      return span;
    }
  }
  return null;
}
|
|
1974
|
+
// Build the plain-text trace summary that is fed to the eval-recommendation
// prompt. Uses the chosen span's input/output (truncated to 1500 chars each)
// and, when any retrieval spans exist anywhere in the trace, appends their
// queries (500 chars) and retrieved payloads (1000 chars).
// Returns { content, hasRetrievalContext }.
function buildTraceContent(span, trace) {
  const input = span.input_content ? span.input_content.slice(0, 1500) : "(none)";
  const output = span.output_content ? span.output_content.slice(0, 1500) : "(none)";
  // Retrieval detection scans the WHOLE trace, not just the chosen span.
  const allSpans = flattenSpans(trace.root_spans ?? []);
  const retrievalSpans = allSpans.filter(isRetrievalSpan);
  const hasRetrievalContext = retrievalSpans.length > 0;
  let content = `INPUT:
${input}

OUTPUT:
${output}`;
  if (hasRetrievalContext) {
    // One "Query/Retrieved" chunk per retrieval span; spans with neither
    // field collapse to "" and are dropped by filter(Boolean).
    const contextParts = retrievalSpans.map((s) => {
      const parts = [];
      if (s.input_content) parts.push(`Query: ${s.input_content.slice(0, 500)}`);
      if (s.output_content) parts.push(`Retrieved: ${s.output_content.slice(0, 1e3)}`);
      return parts.join("\n");
    }).filter(Boolean);
    if (contextParts.length > 0) {
      content += `

RETRIEVAL CONTEXT:
${contextParts.join("\n---\n")}`;
    }
  }
  // NOTE(review): hasRetrievalContext can be true even when contextParts is
  // empty (retrieval spans with no content) — confirm that is intended.
  return { content, hasRetrievalContext };
}
|
|
2001
|
+
// Step 10: analyze one real trace, ask Claude for continuous-eval
// recommendations, and (with user approval) create the LLM evals, a trace
// transform, and the continuous evals that link them.
// Best-effort throughout: every failure path logs and returns early rather
// than throwing, so the overall workflow continues.
// NOTE(review): `ArthurEngineClient`, `ora8`, and the log/prompt helpers are
// defined elsewhere in this bundle.
async function step10_RecommendEvals(state) {
  // Require an established engine connection and task.
  if (!state.engineUrl || !state.apiKey || !state.taskId) {
    logWarn("Skipping eval recommendations \u2014 engine connection not established.");
    return;
  }
  // Require an eval model provider chosen in step 9.
  if (!state.evalModelProvider || !state.evalModelName) {
    logWarn("Skipping eval recommendations \u2014 no eval model provider configured.");
    note(
      "Configure a model provider in the Arthur Engine UI under Settings \u2192 Model Providers,\nthen re-run Buzz to get personalized eval recommendations.",
      "No eval model provider"
    );
    return;
  }
  const client = new ArthurEngineClient(state.engineUrl, state.apiKey);
  // Fetch what is already configured so we can (a) short-circuit if the user
  // is satisfied and (b) tell Claude not to duplicate existing evals.
  const existingSpinner = ora8({
    text: buzzSay("Checking existing evals..."),
    color: "cyan"
  }).start();
  const [existingContinuousEvals, existingLlmEvals] = await Promise.all([
    client.getContinuousEvals(state.taskId),
    client.getLlmEvals(state.taskId)
  ]);
  existingSpinner.stop();
  if (existingContinuousEvals.length > 0) {
    logInfo(
      `You already have ${existingContinuousEvals.length} continuous eval(s) configured on this task:`
    );
    for (const e of existingContinuousEvals) {
      p.log.message(` \u2022 ${e.name}`);
    }
    const wantsMore = await confirm(
      "Would you like Buzz to analyze your traces and recommend additional evals?"
    );
    if (!wantsMore) {
      logSuccess("Your existing evals look great \u2014 no changes needed.");
      return;
    }
  }
  // Join each continuous eval with the instructions of its backing LLM eval.
  const existingEvals = existingContinuousEvals.map((ce) => ({
    name: ce.name,
    instructions: existingLlmEvals.find((e) => e.name === ce.llm_eval_name)?.instructions
  }));
  const modelSelection = { provider: state.evalModelProvider, model: state.evalModelName };
  // Fetch a trace to analyze; without traces there is nothing to recommend on.
  const traceSpinner = ora8({
    text: buzzSay("Fetching trace data for deep scan..."),
    color: "cyan"
  }).start();
  const traceResult = await client.getTraces(state.taskId);
  if (traceResult.traces.length === 0) {
    traceSpinner.stop();
    note(
      "Once your application has sent traces to Arthur, re-run Buzz\nto get personalized eval recommendations based on your real trace data.",
      "No traces available for analysis"
    );
    return;
  }
  // Only the first (most recent) trace is analyzed.
  const traceDetail = await client.getTraceDetail(traceResult.traces[0].trace_id);
  traceSpinner.stop();
  if (!traceDetail) {
    logWarn("Could not fetch trace details. Skipping eval recommendations.");
    return;
  }
  const bestSpan = extractBestSpan(traceDetail);
  if (!bestSpan) {
    logWarn("No span content found in trace. Skipping eval recommendations.");
    return;
  }
  const analysisSpinner = ora8({
    text: buzzSay("Deep scanning your application traces with Claude..."),
    color: "cyan"
  }).start();
  const { content: traceContent, hasRetrievalContext } = buildTraceContent(bestSpan, traceDetail);
  const result = await recommendEvals(
    traceContent,
    bestSpan.span_name ?? "unknown",
    state.analysis?.framework ?? null,
    state.analysis?.language ?? "unknown",
    modelSelection.provider,
    hasRetrievalContext,
    existingEvals
  );
  analysisSpinner.stop();
  if (!result.ok) {
    logWarn(`Could not generate eval recommendations: ${result.reason}`);
    return;
  }
  // `result.recommendations` is the whole parsed object; the array lives at
  // `.recommendations.recommendations`.
  const recommendations = result.recommendations;
  if (recommendations.recommendations.length === 0) {
    logSuccess(
      "Your existing evals already cover the key quality dimensions \u2014 no additional evals recommended at this time."
    );
    return;
  }
  // Show the recommendations and get explicit approval before mutating the task.
  logInfo("Based on your trace data, Buzz recommends these continuous evals:");
  for (const [i, rec] of recommendations.recommendations.entries()) {
    p.log.message(`${i + 1}. ${rec.displayName}`);
    p.log.message(` ${rec.rationale}`);
  }
  const approved = await confirm(
    "Should Buzz configure these continuous evals on your task now?"
  );
  if (!approved) {
    logInfo(
      "Eval configuration skipped. You can configure evals manually in the Arthur Engine UI."
    );
    return;
  }
  // Phase 1: create one LLM eval per recommendation; remember which succeeded.
  const createdEvalSlugs = [];
  for (const rec of recommendations.recommendations) {
    const spinner = ora8({
      text: buzzSay(`Creating LLM eval: ${rec.displayName}...`),
      color: "cyan"
    }).start();
    const result2 = await client.createLlmEval(state.taskId, rec.slug, {
      model_name: modelSelection.model,
      model_provider: modelSelection.provider,
      instructions: rec.instructions
    });
    spinner.stop();
    if (result2.error) {
      logError(`Failed to create eval "${rec.displayName}": ${result2.error}`);
    } else {
      logSuccess(`Created LLM eval: ${rec.displayName}`);
      createdEvalSlugs.push(rec.slug);
    }
  }
  if (createdEvalSlugs.length === 0) {
    logWarn("No LLM evals were created. Skipping transform and continuous eval setup.");
    return;
  }
  // Phase 2: create one shared transform that extracts the chosen span's
  // input/output attributes for all evals to consume.
  const transformSpinner = ora8({
    text: buzzSay("Creating trace transform..."),
    color: "cyan"
  }).start();
  const transformResult = await client.createTransform(state.taskId, {
    name: "Buzz \u2014 Input/Output Extractor",
    definition: {
      variables: [
        {
          variable_name: "input",
          span_name: bestSpan.span_name ?? "",
          attribute_path: "attributes.input.value",
          fallback: ""
        },
        {
          variable_name: "output",
          span_name: bestSpan.span_name ?? "",
          attribute_path: "attributes.output.value",
          fallback: ""
        }
      ]
    }
  });
  transformSpinner.stop();
  if (transformResult.error || !transformResult.transform) {
    logError(`Failed to create transform: ${transformResult.error ?? "unknown error"}`);
    logWarn("Continuous evals could not be linked without a transform. Configure them manually in the Arthur Engine UI.");
    return;
  }
  logSuccess("Created trace transform");
  // Phase 3: activate a continuous eval for every successfully created LLM eval.
  const VARIABLE_MAPPING = [
    { transform_variable: "input", eval_variable: "input" },
    { transform_variable: "output", eval_variable: "output" }
  ];
  let continuousEvalCount = 0;
  for (const slug of createdEvalSlugs) {
    // Slugs come from recommendations, so this lookup always succeeds.
    const rec = recommendations.recommendations.find((r) => r.slug === slug);
    const spinner = ora8({
      text: buzzSay(`Activating continuous eval: ${rec.displayName}...`),
      color: "cyan"
    }).start();
    const result2 = await client.createContinuousEval(state.taskId, {
      name: rec.displayName,
      llm_eval_name: slug,
      llm_eval_version: "latest",
      transform_id: transformResult.transform.id,
      transform_variable_mapping: VARIABLE_MAPPING,
      enabled: true
    });
    spinner.stop();
    if (result2.error) {
      logError(`Failed to activate "${rec.displayName}": ${result2.error}`);
    } else {
      logSuccess(`Activated: ${rec.displayName}`);
      continuousEvalCount++;
    }
  }
  if (continuousEvalCount > 0) {
    logSuccess(
      `${continuousEvalCount} continuous eval(s) are now monitoring your application.`
    );
    note(
      `View and manage your evals in Arthur Engine:
${state.engineUrl}`,
      "Continuous evals configured"
    );
  } else {
    logWarn("No continuous evals were activated. Configure them manually in the Arthur Engine UI.");
  }
}
|
|
2201
|
+
|
|
2202
|
+
// src/mastra/agent.ts
|
|
2203
|
+
import { query as query4 } from "@anthropic-ai/claude-agent-sdk";
|
|
2204
|
+
import fs3 from "fs";
|
|
2205
|
+
import path3 from "path";
|
|
2206
|
+
var INSTRUCTIONS2 = `You are Buzz's code analysis module. Your job is to analyze an agentic application's
|
|
2207
|
+
repository and return a structured JSON assessment.
|
|
2208
|
+
|
|
2209
|
+
Always use your tools to examine the repository before drawing conclusions:
|
|
2210
|
+
1. Call Glob first to see all files
|
|
2211
|
+
2. Call Read to read manifests (package.json, requirements.txt, pyproject.toml)
|
|
2212
|
+
3. Call Grep or Read as needed to verify findings
|
|
2213
|
+
|
|
2214
|
+
Return ONLY a JSON object with this exact structure (no markdown, no explanation, just raw JSON):
|
|
2215
|
+
{
|
|
2216
|
+
"language": "python" | "typescript" | "javascript" | "other",
|
|
2217
|
+
"framework": "mastra" | "langchain" | "openai" | "anthropic" | "crewai" | "autogen" | "other" | null,
|
|
2218
|
+
"isInstrumented": boolean,
|
|
2219
|
+
"instrumentationType": "arthur-sdk" | "mastra-arthur-exporter" | "openinference" | null,
|
|
2220
|
+
"entryPoint": "<relative path to main entry file>" | null,
|
|
2221
|
+
"details": "<one sentence summary>"
|
|
2222
|
+
}
|
|
2223
|
+
|
|
2224
|
+
Detection rules:
|
|
2225
|
+
- language: "python" if requirements.txt or pyproject.toml exists
|
|
2226
|
+
- language: "typescript" if tsconfig.json or .ts files exist
|
|
2227
|
+
- language: "javascript" if package.json exists but no tsconfig.json
|
|
2228
|
+
- framework: "mastra" if @mastra/core or mastra in package.json dependencies
|
|
2229
|
+
- framework: "langchain" if langchain or langchain-* in dependencies
|
|
2230
|
+
- framework: "openai" if openai in dependencies and no higher-level framework detected
|
|
2231
|
+
- isInstrumented: true if arthur_observability_sdk in Python deps
|
|
2232
|
+
OR ArthurExporter imported in TS files
|
|
2233
|
+
OR @opentelemetry/exporter-trace-otlp-proto configured with Arthur URL
|
|
2234
|
+
- instrumentationType: "arthur-sdk" for Python SDK, "mastra-arthur-exporter" for Mastra, "openinference" for OTel/OpenInference`;
|
|
2235
|
+
// Pull a JSON payload out of a model response: prefer the contents of a
// ```/```json code fence, then the widest {...} region, then the trimmed
// text itself as a last resort.
function extractJSON3(text2) {
  const fenced = text2.match(/```(?:json)?\s*([\s\S]+?)```/);
  if (fenced !== null) {
    return fenced[1].trim();
  }
  const braced = text2.match(/\{[\s\S]+\}/);
  return braced !== null ? braced[0] : text2.trim();
}
|
|
2242
|
+
// Fallback repository analysis when the Claude-driven scan fails: infer
// language, framework, and Arthur instrumentation purely from manifest files.
// All file reads are best-effort; unreadable manifests are ignored.
function heuristicAnalysis(repoPath) {
  const manifestExists = (name) => fs3.existsSync(path3.join(repoPath, name));
  const hasPyproject = manifestExists("pyproject.toml");
  const hasRequirements = manifestExists("requirements.txt");
  const hasTsconfig = manifestExists("tsconfig.json");
  const hasPackageJson = manifestExists("package.json");

  // Language precedence: Python manifests > tsconfig > package.json > other.
  let language;
  if (hasPyproject || hasRequirements) {
    language = "python";
  } else if (hasTsconfig) {
    language = "typescript";
  } else if (hasPackageJson) {
    language = "javascript";
  } else {
    language = "other";
  }

  let framework = null;
  let isInstrumented = false;
  let instrumentationType = null;

  if (hasPackageJson) {
    try {
      const manifest = JSON.parse(fs3.readFileSync(path3.join(repoPath, "package.json"), "utf-8"));
      const deps = { ...manifest.dependencies ?? {}, ...manifest.devDependencies ?? {} };
      // Framework precedence mirrors the agent prompt: mastra > langchain > openai > anthropic.
      if ("@mastra/core" in deps || "mastra" in deps) framework = "mastra";
      else if ("langchain" in deps || "@langchain/core" in deps) framework = "langchain";
      else if ("openai" in deps) framework = "openai";
      else if ("@anthropic-ai/sdk" in deps) framework = "anthropic";
      if ("@arizeai/openinference-core" in deps || "openinference-instrumentation" in deps) {
        isInstrumented = true;
        instrumentationType = "openinference";
      }
    } catch {
      // Invalid/unreadable package.json: continue with what we have.
    }
  }

  if (hasRequirements) {
    try {
      const requirements = fs3.readFileSync(path3.join(repoPath, "requirements.txt"), "utf-8");
      if (requirements.includes("arthur-observability-sdk")) {
        isInstrumented = true;
        instrumentationType = "arthur-sdk";
      }
      if (!framework) {
        if (requirements.includes("langchain")) framework = "langchain";
        else if (requirements.includes("openai")) framework = "openai";
        else if (requirements.includes("anthropic")) framework = "anthropic";
      }
    } catch {
      // Unreadable requirements.txt: skip.
    }
  }

  if (hasPyproject) {
    try {
      const toml = fs3.readFileSync(path3.join(repoPath, "pyproject.toml"), "utf-8");
      if (toml.includes("arthur-observability-sdk")) {
        isInstrumented = true;
        instrumentationType = "arthur-sdk";
      }
      if (!framework) {
        if (toml.includes("langchain")) framework = "langchain";
        else if (toml.includes("openai")) framework = "openai";
      }
    } catch {
      // Unreadable pyproject.toml: skip.
    }
  }

  return {
    language,
    framework,
    isInstrumented,
    instrumentationType,
    entryPoint: null,
    details: `Heuristic detection: ${language} app${framework ? ` (${framework})` : ""}`
  };
}
|
|
2307
|
+
// Analyze the repository with the Claude agent SDK (Read/Glob/Grep tools,
// up to 3 turns) and parse its JSON verdict. Any failure — SDK error,
// malformed JSON — silently falls back to the file-based heuristic.
async function analyzeRepository(repoPath) {
  try {
    const stream = query4({
      prompt: `Analyze the agentic application repository at: ${repoPath}

Use your tools to examine the files and return the JSON assessment.`,
      options: {
        cwd: repoPath,
        allowedTools: ["Read", "Glob", "Grep"],
        systemPrompt: INSTRUCTIONS2,
        maxTurns: 3
      }
    });
    // Collect every assistant text block, then join into one response string.
    const chunks = [];
    for await (const message of stream) {
      if (message.type !== "assistant") {
        continue;
      }
      for (const block of message.message?.content ?? []) {
        if (block.type === "text" && block.text) {
          chunks.push(block.text);
        }
      }
    }
    return JSON.parse(extractJSON3(chunks.join("")));
  } catch {
    return heuristicAnalysis(repoPath);
  }
}
|
|
2336
|
+
|
|
2337
|
+
// src/workflow/orchestrator.ts
|
|
2338
|
+
import ora9 from "ora";
|
|
2339
|
+
// Print a "Step n/10 — title" banner in Buzz's voice.
function stepBanner(n, title) {
  const banner = `Step ${n}/10 \u2014 ${title}`;
  p.log.info(buzzSay(banner));
}
|
|
2342
|
+
// Top-level orchestrator: runs the 10-step onboarding workflow against the
// repository at `repoPath`, threading one mutable `state` object through every
// step. Steps 7 and 10 are best-effort (their errors are logged and skipped);
// any other step may throw (e.g. BuzzError) and abort the whole run.
async function runBuzzWorkflow(repoPath) {
  // Shared workflow state; steps fill these fields in as they complete.
  const state = {
    repoPath,
    buzzEnvPath: getBuzzEnvPath(repoPath),
    engineUrl: null,
    apiKey: null,
    taskId: null,
    analysis: null,
    promptModelProvider: null,
    promptModelName: null,
    evalModelProvider: null,
    evalModelName: null
  };
  stepBanner(1, "Verify pre-requisites");
  await step1_VerifyPrereqs(state);
  stepBanner(2, "Ensure Arthur GenAI Engine is available");
  await step2_EnsureArthurEngine(state);
  stepBanner(3, "Set up Arthur task ID");
  await step3_EnsureTaskId(state);
  // Repo analysis runs between steps 3 and 4; its result steers instrumentation.
  const analysisSpinner = ora9({ text: buzzSay("Analyzing repository language and framework..."), color: "cyan" }).start();
  state.analysis = await analyzeRepository(state.repoPath);
  analysisSpinner.stop();
  stepBanner(4, "Instrument your agentic application");
  // Steps 4-6 are alternatives: short-circuit to the first one that succeeds.
  const instrumented = await step4_InstrumentPython(state) || await step5_InstrumentMastra(state) || await step6_InstrumentOther(state);
  if (!instrumented) {
    p.log.warn(buzzSay("Could not determine application type. Skipping instrumentation."));
  }
  stepBanner(7, "Extract & register prompts");
  try {
    await step7_ExtractAndRegisterPrompts(state);
  } catch {
    // Prompt extraction is optional; never abort the workflow for it.
    p.log.warn(buzzSay("Prompt extraction encountered an error. Skipping."));
  }
  stepBanner(8, "Verify instrumentation is working");
  await step8_VerifyInstrumentation(state);
  stepBanner(9, "Select model provider for evaluations");
  await step9_SelectEvalModelProvider(state);
  stepBanner(10, "Recommend & configure evals");
  try {
    await step10_RecommendEvals(state);
  } catch {
    // Eval recommendation is also best-effort.
    p.log.warn(buzzSay("Eval recommendations encountered an error. Skipping."));
  }
}
|
|
2386
|
+
|
|
2387
|
+
// src/ui/avatar.ts
|
|
2388
|
+
import chalk2 from "chalk";
|
|
2389
|
+
import boxen from "boxen";
|
|
2390
|
+
// Six-row ASCII-art helmet frames for the startup animation, stored as \u
// escapes (box-drawing / block elements). Frames A and B differ only in the
// "eyes" row, producing a two-frame blink when alternated.
// NOTE(review): runs of interior spaces in these rows may have been collapsed
// by the diff rendering — verify glyph alignment against the released bundle.
var HELMET_IDLE_A = [
  " \u2597\u259B\u2580\u2580\u2580\u2580\u2580\u2599\u2596 ",
  "\u2590\u2588\u2591 \u2591\u2588\u258C",
  "\u2590\u2588 \u2588\u258C",
  "\u2590\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258C",
  "\u259D\u259B\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2599\u2598",
  " \u2590\u258C \u2590\u258C "
];
var HELMET_IDLE_B = [
  " \u2597\u259B\u2580\u2580\u2580\u2580\u2580\u2599\u2596 ",
  "\u2590\u2588\u2584 \u2584\u2588\u258C",
  "\u2590\u2588 \u2588\u258C",
  "\u2590\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u258C",
  "\u259D\u259B\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2599\u2598",
  " \u2590\u258C \u2590\u258C "
];
|
|
2406
|
+
// Play the 5-frame helmet "blink" animation (~350 ms per frame), optionally
// rendering `sideLines` (e.g. the figlet banner) to the right of each helmet
// row. Redraws in place by moving the cursor up HEIGHT lines between frames.
// @param {string[]|undefined} sideLines - extra text per row, or falsy for none.
async function playStartupAnimation(sideLines) {
  const frames = [HELMET_IDLE_A, HELMET_IDLE_B, HELMET_IDLE_A, HELMET_IDLE_B, HELMET_IDLE_A];
  const HEIGHT = HELMET_IDLE_A.length;
  const GAP = " ";
  // Hoisted out of the loop: the chalk color builder is loop-invariant.
  const colorFn = chalk2.hex("#9B59B6");
  for (let i = 0; i < frames.length; i++) {
    if (i > 0) {
      // ANSI "cursor up HEIGHT lines" so the next frame overwrites the last.
      process.stdout.write(`\x1B[${HEIGHT}A`);
    }
    const coloredLines = frames[i].map((l) => colorFn(l));
    const output = sideLines ? coloredLines.map((l, j) => l + GAP + (sideLines[j] ?? "")).join("\n") : coloredLines.join("\n");
    process.stdout.write(output + "\n");
    await new Promise((resolve) => setTimeout(resolve, 350));
  }
}
|
|
2421
|
+
|
|
2422
|
+
// src/index.ts
|
|
2423
|
+
// CLI entry point: render the banner/animation, show build info, then run the
// onboarding workflow. A BuzzError (already reported by the failing step) and
// an unexpected error get different outro messages; both exit with code 1.
async function main() {
  const banner = figlet.textSync("BUZZ", { font: "Big" });
  const bannerLines = banner.split("\n").filter((l) => l.length > 0).map((l) => chalk3.hex("#CE93D8").bold(l));
  await playStartupAnimation(bannerLines);
  console.log(chalk3.dim(" Arthur GenAI Engine Onboarding Agent"));
  // Build timestamp injected at bundle time. (The bundler's dead `true ? x :
  // null` ternary has been flattened; the guard stays in case a future build
  // injects no timestamp.)
  const buildTime = "2026-05-12T17:28:43.011Z";
  if (buildTime) {
    console.log(chalk3.dim(` Build: ${new Date(buildTime).toLocaleString()}`));
  }
  console.log();
  intro(chalk3.cyan.bold(" Arthur GenAI Engine \u2014 Onboarding "));
  const repoPath = process.cwd();
  try {
    await runBuzzWorkflow(repoPath);
    console.log();
    outro(chalk3.green.bold("Your application is now connected to Arthur GenAI Engine."));
  } catch (err) {
    console.log();
    if (err instanceof BuzzError) {
      // Fatal workflow error: details were already logged by the step.
      outro(chalk3.red("Mission aborted. Check the messages above for guidance."));
    } else {
      outro(chalk3.red("Mission aborted due to an unexpected error."));
    }
    process.exit(1);
  }
}
|
|
2449
|
+
// Last-resort handler: surface anything main() itself failed to catch,
// then exit non-zero.
const reportFatal = (err) => {
  console.error(chalk3.red("\nFatal error:"), err);
  process.exit(1);
};
main().catch(reportFatal);
|