@hatchway/cli 0.50.65 → 0.50.67

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1462,23 +1462,23 @@ var init_droid_strategy$1 = __esm$1({
1462
1462
  // src/lib/prompts.ts
1463
1463
  var CLAUDE_SYSTEM_PROMPT = `You are an elite coding assistant specialized in building visually stunning, production-ready JavaScript applications.
1464
1464
 
1465
- ## Platform Skills
1465
+ ## Platform Skills (hatchway-platform plugin)
1466
1466
 
1467
- You have access to platform skills that provide critical procedural knowledge. Load and follow these skills for EVERY task:
1467
+ You have platform skills from the \`hatchway-platform\` plugin. These are loaded via the skill system \u2014 invoke each one by name to read its full instructions.
1468
1468
 
1469
- **Always load (required for every build):**
1470
- - \`todo-workflow\` \u2014 You MUST use TodoWrite to track progress. Without it, users see no progress in the UI.
1471
- - \`communication-style\` \u2014 Follow the Hatchway output formatting conventions.
1472
- - \`build-verification\` \u2014 Use the fix-verify loop for all dependency and build errors.
1473
- - \`context-awareness\` \u2014 Read existing code before modifying. Never write blind.
1474
- - \`dependency-management\` \u2014 Install all dependencies upfront in a single operation.
1469
+ **BEFORE doing any work, load ALL 5 of these required skills:**
1470
+ 1. \`hatchway-platform:todo-workflow\` \u2014 You MUST load this FIRST. It defines how to use TodoWrite for progress tracking. Without it, users see no progress in the UI.
1471
+ 2. \`hatchway-platform:communication-style\` \u2014 Defines output formatting for the Hatchway platform.
1472
+ 3. \`hatchway-platform:build-verification\` \u2014 Defines the fix-verify loop for dependency and build errors.
1473
+ 4. \`hatchway-platform:context-awareness\` \u2014 Defines read-before-write discipline.
1474
+ 5. \`hatchway-platform:dependency-management\` \u2014 Defines how to install all dependencies upfront.
1475
1475
 
1476
- **Load when relevant:**
1477
- - \`architectural-thinking\` \u2014 Load when starting a new feature or multi-file change.
1478
- - \`design-excellence\` \u2014 Load when building or styling user-facing UI.
1479
- - \`template-originality\` \u2014 Load only when building a new project from a template scaffold.
1476
+ **Also load these when the task involves them:**
1477
+ - \`hatchway-platform:architectural-thinking\` \u2014 Load for new features or multi-file changes.
1478
+ - \`hatchway-platform:design-excellence\` \u2014 Load when building or styling UI.
1479
+ - \`hatchway-platform:template-originality\` \u2014 Load when building from a template scaffold.
1480
1480
 
1481
- Load each skill by reading its SKILL.md file, then follow its instructions throughout the task.
1481
+ Load each skill at the START of the task before writing any code. Follow the loaded skill instructions throughout the entire task.
1482
1482
 
1483
1483
  ## Plan Mode
1484
1484
 
@@ -4424,33 +4424,28 @@ function createNativeClaudeQuery(modelId = DEFAULT_CLAUDE_MODEL_ID, abortControl
4424
4424
  let messageCount = 0;
4425
4425
  let toolCallCount = 0;
4426
4426
  let textBlockCount = 0;
4427
- // Create the gen_ai.invoke_agent span using startSpanManual.
4427
+ // Create the gen_ai.invoke_agent span as a child of the current active span.
4428
4428
  //
4429
- // Why startSpanManual and not startSpan?
4430
- // startSpan() takes a callback and ends the span when the callback returns.
4431
- // But this is an async generator we can't yield from inside a callback.
4432
- // startSpanManual() makes the span active on the current scope AND gives us
4433
- // a handle to end it ourselves in the finally block.
4429
+ // We use startInactiveSpan because this is an async generator — we can't use
4430
+ // startSpan/startSpanManual (both require a callback, and yields can't cross
4431
+ // callback boundaries). startInactiveSpan creates a span that inherits the
4432
+ // parent from the current active span (build.runner, restored by engine.ts
4433
+ // via Sentry.withActiveSpan).
4434
4434
  //
4435
- // Why this works now (it didn't before):
4436
- // engine.ts captures the parent build.runner span before creating the
4437
- // ReadableStream, then restores it via Sentry.withActiveSpan() inside the
4438
- // stream's start() callback. So when this generator runs, the build.runner
4439
- // span is the active parent, and our gen_ai.invoke_agent becomes its child.
4440
- // Tool spans created with startSpan() inside the loop become children of
4441
- // gen_ai.invoke_agent because it's the active span at that point.
4442
- const agentSpan = Sentry.startSpanManual({
4435
+ // For tool spans, we use Sentry.withActiveSpan(agentSpan, ...) to temporarily
4436
+ // make the agent span active so tool spans become its children.
4437
+ const agentSpan = Sentry.startInactiveSpan({
4443
4438
  op: 'gen_ai.invoke_agent',
4444
- name: `Claude Agent (${modelId})`,
4439
+ name: 'invoke_agent hatchway-builder',
4445
4440
  attributes: {
4441
+ 'gen_ai.operation.name': 'invoke_agent',
4446
4442
  'gen_ai.agent.name': 'hatchway-builder',
4447
4443
  'gen_ai.request.model': modelId,
4448
- 'gen_ai.agent.input': finalPrompt.substring(0, 500),
4449
- 'gen_ai.system_prompt.length': appendedSystemPrompt.length,
4450
- 'gen_ai.agent.available_tools': JSON.stringify(['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep', 'Task', 'TodoWrite', 'WebFetch']),
4444
+ 'gen_ai.request.messages': JSON.stringify([{ role: 'user', content: finalPrompt.substring(0, 1000) }]),
4445
+ 'gen_ai.request.available_tools': JSON.stringify(['Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep', 'Task', 'TodoWrite', 'WebFetch']
4446
+ .map(name => ({ name, type: 'function' }))),
4451
4447
  },
4452
- }, (span) => span // Return the span so we control its lifecycle
4453
- );
4448
+ });
4454
4449
  try {
4455
4450
  // Stream messages directly from the SDK
4456
4451
  for await (const sdkMessage of query({ prompt: finalPrompt, options })) {
@@ -4465,20 +4460,21 @@ function createNativeClaudeQuery(modelId = DEFAULT_CLAUDE_MODEL_ID, abortControl
4465
4460
  toolCallCount++;
4466
4461
  debugLog$4(`[runner] [native-sdk] 🔧 Tool call: ${block.name}\n`);
4467
4462
  // Emit a gen_ai.execute_tool span as a child of gen_ai.invoke_agent.
4468
- // Using startSpan (active) with an empty callback — the span is created,
4469
- // becomes briefly active, records the tool invocation, and ends when
4470
- // the callback returns. This gives Sentry the tool call event with
4471
- // proper parent-child nesting.
4472
- Sentry.startSpan({
4473
- op: 'gen_ai.execute_tool',
4474
- name: `Tool: ${block.name}`,
4475
- attributes: {
4476
- 'gen_ai.tool.name': block.name,
4477
- 'gen_ai.tool.call_id': block.id,
4478
- 'gen_ai.tool.input': JSON.stringify(block.input).substring(0, 1000),
4479
- },
4480
- }, () => {
4481
- // Span created and ended — marks the tool invocation point
4463
+ // withActiveSpan temporarily makes agentSpan the active span so
4464
+ // the startSpan inside creates a proper child.
4465
+ Sentry.withActiveSpan(agentSpan, () => {
4466
+ Sentry.startSpan({
4467
+ op: 'gen_ai.execute_tool',
4468
+ name: `execute_tool ${block.name}`,
4469
+ attributes: {
4470
+ 'gen_ai.tool.name': block.name,
4471
+ 'gen_ai.tool.call_id': block.id,
4472
+ 'gen_ai.tool.input': JSON.stringify(block.input).substring(0, 1000),
4473
+ 'gen_ai.request.model': modelId,
4474
+ },
4475
+ }, () => {
4476
+ // Span created and ended — marks the tool invocation point
4477
+ });
4482
4478
  });
4483
4479
  }
4484
4480
  else if (block.type === 'text') {
@@ -4499,6 +4495,11 @@ function createNativeClaudeQuery(modelId = DEFAULT_CLAUDE_MODEL_ID, abortControl
4499
4495
  process.stderr.write(`[native-sdk] SDK init — plugins: ${JSON.stringify(loadedPlugins)}\n`);
4500
4496
  process.stderr.write(`[native-sdk] SDK init — tools: ${toolCount} loaded\n`);
4501
4497
  }
4498
+ // Set discovered skills on the agent span
4499
+ if (agentSpan) {
4500
+ agentSpan.setAttribute('gen_ai.agent.skills', discoveredSkills.join(', '));
4501
+ agentSpan.setAttribute('gen_ai.agent.skill_count', discoveredSkills.length);
4502
+ }
4502
4503
  if (discoveredSkills.length > 0) {
4503
4504
  Sentry.logger.info('SDK initialized with skills', {
4504
4505
  skillCount: String(discoveredSkills.length),
@@ -4532,21 +4533,27 @@ function createNativeClaudeQuery(modelId = DEFAULT_CLAUDE_MODEL_ID, abortControl
4532
4533
  if (sdkMessage.type === 'result') {
4533
4534
  const resultMsg = sdkMessage;
4534
4535
  if (agentSpan) {
4536
+ // Standard gen_ai token usage attributes (Sentry AI Agent Monitoring spec)
4535
4537
  agentSpan.setAttribute('gen_ai.usage.input_tokens', resultMsg.usage?.input_tokens ?? 0);
4536
4538
  agentSpan.setAttribute('gen_ai.usage.output_tokens', resultMsg.usage?.output_tokens ?? 0);
4537
4539
  agentSpan.setAttribute('gen_ai.usage.total_tokens', (resultMsg.usage?.input_tokens ?? 0) + (resultMsg.usage?.output_tokens ?? 0));
4538
- agentSpan.setAttribute('gen_ai.usage.cost_usd', resultMsg.total_cost_usd ?? 0);
4539
- agentSpan.setAttribute('gen_ai.agent.num_turns', resultMsg.num_turns ?? 0);
4540
- agentSpan.setAttribute('gen_ai.agent.num_tool_calls', toolCallCount);
4541
- agentSpan.setAttribute('gen_ai.agent.result', resultMsg.subtype ?? 'unknown');
4542
- agentSpan.setAttribute('gen_ai.agent.duration_ms', resultMsg.duration_ms ?? 0);
4543
- agentSpan.setAttribute('gen_ai.agent.duration_api_ms', resultMsg.duration_api_ms ?? 0);
4544
4540
  if (resultMsg.usage?.cache_read_input_tokens) {
4545
- agentSpan.setAttribute('gen_ai.usage.cache_read_tokens', resultMsg.usage.cache_read_input_tokens);
4541
+ agentSpan.setAttribute('gen_ai.usage.input_tokens.cached', resultMsg.usage.cache_read_input_tokens);
4546
4542
  }
4547
4543
  if (resultMsg.usage?.cache_creation_input_tokens) {
4548
- agentSpan.setAttribute('gen_ai.usage.cache_creation_tokens', resultMsg.usage.cache_creation_input_tokens);
4544
+ agentSpan.setAttribute('gen_ai.usage.input_tokens.cache_write', resultMsg.usage.cache_creation_input_tokens);
4545
+ }
4546
+ // Response text (truncated for span safety)
4547
+ if (resultMsg.result) {
4548
+ agentSpan.setAttribute('gen_ai.response.text', JSON.stringify(resultMsg.result.substring(0, 1000)));
4549
4549
  }
4550
+ // Custom (non-spec) attributes for operational insight
4551
+ agentSpan.setAttribute('hatchway.cost_usd', resultMsg.total_cost_usd ?? 0);
4552
+ agentSpan.setAttribute('hatchway.num_turns', resultMsg.num_turns ?? 0);
4553
+ agentSpan.setAttribute('hatchway.num_tool_calls', toolCallCount);
4554
+ agentSpan.setAttribute('hatchway.result', resultMsg.subtype ?? 'unknown');
4555
+ agentSpan.setAttribute('hatchway.duration_ms', resultMsg.duration_ms ?? 0);
4556
+ agentSpan.setAttribute('hatchway.duration_api_ms', resultMsg.duration_api_ms ?? 0);
4550
4557
  }
4551
4558
  if (resultMsg.subtype === 'success') {
4552
4559
  debugLog$4(`[runner] [native-sdk] ✅ Query complete - ${resultMsg.num_turns} turns, $${resultMsg.total_cost_usd?.toFixed(4)} USD\n`);