npm - mstro-app - Versions diffs - 0.4.20 → 0.4.22 - Mend

mstro-app 0.4.20 → 0.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (177) hide show

package/server/services/deploy/headless-session-handler.ts ADDED Viewed

@@ -0,0 +1,415 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+// Licensed under the MIT License. See LICENSE file for details.
+/**
+ * Headless Session Handler
+ *
+ * Handles headless Claude Code session requests from a developer's backend
+ * on behalf of end users. Each session is isolated — no shared context
+ * between end users.
+ *
+ * Security: End-user prompts are untrusted input. They are always passed as
+ * user messages, never injected into system prompts or tool parameters.
+ * The Security Bouncer governs tool access within each session.
+ */
+import type { ToolUseEvent } from '../../cli/headless/types.js';
+import { DeployAiService, type DeployExecutionResult } from './deploy-ai-service.js';
+// ========== Types ==========
+/**
+ * Per-request input accepted by `handleHeadlessSession`.
+ *
+ * `prompt` and `endUserId` are required and are rejected with
+ * `INVALID_REQUEST` when empty or whitespace-only; the remaining fields are
+ * optional per-request overrides.
+ */
+export interface HeadlessSessionRequest {
+  /** The end user's prompt (untrusted input) */
+  prompt: string;
+  /** Override the deployment's default system prompt */
+  systemPrompt?: string;
+  /**
+   * Restrict which tools Claude can use in this session. When provided,
+   * every known tool missing from this list is passed on as disallowed.
+   */
+  allowedTools?: string[];
+  /** Override the deployment's default model */
+  model?: string;
+  /**
+   * Unique identifier for the end user. It is echoed in the usage report;
+   * the rate limits in this file are tracked per deployment, not per end user.
+   */
+  endUserId: string;
+}
+/**
+ * Deployment-level AI settings consulted by `handleHeadlessSession` before
+ * and while a session runs.
+ */
+export interface DeploymentAiConfig {
+  /** Deployment identifier; also the key of the in-memory rate-limit bucket. */
+  deploymentId: string;
+  /** When false, every request is rejected with `AI_DISABLED`. */
+  aiEnabled: boolean;
+  /** Enabled capability names; must include `'headless'` for headless sessions. */
+  allowedAiCapabilities: string[];
+  /** Token cap per request; `null` skips the token checks. */
+  maxTokensPerRequest: number | null;
+  /** Requests allowed within the last 60 seconds; `null` skips this check. */
+  maxRequestsPerMinute: number | null;
+  /** Maximum number of sessions that may run at once for this deployment. */
+  maxConcurrentSessions: number;
+  /** System prompt used when the request does not supply one; `null` for none. */
+  defaultSystemPrompt: string | null;
+  /** Model used when the request does not supply one. */
+  defaultModel: string;
+  /** Working directory handed to the AI service for the session. */
+  workingDir: string;
+}
+/**
+ * Machine-readable failure codes returned by `handleHeadlessSession`.
+ *
+ * Note that `RATE_LIMIT_EXCEEDED` is used both for the requests-per-minute
+ * limit and for a prompt whose estimated input tokens exceed
+ * `maxTokensPerRequest`.
+ */
+export type HeadlessSessionErrorCode =
+  | 'CAPABILITY_DENIED'
+  | 'AI_DISABLED'
+  | 'RATE_LIMIT_EXCEEDED'
+  | 'CONCURRENT_LIMIT_EXCEEDED'
+  | 'INVALID_REQUEST'
+  | 'EXECUTION_FAILED';
+/** Structured error carried by a failed `HeadlessSessionResult`. */
+export interface HeadlessSessionError {
+  /** Category of the failure, suitable for programmatic handling. */
+  code: HeadlessSessionErrorCode;
+  /** Human-readable explanation of the failure. */
+  message: string;
+}
+/**
+ * Optional observers for a headless session. The first three are forwarded
+ * to the AI service's `execute` call; the last two are invoked by
+ * `handleHeadlessSession` itself.
+ */
+export interface HeadlessSessionStreamCallbacks {
+  /** Forwarded to the service as `outputCallback`. */
+  onOutput?: (text: string) => void;
+  /** Forwarded to the service as `thinkingCallback`. */
+  onThinking?: (text: string) => void;
+  /** Forwarded to the service as `toolUseCallback`. */
+  onToolUse?: (event: ToolUseEvent) => void;
+  /** Called once after `execute` resolves, with token and duration figures. */
+  onUsageReport?: (report: UsageReportData) => void;
+  /** Called when an error message indicates a credential, billing, or rate-limit issue. */
+  onHealthUpdate?: (update: HealthUpdateData) => void;
+}
+/** Usage figures delivered to `onUsageReport` after a session has executed. */
+export interface UsageReportData {
+  /** Deployment the session ran under. */
+  deploymentId: string;
+  /** End user the session ran on behalf of. */
+  endUserId: string;
+  /** Capability that produced the usage; headless sessions report `'headless'`. */
+  capability: 'headless' | 'pm-board';
+  /** Total tokens reported by the execution result. */
+  tokensUsed: number;
+  /** Model the session was run with (request override or deployment default). */
+  model: string;
+  /** Duration reported by the execution result, in milliseconds. */
+  durationMs: number;
+  /** Not set by headless sessions; presumably used with `'pm-board'` — TODO confirm. */
+  boardId?: string;
+}
+/**
+ * Payload delivered to `onHealthUpdate` when an upstream credential,
+ * billing, or rate-limit problem is detected.
+ */
+export interface HealthUpdateData {
+  /** Deployment whose session surfaced the issue. */
+  deploymentId: string;
+  /**
+   * Issue category. The health detection visible in this file produces only
+   * `invalid_key`, `no_credits`, and `rate_limited`.
+   */
+  status: 'healthy' | 'invalid_key' | 'no_credits' | 'rate_limited' | 'unknown_error';
+  /** Human-readable description of the issue. */
+  message: string;
+  /**
+   * True for `invalid_key` and `no_credits`, false for `rate_limited`.
+   * Presumably a hint that AI should be turned off for the deployment —
+   * confirm against the consumer of this update.
+   */
+  aiDisabled: boolean;
+}
+/**
+ * Outcome of `handleHeadlessSession`, discriminated on `ok`: the execution
+ * result on success, or a structured error on failure.
+ */
+export type HeadlessSessionResult =
+  | { ok: true; result: DeployExecutionResult }
+  | { ok: false; error: HeadlessSessionError };
+// ========== Rate Limiter ==========
+/** In-memory rate-limit state tracked for a single deployment. */
+interface RateBucket {
+  /** Start times of recent requests (`Date.now()` values), appended in arrival order. */
+  timestamps: number[];
+  /** Number of sessions currently counted as running. */
+  activeSessions: number;
+}
+/** Rate-limit buckets keyed by deployment ID; held in module memory only. */
+const rateBuckets = new Map<string, RateBucket>();
+/**
+ * Look up the rate-limit bucket for a deployment, lazily registering an
+ * empty one the first time the deployment is seen.
+ *
+ * @param deploymentId - Deployment whose bucket is requested.
+ * @returns The existing or newly created bucket (never undefined).
+ */
+function getBucket(deploymentId: string): RateBucket {
+  const existing = rateBuckets.get(deploymentId);
+  if (existing) {
+    return existing;
+  }
+  const created: RateBucket = { timestamps: [], activeSessions: 0 };
+  rateBuckets.set(deploymentId, created);
+  return created;
+}
+/**
+ * Drop the leading run of request timestamps that are older than 60 seconds.
+ * Mutates `bucket.timestamps` in place; scanning stops at the first entry
+ * that is not older than the cutoff.
+ */
+function pruneTimestamps(bucket: RateBucket): void {
+  const cutoff = Date.now() - 60_000;
+  // Index of the first entry that is NOT stale; -1 means every entry is stale.
+  const firstFresh = bucket.timestamps.findIndex((stamp) => !(stamp < cutoff));
+  const staleCount = firstFresh === -1 ? bucket.timestamps.length : firstFresh;
+  if (staleCount > 0) {
+    bucket.timestamps.splice(0, staleCount);
+  }
+}
+/**
+ * Decide whether a new session may start for the deployment in `config`.
+ *
+ * The concurrent-session ceiling is checked first, then (when configured)
+ * the requests-per-minute ceiling after stale timestamps are pruned.
+ *
+ * @returns The limit error to report, or `null` when the request may proceed.
+ */
+function checkRateLimit(
+  config: DeploymentAiConfig,
+): HeadlessSessionError | null {
+  const { deploymentId, maxConcurrentSessions, maxRequestsPerMinute } = config;
+  const state = getBucket(deploymentId);
+  // Concurrency ceiling.
+  if (state.activeSessions >= maxConcurrentSessions) {
+    return {
+      code: 'CONCURRENT_LIMIT_EXCEEDED',
+      message: `Deployment has reached the maximum of ${maxConcurrentSessions} concurrent sessions. Wait for an existing session to complete.`,
+    };
+  }
+  // No per-minute ceiling configured: nothing more to check.
+  if (maxRequestsPerMinute === null) {
+    return null;
+  }
+  pruneTimestamps(state);
+  if (state.timestamps.length < maxRequestsPerMinute) {
+    return null;
+  }
+  return {
+    code: 'RATE_LIMIT_EXCEEDED',
+    message: `Deployment has exceeded the rate limit of ${maxRequestsPerMinute} requests per minute. Try again shortly.`,
+  };
+}
+/**
+ * Register the start of a session for a deployment: count it as active and
+ * append the current time to the request history.
+ */
+function recordRequestStart(deploymentId: string): void {
+  const state = getBucket(deploymentId);
+  state.activeSessions += 1;
+  state.timestamps.push(Date.now());
+}
+/**
+ * Register the end of a session for a deployment by decrementing its
+ * active-session counter (never below zero).
+ *
+ * The bucket is looked up without creating it: if the deployment's state
+ * was reset while the session was still running, there is nothing left to
+ * decrement, and re-creating an empty bucket would only leave a stale map
+ * entry behind for a deployment that may have been deleted.
+ */
+function recordRequestEnd(deploymentId: string): void {
+  const bucket = rateBuckets.get(deploymentId);
+  if (!bucket) {
+    return;
+  }
+  bucket.activeSessions = Math.max(0, bucket.activeSessions - 1);
+}
+// ========== Prompt Composition ==========
+/**
+ * Matches opening and closing `system-instruction` tags, case-insensitively
+ * and tolerating whitespace inside the angle brackets.
+ */
+const SYSTEM_INSTRUCTION_TAG = /<(\s*\/?\s*system-instruction\s*)>/gi;
+/**
+ * Compose the final prompt sent to Claude. The system prompt (from deployment
+ * config or per-request override) is prepended as a system instruction block.
+ * The end-user prompt follows the block.
+ *
+ * SECURITY: The end-user prompt is untrusted and is never interpolated into
+ * the system instruction. Because the system block is delimited by plain-text
+ * tags, any `<system-instruction>` / `</system-instruction>` tags inside the
+ * user prompt are neutralized (angle brackets replaced by `&lt;` / `&gt;`)
+ * so an end user cannot forge or prematurely close a system block.
+ *
+ * @param systemPrompt - Trusted system instruction, or null/empty for none.
+ * @param userPrompt - Untrusted end-user prompt.
+ * @returns The prompt text to send to the session.
+ */
+function composePrompt(systemPrompt: string | null, userPrompt: string): string {
+  const safeUserPrompt = userPrompt.replace(SYSTEM_INSTRUCTION_TAG, '&lt;$1&gt;');
+  if (!systemPrompt) {
+    return safeUserPrompt;
+  }
+  return [
+    '<system-instruction>',
+    systemPrompt,
+    '</system-instruction>',
+    '',
+    safeUserPrompt,
+  ].join('\n');
+}
+// ========== Handler ==========
+/**
+ * Run an observer callback without letting it influence the session outcome.
+ * A callback that throws is logged and otherwise ignored.
+ */
+function notifySafely(notify: () => void): void {
+  try {
+    notify();
+  } catch (callbackError: unknown) {
+    console.error('[HeadlessSession] Stream callback threw; ignoring.', callbackError);
+  }
+}
+/**
+ * Handle a headless session request for an end user.
+ *
+ * Validates the request and deployment config, checks rate limits, composes
+ * the prompt with the system instruction, and launches an isolated headless
+ * session via DeployAiService. Streams results back through the provided
+ * callbacks.
+ *
+ * Failures are reported through the returned result rather than thrown:
+ * errors raised by `execute` become `EXECUTION_FAILED`, and exceptions
+ * thrown by the `onUsageReport` / `onHealthUpdate` callbacks are logged and
+ * ignored so they cannot turn a completed session into a failure.
+ *
+ * @param request - Per-request input; `prompt` is untrusted end-user text.
+ * @param config - Deployment-level AI settings and limits.
+ * @param callbacks - Optional streaming and reporting observers.
+ * @returns Structured result with either the execution result or an error.
+ */
+export async function handleHeadlessSession(
+  request: HeadlessSessionRequest,
+  config: DeploymentAiConfig,
+  callbacks?: HeadlessSessionStreamCallbacks,
+): Promise<HeadlessSessionResult> {
+  // ── Validate request ───────────────────────────────────────
+  if (!request.prompt || request.prompt.trim().length === 0) {
+    return {
+      ok: false,
+      error: { code: 'INVALID_REQUEST', message: 'prompt is required and must not be empty.' },
+    };
+  }
+  if (!request.endUserId || request.endUserId.trim().length === 0) {
+    return {
+      ok: false,
+      error: { code: 'INVALID_REQUEST', message: 'endUserId is required.' },
+    };
+  }
+  // ── Validate AI is enabled ─────────────────────────────────
+  if (!config.aiEnabled) {
+    return {
+      ok: false,
+      error: { code: 'AI_DISABLED', message: 'AI features are not enabled for this deployment.' },
+    };
+  }
+  // ── Validate headless capability ───────────────────────────
+  if (!config.allowedAiCapabilities.includes('headless')) {
+    return {
+      ok: false,
+      error: {
+        code: 'CAPABILITY_DENIED',
+        message: "This deployment does not have the 'headless' AI capability enabled.",
+      },
+    };
+  }
+  // ── Rate limit checks ─────────────────────────────────────
+  const rateLimitError = checkRateLimit(config);
+  if (rateLimitError) {
+    return { ok: false, error: rateLimitError };
+  }
+  // ── Token limit pre-check ─────────────────────────────────
+  // Estimate input tokens from prompt length (~4 chars per token).
+  // Reject if estimated input alone exceeds the cap.
+  // NOTE(review): this reuses RATE_LIMIT_EXCEEDED although retrying the same
+  // prompt cannot succeed; kept as-is so existing callers see the same code.
+  if (config.maxTokensPerRequest !== null) {
+    const estimatedInputTokens = Math.ceil(request.prompt.length / 4);
+    if (estimatedInputTokens > config.maxTokensPerRequest) {
+      return {
+        ok: false,
+        error: {
+          code: 'RATE_LIMIT_EXCEEDED',
+          message: `Estimated input tokens (${estimatedInputTokens}) exceeds maxTokensPerRequest (${config.maxTokensPerRequest}). Shorten your prompt.`,
+        },
+      };
+    }
+  }
+  // ── Compose prompt ─────────────────────────────────────────
+  // Use per-request system prompt if provided, otherwise deployment default
+  const effectiveSystemPrompt = request.systemPrompt ?? config.defaultSystemPrompt;
+  const composedPrompt = composePrompt(effectiveSystemPrompt, request.prompt);
+  // Use per-request model if provided, otherwise deployment default
+  const effectiveModel = request.model ?? config.defaultModel;
+  // ── Launch isolated session ────────────────────────────────
+  const service = DeployAiService.getInstance();
+  // No await separates the limit check above from this call, so the check
+  // and the reservation cannot interleave with another request.
+  recordRequestStart(config.deploymentId);
+  try {
+    const result = await service.execute({
+      deploymentId: config.deploymentId,
+      prompt: composedPrompt,
+      workingDir: config.workingDir,
+      model: effectiveModel,
+      outputCallback: callbacks?.onOutput,
+      thinkingCallback: callbacks?.onThinking,
+      toolUseCallback: callbacks?.onToolUse,
+      // allowedTools from request are inverted: any tool NOT in the list is disallowed.
+      // If allowedTools is not specified, no additional restrictions are applied
+      // (Security Bouncer still governs tool access).
+      disallowedTools: request.allowedTools
+        ? invertAllowedTools(request.allowedTools)
+        : undefined,
+    });
+    // Session already ran — log the overage but don't fail the response.
+    // The overage is informational; the developer can use it for billing
+    // or to tighten limits.
+    if (
+      config.maxTokensPerRequest !== null &&
+      result.totalTokens > config.maxTokensPerRequest
+    ) {
+      console.warn(
+        `[HeadlessSession] Deployment "${config.deploymentId}" used ${result.totalTokens} tokens, ` +
+        `exceeding maxTokensPerRequest (${config.maxTokensPerRequest}).`
+      );
+    }
+    // Emit usage report after successful execution
+    notifySafely(() => callbacks?.onUsageReport?.({
+      deploymentId: config.deploymentId,
+      endUserId: request.endUserId,
+      capability: 'headless',
+      tokensUsed: result.totalTokens,
+      model: effectiveModel,
+      durationMs: result.durationMs,
+    }));
+    // Check for API key health issues from execution result
+    const healthStatus = detectAiHealthIssue(result.error);
+    if (healthStatus) {
+      notifySafely(() => callbacks?.onHealthUpdate?.({
+        deploymentId: config.deploymentId,
+        ...healthStatus,
+      }));
+    }
+    return { ok: true, result };
+  } catch (error: unknown) {
+    const message = error instanceof Error ? error.message : String(error);
+    // Check for API key health issues from caught errors
+    const healthStatus = detectAiHealthIssue(message);
+    if (healthStatus) {
+      notifySafely(() => callbacks?.onHealthUpdate?.({
+        deploymentId: config.deploymentId,
+        ...healthStatus,
+      }));
+    }
+    return {
+      ok: false,
+      error: { code: 'EXECUTION_FAILED', message },
+    };
+  } finally {
+    // Always release the concurrency slot, whatever the outcome.
+    recordRequestEnd(config.deploymentId);
+  }
+}
+// ========== Health Detection ==========
+/**
+ * Ordered lookup table for health detection: the first rule with a needle
+ * found in the lower-cased error message wins.
+ */
+const HEALTH_ISSUE_RULES: ReadonlyArray<{
+  needles: readonly string[];
+  outcome: { status: HealthUpdateData['status']; message: string; aiDisabled: boolean };
+}> = [
+  {
+    needles: ['invalid api key', 'invalid x-api-key', 'authentication_error'],
+    outcome: { status: 'invalid_key', message: 'Anthropic API key is invalid or revoked.', aiDisabled: true },
+  },
+  {
+    needles: ['insufficient_funds', 'no credits', 'billing', 'credit balance'],
+    outcome: { status: 'no_credits', message: 'Anthropic account has insufficient credits.', aiDisabled: true },
+  },
+  {
+    needles: ['rate_limit', 'rate limit', 'too many requests'],
+    outcome: { status: 'rate_limited', message: 'Anthropic API rate limit exceeded.', aiDisabled: false },
+  },
+];
+/**
+ * Classify an error message from Claude Code as an API health issue.
+ *
+ * Recognized categories, checked in this order:
+ * - invalid or revoked API key (401-style authentication errors)
+ * - no credits / billing problems (402-style errors)
+ * - upstream rate limiting (429-style errors)
+ *
+ * @param errorMessage - Error text to inspect; matching is case-insensitive.
+ * @returns The detected issue, or `null` for an empty message or no match.
+ */
+function detectAiHealthIssue(
+  errorMessage: string | undefined,
+): { status: HealthUpdateData['status']; message: string; aiDisabled: boolean } | null {
+  if (!errorMessage) return null;
+  const haystack = errorMessage.toLowerCase();
+  for (const rule of HEALTH_ISSUE_RULES) {
+    if (rule.needles.some((needle) => haystack.includes(needle))) {
+      // Hand back a fresh object so callers never share table entries.
+      return { ...rule.outcome };
+    }
+  }
+  return null;
+}
+// ========== Helpers ==========
+/**
+ * Standard Claude Code tool names that an allowlist can be inverted against.
+ * Tools outside this set are never added to the blocklist here (the
+ * Security Bouncer handles them).
+ */
+const KNOWN_TOOLS = [
+  'Read',
+  'Write',
+  'Edit',
+  'MultiEdit',
+  'Bash',
+  'Glob',
+  'Grep',
+  'WebFetch',
+  'WebSearch',
+  'TodoRead',
+  'TodoWrite',
+  'NotebookEdit',
+  'Agent',
+] as const;
+/**
+ * Convert the allowlist exposed by the headless session API into the
+ * blocklist (`disallowedTools`) that DeployAiService accepts.
+ *
+ * @param allowedTools - Tool names the session may use (case-sensitive).
+ * @returns Every known tool that is absent from `allowedTools`, in the
+ *   order the known tools are declared.
+ */
+function invertAllowedTools(allowedTools: string[]): string[] {
+  const blocked: string[] = [];
+  for (const candidate of KNOWN_TOOLS) {
+    if (!allowedTools.includes(candidate)) {
+      blocked.push(candidate);
+    }
+  }
+  return blocked;
+}
+/**
+ * Get the current rate limit state for a deployment.
+ * Useful for status/monitoring endpoints.
+ *
+ * This is a read-only query: a deployment with no recorded activity reports
+ * zeros and no bucket is allocated for it, so polling arbitrary or deleted
+ * deployment IDs does not grow the in-memory map. For a known deployment,
+ * timestamps older than one minute are pruned before counting.
+ *
+ * @param deploymentId - Deployment to inspect.
+ * @returns Requests recorded in the last minute and sessions currently active.
+ */
+export function getDeploymentRateLimitState(deploymentId: string): {
+  requestsInLastMinute: number;
+  activeSessions: number;
+} {
+  const bucket = rateBuckets.get(deploymentId);
+  if (!bucket) {
+    return { requestsInLastMinute: 0, activeSessions: 0 };
+  }
+  pruneTimestamps(bucket);
+  return {
+    requestsInLastMinute: bucket.timestamps.length,
+    activeSessions: bucket.activeSessions,
+  };
+}
+/**
+ * Reset rate limit state for a deployment. Call when a deployment
+ * is deleted or all its sessions are force-stopped.
+ *
+ * The deployment's bucket is removed entirely, so both its request
+ * timestamps and its active-session counter are discarded. Resetting a
+ * deployment that has no bucket is a no-op.
+ */
+export function resetDeploymentRateLimit(deploymentId: string): void {
+  rateBuckets.delete(deploymentId);
+}

package/server/services/pathUtils.ts CHANGED Viewed

@@ -8,7 +8,8 @@
  * All file explorer operations MUST validate paths through these functions.
  */
-import { isAbsolute, normalize, relative, resolve } from 'node:path';
+import { existsSync, lstatSync, realpathSync } from 'node:fs';
+import { dirname, isAbsolute, normalize, relative, resolve } from 'node:path';
 export interface PathValidationResult {
   valid: boolean;
@@ -43,6 +44,39 @@ export function validatePathWithinWorkingDir(
     // Normalize to remove any .. or . segments
     resolvedPath = normalize(resolvedPath);
+    // Resolve symlinks to prevent symlink-based path traversal.
+    // A symlink at /project/link -> /etc/passwd would pass the string
+    // check below but actually read outside the working directory.
+    // For existing paths: resolve the full path via realpath.
+    // For new paths (create operations): resolve the parent directory.
+    if (existsSync(resolvedPath)) {
+      // If the path itself is a symlink, resolve it to the real target
+      const stat = lstatSync(resolvedPath);
+      if (stat.isSymbolicLink()) {
+        resolvedPath = realpathSync(resolvedPath);
+      }
+    } else {
+      // Path doesn't exist yet (create operation) — validate the parent
+      const parentDir = dirname(resolvedPath);
+      if (existsSync(parentDir)) {
+        const realParent = realpathSync(parentDir);
+        const parentWithSep = normalizedWorkingDir.endsWith('/')
+          ? normalizedWorkingDir
+          : `${normalizedWorkingDir}/`;
+        if (realParent !== normalizedWorkingDir && !realParent.startsWith(parentWithSep)) {
+          console.error(
+            `[PathUtils] SECURITY: Symlink traversal in parent directory blocked. ` +
+            `Target: "${targetPath}", RealParent: "${realParent}", WorkingDir: "${normalizedWorkingDir}"`
+          );
+          return {
+            valid: false,
+            resolvedPath: '',
+            error: 'Access denied: parent directory resolves outside working directory'
+          };
+        }
+      }
+    }
     // Check if the resolved path starts with the working directory
     // Add trailing separator to prevent partial matches (e.g., /home/user vs /home/username)
     const workingDirWithSep = normalizedWorkingDir.endsWith('/')

package/server/services/plan/agent-loader.ts ADDED Viewed

@@ -0,0 +1,73 @@
+// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
+// Licensed under the MIT License. See LICENSE file for details.
+/**
+ * Agent Prompt Loader — loads review agent prompts from markdown files.
+ *
+ * Resolution order (first match wins):
+ *   1. Board-level override:  {boardDir}/agents/{agentName}.md
+ *   2. System default:        cli/server/services/plan/agents/{agentName}.md
+ *
+ * Files use YAML frontmatter + markdown body with {{variable}} placeholders.
+ * Falls back to null when no file is found (caller should use hardcoded fallback).
+ */
+import { existsSync, readFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+/** Directory containing this module, derived from `import.meta.url`. */
+const __dirname = dirname(fileURLToPath(import.meta.url));
+/** Directory of the system-default agent prompt files (`agents/` next to this module). */
+const SYSTEM_AGENTS_DIR = join(__dirname, 'agents');
+/**
+ * Strip a leading YAML frontmatter block (--- ... ---) from markdown,
+ * returning just the body with leading whitespace removed.
+ *
+ * Both delimiters must sit on their own line (trailing spaces/tabs and CRLF
+ * line endings are tolerated). A `---` that merely appears inside a
+ * frontmatter value therefore does not end the block, and a document that
+ * opens with a longer dash rule or has no closing delimiter is returned
+ * unchanged.
+ */
+function stripFrontmatter(content: string): string {
+  const frontmatter = /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/.exec(content);
+  if (!frontmatter) return content;
+  return content.slice(frontmatter[0].length).trimStart();
+}
+/**
+ * Replace all {{variable}} placeholders with values from the provided map.
+ *
+ * Only the map's own keys are substituted; a placeholder with no matching
+ * key is left in the output verbatim. Inherited properties (for example
+ * `{{constructor}}` or `{{toString}}`) are deliberately not treated as
+ * variables, so they can never be rendered as stringified functions.
+ */
+function interpolate(template: string, variables: Record<string, string>): string {
+  return template.replace(/\{\{(\w+)\}\}/g, (placeholder: string, key: string) => {
+    return Object.prototype.hasOwnProperty.call(variables, key) ? variables[key] : placeholder;
+  });
+}
+/**
+ * Load an agent prompt by name, trying each known location in priority order.
+ *
+ * Locations tried (first readable file wins):
+ *   1. `{boardDir}/agents/{agentName}.md` — only when `boardDir` is given
+ *   2. the system default directory next to this module
+ *
+ * A location that does not exist, or that exists but cannot be read or
+ * rendered, is skipped in favor of the next one.
+ *
+ * @param agentName - The agent file name without extension (e.g., "review-code")
+ * @param variables - Key-value map for {{variable}} substitution
+ * @param boardDir  - Optional board directory for board-level overrides
+ * @returns The interpolated prompt string, or null if no agent file found
+ */
+export function loadAgentPrompt(
+  agentName: string,
+  variables: Record<string, string>,
+  boardDir?: string | null,
+): string | null {
+  const fileName = `${agentName}.md`;
+  const candidates: string[] = [];
+  if (boardDir) {
+    candidates.push(join(boardDir, 'agents', fileName));
+  }
+  candidates.push(join(SYSTEM_AGENTS_DIR, fileName));
+  for (const candidate of candidates) {
+    if (!existsSync(candidate)) continue;
+    try {
+      const raw = readFileSync(candidate, 'utf-8');
+      return interpolate(stripFrontmatter(raw), variables);
+    } catch { /* fall through to the next location, or to null */ }
+  }
+  return null;
+}

package/server/services/plan/agents/review-code.md ADDED Viewed

@@ -0,0 +1,28 @@
+---
+name: review-code
+description: Reviews tasks that modify files — checks acceptance criteria, code quality where applicable, and output correctness
+type: review
+variables: [issue_id, issue_title, files_modified, acceptance_criteria, output_path]
+checks: [criteria_met, code_quality, no_obvious_bugs]
+---
+You are a reviewer. Review the work done for issue {{issue_id}}: {{issue_title}}.
+## Files Modified
+{{files_modified}}
+## Acceptance Criteria
+{{acceptance_criteria}}
+## Instructions
+1. Read each modified file listed above
+2. Check if all acceptance criteria are met by the changes
+3. Evaluate the quality of the changes:
+   - For source code files: look for obvious bugs, security vulnerabilities, or code quality issues
+   - For content files (markdown, docs, config, copy): check for accuracy, completeness, and appropriate structure
+4. Check if the output artifact exists at: {{output_path}}
+Output EXACTLY one JSON object on its own line (no markdown fencing):
+{"passed": true, "checks": [{"name": "criteria_met", "passed": true, "details": "..."}]}
+Include checks for: criteria_met, code_quality, no_obvious_bugs.

package/server/services/plan/agents/review-custom.md ADDED Viewed

@@ -0,0 +1,27 @@
+---
+name: review-custom
+description: Reviews work using board-defined custom criteria alongside acceptance criteria — works for code, content, research, planning, and any other task type
+type: review
+variables: [issue_id, issue_title, context_section, acceptance_criteria, review_criteria, read_instruction]
+checks: [criteria_met, review_criteria]
+---
+You are a reviewer. Review the work done for issue {{issue_id}}: {{issue_title}}.
+{{context_section}}
+## Acceptance Criteria
+{{acceptance_criteria}}
+## Review Criteria
+{{review_criteria}}
+## Instructions
+1. {{read_instruction}}
+2. Check if all acceptance criteria are met — evaluate each criterion individually
+3. Evaluate thoroughly against the review criteria above
+4. Consider the overall quality of the work: does it fully address the issue's intent, is it well-structured, and is it ready to ship?
+Output EXACTLY one JSON object on its own line (no markdown fencing):
+{"passed": true, "checks": [{"name": "criteria_met", "passed": true, "details": "..."}]}
+Include checks for: criteria_met, review_criteria.

package/server/services/plan/agents/review-quality.md ADDED Viewed

@@ -0,0 +1,42 @@
+---
+name: review-quality
+description: Reviews non-code output (writing, research, plans, designs, analysis) for completeness, accuracy, and quality against acceptance criteria
+type: review
+variables: [issue_id, issue_title, output_path, issue_spec_path, acceptance_criteria]
+checks: [criteria_met, output_quality, completeness]
+---
+You are a quality reviewer. Review the work done for issue {{issue_id}}: {{issue_title}}.
+## Output File
+{{output_path}}
+## Issue Spec
+{{issue_spec_path}}
+## Acceptance Criteria
+{{acceptance_criteria}}
+## Instructions
+1. Read the output file at the path above
+2. Read the full issue spec to understand the original requirements and intent
+3. Evaluate the output against ALL of the following dimensions:
+### Acceptance Criteria
+- Are all acceptance criteria met? Check each one individually.
+### Content Quality
+- Is the content accurate, well-reasoned, and free of factual errors?
+- Is it written clearly with appropriate structure and organization?
+- Does it have sufficient depth and detail for its purpose?
+- Are the tone and style appropriate for the intended audience?
+### Completeness
+- Does the output fully address what was requested in the issue spec?
+- Are there obvious gaps, missing sections, or incomplete thoughts?
+- If the issue requested specific deliverables (e.g., a plan, analysis, document), are all deliverables present?
+Output EXACTLY one JSON object on its own line (no markdown fencing):
+{"passed": true, "checks": [{"name": "criteria_met", "passed": true, "details": "..."}]}
+Include checks for: criteria_met, output_quality, completeness.

package/server/services/plan/composer.ts CHANGED Viewed

@@ -5,7 +5,7 @@
  * Plan Composer — Handles natural language prompts for PPS creation/editing.
  *
  * When a planPrompt message arrives, this builds a context-enriched prompt
- * against the .pm/ (or legacy .plan/) directory and spawns a scoped
+ * against the .mstro/pm/ directory and spawns a scoped
  * HeadlessRunner session to execute it.
  */
@@ -248,6 +248,10 @@ User request: ${userPrompt}`;
     const runner = new HeadlessRunner({
       workingDir,
       directPrompt: enrichedPrompt,
+      stallWarningMs: 300_000,   // 5 min — compose usually finishes quickly
+      stallKillMs: 900_000,      // 15 min
+      stallHardCapMs: 1_800_000, // 30 min hard cap
+      verbose: true,
       outputCallback: (text: string) => {
         ctx.send(ws, {
           type: 'planPromptStreaming',

package/server/services/plan/dependency-resolver.ts CHANGED Viewed

@@ -79,7 +79,7 @@ function dfs(
  * Compute the set of issues that are ready to work on.
  * An issue is ready if:
  * - It's not an epic
- * - Its status is backlog or todo (not started, done, or cancelled)
+ * - Its status is todo (refined and ready for execution)
  * - All its blocked_by items are done or cancelled
  *
  * If epicScope is provided, only returns issues belonging to that epic.
@@ -90,7 +90,7 @@ export function resolveReadyToWork(issues: Issue[], epicScope?: string, sprintSc
     issueByPath.set(issue.path, issue);
   }
-  const readyStatuses = new Set(['backlog', 'todo']);
+  const readyStatuses = new Set(['todo']);
   const doneStatuses = new Set(['done', 'cancelled']);
   const priorityOrder: Record<string, number> = { P0: 0, P1: 1, P2: 2, P3: 3 };