npm - @blockrun/franklin - Versions diffs - 3.3.2 → 3.5.0 - Mend

@blockrun/franklin 3.3.2 → 3.5.0

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (110) hide show

package/README.md +58 -7
package/dist/agent/commands.d.ts +1 -1
package/dist/agent/commands.js +128 -17
package/dist/agent/compact.d.ts +2 -2
package/dist/agent/compact.js +148 -22
package/dist/agent/context.d.ts +8 -3
package/dist/agent/context.js +301 -108
package/dist/agent/error-classifier.d.ts +11 -2
package/dist/agent/error-classifier.js +64 -10
package/dist/agent/llm.d.ts +8 -1
package/dist/agent/llm.js +114 -19
package/dist/agent/loop.d.ts +1 -2
package/dist/agent/loop.js +509 -61
package/dist/agent/optimize.d.ts +2 -2
package/dist/agent/optimize.js +9 -7
package/dist/agent/permissions.d.ts +1 -1
package/dist/agent/permissions.js +1 -1
package/dist/agent/planner.d.ts +42 -0
package/dist/agent/planner.js +110 -0
package/dist/agent/reduce.d.ts +7 -1
package/dist/agent/reduce.js +85 -3
package/dist/agent/streaming-executor.d.ts +6 -1
package/dist/agent/streaming-executor.js +83 -5
package/dist/agent/tokens.d.ts +11 -2
package/dist/agent/tokens.js +38 -5
package/dist/agent/tool-guard.d.ts +27 -0
package/dist/agent/tool-guard.js +324 -0
package/dist/agent/types.d.ts +7 -1
package/dist/agent/types.js +1 -1
package/dist/banner.js +27 -40
package/dist/brain/extract.d.ts +11 -0
package/dist/brain/extract.js +154 -0
package/dist/brain/index.d.ts +3 -0
package/dist/brain/index.js +2 -0
package/dist/brain/store.d.ts +42 -0
package/dist/brain/store.js +225 -0
package/dist/brain/types.d.ts +45 -0
package/dist/brain/types.js +5 -0
package/dist/commands/daemon.js +2 -1
package/dist/commands/start.js +16 -3
package/dist/config.js +1 -1
package/dist/index.js +27 -2
package/dist/learnings/extractor.d.ts +13 -0
package/dist/learnings/extractor.js +69 -8
package/dist/learnings/index.d.ts +1 -1
package/dist/learnings/index.js +1 -1
package/dist/learnings/store.js +42 -13
package/dist/learnings/types.d.ts +1 -1
package/dist/mcp/client.d.ts +1 -1
package/dist/mcp/client.js +5 -5
package/dist/mcp/config.d.ts +1 -1
package/dist/mcp/config.js +1 -1
package/dist/panel/html.d.ts +2 -0
package/dist/panel/html.js +409 -146
package/dist/panel/server.js +19 -0
package/dist/pricing.js +3 -2
package/dist/proxy/fallback.d.ts +3 -1
package/dist/proxy/fallback.js +4 -4
package/dist/proxy/server.js +29 -11
package/dist/proxy/sse-translator.js +1 -1
package/dist/router/categories.d.ts +21 -0
package/dist/router/categories.js +96 -0
package/dist/router/index.d.ts +9 -2
package/dist/router/index.js +106 -27
package/dist/router/local-elo.d.ts +32 -0
package/dist/router/local-elo.js +107 -0
package/dist/router/selector.d.ts +46 -0
package/dist/router/selector.js +106 -0
package/dist/session/storage.d.ts +5 -1
package/dist/session/storage.js +24 -2
package/dist/social/a11y.d.ts +1 -1
package/dist/social/a11y.js +5 -1
package/dist/social/browser.d.ts +5 -0
package/dist/social/browser.js +22 -0
package/dist/social/preflight.d.ts +4 -0
package/dist/social/preflight.js +42 -3
package/dist/stats/failures.d.ts +20 -0
package/dist/stats/failures.js +63 -0
package/dist/stats/format.d.ts +6 -0
package/dist/stats/format.js +23 -0
package/dist/stats/insights.js +1 -21
package/dist/stats/session-tracker.d.ts +21 -0
package/dist/stats/session-tracker.js +28 -0
package/dist/stats/tracker.d.ts +1 -1
package/dist/stats/tracker.js +1 -1
package/dist/tools/bash.d.ts +14 -1
package/dist/tools/bash.js +132 -7
package/dist/tools/edit.js +77 -14
package/dist/tools/glob.js +13 -3
package/dist/tools/grep.js +30 -12
package/dist/tools/imagegen.js +3 -3
package/dist/tools/index.d.ts +1 -1
package/dist/tools/index.js +5 -1
package/dist/tools/read.d.ts +16 -2
package/dist/tools/read.js +36 -8
package/dist/tools/searchx.d.ts +6 -2
package/dist/tools/searchx.js +221 -44
package/dist/tools/subagent.js +37 -3
package/dist/tools/task.js +43 -7
package/dist/tools/validate.d.ts +11 -0
package/dist/tools/validate.js +42 -0
package/dist/tools/webfetch.js +18 -7
package/dist/tools/websearch.js +41 -7
package/dist/tools/write.js +26 -6
package/dist/ui/app.js +31 -6
package/dist/ui/model-picker.d.ts +1 -1
package/dist/ui/model-picker.js +1 -1
package/dist/ui/terminal.d.ts +1 -1
package/dist/ui/terminal.js +1 -1
package/package.json +2 -2

package/dist/agent/context.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Context Manager for runcode
+ * Context Manager for Franklin
  * Assembles system instructions, reads project config, injects environment info.
  */
 import fs from 'node:fs';
@@ -7,50 +7,160 @@ import path from 'node:path';
 import { execSync } from 'node:child_process';
 import { loadLearnings, decayLearnings, saveLearnings, formatForPrompt } from '../learnings/store.js';
 // ─── System Instructions Assembly ──────────────────────────────────────────
-const BASE_INSTRUCTIONS = `You are runcode, an AI coding agent that helps users with software engineering tasks.
-You have access to tools for reading, writing, editing files, running shell commands, searching codebases, web browsing, and more.
+// Composable prompt sections — each independently maintainable and conditionally includable.
+function getCoreInstructions() {
+    return `You are Franklin, an autonomous AI agent with a wallet. You help users with software engineering, marketing campaigns, trading signals, and any task that benefits from an agent that can reason, act, and spend.
-# Core Principles
-- Read before writing: always understand existing code before making changes.
-- Be precise: make minimal, targeted changes. Don't refactor code you weren't asked to touch.
-- Be safe: never introduce security vulnerabilities. Validate at system boundaries.
-- Be honest: if you're unsure, say so. Don't guess at implementation details.
+You are an interactive agent — not a chatbot. Use the tools available to you to accomplish tasks. Your job is to be a highly capable collaborator who takes initiative, makes progress, and delivers results.
-# Tool Usage
-- **Read**: Read files with line numbers. Use offset/limit for large files.
-- **Edit**: Targeted string replacement (preferred for existing files). old_string must be unique.
-- **Write**: Create new files or full rewrites.
-- **Bash**: Run shell commands. Default timeout 2min. Batch sequential commands with && to reduce round-trips.
-- **Glob**: Find files by pattern. Skips node_modules/.git.
-- **Grep**: Regex search. Default: file paths. output_mode "content" for matching lines.
-- **WebFetch** / **WebSearch**: Fetch pages or search the web.
-- **Task**: Track multi-step work.
-- **Agent**: Spawn parallel sub-agents.
+# System
+- All text you output outside of tool use is displayed to the user. Use markdown for formatting.
+- Tools are your hands. You MUST use tools to take action — do not describe what you would do without doing it. Never end your turn with a promise of future action — execute it now. Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user.
+- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make ALL independent tool calls in parallel. This is critical for performance. However, if tool calls depend on previous results, run them sequentially — do NOT use placeholders or guess dependent values.
-# Best Practices
-- Glob/Grep before Read; Read before Edit.
-- **Parallel**: call independent tools together in one response.
-- **Batch bash**: combine sequential shell commands into one Bash call with && or a script. Only split when you need to inspect intermediate output.
-- **AskUser**: Only use AskUser when you are about to perform a destructive action (deleting files, dropping databases) and need explicit confirmation. NEVER use AskUser to ask what the user wants — just answer their message directly. If their request is vague, make a reasonable assumption and proceed.
-- Never write to /etc, /usr, ~/.ssh, ~/.aws. Don't commit secrets.
-- Type /help to see all slash commands.
+# Doing Tasks
+- The user will primarily request software engineering tasks: solving bugs, adding features, refactoring, explaining code, and more. When given an unclear or generic instruction, consider it in the context of the current working directory and codebase.
+- You are highly capable. Users come to you for ambitious tasks that would otherwise take too long. Defer to user judgment about scope.
+- In general, do not propose changes to code you haven't read. Read it first. Understand existing code before suggesting modifications.
+- Do not create files unless absolutely necessary. Prefer editing existing files to creating new ones.
+- If an approach fails, diagnose why before switching tactics — read the error, check your assumptions, try a focused fix. Don't retry the identical action blindly, but don't abandon a viable approach after a single failure either. Escalate to the user only when genuinely stuck after investigation.
+- For UI or frontend changes, always test in a browser before reporting the task as complete. Type checking and test suites verify code correctness, not feature correctness.
+- Break down complex work with the Task tool to track progress. Mark each task completed as soon as you finish it — don't batch.
-# Missing Access
+# Using Your Tools
+- Do NOT use Bash when a dedicated tool exists. This is CRITICAL:
+  - Read files: use Read (NOT cat/head/tail/sed)
+  - Edit files: use Edit (NOT sed/awk)
+  - Create files: use Write (NOT echo/heredoc)
+  - Search content: use Grep (NOT grep/rg)
+  - Find files: use Glob (NOT find/ls)
+- Reserve Bash exclusively for shell operations: builds, installs, git, npm/pip, processes, scripts.
+- **Search strategy**: Glob/Grep for directed searches (known file/symbol). Use Agent for open-ended exploration that may require multiple rounds.
+- **Batch bash**: chain sequential shell commands with && in a single call. Only split when you need intermediate output.
+- **AskUser discipline**: Only use AskUser when you need explicit confirmation for a destructive action (deleting files, dropping databases). NEVER use AskUser to ask what the user wants — just answer their message directly. If the request is vague, make a reasonable assumption and proceed.
+- Never write to /etc, /usr, ~/.ssh, ~/.aws. Don't commit secrets.`;
+}
+function getCodeStyleSection() {
+    return `# Code Quality
+- Don't add features, refactor code, or make "improvements" beyond what was asked. A bug fix doesn't need surrounding code cleaned up. A simple feature doesn't need extra configurability. Don't add docstrings, comments, or type annotations to code you didn't change. Only add comments where the logic isn't self-evident.
+- Don't add error handling, fallbacks, or validation for scenarios that can't happen. Trust internal code and framework guarantees. Only validate at system boundaries (user input, external APIs). Don't use feature flags or backwards-compatibility shims when you can just change the code.
+- Don't create helpers, utilities, or abstractions for one-time operations. Don't design for hypothetical future requirements. The right amount of complexity is what the task actually requires — no speculative abstractions, but no half-finished implementations either. Three similar lines of code is better than a premature abstraction.
+- Be careful not to introduce security vulnerabilities such as command injection, XSS, SQL injection, and other OWASP top 10 vulnerabilities. If you notice insecure code, fix it immediately. Prioritize writing safe, secure, and correct code.
+- Avoid backwards-compatibility hacks like renaming unused _vars, re-exporting types, adding // removed comments for removed code. If something is unused, delete it completely.
+# Verification & Honesty
+- Before reporting a task complete, verify it actually works: run the test, execute the script, check the output. If you can't verify, say so explicitly rather than claiming success.
+- Report outcomes faithfully: if tests fail, say so with the relevant output. Never claim "all tests pass" when output shows failures. Never suppress or simplify failing checks to manufacture a green result. When a check did pass, state it plainly — do not hedge confirmed results with unnecessary disclaimers.`;
+}
+function getActionsSection() {
+    return `# Executing Actions with Care
+Carefully consider the reversibility and blast radius of actions. You can freely take local, reversible actions like editing files or running tests. But for actions that are hard to reverse, affect shared systems, or could be destructive, check with the user before proceeding. The cost of pausing to confirm is low; the cost of an unwanted action (lost work, unintended messages, deleted branches) can be very high.
+Examples of risky actions that warrant user confirmation:
+- Destructive operations: deleting files/branches, dropping database tables, killing processes, rm -rf, overwriting uncommitted changes
+- Hard-to-reverse operations: force-pushing, git reset --hard, amending published commits, removing or downgrading packages/dependencies
+- Actions visible to others or that affect shared state: pushing code, creating/closing/commenting on PRs or issues, sending messages, posting to external services
+- Uploading content to third-party web tools (pastebins, gists) publishes it — consider whether it could be sensitive
+When you encounter an obstacle, do not use destructive actions as a shortcut. Identify root causes and fix underlying issues rather than bypassing safety checks (e.g. --no-verify). If you discover unexpected state like unfamiliar files, branches, or configuration, investigate before deleting or overwriting — it may represent the user's in-progress work.
+A user approving an action once does NOT mean they approve it in all contexts. Match the scope of your actions to what was actually requested. When in doubt, ask before acting.`;
+}
+function getOutputEfficiencySection() {
+    return `# Output Efficiency
+Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said. Do not narrate your actions ("Let me read the file...", "I'll now search for..."). Just call the tools.
+Focus text output on:
+- Decisions that need the user's input
+- Results and conclusions (not the process)
+- Errors or blockers that change the plan
+If you can say it in one sentence, don't use three. Don't explain what tools you're going to use — the user can see tool calls directly. Only add text when it provides value beyond what the tool calls show.`;
+}
+function getToneAndStyleSection() {
+    return `# Tone and Style
+- Only use emojis if the user explicitly requests it. Avoid using emojis in all communication unless asked.
+- Your responses should be short and concise.
+- When referencing specific functions or pieces of code include the pattern file_path:line_number to allow the user to easily navigate to the source code location.
+- Do not use a colon before tool calls. Your tool calls may not be shown directly in the output, so text like "Let me read the file:" followed by a read tool call should just be "Let me read the file." with a period.`;
+}
+function getGitProtocolSection() {
+    return `# Git Protocol
+Only create commits when the user explicitly asks. Do not commit proactively.
+## Git Safety
+- NEVER update the git config.
+- NEVER run destructive git commands (push --force, reset --hard, checkout ., clean -f, branch -D) unless the user explicitly requests it.
+- NEVER skip hooks (--no-verify) unless the user explicitly requests it.
+- NEVER force push to main/master. Warn the user if they request it.
+- ALWAYS create NEW commits rather than amending, unless the user explicitly requests a git amend. When a pre-commit hook fails, the commit did NOT happen — so --amend would modify the PREVIOUS commit. Fix the issue, re-stage, and create a NEW commit.
+- When staging files, prefer adding specific files by name rather than using "git add -A" or "git add .", which can accidentally include sensitive files (.env, credentials) or large binaries.
+## Commit Workflow
+When the user asks you to commit:
+1. Run git status and git diff to see all changes.
+2. Run git log --oneline -5 to match the repo's commit message style.
+3. Draft a concise commit message (1-2 sentences) that focuses on the "why" rather than the "what".
+4. Stage relevant files by name. Do not commit files that likely contain secrets (.env, credentials.json).
+5. Create the commit.
+6. Run git status to verify success.
+## PR Workflow
+When the user asks you to create a PR:
+1. Run git status, git diff, and git log to understand the full commit history for the branch.
+2. Draft a short PR title (under 70 chars) and a description with Summary and Test Plan sections.
+3. Push to remote with -u flag if needed.
+4. Create the PR.`;
+}
+function getSocialMarketingSection() {
+    return `# X / Social Marketing — STRICT RULES
+SearchX is the ONLY tool that can access X.com. WebSearch and WebFetch CANNOT access X.com content.
+RULES (violations will produce garbage output):
+1. Make ONE SearchX call per topic. Never retry with variations.
+2. If SearchX returns empty, tell the user "No posts found" and suggest a different keyword. Do NOT fall back to WebSearch/WebFetch — they will return non-X content that you must NEVER present as X posts.
+3. NEVER fabricate X post URLs. Every link you show MUST come from SearchX results. If a URL doesn't start with "https://x.com/", do NOT present it as an X post.
+4. Present results as a numbered list. Each item: author, snippet, URL from SearchX, and a 1-2 sentence suggested reply.
+5. Reply drafts must sound like a real human: short, specific to the post content, conversational. NO marketing speak, NO "Great point about...", NO corporate tone. Write like a smart friend, not a LinkedIn bot.
+6. End with: "Reply to any? Give me the number."
+7. Do NOT auto-post. Do NOT explain how the social system works.
+When checking notifications/mentions: Use SearchX with mode="notifications". One call, done.`;
+}
+function getMissingAccessSection() {
+    return `# Missing Access
 Always deliver results first using whatever tools work (WebSearch, WebFetch, etc.). Never let missing access block you.
 After delivering results, if a better data source exists, add one line at the end:
 "Tip: run franklin social setup && franklin social login x for live X data."
-Do NOT check access before acting. Do NOT explain what you tried. Just deliver, then tip.
-# X / Social Marketing
-When the user asks about X posts, trending topics, or social growth:
-1. Search and find relevant posts immediately (WebSearch or SearchX).
-2. Present results as a numbered list of SUGGESTIONS, not actions. Each item must include:
-   - The post author and a short snippet
-   - A clickable link (https://x.com/...)
-   - A suggested reply draft (2-3 sentences, natural tone, not salesy)
-3. End with: "Reply to any of these? Give me the number."
-4. Do NOT auto-post. Do NOT explain how the social system works. Do NOT dump config JSON.
-5. If the user asks to set up X access, ask them simple questions one at a time (handle? product? keywords?) and write the config yourself. Never show raw JSON to the user.`;
+Do NOT check access before acting. Do NOT explain what you tried. Just deliver, then tip.`;
+}
+function getToolPatternsSection() {
+    return `# Tool Selection Patterns
+- **Finding files**: Glob first (by name/pattern), then Grep (by content), then Read (specific file). Don't start with Read unless you know the exact path.
+- **Understanding code**: Glob for structure → Read key files → Grep for specific symbols/patterns. Don't read every file in a directory.
+- **Making changes**: Read the file → Edit with targeted replacement → verify the edit worked (Read again or run tests). Never Edit without Reading first.
+- **Running commands**: Use Bash for shell operations that have no dedicated tool. Chain commands with && when sequential. Use separate Bash calls when you need to inspect intermediate output.
+- **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
+- **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
+- **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.`;
+}
+function getTokenEfficiencySection() {
+    return `# Token Efficiency
+- **Search once, not 10 times.** Do NOT run WebSearch with slight query variations. 3-5 searches MAX per topic. If results are empty, stop.
+- **Stop after repeated misses.** If 2 similar searches return empty results, stop and synthesize what you have.
+- **Read files once.** Do NOT re-read files you already read in this conversation. The content is already in your context. Check your memory before calling Read.
+- **Present results early.** After 3 searches, present what you found. Do not keep searching — the user can ask for more.
+- **Minimize tool calls.** Each tool call costs tokens. Before calling a tool, ask: do I already have this information? Can I answer from what's in context? If yes, don't call the tool.
+- **Be concise.** Short, direct responses. Don't repeat what the user said. Don't explain what you're about to do — just do it. Don't narrate your tool calls.
+- **Parallel, not sequential.** When you need 3 pieces of independent information, make 3 tool calls in ONE response — not 3 separate turns. Each turn has overhead.`;
+}
+function getVerificationSection() {
+    return `# Before Responding (verification checklist)
+- Correctness: does your output satisfy the user's request?
+- Grounding: are all factual claims backed by tool results, not your memory?
+- URLs: does every link come from a tool result? NEVER fabricate URLs.
+- Conciseness: is the response direct and actionable, not verbose filler?`;
+}
 // Cache assembled instructions per workingDir — avoids re-running git commands
 // when sub-agents are spawned (common in parallel tool use patterns).
 const _instructionCache = new Map();
@@ -58,11 +168,24 @@ const _instructionCache = new Map();
  * Build the full system instructions array for a session.
  * Result is memoized per workingDir for the process lifetime.
  */
-export function assembleInstructions(workingDir) {
-    const cached = _instructionCache.get(workingDir);
+export function assembleInstructions(workingDir, model) {
+    const cacheKey = model ? `${workingDir}::${model}` : workingDir;
+    const cached = _instructionCache.get(cacheKey);
     if (cached)
         return cached;
-    const parts = [BASE_INSTRUCTIONS];
+    const parts = [
+        getCoreInstructions(),
+        getCodeStyleSection(),
+        getActionsSection(),
+        getOutputEfficiencySection(),
+        getToneAndStyleSection(),
+        getGitProtocolSection(),
+        getSocialMarketingSection(),
+        getMissingAccessSection(),
+        getToolPatternsSection(),
+        getTokenEfficiencySection(),
+        getVerificationSection(),
+    ];
     // Read RUNCODE.md or CLAUDE.md from the project
     const projectConfig = readProjectConfig(workingDir);
     if (projectConfig) {
@@ -87,12 +210,71 @@ export function assembleInstructions(workingDir) {
         }
     }
     catch { /* learnings are optional — never block startup */ }
-    _instructionCache.set(workingDir, parts);
+    // Model-specific execution guidance
+    if (model) {
+        parts.push(getModelGuidance(model));
+    }
+    _instructionCache.set(cacheKey, parts);
     return parts;
 }
+/**
+ * Model-family-specific execution guidance.
+ * Weak models get strict guardrails. Strong models get quality standards.
+ */
+export function getModelGuidance(model) {
+    const m = model.toLowerCase();
+    // Weak/cheap models: strict discipline to prevent looping and hallucination
+    if (m.includes('glm') || m.includes('gpt-oss') || m.includes('nemotron') ||
+        m.includes('minimax') || m.includes('devstral') || m.includes('llama-4')) {
+        return `# Execution Discipline (strict — this model requires guardrails)
+- Make ONE tool call per task. Do NOT retry the same tool with query variations.
+- If a tool returns empty results, tell the user immediately. Do NOT fall back to other tools.
+- NEVER fabricate data, URLs, or quotes. If you don't have it, say so.
+- Keep responses under 300 words. Be direct, not verbose.
+- Before responding: does every URL and fact come from a tool result? If not, remove it.`;
+    }
+    // Medium models: balanced guidance
+    if (m.includes('kimi') || m.includes('grok') || m.includes('flash') ||
+        m.includes('haiku') || m.includes('deepseek') || m.includes('qwen')) {
+        return `# Execution Guidance
+- Use tools to verify facts before stating them. Do not answer from memory when a tool can confirm.
+- Batch independent tool calls in one response (parallel execution).
+- If a tool fails, explain the failure to the user. Do not silently retry with a different tool.
+- Before responding: are all claims grounded in tool output? Remove anything unverified.`;
+    }
+    // Strong models: quality standards + thinking guidance
+    if (m.includes('claude') || m.includes('gpt-5') || m.includes('opus') ||
+        m.includes('sonnet') || m.includes('gemini-2.5-pro') || m.includes('gemini-3') ||
+        m.includes('o3') || m.includes('o1') || m.includes('codex')) {
+        return `# Quality Standards (strong model)
+- Keep calling tools until the task is complete AND the result is verified. Don't stop at "this should work" — prove it works.
+- Before finalizing: check correctness, grounding in tool output, and formatting.
+- If proceeding with incomplete information, label assumptions explicitly.
+- Prefer depth over breadth — a thorough answer to one question beats shallow answers to many.
+- Use your thinking to plan multi-step operations before executing them. Think about what tools you need, in what order, and what could go wrong.
+- When debugging: think through the error systematically — read the error message, form a hypothesis, verify with tools, then fix. Don't guess-and-check.
+- When making architectural decisions, consider second-order effects: will this change break other callers? Will it scale? Is it consistent with existing patterns?
+- You have the capability to handle ambitious, complex tasks. Don't artificially constrain yourself — if the task needs 20 tool calls, make 20 tool calls.`;
+    }
+    // Default: basic guidance
+    return `# Execution Guidance
+- Use tools to verify facts. Do not answer from memory when a tool can confirm.
+- If a tool fails, tell the user. Do not silently retry.
+- Before responding: are claims grounded in tool output?`;
+}
 /** Invalidate cache for a workingDir (call after /clear or session reset). */
 export function invalidateInstructionCache(workingDir) {
-    _instructionCache.delete(workingDir);
+    if (workingDir) {
+        // Clear all entries for this workDir (any model)
+        for (const key of _instructionCache.keys()) {
+            if (key.startsWith(workingDir)) {
+                _instructionCache.delete(key);
+            }
+        }
+    }
+    else {
+        _instructionCache.clear();
+    }
 }
 // ─── Project Config ────────────────────────────────────────────────────────
 /**
@@ -124,12 +306,26 @@ function readProjectConfig(dir) {
 // ─── Environment ───────────────────────────────────────────────────────────
 function buildEnvironmentSection(workingDir) {
     const lines = ['# Environment'];
-    lines.push(`- Working directory: ${workingDir}`);
+    lines.push(`- Primary working directory: ${workingDir}`);
     lines.push(`- Platform: ${process.platform}`);
     lines.push(`- Node.js: ${process.version}`);
     // Detect shell
     const shell = process.env.SHELL || process.env.COMSPEC || 'unknown';
     lines.push(`- Shell: ${path.basename(shell)}`);
+    // OS version
+    try {
+        const osRelease = execSync('uname -r', { encoding: 'utf-8', timeout: 2000, stdio: ['pipe', 'pipe', 'pipe'] }).trim();
+        lines.push(`- OS Version: ${process.platform === 'darwin' ? 'Darwin' : process.platform} ${osRelease}`);
+    }
+    catch { /* ignore */ }
+    // Git repo detection
+    try {
+        execSync('git rev-parse --is-inside-work-tree', { cwd: workingDir, timeout: 2000, stdio: ['pipe', 'pipe', 'pipe'] });
+        lines.push('- Is a git repository: true');
+    }
+    catch {
+        lines.push('- Is a git repository: false');
+    }
     // Date
     lines.push(`- Date: ${new Date().toISOString().split('T')[0]}`);
     return lines.join('\n');
@@ -139,76 +335,73 @@ const GIT_TIMEOUT_MS = 5_000;
 // Max chars for git log output — long commit messages can bloat the system prompt
 const MAX_GIT_LOG_CHARS = 2_000;
 function getGitContext(workingDir) {
+    const gitCmd = (cmd) => execSync(cmd, {
+        cwd: workingDir,
+        encoding: 'utf-8',
+        stdio: ['pipe', 'pipe', 'pipe'],
+        timeout: GIT_TIMEOUT_MS,
+    }).trim();
     try {
-        const isGit = execSync('git rev-parse --is-inside-work-tree', {
-            cwd: workingDir,
-            encoding: 'utf-8',
-            stdio: ['pipe', 'pipe', 'pipe'],
-            timeout: GIT_TIMEOUT_MS,
-        }).trim();
-        if (isGit !== 'true')
+        if (gitCmd('git rev-parse --is-inside-work-tree') !== 'true')
             return null;
-        const lines = [];
-        // Current branch
-        try {
-            const branch = execSync('git branch --show-current', {
-                cwd: workingDir,
-                encoding: 'utf-8',
-                stdio: ['pipe', 'pipe', 'pipe'],
-                timeout: GIT_TIMEOUT_MS,
-            }).trim();
-            if (branch)
-                lines.push(`Branch: ${branch}`);
-        }
-        catch { /* detached HEAD or error */ }
-        // Git status (brief)
-        try {
-            const status = execSync('git status --short', {
-                cwd: workingDir,
-                encoding: 'utf-8',
-                stdio: ['pipe', 'pipe', 'pipe'],
-                timeout: GIT_TIMEOUT_MS,
-            }).trim();
-            if (status) {
-                const fileCount = status.split('\n').length;
-                lines.push(`Changed files: ${fileCount}`);
-            }
-            else {
-                lines.push('Status: clean');
+    }
+    catch {
+        return null;
+    }
+    const lines = [];
+    // Current branch
+    try {
+        const branch = gitCmd('git branch --show-current');
+        if (branch)
+            lines.push(`Current branch: ${branch}`);
+    }
+    catch { /* detached HEAD */ }
+    // Main/default branch detection (for PR context)
+    try {
+        // Check common default branch names
+        const refs = gitCmd('git branch -l main master develop 2>/dev/null');
+        const mainBranch = refs.split('\n')
+            .map(l => l.trim().replace('* ', ''))
+            .find(b => ['main', 'master'].includes(b));
+        if (mainBranch)
+            lines.push(`Main branch: ${mainBranch}`);
+    }
+    catch { /* ignore */ }
+    // Git status with file paths (not just counts)
+    try {
+        const status = gitCmd('git status --short');
+        if (status) {
+            const statusLines = status.split('\n');
+            // Cap at 20 files to avoid bloating the prompt
+            const cap = 20;
+            const display = statusLines.slice(0, cap).join('\n');
+            lines.push(`\nStatus:\n${display}`);
+            if (statusLines.length > cap) {
+                lines.push(`... and ${statusLines.length - cap} more files`);
             }
         }
-        catch { /* ignore */ }
-        // Recent commits (last 5) — capped to prevent huge messages bloating context
-        try {
-            let log = execSync('git log --oneline -5', {
-                cwd: workingDir,
-                encoding: 'utf-8',
-                stdio: ['pipe', 'pipe', 'pipe'],
-                timeout: GIT_TIMEOUT_MS,
-            }).trim();
-            if (log) {
-                if (log.length > MAX_GIT_LOG_CHARS) {
-                    log = log.slice(0, MAX_GIT_LOG_CHARS) + '\n... (truncated)';
-                }
-                lines.push(`\nRecent commits:\n${log}`);
-            }
+        else {
+            lines.push('Status: clean');
         }
-        catch { /* ignore */ }
-        // Git user
-        try {
-            const user = execSync('git config user.name', {
-                cwd: workingDir,
-                encoding: 'utf-8',
-                stdio: ['pipe', 'pipe', 'pipe'],
-                timeout: GIT_TIMEOUT_MS,
-            }).trim();
-            if (user)
-                lines.push(`User: ${user}`);
+    }
+    catch { /* ignore */ }
+    // Recent commits
+    try {
+        let log = gitCmd('git log --oneline -5');
+        if (log) {
+            if (log.length > MAX_GIT_LOG_CHARS) {
+                log = log.slice(0, MAX_GIT_LOG_CHARS) + '\n... (truncated)';
+            }
+            lines.push(`\nRecent commits:\n${log}`);
         }
-        catch { /* ignore */ }
-        return lines.length > 0 ? lines.join('\n') : null;
     }
-    catch {
-        return null;
+    catch { /* ignore */ }
+    // Git user
+    try {
+        const user = gitCmd('git config user.name');
+        if (user)
+            lines.push(`\nGit user: ${user}`);
     }
+    catch { /* ignore */ }
+    return lines.length > 0 ? lines.join('\n') : null;
 }

package/dist/agent/error-classifier.d.ts CHANGED Viewed

@@ -1,10 +1,19 @@
 /**
  * Classify model/runtime errors so recovery and UX can be more consistent.
+ *
+ * Inspired by Claude Code's multi-layer error classification:
+ * - Separate 'overloaded' category (529) from general server errors — shorter retry budget
+ * - Auth errors (401) get special handling (token refresh, not retry)
+ * - EPIPE/connection reset handled as network errors (retryable)
  */
-export type AgentErrorCategory = 'rate_limit' | 'payment' | 'network' | 'timeout' | 'context_limit' | 'server' | 'unknown';
+export type AgentErrorCategory = 'rate_limit' | 'payment' | 'network' | 'timeout' | 'context_limit' | 'overloaded' | 'server' | 'auth' | 'unknown';
 export interface AgentErrorInfo {
     category: AgentErrorCategory;
-    label: 'RateLimit' | 'Payment' | 'Network' | 'Timeout' | 'Context' | 'Server' | 'Unknown';
+    label: 'RateLimit' | 'Payment' | 'Network' | 'Timeout' | 'Context' | 'Overloaded' | 'Server' | 'Auth' | 'Unknown';
     isTransient: boolean;
+    /** Max retries for this error type (overrides default). undefined = use default. */
+    maxRetries?: number;
+    /** User-facing suggestion for how to recover. Appended to error message in UI. */
+    suggestion?: string;
 }
 export declare function classifyAgentError(message: string): AgentErrorInfo;

package/dist/agent/error-classifier.js CHANGED Viewed

@@ -1,5 +1,10 @@
 /**
  * Classify model/runtime errors so recovery and UX can be more consistent.
+ *
+ * Inspired by Claude Code's multi-layer error classification:
+ * - Separate 'overloaded' category (529) from general server errors — shorter retry budget
+ * - Auth errors (401) get special handling (token refresh, not retry)
+ * - EPIPE/connection reset handled as network errors (retryable)
  */
 function includesAny(text, patterns) {
     return patterns.some((p) => text.includes(p));
@@ -14,37 +19,86 @@ export function classifyAgentError(message) {
         '402',
         'free tier',
     ])) {
-        return { category: 'payment', label: 'Payment', isTransient: false };
+        return {
+            category: 'payment', label: 'Payment', isTransient: false,
+            suggestion: 'Run `franklin balance` to check funds. Try /model free for free models.',
+        };
+    }
+    // Auth errors — not retryable (need user action: re-login, new API key)
+    if (includesAny(err, [
+        '401',
+        'unauthorized',
+        'invalid api key',
+        'invalid x-api-key',
+        'authentication failed',
+    ])) {
+        return {
+            category: 'auth', label: 'Auth', isTransient: false,
+            suggestion: 'Check your API key or wallet configuration. Run `franklin setup` to reconfigure.',
+        };
     }
     if (includesAny(err, [
         '429',
         'rate limit',
         'too many requests',
     ])) {
-        return { category: 'rate_limit', label: 'RateLimit', isTransient: true };
+        return {
+            category: 'rate_limit', label: 'RateLimit', isTransient: true,
+            suggestion: 'Try /model to switch to a different model, or wait a moment and /retry.',
+        };
     }
     if (includesAny(err, [
         'prompt is too long',
         'context length',
         'maximum context',
+        'prompt too long',
+        'token limit exceeded',
     ])) {
-        return { category: 'context_limit', label: 'Context', isTransient: false };
+        return {
+            category: 'context_limit', label: 'Context', isTransient: false,
+            suggestion: 'Run /compact to compress conversation history.',
+        };
     }
     if (includesAny(err, [
         'timeout',
         'timed out',
+        'deadline exceeded',
     ])) {
-        return { category: 'timeout', label: 'Timeout', isTransient: true };
+        return {
+            category: 'timeout', label: 'Timeout', isTransient: true,
+            suggestion: 'Check your network connection. Use /retry to try again.',
+        };
     }
     if (includesAny(err, [
         'fetch failed',
         'econnrefused',
         'econnreset',
         'enotfound',
+        'epipe',
         'network',
         'socket hang up',
+        'connection reset',
+        'dns resolution',
+    ])) {
+        return {
+            category: 'network', label: 'Network', isTransient: true,
+            suggestion: 'Check your network connection. Use /retry to try again.',
+        };
+    }
+    // 529 / Overloaded — separate from generic server errors
+    // Claude Code only allows 3 retries for these (they tend to persist)
+    if (includesAny(err, [
+        '529',
+        'overloaded',
+        'workers are busy',
+        'all workers are busy',
+        'server busy',
+        'capacity',
     ])) {
-        return { category: 'network', label: 'Network', isTransient: true };
+        return {
+            category: 'overloaded', label: 'Overloaded', isTransient: true, maxRetries: 3,
+            suggestion: 'The model is overloaded. Try /model to switch, or wait and /retry.',
+        };
     }
     if (includesAny(err, [
         '500',
@@ -54,15 +108,15 @@ export function classifyAgentError(message) {
         'internal server error',
         'bad gateway',
         'service unavailable',
-        'temporarily unavailable', // "Service temporarily unavailable"
-        'workers are busy', // "All workers are busy"
-        'server busy',
-        'overloaded',
+        'temporarily unavailable',
         'please retry later',
         'retry in a few',
         'upstream error',
     ])) {
-        return { category: 'server', label: 'Server', isTransient: true };
+        return {
+            category: 'server', label: 'Server', isTransient: true,
+            suggestion: 'Server error. Use /retry to try again, or /model to switch models.',
+        };
     }
     return { category: 'unknown', label: 'Unknown', isTransient: false };
 }