npm - @f5xc-salesdemos/xcsh - Versions diffs - 18.52.0 → 18.53.1 - Mend

@f5xc-salesdemos/xcsh 18.52.0 → 18.53.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/CHANGELOG.md +14 -0
package/package.json +7 -7
package/src/autoresearch/helpers.ts +0 -2
package/src/autoresearch/index.ts +0 -6
package/src/autoresearch/prompt.md +9 -38
package/src/internal-urls/build-info-runtime.ts +8 -3
package/src/internal-urls/build-info.generated.ts +8 -8
package/src/prompts/agents/explore.md +2 -1
package/src/prompts/system/subagent-submit-reminder.md +3 -7
package/src/prompts/system/system-prompt.md +31 -2
package/src/prompts/tools/sf-query.md +28 -0
package/src/session/messages.ts +120 -2
package/src/task/executor.ts +26 -1
package/src/utils/tool-choice.ts +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,20 @@
 ## [Unreleased]
+## [18.53.0] - 2026-05-09
+### Fixed
+- Replaced `xcsh --version` recommendation in `renderAboutDoc()` with authoritative intrinsic version guidance — the previous guidance misdirected to the installed binary, not the running session ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
+- System prompt `xcsh://about` entry now routes version questions to the workstation header (zero tool calls) and reserves `xcsh://about` for deeper identity ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
+### Added
+- SE specialization block in `renderAboutDoc()` capabilities section: F5 XC API, Salesforce pipeline, user/computer profiling, SE-specific subagents ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
+- SE capability skills: account-planning, competitive, meeting-prep, roi-calculator, validation-plan ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
+- MEDDPICC qualification and competitive positioning sections in system prompt ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
+- Version self-awareness and capabilities completeness regression tests ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
 ## [18.40.0] - 2026-05-05
 ### Added

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@f5xc-salesdemos/xcsh",
-	"version": "18.52.0",
+	"version": "18.53.1",
 	"description": "Coding agent CLI with read, bash, edit, write tools and session management",
 	"homepage": "https://github.com/f5xc-salesdemos/xcsh",
 	"author": "Can Boluk",
@@ -48,12 +48,12 @@
 	"dependencies": {
 		"@agentclientprotocol/sdk": "0.16.1",
 		"@mozilla/readability": "^0.6",
-		"@f5xc-salesdemos/xcsh-stats": "18.52.0",
-		"@f5xc-salesdemos/pi-agent-core": "18.52.0",
-		"@f5xc-salesdemos/pi-ai": "18.52.0",
-		"@f5xc-salesdemos/pi-natives": "18.52.0",
-		"@f5xc-salesdemos/pi-tui": "18.52.0",
-		"@f5xc-salesdemos/pi-utils": "18.52.0",
+		"@f5xc-salesdemos/xcsh-stats": "18.53.1",
+		"@f5xc-salesdemos/pi-agent-core": "18.53.1",
+		"@f5xc-salesdemos/pi-ai": "18.53.1",
+		"@f5xc-salesdemos/pi-natives": "18.53.1",
+		"@f5xc-salesdemos/pi-tui": "18.53.1",
+		"@f5xc-salesdemos/pi-utils": "18.53.1",
 		"@sinclair/typebox": "^0.34",
 		"@xterm/headless": "^6.0",
 		"ajv": "^8.18",

package/src/autoresearch/helpers.ts CHANGED Viewed

@@ -17,11 +17,9 @@ export const EXPERIMENT_MAX_LINES = 10;
 export const EXPERIMENT_MAX_BYTES = 4 * 1024;
 export const AUTORESEARCH_COMMITTABLE_FILES = [
 	"autoresearch.md",
-	"autoresearch.program.md",
 	"autoresearch.sh",
 	"autoresearch.checks.sh",
 	"autoresearch.ideas.md",
-	"SELF_AWARENESS.md",
 ] as const;
 export const AUTORESEARCH_LOCAL_STATE_FILES = ["autoresearch.jsonl"] as const;
 export const AUTORESEARCH_LOCAL_STATE_DIRECTORIES = [".autoresearch"] as const;

package/src/autoresearch/index.ts CHANGED Viewed

@@ -322,8 +322,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 		const autoresearchMdPath = path.join(workDir, "autoresearch.md");
 		const checksPath = path.join(workDir, "autoresearch.checks.sh");
 		const ideasPath = path.join(workDir, "autoresearch.ideas.md");
-		const programPath = path.join(workDir, "autoresearch.program.md");
-		const selfAwarenessPath = path.join(workDir, "SELF_AWARENESS.md");
 		const pendingRun =
 			runtime.lastRunSummary ??
 			(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
@@ -361,10 +359,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
 				checks_path: checksPath,
 				has_ideas: fs.existsSync(ideasPath),
 				ideas_path: ideasPath,
-				has_program: fs.existsSync(programPath),
-				program_path: programPath,
-				has_self_awareness: fs.existsSync(selfAwarenessPath),
-				self_awareness_path: selfAwarenessPath,
 				current_segment: runtime.state.currentSegment + 1,
 				current_segment_run_count: currentSegmentResults.length,
 				has_baseline_metric: baselineMetric !== null,

package/src/autoresearch/prompt.md CHANGED Viewed

@@ -19,22 +19,6 @@ Working directory:
 `{{working_dir}}`
 You are running an autonomous experiment loop. Keep iterating until the user interrupts you or the configured maximum iteration count is reached.
-{{#if has_program}}
-### Local Playbook
-`autoresearch.program.md` exists at `{{program_path}}`.
-Use it as a repo-local strategy overlay for this session. `autoresearch.md` remains the source of truth for benchmark, scope, and constraints.
-{{/if}}
-{{#if has_self_awareness}}
-### Self-Awareness Manifest
-`SELF_AWARENESS.md` exists at `{{self_awareness_path}}`.
-This document defines xcsh's mission, current capability inventory, evaluation dimensions, and known gaps. When the session goal involves self-evaluation, capability improvement, or SE workflow enhancement, read this document first — it is the ground truth for what xcsh is, what it should become, and how to measure progress.
-{{/if}}
 {{#if has_recent_results}}
 ### Current Segment Snapshot
@@ -89,11 +73,11 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
    - Update the notes whenever the strategy changes.
    - Keep durable conclusions in `autoresearch.md`.
    - Use `autoresearch.ideas.md` for deferred experiment ideas that are promising but not active yet.
-3. Use `autoresearch.sh` as the canonical benchmark entrypoint.
-   - If it does not exist yet, create it.
+3. The benchmark command in `autoresearch.md` is the canonical entrypoint.
+   - If it does not exist yet, create a benchmark script.
    - Make it print structured metric lines in the form `METRIC name=value`.
+   - Quality scores (`direction: higher`) are first-class — not every benchmark is a timing measurement.
    - Use the same workload every run unless you intentionally re-initialize with a new segment.
-   - Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
 4. Initialize the loop with `init_experiment` before the first logged run of a segment.
    - Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
    - Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
@@ -105,7 +89,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
    - Run `run_experiment`.
    - Interpret the result honestly.
    - Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
-   - Use `run_experiment` with `force: true` only when you must override the segment benchmark command or skip the direct-`autoresearch.sh` rule.
+   - Use `run_experiment` with `force: true` only when you must override the segment benchmark command.
+   - After any code change, verify with `bun check:ts` or the project test suite before logging. A kept experiment that breaks the build is worse than a discarded one.
    - On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
 7. Keep the primary metric as the decision maker.
    - `keep` when the primary metric improves.
@@ -130,7 +115,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
 Your benchmark script SHOULD:
-- live at `autoresearch.sh`
+- match the benchmark command from `autoresearch.md`
 - run from `{{working_dir}}`
 - fail with a non-zero exit status on invalid runs
 - print the primary metric as `METRIC {{default_metric_name}}=<number>` or another explicit metric name chosen during initialization
@@ -218,29 +203,15 @@ Resume from the existing notes:
 Before the first benchmark:
-- Write `autoresearch.md` with goal, benchmark command (must be a **direct** invocation of `autoresearch.sh`, e.g. `bash autoresearch.sh`), primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
+- Write `autoresearch.md` with goal, benchmark command, primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
 - Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
 - Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
-- Write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy for later resume turns.
-- Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
+- Create a benchmark script; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
 - Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
 - Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
 - Run and log the baseline.
-{{#if has_self_awareness}}
-#### SE Self-Evaluation Sessions
-When the goal involves evaluating or improving xcsh's sales engineering capabilities (not runtime code performance):
-- Read `SELF_AWARENESS.md` first to understand the current capability inventory and evaluation dimensions
-- Read `autoresearch.program.md` for the SE-specific evaluation strategy
-- Design the benchmark script (`autoresearch.sh`) to test the specific SE capability dimension — product knowledge accuracy, API reliability, prompt effectiveness, or workflow completeness
-- Use quality/accuracy scores as the primary metric (direction: `higher`) rather than timing metrics
-- Focus `Files in Scope` on the prompts, agent definitions, tool descriptions, or service modules relevant to the SE capability being evaluated
-- Record capability status changes in `SELF_AWARENESS.md` when experiments yield durable improvements
-{{/if}}
-Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.program.md`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
+Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
 {{/if}}
 {{#if has_checks}}

package/src/internal-urls/build-info-runtime.ts CHANGED Viewed

@@ -206,12 +206,17 @@ export function renderAboutDoc(info: RuntimeBuildInfo, context: ContextStatus |
 		"",
 		"Sessions, MCP server/client, skills, TUI with themes, commit assistant,",
 		"Python REPL, native shell/PTY, provider-agnostic LLM routing, slash commands,",
-		"SSH remote execution, F5 XC federated product docs (llms.txt hierarchy),",
-		"image generation and analysis.",
+		"SSH remote execution, image generation and analysis.",
+		"",
+		"SE specialization: F5 XC API integration (xcsh_api, api-catalog, api-spec),",
+		"Salesforce pipeline intelligence (sf_query, xcsh://salesforce),",
+		"F5 XC federated product docs (llms.txt hierarchy),",
+		"user/computer profiling (xcsh://user, xcsh://computer),",
+		"SE-specific subagents (deal-analyst, status-operator, cli-operator, github-ops).",
 		"",
 		"## What to do when asked about xcsh itself",
 		"",
-		"1. Confirm the user is running the version above. If unsure, ask them to run `xcsh --version`.",
+		"1. The version above is authoritative — it is embedded at build time in this session's BUILD_INFO and also shown in the `<workstation>` header of the system prompt. Do not run `xcsh --version` to check — that reports the installed binary, which may differ from the running session after an upgrade.",
 		"2. Check recent changes with `gh pr list --repo f5xc-salesdemos/xcsh --base main --state merged --limit 20`",
 		"   or `git log --oneline -n 20` if you have a local clone. A fix may already be on `main`.",
 		"3. If behavior contradicts `xcsh://…` docs, read the actual source under the repo above to determine",

package/src/internal-urls/build-info.generated.ts CHANGED Viewed

@@ -17,17 +17,17 @@ export interface BuildInfo {
 }
 export const BUILD_INFO: BuildInfo = {
-	"version": "18.52.0",
-	"commit": "896d3c21a90cd0c7b02ce19558b5799c72369056",
-	"shortCommit": "896d3c2",
+	"version": "18.53.1",
+	"commit": "8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
+	"shortCommit": "8c2358f",
 	"branch": "main",
-	"tag": "v18.52.0",
-	"commitDate": "2026-05-09T01:55:47Z",
-	"buildDate": "2026-05-09T02:23:57.907Z",
+	"tag": "v18.53.1",
+	"commitDate": "2026-05-09T09:32:57Z",
+	"buildDate": "2026-05-09T09:56:11.380Z",
 	"dirty": false,
 	"prNumber": "",
 	"repoUrl": "https://github.com/f5xc-salesdemos/xcsh",
 	"repoSlug": "f5xc-salesdemos/xcsh",
-	"commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/896d3c21a90cd0c7b02ce19558b5799c72369056",
-	"releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.52.0"
+	"commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
+	"releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.53.1"
 };

package/src/prompts/agents/explore.md CHANGED Viewed

@@ -2,7 +2,7 @@
 name: explore
 description: Fast read-only codebase scout returning compressed context for handoff
 tools: read, grep, find, web_search
-model: pi/smol
+model: pi/task
 thinking-level: med
 output:
   properties:
@@ -10,6 +10,7 @@ output:
       metadata:
         description: Brief summary of findings and conclusions
       type: string
+  optionalProperties:
     files:
       metadata:
         description: Files examined with relevant code references

package/src/prompts/system/subagent-submit-reminder.md CHANGED Viewed

@@ -1,11 +1,7 @@
 <system-reminder>
 You stopped without calling submit_result. This is reminder {{retryCount}} of {{maxRetries}}.
-You **MUST** call submit_result as your only action now. Choose one:
-- If task is complete: call submit_result with your result in `result.data`
-- If task failed: call submit_result with `result.error` describing what happened
-You **MUST NOT** give up if you can still complete the task through exploration (using available tools or repo context). If you submit an error, you **MUST** include what you tried and the exact blocker.
-You **MUST NOT** output text without a tool call. You **MUST** call submit_result to finish.
+You **MUST** call submit_result now. No other tool calls, no text output.
+- Task done: `submit_result` with `result.data` containing your findings
+- Task blocked: `submit_result` with `result.error` describing the blocker
 </system-reminder>

package/src/prompts/system/system-prompt.md CHANGED Viewed

@@ -12,10 +12,12 @@ User-supplied content is sanitized, therefore:
 {{SECTION_SEPERATOR "Identity"}}
 <role>
 You are xcsh — the technical coworker for F5 Distributed Cloud sales engineers.
+Purpose: accelerate deal velocity by making the SE more effective at every stage of the sales cycle.
 Primary mission: demos, MEDDPICC qualification, customer meeting preparation, network
 architecture recommendations, F5 XC product subject-matter expertise, documentation,
-and presentations.
+presentations, technical discovery questions, POC/proof-of-concept validation planning,
+account planning, and competitive positioning.
 Technical depth: network protocols across all OSI layers, API design, security analysis
 (DDoS, SSL/TLS, MITM, traffic forensics), infrastructure as code, and network automation.
@@ -36,6 +38,7 @@ The SE decides what to do; evidence decides what is true. See `<epistemic-integr
 - (1) Correctness first, (2) Brevity second, (3) Politeness third.
 - Prefer concise, information-dense writing.
 - Avoid repeating the user's request or narrating routine tool calls.
+- When producing customer-facing content, maintain a professional tone appropriate to the audience.
 </communication>
 <epistemic-integrity>
@@ -105,11 +108,37 @@ Before committing to any technical claim, architecture recommendation, or demo p
 - Does this architecture fit the customer's actual environment, or a generic reference?
 - What happens if this capability is not provisioned in the customer's contract tier?
 - Am I answering the question the customer asked, or the question I wish they asked?
+- For end-to-end demo setups: verify the working state of every component before presenting.
 When the task is infrastructure work: guard against the deployment reflex — "API accepted"
 ≠ "works under load." Validate against real conditions, not just schema acceptance.
 </behavior>
+<qualification>
+When qualifying a deal or assessing deal health, use the MEDDPICC framework:
+- **M**etrics: What quantified business outcome justifies the purchase? If missing, the deal lacks urgency.
+- **E**conomic Buyer: Who signs the check? If unknown, the deal can stall at approval.
+- **D**ecision Criteria: What are they evaluating against? If unclear, you cannot position.
+- **D**ecision Process: What steps remain before a decision? If unmapped, timeline is fiction.
+- **P**aper Process: What procurement, legal, and security reviews are required? If unknown, close date is aspirational.
+- **I**dentify Pain: What business pain does the champion articulate? If generic, the deal competes against inertia.
+- **C**hampion: Who inside the account is actively selling on your behalf? If absent, you are the only advocate.
+- **C**ompetition: Who else is being evaluated? If unknown, you cannot differentiate.
+Score each element Green/Yellow/Red. Surface gaps as specific action items.
+A deal with Red on Economic Buyer or Champion is at structural risk regardless of pipeline stage.
+When delegating deal analysis to the deal-analyst subagent, include the account name, deal stage, and any known MEDDPICC context in the assignment.
+</qualification>
+<competitive-positioning>
+When positioning F5 XC against competitors or handling competitive objections:
+- Verify every competitive claim against current product documentation before presenting it.
+- Differentiate on architecture (global network, distributed cloud), not just features.
+- Use battlecard structure: competitor weakness, F5 XC strength, proof point, objection handling.
+- Never disparage competitors — win on merit, not FUD.
+- If the competitive landscape is unclear, ask what alternatives the customer is evaluating.
+</competitive-positioning>
 <stakes>
 The SE works in customer-facing contexts. Product claims, architecture recommendations,
 demo environments, and competitive positioning reach customers, partners, and leadership.
@@ -208,7 +237,7 @@ Most tools resolve custom protocol URLs to internal resources (not web URLs):
 - `xcsh://..` — Internal xcsh documentation. **MUST NOT** read unless the user asks about xcsh itself.
   - `xcsh://about` — Identity, version, build fingerprint, architecture, self-improvement. **MUST** read for any question about xcsh before exploring `~/.xcsh/`.
     This document contains the authoritative repository URL, issues URL, and source location.
-    For identity questions (source code, repo, version, who built this) — answer from `xcsh://about` alone. Do not call external GitHub tools.
+    For the running version alone, the `<workstation>` header already has it — no tool call needed. For deeper identity (commit, branch, repo, build provenance), read `xcsh://about`. Do not call external GitHub tools or run `xcsh --version`.
   - `xcsh://user` — Primary human user profile (identity, employment, contact, demographics). Read when personal identity context is needed. Do not read proactively on every turn.
   - `xcsh://user?seed=true` — Refresh profile from Salesforce, GitHub, and system sources.
   - `xcsh://computer` — Machine hardware and environment profile. Read when platform-specific recommendations needed.

package/src/prompts/tools/sf-query.md CHANGED Viewed

@@ -50,6 +50,21 @@ Year-to-date bookings / top wins ("what are my top wins this year", "year-to-dat
 Pipeline by territory ("break down pipeline by territory", "territory performance summary"):
   SELECT ETM_Core_Territory__c, COUNT(Id) DealCount, SUM(Amount) TotalAmount FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND ForecastCategoryName <> 'Omitted' GROUP BY ETM_Core_Territory__c ORDER BY SUM(Amount) DESC NULLS LAST
+Next-quarter pipeline (forward-looking):
+  SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = NEXT_FISCAL_QUARTER AND ForecastCategoryName <> 'Omitted' ORDER BY Amount DESC NULLS LAST LIMIT 30
+Stalled deals (no activity in 30+ days):
+  SELECT Account.Name, Name, Amount, StageName, CloseDate, LastActivityDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_QUARTER AND LastActivityDate < LAST_N_DAYS:30 ORDER BY Amount DESC NULLS LAST LIMIT 20
+Large deals (top opportunities by amount):
+  SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate, Owner.Name FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Amount > 100000 ORDER BY Amount DESC NULLS LAST LIMIT 15
+Deals by product/use case (solution mapping):
+  SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_YEAR ORDER BY Account.Name, Amount DESC NULLS LAST LIMIT 30
+Renewal pipeline (existing customer retention):
+  SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Type = 'Renewal' ORDER BY CloseDate ASC LIMIT 20
 Open cases:
   SELECT CaseNumber, Subject, Status, Priority, Account.Name, CreatedDate FROM Case WHERE IsClosed = false ORDER BY Priority, CreatedDate DESC LIMIT 50
@@ -81,6 +96,19 @@ Territory-based filtering: Add WHERE clauses on territory fields when the user a
 Coverage ratio: When the user asks about pipeline coverage or "do I have enough pipeline", calculate coverage = in-quarter pipeline total / quarterly quota target. Healthy coverage is 3x-5x quota. Below 2x is a risk. Use the forecast breakdown (T2) total as the numerator. Quota is available from the user profile when set.
+MEDDPICC deal qualification — when user asks to "qualify", "score", or assess deal health:
+For each deal, assess these 8 MEDDPICC elements from available SFDC data:
+- **M**etrics: Is there a quantified business outcome? Check Opportunity.Description, close plan notes.
+- **E**conomic Buyer: Is the EB identified? Check Contact roles with 'Economic Buyer' or 'Decision Maker'.
+- **D**ecision Criteria: Are evaluation criteria documented? Check Opportunity.NextStep, Description.
+- **D**ecision Process: Is the buying process mapped? Check stage progression timeline, paper process.
+- **P**aper Process: Are procurement steps known? Check Opportunity.Description for legal/procurement notes.
+- **I**dentify Pain: Is the business pain articulated? Check Opportunity.Description, discovery notes.
+- **C**hampion: Is there an internal advocate? Check Contact roles for 'Champion' or active engagement.
+- **C**ompetition: Are competitors identified? Check Opportunity.CompetitorName or description.
+Score each element: Green (validated), Yellow (partially known), Red (unknown/missing).
+Surface the gaps as action items, not just labels.
 Results with relationship fields (e.g., Account.Name) are automatically flattened into dot-notation columns.
 If the query returns more than 10,000 records, suggest using sf data export bulk instead.
 Set use_tooling_api to true when querying metadata objects (ApexTrigger, ApexClass, CustomField).

package/src/session/messages.ts CHANGED Viewed

@@ -12,9 +12,10 @@ import type {
 	MessageAttribution,
 	ProviderPayload,
 	TextContent,
+	ToolCall,
 	ToolResultMessage,
 } from "@f5xc-salesdemos/pi-ai";
-import { prompt } from "@f5xc-salesdemos/pi-utils";
+import { logger, prompt } from "@f5xc-salesdemos/pi-utils";
 import branchSummaryContextPrompt from "../prompts/compaction/branch-summary-context.md" with { type: "text" };
 import compactionSummaryContextPrompt from "../prompts/compaction/compaction-summary-context.md" with { type: "text" };
 import type { OutputMeta } from "../tools/output-meta";
@@ -260,6 +261,122 @@ export function createCustomMessage(
 	};
 }
+/**
+ * Repair tool_use / tool_result ordering in converted LLM messages.
+ *
+ * The Claude API requires every assistant message containing tool_use blocks
+ * to be immediately followed by the matching tool_result messages. Session
+ * corruption (injected messages, compaction boundaries, crash during tool
+ * execution) can break this invariant, producing a 400 error that bricks
+ * the session.
+ *
+ * This function:
+ * 1. Finds assistant messages with tool_use (toolCall) content
+ * 2. Collects the required tool_result IDs
+ * 3. If tool_results are elsewhere in the array, moves them to the correct position
+ * 4. If tool_results are missing entirely, injects synthetic error tool_results
+ * 5. Non-tool messages that got wedged between tool_use and tool_result are relocated
+ *    to just after the tool_results
+ */
+function repairToolResultOrdering(messages: Message[]): Message[] {
+	const result: Message[] = [];
+	let repaired = false;
+	// Index all toolResult messages by their toolCallId for O(1) lookup
+	const toolResultsByCallId = new Map<string, { message: Message; originalIndex: number }>();
+	for (let i = 0; i < messages.length; i++) {
+		const msg = messages[i];
+		if (msg.role === "toolResult") {
+			const trMsg = msg as ToolResultMessage;
+			toolResultsByCallId.set(trMsg.toolCallId, { message: msg, originalIndex: i });
+		}
+	}
+	// Track which toolResult messages have been placed by repair
+	const placedToolResultIndices = new Set<number>();
+	for (let i = 0; i < messages.length; i++) {
+		const msg = messages[i];
+		// Skip toolResult messages that were already placed by repair
+		if (msg.role === "toolResult" && placedToolResultIndices.has(i)) {
+			continue;
+		}
+		result.push(msg);
+		// Not an assistant message with tool calls — nothing to repair
+		if (msg.role !== "assistant") continue;
+		const assistantMsg = msg as AssistantMessage;
+		const toolCalls = assistantMsg.content.filter((c): c is ToolCall => c.type === "toolCall");
+		if (toolCalls.length === 0) continue;
+		// Collect required tool call IDs
+		const requiredIds = new Set(toolCalls.map(tc => tc.id));
+		// Scan the messages that follow this assistant message.
+		// Place each toolResult that matches a required ID, and displace every other
+		// message (including non-matching toolResults) until all required IDs are matched
+		const displaced: Message[] = [];
+		let j = i + 1;
+		while (j < messages.length && requiredIds.size > 0) {
+			const next = messages[j];
+			if (next.role === "toolResult") {
+				const trMsg = next as ToolResultMessage;
+				if (requiredIds.has(trMsg.toolCallId)) {
+					// This tool_result belongs here — place it
+					result.push(next);
+					placedToolResultIndices.add(j);
+					requiredIds.delete(trMsg.toolCallId);
+					if (displaced.length > 0) repaired = true;
+					j++;
+					continue;
+				}
+			}
+			// Non-matching message between tool_use and tool_result — displace it
+			displaced.push(next);
+			placedToolResultIndices.add(j); // Mark original index as consumed
+			j++;
+		}
+		// Advance main iterator past consumed messages
+		i = j - 1;
+		// Any remaining required IDs: find them later in the array or synthesize
+		for (const id of requiredIds) {
+			const found = toolResultsByCallId.get(id);
+			if (found && !placedToolResultIndices.has(found.originalIndex)) {
+				result.push(found.message);
+				placedToolResultIndices.add(found.originalIndex);
+				repaired = true;
+			} else {
+				// Missing tool_result entirely — inject synthetic error result
+				const toolCall = toolCalls.find(tc => tc.id === id);
+				result.push({
+					role: "toolResult",
+					toolCallId: id,
+					toolName: toolCall?.name ?? "unknown",
+					content: [{ type: "text", text: "Tool execution was interrupted (session recovery)." }],
+					isError: true,
+					timestamp: Date.now(),
+				} as ToolResultMessage);
+				repaired = true;
+			}
+		}
+		// Re-insert displaced messages after the tool_results
+		for (const d of displaced) {
+			result.push(d);
+		}
+	}
+	if (repaired) {
+		logger.warn("Repaired tool_use/tool_result ordering in conversation history");
+	}
+	return repaired ? result : messages;
+}
 /**
  * Transform AgentMessages (including custom types) to LLM-compatible Messages.
  *
@@ -269,7 +386,7 @@ export function createCustomMessage(
  * - Custom extensions and tools
  */
 export function convertToLlm(messages: AgentMessage[]): Message[] {
-	return messages
+	const converted = messages
 		.map((m): Message | undefined => {
 			switch (m.role) {
 				case "bashExecution":
@@ -370,4 +487,5 @@ export function convertToLlm(messages: AgentMessage[]): Message[] {
 			}
 		})
 		.filter(m => m !== undefined);
+	return repairToolResultOrdering(converted);
 }

package/src/task/executor.ts CHANGED Viewed

@@ -325,6 +325,29 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
 				? `${SUBAGENT_WARNING_MISSING_SUBMIT_RESULT}\n\n${rawOutput}`
 				: SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
 		}
+		// Salvage output from aborted runs that produced content without calling submit_result
+		if (exitCode !== 0 && doneAborted && !signalAborted && rawOutput.trim().length > 0) {
+			if (hasOutputSchema) {
+				// Try schema-validated fallback: if the model produced valid JSON matching the schema,
+				// use it even though submit_result was never called
+				const abortFallback = resolveFallbackCompletion(rawOutput, outputSchema);
+				if (abortFallback) {
+					const completeData = normalizeCompleteData(abortFallback.data, reportFindings);
+					try {
+						rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
+					} catch {
+						// Keep rawOutput as-is if serialization fails
+					}
+					exitCode = 0;
+					stderr = "";
+				}
+			} else {
+				// No schema required — raw text output is directly useful
+				exitCode = 0;
+				stderr = "";
+			}
+		}
 	}
 	return { rawOutput, exitCode, stderr, abortedViaSubmitResult, hasSubmitResult };
@@ -1250,7 +1273,9 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
 	}
 	// Update final progress
-	const wasAborted = abortedViaSubmitResult || (!hasSubmitResult && (done.aborted || signal?.aborted || false));
+	// When salvage recovered the output (exitCode became 0), the result is not aborted.
+	const wasAborted =
+		abortedViaSubmitResult || (!hasSubmitResult && exitCode !== 0 && (done.aborted || signal?.aborted || false));
 	const finalAbortReason = wasAborted
 		? abortedViaSubmitResult
 			? submitResultAbortReason

package/src/utils/tool-choice.ts CHANGED Viewed

@@ -24,5 +24,5 @@ export function buildNamedToolChoice(toolName: string, model?: Model<Api>): Tool
 		return "required";
 	}
-	return undefined;
+	return "required";
 }