@f5xc-salesdemos/xcsh 18.52.0 → 18.53.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [18.53.0] - 2026-05-09
6
+
7
+ ### Fixed
8
+
9
+ - Replaced `xcsh --version` recommendation in `renderAboutDoc()` with authoritative intrinsic version guidance — the previous guidance misdirected to the installed binary, not the running session ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
10
+ - System prompt `xcsh://about` entry now routes version questions to the workstation header (zero tool calls) and reserves `xcsh://about` for deeper identity ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
11
+
12
+ ### Added
13
+
14
+ - SE specialization block in `renderAboutDoc()` capabilities section: F5 XC API, Salesforce pipeline, user/computer profiling, SE-specific subagents ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
15
+ - SE capability skills: account-planning, competitive, meeting-prep, roi-calculator, validation-plan ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
16
+ - MEDDPICC qualification and competitive positioning sections in system prompt ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
17
+ - Version self-awareness and capabilities completeness regression tests ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
18
+
5
19
  ## [18.40.0] - 2026-05-05
6
20
 
7
21
  ### Added
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@f5xc-salesdemos/xcsh",
4
- "version": "18.52.0",
4
+ "version": "18.53.1",
5
5
  "description": "Coding agent CLI with read, bash, edit, write tools and session management",
6
6
  "homepage": "https://github.com/f5xc-salesdemos/xcsh",
7
7
  "author": "Can Boluk",
@@ -48,12 +48,12 @@
48
48
  "dependencies": {
49
49
  "@agentclientprotocol/sdk": "0.16.1",
50
50
  "@mozilla/readability": "^0.6",
51
- "@f5xc-salesdemos/xcsh-stats": "18.52.0",
52
- "@f5xc-salesdemos/pi-agent-core": "18.52.0",
53
- "@f5xc-salesdemos/pi-ai": "18.52.0",
54
- "@f5xc-salesdemos/pi-natives": "18.52.0",
55
- "@f5xc-salesdemos/pi-tui": "18.52.0",
56
- "@f5xc-salesdemos/pi-utils": "18.52.0",
51
+ "@f5xc-salesdemos/xcsh-stats": "18.53.1",
52
+ "@f5xc-salesdemos/pi-agent-core": "18.53.1",
53
+ "@f5xc-salesdemos/pi-ai": "18.53.1",
54
+ "@f5xc-salesdemos/pi-natives": "18.53.1",
55
+ "@f5xc-salesdemos/pi-tui": "18.53.1",
56
+ "@f5xc-salesdemos/pi-utils": "18.53.1",
57
57
  "@sinclair/typebox": "^0.34",
58
58
  "@xterm/headless": "^6.0",
59
59
  "ajv": "^8.18",
@@ -17,11 +17,9 @@ export const EXPERIMENT_MAX_LINES = 10;
17
17
  export const EXPERIMENT_MAX_BYTES = 4 * 1024;
18
18
  export const AUTORESEARCH_COMMITTABLE_FILES = [
19
19
  "autoresearch.md",
20
- "autoresearch.program.md",
21
20
  "autoresearch.sh",
22
21
  "autoresearch.checks.sh",
23
22
  "autoresearch.ideas.md",
24
- "SELF_AWARENESS.md",
25
23
  ] as const;
26
24
  export const AUTORESEARCH_LOCAL_STATE_FILES = ["autoresearch.jsonl"] as const;
27
25
  export const AUTORESEARCH_LOCAL_STATE_DIRECTORIES = [".autoresearch"] as const;
@@ -322,8 +322,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
322
322
  const autoresearchMdPath = path.join(workDir, "autoresearch.md");
323
323
  const checksPath = path.join(workDir, "autoresearch.checks.sh");
324
324
  const ideasPath = path.join(workDir, "autoresearch.ideas.md");
325
- const programPath = path.join(workDir, "autoresearch.program.md");
326
- const selfAwarenessPath = path.join(workDir, "SELF_AWARENESS.md");
327
325
  const pendingRun =
328
326
  runtime.lastRunSummary ??
329
327
  (await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
@@ -361,10 +359,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
361
359
  checks_path: checksPath,
362
360
  has_ideas: fs.existsSync(ideasPath),
363
361
  ideas_path: ideasPath,
364
- has_program: fs.existsSync(programPath),
365
- program_path: programPath,
366
- has_self_awareness: fs.existsSync(selfAwarenessPath),
367
- self_awareness_path: selfAwarenessPath,
368
362
  current_segment: runtime.state.currentSegment + 1,
369
363
  current_segment_run_count: currentSegmentResults.length,
370
364
  has_baseline_metric: baselineMetric !== null,
@@ -19,22 +19,6 @@ Working directory:
19
19
  `{{working_dir}}`
20
20
 
21
21
  You are running an autonomous experiment loop. Keep iterating until the user interrupts you or the configured maximum iteration count is reached.
22
- {{#if has_program}}
23
-
24
- ### Local Playbook
25
-
26
- `autoresearch.program.md` exists at `{{program_path}}`.
27
-
28
- Use it as a repo-local strategy overlay for this session. `autoresearch.md` remains the source of truth for benchmark, scope, and constraints.
29
- {{/if}}
30
- {{#if has_self_awareness}}
31
-
32
- ### Self-Awareness Manifest
33
-
34
- `SELF_AWARENESS.md` exists at `{{self_awareness_path}}`.
35
-
36
- This document defines xcsh's mission, current capability inventory, evaluation dimensions, and known gaps. When the session goal involves self-evaluation, capability improvement, or SE workflow enhancement, read this document first — it is the ground truth for what xcsh is, what it should become, and how to measure progress.
37
- {{/if}}
38
22
  {{#if has_recent_results}}
39
23
 
40
24
  ### Current Segment Snapshot
@@ -89,11 +73,11 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
89
73
  - Update the notes whenever the strategy changes.
90
74
  - Keep durable conclusions in `autoresearch.md`.
91
75
  - Use `autoresearch.ideas.md` for deferred experiment ideas that are promising but not active yet.
92
- 3. Use `autoresearch.sh` as the canonical benchmark entrypoint.
93
- - If it does not exist yet, create it.
76
+ 3. The benchmark command in `autoresearch.md` is the canonical entrypoint.
77
+ - If it does not exist yet, create a benchmark script.
94
78
  - Make it print structured metric lines in the form `METRIC name=value`.
79
+ - Quality scores (`direction: higher`) are first-class — not every benchmark is a timing measurement.
95
80
  - Use the same workload every run unless you intentionally re-initialize with a new segment.
96
- - Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
97
81
  4. Initialize the loop with `init_experiment` before the first logged run of a segment.
98
82
  - Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
99
83
  - Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
@@ -105,7 +89,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
105
89
  - Run `run_experiment`.
106
90
  - Interpret the result honestly.
107
91
  - Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
108
- - Use `run_experiment` with `force: true` only when you must override the segment benchmark command or skip the direct-`autoresearch.sh` rule.
92
+ - Use `run_experiment` with `force: true` only when you must override the segment benchmark command.
93
+ - After any code change, verify with `bun check:ts` or the project test suite before logging. A kept experiment that breaks the build is worse than a discarded one.
109
94
  - On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
110
95
  7. Keep the primary metric as the decision maker.
111
96
  - `keep` when the primary metric improves.
@@ -130,7 +115,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
130
115
 
131
116
  Your benchmark script SHOULD:
132
117
 
133
- - live at `autoresearch.sh`
118
+ - match the benchmark command from `autoresearch.md`
134
119
  - run from `{{working_dir}}`
135
120
  - fail with a non-zero exit status on invalid runs
136
121
  - print the primary metric as `METRIC {{default_metric_name}}=<number>` or another explicit metric name chosen during initialization
@@ -218,29 +203,15 @@ Resume from the existing notes:
218
203
 
219
204
  Before the first benchmark:
220
205
 
221
- - Write `autoresearch.md` with goal, benchmark command (must be a **direct** invocation of `autoresearch.sh`, e.g. `bash autoresearch.sh`), primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
206
+ - Write `autoresearch.md` with goal, benchmark command, primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
222
207
  - Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
223
208
  - Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
224
- - Write or update `autoresearch.program.md` when you learn durable heuristics, failure patterns, or repo-specific strategy for later resume turns.
225
- - Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
209
+ - Create a benchmark script; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
226
210
  - Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
227
211
  - Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
228
212
  - Run and log the baseline.
229
- {{#if has_self_awareness}}
230
-
231
- #### SE Self-Evaluation Sessions
232
-
233
- When the goal involves evaluating or improving xcsh's sales engineering capabilities (not runtime code performance):
234
-
235
- - Read `SELF_AWARENESS.md` first to understand the current capability inventory and evaluation dimensions
236
- - Read `autoresearch.program.md` for the SE-specific evaluation strategy
237
- - Design the benchmark script (`autoresearch.sh`) to test the specific SE capability dimension — product knowledge accuracy, API reliability, prompt effectiveness, or workflow completeness
238
- - Use quality/accuracy scores as the primary metric (direction: `higher`) rather than timing metrics
239
- - Focus `Files in Scope` on the prompts, agent definitions, tool descriptions, or service modules relevant to the SE capability being evaluated
240
- - Record capability status changes in `SELF_AWARENESS.md` when experiments yield durable improvements
241
- {{/if}}
242
213
 
243
- Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.program.md`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
214
+ Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
244
215
 
245
216
  {{/if}}
246
217
  {{#if has_checks}}
@@ -206,12 +206,17 @@ export function renderAboutDoc(info: RuntimeBuildInfo, context: ContextStatus |
206
206
  "",
207
207
  "Sessions, MCP server/client, skills, TUI with themes, commit assistant,",
208
208
  "Python REPL, native shell/PTY, provider-agnostic LLM routing, slash commands,",
209
- "SSH remote execution, F5 XC federated product docs (llms.txt hierarchy),",
210
- "image generation and analysis.",
209
+ "SSH remote execution, image generation and analysis.",
210
+ "",
211
+ "SE specialization: F5 XC API integration (xcsh_api, api-catalog, api-spec),",
212
+ "Salesforce pipeline intelligence (sf_query, xcsh://salesforce),",
213
+ "F5 XC federated product docs (llms.txt hierarchy),",
214
+ "user/computer profiling (xcsh://user, xcsh://computer),",
215
+ "SE-specific subagents (deal-analyst, status-operator, cli-operator, github-ops).",
211
216
  "",
212
217
  "## What to do when asked about xcsh itself",
213
218
  "",
214
- "1. Confirm the user is running the version above. If unsure, ask them to run `xcsh --version`.",
219
+ "1. The version above is authoritative — it is embedded at build time in this session's BUILD_INFO and also shown in the `<workstation>` header of the system prompt. Do not run `xcsh --version` to check — that reports the installed binary, which may differ from the running session after an upgrade.",
215
220
  "2. Check recent changes with `gh pr list --repo f5xc-salesdemos/xcsh --base main --state merged --limit 20`",
216
221
  " or `git log --oneline -n 20` if you have a local clone. A fix may already be on `main`.",
217
222
  "3. If behavior contradicts `xcsh://…` docs, read the actual source under the repo above to determine",
@@ -17,17 +17,17 @@ export interface BuildInfo {
17
17
  }
18
18
 
19
19
  export const BUILD_INFO: BuildInfo = {
20
- "version": "18.52.0",
21
- "commit": "896d3c21a90cd0c7b02ce19558b5799c72369056",
22
- "shortCommit": "896d3c2",
20
+ "version": "18.53.1",
21
+ "commit": "8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
22
+ "shortCommit": "8c2358f",
23
23
  "branch": "main",
24
- "tag": "v18.52.0",
25
- "commitDate": "2026-05-09T01:55:47Z",
26
- "buildDate": "2026-05-09T02:23:57.907Z",
24
+ "tag": "v18.53.1",
25
+ "commitDate": "2026-05-09T09:32:57Z",
26
+ "buildDate": "2026-05-09T09:56:11.380Z",
27
27
  "dirty": false,
28
28
  "prNumber": "",
29
29
  "repoUrl": "https://github.com/f5xc-salesdemos/xcsh",
30
30
  "repoSlug": "f5xc-salesdemos/xcsh",
31
- "commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/896d3c21a90cd0c7b02ce19558b5799c72369056",
32
- "releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.52.0"
31
+ "commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
32
+ "releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.53.1"
33
33
  };
@@ -2,7 +2,7 @@
2
2
  name: explore
3
3
  description: Fast read-only codebase scout returning compressed context for handoff
4
4
  tools: read, grep, find, web_search
5
- model: pi/smol
5
+ model: pi/task
6
6
  thinking-level: med
7
7
  output:
8
8
  properties:
@@ -10,6 +10,7 @@ output:
10
10
  metadata:
11
11
  description: Brief summary of findings and conclusions
12
12
  type: string
13
+ optionalProperties:
13
14
  files:
14
15
  metadata:
15
16
  description: Files examined with relevant code references
@@ -1,11 +1,7 @@
1
1
  <system-reminder>
2
2
  You stopped without calling submit_result. This is reminder {{retryCount}} of {{maxRetries}}.
3
3
 
4
- You **MUST** call submit_result as your only action now. Choose one:
5
- - If task is complete: call submit_result with your result in `result.data`
6
- - If task failed: call submit_result with `result.error` describing what happened
7
-
8
- You **MUST NOT** give up if you can still complete the task through exploration (using available tools or repo context). If you submit an error, you **MUST** include what you tried and the exact blocker.
9
-
10
- You **MUST NOT** output text without a tool call. You **MUST** call submit_result to finish.
4
+ You **MUST** call submit_result now. No other tool calls, no text output.
5
+ - Task done: `submit_result` with `result.data` containing your findings
6
+ - Task blocked: `submit_result` with `result.error` describing the blocker
11
7
  </system-reminder>
@@ -12,10 +12,12 @@ User-supplied content is sanitized, therefore:
12
12
  {{SECTION_SEPERATOR "Identity"}}
13
13
  <role>
14
14
  You are xcsh — the technical coworker for F5 Distributed Cloud sales engineers.
15
+ Purpose: accelerate deal velocity by making the SE more effective at every stage of the sales cycle.
15
16
 
16
17
  Primary mission: demos, MEDDPICC qualification, customer meeting preparation, network
17
18
  architecture recommendations, F5 XC product subject-matter expertise, documentation,
18
- and presentations.
19
+ presentations, technical discovery questions, POC/proof-of-concept validation planning,
20
+ account planning, and competitive positioning.
19
21
 
20
22
  Technical depth: network protocols across all OSI layers, API design, security analysis
21
23
  (DDoS, SSL/TLS, MITM, traffic forensics), infrastructure as code, and network automation.
@@ -36,6 +38,7 @@ The SE decides what to do; evidence decides what is true. See `<epistemic-integr
36
38
  - (1) Correctness first, (2) Brevity second, (3) Politeness third.
37
39
  - Prefer concise, information-dense writing.
38
40
  - Avoid repeating the user's request or narrating routine tool calls.
41
+ - When producing customer-facing content, maintain a professional tone appropriate to the audience.
39
42
  </communication>
40
43
 
41
44
  <epistemic-integrity>
@@ -105,11 +108,37 @@ Before committing to any technical claim, architecture recommendation, or demo p
105
108
  - Does this architecture fit the customer's actual environment, or a generic reference?
106
109
  - What happens if this capability is not provisioned in the customer's contract tier?
107
110
  - Am I answering the question the customer asked, or the question I wish they asked?
111
+ - For end-to-end demo setups: verify the working state of every component before presenting.
108
112
 
109
113
  When the task is infrastructure work: guard against the deployment reflex — "API accepted"
110
114
  ≠ "works under load." Validate against real conditions, not just schema acceptance.
111
115
  </behavior>
112
116
 
117
+ <qualification>
118
+ When qualifying a deal or assessing deal health, use the MEDDPICC framework:
119
+ - **M**etrics: What quantified business outcome justifies the purchase? If missing, the deal lacks urgency.
120
+ - **E**conomic Buyer: Who signs the check? If unknown, the deal can stall at approval.
121
+ - **D**ecision Criteria: What are they evaluating against? If unclear, you cannot position.
122
+ - **D**ecision Process: What steps remain before a decision? If unmapped, timeline is fiction.
123
+ - **P**aper Process: What procurement, legal, and security reviews are required? If unknown, close date is aspirational.
124
+ - **I**dentify Pain: What business pain does the champion articulate? If generic, the deal competes against inertia.
125
+ - **C**hampion: Who inside the account is actively selling on your behalf? If absent, you are the only advocate.
126
+ - **C**ompetition: Who else is being evaluated? If unknown, you cannot differentiate.
127
+
128
+ Score each element Green/Yellow/Red. Surface gaps as specific action items.
129
+ A deal with Red on Economic Buyer or Champion is at structural risk regardless of pipeline stage.
130
+ When delegating deal analysis to the deal-analyst subagent, include the account name, deal stage, and any known MEDDPICC context in the assignment.
131
+ </qualification>
132
+
133
+ <competitive-positioning>
134
+ When positioning F5 XC against competitors or handling competitive objections:
135
+ - Verify every competitive claim against current product documentation before presenting it.
136
+ - Differentiate on architecture (global network, distributed cloud), not just features.
137
+ - Use battlecard structure: competitor weakness, F5 XC strength, proof point, objection handling.
138
+ - Never disparage competitors — win on merit, not FUD.
139
+ - If the competitive landscape is unclear, ask what alternatives the customer is evaluating.
140
+ </competitive-positioning>
141
+
113
142
  <stakes>
114
143
  The SE works in customer-facing contexts. Product claims, architecture recommendations,
115
144
  demo environments, and competitive positioning reach customers, partners, and leadership.
@@ -208,7 +237,7 @@ Most tools resolve custom protocol URLs to internal resources (not web URLs):
208
237
  - `xcsh://..` — Internal xcsh documentation. **MUST NOT** read unless the user asks about xcsh itself.
209
238
  - `xcsh://about` — Identity, version, build fingerprint, architecture, self-improvement. **MUST** read for any question about xcsh before exploring `~/.xcsh/`.
210
239
  This document contains the authoritative repository URL, issues URL, and source location.
211
- For identity questions (source code, repo, version, who built this) answer from `xcsh://about` alone. Do not call external GitHub tools.
240
+ For the running version alone, the `<workstation>` header already has it — no tool call needed. For deeper identity (commit, branch, repo, build provenance), read `xcsh://about`. Do not call external GitHub tools or run `xcsh --version`.
212
241
  - `xcsh://user` — Primary human user profile (identity, employment, contact, demographics). Read when personal identity context is needed. Do not read proactively on every turn.
213
242
  - `xcsh://user?seed=true` — Refresh profile from Salesforce, GitHub, and system sources.
214
243
  - `xcsh://computer` — Machine hardware and environment profile. Read when platform-specific recommendations needed.
@@ -50,6 +50,21 @@ Year-to-date bookings / top wins ("what are my top wins this year", "year-to-dat
50
50
  Pipeline by territory ("break down pipeline by territory", "territory performance summary"):
51
51
  SELECT ETM_Core_Territory__c, COUNT(Id) DealCount, SUM(Amount) TotalAmount FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND ForecastCategoryName <> 'Omitted' GROUP BY ETM_Core_Territory__c ORDER BY SUM(Amount) DESC NULLS LAST
52
52
 
53
+ Next-quarter pipeline (forward-looking):
54
+ SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = NEXT_FISCAL_QUARTER AND ForecastCategoryName <> 'Omitted' ORDER BY Amount DESC NULLS LAST LIMIT 30
55
+
56
+ Stalled deals (no activity in 30+ days):
57
+ SELECT Account.Name, Name, Amount, StageName, CloseDate, LastActivityDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_QUARTER AND LastActivityDate < LAST_N_DAYS:30 ORDER BY Amount DESC NULLS LAST LIMIT 20
58
+
59
+ Large deals (top opportunities by amount):
60
+ SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate, Owner.Name FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Amount > 100000 ORDER BY Amount DESC NULLS LAST LIMIT 15
61
+
62
+ Deals by product/use case (solution mapping):
63
+ SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_YEAR ORDER BY Account.Name, Amount DESC NULLS LAST LIMIT 30
64
+
65
+ Renewal pipeline (existing customer retention):
66
+ SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Type = 'Renewal' ORDER BY CloseDate ASC LIMIT 20
67
+
53
68
  Open cases:
54
69
  SELECT CaseNumber, Subject, Status, Priority, Account.Name, CreatedDate FROM Case WHERE IsClosed = false ORDER BY Priority, CreatedDate DESC LIMIT 50
55
70
 
@@ -81,6 +96,19 @@ Territory-based filtering: Add WHERE clauses on territory fields when the user a
81
96
 
82
97
  Coverage ratio: When the user asks about pipeline coverage or "do I have enough pipeline", calculate coverage = in-quarter pipeline total / quarterly quota target. Healthy coverage is 3x-5x quota. Below 2x is a risk. Use the forecast breakdown (T2) total as the numerator. Quota is available from the user profile when set.
83
98
 
99
+ MEDDPICC deal qualification — when user asks to "qualify", "score", or assess deal health:
100
+ For each deal, assess these 8 MEDDPICC elements from available SFDC data:
101
+ - **M**etrics: Is there a quantified business outcome? Check Opportunity.Description, close plan notes.
102
+ - **E**conomic Buyer: Is the EB identified? Check Contact roles with 'Economic Buyer' or 'Decision Maker'.
103
+ - **D**ecision Criteria: Are evaluation criteria documented? Check Opportunity.NextStep, Description.
104
+ - **D**ecision Process: Is the buying process mapped? Check stage progression timeline, paper process.
105
+ - **P**aper Process: Are procurement steps known? Check Opportunity.Description for legal/procurement notes.
106
+ - **I**dentify Pain: Is the business pain articulated? Check Opportunity.Description, discovery notes.
107
+ - **C**hampion: Is there an internal advocate? Check Contact roles for 'Champion' or active engagement.
108
+ - **C**ompetition: Are competitors identified? Check Opportunity.CompetitorName or description.
109
+ Score each element: Green (validated), Yellow (partially known), Red (unknown/missing).
110
+ Surface the gaps as action items, not just labels.
111
+
84
112
  Results with relationship fields (e.g., Account.Name) are automatically flattened into dot-notation columns.
85
113
  If the query returns more than 10,000 records, suggest using sf data export bulk instead.
86
114
  Set use_tooling_api to true when querying metadata objects (ApexTrigger, ApexClass, CustomField).
@@ -12,9 +12,10 @@ import type {
12
12
  MessageAttribution,
13
13
  ProviderPayload,
14
14
  TextContent,
15
+ ToolCall,
15
16
  ToolResultMessage,
16
17
  } from "@f5xc-salesdemos/pi-ai";
17
- import { prompt } from "@f5xc-salesdemos/pi-utils";
18
+ import { logger, prompt } from "@f5xc-salesdemos/pi-utils";
18
19
  import branchSummaryContextPrompt from "../prompts/compaction/branch-summary-context.md" with { type: "text" };
19
20
  import compactionSummaryContextPrompt from "../prompts/compaction/compaction-summary-context.md" with { type: "text" };
20
21
  import type { OutputMeta } from "../tools/output-meta";
@@ -260,6 +261,122 @@ export function createCustomMessage(
260
261
  };
261
262
  }
262
263
 
264
/**
 * Repair tool_use / tool_result ordering in converted LLM messages.
 *
 * The Claude API requires every assistant message containing tool_use blocks
 * to be immediately followed by the matching tool_result messages. Session
 * corruption (injected messages, compaction boundaries, crash during tool
 * execution) can break this invariant, producing a 400 error that bricks
 * the session.
 *
 * Algorithm (two passes, O(n)):
 * 1. Index the first assistant message that declares each tool call id (its
 *    "owner") and the first toolResult message recorded for each id.
 * 2. Rebuild the array in original order. A toolResult that is claimed by an
 *    owner is emitted only directly after that owner, wherever it originally
 *    sat (before the assistant, or separated from it by other messages).
 *    Tool calls with no available result get a synthetic error toolResult.
 *
 * Every assistant message is processed independently, so one missing result
 * never prevents repair of later tool calls. Messages that were wedged between
 * a tool_use and its tool_result keep their relative order and end up after
 * the relocated tool_results. toolResult messages that no tool call claims
 * (orphans, or repeats of an already-indexed id) are left in place.
 *
 * @param messages Converted LLM messages in conversation order.
 * @returns The same array instance when nothing had to change; otherwise a new
 *          array with the repaired ordering (a warning is logged).
 */
function repairToolResultOrdering(messages: Message[]): Message[] {
	// Pass 1: who owns each tool call id, and where does its first result live?
	const ownerByCallId = new Map<string, number>();
	const resultIndexByCallId = new Map<string, number>();
	for (let i = 0; i < messages.length; i++) {
		const msg = messages[i];
		if (msg.role === "toolResult") {
			const callId = (msg as ToolResultMessage).toolCallId;
			if (!resultIndexByCallId.has(callId)) resultIndexByCallId.set(callId, i);
		} else if (msg.role === "assistant") {
			const calls = (msg as AssistantMessage).content.filter((c): c is ToolCall => c.type === "toolCall");
			for (const call of calls) {
				if (!ownerByCallId.has(call.id)) ownerByCallId.set(call.id, i);
			}
		}
	}

	// Pass 2: rebuild, emitting each claimed result right after its owner.
	const result: Message[] = [];
	for (let i = 0; i < messages.length; i++) {
		const msg = messages[i];

		if (msg.role === "toolResult") {
			const callId = (msg as ToolResultMessage).toolCallId;
			const claimed = ownerByCallId.has(callId) && resultIndexByCallId.get(callId) === i;
			// Claimed results are emitted by their owning assistant message, never here.
			if (!claimed) result.push(msg);
			continue;
		}

		result.push(msg);
		if (msg.role !== "assistant") continue;

		const toolCalls = (msg as AssistantMessage).content.filter((c): c is ToolCall => c.type === "toolCall");
		if (toolCalls.length === 0) continue;

		const existingResultIndices: number[] = [];
		const unanswered: ToolCall[] = [];
		const seenIds = new Set<string>();
		for (const call of toolCalls) {
			if (seenIds.has(call.id)) continue;
			seenIds.add(call.id);
			// Only the owning assistant may take the recorded result; any other
			// assistant reusing the id still needs a result of its own.
			const at = ownerByCallId.get(call.id) === i ? resultIndexByCallId.get(call.id) : undefined;
			if (at === undefined) {
				unanswered.push(call);
			} else {
				existingResultIndices.push(at);
			}
		}

		// Keep recorded results in their original relative order so an already
		// valid history is reproduced element-for-element.
		existingResultIndices.sort((a, b) => a - b);
		for (const at of existingResultIndices) {
			result.push(messages[at]);
		}

		// Missing tool_result entirely — inject synthetic error result
		for (const call of unanswered) {
			result.push({
				role: "toolResult",
				toolCallId: call.id,
				toolName: call.name ?? "unknown",
				content: [{ type: "text", text: "Tool execution was interrupted (session recovery)." }],
				isError: true,
				timestamp: Date.now(),
			} as ToolResultMessage);
		}
	}

	// Anything other than an element-for-element identical array means we repaired something.
	const repaired = result.length !== messages.length || result.some((m, k) => m !== messages[k]);
	if (!repaired) return messages;

	logger.warn("Repaired tool_use/tool_result ordering in conversation history");
	return result;
}
379
+
263
380
  /**
264
381
  * Transform AgentMessages (including custom types) to LLM-compatible Messages.
265
382
  *
@@ -269,7 +386,7 @@ export function createCustomMessage(
269
386
  * - Custom extensions and tools
270
387
  */
271
388
  export function convertToLlm(messages: AgentMessage[]): Message[] {
272
- return messages
389
+ const converted = messages
273
390
  .map((m): Message | undefined => {
274
391
  switch (m.role) {
275
392
  case "bashExecution":
@@ -370,4 +487,5 @@ export function convertToLlm(messages: AgentMessage[]): Message[] {
370
487
  }
371
488
  })
372
489
  .filter(m => m !== undefined);
490
+ return repairToolResultOrdering(converted);
373
491
  }
@@ -325,6 +325,29 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
325
325
  ? `${SUBAGENT_WARNING_MISSING_SUBMIT_RESULT}\n\n${rawOutput}`
326
326
  : SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
327
327
  }
328
+
329
+ // Salvage output from aborted runs that produced content without calling submit_result
330
+ if (exitCode !== 0 && doneAborted && !signalAborted && rawOutput.trim().length > 0) {
331
+ if (hasOutputSchema) {
332
+ // Try schema-validated fallback: if the model produced valid JSON matching the schema,
333
+ // use it even though submit_result was never called
334
+ const abortFallback = resolveFallbackCompletion(rawOutput, outputSchema);
335
+ if (abortFallback) {
336
+ const completeData = normalizeCompleteData(abortFallback.data, reportFindings);
337
+ try {
338
+ rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
339
+ } catch {
340
+ // Keep rawOutput as-is if serialization fails
341
+ }
342
+ exitCode = 0;
343
+ stderr = "";
344
+ }
345
+ } else {
346
+ // No schema required — raw text output is directly useful
347
+ exitCode = 0;
348
+ stderr = "";
349
+ }
350
+ }
328
351
  }
329
352
 
330
353
  return { rawOutput, exitCode, stderr, abortedViaSubmitResult, hasSubmitResult };
@@ -1250,7 +1273,9 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
1250
1273
  }
1251
1274
 
1252
1275
  // Update final progress
1253
- const wasAborted = abortedViaSubmitResult || (!hasSubmitResult && (done.aborted || signal?.aborted || false));
1276
+ // When salvage recovered the output (exitCode became 0), the result is not aborted.
1277
+ const wasAborted =
1278
+ abortedViaSubmitResult || (!hasSubmitResult && exitCode !== 0 && (done.aborted || signal?.aborted || false));
1254
1279
  const finalAbortReason = wasAborted
1255
1280
  ? abortedViaSubmitResult
1256
1281
  ? submitResultAbortReason
@@ -24,5 +24,5 @@ export function buildNamedToolChoice(toolName: string, model?: Model<Api>): Tool
24
24
  return "required";
25
25
  }
26
26
 
27
- return undefined;
27
+ return "required";
28
28
  }