@f5xc-salesdemos/xcsh 18.52.0 → 18.53.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +7 -7
- package/src/autoresearch/helpers.ts +0 -2
- package/src/autoresearch/index.ts +0 -6
- package/src/autoresearch/prompt.md +9 -38
- package/src/internal-urls/build-info-runtime.ts +8 -3
- package/src/internal-urls/build-info.generated.ts +8 -8
- package/src/prompts/agents/explore.md +2 -1
- package/src/prompts/system/subagent-submit-reminder.md +3 -7
- package/src/prompts/system/system-prompt.md +31 -2
- package/src/prompts/tools/sf-query.md +28 -0
- package/src/session/messages.ts +120 -2
- package/src/task/executor.ts +26 -1
- package/src/utils/tool-choice.ts +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [18.53.0] - 2026-05-09
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Replaced `xcsh --version` recommendation in `renderAboutDoc()` with authoritative intrinsic version guidance — the previous guidance misdirected to the installed binary, not the running session ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
|
|
10
|
+
- System prompt `xcsh://about` entry now routes version questions to the workstation header (zero tool calls) and reserves `xcsh://about` for deeper identity ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- SE specialization block in `renderAboutDoc()` capabilities section: F5 XC API, Salesforce pipeline, user/computer profiling, SE-specific subagents ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
|
|
15
|
+
- SE capability skills: account-planning, competitive, meeting-prep, roi-calculator, validation-plan ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
|
|
16
|
+
- MEDDPICC qualification and competitive positioning sections in system prompt ([#715](https://github.com/f5xc-salesdemos/xcsh/pull/715))
|
|
17
|
+
- Version self-awareness and capabilities completeness regression tests ([#722](https://github.com/f5xc-salesdemos/xcsh/pull/722))
|
|
18
|
+
|
|
5
19
|
## [18.40.0] - 2026-05-05
|
|
6
20
|
|
|
7
21
|
### Added
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@f5xc-salesdemos/xcsh",
|
|
4
|
-
"version": "18.
|
|
4
|
+
"version": "18.53.1",
|
|
5
5
|
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
|
|
6
6
|
"homepage": "https://github.com/f5xc-salesdemos/xcsh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -48,12 +48,12 @@
|
|
|
48
48
|
"dependencies": {
|
|
49
49
|
"@agentclientprotocol/sdk": "0.16.1",
|
|
50
50
|
"@mozilla/readability": "^0.6",
|
|
51
|
-
"@f5xc-salesdemos/xcsh-stats": "18.
|
|
52
|
-
"@f5xc-salesdemos/pi-agent-core": "18.
|
|
53
|
-
"@f5xc-salesdemos/pi-ai": "18.
|
|
54
|
-
"@f5xc-salesdemos/pi-natives": "18.
|
|
55
|
-
"@f5xc-salesdemos/pi-tui": "18.
|
|
56
|
-
"@f5xc-salesdemos/pi-utils": "18.
|
|
51
|
+
"@f5xc-salesdemos/xcsh-stats": "18.53.1",
|
|
52
|
+
"@f5xc-salesdemos/pi-agent-core": "18.53.1",
|
|
53
|
+
"@f5xc-salesdemos/pi-ai": "18.53.1",
|
|
54
|
+
"@f5xc-salesdemos/pi-natives": "18.53.1",
|
|
55
|
+
"@f5xc-salesdemos/pi-tui": "18.53.1",
|
|
56
|
+
"@f5xc-salesdemos/pi-utils": "18.53.1",
|
|
57
57
|
"@sinclair/typebox": "^0.34",
|
|
58
58
|
"@xterm/headless": "^6.0",
|
|
59
59
|
"ajv": "^8.18",
|
|
@@ -17,11 +17,9 @@ export const EXPERIMENT_MAX_LINES = 10;
|
|
|
17
17
|
export const EXPERIMENT_MAX_BYTES = 4 * 1024;
|
|
18
18
|
export const AUTORESEARCH_COMMITTABLE_FILES = [
|
|
19
19
|
"autoresearch.md",
|
|
20
|
-
"autoresearch.program.md",
|
|
21
20
|
"autoresearch.sh",
|
|
22
21
|
"autoresearch.checks.sh",
|
|
23
22
|
"autoresearch.ideas.md",
|
|
24
|
-
"SELF_AWARENESS.md",
|
|
25
23
|
] as const;
|
|
26
24
|
export const AUTORESEARCH_LOCAL_STATE_FILES = ["autoresearch.jsonl"] as const;
|
|
27
25
|
export const AUTORESEARCH_LOCAL_STATE_DIRECTORIES = [".autoresearch"] as const;
|
|
@@ -322,8 +322,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
322
322
|
const autoresearchMdPath = path.join(workDir, "autoresearch.md");
|
|
323
323
|
const checksPath = path.join(workDir, "autoresearch.checks.sh");
|
|
324
324
|
const ideasPath = path.join(workDir, "autoresearch.ideas.md");
|
|
325
|
-
const programPath = path.join(workDir, "autoresearch.program.md");
|
|
326
|
-
const selfAwarenessPath = path.join(workDir, "SELF_AWARENESS.md");
|
|
327
325
|
const pendingRun =
|
|
328
326
|
runtime.lastRunSummary ??
|
|
329
327
|
(await readPendingRunSummary(workDir, collectLoggedRunNumbers(runtime.state.results)));
|
|
@@ -361,10 +359,6 @@ export const createAutoresearchExtension: ExtensionFactory = api => {
|
|
|
361
359
|
checks_path: checksPath,
|
|
362
360
|
has_ideas: fs.existsSync(ideasPath),
|
|
363
361
|
ideas_path: ideasPath,
|
|
364
|
-
has_program: fs.existsSync(programPath),
|
|
365
|
-
program_path: programPath,
|
|
366
|
-
has_self_awareness: fs.existsSync(selfAwarenessPath),
|
|
367
|
-
self_awareness_path: selfAwarenessPath,
|
|
368
362
|
current_segment: runtime.state.currentSegment + 1,
|
|
369
363
|
current_segment_run_count: currentSegmentResults.length,
|
|
370
364
|
has_baseline_metric: baselineMetric !== null,
|
|
@@ -19,22 +19,6 @@ Working directory:
|
|
|
19
19
|
`{{working_dir}}`
|
|
20
20
|
|
|
21
21
|
You are running an autonomous experiment loop. Keep iterating until the user interrupts you or the configured maximum iteration count is reached.
|
|
22
|
-
{{#if has_program}}
|
|
23
|
-
|
|
24
|
-
### Local Playbook
|
|
25
|
-
|
|
26
|
-
`autoresearch.program.md` exists at `{{program_path}}`.
|
|
27
|
-
|
|
28
|
-
Use it as a repo-local strategy overlay for this session. `autoresearch.md` remains the source of truth for benchmark, scope, and constraints.
|
|
29
|
-
{{/if}}
|
|
30
|
-
{{#if has_self_awareness}}
|
|
31
|
-
|
|
32
|
-
### Self-Awareness Manifest
|
|
33
|
-
|
|
34
|
-
`SELF_AWARENESS.md` exists at `{{self_awareness_path}}`.
|
|
35
|
-
|
|
36
|
-
This document defines xcsh's mission, current capability inventory, evaluation dimensions, and known gaps. When the session goal involves self-evaluation, capability improvement, or SE workflow enhancement, read this document first — it is the ground truth for what xcsh is, what it should become, and how to measure progress.
|
|
37
|
-
{{/if}}
|
|
38
22
|
{{#if has_recent_results}}
|
|
39
23
|
|
|
40
24
|
### Current Segment Snapshot
|
|
@@ -89,11 +73,11 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
89
73
|
- Update the notes whenever the strategy changes.
|
|
90
74
|
- Keep durable conclusions in `autoresearch.md`.
|
|
91
75
|
- Use `autoresearch.ideas.md` for deferred experiment ideas that are promising but not active yet.
|
|
92
|
-
3.
|
|
93
|
-
- If it does not exist yet, create
|
|
76
|
+
3. The benchmark command in `autoresearch.md` is the canonical entrypoint.
|
|
77
|
+
- If it does not exist yet, create a benchmark script.
|
|
94
78
|
- Make it print structured metric lines in the form `METRIC name=value`.
|
|
79
|
+
- Quality scores (`direction: higher`) are first-class — not every benchmark is a timing measurement.
|
|
95
80
|
- Use the same workload every run unless you intentionally re-initialize with a new segment.
|
|
96
|
-
- Keep the measurement harness, evaluator, and fixed benchmark inputs stable unless you intentionally start a new segment and document the change.
|
|
97
81
|
4. Initialize the loop with `init_experiment` before the first logged run of a segment.
|
|
98
82
|
- Pass `from_autoresearch_md: true` with only `name` to load the benchmark contract from `autoresearch.md` without mirroring every field in the tool call.
|
|
99
83
|
- Use `abandon_unlogged_runs: true` only when you intentionally discard unlogged run artifacts and need a fresh segment (for example after a bad or obsolete benchmark directory).
|
|
@@ -105,7 +89,8 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
105
89
|
- Run `run_experiment`.
|
|
106
90
|
- Interpret the result honestly.
|
|
107
91
|
- Call `log_experiment` after every run (it refreshes benchmark/scope fields from `autoresearch.md` before logging so keep validation matches the file on disk).
|
|
108
|
-
- Use `run_experiment` with `force: true` only when you must override the segment benchmark command
|
|
92
|
+
- Use `run_experiment` with `force: true` only when you must override the segment benchmark command.
|
|
93
|
+
- After any code change, verify with `bun check:ts` or the project test suite before logging. A kept experiment that breaks the build is worse than a discarded one.
|
|
109
94
|
- On `log_experiment`, `force: true` relaxes ASI requirements and allows keeping a primary-metric regression; prefer normal logging when possible.
|
|
110
95
|
7. Keep the primary metric as the decision maker.
|
|
111
96
|
- `keep` when the primary metric improves.
|
|
@@ -130,7 +115,7 @@ An unlogged run artifact exists at `{{pending_run_directory}}`.
|
|
|
130
115
|
|
|
131
116
|
Your benchmark script SHOULD:
|
|
132
117
|
|
|
133
|
-
-
|
|
118
|
+
- match the benchmark command from `autoresearch.md`
|
|
134
119
|
- run from `{{working_dir}}`
|
|
135
120
|
- fail with a non-zero exit status on invalid runs
|
|
136
121
|
- print the primary metric as `METRIC {{default_metric_name}}=<number>` or another explicit metric name chosen during initialization
|
|
@@ -218,29 +203,15 @@ Resume from the existing notes:
|
|
|
218
203
|
|
|
219
204
|
Before the first benchmark:
|
|
220
205
|
|
|
221
|
-
- Write `autoresearch.md` with goal, benchmark command
|
|
206
|
+
- Write `autoresearch.md` with goal, benchmark command, primary metric name and unit, direction (`lower` or `higher`), tradeoff metrics if relevant, files in scope, off limits, and constraints.
|
|
222
207
|
- Add a short preflight section: prerequisites, one-time setup, and the comparability invariant that must stay fixed across runs.
|
|
223
208
|
- Mark ground-truth evaluators, fixed datasets, and other measurement-critical files as off limits or hard constraints when they define the benchmark contract.
|
|
224
|
-
-
|
|
225
|
-
- Create `autoresearch.sh` as the canonical benchmark entrypoint; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
|
|
209
|
+
- Create a benchmark script; print the primary metric as `METRIC <name>=<number>` and optional secondary metrics as additional `METRIC` lines.
|
|
226
210
|
- Optionally add `autoresearch.checks.sh` if correctness or quality needs a hard gate.
|
|
227
211
|
- Call `init_experiment` with arguments that match `autoresearch.md` exactly (benchmark command, metric, unit, direction, scope paths, off limits, constraints).
|
|
228
212
|
- Run and log the baseline.
|
|
229
|
-
{{#if has_self_awareness}}
|
|
230
|
-
|
|
231
|
-
#### SE Self-Evaluation Sessions
|
|
232
|
-
|
|
233
|
-
When the goal involves evaluating or improving xcsh's sales engineering capabilities (not runtime code performance):
|
|
234
|
-
|
|
235
|
-
- Read `SELF_AWARENESS.md` first to understand the current capability inventory and evaluation dimensions
|
|
236
|
-
- Read `autoresearch.program.md` for the SE-specific evaluation strategy
|
|
237
|
-
- Design the benchmark script (`autoresearch.sh`) to test the specific SE capability dimension — product knowledge accuracy, API reliability, prompt effectiveness, or workflow completeness
|
|
238
|
-
- Use quality/accuracy scores as the primary metric (direction: `higher`) rather than timing metrics
|
|
239
|
-
- Focus `Files in Scope` on the prompts, agent definitions, tool descriptions, or service modules relevant to the SE capability being evaluated
|
|
240
|
-
- Record capability status changes in `SELF_AWARENESS.md` when experiments yield durable improvements
|
|
241
|
-
{{/if}}
|
|
242
213
|
|
|
243
|
-
Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.
|
|
214
|
+
Until `init_experiment` succeeds, only autoresearch control files (`autoresearch.md`, `autoresearch.sh`, `autoresearch.ideas.md`, `autoresearch.checks.sh`) may be edited; after initialization, respect Files in Scope from the contract.
|
|
244
215
|
|
|
245
216
|
{{/if}}
|
|
246
217
|
{{#if has_checks}}
|
|
@@ -206,12 +206,17 @@ export function renderAboutDoc(info: RuntimeBuildInfo, context: ContextStatus |
|
|
|
206
206
|
"",
|
|
207
207
|
"Sessions, MCP server/client, skills, TUI with themes, commit assistant,",
|
|
208
208
|
"Python REPL, native shell/PTY, provider-agnostic LLM routing, slash commands,",
|
|
209
|
-
"SSH remote execution,
|
|
210
|
-
"
|
|
209
|
+
"SSH remote execution, image generation and analysis.",
|
|
210
|
+
"",
|
|
211
|
+
"SE specialization: F5 XC API integration (xcsh_api, api-catalog, api-spec),",
|
|
212
|
+
"Salesforce pipeline intelligence (sf_query, xcsh://salesforce),",
|
|
213
|
+
"F5 XC federated product docs (llms.txt hierarchy),",
|
|
214
|
+
"user/computer profiling (xcsh://user, xcsh://computer),",
|
|
215
|
+
"SE-specific subagents (deal-analyst, status-operator, cli-operator, github-ops).",
|
|
211
216
|
"",
|
|
212
217
|
"## What to do when asked about xcsh itself",
|
|
213
218
|
"",
|
|
214
|
-
"1.
|
|
219
|
+
"1. The version above is authoritative — it is embedded at build time in this session's BUILD_INFO and also shown in the `<workstation>` header of the system prompt. Do not run `xcsh --version` to check — that reports the installed binary, which may differ from the running session after an upgrade.",
|
|
215
220
|
"2. Check recent changes with `gh pr list --repo f5xc-salesdemos/xcsh --base main --state merged --limit 20`",
|
|
216
221
|
" or `git log --oneline -n 20` if you have a local clone. A fix may already be on `main`.",
|
|
217
222
|
"3. If behavior contradicts `xcsh://…` docs, read the actual source under the repo above to determine",
|
|
@@ -17,17 +17,17 @@ export interface BuildInfo {
|
|
|
17
17
|
}
|
|
18
18
|
|
|
19
19
|
export const BUILD_INFO: BuildInfo = {
|
|
20
|
-
"version": "18.
|
|
21
|
-
"commit": "
|
|
22
|
-
"shortCommit": "
|
|
20
|
+
"version": "18.53.1",
|
|
21
|
+
"commit": "8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
|
|
22
|
+
"shortCommit": "8c2358f",
|
|
23
23
|
"branch": "main",
|
|
24
|
-
"tag": "v18.
|
|
25
|
-
"commitDate": "2026-05-
|
|
26
|
-
"buildDate": "2026-05-
|
|
24
|
+
"tag": "v18.53.1",
|
|
25
|
+
"commitDate": "2026-05-09T09:32:57Z",
|
|
26
|
+
"buildDate": "2026-05-09T09:56:11.380Z",
|
|
27
27
|
"dirty": false,
|
|
28
28
|
"prNumber": "",
|
|
29
29
|
"repoUrl": "https://github.com/f5xc-salesdemos/xcsh",
|
|
30
30
|
"repoSlug": "f5xc-salesdemos/xcsh",
|
|
31
|
-
"commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/
|
|
32
|
-
"releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.
|
|
31
|
+
"commitUrl": "https://github.com/f5xc-salesdemos/xcsh/commit/8c2358f28cbf35bd5a8ce14498666912f6ffb0da",
|
|
32
|
+
"releaseUrl": "https://github.com/f5xc-salesdemos/xcsh/releases/tag/v18.53.1"
|
|
33
33
|
};
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
name: explore
|
|
3
3
|
description: Fast read-only codebase scout returning compressed context for handoff
|
|
4
4
|
tools: read, grep, find, web_search
|
|
5
|
-
model: pi/
|
|
5
|
+
model: pi/task
|
|
6
6
|
thinking-level: med
|
|
7
7
|
output:
|
|
8
8
|
properties:
|
|
@@ -10,6 +10,7 @@ output:
|
|
|
10
10
|
metadata:
|
|
11
11
|
description: Brief summary of findings and conclusions
|
|
12
12
|
type: string
|
|
13
|
+
optionalProperties:
|
|
13
14
|
files:
|
|
14
15
|
metadata:
|
|
15
16
|
description: Files examined with relevant code references
|
|
@@ -1,11 +1,7 @@
|
|
|
1
1
|
<system-reminder>
|
|
2
2
|
You stopped without calling submit_result. This is reminder {{retryCount}} of {{maxRetries}}.
|
|
3
3
|
|
|
4
|
-
You **MUST** call submit_result
|
|
5
|
-
-
|
|
6
|
-
-
|
|
7
|
-
|
|
8
|
-
You **MUST NOT** give up if you can still complete the task through exploration (using available tools or repo context). If you submit an error, you **MUST** include what you tried and the exact blocker.
|
|
9
|
-
|
|
10
|
-
You **MUST NOT** output text without a tool call. You **MUST** call submit_result to finish.
|
|
4
|
+
You **MUST** call submit_result now. No other tool calls, no text output.
|
|
5
|
+
- Task done: `submit_result` with `result.data` containing your findings
|
|
6
|
+
- Task blocked: `submit_result` with `result.error` describing the blocker
|
|
11
7
|
</system-reminder>
|
|
@@ -12,10 +12,12 @@ User-supplied content is sanitized, therefore:
|
|
|
12
12
|
{{SECTION_SEPERATOR "Identity"}}
|
|
13
13
|
<role>
|
|
14
14
|
You are xcsh — the technical coworker for F5 Distributed Cloud sales engineers.
|
|
15
|
+
Purpose: accelerate deal velocity by making the SE more effective at every stage of the sales cycle.
|
|
15
16
|
|
|
16
17
|
Primary mission: demos, MEDDPICC qualification, customer meeting preparation, network
|
|
17
18
|
architecture recommendations, F5 XC product subject-matter expertise, documentation,
|
|
18
|
-
|
|
19
|
+
presentations, technical discovery questions, POC/proof-of-concept validation planning,
|
|
20
|
+
account planning, and competitive positioning.
|
|
19
21
|
|
|
20
22
|
Technical depth: network protocols across all OSI layers, API design, security analysis
|
|
21
23
|
(DDoS, SSL/TLS, MITM, traffic forensics), infrastructure as code, and network automation.
|
|
@@ -36,6 +38,7 @@ The SE decides what to do; evidence decides what is true. See `<epistemic-integr
|
|
|
36
38
|
- (1) Correctness first, (2) Brevity second, (3) Politeness third.
|
|
37
39
|
- Prefer concise, information-dense writing.
|
|
38
40
|
- Avoid repeating the user's request or narrating routine tool calls.
|
|
41
|
+
- When producing customer-facing content, maintain a professional tone appropriate to the audience.
|
|
39
42
|
</communication>
|
|
40
43
|
|
|
41
44
|
<epistemic-integrity>
|
|
@@ -105,11 +108,37 @@ Before committing to any technical claim, architecture recommendation, or demo p
|
|
|
105
108
|
- Does this architecture fit the customer's actual environment, or a generic reference?
|
|
106
109
|
- What happens if this capability is not provisioned in the customer's contract tier?
|
|
107
110
|
- Am I answering the question the customer asked, or the question I wish they asked?
|
|
111
|
+
- For end-to-end demo setups: verify the working state of every component before presenting.
|
|
108
112
|
|
|
109
113
|
When the task is infrastructure work: guard against the deployment reflex — "API accepted"
|
|
110
114
|
≠ "works under load." Validate against real conditions, not just schema acceptance.
|
|
111
115
|
</behavior>
|
|
112
116
|
|
|
117
|
+
<qualification>
|
|
118
|
+
When qualifying a deal or assessing deal health, use the MEDDPICC framework:
|
|
119
|
+
- **M**etrics: What quantified business outcome justifies the purchase? If missing, the deal lacks urgency.
|
|
120
|
+
- **E**conomic Buyer: Who signs the check? If unknown, the deal can stall at approval.
|
|
121
|
+
- **D**ecision Criteria: What are they evaluating against? If unclear, you cannot position.
|
|
122
|
+
- **D**ecision Process: What steps remain before a decision? If unmapped, timeline is fiction.
|
|
123
|
+
- **P**aper Process: What procurement, legal, and security reviews are required? If unknown, close date is aspirational.
|
|
124
|
+
- **I**dentify Pain: What business pain does the champion articulate? If generic, the deal competes against inertia.
|
|
125
|
+
- **C**hampion: Who inside the account is actively selling on your behalf? If absent, you are the only advocate.
|
|
126
|
+
- **C**ompetition: Who else is being evaluated? If unknown, you cannot differentiate.
|
|
127
|
+
|
|
128
|
+
Score each element Green/Yellow/Red. Surface gaps as specific action items.
|
|
129
|
+
A deal with Red on Economic Buyer or Champion is at structural risk regardless of pipeline stage.
|
|
130
|
+
When delegating deal analysis to the deal-analyst subagent, include the account name, deal stage, and any known MEDDPICC context in the assignment.
|
|
131
|
+
</qualification>
|
|
132
|
+
|
|
133
|
+
<competitive-positioning>
|
|
134
|
+
When positioning F5 XC against competitors or handling competitive objections:
|
|
135
|
+
- Verify every competitive claim against current product documentation before presenting it.
|
|
136
|
+
- Differentiate on architecture (global network, distributed cloud), not just features.
|
|
137
|
+
- Use battlecard structure: competitor weakness, F5 XC strength, proof point, objection handling.
|
|
138
|
+
- Never disparage competitors — win on merit, not FUD.
|
|
139
|
+
- If the competitive landscape is unclear, ask what alternatives the customer is evaluating.
|
|
140
|
+
</competitive-positioning>
|
|
141
|
+
|
|
113
142
|
<stakes>
|
|
114
143
|
The SE works in customer-facing contexts. Product claims, architecture recommendations,
|
|
115
144
|
demo environments, and competitive positioning reach customers, partners, and leadership.
|
|
@@ -208,7 +237,7 @@ Most tools resolve custom protocol URLs to internal resources (not web URLs):
|
|
|
208
237
|
- `xcsh://..` — Internal xcsh documentation. **MUST NOT** read unless the user asks about xcsh itself.
|
|
209
238
|
- `xcsh://about` — Identity, version, build fingerprint, architecture, self-improvement. **MUST** read for any question about xcsh before exploring `~/.xcsh/`.
|
|
210
239
|
This document contains the authoritative repository URL, issues URL, and source location.
|
|
211
|
-
For
|
|
240
|
+
For the running version alone, the `<workstation>` header already has it — no tool call needed. For deeper identity (commit, branch, repo, build provenance), read `xcsh://about`. Do not call external GitHub tools or run `xcsh --version`.
|
|
212
241
|
- `xcsh://user` — Primary human user profile (identity, employment, contact, demographics). Read when personal identity context is needed. Do not read proactively on every turn.
|
|
213
242
|
- `xcsh://user?seed=true` — Refresh profile from Salesforce, GitHub, and system sources.
|
|
214
243
|
- `xcsh://computer` — Machine hardware and environment profile. Read when platform-specific recommendations needed.
|
|
@@ -50,6 +50,21 @@ Year-to-date bookings / top wins ("what are my top wins this year", "year-to-dat
|
|
|
50
50
|
Pipeline by territory ("break down pipeline by territory", "territory performance summary"):
|
|
51
51
|
SELECT ETM_Core_Territory__c, COUNT(Id) DealCount, SUM(Amount) TotalAmount FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND ForecastCategoryName <> 'Omitted' GROUP BY ETM_Core_Territory__c ORDER BY SUM(Amount) DESC NULLS LAST
|
|
52
52
|
|
|
53
|
+
Next-quarter pipeline (forward-looking):
|
|
54
|
+
SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = NEXT_FISCAL_QUARTER AND ForecastCategoryName <> 'Omitted' ORDER BY Amount DESC NULLS LAST LIMIT 30
|
|
55
|
+
|
|
56
|
+
Stalled deals (no activity in 30+ days):
|
|
57
|
+
SELECT Account.Name, Name, Amount, StageName, CloseDate, LastActivityDate FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_QUARTER AND LastActivityDate < LAST_N_DAYS:30 ORDER BY Amount DESC NULLS LAST LIMIT 20
|
|
58
|
+
|
|
59
|
+
Large deals (top opportunities by amount):
|
|
60
|
+
SELECT Account.Name, Name, Amount, StageName, ForecastCategoryName, CloseDate, Owner.Name FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Amount > 100000 ORDER BY Amount DESC NULLS LAST LIMIT 15
|
|
61
|
+
|
|
62
|
+
Deals by product/use case (solution mapping):
|
|
63
|
+
SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND CloseDate = THIS_FISCAL_YEAR ORDER BY Account.Name, Amount DESC NULLS LAST LIMIT 30
|
|
64
|
+
|
|
65
|
+
Renewal pipeline (existing customer retention):
|
|
66
|
+
SELECT Account.Name, Name, Amount, StageName, CloseDate, Type FROM Opportunity WHERE Id IN (SELECT OpportunityId FROM OpportunityTeamMember WHERE UserId = '{userId}') AND IsClosed = false AND Type = 'Renewal' ORDER BY CloseDate ASC LIMIT 20
|
|
67
|
+
|
|
53
68
|
Open cases:
|
|
54
69
|
SELECT CaseNumber, Subject, Status, Priority, Account.Name, CreatedDate FROM Case WHERE IsClosed = false ORDER BY Priority, CreatedDate DESC LIMIT 50
|
|
55
70
|
|
|
@@ -81,6 +96,19 @@ Territory-based filtering: Add WHERE clauses on territory fields when the user a
|
|
|
81
96
|
|
|
82
97
|
Coverage ratio: When the user asks about pipeline coverage or "do I have enough pipeline", calculate coverage = in-quarter pipeline total / quarterly quota target. Healthy coverage is 3x-5x quota. Below 2x is a risk. Use the forecast breakdown (T2) total as the numerator. Quota is available from the user profile when set.
|
|
83
98
|
|
|
99
|
+
MEDDPICC deal qualification — when user asks to "qualify", "score", or assess deal health:
|
|
100
|
+
For each deal, assess these 8 MEDDPICC elements from available SFDC data:
|
|
101
|
+
- **M**etrics: Is there a quantified business outcome? Check Opportunity.Description, close plan notes.
|
|
102
|
+
- **E**conomic Buyer: Is the EB identified? Check Contact roles with 'Economic Buyer' or 'Decision Maker'.
|
|
103
|
+
- **D**ecision Criteria: Are evaluation criteria documented? Check Opportunity.NextStep, Description.
|
|
104
|
+
- **D**ecision Process: Is the buying process mapped? Check stage progression timeline, paper process.
|
|
105
|
+
- **P**aper Process: Are procurement steps known? Check Opportunity.Description for legal/procurement notes.
|
|
106
|
+
- **I**dentify Pain: Is the business pain articulated? Check Opportunity.Description, discovery notes.
|
|
107
|
+
- **C**hampion: Is there an internal advocate? Check Contact roles for 'Champion' or active engagement.
|
|
108
|
+
- **C**ompetition: Are competitors identified? Check Opportunity.CompetitorName or description.
|
|
109
|
+
Score each element: Green (validated), Yellow (partially known), Red (unknown/missing).
|
|
110
|
+
Surface the gaps as action items, not just labels.
|
|
111
|
+
|
|
84
112
|
Results with relationship fields (e.g., Account.Name) are automatically flattened into dot-notation columns.
|
|
85
113
|
If the query returns more than 10,000 records, suggest using sf data export bulk instead.
|
|
86
114
|
Set use_tooling_api to true when querying metadata objects (ApexTrigger, ApexClass, CustomField).
|
package/src/session/messages.ts
CHANGED
|
@@ -12,9 +12,10 @@ import type {
|
|
|
12
12
|
MessageAttribution,
|
|
13
13
|
ProviderPayload,
|
|
14
14
|
TextContent,
|
|
15
|
+
ToolCall,
|
|
15
16
|
ToolResultMessage,
|
|
16
17
|
} from "@f5xc-salesdemos/pi-ai";
|
|
17
|
-
import { prompt } from "@f5xc-salesdemos/pi-utils";
|
|
18
|
+
import { logger, prompt } from "@f5xc-salesdemos/pi-utils";
|
|
18
19
|
import branchSummaryContextPrompt from "../prompts/compaction/branch-summary-context.md" with { type: "text" };
|
|
19
20
|
import compactionSummaryContextPrompt from "../prompts/compaction/compaction-summary-context.md" with { type: "text" };
|
|
20
21
|
import type { OutputMeta } from "../tools/output-meta";
|
|
@@ -260,6 +261,122 @@ export function createCustomMessage(
|
|
|
260
261
|
};
|
|
261
262
|
}
|
|
262
263
|
|
|
264
|
+
/**
 * Repair tool_use / tool_result ordering in converted LLM messages.
 *
 * The Claude API requires every assistant message containing tool_use blocks
 * to be immediately followed by the matching tool_result messages. Session
 * corruption (injected messages, compaction boundaries, crash during tool
 * execution) can break this invariant, producing a 400 error that bricks
 * the session.
 *
 * This function works in two passes:
 *   1. Plan: for every assistant message with tool_use (toolCall) content,
 *      decide which existing toolResult message answers each call and which
 *      calls have no result at all. Every assistant message is planned
 *      independently, so one broken turn does not hide a later broken turn.
 *   2. Emit: copy the messages in order, placing each assistant message's
 *      tool_results (in their original relative order) directly after it and
 *      injecting a synthetic error tool_result for every unanswered call.
 *      Messages that were wedged between a tool_use and its tool_result keep
 *      their relative order and end up after the tool_results.
 *
 * A toolResult that appears before its assistant message is moved after it
 * (it is emitted exactly once). toolResult messages that match no tool call
 * are left where they are; this function does not remove them.
 *
 * @param messages Converted LLM messages in conversation order. Not mutated.
 * @returns The same array instance when no repair was needed, otherwise a new
 *   array with the repaired ordering.
 */
function repairToolResultOrdering(messages: Message[]): Message[] {
	// Index every toolResult position by toolCallId, in conversation order.
	const resultIndicesByCallId = new Map<string, number[]>();
	for (const [index, msg] of messages.entries()) {
		if (msg.role !== "toolResult") continue;
		const callId = (msg as ToolResultMessage).toolCallId;
		const indices = resultIndicesByCallId.get(callId);
		if (indices === undefined) {
			resultIndicesByCallId.set(callId, [index]);
		} else {
			indices.push(index);
		}
	}

	// Indices of toolResult messages that are emitted alongside their assistant
	// message and therefore must be skipped at their original position.
	const claimed = new Set<number>();
	// Per assistant-message index: which results to place and which to synthesize.
	const plans = new Map<number, { present: number[]; missing: ToolCall[] }>();
	let repaired = false;

	for (const [index, msg] of messages.entries()) {
		if (msg.role !== "assistant") continue;
		const toolCalls = (msg as AssistantMessage).content.filter((c): c is ToolCall => c.type === "toolCall");
		if (toolCalls.length === 0) continue;

		const present: number[] = [];
		const missing: ToolCall[] = [];
		const seenIds = new Set<string>();
		for (const call of toolCalls) {
			// A repeated id inside one message needs only one tool_result.
			if (seenIds.has(call.id)) continue;
			seenIds.add(call.id);

			const candidates = resultIndicesByCallId.get(call.id) ?? [];
			// Prefer the first unclaimed result after this message; fall back to an
			// unclaimed result that was misplaced before it.
			const resultIndex =
				candidates.find(candidate => candidate > index && !claimed.has(candidate)) ??
				candidates.find(candidate => !claimed.has(candidate));
			if (resultIndex === undefined) {
				missing.push(call);
			} else {
				present.push(resultIndex);
				claimed.add(resultIndex);
			}
		}

		// Keep results in the order they already had in the conversation.
		present.sort((a, b) => a - b);
		const inPlace =
			missing.length === 0 && present.every((resultIndex, offset) => resultIndex === index + 1 + offset);
		if (!inPlace) repaired = true;
		plans.set(index, { present, missing });
	}

	// Valid history: hand back the caller's array untouched.
	if (!repaired) return messages;

	const result: Message[] = [];
	for (const [index, msg] of messages.entries()) {
		// Already emitted (or about to be) right after its assistant message.
		if (claimed.has(index)) continue;
		result.push(msg);

		const plan = plans.get(index);
		if (plan === undefined) continue;

		for (const resultIndex of plan.present) {
			const toolResult = messages[resultIndex];
			if (toolResult !== undefined) result.push(toolResult);
		}
		for (const call of plan.missing) {
			// Missing tool_result entirely — inject synthetic error result
			result.push({
				role: "toolResult",
				toolCallId: call.id,
				toolName: call.name ?? "unknown",
				content: [{ type: "text", text: "Tool execution was interrupted (session recovery)." }],
				isError: true,
				timestamp: Date.now(),
			} as ToolResultMessage);
		}
	}

	logger.warn("Repaired tool_use/tool_result ordering in conversation history");
	return result;
}
|
|
379
|
+
|
|
263
380
|
/**
|
|
264
381
|
* Transform AgentMessages (including custom types) to LLM-compatible Messages.
|
|
265
382
|
*
|
|
@@ -269,7 +386,7 @@ export function createCustomMessage(
|
|
|
269
386
|
* - Custom extensions and tools
|
|
270
387
|
*/
|
|
271
388
|
export function convertToLlm(messages: AgentMessage[]): Message[] {
|
|
272
|
-
|
|
389
|
+
const converted = messages
|
|
273
390
|
.map((m): Message | undefined => {
|
|
274
391
|
switch (m.role) {
|
|
275
392
|
case "bashExecution":
|
|
@@ -370,4 +487,5 @@ export function convertToLlm(messages: AgentMessage[]): Message[] {
|
|
|
370
487
|
}
|
|
371
488
|
})
|
|
372
489
|
.filter(m => m !== undefined);
|
|
490
|
+
return repairToolResultOrdering(converted);
|
|
373
491
|
}
|
package/src/task/executor.ts
CHANGED
|
@@ -325,6 +325,29 @@ export function finalizeSubprocessOutput(args: FinalizeSubprocessOutputArgs): Fi
|
|
|
325
325
|
? `${SUBAGENT_WARNING_MISSING_SUBMIT_RESULT}\n\n${rawOutput}`
|
|
326
326
|
: SUBAGENT_WARNING_MISSING_SUBMIT_RESULT;
|
|
327
327
|
}
|
|
328
|
+
|
|
329
|
+
// Salvage output from aborted runs that produced content without calling submit_result
|
|
330
|
+
if (exitCode !== 0 && doneAborted && !signalAborted && rawOutput.trim().length > 0) {
|
|
331
|
+
if (hasOutputSchema) {
|
|
332
|
+
// Try schema-validated fallback: if the model produced valid JSON matching the schema,
|
|
333
|
+
// use it even though submit_result was never called
|
|
334
|
+
const abortFallback = resolveFallbackCompletion(rawOutput, outputSchema);
|
|
335
|
+
if (abortFallback) {
|
|
336
|
+
const completeData = normalizeCompleteData(abortFallback.data, reportFindings);
|
|
337
|
+
try {
|
|
338
|
+
rawOutput = JSON.stringify(completeData, null, 2) ?? "null";
|
|
339
|
+
} catch {
|
|
340
|
+
// Keep rawOutput as-is if serialization fails
|
|
341
|
+
}
|
|
342
|
+
exitCode = 0;
|
|
343
|
+
stderr = "";
|
|
344
|
+
}
|
|
345
|
+
} else {
|
|
346
|
+
// No schema required — raw text output is directly useful
|
|
347
|
+
exitCode = 0;
|
|
348
|
+
stderr = "";
|
|
349
|
+
}
|
|
350
|
+
}
|
|
328
351
|
}
|
|
329
352
|
|
|
330
353
|
return { rawOutput, exitCode, stderr, abortedViaSubmitResult, hasSubmitResult };
|
|
@@ -1250,7 +1273,9 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
|
|
|
1250
1273
|
}
|
|
1251
1274
|
|
|
1252
1275
|
// Update final progress
|
|
1253
|
-
|
|
1276
|
+
// When salvage recovered the output (exitCode became 0), the result is not aborted.
|
|
1277
|
+
const wasAborted =
|
|
1278
|
+
abortedViaSubmitResult || (!hasSubmitResult && exitCode !== 0 && (done.aborted || signal?.aborted || false));
|
|
1254
1279
|
const finalAbortReason = wasAborted
|
|
1255
1280
|
? abortedViaSubmitResult
|
|
1256
1281
|
? submitResultAbortReason
|
package/src/utils/tool-choice.ts
CHANGED