@badliveware/pi-tool-feedback 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +189 -0
- package/index.ts +269 -0
- package/package.json +45 -0
- package/src/core.ts +511 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 BadLiveware
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Pi Tool Feedback
|
|
2
|
+
|
|
3
|
+
Generic watched-tool feedback for Pi. It records passive per-turn summaries when selected tools are used, and can optionally queue a non-user feedback task for the agent at the end of a prompt.
|
|
4
|
+
|
|
5
|
+
Use it when you are dogfooding a tool, extension, MCP server, or workflow and want low-friction subjective signals such as “did this feel useful?”, “did output feel incomplete or noisy?”, and “would the agent use it again?”. Objective trace facts such as truncation and follow-up tool categories stay in passive summaries.
|
|
6
|
+
|
|
7
|
+
> Caveat: this is an experiment I am trying on my own Pi setup. I do not yet know whether agent self-feedback is broadly useful or reliable; treat the results as noisy design signals, not proof that a tool works better.
|
|
8
|
+
|
|
9
|
+
## How it works
|
|
10
|
+
|
|
11
|
+
Configure tool names or prefixes to watch. The extension listens to Pi tool events, records sanitized turn summaries, and exposes a `tool_feedback` tool that the agent can call when prompted. Active feedback requests are delivered as Pi custom messages with `triggerTurn`, not as user messages. The request names the watched tools but does not include trace-derived facts, so the agent's self-report is less anchored by telemetry the extension already knows.
|
|
12
|
+
|
|
13
|
+
It does not record raw tool inputs, raw outputs, prompts, file contents, or shell commands in its JSONL log. Optional free-form notes are stored in session entries, while logs keep only note length/hash.
|
|
14
|
+
|
|
15
|
+
## What feedback can and cannot tell you
|
|
16
|
+
|
|
17
|
+
Agent self-feedback is useful as noisy operational feedback, not as a faithful explanation of the model's hidden reasoning. Treat it like a lightweight post-task survey and compare it with trace data.
|
|
18
|
+
|
|
19
|
+
Good questions ask about the agent's observable experience:
|
|
20
|
+
|
|
21
|
+
- Was the output useful, incomplete, noisy, or too slow?
|
|
22
|
+
- Would you use this tool again in a similar situation?
|
|
23
|
+
- Was follow-up work routine or did it feel compensatory?
|
|
24
|
+
- What one missing capability or improvement would help most?
|
|
25
|
+
- How confident are you in this report?
|
|
26
|
+
|
|
27
|
+
Weak questions ask the agent to reconstruct why its internal reasoning happened:
|
|
28
|
+
|
|
29
|
+
- What was the real causal contribution of this tool to your final answer?
|
|
30
|
+
- Which hidden prompt feature or bias changed your reasoning?
|
|
31
|
+
- Did your chain-of-thought faithfully describe the reason you chose an action?
|
|
32
|
+
- Exactly how much would the outcome have changed without this tool?
|
|
33
|
+
|
|
34
|
+
The extension therefore keeps objective trace facts in passive summaries and asks active prompts for subjective/counterfactual judgments. Use self-reports as candidate design signals, not ground truth.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
From a published package:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pi install @badliveware/pi-tool-feedback
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
From this repository workspace:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pi install ./agent/extensions/public/tool-feedback
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Configure
|
|
51
|
+
|
|
52
|
+
Create `~/.pi/agent/tool-feedback.json` for user-wide settings or `.pi/tool-feedback.json` in a project. Project config overlays user config.
|
|
53
|
+
|
|
54
|
+
Minimal example:
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
{
|
|
58
|
+
"mode": "both",
|
|
59
|
+
"watch": [
|
|
60
|
+
{ "prefix": "code_intel_" },
|
|
61
|
+
{ "name": "process" }
|
|
62
|
+
]
|
|
63
|
+
}
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Modes:
|
|
67
|
+
|
|
68
|
+
| Mode | Behavior |
|
|
69
|
+
| --- | --- |
|
|
70
|
+
| `off` | Disable summaries and feedback prompts. |
|
|
71
|
+
| `passive` | Record turn summaries only. |
|
|
72
|
+
| `ask-agent` | Ask for structured feedback after a prompt that used watched tools. |
|
|
73
|
+
| `both` | Record summaries and ask for feedback. |
|
|
74
|
+
|
|
75
|
+
Other options:
|
|
76
|
+
|
|
77
|
+
```json
|
|
78
|
+
{
|
|
79
|
+
"excludeTools": ["tool_feedback", "tool_feedback_state"],
|
|
80
|
+
"cooldownTurns": 0,
|
|
81
|
+
"skipWhenPendingMessages": true,
|
|
82
|
+
"appendSessionEntries": true,
|
|
83
|
+
"log": true
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### Extra feedback fields
|
|
88
|
+
|
|
89
|
+
The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`.
|
|
90
|
+
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"mode": "both",
|
|
94
|
+
"watch": [{ "prefix": "code_intel_" }],
|
|
95
|
+
"feedbackFields": [
|
|
96
|
+
{
|
|
97
|
+
"name": "rankingQuality",
|
|
98
|
+
"type": "enum",
|
|
99
|
+
"values": ["good", "mixed", "poor", "unknown"],
|
|
100
|
+
"required": true,
|
|
101
|
+
"description": "How good was result ranking?"
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"name": "latencyAcceptable",
|
|
105
|
+
"type": "yes_no_unknown"
|
|
106
|
+
}
|
|
107
|
+
]
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Supported field types:
|
|
112
|
+
|
|
113
|
+
| Type | Accepted values |
|
|
114
|
+
| --- | --- |
|
|
115
|
+
| `enum` | one of the configured `values` |
|
|
116
|
+
| `yes_no_unknown` | `yes`, `no`, or `unknown` |
|
|
117
|
+
| `boolean` | JSON boolean |
|
|
118
|
+
| `number` | finite JSON number |
|
|
119
|
+
|
|
120
|
+
Field names must match `/^[a-zA-Z][a-zA-Z0-9_]*$/`. Invalid, unknown, or missing required field responses are recorded in `fieldResponseErrors`; invalid values are not stored in `fieldResponses`.
|
|
121
|
+
|
|
122
|
+
Set `PI_TOOL_FEEDBACK_CONFIG` to load an additional config file, `PI_TOOL_FEEDBACK_DIR` to change the JSONL log directory, or `PI_TOOL_FEEDBACK_LOG` to force one log file.
|
|
123
|
+
|
|
124
|
+
Default log directory:
|
|
125
|
+
|
|
126
|
+
```text
|
|
127
|
+
~/.cache/pi-tool-feedback/<session-id>.jsonl
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Tools and command
|
|
131
|
+
|
|
132
|
+
### `tool_feedback_state`
|
|
133
|
+
|
|
134
|
+
Read-only state/config inspection. Use it to see the loaded mode, watch rules, config paths, diagnostics, and current prompt usage.
|
|
135
|
+
|
|
136
|
+
### `tool_feedback`
|
|
137
|
+
|
|
138
|
+
Records one structured feedback entry. Typical agent response after a feedback prompt:
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"watchedTools": ["code_intel_impact_map"],
|
|
143
|
+
"perceivedUsefulness": "medium",
|
|
144
|
+
"wouldUseAgainSameSituation": "yes",
|
|
145
|
+
"followupWasRoutine": "yes",
|
|
146
|
+
"followupNeededBecauseToolWasInsufficient": "unknown",
|
|
147
|
+
"outputSeemedTooNoisy": "no",
|
|
148
|
+
"outputSeemedIncomplete": "yes",
|
|
149
|
+
"missedImportantContext": "unknown",
|
|
150
|
+
"confidence": "medium",
|
|
151
|
+
"improvement": "better_summary",
|
|
152
|
+
"fieldResponses": {
|
|
153
|
+
"rankingQuality": "mixed"
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
### `/tool-feedback`
|
|
159
|
+
|
|
160
|
+
Show runtime status or set the runtime mode until reload:
|
|
161
|
+
|
|
162
|
+
```text
|
|
163
|
+
/tool-feedback
|
|
164
|
+
/tool-feedback off
|
|
165
|
+
/tool-feedback passive
|
|
166
|
+
/tool-feedback ask-agent
|
|
167
|
+
/tool-feedback both
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Loop prevention
|
|
171
|
+
|
|
172
|
+
The extension avoids the common feedback-loop traps:
|
|
173
|
+
|
|
174
|
+
- `tool_feedback` and `tool_feedback_state` are excluded by default.
|
|
175
|
+
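- The active prompt is asked at most once per agent run (at the end of a prompt that used watched tools), and `cooldownTurns` can suppress it for additional turns after a request.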
|
|
176
|
+
- Feedback prompts are skipped when Pi already has pending messages if `skipWhenPendingMessages` is true.
|
|
177
|
+
- If the agent already called `tool_feedback`, no follow-up prompt is sent.
|
|
178
|
+
|
|
179
|
+
If the feedback prompt is annoying, run:
|
|
180
|
+
|
|
181
|
+
```text
|
|
182
|
+
/tool-feedback passive
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
or set:
|
|
186
|
+
|
|
187
|
+
```json
|
|
188
|
+
{ "mode": "off" }
|
|
189
|
+
```
|
package/index.ts
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
import { Type } from "@mariozechner/pi-ai";
|
|
2
|
+
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
3
|
+
import { Text } from "@mariozechner/pi-tui";
|
|
4
|
+
import {
|
|
5
|
+
appendLog,
|
|
6
|
+
categoryForTool,
|
|
7
|
+
feedbackLogPath,
|
|
8
|
+
feedbackPrompt,
|
|
9
|
+
feedbackRecord,
|
|
10
|
+
isRecord,
|
|
11
|
+
loadToolFeedbackConfig,
|
|
12
|
+
logSafeFeedbackRecord,
|
|
13
|
+
makeTurnSummary,
|
|
14
|
+
matchesWatch,
|
|
15
|
+
modeIncludesAsk,
|
|
16
|
+
modeIncludesPassive,
|
|
17
|
+
resultErrorKind,
|
|
18
|
+
resultOk,
|
|
19
|
+
resultTruncated,
|
|
20
|
+
stringValue,
|
|
21
|
+
unique,
|
|
22
|
+
type AgentUsage,
|
|
23
|
+
type FeedbackMode,
|
|
24
|
+
type LoadedConfig,
|
|
25
|
+
type TurnUsage,
|
|
26
|
+
type WatchedToolCall,
|
|
27
|
+
type WatchedToolResult,
|
|
28
|
+
} from "./src/core.ts";
|
|
29
|
+
|
|
30
|
+
export { feedbackLogPath, loadToolFeedbackConfig } from "./src/core.ts";
|
|
31
|
+
|
|
32
|
+
// Custom message type shared by the feedback-request message that is sent and the renderer registered for it.
const MESSAGE_TYPE_TOOL_FEEDBACK_REQUEST = "tool-feedback:request";
|
|
33
|
+
|
|
34
|
+
/** Structured details attached to a feedback-request custom message. */
interface FeedbackRequestDetails {
  /** Discriminator checked by the message renderer before it draws anything. */
  kind: "tool_feedback_request";
  /** De-duplicated names of the watched tools the request refers to. */
  watchedTools: string[];
}
|
|
38
|
+
|
|
39
|
+
/**
 * Parses the argument of the `/tool-feedback` command into a feedback mode.
 *
 * @param args Command argument; the match is exact, so callers trim it first.
 * @returns The matching mode, or `undefined` when `args` is not one of
 *   `off`, `passive`, `ask-agent`, or `both`.
 */
function runtimeModeFromArgs(args: string): FeedbackMode | undefined {
  switch (args) {
    case "off":
    case "passive":
    case "ask-agent":
    case "both":
      return args;
    default:
      return undefined;
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Builds the one-line status string shown by the `/tool-feedback` command.
 *
 * Exact-name rules are shown as the name, prefix rules as `prefix*`. A rule
 * that carries neither field is left out instead of being shown as
 * `undefined*`.
 *
 * @param loaded Loaded configuration whose mode and watch rules are described.
 * @returns `tool-feedback <mode>; watching <rules>`, with `nothing` when no
 *   rule can be displayed.
 */
function statusText(loaded: LoadedConfig): string {
  const labels: string[] = [];
  for (const rule of loaded.config.watch) {
    if (rule.name != null) labels.push(rule.name);
    else if (rule.prefix != null) labels.push(`${rule.prefix}*`);
  }
  return `tool-feedback ${loaded.config.mode}; watching ${labels.join(", ") || "nothing"}`;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Builds the details payload attached to a feedback-request message.
 *
 * Only the de-duplicated watched tool names are included.
 *
 * @param agent Usage collected for the agent run that just finished.
 * @returns Details object consumed by the feedback-request renderer.
 */
function feedbackRequestDetails(agent: AgentUsage): FeedbackRequestDetails {
  const watchedTools = unique(agent.watchedCalls.map((call) => call.toolName));
  return { kind: "tool_feedback_request", watchedTools };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Builds the payload returned by the `tool_feedback_state` tool.
 *
 * @param loaded Loaded configuration, including source paths and diagnostics.
 * @param agent Usage of the current agent run, if one has been started.
 * @returns Plain object with the effective configuration and, when `agent`
 *   is defined, a summary of its watched calls (`currentAgent`).
 */
function statePayload(loaded: LoadedConfig, agent: AgentUsage | undefined): Record<string, unknown> {
  const { config } = loaded;
  const currentAgent = agent
    ? {
        watchedTools: unique(agent.watchedCalls.map((call) => call.toolName)),
        watchedCallCount: agent.watchedCalls.length,
        watchedResultCount: agent.watchedResults.length,
        feedbackRecorded: agent.feedbackRecorded,
        afterWatchedCategories: unique(agent.afterWatchedCategories),
      }
    : undefined;
  return {
    mode: config.mode,
    watch: config.watch,
    excludeTools: config.excludeTools,
    cooldownTurns: config.cooldownTurns,
    skipWhenPendingMessages: config.skipWhenPendingMessages,
    appendSessionEntries: config.appendSessionEntries,
    log: config.log,
    feedbackFields: config.feedbackFields,
    loadedConfig: loaded.paths,
    diagnostics: loaded.diagnostics,
    currentAgent,
  };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Registers the renderer for feedback-request custom messages.
 *
 * The renderer draws a single themed line naming the watched tools and
 * returns `undefined` for messages whose details are missing or of another
 * kind.
 *
 * @param pi Extension API used to register the renderer.
 */
function registerFeedbackRequestRenderer(pi: ExtensionAPI): void {
  pi.registerMessageRenderer<FeedbackRequestDetails>(MESSAGE_TYPE_TOOL_FEEDBACK_REQUEST, (message, _options, theme) => {
    const details = message.details;
    if (!details || details.kind !== "tool_feedback_request") return undefined;
    // Fall back to a generic label when the request carries no tool names.
    const watched = details.watchedTools.join(", ") || "watched tools";
    const label = theme.fg("warning", "✦ tool feedback requested ");
    return new Text(`${label}${theme.fg("accent", watched)}`);
  });
}
|
|
84
|
+
|
|
85
|
+
/**
 * Pi extension entry point for watched-tool feedback.
 *
 * Registers the `tool_feedback_state` and `tool_feedback` tools, the
 * `/tool-feedback` command, and the event handlers that track watched tool
 * calls per turn and per agent run. At the end of an agent run that used
 * watched tools it may send one feedback-request message that triggers a turn.
 *
 * @param pi Extension API supplied by the host.
 */
export default function toolFeedback(pi: ExtensionAPI): void {
  // Mode set through `/tool-feedback`; overrides the configured mode while this instance lives.
  let runtimeMode: FeedbackMode | undefined;
  // Index of the most recent turn as reported by `turn_start`; stored on turn records.
  let activeTurnIndex = 0;
  // Counter that orders tool calls across turns.
  let sequence = 0;
  // Counts `turn_start` events locally and only ever increases, so the cooldown
  // does not rely on how the host numbers turns.
  // NOTE(review): this guards against `event.turnIndex` restarting per agent run — confirm host behaviour.
  let turnCounter = 0;
  // Value of `turnCounter` when the last feedback request was sent.
  let lastPromptedTurn = Number.NEGATIVE_INFINITY;
  let currentTurn: TurnUsage | undefined;
  let currentAgent: AgentUsage | undefined;
  // Watched calls whose result has not arrived yet, keyed by tool call id.
  const pendingCalls = new Map<string, WatchedToolCall & { startedAt: number }>();

  registerFeedbackRequestRenderer(pi);

  /** Creates an empty usage record for a new agent run. */
  const newAgentUsage = (): AgentUsage => ({
    startedAt: Date.now(),
    watchedCalls: [],
    watchedResults: [],
    feedbackRecorded: false,
    afterWatchedCategories: [],
    turnSummaries: [],
  });

  /** Creates an empty usage record for a turn. */
  const newTurnUsage = (turnIndex: number, startedAt: number): TurnUsage => ({
    turnIndex,
    startedAt,
    toolCalls: [],
    watchedCalls: [],
    watchedResults: [],
  });

  /** Loads the configuration for `ctx` and applies the runtime mode override, if any. */
  const getLoadedConfig = (ctx: ExtensionContext): LoadedConfig => {
    const loaded = loadToolFeedbackConfig(ctx);
    if (runtimeMode) loaded.config.mode = runtimeMode;
    return loaded;
  };

  const getConfig = (ctx: ExtensionContext) => getLoadedConfig(ctx).config;

  /** Shows `tf:<mode>` in the status area, or clears it when off or nothing is watched. */
  const refreshStatus = (ctx: ExtensionContext, loaded: LoadedConfig): void => {
    const { mode, watch } = loaded.config;
    ctx.ui.setStatus("tool-feedback", mode === "off" || watch.length === 0 ? undefined : `tf:${mode}`);
  };

  /** Returns the current turn record, creating one if a tool event arrives without `turn_start`. */
  const ensureTurn = (): TurnUsage => {
    currentTurn ??= newTurnUsage(activeTurnIndex, Date.now());
    return currentTurn;
  };

  /** Returns the current agent record, creating one if an event arrives without `agent_start`. */
  const ensureAgent = (): AgentUsage => {
    currentAgent ??= newAgentUsage();
    return currentAgent;
  };

  /** Optional yes/no/unknown schema field with the given description. */
  const optionalYesNoUnknown = (description: string) =>
    Type.Optional(Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unknown")], { description }));

  pi.registerTool({
    name: "tool_feedback_state",
    label: "Tool Feedback State",
    description: "Inspect generic watched-tool feedback configuration and current prompt usage.",
    parameters: Type.Object({}),
    async execute(_toolCallId, _params, _signal, _onUpdate, ctx) {
      const payload = statePayload(getLoadedConfig(ctx), currentAgent);
      return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }], details: payload };
    },
  });

  pi.registerTool({
    name: "tool_feedback",
    label: "Tool Feedback",
    description: "Record concise structured feedback after using watched tools. This stores feedback only; it does not change the watched tool.",
    parameters: Type.Object({
      watchedTools: Type.Array(Type.String(), { description: "Watched tool names this feedback covers." }),
      perceivedUsefulness: Type.Union(
        [Type.Literal("high"), Type.Literal("medium"), Type.Literal("low"), Type.Literal("none"), Type.Literal("unknown")],
        { description: "How useful the tool felt for this task. Allowed values are: `high`, `medium`, `low`, `none`, `unknown`." },
      ),
      wouldUseAgainSameSituation: Type.Union(
        [Type.Literal("yes"), Type.Literal("no"), Type.Literal("unsure"), Type.Literal("unknown")],
        { description: "Whether you would use the same tool again for a similar situation. Allowed values are: `yes`, `no`, `unsure`, `unknown`." },
      ),
      followupWasRoutine: optionalYesNoUnknown("Whether follow-up work felt routine rather than caused by tool insufficiency. Allowed values are: `yes`, `no`, `unknown`."),
      followupNeededBecauseToolWasInsufficient: optionalYesNoUnknown("Whether follow-up work was needed because the watched tool was insufficient. Allowed values are: `yes`, `no`, `unknown`."),
      outputSeemedTooNoisy: optionalYesNoUnknown("Whether the output felt too noisy to use efficiently. Allowed values are: `yes`, `no`, `unknown`."),
      outputSeemedIncomplete: optionalYesNoUnknown("Whether the output felt incomplete for the task. Allowed values are: `yes`, `no`, `unknown`."),
      missedImportantContext: optionalYesNoUnknown("Whether important context was later found outside the watched tool output. Allowed values are: `yes`, `no`, `unknown`."),
      confidence: Type.Union(
        [Type.Literal("high"), Type.Literal("medium"), Type.Literal("low")],
        { description: "Confidence in this subjective feedback. Allowed values are: `high`, `medium`, `low`." },
      ),
      improvement: Type.Optional(Type.Union(
        [Type.Literal("better_ranking"), Type.Literal("higher_cap"), Type.Literal("better_summary"), Type.Literal("better_docs"), Type.Literal("less_noise"), Type.Literal("faster"), Type.Literal("other")],
        { description: "Most useful improvement area. Allowed values are: `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, `other`." },
      )),
      fieldResponses: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Responses for configured extra feedback fields. Use exact field names from the feedback prompt/config; invalid or unknown fields are recorded as errors and ignored." })),
      note: Type.Optional(Type.String({ description: "Short optional note. Stored in the session entry; logs keep only length/hash." })),
    }),
    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      const config = getConfig(ctx);
      const record = feedbackRecord(params as Record<string, unknown>, ctx, config);
      // Mark the run so `agent_end` does not ask for feedback that was already given.
      ensureAgent().feedbackRecorded = true;
      if (config.appendSessionEntries) pi.appendEntry("tool-feedback:agent-feedback", record);
      // The log gets the log-safe variant of the record rather than the record itself.
      appendLog(config, record.sessionId, logSafeFeedbackRecord(record));
      const payload = {
        recorded: true,
        watchedTools: record.watchedTools,
        perceivedUsefulness: record.perceivedUsefulness,
        confidence: record.confidence,
        fieldResponseErrors: record.fieldResponseErrors,
      };
      return { content: [{ type: "text", text: `Recorded feedback for ${record.watchedTools.length} watched tool(s).` }], details: payload };
    },
  });

  pi.registerCommand("tool-feedback", {
    description: "Show or set watched-tool feedback mode: status, off, passive, ask-agent, both",
    handler: async (args, ctx) => {
      const requested = args.trim();
      const mode = runtimeModeFromArgs(requested);
      if (mode) runtimeMode = mode;
      const loaded = getLoadedConfig(ctx);
      // Keep the status entry in step with a mode changed at runtime.
      refreshStatus(ctx, loaded);
      if (!mode && requested !== "" && requested !== "status") {
        // Unrecognised argument: say so instead of silently showing the unchanged status.
        ctx.ui.notify(`tool-feedback: unknown mode "${requested}" (expected status, off, passive, ask-agent, or both); ${statusText(loaded)}`, "warning");
        return;
      }
      ctx.ui.notify(statusText(loaded), "info");
    },
  });

  pi.on("session_start", async (_event, ctx) => {
    refreshStatus(ctx, getLoadedConfig(ctx));
  });

  pi.on("agent_start", async (_event, ctx) => {
    getLoadedConfig(ctx);
    currentAgent = newAgentUsage();
  });

  pi.on("turn_start", async (event) => {
    turnCounter += 1;
    activeTurnIndex = event.turnIndex;
    currentTurn = newTurnUsage(event.turnIndex, event.timestamp);
  });

  pi.on("tool_call", async (event, ctx) => {
    const config = getConfig(ctx);
    const toolName = stringValue(event.toolName);
    const toolCallId = stringValue(event.toolCallId);
    if (!toolName || !toolCallId) return;

    const category = categoryForTool(toolName, event.input);
    const callSequence = ++sequence;
    const turn = ensureTurn();
    turn.toolCalls.push({ toolName, category, sequence: callSequence });

    const agent = ensureAgent();
    // Record the category of every call made after a watched call in this run.
    if (agent.lastWatchedSequence !== undefined && callSequence > agent.lastWatchedSequence) agent.afterWatchedCategories.push(category);
    if (config.mode === "off" || !matchesWatch(toolName, config)) return;

    // Only the `confirmReferences` string is read from the tool input.
    const inputRecord = isRecord(event.input) ? event.input : undefined;
    const watched: WatchedToolCall & { startedAt: number } = {
      toolName,
      toolCallId,
      category,
      confirmReferences: inputRecord ? stringValue(inputRecord.confirmReferences) : undefined,
      turnIndex: turn.turnIndex,
      sequence: callSequence,
      startedAt: Date.now(),
    };
    pendingCalls.set(toolCallId, watched);
    turn.watchedCalls.push(watched);
    agent.watchedCalls.push(watched);
    agent.lastWatchedSequence = callSequence;
  });

  pi.on("tool_result", async (event) => {
    const toolName = stringValue(event.toolName);
    const toolCallId = stringValue(event.toolCallId);
    if (!toolName || !toolCallId) return;
    const pending = pendingCalls.get(toolCallId);
    if (!pending) return; // not a watched call
    pendingCalls.delete(toolCallId);

    const isError = event.isError === true;
    const result: WatchedToolResult = {
      ...pending,
      ok: resultOk(event.details, isError),
      isError,
      truncated: resultTruncated(event.details),
      errorKind: resultErrorKind(event.details, isError),
      durationMs: Date.now() - pending.startedAt,
    };
    ensureTurn().watchedResults.push(result);
    ensureAgent().watchedResults.push(result);
  });

  pi.on("turn_end", async (event, ctx) => {
    const config = getConfig(ctx);
    const turn = currentTurn;
    if (!turn || turn.watchedCalls.length === 0) return;
    const summary = makeTurnSummary(turn, ctx);
    ensureAgent().turnSummaries.push(summary);
    if (modeIncludesPassive(config.mode)) {
      if (config.appendSessionEntries) pi.appendEntry("tool-feedback:turn-summary", summary);
      appendLog(config, summary.sessionId, summary as unknown as Record<string, unknown>);
    }
    if (event.turnIndex === turn.turnIndex) currentTurn = undefined;
  });

  pi.on("agent_end", async (_event, ctx) => {
    const config = getConfig(ctx);
    const agent = currentAgent;
    if (!agent || agent.watchedCalls.length === 0) return;
    if (!modeIncludesAsk(config.mode)) return;
    if (agent.feedbackRecorded) return;
    if (config.skipWhenPendingMessages && ctx.hasPendingMessages()) return;
    // Cooldown is measured on the local monotonic counter (see `turnCounter`).
    if (turnCounter - lastPromptedTurn <= config.cooldownTurns) return;
    lastPromptedTurn = turnCounter;
    pi.sendMessage(
      {
        customType: MESSAGE_TYPE_TOOL_FEEDBACK_REQUEST,
        content: feedbackPrompt(config, agent),
        display: true,
        details: feedbackRequestDetails(agent),
      },
      { triggerTurn: true },
    );
  });

  pi.on("session_shutdown", async () => {
    pendingCalls.clear();
    currentTurn = undefined;
    currentAgent = undefined;
    // Reset counters so a later session handled by this instance starts clean.
    // The runtime mode is kept on purpose: it is documented as lasting until reload.
    activeTurnIndex = 0;
    sequence = 0;
    turnCounter = 0;
    lastPromptedTurn = Number.NEGATIVE_INFINITY;
  });
}
|
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@badliveware/pi-tool-feedback",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Generic watched-tool feedback prompts and passive summaries for Pi.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"pi-package",
|
|
8
|
+
"pi-extension",
|
|
9
|
+
"feedback",
|
|
10
|
+
"telemetry"
|
|
11
|
+
],
|
|
12
|
+
"license": "MIT",
|
|
13
|
+
"author": "BadLiveware",
|
|
14
|
+
"repository": {
|
|
15
|
+
"type": "git",
|
|
16
|
+
"url": "git+https://github.com/BadLiveware/pi.git",
|
|
17
|
+
"directory": "agent/extensions/public/tool-feedback"
|
|
18
|
+
},
|
|
19
|
+
"bugs": {
|
|
20
|
+
"url": "https://github.com/BadLiveware/pi/issues"
|
|
21
|
+
},
|
|
22
|
+
"homepage": "https://github.com/BadLiveware/pi/tree/main/agent/extensions/public/tool-feedback#readme",
|
|
23
|
+
"publishConfig": {
|
|
24
|
+
"access": "public"
|
|
25
|
+
},
|
|
26
|
+
"files": [
|
|
27
|
+
"README.md",
|
|
28
|
+
"LICENSE",
|
|
29
|
+
"index.ts",
|
|
30
|
+
"src",
|
|
31
|
+
"package.json"
|
|
32
|
+
],
|
|
33
|
+
"pi": {
|
|
34
|
+
"extensions": [
|
|
35
|
+
"./index.ts"
|
|
36
|
+
]
|
|
37
|
+
},
|
|
38
|
+
"peerDependencies": {
|
|
39
|
+
"@mariozechner/pi-ai": "*",
|
|
40
|
+
"@mariozechner/pi-coding-agent": "*"
|
|
41
|
+
},
|
|
42
|
+
"engines": {
|
|
43
|
+
"node": ">=20"
|
|
44
|
+
}
|
|
45
|
+
}
|
package/src/core.ts
ADDED
|
@@ -0,0 +1,511 @@
|
|
|
1
|
+
import * as crypto from "node:crypto";
|
|
2
|
+
import * as fs from "node:fs";
|
|
3
|
+
import * as os from "node:os";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
6
|
+
|
|
7
|
+
/** Operating mode: disabled, passive summaries only, ask the agent only, or both. */
export type FeedbackMode = "off" | "passive" | "ask-agent" | "both";
/** Agent-reported usefulness of the watched tool. */
export type PerceivedUsefulness = "high" | "medium" | "low" | "none" | "unknown";
/** Three-valued answer used by most optional feedback fields. */
export type YesNoUnknown = "yes" | "no" | "unknown";
/** Answer to whether the agent would use the tool again in the same situation. */
export type WouldUseAgain = "yes" | "no" | "unsure" | "unknown";
/** Agent-reported confidence in its own feedback. */
export type FeedbackConfidence = "high" | "medium" | "low";
/** Improvement areas the agent can pick from. */
export type FeedbackImprovement = "better_ranking" | "higher_cap" | "better_summary" | "better_docs" | "less_noise" | "faster" | "other";
/** Value types supported for configured extra feedback fields. */
export type FeedbackFieldType = "enum" | "yes_no_unknown" | "boolean" | "number";
|
|
14
|
+
|
|
15
|
+
/** Selects tools to watch, either by exact name or by name prefix. */
export interface WatchRule {
  /** Exact tool name to watch. */
  name?: string;
  /** Tool-name prefix to watch; shown as `prefix*` in status output. */
  prefix?: string;
}
|
|
19
|
+
|
|
20
|
+
/** Definition of one configured extra feedback field. */
export interface FeedbackFieldConfig {
  /** Field name the agent uses as the key inside `fieldResponses`. */
  name: string;
  /** Value type accepted for this field. */
  type: FeedbackFieldType;
  /** Optional human-readable explanation of the field. */
  description?: string;
  /** Allowed values; presumably only meaningful for `enum` fields — verify against the validator. */
  values?: string[];
  /** Whether a response for this field is required. */
  required: boolean;
}
|
|
27
|
+
|
|
28
|
+
/** Primitive value stored for an extra feedback field response. */
export type FeedbackFieldValue = string | number | boolean;
|
|
29
|
+
|
|
30
|
+
/** Describes a problem with one extra feedback field response. */
export interface FeedbackFieldError {
  /** Name of the field the error refers to. */
  name: string;
  /** Explanation of the problem. */
  reason: string;
}
|
|
34
|
+
|
|
35
|
+
/** Effective tool-feedback configuration. */
export interface ToolFeedbackConfig {
  /** Operating mode. */
  mode: FeedbackMode;
  /** Rules selecting which tools are watched. */
  watch: WatchRule[];
  /** Tool names excluded from watching. */
  excludeTools: string[];
  /** Cooldown, in turns, applied after a feedback request before another may be sent. */
  cooldownTurns: number;
  /** Skip the feedback request when the host already has pending messages. */
  skipWhenPendingMessages: boolean;
  /** Append turn summaries and feedback records as session entries. */
  appendSessionEntries: boolean;
  /** Whether logging is enabled. */
  log: boolean;
  /** Prompt text for the active feedback request. */
  taskPrompt: string;
  /** Configured extra feedback fields. */
  feedbackFields: FeedbackFieldConfig[];
}
|
|
46
|
+
|
|
47
|
+
/** Result of loading the tool-feedback configuration. */
export interface LoadedConfig {
  /** Effective configuration. */
  config: ToolFeedbackConfig;
  /** Config paths, reported as `loadedConfig` by the state tool. */
  paths: string[];
  /** Diagnostic messages, reported as `diagnostics` by the state tool. */
  diagnostics: string[];
}
|
|
52
|
+
|
|
53
|
+
/** A call to a watched tool, recorded without raw input. */
export interface WatchedToolCall {
  /** Name of the tool that was called. */
  toolName: string;
  /** Host-provided id used to pair the call with its result. */
  toolCallId: string;
  /** Category assigned to the call. */
  category: string;
  /** Value of the input's `confirmReferences` field when it is a string. */
  confirmReferences?: string;
  /** Index of the turn the call was made in. */
  turnIndex: number;
  /** Position of the call in the extension's running call counter. */
  sequence: number;
}
|
|
61
|
+
|
|
62
|
+
/** Outcome of a watched tool call, extending the call record. */
export interface WatchedToolResult extends WatchedToolCall {
  /** Whether the result counts as successful. */
  ok: boolean;
  /** Whether the host flagged the result as an error. */
  isError: boolean;
  /** Whether the result was reported as truncated. */
  truncated: boolean;
  /** Error classification, when one could be derived. */
  errorKind?: string;
  /** Milliseconds between the call event and the result event. */
  durationMs?: number;
}
|
|
69
|
+
|
|
70
|
+
/** Tool usage collected during a single turn. */
export interface TurnUsage {
  /** Index of the turn. */
  turnIndex: number;
  /** Start time of the turn in epoch milliseconds. */
  startedAt: number;
  /** Every tool call seen in the turn, watched or not. */
  toolCalls: Array<{ toolName: string; category: string; sequence: number }>;
  /** Calls to watched tools in the turn. */
  watchedCalls: WatchedToolCall[];
  /** Results received for watched calls in the turn. */
  watchedResults: WatchedToolResult[];
}
|
|
77
|
+
|
|
78
|
+
/** Tool usage collected across one agent run. */
export interface AgentUsage {
  /** Time the record was created, in epoch milliseconds. */
  startedAt: number;
  /** Calls to watched tools during the run. */
  watchedCalls: WatchedToolCall[];
  /** Results received for watched calls during the run. */
  watchedResults: WatchedToolResult[];
  /** Set once the agent has called `tool_feedback` in this run. */
  feedbackRecorded: boolean;
  /** Sequence number of the most recent watched call, if any. */
  lastWatchedSequence?: number;
  /** Categories of tool calls made after a watched call in this run. */
  afterWatchedCategories: string[];
  /** Summaries of the turns in this run that used watched tools. */
  turnSummaries: TurnSummary[];
}
|
|
87
|
+
|
|
88
|
+
/**
 * Passive per-turn summary written to session entries and the log.
 *
 * NOTE(review): field meanings below are inferred from names and types; the
 * function that fills them is outside this view — confirm against it.
 */
export interface TurnSummary {
  /** Record format version. */
  version: 1;
  /** Record discriminator. */
  kind: "turn_summary";
  /** Creation time as a string (presumably ISO 8601 — confirm). */
  timestamp: string;
  /** Id of the session the turn belongs to; also passed to the log writer. */
  sessionId: string;
  /** Repository root associated with the session. */
  repoRoot: string;
  /** Index of the summarised turn. */
  turnIndex: number;
  /** Names of the watched tools used in the turn. */
  watchedTools: string[];
  /** Number of watched calls in the turn. */
  watchedCallCount: number;
  /** Number of watched results in the turn. */
  watchedResultCount: number;
  /** Whether any watched result was truncated. */
  anyTruncated: boolean;
  /** Whether any watched result was an error. */
  anyError: boolean;
  /** `confirmReferences` values seen on watched calls. */
  confirmReferences: string[];
  /** Categories of tool calls in the turn. */
  toolCategories: string[];
  /** Categories of tool calls made after the first watched call. */
  categoriesAfterFirstWatchedCall: string[];
}
|
|
104
|
+
|
|
105
|
+
/**
 * Structured feedback recorded by the `tool_feedback` tool.
 *
 * NOTE(review): `note` versus `noteLength`/`noteHash` presumably map to the
 * session entry versus the log-safe variant — confirm against the record and
 * log-safe builders, which are outside this view.
 */
export interface FeedbackRecord {
  /** Record format version. */
  version: 1;
  /** Record discriminator. */
  kind: "agent_feedback";
  /** Creation time as a string (presumably ISO 8601 — confirm). */
  timestamp: string;
  /** Id of the session the feedback belongs to; also passed to the log writer. */
  sessionId: string;
  /** Repository root associated with the session. */
  repoRoot: string;
  /** Watched tool names the feedback covers. */
  watchedTools: string[];
  /** How useful the tool felt. */
  perceivedUsefulness: PerceivedUsefulness;
  /** Whether the agent would use the tool again in the same situation. */
  wouldUseAgainSameSituation: WouldUseAgain;
  /** Whether follow-up work felt routine. */
  followupWasRoutine?: YesNoUnknown;
  /** Whether follow-up work was needed because the tool was insufficient. */
  followupNeededBecauseToolWasInsufficient?: YesNoUnknown;
  /** Whether the output felt too noisy. */
  outputSeemedTooNoisy?: YesNoUnknown;
  /** Whether the output felt incomplete. */
  outputSeemedIncomplete?: YesNoUnknown;
  /** Whether important context was found outside the tool output. */
  missedImportantContext?: YesNoUnknown;
  /** Confidence in this feedback. */
  confidence: FeedbackConfidence;
  /** Most useful improvement area. */
  improvement?: FeedbackImprovement;
  /** Responses for configured extra fields. */
  fieldResponses?: Record<string, FeedbackFieldValue>;
  /** Problems found in extra field responses. */
  fieldResponseErrors?: FeedbackFieldError[];
  /** Free-form note. */
  note?: string;
  /** Length of the note. */
  noteLength?: number;
  /** Hash of the note. */
  noteHash?: string;
}
|
|
127
|
+
|
|
128
|
+
// File name of the tool-feedback config file.
const CONFIG_FILE_NAME = "tool-feedback.json";
|
|
129
|
+
const DEFAULT_TASK_PROMPT = [
|
|
130
|
+
"You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
|
|
131
|
+
"Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most.",
|
|
132
|
+
"This is a dogfood feedback request, not new implementation work.",
|
|
133
|
+
].join("\n\n");
|
|
134
|
+
|
|
135
|
+
const BASE_FIELD_PROMPT = [
|
|
136
|
+
"Base `tool_feedback` field values:",
|
|
137
|
+
"- perceivedUsefulness: `high`, `medium`, `low`, `none`, or `unknown`",
|
|
138
|
+
"- wouldUseAgainSameSituation: `yes`, `no`, `unsure`, or `unknown`",
|
|
139
|
+
"- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
|
|
140
|
+
"- confidence: `high`, `medium`, or `low`",
|
|
141
|
+
"- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
|
|
142
|
+
"Use `fieldResponses` only for configured extra fields. You do not need to inspect extension source to answer this prompt.",
|
|
143
|
+
].join("\n");
|
|
144
|
+
|
|
145
|
+
/**
 * Baseline configuration that `loadToolFeedbackConfig` starts from before
 * applying any configuration files.
 */
export const DEFAULT_CONFIG: ToolFeedbackConfig = {
  // Passive recording only; no feedback task is requested from the agent.
  mode: "passive",
  // No tools are watched until a configuration file adds rules.
  watch: [],
  // The feedback tools themselves are never treated as watched tools.
  excludeTools: ["tool_feedback", "tool_feedback_state"],
  cooldownTurns: 0,
  skipWhenPendingMessages: true,
  appendSessionEntries: true,
  // Enables the JSONL log written by `appendLog`.
  log: true,
  taskPrompt: DEFAULT_TASK_PROMPT,
  // No extra feedback fields beyond the built-in ones.
  feedbackFields: [],
};
|
|
156
|
+
|
|
157
|
+
/**
 * Type guard for plain key/value objects.
 *
 * Arrays are rejected even though `typeof [] === "object"`: treating an array
 * as a record lets an array-valued config file slip past the "expected a JSON
 * object" check and makes array indexes look like field names.
 *
 * @param value - Any value, typically parsed JSON or tool input.
 * @returns `true` when `value` is a non-null, non-array object.
 */
export function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Narrows an unknown value to a string.
 *
 * @returns The value unchanged when it is a string, otherwise `undefined`.
 */
export function stringValue(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  return value;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Narrows an unknown value to a boolean.
 *
 * @returns The value unchanged when it is a boolean, otherwise `undefined`.
 */
function booleanValue(value: unknown): boolean | undefined {
  if (typeof value !== "boolean") return undefined;
  return value;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Narrows an unknown value to a finite number.
 *
 * @returns The value when it is a number other than `NaN` or ±`Infinity`,
 *   otherwise `undefined`.
 */
function numberValue(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  return Number.isFinite(value) ? value : undefined;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Accepts a configured feedback mode, or keeps the fallback.
 *
 * @param value - Raw `mode` value from a configuration file.
 * @param fallback - Mode to keep when `value` is not a recognized mode.
 * @returns `value` when it is one of the four known modes, else `fallback`.
 */
function normalizeMode(value: unknown, fallback: FeedbackMode): FeedbackMode {
  switch (value) {
    case "off":
    case "passive":
    case "ask-agent":
    case "both":
      return value;
    default:
      return fallback;
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Extracts the usable strings from an untrusted array value.
 *
 * @param value - Any value; anything that is not an array yields `[]`.
 * @returns The string entries of `value`, trimmed, in their original order,
 *   with non-strings and blank entries removed. Duplicates are kept.
 */
export function normalizeStringArray(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  const cleaned: string[] = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    if (trimmed.length > 0) cleaned.push(trimmed);
  }
  return cleaned;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Converts the raw `watch` configuration value into validated watch rules.
 *
 * Entries that are not objects, or that have neither a usable `name` nor a
 * usable `prefix`, are dropped.
 *
 * @param value - Raw `watch` value from a configuration file.
 * @returns The accepted rules, in configuration order.
 */
function normalizeWatchRules(value: unknown): WatchRule[] {
  if (!Array.isArray(value)) return [];
  const rules: WatchRule[] = [];
  for (const item of value) {
    if (!isRecord(item)) continue;
    // Blank strings become `undefined`. Keeping a `""` prefix next to a valid
    // name would make prefix matching succeed for every tool, because
    // `startsWith("")` is always true.
    const name = stringValue(item.name)?.trim() || undefined;
    const prefix = stringValue(item.prefix)?.trim() || undefined;
    if (!name && !prefix) continue;
    rules.push({ name, prefix });
  }
  return rules;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Type guard for the supported kinds of configured feedback field.
 *
 * @returns `true` for `"enum"`, `"yes_no_unknown"`, `"boolean"` and `"number"`.
 */
function isFeedbackFieldType(value: unknown): value is FeedbackFieldType {
  switch (value) {
    case "enum":
    case "yes_no_unknown":
    case "boolean":
    case "number":
      return true;
    default:
      return false;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Validates the name of a configured feedback field.
 *
 * @param value - Raw `name` value from a configuration file.
 * @returns The trimmed name when it is 1–64 characters long, starts with an
 *   ASCII letter and contains only ASCII letters, digits and underscores;
 *   otherwise `undefined`.
 */
function normalizeFieldName(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const candidate = value.trim();
  if (candidate.length === 0 || candidate.length > 64) return undefined;
  return /^[a-zA-Z][a-zA-Z0-9_]*$/.test(candidate) ? candidate : undefined;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Validates the configured extra feedback fields.
 *
 * Each rejected entry adds one message, prefixed with `source`, to
 * `diagnostics`. Checks run in this order: entry is an object, name is valid,
 * name is not a duplicate of an accepted field, type is supported, and (for
 * enums) at least one value is present. Enum values are capped at 20 and
 * descriptions at 200 characters.
 *
 * @param value - Raw field list from a configuration file.
 * @param source - Label (the config path) used to prefix diagnostics.
 * @param diagnostics - Mutable list that receives rejection messages.
 * @returns The accepted fields, in configuration order.
 */
function normalizeFeedbackFields(value: unknown, source: string, diagnostics: string[]): FeedbackFieldConfig[] {
  if (!Array.isArray(value)) return [];

  // Insertion-ordered, so it doubles as the duplicate check and the result list.
  const accepted = new Map<string, FeedbackFieldConfig>();
  const reject = (message: string): void => {
    diagnostics.push(`${source}: ${message}`);
  };

  for (const candidate of value) {
    if (!isRecord(candidate)) {
      reject("feedback field ignored because it is not an object");
      continue;
    }

    const name = normalizeFieldName(candidate.name);
    if (name === undefined) {
      reject("feedback field ignored because name must match /^[a-zA-Z][a-zA-Z0-9_]*$/ and be <=64 chars");
      continue;
    }
    if (accepted.has(name)) {
      reject(`duplicate feedback field "${name}" ignored`);
      continue;
    }
    if (!isFeedbackFieldType(candidate.type)) {
      reject(`feedback field "${name}" ignored because type is invalid`);
      continue;
    }
    const type = candidate.type;

    let values: string[] | undefined;
    if (type === "enum") {
      values = normalizeStringArray(candidate.values).slice(0, 20);
      if (values.length === 0) {
        reject(`enum feedback field "${name}" ignored because values are missing`);
        continue;
      }
    }

    const description = stringValue(candidate.description)?.trim().slice(0, 200);
    accepted.set(name, {
      name,
      type,
      description: description || undefined,
      values,
      required: booleanValue(candidate.required) ?? false,
    });
  }

  return [...accepted.values()];
}
|
|
243
|
+
|
|
244
|
+
/**
 * Resolves the agent directory that holds the user-level configuration.
 *
 * Precedence: `PI_CODING_AGENT_DIR`, then `PI_AGENT_DIR`, then `~/.pi/agent`.
 * An environment variable set to the empty string is treated as unset, so it
 * cannot turn the config path into one relative to the current directory.
 *
 * @returns The agent directory path.
 */
function agentDir(): string {
  return process.env.PI_CODING_AGENT_DIR || process.env.PI_AGENT_DIR || path.join(os.homedir(), ".pi", "agent");
}
|
|
247
|
+
|
|
248
|
+
/**
 * Lists the configuration files to consult, in the order they are applied.
 *
 * Order: the `PI_TOOL_FEEDBACK_CONFIG` override (when non-empty), the
 * agent-directory file, then the project file under `<cwd>/.pi`. Later files
 * are applied on top of earlier ones by `loadToolFeedbackConfig`.
 *
 * @param ctx - Extension context supplying the working directory.
 * @returns Candidate paths with exact duplicates removed.
 */
function configPaths(ctx: ExtensionContext): string[] {
  const override = process.env.PI_TOOL_FEEDBACK_CONFIG;
  const candidates = [
    ...(override ? [override] : []),
    path.join(agentDir(), CONFIG_FILE_NAME),
    path.join(ctx.cwd, ".pi", CONFIG_FILE_NAME),
  ];
  return Array.from(new Set(candidates));
}
|
|
255
|
+
|
|
256
|
+
/**
 * Applies one parsed configuration file on top of an existing configuration.
 *
 * Scalar settings fall back to `base` when missing or of the wrong type.
 * List settings (`watch`, `excludeTools`, `feedbackFields`) are replaced
 * whenever their key is present. `feedback.fields` is read after
 * `feedbackFields` and therefore wins when both are given.
 *
 * @param input - Parsed JSON content of the configuration file.
 * @param base - Configuration accumulated so far; not mutated.
 * @param source - Label (the config path) used to prefix diagnostics.
 * @param diagnostics - Mutable list that receives validation messages.
 * @returns A new configuration, or `base` itself when `input` is not an object.
 */
function normalizeConfigPatch(input: unknown, base: ToolFeedbackConfig, source: string, diagnostics: string[]): ToolFeedbackConfig {
  if (!isRecord(input)) {
    diagnostics.push(`${source}: expected a JSON object`);
    return base;
  }

  const requestedCooldown = numberValue(input.cooldownTurns) ?? base.cooldownTurns;
  const merged: ToolFeedbackConfig = {
    ...base,
    mode: normalizeMode(input.mode, base.mode),
    // Whole turns only, clamped to the range 0..100.
    cooldownTurns: Math.max(0, Math.min(100, Math.floor(requestedCooldown))),
    skipWhenPendingMessages: booleanValue(input.skipWhenPendingMessages) ?? base.skipWhenPendingMessages,
    appendSessionEntries: booleanValue(input.appendSessionEntries) ?? base.appendSessionEntries,
    log: booleanValue(input.log) ?? base.log,
    // A blank prompt counts as "not provided".
    taskPrompt: stringValue(input.taskPrompt)?.trim() || base.taskPrompt,
  };

  if ("watch" in input) {
    merged.watch = normalizeWatchRules(input.watch);
  }
  if ("excludeTools" in input) {
    merged.excludeTools = normalizeStringArray(input.excludeTools);
  }
  if ("feedbackFields" in input) {
    merged.feedbackFields = normalizeFeedbackFields(input.feedbackFields, source, diagnostics);
  }
  const nested = input.feedback;
  if (isRecord(nested) && "fields" in nested) {
    merged.feedbackFields = normalizeFeedbackFields(nested.fields, source, diagnostics);
  }
  return merged;
}
|
|
274
|
+
|
|
275
|
+
/**
 * Loads the effective configuration by layering every existing configuration
 * file over `DEFAULT_CONFIG`.
 *
 * Files that do not exist are skipped silently. A file that cannot be read or
 * parsed adds its error message to the diagnostics and is otherwise ignored.
 *
 * @param ctx - Extension context supplying the working directory.
 * @returns The merged configuration, the files that were applied, and any
 *   diagnostics collected along the way.
 */
export function loadToolFeedbackConfig(ctx: ExtensionContext): LoadedConfig {
  const diagnostics: string[] = [];
  const paths: string[] = [];
  let config = { ...DEFAULT_CONFIG };

  for (const candidate of configPaths(ctx)) {
    if (fs.existsSync(candidate)) {
      try {
        const raw = fs.readFileSync(candidate, "utf-8");
        config = normalizeConfigPatch(JSON.parse(raw) as unknown, config, candidate, diagnostics);
        paths.push(candidate);
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        diagnostics.push(`${candidate}: ${reason}`);
      }
    }
  }

  return { config, paths, diagnostics };
}
|
|
291
|
+
|
|
292
|
+
/**
 * Resolves an identifier for the current session.
 *
 * Uses the session manager's `getSessionId()` when it exists and returns a
 * non-empty value. If it is missing, throws, or returns an empty value, a
 * synthetic `process:<pid>:<cwd>` identifier is used instead.
 *
 * @param ctx - Extension context supplying the session manager and cwd.
 * @returns The session identifier.
 */
export function sessionIdFromContext(ctx: ExtensionContext): string {
  const manager = ctx.sessionManager as unknown as { getSessionId?: () => string } | undefined;
  let reported: string | undefined;
  try {
    reported = manager?.getSessionId?.();
  } catch {
    // A failing session manager must not break feedback collection.
    reported = undefined;
  }
  return reported ? reported : `process:${process.pid}:${ctx.cwd}`;
}
|
|
302
|
+
|
|
303
|
+
/**
 * Resolves the directory that holds per-session feedback logs.
 *
 * Precedence: a non-empty `PI_TOOL_FEEDBACK_DIR`, then
 * `$XDG_CACHE_HOME/pi-tool-feedback`, then `~/.cache/pi-tool-feedback`.
 *
 * Following the XDG Base Directory rules, `XDG_CACHE_HOME` is only honoured
 * when it is non-empty and absolute. Otherwise the default `~/.cache` is used,
 * so logs never land in a directory relative to the current working directory.
 *
 * @returns The log directory path.
 */
function feedbackLogDir(): string {
  const explicitDir = process.env.PI_TOOL_FEEDBACK_DIR;
  if (explicitDir) return explicitDir;

  const xdgCacheHome = process.env.XDG_CACHE_HOME;
  const cacheRoot = xdgCacheHome && path.isAbsolute(xdgCacheHome) ? xdgCacheHome : path.join(os.homedir(), ".cache");
  return path.join(cacheRoot, "pi-tool-feedback");
}
|
|
306
|
+
|
|
307
|
+
/**
 * Turns a session identifier into a string that is safe to use as a file name.
 *
 * Every run of characters other than ASCII letters, digits, `.`, `_` and `-`
 * collapses to a single `_`, and the result is capped at 160 characters.
 *
 * @param sessionId - Raw session identifier.
 * @returns The sanitized segment, or `"unknown"` when nothing remains.
 */
function safeSessionPathSegment(sessionId: string): string {
  const sanitized = sessionId.replace(/[^a-zA-Z0-9._-]+/g, "_");
  const bounded = sanitized.slice(0, 160);
  return bounded.length > 0 ? bounded : "unknown";
}
|
|
310
|
+
|
|
311
|
+
/**
 * Resolves the JSONL log file for a session.
 *
 * A non-empty `PI_TOOL_FEEDBACK_LOG` overrides the location for every session.
 * An empty value is treated as unset: it would otherwise resolve to the path
 * `""`, which cannot be written, and every log append would fail silently.
 *
 * @param sessionId - Session identifier; sanitized before use as a file name.
 * @returns Path of the log file.
 */
export function feedbackLogPath(sessionId = "unknown"): string {
  const override = process.env.PI_TOOL_FEEDBACK_LOG;
  if (override) return override;
  return path.join(feedbackLogDir(), `${safeSessionPathSegment(sessionId)}.jsonl`);
}
|
|
314
|
+
|
|
315
|
+
/**
 * Current time as an ISO-8601 string.
 *
 * @returns The value of `Date.prototype.toISOString()` for the current instant.
 */
export function nowIso(): string {
  const now = new Date();
  return now.toISOString();
}
|
|
318
|
+
|
|
319
|
+
/**
 * Short fingerprint of a string.
 *
 * @param value - Text to fingerprint.
 * @returns The first 16 hexadecimal characters of the SHA-256 digest.
 */
function shortHash(value: string): string {
  const hasher = crypto.createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex").substring(0, 16);
}
|
|
322
|
+
|
|
323
|
+
/**
 * De-duplicates a list of optional strings.
 *
 * @param items - Values to filter; `undefined` and empty strings are dropped.
 * @returns Distinct non-empty strings in order of first appearance.
 */
export function unique(items: Array<string | undefined>): string[] {
  const seen = new Set<string>();
  for (const item of items) {
    if (typeof item === "string" && item.length > 0) seen.add(item);
  }
  return Array.from(seen);
}
|
|
326
|
+
|
|
327
|
+
/**
 * Assigns a coarse category to a tool call.
 *
 * `read` maps to `"read"`; `edit` and `write` map to `"edit"`. `bash` calls are
 * classified from their `command` string by the first matching pattern:
 * search, test, then version control. Any other tool is `"code-intel"` when
 * its name starts with `code_intel_`, otherwise `"other"`.
 *
 * @param toolName - Name of the tool that was called.
 * @param input - Tool input; only `input.command` is inspected, for `bash`.
 * @returns The category label.
 */
export function categoryForTool(toolName: string, input: unknown): string {
  switch (toolName) {
    case "read":
      return "read";
    case "edit":
    case "write":
      return "edit";
    case "bash":
      break;
    default:
      return toolName.startsWith("code_intel_") ? "code-intel" : "other";
  }

  const command = isRecord(input) ? stringValue(input.command)?.trim() : undefined;
  if (!command) return "bash:unknown";

  // Order matters: the search pattern is tried first, so `git grep` is
  // reported as a search rather than as version control.
  const patterns: Array<[RegExp, string]> = [
    [/\b(rg|grep|fd|find|ag)\b/, "bash:search"],
    [/\b(npm|pnpm|yarn|bun)\s+(test|run\s+test|run\s+typecheck)|\b(pytest|go\s+test|cargo\s+test|dotnet\s+test)\b/, "bash:test"],
    [/\b(git\s+(diff|status|show|log|grep)|gh\s+)/, "bash:vcs"],
  ];
  for (const [pattern, category] of patterns) {
    if (pattern.test(command)) return category;
  }
  return "bash:other";
}
|
|
340
|
+
|
|
341
|
+
/**
 * Decides whether a tool is watched under the given configuration.
 *
 * Exclusions win over watch rules. A rule matches on an exact `name` or on a
 * non-empty `prefix`. Empty prefixes are ignored because `startsWith("")` is
 * true for every string, so such a rule would watch every tool.
 *
 * @param toolName - Name of the tool being called.
 * @param config - Active configuration (`excludeTools` and `watch` are read).
 * @returns `true` when the tool is watched.
 */
export function matchesWatch(toolName: string, config: ToolFeedbackConfig): boolean {
  if (config.excludeTools.includes(toolName)) return false;
  return config.watch.some((rule) => {
    if (rule.name === toolName) return true;
    return typeof rule.prefix === "string" && rule.prefix.length > 0 && toolName.startsWith(rule.prefix);
  });
}
|
|
345
|
+
|
|
346
|
+
/**
 * Reports whether a tool result counts as successful.
 *
 * @param details - Tool result details; only an `ok` property is inspected.
 * @param isError - Error flag supplied with the result.
 * @returns `false` when `isError` is set or `details.ok` is exactly `false`,
 *   otherwise `true`.
 */
export function resultOk(details: unknown, isError: boolean): boolean {
  const explicitlyFailed = !isError && isRecord(details) && details.ok === false;
  return !isError && !explicitlyFailed;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Reports whether a tool result was flagged as truncated.
 *
 * @param details - Tool result details.
 * @returns `true` when `details.truncated` or `details.coverage.truncated` is
 *   exactly `true`.
 */
export function resultTruncated(details: unknown): boolean {
  if (!isRecord(details)) return false;
  const { truncated, coverage } = details;
  return truncated === true || (isRecord(coverage) && coverage.truncated === true);
}
|
|
357
|
+
|
|
358
|
+
/**
 * Names the kind of error signal carried by a tool result.
 *
 * @param details - Tool result details.
 * @param isError - Error flag supplied with the result.
 * @returns `"tool-error"` when `isError` is set; otherwise `"reason"` or
 *   `"diagnostic"` when `details` has a string property of that name
 *   (`reason` is checked first); otherwise `undefined`.
 */
export function resultErrorKind(details: unknown, isError: boolean): string | undefined {
  if (isError) return "tool-error";
  if (isRecord(details)) {
    for (const key of ["reason", "diagnostic"]) {
      if (typeof details[key] === "string") return key;
    }
  }
  return undefined;
}
|
|
365
|
+
|
|
366
|
+
/**
 * Appends one event as a JSON line to the session's feedback log.
 *
 * Does nothing when logging is disabled. Creates the log directory when
 * needed. Every failure is swallowed on purpose.
 *
 * @param config - Active configuration; only `log` is read.
 * @param sessionId - Session whose log file receives the event.
 * @param event - Event to serialize.
 */
export function appendLog(config: ToolFeedbackConfig, sessionId: string, event: Record<string, unknown>): void {
  if (config.log) {
    try {
      const target = feedbackLogPath(sessionId);
      const directory = path.dirname(target);
      fs.mkdirSync(directory, { recursive: true });
      fs.appendFileSync(target, `${JSON.stringify(event)}\n`);
    } catch {
      // Feedback logging must never affect tool execution.
    }
  }
}
|
|
376
|
+
|
|
377
|
+
/**
 * Builds the passive summary record for one turn.
 *
 * @param turn - Tool calls and watched results collected during the turn.
 * @param ctx - Extension context supplying the session id and cwd.
 * @returns The summary record.
 */
export function makeTurnSummary(turn: TurnUsage, ctx: ExtensionContext): TurnSummary {
  const { watchedCalls, watchedResults, toolCalls } = turn;

  // With no watched calls this is `Infinity`, so no call counts as "after".
  const watchedSequences = watchedCalls.map((call) => call.sequence);
  const firstWatchedSequence = Math.min(...watchedSequences);
  const callsAfterFirstWatched = toolCalls.filter((call) => call.sequence > firstWatchedSequence);

  return {
    version: 1,
    kind: "turn_summary",
    timestamp: nowIso(),
    sessionId: sessionIdFromContext(ctx),
    repoRoot: ctx.cwd,
    turnIndex: turn.turnIndex,
    watchedTools: unique(watchedCalls.map((call) => call.toolName)),
    watchedCallCount: watchedCalls.length,
    watchedResultCount: watchedResults.length,
    anyTruncated: watchedResults.some((result) => result.truncated),
    anyError: watchedResults.some((result) => !result.ok || result.isError),
    confirmReferences: unique(watchedCalls.map((call) => call.confirmReferences)),
    toolCategories: unique(toolCalls.map((call) => call.category)),
    categoriesAfterFirstWatchedCall: unique(callsAfterFirstWatched.map((call) => call.category)),
  };
}
|
|
396
|
+
|
|
397
|
+
/**
 * Reports whether a feedback mode includes passive recording.
 *
 * @returns `true` for `"passive"` and `"both"`.
 */
export function modeIncludesPassive(mode: FeedbackMode): boolean {
  switch (mode) {
    case "passive":
    case "both":
      return true;
    default:
      return false;
  }
}
|
|
400
|
+
|
|
401
|
+
/**
 * Reports whether a feedback mode includes asking the agent for feedback.
 *
 * @returns `true` for `"ask-agent"` and `"both"`.
 */
export function modeIncludesAsk(mode: FeedbackMode): boolean {
  switch (mode) {
    case "ask-agent":
    case "both":
      return true;
    default:
      return false;
  }
}
|
|
404
|
+
|
|
405
|
+
/**
 * Renders the prompt section describing the configured extra feedback fields.
 *
 * @param fields - Configured extra fields.
 * @returns An empty string when there are no fields; otherwise a blank-line
 *   prefix, a heading, and one bullet per field giving its name, whether it is
 *   required, its type, its optional description and its allowed values.
 */
function configuredFieldsPrompt(fields: FeedbackFieldConfig[]): string {
  if (fields.length === 0) return "";

  const heading = "Configured extra feedback fields: answer these in `fieldResponses` using the exact field names and allowed values below.";
  const bullets = fields.map((field) => {
    let allowed: string;
    if (field.type === "enum") {
      allowed = (field.values ?? []).join(" | ");
    } else if (field.type === "yes_no_unknown") {
      allowed = "yes | no | unknown";
    } else {
      // boolean and number fields are described by their type name.
      allowed = field.type;
    }
    const requirement = field.required ? "required" : "optional";
    const descriptionSuffix = field.description ? `, ${field.description}` : "";
    return `- ${field.name} (${requirement}, ${field.type}${descriptionSuffix}): ${allowed}`;
  });

  return `\n\n${[heading, ...bullets].join("\n")}`;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Builds the full feedback request sent to the agent.
 *
 * The prompt consists of the configured task prompt, the list of watched tools
 * that were used (`unknown` when there are none), the built-in field
 * reference, and the configured extra fields section when any exist.
 *
 * @param config - Active configuration (`taskPrompt`, `feedbackFields`).
 * @param usage - Usage whose watched calls name the tools to report on.
 * @returns The prompt text.
 */
export function feedbackPrompt(config: ToolFeedbackConfig, usage: AgentUsage): string {
  const toolNames = unique(usage.watchedCalls.map((call) => call.toolName));
  const toolList = toolNames.length > 0 ? toolNames.join(", ") : "unknown";
  const sections = [config.taskPrompt, `Watched tools used: ${toolList}.`, BASE_FIELD_PROMPT];
  return sections.join("\n\n") + configuredFieldsPrompt(config.feedbackFields);
}
|
|
420
|
+
|
|
421
|
+
/**
 * Validates a submitted usefulness rating.
 *
 * @returns The value when it is `high`, `medium`, `low`, `none` or `unknown`;
 *   otherwise `"unknown"`.
 */
function perceivedUsefulness(value: unknown): PerceivedUsefulness {
  switch (value) {
    case "high":
    case "medium":
    case "low":
    case "none":
    case "unknown":
      return value;
    default:
      return "unknown";
  }
}
|
|
424
|
+
|
|
425
|
+
/**
 * Validates a submitted yes/no/unknown answer.
 *
 * @returns The value when it is `yes`, `no` or `unknown`; otherwise
 *   `undefined`, so the answer is recorded as absent.
 */
function yesNoUnknown(value: unknown): YesNoUnknown | undefined {
  switch (value) {
    case "yes":
    case "no":
    case "unknown":
      return value;
    default:
      return undefined;
  }
}
|
|
428
|
+
|
|
429
|
+
/**
 * Validates a submitted "would use again" answer.
 *
 * @returns The value when it is `yes`, `no`, `unsure` or `unknown`; otherwise
 *   `"unknown"`.
 */
function wouldUseAgain(value: unknown): WouldUseAgain {
  switch (value) {
    case "yes":
    case "no":
    case "unsure":
    case "unknown":
      return value;
    default:
      return "unknown";
  }
}
|
|
432
|
+
|
|
433
|
+
/**
 * Validates a submitted confidence level.
 *
 * @returns The value when it is `high`, `medium` or `low`; otherwise `"low"`.
 */
function confidence(value: unknown): FeedbackConfidence {
  switch (value) {
    case "high":
    case "medium":
    case "low":
      return value;
    default:
      return "low";
  }
}
|
|
436
|
+
|
|
437
|
+
/**
 * Validates a submitted improvement suggestion.
 *
 * @returns The value when it is one of the known improvement identifiers;
 *   otherwise `undefined`.
 */
function improvement(value: unknown): FeedbackImprovement | undefined {
  switch (value) {
    case "better_ranking":
    case "higher_cap":
    case "better_summary":
    case "better_docs":
    case "less_noise":
    case "faster":
    case "other":
      return value;
    default:
      return undefined;
  }
}
|
|
440
|
+
|
|
441
|
+
/**
 * Validates one submitted answer against its field definition.
 *
 * @param field - Field definition (`type`, `values`, `required` are read).
 * @param value - Submitted answer; `undefined` means "not answered".
 * @returns `{ value }` when the answer is valid, `{ error }` when it is not,
 *   and `{}` when an optional field was left unanswered.
 */
function validateFieldValue(field: FeedbackFieldConfig, value: unknown): { value?: FeedbackFieldValue; error?: string } {
  if (value === undefined) {
    return field.required ? { error: "required field missing" } : {};
  }

  switch (field.type) {
    case "enum": {
      const allowed = field.values ?? [];
      return typeof value === "string" && allowed.includes(value) ? { value } : { error: `expected one of ${allowed.join(" | ")}` };
    }
    case "yes_no_unknown":
      return value === "yes" || value === "no" || value === "unknown" ? { value } : { error: "expected yes | no | unknown" };
    case "boolean":
      return typeof value === "boolean" ? { value } : { error: "expected boolean" };
    case "number":
      return typeof value === "number" && Number.isFinite(value) ? { value } : { error: "expected finite number" };
    default:
      return { error: "unsupported field type" };
  }
}
|
|
461
|
+
|
|
462
|
+
/**
 * Validates the submitted answers to the configured extra feedback fields.
 *
 * Every configured field is checked with `validateFieldValue`. Submitted keys
 * that match no configured field are reported as errors. When no extra fields
 * are configured, the input is ignored entirely.
 *
 * @param input - Raw `fieldResponses` value; a non-object counts as "no answers".
 * @param fields - Configured extra fields.
 * @returns Valid answers keyed by field name and the list of problems; each is
 *   `undefined` when empty.
 */
function validateFieldResponses(input: unknown, fields: FeedbackFieldConfig[]): { responses?: Record<string, FeedbackFieldValue>; errors?: FeedbackFieldError[] } {
  if (fields.length === 0) return {};
  const raw = isRecord(input) ? input : {};
  const responses: Record<string, FeedbackFieldValue> = {};
  const errors: FeedbackFieldError[] = [];
  const configuredNames = new Set<string>();

  for (const field of fields) {
    configuredNames.add(field.name);
    // Read own properties only. Field names such as `constructor` or
    // `toString` pass the name validation; a plain `raw[name]` lookup would
    // return the inherited `Object.prototype` member and report an
    // unanswered field as a type error.
    const supplied = Object.prototype.hasOwnProperty.call(raw, field.name) ? raw[field.name] : undefined;
    const result = validateFieldValue(field, supplied);
    if (result.value !== undefined) responses[field.name] = result.value;
    if (result.error) errors.push({ name: field.name, reason: result.error });
  }

  for (const name of Object.keys(raw)) {
    if (!configuredNames.has(name)) errors.push({ name, reason: "unknown configured field" });
  }

  return {
    responses: Object.keys(responses).length > 0 ? responses : undefined,
    errors: errors.length > 0 ? errors : undefined,
  };
}
|
|
480
|
+
|
|
481
|
+
/**
 * Builds a normalized feedback record from raw `tool_feedback` input.
 *
 * Invalid required answers fall back to `"unknown"` (or `"low"` for
 * confidence). Invalid optional answers become `undefined`. The note is
 * trimmed, and its length and short hash are recorded next to it.
 *
 * @param input - Raw tool arguments submitted by the agent.
 * @param ctx - Extension context supplying the session id and cwd.
 * @param config - Active configuration; its `feedbackFields` drive validation
 *   of `fieldResponses`. Without it, extra fields are ignored.
 * @returns The feedback record.
 */
export function feedbackRecord(input: Record<string, unknown>, ctx: ExtensionContext, config?: ToolFeedbackConfig): FeedbackRecord {
  const note = stringValue(input.note)?.trim();
  const hasNote = Boolean(note);
  const extraFields = validateFieldResponses(input.fieldResponses, config?.feedbackFields ?? []);

  const record: FeedbackRecord = {
    version: 1,
    kind: "agent_feedback",
    timestamp: nowIso(),
    sessionId: sessionIdFromContext(ctx),
    repoRoot: ctx.cwd,
    watchedTools: normalizeStringArray(input.watchedTools),
    perceivedUsefulness: perceivedUsefulness(input.perceivedUsefulness),
    wouldUseAgainSameSituation: wouldUseAgain(input.wouldUseAgainSameSituation),
    followupWasRoutine: yesNoUnknown(input.followupWasRoutine),
    followupNeededBecauseToolWasInsufficient: yesNoUnknown(input.followupNeededBecauseToolWasInsufficient),
    outputSeemedTooNoisy: yesNoUnknown(input.outputSeemedTooNoisy),
    outputSeemedIncomplete: yesNoUnknown(input.outputSeemedIncomplete),
    missedImportantContext: yesNoUnknown(input.missedImportantContext),
    confidence: confidence(input.confidence),
    improvement: improvement(input.improvement),
    fieldResponses: extraFields.responses,
    fieldResponseErrors: extraFields.errors,
    note,
    noteLength: hasNote && note !== undefined ? note.length : undefined,
    noteHash: hasNote && note !== undefined ? shortHash(note) : undefined,
  };
  return record;
}
|
|
507
|
+
|
|
508
|
+
/**
 * Produces the loggable form of a feedback record.
 *
 * The free-text `note` is removed; `noteLength` and `noteHash` are kept. The
 * input record is not modified.
 *
 * @param record - Full feedback record.
 * @returns A shallow copy of `record` without the `note` property.
 */
export function logSafeFeedbackRecord(record: FeedbackRecord): Record<string, unknown> {
  const safe: Record<string, unknown> = { ...record };
  delete safe.note;
  return safe;
}
|