@badliveware/pi-tool-feedback 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +9 -2
- package/index.ts +8 -3
- package/package.json +3 -3
- package/src/core.ts +13 -1
- package/src/per-tool.ts +52 -0
- package/src/prompts.ts +2 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.0
|
|
4
|
+
|
|
5
|
+
- Add per-tool feedback responses so agents can report different experiences for different watched tools in the same turn.
|
|
6
|
+
- Record follow-up tool categories after watched calls so feedback logs can distinguish routine verification from compensatory follow-up work.
|
|
7
|
+
- Update package imports and peer dependencies for the `@earendil-works/*` Pi packages.
|
|
8
|
+
|
|
3
9
|
## 0.1.1
|
|
4
10
|
|
|
5
11
|
- Clarified dogfood feedback prompts so agents call `tool_feedback` silently and avoid acknowledging the feedback request to the user.
|
package/README.md
CHANGED
|
@@ -86,7 +86,7 @@ Other options:
|
|
|
86
86
|
|
|
87
87
|
### Extra feedback fields
|
|
88
88
|
|
|
89
|
-
The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`.
|
|
89
|
+
The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`. Set `required: true` for fields you want every feedback prompt to answer; invalid or missing required answers are logged in `fieldResponseErrors`.
|
|
90
90
|
|
|
91
91
|
```json
|
|
92
92
|
{
|
|
@@ -135,11 +135,12 @@ Read-only state/config inspection. Use it to see the loaded mode, watch rules, c
|
|
|
135
135
|
|
|
136
136
|
### `tool_feedback`
|
|
137
137
|
|
|
138
|
-
Records one structured feedback entry. Typical agent response after a feedback prompt:
|
|
138
|
+
Records one structured feedback entry. When several watched tools were used and the experience differed by tool, agents can set `primaryWatchedTool` and optional `perToolResponses` keyed by tool name. Typical agent response after a feedback prompt:
|
|
139
139
|
|
|
140
140
|
```json
|
|
141
141
|
{
|
|
142
142
|
"watchedTools": ["code_intel_impact_map"],
|
|
143
|
+
"primaryWatchedTool": "code_intel_impact_map",
|
|
143
144
|
"perceivedUsefulness": "medium",
|
|
144
145
|
"wouldUseAgainSameSituation": "yes",
|
|
145
146
|
"followupWasRoutine": "yes",
|
|
@@ -151,6 +152,12 @@ Records one structured feedback entry. Typical agent response after a feedback p
|
|
|
151
152
|
"improvement": "better_summary",
|
|
152
153
|
"fieldResponses": {
|
|
153
154
|
"rankingQuality": "mixed"
|
|
155
|
+
},
|
|
156
|
+
"perToolResponses": {
|
|
157
|
+
"code_intel_impact_map": {
|
|
158
|
+
"outputSeemedTooNoisy": "yes",
|
|
159
|
+
"fieldResponses": { "rankingQuality": "mixed" }
|
|
160
|
+
}
|
|
154
161
|
}
|
|
155
162
|
}
|
|
156
163
|
```
|
package/index.ts
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
|
-
import { Type } from "@
|
|
2
|
-
import type { ExtensionAPI, ExtensionContext } from "@
|
|
3
|
-
import { Text } from "@
|
|
1
|
+
import { Type } from "@earendil-works/pi-ai";
|
|
2
|
+
import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
3
|
+
import { Text } from "@earendil-works/pi-tui";
|
|
4
4
|
import {
|
|
5
5
|
appendLog,
|
|
6
6
|
categoryForTool,
|
|
7
7
|
feedbackLogPath,
|
|
8
8
|
feedbackPrompt,
|
|
9
9
|
feedbackRecord,
|
|
10
|
+
invocationIdFor,
|
|
10
11
|
isRecord,
|
|
11
12
|
loadToolFeedbackConfig,
|
|
12
13
|
logSafeFeedbackRecord,
|
|
@@ -17,6 +18,7 @@ import {
|
|
|
17
18
|
resultErrorKind,
|
|
18
19
|
resultOk,
|
|
19
20
|
resultTruncated,
|
|
21
|
+
sessionIdFromContext,
|
|
20
22
|
stringValue,
|
|
21
23
|
unique,
|
|
22
24
|
type AgentUsage,
|
|
@@ -131,6 +133,8 @@ export default function toolFeedback(pi: ExtensionAPI): void {
|
|
|
131
133
|
description: "Record concise structured feedback after using watched tools. This stores feedback only; it does not change the watched tool.",
|
|
132
134
|
parameters: Type.Object({
|
|
133
135
|
watchedTools: Type.Array(Type.String(), { description: "Watched tool names this feedback covers." }),
|
|
136
|
+
primaryWatchedTool: Type.Optional(Type.String({ description: "Primary watched tool this feedback mostly concerns when multiple tools were used." })),
|
|
137
|
+
perToolResponses: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Optional per-tool structured responses when multiple watched tools differed materially. Keys are watched tool names; values may include base feedback fields and fieldResponses." })),
|
|
134
138
|
perceivedUsefulness: Type.Union([Type.Literal("high"), Type.Literal("medium"), Type.Literal("low"), Type.Literal("none"), Type.Literal("unknown")], { description: "How useful the tool felt for this task. Allowed values are: `high`, `medium`, `low`, `none`, `unknown`." }),
|
|
135
139
|
wouldUseAgainSameSituation: Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unsure"), Type.Literal("unknown")], { description: "Whether you would use the same tool again for a similar situation. Allowed values are: `yes`, `no`, `unsure`, `unknown`." }),
|
|
136
140
|
followupWasRoutine: Type.Optional(Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unknown")], { description: "Whether follow-up work felt routine rather than caused by tool insufficiency. Allowed values are: `yes`, `no`, `unknown`." })),
|
|
@@ -196,6 +200,7 @@ export default function toolFeedback(pi: ExtensionAPI): void {
|
|
|
196
200
|
const watched: WatchedToolCall & { startedAt: number } = {
|
|
197
201
|
toolName,
|
|
198
202
|
toolCallId,
|
|
203
|
+
invocationId: invocationIdFor(sessionIdFromContext(ctx), toolCallId),
|
|
199
204
|
category,
|
|
200
205
|
confirmReferences: inputRecord ? stringValue(inputRecord.confirmReferences) : undefined,
|
|
201
206
|
turnIndex: turn.turnIndex,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@badliveware/pi-tool-feedback",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Generic watched-tool feedback prompts and passive summaries for Pi.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
@@ -37,8 +37,8 @@
|
|
|
37
37
|
]
|
|
38
38
|
},
|
|
39
39
|
"peerDependencies": {
|
|
40
|
-
"@
|
|
41
|
-
"@
|
|
40
|
+
"@earendil-works/pi-ai": "*",
|
|
41
|
+
"@earendil-works/pi-coding-agent": "*"
|
|
42
42
|
},
|
|
43
43
|
"engines": {
|
|
44
44
|
"node": ">=20"
|
package/src/core.ts
CHANGED
|
@@ -2,7 +2,8 @@ import * as crypto from "node:crypto";
|
|
|
2
2
|
import * as fs from "node:fs";
|
|
3
3
|
import * as os from "node:os";
|
|
4
4
|
import * as path from "node:path";
|
|
5
|
-
import type { ExtensionContext } from "@
|
|
5
|
+
import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
|
|
6
|
+
import { sanitizePerToolResponses } from "./per-tool.ts";
|
|
6
7
|
import { BASE_FIELD_PROMPT, DEFAULT_TASK_PROMPT } from "./prompts.ts";
|
|
7
8
|
|
|
8
9
|
export type FeedbackMode = "off" | "passive" | "ask-agent" | "both";
|
|
@@ -54,6 +55,7 @@ export interface LoadedConfig {
|
|
|
54
55
|
export interface WatchedToolCall {
|
|
55
56
|
toolName: string;
|
|
56
57
|
toolCallId: string;
|
|
58
|
+
invocationId: string;
|
|
57
59
|
category: string;
|
|
58
60
|
confirmReferences?: string;
|
|
59
61
|
turnIndex: number;
|
|
@@ -101,6 +103,7 @@ export interface TurnSummary {
|
|
|
101
103
|
confirmReferences: string[];
|
|
102
104
|
toolCategories: string[];
|
|
103
105
|
categoriesAfterFirstWatchedCall: string[];
|
|
106
|
+
watchedResults: Array<Record<string, unknown>>;
|
|
104
107
|
}
|
|
105
108
|
|
|
106
109
|
export interface FeedbackRecord {
|
|
@@ -110,6 +113,8 @@ export interface FeedbackRecord {
|
|
|
110
113
|
sessionId: string;
|
|
111
114
|
repoRoot: string;
|
|
112
115
|
watchedTools: string[];
|
|
116
|
+
primaryWatchedTool?: string;
|
|
117
|
+
perToolResponses?: Record<string, Record<string, unknown>>;
|
|
113
118
|
perceivedUsefulness: PerceivedUsefulness;
|
|
114
119
|
wouldUseAgainSameSituation: WouldUseAgain;
|
|
115
120
|
followupWasRoutine?: YesNoUnknown;
|
|
@@ -306,6 +311,10 @@ function shortHash(value: string): string {
|
|
|
306
311
|
return crypto.createHash("sha256").update(value).digest("hex").slice(0, 16);
|
|
307
312
|
}
|
|
308
313
|
|
|
314
|
+
export function invocationIdFor(sessionId: string, toolCallId: string): string {
|
|
315
|
+
return shortHash(`${sessionId}\0${toolCallId}`);
|
|
316
|
+
}
|
|
317
|
+
|
|
309
318
|
export function unique(items: Array<string | undefined>): string[] {
|
|
310
319
|
return [...new Set(items.filter((item): item is string => typeof item === "string" && item.length > 0))];
|
|
311
320
|
}
|
|
@@ -377,6 +386,7 @@ export function makeTurnSummary(turn: TurnUsage, ctx: ExtensionContext): TurnSum
|
|
|
377
386
|
confirmReferences: unique(turn.watchedCalls.map((call) => call.confirmReferences)),
|
|
378
387
|
toolCategories: unique(turn.toolCalls.map((call) => call.category)),
|
|
379
388
|
categoriesAfterFirstWatchedCall: unique(turn.toolCalls.filter((call) => call.sequence > firstWatchedSequence).map((call) => call.category)),
|
|
389
|
+
watchedResults: turn.watchedResults.map((result) => ({ toolName: result.toolName, invocationId: result.invocationId, ok: result.ok, isError: result.isError, truncated: result.truncated, errorKind: result.errorKind, durationMs: result.durationMs })),
|
|
380
390
|
};
|
|
381
391
|
}
|
|
382
392
|
|
|
@@ -474,6 +484,8 @@ export function feedbackRecord(input: Record<string, unknown>, ctx: ExtensionCon
|
|
|
474
484
|
sessionId: sessionIdFromContext(ctx),
|
|
475
485
|
repoRoot: ctx.cwd,
|
|
476
486
|
watchedTools: normalizeStringArray(input.watchedTools),
|
|
487
|
+
primaryWatchedTool: stringValue(input.primaryWatchedTool),
|
|
488
|
+
perToolResponses: sanitizePerToolResponses(input.perToolResponses),
|
|
477
489
|
perceivedUsefulness: perceivedUsefulness(input.perceivedUsefulness),
|
|
478
490
|
wouldUseAgainSameSituation: wouldUseAgain(input.wouldUseAgainSameSituation),
|
|
479
491
|
followupWasRoutine: yesNoUnknown(input.followupWasRoutine),
|
package/src/per-tool.ts
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
type FeedbackFieldValue = string | number | boolean;
|
|
2
|
+
|
|
3
|
+
const allowedBaseFields = new Set([
|
|
4
|
+
"perceivedUsefulness",
|
|
5
|
+
"wouldUseAgainSameSituation",
|
|
6
|
+
"followupWasRoutine",
|
|
7
|
+
"followupNeededBecauseToolWasInsufficient",
|
|
8
|
+
"outputSeemedTooNoisy",
|
|
9
|
+
"outputSeemedIncomplete",
|
|
10
|
+
"missedImportantContext",
|
|
11
|
+
"confidence",
|
|
12
|
+
"improvement",
|
|
13
|
+
]);
|
|
14
|
+
|
|
15
|
+
function isRecord(value: unknown): value is Record<string, unknown> {
|
|
16
|
+
return typeof value === "object" && value !== null;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function scalar(value: unknown): FeedbackFieldValue | undefined {
|
|
20
|
+
if (typeof value === "string" || typeof value === "boolean") return value;
|
|
21
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
22
|
+
return undefined;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
function sanitizeFieldResponses(value: unknown): Record<string, FeedbackFieldValue> | undefined {
|
|
26
|
+
if (!isRecord(value)) return undefined;
|
|
27
|
+
const output: Record<string, FeedbackFieldValue> = {};
|
|
28
|
+
for (const [name, raw] of Object.entries(value)) {
|
|
29
|
+
const item = scalar(raw);
|
|
30
|
+
if (item !== undefined) output[name] = item;
|
|
31
|
+
}
|
|
32
|
+
return Object.keys(output).length > 0 ? output : undefined;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function sanitizePerToolResponses(input: unknown): Record<string, Record<string, unknown>> | undefined {
|
|
36
|
+
if (!isRecord(input)) return undefined;
|
|
37
|
+
const output: Record<string, Record<string, unknown>> = {};
|
|
38
|
+
for (const [toolName, rawResponse] of Object.entries(input)) {
|
|
39
|
+
if (!toolName.trim() || !isRecord(rawResponse)) continue;
|
|
40
|
+
const response: Record<string, unknown> = {};
|
|
41
|
+
for (const [name, raw] of Object.entries(rawResponse)) {
|
|
42
|
+
if (allowedBaseFields.has(name)) {
|
|
43
|
+
const item = scalar(raw);
|
|
44
|
+
if (item !== undefined) response[name] = item;
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
const fieldResponses = sanitizeFieldResponses(rawResponse.fieldResponses);
|
|
48
|
+
if (fieldResponses) response.fieldResponses = fieldResponses;
|
|
49
|
+
if (Object.keys(response).length > 0) output[toolName.trim()] = response;
|
|
50
|
+
}
|
|
51
|
+
return Object.keys(output).length > 0 ? output : undefined;
|
|
52
|
+
}
|
package/src/prompts.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
export const DEFAULT_TASK_PROMPT = [
|
|
2
2
|
"You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
|
|
3
|
-
"Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most.",
|
|
3
|
+
"Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most. When multiple watched tools differed materially, include `primaryWatchedTool` and optional `perToolResponses` keyed by tool name.",
|
|
4
4
|
"This is a dogfood feedback request, not new implementation work.",
|
|
5
5
|
"Do not acknowledge or summarize this request to the user. Call `tool_feedback` silently and continue or stop if you were done.",
|
|
6
6
|
].join("\n\n");
|
|
@@ -12,5 +12,5 @@ export const BASE_FIELD_PROMPT = [
|
|
|
12
12
|
"- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
|
|
13
13
|
"- confidence: `high`, `medium`, or `low`",
|
|
14
14
|
"- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
|
|
15
|
-
"Use `fieldResponses` only for configured extra fields. You do not need to inspect extension source to answer this prompt.",
|
|
15
|
+
"Use `fieldResponses` only for configured extra fields. Include all required configured fields, using `unknown` when that is the most honest answer. You do not need to inspect extension source to answer this prompt.",
|
|
16
16
|
].join("\n");
|