@badliveware/pi-tool-feedback 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
1
+ # Changelog
2
+
3
+ ## 0.2.0
4
+
5
+ - Added per-tool feedback responses so agents can report different experiences for different watched tools in the same turn.
6
+ - Recorded follow-up tool categories after watched calls so feedback logs can distinguish routine verification from compensatory follow-up work.
7
+ - Updated package imports and peer dependencies for the `@earendil-works/*` Pi packages.
8
+
9
+ ## 0.1.1
10
+
11
+ - Clarified dogfood feedback prompts so agents call `tool_feedback` silently and avoid acknowledging the feedback request to the user.
12
+
13
+ ## 0.1.0
14
+
15
+ - Initial public package release for watched-tool feedback prompts and passive turn summaries.
16
+ - Added custom feedback fields so projects can ask for domain-specific ratings such as ranking quality or latency acceptability.
17
+ - Delivered active feedback requests as Pi custom messages instead of user messages.
18
+ - Added prompt wording and README guidance that frame agent self-feedback as noisy subjective signal rather than ground truth.
package/README.md CHANGED
@@ -86,7 +86,7 @@ Other options:
86
86
 
87
87
  ### Extra feedback fields
88
88
 
89
- The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`.
89
+ The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields, and agents answer them inside `fieldResponses`. Set `required: true` on any field that agents must answer in every feedback response; invalid or missing required answers are logged in `fieldResponseErrors`.
90
90
 
91
91
  ```json
92
92
  {
@@ -135,11 +135,12 @@ Read-only state/config inspection. Use it to see the loaded mode, watch rules, c
135
135
 
136
136
  ### `tool_feedback`
137
137
 
138
- Records one structured feedback entry. Typical agent response after a feedback prompt:
138
+ Records one structured feedback entry. When several watched tools were used and the experience differed by tool, agents can set `primaryWatchedTool` and optional `perToolResponses` keyed by tool name. Typical agent response after a feedback prompt:
139
139
 
140
140
  ```json
141
141
  {
142
142
  "watchedTools": ["code_intel_impact_map"],
143
+ "primaryWatchedTool": "code_intel_impact_map",
143
144
  "perceivedUsefulness": "medium",
144
145
  "wouldUseAgainSameSituation": "yes",
145
146
  "followupWasRoutine": "yes",
@@ -151,6 +152,12 @@ Records one structured feedback entry. Typical agent response after a feedback p
151
152
  "improvement": "better_summary",
152
153
  "fieldResponses": {
153
154
  "rankingQuality": "mixed"
155
+ },
156
+ "perToolResponses": {
157
+ "code_intel_impact_map": {
158
+ "outputSeemedTooNoisy": "yes",
159
+ "fieldResponses": { "rankingQuality": "mixed" }
160
+ }
154
161
  }
155
162
  }
156
163
  ```
package/index.ts CHANGED
@@ -1,12 +1,13 @@
1
- import { Type } from "@mariozechner/pi-ai";
2
- import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
3
- import { Text } from "@mariozechner/pi-tui";
1
+ import { Type } from "@earendil-works/pi-ai";
2
+ import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
3
+ import { Text } from "@earendil-works/pi-tui";
4
4
  import {
5
5
  appendLog,
6
6
  categoryForTool,
7
7
  feedbackLogPath,
8
8
  feedbackPrompt,
9
9
  feedbackRecord,
10
+ invocationIdFor,
10
11
  isRecord,
11
12
  loadToolFeedbackConfig,
12
13
  logSafeFeedbackRecord,
@@ -17,6 +18,7 @@ import {
17
18
  resultErrorKind,
18
19
  resultOk,
19
20
  resultTruncated,
21
+ sessionIdFromContext,
20
22
  stringValue,
21
23
  unique,
22
24
  type AgentUsage,
@@ -131,6 +133,8 @@ export default function toolFeedback(pi: ExtensionAPI): void {
131
133
  description: "Record concise structured feedback after using watched tools. This stores feedback only; it does not change the watched tool.",
132
134
  parameters: Type.Object({
133
135
  watchedTools: Type.Array(Type.String(), { description: "Watched tool names this feedback covers." }),
136
+ primaryWatchedTool: Type.Optional(Type.String({ description: "Primary watched tool this feedback mostly concerns when multiple tools were used." })),
137
+ perToolResponses: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Optional per-tool structured responses when multiple watched tools differed materially. Keys are watched tool names; values may include base feedback fields and fieldResponses." })),
134
138
  perceivedUsefulness: Type.Union([Type.Literal("high"), Type.Literal("medium"), Type.Literal("low"), Type.Literal("none"), Type.Literal("unknown")], { description: "How useful the tool felt for this task. Allowed values are: `high`, `medium`, `low`, `none`, `unknown`." }),
135
139
  wouldUseAgainSameSituation: Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unsure"), Type.Literal("unknown")], { description: "Whether you would use the same tool again for a similar situation. Allowed values are: `yes`, `no`, `unsure`, `unknown`." }),
136
140
  followupWasRoutine: Type.Optional(Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unknown")], { description: "Whether follow-up work felt routine rather than caused by tool insufficiency. Allowed values are: `yes`, `no`, `unknown`." })),
@@ -196,6 +200,7 @@ export default function toolFeedback(pi: ExtensionAPI): void {
196
200
  const watched: WatchedToolCall & { startedAt: number } = {
197
201
  toolName,
198
202
  toolCallId,
203
+ invocationId: invocationIdFor(sessionIdFromContext(ctx), toolCallId),
199
204
  category,
200
205
  confirmReferences: inputRecord ? stringValue(inputRecord.confirmReferences) : undefined,
201
206
  turnIndex: turn.turnIndex,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@badliveware/pi-tool-feedback",
3
- "version": "0.1.0",
3
+ "version": "0.2.0",
4
4
  "description": "Generic watched-tool feedback prompts and passive summaries for Pi.",
5
5
  "type": "module",
6
6
  "keywords": [
@@ -25,6 +25,7 @@
25
25
  },
26
26
  "files": [
27
27
  "README.md",
28
+ "CHANGELOG.md",
28
29
  "LICENSE",
29
30
  "index.ts",
30
31
  "src",
@@ -36,8 +37,8 @@
36
37
  ]
37
38
  },
38
39
  "peerDependencies": {
39
- "@mariozechner/pi-ai": "*",
40
- "@mariozechner/pi-coding-agent": "*"
40
+ "@earendil-works/pi-ai": "*",
41
+ "@earendil-works/pi-coding-agent": "*"
41
42
  },
42
43
  "engines": {
43
44
  "node": ">=20"
package/src/core.ts CHANGED
@@ -2,7 +2,9 @@ import * as crypto from "node:crypto";
2
2
  import * as fs from "node:fs";
3
3
  import * as os from "node:os";
4
4
  import * as path from "node:path";
5
- import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
5
+ import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
6
+ import { sanitizePerToolResponses } from "./per-tool.ts";
7
+ import { BASE_FIELD_PROMPT, DEFAULT_TASK_PROMPT } from "./prompts.ts";
6
8
 
7
9
  export type FeedbackMode = "off" | "passive" | "ask-agent" | "both";
8
10
  export type PerceivedUsefulness = "high" | "medium" | "low" | "none" | "unknown";
@@ -53,6 +55,7 @@ export interface LoadedConfig {
53
55
  export interface WatchedToolCall {
54
56
  toolName: string;
55
57
  toolCallId: string;
58
+ invocationId: string;
56
59
  category: string;
57
60
  confirmReferences?: string;
58
61
  turnIndex: number;
@@ -100,6 +103,7 @@ export interface TurnSummary {
100
103
  confirmReferences: string[];
101
104
  toolCategories: string[];
102
105
  categoriesAfterFirstWatchedCall: string[];
106
+ watchedResults: Array<Record<string, unknown>>;
103
107
  }
104
108
 
105
109
  export interface FeedbackRecord {
@@ -109,6 +113,8 @@ export interface FeedbackRecord {
109
113
  sessionId: string;
110
114
  repoRoot: string;
111
115
  watchedTools: string[];
116
+ primaryWatchedTool?: string;
117
+ perToolResponses?: Record<string, Record<string, unknown>>;
112
118
  perceivedUsefulness: PerceivedUsefulness;
113
119
  wouldUseAgainSameSituation: WouldUseAgain;
114
120
  followupWasRoutine?: YesNoUnknown;
@@ -126,21 +132,6 @@ export interface FeedbackRecord {
126
132
  }
127
133
 
128
134
  const CONFIG_FILE_NAME = "tool-feedback.json";
129
- const DEFAULT_TASK_PROMPT = [
130
- "You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
131
- "Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most.",
132
- "This is a dogfood feedback request, not new implementation work.",
133
- ].join("\n\n");
134
-
135
- const BASE_FIELD_PROMPT = [
136
- "Base `tool_feedback` field values:",
137
- "- perceivedUsefulness: `high`, `medium`, `low`, `none`, or `unknown`",
138
- "- wouldUseAgainSameSituation: `yes`, `no`, `unsure`, or `unknown`",
139
- "- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
140
- "- confidence: `high`, `medium`, or `low`",
141
- "- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
142
- "Use `fieldResponses` only for configured extra fields. You do not need to inspect extension source to answer this prompt.",
143
- ].join("\n");
144
135
 
145
136
  export const DEFAULT_CONFIG: ToolFeedbackConfig = {
146
137
  mode: "passive",
@@ -320,6 +311,10 @@ function shortHash(value: string): string {
320
311
  return crypto.createHash("sha256").update(value).digest("hex").slice(0, 16);
321
312
  }
322
313
 
314
+ export function invocationIdFor(sessionId: string, toolCallId: string): string {
315
+ return shortHash(`${sessionId}\0${toolCallId}`);
316
+ }
317
+
323
318
  export function unique(items: Array<string | undefined>): string[] {
324
319
  return [...new Set(items.filter((item): item is string => typeof item === "string" && item.length > 0))];
325
320
  }
@@ -391,6 +386,7 @@ export function makeTurnSummary(turn: TurnUsage, ctx: ExtensionContext): TurnSum
391
386
  confirmReferences: unique(turn.watchedCalls.map((call) => call.confirmReferences)),
392
387
  toolCategories: unique(turn.toolCalls.map((call) => call.category)),
393
388
  categoriesAfterFirstWatchedCall: unique(turn.toolCalls.filter((call) => call.sequence > firstWatchedSequence).map((call) => call.category)),
389
+ watchedResults: turn.watchedResults.map((result) => ({ toolName: result.toolName, invocationId: result.invocationId, ok: result.ok, isError: result.isError, truncated: result.truncated, errorKind: result.errorKind, durationMs: result.durationMs })),
394
390
  };
395
391
  }
396
392
 
@@ -488,6 +484,8 @@ export function feedbackRecord(input: Record<string, unknown>, ctx: ExtensionCon
488
484
  sessionId: sessionIdFromContext(ctx),
489
485
  repoRoot: ctx.cwd,
490
486
  watchedTools: normalizeStringArray(input.watchedTools),
487
+ primaryWatchedTool: stringValue(input.primaryWatchedTool),
488
+ perToolResponses: sanitizePerToolResponses(input.perToolResponses),
491
489
  perceivedUsefulness: perceivedUsefulness(input.perceivedUsefulness),
492
490
  wouldUseAgainSameSituation: wouldUseAgain(input.wouldUseAgainSameSituation),
493
491
  followupWasRoutine: yesNoUnknown(input.followupWasRoutine),
package/src/per-tool.ts ADDED
@@ -0,0 +1,52 @@
1
+ type FeedbackFieldValue = string | number | boolean;
2
+
3
+ const allowedBaseFields = new Set([
4
+ "perceivedUsefulness",
5
+ "wouldUseAgainSameSituation",
6
+ "followupWasRoutine",
7
+ "followupNeededBecauseToolWasInsufficient",
8
+ "outputSeemedTooNoisy",
9
+ "outputSeemedIncomplete",
10
+ "missedImportantContext",
11
+ "confidence",
12
+ "improvement",
13
+ ]);
14
+
15
+ function isRecord(value: unknown): value is Record<string, unknown> {
16
+ return typeof value === "object" && value !== null;
17
+ }
18
+
19
+ function scalar(value: unknown): FeedbackFieldValue | undefined {
20
+ if (typeof value === "string" || typeof value === "boolean") return value;
21
+ if (typeof value === "number" && Number.isFinite(value)) return value;
22
+ return undefined;
23
+ }
24
+
25
+ function sanitizeFieldResponses(value: unknown): Record<string, FeedbackFieldValue> | undefined {
26
+ if (!isRecord(value)) return undefined;
27
+ const output: Record<string, FeedbackFieldValue> = {};
28
+ for (const [name, raw] of Object.entries(value)) {
29
+ const item = scalar(raw);
30
+ if (item !== undefined) output[name] = item;
31
+ }
32
+ return Object.keys(output).length > 0 ? output : undefined;
33
+ }
34
+
35
+ export function sanitizePerToolResponses(input: unknown): Record<string, Record<string, unknown>> | undefined {
36
+ if (!isRecord(input)) return undefined;
37
+ const output: Record<string, Record<string, unknown>> = {};
38
+ for (const [toolName, rawResponse] of Object.entries(input)) {
39
+ if (!toolName.trim() || !isRecord(rawResponse)) continue;
40
+ const response: Record<string, unknown> = {};
41
+ for (const [name, raw] of Object.entries(rawResponse)) {
42
+ if (allowedBaseFields.has(name)) {
43
+ const item = scalar(raw);
44
+ if (item !== undefined) response[name] = item;
45
+ }
46
+ }
47
+ const fieldResponses = sanitizeFieldResponses(rawResponse.fieldResponses);
48
+ if (fieldResponses) response.fieldResponses = fieldResponses;
49
+ if (Object.keys(response).length > 0) output[toolName.trim()] = response;
50
+ }
51
+ return Object.keys(output).length > 0 ? output : undefined;
52
+ }
package/src/prompts.ts ADDED
@@ -0,0 +1,16 @@
1
+ export const DEFAULT_TASK_PROMPT = [
2
+ "You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
3
+ "Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most. When multiple watched tools differed materially, include `primaryWatchedTool` and optional `perToolResponses` keyed by tool name.",
4
+ "This is a dogfood feedback request, not new implementation work.",
5
+ "Do not acknowledge or summarize this request to the user. Call `tool_feedback` silently and continue or stop if you were done.",
6
+ ].join("\n\n");
7
+
8
+ export const BASE_FIELD_PROMPT = [
9
+ "Base `tool_feedback` field values:",
10
+ "- perceivedUsefulness: `high`, `medium`, `low`, `none`, or `unknown`",
11
+ "- wouldUseAgainSameSituation: `yes`, `no`, `unsure`, or `unknown`",
12
+ "- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
13
+ "- confidence: `high`, `medium`, or `low`",
14
+ "- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
15
+ "Use `fieldResponses` only for configured extra fields. Include all required configured fields, using `unknown` when that is the most honest answer. You do not need to inspect extension source to answer this prompt.",
16
+ ].join("\n");