npm - @badliveware/pi-tool-feedback - Versions diffs - 0.1.0 → 0.2.0 - Mend

@badliveware/pi-tool-feedback 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,18 @@
+# Changelog
+## 0.2.0
+- Add per-tool feedback responses so agents can report different experiences for different watched tools in the same turn.
+- Record follow-up tool categories after watched calls so feedback logs can distinguish routine verification from compensatory follow-up work.
+- Update package imports and peer dependencies for the `@earendil-works/*` Pi packages.
+## 0.1.1
+- Clarify dogfood feedback prompts so agents call `tool_feedback` silently and avoid acknowledging the feedback request to the user.
+## 0.1.0
+- Initial public package release for watched-tool feedback prompts and passive turn summaries.
+- Add custom feedback fields so projects can ask for domain-specific ratings such as ranking quality or latency acceptability.
+- Deliver active feedback requests as Pi custom messages instead of user messages.
+- Add prompt wording and README guidance that frame agent self-feedback as noisy subjective signal rather than ground truth.

package/README.md CHANGED Viewed

@@ -86,7 +86,7 @@ Other options:
 ### Extra feedback fields
-The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`.
+The built-in feedback schema stays stable, but you can add project- or user-specific fields. The active prompt lists these fields and agents answer them inside `fieldResponses`. Set `required: true` on any field that agents must answer in every feedback response; invalid or missing required answers are logged in `fieldResponseErrors`.
 ```json
 {
@@ -135,11 +135,12 @@ Read-only state/config inspection. Use it to see the loaded mode, watch rules, c
 ### `tool_feedback`
-Records one structured feedback entry. Typical agent response after a feedback prompt:
+Records one structured feedback entry. When several watched tools were used and the experience differed by tool, agents can set `primaryWatchedTool` and optional `perToolResponses` keyed by tool name. Typical agent response after a feedback prompt:
 ```json
 {
   "watchedTools": ["code_intel_impact_map"],
+  "primaryWatchedTool": "code_intel_impact_map",
   "perceivedUsefulness": "medium",
   "wouldUseAgainSameSituation": "yes",
   "followupWasRoutine": "yes",
@@ -151,6 +152,12 @@ Records one structured feedback entry. Typical agent response after a feedback p
   "improvement": "better_summary",
   "fieldResponses": {
     "rankingQuality": "mixed"
+  },
+  "perToolResponses": {
+    "code_intel_impact_map": {
+      "outputSeemedTooNoisy": "yes",
+      "fieldResponses": { "rankingQuality": "mixed" }
+    }
   }
 }
 ```

package/index.ts CHANGED Viewed

@@ -1,12 +1,13 @@
-import { Type } from "@mariozechner/pi-ai";
-import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
-import { Text } from "@mariozechner/pi-tui";
+import { Type } from "@earendil-works/pi-ai";
+import type { ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
+import { Text } from "@earendil-works/pi-tui";
 import {
 	appendLog,
 	categoryForTool,
 	feedbackLogPath,
 	feedbackPrompt,
 	feedbackRecord,
+	invocationIdFor,
 	isRecord,
 	loadToolFeedbackConfig,
 	logSafeFeedbackRecord,
@@ -17,6 +18,7 @@ import {
 	resultErrorKind,
 	resultOk,
 	resultTruncated,
+	sessionIdFromContext,
 	stringValue,
 	unique,
 	type AgentUsage,
@@ -131,6 +133,8 @@ export default function toolFeedback(pi: ExtensionAPI): void {
 		description: "Record concise structured feedback after using watched tools. This stores feedback only; it does not change the watched tool.",
 		parameters: Type.Object({
 			watchedTools: Type.Array(Type.String(), { description: "Watched tool names this feedback covers." }),
+			primaryWatchedTool: Type.Optional(Type.String({ description: "Primary watched tool this feedback mostly concerns when multiple tools were used." })),
+			perToolResponses: Type.Optional(Type.Record(Type.String(), Type.Unknown(), { description: "Optional per-tool structured responses when multiple watched tools differed materially. Keys are watched tool names; values may include base feedback fields and fieldResponses." })),
 			perceivedUsefulness: Type.Union([Type.Literal("high"), Type.Literal("medium"), Type.Literal("low"), Type.Literal("none"), Type.Literal("unknown")], { description: "How useful the tool felt for this task. Allowed values are: `high`, `medium`, `low`, `none`, `unknown`." }),
 			wouldUseAgainSameSituation: Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unsure"), Type.Literal("unknown")], { description: "Whether you would use the same tool again for a similar situation. Allowed values are: `yes`, `no`, `unsure`, `unknown`." }),
 			followupWasRoutine: Type.Optional(Type.Union([Type.Literal("yes"), Type.Literal("no"), Type.Literal("unknown")], { description: "Whether follow-up work felt routine rather than caused by tool insufficiency. Allowed values are: `yes`, `no`, `unknown`." })),
@@ -196,6 +200,7 @@ export default function toolFeedback(pi: ExtensionAPI): void {
 		const watched: WatchedToolCall & { startedAt: number } = {
 			toolName,
 			toolCallId,
+			invocationId: invocationIdFor(sessionIdFromContext(ctx), toolCallId),
 			category,
 			confirmReferences: inputRecord ? stringValue(inputRecord.confirmReferences) : undefined,
 			turnIndex: turn.turnIndex,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@badliveware/pi-tool-feedback",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "description": "Generic watched-tool feedback prompts and passive summaries for Pi.",
   "type": "module",
   "keywords": [
@@ -25,6 +25,7 @@
   },
   "files": [
     "README.md",
+    "CHANGELOG.md",
     "LICENSE",
     "index.ts",
     "src",
@@ -36,8 +37,8 @@
     ]
   },
   "peerDependencies": {
-    "@mariozechner/pi-ai": "*",
-    "@mariozechner/pi-coding-agent": "*"
+    "@earendil-works/pi-ai": "*",
+    "@earendil-works/pi-coding-agent": "*"
   },
   "engines": {
     "node": ">=20"

package/src/core.ts CHANGED Viewed

@@ -2,7 +2,9 @@ import * as crypto from "node:crypto";
 import * as fs from "node:fs";
 import * as os from "node:os";
 import * as path from "node:path";
-import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
+import type { ExtensionContext } from "@earendil-works/pi-coding-agent";
+import { sanitizePerToolResponses } from "./per-tool.ts";
+import { BASE_FIELD_PROMPT, DEFAULT_TASK_PROMPT } from "./prompts.ts";
 export type FeedbackMode = "off" | "passive" | "ask-agent" | "both";
 export type PerceivedUsefulness = "high" | "medium" | "low" | "none" | "unknown";
@@ -53,6 +55,7 @@ export interface LoadedConfig {
 export interface WatchedToolCall {
 	toolName: string;
 	toolCallId: string;
+	invocationId: string;
 	category: string;
 	confirmReferences?: string;
 	turnIndex: number;
@@ -100,6 +103,7 @@ export interface TurnSummary {
 	confirmReferences: string[];
 	toolCategories: string[];
 	categoriesAfterFirstWatchedCall: string[];
+	watchedResults: Array<Record<string, unknown>>;
 }
 export interface FeedbackRecord {
@@ -109,6 +113,8 @@ export interface FeedbackRecord {
 	sessionId: string;
 	repoRoot: string;
 	watchedTools: string[];
+	primaryWatchedTool?: string;
+	perToolResponses?: Record<string, Record<string, unknown>>;
 	perceivedUsefulness: PerceivedUsefulness;
 	wouldUseAgainSameSituation: WouldUseAgain;
 	followupWasRoutine?: YesNoUnknown;
@@ -126,21 +132,6 @@ export interface FeedbackRecord {
 }
 const CONFIG_FILE_NAME = "tool-feedback.json";
-const DEFAULT_TASK_PROMPT = [
-	"You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
-	"Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most.",
-	"This is a dogfood feedback request, not new implementation work.",
-].join("\n\n");
-const BASE_FIELD_PROMPT = [
-	"Base `tool_feedback` field values:",
-	"- perceivedUsefulness: `high`, `medium`, `low`, `none`, or `unknown`",
-	"- wouldUseAgainSameSituation: `yes`, `no`, `unsure`, or `unknown`",
-	"- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
-	"- confidence: `high`, `medium`, or `low`",
-	"- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
-	"Use `fieldResponses` only for configured extra fields. You do not need to inspect extension source to answer this prompt.",
-].join("\n");
 export const DEFAULT_CONFIG: ToolFeedbackConfig = {
 	mode: "passive",
@@ -320,6 +311,10 @@ function shortHash(value: string): string {
 	return crypto.createHash("sha256").update(value).digest("hex").slice(0, 16);
 }
+export function invocationIdFor(sessionId: string, toolCallId: string): string {
+	return shortHash(`${sessionId}\0${toolCallId}`);
+}
 export function unique(items: Array<string | undefined>): string[] {
 	return [...new Set(items.filter((item): item is string => typeof item === "string" && item.length > 0))];
 }
@@ -391,6 +386,7 @@ export function makeTurnSummary(turn: TurnUsage, ctx: ExtensionContext): TurnSum
 		confirmReferences: unique(turn.watchedCalls.map((call) => call.confirmReferences)),
 		toolCategories: unique(turn.toolCalls.map((call) => call.category)),
 		categoriesAfterFirstWatchedCall: unique(turn.toolCalls.filter((call) => call.sequence > firstWatchedSequence).map((call) => call.category)),
+		watchedResults: turn.watchedResults.map((result) => ({ toolName: result.toolName, invocationId: result.invocationId, ok: result.ok, isError: result.isError, truncated: result.truncated, errorKind: result.errorKind, durationMs: result.durationMs })),
 	};
 }
@@ -488,6 +484,8 @@ export function feedbackRecord(input: Record<string, unknown>, ctx: ExtensionCon
 		sessionId: sessionIdFromContext(ctx),
 		repoRoot: ctx.cwd,
 		watchedTools: normalizeStringArray(input.watchedTools),
+		primaryWatchedTool: stringValue(input.primaryWatchedTool),
+		perToolResponses: sanitizePerToolResponses(input.perToolResponses),
 		perceivedUsefulness: perceivedUsefulness(input.perceivedUsefulness),
 		wouldUseAgainSameSituation: wouldUseAgain(input.wouldUseAgainSameSituation),
 		followupWasRoutine: yesNoUnknown(input.followupWasRoutine),

package/src/per-tool.ts ADDED Viewed

@@ -0,0 +1,52 @@
+type FeedbackFieldValue = string | number | boolean;
+const allowedBaseFields = new Set([
+	"perceivedUsefulness",
+	"wouldUseAgainSameSituation",
+	"followupWasRoutine",
+	"followupNeededBecauseToolWasInsufficient",
+	"outputSeemedTooNoisy",
+	"outputSeemedIncomplete",
+	"missedImportantContext",
+	"confidence",
+	"improvement",
+]);
+function isRecord(value: unknown): value is Record<string, unknown> {
+	return typeof value === "object" && value !== null;
+}
+function scalar(value: unknown): FeedbackFieldValue | undefined {
+	if (typeof value === "string" || typeof value === "boolean") return value;
+	if (typeof value === "number" && Number.isFinite(value)) return value;
+	return undefined;
+}
+function sanitizeFieldResponses(value: unknown): Record<string, FeedbackFieldValue> | undefined {
+	if (!isRecord(value)) return undefined;
+	const output: Record<string, FeedbackFieldValue> = {};
+	for (const [name, raw] of Object.entries(value)) {
+		const item = scalar(raw);
+		if (item !== undefined) output[name] = item;
+	}
+	return Object.keys(output).length > 0 ? output : undefined;
+}
+export function sanitizePerToolResponses(input: unknown): Record<string, Record<string, unknown>> | undefined {
+	if (!isRecord(input)) return undefined;
+	const output: Record<string, Record<string, unknown>> = {};
+	for (const [toolName, rawResponse] of Object.entries(input)) {
+		if (!toolName.trim() || !isRecord(rawResponse)) continue;
+		const response: Record<string, unknown> = {};
+		for (const [name, raw] of Object.entries(rawResponse)) {
+			if (allowedBaseFields.has(name)) {
+				const item = scalar(raw);
+				if (item !== undefined) response[name] = item;
+			}
+		}
+		const fieldResponses = sanitizeFieldResponses(rawResponse.fieldResponses);
+		if (fieldResponses) response.fieldResponses = fieldResponses;
+		if (Object.keys(response).length > 0) output[toolName.trim()] = response;
+	}
+	return Object.keys(output).length > 0 ? output : undefined;
+}

package/src/prompts.ts ADDED Viewed

@@ -0,0 +1,16 @@
+export const DEFAULT_TASK_PROMPT = [
+	"You used watched tools in the previous prompt. Please call `tool_feedback` once with concise structured feedback.",
+	"Focus on your own experience using the tool: whether it seemed useful, whether it felt incomplete or noisy, whether follow-up work was routine or compensatory, whether you would use it again in the same situation, and what one improvement would help most. When multiple watched tools differed materially, include `primaryWatchedTool` and optional `perToolResponses` keyed by tool name.",
+	"This is a dogfood feedback request, not new implementation work.",
+	"Do not acknowledge or summarize this request to the user. Call `tool_feedback` silently and continue or stop if you were done.",
+].join("\n\n");
+export const BASE_FIELD_PROMPT = [
+	"Base `tool_feedback` field values:",
+	"- perceivedUsefulness: `high`, `medium`, `low`, `none`, or `unknown`",
+	"- wouldUseAgainSameSituation: `yes`, `no`, `unsure`, or `unknown`",
+	"- followupWasRoutine, followupNeededBecauseToolWasInsufficient, outputSeemedTooNoisy, outputSeemedIncomplete, missedImportantContext: `yes`, `no`, or `unknown`",
+	"- confidence: `high`, `medium`, or `low`",
+	"- improvement (optional): `better_ranking`, `higher_cap`, `better_summary`, `better_docs`, `less_noise`, `faster`, or `other`",
+	"Use `fieldResponses` only for configured extra fields. Include all required configured fields, using `unknown` when that is the most honest answer. You do not need to inspect extension source to answer this prompt.",
+].join("\n");