npm - @zhushanwen/pi-evolve-daily - Versions diffs - 0.1.2 → 0.1.4 - Mend

@zhushanwen/pi-evolve-daily 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/package.json +1 -1
package/skills/evolve/SKILL.md +31 -0
package/skills/evolve-report/SKILL.md +52 -0
package/src/detectors/compact.ts +43 -0
package/src/detectors/goal-quality.ts +59 -0
package/src/detectors/param-error.ts +90 -0
package/src/detectors/subagent-result.ts +65 -0
package/src/index.ts +98 -4
package/src/problems.ts +232 -0
package/src/trackers/core.ts +459 -0
package/src/trackers/run_tests.mjs +202 -0
package/src/trackers/skill-execution.ts +126 -0
package/src/trackers/types.ts +176 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zhushanwen/pi-evolve-daily",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "description": "Daily evolution data collector — runs Python analyzer on first session of the day.",
   "type": "module",
   "main": "src/index.ts",

package/skills/evolve/SKILL.md CHANGED Viewed

@@ -120,6 +120,37 @@ Use these to identify:
 - **Mixed-trigger skills** — both paths used; healthy signal
 - **Never-triggered skills** — candidates for removal or description improvement
+#### 3e. New Dimension Analysis
+读取 daily-reports 中的新增字段，按优先级分析：
+1. **Tool Parameter Errors**（tool_error_stats）
+   - `param_error_rate > 25%` → 高优先级建议
+   - 某工具参数错误集中 → 针对性建议
+2. **Goal Task Quality**（goal_quality_stats）
+   - `task_completion_rate < 50%` → 任务拆分优化建议
+   - evidence 质量低 → Evidence 要求强化建议
+3. **Subagent Efficiency**（subagent_stats）
+   - `failure_rate > 20%` → task prompt 优化建议
+   - `retry_rate > 15%` → 任务拆分优化建议
+4. **Compact Efficiency**（compact_stats + context_stats）
+   - `compacts_per_session ≥ 3` → 上下文管理优化建议
+   - 上下文利用率持续偏高 → 工具输出优化建议
+5. **Workflow Efficiency**（workflow_stats）
+   - 某阶段耗时占比 > 50% → 流程优化建议
+   - gate 重试频繁 → gate 检查项优化建议
+6. **Todo Usage**（todo_stats）
+   - `abandon_rate > 25%` → Todo 使用模式优化建议
+每个维度的分析结果应作为 actionable issues 写入 feedback-records。
+对于高优先级发现（超过阈值的指标），生成 EvolutionSuggestion 对象并加入
+第 4 步的 suggestions 列表中。
 ### 4. Generate Suggestions
 For each actionable finding, create an EvolutionSuggestion object:

package/skills/evolve-report/SKILL.md CHANGED Viewed

@@ -44,6 +44,58 @@ User says "/evolve-report", "evolve-report", "查看报告", "进化报告",
    - Token consumption (input/output)
    - Anomalies and signals
    - Improvement suggestions (if any in the report)
+4. **New Dimensions** — if present in the report, display additional sections:
+   #### Tool Parameter Errors (`tool_error_stats`)
+   Display when `tool_error_stats` field exists:
+   - Param error rate per tool (highlight tools with rate > 25%)
+   - Top parameter error patterns with occurrence counts
+   - Actionable issues from feedback-records (if any)
+   #### Goal Task Quality (`goal_quality_stats`)
+   Display when `goal_quality_stats` field exists:
+   - Task completion rate (highlight if < 50%)
+   - Average tasks per goal
+   - Evidence quality distribution (high/medium/low)
+   - Actionable issues: task splitting, evidence requirements
+   #### Subagent Efficiency (`subagent_stats`)
+   Display when `subagent_stats` field exists:
+   - Success / failure / retry counts
+   - Failure rate (highlight if > 20%)
+   - Retry rate (highlight if > 15%)
+   - Actionable issues: task prompt optimization, task splitting
+   #### Compact & Context Efficiency (`compact_stats` + `context_stats`)
+   Display when either field exists:
+   - Average compacts per session (highlight if ≥ 3)
+   - Context utilization trend (rising / stable / falling)
+   - Actionable issues: context management, tool output optimization
+   #### Workflow Efficiency (`workflow_stats`)
+   Display when `workflow_stats` field exists:
+   - Per-phase duration breakdown (highlight phase > 50% of total)
+   - Gate retry frequency per phase
+   - Actionable issues: workflow optimization, gate criteria tuning
+   #### Todo Usage (`todo_stats`)
+   Display when `todo_stats` field exists:
+   - Completion / abandon / in-progress counts
+   - Abandon rate (highlight if > 25%)
+   - Actionable issues: todo usage pattern optimization
+   **Formatting guidelines for new dimensions:**
+   - Use a consistent section header format: `### Dimension Name`
+   - Show metrics as key-value pairs or simple tables
+   - Prefix actionable issues with `⚠` marker
+   - If a dimension field is missing from the report, skip that section entirely
+   - Group all new dimension sections under a `## Extended Metrics` heading
 ### List Reports

package/src/detectors/compact.ts ADDED Viewed

@@ -0,0 +1,43 @@
+// packages/evolve-daily/src/detectors/compact.ts
+import type { ProblemDefinition } from "../problems";
+/** Record built by the compact detector for a single `session_compact` event. */
+export interface CompactTrackedItem {
+  /** Generated identifier of the form `compact-<ms timestamp>-<random suffix>`. */
+  id: string;
+  /** Always the literal problem id `"compact-frequency"`. */
+  problemId: "compact-frequency";
+  /** Session identifier; `createItem` in this file initialises it to an empty string. */
+  sessionId: string;
+  /** `compactionEntry.tokensBefore` from the event, or 0 when the event does not carry it. */
+  tokensBefore: number;
+  /** Set to `true` for every item `createItem` builds. */
+  detected: boolean;
+  /** Lifecycle state; newly created items start as `"pending"`. */
+  status: "pending" | "completed" | "error" | "dismissed";
+  /** Optional free-form text; `createItem` in this file leaves it unset. */
+  detail?: string;
+}
+/**
+ * Creates the detector bound to the given problem definition (the items it
+ * builds are typed as belonging to the "compact-frequency" problem).
+ *
+ * @param problem Problem definition supplying the id and the steering template.
+ * @returns An object exposing `problemId`, `createItem` and `steering`.
+ */
+export function createCompactDetector(problem: ProblemDefinition) {
+  const detector = {
+    problemId: problem.id,
+    /**
+     * Builds a tracked item from a `session_compact` event.
+     * Compaction does not go through the generic tool_execution_end handler;
+     * it is observed through a dedicated pi.on("session_compact") listener.
+     */
+    createItem(event: {
+      compactionEntry?: { tokensBefore?: number };
+    }): CompactTrackedItem {
+      const createdAt = Date.now();
+      const suffix = Math.random().toString(36).slice(2, 7);
+      // A missing compaction entry (or missing count) is recorded as 0 tokens.
+      const tokensBefore = event.compactionEntry?.tokensBefore ?? 0;
+      const item: CompactTrackedItem = {
+        id: `compact-${createdAt}-${suffix}`,
+        problemId: problem.id as "compact-frequency",
+        sessionId: "",
+        tokensBefore,
+        detected: true,
+        status: "pending",
+      };
+      return item;
+    },
+    /** Fills the first `{{id}}` and `{{tokensBefore}}` placeholders of the steering template. */
+    steering(item: CompactTrackedItem): string {
+      const withId = problem.detector.steering.replace("{{id}}", item.id);
+      return withId.replace("{{tokensBefore}}", String(item.tokensBefore));
+    },
+  };
+  return detector;
+}

package/src/detectors/goal-quality.ts ADDED Viewed

@@ -0,0 +1,59 @@
+// packages/evolve-daily/src/detectors/goal-quality.ts
+import type { ProblemDefinition } from "../problems";
+/** Record built by the goal-quality detector for one `goal_manager` tool result. */
+export interface GoalQualityTrackedItem {
+  /** Generated identifier of the form `goal-quality-<ms timestamp>-<random suffix>`. */
+  id: string;
+  /** Always the literal problem id `"goal-task-quality"`. */
+  problemId: "goal-task-quality";
+  /** Session identifier; `createItem` in this file initialises it to an empty string. */
+  sessionId: string;
+  /** Goal identifier; `createItem` in this file initialises it to an empty string. */
+  goalId: string;
+  /** Number of entries in the event's `details.tasks` list (0 when absent). */
+  taskCount: number;
+  /** Number of tasks whose status is `"completed"`. */
+  completedCount: number;
+  /** Number of tasks whose status is `"cancelled"`. */
+  cancelledCount: number;
+  /** `completedCount / taskCount` as a fraction, or 0 when there are no tasks. */
+  taskCompletionRate: number;
+  /** `cancelledCount / taskCount` as a fraction, or 0 when there are no tasks. */
+  taskCancelRate: number;
+  /** Lifecycle state; newly created items start as `"pending"`. */
+  status: "pending" | "completed" | "error" | "dismissed";
+  /** Optional free-form text; `createItem` in this file leaves it unset. */
+  detail?: string;
+}
+/** Replaces every occurrence of `placeholder` in `template` with `value`, inserted literally. */
+function substituteAll(template: string, placeholder: string, value: string): string {
+  return template.split(placeholder).join(value);
+}
+/**
+ * Creates the detector for the goal task-quality problem.
+ *
+ * @param problem Problem definition supplying the id, the subscribed events and
+ *   the steering template.
+ * @returns An object exposing `problemId`, `events`, `match`, `createItem` and
+ *   `steering`.
+ */
+export function createGoalQualityDetector(problem: ProblemDefinition) {
+  return {
+    problemId: problem.id,
+    events: problem.detector.events,
+    /** Accepts only `tool_result` events produced by the `goal_manager` tool. */
+    match(event: { type: string; toolName?: string }): boolean {
+      if (event.type !== "tool_result") return false;
+      return event.toolName === "goal_manager";
+    },
+    /**
+     * Summarises the task list carried by the event into completion and
+     * cancellation counts and rates.
+     *
+     * The event handler hands over an untyped record, so the task list is
+     * validated at runtime: a non-array `details.tasks` is treated as an empty
+     * list and entries without a usable `status` are counted in the total only.
+     */
+    createItem(event: {
+      type: string;
+      toolName?: string;
+      details?: { tasks?: Array<{ status: string }> };
+    }): GoalQualityTrackedItem {
+      const rawTasks: unknown = event.details?.tasks;
+      const tasks: unknown[] = Array.isArray(rawTasks) ? rawTasks : [];
+      let completed = 0;
+      let cancelled = 0;
+      for (const task of tasks) {
+        // Optional chaining keeps null / undefined entries from throwing.
+        const status = (task as { status?: unknown } | null | undefined)?.status;
+        if (status === "completed") completed += 1;
+        else if (status === "cancelled") cancelled += 1;
+      }
+      const total = tasks.length;
+      return {
+        id: `goal-quality-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`,
+        problemId: problem.id as "goal-task-quality",
+        sessionId: "",
+        goalId: "",
+        taskCount: total,
+        completedCount: completed,
+        cancelledCount: cancelled,
+        // Guard against division by zero when the goal has no tasks.
+        taskCompletionRate: total > 0 ? completed / total : 0,
+        taskCancelRate: total > 0 ? cancelled / total : 0,
+        status: "pending",
+      };
+    },
+    /** Fills every `{{id}}` and `{{completionRate}}` placeholder of the steering template. */
+    steering(item: GoalQualityTrackedItem): string {
+      const withId = substituteAll(problem.detector.steering, "{{id}}", item.id);
+      return substituteAll(withId, "{{completionRate}}", String(item.taskCompletionRate));
+    },
+  };
+}

package/src/detectors/param-error.ts ADDED Viewed

@@ -0,0 +1,90 @@
+// packages/evolve-daily/src/detectors/param-error.ts
+import type { ProblemDefinition } from "../problems";
+/** Record built by the param-error detector for one failed tool result. */
+export interface ParamErrorTrackedItem {
+  /** Generated identifier of the form `param-error-<ms timestamp>-<random suffix>`. */
+  id: string;
+  /** Always the literal problem id `"tool-param-validation"`. */
+  problemId: "tool-param-validation";
+  /** Session identifier; `createItem` in this file initialises it to an empty string. */
+  sessionId: string;
+  /** Name of the tool that failed, or `"unknown"` when the event has no tool name. */
+  toolName: string;
+  /** Classification of the error text produced by `classifyError`. */
+  errorType: "param" | "runtime" | "unclassified";
+  /** Leading part of the error text (at most 200 characters). */
+  errorPreview: string;
+  /** Lifecycle state; newly created items start as `"pending"`. */
+  status: "pending" | "completed" | "error" | "dismissed";
+  /** Optional free-form text; `createItem` in this file leaves it unset. */
+  detail?: string;
+}
+/** Case-insensitive patterns that mark an error message as a parameter problem. */
+const PARAM_ERROR_PATTERNS: readonly RegExp[] = [
+  /required.*parameter/i,
+  /missing.*argument/i,
+  /invalid.*type/i,
+  /schema.*validation/i,
+  /unexpected.*token/i,
+  /parameter.*missing/i,
+  /argument.*required/i,
+  /invalid.*argument/i,
+  /unknown.*parameter/i,
+  /missing.*required/i,
+];
+/** Case-insensitive patterns that mark an error message as a runtime failure. */
+const RUNTIME_ERROR_PATTERNS: readonly RegExp[] = [
+  /enoent/i,
+  /permission denied/i,
+  /non-zero exit/i,
+  /timeout/i,
+  /syntaxerror/i,
+  /typeerror/i,
+  /connection refused/i,
+  /out of memory/i,
+  /could not find the exact text/i,
+  /no such file/i,
+];
+/**
+ * Classifies an error message by pattern matching.
+ *
+ * Parameter patterns are tried before runtime patterns, so a message matching
+ * both tables is reported as `"param"`.
+ *
+ * @param errorMessage Raw error text.
+ * @returns `"param"`, `"runtime"`, or `"unclassified"` when nothing matches.
+ */
+function classifyError(errorMessage: string): "param" | "runtime" | "unclassified" {
+  const matchesAny = (patterns: readonly RegExp[]): boolean =>
+    patterns.some((candidate) => candidate.test(errorMessage));
+  if (matchesAny(PARAM_ERROR_PATTERNS)) {
+    return "param";
+  }
+  return matchesAny(RUNTIME_ERROR_PATTERNS) ? "runtime" : "unclassified";
+}
+/** Tools whose failed results are recorded by this detector. */
+const TRACKED_TOOLS = new Set(["edit", "bash", "read", "write"]);
+/**
+ * Normalises a tool result's `content` into plain text.
+ *
+ * Strings are returned unchanged. For arrays, string entries and the `text`
+ * field of object entries are joined with newlines.
+ * NOTE(review): the `{ text: string }` entry shape is an assumption about the
+ * host's content blocks — confirm against the extension API.
+ * Any other value yields an empty string.
+ */
+function toErrorText(content: unknown): string {
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) return "";
+  const parts: string[] = [];
+  for (const part of content as unknown[]) {
+    if (typeof part === "string") {
+      parts.push(part);
+    } else if (typeof part === "object" && part !== null) {
+      const text = (part as { text?: unknown }).text;
+      if (typeof text === "string") parts.push(text);
+    }
+  }
+  return parts.join("\n");
+}
+/**
+ * Replaces every occurrence of `placeholder` with `value`, inserted literally.
+ * split/join is used instead of `String.prototype.replace` because `replace`
+ * expands `$&`, `$'`, `` $` `` and `$$` inside the replacement string.
+ */
+function fillPlaceholder(template: string, placeholder: string, value: string): string {
+  return template.split(placeholder).join(value);
+}
+/**
+ * Creates the detector for the tool parameter-validation problem.
+ *
+ * @param problem Problem definition supplying the id, the subscribed events and
+ *   the steering template.
+ * @returns An object exposing `problemId`, `events`, `match`, `createItem` and
+ *   `steering`.
+ */
+export function createParamErrorDetector(problem: ProblemDefinition) {
+  return {
+    problemId: problem.id,
+    events: problem.detector.events,
+    /** Accepts only failed (`isError === true`) `tool_result` events of a tracked tool. */
+    match(event: { type: string; toolName?: string; isError?: boolean }): boolean {
+      if (event.type !== "tool_result") return false;
+      if (event.isError !== true) return false;
+      return TRACKED_TOOLS.has(event.toolName ?? "");
+    },
+    /**
+     * Builds a tracked item from a failed tool result: classifies the error
+     * text and keeps its first 200 characters as a preview.
+     *
+     * `content` is accepted as `unknown` because the event handler passes an
+     * untyped record; non-string content is normalised by `toErrorText`.
+     */
+    createItem(event: {
+      type: string;
+      toolName?: string;
+      isError?: boolean;
+      content?: unknown;
+    }): ParamErrorTrackedItem {
+      const errorMessage = toErrorText(event.content);
+      return {
+        id: `param-error-${Date.now()}-${Math.random().toString(36).slice(2, 7)}`,
+        problemId: problem.id as "tool-param-validation",
+        sessionId: "",
+        toolName: event.toolName ?? "unknown",
+        errorType: classifyError(errorMessage),
+        errorPreview: errorMessage.slice(0, 200),
+        status: "pending",
+      };
+    },
+    /**
+     * Fills the `{{id}}`, `{{toolName}}` and `{{errorPreview}}` placeholders of
+     * the steering template. The error preview is substituted last so that
+     * placeholder-like text inside it is never rescanned.
+     */
+    steering(item: ParamErrorTrackedItem): string {
+      const withId = fillPlaceholder(problem.detector.steering, "{{id}}", item.id);
+      const withTool = fillPlaceholder(withId, "{{toolName}}", item.toolName);
+      return fillPlaceholder(withTool, "{{errorPreview}}", item.errorPreview);
+    },
+  };
+}

package/src/detectors/subagent-result.ts ADDED Viewed

@@ -0,0 +1,65 @@
+// packages/evolve-daily/src/detectors/subagent-result.ts
+import type { ProblemDefinition } from "../problems";
+/** Record built by the subagent detector for one `subagent` tool result. */
+export interface SubagentTrackedItem {
+  /** Generated identifier of the form `subagent-<ms timestamp>-<random suffix>`. */
+  id: string;
+  /** Always the literal problem id `"subagent-efficiency"`. */
+  problemId: "subagent-efficiency";
+  /** Session identifier; `createItem` in this file initialises it to an empty string. */
+  sessionId: string;
+  /** Category derived from the task prompt by `classifyTaskType`; `"unknown"` when nothing matches. */
+  taskType: string;
+  /** The event's `isError` flag, defaulting to `false` when absent. */
+  isError: boolean;
+  /** `length` of the event's `content`, or 0 when the event has no content. */
+  resultLength: number;
+  /** Lifecycle state; newly created items start as `"pending"`. */
+  status: "pending" | "completed" | "error" | "dismissed";
+  /** Optional free-form text; `createItem` in this file leaves it unset. */
+  detail?: string;
+}
+/** Keyword patterns (English and Chinese) keyed by task category, in priority order. */
+const TASK_TYPE_PATTERNS: Record<string, RegExp> = {
+  code_review: /review|审查|检查/i,
+  implementation: /implement|实现|编写|创建/i,
+  testing: /test|测试|验证/i,
+  analysis: /analyze|分析|研究/i,
+};
+/**
+ * Derives a task category from a subagent task prompt.
+ *
+ * Categories are tried in the declaration order of `TASK_TYPE_PATTERNS`; the
+ * first one whose pattern matches wins.
+ *
+ * @param taskPrompt Prompt text given to the subagent.
+ * @returns The matching category key, or `"unknown"` when no pattern matches.
+ */
+function classifyTaskType(taskPrompt: string): string {
+  const hit = Object.entries(TASK_TYPE_PATTERNS).find(([, keywords]) =>
+    keywords.test(taskPrompt)
+  );
+  return hit === undefined ? "unknown" : hit[0];
+}
+/**
+ * Creates the detector for the subagent-efficiency problem.
+ *
+ * @param problem Problem definition supplying the id, the subscribed events and
+ *   the steering template.
+ * @returns An object exposing `problemId`, `events`, `match`, `createItem` and
+ *   `steering`.
+ */
+export function createSubagentDetector(problem: ProblemDefinition) {
+  const detector = {
+    problemId: problem.id,
+    events: problem.detector.events,
+    /** Accepts `tool_result` events of the `subagent` tool, successful or not. */
+    match(event: { type: string; toolName?: string; isError?: boolean }): boolean {
+      return event.type === "tool_result" && event.toolName === "subagent";
+    },
+    /** Builds a tracked item recording task category, error flag and result length. */
+    createItem(event: {
+      type: string;
+      toolName?: string;
+      isError?: boolean;
+      content?: string;
+      taskPrompt?: string;
+    }): SubagentTrackedItem {
+      const createdAt = Date.now();
+      const suffix = Math.random().toString(36).slice(2, 7);
+      const item: SubagentTrackedItem = {
+        id: `subagent-${createdAt}-${suffix}`,
+        problemId: problem.id as "subagent-efficiency",
+        sessionId: "",
+        // A missing prompt is classified as an empty string.
+        taskType: classifyTaskType(event.taskPrompt ?? ""),
+        isError: event.isError ?? false,
+        resultLength: event.content?.length ?? 0,
+        status: "pending",
+      };
+      return item;
+    },
+    /**
+     * Fills the first `{{id}}`, `{{exitCode}}` and `{{duration}}` placeholders
+     * of the steering template. The exit code is rendered as "error" or "0"
+     * from the error flag, and the duration is always "unknown".
+     */
+    steering(item: SubagentTrackedItem): string {
+      const exitCode = item.isError ? "error" : "0";
+      const withId = problem.detector.steering.replace("{{id}}", item.id);
+      const withExit = withId.replace("{{exitCode}}", exitCode);
+      return withExit.replace("{{duration}}", "unknown");
+    },
+  };
+  return detector;
+}

package/src/index.ts CHANGED Viewed

@@ -1,18 +1,35 @@
+// packages/evolve-daily/src/index.ts
 import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
 import { existsSync, unlinkSync } from "node:fs";
 import { homedir } from "node:os";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
+import { PROBLEM_REGISTRY } from "./problems";
+import { createCompactDetector } from "./detectors/compact";
+import { createSubagentDetector } from "./detectors/subagent-result";
+import { createParamErrorDetector } from "./detectors/param-error";
+import { createGoalQualityDetector } from "./detectors/goal-quality";
+import { createTracker } from "./trackers/core";
+import { skillExecutionConfig } from "./trackers/skill-execution";
 // 资源文件（Python 脚本）相对于扩展目录自身定位，不依赖外部绝对路径
 const EXT_DIR = dirname(fileURLToPath(import.meta.url)); // src/
-const ANALYZER_PATH = join(EXT_DIR, "..", "scripts", "analyze.py");
+const ANALYZER_PATH = join(EXT_DIR, "..", "analyzer", "analyze.py");
 // 运行时数据目录使用 Pi 平台约定路径（homedir + .pi/agent/）
-// 这是运行时产出数据，不是扩展自带的资源，用平台约定路径是合理的
 const REPORTS_DIR = join(homedir(), ".pi", "agent", "evolution-data", "daily-reports");
+/**
+ * Common shape of the detectors that are run against `tool_result` events.
+ * The `tool_result` handler in this file calls `match` first and, when it
+ * returns true, calls `createItem` and appends the item's `problemId`, `id`,
+ * `status` and `detail` to the "evolve-feedback" entries.
+ */
+interface ToolResultDetector {
+  /** Problem id, also used to label the detector in error logs. */
+  problemId: string;
+  /** Returns true when the detector wants to record this event. */
+  match(event: Record<string, unknown>): boolean;
+  /** Builds the tracked item for an event accepted by `match`. */
+  createItem(event: Record<string, unknown>): { id: string; problemId: string; status: string; detail?: string };
+}
 export default function evolveDailyExtension(pi: ExtensionAPI) {
+  // ── L1: session_start 时调用 Python analyzer ──
   pi.on("session_start", async () => {
     const today = new Date().toISOString().slice(0, 10); // YYYY-MM-DD
     const reportPath = join(REPORTS_DIR, `${today}.json`);
@@ -22,13 +39,90 @@ export default function evolveDailyExtension(pi: ExtensionAPI) {
     try {
       await pi.exec(
         "python3",
-        [ANALYZER_PATH, "--since", "1d", "--format", "json", "--output", reportPath],
+        [
+          ANALYZER_PATH,
+          "--since",
+          "1d",
+          "--format",
+          "json",
+          "--output",
+          reportPath,
+        ],
         { timeout: 30_000 }
       );
     } catch (e) {
       // Clean up partial output if analyzer failed mid-write
-      try { unlinkSync(reportPath); } catch { /* already gone */ }
+      try {
+        unlinkSync(reportPath);
+      } catch {
+        /* already gone */
+      }
       console.error("[evolve-daily] analyzer failed:", e);
     }
   });
+  // ── L2c: Tracker 主动追踪（createTracker 在闭包内调用） ──
+  createTracker(pi, skillExecutionConfig);
+  // ── L2a: Compact 实时追踪 — 监听 session_compact 事件 ──
+  const compactDetector = createCompactDetector(
+    PROBLEM_REGISTRY.find((p) => p.id === "compact-frequency")!
+  );
+  pi.on("session_compact", async (event: Record<string, unknown>) => {
+    try {
+      const item = compactDetector.createItem(event);
+      pi.appendEntry("evolve-feedback", {
+        problemId: item.problemId,
+        itemId: item.id,
+        status: item.status,
+        detail: item.detail ?? null,
+        timestamp: new Date().toISOString(),
+      });
+    } catch (e) {
+      console.error(
+        `[evolve-daily] compact detector error:`,
+        e
+      );
+    }
+  });
+  // ── L2b: 工具结果实时追踪 — 监听 tool_result 事件 ──
+  // subagent/param-error/goal-quality detectors 检查 event.type === "tool_result"
+  const toolDetectors: ToolResultDetector[] = [
+    createSubagentDetector(
+      PROBLEM_REGISTRY.find((p) => p.id === "subagent-efficiency")!
+    ),
+    createParamErrorDetector(
+      PROBLEM_REGISTRY.find((p) => p.id === "tool-param-validation")!
+    ),
+    createGoalQualityDetector(
+      PROBLEM_REGISTRY.find((p) => p.id === "goal-task-quality")!
+    ),
+  ];
+  pi.on(
+    "tool_result",
+    async (event: Record<string, unknown>, _ctx?: unknown) => {
+      for (const detector of toolDetectors) {
+        try {
+          if (detector.match(event)) {
+            const item = detector.createItem(event);
+            pi.appendEntry("evolve-feedback", {
+              problemId: item.problemId,
+              itemId: item.id,
+              status: item.status,
+              detail: item.detail ?? null,
+              timestamp: new Date().toISOString(),
+            });
+          }
+        } catch (e) {
+          console.error(
+            `[evolve-daily] detector ${detector.problemId} error:`,
+            e
+          );
+        }
+      }
+    }
+  );
 }