npm - assistme - Versions diffs - 0.6.6 → 0.6.8 - Mend

assistme 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/{chunk-5F4P6MYZ.js → chunk-NA2HXYJ7.js} +0 -1
package/dist/index.js +35 -121
package/dist/{job-runner-VIGPBGRE.js → job-runner-TEE5OX7H.js} +1 -1
package/package.json +1 -1
package/src/agent/self-analyzer.ts +39 -128

package/dist/{chunk-5F4P6MYZ.js → chunk-NA2HXYJ7.js} RENAMED Viewed

@@ -517,6 +517,5 @@ export {
   SkillCreateResultSchema,
   SkillDecisionSchema,
   BrowseSkillRowSchema,
-  SelfAnalysisResultSchema,
   JobRunner
 };

package/dist/index.js CHANGED Viewed

@@ -31,7 +31,6 @@ import {
   SHELL_TIMEOUT_MS,
   SKILL_DESCRIPTION_BUDGET_CHARS,
   SKILL_VALIDATION_MAX_TURNS,
-  SelfAnalysisResultSchema,
   SkillCreateResultSchema,
   SkillDecisionSchema,
   SkillRowSchema,
@@ -46,7 +45,7 @@ import {
   setLogHook,
   setLogLevel,
   writeAuthStore
-} from "./chunk-5F4P6MYZ.js";
+} from "./chunk-NA2HXYJ7.js";
 import {
   clearConfig,
   getConfig,
@@ -3845,58 +3844,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
   schema: {
     type: "object",
     properties: {
-      is_perfect: { type: "boolean" },
-      overall_score: { type: "number", minimum: 1, maximum: 10 },
-      task_completion_quality: {
-        type: "object",
-        properties: {
-          score: { type: "number", minimum: 1, maximum: 10 },
-          assessment: { type: "string" }
-        },
-        required: ["score", "assessment"]
-      },
-      improvements: {
-        type: "array",
-        items: {
-          type: "object",
-          properties: {
-            area: { type: "string" },
-            severity: {
-              type: "string",
-              enum: ["critical", "major", "minor", "suggestion"]
-            },
-            description: { type: "string" },
-            suggestion: { type: "string" }
-          },
-          required: ["area", "severity", "description", "suggestion"]
-        }
-      },
-      data_quality: {
-        type: "object",
-        properties: {
-          session_logs_useful: { type: "boolean" },
-          session_logs_gaps: { type: ["string", "null"] },
-          message_events_useful: { type: "boolean" },
-          message_events_gaps: { type: ["string", "null"] },
-          conversation_context_useful: { type: "boolean" },
-          conversation_context_gaps: { type: ["string", "null"] }
-        },
-        required: [
-          "session_logs_useful",
-          "message_events_useful",
-          "conversation_context_useful"
-        ]
-      },
-      summary: { type: "string" }
+      needsImprovement: { type: "boolean" },
+      title: { type: "string" },
+      description: { type: "string" }
     },
-    required: [
-      "is_perfect",
-      "overall_score",
-      "task_completion_quality",
-      "improvements",
-      "data_quality",
-      "summary"
-    ]
+    required: ["needsImprovement", "title", "description"]
   }
 };
 var SELF_ANALYSIS_PROMPT = `You just completed a task as the AssistMe agent. Now critically analyze AssistMe's own implementation \u2014 NOT the user's task itself, but how well AssistMe (the agent system) performed and whether AssistMe's codebase can be improved.
@@ -3920,17 +3872,14 @@ Below you will find:
 - **Tool Failures**: Any tool calls that failed during execution
 ## Instructions
-Analyze all provided data critically. Consider:
-- Are the session logs capturing enough detail for debugging?
-- Do the message events provide sufficient visibility into the agent's decision-making?
-- Is the conversation context giving enough user intent signal?
-- Were tools used efficiently?
-- Could the overall execution flow be improved?
-Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
-The overall_score should be 1-10 where 10 means absolutely perfect.
-Respond with a JSON object now.`;
+Analyze all provided data critically. Respond with a JSON object containing:
+- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
+- "title": a short summary under 100 chars (empty string if needsImprovement is false)
+- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
+  - **Summary**: overall assessment of how AssistMe performed
+  - **Task Completion Quality**: score (1-10) and assessment
+  - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
+  - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
 function truncateToChars(text, maxChars) {
   if (text.length <= maxChars) return text;
   return text.slice(0, maxChars) + "\n... [truncated]";
@@ -4060,32 +4009,7 @@ async function buildAnalysisContext(ctx) {
 `;
   return context;
 }
-async function submitSelfAnalysisFeedback(analysis) {
-  const title = `Self-Analysis: Score ${analysis.overall_score}/10 \u2014 ${analysis.improvements.length} improvement(s)`;
-  const improvementDetails = analysis.improvements.map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}
-   \u2192 ${imp.suggestion}`).join("\n");
-  const dataQualityNotes = [
-    analysis.data_quality.session_logs_gaps ? `Session logs: ${analysis.data_quality.session_logs_gaps}` : null,
-    analysis.data_quality.message_events_gaps ? `Message events: ${analysis.data_quality.message_events_gaps}` : null,
-    analysis.data_quality.conversation_context_gaps ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}` : null
-  ].filter(Boolean).join("\n");
-  let description = `## Summary
-${analysis.summary}
-`;
-  description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)
-${analysis.task_completion_quality.assessment}
-`;
-  description += `## Improvements
-${improvementDetails}
-`;
-  if (dataQualityNotes) {
-    description += `
-## Data Quality Gaps
-${dataQualityNotes}
-`;
-  }
+async function submitSelfAnalysisFeedback(title, description) {
   if (description.length > 4900) {
     description = description.slice(0, 4900) + "\n...[truncated]";
   }
@@ -4111,7 +4035,7 @@ async function runAnalysisQuery(model, prompt) {
     prompt,
     options: {
       model,
-      maxTurns: 10,
+      maxTurns: 1,
       allowedTools: [],
       effort: "medium",
       outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
@@ -4132,12 +4056,18 @@ async function runAnalysisQuery(model, prompt) {
         }
       } else {
         log.warn(
-          `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String(resultMsg.result ?? "").slice(0, 500)}`
+          `Self-analysis: query returned subtype="${resultMsg.subtype}".`
         );
       }
     }
   }
-  return structuredOutput;
+  if (!structuredOutput || typeof structuredOutput !== "object") return null;
+  const output = structuredOutput;
+  return {
+    needsImprovement: Boolean(output.needsImprovement),
+    title: String(output.title || ""),
+    description: String(output.description || "")
+  };
 }
 async function analyzeSelfPostTask(opts) {
   const {
@@ -4164,38 +4094,22 @@ async function analyzeSelfPostTask(opts) {
       tokenUsage
     });
     const prompt = `${SELF_ANALYSIS_PROMPT}
-${analysisContext}
-Respond with a JSON object now.`;
+${analysisContext}`;
     const analysisPromise = runAnalysisQuery(model, prompt);
     const timeoutPromise = new Promise(
       (_, reject) => setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1e3}s`)), SELF_ANALYSIS_TIMEOUT_MS)
     );
-    const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
-    let analysis = null;
-    if (structuredOutput) {
-      const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
-      if (result.success) {
-        analysis = result.data;
-      } else {
-        log.warn(
-          `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
-        );
-        log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
-      }
-    }
-    if (!analysis) {
-      log.warn("Self-analysis: no valid structured output");
+    const result = await Promise.race([analysisPromise, timeoutPromise]);
+    if (!result) {
+      log.warn("Self-analysis: no result from query");
       return;
     }
-    log.info(
-      `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
-    );
-    if (!analysis.is_perfect && analysis.improvements.length > 0) {
-      await submitSelfAnalysisFeedback(analysis);
-    } else {
-      log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
+    if (!result.needsImprovement) {
+      log.info("Self-analysis complete: no improvements needed");
+      return;
     }
+    log.info(`Self-analysis complete: improvements found \u2014 ${result.title}`);
+    await submitSelfAnalysisFeedback(result.title, result.description);
   } catch (err) {
     log.warn(`Self-analysis error: ${errorMessage(err)}`);
   }
@@ -7104,7 +7018,7 @@ function registerJobCommands(program2) {
   jobCmd.command("list").description("List your defined jobs").action(async () => {
     try {
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const jobs = await runner.listJobs();
       if (jobs.length === 0) {
@@ -7128,7 +7042,7 @@ function registerJobCommands(program2) {
   jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
     try {
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
       if (runs.length === 0) {
@@ -7167,7 +7081,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
         process.exit(1);
       }
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const job = await runner.loadJob(name);
       if (!job) {

package/dist/{job-runner-VIGPBGRE.js → job-runner-TEE5OX7H.js} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   JobRunner
-} from "./chunk-5F4P6MYZ.js";
+} from "./chunk-NA2HXYJ7.js";
 import "./chunk-EPKN2PW5.js";
 export {
   JobRunner

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "assistme",
-  "version": "0.6.6",
+  "version": "0.6.8",
   "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
   "type": "module",
   "main": "dist/index.js",

package/src/agent/self-analyzer.ts CHANGED Viewed

@@ -6,10 +6,6 @@ import {
 } from "@anthropic-ai/claude-agent-sdk";
 import { submitFeedback, FeedbackError } from "edsger-feedback";
 import { log } from "../utils/logger.js";
-import {
-  SelfAnalysisResultSchema,
-  type SelfAnalysisResult,
-} from "../utils/schemas.js";
 import { errorMessage } from "../utils/errors.js";
 import {
   getSessionLogs,
@@ -35,58 +31,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
   schema: {
     type: "object",
     properties: {
-      is_perfect: { type: "boolean" },
-      overall_score: { type: "number", minimum: 1, maximum: 10 },
-      task_completion_quality: {
-        type: "object",
-        properties: {
-          score: { type: "number", minimum: 1, maximum: 10 },
-          assessment: { type: "string" },
-        },
-        required: ["score", "assessment"],
-      },
-      improvements: {
-        type: "array",
-        items: {
-          type: "object",
-          properties: {
-            area: { type: "string" },
-            severity: {
-              type: "string",
-              enum: ["critical", "major", "minor", "suggestion"],
-            },
-            description: { type: "string" },
-            suggestion: { type: "string" },
-          },
-          required: ["area", "severity", "description", "suggestion"],
-        },
-      },
-      data_quality: {
-        type: "object",
-        properties: {
-          session_logs_useful: { type: "boolean" },
-          session_logs_gaps: { type: ["string", "null"] },
-          message_events_useful: { type: "boolean" },
-          message_events_gaps: { type: ["string", "null"] },
-          conversation_context_useful: { type: "boolean" },
-          conversation_context_gaps: { type: ["string", "null"] },
-        },
-        required: [
-          "session_logs_useful",
-          "message_events_useful",
-          "conversation_context_useful",
-        ],
-      },
-      summary: { type: "string" },
+      needsImprovement: { type: "boolean" },
+      title: { type: "string" },
+      description: { type: "string" },
     },
-    required: [
-      "is_perfect",
-      "overall_score",
-      "task_completion_quality",
-      "improvements",
-      "data_quality",
-      "summary",
-    ],
+    required: ["needsImprovement", "title", "description"],
   },
 };
@@ -113,17 +62,14 @@ Below you will find:
 - **Tool Failures**: Any tool calls that failed during execution
 ## Instructions
-Analyze all provided data critically. Consider:
-- Are the session logs capturing enough detail for debugging?
-- Do the message events provide sufficient visibility into the agent's decision-making?
-- Is the conversation context giving enough user intent signal?
-- Were tools used efficiently?
-- Could the overall execution flow be improved?
-Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
-The overall_score should be 1-10 where 10 means absolutely perfect.
-Respond with a JSON object now.`;
+Analyze all provided data critically. Respond with a JSON object containing:
+- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
+- "title": a short summary under 100 chars (empty string if needsImprovement is false)
+- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
+  - **Summary**: overall assessment of how AssistMe performed
+  - **Task Completion Quality**: score (1-10) and assessment
+  - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
+  - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
 // ── Context Building ────────────────────────────────────────────
@@ -294,35 +240,7 @@ async function buildAnalysisContext(ctx: SelfAnalysisContext): Promise<string> {
 // ── Feedback Submission ─────────────────────────────────────────
-async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise<void> {
-  const title = `Self-Analysis: Score ${analysis.overall_score}/10 — ${analysis.improvements.length} improvement(s)`;
-  const improvementDetails = analysis.improvements
-    .map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}\n   → ${imp.suggestion}`)
-    .join("\n");
-  const dataQualityNotes = [
-    analysis.data_quality.session_logs_gaps
-      ? `Session logs: ${analysis.data_quality.session_logs_gaps}`
-      : null,
-    analysis.data_quality.message_events_gaps
-      ? `Message events: ${analysis.data_quality.message_events_gaps}`
-      : null,
-    analysis.data_quality.conversation_context_gaps
-      ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}`
-      : null,
-  ]
-    .filter(Boolean)
-    .join("\n");
-  let description = `## Summary\n${analysis.summary}\n\n`;
-  description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)\n${analysis.task_completion_quality.assessment}\n\n`;
-  description += `## Improvements\n${improvementDetails}\n`;
-  if (dataQualityNotes) {
-    description += `\n## Data Quality Gaps\n${dataQualityNotes}\n`;
-  }
+async function submitSelfAnalysisFeedback(title: string, description: string): Promise<void> {
   // Truncate to fit edsger-feedback's 5000 char limit
   if (description.length > 4900) {
     description = description.slice(0, 4900) + "\n...[truncated]";
@@ -347,16 +265,20 @@ async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise
 // ── Query Runner ────────────────────────────────────────────────
-async function runAnalysisQuery(model: string, prompt: string): Promise<unknown> {
+interface AnalysisResult {
+  needsImprovement: boolean;
+  title: string;
+  description: string;
+}
+async function runAnalysisQuery(model: string, prompt: string): Promise<AnalysisResult | null> {
   let structuredOutput: unknown;
-  // Use independent query() instead of session resume to avoid
-  // conflicts with skill evaluation which also resumes the session
   for await (const message of query({
     prompt,
     options: {
       model,
-      maxTurns: 10,
+      maxTurns: 1,
       allowedTools: [],
       effort: "medium",
       outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
@@ -371,20 +293,26 @@ async function runAnalysisQuery(model: string, prompt: string): Promise<unknown>
           `Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
         );
         if (!structuredOutput) {
-          // structured_output can be undefined even on success — log the text result
           log.warn(
             `Self-analysis: success but no structured_output. result text: ${String((successMsg as any).result ?? "").slice(0, 500)}`
           );
         }
       } else {
         log.warn(
-          `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String((resultMsg as any).result ?? "").slice(0, 500)}`
+          `Self-analysis: query returned subtype="${resultMsg.subtype}".`
         );
       }
     }
   }
-  return structuredOutput;
+  if (!structuredOutput || typeof structuredOutput !== "object") return null;
+  const output = structuredOutput as Record<string, unknown>;
+  return {
+    needsImprovement: Boolean(output.needsImprovement),
+    title: String(output.title || ""),
+    description: String(output.description || ""),
+  };
 }
 // ── Main Entry Point ────────────────────────────────────────────
@@ -434,7 +362,7 @@ export async function analyzeSelfPostTask(opts: {
       tokenUsage,
     });
-    const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}\n\nRespond with a JSON object now.`;
+    const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}`;
     // Race the analysis against a timeout to avoid hanging forever
     const analysisPromise = runAnalysisQuery(model, prompt);
@@ -442,37 +370,20 @@ export async function analyzeSelfPostTask(opts: {
       setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1000}s`)), SELF_ANALYSIS_TIMEOUT_MS)
     );
-    const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
+    const result = await Promise.race([analysisPromise, timeoutPromise]);
-    // Validate against Zod schema
-    let analysis: SelfAnalysisResult | null = null;
-    if (structuredOutput) {
-      const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
-      if (result.success) {
-        analysis = result.data;
-      } else {
-        log.warn(
-          `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
-        );
-        log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
-      }
+    if (!result) {
+      log.warn("Self-analysis: no result from query");
+      return;
     }
-    if (!analysis) {
-      log.warn("Self-analysis: no valid structured output");
+    if (!result.needsImprovement) {
+      log.info("Self-analysis complete: no improvements needed");
       return;
     }
-    log.info(
-      `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
-    );
-    // Only submit feedback if not perfect
-    if (!analysis.is_perfect && analysis.improvements.length > 0) {
-      await submitSelfAnalysisFeedback(analysis);
-    } else {
-      log.debug("Self-analysis: no improvements to report — skipping feedback");
-    }
+    log.info(`Self-analysis complete: improvements found — ${result.title}`);
+    await submitSelfAnalysisFeedback(result.title, result.description);
   } catch (err) {
     log.warn(`Self-analysis error: ${errorMessage(err)}`);
   }