npm - assistme - Versions diffs - 0.6.7 → 0.6.8 - Mend

assistme 0.6.7 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/{chunk-5F4P6MYZ.js → chunk-NA2HXYJ7.js} +0 -1
package/dist/index.js +39 -148
package/dist/{job-runner-VIGPBGRE.js → job-runner-TEE5OX7H.js} +1 -1
package/package.json +1 -1
package/src/agent/self-analyzer.ts +40 -162

package/dist/{chunk-5F4P6MYZ.js → chunk-NA2HXYJ7.js} RENAMED

@@ -517,6 +517,5 @@ export {
   SkillCreateResultSchema,
   SkillDecisionSchema,
   BrowseSkillRowSchema,
-  SelfAnalysisResultSchema,
   JobRunner
 };

package/dist/index.js CHANGED

@@ -31,7 +31,6 @@ import {
   SHELL_TIMEOUT_MS,
   SKILL_DESCRIPTION_BUDGET_CHARS,
   SKILL_VALIDATION_MAX_TURNS,
-  SelfAnalysisResultSchema,
   SkillCreateResultSchema,
   SkillDecisionSchema,
   SkillRowSchema,
@@ -46,7 +45,7 @@ import {
   setLogHook,
   setLogLevel,
   writeAuthStore
-} from "./chunk-5F4P6MYZ.js";
+} from "./chunk-NA2HXYJ7.js";
 import {
   clearConfig,
   getConfig,
@@ -3845,58 +3844,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
   schema: {
     type: "object",
     properties: {
-      is_perfect: { type: "boolean" },
-      overall_score: { type: "number", minimum: 1, maximum: 10 },
-      task_completion_quality: {
-        type: "object",
-        properties: {
-          score: { type: "number", minimum: 1, maximum: 10 },
-          assessment: { type: "string" }
-        },
-        required: ["score", "assessment"]
-      },
-      improvements: {
-        type: "array",
-        items: {
-          type: "object",
-          properties: {
-            area: { type: "string" },
-            severity: {
-              type: "string",
-              enum: ["critical", "major", "minor", "suggestion"]
-            },
-            description: { type: "string" },
-            suggestion: { type: "string" }
-          },
-          required: ["area", "severity", "description", "suggestion"]
-        }
-      },
-      data_quality: {
-        type: "object",
-        properties: {
-          session_logs_useful: { type: "boolean" },
-          session_logs_gaps: { type: "string" },
-          message_events_useful: { type: "boolean" },
-          message_events_gaps: { type: "string" },
-          conversation_context_useful: { type: "boolean" },
-          conversation_context_gaps: { type: "string" }
-        },
-        required: [
-          "session_logs_useful",
-          "message_events_useful",
-          "conversation_context_useful"
-        ]
-      },
-      summary: { type: "string" }
+      needsImprovement: { type: "boolean" },
+      title: { type: "string" },
+      description: { type: "string" }
     },
-    required: [
-      "is_perfect",
-      "overall_score",
-      "task_completion_quality",
-      "improvements",
-      "data_quality",
-      "summary"
-    ]
+    required: ["needsImprovement", "title", "description"]
   }
 };
 var SELF_ANALYSIS_PROMPT = `You just completed a task as the AssistMe agent. Now critically analyze AssistMe's own implementation \u2014 NOT the user's task itself, but how well AssistMe (the agent system) performed and whether AssistMe's codebase can be improved.
@@ -3920,17 +3872,14 @@ Below you will find:
 - **Tool Failures**: Any tool calls that failed during execution
 ## Instructions
-Analyze all provided data critically. Consider:
-- Are the session logs capturing enough detail for debugging?
-- Do the message events provide sufficient visibility into the agent's decision-making?
-- Is the conversation context giving enough user intent signal?
-- Were tools used efficiently?
-- Could the overall execution flow be improved?
-Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
-The overall_score should be 1-10 where 10 means absolutely perfect.
-Respond with a JSON object now.`;
+Analyze all provided data critically. Respond with a JSON object containing:
+- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
+- "title": a short summary under 100 chars (empty string if needsImprovement is false)
+- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
+  - **Summary**: overall assessment of how AssistMe performed
+  - **Task Completion Quality**: score (1-10) and assessment
+  - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
+  - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
 function truncateToChars(text, maxChars) {
   if (text.length <= maxChars) return text;
   return text.slice(0, maxChars) + "\n... [truncated]";
@@ -4060,32 +4009,7 @@ async function buildAnalysisContext(ctx) {
 `;
   return context;
 }
-async function submitSelfAnalysisFeedback(analysis) {
-  const title = `Self-Analysis: Score ${analysis.overall_score}/10 \u2014 ${analysis.improvements.length} improvement(s)`;
-  const improvementDetails = analysis.improvements.map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}
-   \u2192 ${imp.suggestion}`).join("\n");
-  const dataQualityNotes = [
-    analysis.data_quality.session_logs_gaps ? `Session logs: ${analysis.data_quality.session_logs_gaps}` : null,
-    analysis.data_quality.message_events_gaps ? `Message events: ${analysis.data_quality.message_events_gaps}` : null,
-    analysis.data_quality.conversation_context_gaps ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}` : null
-  ].filter(Boolean).join("\n");
-  let description = `## Summary
-${analysis.summary}
-`;
-  description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)
-${analysis.task_completion_quality.assessment}
-`;
-  description += `## Improvements
-${improvementDetails}
-`;
-  if (dataQualityNotes) {
-    description += `
-## Data Quality Gaps
-${dataQualityNotes}
-`;
-  }
+async function submitSelfAnalysisFeedback(title, description) {
   if (description.length > 4900) {
     description = description.slice(0, 4900) + "\n...[truncated]";
   }
@@ -4106,22 +4030,15 @@ ${dataQualityNotes}
   }
 }
 async function runAnalysisQuery(model, prompt) {
-  const result = await attemptQuery(model, prompt, SELF_ANALYSIS_OUTPUT_FORMAT);
-  if (result) return result;
-  log.info("Self-analysis: retrying without structured output (fallback)");
-  const fallbackResult = await attemptQuery(model, prompt, void 0);
-  return fallbackResult;
-}
-async function attemptQuery(model, prompt, outputFormat) {
   let structuredOutput;
   for await (const message of query2({
     prompt,
     options: {
       model,
-      maxTurns: 10,
+      maxTurns: 1,
       allowedTools: [],
       effort: "medium",
-      ...outputFormat ? { outputFormat } : {}
+      outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
     }
   })) {
     if (message.type === "result") {
@@ -4133,34 +4050,24 @@ async function attemptQuery(model, prompt, outputFormat) {
           `Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
         );
         if (!structuredOutput) {
-          const text = String(successMsg.result ?? "");
-          const parsed = tryParseJson(text);
-          if (parsed) {
-            log.info("Self-analysis: parsed JSON from text result");
-            structuredOutput = parsed;
-          } else {
-            log.warn(
-              `Self-analysis: success but no structured_output. result text: ${text.slice(0, 500)}`
-            );
-          }
+          log.warn(
+            `Self-analysis: success but no structured_output. result text: ${String(successMsg.result ?? "").slice(0, 500)}`
+          );
         }
       } else {
         log.warn(
-          `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String(resultMsg.result ?? "").slice(0, 500)}`
+          `Self-analysis: query returned subtype="${resultMsg.subtype}".`
         );
       }
     }
   }
-  return structuredOutput;
-}
-function tryParseJson(text) {
-  const jsonMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/) || text.match(/(\{[\s\S]*\})/);
-  if (!jsonMatch) return null;
-  try {
-    return JSON.parse(jsonMatch[1]);
-  } catch {
-    return null;
-  }
+  if (!structuredOutput || typeof structuredOutput !== "object") return null;
+  const output = structuredOutput;
+  return {
+    needsImprovement: Boolean(output.needsImprovement),
+    title: String(output.title || ""),
+    description: String(output.description || "")
+  };
 }
 async function analyzeSelfPostTask(opts) {
   const {
@@ -4187,38 +4094,22 @@ async function analyzeSelfPostTask(opts) {
       tokenUsage
     });
     const prompt = `${SELF_ANALYSIS_PROMPT}
-${analysisContext}
-Respond with a JSON object now.`;
+${analysisContext}`;
     const analysisPromise = runAnalysisQuery(model, prompt);
     const timeoutPromise = new Promise(
       (_, reject) => setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1e3}s`)), SELF_ANALYSIS_TIMEOUT_MS)
     );
-    const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
-    let analysis = null;
-    if (structuredOutput) {
-      const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
-      if (result.success) {
-        analysis = result.data;
-      } else {
-        log.warn(
-          `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
-        );
-        log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
-      }
-    }
-    if (!analysis) {
-      log.warn("Self-analysis: no valid structured output");
+    const result = await Promise.race([analysisPromise, timeoutPromise]);
+    if (!result) {
+      log.warn("Self-analysis: no result from query");
       return;
     }
-    log.info(
-      `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
-    );
-    if (!analysis.is_perfect && analysis.improvements.length > 0) {
-      await submitSelfAnalysisFeedback(analysis);
-    } else {
-      log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
+    if (!result.needsImprovement) {
+      log.info("Self-analysis complete: no improvements needed");
+      return;
     }
+    log.info(`Self-analysis complete: improvements found \u2014 ${result.title}`);
+    await submitSelfAnalysisFeedback(result.title, result.description);
   } catch (err) {
     log.warn(`Self-analysis error: ${errorMessage(err)}`);
   }
@@ -7127,7 +7018,7 @@ function registerJobCommands(program2) {
   jobCmd.command("list").description("List your defined jobs").action(async () => {
     try {
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const jobs = await runner.listJobs();
       if (jobs.length === 0) {
@@ -7151,7 +7042,7 @@ function registerJobCommands(program2) {
   jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
     try {
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
       if (runs.length === 0) {
@@ -7190,7 +7081,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
         process.exit(1);
       }
       const userId = await getCurrentUserId();
-      const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
+      const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
       const runner = new JobRunner2();
       const job = await runner.loadJob(name);
       if (!job) {

package/dist/{job-runner-VIGPBGRE.js → job-runner-TEE5OX7H.js} RENAMED

@@ -1,6 +1,6 @@
 import {
   JobRunner
-} from "./chunk-5F4P6MYZ.js";
+} from "./chunk-NA2HXYJ7.js";
 import "./chunk-EPKN2PW5.js";
 export {
   JobRunner

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "assistme",
-  "version": "0.6.7",
+  "version": "0.6.8",
   "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
   "type": "module",
   "main": "dist/index.js",

package/src/agent/self-analyzer.ts CHANGED

@@ -6,10 +6,6 @@ import {
 } from "@anthropic-ai/claude-agent-sdk";
 import { submitFeedback, FeedbackError } from "edsger-feedback";
 import { log } from "../utils/logger.js";
-import {
-  SelfAnalysisResultSchema,
-  type SelfAnalysisResult,
-} from "../utils/schemas.js";
 import { errorMessage } from "../utils/errors.js";
 import {
   getSessionLogs,
@@ -35,58 +31,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
   schema: {
     type: "object",
     properties: {
-      is_perfect: { type: "boolean" },
-      overall_score: { type: "number", minimum: 1, maximum: 10 },
-      task_completion_quality: {
-        type: "object",
-        properties: {
-          score: { type: "number", minimum: 1, maximum: 10 },
-          assessment: { type: "string" },
-        },
-        required: ["score", "assessment"],
-      },
-      improvements: {
-        type: "array",
-        items: {
-          type: "object",
-          properties: {
-            area: { type: "string" },
-            severity: {
-              type: "string",
-              enum: ["critical", "major", "minor", "suggestion"],
-            },
-            description: { type: "string" },
-            suggestion: { type: "string" },
-          },
-          required: ["area", "severity", "description", "suggestion"],
-        },
-      },
-      data_quality: {
-        type: "object",
-        properties: {
-          session_logs_useful: { type: "boolean" },
-          session_logs_gaps: { type: "string" },
-          message_events_useful: { type: "boolean" },
-          message_events_gaps: { type: "string" },
-          conversation_context_useful: { type: "boolean" },
-          conversation_context_gaps: { type: "string" },
-        },
-        required: [
-          "session_logs_useful",
-          "message_events_useful",
-          "conversation_context_useful",
-        ],
-      },
-      summary: { type: "string" },
+      needsImprovement: { type: "boolean" },
+      title: { type: "string" },
+      description: { type: "string" },
     },
-    required: [
-      "is_perfect",
-      "overall_score",
-      "task_completion_quality",
-      "improvements",
-      "data_quality",
-      "summary",
-    ],
+    required: ["needsImprovement", "title", "description"],
   },
 };
@@ -113,17 +62,14 @@ Below you will find:
 - **Tool Failures**: Any tool calls that failed during execution
 ## Instructions
-Analyze all provided data critically. Consider:
-- Are the session logs capturing enough detail for debugging?
-- Do the message events provide sufficient visibility into the agent's decision-making?
-- Is the conversation context giving enough user intent signal?
-- Were tools used efficiently?
-- Could the overall execution flow be improved?
-Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
-The overall_score should be 1-10 where 10 means absolutely perfect.
-Respond with a JSON object now.`;
+Analyze all provided data critically. Respond with a JSON object containing:
+- "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
+- "title": a short summary under 100 chars (empty string if needsImprovement is false)
+- "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
+  - **Summary**: overall assessment of how AssistMe performed
+  - **Task Completion Quality**: score (1-10) and assessment
+  - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
+  - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
 // ── Context Building ────────────────────────────────────────────
@@ -294,35 +240,7 @@ async function buildAnalysisContext(ctx: SelfAnalysisContext): Promise<string> {
 // ── Feedback Submission ─────────────────────────────────────────
-async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise<void> {
-  const title = `Self-Analysis: Score ${analysis.overall_score}/10 — ${analysis.improvements.length} improvement(s)`;
-  const improvementDetails = analysis.improvements
-    .map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}\n   → ${imp.suggestion}`)
-    .join("\n");
-  const dataQualityNotes = [
-    analysis.data_quality.session_logs_gaps
-      ? `Session logs: ${analysis.data_quality.session_logs_gaps}`
-      : null,
-    analysis.data_quality.message_events_gaps
-      ? `Message events: ${analysis.data_quality.message_events_gaps}`
-      : null,
-    analysis.data_quality.conversation_context_gaps
-      ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}`
-      : null,
-  ]
-    .filter(Boolean)
-    .join("\n");
-  let description = `## Summary\n${analysis.summary}\n\n`;
-  description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)\n${analysis.task_completion_quality.assessment}\n\n`;
-  description += `## Improvements\n${improvementDetails}\n`;
-  if (dataQualityNotes) {
-    description += `\n## Data Quality Gaps\n${dataQualityNotes}\n`;
-  }
+async function submitSelfAnalysisFeedback(title: string, description: string): Promise<void> {
   // Truncate to fit edsger-feedback's 5000 char limit
   if (description.length > 4900) {
     description = description.slice(0, 4900) + "\n...[truncated]";
@@ -347,34 +265,23 @@ async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise
 // ── Query Runner ────────────────────────────────────────────────
-async function runAnalysisQuery(model: string, prompt: string): Promise<unknown> {
-  // First attempt: structured output
-  const result = await attemptQuery(model, prompt, SELF_ANALYSIS_OUTPUT_FORMAT);
-  if (result) return result;
-  // Fallback: no structured output, parse JSON from text response
-  log.info("Self-analysis: retrying without structured output (fallback)");
-  const fallbackResult = await attemptQuery(model, prompt, undefined);
-  return fallbackResult;
+interface AnalysisResult {
+  needsImprovement: boolean;
+  title: string;
+  description: string;
 }
-async function attemptQuery(
-  model: string,
-  prompt: string,
-  outputFormat: OutputFormat | undefined
-): Promise<unknown> {
+async function runAnalysisQuery(model: string, prompt: string): Promise<AnalysisResult | null> {
   let structuredOutput: unknown;
-  // Use independent query() instead of session resume to avoid
-  // conflicts with skill evaluation which also resumes the session
   for await (const message of query({
     prompt,
     options: {
       model,
-      maxTurns: 10,
+      maxTurns: 1,
       allowedTools: [],
       effort: "medium",
-      ...(outputFormat ? { outputFormat } : {}),
+      outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
     },
   })) {
     if (message.type === "result") {
@@ -386,38 +293,26 @@ async function attemptQuery(
           `Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
         );
         if (!structuredOutput) {
-          // Try to parse JSON from text result as fallback
-          const text = String((successMsg as any).result ?? "");
-          const parsed = tryParseJson(text);
-          if (parsed) {
-            log.info("Self-analysis: parsed JSON from text result");
-            structuredOutput = parsed;
-          } else {
-            log.warn(
-              `Self-analysis: success but no structured_output. result text: ${text.slice(0, 500)}`
-            );
-          }
+          log.warn(
+            `Self-analysis: success but no structured_output. result text: ${String((successMsg as any).result ?? "").slice(0, 500)}`
+          );
         }
       } else {
         log.warn(
-          `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String((resultMsg as any).result ?? "").slice(0, 500)}`
+          `Self-analysis: query returned subtype="${resultMsg.subtype}".`
         );
       }
     }
   }
-  return structuredOutput;
-}
+  if (!structuredOutput || typeof structuredOutput !== "object") return null;
-function tryParseJson(text: string): unknown {
-  // Extract JSON from text that may contain markdown code fences
-  const jsonMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/) || text.match(/(\{[\s\S]*\})/);
-  if (!jsonMatch) return null;
-  try {
-    return JSON.parse(jsonMatch[1]);
-  } catch {
-    return null;
-  }
+  const output = structuredOutput as Record<string, unknown>;
+  return {
+    needsImprovement: Boolean(output.needsImprovement),
+    title: String(output.title || ""),
+    description: String(output.description || ""),
+  };
 }
 // ── Main Entry Point ────────────────────────────────────────────
@@ -467,7 +362,7 @@ export async function analyzeSelfPostTask(opts: {
       tokenUsage,
     });
-    const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}\n\nRespond with a JSON object now.`;
+    const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}`;
     // Race the analysis against a timeout to avoid hanging forever
     const analysisPromise = runAnalysisQuery(model, prompt);
@@ -475,37 +370,20 @@ export async function analyzeSelfPostTask(opts: {
       setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1000}s`)), SELF_ANALYSIS_TIMEOUT_MS)
     );
-    const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
+    const result = await Promise.race([analysisPromise, timeoutPromise]);
-    // Validate against Zod schema
-    let analysis: SelfAnalysisResult | null = null;
-    if (structuredOutput) {
-      const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
-      if (result.success) {
-        analysis = result.data;
-      } else {
-        log.warn(
-          `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
-        );
-        log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
-      }
+    if (!result) {
+      log.warn("Self-analysis: no result from query");
+      return;
     }
-    if (!analysis) {
-      log.warn("Self-analysis: no valid structured output");
+    if (!result.needsImprovement) {
+      log.info("Self-analysis complete: no improvements needed");
       return;
     }
-    log.info(
-      `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
-    );
-    // Only submit feedback if not perfect
-    if (!analysis.is_perfect && analysis.improvements.length > 0) {
-      await submitSelfAnalysisFeedback(analysis);
-    } else {
-      log.debug("Self-analysis: no improvements to report — skipping feedback");
-    }
+    log.info(`Self-analysis complete: improvements found — ${result.title}`);
+    await submitSelfAnalysisFeedback(result.title, result.description);
   } catch (err) {
     log.warn(`Self-analysis error: ${errorMessage(err)}`);
   }