assistme 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -517,6 +517,5 @@ export {
517
517
  SkillCreateResultSchema,
518
518
  SkillDecisionSchema,
519
519
  BrowseSkillRowSchema,
520
- SelfAnalysisResultSchema,
521
520
  JobRunner
522
521
  };
package/dist/index.js CHANGED
@@ -31,7 +31,6 @@ import {
31
31
  SHELL_TIMEOUT_MS,
32
32
  SKILL_DESCRIPTION_BUDGET_CHARS,
33
33
  SKILL_VALIDATION_MAX_TURNS,
34
- SelfAnalysisResultSchema,
35
34
  SkillCreateResultSchema,
36
35
  SkillDecisionSchema,
37
36
  SkillRowSchema,
@@ -46,7 +45,7 @@ import {
46
45
  setLogHook,
47
46
  setLogLevel,
48
47
  writeAuthStore
49
- } from "./chunk-5F4P6MYZ.js";
48
+ } from "./chunk-NA2HXYJ7.js";
50
49
  import {
51
50
  clearConfig,
52
51
  getConfig,
@@ -3845,58 +3844,11 @@ var SELF_ANALYSIS_OUTPUT_FORMAT = {
3845
3844
  schema: {
3846
3845
  type: "object",
3847
3846
  properties: {
3848
- is_perfect: { type: "boolean" },
3849
- overall_score: { type: "number", minimum: 1, maximum: 10 },
3850
- task_completion_quality: {
3851
- type: "object",
3852
- properties: {
3853
- score: { type: "number", minimum: 1, maximum: 10 },
3854
- assessment: { type: "string" }
3855
- },
3856
- required: ["score", "assessment"]
3857
- },
3858
- improvements: {
3859
- type: "array",
3860
- items: {
3861
- type: "object",
3862
- properties: {
3863
- area: { type: "string" },
3864
- severity: {
3865
- type: "string",
3866
- enum: ["critical", "major", "minor", "suggestion"]
3867
- },
3868
- description: { type: "string" },
3869
- suggestion: { type: "string" }
3870
- },
3871
- required: ["area", "severity", "description", "suggestion"]
3872
- }
3873
- },
3874
- data_quality: {
3875
- type: "object",
3876
- properties: {
3877
- session_logs_useful: { type: "boolean" },
3878
- session_logs_gaps: { type: ["string", "null"] },
3879
- message_events_useful: { type: "boolean" },
3880
- message_events_gaps: { type: ["string", "null"] },
3881
- conversation_context_useful: { type: "boolean" },
3882
- conversation_context_gaps: { type: ["string", "null"] }
3883
- },
3884
- required: [
3885
- "session_logs_useful",
3886
- "message_events_useful",
3887
- "conversation_context_useful"
3888
- ]
3889
- },
3890
- summary: { type: "string" }
3847
+ needsImprovement: { type: "boolean" },
3848
+ title: { type: "string" },
3849
+ description: { type: "string" }
3891
3850
  },
3892
- required: [
3893
- "is_perfect",
3894
- "overall_score",
3895
- "task_completion_quality",
3896
- "improvements",
3897
- "data_quality",
3898
- "summary"
3899
- ]
3851
+ required: ["needsImprovement", "title", "description"]
3900
3852
  }
3901
3853
  };
3902
3854
  var SELF_ANALYSIS_PROMPT = `You just completed a task as the AssistMe agent. Now critically analyze AssistMe's own implementation \u2014 NOT the user's task itself, but how well AssistMe (the agent system) performed and whether AssistMe's codebase can be improved.
@@ -3920,17 +3872,14 @@ Below you will find:
3920
3872
  - **Tool Failures**: Any tool calls that failed during execution
3921
3873
 
3922
3874
  ## Instructions
3923
- Analyze all provided data critically. Consider:
3924
- - Are the session logs capturing enough detail for debugging?
3925
- - Do the message events provide sufficient visibility into the agent's decision-making?
3926
- - Is the conversation context giving enough user intent signal?
3927
- - Were tools used efficiently?
3928
- - Could the overall execution flow be improved?
3929
-
3930
- Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
3931
- The overall_score should be 1-10 where 10 means absolutely perfect.
3932
-
3933
- Respond with a JSON object now.`;
3875
+ Analyze all provided data critically. Respond with a JSON object containing:
3876
+ - "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
3877
+ - "title": a short summary under 100 chars (empty string if needsImprovement is false)
3878
+ - "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
3879
+ - **Summary**: overall assessment of how AssistMe performed
3880
+ - **Task Completion Quality**: score (1-10) and assessment
3881
+ - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
3882
+ - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
3934
3883
  function truncateToChars(text, maxChars) {
3935
3884
  if (text.length <= maxChars) return text;
3936
3885
  return text.slice(0, maxChars) + "\n... [truncated]";
@@ -4060,32 +4009,7 @@ async function buildAnalysisContext(ctx) {
4060
4009
  `;
4061
4010
  return context;
4062
4011
  }
4063
- async function submitSelfAnalysisFeedback(analysis) {
4064
- const title = `Self-Analysis: Score ${analysis.overall_score}/10 \u2014 ${analysis.improvements.length} improvement(s)`;
4065
- const improvementDetails = analysis.improvements.map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}
4066
- \u2192 ${imp.suggestion}`).join("\n");
4067
- const dataQualityNotes = [
4068
- analysis.data_quality.session_logs_gaps ? `Session logs: ${analysis.data_quality.session_logs_gaps}` : null,
4069
- analysis.data_quality.message_events_gaps ? `Message events: ${analysis.data_quality.message_events_gaps}` : null,
4070
- analysis.data_quality.conversation_context_gaps ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}` : null
4071
- ].filter(Boolean).join("\n");
4072
- let description = `## Summary
4073
- ${analysis.summary}
4074
-
4075
- `;
4076
- description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)
4077
- ${analysis.task_completion_quality.assessment}
4078
-
4079
- `;
4080
- description += `## Improvements
4081
- ${improvementDetails}
4082
- `;
4083
- if (dataQualityNotes) {
4084
- description += `
4085
- ## Data Quality Gaps
4086
- ${dataQualityNotes}
4087
- `;
4088
- }
4012
+ async function submitSelfAnalysisFeedback(title, description) {
4089
4013
  if (description.length > 4900) {
4090
4014
  description = description.slice(0, 4900) + "\n...[truncated]";
4091
4015
  }
@@ -4111,7 +4035,7 @@ async function runAnalysisQuery(model, prompt) {
4111
4035
  prompt,
4112
4036
  options: {
4113
4037
  model,
4114
- maxTurns: 10,
4038
+ maxTurns: 1,
4115
4039
  allowedTools: [],
4116
4040
  effort: "medium",
4117
4041
  outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
@@ -4132,12 +4056,18 @@ async function runAnalysisQuery(model, prompt) {
4132
4056
  }
4133
4057
  } else {
4134
4058
  log.warn(
4135
- `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String(resultMsg.result ?? "").slice(0, 500)}`
4059
+ `Self-analysis: query returned subtype="${resultMsg.subtype}".`
4136
4060
  );
4137
4061
  }
4138
4062
  }
4139
4063
  }
4140
- return structuredOutput;
4064
+ if (!structuredOutput || typeof structuredOutput !== "object") return null;
4065
+ const output = structuredOutput;
4066
+ return {
4067
+ needsImprovement: Boolean(output.needsImprovement),
4068
+ title: String(output.title || ""),
4069
+ description: String(output.description || "")
4070
+ };
4141
4071
  }
4142
4072
  async function analyzeSelfPostTask(opts) {
4143
4073
  const {
@@ -4164,38 +4094,22 @@ async function analyzeSelfPostTask(opts) {
4164
4094
  tokenUsage
4165
4095
  });
4166
4096
  const prompt = `${SELF_ANALYSIS_PROMPT}
4167
- ${analysisContext}
4168
-
4169
- Respond with a JSON object now.`;
4097
+ ${analysisContext}`;
4170
4098
  const analysisPromise = runAnalysisQuery(model, prompt);
4171
4099
  const timeoutPromise = new Promise(
4172
4100
  (_, reject) => setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1e3}s`)), SELF_ANALYSIS_TIMEOUT_MS)
4173
4101
  );
4174
- const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
4175
- let analysis = null;
4176
- if (structuredOutput) {
4177
- const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
4178
- if (result.success) {
4179
- analysis = result.data;
4180
- } else {
4181
- log.warn(
4182
- `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
4183
- );
4184
- log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
4185
- }
4186
- }
4187
- if (!analysis) {
4188
- log.warn("Self-analysis: no valid structured output");
4102
+ const result = await Promise.race([analysisPromise, timeoutPromise]);
4103
+ if (!result) {
4104
+ log.warn("Self-analysis: no result from query");
4189
4105
  return;
4190
4106
  }
4191
- log.info(
4192
- `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
4193
- );
4194
- if (!analysis.is_perfect && analysis.improvements.length > 0) {
4195
- await submitSelfAnalysisFeedback(analysis);
4196
- } else {
4197
- log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
4107
+ if (!result.needsImprovement) {
4108
+ log.info("Self-analysis complete: no improvements needed");
4109
+ return;
4198
4110
  }
4111
+ log.info(`Self-analysis complete: improvements found \u2014 ${result.title}`);
4112
+ await submitSelfAnalysisFeedback(result.title, result.description);
4199
4113
  } catch (err) {
4200
4114
  log.warn(`Self-analysis error: ${errorMessage(err)}`);
4201
4115
  }
@@ -7104,7 +7018,7 @@ function registerJobCommands(program2) {
7104
7018
  jobCmd.command("list").description("List your defined jobs").action(async () => {
7105
7019
  try {
7106
7020
  const userId = await getCurrentUserId();
7107
- const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
7021
+ const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
7108
7022
  const runner = new JobRunner2();
7109
7023
  const jobs = await runner.listJobs();
7110
7024
  if (jobs.length === 0) {
@@ -7128,7 +7042,7 @@ function registerJobCommands(program2) {
7128
7042
  jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
7129
7043
  try {
7130
7044
  const userId = await getCurrentUserId();
7131
- const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
7045
+ const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
7132
7046
  const runner = new JobRunner2();
7133
7047
  const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
7134
7048
  if (runs.length === 0) {
@@ -7167,7 +7081,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
7167
7081
  process.exit(1);
7168
7082
  }
7169
7083
  const userId = await getCurrentUserId();
7170
- const { JobRunner: JobRunner2 } = await import("./job-runner-VIGPBGRE.js");
7084
+ const { JobRunner: JobRunner2 } = await import("./job-runner-TEE5OX7H.js");
7171
7085
  const runner = new JobRunner2();
7172
7086
  const job = await runner.loadJob(name);
7173
7087
  if (!job) {
@@ -1,6 +1,6 @@
1
1
  import {
2
2
  JobRunner
3
- } from "./chunk-5F4P6MYZ.js";
3
+ } from "./chunk-NA2HXYJ7.js";
4
4
  import "./chunk-EPKN2PW5.js";
5
5
  export {
6
6
  JobRunner
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "assistme",
3
- "version": "0.6.6",
3
+ "version": "0.6.8",
4
4
  "description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -6,10 +6,6 @@ import {
6
6
  } from "@anthropic-ai/claude-agent-sdk";
7
7
  import { submitFeedback, FeedbackError } from "edsger-feedback";
8
8
  import { log } from "../utils/logger.js";
9
- import {
10
- SelfAnalysisResultSchema,
11
- type SelfAnalysisResult,
12
- } from "../utils/schemas.js";
13
9
  import { errorMessage } from "../utils/errors.js";
14
10
  import {
15
11
  getSessionLogs,
@@ -35,58 +31,11 @@ const SELF_ANALYSIS_OUTPUT_FORMAT: OutputFormat = {
35
31
  schema: {
36
32
  type: "object",
37
33
  properties: {
38
- is_perfect: { type: "boolean" },
39
- overall_score: { type: "number", minimum: 1, maximum: 10 },
40
- task_completion_quality: {
41
- type: "object",
42
- properties: {
43
- score: { type: "number", minimum: 1, maximum: 10 },
44
- assessment: { type: "string" },
45
- },
46
- required: ["score", "assessment"],
47
- },
48
- improvements: {
49
- type: "array",
50
- items: {
51
- type: "object",
52
- properties: {
53
- area: { type: "string" },
54
- severity: {
55
- type: "string",
56
- enum: ["critical", "major", "minor", "suggestion"],
57
- },
58
- description: { type: "string" },
59
- suggestion: { type: "string" },
60
- },
61
- required: ["area", "severity", "description", "suggestion"],
62
- },
63
- },
64
- data_quality: {
65
- type: "object",
66
- properties: {
67
- session_logs_useful: { type: "boolean" },
68
- session_logs_gaps: { type: ["string", "null"] },
69
- message_events_useful: { type: "boolean" },
70
- message_events_gaps: { type: ["string", "null"] },
71
- conversation_context_useful: { type: "boolean" },
72
- conversation_context_gaps: { type: ["string", "null"] },
73
- },
74
- required: [
75
- "session_logs_useful",
76
- "message_events_useful",
77
- "conversation_context_useful",
78
- ],
79
- },
80
- summary: { type: "string" },
34
+ needsImprovement: { type: "boolean" },
35
+ title: { type: "string" },
36
+ description: { type: "string" },
81
37
  },
82
- required: [
83
- "is_perfect",
84
- "overall_score",
85
- "task_completion_quality",
86
- "improvements",
87
- "data_quality",
88
- "summary",
89
- ],
38
+ required: ["needsImprovement", "title", "description"],
90
39
  },
91
40
  };
92
41
 
@@ -113,17 +62,14 @@ Below you will find:
113
62
  - **Tool Failures**: Any tool calls that failed during execution
114
63
 
115
64
  ## Instructions
116
- Analyze all provided data critically. Consider:
117
- - Are the session logs capturing enough detail for debugging?
118
- - Do the message events provide sufficient visibility into the agent's decision-making?
119
- - Is the conversation context giving enough user intent signal?
120
- - Were tools used efficiently?
121
- - Could the overall execution flow be improved?
122
-
123
- Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).
124
- The overall_score should be 1-10 where 10 means absolutely perfect.
125
-
126
- Respond with a JSON object now.`;
65
+ Analyze all provided data critically. Respond with a JSON object containing:
66
+ - "needsImprovement": set to false ONLY if the task was handled perfectly with zero improvements, true otherwise
67
+ - "title": a short summary under 100 chars (empty string if needsImprovement is false)
68
+ - "description": a detailed markdown report (empty string if needsImprovement is false) that includes:
69
+ - **Summary**: overall assessment of how AssistMe performed
70
+ - **Task Completion Quality**: score (1-10) and assessment
71
+ - **Improvements**: numbered list, each with severity (critical/major/minor/suggestion), area, description, and suggestion
72
+ - **Data Quality Gaps**: any gaps in session logs, message events, or conversation context that limited your analysis`;
127
73
 
128
74
  // ── Context Building ────────────────────────────────────────────
129
75
 
@@ -294,35 +240,7 @@ async function buildAnalysisContext(ctx: SelfAnalysisContext): Promise<string> {
294
240
 
295
241
  // ── Feedback Submission ─────────────────────────────────────────
296
242
 
297
- async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise<void> {
298
- const title = `Self-Analysis: Score ${analysis.overall_score}/10 — ${analysis.improvements.length} improvement(s)`;
299
-
300
- const improvementDetails = analysis.improvements
301
- .map((imp, i) => `${i + 1}. [${imp.severity}] **${imp.area}**: ${imp.description}\n → ${imp.suggestion}`)
302
- .join("\n");
303
-
304
- const dataQualityNotes = [
305
- analysis.data_quality.session_logs_gaps
306
- ? `Session logs: ${analysis.data_quality.session_logs_gaps}`
307
- : null,
308
- analysis.data_quality.message_events_gaps
309
- ? `Message events: ${analysis.data_quality.message_events_gaps}`
310
- : null,
311
- analysis.data_quality.conversation_context_gaps
312
- ? `Conversation context: ${analysis.data_quality.conversation_context_gaps}`
313
- : null,
314
- ]
315
- .filter(Boolean)
316
- .join("\n");
317
-
318
- let description = `## Summary\n${analysis.summary}\n\n`;
319
- description += `## Task Completion Quality (${analysis.task_completion_quality.score}/10)\n${analysis.task_completion_quality.assessment}\n\n`;
320
- description += `## Improvements\n${improvementDetails}\n`;
321
-
322
- if (dataQualityNotes) {
323
- description += `\n## Data Quality Gaps\n${dataQualityNotes}\n`;
324
- }
325
-
243
+ async function submitSelfAnalysisFeedback(title: string, description: string): Promise<void> {
326
244
  // Truncate to fit edsger-feedback's 5000 char limit
327
245
  if (description.length > 4900) {
328
246
  description = description.slice(0, 4900) + "\n...[truncated]";
@@ -347,16 +265,20 @@ async function submitSelfAnalysisFeedback(analysis: SelfAnalysisResult): Promise
347
265
 
348
266
  // ── Query Runner ────────────────────────────────────────────────
349
267
 
350
- async function runAnalysisQuery(model: string, prompt: string): Promise<unknown> {
268
+ interface AnalysisResult {
269
+ needsImprovement: boolean;
270
+ title: string;
271
+ description: string;
272
+ }
273
+
274
+ async function runAnalysisQuery(model: string, prompt: string): Promise<AnalysisResult | null> {
351
275
  let structuredOutput: unknown;
352
276
 
353
- // Use independent query() instead of session resume to avoid
354
- // conflicts with skill evaluation which also resumes the session
355
277
  for await (const message of query({
356
278
  prompt,
357
279
  options: {
358
280
  model,
359
- maxTurns: 10,
281
+ maxTurns: 1,
360
282
  allowedTools: [],
361
283
  effort: "medium",
362
284
  outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT,
@@ -371,20 +293,26 @@ async function runAnalysisQuery(model: string, prompt: string): Promise<unknown>
371
293
  `Self-analysis cost: $${successMsg.total_cost_usd.toFixed(4)}`
372
294
  );
373
295
  if (!structuredOutput) {
374
- // structured_output can be undefined even on success — log the text result
375
296
  log.warn(
376
297
  `Self-analysis: success but no structured_output. result text: ${String((successMsg as any).result ?? "").slice(0, 500)}`
377
298
  );
378
299
  }
379
300
  } else {
380
301
  log.warn(
381
- `Self-analysis: query returned subtype="${resultMsg.subtype}". result: ${String((resultMsg as any).result ?? "").slice(0, 500)}`
302
+ `Self-analysis: query returned subtype="${resultMsg.subtype}".`
382
303
  );
383
304
  }
384
305
  }
385
306
  }
386
307
 
387
- return structuredOutput;
308
+ if (!structuredOutput || typeof structuredOutput !== "object") return null;
309
+
310
+ const output = structuredOutput as Record<string, unknown>;
311
+ return {
312
+ needsImprovement: Boolean(output.needsImprovement),
313
+ title: String(output.title || ""),
314
+ description: String(output.description || ""),
315
+ };
388
316
  }
389
317
 
390
318
  // ── Main Entry Point ────────────────────────────────────────────
@@ -434,7 +362,7 @@ export async function analyzeSelfPostTask(opts: {
434
362
  tokenUsage,
435
363
  });
436
364
 
437
- const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}\n\nRespond with a JSON object now.`;
365
+ const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}`;
438
366
 
439
367
  // Race the analysis against a timeout to avoid hanging forever
440
368
  const analysisPromise = runAnalysisQuery(model, prompt);
@@ -442,37 +370,20 @@ export async function analyzeSelfPostTask(opts: {
442
370
  setTimeout(() => reject(new Error(`Self-analysis timed out after ${SELF_ANALYSIS_TIMEOUT_MS / 1000}s`)), SELF_ANALYSIS_TIMEOUT_MS)
443
371
  );
444
372
 
445
- const structuredOutput = await Promise.race([analysisPromise, timeoutPromise]);
373
+ const result = await Promise.race([analysisPromise, timeoutPromise]);
446
374
 
447
- // Validate against Zod schema
448
- let analysis: SelfAnalysisResult | null = null;
449
- if (structuredOutput) {
450
- const result = SelfAnalysisResultSchema.safeParse(structuredOutput);
451
- if (result.success) {
452
- analysis = result.data;
453
- } else {
454
- log.warn(
455
- `Self-analysis: schema validation failed: ${result.error.issues.map((i) => `${i.path.join(".")}: ${i.message}`).join("; ")}`
456
- );
457
- log.debug(`Self-analysis: raw output: ${JSON.stringify(structuredOutput).slice(0, 500)}`);
458
- }
375
+ if (!result) {
376
+ log.warn("Self-analysis: no result from query");
377
+ return;
459
378
  }
460
379
 
461
- if (!analysis) {
462
- log.warn("Self-analysis: no valid structured output");
380
+ if (!result.needsImprovement) {
381
+ log.info("Self-analysis complete: no improvements needed");
463
382
  return;
464
383
  }
465
384
 
466
- log.info(
467
- `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
468
- );
469
-
470
- // Only submit feedback if not perfect
471
- if (!analysis.is_perfect && analysis.improvements.length > 0) {
472
- await submitSelfAnalysisFeedback(analysis);
473
- } else {
474
- log.debug("Self-analysis: no improvements to report — skipping feedback");
475
- }
385
+ log.info(`Self-analysis complete: improvements found — ${result.title}`);
386
+ await submitSelfAnalysisFeedback(result.title, result.description);
476
387
  } catch (err) {
477
388
  log.warn(`Self-analysis error: ${errorMessage(err)}`);
478
389
  }