assistme 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-KAS2PTOX.js → chunk-ECEOBNDM.js} +38 -0
- package/dist/index.js +430 -22
- package/dist/{job-runner-AT3V6LAQ.js → job-runner-RGP4CLYV.js} +1 -1
- package/package.json +2 -1
- package/src/agent/processor.ts +24 -4
- package/src/agent/self-analyzer.ts +446 -0
- package/src/db/analysis-data.ts +79 -0
- package/src/utils/constants.ts +20 -0
- package/src/utils/schemas.ts +30 -0
- package/tests/agent/processor.test.ts +4 -0
- package/tests/agent/self-analyzer.test.ts +349 -0
|
@@ -234,6 +234,31 @@ var BrowseSkillRowSchema = z.object({
|
|
|
234
234
|
avg_rating: z.number().optional().nullable(),
|
|
235
235
|
rating_count: z.number().optional().default(0)
|
|
236
236
|
});
|
|
237
|
+
/**
 * Zod schema for the structured result of a post-task self-analysis.
 *
 * Shape: a perfection flag, an overall score and a task-completion score
 * (both constrained to 1..10), a list of improvements with a fixed severity
 * vocabulary, per-source data-quality flags with optional/nullable gap
 * notes, and a free-text summary.
 */
var SelfAnalysisResultSchema = (() => {
  // Scores share the same inclusive 1..10 range.
  const scoreField = () => z.number().min(1).max(10);
  // Gap notes may be omitted entirely or explicitly null.
  const gapField = () => z.string().nullable().optional();

  const taskCompletionQuality = z.object({
    score: scoreField(),
    assessment: z.string()
  });

  const improvement = z.object({
    area: z.string(),
    severity: z.enum(["critical", "major", "minor", "suggestion"]),
    description: z.string(),
    suggestion: z.string()
  });

  const dataQuality = z.object({
    session_logs_useful: z.boolean(),
    session_logs_gaps: gapField(),
    message_events_useful: z.boolean(),
    message_events_gaps: gapField(),
    conversation_context_useful: z.boolean(),
    conversation_context_gaps: gapField()
  });

  return z.object({
    is_perfect: z.boolean(),
    overall_score: scoreField(),
    task_completion_quality: taskCompletionQuality,
    improvements: z.array(improvement),
    data_quality: dataQuality,
    summary: z.string()
  });
})();
|
|
237
262
|
|
|
238
263
|
// src/utils/constants.ts
|
|
239
264
|
var MAX_RESPONSE_CONTENT_LENGTH = 5e4;
|
|
@@ -258,6 +283,12 @@ var MEMORY_DEDUP_SIMILARITY_THRESHOLD = 0.75;
|
|
|
258
283
|
var MEMORY_COMPRESSION_THRESHOLD = 50;
|
|
259
284
|
var MEMORY_COMPRESSION_TARGET = 30;
|
|
260
285
|
var MAX_BUDGET_USD = 2;
|
|
286
|
+
// --- Self-analysis limits -------------------------------------------------
// Row limits passed to the log/event/conversation fetchers.
var SELF_ANALYSIS_MAX_SESSION_LOGS = 200;
var SELF_ANALYSIS_MAX_MESSAGE_EVENTS = 300;
var SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES = 10;
// Character budgets applied to the rendered log and event text.
var SELF_ANALYSIS_LOG_CONTEXT_CHARS = 20000;
var SELF_ANALYSIS_EVENT_CONTEXT_CHARS = 20000;
// Product slug sent with self-analysis feedback submissions.
var EDSGER_PRODUCT_SLUG = "assistme";
|
|
261
292
|
var MAX_COMPLETE_TASK_RETRIES = 2;
|
|
262
293
|
|
|
263
294
|
// src/utils/errors.ts
|
|
@@ -470,6 +501,12 @@ export {
|
|
|
470
501
|
MEMORY_COMPRESSION_THRESHOLD,
|
|
471
502
|
MEMORY_COMPRESSION_TARGET,
|
|
472
503
|
MAX_BUDGET_USD,
|
|
504
|
+
SELF_ANALYSIS_MAX_SESSION_LOGS,
|
|
505
|
+
SELF_ANALYSIS_MAX_MESSAGE_EVENTS,
|
|
506
|
+
SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES,
|
|
507
|
+
SELF_ANALYSIS_LOG_CONTEXT_CHARS,
|
|
508
|
+
SELF_ANALYSIS_EVENT_CONTEXT_CHARS,
|
|
509
|
+
EDSGER_PRODUCT_SLUG,
|
|
473
510
|
MAX_COMPLETE_TASK_RETRIES,
|
|
474
511
|
AppError,
|
|
475
512
|
errorMessage,
|
|
@@ -478,5 +515,6 @@ export {
|
|
|
478
515
|
SkillCreateResultSchema,
|
|
479
516
|
SkillDecisionSchema,
|
|
480
517
|
BrowseSkillRowSchema,
|
|
518
|
+
SelfAnalysisResultSchema,
|
|
481
519
|
JobRunner
|
|
482
520
|
};
|
package/dist/index.js
CHANGED
|
@@ -3,6 +3,7 @@ import {
|
|
|
3
3
|
AppError,
|
|
4
4
|
BrowseSkillRowSchema,
|
|
5
5
|
CDP_COMMAND_TIMEOUT_MS,
|
|
6
|
+
EDSGER_PRODUCT_SLUG,
|
|
6
7
|
FRAME_CONTEXTS_MAX_SIZE,
|
|
7
8
|
JobRunner,
|
|
8
9
|
MAX_BUDGET_USD,
|
|
@@ -20,10 +21,16 @@ import {
|
|
|
20
21
|
MEMORY_COMPRESSION_THRESHOLD,
|
|
21
22
|
MEMORY_DEDUP_SIMILARITY_THRESHOLD,
|
|
22
23
|
SCHEDULER_INTERVAL_MS,
|
|
24
|
+
SELF_ANALYSIS_EVENT_CONTEXT_CHARS,
|
|
25
|
+
SELF_ANALYSIS_LOG_CONTEXT_CHARS,
|
|
26
|
+
SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES,
|
|
27
|
+
SELF_ANALYSIS_MAX_MESSAGE_EVENTS,
|
|
28
|
+
SELF_ANALYSIS_MAX_SESSION_LOGS,
|
|
23
29
|
SHELL_MAX_OUTPUT,
|
|
24
30
|
SHELL_TIMEOUT_MS,
|
|
25
31
|
SKILL_DESCRIPTION_BUDGET_CHARS,
|
|
26
32
|
SKILL_VALIDATION_MAX_TURNS,
|
|
33
|
+
SelfAnalysisResultSchema,
|
|
27
34
|
SkillCreateResultSchema,
|
|
28
35
|
SkillDecisionSchema,
|
|
29
36
|
SkillRowSchema,
|
|
@@ -38,7 +45,7 @@ import {
|
|
|
38
45
|
setLogHook,
|
|
39
46
|
setLogLevel,
|
|
40
47
|
writeAuthStore
|
|
41
|
-
} from "./chunk-
|
|
48
|
+
} from "./chunk-ECEOBNDM.js";
|
|
42
49
|
import {
|
|
43
50
|
clearConfig,
|
|
44
51
|
getConfig,
|
|
@@ -2712,7 +2719,7 @@ var SessionManager = class {
|
|
|
2712
2719
|
|
|
2713
2720
|
// src/agent/processor.ts
|
|
2714
2721
|
import {
|
|
2715
|
-
query as
|
|
2722
|
+
query as query3
|
|
2716
2723
|
} from "@anthropic-ai/claude-agent-sdk";
|
|
2717
2724
|
|
|
2718
2725
|
// src/agent/memory.ts
|
|
@@ -2737,10 +2744,10 @@ var MemoryManager = class {
|
|
|
2737
2744
|
/**
|
|
2738
2745
|
* Search memories by query text. Uses ILIKE + tag containment.
|
|
2739
2746
|
*/
|
|
2740
|
-
async search(
|
|
2747
|
+
async search(query4, limit = 10) {
|
|
2741
2748
|
try {
|
|
2742
2749
|
return await callMcpHandler("memory.search", {
|
|
2743
|
-
query:
|
|
2750
|
+
query: query4,
|
|
2744
2751
|
limit
|
|
2745
2752
|
});
|
|
2746
2753
|
} catch (err) {
|
|
@@ -3385,11 +3392,11 @@ _(${skills.length - included} additional skills available \u2014 use skill_searc
|
|
|
3385
3392
|
log.debug(`Invocation log error: ${err}`);
|
|
3386
3393
|
}
|
|
3387
3394
|
}
|
|
3388
|
-
async searchDb(
|
|
3395
|
+
async searchDb(query4, limit = 10) {
|
|
3389
3396
|
if (this.userId) {
|
|
3390
3397
|
try {
|
|
3391
3398
|
const data = await callMcpHandler("skill.search", {
|
|
3392
|
-
query:
|
|
3399
|
+
query: query4,
|
|
3393
3400
|
limit
|
|
3394
3401
|
});
|
|
3395
3402
|
if (data) {
|
|
@@ -3404,7 +3411,7 @@ _(${skills.length - included} additional skills available \u2014 use skill_searc
|
|
|
3404
3411
|
} catch {
|
|
3405
3412
|
}
|
|
3406
3413
|
}
|
|
3407
|
-
const results = this.findRelevant(
|
|
3414
|
+
const results = this.findRelevant(query4, limit);
|
|
3408
3415
|
return results.map((s) => ({
|
|
3409
3416
|
name: s.name,
|
|
3410
3417
|
description: s.description,
|
|
@@ -3784,6 +3791,387 @@ async function executeSkillDecision(decision, skillManager, sessionId, model) {
|
|
|
3784
3791
|
}
|
|
3785
3792
|
}
|
|
3786
3793
|
|
|
3794
|
+
// src/agent/self-analyzer.ts
|
|
3795
|
+
import {
|
|
3796
|
+
query as query2
|
|
3797
|
+
} from "@anthropic-ai/claude-agent-sdk";
|
|
3798
|
+
import { submitFeedback, FeedbackError } from "edsger-feedback";
|
|
3799
|
+
|
|
3800
|
+
// src/db/analysis-data.ts
|
|
3801
|
+
/**
 * Shared best-effort fetch used by the analysis-data getters below.
 *
 * Calls the named MCP handler and always resolves to an array: a missing or
 * non-array payload, or any thrown error, yields `[]`. Failures are logged at
 * debug level and never propagate, so callers can safely use `.length` and
 * `.map` on the result.
 *
 * @param {string} handler - MCP handler name, e.g. "log.list".
 * @param {object} params - Parameters forwarded to the handler.
 * @param {string} label - Human-readable noun used in the failure log line.
 * @returns {Promise<Array>} The handler's rows, or an empty array.
 */
async function fetchAnalysisRows(handler, params, label) {
  try {
    const data = await callMcpHandler(handler, params);
    // Guard against truthy non-array payloads; callers iterate the result.
    return Array.isArray(data) ? data : [];
  } catch (err) {
    log.debug(`Failed to fetch ${label}: ${err instanceof Error ? err.message : err}`);
    return [];
  }
}

/**
 * Fetch log rows for a session via the "log.list" handler.
 *
 * @param {string} sessionId - Session whose logs are requested.
 * @param {number} [limit=500] - Maximum number of rows to request.
 * @returns {Promise<Array>} Log rows, or `[]` on failure / empty payload.
 */
async function getSessionLogs(sessionId, limit = 500) {
  return fetchAnalysisRows("log.list", { session_id: sessionId, limit }, "session logs");
}

/**
 * Fetch events for a message via the "event.list" handler.
 *
 * @param {string} messageId - Message whose events are requested.
 * @param {number} [limit=500] - Maximum number of rows to request.
 * @returns {Promise<Array>} Event rows, or `[]` on failure / empty payload.
 */
async function getMessageEvents(messageId, limit = 500) {
  return fetchAnalysisRows("event.list", { message_id: messageId, limit }, "message events");
}

/**
 * Fetch messages of a conversation via the "conversation.list_messages" handler.
 *
 * @param {string} conversationId - Conversation whose messages are requested.
 * @param {number} [limit=10] - Maximum number of rows to request.
 * @returns {Promise<Array>} Message rows, or `[]` on failure / empty payload.
 */
async function getConversationMessages(conversationId, limit = 10) {
  return fetchAnalysisRows(
    "conversation.list_messages",
    { conversation_id: conversationId, limit },
    "conversation messages"
  );
}
|
|
3840
|
+
|
|
3841
|
+
// src/agent/self-analyzer.ts
|
|
3842
|
+
/**
 * JSON-schema output format handed to the model query for self-analysis.
 * Describes the same fields the result is later validated against:
 * flags, scores, improvements, data-quality notes and a summary.
 */
var SELF_ANALYSIS_OUTPUT_FORMAT = (() => {
  // Tiny builders keep the nested schema readable; key order is preserved.
  const ofType = (type) => ({ type });
  const objectOf = (properties, required) => ({ type: "object", properties, required });

  const taskCompletionQuality = objectOf(
    { score: ofType("number"), assessment: ofType("string") },
    ["score", "assessment"]
  );

  const improvementItem = objectOf(
    {
      area: ofType("string"),
      severity: { type: "string", enum: ["critical", "major", "minor", "suggestion"] },
      description: ofType("string"),
      suggestion: ofType("string")
    },
    ["area", "severity", "description", "suggestion"]
  );

  // Only the *_useful flags are required; the *_gaps notes are optional.
  const dataQuality = objectOf(
    {
      session_logs_useful: ofType("boolean"),
      session_logs_gaps: ofType("string"),
      message_events_useful: ofType("boolean"),
      message_events_gaps: ofType("string"),
      conversation_context_useful: ofType("boolean"),
      conversation_context_gaps: ofType("string")
    },
    ["session_logs_useful", "message_events_useful", "conversation_context_useful"]
  );

  return {
    type: "json_schema",
    schema: objectOf(
      {
        is_perfect: ofType("boolean"),
        overall_score: ofType("number"),
        task_completion_quality: taskCompletionQuality,
        improvements: { type: "array", items: improvementItem },
        data_quality: dataQuality,
        summary: ofType("string")
      },
      [
        "is_perfect",
        "overall_score",
        "task_completion_quality",
        "improvements",
        "data_quality",
        "summary"
      ]
    )
  };
})();
|
|
3901
|
+
/**
 * Instruction text that opens every self-analysis query. It frames the model
 * as a reviewer of the agent system itself (not of the user's task), lists
 * the context sections that follow, and explains the scoring fields.
 * Assembled line by line; lines are joined with "\n".
 */
var SELF_ANALYSIS_PROMPT = [
  "You just completed a task as the AssistMe agent. Now critically analyze AssistMe's own implementation \u2014 NOT the user's task itself, but how well AssistMe (the agent system) performed and whether AssistMe's codebase can be improved.",
  "",
  "## Your Role",
  "You are a perfectionist code reviewer analyzing the AssistMe agent system itself. Be critical, thorough, and constructive. Focus on:",
  "",
  "1. **Task Completion Quality**: Did AssistMe handle the task optimally? Were there unnecessary steps, missed edge cases, or suboptimal tool usage?",
  "2. **Agent Architecture**: Based on what you observed during execution, are there architectural improvements to AssistMe's code (processor, event hooks, MCP servers, skill system, etc.)?",
  "3. **Data & Observability**: Evaluate the quality of the context data provided (session logs, message events, conversation messages). What information is missing that would help diagnose issues or improve the system?",
  "4. **Error Handling & Resilience**: Were there any failures or retries? Could the error handling be improved?",
  "5. **Performance & Efficiency**: Were there unnecessary API calls, redundant operations, or opportunities to optimize?",
  "6. **User Experience**: Could the interaction flow be smoother? Is the feedback to the user adequate?",
  "",
  "## Context Data Provided",
  "Below you will find:",
  "- **Session Logs**: stdout/stderr/status logs from the agent session",
  "- **Message Events**: Real-time events emitted during task execution (tool calls, results, status changes, thinking blocks)",
  "- **Conversation Messages**: User interaction history for this conversation",
  "- **Tool Call Records**: Summary of all tool calls made during the task",
  "- **Tool Failures**: Any tool calls that failed during execution",
  "",
  "## Instructions",
  "Analyze all provided data critically. Consider:",
  "- Are the session logs capturing enough detail for debugging?",
  "- Do the message events provide sufficient visibility into the agent's decision-making?",
  "- Is the conversation context giving enough user intent signal?",
  "- Were tools used efficiently?",
  "- Could the overall execution flow be improved?",
  "",
  "Set is_perfect to true ONLY if there are genuinely zero improvements to suggest (this should be rare).",
  "The overall_score should be 1-10 where 10 means absolutely perfect.",
  "",
  "Respond with a JSON object now."
].join("\n");
|
|
3933
|
+
/**
 * Cap `text` at `maxChars` UTF-16 code units, appending a marker when cut.
 *
 * Text that already fits is returned unchanged. Otherwise the first
 * `maxChars` code units are kept and "\n... [truncated]" is appended (the
 * marker is not counted against the budget). The cut never lands between
 * the two halves of a surrogate pair, so the result contains no lone
 * surrogate. A negative budget is treated as zero instead of trimming from
 * the end of the string.
 *
 * @param {string} text - Text to bound.
 * @param {number} maxChars - Maximum number of code units to keep.
 * @returns {string} `text` itself, or its truncated prefix plus the marker.
 */
function truncateToChars(text, maxChars) {
  if (text.length <= maxChars) return text;
  let cut = Math.max(0, Math.floor(maxChars));
  if (cut > 0 && cut < text.length) {
    const before = text.charCodeAt(cut - 1);
    const after = text.charCodeAt(cut);
    const splitsPair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    // Drop the dangling high surrogate rather than emit half a character.
    if (splitsPair) cut -= 1;
  }
  return text.slice(0, cut) + "\n... [truncated]";
}
|
|
3937
|
+
// Maximum characters of a tool result kept in the aggregated event summary.
var EVENT_RESULT_SUMMARY_CHARS = 150;

/**
 * Condense raw message events into short, human-readable summary lines.
 *
 * - Consecutive "text_delta" events are merged into a single line showing the
 *   chunk count and at most the first 200 characters of the combined text.
 * - "thinking" and "tool_use_input" events produce no line of their own; they
 *   only terminate a pending text_delta run.
 * - "tool_result" lines show the tool name ("unknown" when absent) and the
 *   first EVENT_RESULT_SUMMARY_CHARS characters of the result.
 * - "tool_failure", "status_change" and "error" events are emitted with their
 *   full JSON payload.
 * - Any other event type is emitted with its JSON payload cut to 200 chars.
 *
 * Malformed rows are tolerated: null/undefined entries are skipped, a missing
 * `event_data` is treated as empty, and non-string tool results are rendered
 * as JSON instead of throwing.
 *
 * @param {Array<{event_type: string, event_data?: any}>} events - Raw events in order.
 * @returns {string[]} Summary lines in event order.
 */
function filterAndAggregateEvents(events) {
  const lines = [];
  let pendingTextChunks = [];

  // Emit the accumulated text_delta run (if any) as one line.
  const flushTextDelta = () => {
    if (pendingTextChunks.length === 0) return;
    const merged = pendingTextChunks.join("");
    const summary = merged.length > 200 ? merged.slice(0, 200) + "..." : merged;
    lines.push(`[text_delta x${pendingTextChunks.length}] ${summary}`);
    pendingTextChunks = [];
  };

  for (const e of events) {
    if (e == null) continue;
    // Property reads go through `data` so a missing payload cannot throw.
    const data = e.event_data == null ? {} : e.event_data;

    if (e.event_type === "text_delta") {
      pendingTextChunks.push(data.text || "");
      continue;
    }

    // Every non-delta event ends the current text run.
    flushTextDelta();

    switch (e.event_type) {
      case "thinking":
      case "tool_use_input":
        break;
      case "tool_result": {
        const name = data.name || "unknown";
        const raw = data.result || "";
        const text = typeof raw === "string" ? raw : String(JSON.stringify(raw));
        lines.push(`[tool_result] ${name}: ${text.slice(0, EVENT_RESULT_SUMMARY_CHARS)}`);
        break;
      }
      case "tool_failure":
      case "status_change":
      case "error":
        lines.push(`[${e.event_type}] ${JSON.stringify(e.event_data)}`);
        break;
      default:
        // JSON.stringify(undefined) is undefined; String() keeps .slice safe.
        lines.push(`[${e.event_type}] ${String(JSON.stringify(e.event_data)).slice(0, 200)}`);
        break;
    }
  }

  flushTextDelta();
  return lines;
}
|
|
3987
|
+
/**
 * Assemble the markdown context appended to the self-analysis prompt.
 *
 * Fetches session logs, message events and conversation messages in
 * parallel, then renders these sections in order: Session Logs, Message
 * Events, Conversation Messages, Tool Call Records, Tool Failures, Token
 * Usage and Task. Log and event text is bounded by the SELF_ANALYSIS_*
 * character budgets; individual messages, tool inputs, tool results and the
 * task prompt are cut to fixed lengths.
 *
 * Missing or non-string fields (a tool record without `result` or `input`,
 * structured message `content`, an absent prompt/response, absent tool
 * arrays) are rendered as empty or JSON text instead of throwing.
 *
 * @param {object} ctx
 * @param {string} ctx.sessionId - Session used to look up logs.
 * @param {string} ctx.taskId - Passed to the message-event lookup.
 * @param {string} ctx.conversationId - Conversation used to look up messages.
 * @param {string} ctx.taskPrompt - Original task prompt (first 500 chars shown).
 * @param {string} ctx.taskResponse - Final response; only its length is reported.
 * @param {Array<{name: string, input: any, result: string}>} ctx.toolCallRecords
 * @param {Array<{toolName: string, error: any}>} ctx.toolFailures
 * @param {{input_tokens: number, output_tokens: number}} [ctx.tokenUsage]
 * @returns {Promise<string>} The rendered context block.
 */
async function buildAnalysisContext(ctx) {
  const [sessionLogs, messageEvents, conversationMessages] = await Promise.all([
    getSessionLogs(ctx.sessionId, SELF_ANALYSIS_MAX_SESSION_LOGS),
    getMessageEvents(ctx.taskId, SELF_ANALYSIS_MAX_MESSAGE_EVENTS),
    getConversationMessages(ctx.conversationId, SELF_ANALYSIS_MAX_CONVERSATION_MESSAGES)
  ]);
  const toolCallRecords = Array.isArray(ctx.toolCallRecords) ? ctx.toolCallRecords : [];
  const toolFailures = Array.isArray(ctx.toolFailures) ? ctx.toolFailures : [];

  // Render any value as text: strings as-is, null/undefined as "", rest as JSON.
  const asText = (value) => {
    if (typeof value === "string") return value;
    if (value == null) return "";
    const json = JSON.stringify(value);
    return json === undefined ? String(value) : json;
  };

  const parts = [];

  if (sessionLogs.length > 0) {
    const logText = sessionLogs.map((l) => `[${l.log_type}] ${l.message}`).join("\n");
    parts.push(`\n## Session Logs (${sessionLogs.length} entries)\n`);
    parts.push(truncateToChars(logText, SELF_ANALYSIS_LOG_CONTEXT_CHARS));
  } else {
    parts.push("\n## Session Logs\n(No session logs available \u2014 this is itself a data gap to note)\n");
  }

  if (messageEvents.length > 0) {
    const filteredLines = filterAndAggregateEvents(messageEvents);
    parts.push(`\n## Message Events (${messageEvents.length} raw \u2192 ${filteredLines.length} aggregated)\n`);
    parts.push(truncateToChars(filteredLines.join("\n"), SELF_ANALYSIS_EVENT_CONTEXT_CHARS));
  } else {
    parts.push("\n## Message Events\n(No message events available \u2014 this is itself a data gap to note)\n");
  }

  if (conversationMessages.length > 0) {
    parts.push(`\n## Conversation Messages (${conversationMessages.length} entries)\n`);
    for (const msg of conversationMessages) {
      const role = msg.role || "unknown";
      // Content may be structured rather than a string; render it as text first.
      const content = asText(msg.content || "").slice(0, 500);
      const status = msg.status || "";
      parts.push(`[${role}${status ? ` (${status})` : ""}] ${content}\n`);
    }
  } else {
    parts.push("\n## Conversation Messages\n(No conversation messages available)\n");
  }

  if (toolCallRecords.length > 0) {
    parts.push(`\n## Tool Call Records (${toolCallRecords.length} calls)\n`);
    for (const tc of toolCallRecords) {
      // JSON.stringify(undefined) is undefined, so fall back to "".
      const inputJson = JSON.stringify(tc.input);
      const inputStr = (inputJson === undefined ? "" : inputJson).slice(0, 200);
      parts.push(`- ${tc.name}: ${inputStr} \u2192 ${asText(tc.result).slice(0, 100)}\n`);
    }
  }

  if (toolFailures.length > 0) {
    parts.push(`\n## Tool Failures (${toolFailures.length} failures)\n`);
    for (const tf of toolFailures) {
      parts.push(`- ${tf.toolName}: ${tf.error}\n`);
    }
  }

  if (ctx.tokenUsage) {
    parts.push("\n## Token Usage\n");
    parts.push(`Input: ${ctx.tokenUsage.input_tokens}, Output: ${ctx.tokenUsage.output_tokens}\n`);
  }

  parts.push("\n## Task\n");
  parts.push(`Prompt: ${asText(ctx.taskPrompt).slice(0, 500)}\n`);
  parts.push(`Response length: ${asText(ctx.taskResponse).length} chars\n`);

  return parts.join("");
}
|
|
4062
|
+
/**
 * Render a validated self-analysis as a feedback ticket and submit it.
 *
 * The title carries the overall score and improvement count (cut to 200
 * characters). The description contains Summary, Task Completion Quality and
 * Improvements sections, plus a Data Quality Gaps section when any gap note
 * is present; it is cut to 4900 characters with a truncation marker
 * appended. Submission is best-effort: failures are logged at debug level
 * and never thrown.
 *
 * @param {object} analysis - Parsed self-analysis result.
 * @returns {Promise<void>}
 */
async function submitSelfAnalysisFeedback(analysis) {
  const {
    improvements,
    data_quality: quality,
    task_completion_quality: completion
  } = analysis;

  const title = `Self-Analysis: Score ${analysis.overall_score}/10 \u2014 ${improvements.length} improvement(s)`;

  // One numbered entry per improvement, with the suggestion on its own line.
  const improvementLines = [];
  improvements.forEach((item, idx) => {
    improvementLines.push(
      `${idx + 1}. [${item.severity}] **${item.area}**: ${item.description}\n\u2192 ${item.suggestion}`
    );
  });

  // Only sources that actually reported a gap contribute a note.
  const gapNotes = [];
  if (quality.session_logs_gaps) {
    gapNotes.push(`Session logs: ${quality.session_logs_gaps}`);
  }
  if (quality.message_events_gaps) {
    gapNotes.push(`Message events: ${quality.message_events_gaps}`);
  }
  if (quality.conversation_context_gaps) {
    gapNotes.push(`Conversation context: ${quality.conversation_context_gaps}`);
  }

  const sections = [
    `## Summary\n${analysis.summary}\n\n`,
    `## Task Completion Quality (${completion.score}/10)\n${completion.assessment}\n\n`,
    `## Improvements\n${improvementLines.join("\n")}\n`
  ];
  if (gapNotes.length > 0) {
    sections.push(`\n## Data Quality Gaps\n${gapNotes.join("\n")}\n`);
  }

  let description = sections.join("");
  if (description.length > 4900) {
    description = description.slice(0, 4900) + "\n...[truncated]";
  }

  try {
    const { id } = await submitFeedback({
      slug: EDSGER_PRODUCT_SLUG,
      title: title.slice(0, 200),
      description,
      category: "improvement"
    });
    log.info(`Self-analysis feedback submitted: ${id}`);
  } catch (err) {
    const detail = err instanceof FeedbackError
      ? `Feedback submission failed (${err.statusCode}): ${err.message}`
      : `Feedback submission failed: ${errorMessage(err)}`;
    log.debug(detail);
  }
}
|
|
4107
|
+
/**
 * Run the post-task self-analysis and report any improvements as feedback.
 *
 * Builds the analysis context, sends a single-turn, tool-less query with the
 * structured output format, validates the structured result against
 * SelfAnalysisResultSchema, and submits feedback when the result is not
 * marked perfect and lists at least one improvement. The whole routine is
 * best-effort: every error is logged at debug level and swallowed so the
 * caller's task flow is never affected.
 *
 * A result message without a numeric `total_cost_usd` no longer aborts the
 * analysis (cost logging is simply skipped), and result messages with a
 * non-success subtype are logged instead of being dropped silently.
 *
 * @param {object} opts
 * @param {string} opts.model - Model identifier used for the analysis query.
 * @param {string} opts.taskId - Task being analyzed.
 * @param {string} opts.conversationId - Conversation the task belongs to.
 * @param {string} opts.taskPrompt - Original task prompt.
 * @param {string} opts.taskResponse - Final response produced for the task.
 * @param {Array} opts.toolCallRecords - Tool calls made during the task.
 * @param {Array} opts.toolFailures - Tool calls that failed during the task.
 * @param {object} [opts.tokenUsage] - Token usage reported for the task.
 * @param {string} opts.sessionId - Session used to look up logs.
 * @returns {Promise<void>}
 */
async function analyzeSelfPostTask(opts) {
  const {
    model,
    taskId,
    conversationId,
    taskPrompt,
    taskResponse,
    toolCallRecords,
    toolFailures,
    tokenUsage,
    sessionId
  } = opts;
  try {
    log.info(`Self-analysis starting for task ${taskId}`);
    const analysisContext = await buildAnalysisContext({
      taskId,
      conversationId,
      sessionId,
      taskPrompt,
      taskResponse,
      toolCallRecords,
      toolFailures,
      tokenUsage
    });
    const prompt = `${SELF_ANALYSIS_PROMPT}\n${analysisContext}\n\nRespond with a JSON object now.`;

    let structuredOutput;
    const stream = query2({
      prompt,
      options: {
        model,
        maxTurns: 1,
        allowedTools: [],
        effort: "low",
        outputFormat: SELF_ANALYSIS_OUTPUT_FORMAT
      }
    });
    for await (const message of stream) {
      if (message.type !== "result") continue;
      if (message.subtype !== "success") {
        log.debug(`Self-analysis query ended without success: ${message.subtype}`);
        continue;
      }
      structuredOutput = message.structured_output;
      // Cost is informational only; never let a missing value lose the result.
      if (typeof message.total_cost_usd === "number") {
        log.debug(`Self-analysis cost: $${message.total_cost_usd.toFixed(4)}`);
      }
    }

    const analysis = structuredOutput ? safeParse(SelfAnalysisResultSchema, structuredOutput) : null;
    if (!analysis) {
      log.debug("Self-analysis: no valid structured output");
      return;
    }
    log.info(
      `Self-analysis complete: score=${analysis.overall_score}/10, perfect=${analysis.is_perfect}, improvements=${analysis.improvements.length}`
    );
    if (!analysis.is_perfect && analysis.improvements.length > 0) {
      await submitSelfAnalysisFeedback(analysis);
    } else {
      log.debug("Self-analysis: no improvements to report \u2014 skipping feedback");
    }
  } catch (err) {
    log.debug(`Self-analysis error: ${errorMessage(err)}`);
  }
}
|
|
4174
|
+
|
|
3787
4175
|
// src/utils/retry.ts
|
|
3788
4176
|
async function withRetry(fn, opts = {}) {
|
|
3789
4177
|
const {
|
|
@@ -6022,7 +6410,7 @@ var TaskProcessor = class {
|
|
|
6022
6410
|
maxBudgetUsd: MAX_BUDGET_USD
|
|
6023
6411
|
};
|
|
6024
6412
|
try {
|
|
6025
|
-
for await (const message of
|
|
6413
|
+
for await (const message of query3({ prompt: task.prompt, options })) {
|
|
6026
6414
|
switch (message.type) {
|
|
6027
6415
|
case "assistant": {
|
|
6028
6416
|
const assistantMsg = message;
|
|
@@ -6097,9 +6485,29 @@ var TaskProcessor = class {
|
|
|
6097
6485
|
);
|
|
6098
6486
|
}
|
|
6099
6487
|
if (agentSessionId) {
|
|
6100
|
-
|
|
6101
|
-
|
|
6102
|
-
|
|
6488
|
+
(async () => {
|
|
6489
|
+
try {
|
|
6490
|
+
await this.evaluateSkillPostTask(agentSessionId, config.model);
|
|
6491
|
+
} catch (err) {
|
|
6492
|
+
log.debug(`Post-task skill evaluation skipped: ${err}`);
|
|
6493
|
+
}
|
|
6494
|
+
try {
|
|
6495
|
+
await analyzeSelfPostTask({
|
|
6496
|
+
model: config.model,
|
|
6497
|
+
taskId: task.id,
|
|
6498
|
+
conversationId: task.conversation_id,
|
|
6499
|
+
taskPrompt: task.prompt,
|
|
6500
|
+
taskResponse: finalResponse,
|
|
6501
|
+
toolCallRecords,
|
|
6502
|
+
toolFailures,
|
|
6503
|
+
tokenUsage,
|
|
6504
|
+
sessionId: this.sessionId || ""
|
|
6505
|
+
});
|
|
6506
|
+
} catch (err) {
|
|
6507
|
+
log.debug(`Post-task self-analysis skipped: ${err}`);
|
|
6508
|
+
}
|
|
6509
|
+
})().catch(() => {
|
|
6510
|
+
});
|
|
6103
6511
|
}
|
|
6104
6512
|
} catch (err) {
|
|
6105
6513
|
const errMsg = errorMessage(err);
|
|
@@ -6487,17 +6895,17 @@ Memories (${memories.length}):`));
|
|
|
6487
6895
|
process.exit(1);
|
|
6488
6896
|
}
|
|
6489
6897
|
});
|
|
6490
|
-
memoryCmd.command("search <query>").description("Search memories").action(async (
|
|
6898
|
+
memoryCmd.command("search <query>").description("Search memories").action(async (query4) => {
|
|
6491
6899
|
try {
|
|
6492
6900
|
await getCurrentUserId();
|
|
6493
6901
|
const mm = new MemoryManager();
|
|
6494
|
-
const results = await mm.search(
|
|
6902
|
+
const results = await mm.search(query4);
|
|
6495
6903
|
if (results.length === 0) {
|
|
6496
|
-
console.log(chalk7.yellow(`No memories matching "${
|
|
6904
|
+
console.log(chalk7.yellow(`No memories matching "${query4}"`));
|
|
6497
6905
|
return;
|
|
6498
6906
|
}
|
|
6499
6907
|
console.log(chalk7.bold(`
|
|
6500
|
-
Search results for "${
|
|
6908
|
+
Search results for "${query4}":`));
|
|
6501
6909
|
for (const m of results) {
|
|
6502
6910
|
console.log(` [${m.category}] ${m.content}`);
|
|
6503
6911
|
}
|
|
@@ -6563,18 +6971,18 @@ function registerSkillCommands(program2) {
|
|
|
6563
6971
|
);
|
|
6564
6972
|
console.log();
|
|
6565
6973
|
});
|
|
6566
|
-
skillCmd.command("search <query>").description("Search skills in your collection and marketplace").action(async (
|
|
6974
|
+
skillCmd.command("search <query>").description("Search skills in your collection and marketplace").action(async (query4) => {
|
|
6567
6975
|
const spinner = ora4("Searching skills...").start();
|
|
6568
6976
|
const sm = await getAuthenticatedSkillManager();
|
|
6569
6977
|
try {
|
|
6570
|
-
const results = await sm.searchDb(
|
|
6978
|
+
const results = await sm.searchDb(query4);
|
|
6571
6979
|
spinner.stop();
|
|
6572
6980
|
if (results.length === 0) {
|
|
6573
|
-
console.log(chalk8.yellow(`No skills found for "${
|
|
6981
|
+
console.log(chalk8.yellow(`No skills found for "${query4}"`));
|
|
6574
6982
|
return;
|
|
6575
6983
|
}
|
|
6576
6984
|
console.log(chalk8.bold(`
|
|
6577
|
-
Skills matching "${
|
|
6985
|
+
Skills matching "${query4}":`));
|
|
6578
6986
|
for (const r of results) {
|
|
6579
6987
|
const emoji = r.emoji ? `${r.emoji} ` : "";
|
|
6580
6988
|
console.log(` ${emoji}${chalk8.cyan(r.name)} [${r.source}]`);
|
|
@@ -6667,7 +7075,7 @@ function registerJobCommands(program2) {
|
|
|
6667
7075
|
jobCmd.command("list").description("List your defined jobs").action(async () => {
|
|
6668
7076
|
try {
|
|
6669
7077
|
const userId = await getCurrentUserId();
|
|
6670
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7078
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-RGP4CLYV.js");
|
|
6671
7079
|
const runner = new JobRunner2();
|
|
6672
7080
|
const jobs = await runner.listJobs();
|
|
6673
7081
|
if (jobs.length === 0) {
|
|
@@ -6691,7 +7099,7 @@ function registerJobCommands(program2) {
|
|
|
6691
7099
|
jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
|
|
6692
7100
|
try {
|
|
6693
7101
|
const userId = await getCurrentUserId();
|
|
6694
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7102
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-RGP4CLYV.js");
|
|
6695
7103
|
const runner = new JobRunner2();
|
|
6696
7104
|
const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
|
|
6697
7105
|
if (runs.length === 0) {
|
|
@@ -6730,7 +7138,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
|
|
|
6730
7138
|
process.exit(1);
|
|
6731
7139
|
}
|
|
6732
7140
|
const userId = await getCurrentUserId();
|
|
6733
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
7141
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-RGP4CLYV.js");
|
|
6734
7142
|
const runner = new JobRunner2();
|
|
6735
7143
|
const job = await runner.loadJob(name);
|
|
6736
7144
|
if (!job) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "assistme",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "AssistMe CLI Agent - AI-powered assistant that controls your real browser",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"conf": "^13.0.1",
|
|
27
27
|
"croner": "^10.0.1",
|
|
28
28
|
"dotenv": "^16.5.0",
|
|
29
|
+
"edsger-feedback": "^0.1.0",
|
|
29
30
|
"glob": "^11.0.1",
|
|
30
31
|
"ora": "^8.2.0",
|
|
31
32
|
"ws": "^8.18.0",
|