assistme 0.3.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-4YWS463E.js → chunk-4SBIN27G.js} +10 -0
- package/dist/index.js +296 -93
- package/dist/{job-runner-JT3JWZBV.js → job-runner-CJ7HM4GZ.js} +1 -1
- package/package.json +1 -1
- package/src/agent/event-hooks.ts +43 -2
- package/src/agent/memory.ts +124 -0
- package/src/agent/processor.ts +42 -64
- package/src/agent/skill-evaluator.ts +173 -61
- package/src/agent/system-prompt.ts +9 -0
- package/src/db/types.ts +3 -1
- package/src/utils/constants.ts +21 -0
- package/src/utils/schemas.ts +33 -0
- package/{src → tests}/agent/event-hooks.test.ts +121 -33
- package/{src → tests}/agent/mcp-servers.test.ts +43 -29
- package/{src → tests}/agent/memory.test.ts +71 -3
- package/{src → tests}/agent/processor.test.ts +59 -55
- package/{src → tests}/agent/scheduler.test.ts +1 -1
- package/{src → tests}/agent/session.test.ts +20 -10
- package/{src → tests}/agent/skills.test.ts +51 -29
- package/{src → tests}/credentials/credential-store.test.ts +23 -8
- package/{src → tests}/credentials/encryption.test.ts +1 -1
- package/{src → tests}/db/supabase.test.ts +4 -4
- package/{src → tests}/tools/filesystem.test.ts +6 -15
- package/{src → tests}/tools/shell.test.ts +1 -1
- package/{src → tests}/utils/config.test.ts +2 -1
- package/{src → tests}/utils/rate-limiter.test.ts +1 -1
- package/{src → tests}/utils/retry.test.ts +6 -12
- package/tsconfig.json +1 -1
- package/vitest.config.ts +1 -1
|
@@ -232,6 +232,11 @@ var FRAME_CONTEXTS_MAX_SIZE = 500;
|
|
|
232
232
|
var MAX_FILE_SEARCH_RESULTS = 50;
|
|
233
233
|
var MAX_CONTENT_SEARCH_FILES = 200;
|
|
234
234
|
var MAX_CONTENT_SEARCH_RESULTS = 30;
|
|
235
|
+
var SKILL_VALIDATION_MAX_TURNS = 2;
|
|
236
|
+
var MEMORY_DEDUP_SIMILARITY_THRESHOLD = 0.75;
|
|
237
|
+
var MEMORY_COMPRESSION_THRESHOLD = 50;
|
|
238
|
+
var MEMORY_COMPRESSION_TARGET = 30;
|
|
239
|
+
var MAX_BUDGET_USD = 2;
|
|
235
240
|
var MAX_COMPLETE_TASK_RETRIES = 2;
|
|
236
241
|
|
|
237
242
|
// src/utils/errors.ts
|
|
@@ -438,6 +443,11 @@ export {
|
|
|
438
443
|
MAX_FILE_SEARCH_RESULTS,
|
|
439
444
|
MAX_CONTENT_SEARCH_FILES,
|
|
440
445
|
MAX_CONTENT_SEARCH_RESULTS,
|
|
446
|
+
SKILL_VALIDATION_MAX_TURNS,
|
|
447
|
+
MEMORY_DEDUP_SIMILARITY_THRESHOLD,
|
|
448
|
+
MEMORY_COMPRESSION_THRESHOLD,
|
|
449
|
+
MEMORY_COMPRESSION_TARGET,
|
|
450
|
+
MAX_BUDGET_USD,
|
|
441
451
|
MAX_COMPLETE_TASK_RETRIES,
|
|
442
452
|
AppError,
|
|
443
453
|
errorMessage,
|
package/dist/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import {
|
|
|
5
5
|
CDP_COMMAND_TIMEOUT_MS,
|
|
6
6
|
FRAME_CONTEXTS_MAX_SIZE,
|
|
7
7
|
JobRunner,
|
|
8
|
+
MAX_BUDGET_USD,
|
|
8
9
|
MAX_COMPLETE_TASK_RETRIES,
|
|
9
10
|
MAX_CONTENT_SEARCH_FILES,
|
|
10
11
|
MAX_CONTENT_SEARCH_RESULTS,
|
|
@@ -15,10 +16,14 @@ import {
|
|
|
15
16
|
MAX_SKILL_RECORD_RESULT_LENGTH,
|
|
16
17
|
MAX_TOOL_INPUT_LOG_LENGTH,
|
|
17
18
|
MAX_TOOL_RESULT_LENGTH,
|
|
19
|
+
MEMORY_COMPRESSION_TARGET,
|
|
20
|
+
MEMORY_COMPRESSION_THRESHOLD,
|
|
21
|
+
MEMORY_DEDUP_SIMILARITY_THRESHOLD,
|
|
18
22
|
SCHEDULER_INTERVAL_MS,
|
|
19
23
|
SHELL_MAX_OUTPUT,
|
|
20
24
|
SHELL_TIMEOUT_MS,
|
|
21
25
|
SKILL_DESCRIPTION_BUDGET_CHARS,
|
|
26
|
+
SKILL_VALIDATION_MAX_TURNS,
|
|
22
27
|
SkillCreateResultSchema,
|
|
23
28
|
SkillDecisionSchema,
|
|
24
29
|
SkillRowSchema,
|
|
@@ -32,7 +37,7 @@ import {
|
|
|
32
37
|
setCorrelationId,
|
|
33
38
|
setLogLevel,
|
|
34
39
|
writeAuthStore
|
|
35
|
-
} from "./chunk-
|
|
40
|
+
} from "./chunk-4SBIN27G.js";
|
|
36
41
|
import {
|
|
37
42
|
clearConfig,
|
|
38
43
|
getConfig,
|
|
@@ -2807,7 +2812,92 @@ var MemoryManager = class {
|
|
|
2807
2812
|
});
|
|
2808
2813
|
return result.count;
|
|
2809
2814
|
}
|
|
2815
|
+
// ── Compression & Deduplication ──────────────────────────────────
|
|
2816
|
+
/**
|
|
2817
|
+
* Check if memory count exceeds threshold and compress if needed.
|
|
2818
|
+
* Called automatically after task completion.
|
|
2819
|
+
*/
|
|
2820
|
+
async compressIfNeeded() {
|
|
2821
|
+
try {
|
|
2822
|
+
const all = await this.list(void 0, 200);
|
|
2823
|
+
if (all.length < MEMORY_COMPRESSION_THRESHOLD) {
|
|
2824
|
+
return 0;
|
|
2825
|
+
}
|
|
2826
|
+
log.info(`Memory compression triggered: ${all.length} memories (threshold: ${MEMORY_COMPRESSION_THRESHOLD})`);
|
|
2827
|
+
let removed = 0;
|
|
2828
|
+
const now = Date.now();
|
|
2829
|
+
for (const m of all) {
|
|
2830
|
+
if (m.expires_at && new Date(m.expires_at).getTime() < now) {
|
|
2831
|
+
await this.remove(m.id);
|
|
2832
|
+
removed++;
|
|
2833
|
+
}
|
|
2834
|
+
}
|
|
2835
|
+
const remaining = all.filter(
|
|
2836
|
+
(m) => !m.expires_at || new Date(m.expires_at).getTime() >= now
|
|
2837
|
+
);
|
|
2838
|
+
const duplicateIds = this.findDuplicates(remaining);
|
|
2839
|
+
for (const id of duplicateIds) {
|
|
2840
|
+
await this.remove(id);
|
|
2841
|
+
removed++;
|
|
2842
|
+
}
|
|
2843
|
+
const afterDedup = remaining.filter((m) => !duplicateIds.has(m.id));
|
|
2844
|
+
if (afterDedup.length > MEMORY_COMPRESSION_TARGET) {
|
|
2845
|
+
const toRemove = afterDedup.sort((a, b) => {
|
|
2846
|
+
if (a.importance !== b.importance) return a.importance - b.importance;
|
|
2847
|
+
if (a.access_count !== b.access_count) return a.access_count - b.access_count;
|
|
2848
|
+
return new Date(a.created_at).getTime() - new Date(b.created_at).getTime();
|
|
2849
|
+
}).slice(0, afterDedup.length - MEMORY_COMPRESSION_TARGET);
|
|
2850
|
+
for (const m of toRemove) {
|
|
2851
|
+
if (m.category === "instruction" && m.importance >= 8) continue;
|
|
2852
|
+
await this.remove(m.id);
|
|
2853
|
+
removed++;
|
|
2854
|
+
}
|
|
2855
|
+
}
|
|
2856
|
+
if (removed > 0) {
|
|
2857
|
+
log.info(`Memory compression complete: removed ${removed} memories`);
|
|
2858
|
+
}
|
|
2859
|
+
return removed;
|
|
2860
|
+
} catch (err) {
|
|
2861
|
+
log.warn(`Memory compression error: ${err instanceof Error ? err.message : err}`);
|
|
2862
|
+
return 0;
|
|
2863
|
+
}
|
|
2864
|
+
}
|
|
2865
|
+
/**
|
|
2866
|
+
* Find duplicate memories based on content similarity.
|
|
2867
|
+
* Returns the IDs of memories that should be removed (keeps the higher-importance duplicate).
|
|
2868
|
+
*/
|
|
2869
|
+
findDuplicates(memories) {
|
|
2870
|
+
const toRemove = /* @__PURE__ */ new Set();
|
|
2871
|
+
for (let i = 0; i < memories.length; i++) {
|
|
2872
|
+
if (toRemove.has(memories[i].id)) continue;
|
|
2873
|
+
for (let j = i + 1; j < memories.length; j++) {
|
|
2874
|
+
if (toRemove.has(memories[j].id)) continue;
|
|
2875
|
+
if (memories[i].category !== memories[j].category) continue;
|
|
2876
|
+
const similarity = computeWordOverlap(memories[i].content, memories[j].content);
|
|
2877
|
+
if (similarity >= MEMORY_DEDUP_SIMILARITY_THRESHOLD) {
|
|
2878
|
+
if (memories[i].importance > memories[j].importance || memories[i].importance === memories[j].importance && new Date(memories[i].created_at) > new Date(memories[j].created_at)) {
|
|
2879
|
+
toRemove.add(memories[j].id);
|
|
2880
|
+
} else {
|
|
2881
|
+
toRemove.add(memories[i].id);
|
|
2882
|
+
}
|
|
2883
|
+
}
|
|
2884
|
+
}
|
|
2885
|
+
}
|
|
2886
|
+
return toRemove;
|
|
2887
|
+
}
|
|
2810
2888
|
};
|
|
2889
|
+
function computeWordOverlap(a, b) {
|
|
2890
|
+
const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean));
|
|
2891
|
+
const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean));
|
|
2892
|
+
if (wordsA.size === 0 && wordsB.size === 0) return 1;
|
|
2893
|
+
if (wordsA.size === 0 || wordsB.size === 0) return 0;
|
|
2894
|
+
let intersection = 0;
|
|
2895
|
+
for (const w of wordsA) {
|
|
2896
|
+
if (wordsB.has(w)) intersection++;
|
|
2897
|
+
}
|
|
2898
|
+
const union = wordsA.size + wordsB.size - intersection;
|
|
2899
|
+
return union === 0 ? 0 : intersection / union;
|
|
2900
|
+
}
|
|
2811
2901
|
|
|
2812
2902
|
// src/agent/skills.ts
|
|
2813
2903
|
import { execSync as execSync2 } from "child_process";
|
|
@@ -3423,6 +3513,36 @@ function preprocessDynamicContext(content, cwd) {
|
|
|
3423
3513
|
import {
|
|
3424
3514
|
query
|
|
3425
3515
|
} from "@anthropic-ai/claude-agent-sdk";
|
|
3516
|
+
var SKILL_DECISION_OUTPUT_FORMAT = {
|
|
3517
|
+
type: "json_schema",
|
|
3518
|
+
schema: {
|
|
3519
|
+
type: "object",
|
|
3520
|
+
properties: {
|
|
3521
|
+
action: { type: "string", enum: ["create", "update", "skip"] },
|
|
3522
|
+
name: { type: "string" },
|
|
3523
|
+
description: { type: "string" },
|
|
3524
|
+
instructions: { type: "string" },
|
|
3525
|
+
emoji: { type: "string" },
|
|
3526
|
+
keywords: { type: "array", items: { type: "string" } },
|
|
3527
|
+
existing_skill_name: { type: "string" },
|
|
3528
|
+
improved_instructions: { type: "string" },
|
|
3529
|
+
improved_description: { type: "string" },
|
|
3530
|
+
reason: { type: "string" }
|
|
3531
|
+
},
|
|
3532
|
+
required: ["action", "reason"]
|
|
3533
|
+
}
|
|
3534
|
+
};
|
|
3535
|
+
var SKILL_VALIDATION_OUTPUT_FORMAT = {
|
|
3536
|
+
type: "json_schema",
|
|
3537
|
+
schema: {
|
|
3538
|
+
type: "object",
|
|
3539
|
+
properties: {
|
|
3540
|
+
valid: { type: "boolean" },
|
|
3541
|
+
improvements: { type: "string" }
|
|
3542
|
+
},
|
|
3543
|
+
required: ["valid"]
|
|
3544
|
+
}
|
|
3545
|
+
};
|
|
3426
3546
|
var SKILL_EVALUATION_PROMPT = `You just completed a task. Now evaluate whether it should be saved as a reusable Agent Skill.
|
|
3427
3547
|
|
|
3428
3548
|
## Agent Skills Format (agentskills.io)
|
|
@@ -3450,6 +3570,22 @@ Respond with ONLY a JSON object (no markdown, no explanation outside the JSON).
|
|
|
3450
3570
|
Always include "reason" explaining your decision.
|
|
3451
3571
|
|
|
3452
3572
|
Use your judgment \u2014 no rigid rules. Consider: Is this repeatable? Can it be generalized? Would it save time next time?`;
|
|
3573
|
+
var SKILL_VALIDATION_PROMPT = `Validate this auto-generated skill before it becomes active.
|
|
3574
|
+
|
|
3575
|
+
Check:
|
|
3576
|
+
1. Are the instructions clear, complete, and actionable?
|
|
3577
|
+
2. Do they use generic placeholders (not hardcoded values)?
|
|
3578
|
+
3. Are error handling steps included?
|
|
3579
|
+
4. Is the description accurate and searchable?
|
|
3580
|
+
5. Would this actually work if followed step-by-step?
|
|
3581
|
+
|
|
3582
|
+
Respond with ONLY a JSON object:
|
|
3583
|
+
- {"valid": true, "improvements": null}
|
|
3584
|
+
- {"valid": false, "improvements": "Specific improvements needed"}
|
|
3585
|
+
- {"valid": true, "improvements": "Optional minor improvements"}
|
|
3586
|
+
|
|
3587
|
+
Skill to validate:
|
|
3588
|
+
`;
|
|
3453
3589
|
async function evaluateAndMaybeCreateSkill(opts) {
|
|
3454
3590
|
const { sessionId, skillManager, model } = opts;
|
|
3455
3591
|
if (!sessionId) {
|
|
@@ -3465,50 +3601,83 @@ ${existingList}
|
|
|
3465
3601
|
|
|
3466
3602
|
Respond with a JSON object now.`;
|
|
3467
3603
|
try {
|
|
3468
|
-
let
|
|
3604
|
+
let structuredOutput;
|
|
3469
3605
|
for await (const message of query({
|
|
3470
3606
|
prompt,
|
|
3471
3607
|
options: {
|
|
3472
3608
|
resume: sessionId,
|
|
3473
3609
|
model,
|
|
3474
3610
|
maxTurns: 1,
|
|
3475
|
-
allowedTools: []
|
|
3611
|
+
allowedTools: [],
|
|
3612
|
+
effort: "low",
|
|
3613
|
+
outputFormat: SKILL_DECISION_OUTPUT_FORMAT
|
|
3476
3614
|
}
|
|
3477
3615
|
})) {
|
|
3478
|
-
if (message.type === "
|
|
3479
|
-
const assistantMsg = message;
|
|
3480
|
-
for (const block of assistantMsg.message.content) {
|
|
3481
|
-
if (block.type === "text") {
|
|
3482
|
-
responseText += block.text;
|
|
3483
|
-
}
|
|
3484
|
-
}
|
|
3485
|
-
} else if (message.type === "result") {
|
|
3616
|
+
if (message.type === "result") {
|
|
3486
3617
|
const resultMsg = message;
|
|
3487
|
-
if (resultMsg.subtype === "success"
|
|
3618
|
+
if (resultMsg.subtype === "success") {
|
|
3619
|
+
const successMsg = resultMsg;
|
|
3620
|
+
structuredOutput = successMsg.structured_output;
|
|
3488
3621
|
log.debug(
|
|
3489
|
-
`Skill evaluation cost: $${
|
|
3622
|
+
`Skill evaluation cost: $${successMsg.total_cost_usd.toFixed(4)}`
|
|
3490
3623
|
);
|
|
3491
3624
|
}
|
|
3492
3625
|
}
|
|
3493
3626
|
}
|
|
3494
|
-
const decision =
|
|
3627
|
+
const decision = structuredOutput ? safeParse(SkillDecisionSchema, structuredOutput) : null;
|
|
3495
3628
|
if (!decision) {
|
|
3496
3629
|
log.debug("Skill evaluation: no valid JSON in response");
|
|
3497
3630
|
return;
|
|
3498
3631
|
}
|
|
3499
|
-
await executeSkillDecision(decision, skillManager);
|
|
3632
|
+
await executeSkillDecision(decision, skillManager, sessionId, model);
|
|
3500
3633
|
} catch (err) {
|
|
3501
3634
|
log.debug(`Skill evaluation error: ${errorMessage(err)}`);
|
|
3502
3635
|
}
|
|
3503
3636
|
}
|
|
3504
|
-
async function
|
|
3637
|
+
async function validateSkill(name, description, instructions, sessionId, model) {
|
|
3638
|
+
try {
|
|
3639
|
+
const skillDoc = `Name: ${name}
|
|
3640
|
+
Description: ${description}
|
|
3641
|
+
|
|
3642
|
+
Instructions:
|
|
3643
|
+
${instructions}`;
|
|
3644
|
+
let structuredOutput;
|
|
3645
|
+
for await (const message of query({
|
|
3646
|
+
prompt: SKILL_VALIDATION_PROMPT + skillDoc,
|
|
3647
|
+
options: {
|
|
3648
|
+
resume: sessionId,
|
|
3649
|
+
model,
|
|
3650
|
+
maxTurns: SKILL_VALIDATION_MAX_TURNS,
|
|
3651
|
+
allowedTools: [],
|
|
3652
|
+
effort: "low",
|
|
3653
|
+
outputFormat: SKILL_VALIDATION_OUTPUT_FORMAT
|
|
3654
|
+
}
|
|
3655
|
+
})) {
|
|
3656
|
+
if (message.type === "result") {
|
|
3657
|
+
const resultMsg = message;
|
|
3658
|
+
if (resultMsg.subtype === "success") {
|
|
3659
|
+
structuredOutput = resultMsg.structured_output;
|
|
3660
|
+
}
|
|
3661
|
+
}
|
|
3662
|
+
}
|
|
3663
|
+
const parsed = structuredOutput;
|
|
3664
|
+
if (parsed) {
|
|
3665
|
+
return { valid: parsed.valid, improvements: parsed.improvements || void 0 };
|
|
3666
|
+
}
|
|
3667
|
+
return { valid: true };
|
|
3668
|
+
} catch (err) {
|
|
3669
|
+
log.debug(`Skill validation error: ${errorMessage(err)}`);
|
|
3670
|
+
return { valid: true };
|
|
3671
|
+
}
|
|
3672
|
+
}
|
|
3673
|
+
async function executeSkillDecision(decision, skillManager, sessionId, model) {
|
|
3505
3674
|
switch (decision.action) {
|
|
3506
3675
|
case "create": {
|
|
3507
3676
|
if (!decision.name || !decision.instructions) {
|
|
3508
3677
|
log.debug("Skill create skipped: missing name or instructions");
|
|
3509
3678
|
return;
|
|
3510
3679
|
}
|
|
3511
|
-
|
|
3680
|
+
const skillName = normalizeSkillName(decision.name);
|
|
3512
3681
|
if (!skillName) {
|
|
3513
3682
|
log.debug(`Skill create skipped: name "${decision.name}" cannot be normalized`);
|
|
3514
3683
|
return;
|
|
@@ -3526,10 +3695,33 @@ async function executeSkillDecision(decision, skillManager) {
|
|
|
3526
3695
|
log.debug(`Skill create skipped: similar skill "${existing.name}" exists`);
|
|
3527
3696
|
return;
|
|
3528
3697
|
}
|
|
3698
|
+
let instructions = decision.instructions;
|
|
3699
|
+
if (sessionId) {
|
|
3700
|
+
log.debug(`Validating skill "${skillName}" before activation...`);
|
|
3701
|
+
const validation = await validateSkill(
|
|
3702
|
+
skillName,
|
|
3703
|
+
decision.description || "",
|
|
3704
|
+
instructions,
|
|
3705
|
+
sessionId,
|
|
3706
|
+
model
|
|
3707
|
+
);
|
|
3708
|
+
if (!validation.valid) {
|
|
3709
|
+
log.info(
|
|
3710
|
+
`Skill "${skillName}" failed validation: ${validation.improvements}. Skipping creation.`
|
|
3711
|
+
);
|
|
3712
|
+
return;
|
|
3713
|
+
}
|
|
3714
|
+
if (validation.improvements) {
|
|
3715
|
+
log.debug(`Skill "${skillName}" validated with suggestions: ${validation.improvements}`);
|
|
3716
|
+
instructions += `
|
|
3717
|
+
|
|
3718
|
+
<!-- Validation notes: ${validation.improvements} -->`;
|
|
3719
|
+
}
|
|
3720
|
+
}
|
|
3529
3721
|
const result = await skillManager.create(
|
|
3530
3722
|
skillName,
|
|
3531
3723
|
decision.description || "",
|
|
3532
|
-
|
|
3724
|
+
instructions,
|
|
3533
3725
|
{
|
|
3534
3726
|
source: "auto_extracted",
|
|
3535
3727
|
emoji: decision.emoji,
|
|
@@ -3540,7 +3732,7 @@ async function executeSkillDecision(decision, skillManager) {
|
|
|
3540
3732
|
await skillManager.syncToAgentSkills(
|
|
3541
3733
|
skillName,
|
|
3542
3734
|
decision.description || "",
|
|
3543
|
-
|
|
3735
|
+
instructions,
|
|
3544
3736
|
"1.0.0",
|
|
3545
3737
|
{
|
|
3546
3738
|
source: "auto_extracted",
|
|
@@ -3549,7 +3741,7 @@ async function executeSkillDecision(decision, skillManager) {
|
|
|
3549
3741
|
sourceSkillId: result.id
|
|
3550
3742
|
}
|
|
3551
3743
|
);
|
|
3552
|
-
log.info(`Auto-created skill "${skillName}": ${decision.reason}`);
|
|
3744
|
+
log.info(`Auto-created skill "${skillName}" (validated): ${decision.reason}`);
|
|
3553
3745
|
}
|
|
3554
3746
|
break;
|
|
3555
3747
|
}
|
|
@@ -3558,6 +3750,21 @@ async function executeSkillDecision(decision, skillManager) {
|
|
|
3558
3750
|
log.debug("Skill update skipped: missing skill name or instructions");
|
|
3559
3751
|
return;
|
|
3560
3752
|
}
|
|
3753
|
+
if (sessionId) {
|
|
3754
|
+
const validation = await validateSkill(
|
|
3755
|
+
decision.existing_skill_name,
|
|
3756
|
+
decision.improved_description || "",
|
|
3757
|
+
decision.improved_instructions,
|
|
3758
|
+
sessionId,
|
|
3759
|
+
model
|
|
3760
|
+
);
|
|
3761
|
+
if (!validation.valid) {
|
|
3762
|
+
log.info(
|
|
3763
|
+
`Skill update for "${decision.existing_skill_name}" failed validation. Skipping.`
|
|
3764
|
+
);
|
|
3765
|
+
return;
|
|
3766
|
+
}
|
|
3767
|
+
}
|
|
3561
3768
|
const updated = skillManager.update(
|
|
3562
3769
|
decision.existing_skill_name,
|
|
3563
3770
|
decision.improved_instructions,
|
|
@@ -3575,32 +3782,6 @@ async function executeSkillDecision(decision, skillManager) {
|
|
|
3575
3782
|
break;
|
|
3576
3783
|
}
|
|
3577
3784
|
}
|
|
3578
|
-
function parseJsonResponse(text) {
|
|
3579
|
-
const trimmed = text.trim();
|
|
3580
|
-
const candidates = [trimmed];
|
|
3581
|
-
const start = trimmed.indexOf("{");
|
|
3582
|
-
if (start !== -1) {
|
|
3583
|
-
let depth = 0;
|
|
3584
|
-
for (let i = start; i < trimmed.length; i++) {
|
|
3585
|
-
if (trimmed[i] === "{") depth++;
|
|
3586
|
-
else if (trimmed[i] === "}") depth--;
|
|
3587
|
-
if (depth === 0) {
|
|
3588
|
-
candidates.push(trimmed.slice(start, i + 1));
|
|
3589
|
-
break;
|
|
3590
|
-
}
|
|
3591
|
-
}
|
|
3592
|
-
}
|
|
3593
|
-
for (const candidate of candidates) {
|
|
3594
|
-
try {
|
|
3595
|
-
const parsed = JSON.parse(candidate);
|
|
3596
|
-
const validated = safeParse(SkillDecisionSchema, parsed);
|
|
3597
|
-
if (validated) return validated;
|
|
3598
|
-
} catch {
|
|
3599
|
-
continue;
|
|
3600
|
-
}
|
|
3601
|
-
}
|
|
3602
|
-
return null;
|
|
3603
|
-
}
|
|
3604
3785
|
|
|
3605
3786
|
// src/utils/retry.ts
|
|
3606
3787
|
async function withRetry(fn, opts = {}) {
|
|
@@ -5450,7 +5631,7 @@ function stripMcpPrefix(toolName) {
|
|
|
5450
5631
|
const match = toolName.match(/^mcp__[^_]+(?:__)?(.+)$/);
|
|
5451
5632
|
return match ? match[1] : toolName;
|
|
5452
5633
|
}
|
|
5453
|
-
function createEventHooks(taskId, toolCallRecords) {
|
|
5634
|
+
function createEventHooks(taskId, toolCallRecords, toolFailures = []) {
|
|
5454
5635
|
const preToolUseHook = async (input) => {
|
|
5455
5636
|
if (input.hook_event_name !== "PreToolUse") return { continue: true };
|
|
5456
5637
|
const preInput = input;
|
|
@@ -5488,9 +5669,30 @@ function createEventHooks(taskId, toolCallRecords) {
|
|
|
5488
5669
|
});
|
|
5489
5670
|
return {};
|
|
5490
5671
|
};
|
|
5672
|
+
const postToolUseFailureHook = async (input) => {
|
|
5673
|
+
if (input.hook_event_name !== "PostToolUseFailure") return {};
|
|
5674
|
+
const failureInput = input;
|
|
5675
|
+
const rawName = failureInput.tool_name;
|
|
5676
|
+
const displayName = stripMcpPrefix(rawName);
|
|
5677
|
+
const errorStr = failureInput.error;
|
|
5678
|
+
toolFailures.push({
|
|
5679
|
+
toolName: displayName,
|
|
5680
|
+
input: failureInput.tool_input || {},
|
|
5681
|
+
error: errorStr.slice(0, 500),
|
|
5682
|
+
timestamp: Date.now()
|
|
5683
|
+
});
|
|
5684
|
+
await emitEvent(taskId, "tool_failure", {
|
|
5685
|
+
name: displayName,
|
|
5686
|
+
error: errorStr.slice(0, 500),
|
|
5687
|
+
failure_count: toolFailures.filter((f) => f.toolName === displayName).length
|
|
5688
|
+
});
|
|
5689
|
+
log.warn(`Tool failure tracked: ${displayName} (total: ${toolFailures.length})`);
|
|
5690
|
+
return {};
|
|
5691
|
+
};
|
|
5491
5692
|
return {
|
|
5492
5693
|
PreToolUse: [{ hooks: [preToolUseHook] }],
|
|
5493
|
-
PostToolUse: [{ hooks: [postToolUseHook] }]
|
|
5694
|
+
PostToolUse: [{ hooks: [postToolUseHook] }],
|
|
5695
|
+
PostToolUseFailure: [{ hooks: [postToolUseFailureHook] }]
|
|
5494
5696
|
};
|
|
5495
5697
|
}
|
|
5496
5698
|
|
|
@@ -5602,7 +5804,16 @@ Workflow for form filling (e.g. "\u6CE8\u518C\u4E00\u4E2A Gmail \u8D26\u53F7"):
|
|
|
5602
5804
|
4. Check the screenshot \u2014 if validation errors appear, re-snapshot and fix
|
|
5603
5805
|
5. When a username/email is taken, append a random 4-digit suffix and retry
|
|
5604
5806
|
|
|
5807
|
+
7. FAILURE RECOVERY \u2014 Strategy Switching:
|
|
5808
|
+
If a tool call fails, do NOT repeat the same call. Reflect on why it failed and switch strategy:
|
|
5809
|
+
- CSS selector fails \u2192 use browser_snapshot refs instead
|
|
5810
|
+
- Direct navigation fails \u2192 search for the page first
|
|
5811
|
+
- API/programmatic approach fails \u2192 use browser UI instead
|
|
5812
|
+
- One data source fails \u2192 try an alternative source
|
|
5813
|
+
- If stuck after 2 failed attempts at the same step, try a fundamentally different approach
|
|
5814
|
+
|
|
5605
5815
|
Guidelines:
|
|
5816
|
+
- SELF-VERIFY before finishing: re-read modified files, take a final screenshot after browser actions, or re-check output to confirm correctness. Never assume success without confirming the end state.
|
|
5606
5817
|
- Always use the real browser for web tasks, never try to fetch URLs programmatically
|
|
5607
5818
|
- ALWAYS use browser_snapshot as your primary way to understand a page \u2014 the ref table gives actionable refs, the screenshot gives visual context
|
|
5608
5819
|
- Use browser_act to batch multiple actions \u2014 fill an entire form in one call instead of individual clicks/types
|
|
@@ -5667,12 +5878,18 @@ var TaskProcessor = class {
|
|
|
5667
5878
|
memoryManager = null;
|
|
5668
5879
|
skillManager;
|
|
5669
5880
|
sessionId = null;
|
|
5881
|
+
userId = null;
|
|
5670
5882
|
/** In-memory conversation history, keyed by conversation_id */
|
|
5671
5883
|
historyCache = /* @__PURE__ */ new Map();
|
|
5672
5884
|
constructor() {
|
|
5673
5885
|
this.skillManager = new SkillManager();
|
|
5674
5886
|
}
|
|
5887
|
+
/** @deprecated Use setUserId() instead */
|
|
5675
5888
|
init(userId) {
|
|
5889
|
+
this.setUserId(userId);
|
|
5890
|
+
}
|
|
5891
|
+
setUserId(userId) {
|
|
5892
|
+
this.userId = userId;
|
|
5676
5893
|
this.memoryManager = new MemoryManager();
|
|
5677
5894
|
this.skillManager.setUserId(userId);
|
|
5678
5895
|
this.skillManager.loadFromDb().catch((err) => {
|
|
@@ -5702,6 +5919,7 @@ var TaskProcessor = class {
|
|
|
5702
5919
|
log.info(`Processing task ${task.id.slice(0, 8)}...`);
|
|
5703
5920
|
let finalResponse = "";
|
|
5704
5921
|
const toolCallRecords = [];
|
|
5922
|
+
const toolFailures = [];
|
|
5705
5923
|
let tokenUsage;
|
|
5706
5924
|
let agentSessionId;
|
|
5707
5925
|
try {
|
|
@@ -5754,18 +5972,15 @@ var TaskProcessor = class {
|
|
|
5754
5972
|
onUserWaitStart: () => taskTimeout.pause(),
|
|
5755
5973
|
onUserWaitEnd: () => taskTimeout.resume()
|
|
5756
5974
|
});
|
|
5757
|
-
const eventHooks = createEventHooks(task.id, toolCallRecords);
|
|
5975
|
+
const eventHooks = createEventHooks(task.id, toolCallRecords, toolFailures);
|
|
5758
5976
|
const allowedTools = [
|
|
5759
|
-
// SDK built-in tools
|
|
5760
5977
|
"Read",
|
|
5761
5978
|
"Write",
|
|
5762
5979
|
"Edit",
|
|
5763
5980
|
"Bash",
|
|
5764
5981
|
"Glob",
|
|
5765
5982
|
"Grep",
|
|
5766
|
-
// Browser MCP tools
|
|
5767
5983
|
...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
|
|
5768
|
-
// Agent MCP tools (memory, skills)
|
|
5769
5984
|
"mcp__assistme-agent__memory_store",
|
|
5770
5985
|
"mcp__assistme-agent__skill_create",
|
|
5771
5986
|
"mcp__assistme-agent__skill_improve",
|
|
@@ -5776,29 +5991,19 @@ var TaskProcessor = class {
|
|
|
5776
5991
|
"mcp__assistme-agent__skill_browse",
|
|
5777
5992
|
"mcp__assistme-agent__skill_add",
|
|
5778
5993
|
"mcp__assistme-agent__skill_publish",
|
|
5779
|
-
// User interaction
|
|
5780
5994
|
"mcp__assistme-agent__ask_user",
|
|
5781
|
-
// Job automation tools
|
|
5782
5995
|
"mcp__assistme-agent__job_run",
|
|
5783
5996
|
"mcp__assistme-agent__job_schedule",
|
|
5784
5997
|
"mcp__assistme-agent__job_status",
|
|
5785
|
-
// Credential tools (local storage)
|
|
5786
5998
|
"mcp__assistme-agent__credential_get",
|
|
5787
5999
|
"mcp__assistme-agent__credential_set",
|
|
5788
6000
|
"mcp__assistme-agent__credential_list",
|
|
5789
6001
|
"mcp__assistme-agent__credential_remove"
|
|
5790
6002
|
];
|
|
5791
|
-
|
|
5792
|
-
|
|
5793
|
-
|
|
5794
|
-
|
|
5795
|
-
role: "user",
|
|
5796
|
-
content: task.prompt
|
|
5797
|
-
},
|
|
5798
|
-
parent_tool_use_id: null,
|
|
5799
|
-
session_id: ""
|
|
5800
|
-
};
|
|
5801
|
-
}
|
|
6003
|
+
const mcpServers = {
|
|
6004
|
+
"assistme-browser": browserServer,
|
|
6005
|
+
"assistme-agent": agentToolsServer
|
|
6006
|
+
};
|
|
5802
6007
|
const options = {
|
|
5803
6008
|
model: config.model,
|
|
5804
6009
|
systemPrompt,
|
|
@@ -5807,19 +6012,16 @@ var TaskProcessor = class {
|
|
|
5807
6012
|
allowedTools,
|
|
5808
6013
|
permissionMode: "bypassPermissions",
|
|
5809
6014
|
allowDangerouslySkipPermissions: true,
|
|
5810
|
-
mcpServers
|
|
5811
|
-
"assistme-browser": browserServer,
|
|
5812
|
-
"assistme-agent": agentToolsServer
|
|
5813
|
-
},
|
|
6015
|
+
mcpServers,
|
|
5814
6016
|
hooks: eventHooks,
|
|
5815
6017
|
persistSession: true,
|
|
5816
|
-
abortController
|
|
6018
|
+
abortController,
|
|
6019
|
+
thinking: { type: "adaptive" },
|
|
6020
|
+
effort: "high",
|
|
6021
|
+
maxBudgetUsd: MAX_BUDGET_USD
|
|
5817
6022
|
};
|
|
5818
6023
|
try {
|
|
5819
|
-
for await (const message of query2({
|
|
5820
|
-
prompt: promptMessages(),
|
|
5821
|
-
options
|
|
5822
|
-
})) {
|
|
6024
|
+
for await (const message of query2({ prompt: task.prompt, options })) {
|
|
5823
6025
|
switch (message.type) {
|
|
5824
6026
|
case "assistant": {
|
|
5825
6027
|
const assistantMsg = message;
|
|
@@ -5827,16 +6029,11 @@ var TaskProcessor = class {
|
|
|
5827
6029
|
if (block.type === "text") {
|
|
5828
6030
|
finalResponse += block.text;
|
|
5829
6031
|
log.agent(block.text);
|
|
5830
|
-
await emitEvent(task.id, "text_delta", {
|
|
5831
|
-
text: block.text
|
|
5832
|
-
});
|
|
6032
|
+
await emitEvent(task.id, "text_delta", { text: block.text });
|
|
5833
6033
|
} else if (block.type === "thinking" && "thinking" in block) {
|
|
5834
6034
|
const thinkingBlock = block;
|
|
5835
|
-
|
|
5836
|
-
|
|
5837
|
-
await emitEvent(task.id, "thinking", {
|
|
5838
|
-
text: thinkingText
|
|
5839
|
-
});
|
|
6035
|
+
log.debug(`Thinking: ${thinkingBlock.thinking.slice(0, 100)}...`);
|
|
6036
|
+
await emitEvent(task.id, "thinking", { text: thinkingBlock.thinking });
|
|
5840
6037
|
}
|
|
5841
6038
|
}
|
|
5842
6039
|
break;
|
|
@@ -5852,13 +6049,14 @@ var TaskProcessor = class {
|
|
|
5852
6049
|
if (!finalResponse && successMsg.result) {
|
|
5853
6050
|
finalResponse = successMsg.result;
|
|
5854
6051
|
}
|
|
6052
|
+
agentSessionId = successMsg.session_id;
|
|
5855
6053
|
log.info(
|
|
5856
6054
|
`Task cost: $${successMsg.total_cost_usd.toFixed(4)}, turns: ${successMsg.num_turns}`
|
|
5857
6055
|
);
|
|
5858
6056
|
} else {
|
|
5859
|
-
const
|
|
5860
|
-
log.warn(`SDK result: ${
|
|
5861
|
-
for (const err of
|
|
6057
|
+
const errMsg = resultMsg;
|
|
6058
|
+
log.warn(`SDK result: ${errMsg.subtype}`);
|
|
6059
|
+
for (const err of errMsg.errors) {
|
|
5862
6060
|
await emitEvent(task.id, "error", { message: err });
|
|
5863
6061
|
}
|
|
5864
6062
|
}
|
|
@@ -5892,16 +6090,21 @@ var TaskProcessor = class {
|
|
|
5892
6090
|
convHistory.splice(0, convHistory.length - MAX_HISTORY_ENTRIES * 2);
|
|
5893
6091
|
}
|
|
5894
6092
|
this.historyCache.set(task.conversation_id, convHistory);
|
|
6093
|
+
if (this.memoryManager) {
|
|
6094
|
+
this.memoryManager.compressIfNeeded().catch(
|
|
6095
|
+
(err) => log.debug(`Memory compression skipped: ${err}`)
|
|
6096
|
+
);
|
|
6097
|
+
}
|
|
5895
6098
|
if (agentSessionId) {
|
|
5896
6099
|
this.evaluateSkillPostTask(agentSessionId, config.model).catch(
|
|
5897
6100
|
(err) => log.debug(`Post-task skill evaluation skipped: ${err}`)
|
|
5898
6101
|
);
|
|
5899
6102
|
}
|
|
5900
6103
|
} catch (err) {
|
|
5901
|
-
const
|
|
5902
|
-
log.error(`Task failed: ${
|
|
5903
|
-
await failTask(task.id,
|
|
5904
|
-
await emitEvent(task.id, "error", { message:
|
|
6104
|
+
const errMsg = errorMessage(err);
|
|
6105
|
+
log.error(`Task failed: ${errMsg}`);
|
|
6106
|
+
await failTask(task.id, errMsg);
|
|
6107
|
+
await emitEvent(task.id, "error", { message: errMsg });
|
|
5905
6108
|
await emitEvent(task.id, "status_change", { status: "failed" });
|
|
5906
6109
|
} finally {
|
|
5907
6110
|
setCorrelationId(null);
|
|
@@ -6402,7 +6605,7 @@ function registerJobCommands(program2) {
|
|
|
6402
6605
|
jobCmd.command("list").description("List your defined jobs").action(async () => {
|
|
6403
6606
|
try {
|
|
6404
6607
|
const userId = await getCurrentUserId();
|
|
6405
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
6608
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-CJ7HM4GZ.js");
|
|
6406
6609
|
const runner = new JobRunner2();
|
|
6407
6610
|
const jobs = await runner.listJobs();
|
|
6408
6611
|
if (jobs.length === 0) {
|
|
@@ -6426,7 +6629,7 @@ function registerJobCommands(program2) {
|
|
|
6426
6629
|
jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
|
|
6427
6630
|
try {
|
|
6428
6631
|
const userId = await getCurrentUserId();
|
|
6429
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
6632
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-CJ7HM4GZ.js");
|
|
6430
6633
|
const runner = new JobRunner2();
|
|
6431
6634
|
const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
|
|
6432
6635
|
if (runs.length === 0) {
|
|
@@ -6465,7 +6668,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
|
|
|
6465
6668
|
process.exit(1);
|
|
6466
6669
|
}
|
|
6467
6670
|
const userId = await getCurrentUserId();
|
|
6468
|
-
const { JobRunner: JobRunner2 } = await import("./job-runner-
|
|
6671
|
+
const { JobRunner: JobRunner2 } = await import("./job-runner-CJ7HM4GZ.js");
|
|
6469
6672
|
const runner = new JobRunner2();
|
|
6470
6673
|
const job = await runner.loadJob(name);
|
|
6471
6674
|
if (!job) {
|