assistme 0.3.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/dist/{chunk-4YWS463E.js → chunk-KAS2PTOX.js} +32 -0
  2. package/dist/index.js +358 -93
  3. package/dist/{job-runner-JT3JWZBV.js → job-runner-AT3V6LAQ.js} +1 -1
  4. package/package.json +1 -1
  5. package/src/agent/event-hooks.ts +43 -2
  6. package/src/agent/memory.ts +124 -0
  7. package/src/agent/processor.ts +42 -64
  8. package/src/agent/skill-evaluator.ts +173 -61
  9. package/src/agent/system-prompt.ts +9 -0
  10. package/src/commands/start.ts +15 -1
  11. package/src/db/session-log.ts +71 -0
  12. package/src/db/types.ts +3 -1
  13. package/src/utils/constants.ts +21 -0
  14. package/src/utils/logger.ts +28 -0
  15. package/src/utils/schemas.ts +33 -0
  16. package/{src → tests}/agent/event-hooks.test.ts +121 -33
  17. package/{src → tests}/agent/mcp-servers.test.ts +43 -29
  18. package/{src → tests}/agent/memory.test.ts +71 -3
  19. package/{src → tests}/agent/processor.test.ts +59 -55
  20. package/{src → tests}/agent/scheduler.test.ts +1 -1
  21. package/{src → tests}/agent/session.test.ts +20 -10
  22. package/{src → tests}/agent/skills.test.ts +51 -29
  23. package/{src → tests}/credentials/credential-store.test.ts +23 -8
  24. package/{src → tests}/credentials/encryption.test.ts +1 -1
  25. package/{src → tests}/db/supabase.test.ts +4 -4
  26. package/{src → tests}/tools/filesystem.test.ts +6 -15
  27. package/{src → tests}/tools/shell.test.ts +1 -1
  28. package/{src → tests}/utils/config.test.ts +2 -1
  29. package/{src → tests}/utils/rate-limiter.test.ts +1 -1
  30. package/{src → tests}/utils/retry.test.ts +6 -12
  31. package/tsconfig.json +1 -1
  32. package/vitest.config.ts +1 -1
package/dist/index.js CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  CDP_COMMAND_TIMEOUT_MS,
6
6
  FRAME_CONTEXTS_MAX_SIZE,
7
7
  JobRunner,
8
+ MAX_BUDGET_USD,
8
9
  MAX_COMPLETE_TASK_RETRIES,
9
10
  MAX_CONTENT_SEARCH_FILES,
10
11
  MAX_CONTENT_SEARCH_RESULTS,
@@ -15,10 +16,14 @@ import {
15
16
  MAX_SKILL_RECORD_RESULT_LENGTH,
16
17
  MAX_TOOL_INPUT_LOG_LENGTH,
17
18
  MAX_TOOL_RESULT_LENGTH,
19
+ MEMORY_COMPRESSION_TARGET,
20
+ MEMORY_COMPRESSION_THRESHOLD,
21
+ MEMORY_DEDUP_SIMILARITY_THRESHOLD,
18
22
  SCHEDULER_INTERVAL_MS,
19
23
  SHELL_MAX_OUTPUT,
20
24
  SHELL_TIMEOUT_MS,
21
25
  SKILL_DESCRIPTION_BUDGET_CHARS,
26
+ SKILL_VALIDATION_MAX_TURNS,
22
27
  SkillCreateResultSchema,
23
28
  SkillDecisionSchema,
24
29
  SkillRowSchema,
@@ -30,9 +35,10 @@ import {
30
35
  readAuthStore,
31
36
  safeParse,
32
37
  setCorrelationId,
38
+ setLogHook,
33
39
  setLogLevel,
34
40
  writeAuthStore
35
- } from "./chunk-4YWS463E.js";
41
+ } from "./chunk-KAS2PTOX.js";
36
42
  import {
37
43
  clearConfig,
38
44
  getConfig,
@@ -2807,7 +2813,92 @@ var MemoryManager = class {
2807
2813
  });
2808
2814
  return result.count;
2809
2815
  }
2816
+ // ── Compression & Deduplication ──────────────────────────────────
2817
+ /**
2818
+ * Check if memory count exceeds threshold and compress if needed.
2819
+ * Called automatically after task completion.
2820
+ */
2821
+ async compressIfNeeded() {
2822
+ try {
2823
+ const all = await this.list(void 0, 200);
2824
+ if (all.length < MEMORY_COMPRESSION_THRESHOLD) {
2825
+ return 0;
2826
+ }
2827
+ log.info(`Memory compression triggered: ${all.length} memories (threshold: ${MEMORY_COMPRESSION_THRESHOLD})`);
2828
+ let removed = 0;
2829
+ const now = Date.now();
2830
+ for (const m of all) {
2831
+ if (m.expires_at && new Date(m.expires_at).getTime() < now) {
2832
+ await this.remove(m.id);
2833
+ removed++;
2834
+ }
2835
+ }
2836
+ const remaining = all.filter(
2837
+ (m) => !m.expires_at || new Date(m.expires_at).getTime() >= now
2838
+ );
2839
+ const duplicateIds = this.findDuplicates(remaining);
2840
+ for (const id of duplicateIds) {
2841
+ await this.remove(id);
2842
+ removed++;
2843
+ }
2844
+ const afterDedup = remaining.filter((m) => !duplicateIds.has(m.id));
2845
+ if (afterDedup.length > MEMORY_COMPRESSION_TARGET) {
2846
+ const toRemove = afterDedup.sort((a, b) => {
2847
+ if (a.importance !== b.importance) return a.importance - b.importance;
2848
+ if (a.access_count !== b.access_count) return a.access_count - b.access_count;
2849
+ return new Date(a.created_at).getTime() - new Date(b.created_at).getTime();
2850
+ }).slice(0, afterDedup.length - MEMORY_COMPRESSION_TARGET);
2851
+ for (const m of toRemove) {
2852
+ if (m.category === "instruction" && m.importance >= 8) continue;
2853
+ await this.remove(m.id);
2854
+ removed++;
2855
+ }
2856
+ }
2857
+ if (removed > 0) {
2858
+ log.info(`Memory compression complete: removed ${removed} memories`);
2859
+ }
2860
+ return removed;
2861
+ } catch (err) {
2862
+ log.warn(`Memory compression error: ${err instanceof Error ? err.message : err}`);
2863
+ return 0;
2864
+ }
2865
+ }
2866
+ /**
2867
+ * Find duplicate memories based on content similarity.
2868
+ * Returns the IDs of memories that should be removed (keeps the higher-importance duplicate).
2869
+ */
2870
+ findDuplicates(memories) {
2871
+ const toRemove = /* @__PURE__ */ new Set();
2872
+ for (let i = 0; i < memories.length; i++) {
2873
+ if (toRemove.has(memories[i].id)) continue;
2874
+ for (let j = i + 1; j < memories.length; j++) {
2875
+ if (toRemove.has(memories[j].id)) continue;
2876
+ if (memories[i].category !== memories[j].category) continue;
2877
+ const similarity = computeWordOverlap(memories[i].content, memories[j].content);
2878
+ if (similarity >= MEMORY_DEDUP_SIMILARITY_THRESHOLD) {
2879
+ if (memories[i].importance > memories[j].importance || memories[i].importance === memories[j].importance && new Date(memories[i].created_at) > new Date(memories[j].created_at)) {
2880
+ toRemove.add(memories[j].id);
2881
+ } else {
2882
+ toRemove.add(memories[i].id);
2883
+ }
2884
+ }
2885
+ }
2886
+ }
2887
+ return toRemove;
2888
+ }
2810
2889
  };
2890
+ function computeWordOverlap(a, b) {
2891
+ const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(Boolean));
2892
+ const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(Boolean));
2893
+ if (wordsA.size === 0 && wordsB.size === 0) return 1;
2894
+ if (wordsA.size === 0 || wordsB.size === 0) return 0;
2895
+ let intersection = 0;
2896
+ for (const w of wordsA) {
2897
+ if (wordsB.has(w)) intersection++;
2898
+ }
2899
+ const union = wordsA.size + wordsB.size - intersection;
2900
+ return union === 0 ? 0 : intersection / union;
2901
+ }
2811
2902
 
2812
2903
  // src/agent/skills.ts
2813
2904
  import { execSync as execSync2 } from "child_process";
@@ -3423,6 +3514,36 @@ function preprocessDynamicContext(content, cwd) {
3423
3514
  import {
3424
3515
  query
3425
3516
  } from "@anthropic-ai/claude-agent-sdk";
3517
+ var SKILL_DECISION_OUTPUT_FORMAT = {
3518
+ type: "json_schema",
3519
+ schema: {
3520
+ type: "object",
3521
+ properties: {
3522
+ action: { type: "string", enum: ["create", "update", "skip"] },
3523
+ name: { type: "string" },
3524
+ description: { type: "string" },
3525
+ instructions: { type: "string" },
3526
+ emoji: { type: "string" },
3527
+ keywords: { type: "array", items: { type: "string" } },
3528
+ existing_skill_name: { type: "string" },
3529
+ improved_instructions: { type: "string" },
3530
+ improved_description: { type: "string" },
3531
+ reason: { type: "string" }
3532
+ },
3533
+ required: ["action", "reason"]
3534
+ }
3535
+ };
3536
+ var SKILL_VALIDATION_OUTPUT_FORMAT = {
3537
+ type: "json_schema",
3538
+ schema: {
3539
+ type: "object",
3540
+ properties: {
3541
+ valid: { type: "boolean" },
3542
+ improvements: { type: "string" }
3543
+ },
3544
+ required: ["valid"]
3545
+ }
3546
+ };
3426
3547
  var SKILL_EVALUATION_PROMPT = `You just completed a task. Now evaluate whether it should be saved as a reusable Agent Skill.
3427
3548
 
3428
3549
  ## Agent Skills Format (agentskills.io)
@@ -3450,6 +3571,22 @@ Respond with ONLY a JSON object (no markdown, no explanation outside the JSON).
3450
3571
  Always include "reason" explaining your decision.
3451
3572
 
3452
3573
  Use your judgment \u2014 no rigid rules. Consider: Is this repeatable? Can it be generalized? Would it save time next time?`;
3574
+ var SKILL_VALIDATION_PROMPT = `Validate this auto-generated skill before it becomes active.
3575
+
3576
+ Check:
3577
+ 1. Are the instructions clear, complete, and actionable?
3578
+ 2. Do they use generic placeholders (not hardcoded values)?
3579
+ 3. Are error handling steps included?
3580
+ 4. Is the description accurate and searchable?
3581
+ 5. Would this actually work if followed step-by-step?
3582
+
3583
+ Respond with ONLY a JSON object:
3584
+ - {"valid": true, "improvements": null}
3585
+ - {"valid": false, "improvements": "Specific improvements needed"}
3586
+ - {"valid": true, "improvements": "Optional minor improvements"}
3587
+
3588
+ Skill to validate:
3589
+ `;
3453
3590
  async function evaluateAndMaybeCreateSkill(opts) {
3454
3591
  const { sessionId, skillManager, model } = opts;
3455
3592
  if (!sessionId) {
@@ -3465,50 +3602,83 @@ ${existingList}
3465
3602
 
3466
3603
  Respond with a JSON object now.`;
3467
3604
  try {
3468
- let responseText = "";
3605
+ let structuredOutput;
3469
3606
  for await (const message of query({
3470
3607
  prompt,
3471
3608
  options: {
3472
3609
  resume: sessionId,
3473
3610
  model,
3474
3611
  maxTurns: 1,
3475
- allowedTools: []
3612
+ allowedTools: [],
3613
+ effort: "low",
3614
+ outputFormat: SKILL_DECISION_OUTPUT_FORMAT
3476
3615
  }
3477
3616
  })) {
3478
- if (message.type === "assistant") {
3479
- const assistantMsg = message;
3480
- for (const block of assistantMsg.message.content) {
3481
- if (block.type === "text") {
3482
- responseText += block.text;
3483
- }
3484
- }
3485
- } else if (message.type === "result") {
3617
+ if (message.type === "result") {
3486
3618
  const resultMsg = message;
3487
- if (resultMsg.subtype === "success" && "total_cost_usd" in resultMsg) {
3619
+ if (resultMsg.subtype === "success") {
3620
+ const successMsg = resultMsg;
3621
+ structuredOutput = successMsg.structured_output;
3488
3622
  log.debug(
3489
- `Skill evaluation cost: $${resultMsg.total_cost_usd.toFixed(4)}`
3623
+ `Skill evaluation cost: $${successMsg.total_cost_usd.toFixed(4)}`
3490
3624
  );
3491
3625
  }
3492
3626
  }
3493
3627
  }
3494
- const decision = parseJsonResponse(responseText);
3628
+ const decision = structuredOutput ? safeParse(SkillDecisionSchema, structuredOutput) : null;
3495
3629
  if (!decision) {
3496
3630
  log.debug("Skill evaluation: no valid JSON in response");
3497
3631
  return;
3498
3632
  }
3499
- await executeSkillDecision(decision, skillManager);
3633
+ await executeSkillDecision(decision, skillManager, sessionId, model);
3500
3634
  } catch (err) {
3501
3635
  log.debug(`Skill evaluation error: ${errorMessage(err)}`);
3502
3636
  }
3503
3637
  }
3504
- async function executeSkillDecision(decision, skillManager) {
3638
+ async function validateSkill(name, description, instructions, sessionId, model) {
3639
+ try {
3640
+ const skillDoc = `Name: ${name}
3641
+ Description: ${description}
3642
+
3643
+ Instructions:
3644
+ ${instructions}`;
3645
+ let structuredOutput;
3646
+ for await (const message of query({
3647
+ prompt: SKILL_VALIDATION_PROMPT + skillDoc,
3648
+ options: {
3649
+ resume: sessionId,
3650
+ model,
3651
+ maxTurns: SKILL_VALIDATION_MAX_TURNS,
3652
+ allowedTools: [],
3653
+ effort: "low",
3654
+ outputFormat: SKILL_VALIDATION_OUTPUT_FORMAT
3655
+ }
3656
+ })) {
3657
+ if (message.type === "result") {
3658
+ const resultMsg = message;
3659
+ if (resultMsg.subtype === "success") {
3660
+ structuredOutput = resultMsg.structured_output;
3661
+ }
3662
+ }
3663
+ }
3664
+ const parsed = structuredOutput;
3665
+ if (parsed) {
3666
+ return { valid: parsed.valid, improvements: parsed.improvements || void 0 };
3667
+ }
3668
+ return { valid: true };
3669
+ } catch (err) {
3670
+ log.debug(`Skill validation error: ${errorMessage(err)}`);
3671
+ return { valid: true };
3672
+ }
3673
+ }
3674
+ async function executeSkillDecision(decision, skillManager, sessionId, model) {
3505
3675
  switch (decision.action) {
3506
3676
  case "create": {
3507
3677
  if (!decision.name || !decision.instructions) {
3508
3678
  log.debug("Skill create skipped: missing name or instructions");
3509
3679
  return;
3510
3680
  }
3511
- let skillName = normalizeSkillName(decision.name);
3681
+ const skillName = normalizeSkillName(decision.name);
3512
3682
  if (!skillName) {
3513
3683
  log.debug(`Skill create skipped: name "${decision.name}" cannot be normalized`);
3514
3684
  return;
@@ -3526,10 +3696,33 @@ async function executeSkillDecision(decision, skillManager) {
3526
3696
  log.debug(`Skill create skipped: similar skill "${existing.name}" exists`);
3527
3697
  return;
3528
3698
  }
3699
+ let instructions = decision.instructions;
3700
+ if (sessionId) {
3701
+ log.debug(`Validating skill "${skillName}" before activation...`);
3702
+ const validation = await validateSkill(
3703
+ skillName,
3704
+ decision.description || "",
3705
+ instructions,
3706
+ sessionId,
3707
+ model
3708
+ );
3709
+ if (!validation.valid) {
3710
+ log.info(
3711
+ `Skill "${skillName}" failed validation: ${validation.improvements}. Skipping creation.`
3712
+ );
3713
+ return;
3714
+ }
3715
+ if (validation.improvements) {
3716
+ log.debug(`Skill "${skillName}" validated with suggestions: ${validation.improvements}`);
3717
+ instructions += `
3718
+
3719
+ <!-- Validation notes: ${validation.improvements} -->`;
3720
+ }
3721
+ }
3529
3722
  const result = await skillManager.create(
3530
3723
  skillName,
3531
3724
  decision.description || "",
3532
- decision.instructions,
3725
+ instructions,
3533
3726
  {
3534
3727
  source: "auto_extracted",
3535
3728
  emoji: decision.emoji,
@@ -3540,7 +3733,7 @@ async function executeSkillDecision(decision, skillManager) {
3540
3733
  await skillManager.syncToAgentSkills(
3541
3734
  skillName,
3542
3735
  decision.description || "",
3543
- decision.instructions,
3736
+ instructions,
3544
3737
  "1.0.0",
3545
3738
  {
3546
3739
  source: "auto_extracted",
@@ -3549,7 +3742,7 @@ async function executeSkillDecision(decision, skillManager) {
3549
3742
  sourceSkillId: result.id
3550
3743
  }
3551
3744
  );
3552
- log.info(`Auto-created skill "${skillName}": ${decision.reason}`);
3745
+ log.info(`Auto-created skill "${skillName}" (validated): ${decision.reason}`);
3553
3746
  }
3554
3747
  break;
3555
3748
  }
@@ -3558,6 +3751,21 @@ async function executeSkillDecision(decision, skillManager) {
3558
3751
  log.debug("Skill update skipped: missing skill name or instructions");
3559
3752
  return;
3560
3753
  }
3754
+ if (sessionId) {
3755
+ const validation = await validateSkill(
3756
+ decision.existing_skill_name,
3757
+ decision.improved_description || "",
3758
+ decision.improved_instructions,
3759
+ sessionId,
3760
+ model
3761
+ );
3762
+ if (!validation.valid) {
3763
+ log.info(
3764
+ `Skill update for "${decision.existing_skill_name}" failed validation. Skipping.`
3765
+ );
3766
+ return;
3767
+ }
3768
+ }
3561
3769
  const updated = skillManager.update(
3562
3770
  decision.existing_skill_name,
3563
3771
  decision.improved_instructions,
@@ -3575,32 +3783,6 @@ async function executeSkillDecision(decision, skillManager) {
3575
3783
  break;
3576
3784
  }
3577
3785
  }
3578
- function parseJsonResponse(text) {
3579
- const trimmed = text.trim();
3580
- const candidates = [trimmed];
3581
- const start = trimmed.indexOf("{");
3582
- if (start !== -1) {
3583
- let depth = 0;
3584
- for (let i = start; i < trimmed.length; i++) {
3585
- if (trimmed[i] === "{") depth++;
3586
- else if (trimmed[i] === "}") depth--;
3587
- if (depth === 0) {
3588
- candidates.push(trimmed.slice(start, i + 1));
3589
- break;
3590
- }
3591
- }
3592
- }
3593
- for (const candidate of candidates) {
3594
- try {
3595
- const parsed = JSON.parse(candidate);
3596
- const validated = safeParse(SkillDecisionSchema, parsed);
3597
- if (validated) return validated;
3598
- } catch {
3599
- continue;
3600
- }
3601
- }
3602
- return null;
3603
- }
3604
3786
 
3605
3787
  // src/utils/retry.ts
3606
3788
  async function withRetry(fn, opts = {}) {
@@ -5450,7 +5632,7 @@ function stripMcpPrefix(toolName) {
5450
5632
  const match = toolName.match(/^mcp__[^_]+(?:__)?(.+)$/);
5451
5633
  return match ? match[1] : toolName;
5452
5634
  }
5453
- function createEventHooks(taskId, toolCallRecords) {
5635
+ function createEventHooks(taskId, toolCallRecords, toolFailures = []) {
5454
5636
  const preToolUseHook = async (input) => {
5455
5637
  if (input.hook_event_name !== "PreToolUse") return { continue: true };
5456
5638
  const preInput = input;
@@ -5488,9 +5670,30 @@ function createEventHooks(taskId, toolCallRecords) {
5488
5670
  });
5489
5671
  return {};
5490
5672
  };
5673
+ const postToolUseFailureHook = async (input) => {
5674
+ if (input.hook_event_name !== "PostToolUseFailure") return {};
5675
+ const failureInput = input;
5676
+ const rawName = failureInput.tool_name;
5677
+ const displayName = stripMcpPrefix(rawName);
5678
+ const errorStr = failureInput.error;
5679
+ toolFailures.push({
5680
+ toolName: displayName,
5681
+ input: failureInput.tool_input || {},
5682
+ error: errorStr.slice(0, 500),
5683
+ timestamp: Date.now()
5684
+ });
5685
+ await emitEvent(taskId, "tool_failure", {
5686
+ name: displayName,
5687
+ error: errorStr.slice(0, 500),
5688
+ failure_count: toolFailures.filter((f) => f.toolName === displayName).length
5689
+ });
5690
+ log.warn(`Tool failure tracked: ${displayName} (total: ${toolFailures.length})`);
5691
+ return {};
5692
+ };
5491
5693
  return {
5492
5694
  PreToolUse: [{ hooks: [preToolUseHook] }],
5493
- PostToolUse: [{ hooks: [postToolUseHook] }]
5695
+ PostToolUse: [{ hooks: [postToolUseHook] }],
5696
+ PostToolUseFailure: [{ hooks: [postToolUseFailureHook] }]
5494
5697
  };
5495
5698
  }
5496
5699
 
@@ -5602,7 +5805,16 @@ Workflow for form filling (e.g. "\u6CE8\u518C\u4E00\u4E2A Gmail \u8D26\u53F7"):
5602
5805
  4. Check the screenshot \u2014 if validation errors appear, re-snapshot and fix
5603
5806
  5. When a username/email is taken, append a random 4-digit suffix and retry
5604
5807
 
5808
+ 7. FAILURE RECOVERY \u2014 Strategy Switching:
5809
+ If a tool call fails, do NOT repeat the same call. Reflect on why it failed and switch strategy:
5810
+ - CSS selector fails \u2192 use browser_snapshot refs instead
5811
+ - Direct navigation fails \u2192 search for the page first
5812
+ - API/programmatic approach fails \u2192 use browser UI instead
5813
+ - One data source fails \u2192 try an alternative source
5814
+ - If stuck after 2 failed attempts at the same step, try a fundamentally different approach
5815
+
5605
5816
  Guidelines:
5817
+ - SELF-VERIFY before finishing: re-read modified files, take a final screenshot after browser actions, or re-check output to confirm correctness. Never assume success without confirming the end state.
5606
5818
  - Always use the real browser for web tasks, never try to fetch URLs programmatically
5607
5819
  - ALWAYS use browser_snapshot as your primary way to understand a page \u2014 the ref table gives actionable refs, the screenshot gives visual context
5608
5820
  - Use browser_act to batch multiple actions \u2014 fill an entire form in one call instead of individual clicks/types
@@ -5667,12 +5879,18 @@ var TaskProcessor = class {
5667
5879
  memoryManager = null;
5668
5880
  skillManager;
5669
5881
  sessionId = null;
5882
+ userId = null;
5670
5883
  /** In-memory conversation history, keyed by conversation_id */
5671
5884
  historyCache = /* @__PURE__ */ new Map();
5672
5885
  constructor() {
5673
5886
  this.skillManager = new SkillManager();
5674
5887
  }
5888
+ /** @deprecated Use setUserId() instead */
5675
5889
  init(userId) {
5890
+ this.setUserId(userId);
5891
+ }
5892
+ setUserId(userId) {
5893
+ this.userId = userId;
5676
5894
  this.memoryManager = new MemoryManager();
5677
5895
  this.skillManager.setUserId(userId);
5678
5896
  this.skillManager.loadFromDb().catch((err) => {
@@ -5702,6 +5920,7 @@ var TaskProcessor = class {
5702
5920
  log.info(`Processing task ${task.id.slice(0, 8)}...`);
5703
5921
  let finalResponse = "";
5704
5922
  const toolCallRecords = [];
5923
+ const toolFailures = [];
5705
5924
  let tokenUsage;
5706
5925
  let agentSessionId;
5707
5926
  try {
@@ -5754,18 +5973,15 @@ var TaskProcessor = class {
5754
5973
  onUserWaitStart: () => taskTimeout.pause(),
5755
5974
  onUserWaitEnd: () => taskTimeout.resume()
5756
5975
  });
5757
- const eventHooks = createEventHooks(task.id, toolCallRecords);
5976
+ const eventHooks = createEventHooks(task.id, toolCallRecords, toolFailures);
5758
5977
  const allowedTools = [
5759
- // SDK built-in tools
5760
5978
  "Read",
5761
5979
  "Write",
5762
5980
  "Edit",
5763
5981
  "Bash",
5764
5982
  "Glob",
5765
5983
  "Grep",
5766
- // Browser MCP tools
5767
5984
  ...BROWSER_TOOL_NAMES.map((n) => `mcp__assistme-browser__${n}`),
5768
- // Agent MCP tools (memory, skills)
5769
5985
  "mcp__assistme-agent__memory_store",
5770
5986
  "mcp__assistme-agent__skill_create",
5771
5987
  "mcp__assistme-agent__skill_improve",
@@ -5776,29 +5992,19 @@ var TaskProcessor = class {
5776
5992
  "mcp__assistme-agent__skill_browse",
5777
5993
  "mcp__assistme-agent__skill_add",
5778
5994
  "mcp__assistme-agent__skill_publish",
5779
- // User interaction
5780
5995
  "mcp__assistme-agent__ask_user",
5781
- // Job automation tools
5782
5996
  "mcp__assistme-agent__job_run",
5783
5997
  "mcp__assistme-agent__job_schedule",
5784
5998
  "mcp__assistme-agent__job_status",
5785
- // Credential tools (local storage)
5786
5999
  "mcp__assistme-agent__credential_get",
5787
6000
  "mcp__assistme-agent__credential_set",
5788
6001
  "mcp__assistme-agent__credential_list",
5789
6002
  "mcp__assistme-agent__credential_remove"
5790
6003
  ];
5791
- async function* promptMessages() {
5792
- yield {
5793
- type: "user",
5794
- message: {
5795
- role: "user",
5796
- content: task.prompt
5797
- },
5798
- parent_tool_use_id: null,
5799
- session_id: ""
5800
- };
5801
- }
6004
+ const mcpServers = {
6005
+ "assistme-browser": browserServer,
6006
+ "assistme-agent": agentToolsServer
6007
+ };
5802
6008
  const options = {
5803
6009
  model: config.model,
5804
6010
  systemPrompt,
@@ -5807,19 +6013,16 @@ var TaskProcessor = class {
5807
6013
  allowedTools,
5808
6014
  permissionMode: "bypassPermissions",
5809
6015
  allowDangerouslySkipPermissions: true,
5810
- mcpServers: {
5811
- "assistme-browser": browserServer,
5812
- "assistme-agent": agentToolsServer
5813
- },
6016
+ mcpServers,
5814
6017
  hooks: eventHooks,
5815
6018
  persistSession: true,
5816
- abortController
6019
+ abortController,
6020
+ thinking: { type: "adaptive" },
6021
+ effort: "high",
6022
+ maxBudgetUsd: MAX_BUDGET_USD
5817
6023
  };
5818
6024
  try {
5819
- for await (const message of query2({
5820
- prompt: promptMessages(),
5821
- options
5822
- })) {
6025
+ for await (const message of query2({ prompt: task.prompt, options })) {
5823
6026
  switch (message.type) {
5824
6027
  case "assistant": {
5825
6028
  const assistantMsg = message;
@@ -5827,16 +6030,11 @@ var TaskProcessor = class {
5827
6030
  if (block.type === "text") {
5828
6031
  finalResponse += block.text;
5829
6032
  log.agent(block.text);
5830
- await emitEvent(task.id, "text_delta", {
5831
- text: block.text
5832
- });
6033
+ await emitEvent(task.id, "text_delta", { text: block.text });
5833
6034
  } else if (block.type === "thinking" && "thinking" in block) {
5834
6035
  const thinkingBlock = block;
5835
- const thinkingText = thinkingBlock.thinking;
5836
- log.debug(`Thinking: ${thinkingText.slice(0, 100)}...`);
5837
- await emitEvent(task.id, "thinking", {
5838
- text: thinkingText
5839
- });
6036
+ log.debug(`Thinking: ${thinkingBlock.thinking.slice(0, 100)}...`);
6037
+ await emitEvent(task.id, "thinking", { text: thinkingBlock.thinking });
5840
6038
  }
5841
6039
  }
5842
6040
  break;
@@ -5852,13 +6050,14 @@ var TaskProcessor = class {
5852
6050
  if (!finalResponse && successMsg.result) {
5853
6051
  finalResponse = successMsg.result;
5854
6052
  }
6053
+ agentSessionId = successMsg.session_id;
5855
6054
  log.info(
5856
6055
  `Task cost: $${successMsg.total_cost_usd.toFixed(4)}, turns: ${successMsg.num_turns}`
5857
6056
  );
5858
6057
  } else {
5859
- const errorMsg = resultMsg;
5860
- log.warn(`SDK result: ${errorMsg.subtype}`);
5861
- for (const err of errorMsg.errors) {
6058
+ const errMsg = resultMsg;
6059
+ log.warn(`SDK result: ${errMsg.subtype}`);
6060
+ for (const err of errMsg.errors) {
5862
6061
  await emitEvent(task.id, "error", { message: err });
5863
6062
  }
5864
6063
  }
@@ -5892,16 +6091,21 @@ var TaskProcessor = class {
5892
6091
  convHistory.splice(0, convHistory.length - MAX_HISTORY_ENTRIES * 2);
5893
6092
  }
5894
6093
  this.historyCache.set(task.conversation_id, convHistory);
6094
+ if (this.memoryManager) {
6095
+ this.memoryManager.compressIfNeeded().catch(
6096
+ (err) => log.debug(`Memory compression skipped: ${err}`)
6097
+ );
6098
+ }
5895
6099
  if (agentSessionId) {
5896
6100
  this.evaluateSkillPostTask(agentSessionId, config.model).catch(
5897
6101
  (err) => log.debug(`Post-task skill evaluation skipped: ${err}`)
5898
6102
  );
5899
6103
  }
5900
6104
  } catch (err) {
5901
- const errorMsg = errorMessage(err);
5902
- log.error(`Task failed: ${errorMsg}`);
5903
- await failTask(task.id, errorMsg);
5904
- await emitEvent(task.id, "error", { message: errorMsg });
6105
+ const errMsg = errorMessage(err);
6106
+ log.error(`Task failed: ${errMsg}`);
6107
+ await failTask(task.id, errMsg);
6108
+ await emitEvent(task.id, "error", { message: errMsg });
5905
6109
  await emitEvent(task.id, "status_change", { status: "failed" });
5906
6110
  } finally {
5907
6111
  setCorrelationId(null);
@@ -5916,6 +6120,57 @@ var TaskProcessor = class {
5916
6120
  }
5917
6121
  };
5918
6122
 
6123
+ // src/db/session-log.ts
6124
+ var FLUSH_INTERVAL_MS = 3e3;
6125
+ var MAX_BATCH_SIZE = 100;
6126
+ var SessionLogEmitter = class {
6127
+ constructor(sessionId) {
6128
+ this.sessionId = sessionId;
6129
+ this.flushTimer = setInterval(() => this.flush(), FLUSH_INTERVAL_MS);
6130
+ }
6131
+ sequence = 0;
6132
+ buffer = [];
6133
+ flushTimer = null;
6134
+ flushing = false;
6135
+ /** Queue a log entry for batch insertion */
6136
+ push(logType, message) {
6137
+ this.sequence++;
6138
+ this.buffer.push({ log_type: logType, message, seq: this.sequence });
6139
+ if (this.buffer.length >= MAX_BATCH_SIZE) {
6140
+ this.flush();
6141
+ }
6142
+ }
6143
+ /** Flush buffered logs to Supabase */
6144
+ async flush() {
6145
+ if (this.flushing || this.buffer.length === 0) return;
6146
+ const batch = this.buffer.splice(0);
6147
+ this.flushing = true;
6148
+ try {
6149
+ await callMcpHandler("log.emit_batch", {
6150
+ session_id: this.sessionId,
6151
+ logs: batch
6152
+ });
6153
+ } catch (err) {
6154
+ log.debug(
6155
+ `Failed to flush session logs: ${err instanceof Error ? err.message : err}`
6156
+ );
6157
+ if (this.buffer.length < MAX_BATCH_SIZE * 5) {
6158
+ this.buffer.unshift(...batch);
6159
+ }
6160
+ } finally {
6161
+ this.flushing = false;
6162
+ }
6163
+ }
6164
+ /** Stop the emitter and flush remaining logs */
6165
+ async stop() {
6166
+ if (this.flushTimer) {
6167
+ clearInterval(this.flushTimer);
6168
+ this.flushTimer = null;
6169
+ }
6170
+ await this.flush();
6171
+ }
6172
+ };
6173
+
5919
6174
  // src/commands/start.ts
5920
6175
  function registerStartCommand(program2) {
5921
6176
  program2.command("start", { isDefault: true, hidden: true }).description("Start the agent (default command)").option("-w, --workspace <path>", "Workspace path (default: current directory)").option("-n, --name <name>", "Session name").option("-v, --verbose", "Enable verbose/debug logging").action(runAgent);
@@ -5979,10 +6234,16 @@ async function runAgent(opts) {
5979
6234
  const processor = new TaskProcessor();
5980
6235
  processor.init(userId);
5981
6236
  const sessionManager = new SessionManager();
6237
+ let logEmitter = null;
5982
6238
  const browserRef = getBrowser();
5983
6239
  const shutdown = async () => {
5984
6240
  console.log();
5985
6241
  log.info("Shutting down...");
6242
+ setLogHook(null);
6243
+ try {
6244
+ if (logEmitter) await logEmitter.stop();
6245
+ } catch {
6246
+ }
5986
6247
  try {
5987
6248
  if (browserRef.isConnected()) await browserRef.disconnect();
5988
6249
  } catch {
@@ -5997,6 +6258,10 @@ async function runAgent(opts) {
5997
6258
  await processor.processTask(task);
5998
6259
  });
5999
6260
  processor.setSessionId(session.id);
6261
+ logEmitter = new SessionLogEmitter(session.id);
6262
+ setLogHook((logType, message) => {
6263
+ logEmitter?.push(logType, message);
6264
+ });
6000
6265
  log.info("Listening for tasks (chat + jobs) from web UI...");
6001
6266
  log.info("Press Ctrl+C to stop.\n");
6002
6267
  const rl = createInterface2({
@@ -6402,7 +6667,7 @@ function registerJobCommands(program2) {
6402
6667
  jobCmd.command("list").description("List your defined jobs").action(async () => {
6403
6668
  try {
6404
6669
  const userId = await getCurrentUserId();
6405
- const { JobRunner: JobRunner2 } = await import("./job-runner-JT3JWZBV.js");
6670
+ const { JobRunner: JobRunner2 } = await import("./job-runner-AT3V6LAQ.js");
6406
6671
  const runner = new JobRunner2();
6407
6672
  const jobs = await runner.listJobs();
6408
6673
  if (jobs.length === 0) {
@@ -6426,7 +6691,7 @@ function registerJobCommands(program2) {
6426
6691
  jobCmd.command("status [name]").description("Show run history for a job (or all jobs)").option("-l, --limit <number>", "Max runs to show (default: 5)").action(async (name, opts) => {
6427
6692
  try {
6428
6693
  const userId = await getCurrentUserId();
6429
- const { JobRunner: JobRunner2 } = await import("./job-runner-JT3JWZBV.js");
6694
+ const { JobRunner: JobRunner2 } = await import("./job-runner-AT3V6LAQ.js");
6430
6695
  const runner = new JobRunner2();
6431
6696
  const runs = await runner.getRunHistory(name, parseInt(opts.limit || "5"));
6432
6697
  if (runs.length === 0) {
@@ -6465,7 +6730,7 @@ Job Run History${name ? ` \u2014 ${name}` : ""}:`));
6465
6730
  process.exit(1);
6466
6731
  }
6467
6732
  const userId = await getCurrentUserId();
6468
- const { JobRunner: JobRunner2 } = await import("./job-runner-JT3JWZBV.js");
6733
+ const { JobRunner: JobRunner2 } = await import("./job-runner-AT3V6LAQ.js");
6469
6734
  const runner = new JobRunner2();
6470
6735
  const job = await runner.loadJob(name);
6471
6736
  if (!job) {