askshepherd 0.1.39 → 0.1.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { execFile, execFileSync, spawn } from "node:child_process";
3
3
  import { createHash } from "node:crypto";
4
- import { constants as fsConstants, existsSync, mkdirSync, readFileSync, unlinkSync, watch, writeFileSync } from "node:fs";
5
- import { access, mkdir, readdir, readFile, stat, writeFile } from "node:fs/promises";
4
+ import { constants as fsConstants, existsSync, mkdirSync, readdirSync, readFileSync, renameSync, unlinkSync, watch, writeFileSync } from "node:fs";
5
+ import { access, chmod, mkdir, readdir, readFile, stat, writeFile } from "node:fs/promises";
6
6
  import { createServer } from "node:http";
7
7
  import { homedir, platform } from "node:os";
8
8
  import { basename, dirname, join } from "node:path";
@@ -12,7 +12,7 @@ import { fileURLToPath } from "node:url";
12
12
  const DEFAULT_API_URL = "https://brain-api-customer-facing.up.railway.app";
13
13
  const PACKAGE_NAME = "askshepherd";
14
14
  const PACKAGE_SPEC = `${PACKAGE_NAME}@latest`;
15
- const PACKAGE_VERSION = "0.1.37";
15
+ const PACKAGE_VERSION = "0.1.40";
16
16
  const MCP_SERVER_NAME = "shepherd";
17
17
  const PACKAGE_DIR = dirname(dirname(fileURLToPath(import.meta.url)));
18
18
  const DEFAULT_AGENT_STATE_PATH = join(homedir(), ".shepherd", "raw-onboarding-agent.json");
@@ -24,7 +24,7 @@ const MAX_QUEUE_MESSAGES = 10_000;
24
24
  const DEFAULT_MESSAGE_CHAT_SEARCH_LIMIT = 200;
25
25
  const INITIAL_MESSAGE_CHAT_ROWS = 20;
26
26
  const ALL_MESSAGES_CHATS = "__shepherd_all_messages_chats__";
27
- const AGENT_MODALITY_ORDER = ["google", "slack", "granola", "messages", "codingSessions"];
27
+ const AGENT_MODALITY_ORDER = ["google", "slack", "github", "granola", "messages", "codingSessions"];
28
28
  const SHEPHERD_LOGO_PATH = join(PACKAGE_DIR, "assets", "shepherd_G_vector_136033.png");
29
29
  const GRANOLA_API_KEYS_PATH = "/settings/integrations/api-keys";
30
30
  const GOOGLE_WORKSPACE_DELEGATION_ADMIN_URL = "https://admin.google.com/ac/owl/domainwidedelegation";
@@ -38,8 +38,22 @@ const CLAUDE_PROJECTS_DIR = join(homedir(), ".claude", "projects");
38
38
  const CONTACTS_WAL_PATH = join(homedir(), "Library", "Application Support", "AddressBook", "AddressBook-v22.abcddb-wal");
39
39
  const CONTACT_SYNC_DEBOUNCE_MS = 5_000;
40
40
  const CONTACT_SYNC_FALLBACK_MS = 30 * 60_000;
41
+ const MESSAGES_EDIT_SCAN_INTERVAL_MS = positiveIntFromEnv("SHEPHERD_MESSAGES_EDIT_SCAN_INTERVAL_MS", 60_000);
42
+ const MESSAGES_EDIT_SCAN_WINDOW_MS = positiveIntFromEnv("SHEPHERD_MESSAGES_EDIT_SCAN_WINDOW_MS", 30 * 60_000);
43
+ const MESSAGES_EDIT_SCAN_LIMIT = positiveIntFromEnv("SHEPHERD_MESSAGES_EDIT_SCAN_LIMIT", 500);
44
+ const MESSAGES_MUTATION_RECONCILE_INTERVAL_MS = positiveIntFromEnv("SHEPHERD_MESSAGES_MUTATION_RECONCILE_INTERVAL_MS", 15 * 60_000);
45
+ const MESSAGES_STATE_CACHE_MAX = positiveIntFromEnv("SHEPHERD_MESSAGES_STATE_CACHE_MAX", 5_000);
46
+ const LEGACY_SHEPHERD_OWNED_MESSAGE_HANDLES = [
47
+ "+13054098546",
48
+ "+12054012556",
49
+ ];
41
50
  const SHEPHERD_OWNED_MESSAGE_HANDLES = parseMessageHandleList(
42
- process.env.SHEPHERD_OWNED_MESSAGE_HANDLES ?? process.env.SENDBLUE_NUMBER ?? "",
51
+ [
52
+ ...LEGACY_SHEPHERD_OWNED_MESSAGE_HANDLES,
53
+ process.env.SENDBLUE_NUMBER,
54
+ process.env.SHEPHERD_OWNED_MESSAGE_HANDLES,
55
+ process.env.SHEPHERD_OWNED_IMESSAGE_HANDLES,
56
+ ].filter(Boolean).join(","),
43
57
  );
44
58
  const GOOGLE_WORKSPACE_DELEGATION_APP_NAME = "Shepherd";
45
59
  const GOOGLE_WORKSPACE_DELEGATION_SERVICE_ACCOUNT_EMAIL =
@@ -133,6 +147,12 @@ async function dispatch() {
133
147
  await runMessagesChatsCommand();
134
148
  } else if (command === "messages-agent") {
135
149
  await runMessagesAgent();
150
+ } else if (command === "write-agent-state") {
151
+ await runWriteAgentState();
152
+ } else if (command === "write-messages-config") {
153
+ await runWriteMessagesConfig();
154
+ } else if (command === "install-messages-agent") {
155
+ await runInstallMessagesAgent();
136
156
  } else if (command === "coding-sessions-agent") {
137
157
  await runCodingSessionsAgent();
138
158
  } else if (command === "coding-sessions-status") {
@@ -348,15 +368,16 @@ async function runAgentOnboarding() {
348
368
  authSessionToken: workosAuth.authSessionToken,
349
369
  sources,
350
370
  });
371
+ const sessionSources = sourceSelectionFromSession(session, sources);
351
372
 
352
373
  const statePath = await writeAgentState({
353
374
  apiUrl,
354
375
  sessionId: session.sessionId,
355
376
  sessionToken: session.sessionToken,
356
377
  account: session.account,
357
- sources,
378
+ sources: sessionSources,
358
379
  authUrls: session.authUrls ?? {},
359
- googleWorkspaceDelegation: sources.google
380
+ googleWorkspaceDelegation: sessionSources.google && session.googleWorkspaceDelegation
360
381
  ? googleWorkspaceDelegationSetup(session.googleWorkspaceDelegation)
361
382
  : undefined,
362
383
  workosAuth,
@@ -374,7 +395,8 @@ async function runAgentOnboarding() {
374
395
  status: "auth_required",
375
396
  account: publicAgentAccount(session.account),
376
397
  opened: currentAction?.opened ? [currentAction.source] : [],
377
- googleWorkspaceDelegation: sources.google ? googleWorkspaceDelegationSetup(session.googleWorkspaceDelegation) : undefined,
398
+ sources: sessionSources,
399
+ googleWorkspaceDelegation: sessionSources.google && session.googleWorkspaceDelegation ? googleWorkspaceDelegationSetup(session.googleWorkspaceDelegation) : undefined,
378
400
  currentAction,
379
401
  statePath,
380
402
  messagesChatsCommand: sources.messages ? `${agentCommand()} messages-chats` : undefined,
@@ -485,7 +507,7 @@ async function runMcpLogin() {
485
507
  const statePath = await writeMcpStateFromLogin(login);
486
508
  const installTargets = await selectMcpInstallTargets();
487
509
  const installResults = installTargets.length > 0
488
- ? await installMcpClients({ statePath, targets: installTargets })
510
+ ? await installMcpClients({ statePath, targets: installTargets, proxyProgram: parseJsonArrayArg("mcp-program") })
489
511
  : [];
490
512
 
491
513
  if (args.json) {
@@ -674,7 +696,7 @@ async function runMcpInstall() {
674
696
  const ensured = await ensureMcpState({ allowBrowser: process.stdin.isTTY, quiet: args.json === true });
675
697
  const targets = await selectMcpInstallTargets({ defaultTargets: MCP_INSTALL_TARGETS });
676
698
  const installResults = targets.length > 0
677
- ? await installMcpClients({ statePath: ensured.statePath, targets })
699
+ ? await installMcpClients({ statePath: ensured.statePath, targets, proxyProgram: parseJsonArrayArg("mcp-program") })
678
700
  : [];
679
701
 
680
702
  if (args.json) {
@@ -923,9 +945,18 @@ async function writeMcpState(state) {
923
945
  const path = mcpStatePath();
924
946
  await mkdir(dirname(path), { recursive: true });
925
947
  await writeFile(path, JSON.stringify(state, null, 2), { mode: 0o600 });
948
+ await chmod(path, 0o600);
926
949
  return path;
927
950
  }
928
951
 
952
/**
 * Turn an onboarding user ID into a string that is safe to embed in a local
 * file name.
 *
 * Every character outside `[a-zA-Z0-9._-]` is replaced with `-`, so path
 * separators can never survive into the result.
 *
 * @param {unknown} userId - User ID as returned by onboarding.
 * @returns {string} The sanitized ID.
 * @throws {Error} If the ID is missing, is not a string/number/bigint, or
 *   sanitizes to an empty or dots-only name.
 */
function sanitizeUserFileId(userId) {
  // Only primitives with a meaningful textual form are accepted. Coercing an
  // object or boolean would silently produce a bogus but "valid" ID such as
  // "-object-Object-".
  const kind = typeof userId;
  const hasTextForm = kind === "string" || kind === "number" || kind === "bigint";
  const safeId = hasTextForm ? String(userId).replace(/[^a-zA-Z0-9._-]/g, "-") : "";
  // A dots-only name (".", "..", "...") would alias a directory entry rather
  // than name a file; the single pattern covers every such case.
  if (safeId === "" || /^\.+$/.test(safeId)) {
    throw new Error("Onboarding returned an invalid user ID for local Messages config.");
  }
  return safeId;
}
959
+
929
960
  function mcpStatePath() {
930
961
  return expandHomePath(stringArg("state") ?? DEFAULT_MCP_STATE_PATH);
931
962
  }
@@ -967,16 +998,17 @@ function parseMcpInstallTargets(value) {
967
998
  return [...new Set(targets)];
968
999
  }
969
1000
 
970
- async function installMcpClients({ statePath, targets }) {
1001
+ async function installMcpClients({ statePath, targets, proxyProgram }) {
971
1002
  const results = [];
1003
+ const proxy = mcpProxyCommand(statePath, proxyProgram);
972
1004
  for (const target of targets) {
973
1005
  try {
974
1006
  if (target === "codex") {
975
- await installCodexMcp(statePath);
1007
+ await installCodexMcp(proxy);
976
1008
  } else if (target === "claude") {
977
- await installClaudeMcp(statePath);
1009
+ await installClaudeMcp(proxy);
978
1010
  } else if (target === "cursor") {
979
- await installCursorMcp(statePath);
1011
+ await installCursorMcp(proxy);
980
1012
  }
981
1013
  results.push({ target, status: "installed" });
982
1014
  } catch (err) {
@@ -986,25 +1018,25 @@ async function installMcpClients({ statePath, targets }) {
986
1018
  return results;
987
1019
  }
988
1020
 
989
- async function installCodexMcp(statePath) {
1021
+ async function installCodexMcp(proxy) {
990
1022
  await execFileQuiet("codex", ["mcp", "remove", MCP_SERVER_NAME], { ignoreError: true });
991
- await execFileQuiet("codex", ["mcp", "add", MCP_SERVER_NAME, "--", "npx", ...mcpProxyArgs(statePath)]);
1023
+ await execFileQuiet("codex", ["mcp", "add", MCP_SERVER_NAME, "--", proxy.command, ...proxy.args]);
992
1024
  }
993
1025
 
994
- async function installClaudeMcp(statePath) {
1026
+ async function installClaudeMcp(proxy) {
995
1027
  await execFileQuiet("claude", ["mcp", "remove", MCP_SERVER_NAME], { ignoreError: true });
996
- await execFileQuiet("claude", ["mcp", "add", "--scope", "user", MCP_SERVER_NAME, "--", "npx", ...mcpProxyArgs(statePath)]);
1028
+ await execFileQuiet("claude", ["mcp", "add", "--scope", "user", MCP_SERVER_NAME, "--", proxy.command, ...proxy.args]);
997
1029
  }
998
1030
 
999
- async function installCursorMcp(statePath) {
1031
+ async function installCursorMcp(proxy) {
1000
1032
  const path = join(homedir(), ".cursor", "mcp.json");
1001
1033
  const config = await readJsonObject(path);
1002
1034
  const mcpServers = config.mcpServers && typeof config.mcpServers === "object" && !Array.isArray(config.mcpServers)
1003
1035
  ? config.mcpServers
1004
1036
  : {};
1005
1037
  mcpServers[MCP_SERVER_NAME] = {
1006
- command: "npx",
1007
- args: mcpProxyArgs(statePath),
1038
+ command: proxy.command,
1039
+ args: proxy.args,
1008
1040
  };
1009
1041
  config.mcpServers = mcpServers;
1010
1042
  await mkdir(dirname(path), { recursive: true });
@@ -1012,8 +1044,18 @@ async function installCursorMcp(statePath) {
1012
1044
  await execFileQuiet("cursor-agent", ["mcp", "enable", MCP_SERVER_NAME], { ignoreError: true });
1013
1045
  }
1014
1046
 
1047
/**
 * Build the command line used to launch the Shepherd MCP proxy.
 *
 * @param {string} statePath - Value passed after `--state`.
 * @param {string[]} [proxyProgram] - Optional command prefix. When it is a
 *   non-empty array its first entry becomes the command and the remaining
 *   entries lead the argument list; otherwise `npx -y <PACKAGE_SPEC>` is used.
 * @returns {{ command: string, args: string[] }} Command plus arguments, the
 *   arguments always ending in `mcp --state <statePath>`.
 */
function mcpProxyCommand(statePath, proxyProgram) {
  const hasCustomProgram = Array.isArray(proxyProgram) && proxyProgram.length > 0;
  const [command, ...leadingArgs] = hasCustomProgram
    ? proxyProgram
    : ["npx", "-y", PACKAGE_SPEC];
  const args = [...leadingArgs, "mcp", "--state", statePath];
  return { command, args };
}
1056
+
1015
1057
  function mcpProxyArgs(statePath) {
1016
- return ["-y", PACKAGE_SPEC, "mcp", "--state", statePath];
1058
+ return mcpProxyCommand(statePath).args;
1017
1059
  }
1018
1060
 
1019
1061
  async function readJsonObject(path) {
@@ -1177,7 +1219,7 @@ async function runStatusCommand() {
1177
1219
 
1178
1220
  async function collectShepherdStatus() {
1179
1221
  const statePath = agentStatePath();
1180
- const state = await readOptionalAgentState();
1222
+ let state = await readOptionalAgentState();
1181
1223
  let production = null;
1182
1224
  let productionError = null;
1183
1225
 
@@ -1187,7 +1229,7 @@ async function collectShepherdStatus() {
1187
1229
  `${trimTrailingSlash(state.apiUrl)}/onboarding/raw/session/${encodeURIComponent(state.sessionId)}/status`,
1188
1230
  { token: state.sessionToken },
1189
1231
  );
1190
- await updateAgentStateFromOnboardingResponse(state, production);
1232
+ state = await updateAgentStateFromOnboardingResponse(state, production);
1191
1233
  } catch (err) {
1192
1234
  productionError = safeError(err);
1193
1235
  }
@@ -1281,8 +1323,11 @@ function statusSourceRows(providers, savedSources = {}) {
1281
1323
  const definitions = [
1282
1324
  ["google", "Google Workspace", "google"],
1283
1325
  ["slack", "Slack", "slack"],
1326
+ ["github", "GitHub", "github"],
1284
1327
  ["granola", "Granola", "granola"],
1285
1328
  ["messages", "Messages", "messages"],
1329
+ ["discord", "Discord", "discord"],
1330
+ ["instagram", "Instagram", "instagram"],
1286
1331
  ["codingSessions", "Coding Sessions", "codingSessions"],
1287
1332
  ];
1288
1333
  return definitions.map(([key, label, sourceKey]) => ({
@@ -1312,6 +1357,9 @@ function renderLocalMessagesStatus(status) {
1312
1357
  lines.push(" LaunchAgent: not installed or unavailable");
1313
1358
  }
1314
1359
  lines.push(` Messages database: ${status.storage.readable ? "readable" : `not readable (${status.storage.reason})`}`);
1360
+ if (status.contacts) {
1361
+ lines.push(` Contacts: ${status.contacts.ok ? "resolved from AddressBook DB" : `degraded (${status.contacts.error ?? "unsupported local Contacts database schema"}; contact founders@askshepherd.ai)`}`);
1362
+ }
1315
1363
  lines.push(` Queued unsent messages: ${status.queueDepth}`);
1316
1364
  return lines;
1317
1365
  }
@@ -1345,6 +1393,96 @@ function renderLocalCodingSessionsStatus(status) {
1345
1393
  return lines;
1346
1394
  }
1347
1395
 
1396
/**
 * `write-agent-state` command: shallow-merge a JSON object read from stdin
 * over the previously saved agent state (if any), persist the result via
 * `writeAgentState`, and print `{ statePath }` as JSON.
 *
 * @throws {Error} If stdin does not hold a JSON object.
 */
async function runWriteAgentState() {
  const patch = await readJsonInput();
  const isJsonObject = patch !== null && typeof patch === "object" && !Array.isArray(patch);
  if (!isJsonObject) {
    throw new Error("write-agent-state expects a JSON object on stdin.");
  }

  const saved = await readOptionalAgentState();
  // Keys from stdin win over keys already on disk.
  const merged = { ...(saved ?? {}), ...patch };
  const statePath = await writeAgentState(merged);

  console.log(JSON.stringify({ statePath }, null, 2));
}
1405
+
1406
/**
 * `write-messages-config` command: read a JSON object from stdin, validate
 * the required string fields, hand the normalized config to
 * `writeMessagesConfig`, and print `{ configPath }` as JSON.
 *
 * `apiUrl`, `userId` and `agentToken` are checked with `requiredConfigString`
 * (in that order); `allowedChatIds` is forwarded as given and `selectedChats`
 * falls back to an empty array when it is not an array.
 *
 * @throws {Error} If stdin does not hold a JSON object.
 */
async function runWriteMessagesConfig() {
  const payload = await readJsonInput();
  const isJsonObject = payload !== null && typeof payload === "object" && !Array.isArray(payload);
  if (!isJsonObject) {
    throw new Error("write-messages-config expects a JSON object on stdin.");
  }

  // Validation order matters: the first invalid field is the one reported.
  const apiUrl = trimTrailingSlash(requiredConfigString(payload.apiUrl, "apiUrl"));
  const userId = requiredConfigString(payload.userId, "userId");
  const agentToken = requiredConfigString(payload.agentToken, "agentToken");
  const backfillDays = parseBackfillDays(payload.backfillDays, null);
  const selectedChats = Array.isArray(payload.selectedChats) ? payload.selectedChats : [];

  const configPath = await writeMessagesConfig({
    apiUrl,
    userId,
    agentToken,
    backfillDays,
    allowedChatIds: payload.allowedChatIds,
    selectedChats,
  });
  console.log(JSON.stringify({ configPath }, null, 2));
}
1421
+
1422
/**
 * `install-messages-agent` command: load an existing Messages agent config
 * and either print the planned install (`--dry-run`) or perform it.
 *
 * Arguments read: `--config <path>` (required), `--user-id` (defaults to the
 * config's `userId`), `--program` (JSON array) and `--env` (JSON object),
 * which are forwarded as `programArguments` / `environment` overrides.
 *
 * @throws {Error} If `--config` is missing, the file cannot be read, or the
 *   file does not contain a JSON object.
 */
async function runInstallMessagesAgent() {
  const configPath = stringArg("config");
  if (!configPath) throw new Error("install-messages-agent requires --config <path>");
  let config;
  try {
    config = JSON.parse(await readFile(configPath, "utf8"));
  } catch (err) {
    // Errors carrying a `code` (e.g. file-system failures) are rethrown
    // untouched; anything else is treated as a JSON parse failure.
    if (err && typeof err === "object" && "code" in err) throw err;
    throw new Error(`install-messages-agent: config file at ${configPath} does not contain valid JSON.`);
  }
  // Valid JSON is not necessarily an object: `null` would crash on
  // `config.userId` below, and an array would be accepted silently whenever
  // --user-id is supplied.
  if (!config || typeof config !== "object" || Array.isArray(config)) {
    throw new Error(`install-messages-agent: config file at ${configPath} must contain a JSON object.`);
  }
  const userId = stringArg("user-id") ?? requiredConfigString(config.userId, "userId");
  const overrides = {
    programArguments: parseJsonArrayArg("program"),
    environment: parseJsonObjectArg("env"),
  };

  if (args["dry-run"]) {
    // Dry run only prints what buildMessagesAgentInstall returns.
    console.log(JSON.stringify(buildMessagesAgentInstall(configPath, userId, overrides), null, 2));
    return;
  }

  const install = await installMessagesAgent(configPath, userId, overrides);
  console.log(JSON.stringify(install, null, 2));
}
1446
+
1447
/**
 * Read all of stdin and parse it as JSON.
 *
 * @returns {Promise<unknown>} The parsed JSON value.
 * @throws {Error} If stdin is empty (after trimming) or is not valid JSON.
 */
async function readJsonInput() {
  const chunks = [];
  for await (const chunk of process.stdin) chunks.push(chunk);
  const text = Buffer.concat(chunks).toString("utf8").trim();
  if (!text) throw new Error("Expected JSON input on stdin.");
  try {
    return JSON.parse(text);
  } catch {
    // Report a fixed message, matching how the --program/--env parsers wrap
    // parse failures. The parser's own message is deliberately not echoed:
    // callers pipe token-bearing payloads through stdin and the SyntaxError
    // text may quote part of the input.
    throw new Error("Expected valid JSON input on stdin.");
  }
}
1454
+
1455
/**
 * Parse the CLI argument `--<name>` as a JSON array of strings.
 *
 * @param {string} name - Argument name without the leading dashes.
 * @returns {string[] | undefined} The parsed array, or `undefined` when
 *   `stringArg` yields a falsy value for the argument.
 * @throws {Error} If the value is not valid JSON or is not an array whose
 *   entries are all strings.
 */
function parseJsonArrayArg(name) {
  const raw = stringArg(name);
  if (!raw) return undefined;

  const invalid = () => new Error(`--${name} must be a JSON array of strings.`);

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw invalid();
  }

  const isStringArray = Array.isArray(parsed) && parsed.every((entry) => typeof entry === "string");
  if (!isStringArray) throw invalid();
  return parsed;
}
1469
+
1470
/**
 * Parse the CLI argument `--<name>` as a JSON object whose values are all
 * strings.
 *
 * @param {string} name - Argument name without the leading dashes.
 * @returns {Record<string, string> | undefined} The parsed object, or
 *   `undefined` when `stringArg` yields a falsy value for the argument.
 * @throws {Error} If the value is not valid JSON, is not a plain (non-array)
 *   object, or has any non-string value.
 */
function parseJsonObjectArg(name) {
  const raw = stringArg(name);
  if (!raw) return undefined;

  const invalid = () => new Error(`--${name} must be a JSON object of string values.`);

  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    throw invalid();
  }

  const isObject = Boolean(parsed) && typeof parsed === "object" && !Array.isArray(parsed);
  if (!isObject) throw invalid();
  const allStrings = Object.values(parsed).every((entry) => typeof entry === "string");
  if (!allStrings) throw invalid();
  return parsed;
}
1485
+
1348
1486
  async function runMessagesChatsCommand() {
1349
1487
  await ensureMessagesReadPermission({ noOpen: Boolean(args["no-open"]) });
1350
1488
  const chats = await listRecentMessageChats({
@@ -1387,7 +1525,9 @@ async function runMessagesAgent() {
1387
1525
  const userId = requiredConfigString(config.userId, "userId");
1388
1526
  const agentToken = requiredConfigString(config.agentToken, "agentToken");
1389
1527
  mergeShepherdOwnedMessageHandles(config.excludedMessageHandles);
1390
- const backfillDays = parseBackfillDays(args["backfill-days"] ?? process.env.SHEPHERD_BACKFILL_DAYS ?? config.backfillDays, null);
1528
+ const backfillOverride = args["backfill-days"] ?? process.env.SHEPHERD_BACKFILL_DAYS;
1529
+ const backfillExplicit = backfillOverride !== undefined && backfillOverride !== null && backfillOverride !== "";
1530
+ const backfillDays = parseBackfillDays(backfillExplicit ? backfillOverride : config.backfillDays, null);
1391
1531
  const allowedChatIds = parseAllowedChatIds(config.allowedChatIds);
1392
1532
  const allChats = config.allChats === true || selectedChatIdsIncludeAll(allowedChatIds);
1393
1533
  if (!allChats && allowedChatIds.length === 0) {
@@ -1397,12 +1537,15 @@ async function runMessagesAgent() {
1397
1537
  const kit = await import("@photon-ai/imessage-kit");
1398
1538
  const sdk = new kit.IMessageSDK({ debug: args.debug === true });
1399
1539
  const sender = new MessagesBatchSender(apiUrl, agentToken, userId);
1400
- const contactLookup = createMutableContactLookup(buildContactLookup());
1540
+ const contactLookup = createMutableContactLookup(buildContactLookup({ userId }));
1401
1541
  const serializer = createMessageSerializer(kit, contactLookup);
1542
+ const stateCache = createMessageStateCache(userId);
1402
1543
  const contactSync = startMessagesContactSync(sender, contactLookup, {
1544
+ userId,
1403
1545
  syncAllContacts: allChats,
1404
1546
  seedHandles: allChats ? [] : selectedChatContactSeedHandles(config.selectedChats, allowedChatIds),
1405
1547
  });
1548
+ let editDetector = null;
1406
1549
 
1407
1550
  console.log("Shepherd Messages raw sync starting");
1408
1551
  console.log(allChats
@@ -1410,25 +1553,52 @@ async function runMessagesAgent() {
1410
1553
  : `Messages chat filter: ${allowedChatIds.length} selected chat(s)`);
1411
1554
 
1412
1555
  try {
1556
+ await validateMessagesDatabaseAccess(sdk);
1557
+ console.log("Messages local database access validated");
1413
1558
  await contactSync.syncNow({ forceAll: true, reason: "startup" }).catch((err) => {
1414
1559
  console.error("Initial Messages contact sync failed:", safeError(err));
1415
1560
  });
1416
1561
  await loadGroupChatNames(sdk, serializer);
1417
1562
  loadSelectedChatNames(config.selectedChats, serializer);
1418
1563
 
1419
- if (backfillDays !== 0) {
1420
- await runMessagesBackfill(sdk, sender, serializer, backfillDays, allChats ? null : allowedChatIds, contactSync);
1564
+ const initialWatermark = loadMessagesWatermark(userId);
1565
+ const backfillScope = messagesBackfillScope({ backfillDays, allChats, allowedChatIds });
1566
+ let backfillComplete = loadMessagesBackfillComplete(userId, backfillScope);
1567
+ if (!backfillComplete && !hasAnyMessagesBackfillComplete(userId) && initialWatermark > 0 && !backfillExplicit && backfillDays !== 0) {
1568
+ backfillComplete = saveMessagesBackfillComplete(userId, backfillScope, {
1569
+ legacyAssumedComplete: true,
1570
+ watermark: initialWatermark,
1571
+ });
1572
+ }
1573
+ if (shouldRunMessagesBackfill({ backfillDays, backfillExplicit, backfillComplete })) {
1574
+ const backfillResult = await runMessagesBackfill(sdk, sender, serializer, backfillDays, allChats ? null : allowedChatIds, contactSync, stateCache);
1575
+ if (backfillResult.complete) {
1576
+ saveMessagesBackfillComplete(userId, backfillScope, backfillResult);
1577
+ } else {
1578
+ console.warn("Messages backfill paused before completion because messages were queued for retry");
1579
+ }
1421
1580
  await contactSync.syncNow({ forceAll: true, reason: "post-backfill" }).catch((err) => {
1422
1581
  console.error("Post-backfill Messages contact sync failed:", safeError(err));
1423
1582
  });
1583
+ } else if (backfillDays !== 0) {
1584
+ console.log(`Skipping configured Messages backfill because this chat scope is already complete; use --backfill-days to force a historical backfill`);
1424
1585
  }
1425
1586
 
1426
- await gapFillFromWatermark(sdk, sender, serializer, userId, allChats ? null : allowedChatIds, contactSync);
1587
+ await gapFillFromWatermark(sdk, sender, serializer, userId, allChats ? null : allowedChatIds, contactSync, stateCache);
1427
1588
  await contactSync.syncNow({ forceAll: true, reason: "post-gap-fill" }).catch((err) => {
1428
1589
  console.error("Post-gap-fill Messages contact sync failed:", safeError(err));
1429
1590
  });
1430
- await watchMessages(sdk, sender, serializer, userId, allChats ? null : allowedChatIds, { contactSync });
1591
+ editDetector = startMessagesEditDetector(sdk, sender, serializer, allChats ? null : allowedChatIds, {
1592
+ contactSync,
1593
+ stateCache,
1594
+ });
1595
+ await watchMessages(sdk, sender, serializer, userId, allChats ? null : allowedChatIds, {
1596
+ contactSync,
1597
+ stateCache,
1598
+ onShutdown: () => editDetector?.stop(),
1599
+ });
1431
1600
  } catch (err) {
1601
+ editDetector?.stop();
1432
1602
  contactSync.stop();
1433
1603
  await sdk.close?.().catch(() => undefined);
1434
1604
  throw err;
@@ -1495,6 +1665,7 @@ async function collectMessagesLocalStatus(preferredUserId = null) {
1495
1665
  allChats: config?.allChats === true,
1496
1666
  selectedChatCount: Array.isArray(config?.allowedChatIds) ? config.allowedChatIds.length : 0,
1497
1667
  storage: await probePath("messages", MESSAGES_CHAT_DB_PATH),
1668
+ contacts: userId ? readJsonOptional(messagesContactStatusFile(userId)) : null,
1498
1669
  launch: localLaunchStatus(label),
1499
1670
  queueDepth: Array.isArray(queue) ? queue.length : 0,
1500
1671
  };
@@ -1722,6 +1893,7 @@ Options:
1722
1893
  --api <url> Advanced: Shepherd API URL.
1723
1894
  --state <path> Token state file. Defaults to ~/.shepherd/mcp.json.
1724
1895
  --onboarding-state <path> Local onboarding state file. Defaults to ~/.shepherd/raw-onboarding-agent.json.
1896
+ --mcp-program <json_array> Advanced: MCP proxy command prefix. The app uses this to install app-binary-backed MCP instead of npm.
1725
1897
  --no-local Skip local onboarding auth and use WorkOS browser login.
1726
1898
  --install <targets> Install MCP after login. Use all, none, codex, claude, cursor, or comma-separated targets.
1727
1899
  --no-install Save the MCP token without installing client config.
@@ -1750,6 +1922,7 @@ Installs the saved Shepherd MCP login into:
1750
1922
  Options:
1751
1923
  --state <path> Token state file. Defaults to ~/.shepherd/mcp.json.
1752
1924
  --onboarding-state <path> Local onboarding state file. Defaults to ~/.shepherd/raw-onboarding-agent.json.
1925
+ --mcp-program <json_array> Advanced: MCP proxy command prefix. The app uses this to install app-binary-backed MCP instead of npm.
1753
1926
  --no-local Skip local onboarding auth refresh.
1754
1927
  --install <targets> Use all, none, codex, claude, cursor, or comma-separated targets.
1755
1928
  --no-install Skip client config writes.
@@ -1759,6 +1932,31 @@ Options:
1759
1932
  return;
1760
1933
  }
1761
1934
 
1935
+ if (which === "write-agent-state" || which === "write-messages-config" || which === "install-messages-agent") {
1936
+ console.log(`Shepherd onboarding engine commands
1937
+
1938
+ These non-interactive commands are used by GUI onboarding apps (for example the
1939
+ Shepherd macOS app) so that this CLI stays the single owner of Shepherd state
1940
+ file schemas and the launchd install flow.
1941
+
1942
+ Usage:
1943
+ shepherd-onboard write-agent-state JSON object on stdin is merged into ~/.shepherd/raw-onboarding-agent.json. Prints {statePath}.
1944
+ shepherd-onboard write-messages-config JSON object on stdin ({apiUrl, userId, agentToken, backfillDays?, allowedChatIds, selectedChats?}) is written to ~/.shepherd/raw-messages/<userId>.json. Prints {configPath}.
1945
+ shepherd-onboard install-messages-agent --config <path>
1946
+ Installs and verifies the Messages launchd agent for an existing config. Prints install metadata.
1947
+
1948
+ install-messages-agent options:
1949
+ --config <path> Messages agent config created by onboarding. Required.
1950
+ --user-id <id> Override the user ID. Defaults to the config's userId.
1951
+ --program <json_array> Replace the default npx launcher with custom ProgramArguments (e.g. a signed app binary). --config <path> is appended.
1952
+ --env <json_object> Extra EnvironmentVariables merged into the launchd plist (e.g. ELECTRON_RUN_AS_NODE).
1953
+ --dry-run Print the launchd plist and paths without writing or loading anything.
1954
+ --no-permission-prompt Fail instead of prompting when Full Disk Access is missing.
1955
+ --help Show this help.
1956
+ `);
1957
+ return;
1958
+ }
1959
+
1762
1960
  if (which === "mcp") {
1763
1961
  console.log(`Shepherd MCP stdio proxy
1764
1962
 
@@ -1934,7 +2132,7 @@ function printAgentContract() {
1934
2132
  agentStatusCommand: `${command} agent --status`,
1935
2133
  messagesChatsCommand: `${command} messages-chats`,
1936
2134
  messagesPermissions: {
1937
- macOS: "Local Messages raw sync needs Full Disk Access for the app running onboarding and for Node.js used by the background LaunchAgent. The Messages selector command validates local chat.db access, opens Full Disk Access settings if needed, and keeps checking until access works in interactive onboarding. Background sync install also checks that launchd can start the Messages agent. Contacts permission may also appear when resolving local contact names. The background Messages agent reloads Contacts on startup, watches AddressBook changes when available, and runs fallback contact sync so renamed contacts can hydrate prior ingested Messages rows for the token-bound customer account.",
2135
+ macOS: "Local Messages raw sync needs Full Disk Access for the app running onboarding and for Node.js used by the background LaunchAgent, so launchd can start the Messages agent after onboarding. The Messages selector command validates local chat.db access, opens Full Disk Access settings if needed, and keeps checking until access works in interactive onboarding. Contacts permission is also needed for contact-name hydration; Shepherd reads contact names from the local macOS AddressBook database, reloads Contacts on startup, watches AddressBook changes, and scopes synced messages to the token-bound customer account. If that schema cannot be read, raw message sync still works but contact names are marked degraded and the user should contact founders@askshepherd.ai.",
1938
2136
  nodeBinary: process.execPath,
1939
2137
  },
1940
2138
  codingSessions: {
@@ -2003,7 +2201,7 @@ If Messages is selected, run:
2003
2201
 
2004
2202
  Before or during this step, ask the user to grant or confirm macOS Full Disk Access for local Messages sync. The command validates access to the local Messages database, opens System Settings -> Privacy & Security -> Full Disk Access if access is missing, and keeps checking until access works in interactive onboarding. The user should enable the app running onboarding, such as Terminal, iTerm, Claude Code, or Codex, and Node.js for background sync:
2005
2203
  ${payload.messagesPermissions.nodeBinary}
2006
- Contacts permission may also appear when Shepherd resolves local contact names. The background Messages agent reloads Contacts on startup, watches AddressBook changes when available, and runs fallback contact sync so renamed contacts can hydrate prior ingested Messages rows for the token-bound customer account.
2204
+ Shepherd reads contact names from the local macOS AddressBook database. If that database schema cannot be read, raw Messages sync still works, but contact names will be marked degraded and the user should contact founders@askshepherd.ai.
2007
2205
 
2008
2206
  This opens a minimal local webpage with recent local Messages chats and search. Have the user select which contacts/groups Shepherd should sync. Do not select all chats by default. If the user explicitly wants everything, use the "Sync all current and future chats" checkbox or pass --messages-chat-ids all. All-chats mode backfills current chats and keeps watching chats that appear later. When the command returns, keep the printed chat IDs or the literal value all.
2009
2207
 
@@ -2096,10 +2294,28 @@ function selectedSources() {
2096
2294
  return selected;
2097
2295
  }
2098
2296
 
2297
/**
 * Derive the source selection from an onboarding response.
 *
 * @param {object | null | undefined} response - Response whose `sources`
 *   property is inspected.
 * @param {object | null | undefined} fallback - Used when `response.sources`
 *   is not a non-array object.
 * @returns {object | null} A fresh object with one boolean per known source
 *   (true only where the response value is strictly `true`); otherwise a
 *   shallow copy of `fallback`, or `null` when `fallback` is falsy.
 */
function sourceSelectionFromSession(response, fallback) {
  const reported = response?.sources;
  const isRecord = Boolean(reported) && typeof reported === "object" && !Array.isArray(reported);
  if (!isRecord) {
    if (!fallback) return null;
    return { ...fallback };
  }

  const knownSources = [
    "google",
    "slack",
    "github",
    "granola",
    "messages",
    "discord",
    "instagram",
    "codingSessions",
  ];
  return Object.fromEntries(knownSources.map((key) => [key, reported[key] === true]));
}
2313
+
2099
2314
  function sourceSelectionFromList(value) {
2100
2315
  const selected = {
2101
2316
  google: false,
2102
2317
  slack: false,
2318
+ github: false,
2103
2319
  granola: false,
2104
2320
  messages: false,
2105
2321
  codingSessions: false,
@@ -2112,6 +2328,7 @@ function sourceSelectionFromList(value) {
2112
2328
  ["gdocs", "google"],
2113
2329
  ["calendar", "google"],
2114
2330
  ["slack", "slack"],
2331
+ ["github", "github"],
2115
2332
  ["granola", "granola"],
2116
2333
  ["messages", "messages"],
2117
2334
  ["imessage", "messages"],
@@ -2131,6 +2348,7 @@ function sourceSelectionFromList(value) {
2131
2348
  if (part === "all") {
2132
2349
  selected.google = true;
2133
2350
  selected.slack = true;
2351
+ selected.github = true;
2134
2352
  selected.granola = true;
2135
2353
  selected.messages = true;
2136
2354
  selected.codingSessions = true;
@@ -2138,7 +2356,7 @@ function sourceSelectionFromList(value) {
2138
2356
  }
2139
2357
  const source = aliases.get(part);
2140
2358
  if (!source) {
2141
- throw new Error(`Unknown source "${part}". Use google, slack, granola, messages, coding-sessions, or all.`);
2359
+ throw new Error(`Unknown source "${part}". Use google, slack, github, granola, messages, coding-sessions, or all.`);
2142
2360
  }
2143
2361
  selected[source] = true;
2144
2362
  }
@@ -2149,6 +2367,9 @@ async function writeAgentState(state) {
2149
2367
  const path = agentStatePath();
2150
2368
  await mkdir(dirname(path), { recursive: true });
2151
2369
  await writeFile(path, JSON.stringify(state, null, 2), { mode: 0o600 });
2370
+ // writeFile's mode only applies on creation; an existing file keeps its
2371
+ // permissions, so enforce them on every token write.
2372
+ await chmod(path, 0o600);
2152
2373
  return path;
2153
2374
  }
2154
2375
 
@@ -2191,7 +2412,9 @@ async function updateAgentStateFromOnboardingResponse(state, response) {
2191
2412
  const hasStatus = typeof response?.status === "string";
2192
2413
  const hasProcessing = typeof response?.processingEnabled === "boolean" || response?.processing;
2193
2414
  const hasProviders = response?.providers && typeof response.providers === "object" && !Array.isArray(response.providers);
2194
- if (!hasAuthUrls && !hasGoogleWorkspaceDelegation && !hasStatus && !hasProcessing && !hasProviders) return state;
2415
+ const responseSources = sourceSelectionFromSession(response, null);
2416
+ const hasSources = responseSources !== null;
2417
+ if (!hasAuthUrls && !hasGoogleWorkspaceDelegation && !hasStatus && !hasProcessing && !hasProviders && !hasSources) return state;
2195
2418
 
2196
2419
  const next = {
2197
2420
  ...state,
@@ -2203,6 +2426,7 @@ async function updateAgentStateFromOnboardingResponse(state, response) {
2203
2426
  ...(typeof response?.processingEnabled === "boolean" ? { processingEnabled: response.processingEnabled } : {}),
2204
2427
  ...(response?.processing ? { processing: response.processing } : {}),
2205
2428
  ...(hasProviders ? { providers: response.providers } : {}),
2429
+ ...(hasSources ? { sources: responseSources } : {}),
2206
2430
  };
2207
2431
  await writeAgentState(next);
2208
2432
  return next;
@@ -2309,6 +2533,20 @@ async function openNextAgentModality({ sources, authUrls = {}, noOpen = false, p
2309
2533
  return { source, label: "Slack", opened: !noOpen, url };
2310
2534
  }
2311
2535
 
2536
+ if (source === "github") {
2537
+ const url = typeof authUrls.github === "string" ? authUrls.github : null;
2538
+ if (!url) {
2539
+ return {
2540
+ source,
2541
+ label: "GitHub",
2542
+ opened: false,
2543
+ message: "GitHub authorization URL was not returned by Shepherd.",
2544
+ };
2545
+ }
2546
+ await openOrPrint(url, { noOpen });
2547
+ return { source, label: "GitHub", opened: !noOpen, url };
2548
+ }
2549
+
2312
2550
  if (source === "granola") {
2313
2551
  const result = await openGranolaApiKeys({ noOpen: noOpen || Boolean(args["no-open-granola"]) });
2314
2552
  return { source, label: "Granola", ...result };
@@ -2370,6 +2608,18 @@ function printAgentCurrentAction(action, opts = {}) {
2370
2608
  return;
2371
2609
  }
2372
2610
 
2611
+ if (action.source === "github") {
2612
+ if (action.opened) {
2613
+ console.log("Opened GitHub authorization in the browser.");
2614
+ } else if (action.url) {
2615
+ console.log(`GitHub authorization URL: ${action.url}`);
2616
+ } else if (action.message) {
2617
+ console.log(action.message);
2618
+ }
2619
+ console.log("Ask the user to complete GitHub authorization before opening another source.");
2620
+ return;
2621
+ }
2622
+
2373
2623
  if (action.source === "granola") {
2374
2624
  if (action.target) console.log(`Granola target: ${action.target}`);
2375
2625
  console.log("Ask the user to create/copy the Granola API key before opening another source.");
@@ -2393,6 +2643,7 @@ function agentNeedsUserAction(sources, action) {
2393
2643
  if (!action) return [];
2394
2644
  if (action.source === "google") return ["Have the customer's Google Workspace super admin authorize Shepherd's domain-wide delegation Client ID and scopes in Google Admin Console."];
2395
2645
  if (action.source === "slack") return ["Complete Slack browser authorization."];
2646
+ if (action.source === "github") return ["Complete GitHub browser authorization."];
2396
2647
  if (action.source === "granola") return ["Create/copy a Granola API key from the Granola Mac app."];
2397
2648
  if (action.source === "messages") return ["Grant or confirm macOS Full Disk Access for the onboarding app and Node.js, run messages-chats, have the user select local Messages contacts/groups in the browser, then pass the printed chat IDs with the Messages handle."];
2398
2649
  if (action.source === "codingSessions") return ["Run the continue command to install local Codex and Claude Code session summary sync."];
@@ -2550,7 +2801,7 @@ async function explainMessagesBackgroundPermissions(opts = {}) {
2550
2801
  console.log("\nMessages background sync permissions");
2551
2802
  console.log("Local Messages raw sync runs as a macOS LaunchAgent using npx/Node.js. For continuous sync, macOS Full Disk Access must include the background Node.js binary, not just the current terminal.");
2552
2803
  printMessagesPermissionTargets();
2553
- console.log("Contacts permission may also appear when Shepherd resolves local contact names. The background agent keeps contact names hydrated for observed Messages conversations.");
2804
+ console.log("Shepherd reads contact names from the local macOS AddressBook database. If that schema cannot be read, raw Messages sync still works but contact names are marked degraded; contact founders@askshepherd.ai.");
2554
2805
  await openFullDiskAccessSettings(opts);
2555
2806
 
2556
2807
  if (opts.waitForUser && process.stdin.isTTY && !args["no-permission-prompt"]) {
@@ -2660,7 +2911,11 @@ function headers(token) {
2660
2911
  async function writeMessagesConfig(input) {
2661
2912
  const dir = join(homedir(), ".shepherd", "raw-messages");
2662
2913
  await mkdir(dir, { recursive: true });
2663
- const path = join(dir, `${input.userId}.json`);
2914
+ // The userId reaches this path from the network (server-issued session id)
2915
+ // and, via write-messages-config, from stdin; sanitize it like the launchd
2916
+ // label does so it can never traverse outside the raw-messages directory.
2917
+ const safeId = sanitizeUserFileId(input.userId);
2918
+ const path = join(dir, `${safeId}.json`);
2664
2919
  const allowedChatIds = parseAllowedChatIds(input.allowedChatIds);
2665
2920
  const allChats = selectedChatIdsIncludeAll(allowedChatIds);
2666
2921
  if (!allChats && allowedChatIds.length === 0) {
@@ -2681,25 +2936,34 @@ async function writeMessagesConfig(input) {
2681
2936
  }, null, 2),
2682
2937
  { mode: 0o600 },
2683
2938
  );
2939
+ await chmod(path, 0o600);
2684
2940
  return path;
2685
2941
  }
2686
2942
 
2687
- async function installMessagesAgent(configPath, userId) {
2688
- if (platform() !== "darwin") {
2689
- throw new Error("automatic local Messages sync is only supported on macOS");
2690
- }
2691
-
2943
+ function buildMessagesAgentInstall(configPath, userId, overrides = {}) {
2692
2944
  const safeId = userId.replace(/[^a-zA-Z0-9.-]/g, "-");
2693
2945
  const label = `ai.shepherd.raw-messages.${safeId}`;
2694
2946
  const rawDir = join(homedir(), ".shepherd", "raw-messages");
2695
2947
  const agentsDir = join(homedir(), "Library", "LaunchAgents");
2696
- await mkdir(rawDir, { recursive: true });
2697
- await mkdir(agentsDir, { recursive: true });
2698
-
2699
2948
  const plistPath = join(agentsDir, `${label}.plist`);
2700
2949
  const stdoutPath = join(rawDir, `${safeId}.out.log`);
2701
2950
  const stderrPath = join(rawDir, `${safeId}.err.log`);
2702
- const launchPath = launchAgentPath();
2951
+
2952
+ const programPrefix = Array.isArray(overrides.programArguments) && overrides.programArguments.length > 0
2953
+ ? overrides.programArguments
2954
+ : ["/usr/bin/env", "npx", "-y", PACKAGE_SPEC, "messages-agent"];
2955
+ const programArguments = [...programPrefix, "--config", configPath];
2956
+ const environment = {
2957
+ PATH: launchAgentPath(),
2958
+ ...stringRecord(overrides.environment),
2959
+ };
2960
+
2961
+ const programArgumentsXml = programArguments
2962
+ .map((value) => ` <string>${xmlEscape(value)}</string>`)
2963
+ .join("\n");
2964
+ const environmentXml = Object.entries(environment)
2965
+ .map(([key, value]) => ` <key>${xmlEscape(key)}</key>\n <string>${xmlEscape(value)}</string>`)
2966
+ .join("\n");
2703
2967
 
2704
2968
  const plist = `<?xml version="1.0" encoding="UTF-8"?>
2705
2969
  <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
@@ -2709,32 +2973,43 @@ async function installMessagesAgent(configPath, userId) {
2709
2973
  <string>${xmlEscape(label)}</string>
2710
2974
  <key>ProgramArguments</key>
2711
2975
  <array>
2712
- <string>/usr/bin/env</string>
2713
- <string>npx</string>
2714
- <string>-y</string>
2715
- <string>${PACKAGE_SPEC}</string>
2716
- <string>messages-agent</string>
2717
- <string>--config</string>
2718
- <string>${xmlEscape(configPath)}</string>
2976
+ ${programArgumentsXml}
2719
2977
  </array>
2720
2978
  <key>KeepAlive</key>
2721
2979
  <true/>
2722
2980
  <key>RunAtLoad</key>
2723
2981
  <true/>
2982
+ <key>ThrottleInterval</key>
2983
+ <integer>10</integer>
2984
+ <key>WorkingDirectory</key>
2985
+ <string>${xmlEscape(rawDir)}</string>
2724
2986
  <key>StandardOutPath</key>
2725
2987
  <string>${xmlEscape(stdoutPath)}</string>
2726
2988
  <key>StandardErrorPath</key>
2727
2989
  <string>${xmlEscape(stderrPath)}</string>
2728
2990
  <key>EnvironmentVariables</key>
2729
2991
  <dict>
2730
- <key>PATH</key>
2731
- <string>${xmlEscape(launchPath)}</string>
2992
+ ${environmentXml}
2732
2993
  </dict>
2733
2994
  </dict>
2734
2995
  </plist>
2735
2996
  `;
2736
2997
 
2998
+ return { label, rawDir, agentsDir, plistPath, stdoutPath, stderrPath, programArguments, environment, plist };
2999
+ }
3000
+
3001
+ async function installMessagesAgent(configPath, userId, overrides = {}) {
3002
+ if (platform() !== "darwin") {
3003
+ throw new Error("automatic local Messages sync is only supported on macOS");
3004
+ }
3005
+
3006
+ const install = buildMessagesAgentInstall(configPath, userId, overrides);
3007
+ const { label, rawDir, agentsDir, plistPath, stdoutPath, stderrPath, plist } = install;
3008
+ await mkdir(rawDir, { recursive: true });
3009
+ await mkdir(agentsDir, { recursive: true });
3010
+
2737
3011
  await writeFile(plistPath, plist, { mode: 0o600 });
3012
+ await chmod(plistPath, 0o600);
2738
3013
  while (true) {
2739
3014
  const stdoutOffset = await fileLength(stdoutPath);
2740
3015
  const stderrOffset = await fileLength(stderrPath);
@@ -2878,7 +3153,7 @@ async function verifyMessagesAgentLaunch({ label, stdoutPath, stderrPath, stdout
2878
3153
  throw new Error("Messages background sync could not open local Messages storage. Grant macOS Full Disk Access to the app running onboarding and Node.js, then rerun or continue the Messages step.");
2879
3154
  }
2880
3155
 
2881
- if (/Watching for new Messages|Shepherd Messages raw sync starting|Running .*Messages backfill/i.test(stdout)
3156
+ if (/Messages local database access validated|Watching for new Messages/i.test(stdout)
2882
3157
  && /state = running|job state = running/i.test(launchState)) {
2883
3158
  return;
2884
3159
  }
@@ -3801,7 +4076,27 @@ function selectedChatContactSeedHandles(selectedChats, allowedChatIds) {
3801
4076
  return [...new Set(handles)];
3802
4077
  }
3803
4078
 
3804
- async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds, contactSync = null) {
4079
/**
 * Probes the Messages SDK by requesting a single message.
 *
 * The result is discarded; the call exists so that a failing read rejects here.
 *
 * @param {object} sdk - Object exposing an async `getMessages(query)` method.
 * @returns {Promise<void>}
 */
async function validateMessagesDatabaseAccess(sdk) {
  const probeQuery = { limit: 1 };
  await sdk.getMessages(probeQuery);
}
4082
+
4083
/**
 * Builds a descriptor of what a Messages backfill covers.
 *
 * @param {object} input
 * @param {number|null|undefined} input.backfillDays - Day count; null/undefined is recorded as "all".
 * @param {boolean} input.allChats - When truthy the scope is every chat.
 * @param {Iterable<string>} input.allowedChatIds - Chat ids, read only when `allChats` is falsy.
 * @returns {{version: number, backfillDays: number|string, scope: object}}
 */
function messagesBackfillScope({ backfillDays, allChats, allowedChatIds }) {
  const days = backfillDays == null ? "all" : backfillDays;

  let scope;
  if (allChats) {
    scope = { allChats: true };
  } else {
    // Sort a copy so the descriptor does not depend on the order the ids were given in.
    const chatIds = [...allowedChatIds];
    chatIds.sort();
    scope = { chatIds };
  }

  return { version: 1, backfillDays: days, scope };
}
4092
+
4093
/**
 * Decides whether a Messages backfill should run.
 *
 * A `backfillDays` of exactly 0 disables the backfill; otherwise an explicit
 * request always runs, and an implicit one runs only while not yet complete.
 *
 * @param {object} input
 * @param {number|null|undefined} input.backfillDays
 * @param {boolean} input.backfillExplicit
 * @param {boolean} input.backfillComplete
 * @returns {boolean}
 */
function shouldRunMessagesBackfill({ backfillDays, backfillExplicit, backfillComplete }) {
  const disabled = backfillDays === 0;
  if (disabled) return false;
  return backfillExplicit ? true : !backfillComplete;
}
4098
+
4099
+ async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds, contactSync = null, stateCache = null) {
3805
4100
  const allChats = allowedChatIds == null;
3806
4101
  console.log(allChats
3807
4102
  ? `Running ${days == null ? "all-history" : `${days}-day`} Messages backfill for all current chats`
@@ -3822,6 +4117,8 @@ async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds
3822
4117
  totalMessages += filtered.length;
3823
4118
  const result = await sender.send(filtered.map((msg) => serializer.serialize(msg)));
3824
4119
  totalStored += result.stored;
4120
+ if ((result.queued ?? 0) > 0) return { complete: false, totalMessages, totalStored, queued: result.queued };
4121
+ stateCache?.observe(filtered);
3825
4122
  saveMessagesWatermark(sender.userId, maxRowId(messages));
3826
4123
 
3827
4124
  if (messages.length < pageSize) break;
@@ -3829,7 +4126,7 @@ async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds
3829
4126
  }
3830
4127
 
3831
4128
  console.log(`Messages backfill complete: stored ${totalStored} of ${totalMessages}`);
3832
- return;
4129
+ return { complete: true, totalMessages, totalStored, queued: 0, watermark: loadMessagesWatermark(sender.userId) };
3833
4130
  }
3834
4131
 
3835
4132
  for (const chatId of allowedChatIds) {
@@ -3843,6 +4140,8 @@ async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds
3843
4140
  totalMessages += filtered.length;
3844
4141
  const result = await sender.send(filtered.map((msg) => serializer.serialize(msg)));
3845
4142
  totalStored += result.stored;
4143
+ if ((result.queued ?? 0) > 0) return { complete: false, totalMessages, totalStored, queued: result.queued };
4144
+ stateCache?.observe(filtered);
3846
4145
  saveMessagesWatermark(sender.userId, maxRowId(messages));
3847
4146
 
3848
4147
  if (messages.length < pageSize) break;
@@ -3851,19 +4150,20 @@ async function runMessagesBackfill(sdk, sender, serializer, days, allowedChatIds
3851
4150
  }
3852
4151
 
3853
4152
  console.log(`Messages backfill complete: stored ${totalStored} of ${totalMessages}`);
4153
+ return { complete: true, totalMessages, totalStored, queued: 0, watermark: loadMessagesWatermark(sender.userId) };
3854
4154
  }
3855
4155
 
3856
- async function gapFillFromWatermark(sdk, sender, serializer, userId, allowedChatIds, contactSync = null) {
4156
+ async function gapFillFromWatermark(sdk, sender, serializer, userId, allowedChatIds, contactSync = null, stateCache = null) {
3857
4157
  const allChats = allowedChatIds == null;
3858
4158
  const lastWatermark = loadMessagesWatermark(userId);
3859
4159
  if (lastWatermark <= 0) return;
3860
4160
 
3861
- const missed = [];
4161
+ let missed = [];
3862
4162
  if (allChats) {
3863
- missed.push(...await sdk.getMessages({ limit: 5000 }));
4163
+ missed = await getMessagesAfterWatermark(sdk, { lastWatermark, pageSize: 1000 });
3864
4164
  } else {
3865
4165
  for (const chatId of allowedChatIds) {
3866
- missed.push(...await sdk.getMessages({ chatId, limit: 1000 }));
4166
+ missed.push(...await getMessagesAfterWatermark(sdk, { chatId, lastWatermark, pageSize: 1000 }));
3867
4167
  }
3868
4168
  }
3869
4169
  const newMessages = missed.filter((msg) =>
@@ -3872,16 +4172,388 @@ async function gapFillFromWatermark(sdk, sender, serializer, userId, allowedChat
3872
4172
  && !messageTouchesShepherdAgent(msg));
3873
4173
  if (newMessages.length === 0) return;
3874
4174
 
4175
+ newMessages.sort((a, b) => Number(a.rowId ?? 0) - Number(b.rowId ?? 0));
3875
4176
  contactSync?.observeMessages(newMessages);
3876
4177
  const result = await sender.send(newMessages.map((msg) => serializer.serialize(msg)));
3877
- if (result.stored > 0) saveMessagesWatermark(userId, maxRowId(newMessages));
4178
+ if ((result.queued ?? 0) === 0) {
4179
+ stateCache?.observe(newMessages);
4180
+ saveMessagesWatermark(userId, maxRowId(newMessages));
4181
+ }
3878
4182
  console.log(`Messages gap-fill complete: stored ${result.stored} of ${newMessages.length}`);
3879
4183
  }
3880
4184
 
4185
/**
 * Pages through `sdk.getMessages` and keeps the messages whose row id is above a watermark.
 *
 * Paging uses `limit`/`offset` and stops at the first empty or short page.
 *
 * @param {object} sdk - Object exposing an async `getMessages(query)` method.
 * @param {object} options
 * @param {string|null} [options.chatId=null] - Restricts the query to one chat when truthy.
 * @param {number} options.lastWatermark - Only messages with `Number(rowId)` above this are kept.
 * @param {number} options.pageSize - Page size passed as `limit` and used as the offset step.
 * @returns {Promise<object[]>} Matching messages in the order the SDK returned them.
 */
async function getMessagesAfterWatermark(sdk, { chatId = null, lastWatermark, pageSize }) {
  const scope = chatId ? { chatId } : {};
  const collected = [];

  for (let offset = 0; ; offset += pageSize) {
    const page = await sdk.getMessages({ ...scope, limit: pageSize, offset });
    if (!page.length) break;

    const fresh = page.filter((msg) => Number(msg.rowId) > lastWatermark);
    collected.push(...fresh);

    // A short page means the SDK has nothing further to return.
    if (page.length < pageSize) break;
  }

  return collected;
}
4197
+
4198
/**
 * Creates a persisted cache of per-message state, keyed by message identity.
 *
 * The initial contents come from `readJsonOptional(messagesStateFile(userId))`;
 * anything that is not a plain (non-array) object is treated as empty.
 *
 * @param {string} userId - Passed to the state-file helpers.
 * @param {number} [maxSize=MESSAGES_STATE_CACHE_MAX] - Size bound handed to `trimMessageStateCache`.
 * @returns {{observe: function(Iterable<object>=): void, changed: function(Iterable<object>=): object[]}}
 *   `observe` records the current state of each message and persists the cache;
 *   `changed` returns the messages whose state differs from the recorded one, plus
 *   unrecorded messages that already carry an edit or retraction.
 */
function createMessageStateCache(userId, maxSize = MESSAGES_STATE_CACHE_MAX) {
  const persisted = readJsonOptional(messagesStateFile(userId));
  const isPlainRecord = persisted && typeof persisted === "object" && !Array.isArray(persisted);
  const entries = new Map(Object.entries(isPlainRecord ? persisted : {}));

  function record(msg) {
    const identity = messageIdentity(msg);
    if (!identity) return;
    entries.set(identity, messageState(msg));
    trimMessageStateCache(entries, maxSize);
  }

  function flush() {
    return saveMessagesState(userId, Object.fromEntries(entries));
  }

  function isChanged(msg) {
    const identity = messageIdentity(msg);
    if (!identity) return false;
    const current = messageState(msg);
    const previous = entries.get(identity);
    // Known message: compare states. Unknown message: only report it when it
    // already shows an edit or retraction.
    return previous
      ? !sameMessageState(previous, current)
      : hasMessageEditOrRetraction(msg);
  }

  return {
    observe(messages) {
      for (const msg of messages ?? []) {
        record(msg);
      }
      flush();
    },
    changed(messages) {
      const differing = [];
      for (const msg of messages ?? []) {
        if (isChanged(msg)) differing.push(msg);
      }
      return differing;
    },
  };
}
4235
+
4236
/**
 * Shrinks a Map to at most `maxSize` entries by deleting its earliest-inserted keys.
 *
 * Keys are deleted straight from the Map's iterator (insertion order) instead of
 * first copying every key into an array, so a call that only needs to drop a
 * handful of entries no longer pays for the whole cache.
 *
 * @param {Map<string, unknown>} cache - Mutated in place.
 * @param {number} maxSize - Maximum number of entries to keep.
 * @returns {void}
 */
function trimMessageStateCache(cache, maxSize) {
  // Truncate so a fractional bound drops the same number of entries as slicing
  // to that bound would; NaN falls through the guard and deletes nothing.
  let remaining = Math.trunc(cache.size - maxSize);
  if (!(remaining > 0)) return;

  // Deleting the entry just visited is safe while iterating a Map.
  for (const key of cache.keys()) {
    if (remaining <= 0) break;
    cache.delete(key);
    remaining -= 1;
  }
}
4241
+
4242
/**
 * Reports whether a message carries a truthy `editedAt` or `retractedAt` value.
 *
 * @param {object|null|undefined} msg
 * @returns {boolean}
 */
function hasMessageEditOrRetraction(msg) {
  if (msg?.editedAt) return true;
  return Boolean(msg?.retractedAt);
}
4245
+
4246
/**
 * Derives a string key for a message from the first non-nullish of
 * `id`, `messageId` and `rowId`.
 *
 * @param {object|null|undefined} msg
 * @returns {string|null} The key, or `null` when none of the fields is set.
 */
function messageIdentity(msg) {
  let identity = msg?.id;
  if (identity == null) identity = msg?.messageId;
  if (identity == null) identity = msg?.rowId;
  if (identity == null) return null;
  return String(identity);
}
4250
+
4251
/**
 * Captures the fields of a message that are compared to detect edits and unsends.
 *
 * @param {object|null|undefined} msg
 * @returns {{text: *, editedAt: *, retractedAt: *, hasAttachments: boolean, attachments: object[]}}
 *   `editedAt`/`retractedAt` are whatever `isoDate` returns for the message fields.
 */
function messageState(msg) {
  const text = msg?.text ?? null;
  const editedAt = isoDate(msg?.editedAt);
  const retractedAt = isoDate(msg?.retractedAt);

  // Prefer the explicit flag; otherwise infer it from a non-empty attachments array.
  let hasAttachments = msg?.hasAttachments;
  if (hasAttachments == null) {
    hasAttachments = Array.isArray(msg?.attachments) && msg.attachments.length > 0;
  }

  return {
    text,
    editedAt,
    retractedAt,
    hasAttachments: Boolean(hasAttachments),
    attachments: messageAttachmentState(msg),
  };
}
4260
+
4261
/**
 * Compares two message-state records field by field.
 *
 * Scalar fields are compared with strict equality; `attachments` are compared
 * through their `stableLocalJson` serialization.
 *
 * @param {object} a
 * @param {object} b
 * @returns {boolean} `true` when every compared field matches.
 */
function sameMessageState(a, b) {
  const scalarFields = ["text", "editedAt", "retractedAt", "hasAttachments"];
  for (const field of scalarFields) {
    if (a[field] !== b[field]) return false;
  }
  return stableLocalJson(a.attachments) === stableLocalJson(b.attachments);
}
4268
+
4269
/**
 * Reduces a message's attachments to a sorted list of comparable records.
 *
 * @param {object|null|undefined} msg - Only `msg.attachments` is read; a
 *   non-array value is treated as no attachments.
 * @returns {object[]} One record per attachment, ordered by `id` and then by `fileName`.
 */
function messageAttachmentState(msg) {
  const source = Array.isArray(msg?.attachments) ? msg.attachments : [];

  const toRecord = (att) => ({
    id: String(att?.id ?? ""),
    fileName: att?.fileName ?? null,
    mimeType: att?.mimeType ?? null,
    sizeBytes: Number(att?.sizeBytes ?? 0),
    transferStatus: att?.transferStatus ?? null,
    isSticker: Boolean(att?.isSticker),
    isSensitiveContent: Boolean(att?.isSensitiveContent),
  });

  const byIdThenFileName = (left, right) => {
    const byId = left.id.localeCompare(right.id);
    if (byId !== 0) return byId;
    return String(left.fileName ?? "").localeCompare(String(right.fileName ?? ""));
  };

  const records = source.map(toRecord);
  records.sort(byIdThenFileName);
  return records;
}
4282
+
4283
/**
 * Serializes a value to JSON text with object keys in sorted order, so two
 * structurally equal values produce the same string regardless of key order.
 *
 * Values that JSON cannot represent (undefined, functions, symbols) follow
 * `JSON.stringify` rules: inside an array they become `null`, and as an object
 * property the whole entry is omitted. Without this, `[undefined]` would
 * serialize the same as `[]` and a function-valued property would emit the
 * literal text `undefined`.
 *
 * @param {*} value
 * @returns {string|undefined} JSON text, or `undefined` when the top-level
 *   value itself is not representable (same as `JSON.stringify`).
 */
function stableLocalJson(value) {
  if (Array.isArray(value)) {
    // Array.from visits holes as undefined, so sparse arrays serialize like JSON.stringify does.
    const items = Array.from(value, (item) => stableLocalJson(item) ?? "null");
    return `[${items.join(",")}]`;
  }

  if (value && typeof value === "object") {
    const members = Object.entries(value)
      .map(([key, entryValue]) => [key, stableLocalJson(entryValue)])
      .filter(([, serialized]) => serialized !== undefined)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([key, serialized]) => `${JSON.stringify(key)}:${serialized}`);
    return `{${members.join(",")}}`;
  }

  return JSON.stringify(value);
}
4294
+
4295
/**
 * Starts background detection of edited and unsent Messages.
 *
 * Scans are triggered three ways: once at startup, on a fixed polling interval,
 * and (debounced by 500 ms) whenever one of the watched Messages database files
 * changes. Only one scan runs at a time; a trigger that arrives during a scan
 * is ignored.
 *
 * @param {object} sdk - Messages SDK handed to the scan helpers.
 * @param {object} sender - Sender used to upload changes; `sender.userId` keys the default state cache.
 * @param {object} serializer - Message serializer handed to the scan helpers.
 * @param {string[]|null} allowedChatIds - Chat scope; `null`/`undefined` means all chats.
 * @param {object} [opts]
 * @param {object} [opts.contactSync] - Optional contact sync forwarded to the scan helpers.
 * @param {object} [opts.stateCache] - Optional state cache; one is created when absent.
 * @returns {{stop: function(): void}} Handle that cancels timers and closes the file watchers.
 */
function startMessagesEditDetector(sdk, sender, serializer, allowedChatIds, opts = {}) {
  const contactSync = opts.contactSync ?? null;
  const stateCache = opts.stateCache ?? createMessageStateCache(sender.userId);
  const watchers = [];
  let stopped = false;
  let debounceTimer = null;
  let scanInFlight = false;
  let lastMutationReconcileAt = 0;

  const reportFailure = (err) => console.error("Messages edit detector failed:", safeError(err));

  const runScan = async (reason) => {
    if (stopped || scanInFlight) return;
    scanInFlight = true;
    try {
      const recent = await scanMessagesForEditsAndUnsends(sdk, sender, serializer, allowedChatIds, {
        contactSync,
        stateCache,
      });

      // The full mutation reconcile always runs at startup, and otherwise only
      // once its own interval has elapsed.
      const reconcileDue = reason === "startup"
        || Date.now() - lastMutationReconcileAt >= MESSAGES_MUTATION_RECONCILE_INTERVAL_MS;
      let mutation = { changed: 0, stored: 0, updated: 0, queued: 0 };
      if (reconcileDue) {
        mutation = await reconcileMessagesMutations(sdk, sender, serializer, allowedChatIds, {
          contactSync,
          stateCache,
        });
        lastMutationReconcileAt = Date.now();
      }

      const totals = {
        changed: recent.changed + mutation.changed,
        stored: recent.stored + mutation.stored,
        updated: recent.updated + mutation.updated,
        queued: recent.queued + mutation.queued,
      };
      if (totals.changed > 0) {
        console.log(`Messages edit detector: ${totals.changed} edit/unsend change(s) sent (${reason}); stored ${totals.stored}, updated ${totals.updated}, queued ${totals.queued}`);
      }
    } catch (err) {
      reportFailure(err);
    } finally {
      scanInFlight = false;
    }
  };

  // Collapses bursts of file-change events into a single scan.
  const scheduleScan = (reason) => {
    if (stopped) return;
    if (debounceTimer) clearTimeout(debounceTimer);
    debounceTimer = setTimeout(() => {
      debounceTimer = null;
      runScan(reason).catch(reportFailure);
    }, 500);
  };

  for (const watchedPath of messagesWatchPaths()) {
    try {
      const watcher = watch(watchedPath, () => {
        scheduleScan("messages-db");
      });
      watchers.push(watcher);
    } catch (err) {
      console.warn(`Could not watch Messages database file ${watchedPath}: ${safeError(err)}`);
    }
  }

  if (watchers.length > 0) {
    console.log(`Watching ${watchers.length} Messages database file(s) for edits and unsends`);
  } else if (platform() === "darwin") {
    console.warn("Messages database watch files not found; edit/unsend detection will use fallback polling only");
  }

  const pollInterval = setInterval(() => {
    runScan("fallback").catch(reportFailure);
  }, MESSAGES_EDIT_SCAN_INTERVAL_MS);
  runScan("startup").catch(reportFailure);

  return {
    stop() {
      stopped = true;
      if (debounceTimer) clearTimeout(debounceTimer);
      clearInterval(pollInterval);
      for (const watcher of watchers) watcher.close();
    },
  };
}
4377
+
4378
/**
 * Lists the Messages database files that currently exist and can be watched.
 *
 * @returns {string[]} On macOS, the existing subset of the database path and its
 *   `-wal` / `-shm` companions; on every other platform, an empty array.
 */
function messagesWatchPaths() {
  if (platform() !== "darwin") return [];

  const candidates = [
    MESSAGES_CHAT_DB_PATH,
    ...["-wal", "-shm"].map((suffix) => `${MESSAGES_CHAT_DB_PATH}${suffix}`),
  ];

  const present = [];
  for (const candidate of candidates) {
    if (existsSync(candidate)) present.push(candidate);
  }
  return present;
}
4386
+
4387
/**
 * Scans recent messages for edits and unsends and uploads the ones that changed.
 *
 * Messages newer than `MESSAGES_EDIT_SCAN_WINDOW_MS` are fetched, limited to the
 * allowed chats, and compared against `opts.stateCache`. The cache is refreshed
 * with the scanned messages unless the upload reports queued items.
 *
 * @param {object} sdk - Messages SDK handed to `getScopedMessages`.
 * @param {object} sender - Exposes an async `send(serializedMessages)`.
 * @param {object} serializer - Exposes `serialize(msg)`.
 * @param {string[]|null} allowedChatIds - Chat scope; `null`/`undefined` means all chats.
 * @param {object} [opts]
 * @param {object} [opts.contactSync] - Optional; its `observeMessages` sees the in-scope messages.
 * @param {object} [opts.stateCache] - Optional; without it nothing is reported as changed.
 * @returns {Promise<{changed: number, stored: number, updated: number, queued: number}>}
 */
async function scanMessagesForEditsAndUnsends(sdk, sender, serializer, allowedChatIds, opts = {}) {
  const unrestricted = allowedChatIds == null;
  const windowStart = new Date(Date.now() - MESSAGES_EDIT_SCAN_WINDOW_MS);
  const candidates = await getScopedMessages(sdk, allowedChatIds, { since: windowStart });

  const inScope = candidates.filter((msg) => {
    if (!msg?.chatId) return false;
    if (!unrestricted && !allowedChatIds.includes(msg.chatId)) return false;
    return !messageTouchesShepherdAgent(msg);
  });
  opts.contactSync?.observeMessages(inScope);

  const edited = opts.stateCache?.changed(inScope) ?? [];
  if (edited.length === 0) {
    opts.stateCache?.observe(inScope);
    return { changed: 0, stored: 0, updated: 0, queued: 0 };
  }

  const outcome = await sender.send(edited.map((msg) => serializer.serialize(msg)));
  const queued = outcome.queued ?? 0;
  // Leave the cache untouched when anything was queued, so the next scan sees
  // the same differences again.
  if (queued === 0) opts.stateCache?.observe(inScope);

  return {
    changed: edited.length,
    stored: outcome.stored ?? 0,
    updated: outcome.updated ?? 0,
    queued,
  };
}
4413
+
4414
/**
 * Fetches every page of messages for a query, either across all chats or once per allowed chat.
 *
 * @param {object} sdk - Messages SDK handed to `getPagedMessages`.
 * @param {Iterable<string>|null} allowedChatIds - `null`/`undefined` runs the query unscoped;
 *   otherwise the query runs once per chat id with `chatId` added.
 * @param {object} query - Base query options.
 * @returns {Promise<object[]>} All fetched messages, concatenated in chat order.
 */
async function getScopedMessages(sdk, allowedChatIds, query) {
  if (allowedChatIds == null) {
    const everything = await getPagedMessages(sdk, query);
    return [...everything];
  }

  const collected = [];
  for (const chatId of allowedChatIds) {
    const chatMessages = await getPagedMessages(sdk, { ...query, chatId });
    collected.push(...chatMessages);
  }
  return collected;
}
4426
+
4427
/**
 * Reads all pages of `sdk.getMessages(query)` using `limit`/`offset` paging.
 *
 * The page size is `MESSAGES_EDIT_SCAN_LIMIT`; paging stops at the first empty or short page.
 *
 * @param {object} sdk - Object exposing an async `getMessages(query)` method.
 * @param {object} query - Base query options; `limit` and `offset` are overridden.
 * @returns {Promise<object[]>} Every message returned, in page order.
 */
async function getPagedMessages(sdk, query) {
  const collected = [];
  for (let offset = 0; ; offset += MESSAGES_EDIT_SCAN_LIMIT) {
    const page = await sdk.getMessages({ ...query, limit: MESSAGES_EDIT_SCAN_LIMIT, offset });
    if (!page.length) break;
    collected.push(...page);
    if (page.length < MESSAGES_EDIT_SCAN_LIMIT) break;
  }
  return collected;
}
4439
+
4440
/**
 * Re-checks every message the database marks as edited or retracted and uploads
 * the ones whose state differs from the cache.
 *
 * Row ids come from `queryMessagesMutationRowIds`; the messages are then loaded
 * through the SDK and limited to the allowed chats. Without a state cache, every
 * in-scope message that shows an edit or retraction is treated as changed.
 *
 * @param {object} sdk - Messages SDK.
 * @param {object} sender - Exposes an async `send(serializedMessages)`.
 * @param {object} serializer - Exposes `serialize(msg)`.
 * @param {string[]|null} allowedChatIds - Chat scope; `null`/`undefined` means all chats.
 * @param {object} [opts]
 * @param {object} [opts.contactSync] - Optional; its `observeMessages` sees the in-scope messages.
 * @param {object} [opts.stateCache] - Optional state cache.
 * @returns {Promise<{changed: number, stored: number, updated: number, queued: number}>}
 */
async function reconcileMessagesMutations(sdk, sender, serializer, allowedChatIds, opts = {}) {
  const nothing = () => ({ changed: 0, stored: 0, updated: 0, queued: 0 });
  const unrestricted = allowedChatIds == null;

  const rowIds = queryMessagesMutationRowIds(sdk, allowedChatIds);
  if (rowIds.length === 0) return nothing();

  const loaded = await getMessagesByRowIds(sdk, rowIds);
  const inScope = loaded.filter((msg) => {
    if (!msg?.chatId) return false;
    if (!unrestricted && !allowedChatIds.includes(msg.chatId)) return false;
    return !messageTouchesShepherdAgent(msg);
  });
  opts.contactSync?.observeMessages(inScope);

  const mutated = opts.stateCache?.changed(inScope) ?? inScope.filter(hasMessageEditOrRetraction);
  if (mutated.length === 0) {
    opts.stateCache?.observe(inScope);
    return nothing();
  }

  const outcome = await sender.send(mutated.map((msg) => serializer.serialize(msg)));
  const queued = outcome.queued ?? 0;
  // Only record the new state once the upload reports nothing queued.
  if (queued === 0) opts.stateCache?.observe(inScope);

  return {
    changed: mutated.length,
    stored: outcome.stored ?? 0,
    updated: outcome.updated ?? 0,
    queued,
  };
}
4467
+
4468
/**
 * Queries the Messages database for the row ids of edited or retracted messages.
 *
 * Requires `sdk.database.all(sql, params)`. Only the `date_edited` /
 * `date_retracted` columns that exist on the `message` table are tested. When
 * `allowedChatIds` is given, rows are restricted to chats whose
 * `chat_identifier` or `guid` matches one of the derived match values.
 *
 * @param {object} sdk - SDK object; `sdk.database` must expose an `all` function.
 * @param {string[]|null} [allowedChatIds=null] - Chat scope; `null`/`undefined` means all chats.
 * @returns {number[]} Positive finite row ids in ascending order, or `[]` when the
 *   reader is unavailable, no mutation column exists, the scope is empty, or the query fails.
 */
function queryMessagesMutationRowIds(sdk, allowedChatIds = null) {
  const db = sdk?.database;
  const readable = Boolean(db) && typeof db.all === "function";
  if (!readable) {
    console.warn("Messages mutation reconciliation unavailable: SDK database reader is not exposed");
    return [];
  }

  const columns = messageTableColumns(db);
  const predicates = ["date_edited", "date_retracted"]
    .filter((column) => columns.has(column))
    .map((column) => `COALESCE(message.${column}, 0) != 0`);
  if (!predicates.length) return [];

  const params = [];
  let chatJoin = "";
  let chatPredicate = "";
  if (allowedChatIds != null) {
    const matchValues = uniqueStrings(allowedChatIds.flatMap(chatIdMatchValues));
    if (matchValues.length === 0) return [];
    const placeholders = matchValues.map(() => "?").join(", ");
    chatJoin = `
      INNER JOIN chat_message_join ON chat_message_join.message_id = message.ROWID
      INNER JOIN chat ON chat.ROWID = chat_message_join.chat_id
    `;
    chatPredicate = `AND (chat.chat_identifier IN (${placeholders}) OR chat.guid IN (${placeholders}))`;
    // The placeholder list appears twice in the predicate, so bind the values twice.
    params.push(...matchValues, ...matchValues);
  }

  try {
    const rows = db.all(`
      SELECT DISTINCT message.ROWID AS rowId
      FROM message
      ${chatJoin}
      WHERE (${predicates.join(" OR ")})
      ${chatPredicate}
      ORDER BY message.ROWID ASC
    `, params);
    const numericIds = rows.map((row) => Number(row.rowId));
    return numericIds.filter((rowId) => Number.isFinite(rowId) && rowId > 0);
  } catch (err) {
    console.warn(`Messages mutation reconciliation query failed: ${safeError(err)}`);
    return [];
  }
}
4513
+
4514
/**
 * Reads the column names of the `message` table.
 *
 * @param {object} db - Database reader exposing `all(sql)`.
 * @returns {Set<string>} Column names, or an empty set (after logging a warning)
 *   when the lookup throws.
 */
function messageTableColumns(db) {
  try {
    const rows = db.all("PRAGMA table_info(message)");
    const names = rows.map((row) => String(row.name));
    return new Set(names);
  } catch (err) {
    console.warn(`Messages mutation reconciliation unavailable: ${safeError(err)}`);
    return new Set();
  }
}
4522
+
4523
/**
 * Expands a chat id into the values to match it against.
 *
 * When the trimmed id contains a `;+;` or `;-;` marker (`;+;` is checked first),
 * the part after the first occurrence of that marker is returned alongside the
 * full id.
 *
 * @param {*} chatId - Coerced to a trimmed string; nullish counts as empty.
 * @returns {string[]} `[]` for an empty id, `[raw]` when no marker is present,
 *   otherwise `[raw, core]`.
 */
function chatIdMatchValues(chatId) {
  const raw = String(chatId ?? "").trim();
  if (!raw) return [];

  let core = raw;
  for (const marker of [";+;", ";-;"]) {
    const at = raw.indexOf(marker);
    if (at !== -1) {
      core = raw.slice(at + marker.length);
      break;
    }
  }

  if (raw === core) return [raw];
  return [raw, core];
}
4533
+
4534
/**
 * Normalizes values to trimmed strings and removes empties and duplicates.
 *
 * @param {Array<*>} values - Each value is stringified (nullish becomes "") and trimmed.
 * @returns {string[]} Distinct non-empty strings in first-seen order.
 */
function uniqueStrings(values) {
  const trimmed = values.map((value) => String(value ?? "").trim());
  const distinct = new Set(trimmed.filter((text) => text.length > 0));
  return Array.from(distinct);
}
4537
+
4538
/**
 * Loads messages by row id, one SDK query per id.
 *
 * Each query asks for the first message after `rowId - 1` in ascending row
 * order; the result is kept only when its row id equals the requested one.
 *
 * @param {object} sdk - Object exposing an async `getMessages(query)` method.
 * @param {Iterable<number>} rowIds - Row ids to load.
 * @returns {Promise<object[]>} The messages found, in the order of `rowIds`.
 */
async function getMessagesByRowIds(sdk, rowIds) {
  const found = [];
  for (const rowId of rowIds) {
    const query = {
      sinceRowId: rowId - 1,
      orderByRowIdAsc: true,
      limit: 1,
    };
    const page = await sdk.getMessages(query);
    const match = page.find((candidate) => Number(candidate.rowId) === rowId);
    if (!match) continue;
    found.push(match);
  }
  return found;
}
4551
+
3881
4552
  async function watchMessages(sdk, sender, serializer, userId, allowedChatIds, opts = {}) {
3882
4553
  const allChats = allowedChatIds == null;
3883
4554
  const allowed = new Set(allowedChatIds ?? []);
3884
4555
  const contactSync = opts.contactSync ?? null;
4556
+ const stateCache = opts.stateCache ?? null;
3885
4557
  let buffer = [];
3886
4558
  let timer = null;
3887
4559
 
@@ -3889,7 +4561,10 @@ async function watchMessages(sdk, sender, serializer, userId, allowedChatIds, op
3889
4561
  if (!buffer.length) return;
3890
4562
  const batch = buffer.splice(0, MAX_BATCH_SIZE);
3891
4563
  const result = await sender.send(batch.map((msg) => serializer.serialize(msg)));
3892
- if (result.stored > 0) saveMessagesWatermark(userId, maxRowId(batch));
4564
+ if ((result.queued ?? 0) === 0) {
4565
+ stateCache?.observe(batch);
4566
+ saveMessagesWatermark(userId, maxRowId(batch));
4567
+ }
3893
4568
  };
3894
4569
 
3895
4570
  const scheduleFlush = () => {
@@ -3931,6 +4606,7 @@ async function watchMessages(sdk, sender, serializer, userId, allowedChatIds, op
3931
4606
  stopping = true;
3932
4607
  if (timer) clearTimeout(timer);
3933
4608
  contactSync?.stop();
4609
+ opts.onShutdown?.();
3934
4610
  await flush().catch(() => undefined);
3935
4611
  await sdk.close?.().catch(() => undefined);
3936
4612
  resolve();
@@ -3987,7 +4663,7 @@ function startMessagesContactSync(sender, contactLookup, opts = {}) {
3987
4663
 
3988
4664
  syncing = true;
3989
4665
  try {
3990
- const nextLookup = buildContactLookup();
4666
+ const nextLookup = buildContactLookup({ userId: opts.userId });
3991
4667
  contactLookup.replace(nextLookup);
3992
4668
  const mappings = visibleMappings();
3993
4669
  const toSync = forceAll
@@ -4130,14 +4806,22 @@ function createMessageSerializer(kit, contactLookup = emptyContactLookup()) {
4130
4806
  targetMessageId: msg.reaction.targetMessageId ?? null,
4131
4807
  emoji: msg.reaction.emoji ?? null,
4132
4808
  isRemoved: Boolean(msg.reaction.isRemoved),
4809
+ textRange: msg.reaction.textRange ?? null,
4810
+ appBundleId: msg.reaction.appBundleId ?? null,
4133
4811
  }
4134
4812
  : null,
4813
+ hasAttachments: Boolean(msg.hasAttachments ?? attachments.length > 0),
4135
4814
  attachments: attachments.map((att) => ({
4136
4815
  id: String(att.id ?? ""),
4137
4816
  fileName: att.fileName ?? null,
4138
4817
  mimeType: att.mimeType ?? "application/octet-stream",
4818
+ uti: att.uti ?? null,
4139
4819
  sizeBytes: Number(att.sizeBytes ?? 0),
4140
4820
  transferStatus: att.transferStatus ?? "unknown",
4821
+ createdAt: isoDate(att.createdAt),
4822
+ altText: att.altText ?? null,
4823
+ isFromMe: typeof att.isFromMe === "boolean" ? att.isFromMe : null,
4824
+ isSensitiveContent: Boolean(att.isSensitiveContent),
4141
4825
  isSticker: Boolean(att.isSticker),
4142
4826
  isImage: isImageAttachment(att),
4143
4827
  isVideo: isVideoAttachment(att),
@@ -4159,8 +4843,21 @@ function createMessageSerializer(kit, contactLookup = emptyContactLookup()) {
4159
4843
  }
4160
4844
 
4161
4845
  function buildContactLookup(opts = {}) {
4162
- const contacts = opts.loadAll === false ? [] : loadContacts();
4163
- const myCard = loadMyCard();
4846
+ const addressBook = platform() === "darwin" ? loadContactsFromAddressBookDb() : { contacts: [], myCard: null };
4847
+ if (opts.userId) {
4848
+ saveMessagesContactStatus(opts.userId, {
4849
+ ok: addressBook.ok === true,
4850
+ updatedAt: new Date().toISOString(),
4851
+ contacts: addressBook.contacts.length,
4852
+ hasMyCard: Boolean(addressBook.myCard),
4853
+ error: addressBook.ok === true ? null : addressBook.error ?? "unsupported local Contacts database schema",
4854
+ });
4855
+ }
4856
+ if (addressBook.ok !== true && platform() === "darwin") {
4857
+ console.warn(`Messages contact resolution degraded: ${addressBook.error ?? "unsupported local Contacts database schema"}. Raw Messages sync will continue; contact founders@askshepherd.ai.`);
4858
+ }
4859
+ const contacts = opts.loadAll === false ? [] : addressBook.contacts;
4860
+ const myCard = addressBook.myCard;
4164
4861
  const handleToName = new Map();
4165
4862
  const selfHandles = new Set();
4166
4863
 
@@ -4210,84 +4907,96 @@ function emptyContactLookup() {
4210
4907
  };
4211
4908
  }
4212
4909
 
4213
- function loadContacts() {
4214
- if (platform() !== "darwin") return [];
4215
- const sqliteContacts = loadContactsFromAddressBookDb();
4216
- if (sqliteContacts.length > 0) return sqliteContacts;
4217
-
4218
- const script = `
4219
- set output to ""
4220
- tell application "Contacts"
4221
- repeat with p in every person
4222
- set pName to name of p
4223
- set phList to ""
4224
- repeat with ph in phones of p
4225
- if phList is not "" then set phList to phList & ","
4226
- set phList to phList & (value of ph)
4227
- end repeat
4228
- set eList to ""
4229
- repeat with e in emails of p
4230
- if eList is not "" then set eList to eList & ","
4231
- set eList to eList & (value of e)
4232
- end repeat
4233
- set output to output & pName & "\\t" & phList & "\\t" & eList & "\\n"
4234
- end repeat
4235
- end tell
4236
- return output`;
4237
-
4238
- try {
4239
- const raw = execFileSync("osascript", ["-e", script], {
4240
- encoding: "utf8",
4241
- timeout: 120_000,
4242
- });
4243
- return parseContacts(raw);
4244
- } catch (err) {
4245
- if (args.debug === true) console.error("Could not load Contacts:", safeError(err));
4246
- return [];
4247
- }
4248
- }
4249
-
4250
4910
  function loadContactsFromAddressBookDb() {
4251
- const contacts = new Map();
4911
+ const contacts = [];
4912
+ let myCard = null;
4913
+ let readSucceeded = false;
4914
+ let firstError = null;
4252
4915
  for (const dbPath of addressBookDatabasePaths()) {
4253
4916
  const query = `
4254
- select r.Z_PK,
4255
- coalesce(nullif(r.ZNAME, ''), nullif(trim(coalesce(r.ZFIRSTNAME, '') || ' ' || coalesce(r.ZLASTNAME, '')), ''), nullif(r.ZORGANIZATION, ''), '') as display_name,
4256
- coalesce(p.ZFULLNUMBER, '') as phone,
4257
- '' as email
4258
- from ZABCDRECORD r
4259
- join ZABCDPHONENUMBER p on p.ZOWNER = r.Z_PK
4260
- where p.ZFULLNUMBER is not null and p.ZFULLNUMBER != ''
4261
- union all
4262
- select r.Z_PK,
4263
- coalesce(nullif(r.ZNAME, ''), nullif(trim(coalesce(r.ZFIRSTNAME, '') || ' ' || coalesce(r.ZLASTNAME, '')), ''), nullif(r.ZORGANIZATION, ''), '') as display_name,
4264
- '' as phone,
4265
- coalesce(e.ZADDRESS, '') as email
4266
- from ZABCDRECORD r
4267
- join ZABCDEMAILADDRESS e on e.ZOWNER = r.Z_PK
4268
- where e.ZADDRESS is not null and e.ZADDRESS != '';`;
4917
+ with records as (
4918
+ select Z_PK as id,
4919
+ coalesce(ZME, 0) as me,
4920
+ coalesce(Z22_ME, 0) as me22,
4921
+ coalesce(
4922
+ nullif(ZNAME, ''),
4923
+ nullif(trim(coalesce(ZFIRSTNAME, '') || ' ' || coalesce(ZMIDDLENAME, '') || ' ' || coalesce(ZLASTNAME, '')), ''),
4924
+ nullif(ZNICKNAME, ''),
4925
+ nullif(ZORGANIZATION, ''),
4926
+ ''
4927
+ ) as display_name
4928
+ from ZABCDRECORD
4929
+ ),
4930
+ phones as (
4931
+ select coalesce(ZOWNER, Z22_OWNER) as owner, ZFULLNUMBER as value
4932
+ from ZABCDPHONENUMBER
4933
+ where nullif(ZFULLNUMBER, '') is not null
4934
+ ),
4935
+ emails as (
4936
+ select coalesce(ZOWNER, Z22_OWNER) as owner, coalesce(ZADDRESSNORMALIZED, ZADDRESS) as value
4937
+ from ZABCDEMAILADDRESS
4938
+ where nullif(coalesce(ZADDRESSNORMALIZED, ZADDRESS), '') is not null
4939
+ )
4940
+ select r.id,
4941
+ r.me,
4942
+ r.me22,
4943
+ r.display_name,
4944
+ coalesce(p.value, '') as phone,
4945
+ coalesce(e.value, '') as email
4946
+ from records r
4947
+ left join phones p on p.owner = r.id
4948
+ left join emails e on e.owner = r.id
4949
+ where r.display_name != ''
4950
+ and (p.value is not null or e.value is not null);`;
4269
4951
 
4270
4952
  try {
4271
4953
  const raw = execFileSync("sqlite3", ["-separator", "\t", dbPath, query], {
4272
4954
  encoding: "utf8",
4273
4955
  timeout: 10_000,
4274
4956
  });
4957
+ readSucceeded = true;
4275
4958
  for (const line of raw.split("\n").filter(Boolean)) {
4276
- const [id, rawName, phone, email] = line.split("\t");
4959
+ const [id, me, me22, rawName, phone, email] = line.split("\t");
4277
4960
  const name = rawName?.trim();
4278
4961
  if (!id || !name) continue;
4279
4962
  const key = `${dbPath}:${id}`;
4280
- const current = contacts.get(key) ?? { name, phones: [], emails: [] };
4281
- if (phone) current.phones.push(phone.trim());
4282
- if (email) current.emails.push(email.trim());
4283
- contacts.set(key, current);
4963
+ let current = contacts.find((contact) => contact._key === key);
4964
+ if (!current) {
4965
+ current = { _key: key, name, phones: [], emails: [] };
4966
+ contacts.push(current);
4967
+ }
4968
+ if (phone?.trim()) current.phones.push(phone.trim());
4969
+ if (email?.trim()) current.emails.push(email.trim());
4970
+ if (Number(me) !== 0 || Number(me22) !== 0) myCard = current;
4284
4971
  }
4285
4972
  } catch (err) {
4973
+ firstError ??= safeError(err);
4286
4974
  if (args.debug === true) console.error(`Could not read Contacts DB ${dbPath}:`, safeError(err));
4287
4975
  }
4288
4976
  }
4289
4977
 
4290
- return [...contacts.values()].filter((contact) => contact.name);
4978
+ const cleanContacts = contacts
4979
+ .map(({ _key, ...contact }) => ({
4980
+ name: contact.name,
4981
+ phones: [...new Set(contact.phones)],
4982
+ emails: [...new Set(contact.emails)],
4983
+ }))
4984
+ .filter((contact) => contact.name && (contact.phones.length > 0 || contact.emails.length > 0));
4985
+
4986
+ const cleanMyCard = myCard
4987
+ ? {
4988
+ name: myCard.name,
4989
+ phones: [...new Set(myCard.phones)],
4990
+ emails: [...new Set(myCard.emails)],
4991
+ }
4992
+ : null;
4993
+
4994
+ return {
4995
+ ok: readSucceeded,
4996
+ contacts: cleanContacts,
4997
+ myCard: cleanMyCard,
4998
+ error: readSucceeded ? null : firstError,
4999
+ };
4291
5000
  }
4292
5001
 
4293
5002
  function addressBookDatabasePaths() {
@@ -4312,51 +5021,6 @@ function addressBookWalPaths() {
4312
5021
  return [...paths].filter((path) => existsSync(path));
4313
5022
  }
4314
5023
 
4315
- function loadMyCard() {
4316
- if (platform() !== "darwin") return null;
4317
- const script = `
4318
- tell application "Contacts"
4319
- set mc to my card
4320
- set pName to name of mc
4321
- set phList to ""
4322
- repeat with ph in phones of mc
4323
- if phList is not "" then set phList to phList & ","
4324
- set phList to phList & (value of ph)
4325
- end repeat
4326
- set eList to ""
4327
- repeat with e in emails of mc
4328
- if eList is not "" then set eList to eList & ","
4329
- set eList to eList & (value of e)
4330
- end repeat
4331
- return pName & "\\t" & phList & "\\t" & eList
4332
- end tell`;
4333
-
4334
- try {
4335
- const raw = execFileSync("osascript", ["-e", script], {
4336
- encoding: "utf8",
4337
- timeout: 10_000,
4338
- });
4339
- return parseContacts(raw)[0] ?? null;
4340
- } catch {
4341
- return null;
4342
- }
4343
- }
4344
-
4345
- function parseContacts(raw) {
4346
- return String(raw)
4347
- .split("\n")
4348
- .filter(Boolean)
4349
- .map((line) => {
4350
- const [name, phones, emails] = line.split("\t");
4351
- return {
4352
- name: name?.trim() ?? "",
4353
- phones: phones ? phones.split(",").map((phone) => phone.trim()).filter(Boolean) : [],
4354
- emails: emails ? emails.split(",").map((email) => email.trim()).filter(Boolean) : [],
4355
- };
4356
- })
4357
- .filter((contact) => contact.name);
4358
- }
4359
-
4360
5024
  function addHandleMapping(map, handle, name) {
4361
5025
  for (const candidate of handleCandidates(handle)) {
4362
5026
  map.set(candidate, name);
@@ -5062,14 +5726,23 @@ class MessagesBatchSender {
5062
5726
  this.agentToken = agentToken;
5063
5727
  this.userId = userId;
5064
5728
  this.queueFile = join(homedir(), ".shepherd", "raw-messages", `${safeFileId(userId)}-queue.json`);
5729
+ this.sendChain = Promise.resolve();
5065
5730
  }
5066
5731
 
5067
5732
  async send(messages) {
5733
+ const run = () => this.sendUnlocked(messages);
5734
+ const next = this.sendChain.then(run, run);
5735
+ this.sendChain = next.catch(() => undefined);
5736
+ return next;
5737
+ }
5738
+
5739
+ async sendUnlocked(messages) {
5068
5740
  const queued = this.loadQueue();
5069
- const all = [...queued, ...messages];
5070
- if (!all.length) return { stored: 0, skipped: 0 };
5741
+ const all = dedupeMessagePayloads([...queued, ...messages]);
5742
+ if (!all.length) return { stored: 0, updated: 0, skipped: 0, queued: 0 };
5071
5743
 
5072
5744
  let totalStored = 0;
5745
+ let totalUpdated = 0;
5073
5746
  let totalSkipped = 0;
5074
5747
 
5075
5748
  for (let i = 0; i < all.length; i += MAX_BATCH_SIZE) {
@@ -5077,16 +5750,18 @@ class MessagesBatchSender {
5077
5750
  try {
5078
5751
  const result = await this.postBatch(batch);
5079
5752
  totalStored += result.stored ?? 0;
5753
+ totalUpdated += result.updated ?? 0;
5080
5754
  totalSkipped += result.skipped ?? 0;
5081
5755
  } catch (err) {
5082
- this.saveQueue(all.slice(i));
5756
+ const remaining = all.slice(i);
5757
+ this.saveQueue(remaining);
5083
5758
  console.error("Messages batch send failed:", safeError(err));
5084
- return { stored: totalStored, skipped: totalSkipped };
5759
+ return { stored: totalStored, updated: totalUpdated, skipped: totalSkipped, queued: remaining.length };
5085
5760
  }
5086
5761
  }
5087
5762
 
5088
5763
  this.clearQueue();
5089
- return { stored: totalStored, skipped: totalSkipped };
5764
+ return { stored: totalStored, updated: totalUpdated, skipped: totalSkipped, queued: 0 };
5090
5765
  }
5091
5766
 
5092
5767
  async postBatch(messages) {
@@ -5129,7 +5804,11 @@ class MessagesBatchSender {
5129
5804
 
5130
5805
  saveQueue(messages) {
5131
5806
  const capped = messages.slice(-MAX_QUEUE_MESSAGES);
5807
+ mkdirSync(dirname(this.queueFile), { recursive: true });
5132
5808
  writeFileSync(this.queueFile, JSON.stringify(capped), { mode: 0o600 });
5809
+ if (capped.length < messages.length) {
5810
+ console.warn(`Messages queue trimmed from ${messages.length} to ${MAX_QUEUE_MESSAGES} messages`);
5811
+ }
5133
5812
  }
5134
5813
 
5135
5814
  clearQueue() {
@@ -5141,6 +5820,37 @@ class MessagesBatchSender {
5141
5820
  }
5142
5821
  }
5143
5822
 
5823
+ function dedupeMessagePayloads(messages) {
5824
+ const entries = [];
5825
+ const keyed = new Map();
5826
+ for (const message of messages) {
5827
+ const key = messagePayloadIdentity(message);
5828
+ if (!key) {
5829
+ entries.push({ message });
5830
+ continue;
5831
+ }
5832
+
5833
+ let entry = keyed.get(key);
5834
+ if (!entry) {
5835
+ entry = { key, message };
5836
+ keyed.set(key, entry);
5837
+ entries.push(entry);
5838
+ } else {
5839
+ entry.message = message;
5840
+ }
5841
+ }
5842
+ return entries.map((entry) => entry.message);
5843
+ }
5844
+
5845
+ function messagePayloadIdentity(message) {
5846
+ const messageId = message?.messageId == null ? null : String(message.messageId);
5847
+ const chatId = message?.chatId == null ? null : String(message.chatId);
5848
+ if (messageId && chatId) return `${chatId}:${messageId}`;
5849
+ if (messageId) return messageId;
5850
+ const rowId = Number(message?.rowId);
5851
+ return Number.isFinite(rowId) && rowId > 0 ? `row:${rowId}` : null;
5852
+ }
5853
+
5144
5854
  function loadMessagesWatermark(userId) {
5145
5855
  try {
5146
5856
  const raw = readFileSync(messagesWatermarkFile(userId), "utf8").trim();
@@ -5151,21 +5861,99 @@ function loadMessagesWatermark(userId) {
5151
5861
  }
5152
5862
  }
5153
5863
 
5864
+ function loadMessagesBackfillComplete(userId, scope) {
5865
+ const record = readJsonOptional(messagesBackfillCompleteFile(userId, scope));
5866
+ if (!record || typeof record !== "object" || Array.isArray(record)) return null;
5867
+ return record;
5868
+ }
5869
+
5870
+ function hasAnyMessagesBackfillComplete(userId) {
5871
+ const safeId = safeFileId(userId);
5872
+ try {
5873
+ return readdirSync(messagesRawMessagesDir()).some((entry) =>
5874
+ entry.startsWith(`${safeId}-backfill-`) && entry.endsWith(".json"));
5875
+ } catch {
5876
+ return false;
5877
+ }
5878
+ }
5879
+
5880
+ function saveMessagesBackfillComplete(userId, scope, result = {}) {
5881
+ const record = {
5882
+ completedAt: new Date().toISOString(),
5883
+ scope,
5884
+ ...result,
5885
+ };
5886
+ writeJsonAtomic(messagesBackfillCompleteFile(userId, scope), record);
5887
+ return record;
5888
+ }
5889
+
5154
5890
  function saveMessagesWatermark(userId, rowId) {
5891
+ const numericRowId = Number(rowId);
5892
+ if (!Number.isFinite(numericRowId) || numericRowId <= 0) return;
5893
+
5155
5894
  try {
5156
5895
  const path = messagesWatermarkFile(userId);
5157
- writeFileSync(path, String(rowId), { mode: 0o600 });
5896
+ const next = Math.max(loadMessagesWatermark(userId), Math.floor(numericRowId));
5897
+ const tmpPath = `${path}.tmp`;
5898
+ writeFileSync(tmpPath, String(next), { mode: 0o600 });
5899
+ renameSync(tmpPath, path);
5158
5900
  } catch (err) {
5159
5901
  console.error("Could not save Messages watermark:", safeError(err));
5160
5902
  }
5161
5903
  }
5162
5904
 
5163
5905
  function messagesWatermarkFile(userId) {
5164
- const path = join(homedir(), ".shepherd", "raw-messages", `${safeFileId(userId)}-watermark`);
5906
+ const path = join(messagesRawMessagesDir(), `${safeFileId(userId)}-watermark`);
5907
+ mkdirSync(dirname(path), { recursive: true });
5908
+ return path;
5909
+ }
5910
+
5911
+ function messagesBackfillCompleteFile(userId, scope) {
5912
+ const path = join(messagesRawMessagesDir(), `${safeFileId(userId)}-backfill-${hashObject(scope).slice(0, 24)}.json`);
5913
+ mkdirSync(dirname(path), { recursive: true });
5914
+ return path;
5915
+ }
5916
+
5917
+ function messagesStateFile(userId) {
5918
+ const path = join(messagesRawMessagesDir(), `${safeFileId(userId)}-message-state.json`);
5165
5919
  mkdirSync(dirname(path), { recursive: true });
5166
5920
  return path;
5167
5921
  }
5168
5922
 
5923
+ function saveMessagesState(userId, state) {
5924
+ try {
5925
+ writeJsonAtomic(messagesStateFile(userId), state);
5926
+ } catch (err) {
5927
+ console.error("Could not save Messages state:", safeError(err));
5928
+ }
5929
+ }
5930
+
5931
+ function messagesContactStatusFile(userId) {
5932
+ const path = join(messagesRawMessagesDir(), `${safeFileId(userId)}-contacts-status.json`);
5933
+ mkdirSync(dirname(path), { recursive: true });
5934
+ return path;
5935
+ }
5936
+
5937
+ function saveMessagesContactStatus(userId, status) {
5938
+ try {
5939
+ writeJsonAtomic(messagesContactStatusFile(userId), status);
5940
+ } catch (err) {
5941
+ console.error("Could not save Messages contact status:", safeError(err));
5942
+ }
5943
+ }
5944
+
5945
+ function messagesRawMessagesDir() {
5946
+ const path = join(homedir(), ".shepherd", "raw-messages");
5947
+ mkdirSync(path, { recursive: true });
5948
+ return path;
5949
+ }
5950
+
5951
+ function writeJsonAtomic(path, value) {
5952
+ const tmpPath = `${path}.tmp`;
5953
+ writeFileSync(tmpPath, JSON.stringify(value), { mode: 0o600 });
5954
+ renameSync(tmpPath, path);
5955
+ }
5956
+
5169
5957
  function maxRowId(messages) {
5170
5958
  return Math.max(0, ...messages.map((msg) => Number(msg.rowId ?? 0)).filter(Number.isFinite));
5171
5959
  }
@@ -5228,6 +6016,12 @@ function clampInt(value, min, max) {
5228
6016
  return Math.min(Math.max(Math.floor(value), min), max);
5229
6017
  }
5230
6018
 
6019
+ function positiveIntFromEnv(name, defaultValue) {
6020
+ const parsed = Number(process.env[name]);
6021
+ if (!Number.isFinite(parsed) || parsed <= 0) return defaultValue;
6022
+ return Math.floor(parsed);
6023
+ }
6024
+
5231
6025
  function parseBackfillDays(value, defaultValue) {
5232
6026
  if (value === undefined || value === null || value === "") return defaultValue;
5233
6027
  if (typeof value === "string" && value.trim().toLowerCase() === "all") return null;