@lobu/cli 6.1.1 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/dist/commands/_lib/apply/apply-cmd.d.ts +36 -0
  2. package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
  3. package/dist/commands/_lib/apply/apply-cmd.js +696 -40
  4. package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
  5. package/dist/commands/_lib/apply/client.d.ts +285 -0
  6. package/dist/commands/_lib/apply/client.d.ts.map +1 -1
  7. package/dist/commands/_lib/apply/client.js +469 -28
  8. package/dist/commands/_lib/apply/client.js.map +1 -1
  9. package/dist/commands/_lib/apply/desired-state.d.ts +187 -3
  10. package/dist/commands/_lib/apply/desired-state.d.ts.map +1 -1
  11. package/dist/commands/_lib/apply/desired-state.js +879 -88
  12. package/dist/commands/_lib/apply/desired-state.js.map +1 -1
  13. package/dist/commands/_lib/apply/diff.d.ts +72 -3
  14. package/dist/commands/_lib/apply/diff.d.ts.map +1 -1
  15. package/dist/commands/_lib/apply/diff.js +473 -84
  16. package/dist/commands/_lib/apply/diff.js.map +1 -1
  17. package/dist/commands/_lib/apply/prompt.d.ts +6 -0
  18. package/dist/commands/_lib/apply/prompt.d.ts.map +1 -1
  19. package/dist/commands/_lib/apply/prompt.js +16 -0
  20. package/dist/commands/_lib/apply/prompt.js.map +1 -1
  21. package/dist/commands/_lib/apply/render.d.ts +9 -0
  22. package/dist/commands/_lib/apply/render.d.ts.map +1 -1
  23. package/dist/commands/_lib/apply/render.js +80 -3
  24. package/dist/commands/_lib/apply/render.js.map +1 -1
  25. package/dist/commands/_lib/connector-loader.d.ts +3 -0
  26. package/dist/commands/_lib/connector-loader.d.ts.map +1 -0
  27. package/dist/commands/_lib/connector-loader.js +129 -0
  28. package/dist/commands/_lib/connector-loader.js.map +1 -0
  29. package/dist/commands/_lib/connector-run-cmd.d.ts +35 -0
  30. package/dist/commands/_lib/connector-run-cmd.d.ts.map +1 -0
  31. package/dist/commands/_lib/connector-run-cmd.js +351 -0
  32. package/dist/commands/_lib/connector-run-cmd.js.map +1 -0
  33. package/dist/commands/_lib/export/export-cmd.d.ts +35 -0
  34. package/dist/commands/_lib/export/export-cmd.d.ts.map +1 -0
  35. package/dist/commands/_lib/export/export-cmd.js +329 -0
  36. package/dist/commands/_lib/export/export-cmd.js.map +1 -0
  37. package/dist/commands/agent.d.ts.map +1 -1
  38. package/dist/commands/agent.js +11 -14
  39. package/dist/commands/agent.js.map +1 -1
  40. package/dist/commands/chat.d.ts.map +1 -1
  41. package/dist/commands/chat.js +28 -7
  42. package/dist/commands/chat.js.map +1 -1
  43. package/dist/commands/connector.d.ts +3 -0
  44. package/dist/commands/connector.d.ts.map +1 -0
  45. package/dist/commands/connector.js +5 -0
  46. package/dist/commands/connector.js.map +1 -0
  47. package/dist/commands/dev.d.ts +23 -0
  48. package/dist/commands/dev.d.ts.map +1 -1
  49. package/dist/commands/dev.js +273 -8
  50. package/dist/commands/dev.js.map +1 -1
  51. package/dist/commands/doctor.d.ts.map +1 -1
  52. package/dist/commands/doctor.js +2 -3
  53. package/dist/commands/doctor.js.map +1 -1
  54. package/dist/commands/eval.d.ts.map +1 -1
  55. package/dist/commands/eval.js +28 -18
  56. package/dist/commands/eval.js.map +1 -1
  57. package/dist/commands/init.d.ts +2 -0
  58. package/dist/commands/init.d.ts.map +1 -1
  59. package/dist/commands/init.js +29 -1
  60. package/dist/commands/init.js.map +1 -1
  61. package/dist/commands/login.d.ts.map +1 -1
  62. package/dist/commands/login.js +22 -16
  63. package/dist/commands/login.js.map +1 -1
  64. package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
  65. package/dist/commands/memory/_lib/browser-auth-cmd.js +15 -144
  66. package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
  67. package/dist/commands/memory/_lib/schema.d.ts +28 -1
  68. package/dist/commands/memory/_lib/schema.d.ts.map +1 -1
  69. package/dist/commands/memory/_lib/schema.js +120 -4
  70. package/dist/commands/memory/_lib/schema.js.map +1 -1
  71. package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
  72. package/dist/commands/memory/_lib/seed-cmd.js +41 -18
  73. package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
  74. package/dist/commands/org.d.ts +4 -0
  75. package/dist/commands/org.d.ts.map +1 -1
  76. package/dist/commands/org.js +10 -0
  77. package/dist/commands/org.js.map +1 -1
  78. package/dist/commands/token.d.ts +9 -0
  79. package/dist/commands/token.d.ts.map +1 -1
  80. package/dist/commands/token.js +54 -3
  81. package/dist/commands/token.js.map +1 -1
  82. package/dist/commands/validate.d.ts.map +1 -1
  83. package/dist/commands/validate.js +4 -13
  84. package/dist/commands/validate.js.map +1 -1
  85. package/dist/config/loader.js +2 -2
  86. package/dist/config/loader.js.map +1 -1
  87. package/dist/connectors/README.md +2 -3
  88. package/dist/connectors/apple_health.ts +138 -0
  89. package/dist/connectors/apple_photos.ts +178 -0
  90. package/dist/connectors/apple_screen_time.ts +82 -0
  91. package/dist/connectors/browser/evaluate.ts +120 -0
  92. package/dist/connectors/browser/fill_form.ts +107 -0
  93. package/dist/connectors/browser/page_text.ts +108 -0
  94. package/dist/connectors/browser-scraper-utils.ts +111 -3
  95. package/dist/connectors/capterra.ts +5 -1
  96. package/dist/connectors/chrome_tabs.ts +74 -0
  97. package/dist/connectors/g2.ts +5 -1
  98. package/dist/connectors/github.ts +16 -38
  99. package/dist/connectors/glassdoor.ts +5 -1
  100. package/dist/connectors/google_calendar.ts +28 -6
  101. package/dist/connectors/google_gmail.ts +6 -3
  102. package/dist/connectors/google_play.ts +32 -5
  103. package/dist/connectors/hackernews.ts +37 -2
  104. package/dist/connectors/index.ts +14 -1
  105. package/dist/connectors/linkedin.ts +32 -9
  106. package/dist/connectors/local_directory.ts +91 -0
  107. package/dist/connectors/reddit.ts +1 -0
  108. package/dist/connectors/revolut.ts +569 -0
  109. package/dist/connectors/rss.ts +33 -8
  110. package/dist/connectors/trustpilot.ts +36 -21
  111. package/dist/connectors/website.ts +8 -69
  112. package/dist/connectors/whatsapp.ts +21 -22
  113. package/dist/connectors/whatsapp_local.ts +125 -0
  114. package/dist/connectors/x.ts +17 -7
  115. package/dist/db/migrations/20260510220000_connector_required_capability.sql +47 -0
  116. package/dist/db/migrations/20260512000000_device_worker_connection_binding.sql +113 -0
  117. package/dist/db/migrations/20260512131703_connections_slug.sql +131 -0
  118. package/dist/db/migrations/20260513000000_chat_user_identities.sql +24 -0
  119. package/dist/db/migrations/20260513120000_auth_profiles_device_binding.sql +50 -0
  120. package/dist/db/migrations/20260513150000_auth_profiles_cdp_url.sql +43 -0
  121. package/dist/db/migrations/20260513200000_notifications_as_events.sql +86 -0
  122. package/dist/db/migrations/20260514000000_scheduled_jobs.sql +97 -0
  123. package/dist/db/migrations/20260514120000_auth_profiles_connector_key_nullable.sql +42 -0
  124. package/dist/db/migrations/20260514130000_connection_action_modes.sql +103 -0
  125. package/dist/db/migrations/20260514160000_auth_profiles_mirror_mode.sql +32 -0
  126. package/dist/db/migrations/20260515120000_agents_per_org_pk.sql +66 -0
  127. package/dist/db/migrations/20260515150000_geo_enrichment.sql +208 -0
  128. package/dist/db/migrations/20260515160000_drop_agents_org_id_unique.sql +24 -0
  129. package/dist/db/migrations/20260515170000_auth_profiles_default_for_connector.sql +23 -0
  130. package/dist/db/migrations/20260516120000_agents_per_org_pk_swap.sql +125 -0
  131. package/dist/db/migrations/20260516200000_events_search_tsv.sql +134 -0
  132. package/dist/db/migrations/20260516200100_events_lifecycle_changes_index.sql +25 -0
  133. package/dist/db/migrations/20260517010000_drop_unused_indexes.sql +49 -0
  134. package/dist/db/migrations/20260517020000_softdelete_orphan_feeds.sql +56 -0
  135. package/dist/db/migrations/20260517030000_pat_worker_id_binding.sql +27 -0
  136. package/dist/db/migrations/20260517040000_archive_orphan_watchers.sql +30 -0
  137. package/dist/db/migrations/20260517050000_watcher_agent_id_not_null.sql +34 -0
  138. package/dist/db/migrations/20260517060000_watcher_schema_additions.sql +78 -0
  139. package/dist/db/migrations/20260517150000_goals_primitive.sql +55 -0
  140. package/dist/db/migrations/20260517160000_drop_goals_primitive.sql +45 -0
  141. package/dist/db/migrations/20260518000000_pending_interactions.sql +49 -0
  142. package/dist/db/migrations/20260518010000_runs_heartbeat_reaper_index.sql +22 -0
  143. package/dist/eval/client.d.ts.map +1 -1
  144. package/dist/eval/client.js +11 -0
  145. package/dist/eval/client.js.map +1 -1
  146. package/dist/eval/grader.js +2 -1
  147. package/dist/eval/grader.js.map +1 -1
  148. package/dist/eval/types.d.ts +2 -0
  149. package/dist/eval/types.d.ts.map +1 -1
  150. package/dist/index.d.ts +11 -0
  151. package/dist/index.d.ts.map +1 -1
  152. package/dist/index.js +115 -114
  153. package/dist/index.js.map +1 -1
  154. package/dist/internal/context.d.ts +9 -0
  155. package/dist/internal/context.d.ts.map +1 -1
  156. package/dist/internal/context.js +41 -6
  157. package/dist/internal/context.js.map +1 -1
  158. package/dist/internal/credentials.d.ts +5 -0
  159. package/dist/internal/credentials.d.ts.map +1 -1
  160. package/dist/internal/credentials.js +75 -1
  161. package/dist/internal/credentials.js.map +1 -1
  162. package/dist/internal/gateway-url.d.ts +14 -0
  163. package/dist/internal/gateway-url.d.ts.map +1 -1
  164. package/dist/internal/gateway-url.js +19 -0
  165. package/dist/internal/gateway-url.js.map +1 -1
  166. package/dist/internal/index.d.ts +1 -1
  167. package/dist/internal/index.d.ts.map +1 -1
  168. package/dist/internal/index.js +1 -1
  169. package/dist/internal/index.js.map +1 -1
  170. package/dist/internal/local-env.d.ts.map +1 -1
  171. package/dist/internal/local-env.js +9 -2
  172. package/dist/internal/local-env.js.map +1 -1
  173. package/dist/server.bundle.mjs +42251 -36931
  174. package/dist/start-local.bundle.mjs +16437 -9882
  175. package/dist/templates/TESTING.md.tmpl +9 -9
  176. package/package.json +8 -6
  177. package/dist/connectors/google_photos.ts +0 -776
@@ -15,6 +15,8 @@ import {
15
15
  type SyncResult,
16
16
  } from '@lobu/connector-sdk';
17
17
  import {
18
+ getBrowserCdpUrl,
19
+ getBrowserUserDataDir,
18
20
  handleCookieConsent,
19
21
  openStealthBrowser,
20
22
  validateUrlDomain,
@@ -96,10 +98,16 @@ export default class TrustpilotConnector extends ConnectorRuntime {
96
98
  throw new Error('Either business_url or business_name is required');
97
99
  }
98
100
 
99
- const baseUrl = businessUrl || `https://www.trustpilot.com/review/${businessName}`;
101
+ // encodeURIComponent the user-supplied businessName so a value like
102
+ // "../search?foo=bar" can't escape the /review/ path on trustpilot.com.
103
+ const baseUrl =
104
+ businessUrl ||
105
+ `https://www.trustpilot.com/review/${encodeURIComponent(businessName ?? '')}`;
100
106
  validateUrlDomain(baseUrl, 'trustpilot.com');
101
107
 
102
- const session = await openStealthBrowser({ cdpUrl: 'auto' });
108
+ const userDataDir = getBrowserUserDataDir(ctx.sessionState);
109
+ const cdpUrl = getBrowserCdpUrl(ctx.sessionState) ?? 'auto';
110
+ const session = await openStealthBrowser({ cdpUrl, userDataDir });
103
111
 
104
112
  return withBrowserErrorCapture(session, 'trustpilot-sync', async (page) => {
105
113
  await page.goto(baseUrl, {
@@ -157,27 +165,34 @@ export default class TrustpilotConnector extends ConnectorRuntime {
157
165
  // Filter reviews with meaningful content (more than 10 chars)
158
166
  const reviews: TrustpilotReview[] = rawReviews.filter((r) => r.text && r.text.length > 10);
159
167
 
160
- // Transform to EventEnvelope format
161
- const events: EventEnvelope[] = reviews.map((review) => {
168
+ // Transform to EventEnvelope format. Drop rows whose `date` attribute
169
+ // was missing/invalid in the DOM — `new Date("")` yields an Invalid
170
+ // Date, which downstream sorting/checkpointing then can't compare, and
171
+ // an empty `date` made `origin_id` collide on `-<author>` across rows.
172
+ const events: EventEnvelope[] = reviews.flatMap((review) => {
162
173
  const content = review.title ? `${review.title}\n\n${review.text}` : review.text;
163
-
164
- return {
165
- origin_id: `${review.date}-${review.author}`,
166
- payload_text: content,
167
- author_name: review.author,
168
- occurred_at: new Date(review.date),
169
- origin_type: 'review',
170
- score: calculateEngagementScore('trustpilot', {
171
- rating: review.rating,
172
- helpful_count: 0,
173
- }),
174
- source_url: baseUrl,
175
- metadata: {
176
- rating: review.rating,
177
- helpful_count: 0,
178
- title: review.title,
174
+ const parsedDate = review.date ? new Date(review.date) : null;
175
+ if (!parsedDate || Number.isNaN(parsedDate.getTime())) return [];
176
+
177
+ return [
178
+ {
179
+ origin_id: `${review.date}-${review.author}`,
180
+ payload_text: content,
181
+ author_name: review.author,
182
+ occurred_at: parsedDate,
183
+ origin_type: 'review',
184
+ score: calculateEngagementScore('trustpilot', {
185
+ rating: review.rating,
186
+ helpful_count: 0,
187
+ }),
188
+ source_url: baseUrl,
189
+ metadata: {
190
+ rating: review.rating,
191
+ helpful_count: 0,
192
+ title: review.title,
193
+ },
179
194
  },
180
- };
195
+ ];
181
196
  });
182
197
 
183
198
  return {
@@ -20,6 +20,7 @@ import {
20
20
  type SyncResult,
21
21
  } from '@lobu/connector-sdk';
22
22
  import type { Page } from 'playwright';
23
+ import { validatePublicUrl } from './browser-scraper-utils.ts';
23
24
 
24
25
  interface PageSection {
25
26
  heading: string;
@@ -50,72 +51,6 @@ function shouldSkipCookieBannerText(text: string): boolean {
50
51
  return countPatternMatches(normalized, COOKIE_BANNER_PATTERNS) >= 3;
51
52
  }
52
53
 
53
- /**
54
- * Validates a URL is safe for server-side fetching.
55
- * Blocks private/internal network addresses to prevent SSRF attacks.
56
- */
57
- function validatePublicUrl(url: string): void {
58
- let parsed: URL;
59
- try {
60
- parsed = new URL(url);
61
- } catch {
62
- throw new Error(`Invalid URL: ${url}`);
63
- }
64
-
65
- if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
66
- throw new Error(`URL must use http: or https: protocol, got ${parsed.protocol}`);
67
- }
68
-
69
- const hostname = parsed.hostname.toLowerCase();
70
-
71
- // Block localhost variants
72
- if (hostname === 'localhost' || hostname === '[::1]' || hostname.endsWith('.localhost')) {
73
- throw new Error(`URL must not point to localhost: ${hostname}`);
74
- }
75
-
76
- // Block private/internal IP ranges
77
- // IPv4 patterns: 127.x.x.x, 10.x.x.x, 192.168.x.x, 172.16-31.x.x, 169.254.x.x, 0.x.x.x
78
- const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
79
- if (ipv4Match) {
80
- const [, a, b] = ipv4Match.map(Number);
81
- if (
82
- a === 127 || // 127.0.0.0/8 loopback
83
- a === 10 || // 10.0.0.0/8 private
84
- (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
85
- (a === 192 && b === 168) || // 192.168.0.0/16 private
86
- (a === 169 && b === 254) || // 169.254.0.0/16 link-local
87
- a === 0 // 0.0.0.0/8
88
- ) {
89
- throw new Error(`URL must not point to a private/internal IP address: ${hostname}`);
90
- }
91
- }
92
-
93
- // Block IPv6 private ranges (bracketed notation in URLs)
94
- if (hostname.startsWith('[')) {
95
- const ipv6 = hostname.slice(1, -1).toLowerCase();
96
- if (
97
- ipv6 === '::1' ||
98
- ipv6.startsWith('fe80:') || // link-local
99
- ipv6.startsWith('fc') || // unique local (fc00::/7)
100
- ipv6.startsWith('fd') || // unique local (fc00::/7)
101
- ipv6 === '::' || // unspecified
102
- ipv6.startsWith('::ffff:') // IPv4-mapped IPv6
103
- ) {
104
- throw new Error(`URL must not point to a private/internal IPv6 address: ${hostname}`);
105
- }
106
- }
107
-
108
- // Block common internal hostnames
109
- if (
110
- hostname.endsWith('.internal') ||
111
- hostname.endsWith('.local') ||
112
- hostname.endsWith('.corp') ||
113
- hostname.endsWith('.lan')
114
- ) {
115
- throw new Error(`URL must not point to an internal hostname: ${hostname}`);
116
- }
117
- }
118
-
119
54
  export default class WebsiteConnector extends ConnectorRuntime {
120
55
  readonly definition: ConnectorDefinition = {
121
56
  key: 'website',
@@ -238,7 +173,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
238
173
  urls = urls.slice(0, maxPages);
239
174
 
240
175
  // Launch browser
241
- const { browser } = await launchBrowser({} as any, { stealth: false });
176
+ const { browser } = await launchBrowser({ stealth: false });
242
177
  const events: EventEnvelope[] = [];
243
178
  const newHashes: Record<string, string> = {};
244
179
 
@@ -457,7 +392,11 @@ export default class WebsiteConnector extends ConnectorRuntime {
457
392
  return result.join('\n');
458
393
  }
459
394
 
460
- private async fetchSitemap(sitemapUrl: string): Promise<string[]> {
395
+ private async fetchSitemap(sitemapUrl: string, depth = 0): Promise<string[]> {
396
+ // Sitemap-index recursion bound — caps fan-out from a remote sitemap that
397
+ // links to a sitemap that links to a sitemap... untrusted XML must not
398
+ // drive unbounded outbound traffic.
399
+ if (depth > 2) return [];
461
400
  const response = await fetch(sitemapUrl, {
462
401
  headers: { 'User-Agent': 'Mozilla/5.0 (compatible; LobuBot/1.0)' },
463
402
  });
@@ -493,7 +432,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
493
432
  }
494
433
  for (const childUrl of childSitemaps.slice(0, 5)) {
495
434
  validatePublicUrl(childUrl);
496
- const childUrls = await this.fetchSitemap(childUrl);
435
+ const childUrls = await this.fetchSitemap(childUrl, depth + 1);
497
436
  urls.push(...childUrls);
498
437
  }
499
438
  }
@@ -164,6 +164,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
164
164
  metadataSchema: {
165
165
  type: 'object',
166
166
  properties: {
167
+ source: { type: 'string', const: 'whatsapp' },
167
168
  chat_jid: { type: 'string' },
168
169
  is_group: { type: 'boolean' },
169
170
  from_me: { type: 'boolean' },
@@ -178,7 +179,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
178
179
  },
179
180
  entityLinks: [
180
181
  {
181
- entityType: '$member',
182
+ entityType: 'person',
182
183
  autoCreate: true,
183
184
  titlePath: 'metadata.push_name',
184
185
  identities: [
@@ -423,11 +424,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
423
424
  },
424
425
  };
425
426
  } catch (error) {
426
- try {
427
- sock.end(undefined);
428
- } catch {
429
- /* ignore */
430
- }
427
+ safeEnd(sock);
431
428
  throw error;
432
429
  }
433
430
  }
@@ -477,11 +474,7 @@ async function attemptPairing(
477
474
  sock.ev.off('connection.update', handler);
478
475
  sock.ev.off('creds.update', credsListener);
479
476
  ctx.signal.removeEventListener('abort', onAbort);
480
- try {
481
- sock.end(undefined);
482
- } catch {
483
- /* ignore */
484
- }
477
+ safeEnd(sock);
485
478
  resolve(outcome);
486
479
  };
487
480
 
@@ -591,11 +584,7 @@ async function drainHistory(
591
584
  sock.ev.off('chats.upsert', chatsListener);
592
585
  sock.ev.off('messaging-history.set', historyListener);
593
586
  sock.ev.off('messages.upsert', messagesListener);
594
- try {
595
- sock.end(undefined);
596
- } catch {
597
- /* ignore */
598
- }
587
+ safeEnd(sock);
599
588
  };
600
589
 
601
590
  try {
@@ -828,6 +817,14 @@ function delay(ms: number): Promise<void> {
828
817
  return new Promise((r) => setTimeout(r, ms));
829
818
  }
830
819
 
820
+ function safeEnd(sock: ReturnType<typeof makeWASocket>): void {
821
+ try {
822
+ sock.end(undefined);
823
+ } catch {
824
+ /* ignore */
825
+ }
826
+ }
827
+
831
828
  function waitForOpen(sock: ReturnType<typeof makeWASocket>, timeoutMs: number): Promise<boolean> {
832
829
  return new Promise((resolve) => {
833
830
  let newLogin = false;
@@ -962,12 +959,7 @@ export function toEvent(
962
959
  const text = extractText(m.message);
963
960
  if (!text) return null;
964
961
 
965
- const tsRaw =
966
- typeof m.messageTimestamp === 'number'
967
- ? m.messageTimestamp
968
- : ((m.messageTimestamp as { low?: number; toNumber?: () => number } | null)?.toNumber?.() ??
969
- (m.messageTimestamp as { low?: number } | null)?.low ??
970
- 0);
962
+ const tsRaw = extractTs(m);
971
963
  if (!tsRaw) return null;
972
964
  const occurredAt = new Date(tsRaw * 1000);
973
965
 
@@ -1001,6 +993,13 @@ export function toEvent(
1001
993
  occurred_at: occurredAt,
1002
994
  origin_parent_id: chatJid,
1003
995
  metadata: {
996
+ // Mirror the bridge's `source` field so consumers can tell which
997
+ // transport delivered an event when the same message arrives via both
998
+ // (QR-paired socket and the local Mac archive). Origin id alignment
999
+ // (both connectors emit the bare WhatsApp stanza id) makes the gateway
1000
+ // dedupe on insert; `source` records which side produced the row that
1001
+ // survived.
1002
+ source: 'whatsapp',
1004
1003
  chat_jid: chatJid,
1005
1004
  is_group: isGroup,
1006
1005
  from_me: fromMe,
@@ -0,0 +1,125 @@
1
+ /**
2
+ * WhatsApp (local) Connector — Lobu for Mac only.
3
+ *
4
+ * Reads messages directly from the WhatsApp Desktop app's local SQLite store
5
+ * at `~/Library/Group Containers/group.net.whatsapp.WhatsApp.shared/
6
+ * ChatStorage.sqlite`. Lobu for Mac snapshots the DB read-only, walks new
7
+ * rows since the last `Z_PK` checkpoint, and emits events that share the
8
+ * `whatsapp` connector's metadata shape so downstream entity links work
9
+ * identically.
10
+ *
11
+ * Differences from the QR-paired `whatsapp` connector:
12
+ * - No Baileys, no socket, no phone-offline auto-unlink (WA Desktop itself
13
+ * is the linked device).
14
+ * - Ciphertext never leaves the Mac.
15
+ * - Bound to one specific Mac; requires WhatsApp Desktop installed.
16
+ */
17
+
18
+ import {
19
+ type ActionResult,
20
+ type ConnectorDefinition,
21
+ ConnectorRuntime,
22
+ IDENTITY,
23
+ type SyncContext,
24
+ type SyncResult,
25
+ } from '@lobu/connector-sdk';
26
+
/** Message raised by every entry point: this connector never executes in-process. */
const BRIDGE_ONLY =
  'WhatsApp (local) runs only on a worker advertising capability "whatsapp_local" (Lobu for Mac with WhatsApp Desktop installed).';

/** Builds the error thrown when the connector is invoked outside the Mac bridge. */
function bridgeOnlyError(): Error {
  return new Error(BRIDGE_ONLY);
}

/**
 * Declarative definition of the `whatsapp.local` connector.
 *
 * The class only carries metadata (feeds, config schema, event metadata
 * schema, entity links). Both `sync` and `execute` unconditionally throw
 * `BRIDGE_ONLY`, so no message collection happens in this file.
 */
export default class WhatsAppLocalConnector extends ConnectorRuntime {
  readonly definition: ConnectorDefinition = {
    key: 'whatsapp.local',
    name: 'WhatsApp (this Mac)',
    description:
      "Reads messages from the WhatsApp Desktop app's local archive on this Mac. No QR pairing, no phone-offline auto-unlink — the desktop app is itself the linked device.",
    version: '0.1.0',
    faviconDomain: 'whatsapp.com',
    // Capability a worker must advertise, and the platforms it is declared for.
    requiredCapability: 'whatsapp_local',
    runtime: { platforms: ['macos'] },
    // No credentials are collected for this connector.
    authSchema: { methods: [{ type: 'none' }] },
    feeds: {
      messages: {
        key: 'messages',
        name: 'Messages',
        description:
          'Personal WhatsApp messages from 1:1 and group chats, sourced from WhatsApp Desktop.',
        // User-tunable feed options.
        configSchema: {
          type: 'object',
          properties: {
            chat_filter: {
              type: 'string',
              enum: ['all', 'individual', 'group'],
              default: 'all',
              description: 'Which chats to include.',
            },
            max_messages_per_sync: {
              type: 'integer',
              minimum: 1,
              maximum: 500_000,
              default: 5000,
              description:
                'Safety cap on messages collected per sync. The first sync drains all messages up to this cap; subsequent syncs ingest only new messages, so the cap rarely binds.',
            },
          },
        },
        eventKinds: {
          message: {
            description: 'A WhatsApp message (text, caption, or system).',
            // `source` is pinned to 'whatsapp_local' for events of this kind.
            metadataSchema: {
              type: 'object',
              properties: {
                source: { type: 'string', const: 'whatsapp_local' },
                chat_jid: { type: 'string' },
                is_group: { type: 'boolean' },
                from_me: { type: 'boolean' },
                participant: { type: 'string' },
                sender_jid: { type: 'string' },
                sender_phone: { type: 'string' },
                push_name: { type: 'string' },
                media_type: { type: 'string' },
                quoted_id: { type: 'string' },
                is_forwarded: { type: 'boolean' },
                is_starred: { type: 'boolean' },
                is_system_event: { type: 'boolean' },
                voice_note_skipped: {
                  type: 'string',
                  enum: ['not_downloaded', 'too_large', 'empty', 'read_error', 'invalid_path'],
                },
              },
            },
            // Links each message to a `person` entity keyed by the sender's
            // JID and phone number, auto-creating the entity when missing.
            entityLinks: [
              {
                entityType: 'person',
                autoCreate: true,
                titlePath: 'metadata.push_name',
                identities: [
                  { namespace: IDENTITY.WA_JID, eventPath: 'metadata.sender_jid' },
                  { namespace: IDENTITY.PHONE, eventPath: 'metadata.sender_phone' },
                ],
                traits: {
                  push_name: {
                    eventPath: 'metadata.push_name',
                    behavior: 'prefer_non_empty',
                  },
                  last_seen_at: {
                    eventPath: 'occurred_at',
                    behavior: 'overwrite',
                  },
                },
              },
            ],
          },
        },
      },
    },
  };

  /**
   * Always rejects with `BRIDGE_ONLY`.
   *
   * @param _ctx Unused sync context.
   * @throws Error carrying the `BRIDGE_ONLY` message.
   */
  async sync(_ctx: SyncContext): Promise<SyncResult> {
    throw bridgeOnlyError();
  }

  /**
   * Always rejects with `BRIDGE_ONLY`.
   *
   * @throws Error carrying the `BRIDGE_ONLY` message.
   */
  async execute(): Promise<ActionResult> {
    throw bridgeOnlyError();
  }
}
@@ -17,7 +17,12 @@ import {
17
17
  type SyncContext,
18
18
  type SyncResult,
19
19
  } from '@lobu/connector-sdk';
20
- import { getBrowserCookies, validateCookieNotExpired } from './browser-scraper-utils';
20
+ import {
21
+ getBrowserCdpUrl,
22
+ getBrowserCookies,
23
+ getBrowserUserDataDir,
24
+ validateCookieNotExpired,
25
+ } from './browser-scraper-utils';
21
26
 
22
27
  interface XCheckpoint {
23
28
  last_tweet_id?: string;
@@ -358,12 +363,16 @@ async function syncViaBrowser(
358
363
  const searchFilter = (config.search_filter as string) ?? 'live';
359
364
  const searchUrl = `https://x.com/search?q=${encodeURIComponent(searchQuery)}&src=typed_query&f=${searchFilter}`;
360
365
 
366
+ const userDataDir = getBrowserUserDataDir(ctx.sessionState);
367
+ const cdpUrl = getBrowserCdpUrl(ctx.sessionState) ?? 'auto';
361
368
  let cookies: any[] = [];
362
- try {
363
- cookies = getBrowserCookies(ctx.checkpoint as any, ctx.sessionState as any, 'x');
364
- validateCookieNotExpired(cookies, 'auth_token', 'x');
365
- } catch {
366
- // No stored cookies — CDP will be the only path
369
+ if (!userDataDir) {
370
+ try {
371
+ cookies = getBrowserCookies(ctx.checkpoint as any, ctx.sessionState as any, 'x');
372
+ validateCookieNotExpired(cookies, 'auth_token', 'x');
373
+ } catch {
374
+ // No stored cookies — CDP will be the only path
375
+ }
367
376
  }
368
377
 
369
378
  const result = await browserNetworkSync<XTweet>({
@@ -376,8 +385,9 @@ async function syncViaBrowser(
376
385
  navigationTimeoutMs: 15000,
377
386
  },
378
387
  url: searchUrl,
379
- cdpUrl: 'auto',
388
+ cdpUrl,
380
389
  cookies,
390
+ userDataDir,
381
391
  parseResponse: parseBrowserSearchResponse,
382
392
  checkAuth: async (page) => {
383
393
  const url = page.url();
@@ -0,0 +1,47 @@
-- migrate:up

-- Capability gate for worker dispatch.
--
-- Intent: workers advertise their capabilities on poll, and the runs
-- scheduler only assigns a connector run to a worker whose capabilities
-- include the connector's required_capability.
--
--   required_capability  NULL = no special capability required (the default
--                        for API/browser connectors the existing fleet runs).
--   runtime              platform metadata for device-bound connectors, e.g.
--                        {"platforms": ["macos"]} for apple.screen_time /
--                        local.directory, which only run inside Lobu for Mac.
--                        NULL = cloud connector.
--
-- Initial use case: apple.screen_time and local.directory, served by Lobu for
-- Mac polling /api/workers/* as a user-scoped device worker.

-- Registry of device workers, keyed by (owner, worker).
CREATE TABLE IF NOT EXISTS public.device_workers (
    user_id       text        NOT NULL,
    worker_id     text        NOT NULL,
    platform      text,
    app_version   text,
    capabilities  jsonb       NOT NULL DEFAULT '[]'::jsonb,  -- empty JSON array by default
    label         text,
    first_seen_at timestamptz NOT NULL DEFAULT now(),
    last_seen_at  timestamptz NOT NULL DEFAULT now(),
    CONSTRAINT device_workers_pkey PRIMARY KEY (user_id, worker_id)
);

-- NOTE(review): the primary key already leads with user_id, so this index
-- looks redundant with the PK index — confirm before relying on or keeping it.
CREATE INDEX IF NOT EXISTS device_workers_user_id_idx
    ON public.device_workers (user_id);

-- Per-connector gate columns; both are nullable and default to NULL.
ALTER TABLE public.connector_definitions
    ADD COLUMN IF NOT EXISTS required_capability text,
    ADD COLUMN IF NOT EXISTS runtime jsonb;

-- Partial index: only connectors that actually declare a capability.
CREATE INDEX IF NOT EXISTS connector_definitions_required_capability_idx
    ON public.connector_definitions (required_capability)
    WHERE required_capability IS NOT NULL;

-- migrate:down

-- Remove the device worker registry first, then the gate columns and index.
DROP INDEX IF EXISTS public.device_workers_user_id_idx;
DROP TABLE IF EXISTS public.device_workers;

DROP INDEX IF EXISTS public.connector_definitions_required_capability_idx;
ALTER TABLE public.connector_definitions
    DROP COLUMN IF EXISTS runtime,
    DROP COLUMN IF EXISTS required_capability;
@@ -0,0 +1,113 @@
-- migrate:up

-- Make a connection's execution target explicit, and give every device worker
-- a home organization.
--
-- connections.device_worker_id (nullable) is the binding:
--   NULL -> runs on the cloud connector-worker pool (today's behavior)
--   set  -> runs are pinned to that device worker
-- For device-type connectors the binding is mandatory; for cloud connectors
-- it is an optional override. A connection can only be pinned to a device that
-- is attached to that connection's organization.
--
-- device_workers.organization_id is the device's home org — chosen at setup,
-- defaulting to the owner's personal workspace. The device's connectors live
-- there; re-attaching the device to a different org (one the owner must be a
-- member of) is the only knob. There is no per-connection device→org grant.

-- Surrogate key for device_workers so connections / UI can reference a device
-- by a single stable id. The (user_id, worker_id) primary key stays.
ALTER TABLE public.device_workers
    ADD COLUMN IF NOT EXISTS id uuid NOT NULL DEFAULT gen_random_uuid(),
    ADD COLUMN IF NOT EXISTS organization_id text;

-- Full (non-partial) unique index: this is what lets the foreign key below
-- reference device_workers (id).
CREATE UNIQUE INDEX IF NOT EXISTS device_workers_id_key
    ON public.device_workers (id);

CREATE INDEX IF NOT EXISTS idx_device_workers_organization_id
    ON public.device_workers (organization_id)
    WHERE organization_id IS NOT NULL;

ALTER TABLE public.connections
    ADD COLUMN IF NOT EXISTS device_worker_id uuid;

-- ADD CONSTRAINT has no IF NOT EXISTS form, so guard it by hand. The lookup is
-- scoped to public.connections via conrelid: constraint names are not unique
-- across tables or schemas, and a same-named constraint elsewhere must not
-- cause this foreign key to be skipped.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conname = 'connections_device_worker_id_fkey'
          AND conrelid = 'public.connections'::regclass
    ) THEN
        ALTER TABLE public.connections
            ADD CONSTRAINT connections_device_worker_id_fkey
            FOREIGN KEY (device_worker_id)
            REFERENCES public.device_workers (id)
            ON DELETE SET NULL;  -- deleting a device unpins its connections
    END IF;
END$$;

CREATE INDEX IF NOT EXISTS idx_connections_device_worker_id
    ON public.connections (device_worker_id)
    WHERE device_worker_id IS NOT NULL;

-- Attach existing devices to their owner's personal workspace (no-op on a
-- fresh database — there are no users yet; the device heartbeat sets this for
-- new devices either way). Devices with no matching personal org keep NULL.
-- ORDER BY makes the pick deterministic should more than one org match.
UPDATE public.device_workers dw
SET organization_id = (
    SELECT o.id
    FROM public.organization o
    WHERE (o.metadata::jsonb)->>'personal_org_for_user_id' = dw.user_id
    ORDER BY o.id
    LIMIT 1
)
WHERE dw.organization_id IS NULL;

-- Backfill: existing auto-wired personal-org device connections (created_by
-- set, no auth profile) whose owner has exactly one device get pinned to that
-- device — but only when the connection lives in the device's home org (the
-- invariant stated at the top of this file), and at most one per
-- (org, connector_key, owner) so the unique index created below can never be
-- violated. Ambiguous ones stay NULL and the UI prompts for a device.
UPDATE public.connections c
SET device_worker_id = dw.id
FROM (
    -- Users with exactly one device worker. NOT EXISTS is used instead of
    -- GROUP BY + min(id): Postgres provides no min()/max() aggregate for uuid.
    SELECT dw1.user_id, dw1.id, dw1.organization_id
    FROM public.device_workers dw1
    WHERE NOT EXISTS (
        SELECT 1
        FROM public.device_workers dw2
        WHERE dw2.user_id = dw1.user_id
          AND dw2.id <> dw1.id
    )
) dw
WHERE c.created_by = dw.user_id
  -- Cast keeps the comparison valid whatever connections.organization_id's
  -- declared type is; a device with a NULL home org never matches.
  AND c.organization_id::text = dw.organization_id
  AND c.device_worker_id IS NULL
  AND c.deleted_at IS NULL
  AND c.auth_profile_id IS NULL
  AND c.connector_key IN (
      SELECT key
      FROM public.connector_definitions
      WHERE required_capability IS NOT NULL
  )
  -- Only the lowest-id live connection per (org, connector, owner) is pinned.
  AND c.id = (
      SELECT min(c2.id)
      FROM public.connections c2
      WHERE c2.organization_id = c.organization_id
        AND c2.connector_key = c.connector_key
        AND c2.created_by = c.created_by
        AND c2.deleted_at IS NULL
  );

-- One active connection per (org, connector, device). A second device backing
-- the same connector is a second connection. Doubles as DB-level idempotency
-- for the create-vs-auto-wire race. Created AFTER the backfill above.
DROP INDEX IF EXISTS public.idx_connections_org_connector_device_live;
CREATE UNIQUE INDEX idx_connections_org_connector_device_live
    ON public.connections (organization_id, connector_key, device_worker_id)
    WHERE deleted_at IS NULL AND device_worker_id IS NOT NULL;

-- migrate:down

-- Undo in reverse dependency order: connection-side objects first (the FK
-- references device_workers.id), then the device_workers additions.
DROP INDEX IF EXISTS public.idx_connections_org_connector_device_live;
DROP INDEX IF EXISTS public.idx_connections_device_worker_id;
ALTER TABLE public.connections
    DROP CONSTRAINT IF EXISTS connections_device_worker_id_fkey,
    DROP COLUMN IF EXISTS device_worker_id;

DROP INDEX IF EXISTS public.device_workers_id_key;
DROP INDEX IF EXISTS public.idx_device_workers_organization_id;
ALTER TABLE public.device_workers
    DROP COLUMN IF EXISTS id,
    DROP COLUMN IF EXISTS organization_id;