@lobu/cli 7.0.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
  2. package/dist/commands/_lib/apply/apply-cmd.js +160 -12
  3. package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
  4. package/dist/commands/_lib/apply/client.d.ts +106 -0
  5. package/dist/commands/_lib/apply/client.d.ts.map +1 -1
  6. package/dist/commands/_lib/apply/client.js +163 -2
  7. package/dist/commands/_lib/apply/client.js.map +1 -1
  8. package/dist/commands/_lib/apply/desired-state.d.ts +53 -0
  9. package/dist/commands/_lib/apply/desired-state.d.ts.map +1 -1
  10. package/dist/commands/_lib/apply/desired-state.js +182 -5
  11. package/dist/commands/_lib/apply/desired-state.js.map +1 -1
  12. package/dist/commands/_lib/apply/diff.d.ts +12 -1
  13. package/dist/commands/_lib/apply/diff.d.ts.map +1 -1
  14. package/dist/commands/_lib/apply/diff.js +106 -7
  15. package/dist/commands/_lib/apply/diff.js.map +1 -1
  16. package/dist/commands/_lib/connector-loader.d.ts +3 -0
  17. package/dist/commands/_lib/connector-loader.d.ts.map +1 -0
  18. package/dist/commands/_lib/connector-loader.js +129 -0
  19. package/dist/commands/_lib/connector-loader.js.map +1 -0
  20. package/dist/commands/_lib/connector-run-cmd.d.ts +35 -0
  21. package/dist/commands/_lib/connector-run-cmd.d.ts.map +1 -0
  22. package/dist/commands/_lib/connector-run-cmd.js +351 -0
  23. package/dist/commands/_lib/connector-run-cmd.js.map +1 -0
  24. package/dist/commands/_lib/export/export-cmd.d.ts +35 -0
  25. package/dist/commands/_lib/export/export-cmd.d.ts.map +1 -0
  26. package/dist/commands/_lib/export/export-cmd.js +329 -0
  27. package/dist/commands/_lib/export/export-cmd.js.map +1 -0
  28. package/dist/commands/agent.d.ts.map +1 -1
  29. package/dist/commands/agent.js +11 -14
  30. package/dist/commands/agent.js.map +1 -1
  31. package/dist/commands/chat.d.ts.map +1 -1
  32. package/dist/commands/chat.js +19 -5
  33. package/dist/commands/chat.js.map +1 -1
  34. package/dist/commands/connector.d.ts +3 -0
  35. package/dist/commands/connector.d.ts.map +1 -0
  36. package/dist/commands/connector.js +5 -0
  37. package/dist/commands/connector.js.map +1 -0
  38. package/dist/commands/dev.d.ts +15 -0
  39. package/dist/commands/dev.d.ts.map +1 -1
  40. package/dist/commands/dev.js +156 -4
  41. package/dist/commands/dev.js.map +1 -1
  42. package/dist/commands/doctor.d.ts.map +1 -1
  43. package/dist/commands/doctor.js +2 -3
  44. package/dist/commands/doctor.js.map +1 -1
  45. package/dist/commands/eval.d.ts.map +1 -1
  46. package/dist/commands/eval.js +12 -13
  47. package/dist/commands/eval.js.map +1 -1
  48. package/dist/commands/init.d.ts.map +1 -1
  49. package/dist/commands/init.js +5 -1
  50. package/dist/commands/init.js.map +1 -1
  51. package/dist/commands/login.d.ts.map +1 -1
  52. package/dist/commands/login.js +22 -16
  53. package/dist/commands/login.js.map +1 -1
  54. package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
  55. package/dist/commands/memory/_lib/browser-auth-cmd.js +15 -144
  56. package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
  57. package/dist/commands/token.d.ts.map +1 -1
  58. package/dist/commands/token.js +1 -4
  59. package/dist/commands/token.js.map +1 -1
  60. package/dist/commands/validate.d.ts.map +1 -1
  61. package/dist/commands/validate.js +4 -13
  62. package/dist/commands/validate.js.map +1 -1
  63. package/dist/config/loader.js +2 -2
  64. package/dist/config/loader.js.map +1 -1
  65. package/dist/connectors/README.md +0 -1
  66. package/dist/connectors/apple_photos.ts +178 -0
  67. package/dist/connectors/browser/evaluate.ts +120 -0
  68. package/dist/connectors/browser/fill_form.ts +107 -0
  69. package/dist/connectors/browser/page_text.ts +108 -0
  70. package/dist/connectors/browser-scraper-utils.ts +76 -0
  71. package/dist/connectors/chrome_tabs.ts +74 -0
  72. package/dist/connectors/github.ts +1 -0
  73. package/dist/connectors/google_calendar.ts +14 -2
  74. package/dist/connectors/google_play.ts +22 -2
  75. package/dist/connectors/hackernews.ts +37 -2
  76. package/dist/connectors/index.ts +9 -1
  77. package/dist/connectors/reddit.ts +1 -0
  78. package/dist/connectors/revolut.ts +10 -13
  79. package/dist/connectors/rss.ts +33 -8
  80. package/dist/connectors/trustpilot.ts +31 -20
  81. package/dist/connectors/website.ts +7 -68
  82. package/dist/connectors/whatsapp.ts +12 -21
  83. package/dist/db/migrations/20260514130000_connection_action_modes.sql +103 -0
  84. package/dist/db/migrations/20260514160000_auth_profiles_mirror_mode.sql +32 -0
  85. package/dist/db/migrations/20260515120000_agents_per_org_pk.sql +66 -0
  86. package/dist/db/migrations/20260515150000_geo_enrichment.sql +208 -0
  87. package/dist/db/migrations/20260515160000_drop_agents_org_id_unique.sql +24 -0
  88. package/dist/db/migrations/20260515170000_auth_profiles_default_for_connector.sql +23 -0
  89. package/dist/db/migrations/20260516120000_agents_per_org_pk_swap.sql +125 -0
  90. package/dist/db/migrations/20260516200000_events_search_tsv.sql +134 -0
  91. package/dist/db/migrations/20260516200100_events_lifecycle_changes_index.sql +25 -0
  92. package/dist/db/migrations/20260517010000_drop_unused_indexes.sql +49 -0
  93. package/dist/db/migrations/20260517020000_softdelete_orphan_feeds.sql +56 -0
  94. package/dist/db/migrations/20260517030000_pat_worker_id_binding.sql +27 -0
  95. package/dist/db/migrations/20260517040000_archive_orphan_watchers.sql +30 -0
  96. package/dist/db/migrations/20260517050000_watcher_agent_id_not_null.sql +34 -0
  97. package/dist/db/migrations/20260517060000_watcher_schema_additions.sql +78 -0
  98. package/dist/db/migrations/20260517150000_goals_primitive.sql +55 -0
  99. package/dist/db/migrations/20260517160000_drop_goals_primitive.sql +45 -0
  100. package/dist/db/migrations/20260518000000_pending_interactions.sql +49 -0
  101. package/dist/db/migrations/20260518010000_runs_heartbeat_reaper_index.sql +22 -0
  102. package/dist/eval/client.d.ts.map +1 -1
  103. package/dist/eval/client.js +11 -0
  104. package/dist/eval/client.js.map +1 -1
  105. package/dist/eval/grader.js +2 -1
  106. package/dist/eval/grader.js.map +1 -1
  107. package/dist/index.d.ts.map +1 -1
  108. package/dist/index.js +47 -0
  109. package/dist/index.js.map +1 -1
  110. package/dist/internal/context.d.ts +9 -0
  111. package/dist/internal/context.d.ts.map +1 -1
  112. package/dist/internal/context.js +41 -6
  113. package/dist/internal/context.js.map +1 -1
  114. package/dist/internal/credentials.d.ts +5 -0
  115. package/dist/internal/credentials.d.ts.map +1 -1
  116. package/dist/internal/credentials.js +75 -1
  117. package/dist/internal/credentials.js.map +1 -1
  118. package/dist/internal/index.d.ts +1 -1
  119. package/dist/internal/index.d.ts.map +1 -1
  120. package/dist/internal/index.js +1 -1
  121. package/dist/internal/index.js.map +1 -1
  122. package/dist/internal/local-env.d.ts.map +1 -1
  123. package/dist/internal/local-env.js +9 -2
  124. package/dist/internal/local-env.js.map +1 -1
  125. package/dist/server.bundle.mjs +8990 -5689
  126. package/dist/start-local.bundle.mjs +7029 -3402
  127. package/package.json +7 -5
  128. package/dist/connectors/google_photos.ts +0 -776
@@ -255,7 +255,14 @@ export default class GoogleCalendarConnector extends ConnectorRuntime {
255
255
  let pageToken: string | undefined;
256
256
  let nextSyncToken: string | undefined;
257
257
 
258
- while (true) {
258
+ // Safety bound — at 250 events/page, 200 pages = 50k events, more than
259
+ // any reasonable calendar window. Stops a runaway loop if the upstream
260
+ // ever returns a self-referential page token.
261
+ const MAX_PAGES = 200;
262
+ let pages = 0;
263
+
264
+ while (pages < MAX_PAGES) {
265
+ pages++;
259
266
  // Always request a full page — `maxResults` is a soft cap on *stored*
260
267
  // events, not a reason to shrink the request size (shrinking to 1 once the
261
268
  // cap is hit would crawl a busy calendar one event per round-trip).
@@ -350,7 +357,12 @@ export default class GoogleCalendarConnector extends ConnectorRuntime {
350
357
  let pageToken: string | undefined;
351
358
  let nextSyncToken: string | undefined;
352
359
 
353
- while (true) {
360
+ // Same hard ceiling as the full-sync path — defensive only.
361
+ const MAX_PAGES = 200;
362
+ let pages = 0;
363
+
364
+ while (pages < MAX_PAGES) {
365
+ pages++;
354
366
  const params = new URLSearchParams({
355
367
  maxResults: String(Math.max(1, Math.min(250, maxResults - events.length))),
356
368
  syncToken,
@@ -136,6 +136,12 @@ async function fetchReviewsPage(
136
136
 
137
137
  if (!res.ok) {
138
138
  if (res.status === 404) throw new Error('App not found (404)');
139
+ if (res.status === 429) {
140
+ const retryAfter = res.headers.get('Retry-After');
141
+ throw new Error(
142
+ `Google Play rate limit (429). Retry after ${retryAfter ?? 'unknown'} seconds.`
143
+ );
144
+ }
139
145
  throw new Error(`Google Play request failed: ${res.status} ${res.statusText}`);
140
146
  }
141
147
 
@@ -143,14 +149,28 @@ async function fetchReviewsPage(
143
149
 
144
150
  // Response starts with ")]}'" (security prefix), then a newline, then JSON.
145
151
  // The library skips the first 5 characters.
146
- const outer = JSON.parse(text.substring(5));
152
+ // Wrap parse in try/catch — Google sometimes returns an HTML interstitial
153
+ // (captcha / geo-block / maintenance) with status 200, which would bubble up
154
+ // as an unhelpful SyntaxError otherwise.
155
+ let outer: any;
156
+ try {
157
+ outer = JSON.parse(text.substring(5));
158
+ } catch {
159
+ const preview = text.substring(0, 120).replace(/\s+/g, ' ');
160
+ throw new Error(`Google Play returned non-JSON response: ${preview}`);
161
+ }
147
162
  const innerJson: string | null = outer?.[0]?.[2];
148
163
 
149
164
  if (innerJson === null || innerJson === undefined) {
150
165
  return { reviews: [], nextToken: null };
151
166
  }
152
167
 
153
- const data = JSON.parse(innerJson);
168
+ let data: any;
169
+ try {
170
+ data = JSON.parse(innerJson);
171
+ } catch {
172
+ throw new Error('Google Play returned malformed inner JSON payload');
173
+ }
154
174
  return {
155
175
  reviews: extractReviews(data, appId),
156
176
  nextToken: extractPaginationToken(data),
@@ -16,6 +16,7 @@ import {
16
16
  type SyncContext,
17
17
  type SyncResult,
18
18
  } from '@lobu/connector-sdk';
19
+ import { validatePublicUrl } from './browser-scraper-utils.ts';
19
20
 
20
21
  // ---------------------------------------------------------------------------
21
22
  // Algolia HN API types
@@ -261,11 +262,36 @@ export default class HackerNewsConnector extends ConnectorRuntime {
261
262
  `&numericFilters=${encodeURIComponent(`created_at_i>${lookbackTimestamp}`)}`;
262
263
 
263
264
  const response = await fetch(url);
265
+
266
+ // Honor Algolia's rate-limit response so we don't hammer them and turn
267
+ // a transient 429 into "Unexpected token < in JSON" when the next call
268
+ // returns an HTML error page.
269
+ if (response.status === 429) {
270
+ const retryAfter = response.headers.get('Retry-After');
271
+ const waitMs = retryAfter ? Math.min(60_000, Math.max(1, Number(retryAfter)) * 1000) : 5000;
272
+ await this.sleep(Number.isFinite(waitMs) ? waitMs : 5000);
273
+ continue;
274
+ }
275
+
264
276
  if (!response.ok) {
265
- throw new Error(`Algolia API error (${response.status}): ${await response.text()}`);
277
+ const text = await response.text().catch(() => '');
278
+ throw new Error(`Algolia API error (${response.status}): ${text}`);
279
+ }
280
+
281
+ // Algolia normally returns JSON, but proxies/captive portals occasionally
282
+ // return HTML. Surface a useful error instead of a bare SyntaxError that
283
+ // makes the connector look broken when the upstream is at fault.
284
+ let data: AlgoliaResponse;
285
+ try {
286
+ data = (await response.json()) as AlgoliaResponse;
287
+ } catch (err) {
288
+ const message = err instanceof Error ? err.message : String(err);
289
+ throw new Error(`Algolia API returned non-JSON response: ${message}`);
266
290
  }
267
291
 
268
- const data = (await response.json()) as AlgoliaResponse;
292
+ if (!data || !Array.isArray(data.hits)) {
293
+ throw new Error('Algolia API returned an unexpected response shape');
294
+ }
269
295
 
270
296
  for (const hit of data.hits) {
271
297
  if (contentType === 'comment') {
@@ -404,6 +430,15 @@ export default class HackerNewsConnector extends ConnectorRuntime {
404
430
 
405
431
  private async fetchExternalContent(url: string): Promise<string | null> {
406
432
  try {
433
+ // SSRF guard — `url` is supplied by whoever submitted the HN story and
434
+ // is therefore attacker-controllable. Refuse to fetch private/internal
435
+ // addresses (loopback, 169.254.169.254 cloud metadata, RFC1918, etc.).
436
+ try {
437
+ validatePublicUrl(url);
438
+ } catch {
439
+ return null;
440
+ }
441
+
407
442
  const controller = new AbortController();
408
443
  const timeoutId = setTimeout(() => controller.abort(), this.CONTENT_FETCH_TIMEOUT);
409
444
 
@@ -1,15 +1,23 @@
1
1
  export * from './apple_health.ts';
2
+ export * from './apple_photos.ts';
2
3
  export * from './apple_screen_time.ts';
3
4
  export * from './local_directory.ts';
4
5
  export * from './browser-scraper-utils.ts';
6
+ // Browser primitives — connector definitions whose executors live in the
7
+ // Owletto for Chrome extension (apps/chrome/executor.js). Kept under
8
+ // browser/ so they're structurally distinct from third-party service
9
+ // connectors (linkedin, revolut, github, etc.).
10
+ export * from './browser/evaluate.ts';
11
+ export * from './browser/fill_form.ts';
12
+ export * from './browser/page_text.ts';
5
13
  export * from './capterra.ts';
14
+ export * from './chrome_tabs.ts';
6
15
  export * from './g2.ts';
7
16
  export * from './github.ts';
8
17
  export * from './glassdoor.ts';
9
18
  export * from './gmaps.ts';
10
19
  export * from './google_calendar.ts';
11
20
  export * from './google_gmail.ts';
12
- export * from './google_photos.ts';
13
21
  export * from './google_play.ts';
14
22
  export * from './hackernews.ts';
15
23
  export * from './ios_appstore.ts';
@@ -79,6 +79,7 @@ export default class RedditConnector extends ConnectorRuntime {
79
79
  name: 'Reddit',
80
80
  description: 'Fetches posts and comments from Reddit subreddits or search queries.',
81
81
  version: '1.0.0',
82
+ faviconDomain: 'reddit.com',
82
83
  authSchema: {
83
84
  methods: [
84
85
  {
@@ -142,12 +142,9 @@ function extractAmountAndCurrency(
142
142
  const amt = record.amount;
143
143
  if (amt && typeof amt === "object") {
144
144
  const obj = amt as Record<string, unknown>;
145
- const value =
146
- typeof obj.value === "number"
147
- ? obj.value
148
- : typeof obj.amount === "number"
149
- ? obj.amount
150
- : null;
145
+ let value: number | null = null;
146
+ if (typeof obj.value === "number") value = obj.value;
147
+ else if (typeof obj.amount === "number") value = obj.amount;
151
148
  const currency = typeof obj.currency === "string" ? obj.currency : null;
152
149
  if (value !== null && currency) return { amount: value, currency };
153
150
  }
@@ -200,13 +197,13 @@ function extractBalance(
200
197
  record: Record<string, unknown>,
201
198
  currency: string,
202
199
  ): number | undefined {
203
- const raw =
204
- typeof record.balance === "number"
205
- ? record.balance
206
- : record.balance && typeof record.balance === "object"
207
- ? ((record.balance as Record<string, unknown>).value ??
208
- (record.balance as Record<string, unknown>).amount)
209
- : undefined;
200
+ let raw: unknown;
201
+ if (typeof record.balance === "number") {
202
+ raw = record.balance;
203
+ } else if (record.balance && typeof record.balance === "object") {
204
+ const obj = record.balance as Record<string, unknown>;
205
+ raw = obj.value ?? obj.amount;
206
+ }
210
207
  if (typeof raw !== "number" || !Number.isFinite(raw)) return undefined;
211
208
  return Number.isInteger(raw) ? minorUnitsToMajor(raw, currency) : raw;
212
209
  }
@@ -15,6 +15,7 @@ import {
15
15
  type SyncContext,
16
16
  type SyncResult,
17
17
  } from '@lobu/connector-sdk';
18
+ import { validatePublicUrl } from './browser-scraper-utils.ts';
18
19
 
19
20
  // ---------------------------------------------------------------------------
20
21
  // Types
@@ -211,6 +212,11 @@ export default class RSSConnector extends ConnectorRuntime {
211
212
  // -------------------------------------------------------------------------
212
213
 
213
214
  private async fetchAndParseFeed(feedUrl: string, maxItems: number): Promise<RSSFeedItem[]> {
215
+ // SSRF guard at the trust boundary. `feed_urls` is operator/user supplied
216
+ // via connector config and must not be allowed to target loopback, RFC1918,
217
+ // or cloud-metadata IPs from the gateway process.
218
+ validatePublicUrl(feedUrl);
219
+
214
220
  const controller = new AbortController();
215
221
  const timeoutId = setTimeout(() => controller.abort(), this.FETCH_TIMEOUT_MS);
216
222
 
@@ -222,18 +228,13 @@ export default class RSSConnector extends ConnectorRuntime {
222
228
  Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
223
229
  },
224
230
  });
225
-
226
- clearTimeout(timeoutId);
227
-
228
231
  if (!response.ok) {
229
232
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
230
233
  }
231
-
232
234
  const xml = await response.text();
233
235
  return this.parseXml(xml, feedUrl, maxItems);
234
- } catch (err) {
236
+ } finally {
235
237
  clearTimeout(timeoutId);
236
- throw err;
237
238
  }
238
239
  }
239
240
 
@@ -413,8 +414,32 @@ export default class RSSConnector extends ConnectorRuntime {
413
414
  case '#39':
414
415
  return "'";
415
416
  default:
416
- if (hex) return String.fromCharCode(parseInt(hex, 16));
417
- if (decimal) return String.fromCharCode(parseInt(decimal, 10));
417
+ // Use fromCodePoint, not fromCharCode — astral-plane characters
418
+ // (emoji, CJK extension B+, etc.) have code points > 0xFFFF which
419
+ // fromCharCode silently truncates, producing mojibake in feed
420
+ // titles. Guard the range so a malformed entity doesn't throw.
421
+ if (hex) {
422
+ const cp = parseInt(hex, 16);
423
+ if (Number.isFinite(cp) && cp >= 0 && cp <= 0x10ffff) {
424
+ try {
425
+ return String.fromCodePoint(cp);
426
+ } catch {
427
+ return _match;
428
+ }
429
+ }
430
+ return _match;
431
+ }
432
+ if (decimal) {
433
+ const cp = parseInt(decimal, 10);
434
+ if (Number.isFinite(cp) && cp >= 0 && cp <= 0x10ffff) {
435
+ try {
436
+ return String.fromCodePoint(cp);
437
+ } catch {
438
+ return _match;
439
+ }
440
+ }
441
+ return _match;
442
+ }
418
443
  return _match;
419
444
  }
420
445
  }
@@ -98,7 +98,11 @@ export default class TrustpilotConnector extends ConnectorRuntime {
98
98
  throw new Error('Either business_url or business_name is required');
99
99
  }
100
100
 
101
- const baseUrl = businessUrl || `https://www.trustpilot.com/review/${businessName}`;
101
+ // encodeURIComponent the user-supplied businessName so a value like
102
+ // "../search?foo=bar" can't escape the /review/ path on trustpilot.com.
103
+ const baseUrl =
104
+ businessUrl ||
105
+ `https://www.trustpilot.com/review/${encodeURIComponent(businessName ?? '')}`;
102
106
  validateUrlDomain(baseUrl, 'trustpilot.com');
103
107
 
104
108
  const userDataDir = getBrowserUserDataDir(ctx.sessionState);
@@ -161,27 +165,34 @@ export default class TrustpilotConnector extends ConnectorRuntime {
161
165
  // Filter reviews with meaningful content (more than 10 chars)
162
166
  const reviews: TrustpilotReview[] = rawReviews.filter((r) => r.text && r.text.length > 10);
163
167
 
164
- // Transform to EventEnvelope format
165
- const events: EventEnvelope[] = reviews.map((review) => {
168
+ // Transform to EventEnvelope format. Drop rows whose `date` attribute
169
+ // was missing/invalid in the DOM — `new Date("")` yields an Invalid
170
+ // Date, which downstream sorting/checkpointing then can't compare, and
171
+ // an empty `date` made `origin_id` collide on `-<author>` across rows.
172
+ const events: EventEnvelope[] = reviews.flatMap((review) => {
166
173
  const content = review.title ? `${review.title}\n\n${review.text}` : review.text;
167
-
168
- return {
169
- origin_id: `${review.date}-${review.author}`,
170
- payload_text: content,
171
- author_name: review.author,
172
- occurred_at: new Date(review.date),
173
- origin_type: 'review',
174
- score: calculateEngagementScore('trustpilot', {
175
- rating: review.rating,
176
- helpful_count: 0,
177
- }),
178
- source_url: baseUrl,
179
- metadata: {
180
- rating: review.rating,
181
- helpful_count: 0,
182
- title: review.title,
174
+ const parsedDate = review.date ? new Date(review.date) : null;
175
+ if (!parsedDate || Number.isNaN(parsedDate.getTime())) return [];
176
+
177
+ return [
178
+ {
179
+ origin_id: `${review.date}-${review.author}`,
180
+ payload_text: content,
181
+ author_name: review.author,
182
+ occurred_at: parsedDate,
183
+ origin_type: 'review',
184
+ score: calculateEngagementScore('trustpilot', {
185
+ rating: review.rating,
186
+ helpful_count: 0,
187
+ }),
188
+ source_url: baseUrl,
189
+ metadata: {
190
+ rating: review.rating,
191
+ helpful_count: 0,
192
+ title: review.title,
193
+ },
183
194
  },
184
- };
195
+ ];
185
196
  });
186
197
 
187
198
  return {
@@ -20,6 +20,7 @@ import {
20
20
  type SyncResult,
21
21
  } from '@lobu/connector-sdk';
22
22
  import type { Page } from 'playwright';
23
+ import { validatePublicUrl } from './browser-scraper-utils.ts';
23
24
 
24
25
  interface PageSection {
25
26
  heading: string;
@@ -50,72 +51,6 @@ function shouldSkipCookieBannerText(text: string): boolean {
50
51
  return countPatternMatches(normalized, COOKIE_BANNER_PATTERNS) >= 3;
51
52
  }
52
53
 
53
- /**
54
- * Validates a URL is safe for server-side fetching.
55
- * Blocks private/internal network addresses to prevent SSRF attacks.
56
- */
57
- function validatePublicUrl(url: string): void {
58
- let parsed: URL;
59
- try {
60
- parsed = new URL(url);
61
- } catch {
62
- throw new Error(`Invalid URL: ${url}`);
63
- }
64
-
65
- if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
66
- throw new Error(`URL must use http: or https: protocol, got ${parsed.protocol}`);
67
- }
68
-
69
- const hostname = parsed.hostname.toLowerCase();
70
-
71
- // Block localhost variants
72
- if (hostname === 'localhost' || hostname === '[::1]' || hostname.endsWith('.localhost')) {
73
- throw new Error(`URL must not point to localhost: ${hostname}`);
74
- }
75
-
76
- // Block private/internal IP ranges
77
- // IPv4 patterns: 127.x.x.x, 10.x.x.x, 192.168.x.x, 172.16-31.x.x, 169.254.x.x, 0.x.x.x
78
- const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
79
- if (ipv4Match) {
80
- const [, a, b] = ipv4Match.map(Number);
81
- if (
82
- a === 127 || // 127.0.0.0/8 loopback
83
- a === 10 || // 10.0.0.0/8 private
84
- (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
85
- (a === 192 && b === 168) || // 192.168.0.0/16 private
86
- (a === 169 && b === 254) || // 169.254.0.0/16 link-local
87
- a === 0 // 0.0.0.0/8
88
- ) {
89
- throw new Error(`URL must not point to a private/internal IP address: ${hostname}`);
90
- }
91
- }
92
-
93
- // Block IPv6 private ranges (bracketed notation in URLs)
94
- if (hostname.startsWith('[')) {
95
- const ipv6 = hostname.slice(1, -1).toLowerCase();
96
- if (
97
- ipv6 === '::1' ||
98
- ipv6.startsWith('fe80:') || // link-local
99
- ipv6.startsWith('fc') || // unique local (fc00::/7)
100
- ipv6.startsWith('fd') || // unique local (fc00::/7)
101
- ipv6 === '::' || // unspecified
102
- ipv6.startsWith('::ffff:') // IPv4-mapped IPv6
103
- ) {
104
- throw new Error(`URL must not point to a private/internal IPv6 address: ${hostname}`);
105
- }
106
- }
107
-
108
- // Block common internal hostnames
109
- if (
110
- hostname.endsWith('.internal') ||
111
- hostname.endsWith('.local') ||
112
- hostname.endsWith('.corp') ||
113
- hostname.endsWith('.lan')
114
- ) {
115
- throw new Error(`URL must not point to an internal hostname: ${hostname}`);
116
- }
117
- }
118
-
119
54
  export default class WebsiteConnector extends ConnectorRuntime {
120
55
  readonly definition: ConnectorDefinition = {
121
56
  key: 'website',
@@ -457,7 +392,11 @@ export default class WebsiteConnector extends ConnectorRuntime {
457
392
  return result.join('\n');
458
393
  }
459
394
 
460
- private async fetchSitemap(sitemapUrl: string): Promise<string[]> {
395
+ private async fetchSitemap(sitemapUrl: string, depth = 0): Promise<string[]> {
396
+ // Sitemap-index recursion bound — caps fan-out from a remote sitemap that
397
+ // links to a sitemap that links to a sitemap... untrusted XML must not
398
+ // drive unbounded outbound traffic.
399
+ if (depth > 2) return [];
461
400
  const response = await fetch(sitemapUrl, {
462
401
  headers: { 'User-Agent': 'Mozilla/5.0 (compatible; LobuBot/1.0)' },
463
402
  });
@@ -493,7 +432,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
493
432
  }
494
433
  for (const childUrl of childSitemaps.slice(0, 5)) {
495
434
  validatePublicUrl(childUrl);
496
- const childUrls = await this.fetchSitemap(childUrl);
435
+ const childUrls = await this.fetchSitemap(childUrl, depth + 1);
497
436
  urls.push(...childUrls);
498
437
  }
499
438
  }
@@ -424,11 +424,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
424
424
  },
425
425
  };
426
426
  } catch (error) {
427
- try {
428
- sock.end(undefined);
429
- } catch {
430
- /* ignore */
431
- }
427
+ safeEnd(sock);
432
428
  throw error;
433
429
  }
434
430
  }
@@ -478,11 +474,7 @@ async function attemptPairing(
478
474
  sock.ev.off('connection.update', handler);
479
475
  sock.ev.off('creds.update', credsListener);
480
476
  ctx.signal.removeEventListener('abort', onAbort);
481
- try {
482
- sock.end(undefined);
483
- } catch {
484
- /* ignore */
485
- }
477
+ safeEnd(sock);
486
478
  resolve(outcome);
487
479
  };
488
480
 
@@ -592,11 +584,7 @@ async function drainHistory(
592
584
  sock.ev.off('chats.upsert', chatsListener);
593
585
  sock.ev.off('messaging-history.set', historyListener);
594
586
  sock.ev.off('messages.upsert', messagesListener);
595
- try {
596
- sock.end(undefined);
597
- } catch {
598
- /* ignore */
599
- }
587
+ safeEnd(sock);
600
588
  };
601
589
 
602
590
  try {
@@ -829,6 +817,14 @@ function delay(ms: number): Promise<void> {
829
817
  return new Promise((r) => setTimeout(r, ms));
830
818
  }
831
819
 
820
+ function safeEnd(sock: ReturnType<typeof makeWASocket>): void {
821
+ try {
822
+ sock.end(undefined);
823
+ } catch {
824
+ /* ignore */
825
+ }
826
+ }
827
+
832
828
  function waitForOpen(sock: ReturnType<typeof makeWASocket>, timeoutMs: number): Promise<boolean> {
833
829
  return new Promise((resolve) => {
834
830
  let newLogin = false;
@@ -963,12 +959,7 @@ export function toEvent(
963
959
  const text = extractText(m.message);
964
960
  if (!text) return null;
965
961
 
966
- const tsRaw =
967
- typeof m.messageTimestamp === 'number'
968
- ? m.messageTimestamp
969
- : ((m.messageTimestamp as { low?: number; toNumber?: () => number } | null)?.toNumber?.() ??
970
- (m.messageTimestamp as { low?: number } | null)?.low ??
971
- 0);
962
+ const tsRaw = extractTs(m);
972
963
  if (!tsRaw) return null;
973
964
  const occurredAt = new Date(tsRaw * 1000);
974
965
 
@@ -0,0 +1,103 @@
1
-- migrate:up

-- Collapse `connection.config.auto_approve_actions` (string[]) and
-- `connection.config.require_approval_actions` (string[]) into a single
-- `action_modes` (Record<string, 'disabled' | 'approval' | 'auto'>) map.
--
-- The old two-array model couldn't express "agent must not call this op
-- at all" — every action the connector defined was always reachable, the
-- arrays only flipped approval prompts. The new map adds 'disabled' as the
-- third state and gives every op an explicit user-chosen mode.
--
-- Backfill rule, per row, for every op listed in either array:
--   op in auto_approve_actions     → action_modes[op] = 'auto'
--   op in require_approval_actions → action_modes[op] = 'approval'
-- When an op appears in both, 'approval' wins (it's the stricter signal:
-- the user explicitly opted in to seeing an approval prompt).
--
-- Ops the user never touched are not stored in action_modes; the server
-- falls back to the connector's per-op `requires_approval` default at read
-- time, which preserves today's "all on" behavior.
--
-- We drop the two old keys in the same statement so the new state is the
-- only state on disk after migration.
--
-- Hardening over a naive backfill:
--   * JSON `null` elements inside either legacy array are skipped.
--     `jsonb_array_elements_text` turns them into SQL NULL and
--     `jsonb_object_agg` raises on a NULL key, which would abort the whole
--     migration because of a single malformed row.
--   * If a row already carries an `action_modes` object, its entries are
--     kept and take precedence over the backfilled ones, so an existing
--     map (including any 'disabled' entries) is not overwritten.

UPDATE public.connections
SET config = (
        COALESCE(config, '{}'::jsonb)
        - 'auto_approve_actions'
        - 'require_approval_actions'
    )
    || jsonb_build_object(
        'action_modes',
        COALESCE(
            (
                -- 'approval' wins over 'auto' when an op appears in both arrays
                -- (MIN('approval', 'auto') = 'approval' lexicographically).
                SELECT jsonb_object_agg(collapsed.op_key, collapsed.mode)
                FROM (
                    SELECT
                        all_modes.op_key,
                        MIN(all_modes.mode) AS mode
                    FROM (
                        SELECT op_key, 'approval'::text AS mode
                        FROM jsonb_array_elements_text(
                            CASE
                                WHEN jsonb_typeof(config->'require_approval_actions') = 'array'
                                    THEN config->'require_approval_actions'
                                ELSE '[]'::jsonb
                            END
                        ) AS op_key
                        UNION ALL
                        SELECT op_key, 'auto'::text AS mode
                        FROM jsonb_array_elements_text(
                            CASE
                                WHEN jsonb_typeof(config->'auto_approve_actions') = 'array'
                                    THEN config->'auto_approve_actions'
                                ELSE '[]'::jsonb
                            END
                        ) AS op_key
                    ) AS all_modes
                    -- Skip JSON nulls: a NULL key makes jsonb_object_agg fail.
                    WHERE all_modes.op_key IS NOT NULL
                    GROUP BY all_modes.op_key
                ) AS collapsed
            ),
            '{}'::jsonb
        )
        -- Right-hand side of || wins on key conflicts, so entries already
        -- stored in action_modes override the backfilled values.
        || CASE
            WHEN jsonb_typeof(config->'action_modes') = 'object'
                THEN config->'action_modes'
            ELSE '{}'::jsonb
        END
    )
WHERE config IS NOT NULL
  AND (
      config ? 'auto_approve_actions'
      OR config ? 'require_approval_actions'
  );
70
+
71
-- migrate:down

-- Reverse the collapse: split action_modes back into the two arrays.
--   'auto'     → auto_approve_actions
--   'approval' → require_approval_actions
-- 'disabled' has no pre-refactor equivalent and is silently dropped on
-- downgrade — the agent will see the op again as if no override existed.
--
-- Both arrays are built in a single pass over action_modes. The aggregate
-- has no GROUP BY, so the subquery always yields exactly one row (even for
-- an empty map), and COALESCE turns "no matching ops" into an empty array.
-- Keys are emitted in sorted order so the restored arrays are deterministic.
UPDATE public.connections
SET config = (
        COALESCE(config, '{}'::jsonb) - 'action_modes'
    )
    || (
        SELECT jsonb_build_object(
            'auto_approve_actions',
            COALESCE(
                jsonb_agg(modes.key ORDER BY modes.key)
                    FILTER (WHERE modes.value = 'auto'),
                '[]'::jsonb
            ),
            'require_approval_actions',
            COALESCE(
                jsonb_agg(modes.key ORDER BY modes.key)
                    FILTER (WHERE modes.value = 'approval'),
                '[]'::jsonb
            )
        )
        FROM jsonb_each_text(config->'action_modes') AS modes
    )
WHERE config IS NOT NULL
  AND jsonb_typeof(config->'action_modes') = 'object';
@@ -0,0 +1,32 @@
1
-- migrate:up
-- Relax the device-binding XOR for browser_session profiles to allow
-- mirror mode, where neither user_data_dir nor cdp_url is set on the
-- row (the source profile dir lives in auth_data.source_profile_dir).
-- Keep the mutual exclusion of the two columns so they can't be set
-- together; application validation enforces "exactly one of mirror /
-- cdp / legacy" per row.

ALTER TABLE auth_profiles
    DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_xor;

-- Drop any earlier copy of the new constraint first so this section can be
-- re-run after a partial application; ADD CONSTRAINT has no IF NOT EXISTS
-- form and would otherwise fail on a duplicate name.
ALTER TABLE auth_profiles
    DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_mutex;

-- The check passes unless a device-bound browser_session row sets BOTH
-- user_data_dir and cdp_url; rows with neither set (mirror mode) are allowed.
ALTER TABLE auth_profiles
    ADD CONSTRAINT auth_profiles_device_browser_path_mutex
    CHECK (
        device_worker_id IS NULL
        OR profile_kind <> 'browser_session'
        OR user_data_dir IS NULL
        OR cdp_url IS NULL
    );
20
+
21
-- migrate:down
-- Restore the strict XOR: a device-bound browser_session row must set
-- exactly one of user_data_dir / cdp_url.
--
-- NOTE(review): ADD CONSTRAINT validates existing rows, so this rollback
-- fails if any device-bound browser_session row has neither column set
-- (i.e. a mirror-mode profile created while the relaxed constraint was in
-- place). Such rows must be removed or given a path before downgrading.
ALTER TABLE auth_profiles
    DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_mutex;

-- Drop any earlier copy of the XOR constraint first so this section can be
-- re-run after a partial rollback without a duplicate-name error.
ALTER TABLE auth_profiles
    DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_xor;

ALTER TABLE auth_profiles
    ADD CONSTRAINT auth_profiles_device_browser_path_xor
    CHECK (
        device_worker_id IS NULL
        OR profile_kind <> 'browser_session'
        OR ((user_data_dir IS NOT NULL) AND (cdp_url IS NULL))
        OR ((user_data_dir IS NULL) AND (cdp_url IS NOT NULL))
    );