@lobu/cli 7.0.0 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
- package/dist/commands/_lib/apply/apply-cmd.js +160 -12
- package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
- package/dist/commands/_lib/apply/client.d.ts +106 -0
- package/dist/commands/_lib/apply/client.d.ts.map +1 -1
- package/dist/commands/_lib/apply/client.js +163 -2
- package/dist/commands/_lib/apply/client.js.map +1 -1
- package/dist/commands/_lib/apply/desired-state.d.ts +53 -0
- package/dist/commands/_lib/apply/desired-state.d.ts.map +1 -1
- package/dist/commands/_lib/apply/desired-state.js +182 -5
- package/dist/commands/_lib/apply/desired-state.js.map +1 -1
- package/dist/commands/_lib/apply/diff.d.ts +12 -1
- package/dist/commands/_lib/apply/diff.d.ts.map +1 -1
- package/dist/commands/_lib/apply/diff.js +106 -7
- package/dist/commands/_lib/apply/diff.js.map +1 -1
- package/dist/commands/_lib/connector-loader.d.ts +3 -0
- package/dist/commands/_lib/connector-loader.d.ts.map +1 -0
- package/dist/commands/_lib/connector-loader.js +129 -0
- package/dist/commands/_lib/connector-loader.js.map +1 -0
- package/dist/commands/_lib/connector-run-cmd.d.ts +35 -0
- package/dist/commands/_lib/connector-run-cmd.d.ts.map +1 -0
- package/dist/commands/_lib/connector-run-cmd.js +351 -0
- package/dist/commands/_lib/connector-run-cmd.js.map +1 -0
- package/dist/commands/_lib/export/export-cmd.d.ts +35 -0
- package/dist/commands/_lib/export/export-cmd.d.ts.map +1 -0
- package/dist/commands/_lib/export/export-cmd.js +329 -0
- package/dist/commands/_lib/export/export-cmd.js.map +1 -0
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +11 -14
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +19 -5
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/connector.d.ts +3 -0
- package/dist/commands/connector.d.ts.map +1 -0
- package/dist/commands/connector.js +5 -0
- package/dist/commands/connector.js.map +1 -0
- package/dist/commands/dev.d.ts +15 -0
- package/dist/commands/dev.d.ts.map +1 -1
- package/dist/commands/dev.js +156 -4
- package/dist/commands/dev.js.map +1 -1
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +2 -3
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +12 -13
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +5 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +22 -16
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.js +15 -144
- package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
- package/dist/commands/token.d.ts.map +1 -1
- package/dist/commands/token.js +1 -4
- package/dist/commands/token.js.map +1 -1
- package/dist/commands/validate.d.ts.map +1 -1
- package/dist/commands/validate.js +4 -13
- package/dist/commands/validate.js.map +1 -1
- package/dist/config/loader.js +2 -2
- package/dist/config/loader.js.map +1 -1
- package/dist/connectors/README.md +0 -1
- package/dist/connectors/apple_photos.ts +178 -0
- package/dist/connectors/browser/evaluate.ts +120 -0
- package/dist/connectors/browser/fill_form.ts +107 -0
- package/dist/connectors/browser/page_text.ts +108 -0
- package/dist/connectors/browser-scraper-utils.ts +76 -0
- package/dist/connectors/chrome_tabs.ts +74 -0
- package/dist/connectors/github.ts +1 -0
- package/dist/connectors/google_calendar.ts +14 -2
- package/dist/connectors/google_play.ts +22 -2
- package/dist/connectors/hackernews.ts +37 -2
- package/dist/connectors/index.ts +9 -1
- package/dist/connectors/reddit.ts +1 -0
- package/dist/connectors/revolut.ts +10 -13
- package/dist/connectors/rss.ts +33 -8
- package/dist/connectors/trustpilot.ts +31 -20
- package/dist/connectors/website.ts +7 -68
- package/dist/connectors/whatsapp.ts +12 -21
- package/dist/db/migrations/20260514130000_connection_action_modes.sql +103 -0
- package/dist/db/migrations/20260514160000_auth_profiles_mirror_mode.sql +32 -0
- package/dist/db/migrations/20260515120000_agents_per_org_pk.sql +66 -0
- package/dist/db/migrations/20260515150000_geo_enrichment.sql +208 -0
- package/dist/db/migrations/20260515160000_drop_agents_org_id_unique.sql +24 -0
- package/dist/db/migrations/20260515170000_auth_profiles_default_for_connector.sql +23 -0
- package/dist/db/migrations/20260516120000_agents_per_org_pk_swap.sql +125 -0
- package/dist/db/migrations/20260516200000_events_search_tsv.sql +134 -0
- package/dist/db/migrations/20260516200100_events_lifecycle_changes_index.sql +25 -0
- package/dist/db/migrations/20260517010000_drop_unused_indexes.sql +49 -0
- package/dist/db/migrations/20260517020000_softdelete_orphan_feeds.sql +56 -0
- package/dist/db/migrations/20260517030000_pat_worker_id_binding.sql +27 -0
- package/dist/db/migrations/20260517040000_archive_orphan_watchers.sql +30 -0
- package/dist/db/migrations/20260517050000_watcher_agent_id_not_null.sql +34 -0
- package/dist/db/migrations/20260517060000_watcher_schema_additions.sql +78 -0
- package/dist/db/migrations/20260517150000_goals_primitive.sql +55 -0
- package/dist/db/migrations/20260517160000_drop_goals_primitive.sql +45 -0
- package/dist/db/migrations/20260518000000_pending_interactions.sql +49 -0
- package/dist/db/migrations/20260518010000_runs_heartbeat_reaper_index.sql +22 -0
- package/dist/eval/client.d.ts.map +1 -1
- package/dist/eval/client.js +11 -0
- package/dist/eval/client.js.map +1 -1
- package/dist/eval/grader.js +2 -1
- package/dist/eval/grader.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +47 -0
- package/dist/index.js.map +1 -1
- package/dist/internal/context.d.ts +9 -0
- package/dist/internal/context.d.ts.map +1 -1
- package/dist/internal/context.js +41 -6
- package/dist/internal/context.js.map +1 -1
- package/dist/internal/credentials.d.ts +5 -0
- package/dist/internal/credentials.d.ts.map +1 -1
- package/dist/internal/credentials.js +75 -1
- package/dist/internal/credentials.js.map +1 -1
- package/dist/internal/index.d.ts +1 -1
- package/dist/internal/index.d.ts.map +1 -1
- package/dist/internal/index.js +1 -1
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/local-env.d.ts.map +1 -1
- package/dist/internal/local-env.js +9 -2
- package/dist/internal/local-env.js.map +1 -1
- package/dist/server.bundle.mjs +8990 -5689
- package/dist/start-local.bundle.mjs +7029 -3402
- package/package.json +7 -5
- package/dist/connectors/google_photos.ts +0 -776
|
@@ -255,7 +255,14 @@ export default class GoogleCalendarConnector extends ConnectorRuntime {
|
|
|
255
255
|
let pageToken: string | undefined;
|
|
256
256
|
let nextSyncToken: string | undefined;
|
|
257
257
|
|
|
258
|
-
|
|
258
|
+
// Safety bound — at 250 events/page, 200 pages = 50k events, more than
|
|
259
|
+
// any reasonable calendar window. Stops a runaway loop if the upstream
|
|
260
|
+
// ever returns a self-referential page token.
|
|
261
|
+
const MAX_PAGES = 200;
|
|
262
|
+
let pages = 0;
|
|
263
|
+
|
|
264
|
+
while (pages < MAX_PAGES) {
|
|
265
|
+
pages++;
|
|
259
266
|
// Always request a full page — `maxResults` is a soft cap on *stored*
|
|
260
267
|
// events, not a reason to shrink the request size (shrinking to 1 once the
|
|
261
268
|
// cap is hit would crawl a busy calendar one event per round-trip).
|
|
@@ -350,7 +357,12 @@ export default class GoogleCalendarConnector extends ConnectorRuntime {
|
|
|
350
357
|
let pageToken: string | undefined;
|
|
351
358
|
let nextSyncToken: string | undefined;
|
|
352
359
|
|
|
353
|
-
|
|
360
|
+
// Same hard ceiling as the full-sync path — defensive only.
|
|
361
|
+
const MAX_PAGES = 200;
|
|
362
|
+
let pages = 0;
|
|
363
|
+
|
|
364
|
+
while (pages < MAX_PAGES) {
|
|
365
|
+
pages++;
|
|
354
366
|
const params = new URLSearchParams({
|
|
355
367
|
maxResults: String(Math.max(1, Math.min(250, maxResults - events.length))),
|
|
356
368
|
syncToken,
|
|
@@ -136,6 +136,12 @@ async function fetchReviewsPage(
|
|
|
136
136
|
|
|
137
137
|
if (!res.ok) {
|
|
138
138
|
if (res.status === 404) throw new Error('App not found (404)');
|
|
139
|
+
if (res.status === 429) {
|
|
140
|
+
const retryAfter = res.headers.get('Retry-After');
|
|
141
|
+
throw new Error(
|
|
142
|
+
`Google Play rate limit (429). Retry after ${retryAfter ?? 'unknown'} seconds.`
|
|
143
|
+
);
|
|
144
|
+
}
|
|
139
145
|
throw new Error(`Google Play request failed: ${res.status} ${res.statusText}`);
|
|
140
146
|
}
|
|
141
147
|
|
|
@@ -143,14 +149,28 @@ async function fetchReviewsPage(
|
|
|
143
149
|
|
|
144
150
|
// Response starts with ")]}'" (security prefix), then a newline, then JSON.
|
|
145
151
|
// The library skips the first 5 characters.
|
|
146
|
-
|
|
152
|
+
// Wrap parse in try/catch — Google sometimes returns an HTML interstitial
|
|
153
|
+
// (captcha / geo-block / maintenance) with status 200, which would bubble up
|
|
154
|
+
// as an unhelpful SyntaxError otherwise.
|
|
155
|
+
let outer: any;
|
|
156
|
+
try {
|
|
157
|
+
outer = JSON.parse(text.substring(5));
|
|
158
|
+
} catch {
|
|
159
|
+
const preview = text.substring(0, 120).replace(/\s+/g, ' ');
|
|
160
|
+
throw new Error(`Google Play returned non-JSON response: ${preview}`);
|
|
161
|
+
}
|
|
147
162
|
const innerJson: string | null = outer?.[0]?.[2];
|
|
148
163
|
|
|
149
164
|
if (innerJson === null || innerJson === undefined) {
|
|
150
165
|
return { reviews: [], nextToken: null };
|
|
151
166
|
}
|
|
152
167
|
|
|
153
|
-
|
|
168
|
+
let data: any;
|
|
169
|
+
try {
|
|
170
|
+
data = JSON.parse(innerJson);
|
|
171
|
+
} catch {
|
|
172
|
+
throw new Error('Google Play returned malformed inner JSON payload');
|
|
173
|
+
}
|
|
154
174
|
return {
|
|
155
175
|
reviews: extractReviews(data, appId),
|
|
156
176
|
nextToken: extractPaginationToken(data),
|
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
type SyncContext,
|
|
17
17
|
type SyncResult,
|
|
18
18
|
} from '@lobu/connector-sdk';
|
|
19
|
+
import { validatePublicUrl } from './browser-scraper-utils.ts';
|
|
19
20
|
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
21
22
|
// Algolia HN API types
|
|
@@ -261,11 +262,36 @@ export default class HackerNewsConnector extends ConnectorRuntime {
|
|
|
261
262
|
`&numericFilters=${encodeURIComponent(`created_at_i>${lookbackTimestamp}`)}`;
|
|
262
263
|
|
|
263
264
|
const response = await fetch(url);
|
|
265
|
+
|
|
266
|
+
// Honor Algolia's rate-limit response so we don't hammer them and turn
|
|
267
|
+
// a transient 429 into "Unexpected token < in JSON" when the next call
|
|
268
|
+
// returns an HTML error page.
|
|
269
|
+
if (response.status === 429) {
|
|
270
|
+
const retryAfter = response.headers.get('Retry-After');
|
|
271
|
+
const waitMs = retryAfter ? Math.min(60_000, Math.max(1, Number(retryAfter)) * 1000) : 5000;
|
|
272
|
+
await this.sleep(Number.isFinite(waitMs) ? waitMs : 5000);
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
|
|
264
276
|
if (!response.ok) {
|
|
265
|
-
|
|
277
|
+
const text = await response.text().catch(() => '');
|
|
278
|
+
throw new Error(`Algolia API error (${response.status}): ${text}`);
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
// Algolia normally returns JSON, but proxies/captive portals occasionally
|
|
282
|
+
// return HTML. Surface a useful error instead of a bare SyntaxError that
|
|
283
|
+
// makes the connector look broken when the upstream is at fault.
|
|
284
|
+
let data: AlgoliaResponse;
|
|
285
|
+
try {
|
|
286
|
+
data = (await response.json()) as AlgoliaResponse;
|
|
287
|
+
} catch (err) {
|
|
288
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
289
|
+
throw new Error(`Algolia API returned non-JSON response: ${message}`);
|
|
266
290
|
}
|
|
267
291
|
|
|
268
|
-
|
|
292
|
+
if (!data || !Array.isArray(data.hits)) {
|
|
293
|
+
throw new Error('Algolia API returned an unexpected response shape');
|
|
294
|
+
}
|
|
269
295
|
|
|
270
296
|
for (const hit of data.hits) {
|
|
271
297
|
if (contentType === 'comment') {
|
|
@@ -404,6 +430,15 @@ export default class HackerNewsConnector extends ConnectorRuntime {
|
|
|
404
430
|
|
|
405
431
|
private async fetchExternalContent(url: string): Promise<string | null> {
|
|
406
432
|
try {
|
|
433
|
+
// SSRF guard — `url` is supplied by whoever submitted the HN story and
|
|
434
|
+
// is therefore attacker-controllable. Refuse to fetch private/internal
|
|
435
|
+
// addresses (loopback, 169.254.169.254 cloud metadata, RFC1918, etc.).
|
|
436
|
+
try {
|
|
437
|
+
validatePublicUrl(url);
|
|
438
|
+
} catch {
|
|
439
|
+
return null;
|
|
440
|
+
}
|
|
441
|
+
|
|
407
442
|
const controller = new AbortController();
|
|
408
443
|
const timeoutId = setTimeout(() => controller.abort(), this.CONTENT_FETCH_TIMEOUT);
|
|
409
444
|
|
package/dist/connectors/index.ts
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
export * from './apple_health.ts';
|
|
2
|
+
export * from './apple_photos.ts';
|
|
2
3
|
export * from './apple_screen_time.ts';
|
|
3
4
|
export * from './local_directory.ts';
|
|
4
5
|
export * from './browser-scraper-utils.ts';
|
|
6
|
+
// Browser primitives — connector definitions whose executors live in the
|
|
7
|
+
// Owletto for Chrome extension (apps/chrome/executor.js). Kept under
|
|
8
|
+
// browser/ so they're structurally distinct from third-party service
|
|
9
|
+
// connectors (linkedin, revolut, github, etc.).
|
|
10
|
+
export * from './browser/evaluate.ts';
|
|
11
|
+
export * from './browser/fill_form.ts';
|
|
12
|
+
export * from './browser/page_text.ts';
|
|
5
13
|
export * from './capterra.ts';
|
|
14
|
+
export * from './chrome_tabs.ts';
|
|
6
15
|
export * from './g2.ts';
|
|
7
16
|
export * from './github.ts';
|
|
8
17
|
export * from './glassdoor.ts';
|
|
9
18
|
export * from './gmaps.ts';
|
|
10
19
|
export * from './google_calendar.ts';
|
|
11
20
|
export * from './google_gmail.ts';
|
|
12
|
-
export * from './google_photos.ts';
|
|
13
21
|
export * from './google_play.ts';
|
|
14
22
|
export * from './hackernews.ts';
|
|
15
23
|
export * from './ios_appstore.ts';
|
|
@@ -142,12 +142,9 @@ function extractAmountAndCurrency(
|
|
|
142
142
|
const amt = record.amount;
|
|
143
143
|
if (amt && typeof amt === "object") {
|
|
144
144
|
const obj = amt as Record<string, unknown>;
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
: typeof obj.amount === "number"
|
|
149
|
-
? obj.amount
|
|
150
|
-
: null;
|
|
145
|
+
let value: number | null = null;
|
|
146
|
+
if (typeof obj.value === "number") value = obj.value;
|
|
147
|
+
else if (typeof obj.amount === "number") value = obj.amount;
|
|
151
148
|
const currency = typeof obj.currency === "string" ? obj.currency : null;
|
|
152
149
|
if (value !== null && currency) return { amount: value, currency };
|
|
153
150
|
}
|
|
@@ -200,13 +197,13 @@ function extractBalance(
|
|
|
200
197
|
record: Record<string, unknown>,
|
|
201
198
|
currency: string,
|
|
202
199
|
): number | undefined {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
200
|
+
let raw: unknown;
|
|
201
|
+
if (typeof record.balance === "number") {
|
|
202
|
+
raw = record.balance;
|
|
203
|
+
} else if (record.balance && typeof record.balance === "object") {
|
|
204
|
+
const obj = record.balance as Record<string, unknown>;
|
|
205
|
+
raw = obj.value ?? obj.amount;
|
|
206
|
+
}
|
|
210
207
|
if (typeof raw !== "number" || !Number.isFinite(raw)) return undefined;
|
|
211
208
|
return Number.isInteger(raw) ? minorUnitsToMajor(raw, currency) : raw;
|
|
212
209
|
}
|
package/dist/connectors/rss.ts
CHANGED
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
type SyncContext,
|
|
16
16
|
type SyncResult,
|
|
17
17
|
} from '@lobu/connector-sdk';
|
|
18
|
+
import { validatePublicUrl } from './browser-scraper-utils.ts';
|
|
18
19
|
|
|
19
20
|
// ---------------------------------------------------------------------------
|
|
20
21
|
// Types
|
|
@@ -211,6 +212,11 @@ export default class RSSConnector extends ConnectorRuntime {
|
|
|
211
212
|
// -------------------------------------------------------------------------
|
|
212
213
|
|
|
213
214
|
private async fetchAndParseFeed(feedUrl: string, maxItems: number): Promise<RSSFeedItem[]> {
|
|
215
|
+
// SSRF guard at the trust boundary. `feed_urls` is operator/user supplied
|
|
216
|
+
// via connector config and must not be allowed to target loopback, RFC1918,
|
|
217
|
+
// or cloud-metadata IPs from the gateway process.
|
|
218
|
+
validatePublicUrl(feedUrl);
|
|
219
|
+
|
|
214
220
|
const controller = new AbortController();
|
|
215
221
|
const timeoutId = setTimeout(() => controller.abort(), this.FETCH_TIMEOUT_MS);
|
|
216
222
|
|
|
@@ -222,18 +228,13 @@ export default class RSSConnector extends ConnectorRuntime {
|
|
|
222
228
|
Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
|
|
223
229
|
},
|
|
224
230
|
});
|
|
225
|
-
|
|
226
|
-
clearTimeout(timeoutId);
|
|
227
|
-
|
|
228
231
|
if (!response.ok) {
|
|
229
232
|
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
|
|
230
233
|
}
|
|
231
|
-
|
|
232
234
|
const xml = await response.text();
|
|
233
235
|
return this.parseXml(xml, feedUrl, maxItems);
|
|
234
|
-
}
|
|
236
|
+
} finally {
|
|
235
237
|
clearTimeout(timeoutId);
|
|
236
|
-
throw err;
|
|
237
238
|
}
|
|
238
239
|
}
|
|
239
240
|
|
|
@@ -413,8 +414,32 @@ export default class RSSConnector extends ConnectorRuntime {
|
|
|
413
414
|
case '#39':
|
|
414
415
|
return "'";
|
|
415
416
|
default:
|
|
416
|
-
|
|
417
|
-
|
|
417
|
+
// Use fromCodePoint, not fromCharCode — astral-plane characters
|
|
418
|
+
// (emoji, CJK extension B+, etc.) have code points > 0xFFFF which
|
|
419
|
+
// fromCharCode silently truncates, producing mojibake in feed
|
|
420
|
+
// titles. Guard the range so a malformed entity doesn't throw.
|
|
421
|
+
if (hex) {
|
|
422
|
+
const cp = parseInt(hex, 16);
|
|
423
|
+
if (Number.isFinite(cp) && cp >= 0 && cp <= 0x10ffff) {
|
|
424
|
+
try {
|
|
425
|
+
return String.fromCodePoint(cp);
|
|
426
|
+
} catch {
|
|
427
|
+
return _match;
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
return _match;
|
|
431
|
+
}
|
|
432
|
+
if (decimal) {
|
|
433
|
+
const cp = parseInt(decimal, 10);
|
|
434
|
+
if (Number.isFinite(cp) && cp >= 0 && cp <= 0x10ffff) {
|
|
435
|
+
try {
|
|
436
|
+
return String.fromCodePoint(cp);
|
|
437
|
+
} catch {
|
|
438
|
+
return _match;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
return _match;
|
|
442
|
+
}
|
|
418
443
|
return _match;
|
|
419
444
|
}
|
|
420
445
|
}
|
|
@@ -98,7 +98,11 @@ export default class TrustpilotConnector extends ConnectorRuntime {
|
|
|
98
98
|
throw new Error('Either business_url or business_name is required');
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
|
|
101
|
+
// encodeURIComponent the user-supplied businessName so a value like
|
|
102
|
+
// "../search?foo=bar" can't escape the /review/ path on trustpilot.com.
|
|
103
|
+
const baseUrl =
|
|
104
|
+
businessUrl ||
|
|
105
|
+
`https://www.trustpilot.com/review/${encodeURIComponent(businessName ?? '')}`;
|
|
102
106
|
validateUrlDomain(baseUrl, 'trustpilot.com');
|
|
103
107
|
|
|
104
108
|
const userDataDir = getBrowserUserDataDir(ctx.sessionState);
|
|
@@ -161,27 +165,34 @@ export default class TrustpilotConnector extends ConnectorRuntime {
|
|
|
161
165
|
// Filter reviews with meaningful content (more than 10 chars)
|
|
162
166
|
const reviews: TrustpilotReview[] = rawReviews.filter((r) => r.text && r.text.length > 10);
|
|
163
167
|
|
|
164
|
-
// Transform to EventEnvelope format
|
|
165
|
-
|
|
168
|
+
// Transform to EventEnvelope format. Drop rows whose `date` attribute
|
|
169
|
+
// was missing/invalid in the DOM — `new Date("")` yields an Invalid
|
|
170
|
+
// Date, which downstream sorting/checkpointing then can't compare, and
|
|
171
|
+
// an empty `date` made `origin_id` collide on `-<author>` across rows.
|
|
172
|
+
const events: EventEnvelope[] = reviews.flatMap((review) => {
|
|
166
173
|
const content = review.title ? `${review.title}\n\n${review.text}` : review.text;
|
|
167
|
-
|
|
168
|
-
return
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
174
|
+
const parsedDate = review.date ? new Date(review.date) : null;
|
|
175
|
+
if (!parsedDate || Number.isNaN(parsedDate.getTime())) return [];
|
|
176
|
+
|
|
177
|
+
return [
|
|
178
|
+
{
|
|
179
|
+
origin_id: `${review.date}-${review.author}`,
|
|
180
|
+
payload_text: content,
|
|
181
|
+
author_name: review.author,
|
|
182
|
+
occurred_at: parsedDate,
|
|
183
|
+
origin_type: 'review',
|
|
184
|
+
score: calculateEngagementScore('trustpilot', {
|
|
185
|
+
rating: review.rating,
|
|
186
|
+
helpful_count: 0,
|
|
187
|
+
}),
|
|
188
|
+
source_url: baseUrl,
|
|
189
|
+
metadata: {
|
|
190
|
+
rating: review.rating,
|
|
191
|
+
helpful_count: 0,
|
|
192
|
+
title: review.title,
|
|
193
|
+
},
|
|
183
194
|
},
|
|
184
|
-
|
|
195
|
+
];
|
|
185
196
|
});
|
|
186
197
|
|
|
187
198
|
return {
|
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
type SyncResult,
|
|
21
21
|
} from '@lobu/connector-sdk';
|
|
22
22
|
import type { Page } from 'playwright';
|
|
23
|
+
import { validatePublicUrl } from './browser-scraper-utils.ts';
|
|
23
24
|
|
|
24
25
|
interface PageSection {
|
|
25
26
|
heading: string;
|
|
@@ -50,72 +51,6 @@ function shouldSkipCookieBannerText(text: string): boolean {
|
|
|
50
51
|
return countPatternMatches(normalized, COOKIE_BANNER_PATTERNS) >= 3;
|
|
51
52
|
}
|
|
52
53
|
|
|
53
|
-
/**
|
|
54
|
-
* Validates a URL is safe for server-side fetching.
|
|
55
|
-
* Blocks private/internal network addresses to prevent SSRF attacks.
|
|
56
|
-
*/
|
|
57
|
-
function validatePublicUrl(url: string): void {
|
|
58
|
-
let parsed: URL;
|
|
59
|
-
try {
|
|
60
|
-
parsed = new URL(url);
|
|
61
|
-
} catch {
|
|
62
|
-
throw new Error(`Invalid URL: ${url}`);
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
|
|
66
|
-
throw new Error(`URL must use http: or https: protocol, got ${parsed.protocol}`);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const hostname = parsed.hostname.toLowerCase();
|
|
70
|
-
|
|
71
|
-
// Block localhost variants
|
|
72
|
-
if (hostname === 'localhost' || hostname === '[::1]' || hostname.endsWith('.localhost')) {
|
|
73
|
-
throw new Error(`URL must not point to localhost: ${hostname}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Block private/internal IP ranges
|
|
77
|
-
// IPv4 patterns: 127.x.x.x, 10.x.x.x, 192.168.x.x, 172.16-31.x.x, 169.254.x.x, 0.x.x.x
|
|
78
|
-
const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
|
|
79
|
-
if (ipv4Match) {
|
|
80
|
-
const [, a, b] = ipv4Match.map(Number);
|
|
81
|
-
if (
|
|
82
|
-
a === 127 || // 127.0.0.0/8 loopback
|
|
83
|
-
a === 10 || // 10.0.0.0/8 private
|
|
84
|
-
(a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
|
|
85
|
-
(a === 192 && b === 168) || // 192.168.0.0/16 private
|
|
86
|
-
(a === 169 && b === 254) || // 169.254.0.0/16 link-local
|
|
87
|
-
a === 0 // 0.0.0.0/8
|
|
88
|
-
) {
|
|
89
|
-
throw new Error(`URL must not point to a private/internal IP address: ${hostname}`);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
// Block IPv6 private ranges (bracketed notation in URLs)
|
|
94
|
-
if (hostname.startsWith('[')) {
|
|
95
|
-
const ipv6 = hostname.slice(1, -1).toLowerCase();
|
|
96
|
-
if (
|
|
97
|
-
ipv6 === '::1' ||
|
|
98
|
-
ipv6.startsWith('fe80:') || // link-local
|
|
99
|
-
ipv6.startsWith('fc') || // unique local (fc00::/7)
|
|
100
|
-
ipv6.startsWith('fd') || // unique local (fc00::/7)
|
|
101
|
-
ipv6 === '::' || // unspecified
|
|
102
|
-
ipv6.startsWith('::ffff:') // IPv4-mapped IPv6
|
|
103
|
-
) {
|
|
104
|
-
throw new Error(`URL must not point to a private/internal IPv6 address: ${hostname}`);
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Block common internal hostnames
|
|
109
|
-
if (
|
|
110
|
-
hostname.endsWith('.internal') ||
|
|
111
|
-
hostname.endsWith('.local') ||
|
|
112
|
-
hostname.endsWith('.corp') ||
|
|
113
|
-
hostname.endsWith('.lan')
|
|
114
|
-
) {
|
|
115
|
-
throw new Error(`URL must not point to an internal hostname: ${hostname}`);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
54
|
export default class WebsiteConnector extends ConnectorRuntime {
|
|
120
55
|
readonly definition: ConnectorDefinition = {
|
|
121
56
|
key: 'website',
|
|
@@ -457,7 +392,11 @@ export default class WebsiteConnector extends ConnectorRuntime {
|
|
|
457
392
|
return result.join('\n');
|
|
458
393
|
}
|
|
459
394
|
|
|
460
|
-
private async fetchSitemap(sitemapUrl: string): Promise<string[]> {
|
|
395
|
+
private async fetchSitemap(sitemapUrl: string, depth = 0): Promise<string[]> {
|
|
396
|
+
// Sitemap-index recursion bound — caps fan-out from a remote sitemap that
|
|
397
|
+
// links to a sitemap that links to a sitemap... untrusted XML must not
|
|
398
|
+
// drive unbounded outbound traffic.
|
|
399
|
+
if (depth > 2) return [];
|
|
461
400
|
const response = await fetch(sitemapUrl, {
|
|
462
401
|
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; LobuBot/1.0)' },
|
|
463
402
|
});
|
|
@@ -493,7 +432,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
|
|
|
493
432
|
}
|
|
494
433
|
for (const childUrl of childSitemaps.slice(0, 5)) {
|
|
495
434
|
validatePublicUrl(childUrl);
|
|
496
|
-
const childUrls = await this.fetchSitemap(childUrl);
|
|
435
|
+
const childUrls = await this.fetchSitemap(childUrl, depth + 1);
|
|
497
436
|
urls.push(...childUrls);
|
|
498
437
|
}
|
|
499
438
|
}
|
|
@@ -424,11 +424,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
|
|
|
424
424
|
},
|
|
425
425
|
};
|
|
426
426
|
} catch (error) {
|
|
427
|
-
|
|
428
|
-
sock.end(undefined);
|
|
429
|
-
} catch {
|
|
430
|
-
/* ignore */
|
|
431
|
-
}
|
|
427
|
+
safeEnd(sock);
|
|
432
428
|
throw error;
|
|
433
429
|
}
|
|
434
430
|
}
|
|
@@ -478,11 +474,7 @@ async function attemptPairing(
|
|
|
478
474
|
sock.ev.off('connection.update', handler);
|
|
479
475
|
sock.ev.off('creds.update', credsListener);
|
|
480
476
|
ctx.signal.removeEventListener('abort', onAbort);
|
|
481
|
-
|
|
482
|
-
sock.end(undefined);
|
|
483
|
-
} catch {
|
|
484
|
-
/* ignore */
|
|
485
|
-
}
|
|
477
|
+
safeEnd(sock);
|
|
486
478
|
resolve(outcome);
|
|
487
479
|
};
|
|
488
480
|
|
|
@@ -592,11 +584,7 @@ async function drainHistory(
|
|
|
592
584
|
sock.ev.off('chats.upsert', chatsListener);
|
|
593
585
|
sock.ev.off('messaging-history.set', historyListener);
|
|
594
586
|
sock.ev.off('messages.upsert', messagesListener);
|
|
595
|
-
|
|
596
|
-
sock.end(undefined);
|
|
597
|
-
} catch {
|
|
598
|
-
/* ignore */
|
|
599
|
-
}
|
|
587
|
+
safeEnd(sock);
|
|
600
588
|
};
|
|
601
589
|
|
|
602
590
|
try {
|
|
@@ -829,6 +817,14 @@ function delay(ms: number): Promise<void> {
|
|
|
829
817
|
return new Promise((r) => setTimeout(r, ms));
|
|
830
818
|
}
|
|
831
819
|
|
|
820
|
+
function safeEnd(sock: ReturnType<typeof makeWASocket>): void {
|
|
821
|
+
try {
|
|
822
|
+
sock.end(undefined);
|
|
823
|
+
} catch {
|
|
824
|
+
/* ignore */
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
832
828
|
function waitForOpen(sock: ReturnType<typeof makeWASocket>, timeoutMs: number): Promise<boolean> {
|
|
833
829
|
return new Promise((resolve) => {
|
|
834
830
|
let newLogin = false;
|
|
@@ -963,12 +959,7 @@ export function toEvent(
|
|
|
963
959
|
const text = extractText(m.message);
|
|
964
960
|
if (!text) return null;
|
|
965
961
|
|
|
966
|
-
const tsRaw =
|
|
967
|
-
typeof m.messageTimestamp === 'number'
|
|
968
|
-
? m.messageTimestamp
|
|
969
|
-
: ((m.messageTimestamp as { low?: number; toNumber?: () => number } | null)?.toNumber?.() ??
|
|
970
|
-
(m.messageTimestamp as { low?: number } | null)?.low ??
|
|
971
|
-
0);
|
|
962
|
+
const tsRaw = extractTs(m);
|
|
972
963
|
if (!tsRaw) return null;
|
|
973
964
|
const occurredAt = new Date(tsRaw * 1000);
|
|
974
965
|
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
-- migrate:up
|
|
2
|
+
|
|
3
|
+
-- Collapse `connection.config.auto_approve_actions` (string[]) and
|
|
4
|
+
-- `connection.config.require_approval_actions` (string[]) into a single
|
|
5
|
+
-- `action_modes` (Record<string, 'disabled' | 'approval' | 'auto'>) map.
|
|
6
|
+
--
|
|
7
|
+
-- The old two-array model couldn't express "agent must not call this op
|
|
8
|
+
-- at all" — every action the connector defined was always reachable, the
|
|
9
|
+
-- arrays only flipped approval prompts. The new map adds 'disabled' as the
|
|
10
|
+
-- third state and gives every op an explicit user-chosen mode.
|
|
11
|
+
--
|
|
12
|
+
-- Backfill rule, per row, for every op listed in either array:
|
|
13
|
+
-- op in auto_approve_actions → action_modes[op] = 'auto'
|
|
14
|
+
-- op in require_approval_actions → action_modes[op] = 'approval'
|
|
15
|
+
-- When an op appears in both, 'approval' wins (it's the stricter signal:
|
|
16
|
+
-- the user explicitly opted in to seeing an approval prompt).
|
|
17
|
+
--
|
|
18
|
+
-- Ops the user never touched are not stored in action_modes; the server
|
|
19
|
+
-- falls back to the connector's per-op `requires_approval` default at read
|
|
20
|
+
-- time, which preserves today's "all on" behavior.
|
|
21
|
+
--
|
|
22
|
+
-- We drop the two old keys in the same statement so the new state is the
|
|
23
|
+
-- only state on disk after migration.
|
|
24
|
+
|
|
25
|
+
UPDATE public.connections
|
|
26
|
+
SET config = (
|
|
27
|
+
COALESCE(config, '{}'::jsonb)
|
|
28
|
+
- 'auto_approve_actions'
|
|
29
|
+
- 'require_approval_actions'
|
|
30
|
+
)
|
|
31
|
+
|| jsonb_build_object(
|
|
32
|
+
'action_modes',
|
|
33
|
+
COALESCE(
|
|
34
|
+
(
|
|
35
|
+
-- 'approval' wins over 'auto' when an op appears in both arrays
|
|
36
|
+
-- (MIN('approval', 'auto') = 'approval' lexicographically).
|
|
37
|
+
SELECT jsonb_object_agg(op_key, mode)
|
|
38
|
+
FROM (
|
|
39
|
+
SELECT op_key, MIN(mode) AS mode
|
|
40
|
+
FROM (
|
|
41
|
+
SELECT op_key, 'approval'::text AS mode
|
|
42
|
+
FROM jsonb_array_elements_text(
|
|
43
|
+
CASE
|
|
44
|
+
WHEN jsonb_typeof(config->'require_approval_actions') = 'array'
|
|
45
|
+
THEN config->'require_approval_actions'
|
|
46
|
+
ELSE '[]'::jsonb
|
|
47
|
+
END
|
|
48
|
+
) AS op_key
|
|
49
|
+
UNION ALL
|
|
50
|
+
SELECT op_key, 'auto'::text AS mode
|
|
51
|
+
FROM jsonb_array_elements_text(
|
|
52
|
+
CASE
|
|
53
|
+
WHEN jsonb_typeof(config->'auto_approve_actions') = 'array'
|
|
54
|
+
THEN config->'auto_approve_actions'
|
|
55
|
+
ELSE '[]'::jsonb
|
|
56
|
+
END
|
|
57
|
+
) AS op_key
|
|
58
|
+
) all_modes
|
|
59
|
+
GROUP BY op_key
|
|
60
|
+
) collapsed
|
|
61
|
+
),
|
|
62
|
+
'{}'::jsonb
|
|
63
|
+
)
|
|
64
|
+
)
|
|
65
|
+
WHERE config IS NOT NULL
|
|
66
|
+
AND (
|
|
67
|
+
config ? 'auto_approve_actions'
|
|
68
|
+
OR config ? 'require_approval_actions'
|
|
69
|
+
);
|
|
70
|
+
|
|
71
|
+
-- migrate:down
|
|
72
|
+
|
|
73
|
+
-- Reverse the collapse: split action_modes back into the two arrays.
|
|
74
|
+
-- 'auto' → auto_approve_actions
|
|
75
|
+
-- 'approval' → require_approval_actions
|
|
76
|
+
-- 'disabled' has no pre-refactor equivalent and is silently dropped on
|
|
77
|
+
-- downgrade — the agent will see the op again as if no override existed.
|
|
78
|
+
UPDATE public.connections
|
|
79
|
+
SET config = (
|
|
80
|
+
COALESCE(config, '{}'::jsonb) - 'action_modes'
|
|
81
|
+
)
|
|
82
|
+
|| jsonb_build_object(
|
|
83
|
+
'auto_approve_actions',
|
|
84
|
+
COALESCE(
|
|
85
|
+
(
|
|
86
|
+
SELECT jsonb_agg(key)
|
|
87
|
+
FROM jsonb_each_text(config->'action_modes')
|
|
88
|
+
WHERE value = 'auto'
|
|
89
|
+
),
|
|
90
|
+
'[]'::jsonb
|
|
91
|
+
),
|
|
92
|
+
'require_approval_actions',
|
|
93
|
+
COALESCE(
|
|
94
|
+
(
|
|
95
|
+
SELECT jsonb_agg(key)
|
|
96
|
+
FROM jsonb_each_text(config->'action_modes')
|
|
97
|
+
WHERE value = 'approval'
|
|
98
|
+
),
|
|
99
|
+
'[]'::jsonb
|
|
100
|
+
)
|
|
101
|
+
)
|
|
102
|
+
WHERE config IS NOT NULL
|
|
103
|
+
AND jsonb_typeof(config->'action_modes') = 'object';
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
-- migrate:up
|
|
2
|
+
-- Relax the device-binding XOR for browser_session profiles to allow
|
|
3
|
+
-- mirror mode, where neither user_data_dir nor cdp_url is set on the
|
|
4
|
+
-- row (the source profile dir lives in auth_data.source_profile_dir).
|
|
5
|
+
-- Keep the mutual exclusion of the two columns so they can't be set
|
|
6
|
+
-- together; application validation enforces "exactly one of mirror /
|
|
7
|
+
-- cdp / legacy" per row.
|
|
8
|
+
|
|
9
|
+
ALTER TABLE auth_profiles
|
|
10
|
+
DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_xor;
|
|
11
|
+
|
|
12
|
+
ALTER TABLE auth_profiles
|
|
13
|
+
ADD CONSTRAINT auth_profiles_device_browser_path_mutex
|
|
14
|
+
CHECK (
|
|
15
|
+
device_worker_id IS NULL
|
|
16
|
+
OR profile_kind <> 'browser_session'
|
|
17
|
+
OR user_data_dir IS NULL
|
|
18
|
+
OR cdp_url IS NULL
|
|
19
|
+
);
|
|
20
|
+
|
|
21
|
+
-- migrate:down
|
|
22
|
+
ALTER TABLE auth_profiles
|
|
23
|
+
DROP CONSTRAINT IF EXISTS auth_profiles_device_browser_path_mutex;
|
|
24
|
+
|
|
25
|
+
ALTER TABLE auth_profiles
|
|
26
|
+
ADD CONSTRAINT auth_profiles_device_browser_path_xor
|
|
27
|
+
CHECK (
|
|
28
|
+
device_worker_id IS NULL
|
|
29
|
+
OR profile_kind <> 'browser_session'
|
|
30
|
+
OR ((user_data_dir IS NOT NULL) AND (cdp_url IS NULL))
|
|
31
|
+
OR ((user_data_dir IS NULL) AND (cdp_url IS NOT NULL))
|
|
32
|
+
);
|