@lobu/cli 6.1.1 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/_lib/apply/apply-cmd.d.ts +36 -0
- package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
- package/dist/commands/_lib/apply/apply-cmd.js +696 -40
- package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
- package/dist/commands/_lib/apply/client.d.ts +285 -0
- package/dist/commands/_lib/apply/client.d.ts.map +1 -1
- package/dist/commands/_lib/apply/client.js +469 -28
- package/dist/commands/_lib/apply/client.js.map +1 -1
- package/dist/commands/_lib/apply/desired-state.d.ts +187 -3
- package/dist/commands/_lib/apply/desired-state.d.ts.map +1 -1
- package/dist/commands/_lib/apply/desired-state.js +879 -88
- package/dist/commands/_lib/apply/desired-state.js.map +1 -1
- package/dist/commands/_lib/apply/diff.d.ts +72 -3
- package/dist/commands/_lib/apply/diff.d.ts.map +1 -1
- package/dist/commands/_lib/apply/diff.js +473 -84
- package/dist/commands/_lib/apply/diff.js.map +1 -1
- package/dist/commands/_lib/apply/prompt.d.ts +6 -0
- package/dist/commands/_lib/apply/prompt.d.ts.map +1 -1
- package/dist/commands/_lib/apply/prompt.js +16 -0
- package/dist/commands/_lib/apply/prompt.js.map +1 -1
- package/dist/commands/_lib/apply/render.d.ts +9 -0
- package/dist/commands/_lib/apply/render.d.ts.map +1 -1
- package/dist/commands/_lib/apply/render.js +80 -3
- package/dist/commands/_lib/apply/render.js.map +1 -1
- package/dist/commands/_lib/connector-loader.d.ts +3 -0
- package/dist/commands/_lib/connector-loader.d.ts.map +1 -0
- package/dist/commands/_lib/connector-loader.js +129 -0
- package/dist/commands/_lib/connector-loader.js.map +1 -0
- package/dist/commands/_lib/connector-run-cmd.d.ts +35 -0
- package/dist/commands/_lib/connector-run-cmd.d.ts.map +1 -0
- package/dist/commands/_lib/connector-run-cmd.js +351 -0
- package/dist/commands/_lib/connector-run-cmd.js.map +1 -0
- package/dist/commands/_lib/export/export-cmd.d.ts +35 -0
- package/dist/commands/_lib/export/export-cmd.d.ts.map +1 -0
- package/dist/commands/_lib/export/export-cmd.js +329 -0
- package/dist/commands/_lib/export/export-cmd.js.map +1 -0
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +11 -14
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +28 -7
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/connector.d.ts +3 -0
- package/dist/commands/connector.d.ts.map +1 -0
- package/dist/commands/connector.js +5 -0
- package/dist/commands/connector.js.map +1 -0
- package/dist/commands/dev.d.ts +23 -0
- package/dist/commands/dev.d.ts.map +1 -1
- package/dist/commands/dev.js +273 -8
- package/dist/commands/dev.js.map +1 -1
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +2 -3
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +28 -18
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +29 -1
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +22 -16
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.js +15 -144
- package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
- package/dist/commands/memory/_lib/schema.d.ts +28 -1
- package/dist/commands/memory/_lib/schema.d.ts.map +1 -1
- package/dist/commands/memory/_lib/schema.js +120 -4
- package/dist/commands/memory/_lib/schema.js.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.js +41 -18
- package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
- package/dist/commands/org.d.ts +4 -0
- package/dist/commands/org.d.ts.map +1 -1
- package/dist/commands/org.js +10 -0
- package/dist/commands/org.js.map +1 -1
- package/dist/commands/token.d.ts +9 -0
- package/dist/commands/token.d.ts.map +1 -1
- package/dist/commands/token.js +54 -3
- package/dist/commands/token.js.map +1 -1
- package/dist/commands/validate.d.ts.map +1 -1
- package/dist/commands/validate.js +4 -13
- package/dist/commands/validate.js.map +1 -1
- package/dist/config/loader.js +2 -2
- package/dist/config/loader.js.map +1 -1
- package/dist/connectors/README.md +2 -3
- package/dist/connectors/apple_health.ts +138 -0
- package/dist/connectors/apple_photos.ts +178 -0
- package/dist/connectors/apple_screen_time.ts +82 -0
- package/dist/connectors/browser/evaluate.ts +120 -0
- package/dist/connectors/browser/fill_form.ts +107 -0
- package/dist/connectors/browser/page_text.ts +108 -0
- package/dist/connectors/browser-scraper-utils.ts +111 -3
- package/dist/connectors/capterra.ts +5 -1
- package/dist/connectors/chrome_tabs.ts +74 -0
- package/dist/connectors/g2.ts +5 -1
- package/dist/connectors/github.ts +16 -38
- package/dist/connectors/glassdoor.ts +5 -1
- package/dist/connectors/google_calendar.ts +28 -6
- package/dist/connectors/google_gmail.ts +6 -3
- package/dist/connectors/google_play.ts +32 -5
- package/dist/connectors/hackernews.ts +37 -2
- package/dist/connectors/index.ts +14 -1
- package/dist/connectors/linkedin.ts +32 -9
- package/dist/connectors/local_directory.ts +91 -0
- package/dist/connectors/reddit.ts +1 -0
- package/dist/connectors/revolut.ts +569 -0
- package/dist/connectors/rss.ts +33 -8
- package/dist/connectors/trustpilot.ts +36 -21
- package/dist/connectors/website.ts +8 -69
- package/dist/connectors/whatsapp.ts +21 -22
- package/dist/connectors/whatsapp_local.ts +125 -0
- package/dist/connectors/x.ts +17 -7
- package/dist/db/migrations/20260510220000_connector_required_capability.sql +47 -0
- package/dist/db/migrations/20260512000000_device_worker_connection_binding.sql +113 -0
- package/dist/db/migrations/20260512131703_connections_slug.sql +131 -0
- package/dist/db/migrations/20260513000000_chat_user_identities.sql +24 -0
- package/dist/db/migrations/20260513120000_auth_profiles_device_binding.sql +50 -0
- package/dist/db/migrations/20260513150000_auth_profiles_cdp_url.sql +43 -0
- package/dist/db/migrations/20260513200000_notifications_as_events.sql +86 -0
- package/dist/db/migrations/20260514000000_scheduled_jobs.sql +97 -0
- package/dist/db/migrations/20260514120000_auth_profiles_connector_key_nullable.sql +42 -0
- package/dist/db/migrations/20260514130000_connection_action_modes.sql +103 -0
- package/dist/db/migrations/20260514160000_auth_profiles_mirror_mode.sql +32 -0
- package/dist/db/migrations/20260515120000_agents_per_org_pk.sql +66 -0
- package/dist/db/migrations/20260515150000_geo_enrichment.sql +208 -0
- package/dist/db/migrations/20260515160000_drop_agents_org_id_unique.sql +24 -0
- package/dist/db/migrations/20260515170000_auth_profiles_default_for_connector.sql +23 -0
- package/dist/db/migrations/20260516120000_agents_per_org_pk_swap.sql +125 -0
- package/dist/db/migrations/20260516200000_events_search_tsv.sql +134 -0
- package/dist/db/migrations/20260516200100_events_lifecycle_changes_index.sql +25 -0
- package/dist/db/migrations/20260517010000_drop_unused_indexes.sql +49 -0
- package/dist/db/migrations/20260517020000_softdelete_orphan_feeds.sql +56 -0
- package/dist/db/migrations/20260517030000_pat_worker_id_binding.sql +27 -0
- package/dist/db/migrations/20260517040000_archive_orphan_watchers.sql +30 -0
- package/dist/db/migrations/20260517050000_watcher_agent_id_not_null.sql +34 -0
- package/dist/db/migrations/20260517060000_watcher_schema_additions.sql +78 -0
- package/dist/db/migrations/20260517150000_goals_primitive.sql +55 -0
- package/dist/db/migrations/20260517160000_drop_goals_primitive.sql +45 -0
- package/dist/db/migrations/20260518000000_pending_interactions.sql +49 -0
- package/dist/db/migrations/20260518010000_runs_heartbeat_reaper_index.sql +22 -0
- package/dist/eval/client.d.ts.map +1 -1
- package/dist/eval/client.js +11 -0
- package/dist/eval/client.js.map +1 -1
- package/dist/eval/grader.js +2 -1
- package/dist/eval/grader.js.map +1 -1
- package/dist/eval/types.d.ts +2 -0
- package/dist/eval/types.d.ts.map +1 -1
- package/dist/index.d.ts +11 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +115 -114
- package/dist/index.js.map +1 -1
- package/dist/internal/context.d.ts +9 -0
- package/dist/internal/context.d.ts.map +1 -1
- package/dist/internal/context.js +41 -6
- package/dist/internal/context.js.map +1 -1
- package/dist/internal/credentials.d.ts +5 -0
- package/dist/internal/credentials.d.ts.map +1 -1
- package/dist/internal/credentials.js +75 -1
- package/dist/internal/credentials.js.map +1 -1
- package/dist/internal/gateway-url.d.ts +14 -0
- package/dist/internal/gateway-url.d.ts.map +1 -1
- package/dist/internal/gateway-url.js +19 -0
- package/dist/internal/gateway-url.js.map +1 -1
- package/dist/internal/index.d.ts +1 -1
- package/dist/internal/index.d.ts.map +1 -1
- package/dist/internal/index.js +1 -1
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/local-env.d.ts.map +1 -1
- package/dist/internal/local-env.js +9 -2
- package/dist/internal/local-env.js.map +1 -1
- package/dist/server.bundle.mjs +42251 -36931
- package/dist/start-local.bundle.mjs +16437 -9882
- package/dist/templates/TESTING.md.tmpl +9 -9
- package/package.json +8 -6
- package/dist/connectors/google_photos.ts +0 -776
|
@@ -15,6 +15,8 @@ import {
|
|
|
15
15
|
type SyncResult,
|
|
16
16
|
} from '@lobu/connector-sdk';
|
|
17
17
|
import {
|
|
18
|
+
getBrowserCdpUrl,
|
|
19
|
+
getBrowserUserDataDir,
|
|
18
20
|
handleCookieConsent,
|
|
19
21
|
openStealthBrowser,
|
|
20
22
|
validateUrlDomain,
|
|
@@ -96,10 +98,16 @@ export default class TrustpilotConnector extends ConnectorRuntime {
|
|
|
96
98
|
throw new Error('Either business_url or business_name is required');
|
|
97
99
|
}
|
|
98
100
|
|
|
99
|
-
|
|
101
|
+
// encodeURIComponent the user-supplied businessName so a value like
|
|
102
|
+
// "../search?foo=bar" can't escape the /review/ path on trustpilot.com.
|
|
103
|
+
const baseUrl =
|
|
104
|
+
businessUrl ||
|
|
105
|
+
`https://www.trustpilot.com/review/${encodeURIComponent(businessName ?? '')}`;
|
|
100
106
|
validateUrlDomain(baseUrl, 'trustpilot.com');
|
|
101
107
|
|
|
102
|
-
const
|
|
108
|
+
const userDataDir = getBrowserUserDataDir(ctx.sessionState);
|
|
109
|
+
const cdpUrl = getBrowserCdpUrl(ctx.sessionState) ?? 'auto';
|
|
110
|
+
const session = await openStealthBrowser({ cdpUrl, userDataDir });
|
|
103
111
|
|
|
104
112
|
return withBrowserErrorCapture(session, 'trustpilot-sync', async (page) => {
|
|
105
113
|
await page.goto(baseUrl, {
|
|
@@ -157,27 +165,34 @@ export default class TrustpilotConnector extends ConnectorRuntime {
|
|
|
157
165
|
// Filter reviews with meaningful content (more than 10 chars)
|
|
158
166
|
const reviews: TrustpilotReview[] = rawReviews.filter((r) => r.text && r.text.length > 10);
|
|
159
167
|
|
|
160
|
-
// Transform to EventEnvelope format
|
|
161
|
-
|
|
168
|
+
// Transform to EventEnvelope format. Drop rows whose `date` attribute
|
|
169
|
+
// was missing/invalid in the DOM — `new Date("")` yields an Invalid
|
|
170
|
+
// Date, which downstream sorting/checkpointing then can't compare, and
|
|
171
|
+
// an empty `date` made `origin_id` collide on `-<author>` across rows.
|
|
172
|
+
const events: EventEnvelope[] = reviews.flatMap((review) => {
|
|
162
173
|
const content = review.title ? `${review.title}\n\n${review.text}` : review.text;
|
|
163
|
-
|
|
164
|
-
return
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
174
|
+
const parsedDate = review.date ? new Date(review.date) : null;
|
|
175
|
+
if (!parsedDate || Number.isNaN(parsedDate.getTime())) return [];
|
|
176
|
+
|
|
177
|
+
return [
|
|
178
|
+
{
|
|
179
|
+
origin_id: `${review.date}-${review.author}`,
|
|
180
|
+
payload_text: content,
|
|
181
|
+
author_name: review.author,
|
|
182
|
+
occurred_at: parsedDate,
|
|
183
|
+
origin_type: 'review',
|
|
184
|
+
score: calculateEngagementScore('trustpilot', {
|
|
185
|
+
rating: review.rating,
|
|
186
|
+
helpful_count: 0,
|
|
187
|
+
}),
|
|
188
|
+
source_url: baseUrl,
|
|
189
|
+
metadata: {
|
|
190
|
+
rating: review.rating,
|
|
191
|
+
helpful_count: 0,
|
|
192
|
+
title: review.title,
|
|
193
|
+
},
|
|
179
194
|
},
|
|
180
|
-
|
|
195
|
+
];
|
|
181
196
|
});
|
|
182
197
|
|
|
183
198
|
return {
|
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
type SyncResult,
|
|
21
21
|
} from '@lobu/connector-sdk';
|
|
22
22
|
import type { Page } from 'playwright';
|
|
23
|
+
import { validatePublicUrl } from './browser-scraper-utils.ts';
|
|
23
24
|
|
|
24
25
|
interface PageSection {
|
|
25
26
|
heading: string;
|
|
@@ -50,72 +51,6 @@ function shouldSkipCookieBannerText(text: string): boolean {
|
|
|
50
51
|
return countPatternMatches(normalized, COOKIE_BANNER_PATTERNS) >= 3;
|
|
51
52
|
}
|
|
52
53
|
|
|
53
|
-
/**
|
|
54
|
-
* Validates a URL is safe for server-side fetching.
|
|
55
|
-
* Blocks private/internal network addresses to prevent SSRF attacks.
|
|
56
|
-
*/
|
|
57
|
-
function validatePublicUrl(url: string): void {
|
|
58
|
-
let parsed: URL;
|
|
59
|
-
try {
|
|
60
|
-
parsed = new URL(url);
|
|
61
|
-
} catch {
|
|
62
|
-
throw new Error(`Invalid URL: ${url}`);
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
if (parsed.protocol !== 'https:' && parsed.protocol !== 'http:') {
|
|
66
|
-
throw new Error(`URL must use http: or https: protocol, got ${parsed.protocol}`);
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const hostname = parsed.hostname.toLowerCase();
|
|
70
|
-
|
|
71
|
-
// Block localhost variants
|
|
72
|
-
if (hostname === 'localhost' || hostname === '[::1]' || hostname.endsWith('.localhost')) {
|
|
73
|
-
throw new Error(`URL must not point to localhost: ${hostname}`);
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
// Block private/internal IP ranges
|
|
77
|
-
// IPv4 patterns: 127.x.x.x, 10.x.x.x, 192.168.x.x, 172.16-31.x.x, 169.254.x.x, 0.x.x.x
|
|
78
|
-
const ipv4Match = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
|
|
79
|
-
if (ipv4Match) {
|
|
80
|
-
const [, a, b] = ipv4Match.map(Number);
|
|
81
|
-
if (
|
|
82
|
-
a === 127 || // 127.0.0.0/8 loopback
|
|
83
|
-
a === 10 || // 10.0.0.0/8 private
|
|
84
|
-
(a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
|
|
85
|
-
(a === 192 && b === 168) || // 192.168.0.0/16 private
|
|
86
|
-
(a === 169 && b === 254) || // 169.254.0.0/16 link-local
|
|
87
|
-
a === 0 // 0.0.0.0/8
|
|
88
|
-
) {
|
|
89
|
-
throw new Error(`URL must not point to a private/internal IP address: ${hostname}`);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
// Block IPv6 private ranges (bracketed notation in URLs)
|
|
94
|
-
if (hostname.startsWith('[')) {
|
|
95
|
-
const ipv6 = hostname.slice(1, -1).toLowerCase();
|
|
96
|
-
if (
|
|
97
|
-
ipv6 === '::1' ||
|
|
98
|
-
ipv6.startsWith('fe80:') || // link-local
|
|
99
|
-
ipv6.startsWith('fc') || // unique local (fc00::/7)
|
|
100
|
-
ipv6.startsWith('fd') || // unique local (fc00::/7)
|
|
101
|
-
ipv6 === '::' || // unspecified
|
|
102
|
-
ipv6.startsWith('::ffff:') // IPv4-mapped IPv6
|
|
103
|
-
) {
|
|
104
|
-
throw new Error(`URL must not point to a private/internal IPv6 address: ${hostname}`);
|
|
105
|
-
}
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
// Block common internal hostnames
|
|
109
|
-
if (
|
|
110
|
-
hostname.endsWith('.internal') ||
|
|
111
|
-
hostname.endsWith('.local') ||
|
|
112
|
-
hostname.endsWith('.corp') ||
|
|
113
|
-
hostname.endsWith('.lan')
|
|
114
|
-
) {
|
|
115
|
-
throw new Error(`URL must not point to an internal hostname: ${hostname}`);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
54
|
export default class WebsiteConnector extends ConnectorRuntime {
|
|
120
55
|
readonly definition: ConnectorDefinition = {
|
|
121
56
|
key: 'website',
|
|
@@ -238,7 +173,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
|
|
|
238
173
|
urls = urls.slice(0, maxPages);
|
|
239
174
|
|
|
240
175
|
// Launch browser
|
|
241
|
-
const { browser } = await launchBrowser({
|
|
176
|
+
const { browser } = await launchBrowser({ stealth: false });
|
|
242
177
|
const events: EventEnvelope[] = [];
|
|
243
178
|
const newHashes: Record<string, string> = {};
|
|
244
179
|
|
|
@@ -457,7 +392,11 @@ export default class WebsiteConnector extends ConnectorRuntime {
|
|
|
457
392
|
return result.join('\n');
|
|
458
393
|
}
|
|
459
394
|
|
|
460
|
-
private async fetchSitemap(sitemapUrl: string): Promise<string[]> {
|
|
395
|
+
private async fetchSitemap(sitemapUrl: string, depth = 0): Promise<string[]> {
|
|
396
|
+
// Sitemap-index recursion bound — caps fan-out from a remote sitemap that
|
|
397
|
+
// links to a sitemap that links to a sitemap... untrusted XML must not
|
|
398
|
+
// drive unbounded outbound traffic.
|
|
399
|
+
if (depth > 2) return [];
|
|
461
400
|
const response = await fetch(sitemapUrl, {
|
|
462
401
|
headers: { 'User-Agent': 'Mozilla/5.0 (compatible; LobuBot/1.0)' },
|
|
463
402
|
});
|
|
@@ -493,7 +432,7 @@ export default class WebsiteConnector extends ConnectorRuntime {
|
|
|
493
432
|
}
|
|
494
433
|
for (const childUrl of childSitemaps.slice(0, 5)) {
|
|
495
434
|
validatePublicUrl(childUrl);
|
|
496
|
-
const childUrls = await this.fetchSitemap(childUrl);
|
|
435
|
+
const childUrls = await this.fetchSitemap(childUrl, depth + 1);
|
|
497
436
|
urls.push(...childUrls);
|
|
498
437
|
}
|
|
499
438
|
}
|
|
@@ -164,6 +164,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
|
|
|
164
164
|
metadataSchema: {
|
|
165
165
|
type: 'object',
|
|
166
166
|
properties: {
|
|
167
|
+
source: { type: 'string', const: 'whatsapp' },
|
|
167
168
|
chat_jid: { type: 'string' },
|
|
168
169
|
is_group: { type: 'boolean' },
|
|
169
170
|
from_me: { type: 'boolean' },
|
|
@@ -178,7 +179,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
|
|
|
178
179
|
},
|
|
179
180
|
entityLinks: [
|
|
180
181
|
{
|
|
181
|
-
entityType: '
|
|
182
|
+
entityType: 'person',
|
|
182
183
|
autoCreate: true,
|
|
183
184
|
titlePath: 'metadata.push_name',
|
|
184
185
|
identities: [
|
|
@@ -423,11 +424,7 @@ export default class WhatsAppConnector extends ConnectorRuntime {
|
|
|
423
424
|
},
|
|
424
425
|
};
|
|
425
426
|
} catch (error) {
|
|
426
|
-
|
|
427
|
-
sock.end(undefined);
|
|
428
|
-
} catch {
|
|
429
|
-
/* ignore */
|
|
430
|
-
}
|
|
427
|
+
safeEnd(sock);
|
|
431
428
|
throw error;
|
|
432
429
|
}
|
|
433
430
|
}
|
|
@@ -477,11 +474,7 @@ async function attemptPairing(
|
|
|
477
474
|
sock.ev.off('connection.update', handler);
|
|
478
475
|
sock.ev.off('creds.update', credsListener);
|
|
479
476
|
ctx.signal.removeEventListener('abort', onAbort);
|
|
480
|
-
|
|
481
|
-
sock.end(undefined);
|
|
482
|
-
} catch {
|
|
483
|
-
/* ignore */
|
|
484
|
-
}
|
|
477
|
+
safeEnd(sock);
|
|
485
478
|
resolve(outcome);
|
|
486
479
|
};
|
|
487
480
|
|
|
@@ -591,11 +584,7 @@ async function drainHistory(
|
|
|
591
584
|
sock.ev.off('chats.upsert', chatsListener);
|
|
592
585
|
sock.ev.off('messaging-history.set', historyListener);
|
|
593
586
|
sock.ev.off('messages.upsert', messagesListener);
|
|
594
|
-
|
|
595
|
-
sock.end(undefined);
|
|
596
|
-
} catch {
|
|
597
|
-
/* ignore */
|
|
598
|
-
}
|
|
587
|
+
safeEnd(sock);
|
|
599
588
|
};
|
|
600
589
|
|
|
601
590
|
try {
|
|
@@ -828,6 +817,14 @@ function delay(ms: number): Promise<void> {
|
|
|
828
817
|
return new Promise((r) => setTimeout(r, ms));
|
|
829
818
|
}
|
|
830
819
|
|
|
820
|
+
function safeEnd(sock: ReturnType<typeof makeWASocket>): void {
|
|
821
|
+
try {
|
|
822
|
+
sock.end(undefined);
|
|
823
|
+
} catch {
|
|
824
|
+
/* ignore */
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
|
|
831
828
|
function waitForOpen(sock: ReturnType<typeof makeWASocket>, timeoutMs: number): Promise<boolean> {
|
|
832
829
|
return new Promise((resolve) => {
|
|
833
830
|
let newLogin = false;
|
|
@@ -962,12 +959,7 @@ export function toEvent(
|
|
|
962
959
|
const text = extractText(m.message);
|
|
963
960
|
if (!text) return null;
|
|
964
961
|
|
|
965
|
-
const tsRaw =
|
|
966
|
-
typeof m.messageTimestamp === 'number'
|
|
967
|
-
? m.messageTimestamp
|
|
968
|
-
: ((m.messageTimestamp as { low?: number; toNumber?: () => number } | null)?.toNumber?.() ??
|
|
969
|
-
(m.messageTimestamp as { low?: number } | null)?.low ??
|
|
970
|
-
0);
|
|
962
|
+
const tsRaw = extractTs(m);
|
|
971
963
|
if (!tsRaw) return null;
|
|
972
964
|
const occurredAt = new Date(tsRaw * 1000);
|
|
973
965
|
|
|
@@ -1001,6 +993,13 @@ export function toEvent(
|
|
|
1001
993
|
occurred_at: occurredAt,
|
|
1002
994
|
origin_parent_id: chatJid,
|
|
1003
995
|
metadata: {
|
|
996
|
+
// Mirror the bridge's `source` field so consumers can tell which
|
|
997
|
+
// transport delivered an event when the same message arrives via both
|
|
998
|
+
// (QR-paired socket and the local Mac archive). Origin id alignment
|
|
999
|
+
// (both connectors emit the bare WhatsApp stanza id) makes the gateway
|
|
1000
|
+
// dedupe on insert; `source` records which side produced the row that
|
|
1001
|
+
// survived.
|
|
1002
|
+
source: 'whatsapp',
|
|
1004
1003
|
chat_jid: chatJid,
|
|
1005
1004
|
is_group: isGroup,
|
|
1006
1005
|
from_me: fromMe,
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WhatsApp (local) Connector — Lobu for Mac only.
|
|
3
|
+
*
|
|
4
|
+
* Reads messages directly from the WhatsApp Desktop app's local SQLite store
|
|
5
|
+
* at `~/Library/Group Containers/group.net.whatsapp.WhatsApp.shared/
|
|
6
|
+
* ChatStorage.sqlite`. Lobu for Mac snapshots the DB read-only, walks new
|
|
7
|
+
* rows since the last `Z_PK` checkpoint, and emits events that share the
|
|
8
|
+
* `whatsapp` connector's metadata shape so downstream entity links work
|
|
9
|
+
* identically.
|
|
10
|
+
*
|
|
11
|
+
* Differences from the QR-paired `whatsapp` connector:
|
|
12
|
+
* - No Baileys, no socket, no phone-offline auto-unlink (WA Desktop itself
|
|
13
|
+
* is the linked device).
|
|
14
|
+
* - Ciphertext never leaves the Mac.
|
|
15
|
+
* - Bound to one specific Mac; requires WhatsApp Desktop installed.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
type ActionResult,
|
|
20
|
+
type ConnectorDefinition,
|
|
21
|
+
ConnectorRuntime,
|
|
22
|
+
IDENTITY,
|
|
23
|
+
type SyncContext,
|
|
24
|
+
type SyncResult,
|
|
25
|
+
} from '@lobu/connector-sdk';
|
|
26
|
+
|
|
27
|
+
const BRIDGE_ONLY =
|
|
28
|
+
'WhatsApp (local) runs only on a worker advertising capability "whatsapp_local" (Lobu for Mac with WhatsApp Desktop installed).';
|
|
29
|
+
|
|
30
|
+
export default class WhatsAppLocalConnector extends ConnectorRuntime {
|
|
31
|
+
readonly definition: ConnectorDefinition = {
|
|
32
|
+
key: 'whatsapp.local',
|
|
33
|
+
name: 'WhatsApp (this Mac)',
|
|
34
|
+
description:
|
|
35
|
+
"Reads messages from the WhatsApp Desktop app's local archive on this Mac. No QR pairing, no phone-offline auto-unlink — the desktop app is itself the linked device.",
|
|
36
|
+
version: '0.1.0',
|
|
37
|
+
faviconDomain: 'whatsapp.com',
|
|
38
|
+
requiredCapability: 'whatsapp_local',
|
|
39
|
+
runtime: { platforms: ['macos'] },
|
|
40
|
+
authSchema: { methods: [{ type: 'none' }] },
|
|
41
|
+
feeds: {
|
|
42
|
+
messages: {
|
|
43
|
+
key: 'messages',
|
|
44
|
+
name: 'Messages',
|
|
45
|
+
description:
|
|
46
|
+
'Personal WhatsApp messages from 1:1 and group chats, sourced from WhatsApp Desktop.',
|
|
47
|
+
configSchema: {
|
|
48
|
+
type: 'object',
|
|
49
|
+
properties: {
|
|
50
|
+
chat_filter: {
|
|
51
|
+
type: 'string',
|
|
52
|
+
enum: ['all', 'individual', 'group'],
|
|
53
|
+
default: 'all',
|
|
54
|
+
description: 'Which chats to include.',
|
|
55
|
+
},
|
|
56
|
+
max_messages_per_sync: {
|
|
57
|
+
type: 'integer',
|
|
58
|
+
minimum: 1,
|
|
59
|
+
maximum: 500000,
|
|
60
|
+
default: 5000,
|
|
61
|
+
description:
|
|
62
|
+
'Safety cap on messages collected per sync. The first sync drains all messages up to this cap; subsequent syncs ingest only new messages, so the cap rarely binds.',
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
eventKinds: {
|
|
67
|
+
message: {
|
|
68
|
+
description: 'A WhatsApp message (text, caption, or system).',
|
|
69
|
+
metadataSchema: {
|
|
70
|
+
type: 'object',
|
|
71
|
+
properties: {
|
|
72
|
+
source: { type: 'string', const: 'whatsapp_local' },
|
|
73
|
+
chat_jid: { type: 'string' },
|
|
74
|
+
is_group: { type: 'boolean' },
|
|
75
|
+
from_me: { type: 'boolean' },
|
|
76
|
+
participant: { type: 'string' },
|
|
77
|
+
sender_jid: { type: 'string' },
|
|
78
|
+
sender_phone: { type: 'string' },
|
|
79
|
+
push_name: { type: 'string' },
|
|
80
|
+
media_type: { type: 'string' },
|
|
81
|
+
quoted_id: { type: 'string' },
|
|
82
|
+
is_forwarded: { type: 'boolean' },
|
|
83
|
+
is_starred: { type: 'boolean' },
|
|
84
|
+
is_system_event: { type: 'boolean' },
|
|
85
|
+
voice_note_skipped: {
|
|
86
|
+
type: 'string',
|
|
87
|
+
enum: ['not_downloaded', 'too_large', 'empty', 'read_error', 'invalid_path'],
|
|
88
|
+
},
|
|
89
|
+
},
|
|
90
|
+
},
|
|
91
|
+
entityLinks: [
|
|
92
|
+
{
|
|
93
|
+
entityType: 'person',
|
|
94
|
+
autoCreate: true,
|
|
95
|
+
titlePath: 'metadata.push_name',
|
|
96
|
+
identities: [
|
|
97
|
+
{ namespace: IDENTITY.WA_JID, eventPath: 'metadata.sender_jid' },
|
|
98
|
+
{ namespace: IDENTITY.PHONE, eventPath: 'metadata.sender_phone' },
|
|
99
|
+
],
|
|
100
|
+
traits: {
|
|
101
|
+
push_name: {
|
|
102
|
+
eventPath: 'metadata.push_name',
|
|
103
|
+
behavior: 'prefer_non_empty',
|
|
104
|
+
},
|
|
105
|
+
last_seen_at: {
|
|
106
|
+
eventPath: 'occurred_at',
|
|
107
|
+
behavior: 'overwrite',
|
|
108
|
+
},
|
|
109
|
+
},
|
|
110
|
+
},
|
|
111
|
+
],
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
};
|
|
117
|
+
|
|
118
|
+
async sync(_ctx: SyncContext): Promise<SyncResult> {
|
|
119
|
+
throw new Error(BRIDGE_ONLY);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
async execute(): Promise<ActionResult> {
|
|
123
|
+
throw new Error(BRIDGE_ONLY);
|
|
124
|
+
}
|
|
125
|
+
}
|
package/dist/connectors/x.ts
CHANGED
|
@@ -17,7 +17,12 @@ import {
|
|
|
17
17
|
type SyncContext,
|
|
18
18
|
type SyncResult,
|
|
19
19
|
} from '@lobu/connector-sdk';
|
|
20
|
-
import {
|
|
20
|
+
import {
|
|
21
|
+
getBrowserCdpUrl,
|
|
22
|
+
getBrowserCookies,
|
|
23
|
+
getBrowserUserDataDir,
|
|
24
|
+
validateCookieNotExpired,
|
|
25
|
+
} from './browser-scraper-utils';
|
|
21
26
|
|
|
22
27
|
interface XCheckpoint {
|
|
23
28
|
last_tweet_id?: string;
|
|
@@ -358,12 +363,16 @@ async function syncViaBrowser(
|
|
|
358
363
|
const searchFilter = (config.search_filter as string) ?? 'live';
|
|
359
364
|
const searchUrl = `https://x.com/search?q=${encodeURIComponent(searchQuery)}&src=typed_query&f=${searchFilter}`;
|
|
360
365
|
|
|
366
|
+
const userDataDir = getBrowserUserDataDir(ctx.sessionState);
|
|
367
|
+
const cdpUrl = getBrowserCdpUrl(ctx.sessionState) ?? 'auto';
|
|
361
368
|
let cookies: any[] = [];
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
369
|
+
if (!userDataDir) {
|
|
370
|
+
try {
|
|
371
|
+
cookies = getBrowserCookies(ctx.checkpoint as any, ctx.sessionState as any, 'x');
|
|
372
|
+
validateCookieNotExpired(cookies, 'auth_token', 'x');
|
|
373
|
+
} catch {
|
|
374
|
+
// No stored cookies — CDP will be the only path
|
|
375
|
+
}
|
|
367
376
|
}
|
|
368
377
|
|
|
369
378
|
const result = await browserNetworkSync<XTweet>({
|
|
@@ -376,8 +385,9 @@ async function syncViaBrowser(
|
|
|
376
385
|
navigationTimeoutMs: 15000,
|
|
377
386
|
},
|
|
378
387
|
url: searchUrl,
|
|
379
|
-
cdpUrl
|
|
388
|
+
cdpUrl,
|
|
380
389
|
cookies,
|
|
390
|
+
userDataDir,
|
|
381
391
|
parseResponse: parseBrowserSearchResponse,
|
|
382
392
|
checkAuth: async (page) => {
|
|
383
393
|
const url = page.url();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
-- migrate:up
|
|
2
|
+
|
|
3
|
+
-- Add a per-connector capability gate for worker dispatch. Workers advertise
|
|
4
|
+
-- their capabilities on poll; the runs scheduler only assigns connector runs
|
|
5
|
+
-- to workers whose capabilities include the connector's required_capability.
|
|
6
|
+
-- NULL means "no special capability required" (the default for API/browser
|
|
7
|
+
-- connectors that the existing fleet can run).
|
|
8
|
+
--
|
|
9
|
+
-- `runtime` carries platform metadata for device-bound connectors (e.g.
|
|
10
|
+
-- `{"platforms": ["macos"]}` for apple.screen_time / local.directory, which
|
|
11
|
+
-- only run inside Lobu for Mac — that data is unreachable from a server-side
|
|
12
|
+
-- worker). NULL = cloud connector.
|
|
13
|
+
--
|
|
14
|
+
-- Initial use case: apple.screen_time and local.directory, served by Lobu for
|
|
15
|
+
-- Mac polling /api/workers/* as a user-scoped device worker.
|
|
16
|
+
|
|
17
|
+
ALTER TABLE public.connector_definitions
|
|
18
|
+
ADD COLUMN IF NOT EXISTS required_capability text,
|
|
19
|
+
ADD COLUMN IF NOT EXISTS runtime jsonb;
|
|
20
|
+
|
|
21
|
+
CREATE INDEX IF NOT EXISTS connector_definitions_required_capability_idx
|
|
22
|
+
ON public.connector_definitions (required_capability)
|
|
23
|
+
WHERE required_capability IS NOT NULL;
|
|
24
|
+
|
|
25
|
+
CREATE TABLE IF NOT EXISTS public.device_workers (
|
|
26
|
+
user_id text NOT NULL,
|
|
27
|
+
worker_id text NOT NULL,
|
|
28
|
+
platform text,
|
|
29
|
+
app_version text,
|
|
30
|
+
capabilities jsonb NOT NULL DEFAULT '[]'::jsonb,
|
|
31
|
+
label text,
|
|
32
|
+
first_seen_at timestamptz NOT NULL DEFAULT now(),
|
|
33
|
+
last_seen_at timestamptz NOT NULL DEFAULT now(),
|
|
34
|
+
PRIMARY KEY (user_id, worker_id)
|
|
35
|
+
);
|
|
36
|
+
|
|
37
|
+
CREATE INDEX IF NOT EXISTS device_workers_user_id_idx
|
|
38
|
+
ON public.device_workers (user_id);
|
|
39
|
+
|
|
40
|
+
-- migrate:down
|
|
41
|
+
|
|
42
|
+
DROP INDEX IF EXISTS public.device_workers_user_id_idx;
|
|
43
|
+
DROP TABLE IF EXISTS public.device_workers;
|
|
44
|
+
DROP INDEX IF EXISTS public.connector_definitions_required_capability_idx;
|
|
45
|
+
ALTER TABLE public.connector_definitions
|
|
46
|
+
DROP COLUMN IF EXISTS runtime,
|
|
47
|
+
DROP COLUMN IF EXISTS required_capability;
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
-- migrate:up
|
|
2
|
+
|
|
3
|
+
-- Make a connection's execution target explicit, and give every device worker
|
|
4
|
+
-- a home organization.
|
|
5
|
+
--
|
|
6
|
+
-- connections.device_worker_id (nullable) is the binding:
|
|
7
|
+
-- NULL -> runs on the cloud connector-worker pool (today's behavior)
|
|
8
|
+
-- set -> runs are pinned to that device worker
|
|
9
|
+
-- For device-type connectors the binding is mandatory; for cloud connectors
|
|
10
|
+
-- it's an optional override. A connection can only be pinned to a device that
|
|
11
|
+
-- is attached to that connection's organization.
|
|
12
|
+
--
|
|
13
|
+
-- device_workers.organization_id is the device's home org — chosen at setup,
|
|
14
|
+
-- defaulting to the owner's personal workspace. The device's connectors live
|
|
15
|
+
-- there; re-attaching the device to a different org (of which the owner must
|
|
16
|
+
-- be a member) is the only knob. There is no per-connection device→org grant.
|
|
17
|
+
|
|
18
|
+
-- Give device_workers a single-column surrogate id so connections and the UI
-- can reference a device by one stable value. The composite
-- (user_id, worker_id) primary key is kept. organization_id is added nullable.
ALTER TABLE public.device_workers
    ADD COLUMN IF NOT EXISTS id uuid NOT NULL DEFAULT gen_random_uuid(),
    ADD COLUMN IF NOT EXISTS organization_id text;
|
|
23
|
+
|
|
24
|
+
-- Enforce uniqueness of the surrogate id.
CREATE UNIQUE INDEX IF NOT EXISTS device_workers_id_key
    ON public.device_workers USING btree (id);
|
|
26
|
+
|
|
27
|
+
-- Partial index: devices with no organization_id are left out.
CREATE INDEX IF NOT EXISTS idx_device_workers_organization_id
    ON public.device_workers USING btree (organization_id)
    WHERE organization_id IS NOT NULL;
|
|
30
|
+
|
|
31
|
+
-- Nullable binding from a connection to a device worker.
ALTER TABLE public.connections
    ADD COLUMN IF NOT EXISTS device_worker_id uuid;
|
|
33
|
+
|
|
34
|
+
-- Add the connections -> device_workers foreign key if it is not there yet.
-- PostgreSQL has no ADD CONSTRAINT IF NOT EXISTS, so the catalog is checked
-- first. The lookup is scoped to public.connections because constraint names
-- are only unique per table: a same-named constraint on another table or in
-- another schema must not cause this step to be skipped.
-- ON DELETE SET NULL: deleting a device worker clears device_worker_id on the
-- connections that referenced it instead of deleting those connections.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_catalog.pg_constraint AS con
        WHERE con.conname = 'connections_device_worker_id_fkey'
          AND con.conrelid = 'public.connections'::regclass
    ) THEN
        ALTER TABLE public.connections
            ADD CONSTRAINT connections_device_worker_id_fkey
            FOREIGN KEY (device_worker_id)
            REFERENCES public.device_workers (id)
            ON DELETE SET NULL;
    END IF;
END
$$;
|
|
46
|
+
|
|
47
|
+
-- Partial index covering only connections that are pinned to a device.
CREATE INDEX IF NOT EXISTS idx_connections_device_worker_id
    ON public.connections USING btree (device_worker_id)
    WHERE device_worker_id IS NOT NULL;
|
|
50
|
+
|
|
51
|
+
-- Attach existing devices to their owner's personal workspace: the
-- organization whose metadata key 'personal_org_for_user_id' equals the
-- device's user_id. Devices with no such organization keep organization_id
-- NULL. This is a no-op on a fresh database (there are no users yet); the
-- device heartbeat sets this for new devices either way.
-- ORDER BY makes the pick deterministic should more than one organization
-- carry the same personal_org_for_user_id; LIMIT 1 alone would return an
-- arbitrary one.
UPDATE public.device_workers AS dw
SET organization_id = (
    SELECT o.id
    FROM public.organization AS o
    WHERE (o.metadata::jsonb)->>'personal_org_for_user_id' = dw.user_id
    ORDER BY o.id
    LIMIT 1
)
WHERE dw.organization_id IS NULL;
|
|
61
|
+
|
|
62
|
+
-- Backfill connections.device_worker_id for existing auto-wired device
-- connections that can be pinned unambiguously. A connection is pinned only
-- when all of the following hold:
--   * its creator (created_by) owns exactly one device worker;
--   * it is live (deleted_at IS NULL), not yet pinned, and has no auth profile;
--   * its connector declares a required_capability;
--   * it has the smallest id among the live connections sharing its
--     (organization_id, connector_key, created_by), so at most one row per
--     such group is pinned and the unique index created later in this
--     migration cannot be violated by the backfill.
-- Ambiguous ones stay NULL and the UI prompts for a device.
-- NOTE(review): no predicate compares the device's organization_id with the
-- connection's organization_id — confirm that is guaranteed elsewhere.
WITH sole_device AS (
    -- Device workers whose user has no other device worker. Written with
    -- NOT EXISTS so no aggregate over the uuid id column is needed.
    SELECT
        only_dw.user_id,
        only_dw.id
    FROM public.device_workers AS only_dw
    WHERE NOT EXISTS (
        SELECT 1
        FROM public.device_workers AS other_dw
        WHERE other_dw.user_id = only_dw.user_id
          AND other_dw.id <> only_dw.id
    )
)
UPDATE public.connections AS c
SET device_worker_id = sole_device.id
FROM sole_device
WHERE c.created_by = sole_device.user_id
  AND c.device_worker_id IS NULL
  AND c.deleted_at IS NULL
  AND c.auth_profile_id IS NULL
  -- Only connectors that require a capability.
  AND EXISTS (
      SELECT 1
      FROM public.connector_definitions AS cd
      WHERE cd.key = c.connector_key
        AND cd.required_capability IS NOT NULL
  )
  -- Only the lowest-id live connection of its (org, connector, creator) group.
  AND c.id = (
      SELECT min(peer.id)
      FROM public.connections AS peer
      WHERE peer.organization_id = c.organization_id
        AND peer.connector_key = c.connector_key
        AND peer.created_by = c.created_by
        AND peer.deleted_at IS NULL
  );
|
|
93
|
+
|
|
94
|
+
-- At most one live connection per (organization, connector, device); a second
-- device backing the same connector is a separate connection. Also serves as
-- database-level idempotency for the create-vs-auto-wire race.
-- The index is dropped and rebuilt rather than created IF NOT EXISTS, and it
-- must be created only after device_worker_id has been backfilled.
DROP INDEX IF EXISTS public.idx_connections_org_connector_device_live;

CREATE UNIQUE INDEX idx_connections_org_connector_device_live
    ON public.connections USING btree (organization_id, connector_key, device_worker_id)
    WHERE deleted_at IS NULL AND device_worker_id IS NOT NULL;
|
|
101
|
+
|
|
102
|
+
-- migrate:down
|
|
103
|
+
|
|
104
|
+
-- Reverse the up migration. Connection-side objects go first: the foreign key
-- is removed before the unique index on device_workers (id) that it
-- references. The device_workers indexes and columns follow.
DROP INDEX IF EXISTS public.idx_connections_org_connector_device_live;
DROP INDEX IF EXISTS public.idx_connections_device_worker_id;
ALTER TABLE public.connections
    DROP CONSTRAINT IF EXISTS connections_device_worker_id_fkey,
    DROP COLUMN IF EXISTS device_worker_id;

DROP INDEX IF EXISTS public.device_workers_id_key;
DROP INDEX IF EXISTS public.idx_device_workers_organization_id;
ALTER TABLE public.device_workers
    DROP COLUMN IF EXISTS id,
    DROP COLUMN IF EXISTS organization_id;
|