@lobu/cli 6.0.0 → 6.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -27
- package/dist/bundled-skills/lobu/SKILL.md +12 -12
- package/dist/commands/_lib/apply/apply-cmd.d.ts +2 -0
- package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
- package/dist/commands/_lib/apply/apply-cmd.js +26 -0
- package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
- package/dist/commands/_lib/apply/client.d.ts +1 -1
- package/dist/commands/_lib/apply/client.d.ts.map +1 -1
- package/dist/commands/_lib/apply/desired-state.js +6 -6
- package/dist/commands/_lib/apply/desired-state.js.map +1 -1
- package/dist/commands/agent.d.ts +7 -0
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +65 -1
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/chat.d.ts +12 -9
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +117 -56
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/dev.d.ts +15 -7
- package/dist/commands/dev.d.ts.map +1 -1
- package/dist/commands/dev.js +79 -44
- package/dist/commands/dev.js.map +1 -1
- package/dist/commands/doctor.d.ts +1 -0
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +136 -0
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/eval.d.ts +8 -0
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +56 -1
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/init.d.ts +20 -5
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +332 -183
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/link.d.ts +11 -0
- package/dist/commands/link.d.ts.map +1 -0
- package/dist/commands/link.js +28 -0
- package/dist/commands/link.js.map +1 -0
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +14 -2
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.js +4 -4
- package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
- package/dist/commands/memory/_lib/install-targets.d.ts.map +1 -1
- package/dist/commands/memory/_lib/install-targets.js +1 -5
- package/dist/commands/memory/_lib/install-targets.js.map +1 -1
- package/dist/commands/memory/_lib/mcp.d.ts +2 -2
- package/dist/commands/memory/_lib/mcp.d.ts.map +1 -1
- package/dist/commands/memory/_lib/mcp.js +24 -12
- package/dist/commands/memory/_lib/mcp.js.map +1 -1
- package/dist/commands/memory/_lib/openclaw-auth.d.ts +1 -0
- package/dist/commands/memory/_lib/openclaw-auth.d.ts.map +1 -1
- package/dist/commands/memory/_lib/openclaw-auth.js +14 -3
- package/dist/commands/memory/_lib/openclaw-auth.js.map +1 -1
- package/dist/commands/memory/_lib/openclaw-cmd.js +1 -1
- package/dist/commands/memory/_lib/openclaw-cmd.js.map +1 -1
- package/dist/commands/memory/_lib/schema.d.ts +2 -2
- package/dist/commands/memory/_lib/schema.d.ts.map +1 -1
- package/dist/commands/memory/_lib/schema.js +3 -3
- package/dist/commands/memory/_lib/schema.js.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.js +5 -6
- package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
- package/dist/commands/memory/run.d.ts.map +1 -1
- package/dist/commands/memory/run.js +2 -2
- package/dist/commands/memory/run.js.map +1 -1
- package/dist/commands/platforms/platform-prompts.d.ts +0 -1
- package/dist/commands/platforms/platform-prompts.d.ts.map +1 -1
- package/dist/commands/platforms/platform-prompts.js +54 -8
- package/dist/commands/platforms/platform-prompts.js.map +1 -1
- package/dist/commands/telemetry.d.ts +10 -0
- package/dist/commands/telemetry.d.ts.map +1 -0
- package/dist/commands/telemetry.js +68 -0
- package/dist/commands/telemetry.js.map +1 -0
- package/dist/commands/whoami.d.ts.map +1 -1
- package/dist/commands/whoami.js +1 -1
- package/dist/commands/whoami.js.map +1 -1
- package/dist/connectors/README.md +534 -0
- package/dist/connectors/__tests__/browser-scraper-utils.test.ts +186 -0
- package/dist/connectors/browser-scraper-utils.ts +214 -0
- package/dist/connectors/capterra.ts +273 -0
- package/dist/connectors/g2.ts +286 -0
- package/dist/connectors/github.ts +1553 -0
- package/dist/connectors/glassdoor.ts +291 -0
- package/dist/connectors/gmaps.ts +197 -0
- package/dist/connectors/google_calendar.ts +631 -0
- package/dist/connectors/google_gmail.ts +751 -0
- package/dist/connectors/google_photos.ts +776 -0
- package/dist/connectors/google_play.ts +342 -0
- package/dist/connectors/hackernews.ts +471 -0
- package/dist/connectors/index.ts +23 -0
- package/dist/connectors/ios_appstore.ts +226 -0
- package/dist/connectors/linkedin.ts +471 -0
- package/dist/connectors/microsoft_outlook.ts +410 -0
- package/dist/connectors/producthunt.ts +471 -0
- package/dist/connectors/reddit.ts +600 -0
- package/dist/connectors/rss.ts +448 -0
- package/dist/connectors/spotify.ts +590 -0
- package/dist/connectors/trustpilot.ts +199 -0
- package/dist/connectors/website.ts +629 -0
- package/dist/connectors/whatsapp.ts +1073 -0
- package/dist/connectors/x.ts +526 -0
- package/dist/connectors/youtube.ts +666 -0
- package/dist/db/migrations/00000000000000_baseline.sql +4867 -0
- package/dist/db/migrations/20260405193000_add_mcp_sessions.sql +33 -0
- package/dist/db/migrations/20260408120000_remove_system_connectors.sql +48 -0
- package/dist/db/migrations/20260408120001_optional_compiled_code.sql +6 -0
- package/dist/db/migrations/20260409110000_add_active_watcher_run_index.sql +9 -0
- package/dist/db/migrations/20260409130000_connector_default_config.sql +5 -0
- package/dist/db/migrations/20260410120000_add_agent_secrets.sql +25 -0
- package/dist/db/migrations/20260413170000_add_watcher_group_id.sql +67 -0
- package/dist/db/migrations/20260416120000_add_entity_wa_jid_index.sql +14 -0
- package/dist/db/migrations/20260417100000_add_entity_identities.sql +77 -0
- package/dist/db/migrations/20260418100000_add_auth_runs.sql +83 -0
- package/dist/db/migrations/20260418110000_add_runs_created_by_user.sql +18 -0
- package/dist/db/migrations/20260419120000_add_event_identity_indexes.sql +56 -0
- package/dist/db/migrations/20260420120000_extend_reserved_org_slugs.sql +56 -0
- package/dist/db/migrations/20260424030000_add_watcher_run_correlation.sql +52 -0
- package/dist/db/migrations/20260424130000_relax_events_client_id_fk.sql +47 -0
- package/dist/db/migrations/20260425100000_normalize_watcher_feedback.sql +91 -0
- package/dist/db/migrations/20260425120000_add_run_diagnostics.sql +20 -0
- package/dist/db/migrations/20260425130000_add_repair_agent_plumbing.sql +46 -0
- package/dist/db/migrations/20260426120000_entities_entity_type_fk.sql +101 -0
- package/dist/db/migrations/20260426130000_db_integrity_cleanup.sql +104 -0
- package/dist/db/migrations/20260426130001_db_integrity_cleanup_concurrent.sql +187 -0
- package/dist/db/migrations/20260427133000_events_created_by_nullable.sql +74 -0
- package/dist/db/migrations/20260427140000_identity_engine_indexes.sql +140 -0
- package/dist/db/migrations/20260427150000_drop_events_source_id.sql +177 -0
- package/dist/db/migrations/20260427160000_drop_dead_schema.sql +76 -0
- package/dist/db/migrations/20260427170000_market_founder_to_member.sql +364 -0
- package/dist/db/migrations/20260428040000_cascade_events_watchers_org_fk.sql +66 -0
- package/dist/db/migrations/20260428050000_add_runs_approved_input.sql +9 -0
- package/dist/db/migrations/20260429010000_auth_profile_tenant_scoped_fk.sql +79 -0
- package/dist/db/migrations/20260429060000_extend_runs_for_lobu_queue.sql +108 -0
- package/dist/db/migrations/20260429120000_agent_changed_notify.sql +97 -0
- package/dist/db/migrations/20260429120100_user_auth_profiles_and_model_prefs.sql +36 -0
- package/dist/db/migrations/20260429120200_fix_notify_old_keys.sql +130 -0
- package/dist/db/migrations/20260429130000_oauth_states_cli_sessions_rate_limits.sql +83 -0
- package/dist/db/migrations/20260429140000_phase8_grants_chat_connections_mcp_sessions.sql +84 -0
- package/dist/db/migrations/20260429140100_runs_priority_expires_at_retry_delay.sql +44 -0
- package/dist/db/migrations/20260429180000_drop_invalidatable_cache_triggers.sql +25 -0
- package/dist/db/migrations/20260430005614_agents_apply_fields.sql +21 -0
- package/dist/db/migrations/20260430022231_fix_connection_config_encryption.sql +69 -0
- package/dist/db/migrations/20260430151215_add_task_run_type.sql +77 -0
- package/dist/db/migrations/20260501000000_drop_cli_sessions.sql +27 -0
- package/dist/db/migrations/20260501133000_lobu_memory_mcp_id.sql +117 -0
- package/dist/db/migrations/20260502000000_drop_chat_connections.sql +60 -0
- package/dist/db/migrations/20260503000000_agent_secrets_org_scope.sql +56 -0
- package/dist/db/migrations/20260504000000_flatten_agents_drop_sandbox_model.sql +48 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +147 -23
- package/dist/index.js.map +1 -1
- package/dist/internal/api-client.d.ts +4 -8
- package/dist/internal/api-client.d.ts.map +1 -1
- package/dist/internal/api-client.js +1 -1
- package/dist/internal/api-client.js.map +1 -1
- package/dist/internal/context.js +2 -2
- package/dist/internal/context.js.map +1 -1
- package/dist/internal/credentials.d.ts.map +1 -1
- package/dist/internal/credentials.js +6 -1
- package/dist/internal/credentials.js.map +1 -1
- package/dist/internal/index.d.ts +2 -3
- package/dist/internal/index.d.ts.map +1 -1
- package/dist/internal/index.js +2 -2
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/oauth.d.ts +7 -6
- package/dist/internal/oauth.d.ts.map +1 -1
- package/dist/internal/oauth.js +3 -3
- package/dist/internal/project-link.d.ts +10 -0
- package/dist/internal/project-link.d.ts.map +1 -0
- package/dist/internal/project-link.js +48 -0
- package/dist/internal/project-link.js.map +1 -0
- package/dist/providers.json +2 -2
- package/dist/server.bundle.mjs +3173 -4404
- package/dist/start-local.bundle.mjs +71481 -0
- package/dist/templates/README.md.tmpl +10 -11
- package/package.json +14 -12
- package/dist/__tests__/chat.integration.test.d.ts +0 -2
- package/dist/__tests__/chat.integration.test.d.ts.map +0 -1
- package/dist/__tests__/chat.integration.test.js +0 -337
- package/dist/__tests__/chat.integration.test.js.map +0 -1
- package/dist/__tests__/dev.test.d.ts +0 -2
- package/dist/__tests__/dev.test.d.ts.map +0 -1
- package/dist/__tests__/dev.test.js +0 -25
- package/dist/__tests__/dev.test.js.map +0 -1
- package/dist/__tests__/init-memory.test.d.ts +0 -2
- package/dist/__tests__/init-memory.test.d.ts.map +0 -1
- package/dist/__tests__/init-memory.test.js +0 -45
- package/dist/__tests__/init-memory.test.js.map +0 -1
- package/dist/__tests__/token.test.d.ts +0 -2
- package/dist/__tests__/token.test.d.ts.map +0 -1
- package/dist/__tests__/token.test.js +0 -52
- package/dist/__tests__/token.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/client.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/client.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/client.test.js +0 -23
- package/dist/commands/_lib/apply/__tests__/client.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/desired-state.test.js +0 -140
- package/dist/commands/_lib/apply/__tests__/desired-state.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/diff.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/diff.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/diff.test.js +0 -378
- package/dist/commands/_lib/apply/__tests__/diff.test.js.map +0 -1
- package/dist/commands/apply.d.ts +0 -3
- package/dist/commands/apply.d.ts.map +0 -1
- package/dist/commands/apply.js +0 -5
- package/dist/commands/apply.js.map +0 -1
- package/dist/commands/memory/_lib/openclaw-auth.test.d.ts +0 -2
- package/dist/commands/memory/_lib/openclaw-auth.test.d.ts.map +0 -1
- package/dist/commands/memory/_lib/openclaw-auth.test.js +0 -9
- package/dist/commands/memory/_lib/openclaw-auth.test.js.map +0 -1
- package/dist/internal/__tests__/api-client.test.d.ts +0 -2
- package/dist/internal/__tests__/api-client.test.d.ts.map +0 -1
- package/dist/internal/__tests__/api-client.test.js +0 -95
- package/dist/internal/__tests__/api-client.test.js.map +0 -1
- package/dist/internal/__tests__/context.test.d.ts +0 -2
- package/dist/internal/__tests__/context.test.d.ts.map +0 -1
- package/dist/internal/__tests__/context.test.js +0 -77
- package/dist/internal/__tests__/context.test.js.map +0 -1
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Glassdoor Connector (V1 runtime)
|
|
3
|
+
*
|
|
4
|
+
* Scrapes employee reviews from Glassdoor using Playwright.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { createHash } from 'node:crypto';
|
|
8
|
+
import {
|
|
9
|
+
type ActionContext,
|
|
10
|
+
type ActionResult,
|
|
11
|
+
type ConnectorDefinition,
|
|
12
|
+
ConnectorRuntime,
|
|
13
|
+
calculateEngagementScore,
|
|
14
|
+
type EventEnvelope,
|
|
15
|
+
type SyncContext,
|
|
16
|
+
type SyncResult,
|
|
17
|
+
} from '@lobu/connector-sdk';
|
|
18
|
+
import {
|
|
19
|
+
handleCookieConsent,
|
|
20
|
+
openStealthBrowser,
|
|
21
|
+
validateUrlDomain,
|
|
22
|
+
withBrowserErrorCapture,
|
|
23
|
+
} from './browser-scraper-utils.ts';
|
|
24
|
+
|
|
25
|
+
/**
 * Builds the external (origin) ID used to de-duplicate a Glassdoor review.
 *
 * A review that carries its own DOM-derived `id` is returned under that ID
 * unchanged. Otherwise the ID is synthesised as
 * `glassdoor-<company slug>-<12 hex chars>`, where the hex part is a SHA-256
 * prefix over the review's date, author and the first 80 characters of the
 * first non-empty field among title, pros and cons. Empty parts are left out
 * and the remaining ones are joined with `|` before hashing.
 *
 * @param companyName - Company name; lower-cased, with every run of characters
 *   outside `a-z0-9` collapsed to a single `-`, to form the slug.
 * @param review - Review extracted from the page.
 * @returns The native review ID, or the derived `glassdoor-…` identifier.
 */
function deriveReviewExternalId(companyName: string, review: GlassdoorReview): string {
  const { id: nativeId, date, author, title, pros, cons } = review;
  if (nativeId) {
    return nativeId;
  }

  // Only the first non-empty text field contributes, capped at 80 characters.
  const snippet = (title || pros || cons).slice(0, 80);
  const fingerprint = [date, author, snippet].filter((part) => Boolean(part)).join('|');

  const digest = createHash('sha256').update(fingerprint).digest('hex');
  const companySlug = companyName.toLowerCase().replace(/[^a-z0-9]+/g, '-');

  return ['glassdoor', companySlug, digest.slice(0, 12)].join('-');
}

/**
 * One review as read from the Glassdoor page DOM, before it is converted
 * into an event. Text fields are empty strings when the element is missing.
 */
interface GlassdoorReview {
  /** Review identifier taken from the element's attributes; `''` when none was found. */
  id: string;
  /** Overall rating parsed from the rating element's text. */
  rating: number;
  /** Review headline. */
  title: string;
  /** Text of the "pros" section. */
  pros: string;
  /** Text of the "cons" section. */
  cons: string;
  /** Review date: the element's `datetime` attribute when present, otherwise its text. */
  date: string;
  /** Text of the author / employee-info element. */
  author: string;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Feed configuration accepted by the Glassdoor connector.
 */
interface GlassdoorConfig {
  /** Company name used for the search-based lookup. Required. */
  company_name: string;
  /** Glassdoor company ID, if known. */
  company_id?: string;
  /** Number of days of history to look back. */
  lookback_days?: number;
}
|
|
64
|
+
|
|
65
|
+
/**
 * JSON schema for the Glassdoor `reviews` configuration. The feed-level
 * `configSchema` and the connector-level `optionsSchema` accept exactly the
 * same fields, so both point at this single definition.
 */
const glassdoorReviewsConfigSchema = {
  type: 'object',
  required: ['company_name'],
  properties: {
    company_name: {
      type: 'string',
      minLength: 1,
      description: 'Company name for search-based lookup',
    },
    company_id: {
      type: 'string',
      description: 'Glassdoor company ID if known',
    },
    lookback_days: {
      type: 'integer',
      minimum: 1,
      maximum: 730,
      default: 365,
      description:
        'Number of days to look back for historical data. Default: 365 (1 year). Maximum: 730 (2 years).',
    },
  },
};

/**
 * Builds the Glassdoor reviews URL for a company.
 *
 * Both values come from user configuration, so they are percent-encoded
 * before being placed in the URL path; otherwise characters such as `/`, `?`
 * or `#` would alter the path or start a query/fragment.
 *
 * @param companyName - Company name, used when no ID is given.
 * @param companyId - Glassdoor company ID; takes precedence when non-empty.
 * @returns Absolute URL of the reviews page.
 */
function buildGlassdoorReviewsUrl(companyName: string, companyId?: string): string {
  return companyId
    ? `https://www.glassdoor.com/Reviews/company-reviews-${encodeURIComponent(companyId)}.htm`
    : `https://www.glassdoor.com/Reviews/${encodeURIComponent(companyName)}-reviews-SRCH_KE0.htm`;
}

/**
 * Converts the date string scraped from a review into a `Date`.
 *
 * @param rawDate - `datetime` attribute or visible text of the date element.
 * @returns The parsed date, or the current time when the string is empty or
 *   cannot be parsed (so an `Invalid Date` never reaches the event).
 */
function parseGlassdoorReviewDate(rawDate: string): Date {
  if (rawDate) {
    const parsed = new Date(rawDate);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }
  return new Date();
}

/**
 * Glassdoor connector: scrapes employee reviews for one company from the
 * Glassdoor reviews page through a browser session.
 *
 * Only the reviews present on the initially loaded page are extracted; no
 * pagination is performed. Actions are not supported.
 */
export default class GlassdoorConnector extends ConnectorRuntime {
  readonly definition: ConnectorDefinition = {
    key: 'glassdoor',
    name: 'Glassdoor',
    description: 'Scrapes employee reviews from Glassdoor.',
    version: '1.0.0',
    faviconDomain: 'glassdoor.com',
    authSchema: {
      methods: [{ type: 'none' }],
    },
    feeds: {
      reviews: {
        key: 'reviews',
        name: 'Employee Reviews',
        description: 'Scrapes employee reviews for a given company.',
        configSchema: glassdoorReviewsConfigSchema,
        eventKinds: {
          review: {
            description: 'A Glassdoor employee review',
            metadataSchema: {
              type: 'object',
              properties: {
                rating: { type: 'number', description: 'Overall rating (0-5)' },
                title: { type: 'string', description: 'Review headline' },
                pros: { type: 'string' },
                cons: { type: 'string' },
              },
            },
          },
        },
      },
    },
    optionsSchema: glassdoorReviewsConfigSchema,
  };

  /**
   * Loads the company's reviews page and turns every review that has pros or
   * cons text into an event.
   *
   * @param ctx - Sync context; `ctx.config` is read as a `GlassdoorConfig`.
   * @returns The extracted events, a checkpoint holding the sync time, and
   *   counts of found/skipped items. When `company_name` is missing, returns
   *   no events, the unchanged checkpoint and an `error` entry in `metadata`.
   */
  async sync(ctx: SyncContext): Promise<SyncResult> {
    const config = ctx.config as GlassdoorConfig;
    const { company_name, company_id } = config;

    if (!company_name) {
      return {
        events: [],
        checkpoint: ctx.checkpoint,
        metadata: { items_found: 0, error: 'company_name is required' },
      };
    }

    // NOTE(review): `lookback_days` is declared in the schema but is not
    // applied anywhere in this method — confirm whether the platform enforces
    // it or whether reviews should be filtered by date here.

    const baseUrl = buildGlassdoorReviewsUrl(company_name, company_id);
    // The return value is not inspected; presumably this throws for URLs
    // outside glassdoor.com — verify against browser-scraper-utils.
    validateUrlDomain(baseUrl, 'glassdoor.com');

    const session = await openStealthBrowser({ cdpUrl: 'auto' });

    return withBrowserErrorCapture(session, 'glassdoor-sync', async (page) => {
      // Configure viewport and user-agent to mimic a real browser
      await page.setViewportSize({ width: 1920, height: 1080 });
      await page.setExtraHTTPHeaders({
        'User-Agent':
          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
      });

      // Navigate to the reviews page
      await page.goto(baseUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });

      await handleCookieConsent(page, '#onetrust-accept-btn-handler');

      // Human-like delay before interacting with the page
      await page.waitForTimeout(2000);

      // Wait for review elements to render. A timeout is deliberately ignored:
      // reviews may not be present (auth wall, empty page, etc.), in which
      // case extraction below simply yields nothing.
      try {
        await page.waitForSelector(
          '[data-test="review-list-item"], .empReview, [data-test="employerReview"]',
          { timeout: 10000 }
        );
      } catch {
        // Best effort — continue with whatever the page contains.
      }

      // Extract raw reviews from the page DOM. This callback runs inside the
      // page, so it must stay self-contained: it uses only anonymous inline
      // callbacks and nothing from the enclosing Node scope.
      const rawReviews = await page.evaluate((): GlassdoorReview[] => {
        // Try multiple selector strategies as Glassdoor frequently changes
        // their HTML; the first selector that matches anything wins.
        const reviewElements =
          ['[data-test="review-list-item"]', '.empReview', '[data-test="employerReview"]']
            .map((selector) => Array.from(document.querySelectorAll(selector)))
            .find((matches) => matches.length > 0) ?? [];

        return reviewElements.map((el: Element) => {
          // Try multiple selector patterns for each field
          const ratingEl =
            el.querySelector('[data-test="overall-rating"]') ||
            el.querySelector('.rating') ||
            el.querySelector('[class*="rating"]');

          const titleEl =
            el.querySelector('[data-test="review-title"]') ||
            el.querySelector('.reviewLink') ||
            el.querySelector('[class*="title"]');

          const prosEl =
            el.querySelector('[data-test="pros"]') ||
            el.querySelector('[data-pros]') ||
            el.querySelector('.pros');

          const consEl =
            el.querySelector('[data-test="cons"]') ||
            el.querySelector('[data-cons]') ||
            el.querySelector('.cons');

          const dateEl =
            el.querySelector('[data-test="review-date"]') ||
            el.querySelector('.date') ||
            el.querySelector('time');

          const authorEl =
            el.querySelector('[data-test="employee-info"]') ||
            el.querySelector('.authorInfo') ||
            el.querySelector('[class*="author"]');

          // Try to get review ID from various attributes
          const reviewId =
            el.getAttribute('data-review-id') ||
            el.getAttribute('id') ||
            el.getAttribute('data-id') ||
            '';

          return {
            id: reviewId,
            rating: parseFloat(ratingEl?.textContent?.trim() || '0'),
            title: titleEl?.textContent?.trim() || '',
            pros: prosEl?.textContent?.trim() || '',
            cons: consEl?.textContent?.trim() || '',
            date: dateEl?.getAttribute('datetime') || dateEl?.textContent?.trim() || '',
            author: authorEl?.textContent?.trim() || '',
          };
        });
      });

      // Filter reviews that have at least pros or cons
      const validReviews = rawReviews.filter((r) => Boolean(r.pros || r.cons));

      // Transform to EventEnvelope format
      const events: EventEnvelope[] = validReviews.map((review) => {
        const externalId = deriveReviewExternalId(company_name, review);
        const content = `${review.title}\n\nPros: ${review.pros}\n\nCons: ${review.cons}`;
        // Rating text that is not numeric parses to NaN; report 0 instead so
        // neither the score nor the metadata carries a NaN.
        const rating = Number.isFinite(review.rating) ? review.rating : 0;

        return {
          origin_id: externalId,
          payload_text: content,
          author_name: review.author || undefined,
          occurred_at: parseGlassdoorReviewDate(review.date),
          origin_type: 'review',
          score: calculateEngagementScore('glassdoor', { rating }),
          // Only add the fragment when the page exposed a review ID;
          // otherwise link to the reviews page itself.
          source_url: review.id ? `${baseUrl}#review_${review.id}` : baseUrl,
          metadata: {
            rating,
            title: review.title,
            pros: review.pros,
            cons: review.cons,
          },
        };
      });

      return {
        events,
        checkpoint: {
          last_sync_at: new Date().toISOString(),
        } as Record<string, unknown>,
        metadata: {
          items_found: events.length,
          items_skipped: rawReviews.length - validReviews.length,
        },
      };
    });
  }

  /**
   * Actions are not implemented for this connector.
   *
   * @returns Always a failed result with the message `Actions not supported`.
   */
  async execute(_ctx: ActionContext): Promise<ActionResult> {
    return { success: false, error: 'Actions not supported' };
  }
}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google Maps Connector (V1 runtime)
|
|
3
|
+
*
|
|
4
|
+
* Fetches business reviews using Google Places API.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
type ActionContext,
|
|
9
|
+
type ActionResult,
|
|
10
|
+
type ConnectorDefinition,
|
|
11
|
+
ConnectorRuntime,
|
|
12
|
+
calculateEngagementScore,
|
|
13
|
+
type EventEnvelope,
|
|
14
|
+
type SyncContext,
|
|
15
|
+
type SyncResult,
|
|
16
|
+
} from '@lobu/connector-sdk';
|
|
17
|
+
import { filterByCheckpoint } from './browser-scraper-utils.ts';
|
|
18
|
+
|
|
19
|
+
/**
 * A single review entry as it appears in the `reviews` array of a Place
 * Details response.
 */
interface GMapsReview {
  /** Display name of the reviewer. */
  author_name: string;
  /** URL of the reviewer's profile, when provided. */
  author_url?: string;
  /** URL of the reviewer's profile photo, when provided. */
  profile_photo_url?: string;
  /** Star rating given by the reviewer. */
  rating: number;
  /** Human-readable relative time of the review, when provided. */
  relative_time_description?: string;
  /** Review body; reviews with empty text are skipped during sync. */
  text: string;
  /** Review time in seconds since the Unix epoch. */
  time: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Subset of the Place Details response body used by this connector.
 */
interface PlaceDetailsResponse {
  /** API-level status code; anything other than `OK` is treated as a failure. */
  status: string;
  /** Optional human-readable detail that can accompany a non-`OK` status. */
  error_message?: string;
  /** The place record; only the requested fields are modelled. */
  result?: {
    /** Name of the place. */
    name?: string;
    /** Reviews returned for the place. */
    reviews?: GMapsReview[];
    /** Google Maps URL of the place. */
    url?: string;
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Subset of the Find Place response body used by this connector.
 */
interface FindPlaceResponse {
  /** Matching places; absent or empty when nothing matched. */
  candidates?: Array<{ place_id: string }>;
  /**
   * API-level status code (for example `OK` or `ZERO_RESULTS`). Optional so
   * that existing code constructing this shape without it keeps compiling.
   */
  status?: string;
  /** Optional human-readable detail that can accompany a non-`OK` status. */
  error_message?: string;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Checkpoint persisted between Google Maps syncs.
 */
interface GMapsCheckpoint {
  /** ISO-8601 timestamp of the newest review emitted by an earlier sync. */
  last_timestamp?: string;
}
|
|
45
|
+
|
|
46
|
+
/**
 * JSON schema for the Google Maps `reviews` configuration. Neither field is
 * marked required here; the sync itself rejects a configuration that has
 * neither of them.
 */
const configSchema = {
  type: 'object',
  properties: {
    // Preferred: the exact place to fetch.
    place_id: { type: 'string', description: 'Google Place ID' },
    // Used to search for the place when no ID is configured.
    business_name: { type: 'string', description: 'Business name for search-based fallback' },
  },
};
|
|
59
|
+
|
|
60
|
+
/**
 * Resolves a business name to a Google Place ID through the Find Place
 * endpoint.
 *
 * @param businessName - Free-text business name to search for.
 * @param apiKey - Google Maps API key.
 * @returns The place ID of the first candidate.
 * @throws Error when the HTTP request fails, when the API reports a status
 *   other than `OK`/`ZERO_RESULTS`, or when no candidate is returned.
 */
async function findPlaceIdByName(businessName: string, apiKey: string): Promise<string> {
  const searchUrl = `https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input=${encodeURIComponent(businessName)}&inputtype=textquery&fields=place_id&key=${encodeURIComponent(apiKey)}`;
  const searchResponse = await fetch(searchUrl);
  if (!searchResponse.ok) {
    throw new Error(
      `Google Places search failed (${searchResponse.status}): ${await searchResponse.text()}`
    );
  }

  const searchData = (await searchResponse.json()) as FindPlaceResponse & {
    status?: string;
    error_message?: string;
  };

  // API-level failures (denied key, quota, invalid request) arrive with HTTP
  // 200 and no candidates; report them as what they are instead of letting
  // them fall through to "Business not found".
  const { status } = searchData;
  if (status !== undefined && status !== 'OK' && status !== 'ZERO_RESULTS') {
    const detail = searchData.error_message ? `: ${searchData.error_message}` : '';
    throw new Error(`Google Places search error: ${status}${detail}`);
  }

  const placeId = searchData.candidates?.[0]?.place_id;
  if (!placeId) {
    throw new Error(`Business not found: ${businessName}`);
  }
  return placeId;
}

/**
 * Google Maps connector: fetches the reviews of one business through the
 * Google Places web service. Actions are not supported.
 */
export default class GoogleMapsConnector extends ConnectorRuntime {
  readonly definition: ConnectorDefinition = {
    key: 'gmaps',
    name: 'Google Maps',
    description: 'Fetches business reviews using Google Places API.',
    version: '1.0.0',
    faviconDomain: 'maps.google.com',
    authSchema: {
      methods: [
        {
          type: 'env_keys',
          required: true,
          fields: [
            {
              key: 'GOOGLE_MAPS_API_KEY',
              label: 'Google Maps API Key',
              secret: true,
            },
          ],
        },
      ],
    },
    feeds: {
      reviews: {
        key: 'reviews',
        name: 'Business Reviews',
        description: 'Fetch reviews for a business on Google Maps.',
        configSchema,
        eventKinds: {
          review: {
            description: 'A Google Maps business review',
            metadataSchema: {
              type: 'object',
              properties: {
                rating: { type: 'number', description: 'Star rating (1-5)' },
                author_url: { type: 'string', format: 'uri' },
                profile_photo_url: { type: 'string', format: 'uri' },
                relative_time_description: { type: 'string' },
              },
            },
          },
        },
      },
    },
    optionsSchema: configSchema,
  };

  /**
   * Fetches the place's reviews and returns those that pass the checkpoint
   * filter, newest first.
   *
   * @param ctx - Sync context. `ctx.config` must carry `GOOGLE_MAPS_API_KEY`
   *   and at least one of `place_id` / `business_name`; `ctx.checkpoint` is
   *   read as a `GMapsCheckpoint`.
   * @returns Events for reviews with text, a checkpoint holding the newest
   *   emitted review time (or the previous value when nothing was emitted),
   *   and the number of reviews the API returned.
   * @throws Error when the API key or both place identifiers are missing,
   *   when a request fails, or when the API reports a non-`OK` status.
   */
  async sync(ctx: SyncContext): Promise<SyncResult> {
    const apiKey = ctx.config.GOOGLE_MAPS_API_KEY as string | undefined;
    if (!apiKey) {
      throw new Error('GOOGLE_MAPS_API_KEY is required');
    }

    const configuredPlaceId = ctx.config.place_id as string | undefined;
    const businessName = ctx.config.business_name as string | undefined;

    // A configured place ID wins; the business name is only a search fallback.
    let placeId: string;
    if (configuredPlaceId) {
      placeId = configuredPlaceId;
    } else if (businessName) {
      placeId = await findPlaceIdByName(businessName, apiKey);
    } else {
      throw new Error('Either place_id or business_name is required');
    }

    // Fetch place details with reviews. `reviews_sort=newest` asks for the
    // most recent reviews: the response carries only a handful of reviews and
    // the checkpoint below keeps only those newer than the last sync, so the
    // default relevance ordering would rarely surface anything new.
    const encodedPlaceId = encodeURIComponent(placeId);
    const detailsUrl = `https://maps.googleapis.com/maps/api/place/details/json?place_id=${encodedPlaceId}&fields=name,reviews,url&reviews_sort=newest&key=${encodeURIComponent(apiKey)}`;
    const detailsResponse = await fetch(detailsUrl);
    if (!detailsResponse.ok) {
      throw new Error(
        `Google Places details failed (${detailsResponse.status}): ${await detailsResponse.text()}`
      );
    }
    const data = (await detailsResponse.json()) as PlaceDetailsResponse & {
      error_message?: string;
    };

    if (data.status !== 'OK') {
      const detail = data.error_message ? `: ${data.error_message}` : '';
      throw new Error(`Google Places API error: ${data.status}${detail}`);
    }

    const place = data.result;
    const reviews = place?.reviews ?? [];
    const placeUrl = place?.url ?? `https://maps.google.com/?q=place_id:${encodedPlaceId}`;

    // Transform reviews to EventEnvelope[] — skip reviews without text
    let events: EventEnvelope[] = reviews
      .filter((review) => review.text)
      .map((review) => ({
        origin_id: `${placeId}_${review.time}`,
        payload_text: review.text,
        author_name: review.author_name || undefined,
        // `time` is in epoch seconds; Date expects milliseconds.
        occurred_at: new Date(review.time * 1000),
        origin_type: 'review',
        source_url: placeUrl,
        score: calculateEngagementScore('gmaps', { rating: review.rating }),
        metadata: {
          rating: review.rating,
          author_url: review.author_url,
          profile_photo_url: review.profile_photo_url,
          relative_time_description: review.relative_time_description,
        },
      }));

    // Filter by checkpoint
    const checkpoint = ctx.checkpoint as GMapsCheckpoint | null;
    events = filterByCheckpoint(events, checkpoint);

    // Sort descending by occurred_at
    events.sort((a, b) => b.occurred_at.getTime() - a.occurred_at.getTime());

    // Advance the checkpoint to the newest emitted review; keep the previous
    // value when this sync produced nothing.
    const newCheckpoint: Record<string, unknown> =
      events.length > 0
        ? { last_timestamp: events[0].occurred_at.toISOString() }
        : { last_timestamp: checkpoint?.last_timestamp ?? null };

    return {
      events,
      checkpoint: newCheckpoint,
      metadata: {
        items_found: reviews.length,
      },
    };
  }

  /**
   * Actions are not implemented for this connector.
   *
   * @returns Always a failed result with the message `Actions not supported`.
   */
  async execute(_ctx: ActionContext): Promise<ActionResult> {
    return { success: false, error: 'Actions not supported' };
  }
}
|