salesprompter-cli 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import { spawn } from "node:child_process";
3
- import { access, appendFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
3
+ import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
4
4
  import { createRequire } from "node:module";
5
5
  import os from "node:os";
6
6
  import path from "node:path";
@@ -33,9 +33,7 @@ import { buildSalesNavigatorHistoricalBackfillPlan, ensureSalesNavigatorPeopleCo
33
33
  const require = createRequire(import.meta.url);
34
34
  const { version: packageVersion } = require("../package.json");
35
35
  const program = new Command();
36
- const companyProvider = new HeuristicCompanyProvider();
37
- const peopleSearchProvider = new HeuristicPeopleSearchProvider();
38
- const leadProvider = new AccountLeadProvider(companyProvider, peopleSearchProvider);
36
+ const leadProvider = new AccountLeadProvider(new HeuristicCompanyProvider(), new HeuristicPeopleSearchProvider());
39
37
  const enrichmentProvider = new HeuristicEnrichmentProvider();
40
38
  const scoringProvider = new HeuristicScoringProvider();
41
39
  const syncProvider = new RoutedSyncProvider(new DryRunSyncProvider(), new InstantlySyncProvider());
@@ -96,22 +94,6 @@ const LinkedInCompanyBackfillStatusResponseSchema = z.object({
96
94
  failureCode: z.string().nullable().optional(),
97
95
  failureMessage: z.string().nullable().optional()
98
96
  });
99
- const PhantombusterContainersSyncResponseSchema = z.object({
100
- status: z.literal("ok"),
101
- agentIds: z.array(z.string().min(1)),
102
- agents: z.array(z.object({
103
- agentId: z.string().min(1),
104
- fetched: z.number().int().nonnegative(),
105
- upserted: z.number().int().nonnegative(),
106
- resultsSynced: z.number().int().nonnegative()
107
- })),
108
- fetched: z.number().int().nonnegative(),
109
- upserted: z.number().int().nonnegative(),
110
- resultsSynced: z.number().int().nonnegative(),
111
- outputsStored: z.number().int().nonnegative(),
112
- resultObjectsStored: z.number().int().nonnegative(),
113
- resultRowsStored: z.number().int().nonnegative()
114
- });
115
97
  const CliEmailEnrichmentCompaniesResponseSchema = z.object({
116
98
  clientId: z.number().int().positive(),
117
99
  companies: z.array(z.object({
@@ -939,13 +921,6 @@ function splitLookupFullName(fullName) {
939
921
  function buildSyntheticLookupEmail(contactId) {
940
922
  return `linkedin-lookup+${contactId}@salesprompter.invalid`;
941
923
  }
942
- function normalizeLinkedInLookupField(value) {
943
- if (value == null) {
944
- return undefined;
945
- }
946
- const normalized = normalizeLookupWhitespace(String(value));
947
- return normalized || undefined;
948
- }
949
924
  function looksLikeLookupCompanyRow(fullName, companyName) {
950
925
  const fullNameComparable = normalizeLooseMatchText(fullName);
951
926
  const companyComparable = normalizeLooseMatchText(companyName);
@@ -965,32 +940,19 @@ function parseLinkedInUrlLookupInput(content) {
965
940
  const parsed = z
966
941
  .array(z.object({
967
942
  clientId: z.union([z.string(), z.number()]).nullish(),
968
- contactId: z.union([z.string(), z.number()]).nullish(),
969
- companyId: z.union([z.string(), z.number()]).nullish(),
970
943
  fullName: z.string().nullish(),
971
944
  companyName: z.string().nullish(),
972
945
  email: z.string().nullish(),
973
- contact_email: z.string().nullish(),
974
- jobTitle: z.string().nullish(),
975
- jobtitle: z.string().nullish(),
976
- title: z.string().nullish(),
977
- linkedin_company_url: z.string().nullish(),
978
- linkedinCompanyUrl: z.string().nullish(),
979
- deep_dive_recommended_role: z.string().nullish(),
980
- deepDiveRecommendedRole: z.string().nullish()
946
+ jobTitle: z.string().nullish()
981
947
  }))
982
948
  .parse(JSON.parse(trimmed));
983
949
  return parsed
984
950
  .map((row) => ({
985
951
  clientId: row.clientId == null ? null : String(row.clientId).trim() || null,
986
- contactId: row.contactId == null ? undefined : String(row.contactId).trim() || undefined,
987
- companyId: row.companyId == null ? undefined : String(row.companyId).trim() || undefined,
988
952
  fullName: row.fullName?.trim() ?? "",
989
953
  companyName: row.companyName?.trim() ?? "",
990
- email: row.email?.trim() || row.contact_email?.trim() || undefined,
991
- jobTitle: row.jobTitle?.trim() || row.jobtitle?.trim() || row.title?.trim() || undefined,
992
- linkedinCompanyUrl: row.linkedin_company_url?.trim() || row.linkedinCompanyUrl?.trim() || undefined,
993
- deepDiveRecommendedRole: row.deep_dive_recommended_role?.trim() || row.deepDiveRecommendedRole?.trim() || undefined
954
+ email: row.email?.trim() || undefined,
955
+ jobTitle: row.jobTitle?.trim() || undefined
994
956
  }))
995
957
  .filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
996
958
  }
@@ -1018,35 +980,17 @@ function parseLinkedInUrlLookupInput(content) {
1018
980
  ? headerValues.findIndex((value) => ["companyname", "company_name"].includes(value))
1019
981
  : 2;
1020
982
  const emailIndex = hasHeader ? headerValues.findIndex((value) => value === "email") : -1;
1021
- const contactEmailIndex = hasHeader ? headerValues.findIndex((value) => value === "contact_email") : -1;
1022
983
  const jobTitleIndex = hasHeader
1023
984
  ? headerValues.findIndex((value) => ["jobtitle", "job_title", "title"].includes(value))
1024
985
  : -1;
1025
- const contactIdIndex = hasHeader
1026
- ? headerValues.findIndex((value) => ["contactid", "contact_id", "hubspot_contact_id"].includes(value))
1027
- : -1;
1028
- const companyIdIndex = hasHeader
1029
- ? headerValues.findIndex((value) => ["companyid", "company_id", "hubspot_company_id"].includes(value))
1030
- : -1;
1031
- const linkedinCompanyUrlIndex = hasHeader
1032
- ? headerValues.findIndex((value) => ["linkedin_company_url", "linkedincompanyurl"].includes(value))
1033
- : -1;
1034
- const deepDiveRecommendedRoleIndex = hasHeader
1035
- ? headerValues.findIndex((value) => ["deep_dive_recommended_role", "deepdiverecommendedrole"].includes(value))
1036
- : -1;
1037
986
  return dataLines
1038
987
  .map((line) => splitLooseDelimitedLine(line, delimiter).map((value) => value.trim()))
1039
988
  .map((columns) => ({
1040
989
  clientId: clientIdIndex >= 0 ? columns[clientIdIndex] || null : null,
1041
- contactId: contactIdIndex >= 0 ? columns[contactIdIndex] || undefined : undefined,
1042
- companyId: companyIdIndex >= 0 ? columns[companyIdIndex] || undefined : undefined,
1043
990
  fullName: fullNameIndex >= 0 ? columns[fullNameIndex] || "" : "",
1044
991
  companyName: companyNameIndex >= 0 ? columns[companyNameIndex] || "" : "",
1045
- email: (emailIndex >= 0 ? columns[emailIndex] || undefined : undefined) ??
1046
- (contactEmailIndex >= 0 ? columns[contactEmailIndex] || undefined : undefined),
1047
- jobTitle: jobTitleIndex >= 0 ? columns[jobTitleIndex] || undefined : undefined,
1048
- linkedinCompanyUrl: linkedinCompanyUrlIndex >= 0 ? columns[linkedinCompanyUrlIndex] || undefined : undefined,
1049
- deepDiveRecommendedRole: deepDiveRecommendedRoleIndex >= 0 ? columns[deepDiveRecommendedRoleIndex] || undefined : undefined
992
+ email: emailIndex >= 0 ? columns[emailIndex] || undefined : undefined,
993
+ jobTitle: jobTitleIndex >= 0 ? columns[jobTitleIndex] || undefined : undefined
1050
994
  }))
1051
995
  .filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
1052
996
  }
@@ -1101,7 +1045,7 @@ function parseLinkedInCompanyLookupInput(content) {
1101
1045
  }
1102
1046
  function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
1103
1047
  return rows.flatMap((row, index) => {
1104
- const contactId = normalizeLinkedInLookupField(row.contactId) ?? String(index + 1);
1048
+ const contactId = String(index + 1);
1105
1049
  const syntheticEmail = row.email?.trim() || buildSyntheticLookupEmail(contactId);
1106
1050
  const rawCompanyName = normalizeLookupWhitespace(row.companyName);
1107
1051
  const cleanedCompanyName = normalizeLookupCompanyForSearch(cleanedCompanyMap.get(normalizeLookupCompanyForCleaning(rawCompanyName)) ?? rawCompanyName);
@@ -1115,10 +1059,7 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
1115
1059
  companyName: cleanedCompanyName,
1116
1060
  companyNameOriginal: rawCompanyName || undefined,
1117
1061
  email: syntheticEmail,
1118
- jobTitle: row.jobTitle,
1119
- companyId: normalizeLinkedInLookupField(row.companyId),
1120
- linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
1121
- deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
1062
+ jobTitle: row.jobTitle
1122
1063
  }
1123
1064
  ];
1124
1065
  }
@@ -1133,10 +1074,7 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
1133
1074
  companyName: cleanedCompanyName,
1134
1075
  companyNameOriginal: rawCompanyName || undefined,
1135
1076
  email: syntheticEmail,
1136
- jobTitle: row.jobTitle,
1137
- companyId: normalizeLinkedInLookupField(row.companyId),
1138
- linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
1139
- deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
1077
+ jobTitle: row.jobTitle
1140
1078
  }
1141
1079
  ];
1142
1080
  const rawDiffers = rawSplit.firstName !== cleanedSplit.firstName ||
@@ -1150,9 +1088,6 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
1150
1088
  companyNameOriginal: rawCompanyName || undefined,
1151
1089
  email: syntheticEmail,
1152
1090
  jobTitle: row.jobTitle,
1153
- companyId: normalizeLinkedInLookupField(row.companyId),
1154
- linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
1155
- deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined,
1156
1091
  isVariation: true
1157
1092
  });
1158
1093
  }
@@ -1175,132 +1110,10 @@ function readPipedreamLinkedInEnrichmentConfig() {
1175
1110
  projectEnvironment: resolveConfiguredEnvValue(process.env, "PIPEDREAM_PROJECT_ENVIRONMENT") || ""
1176
1111
  };
1177
1112
  }
1178
- function isSyntheticLinkedInLookupEmail(value) {
1179
- const normalized = normalizeLookupWhitespace(value).toLowerCase();
1180
- return normalized.endsWith("@salesprompter.invalid");
1181
- }
1182
1113
  function deriveCsrfTokenFromCookie(cookie) {
1183
1114
  const match = cookie.match(/JSESSIONID="?([^";]+)"?/i);
1184
1115
  return match?.[1]?.trim() || "";
1185
1116
  }
1186
- function normalizeLinkedInDirectLookupCookieHeader(cookie) {
1187
- const trimmed = normalizeLookupWhitespace(cookie);
1188
- if (!trimmed) {
1189
- return "";
1190
- }
1191
- if (trimmed.includes("=") || trimmed.includes(";")) {
1192
- return trimmed;
1193
- }
1194
- return `li_at=${trimmed}`;
1195
- }
1196
- function parseLocalLinkedInExtensionTokenLog(content) {
1197
- const matches = [
1198
- ...content.matchAll(/\{"csrfToken":"([^"]+)","extractedFrom":"sales-api\/salesApiLeadSearch"[\s\S]*?"linkedInIdentity":"([^"]+)"[\s\S]*?"sessionCookie":"([\s\S]*?)","syncStatus":"(success|captured)"[\s\S]*?"userAgent":"([^"]+)"\}/g)
1199
- ];
1200
- const last = matches.at(-1);
1201
- if (!last) {
1202
- return null;
1203
- }
1204
- const csrfToken = normalizeLookupWhitespace(last[1]);
1205
- const linkedInIdentity = normalizeLookupWhitespace(last[2]);
1206
- const sessionCookie = normalizeLookupWhitespace(last[3]?.replace(/\\"/g, "\"").replace(/\\\\/g, "\\"));
1207
- const userAgent = normalizeLookupWhitespace(last[5]);
1208
- if (!csrfToken || !linkedInIdentity || !sessionCookie || !userAgent) {
1209
- return null;
1210
- }
1211
- return {
1212
- csrfToken,
1213
- linkedInIdentity,
1214
- sessionCookie,
1215
- userAgent
1216
- };
1217
- }
1218
- async function readLocalLinkedInExtensionTokenLog(filePath) {
1219
- try {
1220
- const content = await readFile(filePath, "latin1");
1221
- return parseLocalLinkedInExtensionTokenLog(content);
1222
- }
1223
- catch {
1224
- return null;
1225
- }
1226
- }
1227
- async function listChromeExtensionTokenLogCandidates() {
1228
- const overrideFile = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_LOG_PATH);
1229
- if (overrideFile) {
1230
- return [overrideFile];
1231
- }
1232
- const overrideDir = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_DIR);
1233
- if (overrideDir) {
1234
- try {
1235
- const files = await readdir(overrideDir);
1236
- return files
1237
- .filter((file) => file.endsWith(".log") || file.endsWith(".ldb"))
1238
- .map((file) => path.join(overrideDir, file))
1239
- .sort()
1240
- .reverse();
1241
- }
1242
- catch {
1243
- return [];
1244
- }
1245
- }
1246
- const chromeRootCandidates = [
1247
- path.join(os.homedir(), "Library", "Application Support", "Google", "Chrome"),
1248
- path.join(os.homedir(), "Library", "Application Support", "Chromium")
1249
- ];
1250
- const paths = [];
1251
- for (const chromeRoot of chromeRootCandidates) {
1252
- let profileDirs = [];
1253
- try {
1254
- profileDirs = await readdir(chromeRoot);
1255
- }
1256
- catch {
1257
- continue;
1258
- }
1259
- for (const profileDir of profileDirs) {
1260
- const extensionSettingsRoot = path.join(chromeRoot, profileDir, "Local Extension Settings");
1261
- let extensionIds = [];
1262
- try {
1263
- extensionIds = await readdir(extensionSettingsRoot);
1264
- }
1265
- catch {
1266
- continue;
1267
- }
1268
- for (const extensionId of extensionIds) {
1269
- const extensionDir = path.join(extensionSettingsRoot, extensionId);
1270
- let files = [];
1271
- try {
1272
- files = await readdir(extensionDir);
1273
- }
1274
- catch {
1275
- continue;
1276
- }
1277
- for (const file of files) {
1278
- if (!file.endsWith(".log")) {
1279
- continue;
1280
- }
1281
- paths.push(path.join(extensionDir, file));
1282
- }
1283
- }
1284
- }
1285
- }
1286
- return paths.sort().reverse();
1287
- }
1288
- async function readLocalLinkedInExtensionDirectLookupConfig() {
1289
- const candidates = await listChromeExtensionTokenLogCandidates();
1290
- for (const candidate of candidates) {
1291
- const snapshot = await readLocalLinkedInExtensionTokenLog(candidate);
1292
- if (!snapshot) {
1293
- continue;
1294
- }
1295
- return {
1296
- csrfToken: snapshot.csrfToken,
1297
- identity: snapshot.linkedInIdentity,
1298
- cookie: normalizeLinkedInDirectLookupCookieHeader(snapshot.sessionCookie),
1299
- userAgent: snapshot.userAgent
1300
- };
1301
- }
1302
- return null;
1303
- }
1304
1117
  function readLinkedInDirectLookupEnvConfig() {
1305
1118
  const cookie = process.env.SALESPROMPTER_LINKEDIN_SALES_NAV_COOKIE?.trim() ||
1306
1119
  process.env.LINKEDIN_SALES_NAV_COOKIE?.trim() ||
@@ -1317,7 +1130,7 @@ function readLinkedInDirectLookupEnvConfig() {
1317
1130
  return {
1318
1131
  csrfToken,
1319
1132
  identity,
1320
- cookie: normalizeLinkedInDirectLookupCookieHeader(cookie),
1133
+ cookie,
1321
1134
  userAgent: process.env.SALESPROMPTER_LINKEDIN_USER_AGENT?.trim() ||
1322
1135
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
1323
1136
  };
@@ -1367,7 +1180,7 @@ async function readStoredLinkedInDirectLookupConfig() {
1367
1180
  return {
1368
1181
  csrfToken,
1369
1182
  identity,
1370
- cookie: normalizeLinkedInDirectLookupCookieHeader(claimed.sessionCookie),
1183
+ cookie: claimed.sessionCookie,
1371
1184
  userAgent
1372
1185
  };
1373
1186
  }
@@ -1381,11 +1194,6 @@ async function readLinkedInDirectLookupConfig() {
1381
1194
  cachedLinkedInDirectLookupConfig = envConfig;
1382
1195
  return envConfig;
1383
1196
  }
1384
- const localExtensionConfig = await readLocalLinkedInExtensionDirectLookupConfig();
1385
- if (localExtensionConfig) {
1386
- cachedLinkedInDirectLookupConfig = localExtensionConfig;
1387
- return localExtensionConfig;
1388
- }
1389
1197
  const storedConfig = await readStoredLinkedInDirectLookupConfig();
1390
1198
  if (storedConfig) {
1391
1199
  cachedLinkedInDirectLookupConfig = storedConfig;
@@ -1402,200 +1210,46 @@ function buildLinkedInSalesApiUrl(params) {
1402
1210
  const encodedFirstName = encodeURIComponent(params.firstName);
1403
1211
  const encodedLastName = encodeURIComponent(params.lastName);
1404
1212
  const encodedCompanyName = encodeURIComponent(params.companyName);
1405
- const encodedKeywords = encodeURIComponent(params.keywordsText?.trim() || params.companyName);
1406
1213
  const filters = params.searchMode === "current_company"
1407
1214
  ? `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED))),(type:CURRENT_COMPANY,values:List((text:${encodedCompanyName},selectionType:INCLUDED)))`
1408
1215
  : `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED)))`;
1409
- const keywordsSegment = params.searchMode === "current_company" ? "" : `,keywords:${encodedKeywords}`;
1216
+ const keywordsSegment = params.searchMode === "keywords" ? `,keywords:${encodedCompanyName}` : "";
1410
1217
  return `${baseUrl.replace(/\/+$/, "")}/sales-api/salesApiLeadSearch?q=searchQuery&query=(recentSearchParam:(id:${Date.now()},doLogHistory:true),filters:List(${filters})${keywordsSegment})&start=0&count=25&trackingParam=(sessionId:${generateLinkedInSessionId()})&decorationId=com.linkedin.sales.deco.desktop.searchv2.LeadSearchResult-14`;
1411
1218
  }
1412
- function extractLookupTitleKeywords(value) {
1413
- const shortAllowlist = new Set(["hr", "it", "cfo"]);
1414
- return normalizeLooseMatchText(value)
1415
- .split(/\s+/)
1416
- .filter((token) => token.length >= 4 || shortAllowlist.has(token))
1417
- .filter((token) => ![
1418
- "head",
1419
- "senior",
1420
- "consultant",
1421
- "manager",
1422
- "specialist",
1423
- "lead",
1424
- "global",
1425
- "team",
1426
- "group"
1427
- ].includes(token))
1428
- .slice(0, 4);
1429
- }
1430
- function buildDeepDiveRoleSearchKeywords(role) {
1431
- const normalized = normalizeLooseMatchText(role);
1432
- switch (normalized) {
1433
- case "budgetholder":
1434
- return ["finance", "procurement", "purchasing", "accounting", "controlling", "cfo"];
1435
- case "decisionmaker":
1436
- return ["director", "head", "vp", "chief", "leiter", "lead"];
1437
- case "champion":
1438
- return ["hr", "workplace", "operations", "it", "people", "office"];
1439
- case "executivesponsor":
1440
- return ["executive", "board", "chief", "managing", "director", "ceo"];
1441
- case "influencer":
1442
- return ["specialist", "manager", "consultant", "project", "workplace", "hr"];
1443
- case "legalandcompliance":
1444
- return ["legal", "compliance", "datenschutz", "counsel"];
1445
- case "blocker":
1446
- return ["procurement", "legal", "compliance", "security"];
1447
- case "enduser":
1448
- return ["workplace", "office", "operations", "assistant", "admin"];
1449
- default:
1450
- return [];
1451
- }
1452
- }
1453
1219
  function buildLinkedInAccountSearchApiUrl(companyName) {
1454
1220
  const baseUrl = process.env.SALESPROMPTER_LINKEDIN_SALES_API_BASE_URL?.trim() ||
1455
1221
  "https://www.linkedin.com";
1456
1222
  const encodedCompanyName = encodeURIComponent(companyName);
1457
1223
  return `${baseUrl.replace(/\/+$/, "")}/sales-api/salesApiAccountSearch?q=searchQuery&query=(recentSearchParam:(id:${Date.now()},doLogHistory:true),spellCorrectionEnabled:true,keywords:${encodedCompanyName})&start=0&count=10&trackingParam=(sessionId:${generateLinkedInSessionId()})&decorationId=com.linkedin.sales.deco.desktop.searchv2.AccountSearchResult-14`;
1458
1224
  }
1459
- async function buildLinkedInLookupSearchVariants(contact, timeoutMs, resolvedCompanyAliases = []) {
1225
+ function buildLinkedInLookupSearchVariants(contact) {
1460
1226
  const variants = [];
1461
1227
  const seen = new Set();
1462
- const companyCandidateScores = new Map();
1463
- const addCompanyCandidate = (value, score) => {
1464
- const normalized = normalizeLookupWhitespace(value);
1465
- if (!normalized) {
1466
- return;
1467
- }
1468
- companyCandidateScores.set(normalized, Math.max(score, companyCandidateScores.get(normalized) ?? 0));
1469
- };
1470
- addCompanyCandidate(contact.companyName, 80);
1471
- addCompanyCandidate(contact.companyNameOriginal, 70);
1472
- const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
1473
- if (linkedInHandle && !/^\d+$/.test(linkedInHandle)) {
1474
- addCompanyCandidate(linkedInHandle.replace(/[-_]+/g, " "), 95);
1475
- }
1476
- for (const alias of resolvedCompanyAliases) {
1477
- addCompanyCandidate(alias, 110);
1478
- }
1479
- const emailDomain = (() => {
1480
- const email = normalizeLookupWhitespace(contact.email);
1481
- if (!email || isSyntheticLinkedInLookupEmail(email)) {
1482
- return "";
1483
- }
1484
- const at = email.lastIndexOf("@");
1485
- return at >= 0 ? email.slice(at + 1) : "";
1486
- })();
1487
- if (emailDomain) {
1488
- const host = emailDomain.replace(/^www\./i, "").split(".")[0] ?? "";
1489
- if (host) {
1490
- addCompanyCandidate(host.replace(/[-_]+/g, " "), 100);
1491
- }
1492
- }
1493
- if (contact.jobTitle && contact.deepDiveRecommendedRole) {
1494
- const primaryWord = normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
1495
- .split(/\s+/)
1496
- .filter((part) => part.length >= 4)
1497
- .slice(-1)[0];
1498
- if (primaryWord) {
1499
- addCompanyCandidate(primaryWord, 45);
1500
- }
1501
- }
1502
- const companyHints = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
1503
- for (const phrase of companyHints.phrases) {
1504
- const tokenCount = normalizeLooseMatchText(phrase).split(/\s+/).filter(Boolean).length;
1505
- if (tokenCount >= 1 && tokenCount <= 4) {
1506
- addCompanyCandidate(phrase, tokenCount <= 2 ? 75 : 60);
1507
- }
1508
- }
1509
- for (const keyword of companyHints.keywords.slice(0, 5)) {
1510
- addCompanyCandidate(keyword, keyword.includes(".") ? 90 : 55);
1511
- }
1512
- const titleKeywords = Array.from(new Set([
1513
- ...extractLookupTitleKeywords(contact.jobTitle),
1514
- ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
1515
- ])).slice(0, 6);
1516
- const rankedCompanyCandidates = Array.from(companyCandidateScores.entries())
1517
- .sort((left, right) => right[1] - left[1] || left[0].length - right[0].length)
1518
- .slice(0, 6);
1519
- const emailHostCandidate = (() => {
1520
- if (!emailDomain) {
1521
- return "";
1522
- }
1523
- return normalizeLookupWhitespace(emailDomain.replace(/^www\./i, "").split(".")[0] ?? "").replace(/[-_]+/g, " ");
1524
- })();
1525
- const cleanCompanyCandidate = normalizeLookupWhitespace(contact.companyName) ||
1526
- normalizeLookupWhitespace(contact.companyNameOriginal) ||
1527
- "";
1528
- const linkedInHandleCandidate = linkedInHandle && !/^\d+$/.test(linkedInHandle)
1529
- ? normalizeLookupWhitespace(linkedInHandle.replace(/[-_]+/g, " "))
1530
- : "";
1531
- const pushVariant = (companyName, searchMode) => {
1532
- const normalizedCompany = normalizeLookupWhitespace(companyName);
1533
- if (!normalizedCompany) {
1534
- return;
1535
- }
1536
- const keywordsText = searchMode === "keywords_title" && titleKeywords.length > 0
1537
- ? `${normalizedCompany} ${titleKeywords.join(" ")}`
1538
- : undefined;
1539
- if (searchMode === "keywords_title" && !keywordsText) {
1540
- return;
1541
- }
1542
- const key = [
1543
- contact.firstName.trim().toLowerCase(),
1544
- contact.lastName.trim().toLowerCase(),
1545
- normalizedCompany.toLowerCase(),
1546
- searchMode,
1547
- keywordsText?.toLowerCase() ?? ""
1548
- ].join("|");
1549
- if (seen.has(key)) {
1550
- return;
1228
+ const companyCandidates = [
1229
+ normalizeLookupWhitespace(contact.companyName),
1230
+ normalizeLookupWhitespace(contact.companyNameOriginal)
1231
+ ].filter(Boolean);
1232
+ for (const companyName of companyCandidates) {
1233
+ for (const searchMode of ["current_company", "keywords"]) {
1234
+ const key = [
1235
+ contact.firstName.trim().toLowerCase(),
1236
+ contact.lastName.trim().toLowerCase(),
1237
+ companyName.toLowerCase(),
1238
+ searchMode
1239
+ ].join("|");
1240
+ if (seen.has(key)) {
1241
+ continue;
1242
+ }
1243
+ seen.add(key);
1244
+ variants.push({
1245
+ firstName: contact.firstName,
1246
+ lastName: contact.lastName,
1247
+ companyName,
1248
+ searchMode
1249
+ });
1551
1250
  }
1552
- seen.add(key);
1553
- variants.push({
1554
- firstName: contact.firstName,
1555
- lastName: contact.lastName,
1556
- companyName: normalizedCompany,
1557
- searchMode,
1558
- keywordsText
1559
- });
1560
- };
1561
- const rankedCompanyNames = rankedCompanyCandidates.map(([companyName]) => companyName);
1562
- const currentCompanyStageCandidates = [
1563
- emailHostCandidate,
1564
- linkedInHandleCandidate,
1565
- ...resolvedCompanyAliases,
1566
- ...rankedCompanyNames.filter((companyName) => (companyCandidateScores.get(companyName) ?? 0) >= 90)
1567
- ];
1568
- const keywordStageCandidates = [
1569
- cleanCompanyCandidate,
1570
- ...rankedCompanyNames
1571
- ];
1572
- const keywordTitleStageCandidates = [
1573
- cleanCompanyCandidate,
1574
- ...rankedCompanyNames
1575
- ];
1576
- const fallbackCurrentCompanyCandidates = [
1577
- cleanCompanyCandidate,
1578
- normalizeLookupWhitespace(contact.companyNameOriginal),
1579
- ...rankedCompanyNames
1580
- ];
1581
- for (const companyName of currentCompanyStageCandidates) {
1582
- pushVariant(companyName, "current_company");
1583
- }
1584
- for (const companyName of keywordStageCandidates) {
1585
- pushVariant(companyName, "keywords");
1586
- }
1587
- for (const companyName of keywordTitleStageCandidates) {
1588
- pushVariant(companyName, "keywords_title");
1589
- }
1590
- for (const companyName of fallbackCurrentCompanyCandidates) {
1591
- pushVariant(companyName, "current_company");
1592
- }
1593
- for (const [companyName] of rankedCompanyCandidates) {
1594
- pushVariant(companyName, "current_company");
1595
- pushVariant(companyName, "keywords");
1596
- pushVariant(companyName, "keywords_title");
1597
1251
  }
1598
- return variants.slice(0, 12);
1252
+ return variants;
1599
1253
  }
1600
1254
  function normalizeSalesNavLeadUrl(value) {
1601
1255
  const trimmed = String(value ?? "").trim();
@@ -1617,21 +1271,14 @@ function normalizePublicLinkedInProfileUrl(value) {
1617
1271
  if (!trimmed) {
1618
1272
  return null;
1619
1273
  }
1620
- let parsed;
1621
- try {
1622
- parsed = new URL(trimmed);
1623
- }
1624
- catch {
1625
- return null;
1626
- }
1627
- if (!/(^|\.)linkedin\.com$/i.test(parsed.hostname)) {
1274
+ const publicMatch = trimmed.match(/https:\/\/www\.linkedin\.com\/in\/[^/?#]+\/?/i);
1275
+ if (!publicMatch) {
1628
1276
  return null;
1629
1277
  }
1630
- const pathMatch = parsed.pathname.match(/^\/in\/([^/?#]+)\/?/i);
1631
- if (!pathMatch?.[1]) {
1278
+ const candidate = publicMatch[0] ?? null;
1279
+ if (!candidate) {
1632
1280
  return null;
1633
1281
  }
1634
- const candidate = `https://www.linkedin.com/in/${pathMatch[1]}`;
1635
1282
  return normalizeSalesNavLeadUrl(candidate) ? null : candidate;
1636
1283
  }
1637
1284
  function extractLinkedInProfileUrlFromSalesApiElement(element) {
@@ -1774,112 +1421,6 @@ function extractLinkedInCompanyNameFromSalesApiElement(element) {
1774
1421
  }
1775
1422
  return null;
1776
1423
  }
1777
- function extractLinkedInFullNameFromSalesApiElement(element) {
1778
- if (!element) {
1779
- return null;
1780
- }
1781
- const directCandidates = [
1782
- typeof element.fullName === "string" ? element.fullName : null,
1783
- typeof element.name === "string" ? element.name : null
1784
- ].filter(Boolean);
1785
- for (const candidate of directCandidates) {
1786
- const normalized = normalizeLookupWhitespace(candidate);
1787
- if (normalized) {
1788
- return normalized;
1789
- }
1790
- }
1791
- const firstName = typeof element.firstName === "string" ? normalizeLookupWhitespace(element.firstName) : "";
1792
- const lastName = typeof element.lastName === "string" ? normalizeLookupWhitespace(element.lastName) : "";
1793
- const combined = normalizeLookupWhitespace(`${firstName} ${lastName}`);
1794
- return combined || null;
1795
- }
1796
- function extractLinkedInTitleFromSalesApiElement(element) {
1797
- if (!element) {
1798
- return null;
1799
- }
1800
- const directCandidates = [
1801
- typeof element.title === "string" ? element.title : null,
1802
- typeof element.occupation === "string" ? element.occupation : null
1803
- ].filter(Boolean);
1804
- for (const candidate of directCandidates) {
1805
- const normalized = normalizeLookupWhitespace(candidate);
1806
- if (normalized) {
1807
- return normalized;
1808
- }
1809
- }
1810
- const currentPosition = Array.isArray(element.currentPositions) && element.currentPositions.length > 0
1811
- ? element.currentPositions[0]
1812
- : null;
1813
- const currentTitle = currentPosition && typeof currentPosition.title === "string"
1814
- ? normalizeLookupWhitespace(currentPosition.title)
1815
- : "";
1816
- return currentTitle || null;
1817
- }
1818
- function scoreLinkedInSalesApiElementMatch(contact, element) {
1819
- const fullName = extractLinkedInFullNameFromSalesApiElement(element);
1820
- const companyName = extractLinkedInCompanyNameFromSalesApiElement(Array.isArray(element?.currentPositions) && element.currentPositions.length > 0
1821
- ? element.currentPositions[0]
1822
- : element) ?? extractLinkedInCompanyNameFromSalesApiElement(element);
1823
- const title = extractLinkedInTitleFromSalesApiElement(element);
1824
- const expectedFullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
1825
- const candidateFullName = normalizeLooseMatchText(fullName);
1826
- const expectedCompanies = Array.from(new Set([
1827
- normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
1828
- normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName)),
1829
- normalizeLooseMatchText(normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? ""),
1830
- normalizeLooseMatchText((() => {
1831
- const email = normalizeLookupWhitespace(contact.email);
1832
- if (!email || isSyntheticLinkedInLookupEmail(email)) {
1833
- return "";
1834
- }
1835
- return email.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
1836
- })())
1837
- ].filter(Boolean)));
1838
- const candidateCompany = normalizeLooseMatchText(companyName);
1839
- const candidateTitle = normalizeLooseMatchText(title);
1840
- let score = 0;
1841
- let exactNameMatch = false;
1842
- let companyMatchCount = 0;
1843
- if (expectedFullName && candidateFullName === expectedFullName) {
1844
- score += 120;
1845
- exactNameMatch = true;
1846
- }
1847
- else if (expectedFullName &&
1848
- candidateFullName.includes(normalizeLooseMatchText(contact.firstName)) &&
1849
- candidateFullName.includes(normalizeLooseMatchText(contact.lastName))) {
1850
- score += 90;
1851
- }
1852
- for (const companyHint of expectedCompanies) {
1853
- if (!companyHint) {
1854
- continue;
1855
- }
1856
- if (candidateCompany === companyHint) {
1857
- score += 40;
1858
- companyMatchCount += 1;
1859
- }
1860
- else if (candidateCompany.includes(companyHint) || companyHint.includes(candidateCompany)) {
1861
- score += 25;
1862
- companyMatchCount += 1;
1863
- }
1864
- }
1865
- const titleHints = [
1866
- ...extractLookupTitleKeywords(contact.jobTitle),
1867
- ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
1868
- ].slice(0, 6);
1869
- for (const hint of titleHints) {
1870
- if (hint && candidateTitle.includes(normalizeLooseMatchText(hint))) {
1871
- score += 6;
1872
- }
1873
- }
1874
- return {
1875
- score,
1876
- fullName,
1877
- companyName,
1878
- title,
1879
- exactNameMatch,
1880
- companyMatchCount
1881
- };
1882
- }
1883
1424
  function extractLinkedInCompanyEmployeeCountFromSalesApiElement(element) {
1884
1425
  if (!element) {
1885
1426
  return null;
@@ -1928,111 +1469,6 @@ function buildLinkedInCompanyLookupVariants(params) {
1928
1469
  }
1929
1470
  return variants;
1930
1471
  }
1931
- function buildDirectCompanyContextKey(contact) {
1932
- return normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName);
1933
- }
1934
- async function resolveDirectLinkedInCompanyContexts(params) {
1935
- const perCompanyBudgetMs = Math.min(params.timeoutMs, 10_000);
1936
- const primaryByCompany = new Map();
1937
- for (const contact of params.contacts) {
1938
- const key = buildDirectCompanyContextKey(contact);
1939
- if (!key || primaryByCompany.has(key)) {
1940
- continue;
1941
- }
1942
- primaryByCompany.set(key, contact);
1943
- }
1944
- const contexts = new Map();
1945
- for (const [companyKey, contact] of primaryByCompany.entries()) {
1946
- const aliases = new Set();
1947
- const addAlias = (value) => {
1948
- const normalized = normalizeLookupWhitespace(value);
1949
- if (!normalized) {
1950
- return;
1951
- }
1952
- aliases.add(normalized);
1953
- };
1954
- addAlias(contact.companyNameOriginal);
1955
- addAlias(contact.companyName);
1956
- const existingHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
1957
- if (existingHandle && !/^\d+$/.test(existingHandle)) {
1958
- addAlias(existingHandle.replace(/[-_]+/g, " "));
1959
- }
1960
- let matchedCompanyUrl = contact.linkedinCompanyUrl ?? null;
1961
- let matchedSalesNavCompanyUrl = null;
1962
- let matchedCompanyName = null;
1963
- let matchedCompanyEmployeeCount = null;
1964
- const companyDeadline = Date.now() + perCompanyBudgetMs;
1965
- const variants = buildLinkedInCompanyLookupVariants({
1966
- contactId: contact.contact_id,
1967
- companyName: contact.companyName,
1968
- companyNameOriginal: contact.companyNameOriginal
1969
- }).slice(0, 4);
1970
- for (const variant of variants) {
1971
- if (Date.now() >= companyDeadline) {
1972
- break;
1973
- }
1974
- const controller = new AbortController();
1975
- const timeout = setTimeout(controller.abort.bind(controller), Math.min(6_000, Math.max(1_000, companyDeadline - Date.now())));
1976
- try {
1977
- const response = await fetch(buildLinkedInAccountSearchApiUrl(variant.companyName), {
1978
- method: "GET",
1979
- signal: controller.signal,
1980
- headers: {
1981
- accept: "*/*",
1982
- "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
1983
- "csrf-token": params.config.csrfToken,
1984
- referer: "https://www.linkedin.com/sales/search/company",
1985
- "sec-fetch-dest": "empty",
1986
- "sec-fetch-mode": "cors",
1987
- "sec-fetch-site": "same-origin",
1988
- "user-agent": params.config.userAgent,
1989
- "x-li-identity": params.config.identity,
1990
- "x-li-lang": "en_US",
1991
- "x-li-page-instance": "urn:li:page:d_sales2_search_accounts;13Jvve6kRGCao+iP0wwAag==",
1992
- "x-restli-protocol-version": "2.0.0",
1993
- cookie: params.config.cookie
1994
- }
1995
- });
1996
- if (!response.ok) {
1997
- if (response.status === 429) {
1998
- break;
1999
- }
2000
- continue;
2001
- }
2002
- const data = (await response.json());
2003
- const first = data.elements?.[0];
2004
- const companyUrl = extractLinkedInCompanyUrlFromSalesApiElement(first);
2005
- const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(first);
2006
- const companyName = extractLinkedInCompanyNameFromSalesApiElement(first);
2007
- if (companyUrl || salesNavCompanyUrl || companyName) {
2008
- matchedCompanyUrl = companyUrl ?? matchedCompanyUrl;
2009
- matchedSalesNavCompanyUrl = salesNavCompanyUrl ?? matchedSalesNavCompanyUrl;
2010
- matchedCompanyName = companyName ?? matchedCompanyName;
2011
- matchedCompanyEmployeeCount = extractLinkedInCompanyEmployeeCountFromSalesApiElement(first);
2012
- addAlias(companyName);
2013
- addAlias(companyUrl ? normalizeLinkedInCompanyHandle(companyUrl)?.replace(/[-_]+/g, " ") : null);
2014
- addAlias(salesNavCompanyUrl ? normalizeLookupWhitespace(salesNavCompanyUrl.split("/sales/company/")[1]?.split(/[/?#]/)[0] ?? "") : null);
2015
- break;
2016
- }
2017
- }
2018
- catch {
2019
- // Try next company variant.
2020
- }
2021
- finally {
2022
- clearTimeout(timeout);
2023
- }
2024
- }
2025
- contexts.set(companyKey, {
2026
- normalizedCompanyKey: companyKey,
2027
- aliases: Array.from(aliases),
2028
- linkedinCompanyUrl: matchedCompanyUrl,
2029
- salesNavCompanyUrl: matchedSalesNavCompanyUrl,
2030
- matchedCompanyName,
2031
- matchedCompanyEmployeeCount
2032
- });
2033
- }
2034
- return contexts;
2035
- }
2036
1472
  function buildPublicLinkedInCompanySearchUrl(companyName) {
2037
1473
  const baseUrl = process.env.SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL?.trim() ||
2038
1474
  "https://duckduckgo.com/html/";
@@ -2096,8 +1532,7 @@ function extractSerperLinkedInCompanyCandidates(payload) {
2096
1532
  const organic = "organic" in payload && Array.isArray(payload.organic)
2097
1533
  ? (payload.organic ?? [])
2098
1534
  : [];
2099
- const seen = new Set();
2100
- const candidates = [];
1535
+ const candidates = new Set();
2101
1536
  for (const result of organic) {
2102
1537
  if (!result || typeof result !== "object") {
2103
1538
  continue;
@@ -2107,685 +1542,60 @@ function extractSerperLinkedInCompanyCandidates(payload) {
2107
1542
  : "";
2108
1543
  const handle = normalizeLinkedInCompanyHandle(link);
2109
1544
  if (handle) {
2110
- const url = normalizeLinkedInCompanyPage(handle);
2111
- if (!seen.has(url)) {
2112
- seen.add(url);
2113
- candidates.push({
2114
- url,
2115
- title: "title" in result && typeof result.title === "string"
2116
- ? normalizeLookupWhitespace(result.title)
2117
- : "",
2118
- snippet: "snippet" in result && typeof result.snippet === "string"
2119
- ? normalizeLookupWhitespace(result.snippet)
2120
- : ""
2121
- });
2122
- }
1545
+ candidates.add(normalizeLinkedInCompanyPage(handle));
2123
1546
  }
2124
1547
  }
2125
- return candidates;
1548
+ return Array.from(candidates);
2126
1549
  }
2127
- const linkedInCompanyHintCache = new Map();
2128
- const linkedInProfilePageSignalCache = new Map();
2129
- const linkedInCompanyPageSignalCache = new Map();
2130
- const serperSearchCache = new Map();
2131
- let serperCreditsExhausted = false;
2132
- function extractKeywordPhrases(value) {
2133
- const normalized = normalizeLookupWhitespace(value);
2134
- if (!normalized) {
2135
- return [];
2136
- }
2137
- const phrases = new Set();
2138
- const push = (candidate) => {
2139
- const cleaned = normalizeLookupWhitespace(candidate);
2140
- if (!cleaned || cleaned.length < 3) {
2141
- return;
1550
+ function decodeHtmlEntities(value) {
1551
+ return value
1552
+ .replace(/&amp;/gi, "&")
1553
+ .replace(/&quot;/gi, '"')
1554
+ .replace(/&#39;/gi, "'")
1555
+ .replace(/&lt;/gi, "<")
1556
+ .replace(/&gt;/gi, ">");
1557
+ }
1558
+ async function fetchLinkedInCompanyPageSignals(url, timeoutMs) {
1559
+ const controller = new AbortController();
1560
+ const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 12_000));
1561
+ try {
1562
+ const response = await fetch(url, {
1563
+ method: "GET",
1564
+ signal: controller.signal,
1565
+ headers: {
1566
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
1567
+ }
1568
+ });
1569
+ const html = await response.text();
1570
+ const finalUrl = response.url || url;
1571
+ const title = decodeHtmlEntities(html.match(/<title[^>]*>([^<]+)/i)?.[1] || "") ||
1572
+ decodeHtmlEntities(html.match(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i)?.[1] || "");
1573
+ const description = decodeHtmlEntities(html.match(/<meta[^>]+name="description"[^>]+content="([^"]+)/i)?.[1] || "");
1574
+ const bodyText = decodeHtmlEntities(html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " "));
1575
+ const unavailableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
1576
+ const unavailable = response.status >= 400 ||
1577
+ unavailableText.includes("page not found") ||
1578
+ unavailableText.includes("this page does not exist") ||
1579
+ unavailableText.includes("page isnt available");
1580
+ const handle = normalizeLinkedInCompanyHandle(finalUrl) ?? normalizeLinkedInCompanyHandle(url);
1581
+ if (!handle) {
1582
+ return null;
2142
1583
  }
2143
- phrases.add(cleaned);
2144
- };
2145
- push(normalized);
2146
- push(normalizeLookupCompanyForSearch(normalized));
2147
- push(aggressivelyCleanLookupCompanyName(normalized));
2148
- const titleStripped = normalized
2149
- .replace(/\|\s*linkedin$/i, "")
2150
- .replace(/\|\s*overview$/i, "")
2151
- .replace(/\b(linkedin|home|about|posts|see all details)\b/gi, " ")
2152
- .replace(/\s+/g, " ")
2153
- .trim();
2154
- push(titleStripped);
2155
- const parts = titleStripped
2156
- .split(/[|,·•:()/-]+/)
2157
- .map((part) => normalizeLookupWhitespace(part))
2158
- .filter(Boolean);
2159
- for (const part of parts) {
2160
- push(part);
2161
- }
2162
- const looseTokens = normalizeLooseMatchText(titleStripped)
2163
- .split(/\s+/)
2164
- .filter((token) => token.length >= 4)
2165
- .filter((token) => ![
2166
- "group",
2167
- "holding",
2168
- "services",
2169
- "service",
2170
- "consulting",
2171
- "gmbh",
2172
- "publishing",
2173
- "company",
2174
- "linkedin",
2175
- "deutschland"
2176
- ].includes(token));
2177
- if (looseTokens.length > 0) {
2178
- push(looseTokens[0]);
2179
- push(looseTokens.slice(0, 2).join(" "));
2180
- push(looseTokens.slice(-2).join(" "));
2181
- }
2182
- return Array.from(phrases);
2183
- }
2184
- async function buildLinkedInProfileCompanyHints(contact, timeoutMs) {
2185
- const phrases = new Set();
2186
- const keywords = new Set();
2187
- const addPhrase = (value) => {
2188
- for (const phrase of extractKeywordPhrases(value)) {
2189
- phrases.add(phrase);
2190
- const looseTokens = normalizeLooseMatchText(phrase)
2191
- .split(/\s+/)
2192
- .filter((token) => token.length >= 4)
2193
- .filter((token) => ![
2194
- "group",
2195
- "holding",
2196
- "services",
2197
- "service",
2198
- "consulting",
2199
- "gmbh",
2200
- "publishing",
2201
- "company",
2202
- "linkedin",
2203
- "deutschland"
2204
- ].includes(token));
2205
- for (const token of looseTokens.slice(0, 5)) {
2206
- keywords.add(token);
2207
- }
2208
- if (looseTokens.length > 1) {
2209
- keywords.add(looseTokens.slice(0, 2).join(" "));
2210
- keywords.add(looseTokens.slice(-2).join(" "));
2211
- }
2212
- }
2213
- };
2214
- addPhrase(contact.companyNameOriginal ?? contact.companyName);
2215
- const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
2216
- if (linkedInHandle && !/^\d+$/.test(linkedInHandle)) {
2217
- addPhrase(linkedInHandle.replace(/[-_]+/g, " "));
2218
- }
2219
- const normalizedEmail = normalizeLookupWhitespace(contact.email);
2220
- const emailDomain = normalizedEmail && !isSyntheticLinkedInLookupEmail(normalizedEmail)
2221
- ? normalizedEmail.split("@")[1] ?? ""
2222
- : "";
2223
- if (emailDomain) {
2224
- const normalizedDomain = emailDomain.replace(/^www\./i, "");
2225
- keywords.add(normalizedDomain);
2226
- const host = normalizedDomain.split(".")[0] ?? "";
2227
- if (host) {
2228
- addPhrase(host.replace(/[-_]+/g, " "));
2229
- }
2230
- }
2231
- const companyUrl = contact.linkedinCompanyUrl?.trim();
2232
- if (companyUrl) {
2233
- const cacheKey = companyUrl.replace(/\/$/, "");
2234
- let cachedHints = linkedInCompanyHintCache.get(cacheKey);
2235
- if (!cachedHints) {
2236
- const signals = await fetchLinkedInCompanyPageSignals(companyUrl, timeoutMs);
2237
- cachedHints = signals ? [...extractKeywordPhrases(signals.title), ...extractKeywordPhrases(signals.description)] : [];
2238
- linkedInCompanyHintCache.set(cacheKey, cachedHints);
2239
- }
2240
- for (const hint of cachedHints) {
2241
- addPhrase(hint);
2242
- }
2243
- }
2244
- return {
2245
- phrases: Array.from(phrases)
2246
- .map((value) => normalizeLookupWhitespace(value))
2247
- .filter((value) => value.length > 0),
2248
- keywords: Array.from(keywords)
2249
- .map((value) => normalizeLookupWhitespace(value))
2250
- .filter((value) => value.length > 0)
2251
- };
2252
- }
2253
- async function buildSerperLinkedInProfileQueries(contact, timeoutMs) {
2254
- const fullName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
2255
- const title = normalizeLookupWhitespace(contact.jobTitle);
2256
- const queryEntries = [];
2257
- const seenQueries = new Set();
2258
- const pushQuery = (query, score) => {
2259
- const normalized = normalizeLookupWhitespace(query);
2260
- if (!normalized) {
2261
- return;
2262
- }
2263
- const key = normalized.toLowerCase();
2264
- if (seenQueries.has(key)) {
2265
- return;
2266
- }
2267
- seenQueries.add(key);
2268
- queryEntries.push({ query: normalized, score });
2269
- };
2270
- const { phrases, keywords } = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
2271
- const enrichedPhrases = new Set(phrases);
2272
- const enrichedKeywords = new Set(keywords);
2273
- const preferredPhrases = [];
2274
- const normalizedEmail = normalizeLookupWhitespace(contact.email);
2275
- const trustedEmailDomain = normalizedEmail && !isSyntheticLinkedInLookupEmail(normalizedEmail)
2276
- ? normalizedEmail.split("@")[1]?.replace(/^www\./i, "") ?? ""
2277
- : "";
2278
- const emailHost = trustedEmailDomain.split(".")[0] ?? "";
2279
- const emailDomain = trustedEmailDomain;
2280
- const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? "";
2281
- if (contact.linkedinCompanyUrl?.trim()) {
2282
- const companySignals = await fetchLinkedInCompanyPageSignals(contact.linkedinCompanyUrl.trim(), timeoutMs);
2283
- for (const phrase of [
2284
- ...extractKeywordPhrases(companySignals?.title),
2285
- ...extractKeywordPhrases(companySignals?.description)
2286
- ]) {
2287
- enrichedPhrases.add(phrase);
2288
- preferredPhrases.push(phrase);
2289
- const looseTokens = normalizeLooseMatchText(phrase)
2290
- .split(/\s+/)
2291
- .filter((token) => token.length >= 4)
2292
- .filter((token) => ![
2293
- "group",
2294
- "holding",
2295
- "services",
2296
- "service",
2297
- "consulting",
2298
- "gmbh",
2299
- "publishing",
2300
- "company",
2301
- "linkedin",
2302
- "deutschland"
2303
- ].includes(token));
2304
- for (const token of looseTokens.slice(0, 4)) {
2305
- enrichedKeywords.add(token);
2306
- }
2307
- if (looseTokens.length > 1) {
2308
- enrichedKeywords.add(looseTokens.slice(0, 2).join(" "));
2309
- }
2310
- }
2311
- }
2312
- const phrasePriority = (value) => {
2313
- const loose = normalizeLooseMatchText(value);
2314
- const tokenCount = loose.split(/\s+/).filter(Boolean).length;
2315
- let score = 0;
2316
- if (emailHost && loose.includes(normalizeLooseMatchText(emailHost)))
2317
- score += 80;
2318
- if (linkedInHandle && loose.includes(normalizeLooseMatchText(linkedInHandle)))
2319
- score += 60;
2320
- if (tokenCount >= 1 && tokenCount <= 4)
2321
- score += 40;
2322
- if (!/\b(gmbh|holding|services|service|consulting|kg|co)\b/i.test(value))
2323
- score += 20;
2324
- if (tokenCount > 7)
2325
- score -= 40;
2326
- return score;
2327
- };
2328
- const keywordPriority = (value) => {
2329
- const loose = normalizeLooseMatchText(value);
2330
- let score = 0;
2331
- if (emailHost && loose.includes(normalizeLooseMatchText(emailHost)))
2332
- score += 80;
2333
- if (linkedInHandle && loose.includes(normalizeLooseMatchText(linkedInHandle)))
2334
- score += 60;
2335
- if (value.includes("."))
2336
- score += 20;
2337
- if (loose.split(/\s+/).filter(Boolean).length <= 2)
2338
- score += 10;
2339
- return score;
2340
- };
2341
- const rankedPhrases = [...enrichedPhrases].sort((left, right) => {
2342
- const preferredDelta = Number(preferredPhrases.includes(right)) - Number(preferredPhrases.includes(left));
2343
- if (preferredDelta !== 0) {
2344
- return preferredDelta;
2345
- }
2346
- return phrasePriority(right) - phrasePriority(left);
2347
- });
2348
- const cleanPhrases = rankedPhrases.slice(0, 6);
2349
- const fallbackKeywords = new Set(enrichedKeywords);
2350
- for (const phrase of cleanPhrases) {
2351
- const looseTokens = normalizeLooseMatchText(phrase)
2352
- .split(/\s+/)
2353
- .filter((token) => token.length >= 4)
2354
- .filter((token) => ![
2355
- "group",
2356
- "holding",
2357
- "services",
2358
- "service",
2359
- "consulting",
2360
- "gmbh",
2361
- "publishing",
2362
- "company",
2363
- "linkedin",
2364
- "deutschland"
2365
- ].includes(token));
2366
- for (const token of looseTokens.slice(0, 3)) {
2367
- fallbackKeywords.add(token);
2368
- }
2369
- if (looseTokens.length > 1) {
2370
- fallbackKeywords.add(looseTokens.slice(0, 2).join(" "));
2371
- }
2372
- }
2373
- if (emailHost) {
2374
- fallbackKeywords.add(emailHost);
2375
- }
2376
- if (emailDomain) {
2377
- fallbackKeywords.add(emailDomain);
2378
- }
2379
- if (linkedInHandle) {
2380
- fallbackKeywords.add(linkedInHandle);
2381
- }
2382
- const cleanKeywords = [...fallbackKeywords]
2383
- .sort((left, right) => keywordPriority(right) - keywordPriority(left))
2384
- .slice(0, 5);
2385
- cleanKeywords.forEach((keyword, index) => {
2386
- const keywordScore = 260 - index * 15;
2387
- pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} linkedin`, keywordScore);
2388
- pushQuery(`site:linkedin.com/in ${fullName} ${keyword} linkedin`, keywordScore - 5);
2389
- if (title) {
2390
- pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} "${title}"`, keywordScore - 10);
2391
- }
2392
- });
2393
- cleanPhrases.forEach((companyName, index) => {
2394
- const phraseScore = 180 - index * 10;
2395
- pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}"`, phraseScore);
2396
- pushQuery(`site:linkedin.com/in ${fullName} ${companyName} linkedin`, phraseScore - 5);
2397
- if (title) {
2398
- pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}" "${title}"`, phraseScore - 10);
2399
- pushQuery(`site:linkedin.com/in ${fullName} ${companyName} ${title} linkedin`, phraseScore - 15);
2400
- }
2401
- });
2402
- if (emailDomain) {
2403
- pushQuery(`site:linkedin.com/in "${fullName}" "${emailDomain}" linkedin`, 240);
2404
- }
2405
- pushQuery(`site:linkedin.com/in "${fullName}" linkedin`, 50);
2406
- if (title) {
2407
- pushQuery(`site:linkedin.com/in "${fullName}" "${title}" linkedin`, 40);
2408
- }
2409
- return queryEntries
2410
- .sort((left, right) => right.score - left.score)
2411
- .map((entry) => entry.query);
2412
- }
2413
- function extractPublicLinkedInProfileSearchCandidates(bodyText) {
2414
- const candidates = new Set();
2415
- const directMatches = bodyText.match(/https:\/\/(?:(?:www|[a-z]{2})\.)?linkedin\.com\/in\/[^"'&<>\s)]+/gi) ?? [];
2416
- for (const match of directMatches) {
2417
- const normalized = normalizePublicLinkedInProfileUrl(match);
2418
- if (normalized) {
2419
- candidates.add(normalized);
2420
- }
2421
- }
2422
- const encodedMatches = bodyText.match(/https?%3A%2F%2F(?:(?:www|[a-z]{2})\.)?linkedin\.com%2Fin%2F[^"'&<>\s)]+/gi) ?? [];
2423
- for (const match of encodedMatches) {
2424
- try {
2425
- const decoded = decodeURIComponent(match);
2426
- const normalized = normalizePublicLinkedInProfileUrl(decoded);
2427
- if (normalized) {
2428
- candidates.add(normalized);
2429
- }
2430
- }
2431
- catch {
2432
- // Ignore malformed encoded fragments.
2433
- }
2434
- }
2435
- return Array.from(candidates);
2436
- }
2437
- function buildPublicLinkedInProfileSearchUrl(query) {
2438
- const baseUrl = process.env.SALESPROMPTER_LINKEDIN_PROFILE_SEARCH_BASE_URL?.trim() ||
2439
- "https://duckduckgo.com/html/";
2440
- const url = new URL(baseUrl);
2441
- url.searchParams.set("q", query);
2442
- return url.toString();
2443
- }
2444
- async function fetchSerperSearchResults(query, num, timeoutMs) {
2445
- if (serperCreditsExhausted) {
2446
- return null;
2447
- }
2448
- const apiKey = getSerperApiKey();
2449
- if (!apiKey) {
2450
- return null;
2451
- }
2452
- const cacheKey = `${query}::${num}`;
2453
- if (serperSearchCache.has(cacheKey)) {
2454
- return serperSearchCache.get(cacheKey) ?? null;
2455
- }
2456
- const controller = new AbortController();
2457
- const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 12_000));
2458
- try {
2459
- const response = await fetch(getSerperSearchEndpoint(), {
2460
- method: "POST",
2461
- signal: controller.signal,
2462
- headers: {
2463
- "X-API-KEY": apiKey,
2464
- "Content-Type": "application/json"
2465
- },
2466
- body: JSON.stringify({ q: query, num })
2467
- });
2468
- if (!response.ok) {
2469
- const bodyText = await response.text().catch(() => "");
2470
- if (response.status === 400 &&
2471
- /not enough credits/i.test(bodyText)) {
2472
- serperCreditsExhausted = true;
2473
- }
2474
- serperSearchCache.set(cacheKey, null);
2475
- return null;
2476
- }
2477
- const parsed = await response.json();
2478
- serperSearchCache.set(cacheKey, parsed);
2479
- return parsed;
2480
- }
2481
- catch {
2482
- return null;
2483
- }
2484
- finally {
2485
- clearTimeout(timeout);
2486
- }
2487
- }
2488
- function extractSerperLinkedInProfileCandidates(payload) {
2489
- if (!payload || typeof payload !== "object") {
2490
- return [];
2491
- }
2492
- const organic = "organic" in payload && Array.isArray(payload.organic)
2493
- ? (payload.organic ?? [])
2494
- : [];
2495
- const seen = new Set();
2496
- const candidates = [];
2497
- for (const result of organic) {
2498
- if (!result || typeof result !== "object")
2499
- continue;
2500
- const link = "link" in result && typeof result.link === "string"
2501
- ? result.link
2502
- : "";
2503
- const normalized = normalizePublicLinkedInProfileUrl(link);
2504
- if (normalized) {
2505
- const canonical = normalized.replace(/\/$/, "");
2506
- if (!seen.has(canonical)) {
2507
- seen.add(canonical);
2508
- candidates.push({
2509
- url: canonical,
2510
- title: "title" in result && typeof result.title === "string"
2511
- ? normalizeLookupWhitespace(result.title)
2512
- : "",
2513
- snippet: "snippet" in result && typeof result.snippet === "string"
2514
- ? normalizeLookupWhitespace(result.snippet)
2515
- : ""
2516
- });
2517
- }
2518
- }
2519
- }
2520
- return candidates;
2521
- }
2522
- async function fetchLinkedInProfilePageSignals(url, timeoutMs) {
2523
- const cacheKey = normalizePublicLinkedInProfileUrl(url)?.replace(/\/$/, "") ?? url.replace(/\/$/, "");
2524
- if (linkedInProfilePageSignalCache.has(cacheKey)) {
2525
- return linkedInProfilePageSignalCache.get(cacheKey) ?? null;
2526
- }
2527
- const controller = new AbortController();
2528
- const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 12_000));
2529
- try {
2530
- const targetUrl = rewriteLinkedInUrlForConfiguredBase(url);
2531
- const response = await fetch(targetUrl, {
2532
- method: "GET",
2533
- signal: controller.signal,
2534
- headers: {
2535
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
2536
- }
2537
- });
2538
- const html = await response.text();
2539
- const finalUrl = normalizePublicLinkedInProfileUrl(url) ||
2540
- normalizePublicLinkedInProfileUrl(response.url || url);
2541
- if (!finalUrl) {
2542
- return null;
2543
- }
2544
- const title = decodeHtmlEntities(html.match(/<title[^>]*>([^<]+)/i)?.[1] || "") ||
2545
- decodeHtmlEntities(html.match(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i)?.[1] || "");
2546
- const description = decodeHtmlEntities(html.match(/<meta[^>]+name="description"[^>]+content="([^"]+)/i)?.[1] || "");
2547
- const bodyText = decodeHtmlEntities(html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " "));
2548
- const unavailableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
2549
- const unavailable = response.status >= 400 ||
2550
- unavailableText.includes("page not found") ||
2551
- unavailableText.includes("profile not found") ||
2552
- unavailableText.includes("member profile") && unavailableText.includes("not available");
2553
- const result = {
2554
- normalizedUrl: finalUrl.replace(/\/$/, ""),
2555
- title: normalizeLookupWhitespace(title),
2556
- description: normalizeLookupWhitespace(description),
2557
- bodyText: normalizeLookupWhitespace(bodyText),
2558
- unavailable
2559
- };
2560
- linkedInProfilePageSignalCache.set(cacheKey, result);
2561
- return result;
2562
- }
2563
- catch {
2564
- linkedInProfilePageSignalCache.set(cacheKey, null);
2565
- return null;
2566
- }
2567
- finally {
2568
- clearTimeout(timeout);
2569
- }
2570
- }
2571
- function scoreLinkedInProfilePageSignals(contact, signals) {
2572
- const fullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
2573
- const companyHints = [
2574
- normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
2575
- normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName))
2576
- ].filter(Boolean);
2577
- const titleHint = normalizeLooseMatchText(contact.jobTitle);
2578
- const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description} ${signals.bodyText}`);
2579
- let score = 0;
2580
- if (fullName && haystack.includes(fullName))
2581
- score += 120;
2582
- for (const hint of companyHints) {
2583
- if (hint && haystack.includes(hint))
2584
- score += 30;
2585
- }
2586
- if (titleHint) {
2587
- const titleWords = titleHint.split(/\s+/).filter((token) => token.length >= 4).slice(0, 4);
2588
- score += titleWords.filter((token) => haystack.includes(token)).length * 8;
2589
- }
2590
- const slug = signals.normalizedUrl.split("/in/")[1]?.replace(/\/$/, "") ?? "";
2591
- const slugText = normalizeLooseMatchText(slug.replace(/[-_]+/g, " "));
2592
- if (fullName && slugText.includes(contact.firstName.toLowerCase()) && slugText.includes(contact.lastName.toLowerCase())) {
2593
- score += 40;
2594
- }
2595
- return score;
2596
- }
2597
- function analyzeSerperLinkedInProfileCandidate(contact, candidate) {
2598
- const fullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
2599
- const titleHint = normalizeLooseMatchText(contact.jobTitle);
2600
- const companyTokens = [
2601
- normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
2602
- normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName)),
2603
- normalizeLooseMatchText(normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? ""),
2604
- normalizeLooseMatchText((() => {
2605
- const normalizedEmail = normalizeLookupWhitespace(contact.email);
2606
- if (!normalizedEmail || isSyntheticLinkedInLookupEmail(normalizedEmail)) {
2607
- return "";
2608
- }
2609
- return normalizedEmail.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
2610
- })())
2611
- ].filter(Boolean);
2612
- const haystack = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);
2613
- let score = 0;
2614
- let companyMatches = 0;
2615
- let titleMatches = 0;
2616
- if (fullName && haystack.includes(fullName))
2617
- score += 120;
2618
- for (const token of companyTokens) {
2619
- if (!token)
2620
- continue;
2621
- if (haystack.includes(token)) {
2622
- companyMatches += 1;
2623
- score += token.split(/\s+/).length <= 2 ? 30 : 20;
2624
- }
2625
- }
2626
- if (titleHint) {
2627
- const titleWords = titleHint.split(/\s+/).filter((token) => token.length >= 4).slice(0, 4);
2628
- titleMatches = titleWords.filter((token) => haystack.includes(token)).length;
2629
- score += titleMatches * 8;
2630
- }
2631
- const slugText = normalizeLooseMatchText(candidate.url.split("/in/")[1]?.replace(/\/$/, "").replace(/[-_]+/g, " ") ?? "");
2632
- if (fullName &&
2633
- slugText.includes(contact.firstName.toLowerCase()) &&
2634
- slugText.includes(contact.lastName.toLowerCase()) &&
2635
- (companyMatches > 0 || titleMatches > 0)) {
2636
- score += 40;
2637
- }
2638
- return { score, companyMatches, titleMatches };
2639
- }
2640
- async function searchSerperLinkedInProfileUrl(contact, timeoutMs, options) {
2641
- if (!contact.firstName || !contact.lastName) {
2642
- return null;
2643
- }
2644
- const maxQueries = options?.maxQueries && Number.isFinite(options.maxQueries) && options.maxQueries > 0
2645
- ? Math.trunc(options.maxQueries)
2646
- : Number.POSITIVE_INFINITY;
2647
- for (const query of (await buildSerperLinkedInProfileQueries(contact, timeoutMs)).slice(0, maxQueries)) {
2648
- try {
2649
- const parsed = await fetchSerperSearchResults(query, 5, timeoutMs);
2650
- if (!parsed) {
2651
- continue;
2652
- }
2653
- const candidates = extractSerperLinkedInProfileCandidates(parsed);
2654
- let bestUrl = null;
2655
- let bestScore = 0;
2656
- for (const candidate of candidates) {
2657
- const serperAnalysis = analyzeSerperLinkedInProfileCandidate(contact, candidate);
2658
- const serperScore = serperAnalysis.score;
2659
- if (serperScore >= 150 && (serperAnalysis.companyMatches > 0 || serperAnalysis.titleMatches > 0)) {
2660
- return candidate.url;
2661
- }
2662
- const signals = await fetchLinkedInProfilePageSignals(candidate.url, timeoutMs);
2663
- if (!signals || signals.unavailable) {
2664
- if (serperScore > bestScore) {
2665
- bestScore = serperScore;
2666
- bestUrl = candidate.url;
2667
- }
2668
- continue;
2669
- }
2670
- const score = Math.max(serperScore, scoreLinkedInProfilePageSignals(contact, signals));
2671
- if (score > bestScore) {
2672
- bestScore = score;
2673
- bestUrl = signals.normalizedUrl;
2674
- }
2675
- }
2676
- if (bestUrl && bestScore >= 130) {
2677
- return bestUrl;
2678
- }
2679
- }
2680
- catch {
2681
- // Continue with the next query variant.
2682
- }
2683
- }
2684
- return searchPublicLinkedInProfileUrl(contact, timeoutMs, {
2685
- maxQueries: Math.min(Number.isFinite(maxQueries) ? maxQueries : 4, 4)
2686
- });
2687
- }
2688
- function decodeHtmlEntities(value) {
2689
- return value
2690
- .replace(/&amp;/gi, "&")
2691
- .replace(/&quot;/gi, '"')
2692
- .replace(/&#39;/gi, "'")
2693
- .replace(/&lt;/gi, "<")
2694
- .replace(/&gt;/gi, ">");
2695
- }
2696
- async function fetchLinkedInCompanyPageSignals(url, timeoutMs) {
2697
- const cacheKey = url.replace(/\/$/, "");
2698
- if (linkedInCompanyPageSignalCache.has(cacheKey)) {
2699
- return linkedInCompanyPageSignalCache.get(cacheKey) ?? null;
2700
- }
2701
- const controller = new AbortController();
2702
- const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 12_000));
2703
- try {
2704
- const response = await fetch(url, {
2705
- method: "GET",
2706
- signal: controller.signal,
2707
- headers: {
2708
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
2709
- }
2710
- });
2711
- const html = await response.text();
2712
- const finalUrl = response.url || url;
2713
- const title = decodeHtmlEntities(html.match(/<title[^>]*>([^<]+)/i)?.[1] || "") ||
2714
- decodeHtmlEntities(html.match(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i)?.[1] || "");
2715
- const description = decodeHtmlEntities(html.match(/<meta[^>]+name="description"[^>]+content="([^"]+)/i)?.[1] || "");
2716
- const bodyText = decodeHtmlEntities(html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " "));
2717
- const unavailableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
2718
- const unavailable = response.status >= 400 ||
2719
- unavailableText.includes("page not found") ||
2720
- unavailableText.includes("this page does not exist") ||
2721
- unavailableText.includes("page isnt available");
2722
- const result = {
2723
- normalizedUrl: normalizeLinkedInCompanyHandle(finalUrl ?? "") || normalizeLinkedInCompanyHandle(url)
2724
- ? normalizeLinkedInCompanyPage(normalizeLinkedInCompanyHandle(finalUrl ?? "") ?? normalizeLinkedInCompanyHandle(url) ?? "")
2725
- : finalUrl,
1584
+ return {
1585
+ normalizedUrl: normalizeLinkedInCompanyPage(handle),
2726
1586
  title: normalizeLookupWhitespace(title),
2727
1587
  description: normalizeLookupWhitespace(description),
2728
1588
  bodyText: normalizeLookupWhitespace(bodyText),
2729
1589
  unavailable
2730
1590
  };
2731
- linkedInCompanyPageSignalCache.set(cacheKey, result);
2732
- return result;
2733
1591
  }
2734
1592
  catch {
2735
- linkedInCompanyPageSignalCache.set(cacheKey, null);
2736
1593
  return null;
2737
1594
  }
2738
1595
  finally {
2739
1596
  clearTimeout(timeout);
2740
1597
  }
2741
1598
  }
2742
- async function searchPublicLinkedInProfileUrl(contact, timeoutMs, options) {
2743
- const maxQueries = options?.maxQueries && Number.isFinite(options.maxQueries) && options.maxQueries > 0
2744
- ? Math.trunc(options.maxQueries)
2745
- : 4;
2746
- const queries = (await buildSerperLinkedInProfileQueries(contact, timeoutMs)).slice(0, maxQueries);
2747
- for (const query of queries) {
2748
- const controller = new AbortController();
2749
- const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 12_000));
2750
- try {
2751
- const response = await fetch(buildPublicLinkedInProfileSearchUrl(query), {
2752
- method: "GET",
2753
- signal: controller.signal,
2754
- headers: {
2755
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
2756
- }
2757
- });
2758
- if (!response.ok) {
2759
- continue;
2760
- }
2761
- const bodyText = await response.text();
2762
- const candidates = extractPublicLinkedInProfileSearchCandidates(bodyText);
2763
- let bestUrl = null;
2764
- let bestScore = 0;
2765
- for (const candidateUrl of candidates.slice(0, 5)) {
2766
- const signals = await fetchLinkedInProfilePageSignals(candidateUrl, timeoutMs);
2767
- if (!signals || signals.unavailable) {
2768
- continue;
2769
- }
2770
- const score = scoreLinkedInProfilePageSignals(contact, signals);
2771
- if (score > bestScore) {
2772
- bestScore = score;
2773
- bestUrl = signals.normalizedUrl;
2774
- }
2775
- }
2776
- if (bestUrl && bestScore >= 130) {
2777
- return bestUrl;
2778
- }
2779
- }
2780
- catch {
2781
- // Continue with the next query variant.
2782
- }
2783
- finally {
2784
- clearTimeout(timeout);
2785
- }
2786
- }
2787
- return null;
2788
- }
2789
1599
  function scoreLinkedInCompanyPageSignals(companyName, signals) {
2790
1600
  const inputTokens = normalizeLooseMatchText(companyName).split(/\s+/).filter((token) => token.length >= 4);
2791
1601
  const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description}`);
@@ -2800,20 +1610,6 @@ function scoreLinkedInCompanyPageSignals(companyName, signals) {
2800
1610
  }
2801
1611
  return score;
2802
1612
  }
2803
- function scoreSerperLinkedInCompanyCandidate(companyName, candidate) {
2804
- const inputTokens = normalizeLooseMatchText(companyName).split(/\s+/).filter((token) => token.length >= 4);
2805
- const haystack = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);
2806
- let score = scoreLinkedInCompanyUrlCandidate(companyName, candidate.url);
2807
- for (const token of inputTokens) {
2808
- if (haystack.includes(token)) {
2809
- score += 12;
2810
- }
2811
- }
2812
- if (haystack.includes(normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(companyName)))) {
2813
- score += 40;
2814
- }
2815
- return score;
2816
- }
2817
1613
  function scoreLinkedInCompanyUrlCandidate(companyName, url) {
2818
1614
  const handle = normalizeLinkedInCompanyHandle(url);
2819
1615
  if (!handle || /^\d+$/.test(handle)) {
@@ -2907,15 +1703,9 @@ async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
2907
1703
  const parsed = (await response.json());
2908
1704
  const candidates = extractSerperLinkedInCompanyCandidates(parsed);
2909
1705
  const ranked = candidates
2910
- .map((candidate) => ({
2911
- ...candidate,
2912
- score: scoreSerperLinkedInCompanyCandidate(companyName, candidate)
2913
- }))
1706
+ .map((url) => ({ url, score: scoreLinkedInCompanyUrlCandidate(companyName, url) }))
2914
1707
  .filter((candidate) => candidate.score > 0)
2915
1708
  .sort((left, right) => right.score - left.score);
2916
- if (ranked[0] && ranked[0].score >= 80) {
2917
- return ranked[0].url;
2918
- }
2919
1709
  let anySignalsFetched = false;
2920
1710
  let bestValidated = null;
2921
1711
  for (const candidate of ranked.slice(0, 3)) {
@@ -2955,11 +1745,6 @@ async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
2955
1745
  }
2956
1746
  async function invokeLinkedInUrlEnrichmentDirect(params) {
2957
1747
  const config = await readLinkedInDirectLookupConfig();
2958
- const companyContexts = await resolveDirectLinkedInCompanyContexts({
2959
- contacts: params.contacts.filter((contact) => !contact.isVariation),
2960
- timeoutMs: params.timeoutMs,
2961
- config
2962
- });
2963
1748
  const groupedContacts = new Map();
2964
1749
  for (const contact of params.contacts) {
2965
1750
  const key = contact.email?.trim().toLowerCase() || `contact:${contact.contact_id}`;
@@ -2968,25 +1753,15 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
2968
1753
  groupedContacts.set(key, existing);
2969
1754
  }
2970
1755
  const results = [];
2971
- const perAttemptTimeoutMs = params.perAttemptTimeoutMs && Number.isFinite(params.perAttemptTimeoutMs) && params.perAttemptTimeoutMs > 0
2972
- ? Math.trunc(params.perAttemptTimeoutMs)
2973
- : Math.min(params.timeoutMs, 8_000);
2974
- const perContactBudgetMs = params.perContactBudgetMs && Number.isFinite(params.perContactBudgetMs) && params.perContactBudgetMs > 0
2975
- ? Math.trunc(params.perContactBudgetMs)
2976
- : Math.min(params.timeoutMs, 15_000);
2977
- const rateLimitCooldownMs = Math.max(750, Math.min(3_000, Math.trunc(perAttemptTimeoutMs / 2)));
2978
- const maxRateLimitCooldowns = 4;
2979
- let rateLimitCooldownUntil = 0;
2980
- let consecutiveRateLimitCount = 0;
2981
- let totalRateLimitCooldowns = 0;
1756
+ let rateLimited = false;
2982
1757
  for (const variations of groupedContacts.values()) {
2983
1758
  const primary = variations.find((contact) => !contact.isVariation) ?? variations[0];
2984
1759
  const blankPerson = !primary?.firstName.trim() || !primary?.lastName.trim();
2985
- if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
1760
+ if (rateLimited) {
2986
1761
  results.push({
2987
1762
  contact_id: primary.contact_id,
2988
1763
  linkedin_url: null,
2989
- error: "LinkedIn rate limit budget exhausted"
1764
+ error: "LinkedIn rate limit"
2990
1765
  });
2991
1766
  continue;
2992
1767
  }
@@ -3000,23 +1775,11 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
3000
1775
  }
3001
1776
  let matchedUrl = null;
3002
1777
  let matchedSalesNavUrl = null;
3003
- let matchedFullName = null;
3004
- let matchedCompanyName = null;
3005
- let matchedTitle = null;
3006
1778
  let lastError = null;
3007
- const contactDeadline = Date.now() + perContactBudgetMs;
3008
- const companyContext = companyContexts.get(buildDirectCompanyContextKey(primary));
3009
1779
  for (const candidate of variations) {
3010
- for (const searchVariant of await buildLinkedInLookupSearchVariants(candidate, params.timeoutMs, companyContext?.aliases ?? [])) {
3011
- if (Date.now() < rateLimitCooldownUntil) {
3012
- await new Promise((resolve) => setTimeout(resolve, rateLimitCooldownUntil - Date.now()));
3013
- }
3014
- if (Date.now() >= contactDeadline) {
3015
- lastError = lastError || "Direct lookup budget exhausted";
3016
- break;
3017
- }
1780
+ for (const searchVariant of buildLinkedInLookupSearchVariants(candidate)) {
3018
1781
  const controller = new AbortController();
3019
- const timeout = setTimeout(controller.abort.bind(controller), Math.min(perAttemptTimeoutMs, Math.max(1_000, contactDeadline - Date.now())));
1782
+ const timeout = setTimeout(controller.abort.bind(controller), Math.min(params.timeoutMs, 20_000));
3020
1783
  try {
3021
1784
  const response = await fetch(buildLinkedInSalesApiUrl(searchVariant), {
3022
1785
  method: "GET",
@@ -3037,51 +1800,20 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
3037
1800
  }
3038
1801
  });
3039
1802
  if (response.status === 429) {
1803
+ rateLimited = true;
3040
1804
  lastError = "LinkedIn rate limit";
3041
- consecutiveRateLimitCount += 1;
3042
- totalRateLimitCooldowns += 1;
3043
- rateLimitCooldownUntil =
3044
- Date.now() + Math.min(15_000, rateLimitCooldownMs * Math.max(1, consecutiveRateLimitCount));
3045
- if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
3046
- break;
3047
- }
3048
1805
  break;
3049
1806
  }
3050
1807
  if (!response.ok) {
3051
1808
  lastError = `LinkedIn returned ${response.status}`;
3052
1809
  continue;
3053
1810
  }
3054
- consecutiveRateLimitCount = 0;
3055
- rateLimitCooldownUntil = 0;
3056
1811
  const data = (await response.json());
3057
1812
  const profilesFound = data.paging?.total ?? 0;
3058
1813
  if (profilesFound > 0) {
3059
- const bestCandidate = (data.elements ?? [])
3060
- .map((element) => ({
3061
- element,
3062
- ...scoreLinkedInSalesApiElementMatch(candidate, element)
3063
- }))
3064
- .sort((left, right) => right.score - left.score)[0];
3065
- const hasTrustedCompanyContext = Boolean(candidate.linkedinCompanyUrl ||
3066
- companyContext?.linkedinCompanyUrl ||
3067
- companyContext?.matchedCompanyName);
3068
- const hasTrustedEmailContext = Boolean(candidate.email && !isSyntheticLinkedInLookupEmail(candidate.email));
3069
- const acceptBestCandidate = Boolean(bestCandidate &&
3070
- (bestCandidate.score >= 140 ||
3071
- (bestCandidate.exactNameMatch &&
3072
- (bestCandidate.companyMatchCount > 0 || hasTrustedCompanyContext || hasTrustedEmailContext))));
3073
- if (bestCandidate && acceptBestCandidate) {
3074
- matchedUrl = extractLinkedInProfileUrlFromSalesApiElement(bestCandidate.element) ?? null;
3075
- matchedSalesNavUrl = extractLinkedInSalesNavLeadUrlFromSalesApiElement(bestCandidate.element) ?? null;
3076
- matchedFullName = bestCandidate.fullName;
3077
- matchedCompanyName = bestCandidate.companyName;
3078
- matchedTitle = bestCandidate.title;
3079
- }
3080
- else {
3081
- lastError = bestCandidate
3082
- ? `LinkedIn top result score too low (${bestCandidate.score})`
3083
- : "LinkedIn returned no usable results";
3084
- }
1814
+ const first = data.elements?.[0];
1815
+ matchedUrl = extractLinkedInProfileUrlFromSalesApiElement(first) ?? null;
1816
+ matchedSalesNavUrl = extractLinkedInSalesNavLeadUrlFromSalesApiElement(first) ?? null;
3085
1817
  if (matchedUrl || matchedSalesNavUrl) {
3086
1818
  break;
3087
1819
  }
@@ -3093,14 +1825,11 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
3093
1825
  finally {
3094
1826
  clearTimeout(timeout);
3095
1827
  }
3096
- if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
1828
+ if (matchedUrl || matchedSalesNavUrl || rateLimited) {
3097
1829
  break;
3098
1830
  }
3099
1831
  }
3100
- if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
3101
- break;
3102
- }
3103
- if (Date.now() >= contactDeadline) {
1832
+ if (matchedUrl || matchedSalesNavUrl || rateLimited) {
3104
1833
  break;
3105
1834
  }
3106
1835
  }
@@ -3108,21 +1837,16 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
3108
1837
  contact_id: primary.contact_id,
3109
1838
  linkedin_url: matchedUrl ?? matchedSalesNavUrl,
3110
1839
  sales_nav_profile_url: matchedSalesNavUrl,
3111
- matched_full_name: matchedFullName,
3112
- matched_company_name: matchedCompanyName,
3113
- matched_title: matchedTitle,
3114
1840
  error: matchedUrl || matchedSalesNavUrl ? null : lastError
3115
1841
  });
3116
1842
  }
3117
1843
  return {
3118
1844
  success: true,
3119
- contacts: results,
3120
- companyContexts: Array.from(companyContexts.values())
1845
+ contacts: results
3121
1846
  };
3122
1847
  }
3123
1848
  async function invokeLinkedInCompanyEnrichmentDirect(params) {
3124
1849
  const config = await readLinkedInDirectLookupConfig();
3125
- const precomputedContextByKey = new Map((params.precomputedContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
3126
1850
  const primaryContacts = new Map();
3127
1851
  for (const contact of params.contacts) {
3128
1852
  const existing = primaryContacts.get(contact.contact_id);
@@ -3146,23 +1870,11 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
3146
1870
  companyName: contact.companyName,
3147
1871
  companyNameOriginal: contact.companyNameOriginal
3148
1872
  });
3149
- const precomputedContext = precomputedContextByKey.get(buildDirectCompanyContextKey(contact));
3150
- let matchedCompanyUrl = precomputedContext?.linkedinCompanyUrl ?? null;
3151
- let matchedSalesNavCompanyUrl = precomputedContext?.salesNavCompanyUrl ?? null;
3152
- let matchedCompanyName = precomputedContext?.matchedCompanyName ?? null;
3153
- let matchedCompanyEmployeeCount = precomputedContext?.matchedCompanyEmployeeCount ?? null;
1873
+ let matchedCompanyUrl = null;
1874
+ let matchedSalesNavCompanyUrl = null;
1875
+ let matchedCompanyName = null;
1876
+ let matchedCompanyEmployeeCount = null;
3154
1877
  let lastError = null;
3155
- if (matchedCompanyUrl || matchedSalesNavCompanyUrl || matchedCompanyName) {
3156
- results.push({
3157
- contact_id: contact.contact_id,
3158
- linkedin_company_url: matchedCompanyUrl,
3159
- sales_nav_company_url: matchedSalesNavCompanyUrl,
3160
- matched_company_name: matchedCompanyName,
3161
- matched_company_employee_count: matchedCompanyEmployeeCount,
3162
- error: null
3163
- });
3164
- continue;
3165
- }
3166
1878
  for (const variant of variants) {
3167
1879
  const controller = new AbortController();
3168
1880
  const timeout = setTimeout(controller.abort.bind(controller), Math.min(params.timeoutMs, 20_000));
@@ -3305,34 +2017,9 @@ async function invokeLinkedInUrlEnrichmentWorkflow(params) {
3305
2017
  }
3306
2018
  }
3307
2019
  function normalizeWorkflowLinkedInUrlResult(params) {
3308
- const inputContactIds = new Set(params.contacts.map((contact) => contact.contact_id));
3309
2020
  const contactIdsBySyntheticEmail = new Map(params.contacts
3310
2021
  .filter((contact) => contact.email)
3311
2022
  .map((contact) => [String(contact.email).toLowerCase(), contact.contact_id]));
3312
- const contactIdsByNormalizedIdentity = new Map(params.contacts
3313
- .filter((contact) => !contact.isVariation)
3314
- .map((contact) => {
3315
- const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
3316
- const companyName = normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName);
3317
- return [`${fullName}|${companyName}`, contact.contact_id];
3318
- })
3319
- .filter(([key]) => key !== "|"));
3320
- const normalizedNameCounts = new Map();
3321
- for (const contact of params.contacts) {
3322
- if (contact.isVariation)
3323
- continue;
3324
- const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
3325
- if (!fullName)
3326
- continue;
3327
- normalizedNameCounts.set(fullName, (normalizedNameCounts.get(fullName) ?? 0) + 1);
3328
- }
3329
- const contactIdsByNormalizedName = new Map(params.contacts
3330
- .filter((contact) => !contact.isVariation)
3331
- .map((contact) => {
3332
- const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
3333
- return [fullName, contact.contact_id];
3334
- })
3335
- .filter(([fullName]) => Boolean(fullName) && (normalizedNameCounts.get(fullName) ?? 0) === 1));
3336
2023
  const rowsByContactId = new Map();
3337
2024
  const body = params.parsedBody && typeof params.parsedBody === "object" && !Array.isArray(params.parsedBody)
3338
2025
  ? params.parsedBody
@@ -3342,34 +2029,13 @@ function normalizeWorkflowLinkedInUrlResult(params) {
3342
2029
  ...(Array.isArray(body?.profiles) ? body?.profiles : [])
3343
2030
  ];
3344
2031
  for (const contact of workflowRows) {
3345
- const fullNameCandidate = normalizeLookupWhitespace(typeof contact.full_name === "string"
3346
- ? contact.full_name
3347
- : typeof contact.fullName === "string"
3348
- ? contact.fullName
3349
- : typeof contact.name === "string"
3350
- ? contact.name
3351
- : [contact.first_name, contact.last_name]
3352
- .filter((value) => typeof value === "string" && value.trim().length > 0)
3353
- .join(" "));
3354
- const companyNameCandidate = normalizeLookupWhitespace(typeof contact.company_name === "string"
3355
- ? contact.company_name
3356
- : typeof contact.companyName === "string"
3357
- ? contact.companyName
3358
- : typeof contact.current_company === "string"
3359
- ? contact.current_company
3360
- : "");
3361
- const normalizedIdentityKey = `${normalizeLooseMatchText(fullNameCandidate)}|${normalizeLooseMatchText(companyNameCandidate)}`;
3362
2032
  const explicitContactId = typeof contact.contact_id === "string"
3363
2033
  ? contact.contact_id
3364
2034
  : typeof contact.contact_id === "number"
3365
2035
  ? String(contact.contact_id)
3366
2036
  : "";
3367
2037
  const emailKey = typeof contact.email === "string" ? contact.email.toLowerCase() : "";
3368
- const contactId = (inputContactIds.has(explicitContactId) ? explicitContactId : "") ||
3369
- contactIdsBySyntheticEmail.get(emailKey) ||
3370
- contactIdsByNormalizedIdentity.get(normalizedIdentityKey) ||
3371
- contactIdsByNormalizedName.get(normalizeLooseMatchText(fullNameCandidate)) ||
3372
- "";
2038
+ const contactId = explicitContactId || contactIdsBySyntheticEmail.get(emailKey) || "";
3373
2039
  const linkedinUrl = normalizePublicLinkedInProfileUrl(typeof contact.linkedin_profile_url === "string"
3374
2040
  ? contact.linkedin_profile_url
3375
2041
  : typeof contact.linkedinProfileUrl === "string"
@@ -3456,8 +2122,7 @@ async function fetchSalesNavLookupCandidates(params) {
3456
2122
  }
3457
2123
  async function resolveLinkedInUrlsFromSalesNavRows(params) {
3458
2124
  const results = [];
3459
- for (const row of params.rows) {
3460
- const contactId = normalizeLinkedInLookupField(row.contactId) ?? `${results.length + 1}`;
2125
+ for (const [index, row] of params.rows.entries()) {
3461
2126
  const candidates = await fetchSalesNavLookupCandidates({
3462
2127
  companyName: row.companyName,
3463
2128
  orgId: params.orgId
@@ -3503,247 +2168,29 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
3503
2168
  }
3504
2169
  const numericCompanyUrl = typeof best?.companyUrl === "string" ? best.companyUrl.trim() : "";
3505
2170
  return numericCompanyUrl.length > 0 ? numericCompanyUrl : null;
3506
- })();
3507
- const salesNavCompanyUrl = typeof best?.companyUrl === "string" && /\/sales\/company\//i.test(best.companyUrl)
3508
- ? best.companyUrl
3509
- : null;
3510
- const existingLinkedInCompanyUrl = row.linkedinCompanyUrl?.trim() || null;
3511
- results.push({
3512
- clientId: row.clientId,
3513
- fullName: row.fullName,
3514
- companyName: row.companyName,
3515
- linkedinUrl,
3516
- salesNavProfileUrl,
3517
- linkedinCompanyUrl: linkedinCompanyUrl ?? existingLinkedInCompanyUrl,
3518
- salesNavCompanyUrl,
3519
- found: Boolean(linkedinUrl),
3520
- companyFound: Boolean(linkedinCompanyUrl ?? existingLinkedInCompanyUrl),
3521
- contactId,
3522
- source: linkedinUrl ? "salesnav-supabase" : null,
3523
- companySource: linkedinCompanyUrl ? "salesnav-supabase" : existingLinkedInCompanyUrl ? "input" : null,
3524
- matchedFullName: best?.fullName ?? null,
3525
- matchedCompanyName: best?.companyName ?? null,
3526
- matchedTitle: best?.title ?? null,
3527
- matchedOrgId: best?.orgId ?? null,
3528
- matchedCompanyEmployeeCount: null
3529
- });
3530
- }
3531
- return results;
3532
- }
3533
- function shouldUseSalesNavRowPrepass(params) {
3534
- const env = params.env ?? process.env;
3535
- const explicit = env.SALESPROMPTER_LINKEDIN_ROW_PREPASS?.trim().toLowerCase();
3536
- if (explicit === "0" || explicit === "false" || explicit === "off") {
3537
- return false;
3538
- }
3539
- if (explicit === "1" || explicit === "true" || explicit === "on") {
3540
- return true;
3541
- }
3542
- const hasOrgId = Boolean(params.orgId?.trim());
3543
- const hasSupabase = Boolean(env.NEXT_PUBLIC_SUPABASE_URL?.trim() && env.SUPABASE_SERVICE_ROLE_KEY?.trim());
3544
- const maxRows = Number(env.SALESPROMPTER_LINKEDIN_ROW_PREPASS_MAX_ROWS ?? 200);
3545
- if (!hasOrgId || !hasSupabase) {
3546
- return false;
3547
- }
3548
- return params.rows.length <= (Number.isFinite(maxRows) && maxRows > 0 ? maxRows : 200);
3549
- }
3550
- function shouldUseDirectPeopleLookup(params) {
3551
- const env = params.env ?? process.env;
3552
- const explicit = env.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_LOOKUP?.trim().toLowerCase();
3553
- if (explicit === "0" || explicit === "false" || explicit === "off") {
3554
- return false;
3555
- }
3556
- if (explicit === "1" || explicit === "true" || explicit === "on") {
3557
- return true;
3558
- }
3559
- const maxRows = Number(env.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_MAX_ROWS ?? 50);
3560
- return params.rowCount <= (Number.isFinite(maxRows) && maxRows > 0 ? maxRows : 50);
3561
- }
3562
- function shouldUseWorkflowPeopleLookup(params) {
3563
- const env = params.env ?? process.env;
3564
- const explicit = env.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_LOOKUP?.trim().toLowerCase();
3565
- if (explicit === "0" || explicit === "false" || explicit === "off") {
3566
- return false;
3567
- }
3568
- if (explicit === "1" || explicit === "true" || explicit === "on") {
3569
- return true;
3570
- }
3571
- const hasSerper = Boolean(getSerperApiKey(env));
3572
- const maxRows = Number(env.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_MAX_ROWS ?? (hasSerper ? 75 : 250));
3573
- return params.rowCount <= (Number.isFinite(maxRows) && maxRows > 0 ? maxRows : hasSerper ? 75 : 250);
3574
- }
3575
- function shouldUseBulkProfileResolutionStrategy(params) {
3576
- const env = params.env ?? process.env;
3577
- const explicit = env.SALESPROMPTER_LINKEDIN_BULK_MODE?.trim().toLowerCase();
3578
- if (explicit === "0" || explicit === "false" || explicit === "off") {
3579
- return false;
3580
- }
3581
- if (explicit === "1" || explicit === "true" || explicit === "on") {
3582
- return true;
3583
- }
3584
- const minRows = Number(env.SALESPROMPTER_LINKEDIN_BULK_MODE_MIN_ROWS ?? 75);
3585
- return params.rowCount >= (Number.isFinite(minRows) && minRows > 0 ? minRows : 75);
3586
- }
3587
- function resolveLinkedInBulkStrategyConfig(params) {
3588
- const env = params.env ?? process.env;
3589
- const bulkMode = shouldUseBulkProfileResolutionStrategy({
3590
- rowCount: params.rowCount,
3591
- env
3592
- });
3593
- const serperConcurrencyDefault = bulkMode ? 12 : 6;
3594
- const serperConcurrency = Number(env.SALESPROMPTER_LINKEDIN_SERPER_CONCURRENCY ?? serperConcurrencyDefault);
3595
- const serperMaxQueriesDefault = bulkMode ? 4 : 8;
3596
- const serperMaxQueries = Number(env.SALESPROMPTER_LINKEDIN_SERPER_MAX_QUERIES ?? serperMaxQueriesDefault);
3597
- const workflowStageBudgetDefault = bulkMode ? 8_000 : 15_000;
3598
- const workflowStageBudgetMs = Number(env.SALESPROMPTER_LINKEDIN_WORKFLOW_STAGE_TIMEOUT_MS ?? workflowStageBudgetDefault);
3599
- const serperStageBudgetDefault = bulkMode
3600
- ? Math.max(15_000, Math.min(params.timeoutMs * 2, 45_000))
3601
- : Math.max(10_000, Math.min(params.timeoutMs, 20_000));
3602
- const serperStageBudgetMs = Number(env.SALESPROMPTER_LINKEDIN_SERPER_STAGE_TIMEOUT_MS ?? serperStageBudgetDefault);
3603
- const bulkDirectProfileMaxRowsDefault = 0;
3604
- const bulkDirectProfileMaxRows = Number(env.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_MAX_ROWS ?? bulkDirectProfileMaxRowsDefault);
3605
- const bulkDirectProfileTimeoutDefault = bulkMode ? Math.min(params.timeoutMs, 6_000) : 0;
3606
- const bulkDirectProfileTimeoutMs = Number(env.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_TIMEOUT_MS ?? bulkDirectProfileTimeoutDefault);
3607
- return {
3608
- bulkMode,
3609
- serperConcurrency: Number.isFinite(serperConcurrency) && serperConcurrency > 0
3610
- ? Math.trunc(serperConcurrency)
3611
- : serperConcurrencyDefault,
3612
- serperMaxQueries: Number.isFinite(serperMaxQueries) && serperMaxQueries > 0
3613
- ? Math.trunc(serperMaxQueries)
3614
- : serperMaxQueriesDefault,
3615
- workflowStageBudgetMs: Number.isFinite(workflowStageBudgetMs) && workflowStageBudgetMs > 0
3616
- ? Math.trunc(workflowStageBudgetMs)
3617
- : workflowStageBudgetDefault,
3618
- serperStageBudgetMs: Number.isFinite(serperStageBudgetMs) && serperStageBudgetMs > 0
3619
- ? Math.trunc(serperStageBudgetMs)
3620
- : serperStageBudgetDefault,
3621
- bulkDirectProfileMaxRows: Number.isFinite(bulkDirectProfileMaxRows) && bulkDirectProfileMaxRows > 0
3622
- ? Math.trunc(bulkDirectProfileMaxRows)
3623
- : 0,
3624
- bulkDirectProfileTimeoutMs: Number.isFinite(bulkDirectProfileTimeoutMs) && bulkDirectProfileTimeoutMs > 0
3625
- ? Math.trunc(bulkDirectProfileTimeoutMs)
3626
- : 0
3627
- };
3628
- }
3629
- function shouldAttemptBulkDirectProfileLookup(params) {
3630
- return (params.strategy.bulkMode &&
3631
- params.strategy.bulkDirectProfileMaxRows > 0 &&
3632
- params.strategy.bulkDirectProfileTimeoutMs > 0 &&
3633
- params.unresolvedRowCount > 0);
3634
- }
3635
- function rankContactsForBulkDirectProfileLookup(params) {
3636
- const scored = params.contacts
3637
- .filter((contact) => !contact.isVariation)
3638
- .map((contact) => {
3639
- const row = params.rowsByContactId.get(contact.contact_id);
3640
- const normalizedName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
3641
- const normalizedEmail = normalizeLookupWhitespace(contact.email);
3642
- const titleKeywords = extractLookupTitleKeywords(contact.jobTitle);
3643
- const roleKeywords = buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole);
3644
- let score = 0;
3645
- if (row?.linkedinCompanyUrl || contact.linkedinCompanyUrl)
3646
- score += 80;
3647
- if (row?.salesNavCompanyUrl)
3648
- score += 20;
3649
- if (normalizedEmail && !isSyntheticLinkedInLookupEmail(normalizedEmail))
3650
- score += 40;
3651
- if (contact.jobTitle?.trim())
3652
- score += 25;
3653
- if (contact.deepDiveRecommendedRole?.trim())
3654
- score += 15;
3655
- score += Math.min(20, titleKeywords.length * 5);
3656
- score += Math.min(15, roleKeywords.length * 5);
3657
- if (/^contact\s+\d+$/i.test(normalizedName))
3658
- score -= 100;
3659
- if (/^(hr|support|facility|buchhaltung|rechnungen)$/i.test(normalizedName))
3660
- score -= 25;
3661
- return { contact, score };
3662
- })
3663
- .filter((entry) => entry.score > 0)
3664
- .sort((left, right) => right.score - left.score);
3665
- return scored.slice(0, params.limit).map((entry) => entry.contact);
3666
- }
3667
- async function resolveSerperLinkedInProfilesInParallel(params) {
3668
- const results = new Map();
3669
- const contacts = params.contacts;
3670
- const concurrency = Math.max(1, Math.min(params.concurrency ?? 3, contacts.length || 1));
3671
- const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
3672
- ? Date.now() + Math.trunc(params.overallBudgetMs)
3673
- : Number.POSITIVE_INFINITY;
3674
- let nextIndex = 0;
3675
- const worker = async () => {
3676
- while (true) {
3677
- if (Date.now() >= deadline) {
3678
- return;
3679
- }
3680
- const index = nextIndex++;
3681
- if (index >= contacts.length) {
3682
- return;
3683
- }
3684
- const contact = contacts[index];
3685
- const remainingBudget = deadline - Date.now();
3686
- if (remainingBudget <= 0) {
3687
- return;
3688
- }
3689
- const linkedinUrl = await searchSerperLinkedInProfileUrl(contact, Math.min(params.timeoutMs, remainingBudget), {
3690
- maxQueries: params.maxQueries
3691
- });
3692
- if (linkedinUrl) {
3693
- results.set(contact.contact_id, linkedinUrl);
3694
- }
3695
- }
3696
- };
3697
- await Promise.all(Array.from({ length: concurrency }, () => worker()));
3698
- return results;
3699
- }
3700
- async function resolveLinkedInCompanyUrlsForContacts(params) {
3701
- const contacts = params.contacts.filter((contact) => !contact.isVariation && !contact.linkedinCompanyUrl);
3702
- const uniqueCompanies = new Map();
3703
- for (const contact of contacts) {
3704
- const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
3705
- if (!key || uniqueCompanies.has(key)) {
3706
- continue;
3707
- }
3708
- uniqueCompanies.set(key, contact.companyNameOriginal ?? contact.companyName);
3709
- }
3710
- const resultsByCompany = new Map();
3711
- const entries = Array.from(uniqueCompanies.entries());
3712
- const concurrency = Math.max(1, Math.min(params.concurrency ?? 4, entries.length || 1));
3713
- const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
3714
- ? Date.now() + Math.trunc(params.overallBudgetMs)
3715
- : Number.POSITIVE_INFINITY;
3716
- let nextIndex = 0;
3717
- const worker = async () => {
3718
- while (true) {
3719
- if (Date.now() >= deadline) {
3720
- return;
3721
- }
3722
- const index = nextIndex++;
3723
- if (index >= entries.length) {
3724
- return;
3725
- }
3726
- const [key, companyName] = entries[index];
3727
- const remainingBudget = deadline - Date.now();
3728
- if (remainingBudget <= 0) {
3729
- return;
3730
- }
3731
- const perCompanyTimeout = Math.min(params.timeoutMs, remainingBudget);
3732
- const linkedinUrl = (await searchSerperLinkedInCompanyUrl(companyName, perCompanyTimeout)) ??
3733
- (await searchPublicLinkedInCompanyUrl(companyName, perCompanyTimeout));
3734
- if (linkedinUrl) {
3735
- resultsByCompany.set(key, linkedinUrl);
3736
- }
3737
- }
3738
- };
3739
- await Promise.all(Array.from({ length: concurrency }, () => worker()));
3740
- const results = new Map();
3741
- for (const contact of params.contacts) {
3742
- const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
3743
- const linkedinUrl = resultsByCompany.get(key);
3744
- if (linkedinUrl) {
3745
- results.set(contact.contact_id, linkedinUrl);
3746
- }
2171
+ })();
2172
+ const salesNavCompanyUrl = typeof best?.companyUrl === "string" && /\/sales\/company\//i.test(best.companyUrl)
2173
+ ? best.companyUrl
2174
+ : null;
2175
+ results.push({
2176
+ clientId: row.clientId,
2177
+ fullName: row.fullName,
2178
+ companyName: row.companyName,
2179
+ linkedinUrl,
2180
+ salesNavProfileUrl,
2181
+ linkedinCompanyUrl,
2182
+ salesNavCompanyUrl,
2183
+ found: Boolean(linkedinUrl),
2184
+ companyFound: Boolean(linkedinCompanyUrl),
2185
+ contactId: String(index + 1),
2186
+ source: linkedinUrl ? "salesnav-supabase" : null,
2187
+ companySource: linkedinCompanyUrl ? "salesnav-supabase" : null,
2188
+ matchedFullName: best?.fullName ?? null,
2189
+ matchedCompanyName: best?.companyName ?? null,
2190
+ matchedTitle: best?.title ?? null,
2191
+ matchedOrgId: best?.orgId ?? null,
2192
+ matchedCompanyEmployeeCount: null
2193
+ });
3747
2194
  }
3748
2195
  return results;
3749
2196
  }
@@ -4093,15 +2540,22 @@ function writeWizardSection(title, description) {
4093
2540
  }
4094
2541
  writeWizardLine();
4095
2542
  }
4096
- function isOpaqueOrgId(value) {
4097
- return /^org_[A-Za-z0-9]+$/.test(value);
2543
+ function compactOptionalText(value) {
2544
+ const compacted = value?.trim();
2545
+ return compacted && compacted.length > 0 ? compacted : null;
4098
2546
  }
4099
2547
  function getOrgLabel(session) {
4100
- const label = session.user.orgName ?? session.user.orgSlug ?? session.user.orgId ?? null;
4101
- if (label && isOpaqueOrgId(label)) {
4102
- return null;
2548
+ const orgName = compactOptionalText(session.user.orgName);
2549
+ const orgSlug = compactOptionalText(session.user.orgSlug);
2550
+ const orgId = compactOptionalText(session.user.orgId);
2551
+ if (orgName) {
2552
+ const details = [orgSlug, orgId].filter((value) => Boolean(value));
2553
+ return details.length > 0 ? `${orgName} (${details.join(", ")})` : orgName;
2554
+ }
2555
+ if (orgSlug) {
2556
+ return orgId ? `${orgSlug} (${orgId})` : orgSlug;
4103
2557
  }
4104
- return label;
2558
+ return orgId;
4105
2559
  }
4106
2560
  function resolveSessionOrgId(session) {
4107
2561
  const orgId = session.user.orgId?.trim();
@@ -4324,13 +2778,19 @@ async function promptYesNo(rl, prompt, defaultValue) {
4324
2778
  }
4325
2779
  async function ensureWizardSession(options) {
4326
2780
  if (shouldBypassAuth()) {
4327
- return null;
2781
+ return {
2782
+ session: null,
2783
+ restoredFromCache: false
2784
+ };
4328
2785
  }
4329
2786
  try {
4330
2787
  const session = await requireAuthSession();
4331
2788
  writeSessionSummary(session);
4332
2789
  writeWizardLine();
4333
- return session;
2790
+ return {
2791
+ session,
2792
+ restoredFromCache: true
2793
+ };
4334
2794
  }
4335
2795
  catch (error) {
4336
2796
  const message = error instanceof Error ? error.message : String(error);
@@ -4346,6 +2806,52 @@ async function ensureWizardSession(options) {
4346
2806
  });
4347
2807
  writeSessionSummary(result.session);
4348
2808
  writeWizardLine();
2809
+ return {
2810
+ session: result.session,
2811
+ restoredFromCache: false
2812
+ };
2813
+ }
2814
+ async function confirmWizardWorkspace(rl, session, options) {
2815
+ const orgLabel = getOrgLabel(session);
2816
+ const hasNamedOrg = Boolean(compactOptionalText(session.user.orgName) || compactOptionalText(session.user.orgSlug));
2817
+ const currentLabel = orgLabel
2818
+ ? hasNamedOrg
2819
+ ? `Use ${orgLabel}`
2820
+ : `Use current workspace (${orgLabel})`
2821
+ : "Use signed-in account without a selected workspace";
2822
+ const currentDescription = orgLabel
2823
+ ? hasNamedOrg
2824
+ ? "Current cached CLI workspace"
2825
+ : "Workspace name is not available in this cached token"
2826
+ : "Choose another workspace in the browser if this account belongs to more than one";
2827
+ const workspaceChoice = await promptChoice(rl, "Which workspace should I use?", [
2828
+ {
2829
+ value: "current",
2830
+ label: currentLabel,
2831
+ description: currentDescription,
2832
+ aliases: ["current", "cached", "this workspace", "use current"]
2833
+ },
2834
+ {
2835
+ value: "browser",
2836
+ label: "Choose another workspace in the browser",
2837
+ description: "Opens Salesprompter so you can pick from your organizations",
2838
+ aliases: ["browser", "choose another", "switch workspace", "select organization"]
2839
+ }
2840
+ ], "current");
2841
+ if (workspaceChoice === "current") {
2842
+ writeWizardLine();
2843
+ return session;
2844
+ }
2845
+ writeWizardLine();
2846
+ writeWizardLine("Choose the workspace for this CLI session in the browser.");
2847
+ writeWizardLine();
2848
+ await clearAuthSession();
2849
+ const result = await performLogin({
2850
+ apiUrl: options?.apiUrl ?? session.apiBaseUrl,
2851
+ timeoutSeconds: options?.timeoutSeconds ?? 180
2852
+ });
2853
+ writeSessionSummary(result.session);
2854
+ writeWizardLine();
4349
2855
  return result.session;
4350
2856
  }
4351
2857
  async function resolveLlmAuthReadiness() {
@@ -4411,72 +2917,6 @@ async function fetchWorkspaceLeadSearch(session, requestBody) {
4411
2917
  }
4412
2918
  return WorkspaceLeadSearchResponseSchema.parse(payload).leads;
4413
2919
  }
4414
- async function buildWorkspaceLeadAccount(icp, target, leads) {
4415
- const firstLead = leads[0];
4416
- if (firstLead) {
4417
- const keywords = Array.from(new Set([target.companyDomain?.split(".")[0], firstLead.industry, firstLead.region, ...icp.keywords].filter((value) => typeof value === "string" && value.trim().length > 0)));
4418
- return AccountProfileSchema.parse({
4419
- companyName: target.companyName?.trim() || firstLead.companyName,
4420
- domain: target.companyDomain?.trim().toLowerCase() || firstLead.domain,
4421
- industry: firstLead.industry,
4422
- region: firstLead.region,
4423
- employeeCount: firstLead.employeeCount,
4424
- keywords,
4425
- sources: ["workspace-qualified-leads"]
4426
- });
4427
- }
4428
- return await companyProvider.resolveCompany({
4429
- companyDomain: target.companyDomain,
4430
- companyName: target.companyName
4431
- }, icp);
4432
- }
4433
- async function generateLeadsForCommand(options) {
4434
- const source = z.enum(["auto", "workspace", "fallback"]).parse(options.source ?? "auto");
4435
- if (source === "fallback") {
4436
- return await leadProvider.generateLeads(options.icp, options.count, options.target);
4437
- }
4438
- if (shouldBypassAuth()) {
4439
- if (source === "workspace") {
4440
- throw new Error("workspace lead generation requires authentication. Disable SALESPROMPTER_SKIP_AUTH and log in first.");
4441
- }
4442
- return await leadProvider.generateLeads(options.icp, options.count, options.target);
4443
- }
4444
- try {
4445
- const session = await requireAuthSession();
4446
- const requestBody = options.target.companyDomain || options.target.linkedinCompanyPage
4447
- ? {
4448
- mode: "target-company",
4449
- domain: options.target.companyDomain,
4450
- linkedinCompanyPage: options.target.linkedinCompanyPage,
4451
- limit: options.count
4452
- }
4453
- : {
4454
- mode: "reference-company",
4455
- icp: options.icp,
4456
- limit: options.count
4457
- };
4458
- const leads = await fetchWorkspaceLeadSearch(session, requestBody);
4459
- const account = await buildWorkspaceLeadAccount(options.icp, options.target, leads);
4460
- return {
4461
- provider: "salesprompter-app-workspace-search",
4462
- mode: "real",
4463
- account,
4464
- leads,
4465
- warnings: []
4466
- };
4467
- }
4468
- catch (error) {
4469
- if (source === "workspace") {
4470
- throw error;
4471
- }
4472
- const fallback = await leadProvider.generateLeads(options.icp, options.count, options.target);
4473
- const message = error instanceof Error ? error.message : String(error);
4474
- return {
4475
- ...fallback,
4476
- warnings: [`Workspace lead search unavailable: ${message}`, ...fallback.warnings]
4477
- };
4478
- }
4479
- }
4480
2920
  function buildLinkedInProductsOutputPath(categorySlug) {
4481
2921
  return `./data/linkedin-products-${categorySlug}.json`;
4482
2922
  }
@@ -5250,17 +3690,6 @@ async function fetchLinkedInCompaniesBackfillStatus(session, payload) {
5250
3690
  }), LinkedInCompanyBackfillStatusResponseSchema);
5251
3691
  return value;
5252
3692
  }
5253
- async function syncPhantombusterContainersViaApp(session, payload) {
5254
- const { value } = await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/phantombuster/containers/sync`, {
5255
- method: "POST",
5256
- headers: {
5257
- "Content-Type": "application/json",
5258
- Authorization: `Bearer ${currentSession.accessToken}`
5259
- },
5260
- body: JSON.stringify(payload)
5261
- }), PhantombusterContainersSyncResponseSchema);
5262
- return value;
5263
- }
5264
3693
  function serializeSalesNavigatorFiltersForApi(filters) {
5265
3694
  return filters.map((filter) => ({
5266
3695
  type: filter.type,
@@ -5287,12 +3716,6 @@ function buildSalesNavigatorSliceRawPayload(slice, extra = {}) {
5287
3716
  resultRetryCount: slice.resultRetryCount ?? null
5288
3717
  };
5289
3718
  }
5290
- function parseOptionalSalesNavigatorClientId(value) {
5291
- if (value == null || String(value).trim().length === 0) {
5292
- return null;
5293
- }
5294
- return z.coerce.number().int().positive().parse(value);
5295
- }
5296
3719
  function buildSalesNavigatorCrawlReportRawPayload(slice, traceId, extra = {}) {
5297
3720
  return buildSalesNavigatorSliceRawPayload({
5298
3721
  sourceQueryUrl: slice.sourceQueryUrl,
@@ -5753,12 +4176,11 @@ function isSalesNavigatorSessionError(error) {
5753
4176
  return /can't connect profile|sales navigator account|upsell|linkedin session invalid|linkedin_rate_limited|too many requests|rate.?limit|invalid session cookie|disconnected by linkedin|linkedin-disconnected-while-using-api|provide a new linkedin session cookie/i.test(message);
5754
4177
  }
5755
4178
  function isSalesNavigatorResultArtifactError(error) {
5756
- if (error instanceof SalesNavigatorExportRequestError &&
5757
- ["phantombuster_result_invalid", "partial_result_artifact"].includes(error.errorCode ?? "")) {
4179
+ if (error instanceof SalesNavigatorExportRequestError && error.errorCode === "phantombuster_result_invalid") {
5758
4180
  return true;
5759
4181
  }
5760
4182
  const message = error instanceof Error ? error.message : String(error);
5761
- return /page has crashed|no valid sales navigator people rows|partial result artifact|returned \d+ valid sales navigator people rows, but \d+ were expected/i.test(message);
4183
+ return /page has crashed|no valid sales navigator people rows/i.test(message);
5762
4184
  }
5763
4185
  function isSalesNavigatorTransientExportError(error) {
5764
4186
  if (isSalesNavigatorSessionError(error) || isSalesNavigatorResultArtifactError(error)) {
@@ -5849,7 +4271,6 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
5849
4271
  crawlSliceId: context?.crawlSliceId,
5850
4272
  rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
5851
4273
  traceId: context?.traceId ?? null,
5852
- clientId: context?.clientId ?? null,
5853
4274
  phase: shouldProbe ? "probe" : "full_export",
5854
4275
  requestedProfiles: probeProfiles,
5855
4276
  crawlJobId: context?.crawlJobId ?? null,
@@ -5886,7 +4307,6 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
5886
4307
  crawlSliceId: context?.crawlSliceId,
5887
4308
  rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
5888
4309
  traceId: context?.traceId ?? null,
5889
- clientId: context?.clientId ?? null,
5890
4310
  phase: "full_export_after_probe",
5891
4311
  requestedProfiles: attempt.numberOfProfiles,
5892
4312
  crawlJobId: context?.crawlJobId ?? null,
@@ -5985,8 +4405,6 @@ const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
5985
4405
  const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
5986
4406
  let salesNavigatorFilterImpactModel = null;
5987
4407
  let salesNavigatorFilterImpactLoaded = false;
5988
- let linkedInProfileHitCache = null;
5989
- let linkedInProfileHitCacheLoaded = false;
5990
4408
  function getSalesprompterConfigDir() {
5991
4409
  const override = process.env.SALESPROMPTER_CONFIG_DIR?.trim();
5992
4410
  if (override !== undefined && override.length > 0) {
@@ -5997,76 +4415,6 @@ function getSalesprompterConfigDir() {
5997
4415
  function getSalesNavigatorFilterImpactPath() {
5998
4416
  return path.join(getSalesprompterConfigDir(), "salesnav-filter-impact.json");
5999
4417
  }
6000
- function getLinkedInProfileHitCachePath() {
6001
- return path.join(getSalesprompterConfigDir(), "linkedin-profile-hits.json");
6002
- }
6003
- function buildLinkedInProfileHitCacheKeys(params) {
6004
- const keys = new Set();
6005
- const normalizedName = normalizeLooseMatchText(params.fullName);
6006
- const normalizedCompany = normalizeLooseMatchText(params.companyName);
6007
- const normalizedEmail = normalizeLookupWhitespace(params.email);
6008
- const trustedEmail = normalizedEmail && !isSyntheticLinkedInLookupEmail(normalizedEmail) ? normalizedEmail.toLowerCase() : "";
6009
- const contactId = normalizeLinkedInLookupField(params.contactId);
6010
- if (contactId && !/^[1-9]\d?$/.test(contactId)) {
6011
- keys.add(`contact:${contactId}`);
6012
- }
6013
- if (normalizedName && normalizedCompany && trustedEmail) {
6014
- keys.add(`identity:${normalizedName}|${normalizedCompany}|${trustedEmail}`);
6015
- }
6016
- if (normalizedName && normalizedCompany) {
6017
- keys.add(`identity:${normalizedName}|${normalizedCompany}`);
6018
- }
6019
- return Array.from(keys);
6020
- }
6021
- async function loadLinkedInProfileHitCache() {
6022
- if (linkedInProfileHitCacheLoaded) {
6023
- return linkedInProfileHitCache;
6024
- }
6025
- linkedInProfileHitCacheLoaded = true;
6026
- try {
6027
- const content = await readFile(getLinkedInProfileHitCachePath(), "utf8");
6028
- const parsed = JSON.parse(content);
6029
- if (parsed?.version === 1 && parsed.entries && typeof parsed.entries === "object") {
6030
- linkedInProfileHitCache = parsed;
6031
- }
6032
- }
6033
- catch {
6034
- linkedInProfileHitCache = null;
6035
- }
6036
- return linkedInProfileHitCache;
6037
- }
6038
- async function persistLinkedInProfileHitCache() {
6039
- if (!linkedInProfileHitCache) {
6040
- return;
6041
- }
6042
- const filePath = getLinkedInProfileHitCachePath();
6043
- await mkdir(path.dirname(filePath), { recursive: true });
6044
- await writeFile(filePath, `${JSON.stringify(linkedInProfileHitCache, null, 2)}\n`, "utf8");
6045
- }
6046
- function upsertLinkedInProfileHitCacheEntry(params) {
6047
- if (!params.linkedinUrl && !params.salesNavProfileUrl && !params.linkedinCompanyUrl && !params.salesNavCompanyUrl) {
6048
- return;
6049
- }
6050
- if (!linkedInProfileHitCache) {
6051
- linkedInProfileHitCache = {
6052
- version: 1,
6053
- updatedAt: new Date().toISOString(),
6054
- entries: {}
6055
- };
6056
- }
6057
- const updatedAt = new Date().toISOString();
6058
- linkedInProfileHitCache.updatedAt = updatedAt;
6059
- const entry = {
6060
- linkedinUrl: params.linkedinUrl,
6061
- salesNavProfileUrl: params.salesNavProfileUrl,
6062
- linkedinCompanyUrl: params.linkedinCompanyUrl,
6063
- salesNavCompanyUrl: params.salesNavCompanyUrl,
6064
- updatedAt
6065
- };
6066
- for (const key of buildLinkedInProfileHitCacheKeys(params)) {
6067
- linkedInProfileHitCache.entries[key] = entry;
6068
- }
6069
- }
6070
4418
  async function loadSalesNavigatorFilterImpactModel() {
6071
4419
  if (salesNavigatorFilterImpactLoaded) {
6072
4420
  return salesNavigatorFilterImpactModel;
@@ -6339,7 +4687,6 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
6339
4687
  }, {
6340
4688
  crawlJobId: jobId,
6341
4689
  crawlSliceId: slice.id,
6342
- clientId: options.clientId ?? null,
6343
4690
  traceId: options.traceId
6344
4691
  });
6345
4692
  const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
@@ -6616,7 +4963,6 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
6616
4963
  agentBusyWaitSeconds: options.agentBusyWaitSeconds,
6617
4964
  agentBusyMaxWaits: options.agentBusyMaxWaits,
6618
4965
  claimedSlices: claimedSliceNumber,
6619
- clientId: options.clientId ?? null,
6620
4966
  traceId: options.traceId,
6621
4967
  logger: options.logger
6622
4968
  }).then((value) => ({ slot, value })));
@@ -6889,12 +5235,15 @@ async function runWizard(options) {
6889
5235
  writeWizardLine("Salesprompter");
6890
5236
  writeWizardLine("Start with a company website, LinkedIn product page, or category URL. I will guide you from there.");
6891
5237
  writeWizardLine();
6892
- await ensureWizardSession(options);
6893
5238
  const rl = createInterface({
6894
5239
  input: process.stdin,
6895
5240
  output: process.stdout
6896
5241
  });
6897
5242
  try {
5243
+ const wizardSession = await ensureWizardSession(options);
5244
+ if (wizardSession.session && wizardSession.restoredFromCache) {
5245
+ await confirmWizardWorkspace(rl, wizardSession.session, options);
5246
+ }
6898
5247
  const flow = await promptChoice(rl, "What do you want help with?", [
6899
5248
  {
6900
5249
  value: "product-market",
@@ -7352,7 +5701,6 @@ program
7352
5701
  const companyCleaningMode = resolveCompanyCleaningMode(String(options.companyCleaning ?? process.env.SALESPROMPTER_COMPANY_CLEANING_MODE ?? "basic"));
7353
5702
  const cleanedCompanyMap = await buildCompanyNameCleaningMap(rows, companyCleaningMode);
7354
5703
  const contacts = toLinkedInUrlLookupContacts(rows, cleanedCompanyMap);
7355
- await loadLinkedInProfileHitCache();
7356
5704
  if (options.dryRun) {
7357
5705
  const payload = {
7358
5706
  status: "ok",
@@ -7368,70 +5716,79 @@ program
7368
5716
  printOutput(payload);
7369
5717
  return;
7370
5718
  }
7371
- const orgId = String(options.orgId ?? "").trim() || undefined;
7372
- const strategy = resolveLinkedInBulkStrategyConfig({
7373
- rowCount: rows.length,
7374
- timeoutMs
5719
+ const enrichedRows = await resolveLinkedInUrlsFromSalesNavRows({
5720
+ rows,
5721
+ orgId: String(options.orgId ?? "").trim() || undefined
7375
5722
  });
7376
- const useSalesNavRowPrepass = !strategy.bulkMode &&
7377
- shouldUseSalesNavRowPrepass({
7378
- rows,
7379
- orgId
7380
- });
7381
- const enrichedRows = useSalesNavRowPrepass
7382
- ? await resolveLinkedInUrlsFromSalesNavRows({
7383
- rows,
7384
- orgId
7385
- })
7386
- : rows.map((row, index) => ({
7387
- clientId: row.clientId,
7388
- fullName: row.fullName,
7389
- companyName: row.companyName,
7390
- linkedinUrl: null,
7391
- salesNavProfileUrl: null,
7392
- linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || null,
7393
- salesNavCompanyUrl: null,
7394
- found: false,
7395
- companyFound: Boolean(row.linkedinCompanyUrl?.trim()),
7396
- contactId: normalizeLinkedInLookupField(row.contactId) ?? `${index + 1}`,
7397
- source: null,
7398
- companySource: row.linkedinCompanyUrl?.trim() ? "input" : null,
7399
- matchedFullName: null,
7400
- matchedCompanyName: null,
7401
- matchedTitle: null,
7402
- matchedOrgId: null,
7403
- matchedCompanyEmployeeCount: null
7404
- }));
7405
- const contactById = new Map(contacts.filter((contact) => !contact.isVariation).map((contact) => [contact.contact_id, contact]));
7406
- for (const row of enrichedRows) {
7407
- if (row.found) {
7408
- continue;
5723
+ let directAttempted = false;
5724
+ let workflowAttempted = false;
5725
+ const missingRows = enrichedRows.filter((row) => !row.found);
5726
+ if (missingRows.length > 0) {
5727
+ const directContacts = contacts.filter((contact) => missingRows.some((row) => row.contactId === contact.contact_id));
5728
+ let linkedInUrlByContactId = new Map();
5729
+ try {
5730
+ directAttempted = true;
5731
+ const result = await invokeLinkedInUrlEnrichmentDirect({
5732
+ contacts: directContacts,
5733
+ timeoutMs
5734
+ });
5735
+ linkedInUrlByContactId = new Map(result.contacts.map((contact) => [
5736
+ contact.contact_id,
5737
+ {
5738
+ linkedinUrl: contact.linkedin_url ?? null,
5739
+ salesNavProfileUrl: contact.sales_nav_profile_url ?? null,
5740
+ linkedinCompanyUrl: null,
5741
+ salesNavCompanyUrl: null
5742
+ }
5743
+ ]));
5744
+ for (const row of enrichedRows) {
5745
+ if (row.found)
5746
+ continue;
5747
+ const profile = linkedInUrlByContactId.get(row.contactId);
5748
+ if (profile?.linkedinUrl) {
5749
+ row.linkedinUrl = profile.linkedinUrl;
5750
+ row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
5751
+ row.found = true;
5752
+ row.source = "linkedin-direct";
5753
+ }
5754
+ }
7409
5755
  }
7410
- const contact = contactById.get(row.contactId);
7411
- const cacheKeys = buildLinkedInProfileHitCacheKeys({
7412
- fullName: row.fullName,
7413
- companyName: row.companyName,
7414
- email: contact?.email,
7415
- contactId: row.contactId
7416
- });
7417
- const cachedEntry = cacheKeys
7418
- .map((key) => linkedInProfileHitCache?.entries[key] ?? null)
7419
- .find(Boolean);
7420
- if (!cachedEntry) {
7421
- continue;
5756
+ catch (error) {
5757
+ const message = error instanceof Error ? error.message : String(error);
5758
+ if (!/Missing LinkedIn direct lookup session/i.test(message)) {
5759
+ throw error;
5760
+ }
5761
+ workflowAttempted = true;
5762
+ const workflow = await invokeLinkedInUrlEnrichmentWorkflow({
5763
+ contacts: directContacts,
5764
+ externalUserId: String(options.orgId ?? "").trim() || sessionOrgId || "cli_direct_lookup",
5765
+ timeoutMs
5766
+ });
5767
+ if (!workflow.response.ok) {
5768
+ throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
5769
+ }
5770
+ linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
5771
+ parsedBody: workflow.parsedBody,
5772
+ contacts: directContacts
5773
+ });
5774
+ for (const row of enrichedRows) {
5775
+ if (row.found)
5776
+ continue;
5777
+ const profile = linkedInUrlByContactId.get(row.contactId);
5778
+ if (profile?.linkedinUrl) {
5779
+ row.linkedinUrl = profile.linkedinUrl;
5780
+ row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
5781
+ row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
5782
+ row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
5783
+ row.found = true;
5784
+ row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
5785
+ row.source = "workflow";
5786
+ row.companySource =
5787
+ row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
5788
+ }
5789
+ }
7422
5790
  }
7423
- row.linkedinUrl = cachedEntry.linkedinUrl ?? row.linkedinUrl ?? null;
7424
- row.salesNavProfileUrl = cachedEntry.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
7425
- row.linkedinCompanyUrl = cachedEntry.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
7426
- row.salesNavCompanyUrl = cachedEntry.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
7427
- row.found = Boolean(row.linkedinUrl || row.salesNavProfileUrl);
7428
- row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
7429
- row.source = row.found ? "cache" : row.source;
7430
- row.companySource =
7431
- row.companyFound && !row.companySource ? "cache" : row.companySource;
7432
5791
  }
7433
- let directAttempted = false;
7434
- let workflowAttempted = false;
7435
5792
  const parsedClientIds = Array.from(new Set(rows
7436
5793
  .map((row) => Number(row.clientId))
7437
5794
  .filter((value) => Number.isFinite(value) && value > 0)));
@@ -7478,266 +5835,38 @@ program
7478
5835
  writeProgress(`Skipping app-backed company enrichment: ${error instanceof Error ? error.message : String(error)}`);
7479
5836
  }
7480
5837
  }
7481
- const contactsMissingCompanyUrl = contacts.filter((contact) => !contact.isVariation &&
7482
- enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl));
7483
- if (contactsMissingCompanyUrl.length > 0) {
7484
- const companyUrlByContactId = await resolveLinkedInCompanyUrlsForContacts({
7485
- contacts: contactsMissingCompanyUrl,
7486
- timeoutMs: Math.min(timeoutMs, 15_000),
7487
- concurrency: strategy.bulkMode ? 6 : 3,
7488
- overallBudgetMs: strategy.bulkMode ? 20_000 : 10_000
7489
- });
7490
- for (const row of enrichedRows) {
7491
- if (row.linkedinCompanyUrl) {
7492
- continue;
7493
- }
7494
- const linkedinCompanyUrl = companyUrlByContactId.get(row.contactId);
7495
- if (!linkedinCompanyUrl) {
7496
- continue;
7497
- }
7498
- row.linkedinCompanyUrl = linkedinCompanyUrl;
7499
- row.companyFound = true;
7500
- row.companySource = "web-search";
7501
- }
7502
- }
7503
- const missingRows = enrichedRows.filter((row) => !row.found);
7504
- const useDirectPeopleLookup = !strategy.bulkMode &&
7505
- shouldUseDirectPeopleLookup({
7506
- rowCount: missingRows.length
7507
- });
7508
- const useWorkflowPeopleLookup = !strategy.bulkMode &&
7509
- shouldUseWorkflowPeopleLookup({
7510
- rowCount: missingRows.length
7511
- });
7512
- if (missingRows.length > 0) {
7513
- const rowByContactId = new Map(enrichedRows.map((row) => [row.contactId, row]));
7514
- const directContacts = contacts
7515
- .filter((contact) => missingRows.some((row) => row.contactId === contact.contact_id))
7516
- .map((contact) => {
7517
- const row = rowByContactId.get(contact.contact_id);
7518
- if (!row) {
7519
- return contact;
7520
- }
7521
- return {
7522
- ...contact,
7523
- linkedinCompanyUrl: row.linkedinCompanyUrl ?? contact.linkedinCompanyUrl,
7524
- companyNameOriginal: row.matchedCompanyName ?? contact.companyNameOriginal,
7525
- companyName: row.matchedCompanyName && normalizeLookupCompanyForSearch(row.matchedCompanyName)
7526
- ? normalizeLookupCompanyForSearch(row.matchedCompanyName)
7527
- : contact.companyName
7528
- };
5838
+ try {
5839
+ const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
5840
+ contacts,
5841
+ timeoutMs
7529
5842
  });
7530
- let linkedInUrlByContactId = new Map();
7531
- if (useDirectPeopleLookup) {
7532
- try {
7533
- directAttempted = true;
7534
- const result = await invokeLinkedInUrlEnrichmentDirect({
7535
- contacts: directContacts,
7536
- timeoutMs
7537
- });
7538
- const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
7539
- linkedInUrlByContactId = new Map(result.contacts.map((contact) => [
7540
- contact.contact_id,
7541
- {
7542
- linkedinUrl: contact.linkedin_url ?? null,
7543
- salesNavProfileUrl: contact.sales_nav_profile_url ?? null,
7544
- linkedinCompanyUrl: null,
7545
- salesNavCompanyUrl: null,
7546
- matchedFullName: contact.matched_full_name ?? null,
7547
- matchedCompanyName: contact.matched_company_name ?? null,
7548
- matchedTitle: contact.matched_title ?? null
7549
- }
7550
- ]));
7551
- for (const row of enrichedRows) {
7552
- if (row.found)
7553
- continue;
7554
- const profile = linkedInUrlByContactId.get(row.contactId);
7555
- if (profile?.linkedinUrl) {
7556
- row.linkedinUrl = profile.linkedinUrl;
7557
- row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
7558
- row.found = true;
7559
- row.source = "linkedin-direct";
7560
- row.matchedFullName = profile.matchedFullName ?? row.matchedFullName ?? null;
7561
- row.matchedCompanyName = profile.matchedCompanyName ?? row.matchedCompanyName ?? null;
7562
- row.matchedTitle = profile.matchedTitle ?? row.matchedTitle ?? null;
7563
- }
7564
- const directContact = directContacts.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
7565
- const companyContext = directContact
7566
- ? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
7567
- : null;
7568
- if (companyContext && !row.linkedinCompanyUrl) {
7569
- row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
7570
- row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
7571
- row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
7572
- row.companySource =
7573
- row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
7574
- row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
7575
- row.matchedCompanyEmployeeCount =
7576
- companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
7577
- }
7578
- }
7579
- const contactsStillMissingCompany = contacts.filter((contact) => !contact.isVariation &&
7580
- enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl && !row.salesNavCompanyUrl));
7581
- if (contactsStillMissingCompany.length > 0) {
7582
- const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
7583
- contacts: contactsStillMissingCompany,
7584
- timeoutMs,
7585
- precomputedContexts: result.companyContexts
7586
- });
7587
- const companyByContactId = new Map(companyResult.contacts.map((contact) => [
7588
- contact.contact_id,
7589
- {
7590
- linkedinCompanyUrl: contact.linkedin_company_url ?? null,
7591
- salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
7592
- matchedCompanyName: contact.matched_company_name ?? null,
7593
- matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
7594
- }
7595
- ]));
7596
- for (const row of enrichedRows) {
7597
- const company = companyByContactId.get(row.contactId);
7598
- if (!company || row.linkedinCompanyUrl) {
7599
- continue;
7600
- }
7601
- row.linkedinCompanyUrl = company.linkedinCompanyUrl;
7602
- row.salesNavCompanyUrl = company.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
7603
- row.companyFound = Boolean(company.linkedinCompanyUrl || company.salesNavCompanyUrl);
7604
- row.companySource =
7605
- company.linkedinCompanyUrl || company.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
7606
- row.matchedCompanyName = company.matchedCompanyName ?? row.matchedCompanyName ?? null;
7607
- row.matchedCompanyEmployeeCount =
7608
- company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
7609
- }
7610
- }
7611
- }
7612
- catch (error) {
7613
- const message = error instanceof Error ? error.message : String(error);
7614
- if (!/Missing LinkedIn direct lookup session/i.test(message)) {
7615
- throw error;
7616
- }
7617
- }
7618
- }
7619
- const stillMissingAfterDirect = enrichedRows.filter((row) => !row.found);
7620
- const contactsStillMissing = directContacts.filter((contact) => stillMissingAfterDirect.some((row) => row.contactId === contact.contact_id));
7621
- if (contactsStillMissing.length > 0 && useWorkflowPeopleLookup) {
7622
- workflowAttempted = true;
7623
- try {
7624
- const workflow = await invokeLinkedInUrlEnrichmentWorkflow({
7625
- contacts: contactsStillMissing,
7626
- externalUserId: orgId || sessionOrgId || "cli_direct_lookup",
7627
- timeoutMs: Math.min(timeoutMs, strategy.workflowStageBudgetMs)
7628
- });
7629
- if (!workflow.response.ok) {
7630
- throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
7631
- }
7632
- linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
7633
- parsedBody: workflow.parsedBody,
7634
- contacts: contactsStillMissing
7635
- });
7636
- for (const row of enrichedRows) {
7637
- if (row.found)
7638
- continue;
7639
- const profile = linkedInUrlByContactId.get(row.contactId);
7640
- if (profile?.linkedinUrl) {
7641
- row.linkedinUrl = profile.linkedinUrl;
7642
- row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
7643
- row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
7644
- row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
7645
- row.found = true;
7646
- row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
7647
- row.source = "workflow";
7648
- row.companySource =
7649
- row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
7650
- }
7651
- }
7652
- }
7653
- catch (error) {
7654
- writeProgress(`Skipping workflow profile enrichment: ${error instanceof Error ? error.message : String(error)}`);
5843
+ const companyByContactId = new Map(companyResult.contacts.map((contact) => [
5844
+ contact.contact_id,
5845
+ {
5846
+ linkedinCompanyUrl: contact.linkedin_company_url ?? null,
5847
+ salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
5848
+ matchedCompanyName: contact.matched_company_name ?? null,
5849
+ matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
7655
5850
  }
7656
- }
7657
- const serperContacts = directContacts.filter((contact) => enrichedRows.some((row) => row.contactId === contact.contact_id && !row.found));
7658
- if (strategy.bulkMode && serperContacts.length > 0) {
7659
- writeProgress(`Using bulk profile resolution strategy for ${serperContacts.length} remaining contacts.`);
7660
- }
7661
- const serperResults = await resolveSerperLinkedInProfilesInParallel({
7662
- contacts: serperContacts.filter((contact) => !contact.isVariation),
7663
- timeoutMs,
7664
- concurrency: Math.min(strategy.serperConcurrency, serperContacts.length || 1),
7665
- maxQueries: strategy.serperMaxQueries,
7666
- overallBudgetMs: strategy.serperStageBudgetMs
7667
- });
5851
+ ]));
7668
5852
  for (const row of enrichedRows) {
7669
- if (row.found)
5853
+ const company = companyByContactId.get(row.contactId);
5854
+ if (!company || row.linkedinCompanyUrl) {
7670
5855
  continue;
7671
- const linkedinUrl = serperResults.get(row.contactId);
7672
- if (!linkedinUrl)
7673
- continue;
7674
- row.linkedinUrl = linkedinUrl;
7675
- row.found = true;
7676
- row.source = "web-search";
7677
- }
7678
- const stillMissingAfterSerper = enrichedRows.filter((row) => !row.found);
7679
- if (shouldAttemptBulkDirectProfileLookup({
7680
- strategy,
7681
- unresolvedRowCount: stillMissingAfterSerper.length
7682
- })) {
7683
- const bulkDirectCandidates = rankContactsForBulkDirectProfileLookup({
7684
- contacts: directContacts.filter((contact) => stillMissingAfterSerper.some((row) => row.contactId === contact.contact_id)),
7685
- rowsByContactId: rowByContactId,
7686
- limit: strategy.bulkDirectProfileMaxRows
7687
- });
7688
- if (bulkDirectCandidates.length > 0) {
7689
- writeProgress(`Using bulk direct profile follow-up for ${bulkDirectCandidates.length} high-signal unresolved contacts.`);
7690
- try {
7691
- directAttempted = true;
7692
- const result = await invokeLinkedInUrlEnrichmentDirect({
7693
- contacts: bulkDirectCandidates,
7694
- timeoutMs: strategy.bulkDirectProfileTimeoutMs,
7695
- perAttemptTimeoutMs: Math.min(strategy.bulkDirectProfileTimeoutMs, 2_500),
7696
- perContactBudgetMs: strategy.bulkDirectProfileTimeoutMs
7697
- });
7698
- const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
7699
- const bulkDirectByContactId = new Map(result.contacts.map((contact) => [
7700
- contact.contact_id,
7701
- {
7702
- linkedinUrl: contact.linkedin_url ?? null,
7703
- salesNavProfileUrl: contact.sales_nav_profile_url ?? null
7704
- }
7705
- ]));
7706
- for (const row of enrichedRows) {
7707
- if (row.found)
7708
- continue;
7709
- const profile = bulkDirectByContactId.get(row.contactId);
7710
- if (profile?.linkedinUrl) {
7711
- row.linkedinUrl = profile.linkedinUrl;
7712
- row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
7713
- row.found = true;
7714
- row.source = "linkedin-direct";
7715
- }
7716
- const directContact = bulkDirectCandidates.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
7717
- const companyContext = directContact
7718
- ? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
7719
- : null;
7720
- if (companyContext && !row.linkedinCompanyUrl) {
7721
- row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
7722
- row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
7723
- row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
7724
- row.companySource =
7725
- row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
7726
- row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
7727
- row.matchedCompanyEmployeeCount =
7728
- companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
7729
- }
7730
- }
7731
- }
7732
- catch (error) {
7733
- const message = error instanceof Error ? error.message : String(error);
7734
- if (!/Missing LinkedIn direct lookup session/i.test(message)) {
7735
- writeProgress(`Skipping bulk direct profile follow-up: ${message}`);
7736
- }
7737
- }
7738
5856
  }
5857
+ row.linkedinCompanyUrl = company.linkedinCompanyUrl;
5858
+ row.salesNavCompanyUrl = company.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
5859
+ row.companyFound = Boolean(company.linkedinCompanyUrl || company.salesNavCompanyUrl);
5860
+ row.companySource =
5861
+ company.linkedinCompanyUrl || company.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
5862
+ row.matchedCompanyName = company.matchedCompanyName ?? row.matchedCompanyName ?? null;
5863
+ row.matchedCompanyEmployeeCount =
5864
+ company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
7739
5865
  }
7740
5866
  }
5867
+ catch (error) {
5868
+ writeProgress(`Skipping separate company enrichment: ${error instanceof Error ? error.message : String(error)}`);
5869
+ }
7741
5870
  const payload = {
7742
5871
  status: "ok",
7743
5872
  orgId: String(options.orgId ?? "").trim() || null,
@@ -7746,23 +5875,8 @@ program
7746
5875
  companiesFound: enrichedRows.filter((row) => row.companyFound).length,
7747
5876
  directAttempted,
7748
5877
  workflowAttempted,
7749
- bulkMode: strategy.bulkMode,
7750
5878
  rows: enrichedRows
7751
5879
  };
7752
- for (const row of enrichedRows) {
7753
- const contact = contactById.get(row.contactId);
7754
- upsertLinkedInProfileHitCacheEntry({
7755
- fullName: row.fullName,
7756
- companyName: row.companyName,
7757
- email: contact?.email,
7758
- contactId: row.contactId,
7759
- linkedinUrl: row.linkedinUrl ?? null,
7760
- salesNavProfileUrl: row.salesNavProfileUrl ?? null,
7761
- linkedinCompanyUrl: row.linkedinCompanyUrl ?? null,
7762
- salesNavCompanyUrl: row.salesNavCompanyUrl ?? null
7763
- });
7764
- }
7765
- await persistLinkedInProfileHitCache();
7766
5880
  if (options.out) {
7767
5881
  await writeJsonFile(options.out, payload);
7768
5882
  }
@@ -8100,14 +6214,12 @@ program
8100
6214
  });
8101
6215
  program
8102
6216
  .command("leads:generate")
8103
- .description("Generate leads from your Salesprompter workspace when authenticated, or from fallback seeds.")
6217
+ .description("Generate leads for a target account or from fallback seeds.")
8104
6218
  .requiredOption("--icp <path>", "Path to ICP JSON")
8105
6219
  .option("--count <number>", "Number of leads to generate", "10")
8106
6220
  .option("--domain <domain>", "Target a specific company domain like company.com")
8107
6221
  .option("--company-domain <domain>", "Deprecated alias for --domain")
8108
6222
  .option("--company-name <name>", "Optional company name override for a targeted domain")
8109
- .option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
8110
- .option("--source <source>", "auto|workspace|fallback", "auto")
8111
6223
  .requiredOption("--out <path>", "Output file path")
8112
6224
  .action(async (options) => {
8113
6225
  const icp = await readJsonFile(options.icp, IcpSchema);
@@ -8115,15 +6227,9 @@ program
8115
6227
  const domain = options.domain ?? options.companyDomain;
8116
6228
  const target = {
8117
6229
  companyDomain: domain,
8118
- companyName: options.companyName,
8119
- linkedinCompanyPage: options.linkedinCompanyPage
6230
+ companyName: options.companyName
8120
6231
  };
8121
- const result = await generateLeadsForCommand({
8122
- icp,
8123
- count,
8124
- target,
8125
- source: options.source
8126
- });
6232
+ const result = await leadProvider.generateLeads(icp, count, target);
8127
6233
  await writeJsonFile(options.out, result.leads);
8128
6234
  printOutput({
8129
6235
  status: "ok",
@@ -8168,8 +6274,6 @@ program
8168
6274
  .option("--domain <domain>", "Target a specific company domain like company.com")
8169
6275
  .option("--company-domain <domain>", "Deprecated alias for --domain")
8170
6276
  .option("--company-name <name>", "Optional company name override for a targeted domain")
8171
- .option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
8172
- .option("--source <source>", "auto|workspace|fallback", "auto")
8173
6277
  .option("--out-prefix <path>", "Output path prefix (writes <prefix>-leads.json, <prefix>-enriched.json, <prefix>-scored.json)", "./data/leads-pipeline")
8174
6278
  .action(async (options) => {
8175
6279
  const icp = await readJsonFile(options.icp, IcpSchema);
@@ -8177,19 +6281,13 @@ program
8177
6281
  const domain = options.domain ?? options.companyDomain;
8178
6282
  const target = {
8179
6283
  companyDomain: domain,
8180
- companyName: options.companyName,
8181
- linkedinCompanyPage: options.linkedinCompanyPage
6284
+ companyName: options.companyName
8182
6285
  };
8183
6286
  const outPrefix = String(options.outPrefix);
8184
6287
  const leadsOut = `${outPrefix}-leads.json`;
8185
6288
  const enrichedOut = `${outPrefix}-enriched.json`;
8186
6289
  const scoredOut = `${outPrefix}-scored.json`;
8187
- const generated = await generateLeadsForCommand({
8188
- icp,
8189
- count,
8190
- target,
8191
- source: options.source
8192
- });
6290
+ const generated = await leadProvider.generateLeads(icp, count, target);
8193
6291
  await writeJsonFile(leadsOut, generated.leads);
8194
6292
  const enriched = await enrichmentProvider.enrichLeads(generated.leads);
8195
6293
  await writeJsonFile(enrichedOut, enriched);
@@ -8927,7 +7025,6 @@ program
8927
7025
  .option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
8928
7026
  .option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
8929
7027
  .option("--slice-preset <name>", "Slice preset label stored with the export runs", "human-resources-crawl")
8930
- .option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
8931
7028
  .option("--max-split-depth <number>", "Maximum number of adaptive split dimensions to use", "6")
8932
7029
  .option("--max-slices <number>", "Safety cap for total claimed slices in this invocation", "1000")
8933
7030
  .option("--max-retries <number>", "Retries for non-splitting export failures", "3")
@@ -8946,7 +7043,6 @@ program
8946
7043
  const jobId = z.string().uuid().optional().parse(options.jobId);
8947
7044
  const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
8948
7045
  const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
8949
- const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
8950
7046
  const maxSplitDepth = z.coerce.number().int().min(1).max(6).parse(options.maxSplitDepth);
8951
7047
  const maxSlices = z.coerce.number().int().min(1).max(10000).parse(options.maxSlices);
8952
7048
  const maxRetries = z.coerce.number().int().min(0).max(5).parse(options.maxRetries);
@@ -8966,7 +7062,6 @@ program
8966
7062
  jobId: jobId ?? null,
8967
7063
  maxResultsPerSearch,
8968
7064
  numberOfProfiles,
8969
- clientId,
8970
7065
  slicePreset: options.slicePreset,
8971
7066
  maxSplitDepth,
8972
7067
  maxSlices,
@@ -9067,7 +7162,6 @@ program
9067
7162
  traceId: logger.traceId,
9068
7163
  command: {
9069
7164
  sourceQueryUrl: queryUrl,
9070
- clientId,
9071
7165
  slicePreset: options.slicePreset,
9072
7166
  maxResultsPerSearch,
9073
7167
  numberOfProfiles,
@@ -9089,7 +7183,6 @@ program
9089
7183
  splitTrail: seed.splitTrail,
9090
7184
  rawPayload: {
9091
7185
  workflow: "salesnav:crawl",
9092
- clientId,
9093
7186
  traceId: logger.traceId
9094
7187
  }
9095
7188
  }
@@ -9129,7 +7222,6 @@ program
9129
7222
  idlePollSeconds,
9130
7223
  idleMaxPolls,
9131
7224
  parallelExports,
9132
- clientId,
9133
7225
  traceId: logger.traceId,
9134
7226
  logger
9135
7227
  });
@@ -9210,43 +7302,6 @@ program
9210
7302
  recentEvents
9211
7303
  });
9212
7304
  });
9213
- program
9214
- .command("phantombuster:containers:sync")
9215
- .alias("pb:containers:sync")
9216
- .description("Fetch Phantombuster containers for configured agents and store them in Neon.")
9217
- .option("--agent-id <id>", "Phantombuster agent id to sync. Repeat to sync multiple agents.", collectStringOptionValue, [])
9218
- .option("--limit <number>", "Maximum containers to fetch per Phantombuster page", "100")
9219
- .option("--max-pages <number>", "Maximum Phantombuster pages to fetch per agent", "50")
9220
- .option("--mode <mode>", "Phantombuster container mode: all or finalized", "all")
9221
- .option("--before-ended-at <iso>", "Only fetch containers that ended before this ISO timestamp")
9222
- .option("--metadata-only", "Store container metadata without fetching output and result objects", false)
9223
- .option("--out <path>", "Optional local JSON output path")
9224
- .action(async (options) => {
9225
- const agentIds = z.array(z.string().min(1)).parse(options.agentId);
9226
- const limit = z.coerce.number().int().min(1).max(500).parse(options.limit);
9227
- const maxPages = z.coerce.number().int().min(1).max(500).parse(options.maxPages);
9228
- const mode = z.enum(["all", "finalized"]).parse(options.mode);
9229
- const beforeEndedAt = options.beforeEndedAt
9230
- ? z.string().datetime().parse(options.beforeEndedAt)
9231
- : undefined;
9232
- const session = await requireAuthSession();
9233
- const result = await syncPhantombusterContainersViaApp(session, {
9234
- agentIds: agentIds.length > 0 ? agentIds : undefined,
9235
- limit,
9236
- maxPages,
9237
- mode,
9238
- beforeEndedAt,
9239
- includeResults: !options.metadataOnly
9240
- });
9241
- const payload = {
9242
- ...result,
9243
- dryRun: false
9244
- };
9245
- if (options.out) {
9246
- await writeJsonFile(options.out, payload);
9247
- }
9248
- printOutput(payload);
9249
- });
9250
7305
  program
9251
7306
  .command("salesnav:export")
9252
7307
  .alias("search:export")
@@ -9255,18 +7310,12 @@ program
9255
7310
  .option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
9256
7311
  .option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
9257
7312
  .option("--slice-preset <name>", "Slice preset label stored with the export run", "human-resources-default")
9258
- .option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
9259
- .option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
9260
- .option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the export", "20")
9261
7313
  .option("--out <path>", "Optional local JSON output path")
9262
7314
  .option("--dry-run", "Only generate sliced query URLs without exporting them", false)
9263
7315
  .action(async (options) => {
9264
7316
  const queryUrls = z.array(z.string().url()).min(1).parse(options.queryUrl);
9265
7317
  const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
9266
7318
  const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
9267
- const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
9268
- const agentBusyMaxWaits = z.coerce.number().int().min(0).max(100).parse(options.agentBusyMaxWaits);
9269
- const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
9270
7319
  const prepared = queryUrls.map((queryUrl) => buildSalesNavigatorPeopleSlice(queryUrl));
9271
7320
  const effectiveDryRun = Boolean(options.dryRun || shouldBypassAuth());
9272
7321
  if (effectiveDryRun) {
@@ -9288,10 +7337,10 @@ program
9288
7337
  printOutput(payload);
9289
7338
  return;
9290
7339
  }
9291
- let session = await requireAuthSession();
7340
+ const session = await requireAuthSession();
9292
7341
  const exported = [];
9293
7342
  for (const item of prepared) {
9294
- const result = await runSalesNavigatorExportWithAgentWait(session, {
7343
+ const result = await runSalesNavigatorExport(session, {
9295
7344
  sourceQueryUrl: item.sourceQueryUrl,
9296
7345
  slicedQueryUrl: item.slicedQueryUrl,
9297
7346
  appliedFilters: item.appliedFilters,
@@ -9300,17 +7349,12 @@ program
9300
7349
  slicePreset: options.slicePreset,
9301
7350
  rawPayload: {
9302
7351
  workflow: "salesnav:export",
9303
- clientId,
9304
7352
  sourceQueryUrl: item.sourceQueryUrl,
9305
7353
  slicedQueryUrl: item.slicedQueryUrl,
9306
7354
  appliedFilters: item.appliedFilters
9307
7355
  }
9308
- }, {
9309
- waitSeconds: agentBusyWaitSeconds,
9310
- maxWaits: agentBusyMaxWaits
9311
7356
  });
9312
7357
  exported.push(result);
9313
- session = await requireAuthSession();
9314
7358
  }
9315
7359
  const payload = {
9316
7360
  status: "ok",