salesprompter-cli 0.1.35 → 0.1.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/cli.js +2321 -208
- package/dist/deel-outreach.js +16 -1
- package/dist/direct-path.js +16 -1
- package/package.json +2 -1
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
|
-
import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { access, appendFile, mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
@@ -33,7 +33,9 @@ import { buildSalesNavigatorHistoricalBackfillPlan, ensureSalesNavigatorPeopleCo
|
|
|
33
33
|
const require = createRequire(import.meta.url);
|
|
34
34
|
const { version: packageVersion } = require("../package.json");
|
|
35
35
|
const program = new Command();
|
|
36
|
-
const
|
|
36
|
+
const companyProvider = new HeuristicCompanyProvider();
|
|
37
|
+
const peopleSearchProvider = new HeuristicPeopleSearchProvider();
|
|
38
|
+
const leadProvider = new AccountLeadProvider(companyProvider, peopleSearchProvider);
|
|
37
39
|
const enrichmentProvider = new HeuristicEnrichmentProvider();
|
|
38
40
|
const scoringProvider = new HeuristicScoringProvider();
|
|
39
41
|
const syncProvider = new RoutedSyncProvider(new DryRunSyncProvider(), new InstantlySyncProvider());
|
|
@@ -120,6 +122,28 @@ const LinkedInCompanyBackfillStatusResponseSchema = z.object({
|
|
|
120
122
|
failureCode: z.string().nullable().optional(),
|
|
121
123
|
failureMessage: z.string().nullable().optional()
|
|
122
124
|
});
|
|
125
|
+
// Validation schema for a Phantombuster containers sync response: a literal
// "ok" status, the synced agent ids, a per-agent summary, and aggregate counters.
const PhantombusterContainersSyncResponseSchema = (() => {
    // Every counter in this payload is validated as a non-negative integer.
    const counter = () => z.number().int().nonnegative();
    const agentSummary = z.object({
        agentId: z.string().min(1),
        fetched: counter(),
        upserted: counter(),
        resultsSynced: counter()
    });
    return z.object({
        status: z.literal("ok"),
        agentIds: z.array(z.string().min(1)),
        agents: z.array(agentSummary),
        fetched: counter(),
        upserted: counter(),
        resultsSynced: counter(),
        outputsStored: counter(),
        resultObjectsStored: counter(),
        resultRowsStored: counter(),
        // Projection counters may be omitted entirely.
        leadListsProjected: counter().optional(),
        leadListContactsProjected: counter().optional(),
        contactsProjected: counter().optional(),
        // Pool/qualification counters may be omitted or explicitly null.
        leadPoolRows: counter().nullable().optional(),
        qualifiedContacts: counter().nullable().optional(),
        qualifiedCompanies: counter().nullable().optional()
    });
})();
|
|
123
147
|
const CliEmailEnrichmentCompaniesResponseSchema = z.object({
|
|
124
148
|
clientId: z.number().int().positive(),
|
|
125
149
|
companies: z.array(z.object({
|
|
@@ -948,6 +972,13 @@ function splitLookupFullName(fullName) {
|
|
|
948
972
|
/**
 * Builds the placeholder e-mail address used for a lookup contact.
 *
 * @param {string|number} contactId - Identifier embedded after the "+" tag.
 * @returns {string} An address on the salesprompter.invalid domain.
 */
function buildSyntheticLookupEmail(contactId) {
    const localPart = `linkedin-lookup+${contactId}`;
    return `${localPart}@salesprompter.invalid`;
}
|
|
975
|
+
/**
 * Converts an optional lookup field to a whitespace-normalized string.
 *
 * @param {*} value - Raw field value; null and undefined mean "absent".
 * @returns {string|undefined} The normalized text, or undefined when the value
 *   is absent or normalizes to an empty string.
 */
function normalizeLinkedInLookupField(value) {
    const collapsed = value === null || value === undefined
        ? ""
        : normalizeLookupWhitespace(String(value));
    return collapsed ? collapsed : undefined;
}
|
|
951
982
|
function looksLikeLookupCompanyRow(fullName, companyName) {
|
|
952
983
|
const fullNameComparable = normalizeLooseMatchText(fullName);
|
|
953
984
|
const companyComparable = normalizeLooseMatchText(companyName);
|
|
@@ -967,19 +998,32 @@ function parseLinkedInUrlLookupInput(content) {
|
|
|
967
998
|
const parsed = z
|
|
968
999
|
.array(z.object({
|
|
969
1000
|
clientId: z.union([z.string(), z.number()]).nullish(),
|
|
1001
|
+
contactId: z.union([z.string(), z.number()]).nullish(),
|
|
1002
|
+
companyId: z.union([z.string(), z.number()]).nullish(),
|
|
970
1003
|
fullName: z.string().nullish(),
|
|
971
1004
|
companyName: z.string().nullish(),
|
|
972
1005
|
email: z.string().nullish(),
|
|
973
|
-
|
|
1006
|
+
contact_email: z.string().nullish(),
|
|
1007
|
+
jobTitle: z.string().nullish(),
|
|
1008
|
+
jobtitle: z.string().nullish(),
|
|
1009
|
+
title: z.string().nullish(),
|
|
1010
|
+
linkedin_company_url: z.string().nullish(),
|
|
1011
|
+
linkedinCompanyUrl: z.string().nullish(),
|
|
1012
|
+
deep_dive_recommended_role: z.string().nullish(),
|
|
1013
|
+
deepDiveRecommendedRole: z.string().nullish()
|
|
974
1014
|
}))
|
|
975
1015
|
.parse(JSON.parse(trimmed));
|
|
976
1016
|
return parsed
|
|
977
1017
|
.map((row) => ({
|
|
978
1018
|
clientId: row.clientId == null ? null : String(row.clientId).trim() || null,
|
|
1019
|
+
contactId: row.contactId == null ? undefined : String(row.contactId).trim() || undefined,
|
|
1020
|
+
companyId: row.companyId == null ? undefined : String(row.companyId).trim() || undefined,
|
|
979
1021
|
fullName: row.fullName?.trim() ?? "",
|
|
980
1022
|
companyName: row.companyName?.trim() ?? "",
|
|
981
|
-
email: row.email?.trim() || undefined,
|
|
982
|
-
jobTitle: row.jobTitle?.trim() || undefined
|
|
1023
|
+
email: row.email?.trim() || row.contact_email?.trim() || undefined,
|
|
1024
|
+
jobTitle: row.jobTitle?.trim() || row.jobtitle?.trim() || row.title?.trim() || undefined,
|
|
1025
|
+
linkedinCompanyUrl: row.linkedin_company_url?.trim() || row.linkedinCompanyUrl?.trim() || undefined,
|
|
1026
|
+
deepDiveRecommendedRole: row.deep_dive_recommended_role?.trim() || row.deepDiveRecommendedRole?.trim() || undefined
|
|
983
1027
|
}))
|
|
984
1028
|
.filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
|
|
985
1029
|
}
|
|
@@ -1007,17 +1051,35 @@ function parseLinkedInUrlLookupInput(content) {
|
|
|
1007
1051
|
? headerValues.findIndex((value) => ["companyname", "company_name"].includes(value))
|
|
1008
1052
|
: 2;
|
|
1009
1053
|
const emailIndex = hasHeader ? headerValues.findIndex((value) => value === "email") : -1;
|
|
1054
|
+
const contactEmailIndex = hasHeader ? headerValues.findIndex((value) => value === "contact_email") : -1;
|
|
1010
1055
|
const jobTitleIndex = hasHeader
|
|
1011
1056
|
? headerValues.findIndex((value) => ["jobtitle", "job_title", "title"].includes(value))
|
|
1012
1057
|
: -1;
|
|
1058
|
+
const contactIdIndex = hasHeader
|
|
1059
|
+
? headerValues.findIndex((value) => ["contactid", "contact_id", "hubspot_contact_id"].includes(value))
|
|
1060
|
+
: -1;
|
|
1061
|
+
const companyIdIndex = hasHeader
|
|
1062
|
+
? headerValues.findIndex((value) => ["companyid", "company_id", "hubspot_company_id"].includes(value))
|
|
1063
|
+
: -1;
|
|
1064
|
+
const linkedinCompanyUrlIndex = hasHeader
|
|
1065
|
+
? headerValues.findIndex((value) => ["linkedin_company_url", "linkedincompanyurl"].includes(value))
|
|
1066
|
+
: -1;
|
|
1067
|
+
const deepDiveRecommendedRoleIndex = hasHeader
|
|
1068
|
+
? headerValues.findIndex((value) => ["deep_dive_recommended_role", "deepdiverecommendedrole"].includes(value))
|
|
1069
|
+
: -1;
|
|
1013
1070
|
return dataLines
|
|
1014
1071
|
.map((line) => splitLooseDelimitedLine(line, delimiter).map((value) => value.trim()))
|
|
1015
1072
|
.map((columns) => ({
|
|
1016
1073
|
clientId: clientIdIndex >= 0 ? columns[clientIdIndex] || null : null,
|
|
1074
|
+
contactId: contactIdIndex >= 0 ? columns[contactIdIndex] || undefined : undefined,
|
|
1075
|
+
companyId: companyIdIndex >= 0 ? columns[companyIdIndex] || undefined : undefined,
|
|
1017
1076
|
fullName: fullNameIndex >= 0 ? columns[fullNameIndex] || "" : "",
|
|
1018
1077
|
companyName: companyNameIndex >= 0 ? columns[companyNameIndex] || "" : "",
|
|
1019
|
-
email: emailIndex >= 0 ? columns[emailIndex] || undefined : undefined
|
|
1020
|
-
|
|
1078
|
+
email: (emailIndex >= 0 ? columns[emailIndex] || undefined : undefined) ??
|
|
1079
|
+
(contactEmailIndex >= 0 ? columns[contactEmailIndex] || undefined : undefined),
|
|
1080
|
+
jobTitle: jobTitleIndex >= 0 ? columns[jobTitleIndex] || undefined : undefined,
|
|
1081
|
+
linkedinCompanyUrl: linkedinCompanyUrlIndex >= 0 ? columns[linkedinCompanyUrlIndex] || undefined : undefined,
|
|
1082
|
+
deepDiveRecommendedRole: deepDiveRecommendedRoleIndex >= 0 ? columns[deepDiveRecommendedRoleIndex] || undefined : undefined
|
|
1021
1083
|
}))
|
|
1022
1084
|
.filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
|
|
1023
1085
|
}
|
|
@@ -1072,7 +1134,7 @@ function parseLinkedInCompanyLookupInput(content) {
|
|
|
1072
1134
|
}
|
|
1073
1135
|
function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
1074
1136
|
return rows.flatMap((row, index) => {
|
|
1075
|
-
const contactId = String(index + 1);
|
|
1137
|
+
const contactId = normalizeLinkedInLookupField(row.contactId) ?? String(index + 1);
|
|
1076
1138
|
const syntheticEmail = row.email?.trim() || buildSyntheticLookupEmail(contactId);
|
|
1077
1139
|
const rawCompanyName = normalizeLookupWhitespace(row.companyName);
|
|
1078
1140
|
const cleanedCompanyName = normalizeLookupCompanyForSearch(cleanedCompanyMap.get(normalizeLookupCompanyForCleaning(rawCompanyName)) ?? rawCompanyName);
|
|
@@ -1086,7 +1148,10 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
1086
1148
|
companyName: cleanedCompanyName,
|
|
1087
1149
|
companyNameOriginal: rawCompanyName || undefined,
|
|
1088
1150
|
email: syntheticEmail,
|
|
1089
|
-
jobTitle: row.jobTitle
|
|
1151
|
+
jobTitle: row.jobTitle,
|
|
1152
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1153
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1154
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
|
|
1090
1155
|
}
|
|
1091
1156
|
];
|
|
1092
1157
|
}
|
|
@@ -1101,7 +1166,10 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
1101
1166
|
companyName: cleanedCompanyName,
|
|
1102
1167
|
companyNameOriginal: rawCompanyName || undefined,
|
|
1103
1168
|
email: syntheticEmail,
|
|
1104
|
-
jobTitle: row.jobTitle
|
|
1169
|
+
jobTitle: row.jobTitle,
|
|
1170
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1171
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1172
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
|
|
1105
1173
|
}
|
|
1106
1174
|
];
|
|
1107
1175
|
const rawDiffers = rawSplit.firstName !== cleanedSplit.firstName ||
|
|
@@ -1115,6 +1183,9 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
1115
1183
|
companyNameOriginal: rawCompanyName || undefined,
|
|
1116
1184
|
email: syntheticEmail,
|
|
1117
1185
|
jobTitle: row.jobTitle,
|
|
1186
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1187
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1188
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined,
|
|
1118
1189
|
isVariation: true
|
|
1119
1190
|
});
|
|
1120
1191
|
}
|
|
@@ -1137,10 +1208,132 @@ function readPipedreamLinkedInEnrichmentConfig() {
|
|
|
1137
1208
|
projectEnvironment: resolveConfiguredEnvValue(process.env, "PIPEDREAM_PROJECT_ENVIRONMENT") || ""
|
|
1138
1209
|
};
|
|
1139
1210
|
}
|
|
1211
|
+
/**
 * Tells whether an e-mail address is on the salesprompter.invalid domain.
 *
 * @param {string} value - E-mail address to inspect.
 * @returns {boolean} True when the normalized, lower-cased address ends with
 *   "@salesprompter.invalid".
 */
function isSyntheticLinkedInLookupEmail(value) {
    const syntheticSuffix = "@salesprompter.invalid";
    const candidate = normalizeLookupWhitespace(value).toLowerCase();
    return candidate.endsWith(syntheticSuffix);
}
|
|
1140
1215
|
/**
 * Extracts the JSESSIONID value from a cookie header string.
 *
 * @param {string} cookie - Cookie header text.
 * @returns {string} The trimmed JSESSIONID value (surrounding double quotes
 *   dropped), or "" when the cookie has no JSESSIONID entry.
 */
function deriveCsrfTokenFromCookie(cookie) {
    const sessionIdPattern = /JSESSIONID="?([^";]+)"?/i;
    const captured = cookie.match(sessionIdPattern);
    if (!captured) {
        return "";
    }
    const token = captured[1];
    if (token === undefined || token === null) {
        return "";
    }
    return token.trim() || "";
}
|
|
1219
|
+
/**
 * Turns a stored cookie value into a cookie header string.
 *
 * A value that already contains "=" or ";" is returned as-is after whitespace
 * normalization; any other non-empty value is wrapped as the `li_at` cookie.
 *
 * @param {string} cookie - Cookie header or bare cookie value.
 * @returns {string} The cookie header, or "" when the input normalizes to empty.
 */
function normalizeLinkedInDirectLookupCookieHeader(cookie) {
    const header = normalizeLookupWhitespace(cookie);
    if (!header) {
        return "";
    }
    const looksLikeCookieHeader = ["=", ";"].some((marker) => header.includes(marker));
    return looksLikeCookieHeader ? header : `li_at=${header}`;
}
|
|
1229
|
+
/**
 * Scans log text for serialized token records and returns the last one found.
 *
 * @param {string} content - Raw text of an extension storage log file.
 * @returns {{csrfToken: string, linkedInIdentity: string, sessionCookie: string, userAgent: string}|null}
 *   The fields of the last matching record, or null when nothing matches or
 *   any field normalizes to an empty value.
 */
function parseLocalLinkedInExtensionTokenLog(content) {
    const recordPattern = /\{"csrfToken":"([^"]+)","extractedFrom":"sales-api\/salesApiLeadSearch"[\s\S]*?"linkedInIdentity":"([^"]+)"[\s\S]*?"sessionCookie":"([\s\S]*?)","syncStatus":"(success|captured)"[\s\S]*?"userAgent":"([^"]+)"\}/g;
    // Only the final record in the file is used.
    let newest = null;
    for (const hit of content.matchAll(recordPattern)) {
        newest = hit;
    }
    if (newest === null) {
        return null;
    }
    // Capture group 4 (syncStatus) is matched but not needed.
    const [, rawCsrf, rawIdentity, rawCookie, , rawUserAgent] = newest;
    const csrfToken = normalizeLookupWhitespace(rawCsrf);
    const linkedInIdentity = normalizeLookupWhitespace(rawIdentity);
    // Undo the escaping of quotes and backslashes inside the serialized cookie.
    const unescapedCookie = rawCookie?.replace(/\\"/g, "\"").replace(/\\\\/g, "\\");
    const sessionCookie = normalizeLookupWhitespace(unescapedCookie);
    const userAgent = normalizeLookupWhitespace(rawUserAgent);
    const isComplete = csrfToken && linkedInIdentity && sessionCookie && userAgent;
    return isComplete
        ? { csrfToken, linkedInIdentity, sessionCookie, userAgent }
        : null;
}
|
|
1251
|
+
/**
 * Reads one log file and parses the last token record out of it.
 *
 * Best-effort: any failure while reading or parsing yields null.
 *
 * @param {string} filePath - Path of the log file to read.
 * @returns {Promise<object|null>} The parsed token snapshot, or null.
 */
async function readLocalLinkedInExtensionTokenLog(filePath) {
    let snapshot = null;
    try {
        // latin1 decodes every byte to a character, so binary content cannot fail decoding.
        const rawLog = await readFile(filePath, "latin1");
        snapshot = parseLocalLinkedInExtensionTokenLog(rawLog);
    }
    catch {
        snapshot = null;
    }
    return snapshot;
}
|
|
1260
|
+
/**
 * Lists candidate extension-storage log files, in reverse lexicographic path order.
 *
 * Resolution order:
 *   1. SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_LOG_PATH - returned as the only candidate.
 *   2. SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_DIR - its ".log" and ".ldb" files.
 *   3. Every ".log" file under
 *      <home>/Library/Application Support/{Google/Chrome,Chromium}/<profile>/Local Extension Settings/<extension>/.
 *
 * Directories that cannot be listed are skipped.
 *
 * @returns {Promise<string[]>} Candidate file paths (possibly empty).
 */
async function listChromeExtensionTokenLogCandidates() {
    // Lists a directory, reporting null instead of throwing when it is unreadable.
    const listEntries = async (directory) => {
        try {
            return await readdir(directory);
        }
        catch {
            return null;
        }
    };
    const explicitFile = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_LOG_PATH);
    if (explicitFile) {
        return [explicitFile];
    }
    const explicitDir = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_DIR);
    if (explicitDir) {
        const entries = await listEntries(explicitDir);
        if (entries === null) {
            return [];
        }
        return entries
            .filter((entry) => entry.endsWith(".log") || entry.endsWith(".ldb"))
            .map((entry) => path.join(explicitDir, entry))
            .sort()
            .reverse();
    }
    const applicationSupport = ["Library", "Application Support"];
    const browserRoots = [
        path.join(os.homedir(), ...applicationSupport, "Google", "Chrome"),
        path.join(os.homedir(), ...applicationSupport, "Chromium")
    ];
    const collected = [];
    for (const browserRoot of browserRoots) {
        for (const profile of (await listEntries(browserRoot)) ?? []) {
            const settingsRoot = path.join(browserRoot, profile, "Local Extension Settings");
            for (const extensionId of (await listEntries(settingsRoot)) ?? []) {
                const storageDir = path.join(settingsRoot, extensionId);
                const storageFiles = (await listEntries(storageDir)) ?? [];
                const logPaths = storageFiles
                    .filter((entry) => entry.endsWith(".log"))
                    .map((entry) => path.join(storageDir, entry));
                collected.push(...logPaths);
            }
        }
    }
    return collected.sort().reverse();
}
|
|
1321
|
+
/**
 * Builds a direct-lookup config from the first candidate log file that yields
 * a complete token snapshot.
 *
 * @returns {Promise<{csrfToken: string, identity: string, cookie: string, userAgent: string}|null>}
 *   The config, or null when no candidate file contains a usable snapshot.
 */
async function readLocalLinkedInExtensionDirectLookupConfig() {
    for (const logPath of await listChromeExtensionTokenLogCandidates()) {
        const tokens = await readLocalLinkedInExtensionTokenLog(logPath);
        if (tokens) {
            const { csrfToken, linkedInIdentity, sessionCookie, userAgent } = tokens;
            return {
                csrfToken,
                identity: linkedInIdentity,
                cookie: normalizeLinkedInDirectLookupCookieHeader(sessionCookie),
                userAgent
            };
        }
    }
    return null;
}
|
|
1144
1337
|
function readLinkedInDirectLookupEnvConfig() {
|
|
1145
1338
|
const cookie = process.env.SALESPROMPTER_LINKEDIN_SALES_NAV_COOKIE?.trim() ||
|
|
1146
1339
|
process.env.LINKEDIN_SALES_NAV_COOKIE?.trim() ||
|
|
@@ -1157,7 +1350,7 @@ function readLinkedInDirectLookupEnvConfig() {
|
|
|
1157
1350
|
return {
|
|
1158
1351
|
csrfToken,
|
|
1159
1352
|
identity,
|
|
1160
|
-
cookie,
|
|
1353
|
+
cookie: normalizeLinkedInDirectLookupCookieHeader(cookie),
|
|
1161
1354
|
userAgent: process.env.SALESPROMPTER_LINKEDIN_USER_AGENT?.trim() ||
|
|
1162
1355
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
|
|
1163
1356
|
};
|
|
@@ -1207,7 +1400,7 @@ async function readStoredLinkedInDirectLookupConfig() {
|
|
|
1207
1400
|
return {
|
|
1208
1401
|
csrfToken,
|
|
1209
1402
|
identity,
|
|
1210
|
-
cookie: claimed.sessionCookie,
|
|
1403
|
+
cookie: normalizeLinkedInDirectLookupCookieHeader(claimed.sessionCookie),
|
|
1211
1404
|
userAgent
|
|
1212
1405
|
};
|
|
1213
1406
|
}
|
|
@@ -1221,6 +1414,11 @@ async function readLinkedInDirectLookupConfig() {
|
|
|
1221
1414
|
cachedLinkedInDirectLookupConfig = envConfig;
|
|
1222
1415
|
return envConfig;
|
|
1223
1416
|
}
|
|
1417
|
+
const localExtensionConfig = await readLocalLinkedInExtensionDirectLookupConfig();
|
|
1418
|
+
if (localExtensionConfig) {
|
|
1419
|
+
cachedLinkedInDirectLookupConfig = localExtensionConfig;
|
|
1420
|
+
return localExtensionConfig;
|
|
1421
|
+
}
|
|
1224
1422
|
const storedConfig = await readStoredLinkedInDirectLookupConfig();
|
|
1225
1423
|
if (storedConfig) {
|
|
1226
1424
|
cachedLinkedInDirectLookupConfig = storedConfig;
|
|
@@ -1237,46 +1435,200 @@ function buildLinkedInSalesApiUrl(params) {
|
|
|
1237
1435
|
const encodedFirstName = encodeURIComponent(params.firstName);
|
|
1238
1436
|
const encodedLastName = encodeURIComponent(params.lastName);
|
|
1239
1437
|
const encodedCompanyName = encodeURIComponent(params.companyName);
|
|
1438
|
+
const encodedKeywords = encodeURIComponent(params.keywordsText?.trim() || params.companyName);
|
|
1240
1439
|
const filters = params.searchMode === "current_company"
|
|
1241
1440
|
? `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED))),(type:CURRENT_COMPANY,values:List((text:${encodedCompanyName},selectionType:INCLUDED)))`
|
|
1242
1441
|
: `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED)))`;
|
|
1243
|
-
const keywordsSegment = params.searchMode === "
|
|
1442
|
+
const keywordsSegment = params.searchMode === "current_company" ? "" : `,keywords:${encodedKeywords}`;
|
|
1244
1443
|
return `${baseUrl.replace(/\/+$/, "")}/sales-api/salesApiLeadSearch?q=searchQuery&query=(recentSearchParam:(id:${Date.now()},doLogHistory:true),filters:List(${filters})${keywordsSegment})&start=0&count=25&trackingParam=(sessionId:${generateLinkedInSessionId()})&decorationId=com.linkedin.sales.deco.desktop.searchv2.LeadSearchResult-14`;
|
|
1245
1444
|
}
|
|
1445
|
+
/**
 * Picks up to four search keywords out of a job title.
 *
 * Tokens shorter than four characters are dropped unless allow-listed
 * ("hr", "it", "cfo"); a fixed list of generic title words is dropped too.
 *
 * @param {string} value - Job title text.
 * @returns {string[]} At most four tokens, in their original order.
 */
function extractLookupTitleKeywords(value) {
    const keptShortTokens = new Set(["hr", "it", "cfo"]);
    const ignoredTokens = new Set([
        "head",
        "senior",
        "consultant",
        "manager",
        "specialist",
        "lead",
        "global",
        "team",
        "group"
    ]);
    const keywords = [];
    for (const token of normalizeLooseMatchText(value).split(/\s+/)) {
        const isLongEnough = token.length >= 4 || keptShortTokens.has(token);
        if (!isLongEnough || ignoredTokens.has(token)) {
            continue;
        }
        keywords.push(token);
        if (keywords.length === 4) {
            break;
        }
    }
    return keywords;
}
|
|
1463
|
+
/**
 * Maps a deep-dive role label to the search keywords associated with it.
 *
 * @param {string} role - Role label; it is normalized before the lookup.
 * @returns {string[]} A fresh keyword array, empty for unrecognized roles.
 */
function buildDeepDiveRoleSearchKeywords(role) {
    // Keys must equal the normalized form of the role exactly.
    // NOTE(review): confirm normalizeLooseMatchText yields these space-free keys for multi-word labels.
    const keywordsByRole = new Map([
        ["budgetholder", ["finance", "procurement", "purchasing", "accounting", "controlling", "cfo"]],
        ["decisionmaker", ["director", "head", "vp", "chief", "leiter", "lead"]],
        ["champion", ["hr", "workplace", "operations", "it", "people", "office"]],
        ["executivesponsor", ["executive", "board", "chief", "managing", "director", "ceo"]],
        ["influencer", ["specialist", "manager", "consultant", "project", "workplace", "hr"]],
        ["legalandcompliance", ["legal", "compliance", "datenschutz", "counsel"]],
        ["blocker", ["procurement", "legal", "compliance", "security"]],
        ["enduser", ["workplace", "office", "operations", "assistant", "admin"]]
    ]);
    return keywordsByRole.get(normalizeLooseMatchText(role)) ?? [];
}
|
|
1246
1486
|
/**
 * Builds the salesApiAccountSearch URL for a company-name keyword search.
 *
 * The base URL comes from SALESPROMPTER_LINKEDIN_SALES_API_BASE_URL when set
 * (trailing slashes removed), otherwise https://www.linkedin.com.
 *
 * @param {string} companyName - Company name used as the search keywords.
 * @returns {string} The request URL (first 10 results).
 */
function buildLinkedInAccountSearchApiUrl(companyName) {
    const configuredBaseUrl = process.env.SALESPROMPTER_LINKEDIN_SALES_API_BASE_URL?.trim();
    const origin = (configuredBaseUrl || "https://www.linkedin.com").replace(/\/+$/, "");
    const keywords = encodeURIComponent(companyName);
    const query = `(recentSearchParam:(id:${Date.now()},doLogHistory:true),spellCorrectionEnabled:true,keywords:${keywords})`;
    const trackingParam = `(sessionId:${generateLinkedInSessionId()})`;
    return `${origin}/sales-api/salesApiAccountSearch?q=searchQuery&query=${query}&start=0&count=10&trackingParam=${trackingParam}&decorationId=com.linkedin.sales.deco.desktop.searchv2.AccountSearchResult-14`;
}
|
|
1252
|
-
function buildLinkedInLookupSearchVariants(contact) {
|
|
1492
|
+
/**
 * Builds the ordered list of lead-search variants to try for one contact.
 *
 * Company-name candidates are gathered from several sources, each with a score
 * (the highest score per normalized name wins); the six best are kept. Variants
 * are then emitted in stages, de-duplicated, and capped at 12:
 *   1. "current_company" for high-confidence names (e-mail host, LinkedIn
 *      handle, resolved aliases, ranked names scoring >= 90),
 *   2. "keywords" for the cleaned company name and all ranked names,
 *   3. "keywords_title" for the same names (only when title keywords exist),
 *   4. "current_company" for the cleaned/original company name and all ranked names.
 *
 * @param {object} contact - Reads firstName, lastName, companyName,
 *   companyNameOriginal, email, jobTitle, linkedinCompanyUrl and
 *   deepDiveRecommendedRole.
 * @param {number} timeoutMs - Forwarded to buildLinkedInProfileCompanyHints.
 * @param {string[]} [resolvedCompanyAliases=[]] - Already-resolved company
 *   names; they receive the highest candidate score.
 * @returns {Promise<Array<{firstName: string, lastName: string, companyName: string, searchMode: string, keywordsText: (string|undefined)}>>}
 *   At most 12 search variants, in the order they should be attempted.
 */
async function buildLinkedInLookupSearchVariants(contact, timeoutMs, resolvedCompanyAliases = []) {
    const variants = [];
    const seen = new Set();
    const companyCandidateScores = new Map();
    // Records a candidate name, keeping the best score seen for it.
    const addCompanyCandidate = (value, score) => {
        const normalized = normalizeLookupWhitespace(value);
        if (!normalized) {
            return;
        }
        companyCandidateScores.set(normalized, Math.max(score, companyCandidateScores.get(normalized) ?? 0));
    };
    addCompanyCandidate(contact.companyName, 80);
    addCompanyCandidate(contact.companyNameOriginal, 70);
    // A purely numeric handle carries no searchable name, so it is ignored.
    const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
    const linkedInHandleWords = linkedInHandle && !/^\d+$/.test(linkedInHandle)
        ? linkedInHandle.replace(/[-_]+/g, " ")
        : "";
    if (linkedInHandleWords) {
        addCompanyCandidate(linkedInHandleWords, 95);
    }
    for (const alias of resolvedCompanyAliases) {
        addCompanyCandidate(alias, 110);
    }
    // Synthetic placeholder addresses say nothing about the employer.
    const emailDomain = (() => {
        const email = normalizeLookupWhitespace(contact.email);
        if (!email || isSyntheticLinkedInLookupEmail(email)) {
            return "";
        }
        const at = email.lastIndexOf("@");
        return at >= 0 ? email.slice(at + 1) : "";
    })();
    // First label of the e-mail domain, e.g. "acme" for "acme.example".
    const emailHost = emailDomain
        ? emailDomain.replace(/^www\./i, "").split(".")[0] ?? ""
        : "";
    if (emailHost) {
        addCompanyCandidate(emailHost.replace(/[-_]+/g, " "), 100);
    }
    if (contact.jobTitle && contact.deepDiveRecommendedRole) {
        // Low-confidence fallback: the last word (>= 4 chars) of the company name.
        const primaryWord = normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
            .split(/\s+/)
            .filter((part) => part.length >= 4)
            .slice(-1)[0];
        if (primaryWord) {
            addCompanyCandidate(primaryWord, 45);
        }
    }
    const companyHints = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
    for (const phrase of companyHints.phrases) {
        const tokenCount = normalizeLooseMatchText(phrase).split(/\s+/).filter(Boolean).length;
        if (tokenCount >= 1 && tokenCount <= 4) {
            addCompanyCandidate(phrase, tokenCount <= 2 ? 75 : 60);
        }
    }
    for (const keyword of companyHints.keywords.slice(0, 5)) {
        // Keywords containing a dot are scored higher than plain words.
        addCompanyCandidate(keyword, keyword.includes(".") ? 90 : 55);
    }
    const titleKeywords = Array.from(new Set([
        ...extractLookupTitleKeywords(contact.jobTitle),
        ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
    ])).slice(0, 6);
    // Best score first; ties go to the shorter name.
    const rankedCompanyCandidates = Array.from(companyCandidateScores.entries())
        .sort((left, right) => right[1] - left[1] || left[0].length - right[0].length)
        .slice(0, 6);
    const rankedCompanyNames = rankedCompanyCandidates.map(([companyName]) => companyName);
    const highConfidenceCompanyNames = rankedCompanyCandidates
        .filter(([, score]) => score >= 90)
        .map(([companyName]) => companyName);
    const emailHostCandidate = emailDomain
        ? normalizeLookupWhitespace(emailHost).replace(/[-_]+/g, " ")
        : "";
    const linkedInHandleCandidate = linkedInHandleWords
        ? normalizeLookupWhitespace(linkedInHandleWords)
        : "";
    const cleanCompanyCandidate = normalizeLookupWhitespace(contact.companyName) ||
        normalizeLookupWhitespace(contact.companyNameOriginal) ||
        "";
    // Appends one variant unless it is empty, unusable, or already present.
    const pushVariant = (companyName, searchMode) => {
        const normalizedCompany = normalizeLookupWhitespace(companyName);
        if (!normalizedCompany) {
            return;
        }
        const keywordsText = searchMode === "keywords_title" && titleKeywords.length > 0
            ? `${normalizedCompany} ${titleKeywords.join(" ")}`
            : undefined;
        // A title search without any title keywords would duplicate the plain keyword search.
        if (searchMode === "keywords_title" && !keywordsText) {
            return;
        }
        const key = [
            contact.firstName.trim().toLowerCase(),
            contact.lastName.trim().toLowerCase(),
            normalizedCompany.toLowerCase(),
            searchMode,
            keywordsText?.toLowerCase() ?? ""
        ].join("|");
        if (seen.has(key)) {
            return;
        }
        seen.add(key);
        variants.push({
            firstName: contact.firstName,
            lastName: contact.lastName,
            companyName: normalizedCompany,
            searchMode,
            keywordsText
        });
    };
    const keywordCandidates = [cleanCompanyCandidate, ...rankedCompanyNames];
    // The stages already cover every (ranked name, mode) pair, so no further
    // catch-all pass is needed: it could only re-offer variants that `seen` rejects.
    const stages = [
        ["current_company", [
            emailHostCandidate,
            linkedInHandleCandidate,
            ...resolvedCompanyAliases,
            ...highConfidenceCompanyNames
        ]],
        ["keywords", keywordCandidates],
        ["keywords_title", keywordCandidates],
        ["current_company", [
            cleanCompanyCandidate,
            normalizeLookupWhitespace(contact.companyNameOriginal),
            ...rankedCompanyNames
        ]]
    ];
    for (const [searchMode, companyNames] of stages) {
        for (const companyName of companyNames) {
            pushVariant(companyName, searchMode);
        }
    }
    return variants.slice(0, 12);
}
|
|
1281
1633
|
function normalizeSalesNavLeadUrl(value) {
|
|
1282
1634
|
const trimmed = String(value ?? "").trim();
|
|
@@ -1298,14 +1650,21 @@ function normalizePublicLinkedInProfileUrl(value) {
|
|
|
1298
1650
|
if (!trimmed) {
|
|
1299
1651
|
return null;
|
|
1300
1652
|
}
|
|
1301
|
-
|
|
1302
|
-
|
|
1653
|
+
let parsed;
|
|
1654
|
+
try {
|
|
1655
|
+
parsed = new URL(trimmed);
|
|
1656
|
+
}
|
|
1657
|
+
catch {
|
|
1658
|
+
return null;
|
|
1659
|
+
}
|
|
1660
|
+
if (!/(^|\.)linkedin\.com$/i.test(parsed.hostname)) {
|
|
1303
1661
|
return null;
|
|
1304
1662
|
}
|
|
1305
|
-
const
|
|
1306
|
-
if (!
|
|
1663
|
+
const pathMatch = parsed.pathname.match(/^\/in\/([^/?#]+)\/?/i);
|
|
1664
|
+
if (!pathMatch?.[1]) {
|
|
1307
1665
|
return null;
|
|
1308
1666
|
}
|
|
1667
|
+
const candidate = `https://www.linkedin.com/in/${pathMatch[1]}`;
|
|
1309
1668
|
return normalizeSalesNavLeadUrl(candidate) ? null : candidate;
|
|
1310
1669
|
}
|
|
1311
1670
|
function extractLinkedInProfileUrlFromSalesApiElement(element) {
|
|
@@ -1448,6 +1807,112 @@ function extractLinkedInCompanyNameFromSalesApiElement(element) {
|
|
|
1448
1807
|
}
|
|
1449
1808
|
return null;
|
|
1450
1809
|
}
|
|
1810
|
+
/**
 * Reads a person's full name from a search result element.
 *
 * Prefers the string fields `fullName` then `name`; otherwise joins
 * `firstName` and `lastName`.
 *
 * @param {object|null|undefined} element - Search result element.
 * @returns {string|null} The normalized name, or null when none is available.
 */
function extractLinkedInFullNameFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["fullName", "name"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const cleaned = normalizeLookupWhitespace(raw);
        if (cleaned) {
            return cleaned;
        }
    }
    // Non-string name parts count as empty.
    const readPart = (field) => typeof element[field] === "string"
        ? normalizeLookupWhitespace(element[field])
        : "";
    const givenName = readPart("firstName");
    const familyName = readPart("lastName");
    const joined = normalizeLookupWhitespace(`${givenName} ${familyName}`);
    return joined ? joined : null;
}
|
|
1829
|
+
/**
 * Reads a person's job title from a search result element.
 *
 * Prefers the string fields `title` then `occupation`; otherwise falls back to
 * the `title` of the first entry in `currentPositions`.
 *
 * @param {object|null|undefined} element - Search result element.
 * @returns {string|null} The normalized title, or null when none is available.
 */
function extractLinkedInTitleFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["title", "occupation"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const cleaned = normalizeLookupWhitespace(raw);
        if (cleaned) {
            return cleaned;
        }
    }
    const positions = element.currentPositions;
    const firstPosition = Array.isArray(positions) && positions.length > 0
        ? positions[0]
        : null;
    if (!firstPosition || typeof firstPosition.title !== "string") {
        return null;
    }
    const positionTitle = normalizeLookupWhitespace(firstPosition.title);
    return positionTitle ? positionTitle : null;
}
|
|
1851
|
+
/**
 * Scores how well a Sales Navigator API element matches a contact.
 *
 * Points awarded:
 *  - 120 for an exact (loosely normalized) full-name match, or 90 when the
 *    candidate name contains both the contact's first and last name;
 *  - 40 per company hint that equals the candidate company, or 25 per hint
 *    where either string contains the other;
 *  - 6 per title hint (at most six hints) contained in the candidate title.
 *
 * Company hints come from the contact's company name (raw and aggressively
 * cleaned), its LinkedIn company handle, and the host label of a
 * non-synthetic e-mail address.
 *
 * @param {object} contact - Contact being looked up.
 * @param {object|null|undefined} element - Sales Navigator API element.
 * @returns {{score: number, fullName: (string|null), companyName: (string|null),
 *   title: (string|null), exactNameMatch: boolean, companyMatchCount: number}}
 */
function scoreLinkedInSalesApiElementMatch(contact, element) {
    const fullName = extractLinkedInFullNameFromSalesApiElement(element);
    const companyName = extractLinkedInCompanyNameFromSalesApiElement(Array.isArray(element?.currentPositions) && element.currentPositions.length > 0
        ? element.currentPositions[0]
        : element) ?? extractLinkedInCompanyNameFromSalesApiElement(element);
    const title = extractLinkedInTitleFromSalesApiElement(element);
    const expectedFullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const candidateFullName = normalizeLooseMatchText(fullName);
    const expectedCompanies = Array.from(new Set([
        normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
        normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName)),
        normalizeLooseMatchText(normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? ""),
        normalizeLooseMatchText((() => {
            const email = normalizeLookupWhitespace(contact.email);
            if (!email || isSyntheticLinkedInLookupEmail(email)) {
                return "";
            }
            return email.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
        })())
    ].filter(Boolean)));
    const candidateCompany = normalizeLooseMatchText(companyName);
    const candidateTitle = normalizeLooseMatchText(title);
    let score = 0;
    let exactNameMatch = false;
    let companyMatchCount = 0;
    if (expectedFullName && candidateFullName === expectedFullName) {
        score += 120;
        exactNameMatch = true;
    }
    else if (expectedFullName &&
        candidateFullName.includes(normalizeLooseMatchText(contact.firstName)) &&
        candidateFullName.includes(normalizeLooseMatchText(contact.lastName))) {
        score += 90;
    }
    // Only compare companies when the candidate actually has one. Every string
    // contains the empty string, so an empty candidate company would otherwise
    // pass `companyHint.includes(candidateCompany)` for every hint and inflate
    // both the score and companyMatchCount with no evidence at all.
    if (candidateCompany) {
        for (const companyHint of expectedCompanies) {
            if (!companyHint) {
                continue;
            }
            if (candidateCompany === companyHint) {
                score += 40;
                companyMatchCount += 1;
            }
            else if (candidateCompany.includes(companyHint) || companyHint.includes(candidateCompany)) {
                score += 25;
                companyMatchCount += 1;
            }
        }
    }
    const titleHints = [
        ...extractLookupTitleKeywords(contact.jobTitle),
        ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
    ].slice(0, 6);
    for (const hint of titleHints) {
        // Skip hints that normalize to nothing; an empty needle matches any title.
        const normalizedHint = hint ? normalizeLooseMatchText(hint) : "";
        if (normalizedHint && candidateTitle.includes(normalizedHint)) {
            score += 6;
        }
    }
    return {
        score,
        fullName,
        companyName,
        title,
        exactNameMatch,
        companyMatchCount
    };
}
|
|
1451
1916
|
function extractLinkedInCompanyEmployeeCountFromSalesApiElement(element) {
|
|
1452
1917
|
if (!element) {
|
|
1453
1918
|
return null;
|
|
@@ -1496,6 +1961,111 @@ function buildLinkedInCompanyLookupVariants(params) {
|
|
|
1496
1961
|
}
|
|
1497
1962
|
return variants;
|
|
1498
1963
|
}
|
|
1964
|
+
/**
 * Builds the key used to group contacts by company: the loosely normalized
 * original company name, falling back to `companyName` when the original is
 * null or undefined.
 *
 * @param {object} contact - Contact record.
 * @returns {string} Normalized company key as produced by normalizeLooseMatchText.
 */
function buildDirectCompanyContextKey(contact) {
    const rawCompanyName = contact.companyNameOriginal ?? contact.companyName;
    return normalizeLooseMatchText(rawCompanyName);
}
|
|
1967
|
+
/**
 * Resolves LinkedIn company context for each distinct company among the given
 * contacts by querying the Sales Navigator account search API.
 *
 * One lookup series runs per company key, using the first contact seen for
 * that key. Up to four name variants are tried sequentially within a
 * per-company time budget of min(timeoutMs, 10s). The first response whose top
 * element yields a company URL, Sales Navigator URL or name ends the series.
 * An HTTP 429 also ends it; other failures move on to the next variant.
 *
 * @param {{contacts: Iterable<object>, timeoutMs: number,
 *   config: {csrfToken: string, userAgent: string, identity: string, cookie: string}}} params
 * @returns {Promise<Map<string, object>>} Context per normalized company key.
 */
async function resolveDirectLinkedInCompanyContexts(params) {
    const perCompanyBudgetMs = Math.min(params.timeoutMs, 10_000);
    // First contact encountered for each company key represents that company.
    const representativeContacts = new Map();
    for (const contact of params.contacts) {
        const key = buildDirectCompanyContextKey(contact);
        if (key && !representativeContacts.has(key)) {
            representativeContacts.set(key, contact);
        }
    }
    const contexts = new Map();
    for (const [companyKey, contact] of representativeContacts) {
        const aliasSet = new Set();
        const rememberAlias = (value) => {
            const cleaned = normalizeLookupWhitespace(value);
            if (cleaned) {
                aliasSet.add(cleaned);
            }
        };
        rememberAlias(contact.companyNameOriginal);
        rememberAlias(contact.companyName);
        const knownHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
        // Purely numeric handles carry no name information, so they are not aliases.
        if (knownHandle && !/^\d+$/.test(knownHandle)) {
            rememberAlias(knownHandle.replace(/[-_]+/g, " "));
        }
        let resolvedCompanyUrl = contact.linkedinCompanyUrl ?? null;
        let resolvedSalesNavUrl = null;
        let resolvedName = null;
        let resolvedEmployeeCount = null;
        const deadline = Date.now() + perCompanyBudgetMs;
        const lookupVariants = buildLinkedInCompanyLookupVariants({
            contactId: contact.contact_id,
            companyName: contact.companyName,
            companyNameOriginal: contact.companyNameOriginal
        }).slice(0, 4);
        for (const variant of lookupVariants) {
            if (Date.now() >= deadline) {
                break;
            }
            const abortController = new AbortController();
            // Each request gets between 1s and 6s, bounded by the remaining budget.
            const timer = setTimeout(() => abortController.abort(), Math.min(6_000, Math.max(1_000, deadline - Date.now())));
            try {
                const searchUrl = buildLinkedInAccountSearchApiUrl(variant.companyName);
                const requestHeaders = {
                    accept: "*/*",
                    "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
                    "csrf-token": params.config.csrfToken,
                    referer: "https://www.linkedin.com/sales/search/company",
                    "sec-fetch-dest": "empty",
                    "sec-fetch-mode": "cors",
                    "sec-fetch-site": "same-origin",
                    "user-agent": params.config.userAgent,
                    "x-li-identity": params.config.identity,
                    "x-li-lang": "en_US",
                    "x-li-page-instance": "urn:li:page:d_sales2_search_accounts;13Jvve6kRGCao+iP0wwAag==",
                    "x-restli-protocol-version": "2.0.0",
                    cookie: params.config.cookie
                };
                const response = await fetch(searchUrl, {
                    method: "GET",
                    signal: abortController.signal,
                    headers: requestHeaders
                });
                if (!response.ok) {
                    if (response.status === 429) {
                        // Rate limited: stop trying variants for this company.
                        break;
                    }
                    continue;
                }
                const payload = (await response.json());
                const topElement = payload.elements?.[0];
                const foundCompanyUrl = extractLinkedInCompanyUrlFromSalesApiElement(topElement);
                const foundSalesNavUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(topElement);
                const foundName = extractLinkedInCompanyNameFromSalesApiElement(topElement);
                if (foundCompanyUrl || foundSalesNavUrl || foundName) {
                    resolvedCompanyUrl = foundCompanyUrl ?? resolvedCompanyUrl;
                    resolvedSalesNavUrl = foundSalesNavUrl ?? resolvedSalesNavUrl;
                    resolvedName = foundName ?? resolvedName;
                    resolvedEmployeeCount = extractLinkedInCompanyEmployeeCountFromSalesApiElement(topElement);
                    rememberAlias(foundName);
                    rememberAlias(foundCompanyUrl ? normalizeLinkedInCompanyHandle(foundCompanyUrl)?.replace(/[-_]+/g, " ") : null);
                    rememberAlias(foundSalesNavUrl ? normalizeLookupWhitespace(foundSalesNavUrl.split("/sales/company/")[1]?.split(/[/?#]/)[0] ?? "") : null);
                    break;
                }
            }
            catch {
                // Best effort: network errors and timeouts fall through to the next variant.
            }
            finally {
                clearTimeout(timer);
            }
        }
        contexts.set(companyKey, {
            normalizedCompanyKey: companyKey,
            aliases: Array.from(aliasSet),
            linkedinCompanyUrl: resolvedCompanyUrl,
            salesNavCompanyUrl: resolvedSalesNavUrl,
            matchedCompanyName: resolvedName,
            matchedCompanyEmployeeCount: resolvedEmployeeCount
        });
    }
    return contexts;
}
|
|
1499
2069
|
function buildPublicLinkedInCompanySearchUrl(companyName) {
|
|
1500
2070
|
const baseUrl = process.env.SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL?.trim() ||
|
|
1501
2071
|
"https://duckduckgo.com/html/";
|
|
@@ -1559,7 +2129,8 @@ function extractSerperLinkedInCompanyCandidates(payload) {
|
|
|
1559
2129
|
const organic = "organic" in payload && Array.isArray(payload.organic)
|
|
1560
2130
|
? (payload.organic ?? [])
|
|
1561
2131
|
: [];
|
|
1562
|
-
const
|
|
2132
|
+
const seen = new Set();
|
|
2133
|
+
const candidates = [];
|
|
1563
2134
|
for (const result of organic) {
|
|
1564
2135
|
if (!result || typeof result !== "object") {
|
|
1565
2136
|
continue;
|
|
@@ -1569,60 +2140,685 @@ function extractSerperLinkedInCompanyCandidates(payload) {
|
|
|
1569
2140
|
: "";
|
|
1570
2141
|
const handle = normalizeLinkedInCompanyHandle(link);
|
|
1571
2142
|
if (handle) {
|
|
1572
|
-
|
|
2143
|
+
const url = normalizeLinkedInCompanyPage(handle);
|
|
2144
|
+
if (!seen.has(url)) {
|
|
2145
|
+
seen.add(url);
|
|
2146
|
+
candidates.push({
|
|
2147
|
+
url,
|
|
2148
|
+
title: "title" in result && typeof result.title === "string"
|
|
2149
|
+
? normalizeLookupWhitespace(result.title)
|
|
2150
|
+
: "",
|
|
2151
|
+
snippet: "snippet" in result && typeof result.snippet === "string"
|
|
2152
|
+
? normalizeLookupWhitespace(result.snippet)
|
|
2153
|
+
: ""
|
|
2154
|
+
});
|
|
2155
|
+
}
|
|
1573
2156
|
}
|
|
1574
2157
|
}
|
|
1575
|
-
return
|
|
1576
|
-
}
|
|
1577
|
-
function decodeHtmlEntities(value) {
|
|
1578
|
-
return value
|
|
1579
|
-
.replace(/&/gi, "&")
|
|
1580
|
-
.replace(/"/gi, '"')
|
|
1581
|
-
.replace(/'/gi, "'")
|
|
1582
|
-
.replace(/</gi, "<")
|
|
1583
|
-
.replace(/>/gi, ">");
|
|
2158
|
+
return candidates;
|
|
1584
2159
|
}
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
const
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
const description = decodeHtmlEntities(html.match(/<meta[^>]+name="description"[^>]+content="([^"]+)/i)?.[1] || "");
|
|
1601
|
-
const bodyText = decodeHtmlEntities(html.replace(/<script[\s\S]*?<\/script>/gi, " ").replace(/<style[\s\S]*?<\/style>/gi, " ").replace(/<[^>]+>/g, " "));
|
|
1602
|
-
const unavailableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
|
|
1603
|
-
const unavailable = response.status >= 400 ||
|
|
1604
|
-
unavailableText.includes("page not found") ||
|
|
1605
|
-
unavailableText.includes("this page does not exist") ||
|
|
1606
|
-
unavailableText.includes("page isnt available");
|
|
1607
|
-
const handle = normalizeLinkedInCompanyHandle(finalUrl) ?? normalizeLinkedInCompanyHandle(url);
|
|
1608
|
-
if (!handle) {
|
|
1609
|
-
return null;
|
|
2160
|
+
// Process-lifetime memoization for the LinkedIn lookup helpers below.
// Keyword phrases derived from a company page, keyed by the company URL with
// any trailing slash removed.
const linkedInCompanyHintCache = new Map();
// Parsed profile-page signals keyed by normalized profile URL; null is stored
// when the fetch threw, so the same URL is not retried.
const linkedInProfilePageSignalCache = new Map();
// NOTE(review): presumably the company-page counterpart of the profile cache;
// its reads and writes are outside this section, so confirm at the call site.
const linkedInCompanyPageSignalCache = new Map();
// Serper responses keyed by `${query}::${num}`; null is stored for non-OK responses.
const serperSearchCache = new Map();
// Set once Serper answers 400 "not enough credits"; later searches return null
// without issuing a request.
let serperCreditsExhausted = false;
|
|
2165
|
+
/**
 * Expands a company name or page title into a de-duplicated list of search
 * phrases.
 *
 * Candidates, in insertion order: the whitespace-normalized input, its
 * search-normalized and aggressively cleaned forms, the input with LinkedIn
 * page boilerplate removed, each separator-delimited part of that, and up to
 * three combinations of its significant tokens. Candidates shorter than three
 * characters after normalization are dropped.
 *
 * @param {string|null|undefined} value - Raw company name or title text.
 * @returns {string[]} Unique phrases; empty when the input normalizes to nothing.
 */
function extractKeywordPhrases(value) {
    const baseText = normalizeLookupWhitespace(value);
    if (!baseText) {
        return [];
    }
    const collected = new Set();
    const collect = (candidate) => {
        const tidy = normalizeLookupWhitespace(candidate);
        if (tidy && tidy.length >= 3) {
            collected.add(tidy);
        }
    };
    collect(baseText);
    collect(normalizeLookupCompanyForSearch(baseText));
    collect(aggressivelyCleanLookupCompanyName(baseText));
    // Remove page-title boilerplate such as "| LinkedIn" and navigation words.
    let withoutBoilerplate = baseText.replace(/\|\s*linkedin$/i, "");
    withoutBoilerplate = withoutBoilerplate.replace(/\|\s*overview$/i, "");
    withoutBoilerplate = withoutBoilerplate.replace(/\b(linkedin|home|about|posts|see all details)\b/gi, " ");
    withoutBoilerplate = withoutBoilerplate.replace(/\s+/g, " ").trim();
    collect(withoutBoilerplate);
    for (const fragment of withoutBoilerplate.split(/[|,·•:()/-]+/)) {
        const tidyFragment = normalizeLookupWhitespace(fragment);
        if (tidyFragment) {
            collect(tidyFragment);
        }
    }
    // Generic corporate words are too common to be useful search keywords.
    const genericTokens = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const significantTokens = normalizeLooseMatchText(withoutBoilerplate)
        .split(/\s+/)
        .filter((token) => token.length >= 4 && !genericTokens.has(token));
    if (significantTokens.length > 0) {
        collect(significantTokens[0]);
        collect(significantTokens.slice(0, 2).join(" "));
        collect(significantTokens.slice(-2).join(" "));
    }
    return [...collected];
}
|
|
2217
|
+
/**
 * Collects company-related phrases and keywords for finding a contact's
 * LinkedIn profile.
 *
 * Sources, in order: the contact's company name, its non-numeric LinkedIn
 * company handle, the domain and host label of a non-synthetic e-mail address,
 * and the title/description of the LinkedIn company page. Page-derived hints
 * are memoized in linkedInCompanyHintCache per company URL.
 *
 * @param {object} contact - Contact record.
 * @param {number} timeoutMs - Timeout passed to the company page fetch.
 * @returns {Promise<{phrases: string[], keywords: string[]}>}
 */
async function buildLinkedInProfileCompanyHints(contact, timeoutMs) {
    const genericTokens = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const phraseSet = new Set();
    const keywordSet = new Set();
    const absorb = (value) => {
        for (const phrase of extractKeywordPhrases(value)) {
            phraseSet.add(phrase);
            const tokens = normalizeLooseMatchText(phrase)
                .split(/\s+/)
                .filter((token) => token.length >= 4 && !genericTokens.has(token));
            tokens.slice(0, 5).forEach((token) => {
                keywordSet.add(token);
            });
            if (tokens.length > 1) {
                keywordSet.add(tokens.slice(0, 2).join(" "));
                keywordSet.add(tokens.slice(-2).join(" "));
            }
        }
    };
    absorb(contact.companyNameOriginal ?? contact.companyName);
    const companyHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
    if (companyHandle && !/^\d+$/.test(companyHandle)) {
        absorb(companyHandle.replace(/[-_]+/g, " "));
    }
    const email = normalizeLookupWhitespace(contact.email);
    let rawDomain = "";
    if (email && !isSyntheticLinkedInLookupEmail(email)) {
        rawDomain = email.split("@")[1] ?? "";
    }
    if (rawDomain) {
        const domain = rawDomain.replace(/^www\./i, "");
        keywordSet.add(domain);
        const hostLabel = domain.split(".")[0] ?? "";
        if (hostLabel) {
            absorb(hostLabel.replace(/[-_]+/g, " "));
        }
    }
    const companyPageUrl = contact.linkedinCompanyUrl?.trim();
    if (companyPageUrl) {
        const cacheKey = companyPageUrl.replace(/\/$/, "");
        let pageHints = linkedInCompanyHintCache.get(cacheKey);
        if (!pageHints) {
            const pageSignals = await fetchLinkedInCompanyPageSignals(companyPageUrl, timeoutMs);
            // An empty array is cached when the page could not be read.
            pageHints = pageSignals
                ? [...extractKeywordPhrases(pageSignals.title), ...extractKeywordPhrases(pageSignals.description)]
                : [];
            linkedInCompanyHintCache.set(cacheKey, pageHints);
        }
        for (const hint of pageHints) {
            absorb(hint);
        }
    }
    const tidyList = (values) => Array.from(values)
        .map((value) => normalizeLookupWhitespace(value))
        .filter((value) => value.length > 0);
    return {
        phrases: tidyList(phraseSet),
        keywords: tidyList(keywordSet)
    };
}
|
|
2286
|
+
/**
 * Builds a prioritized list of `site:linkedin.com/in` search queries for a
 * contact.
 *
 * Company phrases and keywords come from buildLinkedInProfileCompanyHints and
 * from the LinkedIn company page's title/description. Phrases are ranked
 * (page-derived first, then by phrasePriority) and capped at six; keywords are
 * ranked by keywordPriority and capped at five. Queries are de-duplicated
 * case-insensitively (first occurrence keeps its score) and returned in
 * descending score order.
 *
 * @param {object} contact - Contact record.
 * @param {number} timeoutMs - Timeout passed to the company page fetches.
 * @returns {Promise<string[]>} Query strings, best first.
 */
async function buildSerperLinkedInProfileQueries(contact, timeoutMs) {
    const genericTokens = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const significantTokens = (text) => normalizeLooseMatchText(text)
        .split(/\s+/)
        .filter((token) => token.length >= 4 && !genericTokens.has(token));
    const fullName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
    const title = normalizeLookupWhitespace(contact.jobTitle);
    // Lower-cased query -> entry; a Map keeps insertion order and the first score.
    const queriesByKey = new Map();
    const pushQuery = (query, score) => {
        const tidyQuery = normalizeLookupWhitespace(query);
        if (!tidyQuery) {
            return;
        }
        const dedupeKey = tidyQuery.toLowerCase();
        if (!queriesByKey.has(dedupeKey)) {
            queriesByKey.set(dedupeKey, { query: tidyQuery, score });
        }
    };
    const hints = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
    const phrasePool = new Set(hints.phrases);
    const keywordPool = new Set(hints.keywords);
    const pagePhrases = new Set();
    const email = normalizeLookupWhitespace(contact.email);
    const emailDomain = email && !isSyntheticLinkedInLookupEmail(email)
        ? email.split("@")[1]?.replace(/^www\./i, "") ?? ""
        : "";
    const emailHost = emailDomain.split(".")[0] ?? "";
    const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? "";
    if (contact.linkedinCompanyUrl?.trim()) {
        const companySignals = await fetchLinkedInCompanyPageSignals(contact.linkedinCompanyUrl.trim(), timeoutMs);
        const fromPage = [
            ...extractKeywordPhrases(companySignals?.title),
            ...extractKeywordPhrases(companySignals?.description)
        ];
        for (const phrase of fromPage) {
            phrasePool.add(phrase);
            pagePhrases.add(phrase);
            const tokens = significantTokens(phrase);
            tokens.slice(0, 4).forEach((token) => {
                keywordPool.add(token);
            });
            if (tokens.length > 1) {
                keywordPool.add(tokens.slice(0, 2).join(" "));
            }
        }
    }
    // Shared part of both priorities: affinity to the e-mail host and company handle.
    const affinityScore = (loose) => {
        let points = 0;
        if (emailHost && loose.includes(normalizeLooseMatchText(emailHost))) {
            points += 80;
        }
        if (linkedInHandle && loose.includes(normalizeLooseMatchText(linkedInHandle))) {
            points += 60;
        }
        return points;
    };
    const countTokens = (loose) => loose.split(/\s+/).filter(Boolean).length;
    const phrasePriority = (value) => {
        const loose = normalizeLooseMatchText(value);
        const tokenCount = countTokens(loose);
        let points = affinityScore(loose);
        if (tokenCount >= 1 && tokenCount <= 4) {
            points += 40;
        }
        if (!/\b(gmbh|holding|services|service|consulting|kg|co)\b/i.test(value)) {
            points += 20;
        }
        if (tokenCount > 7) {
            points -= 40;
        }
        return points;
    };
    const keywordPriority = (value) => {
        const loose = normalizeLooseMatchText(value);
        let points = affinityScore(loose);
        if (value.includes(".")) {
            points += 20;
        }
        if (countTokens(loose) <= 2) {
            points += 10;
        }
        return points;
    };
    const cleanPhrases = [...phrasePool]
        .sort((left, right) => {
            const pageDelta = Number(pagePhrases.has(right)) - Number(pagePhrases.has(left));
            return pageDelta !== 0 ? pageDelta : phrasePriority(right) - phrasePriority(left);
        })
        .slice(0, 6);
    const fallbackKeywords = new Set(keywordPool);
    for (const phrase of cleanPhrases) {
        const tokens = significantTokens(phrase);
        tokens.slice(0, 3).forEach((token) => {
            fallbackKeywords.add(token);
        });
        if (tokens.length > 1) {
            fallbackKeywords.add(tokens.slice(0, 2).join(" "));
        }
    }
    for (const extra of [emailHost, emailDomain, linkedInHandle]) {
        if (extra) {
            fallbackKeywords.add(extra);
        }
    }
    const cleanKeywords = [...fallbackKeywords]
        .sort((left, right) => keywordPriority(right) - keywordPriority(left))
        .slice(0, 5);
    let keywordScore = 260;
    for (const keyword of cleanKeywords) {
        pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} linkedin`, keywordScore);
        pushQuery(`site:linkedin.com/in ${fullName} ${keyword} linkedin`, keywordScore - 5);
        if (title) {
            pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} "${title}"`, keywordScore - 10);
        }
        keywordScore -= 15;
    }
    let phraseScore = 180;
    for (const companyName of cleanPhrases) {
        pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}"`, phraseScore);
        pushQuery(`site:linkedin.com/in ${fullName} ${companyName} linkedin`, phraseScore - 5);
        if (title) {
            pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}" "${title}"`, phraseScore - 10);
            pushQuery(`site:linkedin.com/in ${fullName} ${companyName} ${title} linkedin`, phraseScore - 15);
        }
        phraseScore -= 10;
    }
    if (emailDomain) {
        pushQuery(`site:linkedin.com/in "${fullName}" "${emailDomain}" linkedin`, 240);
    }
    pushQuery(`site:linkedin.com/in "${fullName}" linkedin`, 50);
    if (title) {
        pushQuery(`site:linkedin.com/in "${fullName}" "${title}" linkedin`, 40);
    }
    return [...queriesByKey.values()]
        .sort((left, right) => right.score - left.score)
        .map((entry) => entry.query);
}
|
|
2446
|
+
/**
 * Extracts public LinkedIn profile URLs from a search-results page body.
 *
 * Scans for plain `https://…linkedin.com/in/…` links first, then for
 * percent-encoded links, which are decoded before normalization. Each match is
 * passed through normalizePublicLinkedInProfileUrl; matches it rejects are
 * dropped.
 *
 * @param {string} bodyText - Raw response body.
 * @returns {string[]} Unique normalized profile URLs in discovery order.
 */
function extractPublicLinkedInProfileSearchCandidates(bodyText) {
    const found = new Set();
    const keepIfProfile = (rawUrl) => {
        const profileUrl = normalizePublicLinkedInProfileUrl(rawUrl);
        if (profileUrl) {
            found.add(profileUrl);
        }
    };
    const plainLinks = bodyText.match(/https:\/\/(?:(?:www|[a-z]{2})\.)?linkedin\.com\/in\/[^"'&<>\s)]+/gi) ?? [];
    plainLinks.forEach((link) => {
        keepIfProfile(link);
    });
    const encodedLinks = bodyText.match(/https?%3A%2F%2F(?:(?:www|[a-z]{2})\.)?linkedin\.com%2Fin%2F[^"'&<>\s)]+/gi) ?? [];
    for (const encodedLink of encodedLinks) {
        try {
            keepIfProfile(decodeURIComponent(encodedLink));
        }
        catch {
            // A malformed percent sequence makes this fragment unusable; skip it.
        }
    }
    return [...found];
}
|
|
2470
|
+
/**
 * Builds the public web-search URL for a LinkedIn profile query.
 *
 * The base URL comes from SALESPROMPTER_LINKEDIN_PROFILE_SEARCH_BASE_URL
 * (trimmed) and defaults to DuckDuckGo's HTML endpoint. The query is set as
 * the `q` search parameter, replacing any `q` already on the base URL.
 *
 * @param {string} query - Search query text.
 * @returns {string} Absolute search URL.
 * @throws {TypeError} When the configured base URL cannot be parsed.
 */
function buildPublicLinkedInProfileSearchUrl(query) {
    const configuredBase = process.env.SALESPROMPTER_LINKEDIN_PROFILE_SEARCH_BASE_URL?.trim();
    const searchUrl = new URL(configuredBase || "https://duckduckgo.com/html/");
    searchUrl.searchParams.set("q", query);
    return searchUrl.toString();
}
|
|
2477
|
+
/**
 * Runs a Serper search and memoizes the outcome per `${query}::${num}`.
 *
 * Returns null without a request when credits are already known to be
 * exhausted or no API key is configured. A non-OK response is cached as null;
 * a 400 response mentioning "not enough credits" also sets the
 * serperCreditsExhausted flag. Thrown errors (including the abort fired after
 * min(timeoutMs, 12s)) return null and are not cached.
 *
 * @param {string} query - Search query.
 * @param {number} num - Requested number of results.
 * @param {number} timeoutMs - Caller's timeout, capped at 12 seconds.
 * @returns {Promise<object|null>} Parsed response JSON, or null.
 */
async function fetchSerperSearchResults(query, num, timeoutMs) {
    if (serperCreditsExhausted) {
        return null;
    }
    const apiKey = getSerperApiKey();
    if (!apiKey) {
        return null;
    }
    const cacheKey = `${query}::${num}`;
    if (serperSearchCache.has(cacheKey)) {
        return serperSearchCache.get(cacheKey) ?? null;
    }
    const abortController = new AbortController();
    const timer = setTimeout(() => abortController.abort(), Math.min(timeoutMs, 12_000));
    try {
        const response = await fetch(getSerperSearchEndpoint(), {
            method: "POST",
            signal: abortController.signal,
            headers: {
                "X-API-KEY": apiKey,
                "Content-Type": "application/json"
            },
            body: JSON.stringify({ q: query, num })
        });
        if (response.ok) {
            const payload = await response.json();
            serperSearchCache.set(cacheKey, payload);
            return payload;
        }
        const errorBody = await response.text().catch(() => "");
        const outOfCredits = response.status === 400 && /not enough credits/i.test(errorBody);
        if (outOfCredits) {
            serperCreditsExhausted = true;
        }
        serperSearchCache.set(cacheKey, null);
        return null;
    }
    catch {
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
2521
|
+
/**
 * Extracts LinkedIn profile candidates from a Serper search payload.
 *
 * Reads the `organic` array, keeps entries whose `link` normalizes to a public
 * profile URL, and de-duplicates by that URL with any trailing slash removed.
 * The first occurrence of each URL wins.
 *
 * @param {unknown} payload - Parsed Serper response.
 * @returns {Array<{url: string, title: string, snippet: string}>}
 */
function extractSerperLinkedInProfileCandidates(payload) {
    if (!payload || typeof payload !== "object") {
        return [];
    }
    const organicResults = "organic" in payload && Array.isArray(payload.organic)
        ? (payload.organic ?? [])
        : [];
    // Map preserves insertion order, so values() yields candidates as discovered.
    const byUrl = new Map();
    for (const entry of organicResults) {
        if (!entry || typeof entry !== "object") {
            continue;
        }
        const readText = (field) => (field in entry && typeof entry[field] === "string"
            ? normalizeLookupWhitespace(entry[field])
            : "");
        const link = "link" in entry && typeof entry.link === "string"
            ? entry.link
            : "";
        const profileUrl = normalizePublicLinkedInProfileUrl(link);
        if (!profileUrl) {
            continue;
        }
        const canonicalUrl = profileUrl.replace(/\/$/, "");
        if (byUrl.has(canonicalUrl)) {
            continue;
        }
        byUrl.set(canonicalUrl, {
            url: canonicalUrl,
            title: readText("title"),
            snippet: readText("snippet")
        });
    }
    return [...byUrl.values()];
}
|
|
2555
|
+
/**
 * Fetches a public LinkedIn profile page and extracts text signals from it.
 *
 * Results are memoized in linkedInProfilePageSignalCache under the normalized
 * profile URL (or the raw URL when it does not normalize), trailing slash
 * removed. A thrown fetch, including the abort after min(timeoutMs, 12s), is
 * cached as null. The page is flagged `unavailable` for HTTP status >= 400 or
 * when its text contains one of the known "not found" phrases.
 *
 * @param {string} url - Profile URL.
 * @param {number} timeoutMs - Caller's timeout, capped at 12 seconds.
 * @returns {Promise<{normalizedUrl: string, title: string, description: string,
 *   bodyText: string, unavailable: boolean}|null>}
 */
async function fetchLinkedInProfilePageSignals(url, timeoutMs) {
    const cacheKey = normalizePublicLinkedInProfileUrl(url)?.replace(/\/$/, "") ?? url.replace(/\/$/, "");
    if (linkedInProfilePageSignalCache.has(cacheKey)) {
        return linkedInProfilePageSignalCache.get(cacheKey) ?? null;
    }
    const abortController = new AbortController();
    const timer = setTimeout(() => abortController.abort(), Math.min(timeoutMs, 12_000));
    try {
        const response = await fetch(rewriteLinkedInUrlForConfiguredBase(url), {
            method: "GET",
            signal: abortController.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        const html = await response.text();
        // The requested URL takes precedence; the response URL is only a fallback.
        const profileUrl = normalizePublicLinkedInProfileUrl(url) ||
            normalizePublicLinkedInProfileUrl(response.url || url);
        if (!profileUrl) {
            // Not cached: this early exit leaves the cache untouched.
            return null;
        }
        const firstCapture = (pattern) => decodeHtmlEntities(html.match(pattern)?.[1] || "");
        const title = firstCapture(/<title[^>]*>([^<]+)/i) ||
            firstCapture(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i);
        const description = firstCapture(/<meta[^>]+name="description"[^>]+content="([^"]+)/i);
        const visibleHtml = html
            .replace(/<script[\s\S]*?<\/script>/gi, " ")
            .replace(/<style[\s\S]*?<\/style>/gi, " ")
            .replace(/<[^>]+>/g, " ");
        const bodyText = decodeHtmlEntities(visibleHtml);
        const searchableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
        const memberProfileMissing = searchableText.includes("member profile") && searchableText.includes("not available");
        const unavailable = response.status >= 400 ||
            searchableText.includes("page not found") ||
            searchableText.includes("profile not found") ||
            memberProfileMissing;
        const signals = {
            normalizedUrl: profileUrl.replace(/\/$/, ""),
            title: normalizeLookupWhitespace(title),
            description: normalizeLookupWhitespace(description),
            bodyText: normalizeLookupWhitespace(bodyText),
            unavailable
        };
        linkedInProfilePageSignalCache.set(cacheKey, signals);
        return signals;
    }
    catch {
        linkedInProfilePageSignalCache.set(cacheKey, null);
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
2604
|
+
/**
 * Scores how strongly a fetched LinkedIn profile page matches a contact.
 *
 * Points awarded:
 *  - 120 when the page text contains the contact's full name;
 *  - 30 per company hint (raw and aggressively cleaned company name) found in
 *    the page text;
 *  - 8 per job-title word (first four words of at least four characters)
 *    found in the page text;
 *  - 40 when the profile URL slug contains both the first and the last name.
 *
 * All comparisons use normalizeLooseMatchText on both sides.
 *
 * @param {object} contact - Contact record.
 * @param {{normalizedUrl: string, title: string, description: string,
 *   bodyText: string}} signals - Output of fetchLinkedInProfilePageSignals.
 * @returns {number} Match score; higher is better.
 */
function scoreLinkedInProfilePageSignals(contact, signals) {
    const fullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const companyHints = [
        normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
        normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName))
    ].filter(Boolean);
    const titleHint = normalizeLooseMatchText(contact.jobTitle);
    const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description} ${signals.bodyText}`);
    let score = 0;
    if (fullName && haystack.includes(fullName)) {
        score += 120;
    }
    for (const hint of companyHints) {
        if (hint && haystack.includes(hint)) {
            score += 30;
        }
    }
    if (titleHint) {
        const titleWords = titleHint.split(/\s+/).filter((token) => token.length >= 4).slice(0, 4);
        score += titleWords.filter((token) => haystack.includes(token)).length * 8;
    }
    const slug = signals.normalizedUrl.split("/in/")[1]?.replace(/\/$/, "") ?? "";
    const slugText = normalizeLooseMatchText(slug.replace(/[-_]+/g, " "));
    // Normalize the name parts the same way as the slug text. Calling
    // toLowerCase() on the raw fields threw for a missing name, compared raw
    // text against loosely normalized text, and let an empty name part match
    // every slug because every string contains "".
    const firstNameText = normalizeLooseMatchText(contact.firstName);
    const lastNameText = normalizeLooseMatchText(contact.lastName);
    if (firstNameText && lastNameText && slugText.includes(firstNameText) && slugText.includes(lastNameText)) {
        score += 40;
    }
    return score;
}
|
|
2630
|
+
/**
 * Scores a Serper search hit (title + snippet + URL) against a contact.
 *
 * Company evidence is gathered from four sources: the original company name,
 * its aggressively cleaned form, the handle of the contact's LinkedIn company
 * URL, and the first label of the contact's email domain (skipped when the
 * email is empty or flagged by isSyntheticLinkedInLookupEmail).
 *
 * Points: 120 for the full name in the hit text; 30 per matching company
 * token of at most two words, otherwise 20; 8 per matching job-title word
 * (first four words of length >= 4); 40 when the URL slug contains both names
 * and at least one company or title match exists.
 *
 * @param {object} contact - Reads firstName, lastName, jobTitle, email, companyName, companyNameOriginal, linkedinCompanyUrl.
 * @param {object} candidate - Reads title, snippet and url.
 * @returns {{score: number, companyMatches: number, titleMatches: number}}
 */
function analyzeSerperLinkedInProfileCandidate(contact, candidate) {
    // First label of the email domain, or "" when the email is unusable.
    const emailDomainLabel = () => {
        const email = normalizeLookupWhitespace(contact.email);
        if (!email || isSyntheticLinkedInLookupEmail(email)) {
            return "";
        }
        return email.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
    };
    const personName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const roleText = normalizeLooseMatchText(contact.jobTitle);
    const companySource = contact.companyNameOriginal ?? contact.companyName;
    const companyNeedles = [];
    // Normalizes a raw hint and keeps it only when something is left.
    const addNeedle = (rawText) => {
        const needle = normalizeLooseMatchText(rawText);
        if (needle) {
            companyNeedles.push(needle);
        }
    };
    addNeedle(companySource);
    addNeedle(aggressivelyCleanLookupCompanyName(companySource));
    addNeedle(normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? "");
    addNeedle(emailDomainLabel());
    const hitText = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);
    let score = personName && hitText.includes(personName) ? 120 : 0;
    let companyMatches = 0;
    let titleMatches = 0;
    for (const needle of companyNeedles) {
        if (!hitText.includes(needle)) {
            continue;
        }
        companyMatches += 1;
        // Short tokens (one or two words) are worth more than longer phrases.
        const wordCount = needle.split(/\s+/).length;
        score += wordCount <= 2 ? 30 : 20;
    }
    if (roleText) {
        const significantWords = roleText
            .split(/\s+/)
            .filter((word) => word.length >= 4)
            .slice(0, 4);
        for (const word of significantWords) {
            if (hitText.includes(word)) {
                titleMatches += 1;
            }
        }
        score += titleMatches * 8;
    }
    const slugWords = normalizeLooseMatchText(candidate.url.split("/in/")[1]?.replace(/\/$/, "").replace(/[-_]+/g, " ") ?? "");
    if (personName) {
        const slugHasBothNames = slugWords.includes(contact.firstName.toLowerCase()) &&
            slugWords.includes(contact.lastName.toLowerCase());
        // The slug bonus needs corroboration from a company or title match.
        if (slugHasBothNames && (companyMatches > 0 || titleMatches > 0)) {
            score += 40;
        }
    }
    return { score, companyMatches, titleMatches };
}
|
|
2673
|
+
/**
 * Looks up a contact's LinkedIn profile URL through Serper search results.
 *
 * Query variants are tried in order. Within a query, a candidate is accepted
 * immediately when its search-hit score is >= 150 and it has at least one
 * company or title match. Otherwise the candidate page is fetched and the
 * best-scoring candidate of the query is accepted when it reaches 130. Errors
 * raised while handling one query are ignored and the next query is tried.
 * When no query yields a result, the lookup is delegated to
 * searchPublicLinkedInProfileUrl with at most four queries.
 *
 * @param {object} contact - Must have non-empty firstName and lastName, otherwise null is returned.
 * @param {number} timeoutMs - Forwarded to the query builder, the Serper fetch and the page fetches.
 * @param {{maxQueries?: number}} [options] - Positive finite cap on queries; unlimited when absent or invalid.
 * @returns {Promise<string|null>} The accepted profile URL, or the fallback's result.
 */
async function searchSerperLinkedInProfileUrl(contact, timeoutMs, options) {
    if (!(contact.firstName && contact.lastName)) {
        return null;
    }
    const requestedCap = options?.maxQueries;
    const maxQueries = requestedCap && Number.isFinite(requestedCap) && requestedCap > 0
        ? Math.trunc(requestedCap)
        : Number.POSITIVE_INFINITY;
    // Evaluates one query; resolves to { url } once a candidate is accepted, otherwise null.
    const resolveFromQuery = async (query) => {
        const parsed = await fetchSerperSearchResults(query, 5, timeoutMs);
        if (!parsed) {
            return null;
        }
        let leaderUrl = null;
        let leaderScore = 0;
        for (const candidate of extractSerperLinkedInProfileCandidates(parsed)) {
            const analysis = analyzeSerperLinkedInProfileCandidate(contact, candidate);
            const corroborated = analysis.companyMatches > 0 || analysis.titleMatches > 0;
            if (analysis.score >= 150 && corroborated) {
                return { url: candidate.url };
            }
            const page = await fetchLinkedInProfilePageSignals(candidate.url, timeoutMs);
            const pageUsable = Boolean(page) && !page.unavailable;
            // Without a usable page only the search-hit score and the raw candidate URL are available.
            const contenderScore = pageUsable
                ? Math.max(analysis.score, scoreLinkedInProfilePageSignals(contact, page))
                : analysis.score;
            const contenderUrl = pageUsable ? page.normalizedUrl : candidate.url;
            if (contenderScore > leaderScore) {
                leaderScore = contenderScore;
                leaderUrl = contenderUrl;
            }
        }
        return leaderUrl && leaderScore >= 130 ? { url: leaderUrl } : null;
    };
    const queries = (await buildSerperLinkedInProfileQueries(contact, timeoutMs)).slice(0, maxQueries);
    for (const query of queries) {
        try {
            const accepted = await resolveFromQuery(query);
            if (accepted) {
                return accepted.url;
            }
        }
        catch {
            // Continue with the next query variant.
        }
    }
    return searchPublicLinkedInProfileUrl(contact, timeoutMs, {
        maxQueries: Math.min(Number.isFinite(maxQueries) ? maxQueries : 4, 4)
    });
}
|
|
2721
|
+
/**
 * Decodes the basic HTML character references found in scraped page text:
 * `&amp;`, `&quot;`, `&apos;`, `&#39;`, `&#x27;`, `&lt;` and `&gt;`
 * (matched case-insensitively).
 *
 * Decoding happens in a single pass, so already-escaped text such as
 * `&amp;lt;` becomes `&lt;` rather than being decoded twice into `<`.
 * Any other text, including unknown entities, is returned unchanged.
 *
 * @param {string} value - Raw text that may contain HTML entities.
 * @returns {string} The text with the supported entities replaced.
 */
function decodeHtmlEntities(value) {
    const decodedByName = {
        amp: "&",
        quot: '"',
        apos: "'",
        "#39": "'",
        "#x27": "'",
        lt: "<",
        gt: ">"
    };
    return value.replace(/&(amp|quot|apos|#39|#x27|lt|gt);/gi, (_entity, name) => decodedByName[name.toLowerCase()]);
}
|
|
2729
|
+
/**
 * Fetches a LinkedIn company page and extracts the text signals used for matching.
 *
 * Results, including failures (stored as null), are memoized in
 * linkedInCompanyPageSignalCache under the URL without its trailing slash.
 * The request is aborted after min(timeoutMs, 12 000) ms.
 *
 * @param {string} url - Company page URL to fetch.
 * @param {number} timeoutMs - Upper bound for the request timeout.
 * @returns {Promise<{normalizedUrl: string, title: string, description: string, bodyText: string, unavailable: boolean}|null>}
 *   The extracted signals, or null when the fetch or the parsing threw.
 */
async function fetchLinkedInCompanyPageSignals(url, timeoutMs) {
    const memoKey = url.replace(/\/$/, "");
    const alreadyFetched = linkedInCompanyPageSignalCache.has(memoKey);
    if (alreadyFetched) {
        return linkedInCompanyPageSignalCache.get(memoKey) ?? null;
    }
    const aborter = new AbortController();
    const abortTimer = setTimeout(() => {
        aborter.abort();
    }, Math.min(timeoutMs, 12_000));
    try {
        const requestInit = {
            method: "GET",
            signal: aborter.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        };
        const response = await fetch(url, requestInit);
        const html = await response.text();
        const finalUrl = response.url || url;
        // Decoded first capture group of a pattern, or "" when it does not match.
        const firstCapture = (pattern) => decodeHtmlEntities(html.match(pattern)?.[1] || "");
        const title = firstCapture(/<title[^>]*>([^<]+)/i) ||
            firstCapture(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i);
        const description = firstCapture(/<meta[^>]+name="description"[^>]+content="([^"]+)/i);
        // Drop scripts and styles, then all remaining tags, to approximate the visible text.
        const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, " ");
        const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, " ");
        const bodyText = decodeHtmlEntities(withoutStyles.replace(/<[^>]+>/g, " "));
        const searchableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
        const notFoundPhrases = ["page not found", "this page does not exist", "page isnt available"];
        const unavailable = response.status >= 400 ||
            notFoundPhrases.some((phrase) => searchableText.includes(phrase));
        let normalizedUrl;
        if (normalizeLinkedInCompanyHandle(finalUrl ?? "") || normalizeLinkedInCompanyHandle(url)) {
            // A company handle was recognised: rebuild the page URL from it.
            normalizedUrl = normalizeLinkedInCompanyPage(normalizeLinkedInCompanyHandle(finalUrl ?? "") ?? normalizeLinkedInCompanyHandle(url) ?? "");
        }
        else {
            normalizedUrl = finalUrl;
        }
        const signals = {
            normalizedUrl,
            title: normalizeLookupWhitespace(title),
            description: normalizeLookupWhitespace(description),
            bodyText: normalizeLookupWhitespace(bodyText),
            unavailable
        };
        linkedInCompanyPageSignalCache.set(memoKey, signals);
        return signals;
    }
    catch {
        // Failures are cached too, so the same URL is not fetched again.
        linkedInCompanyPageSignalCache.set(memoKey, null);
        return null;
    }
    finally {
        clearTimeout(abortTimer);
    }
}
|
|
2775
|
+
/**
 * Looks up a contact's LinkedIn profile URL via the public search endpoint
 * built by buildPublicLinkedInProfileSearchUrl.
 *
 * For each query (at most `maxQueries`, default 4) the search page is fetched,
 * up to five candidate URLs are extracted, each candidate page is fetched and
 * scored, and the best candidate is returned when its score reaches 130.
 * Non-OK responses and thrown errors move on to the next query.
 *
 * @param {object} contact - Contact passed to the query builder and the page scorer.
 * @param {number} timeoutMs - Search requests abort after min(timeoutMs, 12 000) ms; also forwarded to page fetches.
 * @param {{maxQueries?: number}} [options] - Positive finite cap on queries.
 * @returns {Promise<string|null>} Normalized profile URL, or null when nothing qualified.
 */
async function searchPublicLinkedInProfileUrl(contact, timeoutMs, options) {
    const requestedCap = options?.maxQueries;
    const queryCap = requestedCap && Number.isFinite(requestedCap) && requestedCap > 0
        ? Math.trunc(requestedCap)
        : 4;
    // Fetches and scores the candidate pages one by one; resolves to the best { url, score }.
    const pickBestCandidate = async (candidateUrls) => {
        const leader = { url: null, score: 0 };
        for (const candidateUrl of candidateUrls) {
            const page = await fetchLinkedInProfilePageSignals(candidateUrl, timeoutMs);
            if (page && !page.unavailable) {
                const pageScore = scoreLinkedInProfilePageSignals(contact, page);
                if (pageScore > leader.score) {
                    leader.score = pageScore;
                    leader.url = page.normalizedUrl;
                }
            }
        }
        return leader;
    };
    const allQueries = await buildSerperLinkedInProfileQueries(contact, timeoutMs);
    for (const query of allQueries.slice(0, queryCap)) {
        const aborter = new AbortController();
        const abortTimer = setTimeout(() => {
            aborter.abort();
        }, Math.min(timeoutMs, 12_000));
        try {
            const response = await fetch(buildPublicLinkedInProfileSearchUrl(query), {
                method: "GET",
                signal: aborter.signal,
                headers: {
                    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
                }
            });
            if (response.ok) {
                const searchHtml = await response.text();
                const candidateUrls = extractPublicLinkedInProfileSearchCandidates(searchHtml).slice(0, 5);
                const best = await pickBestCandidate(candidateUrls);
                if (best.url && best.score >= 130) {
                    return best.url;
                }
            }
        }
        catch {
            // Continue with the next query variant.
        }
        finally {
            clearTimeout(abortTimer);
        }
    }
    return null;
}
|
|
1626
2822
|
function scoreLinkedInCompanyPageSignals(companyName, signals) {
|
|
1627
2823
|
const inputTokens = normalizeLooseMatchText(companyName).split(/\s+/).filter((token) => token.length >= 4);
|
|
1628
2824
|
const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description}`);
|
|
@@ -1637,6 +2833,20 @@ function scoreLinkedInCompanyPageSignals(companyName, signals) {
|
|
|
1637
2833
|
}
|
|
1638
2834
|
return score;
|
|
1639
2835
|
}
|
|
2836
|
+
/**
 * Scores a Serper search hit for a LinkedIn company page against a company name.
 *
 * Starts from the URL-based score of scoreLinkedInCompanyUrlCandidate, adds 12
 * for every company-name word (length >= 4) found in the hit's title/snippet,
 * and adds 40 when the aggressively cleaned company name appears there as a whole.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {object} candidate - Reads title, snippet and url.
 * @returns {number} Combined score for ranking candidates.
 */
function scoreSerperLinkedInCompanyCandidate(companyName, candidate) {
    const inputTokens = normalizeLooseMatchText(companyName).split(/\s+/).filter((token) => token.length >= 4);
    const haystack = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);
    let score = scoreLinkedInCompanyUrlCandidate(companyName, candidate.url);
    for (const token of inputTokens) {
        if (haystack.includes(token)) {
            score += 12;
        }
    }
    const cleanedName = normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(companyName));
    // An empty needle matches every string, so a name that cleans down to
    // nothing must not hand the whole-name bonus to every candidate.
    if (cleanedName && haystack.includes(cleanedName)) {
        score += 40;
    }
    return score;
}
|
|
1640
2850
|
function scoreLinkedInCompanyUrlCandidate(companyName, url) {
|
|
1641
2851
|
const handle = normalizeLinkedInCompanyHandle(url);
|
|
1642
2852
|
if (!handle || /^\d+$/.test(handle)) {
|
|
@@ -1730,9 +2940,15 @@ async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
|
|
|
1730
2940
|
const parsed = (await response.json());
|
|
1731
2941
|
const candidates = extractSerperLinkedInCompanyCandidates(parsed);
|
|
1732
2942
|
const ranked = candidates
|
|
1733
|
-
.map((
|
|
2943
|
+
.map((candidate) => ({
|
|
2944
|
+
...candidate,
|
|
2945
|
+
score: scoreSerperLinkedInCompanyCandidate(companyName, candidate)
|
|
2946
|
+
}))
|
|
1734
2947
|
.filter((candidate) => candidate.score > 0)
|
|
1735
2948
|
.sort((left, right) => right.score - left.score);
|
|
2949
|
+
if (ranked[0] && ranked[0].score >= 80) {
|
|
2950
|
+
return ranked[0].url;
|
|
2951
|
+
}
|
|
1736
2952
|
let anySignalsFetched = false;
|
|
1737
2953
|
let bestValidated = null;
|
|
1738
2954
|
for (const candidate of ranked.slice(0, 3)) {
|
|
@@ -1772,6 +2988,11 @@ async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
|
|
|
1772
2988
|
}
|
|
1773
2989
|
async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
1774
2990
|
const config = await readLinkedInDirectLookupConfig();
|
|
2991
|
+
const companyContexts = await resolveDirectLinkedInCompanyContexts({
|
|
2992
|
+
contacts: params.contacts.filter((contact) => !contact.isVariation),
|
|
2993
|
+
timeoutMs: params.timeoutMs,
|
|
2994
|
+
config
|
|
2995
|
+
});
|
|
1775
2996
|
const groupedContacts = new Map();
|
|
1776
2997
|
for (const contact of params.contacts) {
|
|
1777
2998
|
const key = contact.email?.trim().toLowerCase() || `contact:${contact.contact_id}`;
|
|
@@ -1780,15 +3001,25 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1780
3001
|
groupedContacts.set(key, existing);
|
|
1781
3002
|
}
|
|
1782
3003
|
const results = [];
|
|
1783
|
-
|
|
3004
|
+
const perAttemptTimeoutMs = params.perAttemptTimeoutMs && Number.isFinite(params.perAttemptTimeoutMs) && params.perAttemptTimeoutMs > 0
|
|
3005
|
+
? Math.trunc(params.perAttemptTimeoutMs)
|
|
3006
|
+
: Math.min(params.timeoutMs, 8_000);
|
|
3007
|
+
const perContactBudgetMs = params.perContactBudgetMs && Number.isFinite(params.perContactBudgetMs) && params.perContactBudgetMs > 0
|
|
3008
|
+
? Math.trunc(params.perContactBudgetMs)
|
|
3009
|
+
: Math.min(params.timeoutMs, 15_000);
|
|
3010
|
+
const rateLimitCooldownMs = Math.max(750, Math.min(3_000, Math.trunc(perAttemptTimeoutMs / 2)));
|
|
3011
|
+
const maxRateLimitCooldowns = 4;
|
|
3012
|
+
let rateLimitCooldownUntil = 0;
|
|
3013
|
+
let consecutiveRateLimitCount = 0;
|
|
3014
|
+
let totalRateLimitCooldowns = 0;
|
|
1784
3015
|
for (const variations of groupedContacts.values()) {
|
|
1785
3016
|
const primary = variations.find((contact) => !contact.isVariation) ?? variations[0];
|
|
1786
3017
|
const blankPerson = !primary?.firstName.trim() || !primary?.lastName.trim();
|
|
1787
|
-
if (
|
|
3018
|
+
if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
1788
3019
|
results.push({
|
|
1789
3020
|
contact_id: primary.contact_id,
|
|
1790
3021
|
linkedin_url: null,
|
|
1791
|
-
error: "LinkedIn rate limit"
|
|
3022
|
+
error: "LinkedIn rate limit budget exhausted"
|
|
1792
3023
|
});
|
|
1793
3024
|
continue;
|
|
1794
3025
|
}
|
|
@@ -1802,11 +3033,23 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1802
3033
|
}
|
|
1803
3034
|
let matchedUrl = null;
|
|
1804
3035
|
let matchedSalesNavUrl = null;
|
|
3036
|
+
let matchedFullName = null;
|
|
3037
|
+
let matchedCompanyName = null;
|
|
3038
|
+
let matchedTitle = null;
|
|
1805
3039
|
let lastError = null;
|
|
3040
|
+
const contactDeadline = Date.now() + perContactBudgetMs;
|
|
3041
|
+
const companyContext = companyContexts.get(buildDirectCompanyContextKey(primary));
|
|
1806
3042
|
for (const candidate of variations) {
|
|
1807
|
-
for (const searchVariant of buildLinkedInLookupSearchVariants(candidate)) {
|
|
3043
|
+
for (const searchVariant of await buildLinkedInLookupSearchVariants(candidate, params.timeoutMs, companyContext?.aliases ?? [])) {
|
|
3044
|
+
if (Date.now() < rateLimitCooldownUntil) {
|
|
3045
|
+
await new Promise((resolve) => setTimeout(resolve, rateLimitCooldownUntil - Date.now()));
|
|
3046
|
+
}
|
|
3047
|
+
if (Date.now() >= contactDeadline) {
|
|
3048
|
+
lastError = lastError || "Direct lookup budget exhausted";
|
|
3049
|
+
break;
|
|
3050
|
+
}
|
|
1808
3051
|
const controller = new AbortController();
|
|
1809
|
-
const timeout = setTimeout(controller.abort.bind(controller), Math.min(
|
|
3052
|
+
const timeout = setTimeout(controller.abort.bind(controller), Math.min(perAttemptTimeoutMs, Math.max(1_000, contactDeadline - Date.now())));
|
|
1810
3053
|
try {
|
|
1811
3054
|
const response = await fetch(buildLinkedInSalesApiUrl(searchVariant), {
|
|
1812
3055
|
method: "GET",
|
|
@@ -1827,20 +3070,51 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1827
3070
|
}
|
|
1828
3071
|
});
|
|
1829
3072
|
if (response.status === 429) {
|
|
1830
|
-
rateLimited = true;
|
|
1831
3073
|
lastError = "LinkedIn rate limit";
|
|
3074
|
+
consecutiveRateLimitCount += 1;
|
|
3075
|
+
totalRateLimitCooldowns += 1;
|
|
3076
|
+
rateLimitCooldownUntil =
|
|
3077
|
+
Date.now() + Math.min(15_000, rateLimitCooldownMs * Math.max(1, consecutiveRateLimitCount));
|
|
3078
|
+
if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
3079
|
+
break;
|
|
3080
|
+
}
|
|
1832
3081
|
break;
|
|
1833
3082
|
}
|
|
1834
3083
|
if (!response.ok) {
|
|
1835
3084
|
lastError = `LinkedIn returned ${response.status}`;
|
|
1836
3085
|
continue;
|
|
1837
3086
|
}
|
|
3087
|
+
consecutiveRateLimitCount = 0;
|
|
3088
|
+
rateLimitCooldownUntil = 0;
|
|
1838
3089
|
const data = (await response.json());
|
|
1839
3090
|
const profilesFound = data.paging?.total ?? 0;
|
|
1840
3091
|
if (profilesFound > 0) {
|
|
1841
|
-
const
|
|
1842
|
-
|
|
1843
|
-
|
|
3092
|
+
const bestCandidate = (data.elements ?? [])
|
|
3093
|
+
.map((element) => ({
|
|
3094
|
+
element,
|
|
3095
|
+
...scoreLinkedInSalesApiElementMatch(candidate, element)
|
|
3096
|
+
}))
|
|
3097
|
+
.sort((left, right) => right.score - left.score)[0];
|
|
3098
|
+
const hasTrustedCompanyContext = Boolean(candidate.linkedinCompanyUrl ||
|
|
3099
|
+
companyContext?.linkedinCompanyUrl ||
|
|
3100
|
+
companyContext?.matchedCompanyName);
|
|
3101
|
+
const hasTrustedEmailContext = Boolean(candidate.email && !isSyntheticLinkedInLookupEmail(candidate.email));
|
|
3102
|
+
const acceptBestCandidate = Boolean(bestCandidate &&
|
|
3103
|
+
(bestCandidate.score >= 140 ||
|
|
3104
|
+
(bestCandidate.exactNameMatch &&
|
|
3105
|
+
(bestCandidate.companyMatchCount > 0 || hasTrustedCompanyContext || hasTrustedEmailContext))));
|
|
3106
|
+
if (bestCandidate && acceptBestCandidate) {
|
|
3107
|
+
matchedUrl = extractLinkedInProfileUrlFromSalesApiElement(bestCandidate.element) ?? null;
|
|
3108
|
+
matchedSalesNavUrl = extractLinkedInSalesNavLeadUrlFromSalesApiElement(bestCandidate.element) ?? null;
|
|
3109
|
+
matchedFullName = bestCandidate.fullName;
|
|
3110
|
+
matchedCompanyName = bestCandidate.companyName;
|
|
3111
|
+
matchedTitle = bestCandidate.title;
|
|
3112
|
+
}
|
|
3113
|
+
else {
|
|
3114
|
+
lastError = bestCandidate
|
|
3115
|
+
? `LinkedIn top result score too low (${bestCandidate.score})`
|
|
3116
|
+
: "LinkedIn returned no usable results";
|
|
3117
|
+
}
|
|
1844
3118
|
if (matchedUrl || matchedSalesNavUrl) {
|
|
1845
3119
|
break;
|
|
1846
3120
|
}
|
|
@@ -1852,11 +3126,14 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1852
3126
|
finally {
|
|
1853
3127
|
clearTimeout(timeout);
|
|
1854
3128
|
}
|
|
1855
|
-
if (matchedUrl || matchedSalesNavUrl ||
|
|
3129
|
+
if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
1856
3130
|
break;
|
|
1857
3131
|
}
|
|
1858
3132
|
}
|
|
1859
|
-
if (matchedUrl || matchedSalesNavUrl ||
|
|
3133
|
+
if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
3134
|
+
break;
|
|
3135
|
+
}
|
|
3136
|
+
if (Date.now() >= contactDeadline) {
|
|
1860
3137
|
break;
|
|
1861
3138
|
}
|
|
1862
3139
|
}
|
|
@@ -1864,16 +3141,21 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1864
3141
|
contact_id: primary.contact_id,
|
|
1865
3142
|
linkedin_url: matchedUrl ?? matchedSalesNavUrl,
|
|
1866
3143
|
sales_nav_profile_url: matchedSalesNavUrl,
|
|
3144
|
+
matched_full_name: matchedFullName,
|
|
3145
|
+
matched_company_name: matchedCompanyName,
|
|
3146
|
+
matched_title: matchedTitle,
|
|
1867
3147
|
error: matchedUrl || matchedSalesNavUrl ? null : lastError
|
|
1868
3148
|
});
|
|
1869
3149
|
}
|
|
1870
3150
|
return {
|
|
1871
3151
|
success: true,
|
|
1872
|
-
contacts: results
|
|
3152
|
+
contacts: results,
|
|
3153
|
+
companyContexts: Array.from(companyContexts.values())
|
|
1873
3154
|
};
|
|
1874
3155
|
}
|
|
1875
3156
|
async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
1876
3157
|
const config = await readLinkedInDirectLookupConfig();
|
|
3158
|
+
const precomputedContextByKey = new Map((params.precomputedContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
1877
3159
|
const primaryContacts = new Map();
|
|
1878
3160
|
for (const contact of params.contacts) {
|
|
1879
3161
|
const existing = primaryContacts.get(contact.contact_id);
|
|
@@ -1897,11 +3179,23 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1897
3179
|
companyName: contact.companyName,
|
|
1898
3180
|
companyNameOriginal: contact.companyNameOriginal
|
|
1899
3181
|
});
|
|
1900
|
-
|
|
1901
|
-
let
|
|
1902
|
-
let
|
|
1903
|
-
let
|
|
3182
|
+
const precomputedContext = precomputedContextByKey.get(buildDirectCompanyContextKey(contact));
|
|
3183
|
+
let matchedCompanyUrl = precomputedContext?.linkedinCompanyUrl ?? null;
|
|
3184
|
+
let matchedSalesNavCompanyUrl = precomputedContext?.salesNavCompanyUrl ?? null;
|
|
3185
|
+
let matchedCompanyName = precomputedContext?.matchedCompanyName ?? null;
|
|
3186
|
+
let matchedCompanyEmployeeCount = precomputedContext?.matchedCompanyEmployeeCount ?? null;
|
|
1904
3187
|
let lastError = null;
|
|
3188
|
+
if (matchedCompanyUrl || matchedSalesNavCompanyUrl || matchedCompanyName) {
|
|
3189
|
+
results.push({
|
|
3190
|
+
contact_id: contact.contact_id,
|
|
3191
|
+
linkedin_company_url: matchedCompanyUrl,
|
|
3192
|
+
sales_nav_company_url: matchedSalesNavCompanyUrl,
|
|
3193
|
+
matched_company_name: matchedCompanyName,
|
|
3194
|
+
matched_company_employee_count: matchedCompanyEmployeeCount,
|
|
3195
|
+
error: null
|
|
3196
|
+
});
|
|
3197
|
+
continue;
|
|
3198
|
+
}
|
|
1905
3199
|
for (const variant of variants) {
|
|
1906
3200
|
const controller = new AbortController();
|
|
1907
3201
|
const timeout = setTimeout(controller.abort.bind(controller), Math.min(params.timeoutMs, 20_000));
|
|
@@ -2044,9 +3338,34 @@ async function invokeLinkedInUrlEnrichmentWorkflow(params) {
|
|
|
2044
3338
|
}
|
|
2045
3339
|
}
|
|
2046
3340
|
function normalizeWorkflowLinkedInUrlResult(params) {
|
|
3341
|
+
const inputContactIds = new Set(params.contacts.map((contact) => contact.contact_id));
|
|
2047
3342
|
const contactIdsBySyntheticEmail = new Map(params.contacts
|
|
2048
3343
|
.filter((contact) => contact.email)
|
|
2049
3344
|
.map((contact) => [String(contact.email).toLowerCase(), contact.contact_id]));
|
|
3345
|
+
const contactIdsByNormalizedIdentity = new Map(params.contacts
|
|
3346
|
+
.filter((contact) => !contact.isVariation)
|
|
3347
|
+
.map((contact) => {
|
|
3348
|
+
const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
|
|
3349
|
+
const companyName = normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName);
|
|
3350
|
+
return [`${fullName}|${companyName}`, contact.contact_id];
|
|
3351
|
+
})
|
|
3352
|
+
.filter(([key]) => key !== "|"));
|
|
3353
|
+
const normalizedNameCounts = new Map();
|
|
3354
|
+
for (const contact of params.contacts) {
|
|
3355
|
+
if (contact.isVariation)
|
|
3356
|
+
continue;
|
|
3357
|
+
const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
|
|
3358
|
+
if (!fullName)
|
|
3359
|
+
continue;
|
|
3360
|
+
normalizedNameCounts.set(fullName, (normalizedNameCounts.get(fullName) ?? 0) + 1);
|
|
3361
|
+
}
|
|
3362
|
+
const contactIdsByNormalizedName = new Map(params.contacts
|
|
3363
|
+
.filter((contact) => !contact.isVariation)
|
|
3364
|
+
.map((contact) => {
|
|
3365
|
+
const fullName = normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`));
|
|
3366
|
+
return [fullName, contact.contact_id];
|
|
3367
|
+
})
|
|
3368
|
+
.filter(([fullName]) => Boolean(fullName) && (normalizedNameCounts.get(fullName) ?? 0) === 1));
|
|
2050
3369
|
const rowsByContactId = new Map();
|
|
2051
3370
|
const body = params.parsedBody && typeof params.parsedBody === "object" && !Array.isArray(params.parsedBody)
|
|
2052
3371
|
? params.parsedBody
|
|
@@ -2056,13 +3375,34 @@ function normalizeWorkflowLinkedInUrlResult(params) {
|
|
|
2056
3375
|
...(Array.isArray(body?.profiles) ? body?.profiles : [])
|
|
2057
3376
|
];
|
|
2058
3377
|
for (const contact of workflowRows) {
|
|
3378
|
+
const fullNameCandidate = normalizeLookupWhitespace(typeof contact.full_name === "string"
|
|
3379
|
+
? contact.full_name
|
|
3380
|
+
: typeof contact.fullName === "string"
|
|
3381
|
+
? contact.fullName
|
|
3382
|
+
: typeof contact.name === "string"
|
|
3383
|
+
? contact.name
|
|
3384
|
+
: [contact.first_name, contact.last_name]
|
|
3385
|
+
.filter((value) => typeof value === "string" && value.trim().length > 0)
|
|
3386
|
+
.join(" "));
|
|
3387
|
+
const companyNameCandidate = normalizeLookupWhitespace(typeof contact.company_name === "string"
|
|
3388
|
+
? contact.company_name
|
|
3389
|
+
: typeof contact.companyName === "string"
|
|
3390
|
+
? contact.companyName
|
|
3391
|
+
: typeof contact.current_company === "string"
|
|
3392
|
+
? contact.current_company
|
|
3393
|
+
: "");
|
|
3394
|
+
const normalizedIdentityKey = `${normalizeLooseMatchText(fullNameCandidate)}|${normalizeLooseMatchText(companyNameCandidate)}`;
|
|
2059
3395
|
const explicitContactId = typeof contact.contact_id === "string"
|
|
2060
3396
|
? contact.contact_id
|
|
2061
3397
|
: typeof contact.contact_id === "number"
|
|
2062
3398
|
? String(contact.contact_id)
|
|
2063
3399
|
: "";
|
|
2064
3400
|
const emailKey = typeof contact.email === "string" ? contact.email.toLowerCase() : "";
|
|
2065
|
-
const contactId =
|
|
3401
|
+
const contactId = (inputContactIds.has(explicitContactId) ? explicitContactId : "") ||
|
|
3402
|
+
contactIdsBySyntheticEmail.get(emailKey) ||
|
|
3403
|
+
contactIdsByNormalizedIdentity.get(normalizedIdentityKey) ||
|
|
3404
|
+
contactIdsByNormalizedName.get(normalizeLooseMatchText(fullNameCandidate)) ||
|
|
3405
|
+
"";
|
|
2066
3406
|
const linkedinUrl = normalizePublicLinkedInProfileUrl(typeof contact.linkedin_profile_url === "string"
|
|
2067
3407
|
? contact.linkedin_profile_url
|
|
2068
3408
|
: typeof contact.linkedinProfileUrl === "string"
|
|
@@ -2149,7 +3489,8 @@ async function fetchSalesNavLookupCandidates(params) {
|
|
|
2149
3489
|
}
|
|
2150
3490
|
async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
2151
3491
|
const results = [];
|
|
2152
|
-
for (const
|
|
3492
|
+
for (const row of params.rows) {
|
|
3493
|
+
const contactId = normalizeLinkedInLookupField(row.contactId) ?? `${results.length + 1}`;
|
|
2153
3494
|
const candidates = await fetchSalesNavLookupCandidates({
|
|
2154
3495
|
companyName: row.companyName,
|
|
2155
3496
|
orgId: params.orgId
|
|
@@ -2199,19 +3540,20 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
2199
3540
|
const salesNavCompanyUrl = typeof best?.companyUrl === "string" && /\/sales\/company\//i.test(best.companyUrl)
|
|
2200
3541
|
? best.companyUrl
|
|
2201
3542
|
: null;
|
|
3543
|
+
const existingLinkedInCompanyUrl = row.linkedinCompanyUrl?.trim() || null;
|
|
2202
3544
|
results.push({
|
|
2203
3545
|
clientId: row.clientId,
|
|
2204
3546
|
fullName: row.fullName,
|
|
2205
3547
|
companyName: row.companyName,
|
|
2206
3548
|
linkedinUrl,
|
|
2207
3549
|
salesNavProfileUrl,
|
|
2208
|
-
linkedinCompanyUrl,
|
|
3550
|
+
linkedinCompanyUrl: linkedinCompanyUrl ?? existingLinkedInCompanyUrl,
|
|
2209
3551
|
salesNavCompanyUrl,
|
|
2210
3552
|
found: Boolean(linkedinUrl),
|
|
2211
|
-
companyFound: Boolean(linkedinCompanyUrl),
|
|
2212
|
-
contactId
|
|
3553
|
+
companyFound: Boolean(linkedinCompanyUrl ?? existingLinkedInCompanyUrl),
|
|
3554
|
+
contactId,
|
|
2213
3555
|
source: linkedinUrl ? "salesnav-supabase" : null,
|
|
2214
|
-
companySource: linkedinCompanyUrl ? "salesnav-supabase" : null,
|
|
3556
|
+
companySource: linkedinCompanyUrl ? "salesnav-supabase" : existingLinkedInCompanyUrl ? "input" : null,
|
|
2215
3557
|
matchedFullName: best?.fullName ?? null,
|
|
2216
3558
|
matchedCompanyName: best?.companyName ?? null,
|
|
2217
3559
|
matchedTitle: best?.title ?? null,
|
|
@@ -2221,6 +3563,223 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
2221
3563
|
}
|
|
2222
3564
|
return results;
|
|
2223
3565
|
}
|
|
3566
|
+
/**
 * Decides whether the Sales Navigator row prepass should run.
 *
 * SALESPROMPTER_LINKEDIN_ROW_PREPASS forces the answer when set to
 * 0/false/off or 1/true/on (trimmed, case-insensitive). Otherwise the prepass
 * needs a non-blank orgId plus both Supabase variables, and the row count must
 * not exceed SALESPROMPTER_LINKEDIN_ROW_PREPASS_MAX_ROWS (default 200; invalid
 * or non-positive values fall back to 200).
 *
 * @param {{env?: object, orgId?: string, rows: Array}} params - env defaults to process.env.
 * @returns {boolean} True when the prepass should be used.
 */
function shouldUseSalesNavRowPrepass(params) {
    const environment = params.env ?? process.env;
    const toggle = environment.SALESPROMPTER_LINKEDIN_ROW_PREPASS?.trim().toLowerCase();
    switch (toggle) {
        case "0":
        case "false":
        case "off":
            return false;
        case "1":
        case "true":
        case "on":
            return true;
        default:
            break;
    }
    const orgIdPresent = Boolean(params.orgId?.trim());
    const supabaseConfigured = Boolean(environment.NEXT_PUBLIC_SUPABASE_URL?.trim() && environment.SUPABASE_SERVICE_ROLE_KEY?.trim());
    if (!(orgIdPresent && supabaseConfigured)) {
        return false;
    }
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_ROW_PREPASS_MAX_ROWS ?? 200);
    const limitIsUsable = Number.isFinite(configuredLimit) && configuredLimit > 0;
    const rowLimit = limitIsUsable ? configuredLimit : 200;
    return params.rows.length <= rowLimit;
}
|
|
3583
|
+
/**
 * Decides whether the direct LinkedIn people lookup should run.
 *
 * SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_LOOKUP forces the answer when set to
 * 0/false/off or 1/true/on (trimmed, case-insensitive). Otherwise the lookup
 * is used when rowCount does not exceed
 * SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_MAX_ROWS (default 50; invalid or
 * non-positive values fall back to 50).
 *
 * @param {{env?: object, rowCount: number}} params - env defaults to process.env.
 * @returns {boolean} True when the direct lookup should be used.
 */
function shouldUseDirectPeopleLookup(params) {
    const environment = params.env ?? process.env;
    const toggle = environment.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_LOOKUP?.trim().toLowerCase();
    if (["0", "false", "off"].includes(toggle)) {
        return false;
    }
    if (["1", "true", "on"].includes(toggle)) {
        return true;
    }
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_MAX_ROWS ?? 50);
    const limitIsUsable = Number.isFinite(configuredLimit) && configuredLimit > 0;
    const rowLimit = limitIsUsable ? configuredLimit : 50;
    return params.rowCount <= rowLimit;
}
|
|
3595
|
+
/**
 * Decides whether the workflow-based LinkedIn people lookup should run.
 *
 * SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_LOOKUP forces the answer when set
 * to 0/false/off or 1/true/on (trimmed, case-insensitive). Otherwise the
 * lookup is used when rowCount does not exceed
 * SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_MAX_ROWS. The default limit is 75
 * when getSerperApiKey(env) returns a truthy value and 250 otherwise; invalid
 * or non-positive configured values fall back to that default.
 *
 * @param {{env?: object, rowCount: number}} params - env defaults to process.env.
 * @returns {boolean} True when the workflow lookup should be used.
 */
function shouldUseWorkflowPeopleLookup(params) {
    const environment = params.env ?? process.env;
    const toggle = environment.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_LOOKUP?.trim().toLowerCase();
    if (["0", "false", "off"].includes(toggle)) {
        return false;
    }
    if (["1", "true", "on"].includes(toggle)) {
        return true;
    }
    const fallbackLimit = Boolean(getSerperApiKey(environment)) ? 75 : 250;
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_MAX_ROWS ?? fallbackLimit);
    const limitIsUsable = Number.isFinite(configuredLimit) && configuredLimit > 0;
    const rowLimit = limitIsUsable ? configuredLimit : fallbackLimit;
    return params.rowCount <= rowLimit;
}
|
|
3608
|
+
/**
 * Decides whether the bulk profile-resolution strategy applies.
 *
 * SALESPROMPTER_LINKEDIN_BULK_MODE forces the answer when set to 0/false/off
 * or 1/true/on (trimmed, case-insensitive). Otherwise bulk mode applies when
 * rowCount is at least SALESPROMPTER_LINKEDIN_BULK_MODE_MIN_ROWS (default 75;
 * invalid or non-positive values fall back to 75).
 *
 * @param {{env?: object, rowCount: number}} params - env defaults to process.env.
 * @returns {boolean} True when bulk mode should be used.
 */
function shouldUseBulkProfileResolutionStrategy(params) {
    const environment = params.env ?? process.env;
    const toggle = environment.SALESPROMPTER_LINKEDIN_BULK_MODE?.trim().toLowerCase();
    if (["0", "false", "off"].includes(toggle)) {
        return false;
    }
    if (["1", "true", "on"].includes(toggle)) {
        return true;
    }
    const configuredThreshold = Number(environment.SALESPROMPTER_LINKEDIN_BULK_MODE_MIN_ROWS ?? 75);
    const thresholdIsUsable = Number.isFinite(configuredThreshold) && configuredThreshold > 0;
    const threshold = thresholdIsUsable ? configuredThreshold : 75;
    return params.rowCount >= threshold;
}
|
|
3620
|
+
/**
 * Resolves the tuning values used when looking up LinkedIn profiles for a batch.
 *
 * Every value can be overridden through a SALESPROMPTER_LINKEDIN_* environment
 * variable; an override is accepted only when it is a finite number whose
 * integer part is at least 1, otherwise the default for the current mode is
 * used. Defaults differ between bulk and non-bulk mode, where the mode comes
 * from `shouldUseBulkProfileResolutionStrategy`.
 *
 * @param {{ rowCount: number, timeoutMs: number, env?: Record<string, string | undefined> }} params
 *   `env` defaults to `process.env`. `timeoutMs` seeds the Serper stage budget
 *   and the bulk direct-profile timeout.
 * @returns {{
 *   bulkMode: boolean,
 *   serperConcurrency: number,
 *   serperMaxQueries: number,
 *   workflowStageBudgetMs: number,
 *   serperStageBudgetMs: number,
 *   bulkDirectProfileMaxRows: number,
 *   bulkDirectProfileTimeoutMs: number
 * }} `bulkDirectProfileMaxRows` and `bulkDirectProfileTimeoutMs` are 0 when
 *   no valid positive value is available.
 */
function resolveLinkedInBulkStrategyConfig(params) {
    const env = params.env ?? process.env;
    const bulkMode = shouldUseBulkProfileResolutionStrategy({
        rowCount: params.rowCount,
        env
    });
    // Truncate BEFORE validating: a fractional override such as "0.5" is
    // positive, but would otherwise truncate to 0 after passing the check.
    const readPositiveInt = (raw, defaultValue, invalidValue = defaultValue) => {
        const parsed = Math.trunc(Number(raw ?? defaultValue));
        return Number.isFinite(parsed) && parsed >= 1 ? parsed : invalidValue;
    };
    // A missing timeout turns into NaN and would poison Math.max/Math.min;
    // fall back to the lower bound of the clamp in that case.
    const clampBudget = (value, min, max) => Number.isNaN(Number(value)) ? min : Math.max(min, Math.min(value, max));
    const serperConcurrencyDefault = bulkMode ? 12 : 6;
    const serperMaxQueriesDefault = bulkMode ? 4 : 8;
    const workflowStageBudgetDefault = bulkMode ? 8_000 : 15_000;
    const serperStageBudgetDefault = bulkMode
        ? clampBudget(params.timeoutMs * 2, 15_000, 45_000)
        : clampBudget(params.timeoutMs, 10_000, 20_000);
    const bulkDirectProfileMaxRowsDefault = 0;
    const bulkDirectProfileTimeoutDefault = bulkMode ? Math.min(params.timeoutMs, 6_000) : 0;
    return {
        bulkMode,
        serperConcurrency: readPositiveInt(env.SALESPROMPTER_LINKEDIN_SERPER_CONCURRENCY, serperConcurrencyDefault),
        serperMaxQueries: readPositiveInt(env.SALESPROMPTER_LINKEDIN_SERPER_MAX_QUERIES, serperMaxQueriesDefault),
        workflowStageBudgetMs: readPositiveInt(env.SALESPROMPTER_LINKEDIN_WORKFLOW_STAGE_TIMEOUT_MS, workflowStageBudgetDefault),
        serperStageBudgetMs: readPositiveInt(env.SALESPROMPTER_LINKEDIN_SERPER_STAGE_TIMEOUT_MS, serperStageBudgetDefault),
        // These two fall back to 0 rather than to their defaults when invalid.
        bulkDirectProfileMaxRows: readPositiveInt(env.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_MAX_ROWS, bulkDirectProfileMaxRowsDefault, 0),
        bulkDirectProfileTimeoutMs: readPositiveInt(env.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_TIMEOUT_MS, bulkDirectProfileTimeoutDefault, 0)
    };
}
|
|
3662
|
+
/**
 * Reports whether a bulk direct-profile lookup pass is worth attempting.
 *
 * All of the following must hold: the strategy is in bulk mode, it allows a
 * positive number of direct-profile rows, it has a positive direct-profile
 * timeout, and at least one row is still unresolved.
 *
 * @param {{ strategy: { bulkMode: boolean, bulkDirectProfileMaxRows: number, bulkDirectProfileTimeoutMs: number }, unresolvedRowCount: number }} params
 * @returns {boolean}
 */
function shouldAttemptBulkDirectProfileLookup(params) {
    const { bulkMode, bulkDirectProfileMaxRows: maxRows, bulkDirectProfileTimeoutMs: timeoutMs } = params.strategy;
    if (!bulkMode) {
        return bulkMode;
    }
    const hasCapacity = maxRows > 0 && timeoutMs > 0;
    return hasCapacity && params.unresolvedRowCount > 0;
}
|
|
3668
|
+
/**
 * Picks the contacts most worth a direct profile lookup.
 *
 * Variation contacts are ignored. Each remaining contact receives a score:
 *   +80  a LinkedIn company URL is present on the matching row or the contact
 *   +20  the matching row has a Sales Navigator company URL
 *   +40  the email is non-empty and not flagged by isSyntheticLinkedInLookupEmail
 *   +25  a non-blank job title
 *   +15  a non-blank deep-dive recommended role
 *   +5 per title keyword (capped at 20) and +5 per role keyword (capped at 15)
 *   -100 the name looks like "Contact <number>"
 *   -25  the name is exactly one of hr/support/facility/buchhaltung/rechnungen
 * Contacts whose score is not positive are dropped; the rest are returned in
 * descending score order, truncated to `params.limit`.
 *
 * @param {{ contacts: object[], rowsByContactId: Map<string, object>, limit: number }} params
 * @returns {object[]} the selected contact objects (same references as the input).
 */
function rankContactsForBulkDirectProfileLookup(params) {
    const ranked = [];
    for (const contact of params.contacts) {
        if (contact.isVariation) {
            continue;
        }
        const sourceRow = params.rowsByContactId.get(contact.contact_id);
        const displayName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
        const email = normalizeLookupWhitespace(contact.email);
        const titleKeywordCount = extractLookupTitleKeywords(contact.jobTitle).length;
        const roleKeywordCount = buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole).length;
        // [condition, points] pairs; every truthy condition contributes its points.
        const adjustments = [
            [sourceRow?.linkedinCompanyUrl || contact.linkedinCompanyUrl, 80],
            [sourceRow?.salesNavCompanyUrl, 20],
            [email && !isSyntheticLinkedInLookupEmail(email), 40],
            [contact.jobTitle?.trim(), 25],
            [contact.deepDiveRecommendedRole?.trim(), 15],
            [/^contact\s+\d+$/i.test(displayName), -100],
            [/^(hr|support|facility|buchhaltung|rechnungen)$/i.test(displayName), -25]
        ];
        let score = Math.min(20, titleKeywordCount * 5) + Math.min(15, roleKeywordCount * 5);
        for (const [applies, points] of adjustments) {
            if (applies) {
                score += points;
            }
        }
        if (score > 0) {
            ranked.push({ contact, score });
        }
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, params.limit).map((entry) => entry.contact);
}
|
|
3700
|
+
/**
 * Looks up LinkedIn profile URLs for many contacts with a bounded worker pool.
 *
 * Workers pull contacts from a shared cursor and call
 * `searchSerperLinkedInProfileUrl` for each one. When `overallBudgetMs` is a
 * positive finite number, no new lookup starts after that many milliseconds
 * and each lookup's timeout is capped to the time remaining.
 *
 * @param {{
 *   contacts: Array<{ contact_id: string }>,
 *   timeoutMs: number,
 *   concurrency?: number,
 *   maxQueries?: number,
 *   overallBudgetMs?: number
 * }} params `concurrency` defaults to 3 and is clamped to [1, contacts.length].
 * @returns {Promise<Map<string, string>>} contact_id -> profile URL, for
 *   contacts whose lookup returned a truthy URL.
 *   NOTE(review): a rejection from the search helper rejects the whole call
 *   via Promise.all; confirm the helper handles its own errors.
 */
async function resolveSerperLinkedInProfilesInParallel(params) {
    const results = new Map();
    const contacts = params.contacts;
    // A NaN concurrency would propagate through Math.min/Math.max and make
    // Array.from({ length: NaN }) spawn zero workers, silently resolving nothing.
    const requestedConcurrency = Math.trunc(Number(params.concurrency ?? 3));
    const concurrency = Math.max(1, Math.min(Number.isNaN(requestedConcurrency) ? 3 : requestedConcurrency, contacts.length || 1));
    const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
        ? Date.now() + Math.trunc(params.overallBudgetMs)
        : Number.POSITIVE_INFINITY;
    let nextIndex = 0;
    const worker = async () => {
        while (true) {
            if (Date.now() >= deadline) {
                return;
            }
            // Claiming the index is synchronous, so workers never process the same contact.
            const index = nextIndex++;
            if (index >= contacts.length) {
                return;
            }
            const contact = contacts[index];
            const remainingBudget = deadline - Date.now();
            if (remainingBudget <= 0) {
                return;
            }
            const linkedinUrl = await searchSerperLinkedInProfileUrl(contact, Math.min(params.timeoutMs, remainingBudget), {
                maxQueries: params.maxQueries
            });
            if (linkedinUrl) {
                results.set(contact.contact_id, linkedinUrl);
            }
        }
    };
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    return results;
}
|
|
3733
|
+
/**
 * Resolves LinkedIn company URLs for contacts that do not have one yet.
 *
 * Companies are de-duplicated by `normalizeLookupCompanyForCleaning` applied
 * to `companyNameOriginal ?? companyName`, considering only non-variation
 * contacts without a `linkedinCompanyUrl`. Each unique company is looked up
 * with `searchSerperLinkedInCompanyUrl`, falling back to
 * `searchPublicLinkedInCompanyUrl` when that returns null/undefined. Results
 * are then mapped back onto every contact in `params.contacts` (including
 * variations and contacts that already had a URL) sharing the company key.
 *
 * When `overallBudgetMs` is a positive finite number, no lookup (primary or
 * fallback) starts after that many milliseconds and each lookup's timeout is
 * capped to the time remaining.
 *
 * @param {{
 *   contacts: object[],
 *   timeoutMs: number,
 *   concurrency?: number,
 *   overallBudgetMs?: number
 * }} params `concurrency` defaults to 4 and is clamped to [1, unique companies].
 * @returns {Promise<Map<string, string>>} contact_id -> company URL.
 */
async function resolveLinkedInCompanyUrlsForContacts(params) {
    const contacts = params.contacts.filter((contact) => !contact.isVariation && !contact.linkedinCompanyUrl);
    const uniqueCompanies = new Map();
    for (const contact of contacts) {
        const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
        if (!key || uniqueCompanies.has(key)) {
            continue;
        }
        uniqueCompanies.set(key, contact.companyNameOriginal ?? contact.companyName);
    }
    const resultsByCompany = new Map();
    const entries = Array.from(uniqueCompanies.entries());
    // A NaN concurrency would make Array.from({ length: NaN }) spawn zero
    // workers and silently resolve nothing, so fall back to the default.
    const requestedConcurrency = Math.trunc(Number(params.concurrency ?? 4));
    const concurrency = Math.max(1, Math.min(Number.isNaN(requestedConcurrency) ? 4 : requestedConcurrency, entries.length || 1));
    const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
        ? Date.now() + Math.trunc(params.overallBudgetMs)
        : Number.POSITIVE_INFINITY;
    let nextIndex = 0;
    const worker = async () => {
        while (true) {
            if (Date.now() >= deadline) {
                return;
            }
            const index = nextIndex++;
            if (index >= entries.length) {
                return;
            }
            const [key, companyName] = entries[index];
            const remainingBudget = deadline - Date.now();
            if (remainingBudget <= 0) {
                return;
            }
            let linkedinUrl = await searchSerperLinkedInCompanyUrl(companyName, Math.min(params.timeoutMs, remainingBudget));
            if (linkedinUrl == null) {
                // Re-measure the budget: the primary search may have consumed
                // it, and the fallback must not overrun the overall deadline.
                const fallbackBudget = deadline - Date.now();
                if (fallbackBudget > 0) {
                    linkedinUrl = await searchPublicLinkedInCompanyUrl(companyName, Math.min(params.timeoutMs, fallbackBudget));
                }
            }
            if (linkedinUrl) {
                resultsByCompany.set(key, linkedinUrl);
            }
        }
    };
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    const results = new Map();
    for (const contact of params.contacts) {
        const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
        const linkedinUrl = resultsByCompany.get(key);
        if (linkedinUrl) {
            results.set(contact.contact_id, linkedinUrl);
        }
    }
    return results;
}
|
|
2224
3783
|
/**
 * Joins arguments into one command-line string.
 *
 * @param {string[]} args arguments, each passed through `shellQuote`.
 * @returns {string} the quoted arguments separated by single spaces.
 */
function buildCommandLine(args) {
    const quotedArgs = [];
    for (const arg of args) {
        quotedArgs.push(shellQuote(arg));
    }
    return quotedArgs.join(" ");
}
|
|
@@ -3208,6 +4767,72 @@ async function fetchWorkspaceLeadSearch(session, requestBody) {
|
|
|
3208
4767
|
}
|
|
3209
4768
|
return WorkspaceLeadSearchResponseSchema.parse(payload).leads;
|
|
3210
4769
|
}
|
|
4770
|
+
/**
 * Builds the account profile that accompanies workspace lead-search results.
 *
 * With at least one lead, the profile is derived from the first lead: the
 * target's trimmed company name / trimmed lower-cased domain take precedence
 * over the lead's, and keywords are the de-duplicated non-blank strings among
 * the first label of the target domain, the lead's industry and region, and
 * the ICP keywords. The result is validated with `AccountProfileSchema.parse`.
 * Without leads, resolution is delegated to `companyProvider.resolveCompany`.
 *
 * @param {{ keywords: string[] }} icp
 * @param {{ companyDomain?: string, companyName?: string }} target
 * @param {object[]} leads
 * @returns {Promise<object>} the account profile.
 */
async function buildWorkspaceLeadAccount(icp, target, leads) {
    const primaryLead = leads[0];
    if (!primaryLead) {
        return await companyProvider.resolveCompany({
            companyDomain: target.companyDomain,
            companyName: target.companyName
        }, icp);
    }
    const candidates = [
        target.companyDomain?.split(".")[0],
        primaryLead.industry,
        primaryLead.region,
        ...icp.keywords
    ];
    // Keep first occurrences only, preserving order, and drop blanks/non-strings.
    const keywords = [];
    const alreadyAdded = new Set();
    for (const candidate of candidates) {
        const usable = typeof candidate === "string" && candidate.trim().length > 0;
        if (usable && !alreadyAdded.has(candidate)) {
            alreadyAdded.add(candidate);
            keywords.push(candidate);
        }
    }
    const companyName = target.companyName?.trim() || primaryLead.companyName;
    const domain = target.companyDomain?.trim().toLowerCase() || primaryLead.domain;
    return AccountProfileSchema.parse({
        companyName,
        domain,
        industry: primaryLead.industry,
        region: primaryLead.region,
        employeeCount: primaryLead.employeeCount,
        keywords,
        sources: ["workspace-qualified-leads"]
    });
}
|
|
4789
|
+
/**
 * Generates leads for a CLI command from the requested source.
 *
 * `options.source` is one of "auto" (default), "workspace" or "fallback":
 *   - "fallback": always use `leadProvider.generateLeads`.
 *   - "workspace": use the workspace lead search; any failure is rethrown, and
 *     an auth bypass (`shouldBypassAuth()`) is rejected with an error.
 *   - "auto": try the workspace lead search and, on any failure or when auth
 *     is bypassed, use `leadProvider.generateLeads`. After a failure the
 *     fallback result is returned with a leading warning describing it.
 *
 * The workspace request uses mode "target-company" when the target has a
 * company domain or LinkedIn company page, otherwise "reference-company".
 *
 * @param {{ source?: string, icp: object, count: number, target: object }} options
 * @returns {Promise<object>} a lead-generation result with `provider`, `mode`,
 *   `account`, `leads` and `warnings` on the workspace path.
 * @throws {Error} for an invalid source, or as described for "workspace".
 */
async function generateLeadsForCommand(options) {
    const source = z.enum(["auto", "workspace", "fallback"]).parse(options.source ?? "auto");
    const workspaceRequired = source === "workspace";
    const runFallback = () => leadProvider.generateLeads(options.icp, options.count, options.target);
    if (source === "fallback") {
        return await runFallback();
    }
    if (shouldBypassAuth()) {
        if (workspaceRequired) {
            throw new Error("workspace lead generation requires authentication. Disable SALESPROMPTER_SKIP_AUTH and log in first.");
        }
        return await runFallback();
    }
    try {
        const session = await requireAuthSession();
        let requestBody;
        if (options.target.companyDomain || options.target.linkedinCompanyPage) {
            requestBody = {
                mode: "target-company",
                domain: options.target.companyDomain,
                linkedinCompanyPage: options.target.linkedinCompanyPage,
                limit: options.count
            };
        }
        else {
            requestBody = {
                mode: "reference-company",
                icp: options.icp,
                limit: options.count
            };
        }
        const leads = await fetchWorkspaceLeadSearch(session, requestBody);
        const account = await buildWorkspaceLeadAccount(options.icp, options.target, leads);
        return {
            provider: "salesprompter-app-workspace-search",
            mode: "real",
            account,
            leads,
            warnings: []
        };
    }
    catch (error) {
        if (workspaceRequired) {
            throw error;
        }
        // "auto" degrades to the fallback provider and records why.
        const fallback = await runFallback();
        const reason = error instanceof Error ? error.message : String(error);
        return {
            ...fallback,
            warnings: [`Workspace lead search unavailable: ${reason}`, ...fallback.warnings]
        };
    }
}
|
|
3211
4836
|
/**
 * Returns the default output path for a LinkedIn products category export.
 *
 * @param {string} categorySlug slug interpolated into the file name as-is.
 * @returns {string} a relative path under ./data.
 */
function buildLinkedInProductsOutputPath(categorySlug) {
    const fileName = `linkedin-products-${categorySlug}.json`;
    return `./data/${fileName}`;
}
|
|
@@ -3215,19 +4840,29 @@ function buildLinkedInProductCategorySalesNavigatorOutputPath(categorySlug) {
|
|
|
3215
4840
|
return `./data/salesnav-product-category-${categorySlug}.json`;
|
|
3216
4841
|
}
|
|
3217
4842
|
const SALES_NAVIGATOR_TERMINAL_JOB_STATUSES = new Set(["completed", "completed_with_failures"]);
|
|
4843
|
+
// Limits for the local JSONL workflow log.
// appendWorkflowLocalLog skips appends once the file has reached this size (10 MiB).
const WORKFLOW_LOCAL_LOG_MAX_FILE_BYTES = 10 * 1024 ** 2;
// serializeWorkflowLogEntry replaces metadata with a summary above this line size (32 KiB).
const WORKFLOW_LOCAL_LOG_MAX_LINE_BYTES = 32 * 1024;
// sanitizeWorkflowLogValue truncates strings longer than this many characters.
const WORKFLOW_LOCAL_LOG_MAX_STRING_CHARS = 4_000;
// sanitizeWorkflowLogValue keeps at most this many array items ...
const WORKFLOW_LOCAL_LOG_MAX_ARRAY_ITEMS = 50;
// ... and at most this many keys per object ...
const WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS = 50;
// ... and stops descending into nested values at this depth.
const WORKFLOW_LOCAL_LOG_MAX_DEPTH = 5;
|
|
3218
4849
|
/**
 * Reports whether a crawl job status is terminal.
 *
 * @param {string} status job status to check.
 * @returns {boolean} true when the status is a member of
 *   SALES_NAVIGATOR_TERMINAL_JOB_STATUSES.
 */
function isSalesNavigatorCrawlJobTerminal(status) {
    const terminalStatuses = SALES_NAVIGATOR_TERMINAL_JOB_STATUSES;
    return terminalStatuses.has(status);
}
|
|
3221
4852
|
/**
 * Builds a trace id of the form `<prefix>-<epoch ms>-<random base36 chars>`.
 *
 * The random part comes from Math.random(), so it is not suitable for
 * security-sensitive use, and it has at most 8 characters.
 *
 * @param {string} prefix leading label for the id.
 * @returns {string}
 */
function buildWorkflowTraceId(prefix) {
    const issuedAtMs = Date.now();
    const nonce = Math.random().toString(36).slice(2, 10);
    return `${prefix}-${issuedAtMs}-${nonce}`;
}
|
|
4855
|
+
/**
 * Builds a per-run suffix for log file names.
 *
 * The suffix is the current UTC time in compact ISO form (dashes, colons and
 * milliseconds removed, e.g. 20240131T235959Z) followed by a dash and up to
 * six random base36 characters from Math.random().
 *
 * @returns {string}
 */
function buildWorkflowLogRunSuffix() {
    const isoNow = new Date().toISOString();
    const compactTimestamp = isoNow.replace(/[-:]/g, "").replace(/\.\d{3}Z$/, "Z");
    const nonce = Math.random().toString(36).slice(2, 8);
    return `${compactTimestamp}-${nonce}`;
}
|
|
3224
4859
|
/**
 * Returns a per-run JSONL log path for a Sales Navigator workflow.
 *
 * @param {string} input label that is slugified for the file name; when the
 *   slug is empty, "salesnav-product-category" is used instead.
 * @returns {string} `./data/<slug>-<run suffix>-salesnav.log.jsonl`, where the
 *   run suffix comes from buildWorkflowLogRunSuffix().
 */
function buildSalesNavigatorWorkflowLogPath(input) {
    const baseName = slugify(input) || "salesnav-product-category";
    const runSuffix = buildWorkflowLogRunSuffix();
    return `./data/${baseName}-${runSuffix}-salesnav.log.jsonl`;
}
|
|
3228
4863
|
/**
 * Returns a per-run JSONL log path for a Sales Navigator crawl.
 *
 * @param {string} input label that is slugified for the file name; when the
 *   slug is empty, "salesnav-crawl" is used instead.
 * @returns {string} `./data/<slug>-<run suffix>-crawl.log.jsonl`, where the
 *   run suffix comes from buildWorkflowLogRunSuffix().
 */
function buildSalesNavigatorCrawlLogPath(input) {
    const baseName = slugify(input) || "salesnav-crawl";
    const runSuffix = buildWorkflowLogRunSuffix();
    return `./data/${baseName}-${runSuffix}-crawl.log.jsonl`;
}
|
|
3232
4867
|
function buildSalesNavigatorCrawlOutputPath(input) {
|
|
3233
4868
|
const slug = slugify(input) || "salesnav-crawl";
|
|
@@ -3259,6 +4894,83 @@ function decodeSalesNavigatorQueryParam(url) {
|
|
|
3259
4894
|
return null;
|
|
3260
4895
|
}
|
|
3261
4896
|
}
|
|
4897
|
+
/**
 * Produces a bounded, JSON-safe copy of a value for the local workflow log.
 *
 * - Strings longer than WORKFLOW_LOCAL_LOG_MAX_STRING_CHARS are cut and
 *   suffixed with "... [truncated N chars]".
 * - BigInts become decimal strings (JSON.stringify cannot serialize them).
 * - Dates become ISO strings (null when invalid, matching JSON.stringify).
 * - Errors become plain objects with `name`, `message`, `stack` plus their
 *   own enumerable properties.
 * - Arrays keep the first WORKFLOW_LOCAL_LOG_MAX_ARRAY_ITEMS items followed by
 *   `{ truncatedItems: N }`; objects keep the first
 *   WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS keys plus a `truncatedKeys` count.
 * - Objects nested at WORKFLOW_LOCAL_LOG_MAX_DEPTH or deeper become
 *   "[MaxDepth]"; an object that contains itself becomes "[Circular]".
 * - Other primitives are returned unchanged.
 *
 * @param {unknown} value value to sanitize.
 * @param {number} [depth=0] current nesting depth (internal).
 * @param {WeakSet<object>} [seen] objects on the current path (internal).
 * @returns {unknown} the sanitized copy.
 */
function sanitizeWorkflowLogValue(value, depth = 0, seen = new WeakSet()) {
    if (typeof value === "string") {
        if (value.length <= WORKFLOW_LOCAL_LOG_MAX_STRING_CHARS) {
            return value;
        }
        return `${value.slice(0, WORKFLOW_LOCAL_LOG_MAX_STRING_CHARS)}... [truncated ${value.length - WORKFLOW_LOCAL_LOG_MAX_STRING_CHARS} chars]`;
    }
    if (typeof value === "bigint") {
        return value.toString();
    }
    if (typeof value !== "object" || value === null) {
        return value;
    }
    if (value instanceof Date) {
        return Number.isNaN(value.getTime()) ? null : value.toISOString();
    }
    if (seen.has(value)) {
        return "[Circular]";
    }
    if (depth >= WORKFLOW_LOCAL_LOG_MAX_DEPTH) {
        return "[MaxDepth]";
    }
    seen.add(value);
    try {
        if (Array.isArray(value)) {
            const items = value
                .slice(0, WORKFLOW_LOCAL_LOG_MAX_ARRAY_ITEMS)
                .map((item) => sanitizeWorkflowLogValue(item, depth + 1, seen));
            if (value.length > WORKFLOW_LOCAL_LOG_MAX_ARRAY_ITEMS) {
                items.push({ truncatedItems: value.length - WORKFLOW_LOCAL_LOG_MAX_ARRAY_ITEMS });
            }
            return items;
        }
        // Error's name/message/stack are not enumerable, so surface them explicitly.
        const source = value instanceof Error
            ? { name: value.name, message: value.message, stack: value.stack, ...value }
            : value;
        const entries = Object.entries(source);
        const sanitized = {};
        for (const [key, entryValue] of entries.slice(0, WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS)) {
            sanitized[key] = sanitizeWorkflowLogValue(entryValue, depth + 1, seen);
        }
        if (entries.length > WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS) {
            sanitized.truncatedKeys = entries.length - WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS;
        }
        return sanitized;
    }
    finally {
        // Track ancestors only: an object referenced twice in sibling positions
        // is not a cycle and must not be reported as "[Circular]".
        seen.delete(value);
    }
}
|
|
4933
|
+
/**
 * Serializes a workflow log entry as one newline-terminated JSON line.
 *
 * The entry is written with its metadata passed through
 * sanitizeWorkflowLogValue. If that line is larger than
 * WORKFLOW_LOCAL_LOG_MAX_LINE_BYTES (UTF-8), the metadata is replaced with a
 * summary: a `localLogTruncated` flag, the first
 * WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS metadata key names, the number of key
 * names left out, and the UTF-8 size of the sanitized metadata JSON.
 *
 * @param {{ metadata?: object }} entry log entry; other fields are copied as-is.
 * @returns {string} a JSON line ending in "\n".
 */
function serializeWorkflowLogEntry(entry) {
    const utf8Size = (text) => Buffer.byteLength(text, "utf8");
    const fullLine = JSON.stringify({
        ...entry,
        metadata: sanitizeWorkflowLogValue(entry.metadata)
    });
    if (utf8Size(fullLine) <= WORKFLOW_LOCAL_LOG_MAX_LINE_BYTES) {
        return `${fullLine}\n`;
    }
    // Too large even after sanitizing: keep the entry but summarize the metadata.
    const originalMetadata = entry.metadata ?? {};
    const metadataKeys = Object.keys(originalMetadata);
    const summary = {
        localLogTruncated: true,
        originalMetadataKeys: metadataKeys.slice(0, WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS),
        truncatedKeys: Math.max(0, metadataKeys.length - WORKFLOW_LOCAL_LOG_MAX_OBJECT_KEYS),
        originalMetadataBytes: utf8Size(JSON.stringify(sanitizeWorkflowLogValue(originalMetadata)))
    };
    const summaryLine = JSON.stringify({ ...entry, metadata: summary });
    return `${summaryLine}\n`;
}
|
|
4955
|
+
/**
 * Appends one entry to the local workflow log file, best effort.
 *
 * If the file already holds WORKFLOW_LOCAL_LOG_MAX_FILE_BYTES or more, the
 * entry is not written and a progress message is emitted instead. A missing
 * file is not an error (appendFile creates it). Any other failure is reported
 * through writeProgress and swallowed, so this function does not reject.
 *
 * @param {string} logPath path of the JSONL log file.
 * @param {{ timestamp: string, event: string, metadata?: object }} entry
 * @returns {Promise<void>}
 */
async function appendWorkflowLocalLog(logPath, entry) {
    try {
        let existing = null;
        try {
            existing = await stat(logPath);
        }
        catch (statError) {
            // Only "file does not exist yet" is expected here.
            if (statError.code !== "ENOENT") {
                throw statError;
            }
        }
        const isFull = Boolean(existing) && existing.size >= WORKFLOW_LOCAL_LOG_MAX_FILE_BYTES;
        if (isFull) {
            writeProgress(`[${entry.timestamp}] ${entry.event} (local log skipped because ${logPath} is already over 10 MB; durable event storage still runs when configured)`);
            return;
        }
        const line = serializeWorkflowLogEntry(entry);
        await appendFile(logPath, line, "utf8");
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        writeProgress(`[${entry.timestamp}] workflow.local_log.write_failed: ${reason}`);
    }
}
|
|
3262
4974
|
async function createWorkflowLogger(options) {
|
|
3263
4975
|
const traceId = options.traceId ?? buildWorkflowTraceId("salesprompter-cli");
|
|
3264
4976
|
const logPath = options.logPath;
|
|
@@ -3277,7 +4989,7 @@ async function createWorkflowLogger(options) {
|
|
|
3277
4989
|
event,
|
|
3278
4990
|
metadata
|
|
3279
4991
|
};
|
|
3280
|
-
await
|
|
4992
|
+
await appendWorkflowLocalLog(logPath, entry);
|
|
3281
4993
|
if (eventStore) {
|
|
3282
4994
|
try {
|
|
3283
4995
|
await eventStore.append({
|
|
@@ -3994,6 +5706,17 @@ async function fetchLinkedInCompaniesBackfillStatus(session, payload) {
|
|
|
3994
5706
|
}), LinkedInCompanyBackfillStatusResponseSchema);
|
|
3995
5707
|
return value;
|
|
3996
5708
|
}
|
|
5709
|
+
/**
 * Posts a Phantombuster container sync request to the app API.
 *
 * The request is issued through fetchCliJson, which receives a request factory
 * (called with the session to use) and
 * PhantombusterContainersSyncResponseSchema.
 *
 * @param {{ apiBaseUrl: string, accessToken: string }} session CLI session.
 * @param {object} payload JSON-serializable request body.
 * @returns {Promise<object>} the `value` field of fetchCliJson's result.
 */
async function syncPhantombusterContainersViaApp(session, payload) {
    const sendSyncRequest = (currentSession) => {
        const endpoint = `${currentSession.apiBaseUrl}/api/cli/phantombuster/containers/sync`;
        return fetch(endpoint, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${currentSession.accessToken}`
            },
            body: JSON.stringify(payload)
        });
    };
    const result = await fetchCliJson(session, sendSyncRequest, PhantombusterContainersSyncResponseSchema);
    return result.value;
}
|
|
3997
5720
|
function serializeSalesNavigatorFiltersForApi(filters) {
|
|
3998
5721
|
return filters.map((filter) => ({
|
|
3999
5722
|
type: filter.type,
|
|
@@ -4020,6 +5743,12 @@ function buildSalesNavigatorSliceRawPayload(slice, extra = {}) {
|
|
|
4020
5743
|
resultRetryCount: slice.resultRetryCount ?? null
|
|
4021
5744
|
};
|
|
4022
5745
|
}
|
|
5746
|
+
/**
 * Parses an optional client id.
 *
 * @param {unknown} value raw value, e.g. from a CLI option.
 * @returns {number | null} null when the value is null, undefined or blank
 *   after trimming; otherwise the value coerced to a positive integer.
 * @throws when the value is present but not a positive integer (raised by
 *   the zod schema).
 */
function parseOptionalSalesNavigatorClientId(value) {
    const isBlank = value == null || String(value).trim().length === 0;
    if (isBlank) {
        return null;
    }
    const clientIdSchema = z.coerce.number().int().positive();
    return clientIdSchema.parse(value);
}
|
|
4023
5752
|
function buildSalesNavigatorCrawlReportRawPayload(slice, traceId, extra = {}) {
|
|
4024
5753
|
return buildSalesNavigatorSliceRawPayload({
|
|
4025
5754
|
sourceQueryUrl: slice.sourceQueryUrl,
|
|
@@ -4480,11 +6209,12 @@ function isSalesNavigatorSessionError(error) {
|
|
|
4480
6209
|
return /can't connect profile|sales navigator account|upsell|linkedin session invalid|linkedin_rate_limited|too many requests|rate.?limit|invalid session cookie|disconnected by linkedin|linkedin-disconnected-while-using-api|provide a new linkedin session cookie/i.test(message);
|
|
4481
6210
|
}
|
|
4482
6211
|
/**
 * Reports whether an error indicates a bad or incomplete export result artifact.
 *
 * True when the error is a SalesNavigatorExportRequestError whose errorCode is
 * "phantombuster_result_invalid" or "partial_result_artifact", or when its
 * message (or string form, for non-Error values) matches one of the known
 * phrases checked below.
 *
 * @param {unknown} error thrown value to classify.
 * @returns {boolean}
 */
function isSalesNavigatorResultArtifactError(error) {
    if (error instanceof SalesNavigatorExportRequestError) {
        const errorCode = error.errorCode ?? "";
        if (errorCode === "phantombuster_result_invalid" || errorCode === "partial_result_artifact") {
            return true;
        }
    }
    const message = error instanceof Error ? error.message : String(error);
    const artifactFailurePattern = /page has crashed|no valid sales navigator people rows|partial result artifact|returned \d+ valid sales navigator people rows, but \d+ were expected/i;
    return artifactFailurePattern.test(message);
}
|
|
4489
6219
|
function isSalesNavigatorTransientExportError(error) {
|
|
4490
6220
|
if (isSalesNavigatorSessionError(error) || isSalesNavigatorResultArtifactError(error)) {
|
|
@@ -4575,6 +6305,7 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
4575
6305
|
crawlSliceId: context?.crawlSliceId,
|
|
4576
6306
|
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
4577
6307
|
traceId: context?.traceId ?? null,
|
|
6308
|
+
clientId: context?.clientId ?? null,
|
|
4578
6309
|
phase: shouldProbe ? "probe" : "full_export",
|
|
4579
6310
|
requestedProfiles: probeProfiles,
|
|
4580
6311
|
crawlJobId: context?.crawlJobId ?? null,
|
|
@@ -4611,6 +6342,7 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
4611
6342
|
crawlSliceId: context?.crawlSliceId,
|
|
4612
6343
|
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
4613
6344
|
traceId: context?.traceId ?? null,
|
|
6345
|
+
clientId: context?.clientId ?? null,
|
|
4614
6346
|
phase: "full_export_after_probe",
|
|
4615
6347
|
requestedProfiles: attempt.numberOfProfiles,
|
|
4616
6348
|
crawlJobId: context?.crawlJobId ?? null,
|
|
@@ -4709,6 +6441,8 @@ const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
|
|
|
4709
6441
|
const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
|
|
4710
6442
|
let salesNavigatorFilterImpactModel = null;
|
|
4711
6443
|
let salesNavigatorFilterImpactLoaded = false;
|
|
6444
|
+
let linkedInProfileHitCache = null;
|
|
6445
|
+
let linkedInProfileHitCacheLoaded = false;
|
|
4712
6446
|
function getSalesprompterConfigDir() {
|
|
4713
6447
|
const override = process.env.SALESPROMPTER_CONFIG_DIR?.trim();
|
|
4714
6448
|
if (override !== undefined && override.length > 0) {
|
|
@@ -4719,6 +6453,76 @@ function getSalesprompterConfigDir() {
|
|
|
4719
6453
|
/**
 * Returns the path of the filter-impact file inside the config directory.
 *
 * @returns {string} `<config dir>/salesnav-filter-impact.json`.
 */
function getSalesNavigatorFilterImpactPath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "salesnav-filter-impact.json");
}
|
|
6456
|
+
/**
 * Returns the path of the LinkedIn profile hit cache inside the config directory.
 *
 * @returns {string} `<config dir>/linkedin-profile-hits.json`.
 */
function getLinkedInProfileHitCachePath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "linkedin-profile-hits.json");
}
|
|
6459
|
+
/**
 * Builds the cache keys under which a contact's profile hit is stored.
 *
 * Keys, in order, when their parts are available:
 *   `contact:<id>`                        - the normalized contact id, unless
 *                                           it is a plain number from 1 to 99
 *   `identity:<name>|<company>|<email>`   - only with an email that is not
 *                                           flagged by isSyntheticLinkedInLookupEmail
 *   `identity:<name>|<company>`
 * NOTE(review): the 1-99 exclusion looks like a guard against row-index
 * fallback ids; ids of 100 and above still pass it. Confirm against the
 * code that generates fallback ids.
 *
 * @param {{ fullName?: string, companyName?: string, email?: string, contactId?: string }} params
 * @returns {string[]} unique cache keys (possibly empty).
 */
function buildLinkedInProfileHitCacheKeys(params) {
    const name = normalizeLooseMatchText(params.fullName);
    const company = normalizeLooseMatchText(params.companyName);
    const email = normalizeLookupWhitespace(params.email);
    const trustedEmail = email && !isSyntheticLinkedInLookupEmail(email) ? email.toLowerCase() : "";
    const contactId = normalizeLinkedInLookupField(params.contactId);
    const candidateKeys = [];
    if (contactId && !/^[1-9]\d?$/.test(contactId)) {
        candidateKeys.push(`contact:${contactId}`);
    }
    if (name && company) {
        const identity = `identity:${name}|${company}`;
        if (trustedEmail) {
            candidateKeys.push(`${identity}|${trustedEmail}`);
        }
        candidateKeys.push(identity);
    }
    return [...new Set(candidateKeys)];
}
|
|
6477
|
+
/**
 * Loads the LinkedIn profile hit cache from disk once per process.
 *
 * The first call marks the cache as loaded and then reads the cache file; the
 * parsed content is accepted only when it has `version === 1` and an object
 * `entries` field. A read or parse failure leaves the cache as null (best
 * effort, no error is raised). Later calls return the in-memory cache without
 * touching the file.
 *
 * @returns {Promise<object | null>} the module-level cache, or null.
 */
async function loadLinkedInProfileHitCache() {
    if (!linkedInProfileHitCacheLoaded) {
        // Set before awaiting so the file is read at most once.
        linkedInProfileHitCacheLoaded = true;
        try {
            const raw = await readFile(getLinkedInProfileHitCachePath(), "utf8");
            const candidate = JSON.parse(raw);
            const looksValid = candidate?.version === 1 && candidate.entries && typeof candidate.entries === "object";
            if (looksValid) {
                linkedInProfileHitCache = candidate;
            }
        }
        catch {
            linkedInProfileHitCache = null;
        }
    }
    return linkedInProfileHitCache;
}
|
|
6494
|
+
/**
 * Writes the in-memory LinkedIn profile hit cache to its file.
 *
 * Does nothing when there is no cache in memory. Otherwise the parent
 * directory is created if needed and the cache is written as 2-space-indented
 * JSON followed by a newline. File-system errors propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function persistLinkedInProfileHitCache() {
    const snapshot = linkedInProfileHitCache;
    if (!snapshot) {
        return;
    }
    const filePath = getLinkedInProfileHitCachePath();
    const parentDir = path.dirname(filePath);
    await mkdir(parentDir, { recursive: true });
    const serialized = JSON.stringify(linkedInProfileHitCache, null, 2);
    await writeFile(filePath, `${serialized}\n`, "utf8");
}
|
|
6502
|
+
/**
 * Records a resolved profile/company hit in the in-memory cache.
 *
 * Nothing happens when none of the four URL fields is truthy. Otherwise the
 * cache is created on demand (version 1) and, for every key returned by
 * buildLinkedInProfileHitCacheKeys(params), the new URLs are merged into the
 * existing entry: fields supplied now overwrite older values, fields not
 * supplied keep what was cached before. This prevents a company-only hit from
 * erasing a previously cached profile URL (and vice versa). The entry and the
 * cache both get a fresh `updatedAt` timestamp. The cache is not written to
 * disk here.
 *
 * @param {{
 *   linkedinUrl?: string | null,
 *   salesNavProfileUrl?: string | null,
 *   linkedinCompanyUrl?: string | null,
 *   salesNavCompanyUrl?: string | null,
 *   fullName?: string,
 *   companyName?: string,
 *   email?: string,
 *   contactId?: string
 * }} params
 * @returns {void}
 */
function upsertLinkedInProfileHitCacheEntry(params) {
    const urlFields = ["linkedinUrl", "salesNavProfileUrl", "linkedinCompanyUrl", "salesNavCompanyUrl"];
    if (!urlFields.some((field) => params[field])) {
        return;
    }
    if (!linkedInProfileHitCache) {
        linkedInProfileHitCache = {
            version: 1,
            updatedAt: new Date().toISOString(),
            entries: {}
        };
    }
    const updatedAt = new Date().toISOString();
    linkedInProfileHitCache.updatedAt = updatedAt;
    // Only carry fields that actually have a value so they cannot blank out
    // previously cached URLs during the merge below.
    const freshUrls = {};
    for (const field of urlFields) {
        if (params[field]) {
            freshUrls[field] = params[field];
        }
    }
    for (const key of buildLinkedInProfileHitCacheKeys(params)) {
        linkedInProfileHitCache.entries[key] = {
            ...linkedInProfileHitCache.entries[key],
            ...freshUrls,
            updatedAt
        };
    }
}
|
|
4722
6526
|
async function loadSalesNavigatorFilterImpactModel() {
|
|
4723
6527
|
if (salesNavigatorFilterImpactLoaded) {
|
|
4724
6528
|
return salesNavigatorFilterImpactModel;
|
|
@@ -4991,6 +6795,7 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
|
|
|
4991
6795
|
}, {
|
|
4992
6796
|
crawlJobId: jobId,
|
|
4993
6797
|
crawlSliceId: slice.id,
|
|
6798
|
+
clientId: options.clientId ?? null,
|
|
4994
6799
|
traceId: options.traceId
|
|
4995
6800
|
});
|
|
4996
6801
|
const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
|
|
@@ -5267,6 +7072,7 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
5267
7072
|
agentBusyWaitSeconds: options.agentBusyWaitSeconds,
|
|
5268
7073
|
agentBusyMaxWaits: options.agentBusyMaxWaits,
|
|
5269
7074
|
claimedSlices: claimedSliceNumber,
|
|
7075
|
+
clientId: options.clientId ?? null,
|
|
5270
7076
|
traceId: options.traceId,
|
|
5271
7077
|
logger: options.logger
|
|
5272
7078
|
}).then((value) => ({ slot, value })));
|
|
@@ -6230,6 +8036,7 @@ program
|
|
|
6230
8036
|
const companyCleaningMode = resolveCompanyCleaningMode(String(options.companyCleaning ?? process.env.SALESPROMPTER_COMPANY_CLEANING_MODE ?? "basic"));
|
|
6231
8037
|
const cleanedCompanyMap = await buildCompanyNameCleaningMap(rows, companyCleaningMode);
|
|
6232
8038
|
const contacts = toLinkedInUrlLookupContacts(rows, cleanedCompanyMap);
|
|
8039
|
+
await loadLinkedInProfileHitCache();
|
|
6233
8040
|
if (options.dryRun) {
|
|
6234
8041
|
const payload = {
|
|
6235
8042
|
status: "ok",
|
|
@@ -6245,79 +8052,70 @@ program
|
|
|
6245
8052
|
printOutput(payload);
|
|
6246
8053
|
return;
|
|
6247
8054
|
}
|
|
6248
|
-
const
|
|
6249
|
-
|
|
6250
|
-
|
|
8055
|
+
const orgId = String(options.orgId ?? "").trim() || undefined;
|
|
8056
|
+
const strategy = resolveLinkedInBulkStrategyConfig({
|
|
8057
|
+
rowCount: rows.length,
|
|
8058
|
+
timeoutMs
|
|
6251
8059
|
});
|
|
6252
|
-
|
|
6253
|
-
|
|
6254
|
-
|
|
6255
|
-
|
|
6256
|
-
|
|
6257
|
-
|
|
6258
|
-
|
|
6259
|
-
|
|
6260
|
-
|
|
6261
|
-
|
|
6262
|
-
|
|
6263
|
-
|
|
6264
|
-
|
|
6265
|
-
|
|
6266
|
-
|
|
6267
|
-
|
|
6268
|
-
|
|
6269
|
-
|
|
6270
|
-
|
|
6271
|
-
|
|
6272
|
-
|
|
6273
|
-
|
|
6274
|
-
|
|
6275
|
-
|
|
6276
|
-
|
|
6277
|
-
|
|
6278
|
-
|
|
6279
|
-
|
|
6280
|
-
|
|
6281
|
-
|
|
6282
|
-
|
|
6283
|
-
|
|
8060
|
+
const useSalesNavRowPrepass = !strategy.bulkMode &&
|
|
8061
|
+
shouldUseSalesNavRowPrepass({
|
|
8062
|
+
rows,
|
|
8063
|
+
orgId
|
|
8064
|
+
});
|
|
8065
|
+
const enrichedRows = useSalesNavRowPrepass
|
|
8066
|
+
? await resolveLinkedInUrlsFromSalesNavRows({
|
|
8067
|
+
rows,
|
|
8068
|
+
orgId
|
|
8069
|
+
})
|
|
8070
|
+
: rows.map((row, index) => ({
|
|
8071
|
+
clientId: row.clientId,
|
|
8072
|
+
fullName: row.fullName,
|
|
8073
|
+
companyName: row.companyName,
|
|
8074
|
+
linkedinUrl: null,
|
|
8075
|
+
salesNavProfileUrl: null,
|
|
8076
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || null,
|
|
8077
|
+
salesNavCompanyUrl: null,
|
|
8078
|
+
found: false,
|
|
8079
|
+
companyFound: Boolean(row.linkedinCompanyUrl?.trim()),
|
|
8080
|
+
contactId: normalizeLinkedInLookupField(row.contactId) ?? `${index + 1}`,
|
|
8081
|
+
source: null,
|
|
8082
|
+
companySource: row.linkedinCompanyUrl?.trim() ? "input" : null,
|
|
8083
|
+
matchedFullName: null,
|
|
8084
|
+
matchedCompanyName: null,
|
|
8085
|
+
matchedTitle: null,
|
|
8086
|
+
matchedOrgId: null,
|
|
8087
|
+
matchedCompanyEmployeeCount: null
|
|
8088
|
+
}));
|
|
8089
|
+
const contactById = new Map(contacts.filter((contact) => !contact.isVariation).map((contact) => [contact.contact_id, contact]));
|
|
8090
|
+
for (const row of enrichedRows) {
|
|
8091
|
+
if (row.found) {
|
|
8092
|
+
continue;
|
|
6284
8093
|
}
|
|
6285
|
-
|
|
6286
|
-
|
|
6287
|
-
|
|
6288
|
-
|
|
6289
|
-
|
|
6290
|
-
|
|
6291
|
-
|
|
6292
|
-
|
|
6293
|
-
|
|
6294
|
-
|
|
6295
|
-
|
|
6296
|
-
|
|
6297
|
-
throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
|
|
6298
|
-
}
|
|
6299
|
-
linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
|
|
6300
|
-
parsedBody: workflow.parsedBody,
|
|
6301
|
-
contacts: directContacts
|
|
6302
|
-
});
|
|
6303
|
-
for (const row of enrichedRows) {
|
|
6304
|
-
if (row.found)
|
|
6305
|
-
continue;
|
|
6306
|
-
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
6307
|
-
if (profile?.linkedinUrl) {
|
|
6308
|
-
row.linkedinUrl = profile.linkedinUrl;
|
|
6309
|
-
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
6310
|
-
row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
6311
|
-
row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
6312
|
-
row.found = true;
|
|
6313
|
-
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
6314
|
-
row.source = "workflow";
|
|
6315
|
-
row.companySource =
|
|
6316
|
-
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
|
|
6317
|
-
}
|
|
6318
|
-
}
|
|
8094
|
+
const contact = contactById.get(row.contactId);
|
|
8095
|
+
const cacheKeys = buildLinkedInProfileHitCacheKeys({
|
|
8096
|
+
fullName: row.fullName,
|
|
8097
|
+
companyName: row.companyName,
|
|
8098
|
+
email: contact?.email,
|
|
8099
|
+
contactId: row.contactId
|
|
8100
|
+
});
|
|
8101
|
+
const cachedEntry = cacheKeys
|
|
8102
|
+
.map((key) => linkedInProfileHitCache?.entries[key] ?? null)
|
|
8103
|
+
.find(Boolean);
|
|
8104
|
+
if (!cachedEntry) {
|
|
8105
|
+
continue;
|
|
6319
8106
|
}
|
|
8107
|
+
row.linkedinUrl = cachedEntry.linkedinUrl ?? row.linkedinUrl ?? null;
|
|
8108
|
+
row.salesNavProfileUrl = cachedEntry.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
8109
|
+
row.linkedinCompanyUrl = cachedEntry.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
8110
|
+
row.salesNavCompanyUrl = cachedEntry.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
8111
|
+
row.found = Boolean(row.linkedinUrl || row.salesNavProfileUrl);
|
|
8112
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
8113
|
+
row.source = row.found ? "cache" : row.source;
|
|
8114
|
+
row.companySource =
|
|
8115
|
+
row.companyFound && !row.companySource ? "cache" : row.companySource;
|
|
6320
8116
|
}
|
|
8117
|
+
let directAttempted = false;
|
|
8118
|
+
let workflowAttempted = false;
|
|
6321
8119
|
const parsedClientIds = Array.from(new Set(rows
|
|
6322
8120
|
.map((row) => Number(row.clientId))
|
|
6323
8121
|
.filter((value) => Number.isFinite(value) && value > 0)));
|
|
@@ -6364,37 +8162,265 @@ program
|
|
|
6364
8162
|
writeProgress(`Skipping app-backed company enrichment: ${error instanceof Error ? error.message : String(error)}`);
|
|
6365
8163
|
}
|
|
6366
8164
|
}
|
|
6367
|
-
|
|
6368
|
-
|
|
6369
|
-
|
|
6370
|
-
|
|
8165
|
+
const contactsMissingCompanyUrl = contacts.filter((contact) => !contact.isVariation &&
|
|
8166
|
+
enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl));
|
|
8167
|
+
if (contactsMissingCompanyUrl.length > 0) {
|
|
8168
|
+
const companyUrlByContactId = await resolveLinkedInCompanyUrlsForContacts({
|
|
8169
|
+
contacts: contactsMissingCompanyUrl,
|
|
8170
|
+
timeoutMs: Math.min(timeoutMs, 15_000),
|
|
8171
|
+
concurrency: strategy.bulkMode ? 6 : 3,
|
|
8172
|
+
overallBudgetMs: strategy.bulkMode ? 20_000 : 10_000
|
|
6371
8173
|
});
|
|
6372
|
-
const companyByContactId = new Map(companyResult.contacts.map((contact) => [
|
|
6373
|
-
contact.contact_id,
|
|
6374
|
-
{
|
|
6375
|
-
linkedinCompanyUrl: contact.linkedin_company_url ?? null,
|
|
6376
|
-
salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
|
|
6377
|
-
matchedCompanyName: contact.matched_company_name ?? null,
|
|
6378
|
-
matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
|
|
6379
|
-
}
|
|
6380
|
-
]));
|
|
6381
8174
|
for (const row of enrichedRows) {
|
|
6382
|
-
|
|
6383
|
-
if (!company || row.linkedinCompanyUrl) {
|
|
8175
|
+
if (row.linkedinCompanyUrl) {
|
|
6384
8176
|
continue;
|
|
6385
8177
|
}
|
|
6386
|
-
|
|
6387
|
-
|
|
6388
|
-
|
|
6389
|
-
|
|
6390
|
-
|
|
6391
|
-
row.
|
|
6392
|
-
row.
|
|
6393
|
-
company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
8178
|
+
const linkedinCompanyUrl = companyUrlByContactId.get(row.contactId);
|
|
8179
|
+
if (!linkedinCompanyUrl) {
|
|
8180
|
+
continue;
|
|
8181
|
+
}
|
|
8182
|
+
row.linkedinCompanyUrl = linkedinCompanyUrl;
|
|
8183
|
+
row.companyFound = true;
|
|
8184
|
+
row.companySource = "web-search";
|
|
6394
8185
|
}
|
|
6395
8186
|
}
|
|
6396
|
-
|
|
6397
|
-
|
|
8187
|
+
const missingRows = enrichedRows.filter((row) => !row.found);
|
|
8188
|
+
const useDirectPeopleLookup = !strategy.bulkMode &&
|
|
8189
|
+
shouldUseDirectPeopleLookup({
|
|
8190
|
+
rowCount: missingRows.length
|
|
8191
|
+
});
|
|
8192
|
+
const useWorkflowPeopleLookup = !strategy.bulkMode &&
|
|
8193
|
+
shouldUseWorkflowPeopleLookup({
|
|
8194
|
+
rowCount: missingRows.length
|
|
8195
|
+
});
|
|
8196
|
+
if (missingRows.length > 0) {
|
|
8197
|
+
const rowByContactId = new Map(enrichedRows.map((row) => [row.contactId, row]));
|
|
8198
|
+
const directContacts = contacts
|
|
8199
|
+
.filter((contact) => missingRows.some((row) => row.contactId === contact.contact_id))
|
|
8200
|
+
.map((contact) => {
|
|
8201
|
+
const row = rowByContactId.get(contact.contact_id);
|
|
8202
|
+
if (!row) {
|
|
8203
|
+
return contact;
|
|
8204
|
+
}
|
|
8205
|
+
return {
|
|
8206
|
+
...contact,
|
|
8207
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl ?? contact.linkedinCompanyUrl,
|
|
8208
|
+
companyNameOriginal: row.matchedCompanyName ?? contact.companyNameOriginal,
|
|
8209
|
+
companyName: row.matchedCompanyName && normalizeLookupCompanyForSearch(row.matchedCompanyName)
|
|
8210
|
+
? normalizeLookupCompanyForSearch(row.matchedCompanyName)
|
|
8211
|
+
: contact.companyName
|
|
8212
|
+
};
|
|
8213
|
+
});
|
|
8214
|
+
let linkedInUrlByContactId = new Map();
|
|
8215
|
+
if (useDirectPeopleLookup) {
|
|
8216
|
+
try {
|
|
8217
|
+
directAttempted = true;
|
|
8218
|
+
const result = await invokeLinkedInUrlEnrichmentDirect({
|
|
8219
|
+
contacts: directContacts,
|
|
8220
|
+
timeoutMs
|
|
8221
|
+
});
|
|
8222
|
+
const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
8223
|
+
linkedInUrlByContactId = new Map(result.contacts.map((contact) => [
|
|
8224
|
+
contact.contact_id,
|
|
8225
|
+
{
|
|
8226
|
+
linkedinUrl: contact.linkedin_url ?? null,
|
|
8227
|
+
salesNavProfileUrl: contact.sales_nav_profile_url ?? null,
|
|
8228
|
+
linkedinCompanyUrl: null,
|
|
8229
|
+
salesNavCompanyUrl: null,
|
|
8230
|
+
matchedFullName: contact.matched_full_name ?? null,
|
|
8231
|
+
matchedCompanyName: contact.matched_company_name ?? null,
|
|
8232
|
+
matchedTitle: contact.matched_title ?? null
|
|
8233
|
+
}
|
|
8234
|
+
]));
|
|
8235
|
+
for (const row of enrichedRows) {
|
|
8236
|
+
if (row.found)
|
|
8237
|
+
continue;
|
|
8238
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
8239
|
+
if (profile?.linkedinUrl) {
|
|
8240
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
8241
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
8242
|
+
row.found = true;
|
|
8243
|
+
row.source = "linkedin-direct";
|
|
8244
|
+
row.matchedFullName = profile.matchedFullName ?? row.matchedFullName ?? null;
|
|
8245
|
+
row.matchedCompanyName = profile.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
8246
|
+
row.matchedTitle = profile.matchedTitle ?? row.matchedTitle ?? null;
|
|
8247
|
+
}
|
|
8248
|
+
const directContact = directContacts.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
|
|
8249
|
+
const companyContext = directContact
|
|
8250
|
+
? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
|
|
8251
|
+
: null;
|
|
8252
|
+
if (companyContext && !row.linkedinCompanyUrl) {
|
|
8253
|
+
row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
8254
|
+
row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
8255
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
8256
|
+
row.companySource =
|
|
8257
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
8258
|
+
row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
8259
|
+
row.matchedCompanyEmployeeCount =
|
|
8260
|
+
companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
8261
|
+
}
|
|
8262
|
+
}
|
|
8263
|
+
const contactsStillMissingCompany = contacts.filter((contact) => !contact.isVariation &&
|
|
8264
|
+
enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl && !row.salesNavCompanyUrl));
|
|
8265
|
+
if (contactsStillMissingCompany.length > 0) {
|
|
8266
|
+
const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
|
|
8267
|
+
contacts: contactsStillMissingCompany,
|
|
8268
|
+
timeoutMs,
|
|
8269
|
+
precomputedContexts: result.companyContexts
|
|
8270
|
+
});
|
|
8271
|
+
const companyByContactId = new Map(companyResult.contacts.map((contact) => [
|
|
8272
|
+
contact.contact_id,
|
|
8273
|
+
{
|
|
8274
|
+
linkedinCompanyUrl: contact.linkedin_company_url ?? null,
|
|
8275
|
+
salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
|
|
8276
|
+
matchedCompanyName: contact.matched_company_name ?? null,
|
|
8277
|
+
matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
|
|
8278
|
+
}
|
|
8279
|
+
]));
|
|
8280
|
+
for (const row of enrichedRows) {
|
|
8281
|
+
const company = companyByContactId.get(row.contactId);
|
|
8282
|
+
if (!company || row.linkedinCompanyUrl) {
|
|
8283
|
+
continue;
|
|
8284
|
+
}
|
|
8285
|
+
row.linkedinCompanyUrl = company.linkedinCompanyUrl;
|
|
8286
|
+
row.salesNavCompanyUrl = company.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
8287
|
+
row.companyFound = Boolean(company.linkedinCompanyUrl || company.salesNavCompanyUrl);
|
|
8288
|
+
row.companySource =
|
|
8289
|
+
company.linkedinCompanyUrl || company.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
8290
|
+
row.matchedCompanyName = company.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
8291
|
+
row.matchedCompanyEmployeeCount =
|
|
8292
|
+
company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
8293
|
+
}
|
|
8294
|
+
}
|
|
8295
|
+
}
|
|
8296
|
+
catch (error) {
|
|
8297
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
8298
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
8299
|
+
throw error;
|
|
8300
|
+
}
|
|
8301
|
+
}
|
|
8302
|
+
}
|
|
8303
|
+
const stillMissingAfterDirect = enrichedRows.filter((row) => !row.found);
|
|
8304
|
+
const contactsStillMissing = directContacts.filter((contact) => stillMissingAfterDirect.some((row) => row.contactId === contact.contact_id));
|
|
8305
|
+
if (contactsStillMissing.length > 0 && useWorkflowPeopleLookup) {
|
|
8306
|
+
workflowAttempted = true;
|
|
8307
|
+
try {
|
|
8308
|
+
const workflow = await invokeLinkedInUrlEnrichmentWorkflow({
|
|
8309
|
+
contacts: contactsStillMissing,
|
|
8310
|
+
externalUserId: orgId || sessionOrgId || "cli_direct_lookup",
|
|
8311
|
+
timeoutMs: Math.min(timeoutMs, strategy.workflowStageBudgetMs)
|
|
8312
|
+
});
|
|
8313
|
+
if (!workflow.response.ok) {
|
|
8314
|
+
throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
|
|
8315
|
+
}
|
|
8316
|
+
linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
|
|
8317
|
+
parsedBody: workflow.parsedBody,
|
|
8318
|
+
contacts: contactsStillMissing
|
|
8319
|
+
});
|
|
8320
|
+
for (const row of enrichedRows) {
|
|
8321
|
+
if (row.found)
|
|
8322
|
+
continue;
|
|
8323
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
8324
|
+
if (profile?.linkedinUrl) {
|
|
8325
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
8326
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
8327
|
+
row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
8328
|
+
row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
8329
|
+
row.found = true;
|
|
8330
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
8331
|
+
row.source = "workflow";
|
|
8332
|
+
row.companySource =
|
|
8333
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
|
|
8334
|
+
}
|
|
8335
|
+
}
|
|
8336
|
+
}
|
|
8337
|
+
catch (error) {
|
|
8338
|
+
writeProgress(`Skipping workflow profile enrichment: ${error instanceof Error ? error.message : String(error)}`);
|
|
8339
|
+
}
|
|
8340
|
+
}
|
|
8341
|
+
const serperContacts = directContacts.filter((contact) => enrichedRows.some((row) => row.contactId === contact.contact_id && !row.found));
|
|
8342
|
+
if (strategy.bulkMode && serperContacts.length > 0) {
|
|
8343
|
+
writeProgress(`Using bulk profile resolution strategy for ${serperContacts.length} remaining contacts.`);
|
|
8344
|
+
}
|
|
8345
|
+
const serperResults = await resolveSerperLinkedInProfilesInParallel({
|
|
8346
|
+
contacts: serperContacts.filter((contact) => !contact.isVariation),
|
|
8347
|
+
timeoutMs,
|
|
8348
|
+
concurrency: Math.min(strategy.serperConcurrency, serperContacts.length || 1),
|
|
8349
|
+
maxQueries: strategy.serperMaxQueries,
|
|
8350
|
+
overallBudgetMs: strategy.serperStageBudgetMs
|
|
8351
|
+
});
|
|
8352
|
+
for (const row of enrichedRows) {
|
|
8353
|
+
if (row.found)
|
|
8354
|
+
continue;
|
|
8355
|
+
const linkedinUrl = serperResults.get(row.contactId);
|
|
8356
|
+
if (!linkedinUrl)
|
|
8357
|
+
continue;
|
|
8358
|
+
row.linkedinUrl = linkedinUrl;
|
|
8359
|
+
row.found = true;
|
|
8360
|
+
row.source = "web-search";
|
|
8361
|
+
}
|
|
8362
|
+
const stillMissingAfterSerper = enrichedRows.filter((row) => !row.found);
|
|
8363
|
+
if (shouldAttemptBulkDirectProfileLookup({
|
|
8364
|
+
strategy,
|
|
8365
|
+
unresolvedRowCount: stillMissingAfterSerper.length
|
|
8366
|
+
})) {
|
|
8367
|
+
const bulkDirectCandidates = rankContactsForBulkDirectProfileLookup({
|
|
8368
|
+
contacts: directContacts.filter((contact) => stillMissingAfterSerper.some((row) => row.contactId === contact.contact_id)),
|
|
8369
|
+
rowsByContactId: rowByContactId,
|
|
8370
|
+
limit: strategy.bulkDirectProfileMaxRows
|
|
8371
|
+
});
|
|
8372
|
+
if (bulkDirectCandidates.length > 0) {
|
|
8373
|
+
writeProgress(`Using bulk direct profile follow-up for ${bulkDirectCandidates.length} high-signal unresolved contacts.`);
|
|
8374
|
+
try {
|
|
8375
|
+
directAttempted = true;
|
|
8376
|
+
const result = await invokeLinkedInUrlEnrichmentDirect({
|
|
8377
|
+
contacts: bulkDirectCandidates,
|
|
8378
|
+
timeoutMs: strategy.bulkDirectProfileTimeoutMs,
|
|
8379
|
+
perAttemptTimeoutMs: Math.min(strategy.bulkDirectProfileTimeoutMs, 2_500),
|
|
8380
|
+
perContactBudgetMs: strategy.bulkDirectProfileTimeoutMs
|
|
8381
|
+
});
|
|
8382
|
+
const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
8383
|
+
const bulkDirectByContactId = new Map(result.contacts.map((contact) => [
|
|
8384
|
+
contact.contact_id,
|
|
8385
|
+
{
|
|
8386
|
+
linkedinUrl: contact.linkedin_url ?? null,
|
|
8387
|
+
salesNavProfileUrl: contact.sales_nav_profile_url ?? null
|
|
8388
|
+
}
|
|
8389
|
+
]));
|
|
8390
|
+
for (const row of enrichedRows) {
|
|
8391
|
+
if (row.found)
|
|
8392
|
+
continue;
|
|
8393
|
+
const profile = bulkDirectByContactId.get(row.contactId);
|
|
8394
|
+
if (profile?.linkedinUrl) {
|
|
8395
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
8396
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
8397
|
+
row.found = true;
|
|
8398
|
+
row.source = "linkedin-direct";
|
|
8399
|
+
}
|
|
8400
|
+
const directContact = bulkDirectCandidates.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
|
|
8401
|
+
const companyContext = directContact
|
|
8402
|
+
? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
|
|
8403
|
+
: null;
|
|
8404
|
+
if (companyContext && !row.linkedinCompanyUrl) {
|
|
8405
|
+
row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
8406
|
+
row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
8407
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
8408
|
+
row.companySource =
|
|
8409
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
8410
|
+
row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
8411
|
+
row.matchedCompanyEmployeeCount =
|
|
8412
|
+
companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
8413
|
+
}
|
|
8414
|
+
}
|
|
8415
|
+
}
|
|
8416
|
+
catch (error) {
|
|
8417
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
8418
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
8419
|
+
writeProgress(`Skipping bulk direct profile follow-up: ${message}`);
|
|
8420
|
+
}
|
|
8421
|
+
}
|
|
8422
|
+
}
|
|
8423
|
+
}
|
|
6398
8424
|
}
|
|
6399
8425
|
const payload = {
|
|
6400
8426
|
status: "ok",
|
|
@@ -6404,8 +8430,23 @@ program
|
|
|
6404
8430
|
companiesFound: enrichedRows.filter((row) => row.companyFound).length,
|
|
6405
8431
|
directAttempted,
|
|
6406
8432
|
workflowAttempted,
|
|
8433
|
+
bulkMode: strategy.bulkMode,
|
|
6407
8434
|
rows: enrichedRows
|
|
6408
8435
|
};
|
|
8436
|
+
for (const row of enrichedRows) {
|
|
8437
|
+
const contact = contactById.get(row.contactId);
|
|
8438
|
+
upsertLinkedInProfileHitCacheEntry({
|
|
8439
|
+
fullName: row.fullName,
|
|
8440
|
+
companyName: row.companyName,
|
|
8441
|
+
email: contact?.email,
|
|
8442
|
+
contactId: row.contactId,
|
|
8443
|
+
linkedinUrl: row.linkedinUrl ?? null,
|
|
8444
|
+
salesNavProfileUrl: row.salesNavProfileUrl ?? null,
|
|
8445
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl ?? null,
|
|
8446
|
+
salesNavCompanyUrl: row.salesNavCompanyUrl ?? null
|
|
8447
|
+
});
|
|
8448
|
+
}
|
|
8449
|
+
await persistLinkedInProfileHitCache();
|
|
6409
8450
|
if (options.out) {
|
|
6410
8451
|
await writeJsonFile(options.out, payload);
|
|
6411
8452
|
}
|
|
@@ -6743,12 +8784,14 @@ program
|
|
|
6743
8784
|
});
|
|
6744
8785
|
program
|
|
6745
8786
|
.command("leads:generate")
|
|
6746
|
-
.description("Generate leads
|
|
8787
|
+
.description("Generate leads from your Salesprompter workspace when authenticated, or from fallback seeds.")
|
|
6747
8788
|
.requiredOption("--icp <path>", "Path to ICP JSON")
|
|
6748
8789
|
.option("--count <number>", "Number of leads to generate", "10")
|
|
6749
8790
|
.option("--domain <domain>", "Target a specific company domain like company.com")
|
|
6750
8791
|
.option("--company-domain <domain>", "Deprecated alias for --domain")
|
|
6751
8792
|
.option("--company-name <name>", "Optional company name override for a targeted domain")
|
|
8793
|
+
.option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
|
|
8794
|
+
.option("--source <source>", "auto|workspace|fallback", "auto")
|
|
6752
8795
|
.requiredOption("--out <path>", "Output file path")
|
|
6753
8796
|
.action(async (options) => {
|
|
6754
8797
|
const icp = await readJsonFile(options.icp, IcpSchema);
|
|
@@ -6756,9 +8799,15 @@ program
|
|
|
6756
8799
|
const domain = options.domain ?? options.companyDomain;
|
|
6757
8800
|
const target = {
|
|
6758
8801
|
companyDomain: domain,
|
|
6759
|
-
companyName: options.companyName
|
|
8802
|
+
companyName: options.companyName,
|
|
8803
|
+
linkedinCompanyPage: options.linkedinCompanyPage
|
|
6760
8804
|
};
|
|
6761
|
-
const result = await
|
|
8805
|
+
const result = await generateLeadsForCommand({
|
|
8806
|
+
icp,
|
|
8807
|
+
count,
|
|
8808
|
+
target,
|
|
8809
|
+
source: options.source
|
|
8810
|
+
});
|
|
6762
8811
|
await writeJsonFile(options.out, result.leads);
|
|
6763
8812
|
printOutput({
|
|
6764
8813
|
status: "ok",
|
|
@@ -6803,6 +8852,8 @@ program
|
|
|
6803
8852
|
.option("--domain <domain>", "Target a specific company domain like company.com")
|
|
6804
8853
|
.option("--company-domain <domain>", "Deprecated alias for --domain")
|
|
6805
8854
|
.option("--company-name <name>", "Optional company name override for a targeted domain")
|
|
8855
|
+
.option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
|
|
8856
|
+
.option("--source <source>", "auto|workspace|fallback", "auto")
|
|
6806
8857
|
.option("--out-prefix <path>", "Output path prefix (writes <prefix>-leads.json, <prefix>-enriched.json, <prefix>-scored.json)", "./data/leads-pipeline")
|
|
6807
8858
|
.action(async (options) => {
|
|
6808
8859
|
const icp = await readJsonFile(options.icp, IcpSchema);
|
|
@@ -6810,13 +8861,19 @@ program
|
|
|
6810
8861
|
const domain = options.domain ?? options.companyDomain;
|
|
6811
8862
|
const target = {
|
|
6812
8863
|
companyDomain: domain,
|
|
6813
|
-
companyName: options.companyName
|
|
8864
|
+
companyName: options.companyName,
|
|
8865
|
+
linkedinCompanyPage: options.linkedinCompanyPage
|
|
6814
8866
|
};
|
|
6815
8867
|
const outPrefix = String(options.outPrefix);
|
|
6816
8868
|
const leadsOut = `${outPrefix}-leads.json`;
|
|
6817
8869
|
const enrichedOut = `${outPrefix}-enriched.json`;
|
|
6818
8870
|
const scoredOut = `${outPrefix}-scored.json`;
|
|
6819
|
-
const generated = await
|
|
8871
|
+
const generated = await generateLeadsForCommand({
|
|
8872
|
+
icp,
|
|
8873
|
+
count,
|
|
8874
|
+
target,
|
|
8875
|
+
source: options.source
|
|
8876
|
+
});
|
|
6820
8877
|
await writeJsonFile(leadsOut, generated.leads);
|
|
6821
8878
|
const enriched = await enrichmentProvider.enrichLeads(generated.leads);
|
|
6822
8879
|
await writeJsonFile(enrichedOut, enriched);
|
|
@@ -7554,6 +9611,7 @@ program
|
|
|
7554
9611
|
.option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
|
|
7555
9612
|
.option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
|
|
7556
9613
|
.option("--slice-preset <name>", "Slice preset label stored with the export runs", "human-resources-crawl")
|
|
9614
|
+
.option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
|
|
7557
9615
|
.option("--max-split-depth <number>", "Maximum number of adaptive split dimensions to use", "6")
|
|
7558
9616
|
.option("--max-slices <number>", "Safety cap for total claimed slices in this invocation", "1000")
|
|
7559
9617
|
.option("--max-retries <number>", "Retries for non-splitting export failures", "3")
|
|
@@ -7572,6 +9630,7 @@ program
|
|
|
7572
9630
|
const jobId = z.string().uuid().optional().parse(options.jobId);
|
|
7573
9631
|
const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
|
|
7574
9632
|
const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
|
|
9633
|
+
const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
|
|
7575
9634
|
const maxSplitDepth = z.coerce.number().int().min(1).max(6).parse(options.maxSplitDepth);
|
|
7576
9635
|
const maxSlices = z.coerce.number().int().min(1).max(10000).parse(options.maxSlices);
|
|
7577
9636
|
const maxRetries = z.coerce.number().int().min(0).max(5).parse(options.maxRetries);
|
|
@@ -7591,6 +9650,7 @@ program
|
|
|
7591
9650
|
jobId: jobId ?? null,
|
|
7592
9651
|
maxResultsPerSearch,
|
|
7593
9652
|
numberOfProfiles,
|
|
9653
|
+
clientId,
|
|
7594
9654
|
slicePreset: options.slicePreset,
|
|
7595
9655
|
maxSplitDepth,
|
|
7596
9656
|
maxSlices,
|
|
@@ -7691,6 +9751,7 @@ program
|
|
|
7691
9751
|
traceId: logger.traceId,
|
|
7692
9752
|
command: {
|
|
7693
9753
|
sourceQueryUrl: queryUrl,
|
|
9754
|
+
clientId,
|
|
7694
9755
|
slicePreset: options.slicePreset,
|
|
7695
9756
|
maxResultsPerSearch,
|
|
7696
9757
|
numberOfProfiles,
|
|
@@ -7712,6 +9773,7 @@ program
|
|
|
7712
9773
|
splitTrail: seed.splitTrail,
|
|
7713
9774
|
rawPayload: {
|
|
7714
9775
|
workflow: "salesnav:crawl",
|
|
9776
|
+
clientId,
|
|
7715
9777
|
traceId: logger.traceId
|
|
7716
9778
|
}
|
|
7717
9779
|
}
|
|
@@ -7751,6 +9813,7 @@ program
|
|
|
7751
9813
|
idlePollSeconds,
|
|
7752
9814
|
idleMaxPolls,
|
|
7753
9815
|
parallelExports,
|
|
9816
|
+
clientId,
|
|
7754
9817
|
traceId: logger.traceId,
|
|
7755
9818
|
logger
|
|
7756
9819
|
});
|
|
@@ -7831,6 +9894,45 @@ program
|
|
|
7831
9894
|
recentEvents
|
|
7832
9895
|
});
|
|
7833
9896
|
});
|
|
9897
|
+
// CLI command `phantombuster:containers:sync` (alias `pb:containers:sync`).
//
// Flow of the action handler:
//   1. Validate every flag with Zod (a failed parse throws before any auth or
//      network work happens).
//   2. Obtain an authenticated session via requireAuthSession().
//   3. Delegate the sync to syncPhantombusterContainersViaApp().
//   4. Emit the result with `dryRun: false` added, optionally also writing it
//      to the path given by --out.
program
    .command("phantombuster:containers:sync")
    .alias("pb:containers:sync")
    .description("Fetch Phantombuster containers for configured agents and store them in Neon.")
    .option("--agent-id <id>", "Phantombuster agent id to sync. Repeat to sync multiple agents.", collectStringOptionValue, [])
    .option("--limit <number>", "Maximum containers to fetch per Phantombuster page", "100")
    .option("--max-pages <number>", "Maximum Phantombuster pages to fetch per agent", "50")
    .option("--mode <mode>", "Phantombuster container mode: all or finalized", "all")
    .option("--before-ended-at <iso>", "Only fetch containers that ended before this ISO timestamp")
    .option("--metadata-only", "Store container metadata without fetching output and result objects", false)
    .option("--refresh-lead-pool", "After syncing results, rebuild the Neon lead_pool_new reporting table. This can take several minutes.", false)
    .option("--out <path>", "Optional local JSON output path")
    .action(async (options) => {
        // --limit and --max-pages share the same bounds: an integer in 1..500.
        const pagingBound = z.coerce.number().int().min(1).max(500);

        const agentIds = z.array(z.string().min(1)).parse(options.agentId);
        const limit = pagingBound.parse(options.limit);
        const maxPages = pagingBound.parse(options.maxPages);
        const mode = z.enum(["all", "finalized"]).parse(options.mode);

        // Stays undefined unless the flag was supplied; when supplied it must
        // pass Zod's datetime string validation.
        let beforeEndedAt;
        if (options.beforeEndedAt) {
            beforeEndedAt = z.string().datetime().parse(options.beforeEndedAt);
        }

        const session = await requireAuthSession();

        const syncRequest = {
            // An empty --agent-id list is sent as undefined rather than [].
            agentIds: agentIds.length === 0 ? undefined : agentIds,
            limit,
            maxPages,
            mode,
            beforeEndedAt,
            // --metadata-only turns off fetching of results.
            includeResults: !options.metadataOnly,
            refreshLeadPool: Boolean(options.refreshLeadPool)
        };
        const syncResult = await syncPhantombusterContainersViaApp(session, syncRequest);

        // dryRun is placed last so it overrides any same-named key in the result.
        const payload = { ...syncResult, dryRun: false };

        if (options.out) {
            await writeJsonFile(options.out, payload);
        }
        printOutput(payload);
    });
|
|
7834
9936
|
program
|
|
7835
9937
|
.command("salesnav:export")
|
|
7836
9938
|
.alias("search:export")
|
|
@@ -7839,12 +9941,18 @@ program
|
|
|
7839
9941
|
.option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
|
|
7840
9942
|
.option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
|
|
7841
9943
|
.option("--slice-preset <name>", "Slice preset label stored with the export run", "human-resources-default")
|
|
9944
|
+
.option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
|
|
9945
|
+
.option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
|
|
9946
|
+
.option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the export", "20")
|
|
7842
9947
|
.option("--out <path>", "Optional local JSON output path")
|
|
7843
9948
|
.option("--dry-run", "Only generate sliced query URLs without exporting them", false)
|
|
7844
9949
|
.action(async (options) => {
|
|
7845
9950
|
const queryUrls = z.array(z.string().url()).min(1).parse(options.queryUrl);
|
|
7846
9951
|
const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
|
|
7847
9952
|
const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
|
|
9953
|
+
const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
|
|
9954
|
+
const agentBusyMaxWaits = z.coerce.number().int().min(0).max(100).parse(options.agentBusyMaxWaits);
|
|
9955
|
+
const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
|
|
7848
9956
|
const prepared = queryUrls.map((queryUrl) => buildSalesNavigatorPeopleSlice(queryUrl));
|
|
7849
9957
|
const effectiveDryRun = Boolean(options.dryRun || shouldBypassAuth());
|
|
7850
9958
|
if (effectiveDryRun) {
|
|
@@ -7866,10 +9974,10 @@ program
|
|
|
7866
9974
|
printOutput(payload);
|
|
7867
9975
|
return;
|
|
7868
9976
|
}
|
|
7869
|
-
|
|
9977
|
+
let session = await requireAuthSession();
|
|
7870
9978
|
const exported = [];
|
|
7871
9979
|
for (const item of prepared) {
|
|
7872
|
-
const result = await
|
|
9980
|
+
const result = await runSalesNavigatorExportWithAgentWait(session, {
|
|
7873
9981
|
sourceQueryUrl: item.sourceQueryUrl,
|
|
7874
9982
|
slicedQueryUrl: item.slicedQueryUrl,
|
|
7875
9983
|
appliedFilters: item.appliedFilters,
|
|
@@ -7878,12 +9986,17 @@ program
|
|
|
7878
9986
|
slicePreset: options.slicePreset,
|
|
7879
9987
|
rawPayload: {
|
|
7880
9988
|
workflow: "salesnav:export",
|
|
9989
|
+
clientId,
|
|
7881
9990
|
sourceQueryUrl: item.sourceQueryUrl,
|
|
7882
9991
|
slicedQueryUrl: item.slicedQueryUrl,
|
|
7883
9992
|
appliedFilters: item.appliedFilters
|
|
7884
9993
|
}
|
|
9994
|
+
}, {
|
|
9995
|
+
waitSeconds: agentBusyWaitSeconds,
|
|
9996
|
+
maxWaits: agentBusyMaxWaits
|
|
7885
9997
|
});
|
|
7886
9998
|
exported.push(result);
|
|
9999
|
+
session = await requireAuthSession();
|
|
7887
10000
|
}
|
|
7888
10001
|
const payload = {
|
|
7889
10002
|
status: "ok",
|