salesprompter-cli 0.1.27 → 0.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +35 -0
- package/CONTRIBUTING.md +89 -0
- package/README.md +30 -1
- package/SECURITY.md +35 -0
- package/dist/cli.js +3309 -166
- package/dist/deel-outreach.js +16 -1
- package/dist/direct-path.js +16 -1
- package/dist/domainfinder.js +132 -0
- package/dist/linkedin-companies.js +3 -3
- package/dist/linkedin-products.js +2 -2
- package/dist/linkedin-session-contracts.js +3 -0
- package/dist/linkedin-session.js +8 -9
- package/dist/vendor/salesprompter-shared/extension-session-contracts.js +29 -0
- package/dist/vendor/salesprompter-shared/linkedin-session.js +22 -0
- package/dist/vendor/salesprompter-shared/phantombuster-contracts.js +16 -0
- package/dist/vendor/salesprompter-shared/session-vault-contracts.js +17 -0
- package/package.json +17 -4
package/dist/cli.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
|
-
import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
3
|
+
import { access, appendFile, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
5
|
import os from "node:os";
|
|
6
6
|
import path from "node:path";
|
|
@@ -25,7 +25,7 @@ import { InstantlySyncProvider } from "./instantly.js";
|
|
|
25
25
|
import { backfillLinkedInCompanies } from "./linkedin-companies.js";
|
|
26
26
|
import { parseLinkedInCompanyPage } from "./linkedin-companies.js";
|
|
27
27
|
import { crawlLinkedInProductCategory } from "./linkedin-products.js";
|
|
28
|
-
import { claimValidatedSalesNavigatorSessionCookieForCli, createLinkedInSessionSupabaseClient } from "./linkedin-session.js";
|
|
28
|
+
import { claimValidatedSalesNavigatorSessionCookieForCli, createLinkedInSessionSupabaseClient, resolveConfiguredEnvValue } from "./linkedin-session.js";
|
|
29
29
|
import { buildLeadlistsFunnelQueries } from "./leadlists-funnel.js";
|
|
30
30
|
import { readJsonFile, splitCsv, writeJsonFile, writeTextFile } from "./io.js";
|
|
31
31
|
import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, deriveSalesNavigatorTitleQuerySeeds, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
|
|
@@ -33,7 +33,9 @@ import { buildSalesNavigatorHistoricalBackfillPlan, ensureSalesNavigatorPeopleCo
|
|
|
33
33
|
const require = createRequire(import.meta.url);
|
|
34
34
|
const { version: packageVersion } = require("../package.json");
|
|
35
35
|
const program = new Command();
|
|
36
|
-
const
|
|
36
|
+
const companyProvider = new HeuristicCompanyProvider();
|
|
37
|
+
const peopleSearchProvider = new HeuristicPeopleSearchProvider();
|
|
38
|
+
const leadProvider = new AccountLeadProvider(companyProvider, peopleSearchProvider);
|
|
37
39
|
const enrichmentProvider = new HeuristicEnrichmentProvider();
|
|
38
40
|
const scoringProvider = new HeuristicScoringProvider();
|
|
39
41
|
const syncProvider = new RoutedSyncProvider(new DryRunSyncProvider(), new InstantlySyncProvider());
|
|
@@ -42,6 +44,14 @@ const runtimeOutputOptions = {
|
|
|
42
44
|
quiet: false
|
|
43
45
|
};
|
|
44
46
|
const nullableOptionalString = z.string().min(1).nullish().transform((value) => value ?? undefined);
|
|
47
|
+
/**
 * Shape of the JSON file that remembers the last LinkedIn company backfill
 * client id together with the user/org it was stored for.
 *
 * `userId` and `orgId` accept strings or numbers and may be null/absent: the
 * cache writer stores the session's ids as-is, and the cache reader compares
 * them after `String()` coercion. A string-only schema made every file that
 * held a numeric or null id fail validation, which silently disabled the cache.
 * Unknown keys are kept (`passthrough`).
 */
const LinkedInCompanyBackfillClientIdStateSchema = z
    .object({
        clientId: z.number().int().positive(),
        userId: z.union([z.string(), z.number()]).nullish(),
        orgId: z.union([z.string(), z.number()]).nullish(),
        updatedAt: z.string().datetime()
    })
    .passthrough();
|
|
45
55
|
const WorkspaceLeadSchema = LeadSchema.extend({
|
|
46
56
|
companySize: nullableOptionalString.optional(),
|
|
47
57
|
country: nullableOptionalString.optional()
|
|
@@ -66,6 +76,9 @@ const LinkedInCompanyBackfillLaunchResponseSchema = z.object({
|
|
|
66
76
|
webhookUrl: z.string().url(),
|
|
67
77
|
inputUrl: z.string().url().nullable(),
|
|
68
78
|
containerId: z.string().min(1).nullable(),
|
|
79
|
+
selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
|
|
80
|
+
selectedSessionUserEmail: z.string().min(1).nullable().optional(),
|
|
81
|
+
selectedSessionUserHandle: z.string().min(1).nullable().optional(),
|
|
69
82
|
candidates: z.array(z.object({
|
|
70
83
|
companyId: z.number().int().positive(),
|
|
71
84
|
companyUrl: z.string().url(),
|
|
@@ -78,7 +91,26 @@ const LinkedInCompanyBackfillStatusResponseSchema = z.object({
|
|
|
78
91
|
containerId: z.string().min(1),
|
|
79
92
|
running: z.boolean(),
|
|
80
93
|
processed: z.boolean(),
|
|
81
|
-
remaining: z.number().int().nonnegative()
|
|
94
|
+
remaining: z.number().int().nonnegative(),
|
|
95
|
+
failed: z.boolean().default(false),
|
|
96
|
+
failureCode: z.string().nullable().optional(),
|
|
97
|
+
failureMessage: z.string().nullable().optional()
|
|
98
|
+
});
|
|
99
|
+
// Validates a containers-sync response (judging by the name, the Phantombuster
// sync endpoint's reply): a literal "ok" status, the list of agent ids, one
// counter record per agent, and overall totals. Every counter is a
// non-negative integer.
const PhantombusterContainersSyncResponseSchema = (() => {
    const nonEmptyText = () => z.string().min(1);
    const counter = () => z.number().int().nonnegative();
    const agentSummary = z.object({
        agentId: nonEmptyText(),
        fetched: counter(),
        upserted: counter(),
        resultsSynced: counter()
    });
    return z.object({
        status: z.literal("ok"),
        agentIds: z.array(nonEmptyText()),
        agents: z.array(agentSummary),
        fetched: counter(),
        upserted: counter(),
        resultsSynced: counter(),
        outputsStored: counter(),
        resultObjectsStored: counter(),
        resultRowsStored: counter()
    });
})();
|
|
83
115
|
const CliEmailEnrichmentCompaniesResponseSchema = z.object({
|
|
84
116
|
clientId: z.number().int().positive(),
|
|
@@ -280,6 +312,7 @@ const cliPacks = [
|
|
|
280
312
|
];
|
|
281
313
|
const helpAliasByCommandName = new Map([
|
|
282
314
|
["contacts:find-linkedin-urls", "contacts:resolve-profiles"],
|
|
315
|
+
["companies:find-linkedin-urls", "companies:resolve-linkedin-urls"],
|
|
283
316
|
["contacts:process-emails", "contacts:resolve-emails"],
|
|
284
317
|
["linkedin-companies:backfill", "companies:enrich"],
|
|
285
318
|
["linkedin-products:scrape", "market:scrape"],
|
|
@@ -300,6 +333,7 @@ const helpVisibleCommandNames = new Set([
|
|
|
300
333
|
"auth:whoami",
|
|
301
334
|
"llm:ready",
|
|
302
335
|
"contacts:find-linkedin-urls",
|
|
336
|
+
"companies:find-linkedin-urls",
|
|
303
337
|
"contacts:process-emails",
|
|
304
338
|
"auth:logout",
|
|
305
339
|
"account:resolve",
|
|
@@ -339,6 +373,64 @@ function formatHelpArgumentTerm(argument) {
|
|
|
339
373
|
}
|
|
340
374
|
return argument.required ? `<${term}>` : `[${term}]`;
|
|
341
375
|
}
|
|
376
|
+
/**
 * Validates a raw client id and returns it as a number.
 *
 * @param {unknown} rawValue - Candidate id; it is stringified and trimmed before validation.
 * @returns {number} The positive integer parsed from the value.
 * @throws {Error} When the value is null/undefined or blank after trimming.
 * @throws When the zod coercion rejects the text (not a positive integer).
 */
function parsePositiveClientIdValue(rawValue) {
    const candidate = rawValue == null ? "" : String(rawValue).trim();
    if (candidate === "") {
        throw new Error("clientId is required and must be a positive integer.");
    }
    const positiveInteger = z.coerce.number().int().positive();
    return positiveInteger.parse(candidate);
}
|
|
386
|
+
/**
 * Location of the backfill client-id cache file.
 *
 * @returns {string} `linkedin-companies-backfill.json` inside the Salesprompter config directory.
 */
function getLinkedInCompanyBackfillClientStatePath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "linkedin-companies-backfill.json");
}
|
|
389
|
+
/**
 * Reads the cached LinkedIn company backfill client id, if it belongs to the
 * given session.
 *
 * The read is best-effort: a missing file, invalid JSON or a schema violation
 * all yield `undefined` rather than an error.
 *
 * @param {{ user?: { id?: unknown, orgId?: unknown } } | null | undefined} session
 *   Current session; when it carries a user id / org id and the cache recorded
 *   one too, they must match for the cached value to be used.
 * @returns {Promise<number | undefined>} The cached client id, or `undefined`
 *   when there is no usable cache entry for this session.
 */
async function readLinkedInCompanyBackfillClientIdFromCache(session) {
    // Named `statePath` so the imported `path` module is not shadowed.
    const statePath = getLinkedInCompanyBackfillClientStatePath();
    // Ids are compared as strings on both sides (user id and org id alike), so
    // a numeric session id still matches the string stored in the cache file.
    const belongsToSomeoneElse = (cachedId, currentId) => currentId != null && cachedId != null && String(cachedId) !== String(currentId);
    try {
        const content = await readFile(statePath, "utf8");
        const state = LinkedInCompanyBackfillClientIdStateSchema.parse(JSON.parse(content));
        if (belongsToSomeoneElse(state.userId, session?.user?.id)) {
            return undefined;
        }
        if (belongsToSomeoneElse(state.orgId, session?.user?.orgId)) {
            return undefined;
        }
        return state.clientId;
    }
    catch {
        // Deliberately swallowed: the cache is an optional convenience.
        return undefined;
    }
}
|
|
409
|
+
/**
 * Persists the LinkedIn company backfill client id so later runs can reuse it.
 *
 * The file is written as pretty-printed JSON (2-space indent, trailing
 * newline); the parent directory is created when missing.
 *
 * @param {number} clientId - Client id to remember.
 * @param {{ user?: { id?: unknown, orgId?: unknown } } | null | undefined} session
 *   Session whose user id / org id are recorded next to the client id.
 * @returns {Promise<void>}
 * @throws When the directory cannot be created or the file cannot be written.
 */
async function writeLinkedInCompanyBackfillClientIdToCache(clientId, session) {
    const filePath = getLinkedInCompanyBackfillClientStatePath();
    // The state schema used when reading declares userId/orgId as optional
    // strings. Stringify present ids and omit absent ones (undefined keys are
    // dropped by JSON.stringify) so the written file always validates; a raw
    // number or null would make the reader discard the cache.
    const toCachedId = (value) => (value == null ? undefined : String(value));
    const state = {
        clientId,
        userId: toCachedId(session?.user?.id),
        orgId: toCachedId(session?.user?.orgId),
        updatedAt: new Date().toISOString()
    };
    await mkdir(path.dirname(filePath), { recursive: true });
    await writeFile(filePath, `${JSON.stringify(state, null, 2)}\n`, "utf8");
}
|
|
420
|
+
/**
 * Determines which client id a LinkedIn company backfill should use.
 *
 * Sources are consulted in order and the first usable one wins:
 * 1. `params.clientIdOption` (when not blank),
 * 2. the `PIPEDREAM_CLIENT_ID` then `SALESPROMPTER_CLIENT_ID` environment variables,
 * 3. the value cached on disk for `params.session`.
 *
 * @param {{ clientIdOption?: unknown, session?: unknown }} params
 * @returns {Promise<number>} The resolved client id.
 * @throws {Error} When none of the sources provides a client id, or when an
 *   explicit/env value fails validation.
 */
async function resolveLinkedInCompanyBackfillClientId(params) {
    const explicit = params.clientIdOption;
    const hasExplicit = explicit != null && String(explicit).trim() !== "";
    if (hasExplicit) {
        return parsePositiveClientIdValue(explicit);
    }
    const fromEnv = [process.env.PIPEDREAM_CLIENT_ID, process.env.SALESPROMPTER_CLIENT_ID]
        .map((raw) => raw?.trim())
        .find(Boolean);
    if (fromEnv) {
        return parsePositiveClientIdValue(fromEnv);
    }
    const fromCache = await readLinkedInCompanyBackfillClientIdFromCache(params.session);
    if (fromCache == null) {
        throw new Error("Missing LinkedIn company backfill clientId. Pass --client-id, set PIPEDREAM_CLIENT_ID or SALESPROMPTER_CLIENT_ID, or run once with --client-id so the CLI can reuse it.");
    }
    return fromCache;
}
|
|
342
434
|
function applyGlobalOutputOptions(actionCommand) {
|
|
343
435
|
const globalOptions = actionCommand.optsWithGlobals();
|
|
344
436
|
runtimeOutputOptions.json = Boolean(globalOptions.json);
|
|
@@ -847,6 +939,13 @@ function splitLookupFullName(fullName) {
|
|
|
847
939
|
/**
 * Builds the placeholder e-mail address used for a lookup contact that has no
 * real address.
 *
 * @param {string | number} contactId - Identifier embedded after the `+`.
 * @returns {string} `linkedin-lookup+<contactId>@salesprompter.invalid`.
 */
function buildSyntheticLookupEmail(contactId) {
    const localPartPrefix = "linkedin-lookup+";
    return localPartPrefix.concat(contactId, "@salesprompter.invalid");
}
|
|
942
|
+
/**
 * Normalizes an optional lookup field to either a non-empty string or `undefined`.
 *
 * @param {unknown} value - Raw field; null/undefined are treated as absent,
 *   anything else is stringified and passed through `normalizeLookupWhitespace`.
 * @returns {string | undefined} The normalized text, or `undefined` when the
 *   value is absent or normalizes to an empty result.
 */
function normalizeLinkedInLookupField(value) {
    const cleaned = value == null ? "" : normalizeLookupWhitespace(String(value));
    return cleaned ? cleaned : undefined;
}
|
|
850
949
|
function looksLikeLookupCompanyRow(fullName, companyName) {
|
|
851
950
|
const fullNameComparable = normalizeLooseMatchText(fullName);
|
|
852
951
|
const companyComparable = normalizeLooseMatchText(companyName);
|
|
@@ -866,19 +965,32 @@ function parseLinkedInUrlLookupInput(content) {
|
|
|
866
965
|
const parsed = z
|
|
867
966
|
.array(z.object({
|
|
868
967
|
clientId: z.union([z.string(), z.number()]).nullish(),
|
|
968
|
+
contactId: z.union([z.string(), z.number()]).nullish(),
|
|
969
|
+
companyId: z.union([z.string(), z.number()]).nullish(),
|
|
869
970
|
fullName: z.string().nullish(),
|
|
870
971
|
companyName: z.string().nullish(),
|
|
871
972
|
email: z.string().nullish(),
|
|
872
|
-
|
|
973
|
+
contact_email: z.string().nullish(),
|
|
974
|
+
jobTitle: z.string().nullish(),
|
|
975
|
+
jobtitle: z.string().nullish(),
|
|
976
|
+
title: z.string().nullish(),
|
|
977
|
+
linkedin_company_url: z.string().nullish(),
|
|
978
|
+
linkedinCompanyUrl: z.string().nullish(),
|
|
979
|
+
deep_dive_recommended_role: z.string().nullish(),
|
|
980
|
+
deepDiveRecommendedRole: z.string().nullish()
|
|
873
981
|
}))
|
|
874
982
|
.parse(JSON.parse(trimmed));
|
|
875
983
|
return parsed
|
|
876
984
|
.map((row) => ({
|
|
877
985
|
clientId: row.clientId == null ? null : String(row.clientId).trim() || null,
|
|
986
|
+
contactId: row.contactId == null ? undefined : String(row.contactId).trim() || undefined,
|
|
987
|
+
companyId: row.companyId == null ? undefined : String(row.companyId).trim() || undefined,
|
|
878
988
|
fullName: row.fullName?.trim() ?? "",
|
|
879
989
|
companyName: row.companyName?.trim() ?? "",
|
|
880
|
-
email: row.email?.trim() || undefined,
|
|
881
|
-
jobTitle: row.jobTitle?.trim() || undefined
|
|
990
|
+
email: row.email?.trim() || row.contact_email?.trim() || undefined,
|
|
991
|
+
jobTitle: row.jobTitle?.trim() || row.jobtitle?.trim() || row.title?.trim() || undefined,
|
|
992
|
+
linkedinCompanyUrl: row.linkedin_company_url?.trim() || row.linkedinCompanyUrl?.trim() || undefined,
|
|
993
|
+
deepDiveRecommendedRole: row.deep_dive_recommended_role?.trim() || row.deepDiveRecommendedRole?.trim() || undefined
|
|
882
994
|
}))
|
|
883
995
|
.filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
|
|
884
996
|
}
|
|
@@ -906,23 +1018,90 @@ function parseLinkedInUrlLookupInput(content) {
|
|
|
906
1018
|
? headerValues.findIndex((value) => ["companyname", "company_name"].includes(value))
|
|
907
1019
|
: 2;
|
|
908
1020
|
const emailIndex = hasHeader ? headerValues.findIndex((value) => value === "email") : -1;
|
|
1021
|
+
const contactEmailIndex = hasHeader ? headerValues.findIndex((value) => value === "contact_email") : -1;
|
|
909
1022
|
const jobTitleIndex = hasHeader
|
|
910
1023
|
? headerValues.findIndex((value) => ["jobtitle", "job_title", "title"].includes(value))
|
|
911
1024
|
: -1;
|
|
1025
|
+
const contactIdIndex = hasHeader
|
|
1026
|
+
? headerValues.findIndex((value) => ["contactid", "contact_id", "hubspot_contact_id"].includes(value))
|
|
1027
|
+
: -1;
|
|
1028
|
+
const companyIdIndex = hasHeader
|
|
1029
|
+
? headerValues.findIndex((value) => ["companyid", "company_id", "hubspot_company_id"].includes(value))
|
|
1030
|
+
: -1;
|
|
1031
|
+
const linkedinCompanyUrlIndex = hasHeader
|
|
1032
|
+
? headerValues.findIndex((value) => ["linkedin_company_url", "linkedincompanyurl"].includes(value))
|
|
1033
|
+
: -1;
|
|
1034
|
+
const deepDiveRecommendedRoleIndex = hasHeader
|
|
1035
|
+
? headerValues.findIndex((value) => ["deep_dive_recommended_role", "deepdiverecommendedrole"].includes(value))
|
|
1036
|
+
: -1;
|
|
912
1037
|
return dataLines
|
|
913
1038
|
.map((line) => splitLooseDelimitedLine(line, delimiter).map((value) => value.trim()))
|
|
914
1039
|
.map((columns) => ({
|
|
915
1040
|
clientId: clientIdIndex >= 0 ? columns[clientIdIndex] || null : null,
|
|
1041
|
+
contactId: contactIdIndex >= 0 ? columns[contactIdIndex] || undefined : undefined,
|
|
1042
|
+
companyId: companyIdIndex >= 0 ? columns[companyIdIndex] || undefined : undefined,
|
|
916
1043
|
fullName: fullNameIndex >= 0 ? columns[fullNameIndex] || "" : "",
|
|
917
1044
|
companyName: companyNameIndex >= 0 ? columns[companyNameIndex] || "" : "",
|
|
918
|
-
email: emailIndex >= 0 ? columns[emailIndex] || undefined : undefined
|
|
919
|
-
|
|
1045
|
+
email: (emailIndex >= 0 ? columns[emailIndex] || undefined : undefined) ??
|
|
1046
|
+
(contactEmailIndex >= 0 ? columns[contactEmailIndex] || undefined : undefined),
|
|
1047
|
+
jobTitle: jobTitleIndex >= 0 ? columns[jobTitleIndex] || undefined : undefined,
|
|
1048
|
+
linkedinCompanyUrl: linkedinCompanyUrlIndex >= 0 ? columns[linkedinCompanyUrlIndex] || undefined : undefined,
|
|
1049
|
+
deepDiveRecommendedRole: deepDiveRecommendedRoleIndex >= 0 ? columns[deepDiveRecommendedRoleIndex] || undefined : undefined
|
|
920
1050
|
}))
|
|
921
1051
|
.filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
|
|
922
1052
|
}
|
|
1053
|
+
/**
 * Parses company lookup input into `{ clientId, companyName }` rows.
 *
 * Three input shapes are recognised, decided from the trimmed content:
 * - text starting with `[`: a JSON array of objects with optional `clientId`,
 *   `companyName` and `name` (`name` is the fallback for `companyName`);
 * - delimited text whose first line contains a `companyname`, `company_name`
 *   or `name` header (case-insensitive), optionally with a `clientid` /
 *   `client_id` column;
 * - anything else: one company name per non-empty line.
 *
 * Rows whose company name is empty are dropped in the JSON and header forms.
 *
 * @param {string} content - Raw file/stdin content.
 * @returns {Array<{ clientId: string | null, companyName: string }>}
 * @throws When the JSON form is not valid JSON or fails schema validation.
 */
function parseLinkedInCompanyLookupInput(content) {
    const text = content.trim();
    if (text.length === 0) {
        return [];
    }
    if (text.startsWith("[")) {
        const rowSchema = z.object({
            clientId: z.union([z.string(), z.number()]).nullish(),
            companyName: z.string().nullish(),
            name: z.string().nullish()
        });
        const jsonRows = z.array(rowSchema).parse(JSON.parse(text));
        const companies = [];
        for (const row of jsonRows) {
            const rawName = row.companyName?.trim() || row.name?.trim() || "";
            const entry = {
                clientId: row.clientId == null ? null : String(row.clientId).trim() || null,
                companyName: normalizeLookupWhitespace(rawName)
            };
            if (entry.companyName.length > 0) {
                companies.push(entry);
            }
        }
        return companies;
    }
    const lines = text
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    if (lines.length === 0) {
        return [];
    }
    const [firstLine = ""] = lines;
    const delimiter = detectLooseDelimiter(firstLine);
    const headerValues = splitLooseDelimitedLine(firstLine, delimiter).map((value) => value.trim().toLowerCase());
    const nameHeaders = ["companyname", "company_name", "name"];
    const companyNameIndex = headerValues.findIndex((value) => nameHeaders.includes(value));
    if (companyNameIndex < 0) {
        // No recognisable header: every line is a company name on its own.
        return lines.map((line) => ({
            clientId: null,
            companyName: normalizeLookupWhitespace(line)
        }));
    }
    const clientIdIndex = headerValues.findIndex((value) => ["clientid", "client_id"].includes(value));
    const companies = [];
    for (const line of lines.slice(1)) {
        const columns = splitLooseDelimitedLine(line, delimiter).map((value) => value.trim());
        const entry = {
            clientId: clientIdIndex >= 0 ? columns[clientIdIndex] || null : null,
            companyName: normalizeLookupWhitespace(columns[companyNameIndex] || "")
        };
        if (entry.companyName.length > 0) {
            companies.push(entry);
        }
    }
    return companies;
}
|
|
923
1102
|
function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
924
1103
|
return rows.flatMap((row, index) => {
|
|
925
|
-
const contactId = String(index + 1);
|
|
1104
|
+
const contactId = normalizeLinkedInLookupField(row.contactId) ?? String(index + 1);
|
|
926
1105
|
const syntheticEmail = row.email?.trim() || buildSyntheticLookupEmail(contactId);
|
|
927
1106
|
const rawCompanyName = normalizeLookupWhitespace(row.companyName);
|
|
928
1107
|
const cleanedCompanyName = normalizeLookupCompanyForSearch(cleanedCompanyMap.get(normalizeLookupCompanyForCleaning(rawCompanyName)) ?? rawCompanyName);
|
|
@@ -936,7 +1115,10 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
936
1115
|
companyName: cleanedCompanyName,
|
|
937
1116
|
companyNameOriginal: rawCompanyName || undefined,
|
|
938
1117
|
email: syntheticEmail,
|
|
939
|
-
jobTitle: row.jobTitle
|
|
1118
|
+
jobTitle: row.jobTitle,
|
|
1119
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1120
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1121
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
|
|
940
1122
|
}
|
|
941
1123
|
];
|
|
942
1124
|
}
|
|
@@ -951,7 +1133,10 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
951
1133
|
companyName: cleanedCompanyName,
|
|
952
1134
|
companyNameOriginal: rawCompanyName || undefined,
|
|
953
1135
|
email: syntheticEmail,
|
|
954
|
-
jobTitle: row.jobTitle
|
|
1136
|
+
jobTitle: row.jobTitle,
|
|
1137
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1138
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1139
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined
|
|
955
1140
|
}
|
|
956
1141
|
];
|
|
957
1142
|
const rawDiffers = rawSplit.firstName !== cleanedSplit.firstName ||
|
|
@@ -965,6 +1150,9 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
965
1150
|
companyNameOriginal: rawCompanyName || undefined,
|
|
966
1151
|
email: syntheticEmail,
|
|
967
1152
|
jobTitle: row.jobTitle,
|
|
1153
|
+
companyId: normalizeLinkedInLookupField(row.companyId),
|
|
1154
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || undefined,
|
|
1155
|
+
deepDiveRecommendedRole: row.deepDiveRecommendedRole?.trim() || undefined,
|
|
968
1156
|
isVariation: true
|
|
969
1157
|
});
|
|
970
1158
|
}
|
|
@@ -972,25 +1160,147 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
972
1160
|
});
|
|
973
1161
|
}
|
|
974
1162
|
/**
 * Reads the LinkedIn enrichment endpoint configuration from the environment.
 *
 * The endpoint is `SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL` when set;
 * otherwise it is derived from `PIPEDREAM_ENDPOINT_ID` as
 * `https://<id>.m.pipedream.net`. The remaining values default to "".
 *
 * @returns {{ endpointUrl: string, secret: string, clientId: string,
 *   projectId: string, projectEnvironment: string }}
 * @throws {Error} When neither an endpoint URL nor a usable endpoint id is configured.
 */
function readPipedreamLinkedInEnrichmentConfig() {
    const readSetting = (name) => resolveConfiguredEnvValue(process.env, name);
    const explicitUrl = readSetting("SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL");
    // The endpoint id is resolved once and trimmed BEFORE the emptiness check,
    // so a whitespace-only id is reported as missing instead of producing the
    // malformed URL "https://.m.pipedream.net". It is only consulted when no
    // explicit URL is configured.
    const endpointId = explicitUrl ? "" : readSetting("PIPEDREAM_ENDPOINT_ID")?.trim();
    const endpointUrl = explicitUrl || (endpointId ? `https://${endpointId}.m.pipedream.net` : "");
    if (!endpointUrl) {
        throw new Error("Missing LinkedIn enrichment endpoint. Set SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL or PIPEDREAM_ENDPOINT_ID.");
    }
    return {
        endpointUrl,
        secret: readSetting("PIPEDREAM_SECRET_KEY") || "",
        clientId: readSetting("PIPEDREAM_CLIENT_ID") || "",
        projectId: readSetting("PIPEDREAM_PROJECT_ID") || "",
        projectEnvironment: readSetting("PIPEDREAM_PROJECT_ENVIRONMENT") || ""
    };
}
|
|
1178
|
+
/**
 * Tells whether an e-mail address is one of the CLI's placeholder addresses.
 *
 * @param {string} value - E-mail address to inspect.
 * @returns {boolean} `true` when the whitespace-normalized, lower-cased value
 *   ends with `@salesprompter.invalid`.
 */
function isSyntheticLinkedInLookupEmail(value) {
    const syntheticSuffix = "@salesprompter.invalid";
    const lowered = normalizeLookupWhitespace(value).toLowerCase();
    return lowered.slice(-syntheticSuffix.length) === syntheticSuffix;
}
|
|
990
1182
|
/**
 * Extracts the `JSESSIONID` value from a cookie header string.
 *
 * The cookie name is matched case-insensitively and surrounding double quotes
 * around the value are ignored.
 *
 * @param {string} cookie - Cookie header text.
 * @returns {string} The trimmed JSESSIONID value, or "" when none is present.
 */
function deriveCsrfTokenFromCookie(cookie) {
    const [, sessionId = ""] = cookie.match(/JSESSIONID="?([^";]+)"?/i) ?? [];
    return sessionId.trim();
}
|
|
1186
|
+
/**
 * Turns a stored LinkedIn cookie into a cookie header string.
 *
 * A value that already contains `=` or `;` is taken to be a full header and is
 * returned (whitespace-normalized) unchanged; a bare value is wrapped as
 * `li_at=<value>`.
 *
 * @param {string} cookie - Cookie header or bare cookie value.
 * @returns {string} The cookie header, or "" when the input normalizes to nothing.
 */
function normalizeLinkedInDirectLookupCookieHeader(cookie) {
    const value = normalizeLookupWhitespace(cookie);
    if (!value) {
        return "";
    }
    const alreadyHeader = ["=", ";"].some((marker) => value.includes(marker));
    return alreadyHeader ? value : `li_at=${value}`;
}
|
|
1196
|
+
/**
 * Scans raw log text for serialized token records and returns the last one.
 *
 * A record is a JSON-looking fragment that starts with `csrfToken`, was
 * extracted from `sales-api/salesApiLeadSearch`, has a `syncStatus` of
 * `success` or `captured`, and ends with `userAgent`. The session cookie is
 * un-escaped (`\"` then `\\`) before use.
 *
 * @param {string} content - Log file content.
 * @returns {{ csrfToken: string, linkedInIdentity: string, sessionCookie: string,
 *   userAgent: string } | null} The most recent complete record, or `null`
 *   when there is none or any of its fields normalizes to empty.
 */
function parseLocalLinkedInExtensionTokenLog(content) {
    const recordPattern = /\{"csrfToken":"([^"]+)","extractedFrom":"sales-api\/salesApiLeadSearch"[\s\S]*?"linkedInIdentity":"([^"]+)"[\s\S]*?"sessionCookie":"([\s\S]*?)","syncStatus":"(success|captured)"[\s\S]*?"userAgent":"([^"]+)"\}/g;
    let latest;
    for (const record of content.matchAll(recordPattern)) {
        latest = record;
    }
    if (!latest) {
        return null;
    }
    // Capture group 4 is the sync status; it only constrains the match.
    const [, rawCsrfToken, rawIdentity, rawCookie, , rawUserAgent] = latest;
    const csrfToken = normalizeLookupWhitespace(rawCsrfToken);
    const linkedInIdentity = normalizeLookupWhitespace(rawIdentity);
    const sessionCookie = normalizeLookupWhitespace(rawCookie?.replace(/\\"/g, "\"").replace(/\\\\/g, "\\"));
    const userAgent = normalizeLookupWhitespace(rawUserAgent);
    const complete = Boolean(csrfToken && linkedInIdentity && sessionCookie && userAgent);
    return complete ? { csrfToken, linkedInIdentity, sessionCookie, userAgent } : null;
}
|
|
1218
|
+
/**
 * Reads one log file (decoded as latin1) and parses the token record out of it.
 *
 * @param {string} filePath - Path of the log file.
 * @returns {Promise<object | null>} The parsed record, or `null` when the file
 *   cannot be read, parsing throws, or no record is found.
 */
async function readLocalLinkedInExtensionTokenLog(filePath) {
    let snapshot = null;
    try {
        const rawLog = await readFile(filePath, "latin1");
        snapshot = parseLocalLinkedInExtensionTokenLog(rawLog);
    }
    catch {
        // Best-effort probe: unreadable or unparsable files count as "no record".
        snapshot = null;
    }
    return snapshot;
}
|
|
1227
|
+
/**
 * Lists log files that may contain a LinkedIn token record, most "recent"
 * first by reverse lexicographic path order.
 *
 * Resolution order:
 * 1. `SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_LOG_PATH`: that single file.
 * 2. `SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_DIR`: the `.log` / `.ldb` files
 *    directly inside that directory (empty list when it cannot be read).
 * 3. Otherwise every `.log` file under
 *    `<home>/Library/Application Support/{Google/Chrome,Chromium}/<profile>/Local Extension Settings/<extension>/`
 *    (presumably the macOS Chrome layout; other platforms are not scanned).
 *
 * Directories that cannot be listed are skipped silently.
 *
 * @returns {Promise<string[]>} Candidate file paths.
 */
async function listChromeExtensionTokenLogCandidates() {
    // readdir that reports an unreadable directory as null instead of throwing.
    const listEntries = async (directory) => {
        try {
            return await readdir(directory);
        }
        catch {
            return null;
        }
    };
    const overrideFile = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_LOG_PATH);
    if (overrideFile) {
        return [overrideFile];
    }
    const overrideDir = normalizeLookupWhitespace(process.env.SALESPROMPTER_LINKEDIN_EXTENSION_TOKENS_DIR);
    if (overrideDir) {
        const entries = await listEntries(overrideDir);
        if (entries === null) {
            return [];
        }
        return entries
            .filter((file) => file.endsWith(".log") || file.endsWith(".ldb"))
            .map((file) => path.join(overrideDir, file))
            .sort()
            .reverse();
    }
    const chromeRoots = [
        path.join(os.homedir(), "Library", "Application Support", "Google", "Chrome"),
        path.join(os.homedir(), "Library", "Application Support", "Chromium")
    ];
    const candidates = [];
    for (const chromeRoot of chromeRoots) {
        for (const profileDir of (await listEntries(chromeRoot)) ?? []) {
            const extensionSettingsRoot = path.join(chromeRoot, profileDir, "Local Extension Settings");
            for (const extensionId of (await listEntries(extensionSettingsRoot)) ?? []) {
                const extensionDir = path.join(extensionSettingsRoot, extensionId);
                for (const file of (await listEntries(extensionDir)) ?? []) {
                    if (file.endsWith(".log")) {
                        candidates.push(path.join(extensionDir, file));
                    }
                }
            }
        }
    }
    return candidates.sort().reverse();
}
|
|
1288
|
+
/**
 * Builds a direct-lookup configuration from the first candidate log file that
 * yields a token record.
 *
 * @returns {Promise<{ csrfToken: string, identity: string, cookie: string,
 *   userAgent: string } | null>} The configuration, or `null` when no
 *   candidate file contains a record.
 */
async function readLocalLinkedInExtensionDirectLookupConfig() {
    for (const logPath of await listChromeExtensionTokenLogCandidates()) {
        const snapshot = await readLocalLinkedInExtensionTokenLog(logPath);
        if (snapshot) {
            const { csrfToken, linkedInIdentity, sessionCookie, userAgent } = snapshot;
            return {
                csrfToken,
                identity: linkedInIdentity,
                cookie: normalizeLinkedInDirectLookupCookieHeader(sessionCookie),
                userAgent
            };
        }
    }
    return null;
}
|
|
994
1304
|
function readLinkedInDirectLookupEnvConfig() {
|
|
995
1305
|
const cookie = process.env.SALESPROMPTER_LINKEDIN_SALES_NAV_COOKIE?.trim() ||
|
|
996
1306
|
process.env.LINKEDIN_SALES_NAV_COOKIE?.trim() ||
|
|
@@ -1007,7 +1317,7 @@ function readLinkedInDirectLookupEnvConfig() {
|
|
|
1007
1317
|
return {
|
|
1008
1318
|
csrfToken,
|
|
1009
1319
|
identity,
|
|
1010
|
-
cookie,
|
|
1320
|
+
cookie: normalizeLinkedInDirectLookupCookieHeader(cookie),
|
|
1011
1321
|
userAgent: process.env.SALESPROMPTER_LINKEDIN_USER_AGENT?.trim() ||
|
|
1012
1322
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
|
|
1013
1323
|
};
|
|
@@ -1057,7 +1367,7 @@ async function readStoredLinkedInDirectLookupConfig() {
|
|
|
1057
1367
|
return {
|
|
1058
1368
|
csrfToken,
|
|
1059
1369
|
identity,
|
|
1060
|
-
cookie: claimed.sessionCookie,
|
|
1370
|
+
cookie: normalizeLinkedInDirectLookupCookieHeader(claimed.sessionCookie),
|
|
1061
1371
|
userAgent
|
|
1062
1372
|
};
|
|
1063
1373
|
}
|
|
@@ -1071,6 +1381,11 @@ async function readLinkedInDirectLookupConfig() {
|
|
|
1071
1381
|
cachedLinkedInDirectLookupConfig = envConfig;
|
|
1072
1382
|
return envConfig;
|
|
1073
1383
|
}
|
|
1384
|
+
const localExtensionConfig = await readLocalLinkedInExtensionDirectLookupConfig();
|
|
1385
|
+
if (localExtensionConfig) {
|
|
1386
|
+
cachedLinkedInDirectLookupConfig = localExtensionConfig;
|
|
1387
|
+
return localExtensionConfig;
|
|
1388
|
+
}
|
|
1074
1389
|
const storedConfig = await readStoredLinkedInDirectLookupConfig();
|
|
1075
1390
|
if (storedConfig) {
|
|
1076
1391
|
cachedLinkedInDirectLookupConfig = storedConfig;
|
|
@@ -1087,51 +1402,310 @@ function buildLinkedInSalesApiUrl(params) {
|
|
|
1087
1402
|
const encodedFirstName = encodeURIComponent(params.firstName);
|
|
1088
1403
|
const encodedLastName = encodeURIComponent(params.lastName);
|
|
1089
1404
|
const encodedCompanyName = encodeURIComponent(params.companyName);
|
|
1405
|
+
const encodedKeywords = encodeURIComponent(params.keywordsText?.trim() || params.companyName);
|
|
1090
1406
|
const filters = params.searchMode === "current_company"
|
|
1091
1407
|
? `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED))),(type:CURRENT_COMPANY,values:List((text:${encodedCompanyName},selectionType:INCLUDED)))`
|
|
1092
1408
|
: `(type:FIRST_NAME,values:List((text:${encodedFirstName},selectionType:INCLUDED))),(type:LAST_NAME,values:List((text:${encodedLastName},selectionType:INCLUDED)))`;
|
|
1093
|
-
const keywordsSegment = params.searchMode === "
|
|
1409
|
+
const keywordsSegment = params.searchMode === "current_company" ? "" : `,keywords:${encodedKeywords}`;
|
|
1094
1410
|
return `${baseUrl.replace(/\/+$/, "")}/sales-api/salesApiLeadSearch?q=searchQuery&query=(recentSearchParam:(id:${Date.now()},doLogHistory:true),filters:List(${filters})${keywordsSegment})&start=0&count=25&trackingParam=(sessionId:${generateLinkedInSessionId()})&decorationId=com.linkedin.sales.deco.desktop.searchv2.LeadSearchResult-14`;
|
|
1095
1411
|
}
|
|
1412
|
+
/**
 * Picks up to four distinguishing keywords out of a job title.
 *
 * The title is normalized with `normalizeLooseMatchText` and split on
 * whitespace. A token is kept when it has at least four characters (or is one
 * of the short terms `hr`, `it`, `cfo`) and is not a generic seniority/role
 * word such as `senior` or `manager`.
 *
 * @param {string} value - Job title text.
 * @returns {string[]} The first four qualifying tokens, in title order.
 */
function extractLookupTitleKeywords(value) {
    const shortAllowlist = new Set(["hr", "it", "cfo"]);
    const genericTitleWords = new Set([
        "head",
        "senior",
        "consultant",
        "manager",
        "specialist",
        "lead",
        "global",
        "team",
        "group"
    ]);
    const maxKeywords = 4;
    const keywords = [];
    for (const token of normalizeLooseMatchText(value).split(/\s+/)) {
        const longEnough = token.length >= 4 || shortAllowlist.has(token);
        if (!longEnough || genericTitleWords.has(token)) {
            continue;
        }
        keywords.push(token);
        if (keywords.length === maxKeywords) {
            break;
        }
    }
    return keywords;
}
|
|
1430
|
+
/**
 * Maps a deep-dive role label to the search keywords associated with it.
 *
 * The label is normalized with `normalizeLooseMatchText` and must then equal
 * one of the known role keys exactly.
 *
 * @param {string} role - Role label, e.g. a value that normalizes to `budgetholder`.
 * @returns {string[]} A fresh array of keywords; empty for unknown roles.
 */
function buildDeepDiveRoleSearchKeywords(role) {
    // A Map (not a plain object) so labels like "constructor" cannot hit inherited keys.
    const keywordsByRole = new Map([
        ["budgetholder", ["finance", "procurement", "purchasing", "accounting", "controlling", "cfo"]],
        ["decisionmaker", ["director", "head", "vp", "chief", "leiter", "lead"]],
        ["champion", ["hr", "workplace", "operations", "it", "people", "office"]],
        ["executivesponsor", ["executive", "board", "chief", "managing", "director", "ceo"]],
        ["influencer", ["specialist", "manager", "consultant", "project", "workplace", "hr"]],
        ["legalandcompliance", ["legal", "compliance", "datenschutz", "counsel"]],
        ["blocker", ["procurement", "legal", "compliance", "security"]],
        ["enduser", ["workplace", "office", "operations", "assistant", "admin"]]
    ]);
    const roleKey = normalizeLooseMatchText(role);
    return keywordsByRole.get(roleKey) ?? [];
}
|
|
1096
1453
|
/**
 * Builds the Sales Navigator account-search API URL for a company name.
 *
 * The base URL comes from SALESPROMPTER_LINKEDIN_SALES_API_BASE_URL (trailing
 * slashes removed) and falls back to https://www.linkedin.com.
 *
 * @param {string} companyName Company name to use as the `keywords` value.
 * @returns {string} Fully assembled request URL.
 */
function buildLinkedInAccountSearchApiUrl(companyName) {
    const baseUrl = process.env.SALESPROMPTER_LINKEDIN_SALES_API_BASE_URL?.trim() ||
        "https://www.linkedin.com";
    // encodeURIComponent leaves ! ' ( ) * untouched, but parentheses and quotes
    // are structural in the Rest.li query below. A name such as "Foo (Bar)"
    // would otherwise terminate the query object early, so escape them too.
    const encodedCompanyName = encodeURIComponent(companyName)
        .replace(/[!'()*]/g, (char) => `%${char.charCodeAt(0).toString(16).toUpperCase()}`);
    return `${baseUrl.replace(/\/+$/, "")}/sales-api/salesApiAccountSearch?q=searchQuery&query=(recentSearchParam:(id:${Date.now()},doLogHistory:true),spellCorrectionEnabled:true,keywords:${encodedCompanyName})&start=0&count=10&trackingParam=(sessionId:${generateLinkedInSessionId()})&decorationId=com.linkedin.sales.deco.desktop.searchv2.AccountSearchResult-14`;
}
|
|
1102
|
-
function buildLinkedInLookupSearchVariants(contact) {
|
|
1459
|
+
/**
 * Builds the ordered list of Sales Navigator search variants to try when
 * looking up a contact.
 *
 * Company-name candidates are collected from the contact record, its LinkedIn
 * company handle, resolved aliases, the email host and the hints returned by
 * `buildLinkedInProfileCompanyHints`, each with a score. The six best
 * candidates are then expanded into variants in four stages:
 *   1. "current_company" for high-confidence names (email host, handle,
 *      aliases, candidates scoring >= 90),
 *   2. "keywords" for the clean company name and all ranked candidates,
 *   3. "keywords_title" (company + title keywords) for the same names,
 *   4. "current_company" again as a fallback for the remaining names.
 * Duplicates (case-insensitive on name, company, mode and keywords) are
 * dropped and the result is capped at 12 variants.
 *
 * @param {object} contact Contact record; `firstName` and `lastName` are
 *   read as strings, the other fields may be missing.
 * @param {number} timeoutMs Budget forwarded to `buildLinkedInProfileCompanyHints`.
 * @param {string[]} [resolvedCompanyAliases] Already resolved company aliases.
 * @returns {Promise<Array<{firstName: string, lastName: string, companyName: string, searchMode: string, keywordsText: (string|undefined)}>>}
 */
async function buildLinkedInLookupSearchVariants(contact, timeoutMs, resolvedCompanyAliases = []) {
    const variants = [];
    const seen = new Set();
    const companyCandidateScores = new Map();
    // Keeps the highest score seen for each whitespace-normalized name.
    const addCompanyCandidate = (value, score) => {
        const normalized = normalizeLookupWhitespace(value);
        if (!normalized) {
            return;
        }
        companyCandidateScores.set(normalized, Math.max(score, companyCandidateScores.get(normalized) ?? 0));
    };
    addCompanyCandidate(contact.companyName, 80);
    addCompanyCandidate(contact.companyNameOriginal, 70);
    // Purely numeric handles are company ids, not names, so they are ignored.
    const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
    const handleWords = linkedInHandle && !/^\d+$/.test(linkedInHandle)
        ? linkedInHandle.replace(/[-_]+/g, " ")
        : "";
    if (handleWords) {
        addCompanyCandidate(handleWords, 95);
    }
    for (const alias of resolvedCompanyAliases) {
        addCompanyCandidate(alias, 110);
    }
    // First label of the email domain (computed once, used for both the
    // scored candidate and the stage-1 list). Synthetic emails are skipped.
    const emailHost = (() => {
        const email = normalizeLookupWhitespace(contact.email);
        if (!email || isSyntheticLinkedInLookupEmail(email)) {
            return "";
        }
        const at = email.lastIndexOf("@");
        const domain = at >= 0 ? email.slice(at + 1) : "";
        return domain.replace(/^www\./i, "").split(".")[0] ?? "";
    })();
    if (emailHost) {
        addCompanyCandidate(emailHost.replace(/[-_]+/g, " "), 100);
    }
    if (contact.jobTitle && contact.deepDiveRecommendedRole) {
        // Low-confidence candidate: last word of the company name with >= 4 chars.
        const primaryWord = normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
            .split(/\s+/)
            .filter((part) => part.length >= 4)
            .slice(-1)[0];
        if (primaryWord) {
            addCompanyCandidate(primaryWord, 45);
        }
    }
    const companyHints = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
    for (const phrase of companyHints.phrases) {
        const tokenCount = normalizeLooseMatchText(phrase).split(/\s+/).filter(Boolean).length;
        if (tokenCount >= 1 && tokenCount <= 4) {
            addCompanyCandidate(phrase, tokenCount <= 2 ? 75 : 60);
        }
    }
    for (const keyword of companyHints.keywords.slice(0, 5)) {
        // A keyword containing a dot scores higher than a plain word.
        addCompanyCandidate(keyword, keyword.includes(".") ? 90 : 55);
    }
    const titleKeywords = Array.from(new Set([
        ...extractLookupTitleKeywords(contact.jobTitle),
        ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
    ])).slice(0, 6);
    // Highest score first; shorter names win ties.
    const rankedCompanyCandidates = Array.from(companyCandidateScores.entries())
        .sort((left, right) => right[1] - left[1] || left[0].length - right[0].length)
        .slice(0, 6);
    const emailHostCandidate = emailHost
        ? normalizeLookupWhitespace(emailHost).replace(/[-_]+/g, " ")
        : "";
    const cleanCompanyCandidate = normalizeLookupWhitespace(contact.companyName) ||
        normalizeLookupWhitespace(contact.companyNameOriginal) ||
        "";
    const linkedInHandleCandidate = handleWords ? normalizeLookupWhitespace(handleWords) : "";
    const pushVariant = (companyName, searchMode) => {
        const normalizedCompany = normalizeLookupWhitespace(companyName);
        if (!normalizedCompany) {
            return;
        }
        const keywordsText = searchMode === "keywords_title" && titleKeywords.length > 0
            ? `${normalizedCompany} ${titleKeywords.join(" ")}`
            : undefined;
        // A title-keyword search without any title keywords is pointless.
        if (searchMode === "keywords_title" && !keywordsText) {
            return;
        }
        const key = [
            contact.firstName.trim().toLowerCase(),
            contact.lastName.trim().toLowerCase(),
            normalizedCompany.toLowerCase(),
            searchMode,
            keywordsText?.toLowerCase() ?? ""
        ].join("|");
        if (seen.has(key)) {
            return;
        }
        seen.add(key);
        variants.push({
            firstName: contact.firstName,
            lastName: contact.lastName,
            companyName: normalizedCompany,
            searchMode,
            keywordsText
        });
    };
    const rankedCompanyNames = rankedCompanyCandidates.map(([companyName]) => companyName);
    const currentCompanyStageCandidates = [
        emailHostCandidate,
        linkedInHandleCandidate,
        ...resolvedCompanyAliases,
        ...rankedCompanyNames.filter((companyName) => (companyCandidateScores.get(companyName) ?? 0) >= 90)
    ];
    const keywordStageCandidates = [cleanCompanyCandidate, ...rankedCompanyNames];
    const fallbackCurrentCompanyCandidates = [
        cleanCompanyCandidate,
        normalizeLookupWhitespace(contact.companyNameOriginal),
        ...rankedCompanyNames
    ];
    for (const companyName of currentCompanyStageCandidates) {
        pushVariant(companyName, "current_company");
    }
    for (const companyName of keywordStageCandidates) {
        pushVariant(companyName, "keywords");
    }
    for (const companyName of keywordStageCandidates) {
        pushVariant(companyName, "keywords_title");
    }
    for (const companyName of fallbackCurrentCompanyCandidates) {
        pushVariant(companyName, "current_company");
    }
    // The stages above already emit every ranked candidate in all three
    // modes, so no further catch-all pass is needed.
    return variants.slice(0, 12);
}
|
|
1600
|
+
/**
 * Extracts a canonical Sales Navigator lead URL from a string.
 *
 * Two shapes are recognized:
 *  - a direct `https://www.linkedin.com/sales/lead/<id>` URL (query string,
 *    fragment and trailing path are cut off), and
 *  - a "disguised" lead id served under `/in/`, i.e. a slug that starts with
 *    the `ACw` prefix, which is rewritten to the `/sales/lead/` form.
 *
 * @param {unknown} value Candidate URL or text containing one.
 * @returns {string|null} Canonical lead URL, or null when none is present.
 */
function normalizeSalesNavLeadUrl(value) {
    const trimmed = String(value ?? "").trim();
    if (!trimmed) {
        return null;
    }
    const directMatch = trimmed.match(/https:\/\/www\.linkedin\.com\/sales\/lead\/[^/?#]+/i);
    if (directMatch) {
        return directMatch[0] ?? null;
    }
    // The host may appear in any letter case, but the lead-id prefix may not:
    // matching "ACw" case-insensitively would misread ordinary vanity slugs
    // such as /in/acwilson as lead ids.
    const disguisedCandidates = trimmed.matchAll(/https:\/\/www\.linkedin\.com\/in\/(acw[A-Za-z0-9_-]+)/gi);
    for (const candidate of disguisedCandidates) {
        const leadId = candidate[1];
        if (leadId.startsWith("ACw")) {
            return `https://www.linkedin.com/sales/lead/${leadId}`;
        }
    }
    return null;
}
|
|
1615
|
+
/**
 * Canonicalizes a public LinkedIn profile URL to
 * `https://www.linkedin.com/in/<slug>`.
 *
 * Returns null when the input is empty, is not a parseable URL, is not on
 * linkedin.com (any subdomain is accepted), does not point at an `/in/<slug>`
 * path, or when `normalizeSalesNavLeadUrl` recognizes the result as a Sales
 * Navigator lead in disguise.
 *
 * @param {unknown} value Candidate profile URL.
 * @returns {string|null} Canonical public profile URL or null.
 */
function normalizePublicLinkedInProfileUrl(value) {
    const raw = String(value ?? "").trim();
    if (raw === "") {
        return null;
    }
    let hostname;
    let pathname;
    try {
        ({ hostname, pathname } = new URL(raw));
    }
    catch {
        return null;
    }
    const onLinkedIn = /(^|\.)linkedin\.com$/i.test(hostname);
    const slug = onLinkedIn ? pathname.match(/^\/in\/([^/?#]+)\/?/i)?.[1] : undefined;
    if (!slug) {
        return null;
    }
    const publicUrl = `https://www.linkedin.com/in/${slug}`;
    if (normalizeSalesNavLeadUrl(publicUrl)) {
        return null;
    }
    return publicUrl;
}
|
|
1131
1637
|
/**
 * Finds a public LinkedIn profile URL inside a Sales API result element.
 *
 * The well-known string fields (`linkedinProfileUrl`, `profileUrl`, `url`)
 * are tried first, in that order. If none of them normalizes to a public
 * profile URL, every string yielded by `collectNestedStrings(element)` is
 * tried.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {string|null} First URL accepted by
 *   `normalizePublicLinkedInProfileUrl`, or null.
 */
function extractLinkedInProfileUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["linkedinProfileUrl", "profileUrl", "url"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const profileUrl = normalizePublicLinkedInProfileUrl(raw);
        if (profileUrl) {
            return profileUrl;
        }
    }
    // Fall back to any string found in the element.
    for (const text of collectNestedStrings(element)) {
        const profileUrl = normalizePublicLinkedInProfileUrl(text);
        if (profileUrl) {
            return profileUrl;
        }
    }
    return null;
}
|
|
1660
|
+
/**
 * Finds a Sales Navigator lead URL inside a Sales API result element.
 *
 * Lookup order:
 *  1. the string fields `salesNavProfileUrl`, `sales_nav_profile_url`, `url`;
 *  2. every string yielded by `collectNestedStrings(element)`;
 *  3. the first parenthesized, comma-terminated token of `entityUrn`, which
 *     is used as the lead id.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {string|null} Lead URL or null when nothing usable is found.
 */
function extractLinkedInSalesNavLeadUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["salesNavProfileUrl", "sales_nav_profile_url", "url"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const leadUrl = normalizeSalesNavLeadUrl(raw);
        if (leadUrl) {
            return leadUrl;
        }
    }
    for (const text of collectNestedStrings(element)) {
        const leadUrl = normalizeSalesNavLeadUrl(text);
        if (leadUrl) {
            return leadUrl;
        }
    }
    // Last resort: derive the URL from the id embedded in the entity URN.
    const urn = typeof element.entityUrn === "string" ? element.entityUrn : "";
    const leadId = urn.match(/\(([^,]+),/)?.[1];
    return leadId ? `https://www.linkedin.com/sales/lead/${leadId}` : null;
}
|
|
1685
|
+
/**
 * Finds a Sales Navigator company URL inside a Sales API result element.
 *
 * Lookup order:
 *  1. the string fields `salesNavCompanyUrl`, `sales_nav_company_url`, `url`;
 *  2. every string yielded by `collectNestedStrings(element)`;
 *  3. the first parenthesized, comma-terminated token of `entityUrn`, which
 *     is used as the company id.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {string|null} `https://www.linkedin.com/sales/company/<id>` or null.
 */
function extractLinkedInSalesNavCompanyUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    const findCompanyUrl = (text) => text.match(/https:\/\/www\.linkedin\.com\/sales\/company\/[^/?#]+/i)?.[0] ?? null;
    for (const field of ["salesNavCompanyUrl", "sales_nav_company_url", "url"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const companyUrl = findCompanyUrl(raw);
        if (companyUrl) {
            return companyUrl;
        }
    }
    for (const text of collectNestedStrings(element)) {
        const companyUrl = findCompanyUrl(text);
        if (companyUrl) {
            return companyUrl;
        }
    }
    // Last resort: derive the URL from the id embedded in the entity URN.
    const urn = typeof element.entityUrn === "string" ? element.entityUrn : "";
    const companyId = urn.match(/\(([^,]+),/)?.[1];
    return companyId ? `https://www.linkedin.com/sales/company/${companyId}` : null;
}
|
|
1136
1710
|
function collectNestedStrings(value, seen = new Set()) {
|
|
1137
1711
|
if (value == null || seen.has(value)) {
|
|
@@ -1200,34 +1774,140 @@ function extractLinkedInCompanyNameFromSalesApiElement(element) {
|
|
|
1200
1774
|
}
|
|
1201
1775
|
return null;
|
|
1202
1776
|
}
|
|
1203
|
-
function
|
|
1777
|
+
/**
 * Reads a person's full name from a Sales API result element.
 *
 * Prefers the `fullName` field, then `name`; when neither holds a non-blank
 * string the name is assembled from `firstName` and `lastName`.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {string|null} Whitespace-normalized name, or null when unavailable.
 */
function extractLinkedInFullNameFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["fullName", "name"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const name = normalizeLookupWhitespace(raw);
        if (name) {
            return name;
        }
    }
    const given = typeof element.firstName === "string" ? normalizeLookupWhitespace(element.firstName) : "";
    const family = typeof element.lastName === "string" ? normalizeLookupWhitespace(element.lastName) : "";
    // Normalizing the joined string also trims the gap left by a missing part.
    return normalizeLookupWhitespace(`${given} ${family}`) || null;
}
|
|
1222
|
-
function
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1796
|
+
/**
 * Reads a person's job title from a Sales API result element.
 *
 * Prefers the `title` field, then `occupation`; when neither holds a
 * non-blank string, the `title` of the first entry in `currentPositions`
 * is used.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {string|null} Whitespace-normalized title, or null when unavailable.
 */
function extractLinkedInTitleFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["title", "occupation"]) {
        const raw = element[field];
        if (typeof raw !== "string" || raw === "") {
            continue;
        }
        const title = normalizeLookupWhitespace(raw);
        if (title) {
            return title;
        }
    }
    const positions = element.currentPositions;
    const firstPosition = Array.isArray(positions) && positions.length > 0 ? positions[0] : null;
    if (!firstPosition || typeof firstPosition.title !== "string") {
        return null;
    }
    return normalizeLookupWhitespace(firstPosition.title) || null;
}
|
|
1818
|
+
/**
 * Scores how well a Sales API result element matches a contact.
 *
 * Scoring:
 *  - name: +120 for an exact normalized full-name match, otherwise +90 when
 *    the candidate name contains both the first and the last name;
 *  - company: for every expected company hint (original/clean name, LinkedIn
 *    handle, email host) +40 on an exact match or +25 when one string
 *    contains the other;
 *  - title: +6 for each distinct title hint (job-title keywords plus
 *    deep-dive role keywords, at most six) found in the candidate title.
 *
 * @param {object} contact Contact being looked up.
 * @param {object|null|undefined} element Sales API result element.
 * @returns {{score: number, fullName: (string|null), companyName: (string|null), title: (string|null), exactNameMatch: boolean, companyMatchCount: number}}
 */
function scoreLinkedInSalesApiElementMatch(contact, element) {
    const fullName = extractLinkedInFullNameFromSalesApiElement(element);
    // Prefer the company of the first current position, then the element itself.
    const companyName = extractLinkedInCompanyNameFromSalesApiElement(Array.isArray(element?.currentPositions) && element.currentPositions.length > 0
        ? element.currentPositions[0]
        : element) ?? extractLinkedInCompanyNameFromSalesApiElement(element);
    const title = extractLinkedInTitleFromSalesApiElement(element);
    const expectedFullName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const candidateFullName = normalizeLooseMatchText(fullName);
    const expectedCompanies = Array.from(new Set([
        normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName),
        normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(contact.companyNameOriginal ?? contact.companyName)),
        normalizeLooseMatchText(normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? ""),
        normalizeLooseMatchText((() => {
            const email = normalizeLookupWhitespace(contact.email);
            if (!email || isSyntheticLinkedInLookupEmail(email)) {
                return "";
            }
            return email.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
        })())
    ].filter(Boolean)));
    const candidateCompany = normalizeLooseMatchText(companyName);
    const candidateTitle = normalizeLooseMatchText(title);
    let score = 0;
    let exactNameMatch = false;
    let companyMatchCount = 0;
    if (expectedFullName && candidateFullName === expectedFullName) {
        score += 120;
        exactNameMatch = true;
    }
    else if (expectedFullName &&
        candidateFullName.includes(normalizeLooseMatchText(contact.firstName)) &&
        candidateFullName.includes(normalizeLooseMatchText(contact.lastName))) {
        score += 90;
    }
    // An element without a company must not earn company credit: every string
    // "includes" the empty string, so an empty candidate would otherwise match
    // every hint.
    if (candidateCompany) {
        for (const companyHint of expectedCompanies) {
            if (candidateCompany === companyHint) {
                score += 40;
                companyMatchCount += 1;
            }
            else if (candidateCompany.includes(companyHint) || companyHint.includes(candidateCompany)) {
                score += 25;
                companyMatchCount += 1;
            }
        }
    }
    // De-duplicate so a word present in both the job title and the role
    // keywords is only rewarded once (same approach as the variant builder).
    const titleHints = Array.from(new Set([
        ...extractLookupTitleKeywords(contact.jobTitle),
        ...buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole)
    ])).slice(0, 6);
    for (const hint of titleHints) {
        if (hint && candidateTitle.includes(normalizeLooseMatchText(hint))) {
            score += 6;
        }
    }
    return {
        score,
        fullName,
        companyName,
        title,
        exactNameMatch,
        companyMatchCount
    };
}
|
|
1883
|
+
/**
 * Reads a company's employee count from a Sales API result element.
 *
 * A finite numeric `employeeCount` wins, then `employeesOnLinkedInCount`;
 * the value is truncated to an integer and clamped at zero. Without a
 * numeric field, the strings yielded by `collectNestedStrings(element)` are
 * scanned for "<number> employees" and the first hit is returned with its
 * `.`/`,` separators removed.
 *
 * @param {object|null|undefined} element Sales API result element.
 * @returns {number|null} Employee count, or null when none is found.
 */
function extractLinkedInCompanyEmployeeCountFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    for (const field of ["employeeCount", "employeesOnLinkedInCount"]) {
        const count = element[field];
        if (typeof count === "number" && Number.isFinite(count)) {
            return Math.max(0, Math.trunc(count));
        }
    }
    for (const text of collectNestedStrings(element)) {
        const digits = text.match(/(\d[\d.,]*)\s+employees\b/i)?.[1];
        if (digits !== undefined) {
            return Number(digits.replace(/[.,]/g, ""));
        }
    }
    return null;
}
|
|
1902
|
+
function buildLinkedInCompanyLookupVariants(params) {
|
|
1903
|
+
const variants = [];
|
|
1904
|
+
const seen = new Set();
|
|
1905
|
+
const rawCandidates = [
|
|
1906
|
+
normalizeLookupWhitespace(params.companyName),
|
|
1907
|
+
normalizeLookupWhitespace(params.companyNameOriginal)
|
|
1908
|
+
].filter(Boolean);
|
|
1909
|
+
const normalizedCandidates = rawCandidates.flatMap((candidate) => {
|
|
1910
|
+
const aggressive = aggressivelyCleanLookupCompanyName(candidate);
|
|
1231
1911
|
const searchable = normalizeLookupCompanyForSearch(candidate);
|
|
1232
1912
|
const dePunctuated = normalizeLookupWhitespace(candidate.replace(/[^\p{L}\p{N}]+/gu, " "));
|
|
1233
1913
|
return [candidate, aggressive, searchable, dePunctuated];
|
|
@@ -1236,20 +1916,1050 @@ function buildLinkedInCompanyLookupVariants(params) {
|
|
|
1236
1916
|
if (!companyName) {
|
|
1237
1917
|
continue;
|
|
1238
1918
|
}
|
|
1239
|
-
const key = companyName.toLowerCase();
|
|
1240
|
-
if (seen.has(key)) {
|
|
1241
|
-
continue;
|
|
1919
|
+
const key = companyName.toLowerCase();
|
|
1920
|
+
if (seen.has(key)) {
|
|
1921
|
+
continue;
|
|
1922
|
+
}
|
|
1923
|
+
seen.add(key);
|
|
1924
|
+
variants.push({
|
|
1925
|
+
contact_id: params.contactId,
|
|
1926
|
+
companyName
|
|
1927
|
+
});
|
|
1928
|
+
}
|
|
1929
|
+
return variants;
|
|
1930
|
+
}
|
|
1931
|
+
/**
 * Builds the grouping key used to share one company context between contacts.
 *
 * @param {object} contact Contact record.
 * @returns {string} Loosely normalized company name; the original name is
 *   used when present, otherwise the cleaned one.
 */
function buildDirectCompanyContextKey(contact) {
    const displayName = contact.companyNameOriginal ?? contact.companyName;
    return normalizeLooseMatchText(displayName);
}
|
|
1934
|
+
/**
 * Resolves one LinkedIn company context per distinct company among the
 * given contacts by querying the Sales Navigator account-search API.
 *
 * Contacts are grouped by `buildDirectCompanyContextKey`; the first contact
 * of each group drives the lookup. Up to four company-name variants are
 * tried sequentially within a per-company budget of
 * `min(params.timeoutMs, 10s)`; each request is aborted after at most 6s.
 * The first response whose first element yields a company URL, Sales
 * Navigator URL or name ends the search for that company. An HTTP 429 stops
 * trying further variants for that company; other failures move on to the
 * next variant. A context is recorded for every company, even when nothing
 * matched.
 *
 * @param {{contacts: object[], timeoutMs: number, config: {csrfToken: string, userAgent: string, identity: string, cookie: string}}} params
 * @returns {Promise<Map<string, {normalizedCompanyKey: string, aliases: string[], linkedinCompanyUrl: (string|null), salesNavCompanyUrl: (string|null), matchedCompanyName: (string|null), matchedCompanyEmployeeCount: (number|null)}>>}
 */
async function resolveDirectLinkedInCompanyContexts(params) {
    const perCompanyBudgetMs = Math.min(params.timeoutMs, 10_000);
    // First contact seen for each company key represents that company.
    const primaryByCompany = new Map();
    for (const contact of params.contacts) {
        const key = buildDirectCompanyContextKey(contact);
        if (!key || primaryByCompany.has(key)) {
            continue;
        }
        primaryByCompany.set(key, contact);
    }
    const contexts = new Map();
    for (const [companyKey, contact] of primaryByCompany.entries()) {
        const aliases = new Set();
        const addAlias = (value) => {
            const normalized = normalizeLookupWhitespace(value);
            if (!normalized) {
                return;
            }
            aliases.add(normalized);
        };
        // Aliases taken from URLs may be bare numeric ids. Those are not
        // company names and would later be searched as if they were, so they
        // are skipped for every URL-derived alias, not only the initial handle.
        const addUrlDerivedAlias = (value) => {
            const normalized = normalizeLookupWhitespace(value);
            if (!normalized || /^\d+$/.test(normalized)) {
                return;
            }
            aliases.add(normalized);
        };
        addAlias(contact.companyNameOriginal);
        addAlias(contact.companyName);
        const existingHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
        if (existingHandle && !/^\d+$/.test(existingHandle)) {
            addAlias(existingHandle.replace(/[-_]+/g, " "));
        }
        let matchedCompanyUrl = contact.linkedinCompanyUrl ?? null;
        let matchedSalesNavCompanyUrl = null;
        let matchedCompanyName = null;
        let matchedCompanyEmployeeCount = null;
        const companyDeadline = Date.now() + perCompanyBudgetMs;
        const variants = buildLinkedInCompanyLookupVariants({
            contactId: contact.contact_id,
            companyName: contact.companyName,
            companyNameOriginal: contact.companyNameOriginal
        }).slice(0, 4);
        for (const variant of variants) {
            if (Date.now() >= companyDeadline) {
                break;
            }
            const controller = new AbortController();
            // Per-request timeout: remaining budget, clamped to 1s..6s.
            const timeout = setTimeout(controller.abort.bind(controller), Math.min(6_000, Math.max(1_000, companyDeadline - Date.now())));
            try {
                const response = await fetch(buildLinkedInAccountSearchApiUrl(variant.companyName), {
                    method: "GET",
                    signal: controller.signal,
                    headers: {
                        accept: "*/*",
                        "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
                        "csrf-token": params.config.csrfToken,
                        referer: "https://www.linkedin.com/sales/search/company",
                        "sec-fetch-dest": "empty",
                        "sec-fetch-mode": "cors",
                        "sec-fetch-site": "same-origin",
                        "user-agent": params.config.userAgent,
                        "x-li-identity": params.config.identity,
                        "x-li-lang": "en_US",
                        "x-li-page-instance": "urn:li:page:d_sales2_search_accounts;13Jvve6kRGCao+iP0wwAag==",
                        "x-restli-protocol-version": "2.0.0",
                        cookie: params.config.cookie
                    }
                });
                if (!response.ok) {
                    // Rate limited: further variants would only make it worse.
                    if (response.status === 429) {
                        break;
                    }
                    continue;
                }
                const data = (await response.json());
                const first = data.elements?.[0];
                const companyUrl = extractLinkedInCompanyUrlFromSalesApiElement(first);
                const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(first);
                const companyName = extractLinkedInCompanyNameFromSalesApiElement(first);
                if (companyUrl || salesNavCompanyUrl || companyName) {
                    matchedCompanyUrl = companyUrl ?? matchedCompanyUrl;
                    matchedSalesNavCompanyUrl = salesNavCompanyUrl ?? matchedSalesNavCompanyUrl;
                    matchedCompanyName = companyName ?? matchedCompanyName;
                    matchedCompanyEmployeeCount = extractLinkedInCompanyEmployeeCountFromSalesApiElement(first);
                    addAlias(companyName);
                    addUrlDerivedAlias(companyUrl ? normalizeLinkedInCompanyHandle(companyUrl)?.replace(/[-_]+/g, " ") : null);
                    addUrlDerivedAlias(salesNavCompanyUrl ? normalizeLookupWhitespace(salesNavCompanyUrl.split("/sales/company/")[1]?.split(/[/?#]/)[0] ?? "") : null);
                    break;
                }
            }
            catch {
                // Best effort: network errors, aborts and bad JSON fall through
                // to the next company variant.
            }
            finally {
                clearTimeout(timeout);
            }
        }
        contexts.set(companyKey, {
            normalizedCompanyKey: companyKey,
            aliases: Array.from(aliases),
            linkedinCompanyUrl: matchedCompanyUrl,
            salesNavCompanyUrl: matchedSalesNavCompanyUrl,
            matchedCompanyName,
            matchedCompanyEmployeeCount
        });
    }
    return contexts;
}
|
|
2036
|
+
/**
 * Builds a public web-search URL that looks for a company's LinkedIn page.
 *
 * The search endpoint comes from SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL
 * and defaults to DuckDuckGo's HTML endpoint. The company name is searched as
 * an exact phrase restricted to linkedin.com/company.
 *
 * @param {string} companyName Company name to search for.
 * @returns {string} Search URL with the `q` parameter set.
 */
function buildPublicLinkedInCompanySearchUrl(companyName) {
    const configuredBase = process.env.SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL?.trim();
    const searchUrl = new URL(configuredBase ? configuredBase : "https://duckduckgo.com/html/");
    searchUrl.searchParams.set("q", `site:linkedin.com/company "${companyName}"`);
    return searchUrl.href;
}
|
|
2043
|
+
/**
 * Reads the Serper API key from the environment.
 *
 * @param {Record<string, string|undefined>} [env] Environment to read from.
 * @returns {string} Trimmed SALESPROMPTER_SERPER_API_KEY, else trimmed
 *   SERPER_API_KEY, else an empty string.
 */
function getSerperApiKey(env = process.env) {
    for (const name of ["SALESPROMPTER_SERPER_API_KEY", "SERPER_API_KEY"]) {
        const key = env[name]?.trim();
        if (key) {
            return key;
        }
    }
    return "";
}
|
|
2046
|
+
/**
 * Resolves the Serper search endpoint.
 *
 * @param {Record<string, string|undefined>} [env] Environment to read from.
 * @returns {string} Trimmed SALESPROMPTER_SERPER_SEARCH_URL when set and
 *   non-blank, otherwise the public Serper search URL.
 */
function getSerperSearchEndpoint(env = process.env) {
    const override = env.SALESPROMPTER_SERPER_SEARCH_URL?.trim();
    if (override) {
        return override;
    }
    return "https://google.serper.dev/search";
}
|
|
2049
|
+
/**
 * Builds the list of distinct search queries used to find a company's
 * LinkedIn page, from most to least specific:
 *  1. the exact company name as given,
 *  2. the exact "core" name (text before the first `-`, `,` or `|`),
 *  3. the search-normalized name,
 *  4. the loosely normalized name,
 *  5. up to four significant tokens (>= 4 chars, minus a small stop list).
 *
 * @param {string} companyName Company name to search for.
 * @returns {string[]} De-duplicated queries in the order above.
 */
function buildSerperLinkedInCompanyQueries(companyName) {
    const normalized = normalizeLookupWhitespace(companyName);
    const coreName = normalized.split(/\s*[-,|]\s*/)[0]?.trim() || normalized.trim();
    const searchable = normalizeLookupCompanyForSearch(normalized);
    const loose = normalizeLooseMatchText(normalized).replace(/\s+/g, " ").trim();
    const ignoredTokens = new Set(["oder", "with", "from", "handel", "beratung"]);
    const significantTokens = [];
    for (const token of loose.split(/\s+/)) {
        if (token.length >= 4 && !ignoredTokens.has(token)) {
            significantTokens.push(token);
        }
    }
    const keywordQuery = significantTokens.slice(0, 4).join(" ");
    const orderedQueries = [
        `site:linkedin.com/company "${companyName}"`,
        `site:linkedin.com/company "${coreName}"`,
        `site:linkedin.com/company ${searchable} linkedin`,
        `site:linkedin.com/company ${loose} linkedin`
    ];
    if (keywordQuery) {
        orderedQueries.push(`site:linkedin.com/company ${keywordQuery} linkedin`);
    }
    return [...new Set(orderedQueries)];
}
|
|
2068
|
+
/**
 * Extracts distinct LinkedIn company page URLs from the body of a search
 * result page.
 *
 * Both plain `https://www.linkedin.com/company/...` links and their
 * percent-encoded form (as found in redirect links) are recognized. Each hit
 * is reduced to its handle and rebuilt with `normalizeLinkedInCompanyPage`.
 *
 * @param {string} bodyText Raw response body of the search page.
 * @returns {string[]} Unique company page URLs, plain links first.
 */
function extractLinkedInCompanySearchCandidates(bodyText) {
    const pages = new Set();
    const collect = (rawUrl) => {
        const handle = normalizeLinkedInCompanyHandle(rawUrl);
        if (handle) {
            pages.add(normalizeLinkedInCompanyPage(handle));
        }
    };
    const plainLinks = bodyText.match(/https:\/\/www\.linkedin\.com\/company\/[^"'&<>\s)]+/gi) ?? [];
    for (const link of plainLinks) {
        collect(link);
    }
    const encodedLinks = bodyText.match(/https?%3A%2F%2Fwww\.linkedin\.com%2Fcompany%2F[^"'&<>\s)]+/gi) ?? [];
    for (const link of encodedLinks) {
        try {
            collect(decodeURIComponent(link));
        }
        catch {
            // Ignore malformed encoded fragments from search result pages.
        }
    }
    return [...pages];
}
|
|
2092
|
+
/**
 * Extracts LinkedIn company candidates from a Serper search response.
 *
 * Only entries of `payload.organic` whose `link` resolves to a LinkedIn
 * company handle are kept; the first result wins when several point to the
 * same company page.
 *
 * @param {unknown} payload Parsed Serper response.
 * @returns {Array<{url: string, title: string, snippet: string}>} Candidates
 *   in result order; `title`/`snippet` are whitespace-normalized or "".
 */
function extractSerperLinkedInCompanyCandidates(payload) {
    if (!payload || typeof payload !== "object") {
        return [];
    }
    const organicResults = Array.isArray(payload.organic) ? payload.organic : [];
    const cleanText = (text) => (typeof text === "string" ? normalizeLookupWhitespace(text) : "");
    // Keyed by page URL so later duplicates are ignored; Map keeps insertion order.
    const candidatesByUrl = new Map();
    for (const result of organicResults) {
        if (!result || typeof result !== "object") {
            continue;
        }
        const handle = normalizeLinkedInCompanyHandle(typeof result.link === "string" ? result.link : "");
        if (!handle) {
            continue;
        }
        const url = normalizeLinkedInCompanyPage(handle);
        if (candidatesByUrl.has(url)) {
            continue;
        }
        candidatesByUrl.set(url, {
            url,
            title: cleanText(result.title),
            snippet: cleanText(result.snippet)
        });
    }
    return [...candidatesByUrl.values()];
}
|
|
2127
|
+
// Module-level, process-lifetime state shared by the lookup helpers.
// NOTE(review): the readers/writers of these maps are outside this excerpt;
// key and value shapes should be confirmed against them.
// Presumably memoizes company hints per contact/company — TODO confirm.
const linkedInCompanyHintCache = new Map();
// Presumably memoizes signals scraped from LinkedIn profile pages — TODO confirm.
const linkedInProfilePageSignalCache = new Map();
// Presumably memoizes signals scraped from LinkedIn company pages — TODO confirm.
const linkedInCompanyPageSignalCache = new Map();
// Presumably memoizes Serper search responses per query — TODO confirm.
const serperSearchCache = new Map();
// Starts false; by its name, set once Serper reports that no credits remain
// so further calls can be skipped — verify against the code that assigns it.
let serperCreditsExhausted = false;
|
|
2132
|
+
/**
 * Expands a company name (or a page title containing one) into a set of
 * distinct phrases worth searching for.
 *
 * Candidates, in order: the normalized input; its search-normalized and
 * aggressively cleaned forms; the input with LinkedIn page chrome removed
 * ("| LinkedIn", "Overview", "About", ...); each separator-delimited segment
 * of that; and finally the first significant token, the first two and the
 * last two significant tokens. Candidates shorter than three characters are
 * discarded.
 *
 * @param {unknown} value Company name or page title.
 * @returns {string[]} Unique phrases in the order described above.
 */
function extractKeywordPhrases(value) {
    const base = normalizeLookupWhitespace(value);
    if (!base) {
        return [];
    }
    const genericCompanyWords = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const candidates = [
        base,
        normalizeLookupCompanyForSearch(base),
        aggressivelyCleanLookupCompanyName(base)
    ];
    // Strip page-title suffixes and navigation words left over from LinkedIn pages.
    const withoutPageChrome = base
        .replace(/\|\s*linkedin$/i, "")
        .replace(/\|\s*overview$/i, "")
        .replace(/\b(linkedin|home|about|posts|see all details)\b/gi, " ")
        .replace(/\s+/g, " ")
        .trim();
    candidates.push(withoutPageChrome);
    for (const segment of withoutPageChrome.split(/[|,·•:()/-]+/)) {
        const cleanedSegment = normalizeLookupWhitespace(segment);
        if (cleanedSegment) {
            candidates.push(cleanedSegment);
        }
    }
    const significantTokens = normalizeLooseMatchText(withoutPageChrome)
        .split(/\s+/)
        .filter((token) => token.length >= 4 && !genericCompanyWords.has(token));
    if (significantTokens.length > 0) {
        candidates.push(significantTokens[0], significantTokens.slice(0, 2).join(" "), significantTokens.slice(-2).join(" "));
    }
    const phrases = new Set();
    for (const candidate of candidates) {
        const cleaned = normalizeLookupWhitespace(candidate);
        if (cleaned && cleaned.length >= 3) {
            phrases.add(cleaned);
        }
    }
    return [...phrases];
}
|
|
2184
|
+
/**
 * Collects company-related phrases and keywords for a contact, to be used when searching for
 * the contact's LinkedIn profile.
 *
 * Sources, in order: the contact's company name, the non-numeric LinkedIn company handle,
 * the domain/host of a non-synthetic e-mail address, and phrases taken from the LinkedIn
 * company page title/description (memoized per company URL in `linkedInCompanyHintCache`).
 *
 * @param {object} contact - Reads companyNameOriginal/companyName, linkedinCompanyUrl and email.
 * @param {number} timeoutMs - Timeout forwarded to the company page fetch.
 * @returns {Promise<{phrases: string[], keywords: string[]}>} Non-empty, whitespace-normalized values.
 */
async function buildLinkedInProfileCompanyHints(contact, timeoutMs) {
    const ignoredTokens = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const phraseSet = new Set();
    const keywordSet = new Set();
    // Registers every phrase derived from `value` plus the keywords derived from each phrase.
    const addPhrase = (value) => {
        for (const phrase of extractKeywordPhrases(value)) {
            phraseSet.add(phrase);
            const significant = normalizeLooseMatchText(phrase)
                .split(/\s+/)
                .filter((word) => word.length >= 4 && !ignoredTokens.has(word));
            significant.slice(0, 5).forEach((word) => keywordSet.add(word));
            if (significant.length > 1) {
                keywordSet.add(significant.slice(0, 2).join(" "));
                keywordSet.add(significant.slice(-2).join(" "));
            }
        }
    };
    const spaced = (text) => text.replace(/[-_]+/g, " ");

    addPhrase(contact.companyNameOriginal ?? contact.companyName);

    const handle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "");
    // Purely numeric handles are LinkedIn ids, not names, so they make no useful phrase.
    if (handle && !/^\d+$/.test(handle)) {
        addPhrase(spaced(handle));
    }

    const email = normalizeLookupWhitespace(contact.email);
    let rawDomain = "";
    if (email && !isSyntheticLinkedInLookupEmail(email)) {
        rawDomain = email.split("@")[1] ?? "";
    }
    if (rawDomain) {
        const domain = rawDomain.replace(/^www\./i, "");
        keywordSet.add(domain);
        const host = domain.split(".")[0] ?? "";
        if (host) {
            addPhrase(spaced(host));
        }
    }

    const companyUrl = contact.linkedinCompanyUrl?.trim();
    if (companyUrl) {
        const cacheKey = companyUrl.replace(/\/$/, "");
        let pageHints = linkedInCompanyHintCache.get(cacheKey);
        if (!pageHints) {
            const signals = await fetchLinkedInCompanyPageSignals(companyUrl, timeoutMs);
            pageHints = signals
                ? [...extractKeywordPhrases(signals.title), ...extractKeywordPhrases(signals.description)]
                : [];
            linkedInCompanyHintCache.set(cacheKey, pageHints);
        }
        pageHints.forEach((hint) => addPhrase(hint));
    }

    const tidyList = (values) => [...values]
        .map((entry) => normalizeLookupWhitespace(entry))
        .filter((entry) => entry.length > 0);
    return {
        phrases: tidyList(phraseSet),
        keywords: tidyList(keywordSet)
    };
}
|
|
2253
|
+
/**
 * Builds the ordered list of `site:linkedin.com/in` search queries used to locate a contact's
 * public LinkedIn profile.
 *
 * Company phrases/keywords come from `buildLinkedInProfileCompanyHints` and from the LinkedIn
 * company page (page-derived phrases are ranked first). The top 6 phrases and top 5 keywords
 * are combined with the contact's full name (and job title when present) into query variants,
 * each carrying a score; duplicates (case-insensitive) are dropped and the result is sorted by
 * descending score.
 *
 * @param {object} contact - Reads firstName, lastName, jobTitle, email and linkedinCompanyUrl.
 * @param {number} timeoutMs - Timeout forwarded to the company page fetches.
 * @returns {Promise<string[]>} Query strings, highest score first.
 */
async function buildSerperLinkedInProfileQueries(contact, timeoutMs) {
    const fullName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
    const title = normalizeLookupWhitespace(contact.jobTitle);
    const ignoredTokens = new Set([
        "group",
        "holding",
        "services",
        "service",
        "consulting",
        "gmbh",
        "publishing",
        "company",
        "linkedin",
        "deutschland"
    ]);
    const significantTokens = (text) => normalizeLooseMatchText(text)
        .split(/\s+/)
        .filter((word) => word.length >= 4 && !ignoredTokens.has(word));
    const countWords = (loose) => loose.split(/\s+/).filter(Boolean).length;

    const scoredQueries = [];
    const emittedKeys = new Set();
    const pushQuery = (query, score) => {
        const normalized = normalizeLookupWhitespace(query);
        if (!normalized) {
            return;
        }
        const key = normalized.toLowerCase();
        if (!emittedKeys.has(key)) {
            emittedKeys.add(key);
            scoredQueries.push({ query: normalized, score });
        }
    };

    const hints = await buildLinkedInProfileCompanyHints(contact, timeoutMs);
    const phrasePool = new Set(hints.phrases);
    const keywordPool = new Set(hints.keywords);
    // Phrases taken directly from the LinkedIn company page outrank all others.
    const preferred = new Set();

    const email = normalizeLookupWhitespace(contact.email);
    const emailDomain = email && !isSyntheticLinkedInLookupEmail(email)
        ? email.split("@")[1]?.replace(/^www\./i, "") ?? ""
        : "";
    const emailHost = emailDomain.split(".")[0] ?? "";
    const linkedInHandle = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? "";

    const companyUrl = contact.linkedinCompanyUrl?.trim();
    if (companyUrl) {
        const companySignals = await fetchLinkedInCompanyPageSignals(companyUrl, timeoutMs);
        const pagePhrases = [
            ...extractKeywordPhrases(companySignals?.title),
            ...extractKeywordPhrases(companySignals?.description)
        ];
        for (const phrase of pagePhrases) {
            phrasePool.add(phrase);
            preferred.add(phrase);
            const words = significantTokens(phrase);
            words.slice(0, 4).forEach((word) => keywordPool.add(word));
            if (words.length > 1) {
                keywordPool.add(words.slice(0, 2).join(" "));
            }
        }
    }

    // Shared part of both priorities: overlap with the e-mail host and the company handle.
    const affinity = (loose) => {
        let points = 0;
        if (emailHost && loose.includes(normalizeLooseMatchText(emailHost))) {
            points += 80;
        }
        if (linkedInHandle && loose.includes(normalizeLooseMatchText(linkedInHandle))) {
            points += 60;
        }
        return points;
    };
    const phrasePriority = (value) => {
        const loose = normalizeLooseMatchText(value);
        const wordCount = countWords(loose);
        let points = affinity(loose);
        if (wordCount >= 1 && wordCount <= 4) {
            points += 40;
        }
        if (!/\b(gmbh|holding|services|service|consulting|kg|co)\b/i.test(value)) {
            points += 20;
        }
        if (wordCount > 7) {
            points -= 40;
        }
        return points;
    };
    const keywordPriority = (value) => {
        const loose = normalizeLooseMatchText(value);
        let points = affinity(loose);
        if (value.includes(".")) {
            points += 20;
        }
        if (countWords(loose) <= 2) {
            points += 10;
        }
        return points;
    };

    const cleanPhrases = [...phrasePool]
        .sort((left, right) => {
            const preferredDelta = Number(preferred.has(right)) - Number(preferred.has(left));
            return preferredDelta !== 0
                ? preferredDelta
                : phrasePriority(right) - phrasePriority(left);
        })
        .slice(0, 6);

    const keywordCandidates = new Set(keywordPool);
    for (const phrase of cleanPhrases) {
        const words = significantTokens(phrase);
        words.slice(0, 3).forEach((word) => keywordCandidates.add(word));
        if (words.length > 1) {
            keywordCandidates.add(words.slice(0, 2).join(" "));
        }
    }
    for (const extra of [emailHost, emailDomain, linkedInHandle]) {
        if (extra) {
            keywordCandidates.add(extra);
        }
    }
    const cleanKeywords = [...keywordCandidates]
        .sort((left, right) => keywordPriority(right) - keywordPriority(left))
        .slice(0, 5);

    for (const [index, keyword] of cleanKeywords.entries()) {
        const keywordScore = 260 - index * 15;
        pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} linkedin`, keywordScore);
        pushQuery(`site:linkedin.com/in ${fullName} ${keyword} linkedin`, keywordScore - 5);
        if (title) {
            pushQuery(`site:linkedin.com/in "${fullName}" ${keyword} "${title}"`, keywordScore - 10);
        }
    }
    for (const [index, companyName] of cleanPhrases.entries()) {
        const phraseScore = 180 - index * 10;
        pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}"`, phraseScore);
        pushQuery(`site:linkedin.com/in ${fullName} ${companyName} linkedin`, phraseScore - 5);
        if (title) {
            pushQuery(`site:linkedin.com/in "${fullName}" "${companyName}" "${title}"`, phraseScore - 10);
            pushQuery(`site:linkedin.com/in ${fullName} ${companyName} ${title} linkedin`, phraseScore - 15);
        }
    }
    if (emailDomain) {
        pushQuery(`site:linkedin.com/in "${fullName}" "${emailDomain}" linkedin`, 240);
    }
    // Name-only fallbacks, scored lowest.
    pushQuery(`site:linkedin.com/in "${fullName}" linkedin`, 50);
    if (title) {
        pushQuery(`site:linkedin.com/in "${fullName}" "${title}" linkedin`, 40);
    }
    return scoredQueries
        .sort((left, right) => right.score - left.score)
        .map(({ query }) => query);
}
|
|
2413
|
+
/**
 * Pulls LinkedIn profile URLs out of a search result page body.
 *
 * Both plain `https://…linkedin.com/in/…` links and percent-encoded links (as found inside
 * redirect URLs) are recognized; each is passed through `normalizePublicLinkedInProfileUrl`
 * and only truthy results are kept.
 *
 * @param {string} bodyText - Raw response body of the search page.
 * @returns {string[]} Unique normalized profile URLs in order of first appearance.
 */
function extractPublicLinkedInProfileSearchCandidates(bodyText) {
    const found = new Set();
    const keep = (rawUrl) => {
        const profileUrl = normalizePublicLinkedInProfileUrl(rawUrl);
        if (profileUrl) {
            found.add(profileUrl);
        }
    };
    const plainLinks = bodyText.match(/https:\/\/(?:(?:www|[a-z]{2})\.)?linkedin\.com\/in\/[^"'&<>\s)]+/gi) ?? [];
    plainLinks.forEach((link) => keep(link));
    const encodedLinks = bodyText.match(/https?%3A%2F%2F(?:(?:www|[a-z]{2})\.)?linkedin\.com%2Fin%2F[^"'&<>\s)]+/gi) ?? [];
    for (const link of encodedLinks) {
        try {
            keep(decodeURIComponent(link));
        }
        catch {
            // Ignore malformed encoded fragments.
        }
    }
    return [...found];
}
|
|
2437
|
+
/**
 * Builds the public web-search URL for a profile query.
 *
 * The base URL comes from the SALESPROMPTER_LINKEDIN_PROFILE_SEARCH_BASE_URL environment
 * variable when it is set to a non-blank value, otherwise DuckDuckGo's HTML endpoint is used.
 * The query is written to the `q` search parameter (replacing an existing one).
 *
 * @param {string} query - Search query text.
 * @returns {string} Absolute search URL.
 * @throws {TypeError} When the configured base URL is not a valid absolute URL.
 */
function buildPublicLinkedInProfileSearchUrl(query) {
    const override = process.env.SALESPROMPTER_LINKEDIN_PROFILE_SEARCH_BASE_URL?.trim();
    const searchUrl = new URL(override ? override : "https://duckduckgo.com/html/");
    searchUrl.searchParams.set("q", query);
    return searchUrl.href;
}
|
|
2444
|
+
/**
 * Runs one Serper search and memoizes the outcome per `query`/`num` pair.
 *
 * Returns null without a request when credits were previously reported exhausted or no API
 * key is configured. A non-OK response is cached as null; a 400 response whose body mentions
 * "not enough credits" additionally flips `serperCreditsExhausted`. Network errors, timeouts
 * and JSON parse failures return null without being cached.
 *
 * @param {string} query - Search query.
 * @param {number} num - Number of results requested.
 * @param {number} timeoutMs - Request timeout; capped at 12 seconds.
 * @returns {Promise<object | null>} Parsed response payload, or null.
 */
async function fetchSerperSearchResults(query, num, timeoutMs) {
    if (serperCreditsExhausted) {
        return null;
    }
    const apiKey = getSerperApiKey();
    if (!apiKey) {
        return null;
    }
    const cacheKey = `${query}::${num}`;
    if (serperSearchCache.has(cacheKey)) {
        return serperSearchCache.get(cacheKey) ?? null;
    }
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), Math.min(timeoutMs, 12_000));
    try {
        const endpoint = getSerperSearchEndpoint();
        const response = await fetch(endpoint, {
            method: "POST",
            signal: abort.signal,
            headers: {
                "X-API-KEY": apiKey,
                "Content-Type": "application/json"
            },
            body: JSON.stringify({ q: query, num })
        });
        if (response.ok) {
            const payload = await response.json();
            serperSearchCache.set(cacheKey, payload);
            return payload;
        }
        const errorBody = await response.text().catch(() => "");
        const outOfCredits = response.status === 400 && /not enough credits/i.test(errorBody);
        if (outOfCredits) {
            serperCreditsExhausted = true;
        }
        serperSearchCache.set(cacheKey, null);
        return null;
    }
    catch {
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
2488
|
+
/**
 * Converts a Serper response payload into LinkedIn profile candidates.
 *
 * Only entries of `payload.organic` whose `link` normalizes to a public profile URL are kept;
 * URLs are compared without a trailing slash and the first occurrence wins.
 *
 * @param {unknown} payload - Parsed Serper response.
 * @returns {{url: string, title: string, snippet: string}[]} Candidates in result order.
 */
function extractSerperLinkedInProfileCandidates(payload) {
    if (!payload || typeof payload !== "object") {
        return [];
    }
    const organicResults = "organic" in payload && Array.isArray(payload.organic)
        ? payload.organic
        : [];
    // Normalized text of a string field, or "" when the field is absent or not a string.
    const readText = (entry, field) => (field in entry && typeof entry[field] === "string"
        ? normalizeLookupWhitespace(entry[field])
        : "");
    const byUrl = new Map();
    for (const entry of organicResults) {
        if (!entry || typeof entry !== "object") {
            continue;
        }
        const rawLink = "link" in entry && typeof entry.link === "string" ? entry.link : "";
        const profileUrl = normalizePublicLinkedInProfileUrl(rawLink);
        if (!profileUrl) {
            continue;
        }
        const canonical = profileUrl.replace(/\/$/, "");
        if (byUrl.has(canonical)) {
            continue;
        }
        byUrl.set(canonical, {
            url: canonical,
            title: readText(entry, "title"),
            snippet: readText(entry, "snippet")
        });
    }
    return [...byUrl.values()];
}
|
|
2522
|
+
/**
 * Downloads a LinkedIn profile page and extracts the text signals used for scoring.
 *
 * Results (including null after a failed fetch) are memoized in
 * `linkedInProfilePageSignalCache`. When neither the requested URL nor the response URL
 * normalizes to a profile URL, null is returned and nothing is cached.
 *
 * @param {string} url - Profile URL to fetch.
 * @param {number} timeoutMs - Request timeout; capped at 12 seconds.
 * @returns {Promise<{normalizedUrl: string, title: string, description: string, bodyText: string, unavailable: boolean} | null>}
 */
async function fetchLinkedInProfilePageSignals(url, timeoutMs) {
    const cacheKey = normalizePublicLinkedInProfileUrl(url)?.replace(/\/$/, "") ?? url.replace(/\/$/, "");
    if (linkedInProfilePageSignalCache.has(cacheKey)) {
        return linkedInProfilePageSignalCache.get(cacheKey) ?? null;
    }
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), Math.min(timeoutMs, 12_000));
    try {
        const response = await fetch(rewriteLinkedInUrlForConfiguredBase(url), {
            method: "GET",
            signal: abort.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        const html = await response.text();
        const profileUrl = normalizePublicLinkedInProfileUrl(url) ||
            normalizePublicLinkedInProfileUrl(response.url || url);
        if (!profileUrl) {
            return null;
        }
        // Decoded first capture group of `pattern` in the page HTML, or "" without a match.
        const capture = (pattern) => decodeHtmlEntities(html.match(pattern)?.[1] || "");
        const title = capture(/<title[^>]*>([^<]+)/i) ||
            capture(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i);
        const description = capture(/<meta[^>]+name="description"[^>]+content="([^"]+)/i);
        const withoutMarkup = html
            .replace(/<script[\s\S]*?<\/script>/gi, " ")
            .replace(/<style[\s\S]*?<\/style>/gi, " ")
            .replace(/<[^>]+>/g, " ");
        const bodyText = decodeHtmlEntities(withoutMarkup);
        const probe = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
        let unavailable = true;
        if (response.status < 400) {
            unavailable = probe.includes("page not found") ||
                probe.includes("profile not found") ||
                (probe.includes("member profile") && probe.includes("not available"));
        }
        const signals = {
            normalizedUrl: profileUrl.replace(/\/$/, ""),
            title: normalizeLookupWhitespace(title),
            description: normalizeLookupWhitespace(description),
            bodyText: normalizeLookupWhitespace(bodyText),
            unavailable
        };
        linkedInProfilePageSignalCache.set(cacheKey, signals);
        return signals;
    }
    catch {
        linkedInProfilePageSignalCache.set(cacheKey, null);
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
2571
|
+
/**
 * Scores how well a fetched profile page matches a contact.
 *
 * +120 when the loose full name appears in the page text, +30 per matching company hint
 * (raw and aggressively cleaned company name), +8 per matching job-title word (first four
 * words of >= 4 chars), +40 when the profile slug contains both first and last name.
 *
 * @param {object} contact - Reads firstName, lastName, jobTitle, companyNameOriginal/companyName.
 * @param {{normalizedUrl: string, title: string, description: string, bodyText: string}} signals
 * @returns {number} Match score.
 */
function scoreLinkedInProfilePageSignals(contact, signals) {
    const company = contact.companyNameOriginal ?? contact.companyName;
    const wantedName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const wantedCompanies = [
        normalizeLooseMatchText(company),
        normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(company))
    ].filter(Boolean);
    const wantedTitle = normalizeLooseMatchText(contact.jobTitle);
    const pageText = normalizeLooseMatchText(`${signals.title} ${signals.description} ${signals.bodyText}`);

    let total = wantedName && pageText.includes(wantedName) ? 120 : 0;
    total += 30 * wantedCompanies.filter((hint) => pageText.includes(hint)).length;
    if (wantedTitle) {
        const titleWords = wantedTitle
            .split(/\s+/)
            .filter((word) => word.length >= 4)
            .slice(0, 4);
        total += 8 * titleWords.filter((word) => pageText.includes(word)).length;
    }

    const slug = signals.normalizedUrl.split("/in/")[1]?.replace(/\/$/, "") ?? "";
    const slugText = normalizeLooseMatchText(slug.replace(/[-_]+/g, " "));
    // NOTE(review): slugText is loosely normalized while the names are only lower-cased, so
    // names with punctuation or accents may never match here — confirm against normalizeLooseMatchText.
    const slugHasBothNames = slugText.includes(contact.firstName.toLowerCase()) &&
        slugText.includes(contact.lastName.toLowerCase());
    if (wantedName && slugHasBothNames) {
        total += 40;
    }
    return total;
}
|
|
2597
|
+
/**
 * Scores a Serper search candidate against a contact using only the result title and snippet.
 *
 * +120 for the loose full name; per matching company hint +30 (hint of at most two words) or
 * +20 (longer); +8 per matching job-title word; +40 when the profile slug contains both names
 * and at least one company or title match exists. Company hints are the raw and cleaned
 * company name, the LinkedIn company handle and the host of a non-synthetic e-mail address.
 *
 * @param {object} contact - Reads firstName, lastName, jobTitle, email, linkedinCompanyUrl, companyNameOriginal/companyName.
 * @param {{url: string, title: string, snippet: string}} candidate - Search result candidate.
 * @returns {{score: number, companyMatches: number, titleMatches: number}}
 */
function analyzeSerperLinkedInProfileCandidate(contact, candidate) {
    const wantedName = normalizeLooseMatchText(`${contact.firstName} ${contact.lastName}`);
    const wantedTitle = normalizeLooseMatchText(contact.jobTitle);
    const company = contact.companyNameOriginal ?? contact.companyName;
    const handleWords = normalizeLinkedInCompanyHandle(contact.linkedinCompanyUrl ?? "")?.replace(/[-_]+/g, " ") ?? "";
    const email = normalizeLookupWhitespace(contact.email);
    let emailHost = "";
    if (email && !isSyntheticLinkedInLookupEmail(email)) {
        emailHost = email.split("@")[1]?.replace(/^www\./i, "").split(".")[0] ?? "";
    }
    const companyHints = [
        normalizeLooseMatchText(company),
        normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(company)),
        normalizeLooseMatchText(handleWords),
        normalizeLooseMatchText(emailHost)
    ].filter(Boolean);
    const resultText = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);

    let score = 0;
    if (wantedName && resultText.includes(wantedName)) {
        score += 120;
    }
    let companyMatches = 0;
    for (const hint of companyHints) {
        if (resultText.includes(hint)) {
            companyMatches += 1;
            // Short hints are more specific than long multi-word ones.
            score += hint.split(/\s+/).length <= 2 ? 30 : 20;
        }
    }
    let titleMatches = 0;
    if (wantedTitle) {
        titleMatches = wantedTitle
            .split(/\s+/)
            .filter((word) => word.length >= 4)
            .slice(0, 4)
            .filter((word) => resultText.includes(word))
            .length;
        score += titleMatches * 8;
    }

    const slugText = normalizeLooseMatchText(candidate.url.split("/in/")[1]?.replace(/\/$/, "").replace(/[-_]+/g, " ") ?? "");
    // NOTE(review): slugText is loosely normalized while the names are only lower-cased —
    // confirm this comparison behaves as intended for names with punctuation or accents.
    const slugHasBothNames = slugText.includes(contact.firstName.toLowerCase()) &&
        slugText.includes(contact.lastName.toLowerCase());
    const hasContext = companyMatches > 0 || titleMatches > 0;
    if (wantedName && slugHasBothNames && hasContext) {
        score += 40;
    }
    return { score, companyMatches, titleMatches };
}
|
|
2640
|
+
/**
 * Looks up a contact's LinkedIn profile URL via Serper, falling back to the public web search.
 *
 * For each query (highest score first, limited by `options.maxQueries`): a candidate whose
 * search-result score is >= 150 with a company or title match is returned immediately.
 * Otherwise the candidate page is fetched and the better of the search-result and page scores
 * is used (search-result score alone when the page is unavailable); the best candidate of the
 * query is returned when it reaches 130. Errors inside a query are ignored and the next query
 * is tried. When no query succeeds, `searchPublicLinkedInProfileUrl` runs with at most 4 queries.
 *
 * @param {object} contact - Contact; requires firstName and lastName.
 * @param {number} timeoutMs - Timeout forwarded to all requests.
 * @param {{maxQueries?: number}} [options] - Optional query limit (positive finite number).
 * @returns {Promise<string | null>} Profile URL, or null.
 */
async function searchSerperLinkedInProfileUrl(contact, timeoutMs, options) {
    if (!contact.firstName || !contact.lastName) {
        return null;
    }
    const requested = options?.maxQueries;
    const maxQueries = requested && Number.isFinite(requested) && requested > 0
        ? Math.trunc(requested)
        : Number.POSITIVE_INFINITY;
    const queries = (await buildSerperLinkedInProfileQueries(contact, timeoutMs)).slice(0, maxQueries);
    for (const query of queries) {
        try {
            const payload = await fetchSerperSearchResults(query, 5, timeoutMs);
            if (!payload) {
                continue;
            }
            let winner = null;
            let winnerScore = 0;
            for (const candidate of extractSerperLinkedInProfileCandidates(payload)) {
                const analysis = analyzeSerperLinkedInProfileCandidate(contact, candidate);
                const hasContext = analysis.companyMatches > 0 || analysis.titleMatches > 0;
                if (analysis.score >= 150 && hasContext) {
                    return candidate.url;
                }
                const signals = await fetchLinkedInProfilePageSignals(candidate.url, timeoutMs);
                const pageUsable = Boolean(signals) && !signals.unavailable;
                const candidateScore = pageUsable
                    ? Math.max(analysis.score, scoreLinkedInProfilePageSignals(contact, signals))
                    : analysis.score;
                if (candidateScore > winnerScore) {
                    winnerScore = candidateScore;
                    winner = pageUsable ? signals.normalizedUrl : candidate.url;
                }
            }
            if (winner && winnerScore >= 130) {
                return winner;
            }
        }
        catch {
            // Continue with the next query variant.
        }
    }
    const fallbackLimit = Math.min(Number.isFinite(maxQueries) ? maxQueries : 4, 4);
    return searchPublicLinkedInProfileUrl(contact, timeoutMs, { maxQueries: fallbackLimit });
}
|
|
2688
|
+
/**
 * Decodes the handful of HTML entities that appear in scraped titles and descriptions:
 * ampersand, double quote, apostrophe (named, decimal 39 or hex 27), less-than, greater-than.
 *
 * Decoding happens in a single pass, so an escaped entity such as "&" + "amp;lt;" becomes the
 * literal text "&" + "lt;" instead of being decoded twice. The previous implementation replaced
 * each character with itself and therefore decoded nothing.
 *
 * @param {string} value - Text that may contain HTML entities.
 * @returns {string} Text with the supported entities replaced by their characters.
 */
function decodeHtmlEntities(value) {
    const namedEntities = { amp: "&", quot: '"', apos: "'", lt: "<", gt: ">" };
    return value.replace(/&(amp|quot|apos|lt|gt|#0*39|#x0*27);/gi, (match, entity) => {
        const key = entity.toLowerCase();
        // The only numeric references supported are the two spellings of the apostrophe.
        return key.startsWith("#") ? "'" : namedEntities[key];
    });
}
|
|
2696
|
+
/**
 * Downloads a LinkedIn company page and extracts the text signals used for hints and scoring.
 *
 * Results (including null after a failed fetch) are memoized in
 * `linkedInCompanyPageSignalCache`, keyed by the URL without a trailing slash.
 *
 * @param {string} url - Company page URL to fetch.
 * @param {number} timeoutMs - Request timeout; capped at 12 seconds.
 * @returns {Promise<{normalizedUrl: string, title: string, description: string, bodyText: string, unavailable: boolean} | null>}
 */
async function fetchLinkedInCompanyPageSignals(url, timeoutMs) {
    const cacheKey = url.replace(/\/$/, "");
    if (linkedInCompanyPageSignalCache.has(cacheKey)) {
        return linkedInCompanyPageSignalCache.get(cacheKey) ?? null;
    }
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), Math.min(timeoutMs, 12_000));
    try {
        const response = await fetch(url, {
            method: "GET",
            signal: abort.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        const html = await response.text();
        const finalUrl = response.url || url;
        // Decoded first capture group of `pattern` in the page HTML, or "" without a match.
        const capture = (pattern) => decodeHtmlEntities(html.match(pattern)?.[1] || "");
        const title = capture(/<title[^>]*>([^<]+)/i) ||
            capture(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i);
        const description = capture(/<meta[^>]+name="description"[^>]+content="([^"]+)/i);
        const withoutMarkup = html
            .replace(/<script[\s\S]*?<\/script>/gi, " ")
            .replace(/<style[\s\S]*?<\/style>/gi, " ")
            .replace(/<[^>]+>/g, " ");
        const bodyText = decodeHtmlEntities(withoutMarkup);
        const probe = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
        let unavailable = true;
        if (response.status < 400) {
            unavailable = probe.includes("page not found") ||
                probe.includes("this page does not exist") ||
                probe.includes("page isnt available");
        }
        // Prefer the handle of the final (post-redirect) URL, then the requested URL's handle;
        // keep the raw final URL when neither yields a handle.
        const redirectedHandle = normalizeLinkedInCompanyHandle(finalUrl ?? "");
        let normalizedUrl = finalUrl;
        if (redirectedHandle || normalizeLinkedInCompanyHandle(url)) {
            normalizedUrl = normalizeLinkedInCompanyPage(redirectedHandle ?? normalizeLinkedInCompanyHandle(url) ?? "");
        }
        const signals = {
            normalizedUrl,
            title: normalizeLookupWhitespace(title),
            description: normalizeLookupWhitespace(description),
            bodyText: normalizeLookupWhitespace(bodyText),
            unavailable
        };
        linkedInCompanyPageSignalCache.set(cacheKey, signals);
        return signals;
    }
    catch {
        linkedInCompanyPageSignalCache.set(cacheKey, null);
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
|
|
2742
|
+
/**
 * Looks up a contact's LinkedIn profile URL through the public web search endpoint.
 *
 * For each query (default limit 4) the search page is fetched, up to five profile links are
 * extracted, each profile page is fetched and scored, and the best one is returned once its
 * score reaches 130. Non-OK search responses and errors move on to the next query.
 *
 * @param {object} contact - Contact passed to the query builder and the scorer.
 * @param {number} timeoutMs - Timeout forwarded to the requests; the search request is capped at 12 seconds.
 * @param {{maxQueries?: number}} [options] - Optional query limit (positive finite number).
 * @returns {Promise<string | null>} Profile URL, or null when nothing scored high enough.
 */
async function searchPublicLinkedInProfileUrl(contact, timeoutMs, options) {
    const requested = options?.maxQueries;
    const maxQueries = requested && Number.isFinite(requested) && requested > 0
        ? Math.trunc(requested)
        : 4;
    const queries = (await buildSerperLinkedInProfileQueries(contact, timeoutMs)).slice(0, maxQueries);
    for (const query of queries) {
        const abort = new AbortController();
        const timer = setTimeout(() => abort.abort(), Math.min(timeoutMs, 12_000));
        try {
            const response = await fetch(buildPublicLinkedInProfileSearchUrl(query), {
                method: "GET",
                signal: abort.signal,
                headers: {
                    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
                }
            });
            if (!response.ok) {
                continue;
            }
            const page = await response.text();
            const candidateUrls = extractPublicLinkedInProfileSearchCandidates(page).slice(0, 5);
            let winner = null;
            let winnerScore = 0;
            for (const candidateUrl of candidateUrls) {
                const signals = await fetchLinkedInProfilePageSignals(candidateUrl, timeoutMs);
                if (signals && !signals.unavailable) {
                    const candidateScore = scoreLinkedInProfilePageSignals(contact, signals);
                    if (candidateScore > winnerScore) {
                        winnerScore = candidateScore;
                        winner = signals.normalizedUrl;
                    }
                }
            }
            if (winner && winnerScore >= 130) {
                return winner;
            }
        }
        catch {
            // Continue with the next query variant.
        }
        finally {
            clearTimeout(timer);
        }
    }
    return null;
}
|
|
2789
|
+
/**
 * Scores how well a fetched LinkedIn company page matches a company name.
 *
 * +12 for every name token (>= 4 chars) found in the page title/description, +50 when the
 * description contains the whole loosely-normalized name.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {{title: string, description: string}} signals - Page signals.
 * @returns {number} Match score.
 */
function scoreLinkedInCompanyPageSignals(companyName, signals) {
    const looseName = normalizeLooseMatchText(companyName);
    const inputTokens = looseName.split(/\s+/).filter((token) => token.length >= 4);
    const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description}`);
    let score = 0;
    for (const token of inputTokens) {
        if (haystack.includes(token)) {
            score += 12;
        }
    }
    // String.prototype.includes("") is always true, so a name that normalizes to an empty
    // string must not earn the whole-name bonus.
    if (looseName && signals.description && normalizeLooseMatchText(signals.description).includes(looseName)) {
        score += 50;
    }
    return score;
}
|
|
2803
|
+
/**
 * Scores a Serper search candidate for a company lookup.
 *
 * Starts from the URL/handle score, adds +12 for every name token (>= 4 chars) found in the
 * result title/snippet and +40 when the aggressively cleaned name appears there.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {{url: string, title: string, snippet: string}} candidate - Search result candidate.
 * @returns {number} Match score.
 */
function scoreSerperLinkedInCompanyCandidate(companyName, candidate) {
    const inputTokens = normalizeLooseMatchText(companyName).split(/\s+/).filter((token) => token.length >= 4);
    const haystack = normalizeLooseMatchText(`${candidate.title} ${candidate.snippet}`);
    let score = scoreLinkedInCompanyUrlCandidate(companyName, candidate.url);
    for (const token of inputTokens) {
        if (haystack.includes(token)) {
            score += 12;
        }
    }
    // A name made only of generic words can clean down to "", and includes("") is always
    // true — require a non-empty needle so such names don't give every candidate +40.
    const cleanedName = normalizeLooseMatchText(aggressivelyCleanLookupCompanyName(companyName));
    if (cleanedName && haystack.includes(cleanedName)) {
        score += 40;
    }
    return score;
}
|
|
2817
|
+
/**
 * Scores a LinkedIn company URL against a company name by comparing the URL handle with
 * several compacted forms of the name.
 *
 * +100 for an exact match with the search-normalized, aggressively cleaned or slugified name;
 * +60 / +40 when the handle and the normalized / cleaned name contain one another; +35 when
 * the first name word appears in the handle; +10 per name word found in the handle (max 30).
 * URLs without a handle, or with a purely numeric handle, score 0.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {string} url - Candidate LinkedIn company URL.
 * @returns {number} Match score.
 */
function scoreLinkedInCompanyUrlCandidate(companyName, url) {
    const handle = normalizeLinkedInCompanyHandle(url);
    if (!handle || /^\d+$/.test(handle)) {
        return 0;
    }
    const nameWords = normalizeLookupCompanyForSearch(companyName)
        .split(/\s+/)
        .filter((word) => word.length >= 3);
    const compactName = nameWords.join("");
    const compactCleaned = aggressivelyCleanLookupCompanyName(companyName).replace(/\s+/g, "");
    const compactHandle = handle.toLowerCase().replace(/[-_]/g, "");
    const compactSlug = (slugify(companyName) || "").replace(/-/g, "");
    // True when either string contains the other.
    const overlaps = (text) => compactHandle.includes(text) || text.includes(compactHandle);

    let total = 0;
    if ([compactName, compactCleaned, compactSlug].includes(compactHandle)) {
        total += 100;
    }
    if (compactName && overlaps(compactName)) {
        total += 60;
    }
    if (compactCleaned && overlaps(compactCleaned)) {
        total += 40;
    }
    if (nameWords.length > 0) {
        const leadWord = nameWords[0] ?? "";
        if (leadWord && compactHandle.includes(leadWord)) {
            total += 35;
        }
        const sharedWords = nameWords.filter((word) => compactHandle.includes(word)).length;
        total += Math.min(30, sharedWords * 10);
    }
    return total;
}
|
|
2851
|
+
/**
 * Best-effort lookup of a LinkedIn company URL via the public search page.
 *
 * Fetches the URL produced by `buildPublicLinkedInCompanySearchUrl`, extracts
 * candidate company URLs from the response body, and returns the highest
 * scoring one. When no candidate scores above zero, the first extracted
 * candidate is returned instead.
 *
 * @param {string} companyName - Company name to search for.
 * @param {number} timeoutMs - Caller timeout; the request is aborted after at most 15 seconds.
 * @returns {Promise<string|null>} The chosen URL, or null on a non-OK response,
 *   no candidates, a timeout, or any other failure.
 */
async function searchPublicLinkedInCompanyUrl(companyName, timeoutMs) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 15_000));
    try {
        const response = await fetch(buildPublicLinkedInCompanySearchUrl(companyName), {
            method: "GET",
            signal: controller.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        if (!response.ok) {
            return null;
        }
        const bodyText = await response.text();
        const candidates = extractLinkedInCompanySearchCandidates(bodyText);
        const ranked = candidates
            .map((url) => ({ url, score: scoreLinkedInCompanyUrlCandidate(companyName, url) }))
            .filter((candidate) => candidate.score > 0)
            .sort((left, right) => right.score - left.score);
        return ranked[0]?.url ?? candidates[0] ?? null;
    }
    catch {
        // Deliberately best-effort: aborts, network failures and parse errors
        // all mean "no result". The thrown value is not inspected, so a thrown
        // null/undefined cannot raise a secondary TypeError here.
        return null;
    }
    finally {
        clearTimeout(timeout);
    }
}
|
|
2883
|
+
/**
 * Looks up a LinkedIn company URL through the Serper search endpoint.
 *
 * For each query from `buildSerperLinkedInCompanyQueries`:
 *  1. POST the query and score the returned candidates.
 *  2. Return the top candidate at once when its score is at least 80.
 *  3. Otherwise fetch page signals for up to three top candidates and return
 *     the one with the best combined score among those whose validation score
 *     is at least 24.
 *  4. If no page signals could be fetched at all, return the top candidate
 *     unvalidated.
 * A failed or inconclusive query falls through to the next one.
 *
 * @param {string} companyName - Company name to search for.
 * @param {number} timeoutMs - Caller timeout; each Serper request is aborted
 *   after at most 15 seconds, and the value is passed on to the page-signal fetch.
 * @returns {Promise<string|null>} The chosen company URL, or null when no API
 *   key is configured or no query produced an acceptable candidate.
 */
async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
    const apiKey = getSerperApiKey();
    if (!apiKey) {
        return null;
    }
    for (const query of buildSerperLinkedInCompanyQueries(companyName)) {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 15_000));
        try {
            const response = await fetch(getSerperSearchEndpoint(), {
                method: "POST",
                signal: controller.signal,
                headers: {
                    "Content-Type": "application/json",
                    "X-API-KEY": apiKey
                },
                body: JSON.stringify({
                    q: query,
                    num: 5
                })
            });
            if (!response.ok) {
                continue;
            }
            const parsed = (await response.json());
            const candidates = extractSerperLinkedInCompanyCandidates(parsed);
            const ranked = candidates
                .map((candidate) => ({
                ...candidate,
                score: scoreSerperLinkedInCompanyCandidate(companyName, candidate)
            }))
                .filter((candidate) => candidate.score > 0)
                .sort((left, right) => right.score - left.score);
            // A high search score is accepted without fetching the page.
            if (ranked[0] && ranked[0].score >= 80) {
                return ranked[0].url;
            }
            let anySignalsFetched = false;
            let bestValidated = null;
            for (const candidate of ranked.slice(0, 3)) {
                const signals = await fetchLinkedInCompanyPageSignals(candidate.url, timeoutMs);
                if (!signals || signals.unavailable) {
                    continue;
                }
                anySignalsFetched = true;
                const validationScore = scoreLinkedInCompanyPageSignals(companyName, signals);
                if (validationScore >= 24) {
                    const combinedScore = candidate.score + validationScore;
                    if (!bestValidated || combinedScore > bestValidated.score) {
                        bestValidated = {
                            url: signals.normalizedUrl,
                            score: combinedScore
                        };
                    }
                }
            }
            if (bestValidated) {
                return bestValidated.url;
            }
            // Only fall back to the unvalidated top hit when validation was
            // impossible, not when pages were fetched and rejected.
            if (!anySignalsFetched && ranked[0]?.url) {
                return ranked[0].url;
            }
        }
        catch {
            // Best-effort: an abort or any other failure for this query simply
            // moves on to the next query. The thrown value is not inspected, so
            // a thrown null/undefined cannot raise a secondary TypeError.
        }
        finally {
            clearTimeout(timeout);
        }
    }
    return null;
}
|
|
1251
2956
|
async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
1252
2957
|
const config = await readLinkedInDirectLookupConfig();
|
|
2958
|
+
const companyContexts = await resolveDirectLinkedInCompanyContexts({
|
|
2959
|
+
contacts: params.contacts.filter((contact) => !contact.isVariation),
|
|
2960
|
+
timeoutMs: params.timeoutMs,
|
|
2961
|
+
config
|
|
2962
|
+
});
|
|
1253
2963
|
const groupedContacts = new Map();
|
|
1254
2964
|
for (const contact of params.contacts) {
|
|
1255
2965
|
const key = contact.email?.trim().toLowerCase() || `contact:${contact.contact_id}`;
|
|
@@ -1258,15 +2968,25 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1258
2968
|
groupedContacts.set(key, existing);
|
|
1259
2969
|
}
|
|
1260
2970
|
const results = [];
|
|
1261
|
-
|
|
2971
|
+
const perAttemptTimeoutMs = params.perAttemptTimeoutMs && Number.isFinite(params.perAttemptTimeoutMs) && params.perAttemptTimeoutMs > 0
|
|
2972
|
+
? Math.trunc(params.perAttemptTimeoutMs)
|
|
2973
|
+
: Math.min(params.timeoutMs, 8_000);
|
|
2974
|
+
const perContactBudgetMs = params.perContactBudgetMs && Number.isFinite(params.perContactBudgetMs) && params.perContactBudgetMs > 0
|
|
2975
|
+
? Math.trunc(params.perContactBudgetMs)
|
|
2976
|
+
: Math.min(params.timeoutMs, 15_000);
|
|
2977
|
+
const rateLimitCooldownMs = Math.max(750, Math.min(3_000, Math.trunc(perAttemptTimeoutMs / 2)));
|
|
2978
|
+
const maxRateLimitCooldowns = 4;
|
|
2979
|
+
let rateLimitCooldownUntil = 0;
|
|
2980
|
+
let consecutiveRateLimitCount = 0;
|
|
2981
|
+
let totalRateLimitCooldowns = 0;
|
|
1262
2982
|
for (const variations of groupedContacts.values()) {
|
|
1263
2983
|
const primary = variations.find((contact) => !contact.isVariation) ?? variations[0];
|
|
1264
2984
|
const blankPerson = !primary?.firstName.trim() || !primary?.lastName.trim();
|
|
1265
|
-
if (
|
|
2985
|
+
if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
1266
2986
|
results.push({
|
|
1267
2987
|
contact_id: primary.contact_id,
|
|
1268
2988
|
linkedin_url: null,
|
|
1269
|
-
error: "LinkedIn rate limit"
|
|
2989
|
+
error: "LinkedIn rate limit budget exhausted"
|
|
1270
2990
|
});
|
|
1271
2991
|
continue;
|
|
1272
2992
|
}
|
|
@@ -1279,11 +2999,24 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1279
2999
|
continue;
|
|
1280
3000
|
}
|
|
1281
3001
|
let matchedUrl = null;
|
|
3002
|
+
let matchedSalesNavUrl = null;
|
|
3003
|
+
let matchedFullName = null;
|
|
3004
|
+
let matchedCompanyName = null;
|
|
3005
|
+
let matchedTitle = null;
|
|
1282
3006
|
let lastError = null;
|
|
3007
|
+
const contactDeadline = Date.now() + perContactBudgetMs;
|
|
3008
|
+
const companyContext = companyContexts.get(buildDirectCompanyContextKey(primary));
|
|
1283
3009
|
for (const candidate of variations) {
|
|
1284
|
-
for (const searchVariant of buildLinkedInLookupSearchVariants(candidate)) {
|
|
3010
|
+
for (const searchVariant of await buildLinkedInLookupSearchVariants(candidate, params.timeoutMs, companyContext?.aliases ?? [])) {
|
|
3011
|
+
if (Date.now() < rateLimitCooldownUntil) {
|
|
3012
|
+
await new Promise((resolve) => setTimeout(resolve, rateLimitCooldownUntil - Date.now()));
|
|
3013
|
+
}
|
|
3014
|
+
if (Date.now() >= contactDeadline) {
|
|
3015
|
+
lastError = lastError || "Direct lookup budget exhausted";
|
|
3016
|
+
break;
|
|
3017
|
+
}
|
|
1285
3018
|
const controller = new AbortController();
|
|
1286
|
-
const timeout = setTimeout(controller.abort.bind(controller), Math.min(
|
|
3019
|
+
const timeout = setTimeout(controller.abort.bind(controller), Math.min(perAttemptTimeoutMs, Math.max(1_000, contactDeadline - Date.now())));
|
|
1287
3020
|
try {
|
|
1288
3021
|
const response = await fetch(buildLinkedInSalesApiUrl(searchVariant), {
|
|
1289
3022
|
method: "GET",
|
|
@@ -1304,19 +3037,52 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1304
3037
|
}
|
|
1305
3038
|
});
|
|
1306
3039
|
if (response.status === 429) {
|
|
1307
|
-
rateLimited = true;
|
|
1308
3040
|
lastError = "LinkedIn rate limit";
|
|
3041
|
+
consecutiveRateLimitCount += 1;
|
|
3042
|
+
totalRateLimitCooldowns += 1;
|
|
3043
|
+
rateLimitCooldownUntil =
|
|
3044
|
+
Date.now() + Math.min(15_000, rateLimitCooldownMs * Math.max(1, consecutiveRateLimitCount));
|
|
3045
|
+
if (totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
3046
|
+
break;
|
|
3047
|
+
}
|
|
1309
3048
|
break;
|
|
1310
3049
|
}
|
|
1311
3050
|
if (!response.ok) {
|
|
1312
3051
|
lastError = `LinkedIn returned ${response.status}`;
|
|
1313
3052
|
continue;
|
|
1314
3053
|
}
|
|
3054
|
+
consecutiveRateLimitCount = 0;
|
|
3055
|
+
rateLimitCooldownUntil = 0;
|
|
1315
3056
|
const data = (await response.json());
|
|
1316
3057
|
const profilesFound = data.paging?.total ?? 0;
|
|
1317
3058
|
if (profilesFound > 0) {
|
|
1318
|
-
|
|
1319
|
-
|
|
3059
|
+
const bestCandidate = (data.elements ?? [])
|
|
3060
|
+
.map((element) => ({
|
|
3061
|
+
element,
|
|
3062
|
+
...scoreLinkedInSalesApiElementMatch(candidate, element)
|
|
3063
|
+
}))
|
|
3064
|
+
.sort((left, right) => right.score - left.score)[0];
|
|
3065
|
+
const hasTrustedCompanyContext = Boolean(candidate.linkedinCompanyUrl ||
|
|
3066
|
+
companyContext?.linkedinCompanyUrl ||
|
|
3067
|
+
companyContext?.matchedCompanyName);
|
|
3068
|
+
const hasTrustedEmailContext = Boolean(candidate.email && !isSyntheticLinkedInLookupEmail(candidate.email));
|
|
3069
|
+
const acceptBestCandidate = Boolean(bestCandidate &&
|
|
3070
|
+
(bestCandidate.score >= 140 ||
|
|
3071
|
+
(bestCandidate.exactNameMatch &&
|
|
3072
|
+
(bestCandidate.companyMatchCount > 0 || hasTrustedCompanyContext || hasTrustedEmailContext))));
|
|
3073
|
+
if (bestCandidate && acceptBestCandidate) {
|
|
3074
|
+
matchedUrl = extractLinkedInProfileUrlFromSalesApiElement(bestCandidate.element) ?? null;
|
|
3075
|
+
matchedSalesNavUrl = extractLinkedInSalesNavLeadUrlFromSalesApiElement(bestCandidate.element) ?? null;
|
|
3076
|
+
matchedFullName = bestCandidate.fullName;
|
|
3077
|
+
matchedCompanyName = bestCandidate.companyName;
|
|
3078
|
+
matchedTitle = bestCandidate.title;
|
|
3079
|
+
}
|
|
3080
|
+
else {
|
|
3081
|
+
lastError = bestCandidate
|
|
3082
|
+
? `LinkedIn top result score too low (${bestCandidate.score})`
|
|
3083
|
+
: "LinkedIn returned no usable results";
|
|
3084
|
+
}
|
|
3085
|
+
if (matchedUrl || matchedSalesNavUrl) {
|
|
1320
3086
|
break;
|
|
1321
3087
|
}
|
|
1322
3088
|
}
|
|
@@ -1327,27 +3093,36 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1327
3093
|
finally {
|
|
1328
3094
|
clearTimeout(timeout);
|
|
1329
3095
|
}
|
|
1330
|
-
if (matchedUrl ||
|
|
3096
|
+
if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
1331
3097
|
break;
|
|
1332
3098
|
}
|
|
1333
3099
|
}
|
|
1334
|
-
if (matchedUrl ||
|
|
3100
|
+
if (matchedUrl || matchedSalesNavUrl || totalRateLimitCooldowns >= maxRateLimitCooldowns) {
|
|
3101
|
+
break;
|
|
3102
|
+
}
|
|
3103
|
+
if (Date.now() >= contactDeadline) {
|
|
1335
3104
|
break;
|
|
1336
3105
|
}
|
|
1337
3106
|
}
|
|
1338
3107
|
results.push({
|
|
1339
3108
|
contact_id: primary.contact_id,
|
|
1340
|
-
linkedin_url: matchedUrl,
|
|
1341
|
-
|
|
3109
|
+
linkedin_url: matchedUrl ?? matchedSalesNavUrl,
|
|
3110
|
+
sales_nav_profile_url: matchedSalesNavUrl,
|
|
3111
|
+
matched_full_name: matchedFullName,
|
|
3112
|
+
matched_company_name: matchedCompanyName,
|
|
3113
|
+
matched_title: matchedTitle,
|
|
3114
|
+
error: matchedUrl || matchedSalesNavUrl ? null : lastError
|
|
1342
3115
|
});
|
|
1343
3116
|
}
|
|
1344
3117
|
return {
|
|
1345
3118
|
success: true,
|
|
1346
|
-
contacts: results
|
|
3119
|
+
contacts: results,
|
|
3120
|
+
companyContexts: Array.from(companyContexts.values())
|
|
1347
3121
|
};
|
|
1348
3122
|
}
|
|
1349
3123
|
async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
1350
3124
|
const config = await readLinkedInDirectLookupConfig();
|
|
3125
|
+
const precomputedContextByKey = new Map((params.precomputedContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
1351
3126
|
const primaryContacts = new Map();
|
|
1352
3127
|
for (const contact of params.contacts) {
|
|
1353
3128
|
const existing = primaryContacts.get(contact.contact_id);
|
|
@@ -1371,10 +3146,23 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1371
3146
|
companyName: contact.companyName,
|
|
1372
3147
|
companyNameOriginal: contact.companyNameOriginal
|
|
1373
3148
|
});
|
|
1374
|
-
|
|
1375
|
-
let
|
|
1376
|
-
let
|
|
3149
|
+
const precomputedContext = precomputedContextByKey.get(buildDirectCompanyContextKey(contact));
|
|
3150
|
+
let matchedCompanyUrl = precomputedContext?.linkedinCompanyUrl ?? null;
|
|
3151
|
+
let matchedSalesNavCompanyUrl = precomputedContext?.salesNavCompanyUrl ?? null;
|
|
3152
|
+
let matchedCompanyName = precomputedContext?.matchedCompanyName ?? null;
|
|
3153
|
+
let matchedCompanyEmployeeCount = precomputedContext?.matchedCompanyEmployeeCount ?? null;
|
|
1377
3154
|
let lastError = null;
|
|
3155
|
+
if (matchedCompanyUrl || matchedSalesNavCompanyUrl || matchedCompanyName) {
|
|
3156
|
+
results.push({
|
|
3157
|
+
contact_id: contact.contact_id,
|
|
3158
|
+
linkedin_company_url: matchedCompanyUrl,
|
|
3159
|
+
sales_nav_company_url: matchedSalesNavCompanyUrl,
|
|
3160
|
+
matched_company_name: matchedCompanyName,
|
|
3161
|
+
matched_company_employee_count: matchedCompanyEmployeeCount,
|
|
3162
|
+
error: null
|
|
3163
|
+
});
|
|
3164
|
+
continue;
|
|
3165
|
+
}
|
|
1378
3166
|
for (const variant of variants) {
|
|
1379
3167
|
const controller = new AbortController();
|
|
1380
3168
|
const timeout = setTimeout(controller.abort.bind(controller), Math.min(params.timeoutMs, 20_000));
|
|
@@ -1410,8 +3198,10 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1410
3198
|
const data = (await response.json());
|
|
1411
3199
|
const first = data.elements?.[0];
|
|
1412
3200
|
const companyUrl = extractLinkedInCompanyUrlFromSalesApiElement(first);
|
|
1413
|
-
|
|
3201
|
+
const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(first);
|
|
3202
|
+
if (companyUrl || salesNavCompanyUrl) {
|
|
1414
3203
|
matchedCompanyUrl = companyUrl;
|
|
3204
|
+
matchedSalesNavCompanyUrl = salesNavCompanyUrl;
|
|
1415
3205
|
matchedCompanyName = extractLinkedInCompanyNameFromSalesApiElement(first);
|
|
1416
3206
|
matchedCompanyEmployeeCount = extractLinkedInCompanyEmployeeCountFromSalesApiElement(first);
|
|
1417
3207
|
break;
|
|
@@ -1430,9 +3220,10 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1430
3220
|
results.push({
|
|
1431
3221
|
contact_id: contact.contact_id,
|
|
1432
3222
|
linkedin_company_url: matchedCompanyUrl,
|
|
3223
|
+
sales_nav_company_url: matchedSalesNavCompanyUrl,
|
|
1433
3224
|
matched_company_name: matchedCompanyName,
|
|
1434
3225
|
matched_company_employee_count: matchedCompanyEmployeeCount,
|
|
1435
|
-
error: matchedCompanyUrl ? null : lastError
|
|
3226
|
+
error: matchedCompanyUrl || matchedSalesNavCompanyUrl ? null : lastError
|
|
1436
3227
|
});
|
|
1437
3228
|
}
|
|
1438
3229
|
return {
|
|
@@ -1513,6 +3304,113 @@ async function invokeLinkedInUrlEnrichmentWorkflow(params) {
|
|
|
1513
3304
|
clearTimeout(timeout);
|
|
1514
3305
|
}
|
|
1515
3306
|
}
|
|
3307
|
+
/**
 * Maps rows returned by the LinkedIn URL workflow back onto the input contacts.
 *
 * Rows are read from `parsedBody.contacts` followed by `parsedBody.profiles`
 * (each only when it is an array). A row is attributed to an input contact by
 * the first of these that succeeds:
 *  1. an explicit `contact_id` that belongs to the input,
 *  2. a lower-cased email match,
 *  3. a normalized "full name|company" match against non-variation contacts,
 *  4. a normalized full-name match, only for names that are unique among the
 *     non-variation contacts.
 * Rows that cannot be attributed are ignored. When several rows resolve to the
 * same contact, the last one wins.
 *
 * @param {{contacts: Array<object>, parsedBody: unknown}} params - Input
 *   contacts and the parsed workflow response body.
 * @returns {Map<string, {linkedinUrl: (string|null), salesNavProfileUrl: (string|null), linkedinCompanyUrl: (string|null), salesNavCompanyUrl: *}>}
 *   Resolved URLs keyed by contact id.
 */
function normalizeWorkflowLinkedInUrlResult(params) {
    // Returns the value of the first listed key whose value is a string
    // (empty strings included), or `fallback` when none is.
    const firstStringField = (row, keys, fallback) => {
        for (const key of keys) {
            if (typeof row[key] === "string") {
                return row[key];
            }
        }
        return fallback;
    };
    const inputContactIds = new Set(params.contacts.map((contact) => contact.contact_id));
    const contactIdsBySyntheticEmail = new Map(params.contacts
        .filter((contact) => contact.email)
        .map((contact) => [String(contact.email).toLowerCase(), contact.contact_id]));
    // Normalize each non-variation contact's name once and reuse it for all
    // three name-based indexes below.
    const primaryContacts = params.contacts
        .filter((contact) => !contact.isVariation)
        .map((contact) => ({
        contact,
        fullName: normalizeLooseMatchText(normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`))
    }));
    const contactIdsByNormalizedIdentity = new Map();
    const normalizedNameCounts = new Map();
    for (const { contact, fullName } of primaryContacts) {
        const companyName = normalizeLooseMatchText(contact.companyNameOriginal ?? contact.companyName);
        const identityKey = `${fullName}|${companyName}`;
        // "|" means both the name and the company normalized to nothing.
        if (identityKey !== "|") {
            contactIdsByNormalizedIdentity.set(identityKey, contact.contact_id);
        }
        if (fullName) {
            normalizedNameCounts.set(fullName, (normalizedNameCounts.get(fullName) ?? 0) + 1);
        }
    }
    // Name-only matching is restricted to names that identify exactly one contact.
    const contactIdsByNormalizedName = new Map(primaryContacts
        .filter(({ fullName }) => Boolean(fullName) && (normalizedNameCounts.get(fullName) ?? 0) === 1)
        .map(({ contact, fullName }) => [fullName, contact.contact_id]));
    const rowsByContactId = new Map();
    const body = params.parsedBody && typeof params.parsedBody === "object" && !Array.isArray(params.parsedBody)
        ? params.parsedBody
        : null;
    const workflowRows = [
        ...(Array.isArray(body?.contacts) ? body.contacts : []),
        ...(Array.isArray(body?.profiles) ? body.profiles : [])
    ];
    for (const row of workflowRows) {
        // The body is external data: skip null/primitive entries instead of
        // throwing on property access.
        if (row === null || typeof row !== "object") {
            continue;
        }
        const fullNameCandidate = normalizeLookupWhitespace(firstStringField(row, ["full_name", "fullName", "name"], undefined) ??
            [row.first_name, row.last_name]
                .filter((value) => typeof value === "string" && value.trim().length > 0)
                .join(" "));
        const companyNameCandidate = normalizeLookupWhitespace(firstStringField(row, ["company_name", "companyName", "current_company"], ""));
        const normalizedIdentityKey = `${normalizeLooseMatchText(fullNameCandidate)}|${normalizeLooseMatchText(companyNameCandidate)}`;
        const explicitContactId = typeof row.contact_id === "string"
            ? row.contact_id
            : typeof row.contact_id === "number"
                ? String(row.contact_id)
                : "";
        const emailKey = typeof row.email === "string" ? row.email.toLowerCase() : "";
        const contactId = (inputContactIds.has(explicitContactId) ? explicitContactId : "") ||
            contactIdsBySyntheticEmail.get(emailKey) ||
            contactIdsByNormalizedIdentity.get(normalizedIdentityKey) ||
            contactIdsByNormalizedName.get(normalizeLooseMatchText(fullNameCandidate)) ||
            "";
        if (!contactId) {
            continue;
        }
        const linkedinUrl = normalizePublicLinkedInProfileUrl(firstStringField(row, [
            "linkedin_profile_url",
            "linkedinProfileUrl",
            "default_profile_url",
            "defaultProfileUrl",
            "linkedin_url",
            "linkedinUrl"
        ], null));
        const salesNavProfileUrl = normalizeSalesNavLeadUrl(firstStringField(row, [
            "sales_nav_profile_url",
            "salesNavProfileUrl",
            "linkedin_url",
            "linkedinUrl"
        ], null)) ?? null;
        const regularCompanyHandle = normalizeLinkedInCompanyHandle(firstStringField(row, ["regular_company_url", "regularCompanyUrl"], ""));
        const linkedinCompanyUrl = extractLinkedInCompanyUrlFromSalesApiElement(row) ??
            (regularCompanyHandle ? normalizeLinkedInCompanyPage(regularCompanyHandle) : null);
        const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(row);
        rowsByContactId.set(contactId, {
            // Fall back to the Sales Navigator URL when no public profile URL was found.
            linkedinUrl: linkedinUrl ?? salesNavProfileUrl,
            salesNavProfileUrl,
            linkedinCompanyUrl,
            salesNavCompanyUrl
        });
    }
    return rowsByContactId;
}
|
|
1516
3414
|
async function fetchSalesNavLookupCandidates(params) {
|
|
1517
3415
|
const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL?.trim();
|
|
1518
3416
|
const serviceRoleKey = process.env.SUPABASE_SERVICE_ROLE_KEY?.trim();
|
|
@@ -1558,7 +3456,8 @@ async function fetchSalesNavLookupCandidates(params) {
|
|
|
1558
3456
|
}
|
|
1559
3457
|
async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
1560
3458
|
const results = [];
|
|
1561
|
-
for (const
|
|
3459
|
+
for (const row of params.rows) {
|
|
3460
|
+
const contactId = normalizeLinkedInLookupField(row.contactId) ?? `${results.length + 1}`;
|
|
1562
3461
|
const candidates = await fetchSalesNavLookupCandidates({
|
|
1563
3462
|
companyName: row.companyName,
|
|
1564
3463
|
orgId: params.orgId
|
|
@@ -1594,6 +3493,7 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
1594
3493
|
return right.score - left.score || Number(Boolean(rightUrl)) - Number(Boolean(leftUrl));
|
|
1595
3494
|
});
|
|
1596
3495
|
const best = ranked[0]?.candidate;
|
|
3496
|
+
const salesNavProfileUrl = best?.salesNavProfileUrl ?? null;
|
|
1597
3497
|
const linkedinUrl = best?.linkedInProfileUrl ?? best?.salesNavProfileUrl ?? null;
|
|
1598
3498
|
const linkedinCompanyUrl = (() => {
|
|
1599
3499
|
const handle = normalizeLinkedInCompanyHandle(best?.regularCompanyUrl ?? "") ??
|
|
@@ -1604,17 +3504,23 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
1604
3504
|
const numericCompanyUrl = typeof best?.companyUrl === "string" ? best.companyUrl.trim() : "";
|
|
1605
3505
|
return numericCompanyUrl.length > 0 ? numericCompanyUrl : null;
|
|
1606
3506
|
})();
|
|
3507
|
+
const salesNavCompanyUrl = typeof best?.companyUrl === "string" && /\/sales\/company\//i.test(best.companyUrl)
|
|
3508
|
+
? best.companyUrl
|
|
3509
|
+
: null;
|
|
3510
|
+
const existingLinkedInCompanyUrl = row.linkedinCompanyUrl?.trim() || null;
|
|
1607
3511
|
results.push({
|
|
1608
3512
|
clientId: row.clientId,
|
|
1609
3513
|
fullName: row.fullName,
|
|
1610
3514
|
companyName: row.companyName,
|
|
1611
3515
|
linkedinUrl,
|
|
1612
|
-
|
|
3516
|
+
salesNavProfileUrl,
|
|
3517
|
+
linkedinCompanyUrl: linkedinCompanyUrl ?? existingLinkedInCompanyUrl,
|
|
3518
|
+
salesNavCompanyUrl,
|
|
1613
3519
|
found: Boolean(linkedinUrl),
|
|
1614
|
-
companyFound: Boolean(linkedinCompanyUrl),
|
|
1615
|
-
contactId
|
|
3520
|
+
companyFound: Boolean(linkedinCompanyUrl ?? existingLinkedInCompanyUrl),
|
|
3521
|
+
contactId,
|
|
1616
3522
|
source: linkedinUrl ? "salesnav-supabase" : null,
|
|
1617
|
-
companySource: linkedinCompanyUrl ? "salesnav-supabase" : null,
|
|
3523
|
+
companySource: linkedinCompanyUrl ? "salesnav-supabase" : existingLinkedInCompanyUrl ? "input" : null,
|
|
1618
3524
|
matchedFullName: best?.fullName ?? null,
|
|
1619
3525
|
matchedCompanyName: best?.companyName ?? null,
|
|
1620
3526
|
matchedTitle: best?.title ?? null,
|
|
@@ -1624,6 +3530,223 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
1624
3530
|
}
|
|
1625
3531
|
return results;
|
|
1626
3532
|
}
|
|
3533
|
+
/**
 * Decides whether the Sales Navigator row pre-pass should run.
 *
 * `SALESPROMPTER_LINKEDIN_ROW_PREPASS` forces the answer when set to
 * 0/false/off or 1/true/on (case-insensitive, surrounding whitespace ignored).
 * Otherwise the pre-pass runs only when an org id and both Supabase settings
 * are present and the row count does not exceed
 * `SALESPROMPTER_LINKEDIN_ROW_PREPASS_MAX_ROWS` (200 when unset or invalid).
 *
 * @param {{rows: Array, orgId?: string, env?: object}} params - `env` defaults to `process.env`.
 * @returns {boolean}
 */
function shouldUseSalesNavRowPrepass(params) {
    const fallbackRowLimit = 200;
    const environment = params.env ?? process.env;
    const override = environment.SALESPROMPTER_LINKEDIN_ROW_PREPASS?.trim().toLowerCase();
    switch (override) {
        case "0":
        case "false":
        case "off":
            return false;
        case "1":
        case "true":
        case "on":
            return true;
        default:
            break;
    }
    const orgIdPresent = Boolean(params.orgId?.trim());
    const supabaseConfigured = Boolean(environment.NEXT_PUBLIC_SUPABASE_URL?.trim() && environment.SUPABASE_SERVICE_ROLE_KEY?.trim());
    if (!(orgIdPresent && supabaseConfigured)) {
        return false;
    }
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_ROW_PREPASS_MAX_ROWS ?? fallbackRowLimit);
    const rowLimit = Number.isFinite(configuredLimit) && configuredLimit > 0 ? configuredLimit : fallbackRowLimit;
    return params.rows.length <= rowLimit;
}
|
|
3550
|
+
/**
 * Decides whether the direct LinkedIn people lookup should run.
 *
 * `SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_LOOKUP` forces the answer when set to
 * 0/false/off or 1/true/on. Otherwise the lookup runs when `rowCount` does not
 * exceed `SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_MAX_ROWS` (50 when unset or
 * not a positive number).
 *
 * @param {{rowCount: number, env?: object}} params - `env` defaults to `process.env`.
 * @returns {boolean}
 */
function shouldUseDirectPeopleLookup(params) {
    const fallbackRowLimit = 50;
    const environment = params.env ?? process.env;
    const override = environment.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_LOOKUP?.trim().toLowerCase();
    if (["0", "false", "off"].includes(override)) {
        return false;
    }
    if (["1", "true", "on"].includes(override)) {
        return true;
    }
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_DIRECT_PROFILE_MAX_ROWS ?? fallbackRowLimit);
    const limitIsUsable = Number.isFinite(configuredLimit) && configuredLimit > 0;
    return params.rowCount <= (limitIsUsable ? configuredLimit : fallbackRowLimit);
}
|
|
3562
|
+
/**
 * Decides whether the workflow-based LinkedIn people lookup should run.
 *
 * `SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_LOOKUP` forces the answer when set
 * to 0/false/off or 1/true/on. Otherwise the lookup runs when `rowCount` does
 * not exceed `SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_MAX_ROWS`. When that
 * variable is unset or not a positive number, the limit is 75 if
 * `getSerperApiKey(env)` returns a truthy value and 250 otherwise.
 *
 * @param {{rowCount: number, env?: object}} params - `env` defaults to `process.env`.
 * @returns {boolean}
 */
function shouldUseWorkflowPeopleLookup(params) {
    const environment = params.env ?? process.env;
    const override = environment.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_LOOKUP?.trim().toLowerCase();
    if (["0", "false", "off"].includes(override)) {
        return false;
    }
    if (["1", "true", "on"].includes(override)) {
        return true;
    }
    const fallbackRowLimit = getSerperApiKey(environment) ? 75 : 250;
    const configuredLimit = Number(environment.SALESPROMPTER_LINKEDIN_WORKFLOW_PROFILE_MAX_ROWS ?? fallbackRowLimit);
    const rowLimit = Number.isFinite(configuredLimit) && configuredLimit > 0 ? configuredLimit : fallbackRowLimit;
    return params.rowCount <= rowLimit;
}
|
|
3575
|
+
/**
 * Decides whether profile resolution should use the bulk strategy.
 *
 * `SALESPROMPTER_LINKEDIN_BULK_MODE` forces the answer when set to 0/false/off
 * or 1/true/on. Otherwise bulk mode is used once `rowCount` reaches
 * `SALESPROMPTER_LINKEDIN_BULK_MODE_MIN_ROWS` (75 when unset or not a
 * positive number).
 *
 * @param {{rowCount: number, env?: object}} params - `env` defaults to `process.env`.
 * @returns {boolean}
 */
function shouldUseBulkProfileResolutionStrategy(params) {
    const fallbackThreshold = 75;
    const environment = params.env ?? process.env;
    const override = environment.SALESPROMPTER_LINKEDIN_BULK_MODE?.trim().toLowerCase();
    switch (override) {
        case "0":
        case "false":
        case "off":
            return false;
        case "1":
        case "true":
        case "on":
            return true;
        default:
            break;
    }
    const configuredThreshold = Number(environment.SALESPROMPTER_LINKEDIN_BULK_MODE_MIN_ROWS ?? fallbackThreshold);
    const threshold = Number.isFinite(configuredThreshold) && configuredThreshold > 0
        ? configuredThreshold
        : fallbackThreshold;
    return params.rowCount >= threshold;
}
|
|
3587
|
+
/**
 * Builds the tuning parameters for LinkedIn profile resolution.
 *
 * Whether bulk mode applies is decided by `shouldUseBulkProfileResolutionStrategy`.
 * Every numeric setting can be overridden through its `SALESPROMPTER_LINKEDIN_*`
 * environment variable. An override is used (truncated to an integer) only when
 * it is a finite number greater than zero.
 *
 * Defaults (bulk / non-bulk):
 *  - serperConcurrency: 12 / 6
 *  - serperMaxQueries: 4 / 8
 *  - workflowStageBudgetMs: 8000 / 15000
 *  - serperStageBudgetMs: twice `timeoutMs` clamped to 15000..45000 /
 *    `timeoutMs` clamped to 10000..20000
 *  - bulkDirectProfileMaxRows: 0 / 0
 *  - bulkDirectProfileTimeoutMs: min(`timeoutMs`, 6000) / 0
 * The two `bulkDirectProfile*` settings resolve to 0 on an invalid override
 * rather than to their default.
 *
 * @param {{rowCount: number, timeoutMs: number, env?: object}} params - `env` defaults to `process.env`.
 * @returns {{bulkMode: boolean, serperConcurrency: number, serperMaxQueries: number, workflowStageBudgetMs: number, serperStageBudgetMs: number, bulkDirectProfileMaxRows: number, bulkDirectProfileTimeoutMs: number}}
 */
function resolveLinkedInBulkStrategyConfig(params) {
    const environment = params.env ?? process.env;
    // Reads one setting: the raw value (or `unsetDefault` when absent) is used
    // if it is a finite positive number, otherwise `invalidFallback`.
    const positiveIntOr = (rawValue, unsetDefault, invalidFallback = unsetDefault) => {
        const numeric = Number(rawValue ?? unsetDefault);
        return Number.isFinite(numeric) && numeric > 0 ? Math.trunc(numeric) : invalidFallback;
    };
    const bulkMode = shouldUseBulkProfileResolutionStrategy({
        rowCount: params.rowCount,
        env: environment
    });
    const serperStageBudgetDefault = bulkMode
        ? Math.max(15_000, Math.min(params.timeoutMs * 2, 45_000))
        : Math.max(10_000, Math.min(params.timeoutMs, 20_000));
    const bulkDirectProfileTimeoutDefault = bulkMode ? Math.min(params.timeoutMs, 6_000) : 0;
    return {
        bulkMode,
        serperConcurrency: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_SERPER_CONCURRENCY, bulkMode ? 12 : 6),
        serperMaxQueries: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_SERPER_MAX_QUERIES, bulkMode ? 4 : 8),
        workflowStageBudgetMs: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_WORKFLOW_STAGE_TIMEOUT_MS, bulkMode ? 8_000 : 15_000),
        serperStageBudgetMs: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_SERPER_STAGE_TIMEOUT_MS, serperStageBudgetDefault),
        bulkDirectProfileMaxRows: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_MAX_ROWS, 0, 0),
        bulkDirectProfileTimeoutMs: positiveIntOr(environment.SALESPROMPTER_LINKEDIN_BULK_DIRECT_PROFILE_TIMEOUT_MS, bulkDirectProfileTimeoutDefault, 0)
    };
}
|
|
3629
|
+
/**
 * Tells whether a direct profile lookup should be attempted in bulk mode.
 *
 * All of the following must hold: the strategy is in bulk mode, its
 * `bulkDirectProfileMaxRows` and `bulkDirectProfileTimeoutMs` are both greater
 * than zero, and at least one row is still unresolved.
 *
 * @param {{strategy: {bulkMode: boolean, bulkDirectProfileMaxRows: number, bulkDirectProfileTimeoutMs: number}, unresolvedRowCount: number}} params
 * @returns {boolean} A falsy `bulkMode` value is returned as-is.
 */
function shouldAttemptBulkDirectProfileLookup(params) {
    const { strategy } = params;
    if (!strategy.bulkMode) {
        return strategy.bulkMode;
    }
    if (!(strategy.bulkDirectProfileMaxRows > 0)) {
        return false;
    }
    if (!(strategy.bulkDirectProfileTimeoutMs > 0)) {
        return false;
    }
    return params.unresolvedRowCount > 0;
}
|
|
3635
|
+
/**
 * Picks the contacts most worth a direct profile lookup in bulk mode.
 *
 * Variation contacts are ignored. Each remaining contact gets an additive score:
 *  - +80 for a known LinkedIn company URL (on the resolved row or the contact),
 *  - +20 for a Sales Navigator company URL on the resolved row,
 *  - +40 for an email that `isSyntheticLinkedInLookupEmail` does not flag,
 *  - +25 for a job title, +15 for a recommended role,
 *  - +5 per title keyword (max +20) and +5 per role keyword (max +15),
 *  - -100 for placeholder names of the form "Contact <number>",
 *  - -25 for the generic mailbox names hr, support, facility, buchhaltung, rechnungen.
 * Contacts scoring zero or less are dropped.
 *
 * @param {{contacts: Array<object>, rowsByContactId: Map, limit: number}} params
 * @returns {Array<object>} Up to `limit` contacts, highest score first.
 */
function rankContactsForBulkDirectProfileLookup(params) {
    const rankedEntries = [];
    for (const contact of params.contacts) {
        if (contact.isVariation) {
            continue;
        }
        const resolvedRow = params.rowsByContactId.get(contact.contact_id);
        const displayName = normalizeLookupWhitespace(`${contact.firstName} ${contact.lastName}`);
        const emailAddress = normalizeLookupWhitespace(contact.email);
        const titleKeywords = extractLookupTitleKeywords(contact.jobTitle);
        const roleKeywords = buildDeepDiveRoleSearchKeywords(contact.deepDiveRecommendedRole);
        let score = 0;
        score += resolvedRow?.linkedinCompanyUrl || contact.linkedinCompanyUrl ? 80 : 0;
        score += resolvedRow?.salesNavCompanyUrl ? 20 : 0;
        score += emailAddress && !isSyntheticLinkedInLookupEmail(emailAddress) ? 40 : 0;
        score += contact.jobTitle?.trim() ? 25 : 0;
        score += contact.deepDiveRecommendedRole?.trim() ? 15 : 0;
        score += Math.min(20, titleKeywords.length * 5);
        score += Math.min(15, roleKeywords.length * 5);
        score -= /^contact\s+\d+$/i.test(displayName) ? 100 : 0;
        score -= /^(hr|support|facility|buchhaltung|rechnungen)$/i.test(displayName) ? 25 : 0;
        if (score > 0) {
            rankedEntries.push({ contact, score });
        }
    }
    rankedEntries.sort((left, right) => right.score - left.score);
    return rankedEntries.slice(0, params.limit).map((entry) => entry.contact);
}
|
|
3667
|
+
/**
 * Looks up LinkedIn profile URLs for a list of contacts with a small pool of
 * concurrent workers that share one overall time budget.
 *
 * Each worker repeatedly claims the next unprocessed contact and calls
 * `searchSerperLinkedInProfileUrl` with a per-call timeout that never exceeds
 * what is left of the overall budget. Workers stop claiming contacts once the
 * budget is spent, so the result may cover only a prefix of `contacts`.
 *
 * @param {object} params
 * @param {object[]} params.contacts Contacts to resolve; each needs a `contact_id`.
 * @param {number} [params.concurrency=3] Desired number of parallel workers.
 * @param {number} params.timeoutMs Upper bound for a single lookup.
 * @param {number} [params.overallBudgetMs] Total wall-clock budget; unlimited when absent or not positive.
 * @param {number} [params.maxQueries] Forwarded to the search helper.
 * @returns {Promise<Map<any, string>>} Map from `contact_id` to the profile URL that was found.
 */
async function resolveSerperLinkedInProfilesInParallel(params) {
    const results = new Map();
    const contacts = params.contacts;
    // A NaN concurrency would flow through Math.min/Math.max untouched and make
    // Array.from({ length: NaN }) create zero workers, silently resolving nothing.
    const requestedConcurrency = Number(params.concurrency ?? 3);
    const usableConcurrency = Number.isNaN(requestedConcurrency) ? 3 : Math.trunc(requestedConcurrency);
    const concurrency = Math.max(1, Math.min(usableConcurrency, contacts.length || 1));
    const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
        ? Date.now() + Math.trunc(params.overallBudgetMs)
        : Number.POSITIVE_INFINITY;
    // Shared cursor: claiming an index is synchronous, so workers never pick the same contact.
    let nextIndex = 0;
    const worker = async () => {
        while (Date.now() < deadline) {
            const index = nextIndex++;
            if (index >= contacts.length) {
                return;
            }
            const contact = contacts[index];
            const remainingBudget = deadline - Date.now();
            if (remainingBudget <= 0) {
                return;
            }
            const linkedinUrl = await searchSerperLinkedInProfileUrl(contact, Math.min(params.timeoutMs, remainingBudget), {
                maxQueries: params.maxQueries
            });
            if (linkedinUrl) {
                results.set(contact.contact_id, linkedinUrl);
            }
        }
    };
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    return results;
}
|
|
3700
|
+
/**
 * Resolves LinkedIn company URLs for contacts, looking each distinct company
 * up only once and sharing the answer between all contacts of that company.
 *
 * Only non-variation contacts without a `linkedinCompanyUrl` contribute
 * companies to look up. Each company is searched through
 * `searchSerperLinkedInCompanyUrl` first and `searchPublicLinkedInCompanyUrl`
 * as a fallback, by a small pool of workers sharing one overall time budget.
 * Afterwards every contact in `params.contacts` whose normalized company has a
 * hit receives that URL.
 *
 * @param {object} params
 * @param {object[]} params.contacts Contacts with `contact_id` and `companyName` / `companyNameOriginal`.
 * @param {number} [params.concurrency=4] Desired number of parallel workers.
 * @param {number} params.timeoutMs Upper bound for a single search call.
 * @param {number} [params.overallBudgetMs] Total wall-clock budget; unlimited when absent or not positive.
 * @returns {Promise<Map<any, string>>} Map from `contact_id` to the company URL that was found.
 */
async function resolveLinkedInCompanyUrlsForContacts(params) {
    const contacts = params.contacts.filter((contact) => !contact.isVariation && !contact.linkedinCompanyUrl);
    // Normalized company key -> display name used for the search (first occurrence wins).
    const uniqueCompanies = new Map();
    for (const contact of contacts) {
        const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
        if (!key || uniqueCompanies.has(key)) {
            continue;
        }
        uniqueCompanies.set(key, contact.companyNameOriginal ?? contact.companyName);
    }
    const resultsByCompany = new Map();
    const entries = Array.from(uniqueCompanies.entries());
    // A NaN concurrency would otherwise yield Array.from({ length: NaN }) and
    // therefore zero workers, so no company would ever be searched.
    const requestedConcurrency = Number(params.concurrency ?? 4);
    const usableConcurrency = Number.isNaN(requestedConcurrency) ? 4 : Math.trunc(requestedConcurrency);
    const concurrency = Math.max(1, Math.min(usableConcurrency, entries.length || 1));
    const deadline = params.overallBudgetMs && Number.isFinite(params.overallBudgetMs) && params.overallBudgetMs > 0
        ? Date.now() + Math.trunc(params.overallBudgetMs)
        : Number.POSITIVE_INFINITY;
    let nextIndex = 0;
    const worker = async () => {
        while (Date.now() < deadline) {
            const index = nextIndex++;
            if (index >= entries.length) {
                return;
            }
            const [key, companyName] = entries[index];
            const remainingBudget = deadline - Date.now();
            if (remainingBudget <= 0) {
                return;
            }
            let linkedinUrl = await searchSerperLinkedInCompanyUrl(companyName, Math.min(params.timeoutMs, remainingBudget));
            if (linkedinUrl == null) {
                // Re-measure the budget: the first search already consumed part of it,
                // and reusing the earlier timeout could overrun the overall deadline.
                const fallbackBudget = deadline - Date.now();
                if (fallbackBudget <= 0) {
                    return;
                }
                linkedinUrl = await searchPublicLinkedInCompanyUrl(companyName, Math.min(params.timeoutMs, fallbackBudget));
            }
            if (linkedinUrl) {
                resultsByCompany.set(key, linkedinUrl);
            }
        }
    };
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    // Fan the per-company answers back out to every contact, including the ones
    // that were filtered out of the lookup set above.
    const results = new Map();
    for (const contact of params.contacts) {
        const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
        const linkedinUrl = resultsByCompany.get(key);
        if (linkedinUrl) {
            results.set(contact.contact_id, linkedinUrl);
        }
    }
    return results;
}
|
|
1627
3750
|
/**
 * Renders an argument vector as one printable command line.
 *
 * @param {string[]} args Individual command arguments.
 * @returns {string} Each argument passed through `shellQuote`, separated by single spaces.
 */
function buildCommandLine(args) {
    const quotedArgs = args.map((value) => shellQuote(value));
    return quotedArgs.join(" ");
}
|
|
@@ -1679,7 +3802,16 @@ function normalizeLinkedInCompanyHandle(value) {
|
|
|
1679
3802
|
}
|
|
1680
3803
|
try {
|
|
1681
3804
|
const url = new URL(trimmed);
|
|
1682
|
-
|
|
3805
|
+
const overrideHostname = (() => {
|
|
3806
|
+
try {
|
|
3807
|
+
const overrideBase = process.env.SALESPROMPTER_LINKEDIN_BASE_URL?.trim();
|
|
3808
|
+
return overrideBase ? new URL(overrideBase).hostname : "";
|
|
3809
|
+
}
|
|
3810
|
+
catch {
|
|
3811
|
+
return "";
|
|
3812
|
+
}
|
|
3813
|
+
})();
|
|
3814
|
+
if (!/(^|\.)linkedin\.com$/i.test(url.hostname) && (!overrideHostname || url.hostname !== overrideHostname)) {
|
|
1683
3815
|
return null;
|
|
1684
3816
|
}
|
|
1685
3817
|
const segments = url.pathname.split("/").filter((segment) => segment.length > 0);
|
|
@@ -2279,6 +4411,72 @@ async function fetchWorkspaceLeadSearch(session, requestBody) {
|
|
|
2279
4411
|
}
|
|
2280
4412
|
return WorkspaceLeadSearchResponseSchema.parse(payload).leads;
|
|
2281
4413
|
}
|
|
4414
|
+
/**
 * Builds the account profile that accompanies workspace lead search results.
 *
 * With at least one lead, the profile is derived from the first lead, with the
 * explicit target's company name and domain taking precedence, and is
 * validated through `AccountProfileSchema`. Without leads, the company is
 * resolved through `companyProvider` instead.
 *
 * @param {object} icp Ideal customer profile; its `keywords` are merged into the account keywords.
 * @param {{ companyDomain?: string, companyName?: string }} target Explicit target company, if any.
 * @param {object[]} leads Leads returned by the workspace search.
 * @returns {Promise<object>} The account profile.
 */
async function buildWorkspaceLeadAccount(icp, target, leads) {
    const primaryLead = leads[0];
    if (!primaryLead) {
        const companyQuery = {
            companyDomain: target.companyDomain,
            companyName: target.companyName
        };
        return await companyProvider.resolveCompany(companyQuery, icp);
    }
    const isNonBlankString = (value) => typeof value === "string" && value.trim().length > 0;
    const keywordCandidates = [
        // First label of the target domain, e.g. "acme" for "acme.com".
        target.companyDomain?.split(".")[0],
        primaryLead.industry,
        primaryLead.region,
        ...icp.keywords
    ];
    const keywords = Array.from(new Set(keywordCandidates.filter((value) => isNonBlankString(value))));
    const profile = {
        companyName: target.companyName?.trim() || primaryLead.companyName,
        domain: target.companyDomain?.trim().toLowerCase() || primaryLead.domain,
        industry: primaryLead.industry,
        region: primaryLead.region,
        employeeCount: primaryLead.employeeCount,
        keywords,
        sources: ["workspace-qualified-leads"]
    };
    return AccountProfileSchema.parse(profile);
}
|
|
4433
|
+
/**
 * Generates leads for a CLI command from the workspace search API, the local
 * fallback provider, or whichever of the two is available.
 *
 * `options.source` selects the behaviour:
 *  - `"fallback"`: always use `leadProvider`.
 *  - `"workspace"`: require the workspace search; any failure is rethrown.
 *  - `"auto"` (default): try the workspace search and, on failure, return the
 *    fallback result with a warning describing why the workspace was skipped.
 *
 * @param {object} options
 * @param {"auto"|"workspace"|"fallback"} [options.source]
 * @param {object} options.icp Ideal customer profile.
 * @param {number} options.count Number of leads requested.
 * @param {object} options.target Optional target company (`companyDomain`, `linkedinCompanyPage`, `companyName`).
 * @returns {Promise<object>} Lead generation result (`provider`, `mode`, `account`, `leads`, `warnings`).
 * @throws {Error} When `source` is `"workspace"` and auth is bypassed or the workspace search fails.
 */
async function generateLeadsForCommand(options) {
    const source = z.enum(["auto", "workspace", "fallback"]).parse(options.source ?? "auto");
    const generateFallbackLeads = () => leadProvider.generateLeads(options.icp, options.count, options.target);
    if (source === "fallback") {
        return await generateFallbackLeads();
    }
    if (shouldBypassAuth()) {
        if (source === "workspace") {
            throw new Error("workspace lead generation requires authentication. Disable SALESPROMPTER_SKIP_AUTH and log in first.");
        }
        return await generateFallbackLeads();
    }
    try {
        const session = await requireAuthSession();
        // A concrete company (domain or LinkedIn page) targets that company;
        // otherwise the ICP is sent as the reference for the search.
        const requestBody = options.target.companyDomain || options.target.linkedinCompanyPage
            ? {
                mode: "target-company",
                domain: options.target.companyDomain,
                linkedinCompanyPage: options.target.linkedinCompanyPage,
                limit: options.count
            }
            : {
                mode: "reference-company",
                icp: options.icp,
                limit: options.count
            };
        const leads = await fetchWorkspaceLeadSearch(session, requestBody);
        const account = await buildWorkspaceLeadAccount(options.icp, options.target, leads);
        return {
            provider: "salesprompter-app-workspace-search",
            mode: "real",
            account,
            leads,
            warnings: []
        };
    }
    catch (error) {
        if (source === "workspace") {
            throw error;
        }
        const fallback = await generateFallbackLeads();
        const message = error instanceof Error ? error.message : String(error);
        return {
            ...fallback,
            // Tolerate a fallback result without `warnings`; spreading undefined
            // would throw and turn a graceful fallback into a crash.
            warnings: [`Workspace lead search unavailable: ${message}`, ...(fallback.warnings ?? [])]
        };
    }
}
|
|
2282
4480
|
/**
 * Returns the relative output path used for a LinkedIn products crawl.
 *
 * @param {string} categorySlug Slug of the crawled product category.
 * @returns {string} A path of the form `./data/linkedin-products-<slug>.json`.
 */
function buildLinkedInProductsOutputPath(categorySlug) {
    const fileName = `linkedin-products-${categorySlug}.json`;
    return `./data/${fileName}`;
}
|
|
@@ -2891,6 +5089,49 @@ class SalesNavigatorExportRequestError extends Error {
|
|
|
2891
5089
|
this.launchDiagnostics = options.launchDiagnostics ?? null;
|
|
2892
5090
|
}
|
|
2893
5091
|
}
|
|
5092
|
+
/**
 * Error describing a failed CLI API request, carrying the HTTP status and the
 * optional machine-readable error code taken from the response body.
 */
class CliApiRequestError extends Error {
    /** HTTP status code of the failed response, when known. */
    statusCode;
    /** Machine-readable error code from the response payload, when present. */
    errorCode;
    /**
     * @param {string} message Human-readable failure description.
     * @param {{ statusCode?: number, errorCode?: string }} [options] Extra details; may be omitted.
     */
    constructor(message, options) {
        super(message);
        this.name = "CliApiRequestError";
        // Optional chaining keeps `new CliApiRequestError(message)` from throwing.
        this.statusCode = options?.statusCode;
        this.errorCode = options?.errorCode;
    }
}
|
|
5102
|
+
/**
 * Error raised when a LinkedIn company backfill batch reports a failure.
 * `failureCode` carries the failure code reported for the batch, if any.
 */
class LinkedInCompanyBackfillBatchError extends Error {
    /** Failure code reported for the batch, when present. */
    failureCode;
    /**
     * @param {string} message Human-readable failure description.
     * @param {{ failureCode?: string }} [options] Extra details; may be omitted.
     */
    constructor(message, options) {
        super(message);
        this.name = "LinkedInCompanyBackfillBatchError";
        // Optional chaining keeps construction without options from throwing.
        this.failureCode = options?.failureCode;
    }
}
|
|
5110
|
+
/**
 * Produces a short human-readable label for the LinkedIn session a backfill
 * launch selected, for use in progress and error messages.
 *
 * The label prefers the session's user email, then its user handle, and
 * appends the first 12 characters of the session cookie hash when available.
 *
 * @param {object} launch Launch response with optional `selectedSessionUserEmail`,
 *   `selectedSessionUserHandle` and `selectedSessionCookieSha256`.
 * @returns {string} `"<identity> (<hash>)"`, the identity, the hash, or a generic phrase.
 */
function formatLinkedInCompanyBackfillSessionLabel(launch) {
    let identity = launch.selectedSessionUserEmail?.trim();
    if (!identity) {
        identity = launch.selectedSessionUserHandle?.trim();
    }
    if (!identity) {
        identity = null;
    }
    const trimmedHash = launch.selectedSessionCookieSha256?.trim();
    const shortHash = trimmedHash ? trimmedHash.slice(0, 12) : null;
    if (!identity) {
        return shortHash || 'the selected LinkedIn session';
    }
    return shortHash ? `${identity} (${shortHash})` : identity;
}
|
|
5122
|
+
/**
 * Checks whether a failure message reads like an expired or rejected LinkedIn
 * session cookie (case-insensitive match against known phrasings).
 *
 * @param {string} message Failure text to inspect.
 * @returns {boolean} True when the text matches one of the known phrases.
 */
function isLinkedInCompanyBackfillInvalidSessionMessage(message) {
    const invalidSessionPattern = /session cookie not valid anymore|expired session cookie|invalid session cookie|can't connect to linkedin with this session cookie|no valid credentials found|please log in to linkedin to get a new one/i;
    return invalidSessionPattern.test(message);
}
|
|
5125
|
+
/**
 * Builds the user-facing message shown when company enrichment has run out of
 * usable LinkedIn sessions.
 *
 * Labels are trimmed, blanks are dropped and duplicates collapsed (first
 * occurrence order is kept). When labels remain, the message lists them.
 *
 * @param {string[]} labels Labels of the sessions that were rejected.
 * @returns {string} Recovery instructions, optionally naming the rejected sessions.
 */
function buildLinkedInCompanyBackfillSessionRecoveryMessage(labels) {
    const distinct = new Set();
    for (const label of labels) {
        const trimmed = label.trim();
        if (trimmed.length > 0) {
            distinct.add(trimmed);
        }
    }
    const uniqueLabels = [...distinct];
    if (uniqueLabels.length === 0) {
        return "Company enrichment exhausted the LinkedIn session pool. Open LinkedIn Sales Navigator in Chrome, reconnect the Salesprompter extension, and retry companies:enrich.";
    }
    const attemptedSessions = uniqueLabels.join(", ");
    return `Company enrichment exhausted the LinkedIn session pool. Phantombuster rejected ${uniqueLabels.length} synced LinkedIn session${uniqueLabels.length === 1 ? "" : "s"} as expired: ${attemptedSessions}. Open LinkedIn Sales Navigator in Chrome, reconnect the Salesprompter extension, and retry companies:enrich.`;
}
|
|
2894
5135
|
const SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS = 90_000;
|
|
2895
5136
|
async function withRefreshableAuthSession(session, run, contextLabel = "Salesprompter session expired during crawl. Refreshing login...") {
|
|
2896
5137
|
let currentSession = session;
|
|
@@ -2921,13 +5162,22 @@ async function fetchCliJson(session, request, schema) {
|
|
|
2921
5162
|
const text = await response.text();
|
|
2922
5163
|
const parsed = text.length > 0 ? JSON.parse(text) : {};
|
|
2923
5164
|
if (!response.ok) {
|
|
5165
|
+
const errorCode = typeof parsed === "object" &&
|
|
5166
|
+
parsed !== null &&
|
|
5167
|
+
"code" in parsed &&
|
|
5168
|
+
typeof parsed.code === "string"
|
|
5169
|
+
? parsed.code
|
|
5170
|
+
: undefined;
|
|
2924
5171
|
const errorMessage = typeof parsed === "object" &&
|
|
2925
5172
|
parsed !== null &&
|
|
2926
5173
|
"error" in parsed &&
|
|
2927
5174
|
typeof parsed.error === "string"
|
|
2928
5175
|
? parsed.error
|
|
2929
5176
|
: `request failed (${response.status})`;
|
|
2930
|
-
throw new
|
|
5177
|
+
throw new CliApiRequestError(errorMessage, {
|
|
5178
|
+
statusCode: response.status,
|
|
5179
|
+
errorCode
|
|
5180
|
+
});
|
|
2931
5181
|
}
|
|
2932
5182
|
return schema.parse(parsed);
|
|
2933
5183
|
});
|
|
@@ -2986,7 +5236,13 @@ async function enrichDirectEmailCompaniesViaApp(session, payload) {
|
|
|
2986
5236
|
return value;
|
|
2987
5237
|
}
|
|
2988
5238
|
async function fetchLinkedInCompaniesBackfillStatus(session, payload) {
|
|
2989
|
-
const
|
|
5239
|
+
const url = new URL('/api/cli/linkedin-companies/status', session.apiBaseUrl);
|
|
5240
|
+
url.searchParams.set('clientId', String(payload.clientId));
|
|
5241
|
+
url.searchParams.set('containerId', payload.containerId);
|
|
5242
|
+
if (payload.selectedSessionCookieSha256?.trim()) {
|
|
5243
|
+
url.searchParams.set('selectedSessionCookieSha256', payload.selectedSessionCookieSha256.trim());
|
|
5244
|
+
}
|
|
5245
|
+
const { value } = await fetchCliJson(session, (currentSession) => fetch(url.toString(), {
|
|
2990
5246
|
method: "GET",
|
|
2991
5247
|
headers: {
|
|
2992
5248
|
Authorization: `Bearer ${currentSession.accessToken}`
|
|
@@ -2994,6 +5250,17 @@ async function fetchLinkedInCompaniesBackfillStatus(session, payload) {
|
|
|
2994
5250
|
}), LinkedInCompanyBackfillStatusResponseSchema);
|
|
2995
5251
|
return value;
|
|
2996
5252
|
}
|
|
5253
|
+
/**
 * Posts a Phantombuster container sync request to the app's CLI API.
 *
 * The request goes through `fetchCliJson`, which is handed a factory that
 * builds the POST for whichever session it supplies, and the response is
 * validated against `PhantombusterContainersSyncResponseSchema`.
 *
 * @param {object} session Session passed to `fetchCliJson`.
 * @param {object} payload JSON-serializable request body.
 * @returns {Promise<object>} The `value` of the `fetchCliJson` result.
 */
async function syncPhantombusterContainersViaApp(session, payload) {
    const sendSyncRequest = (currentSession) => {
        const endpoint = `${currentSession.apiBaseUrl}/api/cli/phantombuster/containers/sync`;
        const init = {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${currentSession.accessToken}`
            },
            body: JSON.stringify(payload)
        };
        return fetch(endpoint, init);
    };
    const result = await fetchCliJson(session, sendSyncRequest, PhantombusterContainersSyncResponseSchema);
    return result.value;
}
|
|
2997
5264
|
function serializeSalesNavigatorFiltersForApi(filters) {
|
|
2998
5265
|
return filters.map((filter) => ({
|
|
2999
5266
|
type: filter.type,
|
|
@@ -3020,6 +5287,12 @@ function buildSalesNavigatorSliceRawPayload(slice, extra = {}) {
|
|
|
3020
5287
|
resultRetryCount: slice.resultRetryCount ?? null
|
|
3021
5288
|
};
|
|
3022
5289
|
}
|
|
5290
|
+
/**
 * Parses an optional client id supplied as a CLI option.
 *
 * @param {unknown} value Raw option value.
 * @returns {number|null} `null` when the value is missing or blank, otherwise
 *   the value coerced and validated as a positive integer.
 * @throws When the value is present but not a positive integer (schema validation error).
 */
function parseOptionalSalesNavigatorClientId(value) {
    const isMissing = value === null || value === undefined;
    if (isMissing || String(value).trim() === "") {
        return null;
    }
    const clientIdSchema = z.coerce.number().int().positive();
    return clientIdSchema.parse(value);
}
|
|
3023
5296
|
function buildSalesNavigatorCrawlReportRawPayload(slice, traceId, extra = {}) {
|
|
3024
5297
|
return buildSalesNavigatorSliceRawPayload({
|
|
3025
5298
|
sourceQueryUrl: slice.sourceQueryUrl,
|
|
@@ -3242,10 +5515,24 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3242
5515
|
let startedCompanies = 0;
|
|
3243
5516
|
let remaining = 0;
|
|
3244
5517
|
let consecutiveBusyPolls = 0;
|
|
5518
|
+
let consecutiveRetryableFailures = 0;
|
|
5519
|
+
const maxRetryableFailures = 3;
|
|
5520
|
+
let consecutiveInvalidSessionFailures = 0;
|
|
5521
|
+
const maxInvalidSessionFailures = 2;
|
|
5522
|
+
const invalidSessionLabels = [];
|
|
5523
|
+
const excludedSessionCookieSha256 = new Set();
|
|
5524
|
+
const excludedUserEmails = new Set();
|
|
5525
|
+
const excludedUserHandles = new Set();
|
|
5526
|
+
let lastProcessedRemaining = null;
|
|
3245
5527
|
for (;;) {
|
|
3246
5528
|
let launched;
|
|
3247
5529
|
try {
|
|
3248
|
-
launched = await launchLinkedInCompaniesBackfill(session,
|
|
5530
|
+
launched = await launchLinkedInCompaniesBackfill(session, {
|
|
5531
|
+
...payload,
|
|
5532
|
+
excludedSessionCookieSha256: Array.from(excludedSessionCookieSha256),
|
|
5533
|
+
excludedUserEmails: Array.from(excludedUserEmails),
|
|
5534
|
+
excludedUserHandles: Array.from(excludedUserHandles),
|
|
5535
|
+
});
|
|
3249
5536
|
}
|
|
3250
5537
|
catch (error) {
|
|
3251
5538
|
if (isSalesNavigatorAgentBusyError(error)) {
|
|
@@ -3256,6 +5543,19 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3256
5543
|
await delay(30_000);
|
|
3257
5544
|
continue;
|
|
3258
5545
|
}
|
|
5546
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(error) &&
|
|
5547
|
+
consecutiveInvalidSessionFailures < maxInvalidSessionFailures) {
|
|
5548
|
+
consecutiveInvalidSessionFailures += 1;
|
|
5549
|
+
writeProgress(`Company enrichment session expired. Trying another synced LinkedIn session (${consecutiveInvalidSessionFailures}/${maxInvalidSessionFailures})...`);
|
|
5550
|
+
await delay(5_000);
|
|
5551
|
+
continue;
|
|
5552
|
+
}
|
|
5553
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(error)) {
|
|
5554
|
+
throw new Error(buildLinkedInCompanyBackfillSessionRecoveryMessage(invalidSessionLabels));
|
|
5555
|
+
}
|
|
5556
|
+
if (isCompanyBackfillSourceInvalidError(error)) {
|
|
5557
|
+
throw new Error("Company enrichment source is broken. Refresh leadPool_inner_merged and recreate leadPool_new, then retry.");
|
|
5558
|
+
}
|
|
3259
5559
|
throw error;
|
|
3260
5560
|
}
|
|
3261
5561
|
consecutiveBusyPolls = 0;
|
|
@@ -3268,24 +5568,178 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3268
5568
|
};
|
|
3269
5569
|
}
|
|
3270
5570
|
batches += 1;
|
|
3271
|
-
|
|
3272
|
-
|
|
5571
|
+
const launchedCompanies = launched.candidates.length;
|
|
5572
|
+
startedCompanies += launchedCompanies;
|
|
5573
|
+
let initialStatus;
|
|
5574
|
+
try {
|
|
5575
|
+
initialStatus = await waitForLinkedInCompanyBackfillStart(session, {
|
|
5576
|
+
clientId: payload.clientId,
|
|
5577
|
+
containerId: launched.containerId,
|
|
5578
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
5579
|
+
});
|
|
5580
|
+
}
|
|
5581
|
+
catch (error) {
|
|
5582
|
+
if (isRetryableLinkedInCompanyBackfillFailure(error) && consecutiveRetryableFailures < maxRetryableFailures) {
|
|
5583
|
+
consecutiveRetryableFailures += 1;
|
|
5584
|
+
batches -= 1;
|
|
5585
|
+
startedCompanies -= launchedCompanies;
|
|
5586
|
+
writeProgress(`Company enrichment batch failed before start (${error.message}). Retrying automatically (${consecutiveRetryableFailures}/${maxRetryableFailures})...`);
|
|
5587
|
+
await delay(5_000);
|
|
5588
|
+
continue;
|
|
5589
|
+
}
|
|
5590
|
+
throw error;
|
|
5591
|
+
}
|
|
5592
|
+
writeProgress(initialStatus.processed
|
|
5593
|
+
? `Finished company enrichment batch ${batches} for ${launchedCompanies} companies.`
|
|
5594
|
+
: `Started company enrichment batch ${batches} for ${launchedCompanies} companies.`);
|
|
5595
|
+
const batchStartedAt = Date.now();
|
|
5596
|
+
let lastRunningHeartbeatAt = batchStartedAt;
|
|
5597
|
+
let lastPendingPersistenceHeartbeatAt = batchStartedAt;
|
|
3273
5598
|
for (;;) {
|
|
3274
5599
|
const status = await fetchLinkedInCompaniesBackfillStatus(session, {
|
|
3275
5600
|
clientId: payload.clientId,
|
|
3276
|
-
containerId: launched.containerId
|
|
5601
|
+
containerId: launched.containerId,
|
|
5602
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
3277
5603
|
});
|
|
3278
5604
|
remaining = status.remaining;
|
|
5605
|
+
if (!status.running && status.failed) {
|
|
5606
|
+
const batchError = new LinkedInCompanyBackfillBatchError(status.failureMessage ?? "Company enrichment batch failed.", { failureCode: status.failureCode ?? undefined });
|
|
5607
|
+
if (isRetryableLinkedInCompanyBackfillFailure(batchError) &&
|
|
5608
|
+
consecutiveRetryableFailures < maxRetryableFailures) {
|
|
5609
|
+
consecutiveRetryableFailures += 1;
|
|
5610
|
+
batches -= 1;
|
|
5611
|
+
startedCompanies -= launchedCompanies;
|
|
5612
|
+
writeProgress(`Company enrichment batch failed (${batchError.message}). Retrying automatically (${consecutiveRetryableFailures}/${maxRetryableFailures})...`);
|
|
5613
|
+
await delay(5_000);
|
|
5614
|
+
break;
|
|
5615
|
+
}
|
|
5616
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(batchError) &&
|
|
5617
|
+
consecutiveInvalidSessionFailures < maxInvalidSessionFailures) {
|
|
5618
|
+
consecutiveInvalidSessionFailures += 1;
|
|
5619
|
+
invalidSessionLabels.push(formatLinkedInCompanyBackfillSessionLabel(launched));
|
|
5620
|
+
if (launched.selectedSessionCookieSha256?.trim()) {
|
|
5621
|
+
excludedSessionCookieSha256.add(launched.selectedSessionCookieSha256.trim());
|
|
5622
|
+
}
|
|
5623
|
+
if (launched.selectedSessionUserEmail?.trim()) {
|
|
5624
|
+
excludedUserEmails.add(launched.selectedSessionUserEmail.trim());
|
|
5625
|
+
}
|
|
5626
|
+
if (launched.selectedSessionUserHandle?.trim()) {
|
|
5627
|
+
excludedUserHandles.add(launched.selectedSessionUserHandle.trim());
|
|
5628
|
+
}
|
|
5629
|
+
batches -= 1;
|
|
5630
|
+
startedCompanies -= launchedCompanies;
|
|
5631
|
+
writeProgress(`Company enrichment rejected ${formatLinkedInCompanyBackfillSessionLabel(launched)} as expired. Trying another synced LinkedIn session (${consecutiveInvalidSessionFailures}/${maxInvalidSessionFailures})...`);
|
|
5632
|
+
await delay(5_000);
|
|
5633
|
+
break;
|
|
5634
|
+
}
|
|
5635
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(batchError)) {
|
|
5636
|
+
invalidSessionLabels.push(formatLinkedInCompanyBackfillSessionLabel(launched));
|
|
5637
|
+
if (launched.selectedSessionCookieSha256?.trim()) {
|
|
5638
|
+
excludedSessionCookieSha256.add(launched.selectedSessionCookieSha256.trim());
|
|
5639
|
+
}
|
|
5640
|
+
if (launched.selectedSessionUserEmail?.trim()) {
|
|
5641
|
+
excludedUserEmails.add(launched.selectedSessionUserEmail.trim());
|
|
5642
|
+
}
|
|
5643
|
+
if (launched.selectedSessionUserHandle?.trim()) {
|
|
5644
|
+
excludedUserHandles.add(launched.selectedSessionUserHandle.trim());
|
|
5645
|
+
}
|
|
5646
|
+
throw new Error(buildLinkedInCompanyBackfillSessionRecoveryMessage(invalidSessionLabels));
|
|
5647
|
+
}
|
|
5648
|
+
throw batchError;
|
|
5649
|
+
}
|
|
3279
5650
|
if (!status.running && status.processed) {
|
|
5651
|
+
if (lastProcessedRemaining !== null && status.remaining >= lastProcessedRemaining) {
|
|
5652
|
+
const settledStatus = await waitForLinkedInCompanyBackfillRemainingDrop(session, {
|
|
5653
|
+
clientId: payload.clientId,
|
|
5654
|
+
containerId: launched.containerId,
|
|
5655
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
5656
|
+
previousRemaining: lastProcessedRemaining
|
|
5657
|
+
});
|
|
5658
|
+
remaining = settledStatus.remaining;
|
|
5659
|
+
if (remaining >= lastProcessedRemaining) {
|
|
5660
|
+
throw new Error(`Company enrichment batch ${batches} finished but remaining stayed at ${remaining}. Stopping to avoid duplicate launches.`);
|
|
5661
|
+
}
|
|
5662
|
+
}
|
|
5663
|
+
consecutiveRetryableFailures = 0;
|
|
5664
|
+
consecutiveInvalidSessionFailures = 0;
|
|
5665
|
+
lastProcessedRemaining = remaining;
|
|
5666
|
+
const completionMessage = `Finished company enrichment batch ${batches} for ${launchedCompanies} companies.`;
|
|
3280
5667
|
writeProgress(remaining > 0
|
|
3281
|
-
? `${remaining} companies still waiting. Starting the next batch...`
|
|
3282
|
-
:
|
|
5668
|
+
? `${completionMessage} ${remaining} companies still waiting. Starting the next batch...`
|
|
5669
|
+
: `${completionMessage} Company enrichment finished.`);
|
|
3283
5670
|
break;
|
|
3284
5671
|
}
|
|
5672
|
+
if (status.running) {
|
|
5673
|
+
const now = Date.now();
|
|
5674
|
+
if (now - lastRunningHeartbeatAt >= 30_000) {
|
|
5675
|
+
const elapsedSeconds = Math.max(1, Math.round((now - batchStartedAt) / 1000));
|
|
5676
|
+
writeProgress(`Company enrichment batch ${batches} is still running (${elapsedSeconds}s elapsed)...`);
|
|
5677
|
+
lastRunningHeartbeatAt = now;
|
|
5678
|
+
}
|
|
5679
|
+
}
|
|
5680
|
+
else if (!status.processed) {
|
|
5681
|
+
const now = Date.now();
|
|
5682
|
+
if (now - lastPendingPersistenceHeartbeatAt >= 30_000) {
|
|
5683
|
+
writeProgress(`Company enrichment batch ${batches} finished remotely. Waiting for results to sync...`);
|
|
5684
|
+
lastPendingPersistenceHeartbeatAt = now;
|
|
5685
|
+
}
|
|
5686
|
+
}
|
|
3285
5687
|
await delay(15_000);
|
|
3286
5688
|
}
|
|
3287
5689
|
}
|
|
3288
5690
|
}
|
|
5691
|
+
/**
 * Polls the backfill status until the launched batch is seen running or
 * processed, or until a 45 second start window has elapsed.
 *
 * @param {object} session Session used for the status requests.
 * @param {object} payload Status query forwarded to `fetchLinkedInCompaniesBackfillStatus`.
 * @returns {Promise<object>} The last status fetched (possibly still idle after the window).
 * @throws {LinkedInCompanyBackfillBatchError} When a polled status reports `failed`.
 */
async function waitForLinkedInCompanyBackfillStart(session, payload) {
    const startWindowMs = 45_000;
    const pollIntervalMs = 5_000;
    const giveUpAt = Date.now() + startWindowMs;
    while (true) {
        const status = await fetchLinkedInCompaniesBackfillStatus(session, payload);
        if (status.failed) {
            throw new LinkedInCompanyBackfillBatchError(status.failureMessage ?? "Company enrichment batch failed.", { failureCode: status.failureCode ?? undefined });
        }
        const hasStarted = status.running || status.processed;
        // Once the window is over, hand back whatever was seen last instead of waiting on.
        if (hasStarted || Date.now() >= giveUpAt) {
            return status;
        }
        await delay(pollIntervalMs);
    }
}
|
|
5707
|
+
/**
 * Waits (up to 90 seconds) for the backlog reported by the backfill status to
 * fall below `payload.previousRemaining`.
 *
 * Every fetched status, including the one fetched on the final poll before
 * the deadline, is checked for failure before it can be returned.
 *
 * @param {object} session Session used for the status requests.
 * @param {object} payload Status query; `previousRemaining` is the backlog size to beat.
 * @returns {Promise<object>} The first status whose `remaining` dropped, or the
 *   last status seen once the deadline has passed.
 * @throws {LinkedInCompanyBackfillBatchError} When any polled status reports `failed`.
 */
async function waitForLinkedInCompanyBackfillRemainingDrop(session, payload) {
    const deadline = Date.now() + 90_000;
    let latestStatus = await fetchLinkedInCompaniesBackfillStatus(session, payload);
    let lastHeartbeatAt = Date.now();
    for (;;) {
        if (latestStatus.failed) {
            throw new LinkedInCompanyBackfillBatchError(latestStatus.failureMessage ?? "Company enrichment batch failed.", { failureCode: latestStatus.failureCode ?? undefined });
        }
        if (latestStatus.remaining < payload.previousRemaining) {
            return latestStatus;
        }
        const now = Date.now();
        // The deadline is tested only after the status checks above, so the final
        // poll's result is never returned unexamined.
        if (now >= deadline) {
            return latestStatus;
        }
        if (now - lastHeartbeatAt >= 30_000) {
            writeProgress(`Company enrichment batch finished. Waiting for backlog to update below ${payload.previousRemaining}...`);
            lastHeartbeatAt = now;
        }
        await delay(10_000);
        latestStatus = await fetchLinkedInCompaniesBackfillStatus(session, payload);
    }
}
|
|
5728
|
+
/**
 * Tells whether a backfill failure should simply be retried: only batch errors
 * whose failure code is `"input_empty"` qualify.
 *
 * @param {unknown} error The failure to classify.
 * @returns {boolean} True when the failure is retryable.
 */
function isRetryableLinkedInCompanyBackfillFailure(error) {
    if (!(error instanceof LinkedInCompanyBackfillBatchError)) {
        return false;
    }
    return error.failureCode === "input_empty";
}
|
|
5731
|
+
/**
 * Tells whether a failure means the selected LinkedIn session was rejected, in
 * which case another synced session can be tried.
 *
 * Only `LinkedInCompanyBackfillBatchError` and `CliApiRequestError` instances
 * are considered. Each is recoverable when its code is `"invalid_session"` or
 * its message matches the known invalid-session phrasings.
 *
 * @param {unknown} error The failure to classify.
 * @returns {boolean} True when switching sessions may help.
 */
function isRecoverableLinkedInCompanyBackfillSessionFailure(error) {
    let code;
    if (error instanceof LinkedInCompanyBackfillBatchError) {
        code = error.failureCode;
    }
    else if (error instanceof CliApiRequestError) {
        code = error.errorCode;
    }
    else {
        return false;
    }
    return code === "invalid_session" || isLinkedInCompanyBackfillInvalidSessionMessage(error.message);
}
|
|
5740
|
+
/**
 * Tells whether an API failure carries the `"company_backfill_source_invalid"`
 * error code.
 *
 * @param {unknown} error The failure to classify.
 * @returns {boolean} True only for a `CliApiRequestError` with that error code.
 */
function isCompanyBackfillSourceInvalidError(error) {
    if (!(error instanceof CliApiRequestError)) {
        return false;
    }
    return error.errorCode === "company_backfill_source_invalid";
}
|
|
3289
5743
|
function isSalesNavigatorSessionError(error) {
|
|
3290
5744
|
if (error instanceof SalesNavigatorExportRequestError) {
|
|
3291
5745
|
if (error.errorCode === "invalid_session") {
|
|
@@ -3299,11 +5753,12 @@ function isSalesNavigatorSessionError(error) {
|
|
|
3299
5753
|
return /can't connect profile|sales navigator account|upsell|linkedin session invalid|linkedin_rate_limited|too many requests|rate.?limit|invalid session cookie|disconnected by linkedin|linkedin-disconnected-while-using-api|provide a new linkedin session cookie/i.test(message);
|
|
3300
5754
|
}
|
|
3301
5755
|
/**
 * Classifies an error as a broken or incomplete Sales Navigator result artifact.
 *
 * In the added (`+`) rows the function returns true when `error` is a
 * SalesNavigatorExportRequestError whose `errorCode` is
 * "phantombuster_result_invalid" or "partial_result_artifact"; otherwise it
 * falls back to a case-insensitive pattern match on the error message (or on
 * `String(error)` for non-Error values).
 *
 * Rows marked `-` are the superseded form of the same statements.
 */
function isSalesNavigatorResultArtifactError(error) {
|
|
3302
|
-
if (error instanceof SalesNavigatorExportRequestError &&
|
|
5756
|
+
if (error instanceof SalesNavigatorExportRequestError &&
|
|
5757
|
+
["phantombuster_result_invalid", "partial_result_artifact"].includes(error.errorCode ?? "")) {
|
|
3303
5758
|
return true;
|
|
3304
5759
|
}
|
|
3305
5760
|
// Non-Error values are stringified so the pattern match below always sees text.
const message = error instanceof Error ? error.message : String(error);
|
|
3306
|
-
return /page has crashed|no valid sales navigator people rows/i.test(message);
|
|
5761
|
+
// The added pattern also matches partial artifacts and row-count mismatches.
return /page has crashed|no valid sales navigator people rows|partial result artifact|returned \d+ valid sales navigator people rows, but \d+ were expected/i.test(message);
|
|
3307
5762
|
}
|
|
3308
5763
|
function isSalesNavigatorTransientExportError(error) {
|
|
3309
5764
|
if (isSalesNavigatorSessionError(error) || isSalesNavigatorResultArtifactError(error)) {
|
|
@@ -3394,6 +5849,7 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
3394
5849
|
crawlSliceId: context?.crawlSliceId,
|
|
3395
5850
|
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
3396
5851
|
traceId: context?.traceId ?? null,
|
|
5852
|
+
clientId: context?.clientId ?? null,
|
|
3397
5853
|
phase: shouldProbe ? "probe" : "full_export",
|
|
3398
5854
|
requestedProfiles: probeProfiles,
|
|
3399
5855
|
crawlJobId: context?.crawlJobId ?? null,
|
|
@@ -3430,6 +5886,7 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
3430
5886
|
crawlSliceId: context?.crawlSliceId,
|
|
3431
5887
|
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
3432
5888
|
traceId: context?.traceId ?? null,
|
|
5889
|
+
clientId: context?.clientId ?? null,
|
|
3433
5890
|
phase: "full_export_after_probe",
|
|
3434
5891
|
requestedProfiles: attempt.numberOfProfiles,
|
|
3435
5892
|
crawlJobId: context?.crawlJobId ?? null,
|
|
@@ -3528,15 +5985,87 @@ const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
|
|
|
3528
5985
|
const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
|
|
3529
5986
|
let salesNavigatorFilterImpactModel = null;
|
|
3530
5987
|
let salesNavigatorFilterImpactLoaded = false;
|
|
5988
|
+
let linkedInProfileHitCache = null;
|
|
5989
|
+
let linkedInProfileHitCacheLoaded = false;
|
|
3531
5990
|
/**
 * Returns the directory used for Salesprompter's local configuration files.
 *
 * A non-empty (after trimming) SALESPROMPTER_CONFIG_DIR environment variable
 * wins; otherwise the path is `.config/salesprompter` under the user's home
 * directory.
 */
function getSalesprompterConfigDir() {
|
|
3532
5991
|
const override = process.env.SALESPROMPTER_CONFIG_DIR?.trim();
|
|
3533
5992
|
// `override` is undefined when the variable is unset and "" when it is blank.
if (override !== undefined && override.length > 0) {
|
|
3534
5993
|
return override;
|
|
3535
5994
|
}
|
|
3536
|
-
return path.join(os.homedir(), ".config", "salesprompter");
|
|
5995
|
+
return path.join(os.homedir(), ".config", "salesprompter");
|
|
5996
|
+
}
|
|
5997
|
+
/**
 * Returns the path of the `salesnav-filter-impact.json` file inside the
 * Salesprompter config directory.
 *
 * @returns {string} Path to the JSON file.
 */
function getSalesNavigatorFilterImpactPath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "salesnav-filter-impact.json");
}
|
|
6000
|
+
/**
 * Returns the path of the `linkedin-profile-hits.json` cache file inside the
 * Salesprompter config directory.
 *
 * @returns {string} Path to the JSON file.
 */
function getLinkedInProfileHitCachePath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "linkedin-profile-hits.json");
}
|
|
6003
|
+
/**
 * Derives the lookup keys under which a contact is stored in the LinkedIn
 * profile hit cache.
 *
 * Up to three keys are produced, in this order:
 *  - `contact:<id>` when a contact id is present and is not a bare 1-2 digit number,
 *  - `identity:<name>|<company>|<email>` when name, company and a non-synthetic email are known,
 *  - `identity:<name>|<company>` when name and company are known.
 *
 * @param {{ fullName?: string, companyName?: string, email?: string, contactId?: string }} params
 * @returns {string[]} Distinct cache keys (possibly empty).
 */
function buildLinkedInProfileHitCacheKeys(params) {
    const name = normalizeLooseMatchText(params.fullName);
    const company = normalizeLooseMatchText(params.companyName);
    const rawEmail = normalizeLookupWhitespace(params.email);
    let trustedEmail = "";
    if (rawEmail && !isSyntheticLinkedInLookupEmail(rawEmail)) {
        trustedEmail = rawEmail.toLowerCase();
    }
    const contactId = normalizeLinkedInLookupField(params.contactId);
    const keys = new Set();
    // Ids that are just 1-99 are skipped as contact keys.
    // NOTE(review): presumably these are row numbers rather than stable ids; confirm.
    if (contactId && !/^[1-9]\d?$/.test(contactId)) {
        keys.add(`contact:${contactId}`);
    }
    const hasIdentity = Boolean(name && company);
    if (hasIdentity && trustedEmail) {
        keys.add(`identity:${name}|${company}|${trustedEmail}`);
    }
    if (hasIdentity) {
        keys.add(`identity:${name}|${company}`);
    }
    return [...keys];
}
|
|
6021
|
+
/**
 * Read the LinkedIn profile hit cache from disk, at most once per process.
 *
 * The first call marks the cache as loaded before reading, so later calls
 * return the in-memory value without touching the file again. The parsed
 * file is adopted only when it has `version === 1` and a truthy, object-typed
 * `entries`. Any read or parse error resets the in-memory cache to null.
 *
 * @returns {Promise<object|null>} The in-memory cache, or null when none was loaded.
 */
async function loadLinkedInProfileHitCache() {
    if (!linkedInProfileHitCacheLoaded) {
        linkedInProfileHitCacheLoaded = true;
        try {
            const raw = await readFile(getLinkedInProfileHitCachePath(), "utf8");
            const candidate = JSON.parse(raw);
            const hasExpectedShape = candidate?.version === 1 && candidate.entries && typeof candidate.entries === "object";
            if (hasExpectedShape) {
                linkedInProfileHitCache = candidate;
            }
        }
        catch {
            // Missing, unreadable or malformed file: continue without a cache.
            linkedInProfileHitCache = null;
        }
    }
    return linkedInProfileHitCache;
}
|
|
3538
|
-
function
|
|
3539
|
-
|
|
6038
|
+
/**
 * Write the in-memory LinkedIn profile hit cache to its JSON file.
 *
 * Does nothing when no cache object exists. Otherwise creates the parent
 * directory if needed and writes the cache as 2-space-indented JSON followed
 * by a newline. Errors from mkdir/writeFile propagate to the caller.
 *
 * @returns {Promise<void>}
 */
async function persistLinkedInProfileHitCache() {
    if (linkedInProfileHitCache) {
        const target = getLinkedInProfileHitCachePath();
        const parentDir = path.dirname(target);
        await mkdir(parentDir, { recursive: true });
        // Serialize after the directory exists, immediately before writing.
        const serialized = `${JSON.stringify(linkedInProfileHitCache, null, 2)}\n`;
        await writeFile(target, serialized, "utf8");
    }
}
|
|
6046
|
+
/**
 * Record a contact's resolved URLs in the in-memory LinkedIn profile hit cache.
 *
 * Nothing is stored when all four URL fields are falsy. Otherwise the cache
 * object is created on demand, its `updatedAt` is refreshed, and one shared
 * entry object is stored under every key produced by
 * buildLinkedInProfileHitCacheKeys(params). This only changes memory; it does
 * not write to disk.
 *
 * @param {object} params Contact fields used for key building plus
 *   `linkedinUrl`, `salesNavProfileUrl`, `linkedinCompanyUrl` and `salesNavCompanyUrl`.
 * @returns {void}
 */
function upsertLinkedInProfileHitCacheEntry(params) {
    const { linkedinUrl, salesNavProfileUrl, linkedinCompanyUrl, salesNavCompanyUrl } = params;
    const hasAnyUrl = Boolean(linkedinUrl || salesNavProfileUrl || linkedinCompanyUrl || salesNavCompanyUrl);
    if (!hasAnyUrl) {
        return;
    }
    if (!linkedInProfileHitCache) {
        linkedInProfileHitCache = { version: 1, updatedAt: new Date().toISOString(), entries: {} };
    }
    const stamp = new Date().toISOString();
    linkedInProfileHitCache.updatedAt = stamp;
    // Property order is kept so the serialized JSON layout stays the same.
    const record = {
        linkedinUrl,
        salesNavProfileUrl,
        linkedinCompanyUrl,
        salesNavCompanyUrl,
        updatedAt: stamp
    };
    buildLinkedInProfileHitCacheKeys(params).forEach((cacheKey) => {
        linkedInProfileHitCache.entries[cacheKey] = record;
    });
}
|
|
3541
6070
|
async function loadSalesNavigatorFilterImpactModel() {
|
|
3542
6071
|
if (salesNavigatorFilterImpactLoaded) {
|
|
@@ -3718,7 +6247,8 @@ async function ensureSalesNavigatorSessionPoolReady(queryUrl, options) {
|
|
|
3718
6247
|
status: claimed ? "ok" : "skipped",
|
|
3719
6248
|
selectedSessionUserEmail: claimed?.userEmail ?? null,
|
|
3720
6249
|
selectedSessionUserHandle: claimed?.userHandle ?? null,
|
|
3721
|
-
selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null
|
|
6250
|
+
selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null,
|
|
6251
|
+
selectedSessionLastIngestedSource: claimed?.lastIngestedSource ?? null
|
|
3722
6252
|
});
|
|
3723
6253
|
return {
|
|
3724
6254
|
ready: true
|
|
@@ -3809,6 +6339,7 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
|
|
|
3809
6339
|
}, {
|
|
3810
6340
|
crawlJobId: jobId,
|
|
3811
6341
|
crawlSliceId: slice.id,
|
|
6342
|
+
clientId: options.clientId ?? null,
|
|
3812
6343
|
traceId: options.traceId
|
|
3813
6344
|
});
|
|
3814
6345
|
const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
|
|
@@ -3949,9 +6480,11 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
3949
6480
|
let nextSessionPoolRetryAt = 0;
|
|
3950
6481
|
let lastSessionPoolReadyAt = 0;
|
|
3951
6482
|
const sessionPoolReadinessCooldownMs = 120_000;
|
|
6483
|
+
let allowRetryClaimBeyondMaxSlices = false;
|
|
6484
|
+
let allowedRetrySliceId = null;
|
|
3952
6485
|
while (true) {
|
|
3953
6486
|
while (!noMoreClaimableWork && inFlight.size < parallelExports) {
|
|
3954
|
-
if (claimedSlices >= options.maxSlices) {
|
|
6487
|
+
if (claimedSlices >= options.maxSlices && !allowRetryClaimBeyondMaxSlices) {
|
|
3955
6488
|
break;
|
|
3956
6489
|
}
|
|
3957
6490
|
if (inFlight.size === 0) {
|
|
@@ -4058,6 +6591,15 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4058
6591
|
break;
|
|
4059
6592
|
}
|
|
4060
6593
|
const slice = claimed.value.slice;
|
|
6594
|
+
if (claimedSlices >= options.maxSlices &&
|
|
6595
|
+
allowRetryClaimBeyondMaxSlices &&
|
|
6596
|
+
allowedRetrySliceId &&
|
|
6597
|
+
slice.id !== allowedRetrySliceId) {
|
|
6598
|
+
noMoreClaimableWork = true;
|
|
6599
|
+
break;
|
|
6600
|
+
}
|
|
6601
|
+
allowRetryClaimBeyondMaxSlices = false;
|
|
6602
|
+
allowedRetrySliceId = null;
|
|
4061
6603
|
idlePollCount = 0;
|
|
4062
6604
|
activeSlice = slice;
|
|
4063
6605
|
const isNewSlice = !seenSliceIds.has(slice.id);
|
|
@@ -4074,6 +6616,7 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4074
6616
|
agentBusyWaitSeconds: options.agentBusyWaitSeconds,
|
|
4075
6617
|
agentBusyMaxWaits: options.agentBusyMaxWaits,
|
|
4076
6618
|
claimedSlices: claimedSliceNumber,
|
|
6619
|
+
clientId: options.clientId ?? null,
|
|
4077
6620
|
traceId: options.traceId,
|
|
4078
6621
|
logger: options.logger
|
|
4079
6622
|
}).then((value) => ({ slot, value })));
|
|
@@ -4087,6 +6630,8 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4087
6630
|
job = completed.value.job;
|
|
4088
6631
|
activeSlice = completed.value.activeSlice;
|
|
4089
6632
|
lastOutcome = completed.value.lastOutcome;
|
|
6633
|
+
allowRetryClaimBeyondMaxSlices = lastOutcome?.outcome === "retryable_failed";
|
|
6634
|
+
allowedRetrySliceId = lastOutcome?.outcome === "retryable_failed" ? completed.value.activeSlice.id : null;
|
|
4090
6635
|
if (completed.value.forceSessionPoolRecheck) {
|
|
4091
6636
|
lastSessionPoolReadyAt = 0;
|
|
4092
6637
|
nextSessionPoolRetryAt = 0;
|
|
@@ -4097,6 +6642,11 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4097
6642
|
currentSession = status.session;
|
|
4098
6643
|
job = status.value.job;
|
|
4099
6644
|
}
|
|
6645
|
+
else if (!isSalesNavigatorCrawlJobTerminal(job.status)) {
|
|
6646
|
+
const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
|
|
6647
|
+
currentSession = status.session;
|
|
6648
|
+
job = status.value.job;
|
|
6649
|
+
}
|
|
4100
6650
|
await options.logger?.log("salesnav.crawl.job.completed", {
|
|
4101
6651
|
jobId,
|
|
4102
6652
|
status: job.status,
|
|
@@ -4406,6 +6956,15 @@ function buildCliError(error) {
|
|
|
4406
6956
|
};
|
|
4407
6957
|
}
|
|
4408
6958
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
6959
|
+
if (message === "linkedin_session_invalid" ||
|
|
6960
|
+
isLinkedInCompanyBackfillInvalidSessionMessage(message) ||
|
|
6961
|
+
/no eligible linkedin session cookies available for company backfill|company session preflight returned/i.test(message)) {
|
|
6962
|
+
return {
|
|
6963
|
+
status: "error",
|
|
6964
|
+
code: "runtime_error",
|
|
6965
|
+
message: buildLinkedInCompanyBackfillSessionRecoveryMessage([])
|
|
6966
|
+
};
|
|
6967
|
+
}
|
|
4409
6968
|
if (message.includes("not logged in")) {
|
|
4410
6969
|
return {
|
|
4411
6970
|
status: "error",
|
|
@@ -4494,6 +7053,7 @@ const domainDecisionArraySchema = z.array(z.object({
|
|
|
4494
7053
|
reason: z.enum([
|
|
4495
7054
|
"linkedin-domain",
|
|
4496
7055
|
"linkedin-website",
|
|
7056
|
+
"better-company-match",
|
|
4497
7057
|
"highest-hunter-count",
|
|
4498
7058
|
"fallback-first-non-null",
|
|
4499
7059
|
"no-domain"
|
|
@@ -4777,19 +7337,22 @@ program
|
|
|
4777
7337
|
if (rows.length === 0) {
|
|
4778
7338
|
throw new Error("No contact rows found. Provide TSV/CSV/JSON input via --in or stdin.");
|
|
4779
7339
|
}
|
|
7340
|
+
let authSession = null;
|
|
4780
7341
|
let sessionOrgId = "";
|
|
4781
7342
|
if (!shouldBypassAuth()) {
|
|
4782
7343
|
try {
|
|
4783
|
-
|
|
4784
|
-
sessionOrgId =
|
|
7344
|
+
authSession = await requireAuthSession();
|
|
7345
|
+
sessionOrgId = authSession.user.orgId ?? "";
|
|
4785
7346
|
}
|
|
4786
7347
|
catch {
|
|
7348
|
+
authSession = null;
|
|
4787
7349
|
sessionOrgId = "";
|
|
4788
7350
|
}
|
|
4789
7351
|
}
|
|
4790
7352
|
const companyCleaningMode = resolveCompanyCleaningMode(String(options.companyCleaning ?? process.env.SALESPROMPTER_COMPANY_CLEANING_MODE ?? "basic"));
|
|
4791
7353
|
const cleanedCompanyMap = await buildCompanyNameCleaningMap(rows, companyCleaningMode);
|
|
4792
7354
|
const contacts = toLinkedInUrlLookupContacts(rows, cleanedCompanyMap);
|
|
7355
|
+
await loadLinkedInProfileHitCache();
|
|
4793
7356
|
if (options.dryRun) {
|
|
4794
7357
|
const payload = {
|
|
4795
7358
|
status: "ok",
|
|
@@ -4805,68 +7368,558 @@ program
|
|
|
4805
7368
|
printOutput(payload);
|
|
4806
7369
|
return;
|
|
4807
7370
|
}
|
|
4808
|
-
const
|
|
4809
|
-
|
|
4810
|
-
|
|
7371
|
+
const orgId = String(options.orgId ?? "").trim() || undefined;
|
|
7372
|
+
const strategy = resolveLinkedInBulkStrategyConfig({
|
|
7373
|
+
rowCount: rows.length,
|
|
7374
|
+
timeoutMs
|
|
4811
7375
|
});
|
|
7376
|
+
const useSalesNavRowPrepass = !strategy.bulkMode &&
|
|
7377
|
+
shouldUseSalesNavRowPrepass({
|
|
7378
|
+
rows,
|
|
7379
|
+
orgId
|
|
7380
|
+
});
|
|
7381
|
+
const enrichedRows = useSalesNavRowPrepass
|
|
7382
|
+
? await resolveLinkedInUrlsFromSalesNavRows({
|
|
7383
|
+
rows,
|
|
7384
|
+
orgId
|
|
7385
|
+
})
|
|
7386
|
+
: rows.map((row, index) => ({
|
|
7387
|
+
clientId: row.clientId,
|
|
7388
|
+
fullName: row.fullName,
|
|
7389
|
+
companyName: row.companyName,
|
|
7390
|
+
linkedinUrl: null,
|
|
7391
|
+
salesNavProfileUrl: null,
|
|
7392
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl?.trim() || null,
|
|
7393
|
+
salesNavCompanyUrl: null,
|
|
7394
|
+
found: false,
|
|
7395
|
+
companyFound: Boolean(row.linkedinCompanyUrl?.trim()),
|
|
7396
|
+
contactId: normalizeLinkedInLookupField(row.contactId) ?? `${index + 1}`,
|
|
7397
|
+
source: null,
|
|
7398
|
+
companySource: row.linkedinCompanyUrl?.trim() ? "input" : null,
|
|
7399
|
+
matchedFullName: null,
|
|
7400
|
+
matchedCompanyName: null,
|
|
7401
|
+
matchedTitle: null,
|
|
7402
|
+
matchedOrgId: null,
|
|
7403
|
+
matchedCompanyEmployeeCount: null
|
|
7404
|
+
}));
|
|
7405
|
+
const contactById = new Map(contacts.filter((contact) => !contact.isVariation).map((contact) => [contact.contact_id, contact]));
|
|
7406
|
+
for (const row of enrichedRows) {
|
|
7407
|
+
if (row.found) {
|
|
7408
|
+
continue;
|
|
7409
|
+
}
|
|
7410
|
+
const contact = contactById.get(row.contactId);
|
|
7411
|
+
const cacheKeys = buildLinkedInProfileHitCacheKeys({
|
|
7412
|
+
fullName: row.fullName,
|
|
7413
|
+
companyName: row.companyName,
|
|
7414
|
+
email: contact?.email,
|
|
7415
|
+
contactId: row.contactId
|
|
7416
|
+
});
|
|
7417
|
+
const cachedEntry = cacheKeys
|
|
7418
|
+
.map((key) => linkedInProfileHitCache?.entries[key] ?? null)
|
|
7419
|
+
.find(Boolean);
|
|
7420
|
+
if (!cachedEntry) {
|
|
7421
|
+
continue;
|
|
7422
|
+
}
|
|
7423
|
+
row.linkedinUrl = cachedEntry.linkedinUrl ?? row.linkedinUrl ?? null;
|
|
7424
|
+
row.salesNavProfileUrl = cachedEntry.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
7425
|
+
row.linkedinCompanyUrl = cachedEntry.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
7426
|
+
row.salesNavCompanyUrl = cachedEntry.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
7427
|
+
row.found = Boolean(row.linkedinUrl || row.salesNavProfileUrl);
|
|
7428
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
7429
|
+
row.source = row.found ? "cache" : row.source;
|
|
7430
|
+
row.companySource =
|
|
7431
|
+
row.companyFound && !row.companySource ? "cache" : row.companySource;
|
|
7432
|
+
}
|
|
4812
7433
|
let directAttempted = false;
|
|
7434
|
+
let workflowAttempted = false;
|
|
7435
|
+
const parsedClientIds = Array.from(new Set(rows
|
|
7436
|
+
.map((row) => Number(row.clientId))
|
|
7437
|
+
.filter((value) => Number.isFinite(value) && value > 0)));
|
|
7438
|
+
if (authSession && parsedClientIds.length === 1) {
|
|
7439
|
+
try {
|
|
7440
|
+
const uniqueCompanies = Array.from(new Map(contacts
|
|
7441
|
+
.filter((contact) => !contact.isVariation)
|
|
7442
|
+
.map((contact) => {
|
|
7443
|
+
const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
|
|
7444
|
+
return [
|
|
7445
|
+
key,
|
|
7446
|
+
{
|
|
7447
|
+
companyId: contact.contact_id,
|
|
7448
|
+
companyName: contact.companyNameOriginal ?? contact.companyName,
|
|
7449
|
+
companyNameCleaned: cleanedCompanyMap.get(key) ?? normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
|
|
7450
|
+
}
|
|
7451
|
+
];
|
|
7452
|
+
})).values());
|
|
7453
|
+
if (uniqueCompanies.length > 0) {
|
|
7454
|
+
const appCompanyResult = await enrichDirectEmailCompaniesViaApp(authSession, {
|
|
7455
|
+
clientId: parsedClientIds[0],
|
|
7456
|
+
companies: uniqueCompanies
|
|
7457
|
+
});
|
|
7458
|
+
const companyByNormalizedName = new Map(appCompanyResult.companies.map((company) => [
|
|
7459
|
+
normalizeLookupCompanyForCleaning(company.companyName),
|
|
7460
|
+
company.linkedinCompanyPage ?? null
|
|
7461
|
+
]));
|
|
7462
|
+
for (const row of enrichedRows) {
|
|
7463
|
+
if (row.linkedinCompanyUrl) {
|
|
7464
|
+
continue;
|
|
7465
|
+
}
|
|
7466
|
+
const normalizedName = normalizeLookupCompanyForCleaning(row.companyName);
|
|
7467
|
+
const linkedinCompanyUrl = companyByNormalizedName.get(normalizedName) ?? null;
|
|
7468
|
+
if (!linkedinCompanyUrl) {
|
|
7469
|
+
continue;
|
|
7470
|
+
}
|
|
7471
|
+
row.linkedinCompanyUrl = linkedinCompanyUrl;
|
|
7472
|
+
row.companyFound = true;
|
|
7473
|
+
row.companySource = "workflow";
|
|
7474
|
+
}
|
|
7475
|
+
}
|
|
7476
|
+
}
|
|
7477
|
+
catch (error) {
|
|
7478
|
+
writeProgress(`Skipping app-backed company enrichment: ${error instanceof Error ? error.message : String(error)}`);
|
|
7479
|
+
}
|
|
7480
|
+
}
|
|
7481
|
+
const contactsMissingCompanyUrl = contacts.filter((contact) => !contact.isVariation &&
|
|
7482
|
+
enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl));
|
|
7483
|
+
if (contactsMissingCompanyUrl.length > 0) {
|
|
7484
|
+
const companyUrlByContactId = await resolveLinkedInCompanyUrlsForContacts({
|
|
7485
|
+
contacts: contactsMissingCompanyUrl,
|
|
7486
|
+
timeoutMs: Math.min(timeoutMs, 15_000),
|
|
7487
|
+
concurrency: strategy.bulkMode ? 6 : 3,
|
|
7488
|
+
overallBudgetMs: strategy.bulkMode ? 20_000 : 10_000
|
|
7489
|
+
});
|
|
7490
|
+
for (const row of enrichedRows) {
|
|
7491
|
+
if (row.linkedinCompanyUrl) {
|
|
7492
|
+
continue;
|
|
7493
|
+
}
|
|
7494
|
+
const linkedinCompanyUrl = companyUrlByContactId.get(row.contactId);
|
|
7495
|
+
if (!linkedinCompanyUrl) {
|
|
7496
|
+
continue;
|
|
7497
|
+
}
|
|
7498
|
+
row.linkedinCompanyUrl = linkedinCompanyUrl;
|
|
7499
|
+
row.companyFound = true;
|
|
7500
|
+
row.companySource = "web-search";
|
|
7501
|
+
}
|
|
7502
|
+
}
|
|
4813
7503
|
const missingRows = enrichedRows.filter((row) => !row.found);
|
|
7504
|
+
const useDirectPeopleLookup = !strategy.bulkMode &&
|
|
7505
|
+
shouldUseDirectPeopleLookup({
|
|
7506
|
+
rowCount: missingRows.length
|
|
7507
|
+
});
|
|
7508
|
+
const useWorkflowPeopleLookup = !strategy.bulkMode &&
|
|
7509
|
+
shouldUseWorkflowPeopleLookup({
|
|
7510
|
+
rowCount: missingRows.length
|
|
7511
|
+
});
|
|
4814
7512
|
if (missingRows.length > 0) {
|
|
4815
|
-
|
|
4816
|
-
const directContacts = contacts
|
|
4817
|
-
|
|
4818
|
-
|
|
4819
|
-
|
|
7513
|
+
const rowByContactId = new Map(enrichedRows.map((row) => [row.contactId, row]));
|
|
7514
|
+
const directContacts = contacts
|
|
7515
|
+
.filter((contact) => missingRows.some((row) => row.contactId === contact.contact_id))
|
|
7516
|
+
.map((contact) => {
|
|
7517
|
+
const row = rowByContactId.get(contact.contact_id);
|
|
7518
|
+
if (!row) {
|
|
7519
|
+
return contact;
|
|
7520
|
+
}
|
|
7521
|
+
return {
|
|
7522
|
+
...contact,
|
|
7523
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl ?? contact.linkedinCompanyUrl,
|
|
7524
|
+
companyNameOriginal: row.matchedCompanyName ?? contact.companyNameOriginal,
|
|
7525
|
+
companyName: row.matchedCompanyName && normalizeLookupCompanyForSearch(row.matchedCompanyName)
|
|
7526
|
+
? normalizeLookupCompanyForSearch(row.matchedCompanyName)
|
|
7527
|
+
: contact.companyName
|
|
7528
|
+
};
|
|
7529
|
+
});
|
|
7530
|
+
let linkedInUrlByContactId = new Map();
|
|
7531
|
+
if (useDirectPeopleLookup) {
|
|
7532
|
+
try {
|
|
7533
|
+
directAttempted = true;
|
|
7534
|
+
const result = await invokeLinkedInUrlEnrichmentDirect({
|
|
7535
|
+
contacts: directContacts,
|
|
7536
|
+
timeoutMs
|
|
7537
|
+
});
|
|
7538
|
+
const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
7539
|
+
linkedInUrlByContactId = new Map(result.contacts.map((contact) => [
|
|
7540
|
+
contact.contact_id,
|
|
7541
|
+
{
|
|
7542
|
+
linkedinUrl: contact.linkedin_url ?? null,
|
|
7543
|
+
salesNavProfileUrl: contact.sales_nav_profile_url ?? null,
|
|
7544
|
+
linkedinCompanyUrl: null,
|
|
7545
|
+
salesNavCompanyUrl: null,
|
|
7546
|
+
matchedFullName: contact.matched_full_name ?? null,
|
|
7547
|
+
matchedCompanyName: contact.matched_company_name ?? null,
|
|
7548
|
+
matchedTitle: contact.matched_title ?? null
|
|
7549
|
+
}
|
|
7550
|
+
]));
|
|
7551
|
+
for (const row of enrichedRows) {
|
|
7552
|
+
if (row.found)
|
|
7553
|
+
continue;
|
|
7554
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
7555
|
+
if (profile?.linkedinUrl) {
|
|
7556
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
7557
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
7558
|
+
row.found = true;
|
|
7559
|
+
row.source = "linkedin-direct";
|
|
7560
|
+
row.matchedFullName = profile.matchedFullName ?? row.matchedFullName ?? null;
|
|
7561
|
+
row.matchedCompanyName = profile.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
7562
|
+
row.matchedTitle = profile.matchedTitle ?? row.matchedTitle ?? null;
|
|
7563
|
+
}
|
|
7564
|
+
const directContact = directContacts.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
|
|
7565
|
+
const companyContext = directContact
|
|
7566
|
+
? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
|
|
7567
|
+
: null;
|
|
7568
|
+
if (companyContext && !row.linkedinCompanyUrl) {
|
|
7569
|
+
row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
7570
|
+
row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
7571
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
7572
|
+
row.companySource =
|
|
7573
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
7574
|
+
row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
7575
|
+
row.matchedCompanyEmployeeCount =
|
|
7576
|
+
companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
7577
|
+
}
|
|
7578
|
+
}
|
|
7579
|
+
const contactsStillMissingCompany = contacts.filter((contact) => !contact.isVariation &&
|
|
7580
|
+
enrichedRows.some((row) => row.contactId === contact.contact_id && !row.linkedinCompanyUrl && !row.salesNavCompanyUrl));
|
|
7581
|
+
if (contactsStillMissingCompany.length > 0) {
|
|
7582
|
+
const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
|
|
7583
|
+
contacts: contactsStillMissingCompany,
|
|
7584
|
+
timeoutMs,
|
|
7585
|
+
precomputedContexts: result.companyContexts
|
|
7586
|
+
});
|
|
7587
|
+
const companyByContactId = new Map(companyResult.contacts.map((contact) => [
|
|
7588
|
+
contact.contact_id,
|
|
7589
|
+
{
|
|
7590
|
+
linkedinCompanyUrl: contact.linkedin_company_url ?? null,
|
|
7591
|
+
salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
|
|
7592
|
+
matchedCompanyName: contact.matched_company_name ?? null,
|
|
7593
|
+
matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
|
|
7594
|
+
}
|
|
7595
|
+
]));
|
|
7596
|
+
for (const row of enrichedRows) {
|
|
7597
|
+
const company = companyByContactId.get(row.contactId);
|
|
7598
|
+
if (!company || row.linkedinCompanyUrl) {
|
|
7599
|
+
continue;
|
|
7600
|
+
}
|
|
7601
|
+
row.linkedinCompanyUrl = company.linkedinCompanyUrl;
|
|
7602
|
+
row.salesNavCompanyUrl = company.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
7603
|
+
row.companyFound = Boolean(company.linkedinCompanyUrl || company.salesNavCompanyUrl);
|
|
7604
|
+
row.companySource =
|
|
7605
|
+
company.linkedinCompanyUrl || company.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
7606
|
+
row.matchedCompanyName = company.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
7607
|
+
row.matchedCompanyEmployeeCount =
|
|
7608
|
+
company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
7609
|
+
}
|
|
7610
|
+
}
|
|
7611
|
+
}
|
|
7612
|
+
catch (error) {
|
|
7613
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
7614
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
7615
|
+
throw error;
|
|
7616
|
+
}
|
|
7617
|
+
}
|
|
7618
|
+
}
|
|
7619
|
+
const stillMissingAfterDirect = enrichedRows.filter((row) => !row.found);
|
|
7620
|
+
const contactsStillMissing = directContacts.filter((contact) => stillMissingAfterDirect.some((row) => row.contactId === contact.contact_id));
|
|
7621
|
+
if (contactsStillMissing.length > 0 && useWorkflowPeopleLookup) {
|
|
7622
|
+
workflowAttempted = true;
|
|
7623
|
+
try {
|
|
7624
|
+
const workflow = await invokeLinkedInUrlEnrichmentWorkflow({
|
|
7625
|
+
contacts: contactsStillMissing,
|
|
7626
|
+
externalUserId: orgId || sessionOrgId || "cli_direct_lookup",
|
|
7627
|
+
timeoutMs: Math.min(timeoutMs, strategy.workflowStageBudgetMs)
|
|
7628
|
+
});
|
|
7629
|
+
if (!workflow.response.ok) {
|
|
7630
|
+
throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
|
|
7631
|
+
}
|
|
7632
|
+
linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
|
|
7633
|
+
parsedBody: workflow.parsedBody,
|
|
7634
|
+
contacts: contactsStillMissing
|
|
7635
|
+
});
|
|
7636
|
+
for (const row of enrichedRows) {
|
|
7637
|
+
if (row.found)
|
|
7638
|
+
continue;
|
|
7639
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
7640
|
+
if (profile?.linkedinUrl) {
|
|
7641
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
7642
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
7643
|
+
row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
7644
|
+
row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
7645
|
+
row.found = true;
|
|
7646
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
7647
|
+
row.source = "workflow";
|
|
7648
|
+
row.companySource =
|
|
7649
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
|
|
7650
|
+
}
|
|
7651
|
+
}
|
|
7652
|
+
}
|
|
7653
|
+
catch (error) {
|
|
7654
|
+
writeProgress(`Skipping workflow profile enrichment: ${error instanceof Error ? error.message : String(error)}`);
|
|
7655
|
+
}
|
|
7656
|
+
}
|
|
7657
|
+
const serperContacts = directContacts.filter((contact) => enrichedRows.some((row) => row.contactId === contact.contact_id && !row.found));
|
|
7658
|
+
if (strategy.bulkMode && serperContacts.length > 0) {
|
|
7659
|
+
writeProgress(`Using bulk profile resolution strategy for ${serperContacts.length} remaining contacts.`);
|
|
7660
|
+
}
|
|
7661
|
+
const serperResults = await resolveSerperLinkedInProfilesInParallel({
|
|
7662
|
+
contacts: serperContacts.filter((contact) => !contact.isVariation),
|
|
7663
|
+
timeoutMs,
|
|
7664
|
+
concurrency: Math.min(strategy.serperConcurrency, serperContacts.length || 1),
|
|
7665
|
+
maxQueries: strategy.serperMaxQueries,
|
|
7666
|
+
overallBudgetMs: strategy.serperStageBudgetMs
|
|
4820
7667
|
});
|
|
4821
|
-
const linkedInUrlByContactId = new Map(result.contacts.map((contact) => [contact.contact_id, contact.linkedin_url]));
|
|
4822
7668
|
for (const row of enrichedRows) {
|
|
4823
7669
|
if (row.found)
|
|
4824
7670
|
continue;
|
|
4825
|
-
const linkedinUrl =
|
|
4826
|
-
if (linkedinUrl)
|
|
4827
|
-
|
|
4828
|
-
|
|
4829
|
-
|
|
7671
|
+
const linkedinUrl = serperResults.get(row.contactId);
|
|
7672
|
+
if (!linkedinUrl)
|
|
7673
|
+
continue;
|
|
7674
|
+
row.linkedinUrl = linkedinUrl;
|
|
7675
|
+
row.found = true;
|
|
7676
|
+
row.source = "web-search";
|
|
7677
|
+
}
|
|
7678
|
+
const stillMissingAfterSerper = enrichedRows.filter((row) => !row.found);
|
|
7679
|
+
if (shouldAttemptBulkDirectProfileLookup({
|
|
7680
|
+
strategy,
|
|
7681
|
+
unresolvedRowCount: stillMissingAfterSerper.length
|
|
7682
|
+
})) {
|
|
7683
|
+
const bulkDirectCandidates = rankContactsForBulkDirectProfileLookup({
|
|
7684
|
+
contacts: directContacts.filter((contact) => stillMissingAfterSerper.some((row) => row.contactId === contact.contact_id)),
|
|
7685
|
+
rowsByContactId: rowByContactId,
|
|
7686
|
+
limit: strategy.bulkDirectProfileMaxRows
|
|
7687
|
+
});
|
|
7688
|
+
if (bulkDirectCandidates.length > 0) {
|
|
7689
|
+
writeProgress(`Using bulk direct profile follow-up for ${bulkDirectCandidates.length} high-signal unresolved contacts.`);
|
|
7690
|
+
try {
|
|
7691
|
+
directAttempted = true;
|
|
7692
|
+
const result = await invokeLinkedInUrlEnrichmentDirect({
|
|
7693
|
+
contacts: bulkDirectCandidates,
|
|
7694
|
+
timeoutMs: strategy.bulkDirectProfileTimeoutMs,
|
|
7695
|
+
perAttemptTimeoutMs: Math.min(strategy.bulkDirectProfileTimeoutMs, 2_500),
|
|
7696
|
+
perContactBudgetMs: strategy.bulkDirectProfileTimeoutMs
|
|
7697
|
+
});
|
|
7698
|
+
const directCompanyContextByKey = new Map((result.companyContexts ?? []).map((context) => [context.normalizedCompanyKey, context]));
|
|
7699
|
+
const bulkDirectByContactId = new Map(result.contacts.map((contact) => [
|
|
7700
|
+
contact.contact_id,
|
|
7701
|
+
{
|
|
7702
|
+
linkedinUrl: contact.linkedin_url ?? null,
|
|
7703
|
+
salesNavProfileUrl: contact.sales_nav_profile_url ?? null
|
|
7704
|
+
}
|
|
7705
|
+
]));
|
|
7706
|
+
for (const row of enrichedRows) {
|
|
7707
|
+
if (row.found)
|
|
7708
|
+
continue;
|
|
7709
|
+
const profile = bulkDirectByContactId.get(row.contactId);
|
|
7710
|
+
if (profile?.linkedinUrl) {
|
|
7711
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
7712
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
7713
|
+
row.found = true;
|
|
7714
|
+
row.source = "linkedin-direct";
|
|
7715
|
+
}
|
|
7716
|
+
const directContact = bulkDirectCandidates.find((candidate) => candidate.contact_id === row.contactId && !candidate.isVariation);
|
|
7717
|
+
const companyContext = directContact
|
|
7718
|
+
? directCompanyContextByKey.get(buildDirectCompanyContextKey(directContact))
|
|
7719
|
+
: null;
|
|
7720
|
+
if (companyContext && !row.linkedinCompanyUrl) {
|
|
7721
|
+
row.linkedinCompanyUrl = companyContext.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
7722
|
+
row.salesNavCompanyUrl = companyContext.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
7723
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
7724
|
+
row.companySource =
|
|
7725
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
7726
|
+
row.matchedCompanyName = companyContext.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
7727
|
+
row.matchedCompanyEmployeeCount =
|
|
7728
|
+
companyContext.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
7729
|
+
}
|
|
7730
|
+
}
|
|
7731
|
+
}
|
|
7732
|
+
catch (error) {
|
|
7733
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
7734
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
7735
|
+
writeProgress(`Skipping bulk direct profile follow-up: ${message}`);
|
|
7736
|
+
}
|
|
7737
|
+
}
|
|
7738
|
+
}
|
|
7739
|
+
}
|
|
7740
|
+
}
|
|
7741
|
+
const payload = {
|
|
7742
|
+
status: "ok",
|
|
7743
|
+
orgId: String(options.orgId ?? "").trim() || null,
|
|
7744
|
+
requested: rows.length,
|
|
7745
|
+
found: enrichedRows.filter((row) => row.found).length,
|
|
7746
|
+
companiesFound: enrichedRows.filter((row) => row.companyFound).length,
|
|
7747
|
+
directAttempted,
|
|
7748
|
+
workflowAttempted,
|
|
7749
|
+
bulkMode: strategy.bulkMode,
|
|
7750
|
+
rows: enrichedRows
|
|
7751
|
+
};
|
|
7752
|
+
for (const row of enrichedRows) {
|
|
7753
|
+
const contact = contactById.get(row.contactId);
|
|
7754
|
+
upsertLinkedInProfileHitCacheEntry({
|
|
7755
|
+
fullName: row.fullName,
|
|
7756
|
+
companyName: row.companyName,
|
|
7757
|
+
email: contact?.email,
|
|
7758
|
+
contactId: row.contactId,
|
|
7759
|
+
linkedinUrl: row.linkedinUrl ?? null,
|
|
7760
|
+
salesNavProfileUrl: row.salesNavProfileUrl ?? null,
|
|
7761
|
+
linkedinCompanyUrl: row.linkedinCompanyUrl ?? null,
|
|
7762
|
+
salesNavCompanyUrl: row.salesNavCompanyUrl ?? null
|
|
7763
|
+
});
|
|
7764
|
+
}
|
|
7765
|
+
await persistLinkedInProfileHitCache();
|
|
7766
|
+
if (options.out) {
|
|
7767
|
+
await writeJsonFile(options.out, payload);
|
|
7768
|
+
}
|
|
7769
|
+
printOutput(payload);
|
|
7770
|
+
});
|
|
7771
|
+
program
|
|
7772
|
+
.command("companies:find-linkedin-urls")
|
|
7773
|
+
.alias("companies:resolve-linkedin-urls")
|
|
7774
|
+
.description("Resolve LinkedIn company URLs from a pasted company list directly in the CLI.")
|
|
7775
|
+
.option("--in <path>", "Input TSV/CSV/JSON file path. Omit to read from stdin.")
|
|
7776
|
+
.option("--out <path>", "Optional output JSON path for the enriched rows.")
|
|
7777
|
+
.option("--client-id <id>", "Optional clientId override for app-backed enrichment.")
|
|
7778
|
+
.option("--timeout-ms <number>", "Lookup timeout in milliseconds", "30000")
|
|
7779
|
+
.option("--company-cleaning <mode>", "Company cleaning mode: off, basic, or ai", "basic")
|
|
7780
|
+
.option("--dry-run", "Preview the normalized payload without calling LinkedIn", false)
|
|
7781
|
+
.action(async (options) => {
|
|
7782
|
+
const timeoutMs = z.coerce.number().int().min(1000).max(300000).parse(options.timeoutMs);
|
|
7783
|
+
const inputContent = options.in ? await readFile(options.in, "utf8") : await readAllStdin();
|
|
7784
|
+
const rows = parseLinkedInCompanyLookupInput(inputContent);
|
|
7785
|
+
if (rows.length === 0) {
|
|
7786
|
+
throw new Error("No company rows found. Provide TSV/CSV/JSON input via --in or stdin.");
|
|
7787
|
+
}
|
|
7788
|
+
let authSession = null;
|
|
7789
|
+
if (!shouldBypassAuth()) {
|
|
7790
|
+
authSession = await requireAuthSession().catch(() => null);
|
|
7791
|
+
}
|
|
7792
|
+
const companyCleaningMode = resolveCompanyCleaningMode(String(options.companyCleaning ?? process.env.SALESPROMPTER_COMPANY_CLEANING_MODE ?? "basic"));
|
|
7793
|
+
const lookupRows = rows.map((row) => ({
|
|
7794
|
+
clientId: row.clientId,
|
|
7795
|
+
fullName: "",
|
|
7796
|
+
companyName: row.companyName
|
|
7797
|
+
}));
|
|
7798
|
+
const cleanedCompanyMap = await buildCompanyNameCleaningMap(lookupRows, companyCleaningMode);
|
|
7799
|
+
const contacts = toLinkedInUrlLookupContacts(lookupRows, cleanedCompanyMap);
|
|
7800
|
+
if (options.dryRun) {
|
|
7801
|
+
const payload = {
|
|
7802
|
+
status: "ok",
|
|
7803
|
+
dryRun: true,
|
|
7804
|
+
companyCleaningMode,
|
|
7805
|
+
companies: contacts.length,
|
|
7806
|
+
sample: contacts.slice(0, 5).map((contact) => ({
|
|
7807
|
+
companyId: contact.contact_id,
|
|
7808
|
+
companyName: contact.companyNameOriginal ?? contact.companyName,
|
|
7809
|
+
companyNameCleaned: contact.companyName
|
|
7810
|
+
}))
|
|
7811
|
+
};
|
|
7812
|
+
if (options.out) {
|
|
7813
|
+
await writeJsonFile(options.out, payload);
|
|
7814
|
+
}
|
|
7815
|
+
printOutput(payload);
|
|
7816
|
+
return;
|
|
7817
|
+
}
|
|
7818
|
+
const clientId = resolveDirectEmailEnrichmentClientId(rows.map((row) => ({
|
|
7819
|
+
clientId: row.clientId,
|
|
7820
|
+
companyName: row.companyName,
|
|
7821
|
+
fullName: ""
|
|
7822
|
+
})), options.clientId);
|
|
7823
|
+
const results = contacts
|
|
7824
|
+
.filter((contact) => !contact.isVariation)
|
|
7825
|
+
.map((contact) => ({
|
|
7826
|
+
clientId: String(clientId),
|
|
7827
|
+
companyName: contact.companyNameOriginal ?? contact.companyName,
|
|
7828
|
+
linkedinCompanyUrl: null,
|
|
7829
|
+
salesNavCompanyUrl: null,
|
|
7830
|
+
domain: null,
|
|
7831
|
+
found: false,
|
|
7832
|
+
source: null,
|
|
7833
|
+
matchedCompanyName: null,
|
|
7834
|
+
matchedCompanyEmployeeCount: null
|
|
7835
|
+
}));
|
|
7836
|
+
const resultByNormalizedName = new Map(results.map((row) => [normalizeLookupCompanyForCleaning(row.companyName), row]));
|
|
7837
|
+
if (authSession) {
|
|
7838
|
+
try {
|
|
7839
|
+
const uniqueCompanies = contacts
|
|
7840
|
+
.filter((contact) => !contact.isVariation)
|
|
7841
|
+
.map((contact) => ({
|
|
7842
|
+
companyId: contact.contact_id,
|
|
7843
|
+
companyName: contact.companyNameOriginal ?? contact.companyName,
|
|
7844
|
+
companyNameCleaned: cleanedCompanyMap.get(normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName)) ?? normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
|
|
7845
|
+
}));
|
|
7846
|
+
if (uniqueCompanies.length > 0) {
|
|
7847
|
+
const enrichedCompanies = await enrichDirectEmailCompaniesViaApp(authSession, {
|
|
7848
|
+
clientId,
|
|
7849
|
+
companies: uniqueCompanies
|
|
7850
|
+
});
|
|
7851
|
+
for (const company of enrichedCompanies.companies) {
|
|
7852
|
+
const row = resultByNormalizedName.get(normalizeLookupCompanyForCleaning(company.companyName));
|
|
7853
|
+
if (!row) {
|
|
7854
|
+
continue;
|
|
7855
|
+
}
|
|
7856
|
+
row.domain = company.domain ?? row.domain ?? null;
|
|
7857
|
+
row.linkedinCompanyUrl = company.linkedinCompanyPage ?? row.linkedinCompanyUrl ?? null;
|
|
7858
|
+
row.found = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
7859
|
+
row.source = row.linkedinCompanyUrl ? "app" : row.source;
|
|
7860
|
+
}
|
|
4830
7861
|
}
|
|
4831
7862
|
}
|
|
7863
|
+
catch {
|
|
7864
|
+
// Ignore app failures here and keep falling back to direct or public lookup.
|
|
7865
|
+
}
|
|
4832
7866
|
}
|
|
4833
7867
|
try {
|
|
4834
7868
|
const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
|
|
4835
7869
|
contacts,
|
|
4836
7870
|
timeoutMs
|
|
4837
7871
|
});
|
|
4838
|
-
const companyByContactId = new Map(companyResult.contacts.map((contact) => [
|
|
4839
|
-
|
|
4840
|
-
{
|
|
4841
|
-
|
|
4842
|
-
matchedCompanyName: contact.matched_company_name ?? null,
|
|
4843
|
-
matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
|
|
7872
|
+
const companyByContactId = new Map(companyResult.contacts.map((contact) => [contact.contact_id, contact]));
|
|
7873
|
+
for (const contact of contacts) {
|
|
7874
|
+
if (contact.isVariation) {
|
|
7875
|
+
continue;
|
|
4844
7876
|
}
|
|
4845
|
-
|
|
4846
|
-
|
|
4847
|
-
|
|
4848
|
-
if (!company || row.linkedinCompanyUrl) {
|
|
7877
|
+
const row = resultByNormalizedName.get(normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName));
|
|
7878
|
+
const company = companyByContactId.get(contact.contact_id);
|
|
7879
|
+
if (!row || !company) {
|
|
4849
7880
|
continue;
|
|
4850
7881
|
}
|
|
4851
|
-
row.linkedinCompanyUrl
|
|
4852
|
-
|
|
4853
|
-
|
|
4854
|
-
row.
|
|
7882
|
+
if (!row.linkedinCompanyUrl && company.linkedin_company_url) {
|
|
7883
|
+
row.linkedinCompanyUrl = company.linkedin_company_url;
|
|
7884
|
+
}
|
|
7885
|
+
if (!row.salesNavCompanyUrl && company.sales_nav_company_url) {
|
|
7886
|
+
row.salesNavCompanyUrl = company.sales_nav_company_url;
|
|
7887
|
+
}
|
|
7888
|
+
row.matchedCompanyName = company.matched_company_name ?? row.matchedCompanyName ?? null;
|
|
4855
7889
|
row.matchedCompanyEmployeeCount =
|
|
4856
|
-
company.
|
|
7890
|
+
company.matched_company_employee_count ?? row.matchedCompanyEmployeeCount ?? null;
|
|
7891
|
+
if ((company.linkedin_company_url || company.sales_nav_company_url) && row.source == null) {
|
|
7892
|
+
row.source = "linkedin-direct";
|
|
7893
|
+
}
|
|
7894
|
+
row.found = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
4857
7895
|
}
|
|
4858
7896
|
}
|
|
4859
7897
|
catch (error) {
|
|
4860
|
-
|
|
7898
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
7899
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
7900
|
+
throw error;
|
|
7901
|
+
}
|
|
7902
|
+
}
|
|
7903
|
+
for (const row of results) {
|
|
7904
|
+
if (row.linkedinCompanyUrl) {
|
|
7905
|
+
continue;
|
|
7906
|
+
}
|
|
7907
|
+
const linkedinCompanyUrl = (await searchSerperLinkedInCompanyUrl(row.companyName, timeoutMs)) ??
|
|
7908
|
+
(await searchPublicLinkedInCompanyUrl(row.companyName, timeoutMs));
|
|
7909
|
+
if (!linkedinCompanyUrl) {
|
|
7910
|
+
continue;
|
|
7911
|
+
}
|
|
7912
|
+
row.linkedinCompanyUrl = linkedinCompanyUrl;
|
|
7913
|
+
row.found = true;
|
|
7914
|
+
if (row.source == null) {
|
|
7915
|
+
row.source = "web-search";
|
|
7916
|
+
}
|
|
4861
7917
|
}
|
|
4862
7918
|
const payload = {
|
|
4863
7919
|
status: "ok",
|
|
4864
|
-
orgId: String(options.orgId ?? "").trim() || null,
|
|
4865
7920
|
requested: rows.length,
|
|
4866
|
-
found:
|
|
4867
|
-
|
|
4868
|
-
directAttempted,
|
|
4869
|
-
rows: enrichedRows
|
|
7921
|
+
found: results.filter((row) => row.found).length,
|
|
7922
|
+
rows: results
|
|
4870
7923
|
};
|
|
4871
7924
|
if (options.out) {
|
|
4872
7925
|
await writeJsonFile(options.out, payload);
|
|
@@ -4893,6 +7946,7 @@ program.hook("preAction", async (_thisCommand, actionCommand) => {
|
|
|
4893
7946
|
commandName === "wizard" ||
|
|
4894
7947
|
commandName === "llm:ready" ||
|
|
4895
7948
|
commandName === "contacts:find-linkedin-urls" ||
|
|
7949
|
+
commandName === "companies:find-linkedin-urls" ||
|
|
4896
7950
|
commandName.startsWith("packs:") ||
|
|
4897
7951
|
((commandName === "list" || commandName === "add") && parentCommandName === "packs")) {
|
|
4898
7952
|
return;
|
|
@@ -5046,12 +8100,14 @@ program
|
|
|
5046
8100
|
});
|
|
5047
8101
|
program
|
|
5048
8102
|
.command("leads:generate")
|
|
5049
|
-
.description("Generate leads
|
|
8103
|
+
.description("Generate leads from your Salesprompter workspace when authenticated, or from fallback seeds.")
|
|
5050
8104
|
.requiredOption("--icp <path>", "Path to ICP JSON")
|
|
5051
8105
|
.option("--count <number>", "Number of leads to generate", "10")
|
|
5052
8106
|
.option("--domain <domain>", "Target a specific company domain like company.com")
|
|
5053
8107
|
.option("--company-domain <domain>", "Deprecated alias for --domain")
|
|
5054
8108
|
.option("--company-name <name>", "Optional company name override for a targeted domain")
|
|
8109
|
+
.option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
|
|
8110
|
+
.option("--source <source>", "auto|workspace|fallback", "auto")
|
|
5055
8111
|
.requiredOption("--out <path>", "Output file path")
|
|
5056
8112
|
.action(async (options) => {
|
|
5057
8113
|
const icp = await readJsonFile(options.icp, IcpSchema);
|
|
@@ -5059,9 +8115,15 @@ program
|
|
|
5059
8115
|
const domain = options.domain ?? options.companyDomain;
|
|
5060
8116
|
const target = {
|
|
5061
8117
|
companyDomain: domain,
|
|
5062
|
-
companyName: options.companyName
|
|
8118
|
+
companyName: options.companyName,
|
|
8119
|
+
linkedinCompanyPage: options.linkedinCompanyPage
|
|
5063
8120
|
};
|
|
5064
|
-
const result = await
|
|
8121
|
+
const result = await generateLeadsForCommand({
|
|
8122
|
+
icp,
|
|
8123
|
+
count,
|
|
8124
|
+
target,
|
|
8125
|
+
source: options.source
|
|
8126
|
+
});
|
|
5065
8127
|
await writeJsonFile(options.out, result.leads);
|
|
5066
8128
|
printOutput({
|
|
5067
8129
|
status: "ok",
|
|
@@ -5106,6 +8168,8 @@ program
|
|
|
5106
8168
|
.option("--domain <domain>", "Target a specific company domain like company.com")
|
|
5107
8169
|
.option("--company-domain <domain>", "Deprecated alias for --domain")
|
|
5108
8170
|
.option("--company-name <name>", "Optional company name override for a targeted domain")
|
|
8171
|
+
.option("--linkedin-company-page <url>", "LinkedIn company page to target when the domain is unknown")
|
|
8172
|
+
.option("--source <source>", "auto|workspace|fallback", "auto")
|
|
5109
8173
|
.option("--out-prefix <path>", "Output path prefix (writes <prefix>-leads.json, <prefix>-enriched.json, <prefix>-scored.json)", "./data/leads-pipeline")
|
|
5110
8174
|
.action(async (options) => {
|
|
5111
8175
|
const icp = await readJsonFile(options.icp, IcpSchema);
|
|
@@ -5113,13 +8177,19 @@ program
|
|
|
5113
8177
|
const domain = options.domain ?? options.companyDomain;
|
|
5114
8178
|
const target = {
|
|
5115
8179
|
companyDomain: domain,
|
|
5116
|
-
companyName: options.companyName
|
|
8180
|
+
companyName: options.companyName,
|
|
8181
|
+
linkedinCompanyPage: options.linkedinCompanyPage
|
|
5117
8182
|
};
|
|
5118
8183
|
const outPrefix = String(options.outPrefix);
|
|
5119
8184
|
const leadsOut = `${outPrefix}-leads.json`;
|
|
5120
8185
|
const enrichedOut = `${outPrefix}-enriched.json`;
|
|
5121
8186
|
const scoredOut = `${outPrefix}-scored.json`;
|
|
5122
|
-
const generated = await
|
|
8187
|
+
const generated = await generateLeadsForCommand({
|
|
8188
|
+
icp,
|
|
8189
|
+
count,
|
|
8190
|
+
target,
|
|
8191
|
+
source: options.source
|
|
8192
|
+
});
|
|
5123
8193
|
await writeJsonFile(leadsOut, generated.leads);
|
|
5124
8194
|
const enriched = await enrichmentProvider.enrichLeads(generated.leads);
|
|
5125
8195
|
await writeJsonFile(enrichedOut, enriched);
|
|
@@ -5176,16 +8246,21 @@ program
|
|
|
5176
8246
|
.command("linkedin-companies:backfill")
|
|
5177
8247
|
.alias("companies:enrich")
|
|
5178
8248
|
.description("Backfill missing or unavailable company profiles for the current workspace.")
|
|
5179
|
-
.
|
|
8249
|
+
.option("--client-id <number>", "Legacy BigQuery clientId to backfill (optional if set in cache or env)")
|
|
5180
8250
|
.option("--limit <number>", "Maximum companies to scrape in one run", "25")
|
|
5181
8251
|
.option("--concurrency <number>", "How many LinkedIn company pages to scrape in parallel", "4")
|
|
5182
8252
|
.option("--dry-run", "Preview the scrape result and generated MERGE SQL without writing to BigQuery", false)
|
|
5183
8253
|
.action(async (options) => {
|
|
5184
|
-
const
|
|
8254
|
+
const authenticatedRun = !shouldBypassAuth() && !options.dryRun;
|
|
8255
|
+
const session = authenticatedRun ? await requireAuthSession() : undefined;
|
|
8256
|
+
const clientId = await resolveLinkedInCompanyBackfillClientId({
|
|
8257
|
+
clientIdOption: options.clientId,
|
|
8258
|
+
session
|
|
8259
|
+
});
|
|
5185
8260
|
const limit = z.coerce.number().int().min(1).max(500).parse(options.limit);
|
|
5186
8261
|
const concurrency = z.coerce.number().int().min(1).max(20).parse(options.concurrency);
|
|
5187
|
-
|
|
5188
|
-
|
|
8262
|
+
await writeLinkedInCompanyBackfillClientIdToCache(clientId, session);
|
|
8263
|
+
if (authenticatedRun && session) {
|
|
5189
8264
|
const drained = await drainLinkedInCompanyBackfill(session, {
|
|
5190
8265
|
clientId,
|
|
5191
8266
|
limit
|
|
@@ -5852,6 +8927,7 @@ program
|
|
|
5852
8927
|
.option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
|
|
5853
8928
|
.option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
|
|
5854
8929
|
.option("--slice-preset <name>", "Slice preset label stored with the export runs", "human-resources-crawl")
|
|
8930
|
+
.option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
|
|
5855
8931
|
.option("--max-split-depth <number>", "Maximum number of adaptive split dimensions to use", "6")
|
|
5856
8932
|
.option("--max-slices <number>", "Safety cap for total claimed slices in this invocation", "1000")
|
|
5857
8933
|
.option("--max-retries <number>", "Retries for non-splitting export failures", "3")
|
|
@@ -5870,6 +8946,7 @@ program
|
|
|
5870
8946
|
const jobId = z.string().uuid().optional().parse(options.jobId);
|
|
5871
8947
|
const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
|
|
5872
8948
|
const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
|
|
8949
|
+
const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
|
|
5873
8950
|
const maxSplitDepth = z.coerce.number().int().min(1).max(6).parse(options.maxSplitDepth);
|
|
5874
8951
|
const maxSlices = z.coerce.number().int().min(1).max(10000).parse(options.maxSlices);
|
|
5875
8952
|
const maxRetries = z.coerce.number().int().min(0).max(5).parse(options.maxRetries);
|
|
@@ -5889,6 +8966,7 @@ program
|
|
|
5889
8966
|
jobId: jobId ?? null,
|
|
5890
8967
|
maxResultsPerSearch,
|
|
5891
8968
|
numberOfProfiles,
|
|
8969
|
+
clientId,
|
|
5892
8970
|
slicePreset: options.slicePreset,
|
|
5893
8971
|
maxSplitDepth,
|
|
5894
8972
|
maxSlices,
|
|
@@ -5989,6 +9067,7 @@ program
|
|
|
5989
9067
|
traceId: logger.traceId,
|
|
5990
9068
|
command: {
|
|
5991
9069
|
sourceQueryUrl: queryUrl,
|
|
9070
|
+
clientId,
|
|
5992
9071
|
slicePreset: options.slicePreset,
|
|
5993
9072
|
maxResultsPerSearch,
|
|
5994
9073
|
numberOfProfiles,
|
|
@@ -6010,6 +9089,7 @@ program
|
|
|
6010
9089
|
splitTrail: seed.splitTrail,
|
|
6011
9090
|
rawPayload: {
|
|
6012
9091
|
workflow: "salesnav:crawl",
|
|
9092
|
+
clientId,
|
|
6013
9093
|
traceId: logger.traceId
|
|
6014
9094
|
}
|
|
6015
9095
|
}
|
|
@@ -6049,6 +9129,7 @@ program
|
|
|
6049
9129
|
idlePollSeconds,
|
|
6050
9130
|
idleMaxPolls,
|
|
6051
9131
|
parallelExports,
|
|
9132
|
+
clientId,
|
|
6052
9133
|
traceId: logger.traceId,
|
|
6053
9134
|
logger
|
|
6054
9135
|
});
|
|
@@ -6129,6 +9210,43 @@ program
|
|
|
6129
9210
|
recentEvents
|
|
6130
9211
|
});
|
|
6131
9212
|
});
|
|
9213
|
+
program
|
|
9214
|
+
.command("phantombuster:containers:sync")
|
|
9215
|
+
.alias("pb:containers:sync")
|
|
9216
|
+
.description("Fetch Phantombuster containers for configured agents and store them in Neon.")
|
|
9217
|
+
.option("--agent-id <id>", "Phantombuster agent id to sync. Repeat to sync multiple agents.", collectStringOptionValue, [])
|
|
9218
|
+
.option("--limit <number>", "Maximum containers to fetch per Phantombuster page", "100")
|
|
9219
|
+
.option("--max-pages <number>", "Maximum Phantombuster pages to fetch per agent", "50")
|
|
9220
|
+
.option("--mode <mode>", "Phantombuster container mode: all or finalized", "all")
|
|
9221
|
+
.option("--before-ended-at <iso>", "Only fetch containers that ended before this ISO timestamp")
|
|
9222
|
+
.option("--metadata-only", "Store container metadata without fetching output and result objects", false)
|
|
9223
|
+
.option("--out <path>", "Optional local JSON output path")
|
|
9224
|
+
.action(async (options) => {
|
|
9225
|
+
const agentIds = z.array(z.string().min(1)).parse(options.agentId);
|
|
9226
|
+
const limit = z.coerce.number().int().min(1).max(500).parse(options.limit);
|
|
9227
|
+
const maxPages = z.coerce.number().int().min(1).max(500).parse(options.maxPages);
|
|
9228
|
+
const mode = z.enum(["all", "finalized"]).parse(options.mode);
|
|
9229
|
+
const beforeEndedAt = options.beforeEndedAt
|
|
9230
|
+
? z.string().datetime().parse(options.beforeEndedAt)
|
|
9231
|
+
: undefined;
|
|
9232
|
+
const session = await requireAuthSession();
|
|
9233
|
+
const result = await syncPhantombusterContainersViaApp(session, {
|
|
9234
|
+
agentIds: agentIds.length > 0 ? agentIds : undefined,
|
|
9235
|
+
limit,
|
|
9236
|
+
maxPages,
|
|
9237
|
+
mode,
|
|
9238
|
+
beforeEndedAt,
|
|
9239
|
+
includeResults: !options.metadataOnly
|
|
9240
|
+
});
|
|
9241
|
+
const payload = {
|
|
9242
|
+
...result,
|
|
9243
|
+
dryRun: false
|
|
9244
|
+
};
|
|
9245
|
+
if (options.out) {
|
|
9246
|
+
await writeJsonFile(options.out, payload);
|
|
9247
|
+
}
|
|
9248
|
+
printOutput(payload);
|
|
9249
|
+
});
|
|
6132
9250
|
program
|
|
6133
9251
|
.command("salesnav:export")
|
|
6134
9252
|
.alias("search:export")
|
|
@@ -6137,12 +9255,18 @@ program
|
|
|
6137
9255
|
.option("--max-results-per-search <number>", "Maximum results allowed for a sliced search", "2500")
|
|
6138
9256
|
.option("--number-of-profiles <number>", "Profiles to export per sliced query", "2500")
|
|
6139
9257
|
.option("--slice-preset <name>", "Slice preset label stored with the export run", "human-resources-default")
|
|
9258
|
+
.option("--client-id <number>", "Client id used to generate and store the legacy Neon lead list projection")
|
|
9259
|
+
.option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
|
|
9260
|
+
.option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the export", "20")
|
|
6140
9261
|
.option("--out <path>", "Optional local JSON output path")
|
|
6141
9262
|
.option("--dry-run", "Only generate sliced query URLs without exporting them", false)
|
|
6142
9263
|
.action(async (options) => {
|
|
6143
9264
|
const queryUrls = z.array(z.string().url()).min(1).parse(options.queryUrl);
|
|
6144
9265
|
const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
|
|
6145
9266
|
const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
|
|
9267
|
+
const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
|
|
9268
|
+
const agentBusyMaxWaits = z.coerce.number().int().min(0).max(100).parse(options.agentBusyMaxWaits);
|
|
9269
|
+
const clientId = parseOptionalSalesNavigatorClientId(options.clientId);
|
|
6146
9270
|
const prepared = queryUrls.map((queryUrl) => buildSalesNavigatorPeopleSlice(queryUrl));
|
|
6147
9271
|
const effectiveDryRun = Boolean(options.dryRun || shouldBypassAuth());
|
|
6148
9272
|
if (effectiveDryRun) {
|
|
@@ -6164,10 +9288,10 @@ program
|
|
|
6164
9288
|
printOutput(payload);
|
|
6165
9289
|
return;
|
|
6166
9290
|
}
|
|
6167
|
-
|
|
9291
|
+
let session = await requireAuthSession();
|
|
6168
9292
|
const exported = [];
|
|
6169
9293
|
for (const item of prepared) {
|
|
6170
|
-
const result = await
|
|
9294
|
+
const result = await runSalesNavigatorExportWithAgentWait(session, {
|
|
6171
9295
|
sourceQueryUrl: item.sourceQueryUrl,
|
|
6172
9296
|
slicedQueryUrl: item.slicedQueryUrl,
|
|
6173
9297
|
appliedFilters: item.appliedFilters,
|
|
@@ -6176,12 +9300,17 @@ program
|
|
|
6176
9300
|
slicePreset: options.slicePreset,
|
|
6177
9301
|
rawPayload: {
|
|
6178
9302
|
workflow: "salesnav:export",
|
|
9303
|
+
clientId,
|
|
6179
9304
|
sourceQueryUrl: item.sourceQueryUrl,
|
|
6180
9305
|
slicedQueryUrl: item.slicedQueryUrl,
|
|
6181
9306
|
appliedFilters: item.appliedFilters
|
|
6182
9307
|
}
|
|
9308
|
+
}, {
|
|
9309
|
+
waitSeconds: agentBusyWaitSeconds,
|
|
9310
|
+
maxWaits: agentBusyMaxWaits
|
|
6183
9311
|
});
|
|
6184
9312
|
exported.push(result);
|
|
9313
|
+
session = await requireAuthSession();
|
|
6185
9314
|
}
|
|
6186
9315
|
const payload = {
|
|
6187
9316
|
status: "ok",
|
|
@@ -6983,7 +10112,17 @@ async function main() {
|
|
|
6983
10112
|
}
|
|
6984
10113
|
await program.parseAsync(process.argv);
|
|
6985
10114
|
}
|
|
6986
|
-
|
|
10115
|
+
async function closeGlobalHttpDispatcher() {
|
|
10116
|
+
try {
|
|
10117
|
+
const undici = await import("undici");
|
|
10118
|
+
await undici.getGlobalDispatcher().close();
|
|
10119
|
+
}
|
|
10120
|
+
catch {
|
|
10121
|
+
// Best-effort shutdown for keep-alive sockets; ignore when undici is unavailable.
|
|
10122
|
+
}
|
|
10123
|
+
}
|
|
10124
|
+
main()
|
|
10125
|
+
.catch((error) => {
|
|
6987
10126
|
if (error instanceof Error &&
|
|
6988
10127
|
(error.message === "prompt cancelled" || error.message === "readline was closed")) {
|
|
6989
10128
|
process.exitCode = 130;
|
|
@@ -6998,4 +10137,8 @@ main().catch((error) => {
|
|
|
6998
10137
|
process.stderr.write(`${cliError.message}\n`);
|
|
6999
10138
|
}
|
|
7000
10139
|
process.exitCode = exitCodeForError(cliError.code);
|
|
10140
|
+
})
|
|
10141
|
+
.finally(async () => {
|
|
10142
|
+
await closeGlobalHttpDispatcher();
|
|
10143
|
+
process.exit(process.exitCode ?? 0);
|
|
7001
10144
|
});
|