salesprompter-cli 0.1.27 → 0.1.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +35 -0
- package/CONTRIBUTING.md +89 -0
- package/README.md +29 -1
- package/SECURITY.md +35 -0
- package/dist/cli.js +1213 -62
- package/dist/domainfinder.js +132 -0
- package/dist/linkedin-companies.js +3 -3
- package/dist/linkedin-products.js +2 -2
- package/dist/linkedin-session-contracts.js +3 -0
- package/dist/linkedin-session.js +8 -9
- package/dist/vendor/salesprompter-shared/extension-session-contracts.js +29 -0
- package/dist/vendor/salesprompter-shared/linkedin-session.js +22 -0
- package/dist/vendor/salesprompter-shared/phantombuster-contracts.js +16 -0
- package/dist/vendor/salesprompter-shared/session-vault-contracts.js +17 -0
- package/package.json +16 -4
- package/dist/hunter-emails.js +0 -291
package/dist/cli.js
CHANGED
|
@@ -25,7 +25,7 @@ import { InstantlySyncProvider } from "./instantly.js";
|
|
|
25
25
|
import { backfillLinkedInCompanies } from "./linkedin-companies.js";
|
|
26
26
|
import { parseLinkedInCompanyPage } from "./linkedin-companies.js";
|
|
27
27
|
import { crawlLinkedInProductCategory } from "./linkedin-products.js";
|
|
28
|
-
import { claimValidatedSalesNavigatorSessionCookieForCli, createLinkedInSessionSupabaseClient } from "./linkedin-session.js";
|
|
28
|
+
import { claimValidatedSalesNavigatorSessionCookieForCli, createLinkedInSessionSupabaseClient, resolveConfiguredEnvValue } from "./linkedin-session.js";
|
|
29
29
|
import { buildLeadlistsFunnelQueries } from "./leadlists-funnel.js";
|
|
30
30
|
import { readJsonFile, splitCsv, writeJsonFile, writeTextFile } from "./io.js";
|
|
31
31
|
import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, deriveSalesNavigatorTitleQuerySeeds, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
|
|
@@ -42,6 +42,14 @@ const runtimeOutputOptions = {
|
|
|
42
42
|
quiet: false
|
|
43
43
|
};
|
|
44
44
|
const nullableOptionalString = z.string().min(1).nullish().transform((value) => value ?? undefined);
|
|
45
|
+
/**
 * Validates the JSON state file that remembers the last LinkedIn company
 * backfill client id. `userId` and `orgId` are optional; unknown keys are
 * kept (passthrough) rather than stripped.
 */
const LinkedInCompanyBackfillClientIdStateSchema = z.object({
    // Must be a positive integer.
    clientId: z.number().int().positive(),
    userId: z.string().optional(),
    orgId: z.string().optional(),
    // ISO-8601 datetime string recording when the state was written.
    updatedAt: z.string().datetime()
}).passthrough();
|
|
45
53
|
const WorkspaceLeadSchema = LeadSchema.extend({
|
|
46
54
|
companySize: nullableOptionalString.optional(),
|
|
47
55
|
country: nullableOptionalString.optional()
|
|
@@ -66,6 +74,9 @@ const LinkedInCompanyBackfillLaunchResponseSchema = z.object({
|
|
|
66
74
|
webhookUrl: z.string().url(),
|
|
67
75
|
inputUrl: z.string().url().nullable(),
|
|
68
76
|
containerId: z.string().min(1).nullable(),
|
|
77
|
+
selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
|
|
78
|
+
selectedSessionUserEmail: z.string().min(1).nullable().optional(),
|
|
79
|
+
selectedSessionUserHandle: z.string().min(1).nullable().optional(),
|
|
69
80
|
candidates: z.array(z.object({
|
|
70
81
|
companyId: z.number().int().positive(),
|
|
71
82
|
companyUrl: z.string().url(),
|
|
@@ -78,7 +89,10 @@ const LinkedInCompanyBackfillStatusResponseSchema = z.object({
|
|
|
78
89
|
containerId: z.string().min(1),
|
|
79
90
|
running: z.boolean(),
|
|
80
91
|
processed: z.boolean(),
|
|
81
|
-
remaining: z.number().int().nonnegative()
|
|
92
|
+
remaining: z.number().int().nonnegative(),
|
|
93
|
+
failed: z.boolean().default(false),
|
|
94
|
+
failureCode: z.string().nullable().optional(),
|
|
95
|
+
failureMessage: z.string().nullable().optional()
|
|
82
96
|
});
|
|
83
97
|
const CliEmailEnrichmentCompaniesResponseSchema = z.object({
|
|
84
98
|
clientId: z.number().int().positive(),
|
|
@@ -280,6 +294,7 @@ const cliPacks = [
|
|
|
280
294
|
];
|
|
281
295
|
const helpAliasByCommandName = new Map([
|
|
282
296
|
["contacts:find-linkedin-urls", "contacts:resolve-profiles"],
|
|
297
|
+
["companies:find-linkedin-urls", "companies:resolve-linkedin-urls"],
|
|
283
298
|
["contacts:process-emails", "contacts:resolve-emails"],
|
|
284
299
|
["linkedin-companies:backfill", "companies:enrich"],
|
|
285
300
|
["linkedin-products:scrape", "market:scrape"],
|
|
@@ -300,6 +315,7 @@ const helpVisibleCommandNames = new Set([
|
|
|
300
315
|
"auth:whoami",
|
|
301
316
|
"llm:ready",
|
|
302
317
|
"contacts:find-linkedin-urls",
|
|
318
|
+
"companies:find-linkedin-urls",
|
|
303
319
|
"contacts:process-emails",
|
|
304
320
|
"auth:logout",
|
|
305
321
|
"account:resolve",
|
|
@@ -339,6 +355,64 @@ function formatHelpArgumentTerm(argument) {
|
|
|
339
355
|
}
|
|
340
356
|
return argument.required ? `<${term}>` : `[${term}]`;
|
|
341
357
|
}
|
|
358
|
+
/**
 * Coerces a raw client id value into a positive integer.
 *
 * @param {unknown} rawValue - Value to coerce; it is stringified and trimmed first.
 * @returns {number} The parsed positive integer.
 * @throws {Error} When the value is null/undefined or blank after trimming.
 * @throws When zod rejects the trimmed text as not being a positive integer.
 */
function parsePositiveClientIdValue(rawValue) {
    // Missing and blank inputs share one error message, so normalise both to "".
    const normalizedText = rawValue == null ? "" : String(rawValue).trim();
    if (normalizedText === "") {
        throw new Error("clientId is required and must be a positive integer.");
    }
    const positiveIntegerSchema = z.coerce.number().int().positive();
    return positiveIntegerSchema.parse(normalizedText);
}
|
|
368
|
+
/**
 * Builds the path of the cached LinkedIn company backfill state file inside
 * the Salesprompter config directory.
 *
 * @returns {string} Path to `linkedin-companies-backfill.json`.
 */
function getLinkedInCompanyBackfillClientStatePath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "linkedin-companies-backfill.json");
}
|
|
371
|
+
/**
 * Reads the cached backfill client id from the local state file.
 *
 * The cached value is ignored when both the session and the cached state carry
 * a user id (or org id) and the two differ. Any read, JSON, or schema failure
 * is treated as "nothing cached".
 *
 * @param {object} [session] - Current session; only `user.id` and `user.orgId` are read.
 * @returns {Promise<number|undefined>} The cached client id, or undefined.
 */
async function readLinkedInCompanyBackfillClientIdFromCache(session) {
    const stateFilePath = getLinkedInCompanyBackfillClientStatePath();
    try {
        const rawState = JSON.parse(await readFile(stateFilePath, "utf8"));
        const cachedState = LinkedInCompanyBackfillClientIdStateSchema.parse(rawState);
        const sessionUserId = session?.user?.id;
        if (sessionUserId != null && cachedState.userId != null && cachedState.userId !== sessionUserId) {
            return undefined;
        }
        const sessionOrgId = session?.user?.orgId;
        // Org ids are compared as strings so numeric and string forms match.
        if (sessionOrgId != null &&
            cachedState.orgId != null &&
            String(cachedState.orgId) !== String(sessionOrgId)) {
            return undefined;
        }
        return cachedState.clientId;
    }
    catch {
        // Best effort: a missing or invalid cache file just means no cached id.
        return undefined;
    }
}
|
|
391
|
+
/**
 * Persists the backfill client id, with the session's user and org ids and
 * the current timestamp, to the local state file as pretty-printed JSON.
 * The parent directory is created first if needed.
 *
 * @param {number} clientId - Client id to remember.
 * @param {object} [session] - Current session; only `user.id` and `user.orgId` are read.
 * @returns {Promise<void>}
 */
async function writeLinkedInCompanyBackfillClientIdToCache(clientId, session) {
    const stateFilePath = getLinkedInCompanyBackfillClientStatePath();
    const sessionUser = session?.user;
    const snapshot = {
        clientId,
        userId: sessionUser?.id,
        orgId: sessionUser?.orgId,
        updatedAt: new Date().toISOString()
    };
    await mkdir(path.dirname(stateFilePath), { recursive: true });
    const serialized = JSON.stringify(snapshot, null, 2);
    await writeFile(stateFilePath, `${serialized}\n`, "utf8");
}
|
|
402
|
+
/**
 * Resolves the backfill client id from the first available source:
 * the explicit option, then PIPEDREAM_CLIENT_ID / SALESPROMPTER_CLIENT_ID,
 * then the local cache.
 *
 * @param {{ clientIdOption?: unknown, session?: object }} params
 * @returns {Promise<number>} The resolved client id.
 * @throws {Error} When no source yields a client id, or an explicit/env value is invalid.
 */
async function resolveLinkedInCompanyBackfillClientId(params) {
    const explicitOption = params.clientIdOption;
    const hasExplicitOption = explicitOption != null && String(explicitOption).trim().length > 0;
    if (hasExplicitOption) {
        return parsePositiveClientIdValue(explicitOption);
    }
    const fromEnvironment = process.env.PIPEDREAM_CLIENT_ID?.trim() || process.env.SALESPROMPTER_CLIENT_ID?.trim();
    if (fromEnvironment) {
        return parsePositiveClientIdValue(fromEnvironment);
    }
    const fromCache = await readLinkedInCompanyBackfillClientIdFromCache(params.session);
    if (fromCache == null) {
        throw new Error("Missing LinkedIn company backfill clientId. Pass --client-id, set PIPEDREAM_CLIENT_ID or SALESPROMPTER_CLIENT_ID, or run once with --client-id so the CLI can reuse it.");
    }
    return fromCache;
}
|
|
342
416
|
function applyGlobalOutputOptions(actionCommand) {
|
|
343
417
|
const globalOptions = actionCommand.optsWithGlobals();
|
|
344
418
|
runtimeOutputOptions.json = Boolean(globalOptions.json);
|
|
@@ -920,6 +994,55 @@ function parseLinkedInUrlLookupInput(content) {
|
|
|
920
994
|
}))
|
|
921
995
|
.filter((row) => row.fullName.length > 0 || row.companyName.length > 0);
|
|
922
996
|
}
|
|
997
|
+
/**
 * Parses company lookup input into `{ clientId, companyName }` rows.
 *
 * Three shapes are accepted:
 *  - a JSON array of objects (`clientId`, `companyName` or `name`);
 *  - delimited text whose first line is a header containing `companyName`,
 *    `company_name` or `name` (optionally `clientId` / `client_id`);
 *  - plain text with one company name per line.
 * Rows with an empty company name are dropped in the JSON and header cases.
 *
 * @param {string} content - Raw input text.
 * @returns {Array<{ clientId: string|null, companyName: string }>}
 * @throws When input starting with "[" is not valid JSON or fails the row schema.
 */
function parseLinkedInCompanyLookupInput(content) {
    const text = content.trim();
    if (text.length === 0) {
        return [];
    }
    if (text.startsWith("[")) {
        const rowSchema = z.object({
            clientId: z.union([z.string(), z.number()]).nullish(),
            companyName: z.string().nullish(),
            name: z.string().nullish()
        });
        const jsonCompanies = [];
        for (const row of z.array(rowSchema).parse(JSON.parse(text))) {
            const clientId = row.clientId == null ? null : String(row.clientId).trim() || null;
            const companyName = normalizeLookupWhitespace(row.companyName?.trim() || row.name?.trim() || "");
            if (companyName.length > 0) {
                jsonCompanies.push({ clientId, companyName });
            }
        }
        return jsonCompanies;
    }
    const lines = text
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    if (lines.length === 0) {
        return [];
    }
    const firstLine = lines[0] ?? "";
    const delimiter = detectLooseDelimiter(firstLine);
    const headerValues = splitLooseDelimitedLine(firstLine, delimiter).map((value) => value.trim().toLowerCase());
    const companyNameIndex = headerValues.findIndex((value) => ["companyname", "company_name", "name"].includes(value));
    if (companyNameIndex < 0) {
        // No recognised header: every line is itself a company name.
        return lines.map((line) => ({
            clientId: null,
            companyName: normalizeLookupWhitespace(line)
        }));
    }
    const clientIdIndex = headerValues.findIndex((value) => ["clientid", "client_id"].includes(value));
    const delimitedCompanies = [];
    for (const line of lines.slice(1)) {
        const columns = splitLooseDelimitedLine(line, delimiter).map((value) => value.trim());
        const clientId = clientIdIndex >= 0 ? columns[clientIdIndex] || null : null;
        const companyName = normalizeLookupWhitespace(columns[companyNameIndex] || "");
        if (companyName.length > 0) {
            delimitedCompanies.push({ clientId, companyName });
        }
    }
    return delimitedCompanies;
}
|
|
923
1046
|
function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
924
1047
|
return rows.flatMap((row, index) => {
|
|
925
1048
|
const contactId = String(index + 1);
|
|
@@ -972,19 +1095,19 @@ function toLinkedInUrlLookupContacts(rows, cleanedCompanyMap = new Map()) {
|
|
|
972
1095
|
});
|
|
973
1096
|
}
|
|
974
1097
|
/**
 * Reads the LinkedIn enrichment endpoint configuration from the environment.
 *
 * The endpoint is SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL when set,
 * otherwise it is derived from PIPEDREAM_ENDPOINT_ID as
 * `https://<id>.m.pipedream.net`. The remaining fields default to "".
 *
 * @returns {{ endpointUrl: string, secret: string, clientId: string, projectId: string, projectEnvironment: string }}
 * @throws {Error} When neither endpoint variable yields a value.
 */
function readPipedreamLinkedInEnrichmentConfig() {
    const readEnv = (name) => resolveConfiguredEnvValue(process.env, name);
    let endpointUrl = readEnv("SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL");
    if (!endpointUrl) {
        const endpointId = readEnv("PIPEDREAM_ENDPOINT_ID");
        endpointUrl = endpointId ? `https://${endpointId?.trim()}.m.pipedream.net` : "";
    }
    if (!endpointUrl) {
        throw new Error("Missing LinkedIn enrichment endpoint. Set SALESPROMPTER_LINKEDIN_ENRICHMENT_ENDPOINT_URL or PIPEDREAM_ENDPOINT_ID.");
    }
    return {
        endpointUrl,
        secret: readEnv("PIPEDREAM_SECRET_KEY") || "",
        clientId: readEnv("PIPEDREAM_CLIENT_ID") || "",
        projectId: readEnv("PIPEDREAM_PROJECT_ID") || "",
        projectEnvironment: readEnv("PIPEDREAM_PROJECT_ENVIRONMENT") || ""
    };
}
|
|
990
1113
|
function deriveCsrfTokenFromCookie(cookie) {
|
|
@@ -1128,10 +1251,108 @@ function buildLinkedInLookupSearchVariants(contact) {
|
|
|
1128
1251
|
}
|
|
1129
1252
|
return variants;
|
|
1130
1253
|
}
|
|
1254
|
+
/**
 * Extracts a canonical Sales Navigator lead URL from arbitrary text.
 *
 * Two forms are recognised:
 *  - a direct `https://www.linkedin.com/sales/lead/<id>` URL, returned with
 *    any trailing path, query, or fragment removed;
 *  - a `https://www.linkedin.com/in/<id>` URL whose id starts with the
 *    case-sensitive prefix `ACw`, rewritten to the `/sales/lead/<id>` form.
 *
 * The `ACw` prefix is matched case-sensitively on purpose. A case-insensitive
 * match would treat ordinary vanity handles such as `/in/acwright` as lead
 * ids and produce a bogus Sales Navigator URL.
 *
 * @param {unknown} value - Text that may contain a LinkedIn URL.
 * @returns {string|null} The normalized lead URL, or null when none is found.
 */
function normalizeSalesNavLeadUrl(value) {
    const trimmed = String(value ?? "").trim();
    if (!trimmed) {
        return null;
    }
    const directMatch = trimmed.match(/https:\/\/www\.linkedin\.com\/sales\/lead\/[^/?#]+/i);
    if (directMatch) {
        return directMatch[0];
    }
    // Scheme and host may appear in any case, but the lead-id prefix may not.
    const profileMatches = trimmed.matchAll(/https:\/\/www\.linkedin\.com\/in\/([A-Za-z0-9_-]+)/gi);
    for (const profileMatch of profileMatches) {
        const profileId = profileMatch[1];
        // Require at least one character after the "ACw" prefix.
        if (profileId.length > 3 && profileId.startsWith("ACw")) {
            return `https://www.linkedin.com/sales/lead/${profileId}`;
        }
    }
    return null;
}
|
|
1269
|
+
/**
 * Extracts a public `https://www.linkedin.com/in/<handle>` profile URL from
 * arbitrary text, keeping an optional trailing slash.
 *
 * URLs that `normalizeSalesNavLeadUrl` recognises as Sales Navigator leads
 * are rejected, so lead ids are never reported as public profiles.
 *
 * @param {unknown} value - Text that may contain a LinkedIn profile URL.
 * @returns {string|null} The matched public profile URL, or null.
 */
function normalizePublicLinkedInProfileUrl(value) {
    const text = String(value ?? "").trim();
    const publicProfileUrl = text
        ? text.match(/https:\/\/www\.linkedin\.com\/in\/[^/?#]+\/?/i)?.[0]
        : undefined;
    if (!publicProfileUrl) {
        return null;
    }
    if (normalizeSalesNavLeadUrl(publicProfileUrl)) {
        return null;
    }
    return publicProfileUrl;
}
|
|
1131
1284
|
/**
 * Finds a public LinkedIn profile URL in a Sales API result element.
 *
 * The string fields `linkedinProfileUrl`, `profileUrl` and `url` are tried
 * first. If none normalizes to a public profile URL, every string yielded by
 * `collectNestedStrings(element)` is tried.
 *
 * @param {object|null|undefined} element - One result element.
 * @returns {string|null} The first public profile URL found, or null.
 */
function extractLinkedInProfileUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    const firstPublicProfileUrl = (values) => {
        for (const value of values) {
            const profileUrl = normalizePublicLinkedInProfileUrl(value);
            if (profileUrl) {
                return profileUrl;
            }
        }
        return null;
    };
    const explicitFields = [element.linkedinProfileUrl, element.profileUrl, element.url]
        .filter((field) => typeof field === "string" && Boolean(field));
    // The nested scan only runs when the explicit fields produced nothing.
    return firstPublicProfileUrl(explicitFields) ?? firstPublicProfileUrl(collectNestedStrings(element));
}
|
|
1307
|
+
/**
 * Finds a Sales Navigator lead URL in a Sales API result element.
 *
 * Lookup order: the string fields `salesNavProfileUrl`,
 * `sales_nav_profile_url` and `url`; then every string yielded by
 * `collectNestedStrings(element)`; finally the text between "(" and the first
 * "," in `entityUrn`, which is used as the lead id.
 *
 * @param {object|null|undefined} element - One result element.
 * @returns {string|null} The lead URL, or null when nothing matches.
 */
function extractLinkedInSalesNavLeadUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    const firstLeadUrl = (values) => {
        for (const value of values) {
            const leadUrl = normalizeSalesNavLeadUrl(value);
            if (leadUrl) {
                return leadUrl;
            }
        }
        return null;
    };
    const explicitFields = [element.salesNavProfileUrl, element.sales_nav_profile_url, element.url]
        .filter((field) => typeof field === "string" && Boolean(field));
    // The nested scan only runs when the explicit fields produced nothing.
    const discoveredUrl = firstLeadUrl(explicitFields) ?? firstLeadUrl(collectNestedStrings(element));
    if (discoveredUrl) {
        return discoveredUrl;
    }
    const entityUrn = typeof element.entityUrn === "string" ? element.entityUrn : "";
    const salesId = entityUrn.match(/\(([^,]+),/)?.[1];
    return salesId ? `https://www.linkedin.com/sales/lead/${salesId}` : null;
}
|
|
1332
|
+
/**
 * Finds a Sales Navigator company URL in a Sales API result element.
 *
 * Lookup order: the string fields `salesNavCompanyUrl`,
 * `sales_nav_company_url` and `url`; then every string yielded by
 * `collectNestedStrings(element)`; finally the text between "(" and the first
 * "," in `entityUrn`, which is used as the company id.
 *
 * @param {object|null|undefined} element - One result element.
 * @returns {string|null} The company URL, or null when nothing matches.
 */
function extractLinkedInSalesNavCompanyUrlFromSalesApiElement(element) {
    if (!element) {
        return null;
    }
    const firstCompanyUrl = (values) => {
        for (const value of values) {
            const companyUrl = value.match(/https:\/\/www\.linkedin\.com\/sales\/company\/[^/?#]+/i)?.[0];
            if (companyUrl != null) {
                return companyUrl;
            }
        }
        return null;
    };
    const explicitFields = [element.salesNavCompanyUrl, element.sales_nav_company_url, element.url]
        .filter((field) => typeof field === "string" && Boolean(field));
    // The nested scan only runs when the explicit fields produced nothing.
    const discoveredUrl = firstCompanyUrl(explicitFields) ?? firstCompanyUrl(collectNestedStrings(element));
    if (discoveredUrl !== null) {
        return discoveredUrl;
    }
    const entityUrn = typeof element.entityUrn === "string" ? element.entityUrn : "";
    const companyId = entityUrn.match(/\(([^,]+),/)?.[1];
    return companyId ? `https://www.linkedin.com/sales/company/${companyId}` : null;
}
|
|
1136
1357
|
function collectNestedStrings(value, seen = new Set()) {
|
|
1137
1358
|
if (value == null || seen.has(value)) {
|
|
@@ -1248,6 +1469,280 @@ function buildLinkedInCompanyLookupVariants(params) {
|
|
|
1248
1469
|
}
|
|
1249
1470
|
return variants;
|
|
1250
1471
|
}
|
|
1472
|
+
/**
 * Builds the web-search URL used to look up a company's LinkedIn page.
 *
 * The base URL comes from SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL when
 * set (non-blank), otherwise the DuckDuckGo HTML endpoint. The `q` parameter
 * is set to a `site:linkedin.com/company` query with the name in quotes.
 *
 * @param {string} companyName - Company name to search for.
 * @returns {string} The full search URL.
 */
function buildPublicLinkedInCompanySearchUrl(companyName) {
    const configuredBase = process.env.SALESPROMPTER_LINKEDIN_COMPANY_SEARCH_BASE_URL?.trim();
    const searchUrl = new URL(configuredBase || "https://duckduckgo.com/html/");
    searchUrl.searchParams.set("q", `site:linkedin.com/company "${companyName}"`);
    return searchUrl.toString();
}
|
|
1479
|
+
/**
 * Returns the Serper API key from the environment.
 * SALESPROMPTER_SERPER_API_KEY wins over SERPER_API_KEY; blank values are skipped.
 *
 * @param {Record<string, string|undefined>} [env=process.env] - Environment to read.
 * @returns {string} The trimmed key, or "" when none is configured.
 */
function getSerperApiKey(env = process.env) {
    const preferredKey = env.SALESPROMPTER_SERPER_API_KEY?.trim();
    if (preferredKey) {
        return preferredKey;
    }
    const fallbackKey = env.SERPER_API_KEY?.trim();
    return fallbackKey ? fallbackKey : "";
}
|
|
1482
|
+
/**
 * Returns the Serper search endpoint URL.
 * SALESPROMPTER_SERPER_SEARCH_URL overrides the default when it is non-blank.
 *
 * @param {Record<string, string|undefined>} [env=process.env] - Environment to read.
 * @returns {string} The endpoint URL.
 */
function getSerperSearchEndpoint(env = process.env) {
    const overrideUrl = env.SALESPROMPTER_SERPER_SEARCH_URL?.trim();
    if (overrideUrl) {
        return overrideUrl;
    }
    return "https://google.serper.dev/search";
}
|
|
1485
|
+
/**
 * Builds the ordered, de-duplicated search queries used to find a company's
 * LinkedIn page, from most specific to loosest:
 *  1. the exact name as given, quoted;
 *  2. the part before the first "-", "," or "|", quoted;
 *  3. the search-normalized name;
 *  4. the loosely normalized name;
 *  5. up to four keyword tokens (length >= 4, minus a small stop list).
 *
 * @param {string} companyName - Company name as supplied by the caller.
 * @returns {string[]} Unique, non-empty queries in priority order.
 */
function buildSerperLinkedInCompanyQueries(companyName) {
    const ignoredTokens = ["oder", "with", "from", "handel", "beratung"];
    const cleanedName = normalizeLookupWhitespace(companyName);
    const leadingSegment = cleanedName.split(/\s*[-,|]\s*/)[0]?.trim() || cleanedName.trim();
    const searchableName = normalizeLookupCompanyForSearch(cleanedName);
    const looseName = normalizeLooseMatchText(cleanedName).replace(/\s+/g, " ").trim();
    const keywordQuery = looseName
        .split(/\s+/)
        .filter((token) => token.length >= 4 && !ignoredTokens.includes(token))
        .slice(0, 4)
        .join(" ");
    const queries = [
        `site:linkedin.com/company "${companyName}"`,
        `site:linkedin.com/company "${leadingSegment}"`,
        `site:linkedin.com/company ${searchableName} linkedin`,
        `site:linkedin.com/company ${looseName} linkedin`
    ];
    if (keywordQuery) {
        queries.push(`site:linkedin.com/company ${keywordQuery} linkedin`);
    }
    // A Set keeps first-seen order, so priority survives de-duplication.
    return [...new Set(queries)].filter((query) => query.length > 0);
}
|
|
1504
|
+
/**
 * Collects unique LinkedIn company page URLs from a search-results page.
 *
 * Both plain `https://www.linkedin.com/company/...` links and
 * percent-encoded ones are recognised. Each hit is reduced to a company
 * handle and re-expanded into its normalized page URL.
 *
 * @param {string} bodyText - Raw text of the search-results page.
 * @returns {string[]} Unique normalized company URLs in discovery order.
 */
function extractLinkedInCompanySearchCandidates(bodyText) {
    const uniqueUrls = new Set();
    const registerCandidate = (rawUrl) => {
        const handle = normalizeLinkedInCompanyHandle(rawUrl);
        if (handle) {
            uniqueUrls.add(normalizeLinkedInCompanyPage(handle));
        }
    };
    const plainLinks = bodyText.match(/https:\/\/www\.linkedin\.com\/company\/[^"'&<>\s)]+/gi) ?? [];
    for (const plainLink of plainLinks) {
        registerCandidate(plainLink);
    }
    const encodedLinks = bodyText.match(/https?%3A%2F%2Fwww\.linkedin\.com%2Fcompany%2F[^"'&<>\s)]+/gi) ?? [];
    for (const encodedLink of encodedLinks) {
        try {
            registerCandidate(decodeURIComponent(encodedLink));
        }
        catch {
            // Ignore malformed encoded fragments from search result pages.
        }
    }
    return Array.from(uniqueUrls);
}
|
|
1528
|
+
/**
 * Collects unique LinkedIn company page URLs from a Serper response payload.
 *
 * Only the `organic` array is inspected. Entries that are not objects are
 * skipped, and a missing or non-string `link` is treated as "".
 *
 * @param {unknown} payload - Parsed JSON response body.
 * @returns {string[]} Unique normalized company URLs in result order.
 */
function extractSerperLinkedInCompanyCandidates(payload) {
    if (!payload || typeof payload !== "object") {
        return [];
    }
    const organicResults = Array.isArray(payload.organic) ? payload.organic : [];
    const uniqueUrls = new Set();
    for (const entry of organicResults) {
        if (!entry || typeof entry !== "object") {
            continue;
        }
        const link = typeof entry.link === "string" ? entry.link : "";
        const handle = normalizeLinkedInCompanyHandle(link);
        if (handle) {
            uniqueUrls.add(normalizeLinkedInCompanyPage(handle));
        }
    }
    return [...uniqueUrls];
}
|
|
1550
|
+
/**
 * Decodes the HTML entities for `&`, `"`, `'`, `<` and `>`.
 *
 * Handles `&amp;`, `&quot;`, `&#39;` / `&#x27;` / `&apos;`, `&lt;` and `&gt;`,
 * matched case-insensitively. Decoding happens in a single pass, so text that
 * was escaped twice (for example `&amp;lt;`) is unescaped exactly once and
 * yields `&lt;` rather than `<`.
 *
 * @param {string} value - Text that may contain HTML entities.
 * @returns {string} The text with the supported entities decoded.
 */
function decodeHtmlEntities(value) {
    const decodedByEntity = {
        "amp": "&",
        "quot": '"',
        "#39": "'",
        "#x27": "'",
        "apos": "'",
        "lt": "<",
        "gt": ">"
    };
    return value.replace(/&(amp|quot|#39|#x27|apos|lt|gt);/gi, (_entity, name) => decodedByEntity[name.toLowerCase()]);
}
|
|
1558
|
+
/**
 * Downloads a LinkedIn company page and extracts text signals from it.
 *
 * The request is aborted after `timeoutMs`, capped at 12 seconds. The page is
 * flagged `unavailable` when the HTTP status is >= 400 or the normalized page
 * text contains one of the known "not found" phrases.
 *
 * @param {string} url - Company page URL to fetch.
 * @param {number} timeoutMs - Requested timeout in milliseconds.
 * @returns {Promise<{ normalizedUrl: string, title: string, description: string, bodyText: string, unavailable: boolean }|null>}
 *   The signals, or null when the request fails or no company handle can be
 *   derived from the final or original URL.
 */
async function fetchLinkedInCompanyPageSignals(url, timeoutMs) {
    const abortController = new AbortController();
    const abortTimer = setTimeout(() => abortController.abort(), Math.min(timeoutMs, 12_000));
    try {
        const response = await fetch(url, {
            method: "GET",
            signal: abortController.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        const html = await response.text();
        const firstCapture = (pattern) => html.match(pattern)?.[1] || "";
        // Prefer the <title> tag; fall back to og:title when it decodes to "".
        const title = decodeHtmlEntities(firstCapture(/<title[^>]*>([^<]+)/i)) ||
            decodeHtmlEntities(firstCapture(/<meta[^>]+property="og:title"[^>]+content="([^"]+)/i));
        const description = decodeHtmlEntities(firstCapture(/<meta[^>]+name="description"[^>]+content="([^"]+)/i));
        const markupStripped = html
            .replace(/<script[\s\S]*?<\/script>/gi, " ")
            .replace(/<style[\s\S]*?<\/style>/gi, " ")
            .replace(/<[^>]+>/g, " ");
        const bodyText = decodeHtmlEntities(markupStripped);
        const searchableText = normalizeLooseMatchText(`${title} ${description} ${bodyText}`);
        const unavailablePhrases = ["page not found", "this page does not exist", "page isnt available"];
        const unavailable = response.status >= 400 ||
            unavailablePhrases.some((phrase) => searchableText.includes(phrase));
        // Use the post-redirect URL when it yields a handle, else the requested one.
        const handle = normalizeLinkedInCompanyHandle(response.url || url) ?? normalizeLinkedInCompanyHandle(url);
        if (!handle) {
            return null;
        }
        return {
            normalizedUrl: normalizeLinkedInCompanyPage(handle),
            title: normalizeLookupWhitespace(title),
            description: normalizeLookupWhitespace(description),
            bodyText: normalizeLookupWhitespace(bodyText),
            unavailable
        };
    }
    catch {
        // Best effort: network errors and timeouts mean "no signals".
        return null;
    }
    finally {
        clearTimeout(abortTimer);
    }
}
|
|
1599
|
+
/**
 * Scores how well a fetched company page matches the requested company name.
 *
 * Each name token of length >= 4 found in the normalized title plus
 * description adds 12 points. A description that contains the whole
 * normalized company name adds 50 more.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {{ title: string, description: string }} signals - Page signals.
 * @returns {number} The match score (0 when nothing matches).
 */
function scoreLinkedInCompanyPageSignals(companyName, signals) {
    const haystack = normalizeLooseMatchText(`${signals.title} ${signals.description}`);
    const matchedTokenCount = normalizeLooseMatchText(companyName)
        .split(/\s+/)
        .filter((token) => token.length >= 4)
        .filter((token) => haystack.includes(token))
        .length;
    const descriptionHasFullName = Boolean(signals.description) &&
        normalizeLooseMatchText(signals.description).includes(normalizeLooseMatchText(companyName));
    return matchedTokenCount * 12 + (descriptionHasFullName ? 50 : 0);
}
|
|
1613
|
+
/**
 * Scores how closely a LinkedIn company URL's handle resembles a company name.
 *
 * The handle is lower-cased with "-" and "_" removed, then compared against
 * three compact forms of the name (search-normalized, aggressively cleaned,
 * slugified). Points awarded:
 *  - 100 for an exact match with any compact form;
 *  - 60 when handle and search-normalized name contain one another;
 *  - 40 when handle and aggressively cleaned name contain one another;
 *  - 35 when the handle contains the first name word (length >= 3);
 *  - 10 per name word contained in the handle, capped at 30.
 * URLs with no handle or a purely numeric handle score 0.
 *
 * @param {string} companyName - Company name being looked up.
 * @param {string} url - Candidate LinkedIn company URL.
 * @returns {number} The resemblance score.
 */
function scoreLinkedInCompanyUrlCandidate(companyName, url) {
    const handle = normalizeLinkedInCompanyHandle(url);
    if (!handle || /^\d+$/.test(handle)) {
        return 0;
    }
    const nameWords = normalizeLookupCompanyForSearch(companyName)
        .split(/\s+/)
        .filter((word) => word.length >= 3);
    const compactName = nameWords.join("");
    const compactAggressiveName = aggressivelyCleanLookupCompanyName(companyName).replace(/\s+/g, "");
    const compactHandle = handle.toLowerCase().replace(/[-_]/g, "");
    const compactSlug = (slugify(companyName) || "").replace(/-/g, "");
    const containsEitherWay = (left, right) => left.includes(right) || right.includes(left);
    let total = 0;
    if ([compactName, compactAggressiveName, compactSlug].includes(compactHandle)) {
        total += 100;
    }
    if (compactName && containsEitherWay(compactHandle, compactName)) {
        total += 60;
    }
    if (compactAggressiveName && containsEitherWay(compactHandle, compactAggressiveName)) {
        total += 40;
    }
    // With no name words both terms below contribute nothing.
    const [primaryWord] = nameWords;
    if (primaryWord && compactHandle.includes(primaryWord)) {
        total += 35;
    }
    const wordsInHandle = nameWords.filter((word) => compactHandle.includes(word)).length;
    total += Math.min(30, wordsInHandle * 10);
    return total;
}
|
|
1647
|
+
/**
 * Looks up a company's LinkedIn page through the public web-search endpoint.
 *
 * The request is aborted after `timeoutMs`, capped at 15 seconds. Candidate
 * URLs from the result page are ranked by handle similarity. The best
 * positive-scoring candidate wins; otherwise the first candidate found is
 * returned unranked.
 *
 * @param {string} companyName - Company name to search for.
 * @param {number} timeoutMs - Requested timeout in milliseconds.
 * @returns {Promise<string|null>} A LinkedIn company URL, or null when the
 *   search fails, times out, returns a non-OK status, or finds nothing.
 */
async function searchPublicLinkedInCompanyUrl(companyName, timeoutMs) {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 15_000));
    try {
        const response = await fetch(buildPublicLinkedInCompanySearchUrl(companyName), {
            method: "GET",
            signal: controller.signal,
            headers: {
                "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"
            }
        });
        if (!response.ok) {
            return null;
        }
        const bodyText = await response.text();
        const candidates = extractLinkedInCompanySearchCandidates(bodyText);
        const ranked = candidates
            .map((url) => ({ url, score: scoreLinkedInCompanyUrlCandidate(companyName, url) }))
            .filter((candidate) => candidate.score > 0)
            .sort((left, right) => right.score - left.score);
        return ranked[0]?.url ?? candidates[0] ?? null;
    }
    catch {
        // Best-effort lookup: timeouts (AbortError) and every other failure
        // both mean "no result", so the error type needs no inspection.
        return null;
    }
    finally {
        clearTimeout(timeout);
    }
}
|
|
1679
|
+
/**
 * Looks up a company's LinkedIn page through the Serper search API.
 *
 * Queries are tried in priority order, each aborted after `timeoutMs`
 * (capped at 15 seconds). For each query the top three positive-scoring
 * candidates are validated by fetching their pages:
 *  - a candidate counts as validated when its page score is >= 24; the
 *    validated candidate with the highest combined score is returned;
 *  - when no candidate page could be fetched at all, the top-ranked
 *    candidate is returned unvalidated;
 *  - otherwise the next query is tried.
 *
 * @param {string} companyName - Company name to search for.
 * @param {number} timeoutMs - Requested timeout in milliseconds.
 * @returns {Promise<string|null>} A LinkedIn company URL, or null when no API
 *   key is configured or no query produces a usable candidate.
 */
async function searchSerperLinkedInCompanyUrl(companyName, timeoutMs) {
    const apiKey = getSerperApiKey();
    if (!apiKey) {
        return null;
    }
    for (const query of buildSerperLinkedInCompanyQueries(companyName)) {
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), Math.min(timeoutMs, 15_000));
        try {
            const response = await fetch(getSerperSearchEndpoint(), {
                method: "POST",
                signal: controller.signal,
                headers: {
                    "Content-Type": "application/json",
                    "X-API-KEY": apiKey
                },
                body: JSON.stringify({
                    q: query,
                    num: 5
                })
            });
            if (!response.ok) {
                continue;
            }
            const parsed = await response.json();
            const ranked = extractSerperLinkedInCompanyCandidates(parsed)
                .map((url) => ({ url, score: scoreLinkedInCompanyUrlCandidate(companyName, url) }))
                .filter((candidate) => candidate.score > 0)
                .sort((left, right) => right.score - left.score);
            let anySignalsFetched = false;
            let bestValidated = null;
            for (const candidate of ranked.slice(0, 3)) {
                const signals = await fetchLinkedInCompanyPageSignals(candidate.url, timeoutMs);
                if (!signals || signals.unavailable) {
                    continue;
                }
                anySignalsFetched = true;
                const validationScore = scoreLinkedInCompanyPageSignals(companyName, signals);
                if (validationScore < 24) {
                    continue;
                }
                const combinedScore = candidate.score + validationScore;
                if (!bestValidated || combinedScore > bestValidated.score) {
                    bestValidated = {
                        url: signals.normalizedUrl,
                        score: combinedScore
                    };
                }
            }
            if (bestValidated) {
                return bestValidated.url;
            }
            // Pages were unreachable rather than mismatched: trust the ranking.
            if (!anySignalsFetched && ranked[0]?.url) {
                return ranked[0].url;
            }
        }
        catch {
            // Best-effort lookup: a timeout (AbortError) or any other failure
            // just moves on to the next query, so the error type needs no
            // inspection.
        }
        finally {
            clearTimeout(timeout);
        }
    }
    return null;
}
|
|
1251
1746
|
async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
1252
1747
|
const config = await readLinkedInDirectLookupConfig();
|
|
1253
1748
|
const groupedContacts = new Map();
|
|
@@ -1279,6 +1774,7 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1279
1774
|
continue;
|
|
1280
1775
|
}
|
|
1281
1776
|
let matchedUrl = null;
|
|
1777
|
+
let matchedSalesNavUrl = null;
|
|
1282
1778
|
let lastError = null;
|
|
1283
1779
|
for (const candidate of variations) {
|
|
1284
1780
|
for (const searchVariant of buildLinkedInLookupSearchVariants(candidate)) {
|
|
@@ -1315,8 +1811,10 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1315
1811
|
const data = (await response.json());
|
|
1316
1812
|
const profilesFound = data.paging?.total ?? 0;
|
|
1317
1813
|
if (profilesFound > 0) {
|
|
1318
|
-
|
|
1319
|
-
|
|
1814
|
+
const first = data.elements?.[0];
|
|
1815
|
+
matchedUrl = extractLinkedInProfileUrlFromSalesApiElement(first) ?? null;
|
|
1816
|
+
matchedSalesNavUrl = extractLinkedInSalesNavLeadUrlFromSalesApiElement(first) ?? null;
|
|
1817
|
+
if (matchedUrl || matchedSalesNavUrl) {
|
|
1320
1818
|
break;
|
|
1321
1819
|
}
|
|
1322
1820
|
}
|
|
@@ -1327,18 +1825,19 @@ async function invokeLinkedInUrlEnrichmentDirect(params) {
|
|
|
1327
1825
|
finally {
|
|
1328
1826
|
clearTimeout(timeout);
|
|
1329
1827
|
}
|
|
1330
|
-
if (matchedUrl || rateLimited) {
|
|
1828
|
+
if (matchedUrl || matchedSalesNavUrl || rateLimited) {
|
|
1331
1829
|
break;
|
|
1332
1830
|
}
|
|
1333
1831
|
}
|
|
1334
|
-
if (matchedUrl || rateLimited) {
|
|
1832
|
+
if (matchedUrl || matchedSalesNavUrl || rateLimited) {
|
|
1335
1833
|
break;
|
|
1336
1834
|
}
|
|
1337
1835
|
}
|
|
1338
1836
|
results.push({
|
|
1339
1837
|
contact_id: primary.contact_id,
|
|
1340
|
-
linkedin_url: matchedUrl,
|
|
1341
|
-
|
|
1838
|
+
linkedin_url: matchedUrl ?? matchedSalesNavUrl,
|
|
1839
|
+
sales_nav_profile_url: matchedSalesNavUrl,
|
|
1840
|
+
error: matchedUrl || matchedSalesNavUrl ? null : lastError
|
|
1342
1841
|
});
|
|
1343
1842
|
}
|
|
1344
1843
|
return {
|
|
@@ -1372,6 +1871,7 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1372
1871
|
companyNameOriginal: contact.companyNameOriginal
|
|
1373
1872
|
});
|
|
1374
1873
|
let matchedCompanyUrl = null;
|
|
1874
|
+
let matchedSalesNavCompanyUrl = null;
|
|
1375
1875
|
let matchedCompanyName = null;
|
|
1376
1876
|
let matchedCompanyEmployeeCount = null;
|
|
1377
1877
|
let lastError = null;
|
|
@@ -1410,8 +1910,10 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1410
1910
|
const data = (await response.json());
|
|
1411
1911
|
const first = data.elements?.[0];
|
|
1412
1912
|
const companyUrl = extractLinkedInCompanyUrlFromSalesApiElement(first);
|
|
1413
|
-
|
|
1913
|
+
const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(first);
|
|
1914
|
+
if (companyUrl || salesNavCompanyUrl) {
|
|
1414
1915
|
matchedCompanyUrl = companyUrl;
|
|
1916
|
+
matchedSalesNavCompanyUrl = salesNavCompanyUrl;
|
|
1415
1917
|
matchedCompanyName = extractLinkedInCompanyNameFromSalesApiElement(first);
|
|
1416
1918
|
matchedCompanyEmployeeCount = extractLinkedInCompanyEmployeeCountFromSalesApiElement(first);
|
|
1417
1919
|
break;
|
|
@@ -1430,9 +1932,10 @@ async function invokeLinkedInCompanyEnrichmentDirect(params) {
|
|
|
1430
1932
|
results.push({
|
|
1431
1933
|
contact_id: contact.contact_id,
|
|
1432
1934
|
linkedin_company_url: matchedCompanyUrl,
|
|
1935
|
+
sales_nav_company_url: matchedSalesNavCompanyUrl,
|
|
1433
1936
|
matched_company_name: matchedCompanyName,
|
|
1434
1937
|
matched_company_employee_count: matchedCompanyEmployeeCount,
|
|
1435
|
-
error: matchedCompanyUrl ? null : lastError
|
|
1938
|
+
error: matchedCompanyUrl || matchedSalesNavCompanyUrl ? null : lastError
|
|
1436
1939
|
});
|
|
1437
1940
|
}
|
|
1438
1941
|
return {
|
|
@@ -1513,6 +2016,67 @@ async function invokeLinkedInUrlEnrichmentWorkflow(params) {
|
|
|
1513
2016
|
clearTimeout(timeout);
|
|
1514
2017
|
}
|
|
1515
2018
|
}
|
|
2019
|
+
/**
 * Index the rows of a LinkedIn URL workflow response by contact id.
 *
 * Rows are read from `parsedBody.contacts` followed by `parsedBody.profiles`
 * (each only when it is an array; `parsedBody` is ignored unless it is a
 * non-array object). A row is attributed to a contact through its own
 * `contact_id` (a string, or a number converted to a string) or, failing that,
 * by looking its lower-cased `email` up among `params.contacts`. Rows that
 * cannot be attributed are dropped; when several rows resolve to the same
 * contact id, the last one wins.
 *
 * @param {{ contacts: Array<{ contact_id: unknown, email?: unknown }>, parsedBody: unknown }} params
 * @returns {Map<unknown, { linkedinUrl: unknown, salesNavProfileUrl: unknown, linkedinCompanyUrl: unknown, salesNavCompanyUrl: unknown }>}
 */
function normalizeWorkflowLinkedInUrlResult(params) {
    // Returns the value of the first key in `keys` that holds a string (an
    // empty string counts), or `fallback` when none of them does.
    const pickFirstString = (row, keys, fallback) => {
        for (const key of keys) {
            if (typeof row[key] === "string") {
                return row[key];
            }
        }
        return fallback;
    };
    const contactIdsBySyntheticEmail = new Map(params.contacts
        .filter((contact) => contact.email)
        .map((contact) => [String(contact.email).toLowerCase(), contact.contact_id]));
    const rowsByContactId = new Map();
    const body = params.parsedBody && typeof params.parsedBody === "object" && !Array.isArray(params.parsedBody)
        ? params.parsedBody
        : null;
    const workflowRows = [
        ...(Array.isArray(body?.contacts) ? body.contacts : []),
        ...(Array.isArray(body?.profiles) ? body.profiles : [])
    ];
    for (const contact of workflowRows) {
        // The response comes from an external workflow: a null or primitive
        // entry carries no usable data and must not abort the whole batch.
        if (contact === null || typeof contact !== "object") {
            continue;
        }
        const explicitContactId = typeof contact.contact_id === "string"
            ? contact.contact_id
            : typeof contact.contact_id === "number"
                ? String(contact.contact_id)
                : "";
        const emailKey = typeof contact.email === "string" ? contact.email.toLowerCase() : "";
        const contactId = explicitContactId || contactIdsBySyntheticEmail.get(emailKey) || "";
        const linkedinUrl = normalizePublicLinkedInProfileUrl(pickFirstString(contact, [
            "linkedin_profile_url",
            "linkedinProfileUrl",
            "default_profile_url",
            "defaultProfileUrl",
            "linkedin_url",
            "linkedinUrl"
        ], null));
        // The generic linkedin_url / linkedinUrl fields are also offered to the
        // Sales Navigator normalizer as a last resort.
        const salesNavProfileUrl = normalizeSalesNavLeadUrl(pickFirstString(contact, [
            "sales_nav_profile_url",
            "salesNavProfileUrl",
            "linkedin_url",
            "linkedinUrl"
        ], null)) ?? null;
        const regularCompanyHandle = normalizeLinkedInCompanyHandle(pickFirstString(contact, [
            "regular_company_url",
            "regularCompanyUrl"
        ], ""));
        const linkedinCompanyUrl = extractLinkedInCompanyUrlFromSalesApiElement(contact) ??
            (regularCompanyHandle ? normalizeLinkedInCompanyPage(regularCompanyHandle) : null);
        const salesNavCompanyUrl = extractLinkedInSalesNavCompanyUrlFromSalesApiElement(contact);
        if (contactId) {
            rowsByContactId.set(contactId, {
                // Fall back to the Sales Navigator lead URL when no public profile URL was found.
                linkedinUrl: linkedinUrl ?? salesNavProfileUrl,
                salesNavProfileUrl,
                linkedinCompanyUrl,
                salesNavCompanyUrl
            });
        }
    }
    return rowsByContactId;
}
|
|
1516
2080
|
async function fetchSalesNavLookupCandidates(params) {
|
|
1517
2081
|
const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL?.trim();
|
|
1518
2082
|
const serviceRoleKey = process.env.SUPABASE_SERVICE_ROLE_KEY?.trim();
|
|
@@ -1594,6 +2158,7 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
1594
2158
|
return right.score - left.score || Number(Boolean(rightUrl)) - Number(Boolean(leftUrl));
|
|
1595
2159
|
});
|
|
1596
2160
|
const best = ranked[0]?.candidate;
|
|
2161
|
+
const salesNavProfileUrl = best?.salesNavProfileUrl ?? null;
|
|
1597
2162
|
const linkedinUrl = best?.linkedInProfileUrl ?? best?.salesNavProfileUrl ?? null;
|
|
1598
2163
|
const linkedinCompanyUrl = (() => {
|
|
1599
2164
|
const handle = normalizeLinkedInCompanyHandle(best?.regularCompanyUrl ?? "") ??
|
|
@@ -1604,12 +2169,17 @@ async function resolveLinkedInUrlsFromSalesNavRows(params) {
|
|
|
1604
2169
|
const numericCompanyUrl = typeof best?.companyUrl === "string" ? best.companyUrl.trim() : "";
|
|
1605
2170
|
return numericCompanyUrl.length > 0 ? numericCompanyUrl : null;
|
|
1606
2171
|
})();
|
|
2172
|
+
const salesNavCompanyUrl = typeof best?.companyUrl === "string" && /\/sales\/company\//i.test(best.companyUrl)
|
|
2173
|
+
? best.companyUrl
|
|
2174
|
+
: null;
|
|
1607
2175
|
results.push({
|
|
1608
2176
|
clientId: row.clientId,
|
|
1609
2177
|
fullName: row.fullName,
|
|
1610
2178
|
companyName: row.companyName,
|
|
1611
2179
|
linkedinUrl,
|
|
2180
|
+
salesNavProfileUrl,
|
|
1612
2181
|
linkedinCompanyUrl,
|
|
2182
|
+
salesNavCompanyUrl,
|
|
1613
2183
|
found: Boolean(linkedinUrl),
|
|
1614
2184
|
companyFound: Boolean(linkedinCompanyUrl),
|
|
1615
2185
|
contactId: String(index + 1),
|
|
@@ -1679,7 +2249,16 @@ function normalizeLinkedInCompanyHandle(value) {
|
|
|
1679
2249
|
}
|
|
1680
2250
|
try {
|
|
1681
2251
|
const url = new URL(trimmed);
|
|
1682
|
-
|
|
2252
|
+
const overrideHostname = (() => {
|
|
2253
|
+
try {
|
|
2254
|
+
const overrideBase = process.env.SALESPROMPTER_LINKEDIN_BASE_URL?.trim();
|
|
2255
|
+
return overrideBase ? new URL(overrideBase).hostname : "";
|
|
2256
|
+
}
|
|
2257
|
+
catch {
|
|
2258
|
+
return "";
|
|
2259
|
+
}
|
|
2260
|
+
})();
|
|
2261
|
+
if (!/(^|\.)linkedin\.com$/i.test(url.hostname) && (!overrideHostname || url.hostname !== overrideHostname)) {
|
|
1683
2262
|
return null;
|
|
1684
2263
|
}
|
|
1685
2264
|
const segments = url.pathname.split("/").filter((segment) => segment.length > 0);
|
|
@@ -1961,14 +2540,8 @@ function writeWizardSection(title, description) {
|
|
|
1961
2540
|
}
|
|
1962
2541
|
writeWizardLine();
|
|
1963
2542
|
}
|
|
1964
|
-
function isOpaqueOrgId(value) {
|
|
1965
|
-
return /^org_[A-Za-z0-9]+$/.test(value);
|
|
1966
|
-
}
|
|
1967
2543
|
/**
 * Pick the label used to describe the session's organisation.
 *
 * Preference order: organisation name, then slug, then id. Yields `null`
 * when all three are null or undefined.
 *
 * @param {{ user: { orgName?: string | null, orgSlug?: string | null, orgId?: string | null } }} session
 * @returns {string | null}
 */
function getOrgLabel(session) {
    const { user } = session;
    return user.orgName ?? user.orgSlug ?? user.orgId ?? null;
}
|
|
1974
2547
|
function resolveSessionOrgId(session) {
|
|
@@ -2192,13 +2765,19 @@ async function promptYesNo(rl, prompt, defaultValue) {
|
|
|
2192
2765
|
}
|
|
2193
2766
|
async function ensureWizardSession(options) {
|
|
2194
2767
|
if (shouldBypassAuth()) {
|
|
2195
|
-
return
|
|
2768
|
+
return {
|
|
2769
|
+
session: null,
|
|
2770
|
+
restoredFromCache: false
|
|
2771
|
+
};
|
|
2196
2772
|
}
|
|
2197
2773
|
try {
|
|
2198
2774
|
const session = await requireAuthSession();
|
|
2199
2775
|
writeSessionSummary(session);
|
|
2200
2776
|
writeWizardLine();
|
|
2201
|
-
return
|
|
2777
|
+
return {
|
|
2778
|
+
session,
|
|
2779
|
+
restoredFromCache: true
|
|
2780
|
+
};
|
|
2202
2781
|
}
|
|
2203
2782
|
catch (error) {
|
|
2204
2783
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -2214,6 +2793,29 @@ async function ensureWizardSession(options) {
|
|
|
2214
2793
|
});
|
|
2215
2794
|
writeSessionSummary(result.session);
|
|
2216
2795
|
writeWizardLine();
|
|
2796
|
+
return {
|
|
2797
|
+
session: result.session,
|
|
2798
|
+
restoredFromCache: false
|
|
2799
|
+
};
|
|
2800
|
+
}
|
|
2801
|
+
/**
 * Ask whether the current session's workspace should be used for this CLI run
 * and, if not, sign in again so another one can be chosen in the browser.
 *
 * @param rl Readline interface handed to `promptYesNo`.
 * @param session Session whose workspace is offered to the user.
 * @param options Optional overrides; `apiUrl` defaults to `session.apiBaseUrl`
 *   and `timeoutSeconds` defaults to 180 for the new login.
 * @returns The given session when the user keeps it, otherwise the session
 *   produced by the new login.
 */
async function confirmWizardWorkspace(rl, session, options) {
    const workspaceName = getOrgLabel(session);
    let subject;
    if (workspaceName) {
        subject = `workspace ${workspaceName}`;
    }
    else {
        subject = "this signed-in account without a selected workspace";
    }
    const keepWorkspace = await promptYesNo(rl, `Use ${subject} for this CLI run?`, true);
    // Either answer is followed by a blank separator line.
    writeWizardLine();
    if (keepWorkspace) {
        return session;
    }
    writeWizardLine("Choose the workspace for this CLI session in the browser.");
    writeWizardLine();
    // The stored session is cleared before the new login is started.
    await clearAuthSession();
    const loginRequest = {
        apiUrl: options?.apiUrl ?? session.apiBaseUrl,
        timeoutSeconds: options?.timeoutSeconds ?? 180
    };
    const { session: freshSession } = await performLogin(loginRequest);
    writeSessionSummary(freshSession);
    writeWizardLine();
    return freshSession;
}
|
|
2219
2821
|
async function resolveLlmAuthReadiness() {
|
|
@@ -2891,6 +3493,49 @@ class SalesNavigatorExportRequestError extends Error {
|
|
|
2891
3493
|
this.launchDiagnostics = options.launchDiagnostics ?? null;
|
|
2892
3494
|
}
|
|
2893
3495
|
}
|
|
3496
|
+
/**
 * Error describing a failed CLI API request.
 *
 * Carries the HTTP status code and the optional machine-readable error code
 * supplied by the caller alongside the human-readable message.
 */
class CliApiRequestError extends Error {
    statusCode;
    errorCode;
    /**
     * @param {string} message Human-readable failure description.
     * @param {{ statusCode?: number, errorCode?: string }} [options] Request
     *   details; both fields stay `undefined` when omitted.
     */
    constructor(message, options) {
        super(message);
        this.name = "CliApiRequestError";
        // Tolerate a missing options bag: constructing an error must never
        // throw a TypeError that hides the failure being reported.
        const { statusCode, errorCode } = options ?? {};
        this.statusCode = statusCode;
        this.errorCode = errorCode;
    }
}
|
|
3506
|
+
/**
 * Error describing a failed LinkedIn company backfill batch.
 *
 * `failureCode` is the optional machine-readable code supplied by the caller.
 */
class LinkedInCompanyBackfillBatchError extends Error {
    failureCode;
    /**
     * @param {string} message Human-readable failure description.
     * @param {{ failureCode?: string }} [options] Failure details;
     *   `failureCode` stays `undefined` when omitted.
     */
    constructor(message, options) {
        super(message);
        this.name = "LinkedInCompanyBackfillBatchError";
        // Tolerate a missing options bag so that raising this error cannot
        // itself fail with a TypeError.
        this.failureCode = options?.failureCode;
    }
}
|
|
3514
|
+
/**
 * Build a short human-readable label for the LinkedIn session a launch used.
 *
 * The identity is the trimmed user email or, when that is empty, the trimmed
 * user handle. The fingerprint is the first 12 characters of the trimmed
 * session cookie hash. Both are combined as `identity (fingerprint)`; when
 * only one is available it is used alone, and a generic phrase is returned
 * when neither is.
 *
 * @param {{ selectedSessionUserEmail?: string | null, selectedSessionUserHandle?: string | null, selectedSessionCookieSha256?: string | null }} launch
 * @returns {string}
 */
function formatLinkedInCompanyBackfillSessionLabel(launch) {
    const email = launch.selectedSessionUserEmail?.trim();
    const who = email ? email : (launch.selectedSessionUserHandle?.trim() || null);
    const trimmedHash = launch.selectedSessionCookieSha256?.trim();
    const fingerprint = trimmedHash ? trimmedHash.slice(0, 12) : null;
    if (!who) {
        return fingerprint || 'the selected LinkedIn session';
    }
    return fingerprint ? `${who} (${fingerprint})` : who;
}
|
|
3526
|
+
/**
 * Tell whether a failure message contains one of the known phrases that
 * indicate an invalid or expired LinkedIn session cookie (case-insensitive).
 *
 * @param {string} message Failure message to inspect.
 * @returns {boolean}
 */
function isLinkedInCompanyBackfillInvalidSessionMessage(message) {
    const invalidSessionPattern = /session cookie not valid anymore|expired session cookie|invalid session cookie|can't connect to linkedin with this session cookie|no valid credentials found|please log in to linkedin to get a new one/i;
    return invalidSessionPattern.test(message);
}
|
|
3529
|
+
/**
 * Compose the recovery instructions shown when company enrichment has run out
 * of usable LinkedIn sessions.
 *
 * Labels are trimmed, blank ones are dropped and duplicates collapsed (first
 * occurrence order is kept). With no labels left a generic message is
 * returned; otherwise the message lists the rejected sessions.
 *
 * @param {string[]} labels Labels of the sessions that were rejected.
 * @returns {string}
 */
function buildLinkedInCompanyBackfillSessionRecoveryMessage(labels) {
    const seen = new Set();
    for (const rawLabel of labels) {
        const cleaned = rawLabel.trim();
        if (cleaned.length > 0) {
            seen.add(cleaned);
        }
    }
    const uniqueLabels = [...seen];
    if (uniqueLabels.length > 0) {
        const attemptedSessions = uniqueLabels.join(", ");
        return `Company enrichment exhausted the LinkedIn session pool. Phantombuster rejected ${uniqueLabels.length} synced LinkedIn session${uniqueLabels.length === 1 ? "" : "s"} as expired: ${attemptedSessions}. Open LinkedIn Sales Navigator in Chrome, reconnect the Salesprompter extension, and retry companies:enrich.`;
    }
    return "Company enrichment exhausted the LinkedIn session pool. Open LinkedIn Sales Navigator in Chrome, reconnect the Salesprompter extension, and retry companies:enrich.";
}
|
|
2894
3539
|
const SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS = 90_000;
|
|
2895
3540
|
async function withRefreshableAuthSession(session, run, contextLabel = "Salesprompter session expired during crawl. Refreshing login...") {
|
|
2896
3541
|
let currentSession = session;
|
|
@@ -2921,13 +3566,22 @@ async function fetchCliJson(session, request, schema) {
|
|
|
2921
3566
|
const text = await response.text();
|
|
2922
3567
|
const parsed = text.length > 0 ? JSON.parse(text) : {};
|
|
2923
3568
|
if (!response.ok) {
|
|
3569
|
+
const errorCode = typeof parsed === "object" &&
|
|
3570
|
+
parsed !== null &&
|
|
3571
|
+
"code" in parsed &&
|
|
3572
|
+
typeof parsed.code === "string"
|
|
3573
|
+
? parsed.code
|
|
3574
|
+
: undefined;
|
|
2924
3575
|
const errorMessage = typeof parsed === "object" &&
|
|
2925
3576
|
parsed !== null &&
|
|
2926
3577
|
"error" in parsed &&
|
|
2927
3578
|
typeof parsed.error === "string"
|
|
2928
3579
|
? parsed.error
|
|
2929
3580
|
: `request failed (${response.status})`;
|
|
2930
|
-
throw new
|
|
3581
|
+
throw new CliApiRequestError(errorMessage, {
|
|
3582
|
+
statusCode: response.status,
|
|
3583
|
+
errorCode
|
|
3584
|
+
});
|
|
2931
3585
|
}
|
|
2932
3586
|
return schema.parse(parsed);
|
|
2933
3587
|
});
|
|
@@ -2986,7 +3640,13 @@ async function enrichDirectEmailCompaniesViaApp(session, payload) {
|
|
|
2986
3640
|
return value;
|
|
2987
3641
|
}
|
|
2988
3642
|
async function fetchLinkedInCompaniesBackfillStatus(session, payload) {
|
|
2989
|
-
const
|
|
3643
|
+
const url = new URL('/api/cli/linkedin-companies/status', session.apiBaseUrl);
|
|
3644
|
+
url.searchParams.set('clientId', String(payload.clientId));
|
|
3645
|
+
url.searchParams.set('containerId', payload.containerId);
|
|
3646
|
+
if (payload.selectedSessionCookieSha256?.trim()) {
|
|
3647
|
+
url.searchParams.set('selectedSessionCookieSha256', payload.selectedSessionCookieSha256.trim());
|
|
3648
|
+
}
|
|
3649
|
+
const { value } = await fetchCliJson(session, (currentSession) => fetch(url.toString(), {
|
|
2990
3650
|
method: "GET",
|
|
2991
3651
|
headers: {
|
|
2992
3652
|
Authorization: `Bearer ${currentSession.accessToken}`
|
|
@@ -3242,10 +3902,24 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3242
3902
|
let startedCompanies = 0;
|
|
3243
3903
|
let remaining = 0;
|
|
3244
3904
|
let consecutiveBusyPolls = 0;
|
|
3905
|
+
let consecutiveRetryableFailures = 0;
|
|
3906
|
+
const maxRetryableFailures = 3;
|
|
3907
|
+
let consecutiveInvalidSessionFailures = 0;
|
|
3908
|
+
const maxInvalidSessionFailures = 2;
|
|
3909
|
+
const invalidSessionLabels = [];
|
|
3910
|
+
const excludedSessionCookieSha256 = new Set();
|
|
3911
|
+
const excludedUserEmails = new Set();
|
|
3912
|
+
const excludedUserHandles = new Set();
|
|
3913
|
+
let lastProcessedRemaining = null;
|
|
3245
3914
|
for (;;) {
|
|
3246
3915
|
let launched;
|
|
3247
3916
|
try {
|
|
3248
|
-
launched = await launchLinkedInCompaniesBackfill(session,
|
|
3917
|
+
launched = await launchLinkedInCompaniesBackfill(session, {
|
|
3918
|
+
...payload,
|
|
3919
|
+
excludedSessionCookieSha256: Array.from(excludedSessionCookieSha256),
|
|
3920
|
+
excludedUserEmails: Array.from(excludedUserEmails),
|
|
3921
|
+
excludedUserHandles: Array.from(excludedUserHandles),
|
|
3922
|
+
});
|
|
3249
3923
|
}
|
|
3250
3924
|
catch (error) {
|
|
3251
3925
|
if (isSalesNavigatorAgentBusyError(error)) {
|
|
@@ -3256,6 +3930,19 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3256
3930
|
await delay(30_000);
|
|
3257
3931
|
continue;
|
|
3258
3932
|
}
|
|
3933
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(error) &&
|
|
3934
|
+
consecutiveInvalidSessionFailures < maxInvalidSessionFailures) {
|
|
3935
|
+
consecutiveInvalidSessionFailures += 1;
|
|
3936
|
+
writeProgress(`Company enrichment session expired. Trying another synced LinkedIn session (${consecutiveInvalidSessionFailures}/${maxInvalidSessionFailures})...`);
|
|
3937
|
+
await delay(5_000);
|
|
3938
|
+
continue;
|
|
3939
|
+
}
|
|
3940
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(error)) {
|
|
3941
|
+
throw new Error(buildLinkedInCompanyBackfillSessionRecoveryMessage(invalidSessionLabels));
|
|
3942
|
+
}
|
|
3943
|
+
if (isCompanyBackfillSourceInvalidError(error)) {
|
|
3944
|
+
throw new Error("Company enrichment source is broken. Refresh leadPool_inner_merged and recreate leadPool_new, then retry.");
|
|
3945
|
+
}
|
|
3259
3946
|
throw error;
|
|
3260
3947
|
}
|
|
3261
3948
|
consecutiveBusyPolls = 0;
|
|
@@ -3268,24 +3955,178 @@ async function drainLinkedInCompanyBackfill(session, payload) {
|
|
|
3268
3955
|
};
|
|
3269
3956
|
}
|
|
3270
3957
|
batches += 1;
|
|
3271
|
-
|
|
3272
|
-
|
|
3958
|
+
const launchedCompanies = launched.candidates.length;
|
|
3959
|
+
startedCompanies += launchedCompanies;
|
|
3960
|
+
let initialStatus;
|
|
3961
|
+
try {
|
|
3962
|
+
initialStatus = await waitForLinkedInCompanyBackfillStart(session, {
|
|
3963
|
+
clientId: payload.clientId,
|
|
3964
|
+
containerId: launched.containerId,
|
|
3965
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
3966
|
+
});
|
|
3967
|
+
}
|
|
3968
|
+
catch (error) {
|
|
3969
|
+
if (isRetryableLinkedInCompanyBackfillFailure(error) && consecutiveRetryableFailures < maxRetryableFailures) {
|
|
3970
|
+
consecutiveRetryableFailures += 1;
|
|
3971
|
+
batches -= 1;
|
|
3972
|
+
startedCompanies -= launchedCompanies;
|
|
3973
|
+
writeProgress(`Company enrichment batch failed before start (${error.message}). Retrying automatically (${consecutiveRetryableFailures}/${maxRetryableFailures})...`);
|
|
3974
|
+
await delay(5_000);
|
|
3975
|
+
continue;
|
|
3976
|
+
}
|
|
3977
|
+
throw error;
|
|
3978
|
+
}
|
|
3979
|
+
writeProgress(initialStatus.processed
|
|
3980
|
+
? `Finished company enrichment batch ${batches} for ${launchedCompanies} companies.`
|
|
3981
|
+
: `Started company enrichment batch ${batches} for ${launchedCompanies} companies.`);
|
|
3982
|
+
const batchStartedAt = Date.now();
|
|
3983
|
+
let lastRunningHeartbeatAt = batchStartedAt;
|
|
3984
|
+
let lastPendingPersistenceHeartbeatAt = batchStartedAt;
|
|
3273
3985
|
for (;;) {
|
|
3274
3986
|
const status = await fetchLinkedInCompaniesBackfillStatus(session, {
|
|
3275
3987
|
clientId: payload.clientId,
|
|
3276
|
-
containerId: launched.containerId
|
|
3988
|
+
containerId: launched.containerId,
|
|
3989
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
3277
3990
|
});
|
|
3278
3991
|
remaining = status.remaining;
|
|
3992
|
+
if (!status.running && status.failed) {
|
|
3993
|
+
const batchError = new LinkedInCompanyBackfillBatchError(status.failureMessage ?? "Company enrichment batch failed.", { failureCode: status.failureCode ?? undefined });
|
|
3994
|
+
if (isRetryableLinkedInCompanyBackfillFailure(batchError) &&
|
|
3995
|
+
consecutiveRetryableFailures < maxRetryableFailures) {
|
|
3996
|
+
consecutiveRetryableFailures += 1;
|
|
3997
|
+
batches -= 1;
|
|
3998
|
+
startedCompanies -= launchedCompanies;
|
|
3999
|
+
writeProgress(`Company enrichment batch failed (${batchError.message}). Retrying automatically (${consecutiveRetryableFailures}/${maxRetryableFailures})...`);
|
|
4000
|
+
await delay(5_000);
|
|
4001
|
+
break;
|
|
4002
|
+
}
|
|
4003
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(batchError) &&
|
|
4004
|
+
consecutiveInvalidSessionFailures < maxInvalidSessionFailures) {
|
|
4005
|
+
consecutiveInvalidSessionFailures += 1;
|
|
4006
|
+
invalidSessionLabels.push(formatLinkedInCompanyBackfillSessionLabel(launched));
|
|
4007
|
+
if (launched.selectedSessionCookieSha256?.trim()) {
|
|
4008
|
+
excludedSessionCookieSha256.add(launched.selectedSessionCookieSha256.trim());
|
|
4009
|
+
}
|
|
4010
|
+
if (launched.selectedSessionUserEmail?.trim()) {
|
|
4011
|
+
excludedUserEmails.add(launched.selectedSessionUserEmail.trim());
|
|
4012
|
+
}
|
|
4013
|
+
if (launched.selectedSessionUserHandle?.trim()) {
|
|
4014
|
+
excludedUserHandles.add(launched.selectedSessionUserHandle.trim());
|
|
4015
|
+
}
|
|
4016
|
+
batches -= 1;
|
|
4017
|
+
startedCompanies -= launchedCompanies;
|
|
4018
|
+
writeProgress(`Company enrichment rejected ${formatLinkedInCompanyBackfillSessionLabel(launched)} as expired. Trying another synced LinkedIn session (${consecutiveInvalidSessionFailures}/${maxInvalidSessionFailures})...`);
|
|
4019
|
+
await delay(5_000);
|
|
4020
|
+
break;
|
|
4021
|
+
}
|
|
4022
|
+
if (isRecoverableLinkedInCompanyBackfillSessionFailure(batchError)) {
|
|
4023
|
+
invalidSessionLabels.push(formatLinkedInCompanyBackfillSessionLabel(launched));
|
|
4024
|
+
if (launched.selectedSessionCookieSha256?.trim()) {
|
|
4025
|
+
excludedSessionCookieSha256.add(launched.selectedSessionCookieSha256.trim());
|
|
4026
|
+
}
|
|
4027
|
+
if (launched.selectedSessionUserEmail?.trim()) {
|
|
4028
|
+
excludedUserEmails.add(launched.selectedSessionUserEmail.trim());
|
|
4029
|
+
}
|
|
4030
|
+
if (launched.selectedSessionUserHandle?.trim()) {
|
|
4031
|
+
excludedUserHandles.add(launched.selectedSessionUserHandle.trim());
|
|
4032
|
+
}
|
|
4033
|
+
throw new Error(buildLinkedInCompanyBackfillSessionRecoveryMessage(invalidSessionLabels));
|
|
4034
|
+
}
|
|
4035
|
+
throw batchError;
|
|
4036
|
+
}
|
|
3279
4037
|
if (!status.running && status.processed) {
|
|
4038
|
+
if (lastProcessedRemaining !== null && status.remaining >= lastProcessedRemaining) {
|
|
4039
|
+
const settledStatus = await waitForLinkedInCompanyBackfillRemainingDrop(session, {
|
|
4040
|
+
clientId: payload.clientId,
|
|
4041
|
+
containerId: launched.containerId,
|
|
4042
|
+
selectedSessionCookieSha256: launched.selectedSessionCookieSha256 ?? null,
|
|
4043
|
+
previousRemaining: lastProcessedRemaining
|
|
4044
|
+
});
|
|
4045
|
+
remaining = settledStatus.remaining;
|
|
4046
|
+
if (remaining >= lastProcessedRemaining) {
|
|
4047
|
+
throw new Error(`Company enrichment batch ${batches} finished but remaining stayed at ${remaining}. Stopping to avoid duplicate launches.`);
|
|
4048
|
+
}
|
|
4049
|
+
}
|
|
4050
|
+
consecutiveRetryableFailures = 0;
|
|
4051
|
+
consecutiveInvalidSessionFailures = 0;
|
|
4052
|
+
lastProcessedRemaining = remaining;
|
|
4053
|
+
const completionMessage = `Finished company enrichment batch ${batches} for ${launchedCompanies} companies.`;
|
|
3280
4054
|
writeProgress(remaining > 0
|
|
3281
|
-
? `${remaining} companies still waiting. Starting the next batch...`
|
|
3282
|
-
:
|
|
4055
|
+
? `${completionMessage} ${remaining} companies still waiting. Starting the next batch...`
|
|
4056
|
+
: `${completionMessage} Company enrichment finished.`);
|
|
3283
4057
|
break;
|
|
3284
4058
|
}
|
|
4059
|
+
if (status.running) {
|
|
4060
|
+
const now = Date.now();
|
|
4061
|
+
if (now - lastRunningHeartbeatAt >= 30_000) {
|
|
4062
|
+
const elapsedSeconds = Math.max(1, Math.round((now - batchStartedAt) / 1000));
|
|
4063
|
+
writeProgress(`Company enrichment batch ${batches} is still running (${elapsedSeconds}s elapsed)...`);
|
|
4064
|
+
lastRunningHeartbeatAt = now;
|
|
4065
|
+
}
|
|
4066
|
+
}
|
|
4067
|
+
else if (!status.processed) {
|
|
4068
|
+
const now = Date.now();
|
|
4069
|
+
if (now - lastPendingPersistenceHeartbeatAt >= 30_000) {
|
|
4070
|
+
writeProgress(`Company enrichment batch ${batches} finished remotely. Waiting for results to sync...`);
|
|
4071
|
+
lastPendingPersistenceHeartbeatAt = now;
|
|
4072
|
+
}
|
|
4073
|
+
}
|
|
3285
4074
|
await delay(15_000);
|
|
3286
4075
|
}
|
|
3287
4076
|
}
|
|
3288
4077
|
}
|
|
4078
|
+
/**
 * Poll the backfill status until the batch reports that it is running or
 * processed, or until a 45 second window has elapsed.
 *
 * The status is fetched every 5 seconds. When the window elapses the most
 * recent status is returned even though it reports neither running nor
 * processed, so callers must inspect the returned flags.
 *
 * @param session Session forwarded to `fetchLinkedInCompaniesBackfillStatus`.
 * @param payload Status query forwarded unchanged on every poll.
 * @returns The last status fetched.
 * @throws {LinkedInCompanyBackfillBatchError} When a fetched status reports `failed`.
 */
async function waitForLinkedInCompanyBackfillStart(session, payload) {
    const giveUpAt = Date.now() + 45_000;
    while (true) {
        const snapshot = await fetchLinkedInCompaniesBackfillStatus(session, payload);
        if (snapshot.failed) {
            const failureMessage = snapshot.failureMessage ?? "Company enrichment batch failed.";
            const failureCode = snapshot.failureCode ?? undefined;
            throw new LinkedInCompanyBackfillBatchError(failureMessage, { failureCode });
        }
        const hasStarted = snapshot.running || snapshot.processed;
        if (hasStarted || Date.now() >= giveUpAt) {
            return snapshot;
        }
        await delay(5_000);
    }
}
|
|
4094
|
+
/**
 * Poll the backfill status until `remaining` drops below
 * `payload.previousRemaining`, or until a 90 second window has elapsed.
 *
 * The status is re-fetched every 10 seconds and a progress line is written at
 * most every 30 seconds while waiting. When the window elapses the most recent
 * status is returned even if `remaining` has not dropped, so callers must
 * compare `remaining` themselves.
 *
 * @param session Session forwarded to `fetchLinkedInCompaniesBackfillStatus`.
 * @param payload Status query (forwarded unchanged on every poll) that also
 *   carries `previousRemaining`, the backlog size to get below.
 * @returns The last status fetched.
 * @throws {LinkedInCompanyBackfillBatchError} When any fetched status,
 *   including the final one, reports `failed`.
 */
async function waitForLinkedInCompanyBackfillRemainingDrop(session, payload) {
    const throwIfFailed = (status) => {
        if (status.failed) {
            throw new LinkedInCompanyBackfillBatchError(status.failureMessage ?? "Company enrichment batch failed.", { failureCode: status.failureCode ?? undefined });
        }
    };
    const deadline = Date.now() + 90_000;
    let latestStatus = await fetchLinkedInCompaniesBackfillStatus(session, payload);
    let lastHeartbeatAt = Date.now();
    while (Date.now() < deadline) {
        throwIfFailed(latestStatus);
        if (latestStatus.remaining < payload.previousRemaining) {
            return latestStatus;
        }
        const now = Date.now();
        if (now - lastHeartbeatAt >= 30_000) {
            writeProgress(`Company enrichment batch finished. Waiting for backlog to update below ${payload.previousRemaining}...`);
            lastHeartbeatAt = now;
        }
        await delay(10_000);
        latestStatus = await fetchLinkedInCompaniesBackfillStatus(session, payload);
    }
    // The status fetched by the last poll has not been inspected by the loop;
    // a failure reported there must surface like one reported on any earlier poll.
    throwIfFailed(latestStatus);
    return latestStatus;
}
|
|
4115
|
+
/**
 * Tell whether an error is a backfill batch failure with the `input_empty`
 * failure code.
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isRetryableLinkedInCompanyBackfillFailure(error) {
    if (!(error instanceof LinkedInCompanyBackfillBatchError)) {
        return false;
    }
    return error.failureCode === "input_empty";
}
|
|
4118
|
+
/**
 * Tell whether an error reports an invalid LinkedIn session.
 *
 * Only batch errors and CLI API request errors qualify. For those, the error
 * matches when its code (`failureCode` or `errorCode` respectively) is
 * `invalid_session`, or when its message is recognised by
 * `isLinkedInCompanyBackfillInvalidSessionMessage`.
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isRecoverableLinkedInCompanyBackfillSessionFailure(error) {
    let code;
    if (error instanceof LinkedInCompanyBackfillBatchError) {
        code = error.failureCode;
    }
    else if (error instanceof CliApiRequestError) {
        code = error.errorCode;
    }
    else {
        return false;
    }
    return code === "invalid_session" || isLinkedInCompanyBackfillInvalidSessionMessage(error.message);
}
|
|
4127
|
+
/**
 * Tell whether an error is a CLI API request error carrying the
 * `company_backfill_source_invalid` error code.
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isCompanyBackfillSourceInvalidError(error) {
    if (!(error instanceof CliApiRequestError)) {
        return false;
    }
    return error.errorCode === "company_backfill_source_invalid";
}
|
|
3289
4130
|
function isSalesNavigatorSessionError(error) {
|
|
3290
4131
|
if (error instanceof SalesNavigatorExportRequestError) {
|
|
3291
4132
|
if (error.errorCode === "invalid_session") {
|
|
@@ -3718,7 +4559,8 @@ async function ensureSalesNavigatorSessionPoolReady(queryUrl, options) {
|
|
|
3718
4559
|
status: claimed ? "ok" : "skipped",
|
|
3719
4560
|
selectedSessionUserEmail: claimed?.userEmail ?? null,
|
|
3720
4561
|
selectedSessionUserHandle: claimed?.userHandle ?? null,
|
|
3721
|
-
selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null
|
|
4562
|
+
selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null,
|
|
4563
|
+
selectedSessionLastIngestedSource: claimed?.lastIngestedSource ?? null
|
|
3722
4564
|
});
|
|
3723
4565
|
return {
|
|
3724
4566
|
ready: true
|
|
@@ -3949,9 +4791,11 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
3949
4791
|
let nextSessionPoolRetryAt = 0;
|
|
3950
4792
|
let lastSessionPoolReadyAt = 0;
|
|
3951
4793
|
const sessionPoolReadinessCooldownMs = 120_000;
|
|
4794
|
+
let allowRetryClaimBeyondMaxSlices = false;
|
|
4795
|
+
let allowedRetrySliceId = null;
|
|
3952
4796
|
while (true) {
|
|
3953
4797
|
while (!noMoreClaimableWork && inFlight.size < parallelExports) {
|
|
3954
|
-
if (claimedSlices >= options.maxSlices) {
|
|
4798
|
+
if (claimedSlices >= options.maxSlices && !allowRetryClaimBeyondMaxSlices) {
|
|
3955
4799
|
break;
|
|
3956
4800
|
}
|
|
3957
4801
|
if (inFlight.size === 0) {
|
|
@@ -4058,6 +4902,15 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4058
4902
|
break;
|
|
4059
4903
|
}
|
|
4060
4904
|
const slice = claimed.value.slice;
|
|
4905
|
+
if (claimedSlices >= options.maxSlices &&
|
|
4906
|
+
allowRetryClaimBeyondMaxSlices &&
|
|
4907
|
+
allowedRetrySliceId &&
|
|
4908
|
+
slice.id !== allowedRetrySliceId) {
|
|
4909
|
+
noMoreClaimableWork = true;
|
|
4910
|
+
break;
|
|
4911
|
+
}
|
|
4912
|
+
allowRetryClaimBeyondMaxSlices = false;
|
|
4913
|
+
allowedRetrySliceId = null;
|
|
4061
4914
|
idlePollCount = 0;
|
|
4062
4915
|
activeSlice = slice;
|
|
4063
4916
|
const isNewSlice = !seenSliceIds.has(slice.id);
|
|
@@ -4087,6 +4940,8 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4087
4940
|
job = completed.value.job;
|
|
4088
4941
|
activeSlice = completed.value.activeSlice;
|
|
4089
4942
|
lastOutcome = completed.value.lastOutcome;
|
|
4943
|
+
allowRetryClaimBeyondMaxSlices = lastOutcome?.outcome === "retryable_failed";
|
|
4944
|
+
allowedRetrySliceId = lastOutcome?.outcome === "retryable_failed" ? completed.value.activeSlice.id : null;
|
|
4090
4945
|
if (completed.value.forceSessionPoolRecheck) {
|
|
4091
4946
|
lastSessionPoolReadyAt = 0;
|
|
4092
4947
|
nextSessionPoolRetryAt = 0;
|
|
@@ -4097,6 +4952,11 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
4097
4952
|
currentSession = status.session;
|
|
4098
4953
|
job = status.value.job;
|
|
4099
4954
|
}
|
|
4955
|
+
else if (!isSalesNavigatorCrawlJobTerminal(job.status)) {
|
|
4956
|
+
const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
|
|
4957
|
+
currentSession = status.session;
|
|
4958
|
+
job = status.value.job;
|
|
4959
|
+
}
|
|
4100
4960
|
await options.logger?.log("salesnav.crawl.job.completed", {
|
|
4101
4961
|
jobId,
|
|
4102
4962
|
status: job.status,
|
|
@@ -4339,12 +5199,15 @@ async function runWizard(options) {
|
|
|
4339
5199
|
writeWizardLine("Salesprompter");
|
|
4340
5200
|
writeWizardLine("Start with a company website, LinkedIn product page, or category URL. I will guide you from there.");
|
|
4341
5201
|
writeWizardLine();
|
|
4342
|
-
await ensureWizardSession(options);
|
|
4343
5202
|
const rl = createInterface({
|
|
4344
5203
|
input: process.stdin,
|
|
4345
5204
|
output: process.stdout
|
|
4346
5205
|
});
|
|
4347
5206
|
try {
|
|
5207
|
+
const wizardSession = await ensureWizardSession(options);
|
|
5208
|
+
if (wizardSession.session && wizardSession.restoredFromCache) {
|
|
5209
|
+
await confirmWizardWorkspace(rl, wizardSession.session, options);
|
|
5210
|
+
}
|
|
4348
5211
|
const flow = await promptChoice(rl, "What do you want help with?", [
|
|
4349
5212
|
{
|
|
4350
5213
|
value: "product-market",
|
|
@@ -4406,6 +5269,15 @@ function buildCliError(error) {
|
|
|
4406
5269
|
};
|
|
4407
5270
|
}
|
|
4408
5271
|
const message = error instanceof Error ? error.message : "Unknown error";
|
|
5272
|
+
if (message === "linkedin_session_invalid" ||
|
|
5273
|
+
isLinkedInCompanyBackfillInvalidSessionMessage(message) ||
|
|
5274
|
+
/no eligible linkedin session cookies available for company backfill|company session preflight returned/i.test(message)) {
|
|
5275
|
+
return {
|
|
5276
|
+
status: "error",
|
|
5277
|
+
code: "runtime_error",
|
|
5278
|
+
message: buildLinkedInCompanyBackfillSessionRecoveryMessage([])
|
|
5279
|
+
};
|
|
5280
|
+
}
|
|
4409
5281
|
if (message.includes("not logged in")) {
|
|
4410
5282
|
return {
|
|
4411
5283
|
status: "error",
|
|
@@ -4494,6 +5366,7 @@ const domainDecisionArraySchema = z.array(z.object({
|
|
|
4494
5366
|
reason: z.enum([
|
|
4495
5367
|
"linkedin-domain",
|
|
4496
5368
|
"linkedin-website",
|
|
5369
|
+
"better-company-match",
|
|
4497
5370
|
"highest-hunter-count",
|
|
4498
5371
|
"fallback-first-non-null",
|
|
4499
5372
|
"no-domain"
|
|
@@ -4777,13 +5650,15 @@ program
|
|
|
4777
5650
|
if (rows.length === 0) {
|
|
4778
5651
|
throw new Error("No contact rows found. Provide TSV/CSV/JSON input via --in or stdin.");
|
|
4779
5652
|
}
|
|
5653
|
+
let authSession = null;
|
|
4780
5654
|
let sessionOrgId = "";
|
|
4781
5655
|
if (!shouldBypassAuth()) {
|
|
4782
5656
|
try {
|
|
4783
|
-
|
|
4784
|
-
sessionOrgId =
|
|
5657
|
+
authSession = await requireAuthSession();
|
|
5658
|
+
sessionOrgId = authSession.user.orgId ?? "";
|
|
4785
5659
|
}
|
|
4786
5660
|
catch {
|
|
5661
|
+
authSession = null;
|
|
4787
5662
|
sessionOrgId = "";
|
|
4788
5663
|
}
|
|
4789
5664
|
}
|
|
@@ -4810,25 +5685,119 @@ program
|
|
|
4810
5685
|
orgId: String(options.orgId ?? "").trim() || undefined
|
|
4811
5686
|
});
|
|
4812
5687
|
let directAttempted = false;
|
|
5688
|
+
let workflowAttempted = false;
|
|
4813
5689
|
const missingRows = enrichedRows.filter((row) => !row.found);
|
|
4814
5690
|
if (missingRows.length > 0) {
|
|
4815
|
-
directAttempted = true;
|
|
4816
5691
|
const directContacts = contacts.filter((contact) => missingRows.some((row) => row.contactId === contact.contact_id));
|
|
4817
|
-
|
|
4818
|
-
|
|
4819
|
-
|
|
4820
|
-
|
|
4821
|
-
|
|
4822
|
-
|
|
4823
|
-
|
|
4824
|
-
|
|
4825
|
-
|
|
4826
|
-
|
|
4827
|
-
|
|
4828
|
-
|
|
4829
|
-
|
|
5692
|
+
let linkedInUrlByContactId = new Map();
|
|
5693
|
+
try {
|
|
5694
|
+
directAttempted = true;
|
|
5695
|
+
const result = await invokeLinkedInUrlEnrichmentDirect({
|
|
5696
|
+
contacts: directContacts,
|
|
5697
|
+
timeoutMs
|
|
5698
|
+
});
|
|
5699
|
+
linkedInUrlByContactId = new Map(result.contacts.map((contact) => [
|
|
5700
|
+
contact.contact_id,
|
|
5701
|
+
{
|
|
5702
|
+
linkedinUrl: contact.linkedin_url ?? null,
|
|
5703
|
+
salesNavProfileUrl: contact.sales_nav_profile_url ?? null,
|
|
5704
|
+
linkedinCompanyUrl: null,
|
|
5705
|
+
salesNavCompanyUrl: null
|
|
5706
|
+
}
|
|
5707
|
+
]));
|
|
5708
|
+
for (const row of enrichedRows) {
|
|
5709
|
+
if (row.found)
|
|
5710
|
+
continue;
|
|
5711
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
5712
|
+
if (profile?.linkedinUrl) {
|
|
5713
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
5714
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
5715
|
+
row.found = true;
|
|
5716
|
+
row.source = "linkedin-direct";
|
|
5717
|
+
}
|
|
5718
|
+
}
|
|
5719
|
+
}
|
|
5720
|
+
catch (error) {
|
|
5721
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
5722
|
+
if (!/Missing LinkedIn direct lookup session/i.test(message)) {
|
|
5723
|
+
throw error;
|
|
5724
|
+
}
|
|
5725
|
+
workflowAttempted = true;
|
|
5726
|
+
const workflow = await invokeLinkedInUrlEnrichmentWorkflow({
|
|
5727
|
+
contacts: directContacts,
|
|
5728
|
+
externalUserId: String(options.orgId ?? "").trim() || sessionOrgId || "cli_direct_lookup",
|
|
5729
|
+
timeoutMs
|
|
5730
|
+
});
|
|
5731
|
+
if (!workflow.response.ok) {
|
|
5732
|
+
throw new Error(`LinkedIn enrichment workflow returned ${workflow.response.status}: ${workflow.bodyText.slice(0, 300)}`);
|
|
5733
|
+
}
|
|
5734
|
+
linkedInUrlByContactId = normalizeWorkflowLinkedInUrlResult({
|
|
5735
|
+
parsedBody: workflow.parsedBody,
|
|
5736
|
+
contacts: directContacts
|
|
5737
|
+
});
|
|
5738
|
+
for (const row of enrichedRows) {
|
|
5739
|
+
if (row.found)
|
|
5740
|
+
continue;
|
|
5741
|
+
const profile = linkedInUrlByContactId.get(row.contactId);
|
|
5742
|
+
if (profile?.linkedinUrl) {
|
|
5743
|
+
row.linkedinUrl = profile.linkedinUrl;
|
|
5744
|
+
row.salesNavProfileUrl = profile.salesNavProfileUrl ?? row.salesNavProfileUrl ?? null;
|
|
5745
|
+
row.linkedinCompanyUrl = profile.linkedinCompanyUrl ?? row.linkedinCompanyUrl ?? null;
|
|
5746
|
+
row.salesNavCompanyUrl = profile.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
5747
|
+
row.found = true;
|
|
5748
|
+
row.companyFound = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
|
|
5749
|
+
row.source = "workflow";
|
|
5750
|
+
row.companySource =
|
|
5751
|
+
row.linkedinCompanyUrl || row.salesNavCompanyUrl ? "workflow" : row.companySource ?? null;
|
|
5752
|
+
}
|
|
5753
|
+
}
|
|
5754
|
+
}
|
|
5755
|
+
}
|
|
5756
|
+
const parsedClientIds = Array.from(new Set(rows
|
|
5757
|
+
.map((row) => Number(row.clientId))
|
|
5758
|
+
.filter((value) => Number.isFinite(value) && value > 0)));
|
|
5759
|
+
if (authSession && parsedClientIds.length === 1) {
|
|
5760
|
+
try {
|
|
5761
|
+
const uniqueCompanies = Array.from(new Map(contacts
|
|
5762
|
+
.filter((contact) => !contact.isVariation)
|
|
5763
|
+
.map((contact) => {
|
|
5764
|
+
const key = normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName);
|
|
5765
|
+
return [
|
|
5766
|
+
key,
|
|
5767
|
+
{
|
|
5768
|
+
companyId: contact.contact_id,
|
|
5769
|
+
companyName: contact.companyNameOriginal ?? contact.companyName,
|
|
5770
|
+
companyNameCleaned: cleanedCompanyMap.get(key) ?? normalizeLookupWhitespace(contact.companyNameOriginal ?? contact.companyName)
|
|
5771
|
+
}
|
|
5772
|
+
];
|
|
5773
|
+
})).values());
|
|
5774
|
+
if (uniqueCompanies.length > 0) {
|
|
5775
|
+
const appCompanyResult = await enrichDirectEmailCompaniesViaApp(authSession, {
|
|
5776
|
+
clientId: parsedClientIds[0],
|
|
5777
|
+
companies: uniqueCompanies
|
|
5778
|
+
});
|
|
5779
|
+
const companyByNormalizedName = new Map(appCompanyResult.companies.map((company) => [
|
|
5780
|
+
normalizeLookupCompanyForCleaning(company.companyName),
|
|
5781
|
+
company.linkedinCompanyPage ?? null
|
|
5782
|
+
]));
|
|
5783
|
+
for (const row of enrichedRows) {
|
|
5784
|
+
if (row.linkedinCompanyUrl) {
|
|
5785
|
+
continue;
|
|
5786
|
+
}
|
|
5787
|
+
const normalizedName = normalizeLookupCompanyForCleaning(row.companyName);
|
|
5788
|
+
const linkedinCompanyUrl = companyByNormalizedName.get(normalizedName) ?? null;
|
|
5789
|
+
if (!linkedinCompanyUrl) {
|
|
5790
|
+
continue;
|
|
5791
|
+
}
|
|
5792
|
+
row.linkedinCompanyUrl = linkedinCompanyUrl;
|
|
5793
|
+
row.companyFound = true;
|
|
5794
|
+
row.companySource = "workflow";
|
|
5795
|
+
}
|
|
4830
5796
|
}
|
|
4831
5797
|
}
|
|
5798
|
+
catch (error) {
|
|
5799
|
+
writeProgress(`Skipping app-backed company enrichment: ${error instanceof Error ? error.message : String(error)}`);
|
|
5800
|
+
}
|
|
4832
5801
|
}
|
|
4833
5802
|
try {
|
|
4834
5803
|
const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
|
|
@@ -4839,6 +5808,7 @@ program
|
|
|
4839
5808
|
contact.contact_id,
|
|
4840
5809
|
{
|
|
4841
5810
|
linkedinCompanyUrl: contact.linkedin_company_url ?? null,
|
|
5811
|
+
salesNavCompanyUrl: contact.sales_nav_company_url ?? null,
|
|
4842
5812
|
matchedCompanyName: contact.matched_company_name ?? null,
|
|
4843
5813
|
matchedCompanyEmployeeCount: contact.matched_company_employee_count ?? null
|
|
4844
5814
|
}
|
|
@@ -4849,8 +5819,10 @@ program
|
|
|
4849
5819
|
continue;
|
|
4850
5820
|
}
|
|
4851
5821
|
row.linkedinCompanyUrl = company.linkedinCompanyUrl;
|
|
4852
|
-
row.
|
|
4853
|
-
row.
|
|
5822
|
+
row.salesNavCompanyUrl = company.salesNavCompanyUrl ?? row.salesNavCompanyUrl ?? null;
|
|
5823
|
+
row.companyFound = Boolean(company.linkedinCompanyUrl || company.salesNavCompanyUrl);
|
|
5824
|
+
row.companySource =
|
|
5825
|
+
company.linkedinCompanyUrl || company.salesNavCompanyUrl ? "linkedin-direct" : row.companySource ?? null;
|
|
4854
5826
|
row.matchedCompanyName = company.matchedCompanyName ?? row.matchedCompanyName ?? null;
|
|
4855
5827
|
row.matchedCompanyEmployeeCount =
|
|
4856
5828
|
company.matchedCompanyEmployeeCount ?? row.matchedCompanyEmployeeCount ?? null;
|
|
@@ -4866,6 +5838,7 @@ program
|
|
|
4866
5838
|
found: enrichedRows.filter((row) => row.found).length,
|
|
4867
5839
|
companiesFound: enrichedRows.filter((row) => row.companyFound).length,
|
|
4868
5840
|
directAttempted,
|
|
5841
|
+
workflowAttempted,
|
|
4869
5842
|
rows: enrichedRows
|
|
4870
5843
|
};
|
|
4871
5844
|
if (options.out) {
|
|
@@ -4873,6 +5846,164 @@ program
|
|
|
4873
5846
|
}
|
|
4874
5847
|
printOutput(payload);
|
|
4875
5848
|
});
|
|
5849
|
+
/**
 * `companies:find-linkedin-urls` (alias `companies:resolve-linkedin-urls`).
 *
 * Reads company rows (TSV/CSV/JSON from --in or stdin), normalizes the company
 * names, and tries to resolve a LinkedIn company URL for each unique company.
 * Sources are consulted in this order; a later source never overwrites a URL
 * that an earlier one already supplied:
 *   1. app-backed enrichment (only when an auth session is available),
 *   2. direct LinkedIn company lookup,
 *   3. web search (searchSerperLinkedInCompanyUrl, then searchPublicLinkedInCompanyUrl).
 *
 * Emits a JSON payload via printOutput and, when --out is given, writes the
 * same payload to that path. With --dry-run only the normalized lookup payload
 * is produced and no lookup source is called.
 */
program
    .command("companies:find-linkedin-urls")
    .alias("companies:resolve-linkedin-urls")
    .description("Resolve LinkedIn company URLs from a pasted company list directly in the CLI.")
    .option("--in <path>", "Input TSV/CSV/JSON file path. Omit to read from stdin.")
    .option("--out <path>", "Optional output JSON path for the enriched rows.")
    .option("--client-id <id>", "Optional clientId override for app-backed enrichment.")
    .option("--timeout-ms <number>", "Lookup timeout in milliseconds", "30000")
    .option("--company-cleaning <mode>", "Company cleaning mode: off, basic, or ai", "basic")
    .option("--dry-run", "Preview the normalized payload without calling LinkedIn", false)
    .action(async (options) => {
    const timeoutMs = z.coerce.number().int().min(1000).max(300000).parse(options.timeoutMs);
    const inputContent = options.in ? await readFile(options.in, "utf8") : await readAllStdin();
    const rows = parseLinkedInCompanyLookupInput(inputContent);
    if (rows.length === 0) {
        throw new Error("No company rows found. Provide TSV/CSV/JSON input via --in or stdin.");
    }
    // Authentication is optional for this command: a missing or invalid session
    // only disables the app-backed enrichment step below.
    let authSession = null;
    if (!shouldBypassAuth()) {
        authSession = await requireAuthSession().catch(() => null);
    }
    const companyCleaningMode = resolveCompanyCleaningMode(String(options.companyCleaning ?? process.env.SALESPROMPTER_COMPANY_CLEANING_MODE ?? "basic"));
    // The contact-lookup helpers are reused for companies, so each row is shaped
    // like a contact row with an empty full name.
    const lookupRows = rows.map((row) => ({
        clientId: row.clientId,
        fullName: "",
        companyName: row.companyName
    }));
    const cleanedCompanyMap = await buildCompanyNameCleaningMap(lookupRows, companyCleaningMode);
    const contacts = toLinkedInUrlLookupContacts(lookupRows, cleanedCompanyMap);
    if (options.dryRun) {
        const payload = {
            status: "ok",
            dryRun: true,
            companyCleaningMode,
            companies: contacts.length,
            sample: contacts.slice(0, 5).map((contact) => ({
                companyId: contact.contact_id,
                companyName: contact.companyNameOriginal ?? contact.companyName,
                companyNameCleaned: contact.companyName
            }))
        };
        if (options.out) {
            await writeJsonFile(options.out, payload);
        }
        printOutput(payload);
        return;
    }
    const clientId = resolveDirectEmailEnrichmentClientId(rows.map((row) => ({
        clientId: row.clientId,
        companyName: row.companyName,
        fullName: ""
    })), options.clientId);
    // Entries flagged as variations are excluded so every company yields one result row.
    const primaryContacts = contacts.filter((contact) => !contact.isVariation);
    const results = primaryContacts.map((contact) => ({
        clientId: String(clientId),
        companyName: contact.companyNameOriginal ?? contact.companyName,
        linkedinCompanyUrl: null,
        salesNavCompanyUrl: null,
        domain: null,
        found: false,
        source: null,
        matchedCompanyName: null,
        matchedCompanyEmployeeCount: null
    }));
    const resultByNormalizedName = new Map(results.map((row) => [normalizeLookupCompanyForCleaning(row.companyName), row]));
    // Step 1: app-backed enrichment (requires an authenticated session).
    if (authSession) {
        try {
            const uniqueCompanies = primaryContacts.map((contact) => {
                const originalName = contact.companyNameOriginal ?? contact.companyName;
                return {
                    companyId: contact.contact_id,
                    companyName: originalName,
                    companyNameCleaned: cleanedCompanyMap.get(normalizeLookupCompanyForCleaning(originalName)) ?? normalizeLookupWhitespace(originalName)
                };
            });
            if (uniqueCompanies.length > 0) {
                const enrichedCompanies = await enrichDirectEmailCompaniesViaApp(authSession, {
                    clientId,
                    companies: uniqueCompanies
                });
                for (const company of enrichedCompanies.companies) {
                    const row = resultByNormalizedName.get(normalizeLookupCompanyForCleaning(company.companyName));
                    if (!row) {
                        continue;
                    }
                    row.domain = company.domain ?? row.domain ?? null;
                    row.linkedinCompanyUrl = company.linkedinCompanyPage ?? row.linkedinCompanyUrl ?? null;
                    row.found = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
                    row.source = row.linkedinCompanyUrl ? "app" : row.source;
                }
            }
        }
        catch (error) {
            // App failures are non-fatal: report them (as contacts:find-linkedin-urls
            // does) and keep falling back to direct or public lookup.
            writeProgress(`Skipping app-backed company enrichment: ${error instanceof Error ? error.message : String(error)}`);
        }
    }
    // Step 2: direct LinkedIn lookup. Only fills fields still empty after step 1.
    try {
        const companyResult = await invokeLinkedInCompanyEnrichmentDirect({
            contacts,
            timeoutMs
        });
        const companyByContactId = new Map(companyResult.contacts.map((contact) => [contact.contact_id, contact]));
        for (const contact of primaryContacts) {
            const row = resultByNormalizedName.get(normalizeLookupCompanyForCleaning(contact.companyNameOriginal ?? contact.companyName));
            const company = companyByContactId.get(contact.contact_id);
            if (!row || !company) {
                continue;
            }
            if (!row.linkedinCompanyUrl && company.linkedin_company_url) {
                row.linkedinCompanyUrl = company.linkedin_company_url;
            }
            if (!row.salesNavCompanyUrl && company.sales_nav_company_url) {
                row.salesNavCompanyUrl = company.sales_nav_company_url;
            }
            row.matchedCompanyName = company.matched_company_name ?? row.matchedCompanyName ?? null;
            row.matchedCompanyEmployeeCount =
                company.matched_company_employee_count ?? row.matchedCompanyEmployeeCount ?? null;
            // Keep the earlier source label (e.g. "app") when one was already recorded.
            if ((company.linkedin_company_url || company.sales_nav_company_url) && row.source == null) {
                row.source = "linkedin-direct";
            }
            row.found = Boolean(row.linkedinCompanyUrl || row.salesNavCompanyUrl);
        }
    }
    catch (error) {
        // A missing direct-lookup session is tolerated (the web-search step still
        // runs); any other failure is rethrown.
        const message = error instanceof Error ? error.message : String(error);
        if (!/Missing LinkedIn direct lookup session/i.test(message)) {
            throw error;
        }
    }
    // Step 3: web search for rows that still have no LinkedIn company URL.
    // Lookups run one row at a time.
    for (const row of results) {
        if (row.linkedinCompanyUrl) {
            continue;
        }
        const linkedinCompanyUrl = (await searchSerperLinkedInCompanyUrl(row.companyName, timeoutMs)) ??
            (await searchPublicLinkedInCompanyUrl(row.companyName, timeoutMs));
        if (!linkedinCompanyUrl) {
            continue;
        }
        row.linkedinCompanyUrl = linkedinCompanyUrl;
        row.found = true;
        if (row.source == null) {
            row.source = "web-search";
        }
    }
    const payload = {
        status: "ok",
        requested: rows.length,
        found: results.filter((row) => row.found).length,
        rows: results
    };
    if (options.out) {
        await writeJsonFile(options.out, payload);
    }
    printOutput(payload);
});
|
|
4876
6007
|
program
|
|
4877
6008
|
.command("auth:logout")
|
|
4878
6009
|
.description("Remove local CLI auth session.")
|
|
@@ -4893,6 +6024,7 @@ program.hook("preAction", async (_thisCommand, actionCommand) => {
|
|
|
4893
6024
|
commandName === "wizard" ||
|
|
4894
6025
|
commandName === "llm:ready" ||
|
|
4895
6026
|
commandName === "contacts:find-linkedin-urls" ||
|
|
6027
|
+
commandName === "companies:find-linkedin-urls" ||
|
|
4896
6028
|
commandName.startsWith("packs:") ||
|
|
4897
6029
|
((commandName === "list" || commandName === "add") && parentCommandName === "packs")) {
|
|
4898
6030
|
return;
|
|
@@ -5176,16 +6308,21 @@ program
|
|
|
5176
6308
|
.command("linkedin-companies:backfill")
|
|
5177
6309
|
.alias("companies:enrich")
|
|
5178
6310
|
.description("Backfill missing or unavailable company profiles for the current workspace.")
|
|
5179
|
-
.
|
|
6311
|
+
.option("--client-id <number>", "Legacy BigQuery clientId to backfill (optional if set in cache or env)")
|
|
5180
6312
|
.option("--limit <number>", "Maximum companies to scrape in one run", "25")
|
|
5181
6313
|
.option("--concurrency <number>", "How many LinkedIn company pages to scrape in parallel", "4")
|
|
5182
6314
|
.option("--dry-run", "Preview the scrape result and generated MERGE SQL without writing to BigQuery", false)
|
|
5183
6315
|
.action(async (options) => {
|
|
5184
|
-
const
|
|
6316
|
+
const authenticatedRun = !shouldBypassAuth() && !options.dryRun;
|
|
6317
|
+
const session = authenticatedRun ? await requireAuthSession() : undefined;
|
|
6318
|
+
const clientId = await resolveLinkedInCompanyBackfillClientId({
|
|
6319
|
+
clientIdOption: options.clientId,
|
|
6320
|
+
session
|
|
6321
|
+
});
|
|
5185
6322
|
const limit = z.coerce.number().int().min(1).max(500).parse(options.limit);
|
|
5186
6323
|
const concurrency = z.coerce.number().int().min(1).max(20).parse(options.concurrency);
|
|
5187
|
-
|
|
5188
|
-
|
|
6324
|
+
await writeLinkedInCompanyBackfillClientIdToCache(clientId, session);
|
|
6325
|
+
if (authenticatedRun && session) {
|
|
5189
6326
|
const drained = await drainLinkedInCompanyBackfill(session, {
|
|
5190
6327
|
clientId,
|
|
5191
6328
|
limit
|
|
@@ -6983,7 +8120,17 @@ async function main() {
|
|
|
6983
8120
|
}
|
|
6984
8121
|
await program.parseAsync(process.argv);
|
|
6985
8122
|
}
|
|
6986
|
-
|
|
8123
|
+
/**
 * Best-effort shutdown of undici's global dispatcher so keep-alive sockets are
 * released before the process exits.
 *
 * A graceful `close()` waits for enqueued requests to finish, so it is bounded
 * by `graceMs`; if it has not settled by then the dispatcher is destroyed
 * instead, ensuring the caller (which exits the process afterwards) cannot
 * hang on a stalled request.
 *
 * @param {number} [graceMs=2000] Maximum time to wait for the graceful close.
 * @returns {Promise<void>} Always resolves; shutdown errors are ignored.
 */
async function closeGlobalHttpDispatcher(graceMs = 2000) {
    let graceTimer;
    try {
        const undici = await import("undici");
        const dispatcher = undici.getGlobalDispatcher();
        const graceExpired = new Promise((resolve) => {
            graceTimer = setTimeout(resolve, graceMs, true);
        });
        // Resolves to false when close() finishes first, true when the grace period wins.
        const timedOut = await Promise.race([
            dispatcher.close().then(() => false),
            graceExpired
        ]);
        if (timedOut) {
            await dispatcher.destroy();
        }
    }
    catch {
        // Best-effort shutdown for keep-alive sockets; ignore when undici is unavailable.
    }
    finally {
        // Never leave the grace timer pending once shutdown has settled.
        clearTimeout(graceTimer);
    }
}
|
|
8132
|
+
main()
|
|
8133
|
+
.catch((error) => {
|
|
6987
8134
|
if (error instanceof Error &&
|
|
6988
8135
|
(error.message === "prompt cancelled" || error.message === "readline was closed")) {
|
|
6989
8136
|
process.exitCode = 130;
|
|
@@ -6998,4 +8145,8 @@ main().catch((error) => {
|
|
|
6998
8145
|
process.stderr.write(`${cliError.message}\n`);
|
|
6999
8146
|
}
|
|
7000
8147
|
process.exitCode = exitCodeForError(cliError.code);
|
|
8148
|
+
})
|
|
8149
|
+
.finally(async () => {
|
|
8150
|
+
await closeGlobalHttpDispatcher();
|
|
8151
|
+
process.exit(process.exitCode ?? 0);
|
|
7001
8152
|
});
|