salesprompter-cli 0.1.19 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -280
- package/dist/cli.js +1618 -128
- package/dist/linkedin-products.js +29 -8
- package/dist/linkedin-session.js +751 -0
- package/dist/sales-navigator.js +207 -36
- package/dist/salesnav-backfill.js +710 -0
- package/package.json +13 -1
package/dist/cli.js
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { spawn } from "node:child_process";
|
|
3
|
-
import { access } from "node:fs/promises";
|
|
3
|
+
import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
|
|
4
4
|
import { createRequire } from "node:module";
|
|
5
|
+
import os from "node:os";
|
|
5
6
|
import path from "node:path";
|
|
6
7
|
import { emitKeypressEvents } from "node:readline";
|
|
7
8
|
import { createInterface } from "node:readline/promises";
|
|
8
9
|
import { setTimeout as delay } from "node:timers/promises";
|
|
10
|
+
import { createClient } from "@supabase/supabase-js";
|
|
9
11
|
import { Command } from "commander";
|
|
10
12
|
import { z } from "zod";
|
|
11
13
|
import { clearAuthSession, loginWithBrowserConnect, loginWithDeviceFlow, loginWithToken, requireAuthSession, shouldBypassAuth, verifySession } from "./auth.js";
|
|
@@ -18,9 +20,11 @@ import { analyzeHistoricalQueries } from "./historical-queries.js";
|
|
|
18
20
|
import { buildHistoricalVendorIcp, buildVendorIcp } from "./icp-templates.js";
|
|
19
21
|
import { InstantlySyncProvider } from "./instantly.js";
|
|
20
22
|
import { crawlLinkedInProductCategory } from "./linkedin-products.js";
|
|
23
|
+
import { claimValidatedSalesNavigatorSessionCookieForCli } from "./linkedin-session.js";
|
|
21
24
|
import { buildLeadlistsFunnelQueries } from "./leadlists-funnel.js";
|
|
22
25
|
import { readJsonFile, splitCsv, writeJsonFile, writeTextFile } from "./io.js";
|
|
23
|
-
import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
|
|
26
|
+
import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, deriveSalesNavigatorTitleQuerySeeds, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
|
|
27
|
+
import { buildSalesNavigatorHistoricalBackfillPlan, ensureSalesNavigatorPeopleCount, resolveSalesNavigatorHistoricalBackfillConfig, resolveSalesNavigatorHistoricalBackfillResumeState, resolveSalesNavigatorHistoricalBackfillOrgId, salesNavigatorHistoricalBackfillDefaults } from "./salesnav-backfill.js";
|
|
24
28
|
const require = createRequire(import.meta.url);
|
|
25
29
|
const { version: packageVersion } = require("../package.json");
|
|
26
30
|
const program = new Command();
|
|
@@ -50,12 +54,31 @@ const LinkedInProductIngestResponseSchema = z.object({
|
|
|
50
54
|
upserted: z.number().int().nonnegative(),
|
|
51
55
|
totalInCatalog: z.number().int().nonnegative().optional()
|
|
52
56
|
});
|
|
57
|
+
/**
 * Zod schema for the `launchDiagnostics` object that the Sales Navigator
 * export schemas in this file accept as a nullable, optional field.
 *
 * It carries three lists of non-empty agent id strings plus a description of
 * the agent that was selected for the launch.
 */
const SalesNavigatorLaunchDiagnosticsSchema = z.object({
    orderedCandidateAgentIds: z.array(z.string().min(1)),
    runningAgentIds: z.array(z.string().min(1)),
    busyAgentIds: z.array(z.string().min(1)),
    selectedAgent: z.object({
        id: z.string().min(1),
        name: z.string().min(1),
        // Nullable: the value may be explicitly absent for an agent.
        maxParallelism: z.number().int().nullable(),
        fileMgmt: z.string().min(1).nullable(),
        hasWebhook: z.boolean(),
        hasStoredSessionCookie: z.boolean(),
        storedIdentityCount: z.number().int().nonnegative(),
        supportsDirectSessionInjection: z.boolean()
    })
});
|
|
53
72
|
const SalesNavigatorExportStartResponseSchema = z.object({
|
|
54
73
|
status: z.literal("accepted"),
|
|
55
74
|
runId: z.string().min(1),
|
|
56
75
|
exportStatus: z.literal("pending"),
|
|
57
76
|
agentId: z.string().min(1),
|
|
58
77
|
containerId: z.string().min(1),
|
|
78
|
+
selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
|
|
79
|
+
selectedSessionUserEmail: z.string().min(1).nullable().optional(),
|
|
80
|
+
selectedSessionUserHandle: z.string().min(1).nullable().optional(),
|
|
81
|
+
launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
|
|
59
82
|
sourceQueryUrl: z.string().url(),
|
|
60
83
|
slicedQueryUrl: z.string().url(),
|
|
61
84
|
previousContainerId: z.string().min(1).nullable().optional()
|
|
@@ -79,6 +102,10 @@ const SalesNavigatorExportRunSchema = z.object({
|
|
|
79
102
|
resultCsvUrl: z.string().url().nullable().optional(),
|
|
80
103
|
agentId: z.string().min(1),
|
|
81
104
|
containerId: z.string().min(1),
|
|
105
|
+
selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
|
|
106
|
+
selectedSessionUserEmail: z.string().min(1).nullable().optional(),
|
|
107
|
+
selectedSessionUserHandle: z.string().min(1).nullable().optional(),
|
|
108
|
+
launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
|
|
82
109
|
sourceQueryUrl: z.string().url(),
|
|
83
110
|
slicedQueryUrl: z.string().url(),
|
|
84
111
|
createdAt: z.string().datetime(),
|
|
@@ -99,6 +126,10 @@ const SalesNavigatorExportResponseSchema = z.object({
|
|
|
99
126
|
resultCsvUrl: z.string().url().nullable().optional(),
|
|
100
127
|
agentId: z.string().min(1),
|
|
101
128
|
containerId: z.string().min(1),
|
|
129
|
+
selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
|
|
130
|
+
selectedSessionUserEmail: z.string().min(1).nullable().optional(),
|
|
131
|
+
selectedSessionUserHandle: z.string().min(1).nullable().optional(),
|
|
132
|
+
launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
|
|
102
133
|
sourceQueryUrl: z.string().url(),
|
|
103
134
|
slicedQueryUrl: z.string().url()
|
|
104
135
|
});
|
|
@@ -188,6 +219,12 @@ function printOutput(value) {
|
|
|
188
219
|
const space = runtimeOutputOptions.json ? undefined : 2;
|
|
189
220
|
process.stdout.write(`${JSON.stringify(value, null, space)}\n`);
|
|
190
221
|
}
|
|
222
|
+
/**
 * Writes a human-readable progress line to stderr.
 *
 * Nothing is written when the runtime output options request JSON output or
 * quiet mode.
 *
 * @param {string} message - Text to print; a trailing newline is appended.
 * @returns {void}
 */
function writeProgress(message) {
    if (runtimeOutputOptions.json || runtimeOutputOptions.quiet) {
        return;
    }
    process.stderr.write(`${message}\n`);
}
|
|
191
228
|
function applyGlobalOutputOptions(actionCommand) {
|
|
192
229
|
const globalOptions = actionCommand.optsWithGlobals();
|
|
193
230
|
runtimeOutputOptions.json = Boolean(globalOptions.json);
|
|
@@ -687,6 +724,430 @@ async function fetchWorkspaceLeadSearch(session, requestBody) {
|
|
|
687
724
|
/**
 * Builds the default output path for a LinkedIn products crawl result.
 *
 * @param {string} categorySlug - Slug interpolated into the file name.
 * @returns {string} Relative path of the form `./data/linkedin-products-<slug>.json`.
 */
function buildLinkedInProductsOutputPath(categorySlug) {
    return `./data/linkedin-products-${categorySlug}.json`;
}
|
|
727
|
+
/**
 * Builds the default output path for the product-category to Sales Navigator
 * workflow result.
 *
 * @param {string} categorySlug - Slug interpolated into the file name.
 * @returns {string} Relative path of the form `./data/salesnav-product-category-<slug>.json`.
 */
function buildLinkedInProductCategorySalesNavigatorOutputPath(categorySlug) {
    return `./data/salesnav-product-category-${categorySlug}.json`;
}
|
|
730
|
+
/** Crawl job statuses that count as terminal. */
const SALES_NAVIGATOR_TERMINAL_JOB_STATUSES = new Set(["completed", "completed_with_failures"]);

/**
 * Tells whether a crawl job status is one of the terminal statuses.
 *
 * @param {string} status - Job status to test.
 * @returns {boolean} True for "completed" and "completed_with_failures".
 */
function isSalesNavigatorCrawlJobTerminal(status) {
    return SALES_NAVIGATOR_TERMINAL_JOB_STATUSES.has(status);
}
|
|
734
|
+
/**
 * Builds a trace id of the form `<prefix>-<epoch millis>-<8 base-36 chars>`.
 *
 * The random part is for log correlation only; `Math.random()` is not
 * cryptographically secure.
 *
 * @param {string} prefix - Leading label of the trace id.
 * @returns {string} The generated trace id.
 */
function buildWorkflowTraceId(prefix) {
    // The base-36 rendering of Math.random() can have fewer than 8 fractional
    // digits (0.5 renders as "0.i", 0 as "0"), so pad to keep the width fixed.
    const randomSuffix = Math.random().toString(36).slice(2, 10).padEnd(8, "0");
    return `${prefix}-${Date.now()}-${randomSuffix}`;
}
|
|
737
|
+
/**
 * Builds the default JSONL log path for the product-category workflow.
 *
 * @param {string} input - Workflow input; it is passed through `slugify`.
 * @returns {string} `./data/<slug>-salesnav.log.jsonl`, using the slug
 *   "salesnav-product-category" when `slugify` yields a falsy value.
 */
function buildSalesNavigatorWorkflowLogPath(input) {
    const slug = slugify(input) || "salesnav-product-category";
    return `./data/${slug}-salesnav.log.jsonl`;
}
|
|
741
|
+
/**
 * Builds the default JSONL log path for a Sales Navigator crawl.
 *
 * @param {string} input - Crawl input; it is passed through `slugify`.
 * @returns {string} `./data/<slug>-crawl.log.jsonl`, using the slug
 *   "salesnav-crawl" when `slugify` yields a falsy value.
 */
function buildSalesNavigatorCrawlLogPath(input) {
    const slug = slugify(input) || "salesnav-crawl";
    return `./data/${slug}-crawl.log.jsonl`;
}
|
|
745
|
+
/**
 * Extracts the `query` search parameter from a URL and URI-decodes it.
 *
 * `URLSearchParams.get` already percent-decodes the value once, so the
 * `decodeURIComponent` call here is a second decoding pass.
 * NOTE(review): presumably Sales Navigator URLs carry doubly encoded values —
 * confirm before removing the second pass.
 *
 * @param {string} url - URL to inspect.
 * @returns {string|null} The decoded query; the value as returned by
 *   `searchParams.get` when the second decode fails; `null` when the URL
 *   cannot be parsed or has no non-empty `query` parameter.
 */
function decodeSalesNavigatorQueryParam(url) {
    let encoded;
    try {
        encoded = new URL(url).searchParams.get("query");
    }
    catch {
        // Unparseable URL: there is no query to report.
        return null;
    }
    if (!encoded) {
        return null;
    }
    try {
        return decodeURIComponent(encoded);
    }
    catch {
        // A stray "%" makes decodeURIComponent throw; keep the once-decoded value.
        return encoded;
    }
}
|
|
762
|
+
/**
 * Creates a JSONL workflow logger bound to a trace id and a log file.
 *
 * The directory of `options.logPath` is created (recursively) before the
 * logger is returned.
 *
 * @param {{ logPath: string, traceId?: string }} options - Log file path and
 *   an optional trace id; a new id with the "salesprompter-cli" prefix is
 *   generated when none is given.
 * @returns {Promise<{ traceId: string, logPath: string, log: Function }>}
 *   `log(event, metadata = {})` appends one JSON line holding timestamp,
 *   traceId, event and metadata, then emits a progress line.
 */
async function createWorkflowLogger(options) {
    const traceId = options.traceId ?? buildWorkflowTraceId("salesprompter-cli");
    const logPath = options.logPath;
    await mkdir(path.dirname(logPath), { recursive: true });
    return {
        traceId,
        logPath,
        log: async (event, metadata = {}) => {
            const entry = {
                timestamp: new Date().toISOString(),
                traceId,
                event,
                metadata
            };
            await appendFile(logPath, `${JSON.stringify(entry)}\n`, "utf8");
            writeProgress(`[${entry.timestamp}] ${event}`);
        }
    };
}
|
|
781
|
+
/**
 * Bundles a Sales Navigator URL with its decoded `query` parameter and the
 * filters applied to it, for logging.
 *
 * @param {string} url - Sales Navigator query URL.
 * @param {*} appliedFilters - Filters to report; passed through unchanged.
 * @returns {{ url: string, decodedQuery: (string|null), appliedFilters: * }}
 */
function summarizeSalesNavigatorQuery(url, appliedFilters) {
    return {
        url,
        decodedQuery: decodeSalesNavigatorQueryParam(url),
        appliedFilters
    };
}
|
|
788
|
+
/**
 * Decides whether a root slice should be split before its first export.
 *
 * Currently always answers `false`; both parameters are ignored and only
 * referenced so they are not reported as unused.
 *
 * @param {*} slice - Root slice (unused).
 * @param {*} maxSplitDepth - Maximum split depth (unused).
 * @returns {boolean} Always `false`.
 */
function shouldPreSplitSalesNavigatorRootSlice(slice, maxSplitDepth) {
    void slice;
    void maxSplitDepth;
    return false;
}
|
|
793
|
+
/**
 * Builds the HTTP header object that carries the workflow trace id.
 *
 * @param {string|null|undefined} traceId - Trace id to forward.
 * @returns {Object<string, string>} `{ "X-Salesprompter-Trace-Id": traceId }`,
 *   or an empty object when `traceId` is falsy.
 */
function buildTraceHeaders(traceId) {
    return traceId ? { "X-Salesprompter-Trace-Id": traceId } : {};
}
|
|
796
|
+
/**
 * Condenses one crawl result into the per-title summary used by the workflow.
 *
 * @param {{ job: object, truncated: boolean }} crawl - Crawl result; the
 *   counters are read from `crawl.job`.
 * @returns {object} Job status, slice/people counters, the `truncated` flag
 *   and `successful`, which is true only when the job status is "completed"
 *   and the crawl was not truncated.
 */
function buildSalesNavigatorWorkflowCrawlSummary(crawl) {
    const { job, truncated } = crawl;
    return {
        jobStatus: job.status,
        importedPeople: job.importedPeople,
        exportedSlices: job.exportedSlices,
        failedSlices: job.failedSlices,
        queuedSlices: job.queuedSlices,
        runningSlices: job.runningSlices,
        truncated,
        successful: job.status === "completed" && !truncated
    };
}
|
|
809
|
+
/**
 * Aggregates per-title crawl summaries into one workflow summary.
 *
 * @param {Array<{ summary: object }>} crawls - Entries whose `summary` was
 *   produced by `buildSalesNavigatorWorkflowCrawlSummary`.
 * @returns {object} Totals and per-status title counts. `workflowStatus` is
 *   "completed" unless at least one crawl summary is not `successful`, in
 *   which case it is "completed_with_failures". For an empty list every
 *   counter is 0 and the status is "completed".
 */
function buildSalesNavigatorWorkflowSummary(crawls) {
    const summary = {
        workflowStatus: "completed",
        totalImportedPeople: 0,
        totalExportedSlices: 0,
        totalFailedSlices: 0,
        completedTitles: 0,
        completedWithFailuresTitles: 0,
        runningTitles: 0,
        truncatedTitles: 0
    };
    for (const { summary: crawlSummary } of crawls) {
        summary.totalImportedPeople += crawlSummary.importedPeople;
        summary.totalExportedSlices += crawlSummary.exportedSlices;
        summary.totalFailedSlices += crawlSummary.failedSlices;
        if (crawlSummary.truncated) {
            summary.truncatedTitles += 1;
        }
        if (crawlSummary.jobStatus === "completed") {
            summary.completedTitles += 1;
        }
        else if (crawlSummary.jobStatus === "completed_with_failures") {
            summary.completedWithFailuresTitles += 1;
        }
        else {
            // Any other status is counted as still running.
            summary.runningTitles += 1;
        }
        if (!crawlSummary.successful) {
            summary.workflowStatus = "completed_with_failures";
        }
    }
    return summary;
}
|
|
841
|
+
/**
 * Formats a one-line failure message from a workflow summary.
 *
 * @param {object} summary - Workflow summary holding the title and slice counters.
 * @returns {string} A fixed sentence followed by space-separated `key=value` pairs.
 */
function buildSalesNavigatorWorkflowFailureMessage(summary) {
    return [
        "Sales Navigator workflow completed with failures.",
        `completedTitles=${summary.completedTitles}`,
        `completedWithFailuresTitles=${summary.completedWithFailuresTitles}`,
        `runningTitles=${summary.runningTitles}`,
        `truncatedTitles=${summary.truncatedTitles}`,
        `totalFailedSlices=${summary.totalFailedSlices}`
    ].join(" ");
}
|
|
851
|
+
/**
 * Checks that a seed's query URL textually contains every applied filter.
 *
 * The decoded `query` parameter is lower-cased and searched for each filter
 * type and each filter value text (case-insensitive substring match).
 *
 * @param {{ queryUrl: string, appliedFilters: Array<{ type: string, values: Array<{ text: string }> }> }} seed
 *   Seed to validate.
 * @returns {{ valid: boolean, missingFilters: string[], decodedQuery: (string|null) }}
 *   `missingFilters` holds one `type:value1,value2` entry per failing filter;
 *   the value part is `*` when only the filter type itself is missing.
 */
function validateSalesNavigatorSeedQuery(seed) {
    const decodedQuery = decodeSalesNavigatorQueryParam(seed.queryUrl);
    // An undecodable query yields an empty haystack, so every filter is reported missing.
    const haystack = decodedQuery?.toLowerCase() ?? "";
    const missingFilters = seed.appliedFilters.flatMap((filter) => {
        const missingValues = filter.values.filter((value) => !haystack.includes(value.text.toLowerCase()));
        if (!haystack.includes(filter.type.toLowerCase()) || missingValues.length > 0) {
            return `${filter.type}:${missingValues.map((value) => value.text).join(",") || "*"}`;
        }
        return [];
    });
    return {
        valid: missingFilters.length === 0,
        missingFilters,
        decodedQuery
    };
}
|
|
867
|
+
/**
 * Runs the "LinkedIn product category to Sales Navigator" workflow.
 *
 * Steps, each recorded through the workflow logger:
 *  1. Crawl the LinkedIn product category.
 *  2. Derive Sales Navigator title query seeds from the scraped products.
 *  3. Validate the first seed query and log a first-split preview.
 *  4. Dry run: write the preview payload and return.
 *  5. Otherwise: optionally upload the product catalog, then create or resume
 *     and execute one crawl job per title seed, sequentially.
 *  6. Write the result payload to `outPath`.
 *
 * @param {object} options - Workflow options (input, limits, crawl tuning,
 *   `dryRun`, `skipProductUpload`, optional `logPath` / `outPath`).
 * @returns {Promise<{ outPath: string, payload: object }>} The output path and
 *   the payload written to it.
 * @throws {Error} When no title seeds are found, when the first seed query is
 *   missing expected filters, or when any step fails. The failure is logged
 *   as "workflow.failed" and the original error is rethrown.
 */
async function runSalesNavigatorFromProductCategoryWorkflow(options) {
    const logger = await createWorkflowLogger({
        logPath: options.logPath ?? buildSalesNavigatorWorkflowLogPath(options.input)
    });
    await logger.log("workflow.started", {
        input: options.input,
        maxPages: options.maxPages,
        productLimit: options.productLimit ?? null,
        titleLimit: options.titleLimit ?? null,
        maxResultsPerSearch: options.maxResultsPerSearch,
        numberOfProfiles: options.numberOfProfiles,
        slicePreset: options.slicePreset,
        maxSplitDepth: options.maxSplitDepth,
        maxSlicesPerTitle: options.maxSlicesPerTitle,
        maxRetries: options.maxRetries,
        probeProfiles: options.probeProfiles,
        agentBusyWaitSeconds: options.agentBusyWaitSeconds,
        agentBusyMaxWaits: options.agentBusyMaxWaits,
        idlePollSeconds: options.idlePollSeconds,
        idleMaxPolls: options.idleMaxPolls,
        parallelExports: options.parallelExports,
        skipProductUpload: options.skipProductUpload,
        dryRun: options.dryRun
    });
    try {
        // Step 1: scrape the product category.
        const scrape = await crawlLinkedInProductCategory({
            input: options.input,
            maxPages: options.maxPages,
            limit: options.productLimit
        });
        await logger.log("linkedin.category.scraped", {
            source: scrape.source,
            totalPagesFetched: scrape.totalPagesFetched,
            discoveredProducts: scrape.items.length,
            productNames: scrape.items.map((item) => item.productName)
        });
        // Step 2: derive one Sales Navigator seed query per title.
        const titleSeeds = deriveSalesNavigatorTitleQuerySeeds({
            sourceProductUrl: scrape.source.productUrl,
            items: scrape.items,
            titleLimit: options.titleLimit
        });
        if (titleSeeds.length === 0) {
            throw new Error(`No intended-role job titles were found while crawling the LinkedIn product category ${scrape.source.category.name}.`);
        }
        const outPath = options.outPath ?? buildLinkedInProductCategorySalesNavigatorOutputPath(scrape.source.category.slug);
        const previewQueries = titleSeeds.map((seed) => {
            const preview = buildSalesNavigatorCrawlPreview({
                sourceQueryUrl: seed.queryUrl,
                maxResultsPerSearch: options.maxResultsPerSearch,
                numberOfProfiles: options.numberOfProfiles,
                slicePreset: options.slicePreset
            });
            return {
                title: seed.title,
                queryUrl: seed.queryUrl,
                appliedFilters: seed.appliedFilters,
                sourceProduct: seed.sourceProduct,
                matchedProductCount: seed.matchedProductCount,
                firstSplitQueries: preview.firstSplit.map((attempt) => ({
                    slicedQueryUrl: attempt.slicedQueryUrl,
                    appliedFilters: attempt.appliedFilters,
                    splitTrail: formatSalesNavigatorSplitTrail(attempt.splitTrail.map((entry) => ({
                        ...entry,
                        // Keep only id, text and selectionType of each split value.
                        value: {
                            id: entry.value.id,
                            text: entry.value.text,
                            selectionType: entry.value.selectionType
                        }
                    })))
                }))
            };
        });
        await logger.log("salesnav.title-seeds.derived", {
            titleCount: titleSeeds.length,
            titles: titleSeeds.map((seed) => ({
                title: seed.title,
                sourceProduct: seed.sourceProduct,
                matchedProductCount: seed.matchedProductCount,
                ...summarizeSalesNavigatorQuery(seed.queryUrl, seed.appliedFilters)
            }))
        });
        // Step 3: only the first seed is validated before any crawl is started.
        const firstSeedValidation = validateSalesNavigatorSeedQuery(titleSeeds[0]);
        await logger.log("salesnav.first-query.validated", {
            title: titleSeeds[0]?.title ?? null,
            valid: firstSeedValidation.valid,
            missingFilters: firstSeedValidation.missingFilters,
            decodedQuery: firstSeedValidation.decodedQuery
        });
        if (!firstSeedValidation.valid) {
            throw new Error(`Generated Sales Navigator seed query for "${titleSeeds[0]?.title ?? "unknown"}" is missing expected filters: ${firstSeedValidation.missingFilters.join(", ")}.`);
        }
        await logger.log("salesnav.first-split.preview", {
            titles: previewQueries.map((query) => ({
                title: query.title,
                sourceProduct: query.sourceProduct,
                matchedProductCount: query.matchedProductCount,
                ...summarizeSalesNavigatorQuery(query.queryUrl, query.appliedFilters),
                firstSplitQueries: query.firstSplitQueries.map((split) => ({
                    splitTrail: split.splitTrail,
                    ...summarizeSalesNavigatorQuery(split.slicedQueryUrl, split.appliedFilters)
                }))
            }))
        });
        // Step 4: a dry run stops here, before authentication and any upload or crawl.
        if (options.dryRun) {
            const payload = {
                status: "ok",
                dryRun: true,
                mode: "linkedin-product-category-to-salesnav",
                traceId: logger.traceId,
                logPath: logger.logPath,
                source: scrape.source,
                totalPagesFetched: scrape.totalPagesFetched,
                discoveredProducts: scrape.items.length,
                titleCount: titleSeeds.length,
                // No crawls ran: the summary of an empty crawl list is the all-zero "completed" summary.
                summary: buildSalesNavigatorWorkflowSummary([]),
                uploaded: null,
                queries: previewQueries
            };
            await writeJsonFile(outPath, payload);
            await logger.log("workflow.completed", {
                outPath,
                dryRun: true,
                discoveredProducts: payload.discoveredProducts,
                titleCount: payload.titleCount
            });
            return { outPath, payload };
        }
        // Step 5: authenticated part. `session` is reassigned because the
        // helpers below hand back the session they ended up using.
        let session = await requireAuthSession();
        let uploaded = null;
        if (!options.skipProductUpload) {
            await logger.log("linkedin.catalog.upload.started", {
                itemCount: scrape.items.length
            });
            uploaded = await uploadLinkedInProductsCatalog(session, {
                source: {
                    input: scrape.source.input,
                    kind: scrape.source.kind,
                    query: scrape.source.query,
                    companyUrl: scrape.source.companyUrl,
                    productUrl: scrape.source.productUrl,
                    category: scrape.source.category
                },
                items: scrape.items
            }, 100, logger.traceId);
            await logger.log("linkedin.catalog.upload.completed", uploaded);
        }
        const crawls = [];
        // Titles are crawled one after another; each iteration awaits its crawl job.
        for (const seed of titleSeeds) {
            writeProgress(`Starting durable Sales Navigator crawl for intended role "${seed.title}".`);
            const rootSlice = createSalesNavigatorCrawlSeed({
                sourceQueryUrl: seed.queryUrl,
                maxResultsPerSearch: options.maxResultsPerSearch,
                numberOfProfiles: options.numberOfProfiles,
                slicePreset: options.slicePreset
            });
            const created = await createOrResumeSalesNavigatorCrawlJob(session, {
                sourceQueryUrl: seed.queryUrl,
                slicePreset: options.slicePreset,
                maxResultsPerSearch: options.maxResultsPerSearch,
                numberOfProfiles: options.numberOfProfiles,
                rawPayload: {
                    workflow: "linkedin-product-category-to-salesnav",
                    traceId: logger.traceId,
                    source: scrape.source,
                    titleSeed: {
                        title: seed.title,
                        queryUrl: seed.queryUrl,
                        appliedFilters: seed.appliedFilters,
                        sourceProduct: seed.sourceProduct,
                        matchedProductCount: seed.matchedProductCount
                    }
                },
                rootSlice: {
                    slicedQueryUrl: rootSlice.slicedQueryUrl,
                    appliedFilters: rootSlice.appliedFilters,
                    depth: rootSlice.depth,
                    splitTrail: rootSlice.splitTrail,
                    rawPayload: {
                        traceId: logger.traceId,
                        title: seed.title,
                        sourceProduct: seed.sourceProduct,
                        matchedProductCount: seed.matchedProductCount,
                        source: scrape.source
                    }
                }
            }, logger.traceId);
            session = created.session;
            await logger.log("salesnav.crawl.job.ready", {
                title: seed.title,
                sourceProduct: seed.sourceProduct,
                matchedProductCount: seed.matchedProductCount,
                resumed: created.value.resumed,
                jobId: created.value.job.id,
                rootSlice: {
                    depth: rootSlice.depth,
                    splitTrail: formatSalesNavigatorSplitTrail(rootSlice.splitTrail),
                    ...summarizeSalesNavigatorQuery(rootSlice.slicedQueryUrl, rootSlice.appliedFilters)
                }
            });
            const crawl = await executeSalesNavigatorCrawlJob(session, created.value.job.id, {
                maxSplitDepth: options.maxSplitDepth,
                maxSlices: options.maxSlicesPerTitle,
                maxRetries: options.maxRetries,
                probeProfiles: options.probeProfiles,
                agentBusyWaitSeconds: options.agentBusyWaitSeconds,
                agentBusyMaxWaits: options.agentBusyMaxWaits,
                idlePollSeconds: options.idlePollSeconds,
                idleMaxPolls: options.idleMaxPolls,
                parallelExports: options.parallelExports,
                traceId: logger.traceId,
                logger
            });
            session = crawl.session;
            const crawlSummary = buildSalesNavigatorWorkflowCrawlSummary(crawl);
            await logger.log("salesnav.crawl.job.finished", {
                title: seed.title,
                jobId: created.value.job.id,
                summary: crawlSummary,
                lastOutcome: crawl.lastOutcome
            });
            crawls.push({
                title: seed.title,
                sourceProduct: seed.sourceProduct,
                matchedProductCount: seed.matchedProductCount,
                queryUrl: seed.queryUrl,
                jobId: created.value.job.id,
                resumed: created.value.resumed,
                claimedSlices: crawl.claimedSlices,
                truncated: crawl.truncated,
                activeSlice: crawl.activeSlice
                    ? {
                        id: crawl.activeSlice.id,
                        slicedQueryUrl: crawl.activeSlice.slicedQueryUrl,
                        depth: crawl.activeSlice.depth,
                        splitTrail: formatSalesNavigatorSplitTrail(crawl.activeSlice.splitTrail)
                    }
                    : null,
                lastOutcome: crawl.lastOutcome,
                job: crawl.job,
                summary: crawlSummary
            });
        }
        // Step 6: aggregate and persist the result.
        const summary = buildSalesNavigatorWorkflowSummary(crawls);
        const payload = {
            status: "ok",
            dryRun: false,
            mode: "linkedin-product-category-to-salesnav",
            traceId: logger.traceId,
            logPath: logger.logPath,
            source: scrape.source,
            totalPagesFetched: scrape.totalPagesFetched,
            discoveredProducts: scrape.items.length,
            titleCount: titleSeeds.length,
            summary,
            uploaded,
            crawls
        };
        await writeJsonFile(outPath, payload);
        await logger.log("workflow.completed", {
            outPath,
            dryRun: false,
            uploaded,
            crawlCount: crawls.length,
            summary
        });
        return { outPath, payload };
    }
    catch (error) {
        try {
            await logger.log("workflow.failed", {
                message: error instanceof Error ? error.message : String(error),
                stack: error instanceof Error ? error.stack ?? null : null
            });
        }
        catch {
            // Best effort only: a failing log write must not replace the
            // original workflow error that is rethrown below.
        }
        throw error;
    }
}
|
|
690
1151
|
/**
 * Accumulator for an option that may be given several times: returns a new
 * array holding the previously collected values followed by the new one.
 *
 * @param {string} value - Newly supplied option value.
 * @param {string[]} [previous=[]] - Values collected so far (not mutated).
 * @returns {string[]} A new array with `value` appended.
 */
function collectStringOptionValue(value, previous = []) {
    return [...previous, value];
}
|
|
@@ -696,6 +1157,7 @@ class SalesNavigatorExportRequestError extends Error {
|
|
|
696
1157
|
runId;
|
|
697
1158
|
agentId;
|
|
698
1159
|
containerId;
|
|
1160
|
+
launchDiagnostics;
|
|
699
1161
|
statusCode;
|
|
700
1162
|
constructor(message, options) {
|
|
701
1163
|
super(message);
|
|
@@ -706,8 +1168,10 @@ class SalesNavigatorExportRequestError extends Error {
|
|
|
706
1168
|
this.runId = options.runId;
|
|
707
1169
|
this.agentId = options.agentId;
|
|
708
1170
|
this.containerId = options.containerId;
|
|
1171
|
+
this.launchDiagnostics = options.launchDiagnostics ?? null;
|
|
709
1172
|
}
|
|
710
1173
|
}
|
|
1174
|
+
// 90 seconds, in milliseconds.
// NOTE(review): presumably the time limit for starting a Sales Navigator
// export; the code that uses it is outside this view — confirm.
const SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS = 90_000;
|
|
711
1175
|
async function withRefreshableAuthSession(session, run, contextLabel = "Salesprompter session expired during crawl. Refreshing login...") {
|
|
712
1176
|
let currentSession = session;
|
|
713
1177
|
let authRefreshCount = 0;
|
|
@@ -748,7 +1212,7 @@ async function fetchCliJson(session, request, schema) {
|
|
|
748
1212
|
return schema.parse(parsed);
|
|
749
1213
|
});
|
|
750
1214
|
}
|
|
751
|
-
async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100) {
|
|
1215
|
+
async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100, traceId) {
|
|
752
1216
|
let imported = 0;
|
|
753
1217
|
let upserted = 0;
|
|
754
1218
|
for (let startIndex = 0; startIndex < payload.items.length; startIndex += batchSize) {
|
|
@@ -757,7 +1221,8 @@ async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100)
|
|
|
757
1221
|
method: "POST",
|
|
758
1222
|
headers: {
|
|
759
1223
|
"Content-Type": "application/json",
|
|
760
|
-
Authorization: `Bearer ${session.accessToken}
|
|
1224
|
+
Authorization: `Bearer ${session.accessToken}`,
|
|
1225
|
+
...buildTraceHeaders(traceId)
|
|
761
1226
|
},
|
|
762
1227
|
body: JSON.stringify({
|
|
763
1228
|
source: payload.source,
|
|
@@ -788,17 +1253,149 @@ function serializeSalesNavigatorFiltersForApi(filters) {
|
|
|
788
1253
|
}))
|
|
789
1254
|
}));
|
|
790
1255
|
}
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
1256
|
+
/**
 * Builds the raw payload describing a Sales Navigator slice.
 *
 * `extra` is spread first, so a slice field always wins over an `extra` key
 * of the same name. The three retry counters become `null` when the slice
 * does not define them.
 *
 * @param {object} slice - Slice whose query URLs, filters, depth, split
 *   trail, preset, limits and retry counters are copied.
 * @param {object} [extra={}] - Additional keys to include in the payload.
 * @returns {object} The combined payload.
 */
function buildSalesNavigatorSliceRawPayload(slice, extra = {}) {
    return {
        ...extra,
        sourceQueryUrl: slice.sourceQueryUrl,
        slicedQueryUrl: slice.slicedQueryUrl,
        appliedFilters: slice.appliedFilters,
        depth: slice.depth,
        splitTrail: slice.splitTrail,
        slicePreset: slice.slicePreset,
        maxResultsPerSearch: slice.maxResultsPerSearch,
        numberOfProfiles: slice.numberOfProfiles,
        retryCount: slice.retryCount ?? null,
        cookieRetryCount: slice.cookieRetryCount ?? null,
        resultRetryCount: slice.resultRetryCount ?? null
    };
}
|
|
1272
|
+
/**
 * Builds the raw payload attached to a crawl report for a slice: the slice
 * payload plus the trace id, slice id and job id.
 *
 * Keys in `extra` override `traceId`, `sliceId` and `jobId`, but never the
 * slice fields themselves.
 *
 * @param {object} slice - Crawl slice; `id` and `jobId` are read in addition
 *   to the fields copied by `buildSalesNavigatorSliceRawPayload`.
 * @param {string|null|undefined} traceId - Trace id; stored as `null` when absent.
 * @param {object} [extra={}] - Additional keys to include in the payload.
 * @returns {object} The combined payload.
 */
function buildSalesNavigatorCrawlReportRawPayload(slice, traceId, extra = {}) {
    // The slice is passed as-is: the slice-payload builder reads exactly the
    // fields it needs from it, so an intermediate copy is unnecessary.
    return buildSalesNavigatorSliceRawPayload(slice, {
        traceId: traceId ?? null,
        sliceId: slice.id,
        jobId: slice.jobId,
        ...extra
    });
}
|
|
1292
|
+
/**
 * Renders launch diagnostics as a single "; "-separated description.
 *
 * @param {object|null|undefined} diagnostics - Launch diagnostics, or a falsy
 *   value when none are available.
 * @returns {string|null} The description, or `null` when `diagnostics` is
 *   falsy. Running agents, busy agents, parallelism and file management are
 *   included only when present; the webhook and stored-cookie states are
 *   always included.
 */
function describeSalesNavigatorLaunchDiagnostics(diagnostics) {
    if (!diagnostics) {
        return null;
    }
    const { selectedAgent, runningAgentIds, busyAgentIds } = diagnostics;
    const parts = [
        `agent ${selectedAgent.name} (${selectedAgent.id})`,
        runningAgentIds.length > 0
            ? `running: ${runningAgentIds.join(", ")}`
            : null,
        busyAgentIds.length > 0
            ? `busy fallback: ${busyAgentIds.join(", ")}`
            : null,
        // Explicit null check so that a parallelism of 0 is still reported.
        selectedAgent.maxParallelism !== null
            ? `parallelism ${selectedAgent.maxParallelism}`
            : null,
        selectedAgent.fileMgmt
            ? `file mgmt ${selectedAgent.fileMgmt}`
            : null,
        selectedAgent.hasWebhook ? "webhook on" : "webhook off",
        selectedAgent.hasStoredSessionCookie
            ? "stored phantom cookie present"
            : "stored phantom cookie cleared at launch"
    ].filter(Boolean);
    return parts.join("; ");
}
|
|
1317
|
+
/**
 * Writes a one-line summary of the selected launch agent to stderr.
 *
 * Nothing is written in JSON or quiet output mode, or when no diagnostics
 * are available.
 *
 * @param {object|null} diagnostics - Launch diagnostics of the export start.
 * @param {string|null} selectedSessionUserEmail - Email of the selected
 *   session user; mentioned in the line when truthy.
 * @returns {void}
 */
function writeSalesNavigatorLaunchDiagnosticsProgress(diagnostics, selectedSessionUserEmail) {
    if (runtimeOutputOptions.json || runtimeOutputOptions.quiet || !diagnostics) {
        return;
    }
    const details = describeSalesNavigatorLaunchDiagnostics(diagnostics);
    const operator = selectedSessionUserEmail ? ` using ${selectedSessionUserEmail}` : "";
    process.stderr.write(`Phantombuster launch selected ${diagnostics.selectedAgent.id}${operator}.${details ? ` ${details}` : ""}\n`);
}
|
|
796
|
-
async function
|
|
1325
|
+
/**
 * Runs one Sales Navigator export end to end: starts the export, prints the
 * launch diagnostics, waits for the run to finish and returns the parsed result.
 *
 * Emits `salesnav.export.started`, `.accepted`, `.completed` and `.failed`
 * through the optional logger; every event carries the base metadata built
 * from the payload, with `logOptions.metadata` merged on top. Any failure is
 * logged and then re-thrown unchanged.
 *
 * @param {object} session Auth session used for the start request.
 * @param {object} payload Export request; `appliedFilters` is mapped to its filter types.
 * @param {string} [traceId] Trace id forwarded to the start and polling calls.
 * @param {{logger?: object, metadata?: object}} [logOptions] Optional logger and extra log metadata.
 * @returns {Promise<object>} Output of `SalesNavigatorExportResponseSchema.parse`.
 */
async function runSalesNavigatorExport(session, payload, traceId, logOptions = {}) {
    const { sourceQueryUrl, slicedQueryUrl, slicePreset, maxResultsPerSearch, numberOfProfiles } = payload;
    const baseMetadata = {
        sourceQueryUrl,
        slicedQueryUrl,
        slicePreset,
        maxResultsPerSearch,
        numberOfProfiles,
        filterTypes: payload.appliedFilters.map((filter) => filter.type),
        ...logOptions.metadata
    };
    await logOptions.logger?.log("salesnav.export.started", baseMetadata);
    try {
        const started = await startSalesNavigatorExport(session, payload, traceId);
        const launch = started.value;
        const launchDiagnostics = launch.launchDiagnostics ?? null;
        const sessionUserEmail = launch.selectedSessionUserEmail ?? null;
        await logOptions.logger?.log("salesnav.export.accepted", {
            ...baseMetadata,
            runId: launch.runId,
            agentId: launch.agentId,
            containerId: launch.containerId,
            previousContainerId: launch.previousContainerId ?? null,
            selectedSessionCookieSha256: launch.selectedSessionCookieSha256 ?? null,
            selectedSessionUserEmail: sessionUserEmail,
            selectedSessionUserHandle: launch.selectedSessionUserHandle ?? null,
            launchDiagnostics
        });
        writeSalesNavigatorLaunchDiagnosticsProgress(launchDiagnostics, sessionUserEmail);
        // Polling continues on the session returned by the start call, which may have been refreshed.
        const completed = await waitForSalesNavigatorExportRunCompletion(started.session, launch.runId, {}, traceId, {
            logger: logOptions.logger,
            metadata: baseMetadata
        });
        const { run } = completed.value;
        await logOptions.logger?.log("salesnav.export.completed", {
            ...baseMetadata,
            runId: run.id,
            status: run.status,
            resultClassification: run.resultClassification,
            totalResults: run.totalResults ?? null,
            imported: run.imported,
            upserted: run.upserted,
            updatedAt: run.updatedAt,
            finishedAt: run.finishedAt ?? null
        });
        const mapped = mapCompletedSalesNavigatorExportRun(run);
        // Prefer diagnostics reported by the finished run; fall back to the ones captured at launch.
        return SalesNavigatorExportResponseSchema.parse({
            ...mapped,
            launchDiagnostics: mapped.launchDiagnostics ?? launchDiagnostics
        });
    }
    catch (error) {
        const isError = error instanceof Error;
        const failure = {
            ...baseMetadata,
            name: isError ? error.name : "Error",
            message: isError ? error.message : String(error)
        };
        if (error instanceof SalesNavigatorExportRequestError) {
            Object.assign(failure, {
                runId: error.runId ?? null,
                agentId: error.agentId ?? null,
                containerId: error.containerId ?? null,
                errorCode: error.errorCode ?? null,
                totalResults: error.totalResults ?? null,
                launchDiagnostics: error.launchDiagnostics ?? null,
                statusCode: error.statusCode
            });
        }
        await logOptions.logger?.log("salesnav.export.failed", failure);
        throw error;
    }
}
|
|
1391
|
+
async function startSalesNavigatorExport(session, payload, traceId) {
|
|
797
1392
|
return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/export`, {
|
|
798
1393
|
method: "POST",
|
|
1394
|
+
signal: AbortSignal.timeout(SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS),
|
|
799
1395
|
headers: {
|
|
800
1396
|
"Content-Type": "application/json",
|
|
801
|
-
Authorization: `Bearer ${currentSession.accessToken}
|
|
1397
|
+
Authorization: `Bearer ${currentSession.accessToken}`,
|
|
1398
|
+
...buildTraceHeaders(traceId)
|
|
802
1399
|
},
|
|
803
1400
|
body: JSON.stringify({
|
|
804
1401
|
...payload,
|
|
@@ -806,11 +1403,12 @@ async function startSalesNavigatorExport(session, payload) {
|
|
|
806
1403
|
})
|
|
807
1404
|
}), SalesNavigatorExportStartResponseSchema);
|
|
808
1405
|
}
|
|
809
|
-
async function getSalesNavigatorExportRunStatus(session, runId) {
|
|
1406
|
+
/**
 * Fetches the current state of an export run, asking the API to refresh it first (`?refresh=1`).
 *
 * @param {object} session Auth session handed to `fetchCliJson`.
 * @param {string} runId Identifier of the export run.
 * @param {string} [traceId] Trace id turned into request headers by `buildTraceHeaders`.
 * @returns {Promise<object>} Result of `fetchCliJson` for `SalesNavigatorExportRunStatusResponseSchema`.
 */
async function getSalesNavigatorExportRunStatus(session, runId, traceId) {
    const requestRunStatus = (currentSession) => {
        const endpoint = `${currentSession.apiBaseUrl}/api/cli/salesnav/export-runs/${runId}?refresh=1`;
        const headers = {
            Authorization: `Bearer ${currentSession.accessToken}`,
            ...buildTraceHeaders(traceId)
        };
        return fetch(endpoint, { method: "GET", headers });
    };
    return await fetchCliJson(session, requestRunStatus, SalesNavigatorExportRunStatusResponseSchema);
}
|
|
@@ -831,7 +1429,8 @@ function mapCompletedSalesNavigatorExportRun(run) {
|
|
|
831
1429
|
totalResults: run.totalResults ?? null,
|
|
832
1430
|
runId: run.id,
|
|
833
1431
|
agentId: run.agentId,
|
|
834
|
-
containerId: run.containerId
|
|
1432
|
+
containerId: run.containerId,
|
|
1433
|
+
launchDiagnostics: run.launchDiagnostics ?? null
|
|
835
1434
|
});
|
|
836
1435
|
}
|
|
837
1436
|
return SalesNavigatorExportResponseSchema.parse({
|
|
@@ -844,18 +1443,35 @@ function mapCompletedSalesNavigatorExportRun(run) {
|
|
|
844
1443
|
resultCsvUrl: run.resultCsvUrl ?? null,
|
|
845
1444
|
agentId: run.agentId,
|
|
846
1445
|
containerId: run.containerId,
|
|
1446
|
+
selectedSessionCookieSha256: run.selectedSessionCookieSha256 ?? null,
|
|
1447
|
+
selectedSessionUserEmail: run.selectedSessionUserEmail ?? null,
|
|
1448
|
+
selectedSessionUserHandle: run.selectedSessionUserHandle ?? null,
|
|
1449
|
+
launchDiagnostics: run.launchDiagnostics ?? null,
|
|
847
1450
|
sourceQueryUrl: run.sourceQueryUrl,
|
|
848
1451
|
slicedQueryUrl: run.slicedQueryUrl
|
|
849
1452
|
});
|
|
850
1453
|
}
|
|
851
|
-
async function waitForSalesNavigatorExportRunCompletion(session, runId, options = {}) {
|
|
1454
|
+
async function waitForSalesNavigatorExportRunCompletion(session, runId, options = {}, traceId, logOptions = {}) {
|
|
852
1455
|
const timeoutSeconds = options.timeoutSeconds ?? 960;
|
|
853
1456
|
const pollIntervalMs = options.pollIntervalMs ?? 5000;
|
|
854
1457
|
const deadline = Date.now() + timeoutSeconds * 1000;
|
|
855
1458
|
let currentSession = session;
|
|
1459
|
+
let pollCount = 0;
|
|
856
1460
|
while (Date.now() < deadline) {
|
|
857
|
-
const status = await getSalesNavigatorExportRunStatus(currentSession, runId);
|
|
1461
|
+
const status = await getSalesNavigatorExportRunStatus(currentSession, runId, traceId);
|
|
858
1462
|
currentSession = status.session;
|
|
1463
|
+
pollCount += 1;
|
|
1464
|
+
await logOptions.logger?.log("salesnav.export.polled", {
|
|
1465
|
+
runId,
|
|
1466
|
+
pollCount,
|
|
1467
|
+
status: status.value.run.status,
|
|
1468
|
+
resultClassification: status.value.run.resultClassification,
|
|
1469
|
+
totalResults: status.value.run.totalResults ?? null,
|
|
1470
|
+
imported: status.value.run.imported,
|
|
1471
|
+
upserted: status.value.run.upserted,
|
|
1472
|
+
updatedAt: status.value.run.updatedAt,
|
|
1473
|
+
...logOptions.metadata
|
|
1474
|
+
});
|
|
859
1475
|
if (status.value.run.status !== "pending") {
|
|
860
1476
|
return status;
|
|
861
1477
|
}
|
|
@@ -871,12 +1487,16 @@ function isSalesNavigatorAgentBusyError(error) {
|
|
|
871
1487
|
return /parallel executions limit/i.test(message);
|
|
872
1488
|
}
|
|
873
1489
|
/**
 * Tells whether an export failure is treated as a LinkedIn / Sales Navigator session problem.
 *
 * A `SalesNavigatorExportRequestError` matches on its `errorCode`; any other
 * value (or an unmatched code) is classified by pattern-matching its message.
 *
 * @param {unknown} error Thrown value to classify.
 * @returns {boolean} True when the failure is classified as a session error.
 */
function isSalesNavigatorSessionError(error) {
    const sessionErrorCodes = [
        "invalid_session",
        "phantombuster_cant_connect_profile",
        "salesnav_upsell_detected",
        "linkedin_session_invalid"
    ];
    if (error instanceof SalesNavigatorExportRequestError && sessionErrorCodes.includes(error.errorCode ?? "")) {
        return true;
    }
    const message = error instanceof Error ? error.message : String(error);
    return /can't connect profile|sales navigator account|upsell|linkedin session invalid|linkedin_rate_limited|too many requests|rate.?limit|invalid session cookie/i.test(message);
}
|
|
881
1501
|
function isSalesNavigatorResultArtifactError(error) {
|
|
882
1502
|
if (error instanceof SalesNavigatorExportRequestError && error.errorCode === "phantombuster_result_invalid") {
|
|
@@ -899,13 +1519,16 @@ function isRefreshableAuthError(error) {
|
|
|
899
1519
|
const message = error instanceof Error ? error.message : String(error);
|
|
900
1520
|
return /token expired|session expired|not logged in|missing bearer token/i.test(message);
|
|
901
1521
|
}
|
|
902
|
-
async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
|
|
1522
|
+
async function runSalesNavigatorExportWithAgentWait(session, payload, options, traceId) {
|
|
903
1523
|
let busyWaitCount = 0;
|
|
904
1524
|
let currentSession = session;
|
|
905
1525
|
let authRefreshCount = 0;
|
|
906
1526
|
while (true) {
|
|
907
1527
|
try {
|
|
908
|
-
return await runSalesNavigatorExport(currentSession, payload
|
|
1528
|
+
return await runSalesNavigatorExport(currentSession, payload, traceId, {
|
|
1529
|
+
logger: options.logger,
|
|
1530
|
+
metadata: options.logMetadata
|
|
1531
|
+
});
|
|
909
1532
|
}
|
|
910
1533
|
catch (error) {
|
|
911
1534
|
if (isRefreshableAuthError(error)) {
|
|
@@ -916,6 +1539,12 @@ async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
|
|
|
916
1539
|
if (!runtimeOutputOptions.quiet) {
|
|
917
1540
|
process.stderr.write("Salesprompter session expired during crawl. Refreshing login...\n");
|
|
918
1541
|
}
|
|
1542
|
+
await options.logger?.log("salesnav.export.auth.refresh", {
|
|
1543
|
+
authRefreshCount,
|
|
1544
|
+
waitSeconds: options.waitSeconds,
|
|
1545
|
+
maxWaits: options.maxWaits,
|
|
1546
|
+
...options.logMetadata
|
|
1547
|
+
});
|
|
919
1548
|
await ensureInteractiveAuthSession(currentSession.apiBaseUrl);
|
|
920
1549
|
currentSession = await requireAuthSession();
|
|
921
1550
|
continue;
|
|
@@ -928,6 +1557,12 @@ async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
|
|
|
928
1557
|
if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
|
|
929
1558
|
process.stderr.write(`Sales Navigator export agent is busy. Waiting ${options.waitSeconds}s before retrying...\n`);
|
|
930
1559
|
}
|
|
1560
|
+
await options.logger?.log("salesnav.export.agent.busy", {
|
|
1561
|
+
busyWaitCount,
|
|
1562
|
+
waitSeconds: options.waitSeconds,
|
|
1563
|
+
maxWaits: options.maxWaits,
|
|
1564
|
+
...options.logMetadata
|
|
1565
|
+
});
|
|
931
1566
|
await delay(options.waitSeconds * 1000);
|
|
932
1567
|
continue;
|
|
933
1568
|
}
|
|
@@ -940,6 +1575,14 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
940
1575
|
options.probeProfiles < attempt.numberOfProfiles &&
|
|
941
1576
|
attempt.depth < options.maxSplitDepth;
|
|
942
1577
|
const probeProfiles = shouldProbe ? Math.max(1, options.probeProfiles) : attempt.numberOfProfiles;
|
|
1578
|
+
const logMetadata = {
|
|
1579
|
+
crawlJobId: context?.crawlJobId ?? null,
|
|
1580
|
+
crawlSliceId: context?.crawlSliceId ?? null,
|
|
1581
|
+
sliceDepth: attempt.depth,
|
|
1582
|
+
splitTrail: formatSalesNavigatorSplitTrail(attempt.splitTrail),
|
|
1583
|
+
sourceQueryUrl: attempt.sourceQueryUrl,
|
|
1584
|
+
slicedQueryUrl: attempt.slicedQueryUrl
|
|
1585
|
+
};
|
|
943
1586
|
const probeResult = await runSalesNavigatorExportWithAgentWait(session, {
|
|
944
1587
|
sourceQueryUrl: attempt.sourceQueryUrl,
|
|
945
1588
|
slicedQueryUrl: attempt.slicedQueryUrl,
|
|
@@ -948,11 +1591,23 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
948
1591
|
numberOfProfiles: probeProfiles,
|
|
949
1592
|
slicePreset: attempt.slicePreset,
|
|
950
1593
|
crawlJobId: context?.crawlJobId,
|
|
951
|
-
crawlSliceId: context?.crawlSliceId
|
|
1594
|
+
crawlSliceId: context?.crawlSliceId,
|
|
1595
|
+
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
1596
|
+
traceId: context?.traceId ?? null,
|
|
1597
|
+
phase: shouldProbe ? "probe" : "full_export",
|
|
1598
|
+
requestedProfiles: probeProfiles,
|
|
1599
|
+
crawlJobId: context?.crawlJobId ?? null,
|
|
1600
|
+
crawlSliceId: context?.crawlSliceId ?? null
|
|
1601
|
+
})
|
|
952
1602
|
}, {
|
|
953
1603
|
waitSeconds: options.agentBusyWaitSeconds,
|
|
954
|
-
maxWaits: options.agentBusyMaxWaits
|
|
955
|
-
|
|
1604
|
+
maxWaits: options.agentBusyMaxWaits,
|
|
1605
|
+
logger: options.logger,
|
|
1606
|
+
logMetadata: {
|
|
1607
|
+
...logMetadata,
|
|
1608
|
+
phase: shouldProbe ? "probe" : "full_export"
|
|
1609
|
+
}
|
|
1610
|
+
}, context?.traceId);
|
|
956
1611
|
if (!shouldProbe) {
|
|
957
1612
|
return probeResult;
|
|
958
1613
|
}
|
|
@@ -960,6 +1615,10 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
960
1615
|
if (totalResults === null || totalResults > attempt.maxResultsPerSearch) {
|
|
961
1616
|
return probeResult;
|
|
962
1617
|
}
|
|
1618
|
+
const splitTriggerResults = Math.min(attempt.maxResultsPerSearch, SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS);
|
|
1619
|
+
if (totalResults > splitTriggerResults) {
|
|
1620
|
+
throw new SalesNavigatorSliceTooBroadError(`Sales Navigator slice produced ${totalResults} results, exceeding the split trigger of ${splitTriggerResults}.`, { totalResults });
|
|
1621
|
+
}
|
|
963
1622
|
return await runSalesNavigatorExportWithAgentWait(session, {
|
|
964
1623
|
sourceQueryUrl: attempt.sourceQueryUrl,
|
|
965
1624
|
slicedQueryUrl: attempt.slicedQueryUrl,
|
|
@@ -968,11 +1627,27 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
|
|
|
968
1627
|
numberOfProfiles: attempt.numberOfProfiles,
|
|
969
1628
|
slicePreset: attempt.slicePreset,
|
|
970
1629
|
crawlJobId: context?.crawlJobId,
|
|
971
|
-
crawlSliceId: context?.crawlSliceId
|
|
1630
|
+
crawlSliceId: context?.crawlSliceId,
|
|
1631
|
+
rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
|
|
1632
|
+
traceId: context?.traceId ?? null,
|
|
1633
|
+
phase: "full_export_after_probe",
|
|
1634
|
+
requestedProfiles: attempt.numberOfProfiles,
|
|
1635
|
+
crawlJobId: context?.crawlJobId ?? null,
|
|
1636
|
+
crawlSliceId: context?.crawlSliceId ?? null,
|
|
1637
|
+
probeProfiles,
|
|
1638
|
+
probeTotalResults: totalResults
|
|
1639
|
+
})
|
|
972
1640
|
}, {
|
|
973
1641
|
waitSeconds: options.agentBusyWaitSeconds,
|
|
974
|
-
maxWaits: options.agentBusyMaxWaits
|
|
975
|
-
|
|
1642
|
+
maxWaits: options.agentBusyMaxWaits,
|
|
1643
|
+
logger: options.logger,
|
|
1644
|
+
logMetadata: {
|
|
1645
|
+
...logMetadata,
|
|
1646
|
+
phase: "full_export_after_probe",
|
|
1647
|
+
probeProfiles,
|
|
1648
|
+
probeTotalResults: totalResults
|
|
1649
|
+
}
|
|
1650
|
+
}, context?.traceId);
|
|
976
1651
|
}
|
|
977
1652
|
function buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice) {
|
|
978
1653
|
return {
|
|
@@ -987,12 +1662,13 @@ function buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice) {
|
|
|
987
1662
|
splitTrail: slice.splitTrail
|
|
988
1663
|
};
|
|
989
1664
|
}
|
|
990
|
-
async function createOrResumeSalesNavigatorCrawlJob(session, payload) {
|
|
1665
|
+
async function createOrResumeSalesNavigatorCrawlJob(session, payload, traceId) {
|
|
991
1666
|
return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/crawls`, {
|
|
992
1667
|
method: "POST",
|
|
993
1668
|
headers: {
|
|
994
1669
|
"Content-Type": "application/json",
|
|
995
|
-
Authorization: `Bearer ${currentSession.accessToken}
|
|
1670
|
+
Authorization: `Bearer ${currentSession.accessToken}`,
|
|
1671
|
+
...buildTraceHeaders(traceId)
|
|
996
1672
|
},
|
|
997
1673
|
body: JSON.stringify({
|
|
998
1674
|
...payload,
|
|
@@ -1003,28 +1679,31 @@ async function createOrResumeSalesNavigatorCrawlJob(session, payload) {
|
|
|
1003
1679
|
})
|
|
1004
1680
|
}), SalesNavigatorCrawlCreateResponseSchema);
|
|
1005
1681
|
}
|
|
1006
|
-
async function getSalesNavigatorCrawlStatus(session, jobId) {
|
|
1682
|
+
/**
 * Fetches the status of a Sales Navigator crawl job.
 *
 * @param {object} session Auth session handed to `fetchCliJson`.
 * @param {string} jobId Identifier of the crawl job.
 * @param {string} [traceId] Trace id turned into request headers by `buildTraceHeaders`.
 * @returns {Promise<object>} Result of `fetchCliJson` for `SalesNavigatorCrawlStatusResponseSchema`.
 */
async function getSalesNavigatorCrawlStatus(session, jobId, traceId) {
    const requestCrawlStatus = (currentSession) => {
        const endpoint = `${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}`;
        const headers = {
            Authorization: `Bearer ${currentSession.accessToken}`,
            ...buildTraceHeaders(traceId)
        };
        return fetch(endpoint, { method: "GET", headers });
    };
    return await fetchCliJson(session, requestCrawlStatus, SalesNavigatorCrawlStatusResponseSchema);
}
|
|
1014
|
-
async function claimNextSalesNavigatorCrawlSlice(session, jobId) {
|
|
1691
|
+
/**
 * Asks the API to claim the next slice of a crawl job via the `claim-next` endpoint.
 *
 * @param {object} session Auth session handed to `fetchCliJson`.
 * @param {string} jobId Identifier of the crawl job.
 * @param {string} [traceId] Trace id turned into request headers by `buildTraceHeaders`.
 * @returns {Promise<object>} Result of `fetchCliJson` for `SalesNavigatorCrawlClaimResponseSchema`.
 */
async function claimNextSalesNavigatorCrawlSlice(session, jobId, traceId) {
    const requestClaim = (currentSession) => {
        const endpoint = `${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}/claim-next`;
        const headers = {
            Authorization: `Bearer ${currentSession.accessToken}`,
            ...buildTraceHeaders(traceId)
        };
        return fetch(endpoint, { method: "POST", headers });
    };
    return await fetchCliJson(session, requestClaim, SalesNavigatorCrawlClaimResponseSchema);
}
|
|
1022
|
-
async function reportSalesNavigatorCrawlSlice(session, jobId, payload) {
|
|
1700
|
+
async function reportSalesNavigatorCrawlSlice(session, jobId, payload, traceId) {
|
|
1023
1701
|
return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}/report`, {
|
|
1024
1702
|
method: "POST",
|
|
1025
1703
|
headers: {
|
|
1026
1704
|
"Content-Type": "application/json",
|
|
1027
|
-
Authorization: `Bearer ${currentSession.accessToken}
|
|
1705
|
+
Authorization: `Bearer ${currentSession.accessToken}`,
|
|
1706
|
+
...buildTraceHeaders(traceId)
|
|
1028
1707
|
},
|
|
1029
1708
|
body: JSON.stringify({
|
|
1030
1709
|
...payload,
|
|
@@ -1039,17 +1718,125 @@ function nextSalesNavigatorSplitDimension(slice, maxSplitDepth) {
|
|
|
1039
1718
|
if (slice.depth >= maxSplitDepth) {
|
|
1040
1719
|
return null;
|
|
1041
1720
|
}
|
|
1042
|
-
|
|
1721
|
+
const usedDimensionKeys = new Set(slice.splitTrail.map((entry) => entry.key));
|
|
1722
|
+
const orderedDimensions = getLearnedSalesNavigatorDimensionOrder();
|
|
1723
|
+
return orderedDimensions.find((dimension) => !usedDimensionKeys.has(dimension.key)) ?? null;
|
|
1043
1724
|
}
|
|
1044
1725
|
// Retry budgets for a crawl slice. NOTE(review): presumably one for cookie/session failures and one
// for invalid result artifacts; the code that reads them is outside this excerpt, confirm there.
const SALES_NAVIGATOR_COOKIE_RETRY_LIMIT = 8;
|
|
1045
1726
|
const SALES_NAVIGATOR_RESULT_RETRY_LIMIT = 3;
|
|
1727
|
+
// Probe result count above which runSalesNavigatorCrawlAttempt throws SalesNavigatorSliceTooBroadError
// instead of running the full export (capped by the attempt's own maxResultsPerSearch).
const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
|
|
1728
|
+
// Observations a dimension needs before its learned average is used to reorder split dimensions.
const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
|
|
1729
|
+
// In-memory filter-impact model ({ version, updatedAt, dimensions }); stays null until a valid
// model file is loaded or the first observation is recorded.
let salesNavigatorFilterImpactModel = null;
|
|
1730
|
+
// Set on the first load attempt so the model file is read at most once per process.
let salesNavigatorFilterImpactLoaded = false;
|
|
1731
|
+
/**
 * Resolves the directory that holds Salesprompter's local configuration files.
 *
 * @returns {string} The trimmed `SALESPROMPTER_CONFIG_DIR` environment value when it is
 *   non-empty, otherwise `<home>/.config/salesprompter`.
 */
function getSalesprompterConfigDir() {
    const configured = (process.env.SALESPROMPTER_CONFIG_DIR ?? "").trim();
    return configured === ""
        ? path.join(os.homedir(), ".config", "salesprompter")
        : configured;
}
|
|
1738
|
+
/**
 * Location of the persisted filter-impact model inside the Salesprompter config directory.
 *
 * @returns {string} Path to `salesnav-filter-impact.json`.
 */
function getSalesNavigatorFilterImpactPath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "salesnav-filter-impact.json");
}
|
|
1741
|
+
/**
 * Loads the persisted filter-impact model into module state, at most once per process.
 *
 * The loaded flag is raised before the file is read, so later calls return the
 * cached value without touching the disk. A file that parses but is not a
 * version-1 model with a `dimensions` object leaves the current model untouched;
 * a read or parse failure resets the model to null.
 *
 * @returns {Promise<object|null>} The cached model, or null when none is available.
 */
async function loadSalesNavigatorFilterImpactModel() {
    if (!salesNavigatorFilterImpactLoaded) {
        salesNavigatorFilterImpactLoaded = true;
        const modelPath = getSalesNavigatorFilterImpactPath();
        try {
            const candidate = JSON.parse(await readFile(modelPath, "utf8"));
            const looksLikeModel = candidate &&
                candidate.version === 1 &&
                candidate.dimensions &&
                typeof candidate.dimensions === "object";
            if (looksLikeModel) {
                salesNavigatorFilterImpactModel = candidate;
            }
        }
        catch {
            // Missing or unreadable file: continue without a learned model.
            salesNavigatorFilterImpactModel = null;
        }
    }
    return salesNavigatorFilterImpactModel;
}
|
|
1759
|
+
/**
 * Writes the in-memory filter-impact model to its JSON file, creating the
 * containing directory when needed. Does nothing when no model is in memory.
 *
 * @returns {Promise<void>}
 */
async function persistSalesNavigatorFilterImpactModel() {
    if (salesNavigatorFilterImpactModel) {
        const target = getSalesNavigatorFilterImpactPath();
        const parentDir = path.dirname(target);
        await mkdir(parentDir, { recursive: true });
        // Serialised only after the directory exists, so the file reflects the model as of the write.
        const serialized = JSON.stringify(salesNavigatorFilterImpactModel, null, 2);
        await writeFile(target, `${serialized}\n`, "utf8");
    }
}
|
|
1767
|
+
/**
 * Orders the default crawl dimensions using the learned filter-impact model.
 *
 * Dimensions with at least SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS
 * observations come first, sorted by ascending average result count; every
 * remaining tie falls back to the default order. Without a model the default
 * list is returned as-is.
 *
 * The model comes from a loosely validated JSON file, so a missing or
 * non-numeric `avgResults` is ranked as +Infinity and averages are compared
 * without subtraction. That keeps the comparator from ever returning NaN
 * (e.g. Infinity - Infinity), which would make the sort order unspecified.
 *
 * @returns {Array<object>} The default dimensions, reordered (a new array when a model exists).
 */
function getLearnedSalesNavigatorDimensionOrder() {
    const model = salesNavigatorFilterImpactModel;
    if (!model) {
        return DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS;
    }
    const ranked = DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.map((dimension, defaultIndex) => {
        const stats = model.dimensions[dimension.key];
        const average = stats?.avgResults;
        return {
            dimension,
            defaultIndex,
            reliable: (stats?.observations ?? 0) >= SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS,
            averageResults: typeof average === "number" && !Number.isNaN(average)
                ? average
                : Number.POSITIVE_INFINITY
        };
    });
    ranked.sort((left, right) => {
        if (left.reliable !== right.reliable) {
            return left.reliable ? -1 : 1;
        }
        if (left.reliable && left.averageResults !== right.averageResults) {
            return left.averageResults < right.averageResults ? -1 : 1;
        }
        return left.defaultIndex - right.defaultIndex;
    });
    return ranked.map((entry) => entry.dimension);
}
|
|
1791
|
+
/**
 * Folds one observed result count into the learned filter-impact model for the
 * dimension that produced the slice (the last entry of its split trail).
 *
 * Observations without a finite `totalResults` or without a split trail entry
 * are ignored. The updated model is kept in memory and written to disk on a
 * best-effort basis: the model is only a heuristic cache, so a failed write
 * (for example a read-only config directory) is reported through the logger
 * as `salesnav.filter_impact.persist_failed` instead of rejecting and
 * surfacing as a failure of the export that produced the observation.
 *
 * @param {{splitTrail: Array<{key: string}>}} slice Slice whose last split dimension is credited.
 * @param {number|null|undefined} totalResults Result count observed for the slice.
 * @param {{logger?: object, outcome?: string}} [options] Optional logger and outcome label for the log event.
 * @returns {Promise<void>}
 */
async function recordSalesNavigatorFilterImpactObservation(slice, totalResults, options) {
    // Number.isFinite is false for null, undefined and every non-number.
    if (!Number.isFinite(totalResults)) {
        return;
    }
    const learnedDimension = slice.splitTrail.at(-1)?.key ?? null;
    if (!learnedDimension) {
        return;
    }
    await loadSalesNavigatorFilterImpactModel();
    if (!salesNavigatorFilterImpactModel) {
        salesNavigatorFilterImpactModel = {
            version: 1,
            updatedAt: new Date().toISOString(),
            dimensions: {}
        };
    }
    const previous = salesNavigatorFilterImpactModel.dimensions[learnedDimension];
    // Counters come from a hand-editable file; restart from zero rather than propagate NaN.
    const previousUsable = Number.isFinite(previous?.observations) && Number.isFinite(previous?.sumResults);
    const observations = (previousUsable ? previous.observations : 0) + 1;
    const sumResults = (previousUsable ? previous.sumResults : 0) + totalResults;
    const avgResults = sumResults / observations;
    salesNavigatorFilterImpactModel.dimensions[learnedDimension] = {
        observations,
        sumResults,
        avgResults,
        lastObservedAt: new Date().toISOString()
    };
    salesNavigatorFilterImpactModel.updatedAt = new Date().toISOString();
    try {
        await persistSalesNavigatorFilterImpactModel();
    }
    catch (error) {
        await options?.logger?.log("salesnav.filter_impact.persist_failed", {
            dimensionKey: learnedDimension,
            error: error instanceof Error ? error.message : String(error)
        });
    }
    await options?.logger?.log("salesnav.filter_impact.updated", {
        dimensionKey: learnedDimension,
        observations,
        avgResults,
        totalResults,
        outcome: options?.outcome ?? null
    });
}
|
|
1046
1827
|
/**
 * Expands a claimed slice along one dimension and returns the child slice descriptors.
 *
 * Each child carries its query URL, filters, depth and split trail, plus a raw
 * payload that records the parent slice and the dimension the child was split on
 * (taken from the last entry of the child's split trail).
 *
 * @param {object} slice Claimed parent slice; `id` and `slicedQueryUrl` are recorded on each child.
 * @param {object} dimension Dimension passed to `expandSalesNavigatorCrawlAttempt`.
 * @returns {Array<object>} One descriptor per expanded child attempt.
 */
function buildSalesNavigatorSplitChildren(slice, dimension) {
    const parentAttempt = buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice);
    const expanded = expandSalesNavigatorCrawlAttempt(parentAttempt, dimension);
    return expanded.map((child) => {
        const { slicedQueryUrl, appliedFilters, depth, splitTrail } = child;
        const rawPayload = buildSalesNavigatorSliceRawPayload(child, {
            parentSliceId: slice.id,
            parentSlicedQueryUrl: slice.slicedQueryUrl,
            splitDimensionKey: child.splitTrail.at(-1)?.key ?? null,
            splitDimensionFilterType: child.splitTrail.at(-1)?.filterType ?? null
        });
        return { slicedQueryUrl, appliedFilters, depth, splitTrail, rawPayload };
    });
}
|
|
1055
1842
|
function buildSalesNavigatorSliceFailureReport(slice, error, options) {
|
|
@@ -1114,81 +1901,398 @@ function buildSalesNavigatorSliceFailureReport(slice, error, options) {
|
|
|
1114
1901
|
/**
 * Renders a split trail as a list of "<dimension key>:<value text>" labels.
 *
 * @param {Array<{key: string, value: {text: string}}>} splitTrail Split trail entries.
 * @returns {string[]} One label per entry, in trail order.
 */
function formatSalesNavigatorSplitTrail(splitTrail) {
    return splitTrail.map(({ key, value }) => {
        const { text } = value;
        return `${key}:${text}`;
    });
}
|
|
1117
|
-
async function
|
|
1904
|
+
/**
 * Preflight check for the Sales Navigator session pool: tries to claim a
 * validated session cookie for the query and reports whether that worked.
 *
 * Emits `salesnav.session_pool.preflight.started`, then `.completed` (status
 * "ok" when a session was claimed, "skipped" when the claim returned nothing)
 * or `.failed`. A failed claim is returned as a result rather than thrown.
 *
 * @param {string} queryUrl Sales Navigator query the session is claimed for.
 * @param {{source: string, logger?: object}} options Caller label and optional logger.
 * @returns {Promise<{ready: true} | {ready: false, error: string}>}
 */
async function ensureSalesNavigatorSessionPoolReady(queryUrl, options) {
    const { source, logger } = options;
    try {
        await logger?.log("salesnav.session_pool.preflight.started", { source, queryUrl });
        const claimed = await claimValidatedSalesNavigatorSessionCookieForCli({
            queryUrl,
            source,
            env: process.env
        });
        await logger?.log("salesnav.session_pool.preflight.completed", {
            source,
            queryUrl,
            status: claimed ? "ok" : "skipped",
            selectedSessionUserEmail: claimed?.userEmail ?? null,
            selectedSessionUserHandle: claimed?.userHandle ?? null,
            selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null
        });
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        await logger?.log("salesnav.session_pool.preflight.failed", {
            source,
            queryUrl,
            error: message
        });
        return { ready: false, error: message };
    }
    return { ready: true };
}
|
|
1940
|
+
async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, options) {
|
|
1118
1941
|
let currentSession = session;
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
seenSliceIds.add(slice.id);
|
|
1139
|
-
claimedSlices += 1;
|
|
1140
|
-
}
|
|
1141
|
-
if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
|
|
1142
|
-
process.stderr.write(`Processing Sales Navigator slice ${claimedSlices}/${options.maxSlices}: ${slice.slicedQueryUrl}\n`);
|
|
1143
|
-
}
|
|
1144
|
-
try {
|
|
1145
|
-
const result = await runSalesNavigatorCrawlAttempt(currentSession, buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice), {
|
|
1146
|
-
maxSplitDepth: options.maxSplitDepth,
|
|
1147
|
-
probeProfiles: options.probeProfiles,
|
|
1148
|
-
agentBusyWaitSeconds: options.agentBusyWaitSeconds,
|
|
1149
|
-
agentBusyMaxWaits: options.agentBusyMaxWaits
|
|
1150
|
-
}, {
|
|
1151
|
-
crawlJobId: jobId,
|
|
1152
|
-
crawlSliceId: slice.id
|
|
1153
|
-
});
|
|
1942
|
+
await options.logger?.log("salesnav.crawl.slice.claimed", {
|
|
1943
|
+
jobId,
|
|
1944
|
+
sliceId: slice.id,
|
|
1945
|
+
isNewSlice: true,
|
|
1946
|
+
claimedSlices: options.claimedSlices,
|
|
1947
|
+
depth: slice.depth,
|
|
1948
|
+
retryCount: slice.retryCount,
|
|
1949
|
+
cookieRetryCount: slice.cookieRetryCount,
|
|
1950
|
+
resultRetryCount: slice.resultRetryCount,
|
|
1951
|
+
splitTrail: formatSalesNavigatorSplitTrail(slice.splitTrail),
|
|
1952
|
+
...summarizeSalesNavigatorQuery(slice.slicedQueryUrl, slice.appliedFilters)
|
|
1953
|
+
});
|
|
1954
|
+
if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
|
|
1955
|
+
process.stderr.write(`Processing Sales Navigator slice ${options.claimedSlices}: ${slice.slicedQueryUrl}\n`);
|
|
1956
|
+
}
|
|
1957
|
+
if (shouldPreSplitSalesNavigatorRootSlice(slice, options.maxSplitDepth)) {
|
|
1958
|
+
const nextDimension = nextSalesNavigatorSplitDimension(slice, options.maxSplitDepth);
|
|
1959
|
+
if (nextDimension) {
|
|
1960
|
+
const children = buildSalesNavigatorSplitChildren(slice, nextDimension);
|
|
1154
1961
|
const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
|
|
1155
1962
|
sliceId: slice.id,
|
|
1156
|
-
outcome: "
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1963
|
+
outcome: "split",
|
|
1964
|
+
error: `Pre-splitting broad Sales Navigator title query by ${nextDimension.key} before the first export attempt.`,
|
|
1965
|
+
errorCode: "presplit_root_title_query",
|
|
1966
|
+
children,
|
|
1967
|
+
rawPayload: buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
|
|
1968
|
+
phase: "presplit",
|
|
1969
|
+
reason: "broad_root_title_query",
|
|
1970
|
+
nextDimensionKey: nextDimension.key,
|
|
1971
|
+
nextDimensionFilterType: nextDimension.filterType,
|
|
1972
|
+
childCount: children.length
|
|
1973
|
+
})
|
|
1974
|
+
}, options.traceId);
|
|
1162
1975
|
currentSession = reported.session;
|
|
1163
|
-
|
|
1164
|
-
|
|
1976
|
+
await options.logger?.log("salesnav.crawl.slice.presplit", {
|
|
1977
|
+
jobId,
|
|
1978
|
+
sliceId: slice.id,
|
|
1979
|
+
nextDimension: nextDimension.key,
|
|
1980
|
+
childCount: children.length,
|
|
1981
|
+
splitTrail: formatSalesNavigatorSplitTrail(slice.splitTrail),
|
|
1982
|
+
...summarizeSalesNavigatorQuery(slice.slicedQueryUrl, slice.appliedFilters),
|
|
1983
|
+
childQueries: children.map((child) => ({
|
|
1984
|
+
splitTrail: formatSalesNavigatorSplitTrail(child.splitTrail),
|
|
1985
|
+
...summarizeSalesNavigatorQuery(child.slicedQueryUrl, child.appliedFilters)
|
|
1986
|
+
}))
|
|
1987
|
+
});
|
|
1988
|
+
return {
|
|
1989
|
+
session: currentSession,
|
|
1990
|
+
job: reported.value.job,
|
|
1991
|
+
activeSlice: slice,
|
|
1992
|
+
lastOutcome: {
|
|
1993
|
+
outcome: "split",
|
|
1994
|
+
error: `Pre-split by ${nextDimension.key}`,
|
|
1995
|
+
errorCode: "presplit_root_title_query",
|
|
1996
|
+
totalResults: null
|
|
1997
|
+
},
|
|
1998
|
+
forceSessionPoolRecheck: false
|
|
1999
|
+
};
|
|
2000
|
+
}
|
|
2001
|
+
}
|
|
2002
|
+
try {
|
|
2003
|
+
const result = await runSalesNavigatorCrawlAttempt(currentSession, buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice), {
|
|
2004
|
+
maxSplitDepth: options.maxSplitDepth,
|
|
2005
|
+
probeProfiles: options.probeProfiles,
|
|
2006
|
+
agentBusyWaitSeconds: options.agentBusyWaitSeconds,
|
|
2007
|
+
agentBusyMaxWaits: options.agentBusyMaxWaits,
|
|
2008
|
+
logger: options.logger
|
|
2009
|
+
}, {
|
|
2010
|
+
crawlJobId: jobId,
|
|
2011
|
+
crawlSliceId: slice.id,
|
|
2012
|
+
traceId: options.traceId
|
|
2013
|
+
});
|
|
2014
|
+
const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
|
|
2015
|
+
sliceId: slice.id,
|
|
2016
|
+
outcome: "exported",
|
|
2017
|
+
totalResults: result.totalResults ?? null,
|
|
2018
|
+
exportRunId: result.runId,
|
|
2019
|
+
importedPeople: result.imported,
|
|
2020
|
+
upsertedPeople: result.upserted,
|
|
2021
|
+
rawPayload: buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
|
|
2022
|
+
phase: "exported",
|
|
2023
|
+
export: {
|
|
2024
|
+
runId: result.runId,
|
|
2025
|
+
totalResults: result.totalResults ?? null,
|
|
2026
|
+
imported: result.imported,
|
|
2027
|
+
upserted: result.upserted,
|
|
2028
|
+
resultJsonUrl: result.resultJsonUrl ?? null,
|
|
2029
|
+
resultCsvUrl: result.resultCsvUrl ?? null,
|
|
2030
|
+
selectedSessionCookieSha256: result.selectedSessionCookieSha256 ?? null,
|
|
2031
|
+
selectedSessionUserEmail: result.selectedSessionUserEmail ?? null,
|
|
2032
|
+
selectedSessionUserHandle: result.selectedSessionUserHandle ?? null,
|
|
2033
|
+
launchDiagnostics: result.launchDiagnostics ?? null
|
|
2034
|
+
}
|
|
2035
|
+
})
|
|
2036
|
+
}, options.traceId);
|
|
2037
|
+
currentSession = reported.session;
|
|
2038
|
+
await recordSalesNavigatorFilterImpactObservation(slice, result.totalResults ?? null, {
|
|
2039
|
+
logger: options.logger,
|
|
2040
|
+
outcome: "exported"
|
|
2041
|
+
});
|
|
2042
|
+
await options.logger?.log("salesnav.crawl.slice.exported", {
|
|
2043
|
+
jobId,
|
|
2044
|
+
sliceId: slice.id,
|
|
2045
|
+
exportRunId: result.runId,
|
|
2046
|
+
totalResults: result.totalResults ?? null,
|
|
2047
|
+
imported: result.imported,
|
|
2048
|
+
upserted: result.upserted,
|
|
2049
|
+
selectedAgentId: result.launchDiagnostics?.selectedAgent.id ?? result.agentId,
|
|
2050
|
+
selectedSessionUserEmail: result.selectedSessionUserEmail ?? null
|
|
2051
|
+
});
|
|
2052
|
+
return {
|
|
2053
|
+
session: currentSession,
|
|
2054
|
+
job: reported.value.job,
|
|
2055
|
+
activeSlice: slice,
|
|
2056
|
+
lastOutcome: {
|
|
1165
2057
|
outcome: "exported",
|
|
1166
2058
|
runId: result.runId,
|
|
1167
2059
|
totalResults: result.totalResults ?? null
|
|
1168
|
-
}
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
2060
|
+
},
|
|
2061
|
+
forceSessionPoolRecheck: false
|
|
2062
|
+
};
|
|
2063
|
+
}
|
|
2064
|
+
catch (error) {
|
|
2065
|
+
const payload = buildSalesNavigatorSliceFailureReport(slice, error, {
|
|
2066
|
+
maxSplitDepth: options.maxSplitDepth,
|
|
2067
|
+
maxRetries: options.maxRetries
|
|
2068
|
+
});
|
|
2069
|
+
payload.rawPayload = buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
|
|
2070
|
+
phase: payload.outcome,
|
|
2071
|
+
error: error instanceof Error
|
|
2072
|
+
? {
|
|
2073
|
+
name: error.name,
|
|
2074
|
+
message: error.message,
|
|
2075
|
+
...(error instanceof SalesNavigatorExportRequestError
|
|
2076
|
+
? {
|
|
2077
|
+
launchDiagnostics: error.launchDiagnostics ?? null,
|
|
2078
|
+
agentId: error.agentId ?? null,
|
|
2079
|
+
containerId: error.containerId ?? null
|
|
2080
|
+
}
|
|
2081
|
+
: {})
|
|
2082
|
+
}
|
|
2083
|
+
: {
|
|
2084
|
+
name: "Error",
|
|
2085
|
+
message: String(error)
|
|
2086
|
+
}
|
|
2087
|
+
});
|
|
2088
|
+
const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, payload, options.traceId);
|
|
2089
|
+
currentSession = reported.session;
|
|
2090
|
+
await recordSalesNavigatorFilterImpactObservation(slice, payload.totalResults ?? null, {
|
|
2091
|
+
logger: options.logger,
|
|
2092
|
+
outcome: payload.outcome
|
|
2093
|
+
});
|
|
2094
|
+
await options.logger?.log("salesnav.crawl.slice.reported", {
|
|
2095
|
+
jobId,
|
|
2096
|
+
sliceId: slice.id,
|
|
2097
|
+
outcome: payload.outcome,
|
|
2098
|
+
error: payload.error ?? null,
|
|
2099
|
+
errorCode: payload.errorCode ?? null,
|
|
2100
|
+
totalResults: payload.totalResults ?? null,
|
|
2101
|
+
exportRunId: payload.exportRunId ?? null,
|
|
2102
|
+
childCount: payload.children?.length ?? 0
|
|
2103
|
+
});
|
|
2104
|
+
return {
|
|
2105
|
+
session: currentSession,
|
|
2106
|
+
job: reported.value.job,
|
|
2107
|
+
activeSlice: slice,
|
|
2108
|
+
lastOutcome: {
|
|
1179
2109
|
outcome: payload.outcome,
|
|
1180
2110
|
runId: payload.exportRunId,
|
|
1181
2111
|
error: payload.error,
|
|
1182
2112
|
errorCode: payload.errorCode,
|
|
1183
2113
|
totalResults: payload.totalResults
|
|
1184
|
-
}
|
|
2114
|
+
},
|
|
2115
|
+
forceSessionPoolRecheck: payload.errorCode === "invalid_session"
|
|
2116
|
+
};
|
|
2117
|
+
}
|
|
2118
|
+
}
|
|
2119
|
+
async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
2120
|
+
await loadSalesNavigatorFilterImpactModel();
|
|
2121
|
+
let currentSession = session;
|
|
2122
|
+
let claimedSlices = 0;
|
|
2123
|
+
const seenSliceIds = new Set();
|
|
2124
|
+
let activeSlice = null;
|
|
2125
|
+
let job = null;
|
|
2126
|
+
let idlePollCount = 0;
|
|
2127
|
+
let lastOutcome = null;
|
|
2128
|
+
const parallelExports = Math.max(1, options.parallelExports);
|
|
2129
|
+
const inFlight = new Map();
|
|
2130
|
+
let nextSlot = 0;
|
|
2131
|
+
let noMoreClaimableWork = false;
|
|
2132
|
+
let sessionPoolFailures = 0;
|
|
2133
|
+
let nextSessionPoolRetryAt = 0;
|
|
2134
|
+
let lastSessionPoolReadyAt = 0;
|
|
2135
|
+
const sessionPoolReadinessCooldownMs = 120_000;
|
|
2136
|
+
while (true) {
|
|
2137
|
+
while (!noMoreClaimableWork && inFlight.size < parallelExports) {
|
|
2138
|
+
if (claimedSlices >= options.maxSlices) {
|
|
2139
|
+
break;
|
|
2140
|
+
}
|
|
2141
|
+
if (inFlight.size === 0) {
|
|
2142
|
+
const now = Date.now();
|
|
2143
|
+
if (now < nextSessionPoolRetryAt) {
|
|
2144
|
+
await delay(Math.max(0, nextSessionPoolRetryAt - now));
|
|
2145
|
+
continue;
|
|
2146
|
+
}
|
|
2147
|
+
if (now - lastSessionPoolReadyAt >= sessionPoolReadinessCooldownMs) {
|
|
2148
|
+
const readiness = await ensureSalesNavigatorSessionPoolReady(job?.sourceQueryUrl ?? "https://www.linkedin.com/sales/search/people", {
|
|
2149
|
+
logger: options.logger,
|
|
2150
|
+
source: "cli_salesnav_crawl_preflight"
|
|
2151
|
+
});
|
|
2152
|
+
if (!readiness.ready) {
|
|
2153
|
+
sessionPoolFailures += 1;
|
|
2154
|
+
idlePollCount += 1;
|
|
2155
|
+
const waitSeconds = Math.min(120, 10 * Math.max(1, sessionPoolFailures));
|
|
2156
|
+
nextSessionPoolRetryAt = Date.now() + waitSeconds * 1000;
|
|
2157
|
+
await options.logger?.log("salesnav.crawl.session_pool.waiting", {
|
|
2158
|
+
jobId,
|
|
2159
|
+
idlePollCount,
|
|
2160
|
+
idleMaxPolls: options.idleMaxPolls,
|
|
2161
|
+
sessionPoolFailures,
|
|
2162
|
+
waitSeconds,
|
|
2163
|
+
error: readiness.error
|
|
2164
|
+
});
|
|
2165
|
+
if (idlePollCount >= options.idleMaxPolls) {
|
|
2166
|
+
lastOutcome = {
|
|
2167
|
+
outcome: "terminal_failed",
|
|
2168
|
+
error: readiness.error ??
|
|
2169
|
+
`Sales Navigator session pool stayed unavailable for ${options.idleMaxPolls} checks.`,
|
|
2170
|
+
errorCode: "blocked_no_valid_salesnav_session"
|
|
2171
|
+
};
|
|
2172
|
+
noMoreClaimableWork = true;
|
|
2173
|
+
break;
|
|
2174
|
+
}
|
|
2175
|
+
continue;
|
|
2176
|
+
}
|
|
2177
|
+
sessionPoolFailures = 0;
|
|
2178
|
+
nextSessionPoolRetryAt = 0;
|
|
2179
|
+
lastSessionPoolReadyAt = Date.now();
|
|
2180
|
+
}
|
|
2181
|
+
}
|
|
2182
|
+
const claimed = await claimNextSalesNavigatorCrawlSlice(currentSession, jobId, options.traceId);
|
|
2183
|
+
currentSession = claimed.session;
|
|
2184
|
+
job = claimed.value.job;
|
|
2185
|
+
if (!claimed.value.slice) {
|
|
2186
|
+
const shouldWaitForRemoteWork = !isSalesNavigatorCrawlJobTerminal(job.status) &&
|
|
2187
|
+
options.idleMaxPolls > 0 &&
|
|
2188
|
+
job.runningSlices > 0;
|
|
2189
|
+
if (shouldWaitForRemoteWork && inFlight.size === 0) {
|
|
2190
|
+
if (idlePollCount >= options.idleMaxPolls) {
|
|
2191
|
+
lastOutcome = {
|
|
2192
|
+
outcome: "terminal_failed",
|
|
2193
|
+
error: `Sales Navigator crawl job ${jobId} stayed non-terminal without a claimable slice after ${options.idleMaxPolls} polls.`,
|
|
2194
|
+
errorCode: "crawl_idle_timeout"
|
|
2195
|
+
};
|
|
2196
|
+
await options.logger?.log("salesnav.crawl.job.stalled", {
|
|
2197
|
+
jobId,
|
|
2198
|
+
status: job.status,
|
|
2199
|
+
queuedSlices: job.queuedSlices,
|
|
2200
|
+
runningSlices: job.runningSlices,
|
|
2201
|
+
idlePollCount,
|
|
2202
|
+
idleMaxPolls: options.idleMaxPolls
|
|
2203
|
+
});
|
|
2204
|
+
noMoreClaimableWork = true;
|
|
2205
|
+
break;
|
|
2206
|
+
}
|
|
2207
|
+
idlePollCount += 1;
|
|
2208
|
+
await options.logger?.log("salesnav.crawl.job.waiting", {
|
|
2209
|
+
jobId,
|
|
2210
|
+
status: job.status,
|
|
2211
|
+
queuedSlices: job.queuedSlices,
|
|
2212
|
+
runningSlices: job.runningSlices,
|
|
2213
|
+
idlePollCount,
|
|
2214
|
+
idlePollSeconds: options.idlePollSeconds
|
|
2215
|
+
});
|
|
2216
|
+
if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
|
|
2217
|
+
process.stderr.write(`Sales Navigator crawl job ${jobId} has no claimable slice yet. Waiting ${options.idlePollSeconds}s for remote work to settle...\n`);
|
|
2218
|
+
}
|
|
2219
|
+
await delay(options.idlePollSeconds * 1000);
|
|
2220
|
+
const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
|
|
2221
|
+
currentSession = status.session;
|
|
2222
|
+
job = status.value.job;
|
|
2223
|
+
await options.logger?.log("salesnav.crawl.job.status.polled", {
|
|
2224
|
+
jobId,
|
|
2225
|
+
status: job.status,
|
|
2226
|
+
queuedSlices: job.queuedSlices,
|
|
2227
|
+
runningSlices: job.runningSlices,
|
|
2228
|
+
exportedSlices: job.exportedSlices,
|
|
2229
|
+
failedSlices: job.failedSlices,
|
|
2230
|
+
importedPeople: job.importedPeople,
|
|
2231
|
+
idlePollCount
|
|
2232
|
+
});
|
|
2233
|
+
if (isSalesNavigatorCrawlJobTerminal(job.status)) {
|
|
2234
|
+
noMoreClaimableWork = true;
|
|
2235
|
+
break;
|
|
2236
|
+
}
|
|
2237
|
+
continue;
|
|
2238
|
+
}
|
|
2239
|
+
if (!shouldWaitForRemoteWork) {
|
|
2240
|
+
noMoreClaimableWork = true;
|
|
2241
|
+
}
|
|
2242
|
+
break;
|
|
2243
|
+
}
|
|
2244
|
+
const slice = claimed.value.slice;
|
|
2245
|
+
idlePollCount = 0;
|
|
2246
|
+
activeSlice = slice;
|
|
2247
|
+
const isNewSlice = !seenSliceIds.has(slice.id);
|
|
2248
|
+
if (isNewSlice) {
|
|
2249
|
+
seenSliceIds.add(slice.id);
|
|
2250
|
+
claimedSlices += 1;
|
|
2251
|
+
}
|
|
2252
|
+
const claimedSliceNumber = claimedSlices;
|
|
2253
|
+
const slot = nextSlot++;
|
|
2254
|
+
inFlight.set(slot, processSalesNavigatorClaimedCrawlSlice(currentSession, jobId, slice, {
|
|
2255
|
+
maxSplitDepth: options.maxSplitDepth,
|
|
2256
|
+
maxRetries: options.maxRetries,
|
|
2257
|
+
probeProfiles: options.probeProfiles,
|
|
2258
|
+
agentBusyWaitSeconds: options.agentBusyWaitSeconds,
|
|
2259
|
+
agentBusyMaxWaits: options.agentBusyMaxWaits,
|
|
2260
|
+
claimedSlices: claimedSliceNumber,
|
|
2261
|
+
traceId: options.traceId,
|
|
2262
|
+
logger: options.logger
|
|
2263
|
+
}).then((value) => ({ slot, value })));
|
|
2264
|
+
}
|
|
2265
|
+
if (inFlight.size === 0) {
|
|
2266
|
+
break;
|
|
2267
|
+
}
|
|
2268
|
+
const completed = await Promise.race(inFlight.values());
|
|
2269
|
+
inFlight.delete(completed.slot);
|
|
2270
|
+
currentSession = completed.value.session;
|
|
2271
|
+
job = completed.value.job;
|
|
2272
|
+
activeSlice = completed.value.activeSlice;
|
|
2273
|
+
lastOutcome = completed.value.lastOutcome;
|
|
2274
|
+
if (completed.value.forceSessionPoolRecheck) {
|
|
2275
|
+
lastSessionPoolReadyAt = 0;
|
|
2276
|
+
nextSessionPoolRetryAt = 0;
|
|
1185
2277
|
}
|
|
1186
2278
|
}
|
|
1187
2279
|
if (!job) {
|
|
1188
|
-
const status = await getSalesNavigatorCrawlStatus(currentSession, jobId);
|
|
2280
|
+
const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
|
|
1189
2281
|
currentSession = status.session;
|
|
1190
2282
|
job = status.value.job;
|
|
1191
2283
|
}
|
|
2284
|
+
await options.logger?.log("salesnav.crawl.job.completed", {
|
|
2285
|
+
jobId,
|
|
2286
|
+
status: job.status,
|
|
2287
|
+
queuedSlices: job.queuedSlices,
|
|
2288
|
+
runningSlices: job.runningSlices,
|
|
2289
|
+
exportedSlices: job.exportedSlices,
|
|
2290
|
+
failedSlices: job.failedSlices,
|
|
2291
|
+
importedPeople: job.importedPeople,
|
|
2292
|
+
claimedSlices,
|
|
2293
|
+
truncated: claimedSlices >= options.maxSlices && (job.queuedSlices > 0 || job.runningSlices > 0),
|
|
2294
|
+
lastOutcome
|
|
2295
|
+
});
|
|
1192
2296
|
return {
|
|
1193
2297
|
session: currentSession,
|
|
1194
2298
|
job,
|
|
@@ -1198,22 +2302,6 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
|
|
|
1198
2302
|
lastOutcome
|
|
1199
2303
|
};
|
|
1200
2304
|
}
|
|
1201
|
-
async function searchReferenceCompanyLeads(reference, icp, limit) {
|
|
1202
|
-
if (shouldBypassAuth()) {
|
|
1203
|
-
const fallbackTargetDomain = reference.domain ?? `${reference.slug}.com`;
|
|
1204
|
-
const result = await leadProvider.generateLeads(icp, limit, {
|
|
1205
|
-
companyDomain: fallbackTargetDomain,
|
|
1206
|
-
companyName: reference.companyName
|
|
1207
|
-
});
|
|
1208
|
-
return result.leads;
|
|
1209
|
-
}
|
|
1210
|
-
const session = await requireAuthSession();
|
|
1211
|
-
return await fetchWorkspaceLeadSearch(session, {
|
|
1212
|
-
mode: "reference-company",
|
|
1213
|
-
icp,
|
|
1214
|
-
limit
|
|
1215
|
-
});
|
|
1216
|
-
}
|
|
1217
2305
|
async function searchTargetCompanyLeads(reference, limit) {
|
|
1218
2306
|
if (shouldBypassAuth()) {
|
|
1219
2307
|
const fallbackTargetDomain = reference.domain ?? `${reference.slug}.com`;
|
|
@@ -1231,16 +2319,90 @@ async function searchTargetCompanyLeads(reference, limit) {
|
|
|
1231
2319
|
limit
|
|
1232
2320
|
});
|
|
1233
2321
|
}
|
|
1234
|
-
async function
|
|
1235
|
-
writeWizardSection("
|
|
1236
|
-
const
|
|
2322
|
+
/**
 * Wizard flow "Find leads from a product market".
 *
 * Prompts for a starting company website / LinkedIn page, the number of
 * products to inspect and the number of job titles to crawl, then hands those
 * answers (plus fixed wizard defaults) to
 * runSalesNavigatorFromProductCategoryWorkflow. When shouldBypassAuth() is
 * true the workflow is invoked as a dry run. Afterwards a short summary and
 * the raw CLI command for the same inputs are written to the wizard output.
 *
 * @param rl readline interface that is forwarded to promptText.
 * @returns {Promise<void>}
 */
async function runProductMarketWizard(rl) {
    // "" for exactly one item, "s" for anything else (including undefined).
    const pluralSuffix = (count) => (count === 1 ? "" : "s");
    // Ask a question and validate the answer as an integer in [1, upperBound].
    const askBoundedInteger = async (question, promptOptions, upperBound) => {
        const answer = await promptText(rl, question, promptOptions);
        return z.coerce.number().int().min(1).max(upperBound).parse(answer);
    };
    writeWizardSection("Find leads from a product market", "Start from a company website, LinkedIn company page, product page, or category page. I will turn that into intended job titles and durable Sales Navigator crawls.");
    const input = await promptText(rl, "What company website or LinkedIn page should I start from?", {
        required: true
    });
    // Each answer is validated before the next question is asked.
    const productLimit = await askBoundedInteger("How many products should I inspect?", { defaultValue: "25", required: true }, 5000);
    const titleLimit = await askBoundedInteger("How many job titles should I turn into Sales Navigator crawls?", { defaultValue: "5", required: true }, 1000);
    writeWizardLine();
    const dryRun = shouldBypassAuth();
    if (dryRun) {
        writeWizardLine("Auth bypass is enabled, so I will preview the crawl plan instead of launching Phantombuster.");
        writeWizardLine();
    }
    const workflowOptions = {
        input,
        maxPages: 25,
        productLimit,
        titleLimit,
        maxResultsPerSearch: 2500,
        numberOfProfiles: 2500,
        slicePreset: "wizard-linkedin-product-category",
        maxSplitDepth: DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.length,
        maxSlicesPerTitle: 1000,
        maxRetries: 3,
        probeProfiles: 100,
        agentBusyWaitSeconds: 30,
        agentBusyMaxWaits: 20,
        idlePollSeconds: 10,
        idleMaxPolls: 180,
        parallelExports: 3,
        skipProductUpload: false,
        dryRun
    };
    const result = await runSalesNavigatorFromProductCategoryWorkflow(workflowOptions);
    const { payload } = result;
    writeWizardLine(`LinkedIn product category: ${payload.source.category.name}.`);
    writeWizardLine(`Inspected ${payload.discoveredProducts} product${pluralSuffix(payload.discoveredProducts)} and derived ${payload.titleCount} intended job title${pluralSuffix(payload.titleCount)}.`);
    if (!payload.dryRun) {
        // Real run: report the upload (if any), crawl totals and any failures.
        if (payload.uploaded) {
            writeWizardLine(`Uploaded ${payload.uploaded.upserted} LinkedIn product record${pluralSuffix(payload.uploaded.upserted)} to Salesprompter.`);
        }
        writeWizardLine(`Finished ${payload.crawls?.length ?? 0} durable Sales Navigator crawl${pluralSuffix(payload.crawls?.length)}.`);
        writeWizardLine(`Imported ${payload.summary.totalImportedPeople} people across ${payload.summary.totalExportedSlices} exported slice${pluralSuffix(payload.summary.totalExportedSlices)}.`);
        if (payload.summary.workflowStatus !== "completed") {
            writeWizardLine(`Some title crawls still failed: ${payload.summary.completedWithFailuresTitles} completed with failures, ${payload.summary.runningTitles} still non-terminal, ${payload.summary.truncatedTitles} truncated.`);
        }
        writeWizardLine(`Saved crawl summary to ${result.outPath}.`);
        writeWizardLine(`Saved logs to ${payload.logPath}.`);
    }
    else {
        // Preview run: point at the saved files and show the first derived title.
        const firstQuery = payload.queries?.[0];
        writeWizardLine(`Saved preview to ${result.outPath}.`);
        writeWizardLine(`Saved logs to ${payload.logPath}.`);
        if (firstQuery) {
            writeWizardLine(`First Sales Navigator title search: ${firstQuery.title}.`);
        }
    }
    writeWizardLine();
    writeWizardLine("Equivalent raw command:");
    const commandArgs = [
        "salesprompter",
        "salesnav:from-product-category",
        "--input",
        input,
        "--product-limit",
        String(productLimit),
        "--title-limit",
        String(titleLimit),
        ...(dryRun ? ["--dry-run"] : [])
    ];
    writeWizardLine(` ${buildCommandLine(commandArgs)}`);
}
|
|
2397
|
+
async function runVendorShortcutWizard(rl) {
|
|
2398
|
+
writeWizardSection("Built-in Deel shortcut", "Use the built-in Deel ICP template and search your workspace lead data.");
|
|
2399
|
+
const reference = parseCompanyReference(await promptText(rl, "Which company shortcut should I use?", {
|
|
1237
2400
|
required: true
|
|
1238
2401
|
}));
|
|
1239
2402
|
writeWizardLine();
|
|
1240
2403
|
if (reference.vendorTemplate !== "deel") {
|
|
1241
|
-
throw new Error("
|
|
2404
|
+
throw new Error("The built-in shortcut only supports Deel right now. Use deel.com or the Deel LinkedIn company page.");
|
|
1242
2405
|
}
|
|
1243
|
-
writeWizardSection("Find matching leads", `Using the built-in ${reference.companyName} profile to search your workspace data.`);
|
|
1244
2406
|
const market = await promptChoice(rl, "Where do you want to search?", [
|
|
1245
2407
|
{ value: "dach", label: "DACH", description: "Germany, Austria, Switzerland" },
|
|
1246
2408
|
{ value: "europe", label: "Europe" },
|
|
@@ -1252,7 +2414,16 @@ async function runReferenceCompanyWizard(rl) {
|
|
|
1252
2414
|
const icpPath = `./data/${reference.slug}-icp-${market}.json`;
|
|
1253
2415
|
const leadPath = buildQualifiedLeadsPath(`${reference.slug}-${market}`);
|
|
1254
2416
|
await writeJsonFile(icpPath, icp);
|
|
1255
|
-
const leads =
|
|
2417
|
+
const leads = shouldBypassAuth()
|
|
2418
|
+
? (await leadProvider.generateLeads(icp, leadCount, {
|
|
2419
|
+
companyDomain: reference.domain ?? `${reference.slug}.com`,
|
|
2420
|
+
companyName: reference.companyName
|
|
2421
|
+
})).leads
|
|
2422
|
+
: await fetchWorkspaceLeadSearch(await requireAuthSession(), {
|
|
2423
|
+
mode: "reference-company",
|
|
2424
|
+
icp,
|
|
2425
|
+
limit: leadCount
|
|
2426
|
+
});
|
|
1256
2427
|
await writeJsonFile(leadPath, leads);
|
|
1257
2428
|
writeWizardLine(`Saved ICP to ${icpPath}.`);
|
|
1258
2429
|
if (leads.length === 0) {
|
|
@@ -1350,7 +2521,7 @@ async function runWizard(options) {
|
|
|
1350
2521
|
throw new Error("wizard does not support --json or --quiet.");
|
|
1351
2522
|
}
|
|
1352
2523
|
writeWizardLine("Salesprompter");
|
|
1353
|
-
writeWizardLine("Start with a company website or
|
|
2524
|
+
writeWizardLine("Start with a company website, LinkedIn product page, or category URL. I will guide you from there.");
|
|
1354
2525
|
writeWizardLine();
|
|
1355
2526
|
await ensureWizardSession(options);
|
|
1356
2527
|
const rl = createInterface({
|
|
@@ -1359,11 +2530,17 @@ async function runWizard(options) {
|
|
|
1359
2530
|
});
|
|
1360
2531
|
try {
|
|
1361
2532
|
const flow = await promptChoice(rl, "What do you want help with?", [
|
|
2533
|
+
{
|
|
2534
|
+
value: "product-market",
|
|
2535
|
+
label: "Find leads from a product market",
|
|
2536
|
+
description: "Start from a company, product, or LinkedIn category and crawl Sales Navigator",
|
|
2537
|
+
aliases: ["product market", "linkedin products", "category", "sales navigator", "crawl"]
|
|
2538
|
+
},
|
|
1362
2539
|
{
|
|
1363
2540
|
value: "reference-company",
|
|
1364
|
-
label: "
|
|
1365
|
-
description: "
|
|
1366
|
-
aliases: ["
|
|
2541
|
+
label: "Use the built-in Deel shortcut",
|
|
2542
|
+
description: "Generate the saved Deel ICP and search workspace leads",
|
|
2543
|
+
aliases: ["deel", "shortcut", "vendor template", "quick deel"]
|
|
1367
2544
|
},
|
|
1368
2545
|
{
|
|
1369
2546
|
value: "target-company",
|
|
@@ -1377,10 +2554,14 @@ async function runWizard(options) {
|
|
|
1377
2554
|
description: "Use a saved leads file to fill an Instantly campaign",
|
|
1378
2555
|
aliases: ["instantly", "outreach", "send leads", "campaign"]
|
|
1379
2556
|
}
|
|
1380
|
-
], "
|
|
2557
|
+
], "product-market");
|
|
1381
2558
|
writeWizardLine();
|
|
2559
|
+
if (flow === "product-market") {
|
|
2560
|
+
await runProductMarketWizard(rl);
|
|
2561
|
+
return;
|
|
2562
|
+
}
|
|
1382
2563
|
if (flow === "reference-company") {
|
|
1383
|
-
await
|
|
2564
|
+
await runVendorShortcutWizard(rl);
|
|
1384
2565
|
return;
|
|
1385
2566
|
}
|
|
1386
2567
|
if (flow === "target-company") {
|
|
@@ -1529,7 +2710,7 @@ async function fetchHistoricalQueryRows(tables) {
|
|
|
1529
2710
|
}
|
|
1530
2711
|
program
|
|
1531
2712
|
.name("salesprompter")
|
|
1532
|
-
.description("Sales workflow CLI for
|
|
2713
|
+
.description("Sales workflow CLI for LinkedIn product discovery, Sales Navigator crawling, lead enrichment, scoring, and sync.")
|
|
1533
2714
|
.version(packageVersion)
|
|
1534
2715
|
.option("--json", "Emit compact machine-readable JSON output", false)
|
|
1535
2716
|
.option("--quiet", "Suppress successful stdout output", false);
|
|
@@ -1870,6 +3051,219 @@ program
|
|
|
1870
3051
|
uploaded
|
|
1871
3052
|
});
|
|
1872
3053
|
});
|
|
3054
|
+
// CLI command `salesnav:from-product-category`: validates the numeric flags,
// runs runSalesNavigatorFromProductCategoryWorkflow and prints its payload.
// A non-dry run whose workflowStatus is not "completed" throws unless
// --allow-partial-success was passed.
program
    .command("salesnav:from-product-category")
    .description("Crawl a LinkedIn product category, derive intended-role title searches, then run durable Sales Navigator crawls that export through Phantombuster into Salesprompter.")
    .requiredOption("--input <value>", "Company domain, LinkedIn company URL, LinkedIn product URL, LinkedIn category URL, or LinkedIn product search URL")
    .option("--max-pages <number>", "Maximum LinkedIn category pages to fetch", "25")
    .option("--product-limit <number>", "Optional cap on the number of LinkedIn products to inspect")
    .option("--title-limit <number>", "Optional cap on the number of intended-role titles to crawl")
    .option("--max-results-per-search <number>", "Maximum Sales Navigator results allowed for one slice before splitting again. Current live export cap is 2500.", "2500")
    .option("--number-of-profiles <number>", "Profiles to request from Phantombuster per finished Sales Navigator slice. Current live export cap is 2500.", "2500")
    .option("--slice-preset <name>", "Slice preset label stored with every durable crawl job", "linkedin-product-category")
    .option("--max-split-depth <number>", "Maximum number of adaptive split dimensions to use", "6")
    .option("--max-slices-per-title <number>", "Safety cap for total claimed slices per intended-role title", "1000")
    .option("--max-retries <number>", "Retries for non-splitting export failures", "3")
    .option("--probe-profiles <number>", "Profiles to scrape while probing whether a slice is still too broad", "100")
    .option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
    .option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the slice", "20")
    .option("--idle-poll-seconds <number>", "Seconds to wait before polling durable crawl status when remote slices are still running", "10")
    .option("--idle-max-polls <number>", "How many no-claim status polls to tolerate before the crawl is considered stalled", "180")
    .option("--parallel-exports <number>", "How many Sales Navigator slices to export concurrently per title crawl", "3")
    .option("--allow-partial-success", "Exit 0 even when one or more durable title crawls finish with failures", false)
    .option("--skip-product-upload", "Do not upload the crawled LinkedIn product catalog before starting Sales Navigator crawls", false)
    .option("--out <path>", "Optional local JSON output path")
    .option("--log-path <path>", "Optional JSONL log path with timestamps, trace id, and Sales Navigator query metadata")
    .option("--dry-run", "Preview the derived intended-role title queries without creating crawl jobs", false)
    .action(async (options) => {
    // Coerce a raw flag value to an integer and enforce lower <= value <= upper.
    const intInRange = (raw, lower, upper) => z.coerce.number().int().min(lower).max(upper).parse(raw);
    // Same as intInRange, but a flag that was not supplied stays undefined.
    const optionalIntInRange = (raw, lower, upper) => (raw === undefined ? undefined : intInRange(raw, lower, upper));
    // Properties are evaluated top to bottom, so flags are validated in this
    // order and the first invalid one is the one that throws.
    const result = await runSalesNavigatorFromProductCategoryWorkflow({
        input: options.input,
        maxPages: intInRange(options.maxPages, 1, 500),
        productLimit: optionalIntInRange(options.productLimit, 1, 5000),
        titleLimit: optionalIntInRange(options.titleLimit, 1, 1000),
        maxResultsPerSearch: intInRange(options.maxResultsPerSearch, 1, 2500),
        numberOfProfiles: intInRange(options.numberOfProfiles, 1, 2500),
        slicePreset: options.slicePreset,
        maxSplitDepth: intInRange(options.maxSplitDepth, 1, 6),
        maxSlicesPerTitle: intInRange(options.maxSlicesPerTitle, 1, 10000),
        maxRetries: intInRange(options.maxRetries, 0, 5),
        probeProfiles: intInRange(options.probeProfiles, 1, 2500),
        agentBusyWaitSeconds: intInRange(options.agentBusyWaitSeconds, 1, 300),
        agentBusyMaxWaits: intInRange(options.agentBusyMaxWaits, 0, 120),
        idlePollSeconds: intInRange(options.idlePollSeconds, 0, 300),
        idleMaxPolls: intInRange(options.idleMaxPolls, 0, 10000),
        parallelExports: intInRange(options.parallelExports, 1, 10),
        skipProductUpload: Boolean(options.skipProductUpload),
        outPath: options.out,
        logPath: options.logPath,
        // Auth bypass forces a dry run even without --dry-run.
        dryRun: Boolean(options.dryRun || shouldBypassAuth())
    });
    const { payload, outPath } = result;
    printOutput({
        ...payload,
        out: outPath
    });
    // Dry runs never fail here; real runs must report "completed".
    const finishedCleanly = payload.dryRun || payload.summary.workflowStatus === "completed";
    if (!finishedCleanly && !options.allowPartialSuccess) {
        throw new Error(buildSalesNavigatorWorkflowFailureMessage(payload.summary));
    }
});
|
|
3127
|
+
program
|
|
3128
|
+
.command("salesnav:ensure-count")
|
|
3129
|
+
.description("Ensure the workspace has at least the target number of Sales Navigator people rows by importing historical BigQuery windows directly.")
|
|
3130
|
+
.option("--target-count <number>", "Minimum linkedin_sales_nav_people rows to guarantee", "200000")
|
|
3131
|
+
.option("--scope <scope>", "Historical scope: all-sales-people|hr-function-included", "all-sales-people")
|
|
3132
|
+
.option("--org-id <id>", "Workspace org id. Defaults to the active CLI org.")
|
|
3133
|
+
.option("--start-offset <number>", "BigQuery offset override. By default the CLI resumes from prior historical backfill runs.")
|
|
3134
|
+
.option("--window-size <number>", "How many historical contacts to request from BigQuery per window", String(salesNavigatorHistoricalBackfillDefaults.windowSize))
|
|
3135
|
+
.option("--max-windows <number>", "Maximum number of BigQuery windows to import in this invocation", "10")
|
|
3136
|
+
.option("--page-size <number>", "BigQuery page size per API read", String(salesNavigatorHistoricalBackfillDefaults.pageSize))
|
|
3137
|
+
.option("--upsert-batch-size <number>", "Supabase upsert batch size", String(salesNavigatorHistoricalBackfillDefaults.upsertBatchSize))
|
|
3138
|
+
.option("--min-upsert-batch-size <number>", "Smallest batch size allowed after timeout-driven splitting", String(salesNavigatorHistoricalBackfillDefaults.minUpsertBatchSize))
|
|
3139
|
+
.option("--max-upsert-retries <number>", "How many retry rounds to tolerate for timeout-prone writes", String(salesNavigatorHistoricalBackfillDefaults.maxUpsertRetries))
|
|
3140
|
+
.option("--retry-delay-ms <number>", "Base retry delay in milliseconds for write retries", String(salesNavigatorHistoricalBackfillDefaults.retryDelayMs))
|
|
3141
|
+
.option("--out <path>", "Optional local JSON output path")
|
|
3142
|
+
.option("--dry-run", "Preview the historical import plan without touching BigQuery or Supabase", false)
|
|
3143
|
+
.action(async (options) => {
|
|
3144
|
+
const targetCount = z.coerce.number().int().min(1).parse(options.targetCount);
|
|
3145
|
+
const scope = z.enum(["all-sales-people", "hr-function-included"]).parse(options.scope);
|
|
3146
|
+
const explicitStartOffset = typeof options.startOffset === "string" && options.startOffset.trim().length > 0
|
|
3147
|
+
? z.coerce.number().int().min(0).parse(options.startOffset)
|
|
3148
|
+
: null;
|
|
3149
|
+
const windowSize = z.coerce.number().int().min(1).parse(options.windowSize);
|
|
3150
|
+
const maxWindows = z.coerce.number().int().min(1).max(100).parse(options.maxWindows);
|
|
3151
|
+
const pageSize = z.coerce.number().int().min(1).parse(options.pageSize);
|
|
3152
|
+
const upsertBatchSize = z.coerce.number().int().min(1).parse(options.upsertBatchSize);
|
|
3153
|
+
const minUpsertBatchSize = z.coerce.number().int().min(1).parse(options.minUpsertBatchSize);
|
|
3154
|
+
const maxUpsertRetries = z.coerce.number().int().min(0).parse(options.maxUpsertRetries);
|
|
3155
|
+
const retryDelayMs = z.coerce.number().int().min(0).parse(options.retryDelayMs);
|
|
3156
|
+
if (minUpsertBatchSize > upsertBatchSize) {
|
|
3157
|
+
throw new Error("--min-upsert-batch-size must be less than or equal to --upsert-batch-size.");
|
|
3158
|
+
}
|
|
3159
|
+
if (Boolean(options.dryRun)) {
|
|
3160
|
+
const plan = buildSalesNavigatorHistoricalBackfillPlan({
|
|
3161
|
+
targetCount,
|
|
3162
|
+
currentCount: null,
|
|
3163
|
+
startOffset: explicitStartOffset ?? 0,
|
|
3164
|
+
windowSize,
|
|
3165
|
+
maxWindows
|
|
3166
|
+
});
|
|
3167
|
+
const payload = {
|
|
3168
|
+
status: "ok",
|
|
3169
|
+
dryRun: true,
|
|
3170
|
+
mode: "historical-bigquery-backfill",
|
|
3171
|
+
orgId: options.orgId ?? null,
|
|
3172
|
+
scope,
|
|
3173
|
+
targetCount,
|
|
3174
|
+
resumedFromHistory: false,
|
|
3175
|
+
plan
|
|
3176
|
+
};
|
|
3177
|
+
if (options.out) {
|
|
3178
|
+
await writeJsonFile(options.out, payload);
|
|
3179
|
+
}
|
|
3180
|
+
printOutput(payload);
|
|
3181
|
+
return;
|
|
3182
|
+
}
|
|
3183
|
+
let sessionOrgId = null;
|
|
3184
|
+
if (!shouldBypassAuth()) {
|
|
3185
|
+
const session = await requireAuthSession();
|
|
3186
|
+
sessionOrgId = session.user.orgId ?? null;
|
|
3187
|
+
}
|
|
3188
|
+
const orgId = resolveSalesNavigatorHistoricalBackfillOrgId({
|
|
3189
|
+
explicitOrgId: options.orgId,
|
|
3190
|
+
env: process.env,
|
|
3191
|
+
sessionOrgId
|
|
3192
|
+
});
|
|
3193
|
+
const config = resolveSalesNavigatorHistoricalBackfillConfig(process.env);
|
|
3194
|
+
const supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey, {
|
|
3195
|
+
auth: { persistSession: false }
|
|
3196
|
+
});
|
|
3197
|
+
const resumeState = explicitStartOffset === null
|
|
3198
|
+
? await resolveSalesNavigatorHistoricalBackfillResumeState({
|
|
3199
|
+
supabase,
|
|
3200
|
+
orgId,
|
|
3201
|
+
scope,
|
|
3202
|
+
windowSize,
|
|
3203
|
+
fallbackOffset: 0
|
|
3204
|
+
})
|
|
3205
|
+
: {
|
|
3206
|
+
startOffset: explicitStartOffset,
|
|
3207
|
+
resumedFromHistory: false,
|
|
3208
|
+
matchedHistoryRows: 0,
|
|
3209
|
+
reason: "fallback"
|
|
3210
|
+
};
|
|
3211
|
+
if (resumeState.resumedFromHistory) {
|
|
3212
|
+
writeProgress(`Resuming historical Sales Navigator backfill from offset ${resumeState.startOffset} based on prior CLI runs.`);
|
|
3213
|
+
}
|
|
3214
|
+
else if (explicitStartOffset !== null) {
|
|
3215
|
+
writeProgress(`Using explicit historical Sales Navigator backfill offset ${explicitStartOffset}.`);
|
|
3216
|
+
}
|
|
3217
|
+
const summary = await ensureSalesNavigatorPeopleCount({
|
|
3218
|
+
config,
|
|
3219
|
+
orgId,
|
|
3220
|
+
targetCount,
|
|
3221
|
+
scope,
|
|
3222
|
+
startOffset: resumeState.startOffset,
|
|
3223
|
+
resumedFromHistory: resumeState.resumedFromHistory,
|
|
3224
|
+
windowSize,
|
|
3225
|
+
maxWindows,
|
|
3226
|
+
pageSize,
|
|
3227
|
+
upsertBatchSize,
|
|
3228
|
+
minUpsertBatchSize,
|
|
3229
|
+
maxUpsertRetries,
|
|
3230
|
+
retryDelayMs,
|
|
3231
|
+
onProgress: (event) => {
|
|
3232
|
+
if (event.type === "window-start") {
|
|
3233
|
+
writeProgress(`Starting historical Sales Navigator backfill window ${event.windowIndex + 1}: offset ${event.offset}, limit ${event.limit}.`);
|
|
3234
|
+
return;
|
|
3235
|
+
}
|
|
3236
|
+
if (event.type === "window-progress") {
|
|
3237
|
+
writeProgress(`Historical window ${event.windowIndex + 1}: ${event.processed}/${event.totalResults} rows imported (${event.percent}%).`);
|
|
3238
|
+
return;
|
|
3239
|
+
}
|
|
3240
|
+
writeProgress(`Historical window ${event.windowIndex + 1} complete: count ${event.countBefore} -> ${event.countAfter} (delta ${event.countDelta}).`);
|
|
3241
|
+
}
|
|
3242
|
+
});
|
|
3243
|
+
const payload = {
|
|
3244
|
+
status: summary.status,
|
|
3245
|
+
dryRun: false,
|
|
3246
|
+
mode: "historical-bigquery-backfill",
|
|
3247
|
+
orgId: summary.orgId,
|
|
3248
|
+
scope: summary.scope,
|
|
3249
|
+
targetCount: summary.targetCount,
|
|
3250
|
+
initialCount: summary.initialCount,
|
|
3251
|
+
currentCount: summary.currentCount,
|
|
3252
|
+
resumedFromHistory: summary.resumedFromHistory,
|
|
3253
|
+
startOffset: summary.startOffset,
|
|
3254
|
+
nextOffset: summary.nextOffset,
|
|
3255
|
+
exhausted: summary.exhausted,
|
|
3256
|
+
completedWindows: summary.completedWindows,
|
|
3257
|
+
windows: summary.windows
|
|
3258
|
+
};
|
|
3259
|
+
if (options.out) {
|
|
3260
|
+
await writeJsonFile(options.out, payload);
|
|
3261
|
+
}
|
|
3262
|
+
if (summary.status !== "ok") {
|
|
3263
|
+
throw new Error(`Historical Sales Navigator backfill stopped at ${summary.currentCount} rows before reaching the target ${summary.targetCount}.`);
|
|
3264
|
+
}
|
|
3265
|
+
printOutput(payload);
|
|
3266
|
+
});
|
|
1873
3267
|
program
|
|
1874
3268
|
.command("salesnav:crawl")
|
|
1875
3269
|
.description("Adaptively split broad LinkedIn Sales Navigator people searches into exportable slices and store every finished slice through Salesprompter.")
|
|
@@ -1884,7 +3278,12 @@ program
|
|
|
1884
3278
|
.option("--probe-profiles <number>", "Profiles to scrape while probing whether a slice is still too broad", "100")
|
|
1885
3279
|
.option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
|
|
1886
3280
|
.option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the slice", "20")
|
|
3281
|
+
.option("--idle-poll-seconds <number>", "Seconds to wait before polling durable crawl status when remote slices are still running", "10")
|
|
3282
|
+
.option("--idle-max-polls <number>", "How many no-claim status polls to tolerate before the crawl is considered stalled", "180")
|
|
3283
|
+
.option("--parallel-exports <number>", "How many Sales Navigator slices to export concurrently in this invocation", "3")
|
|
3284
|
+
.option("--allow-partial-success", "Exit 0 even when the durable crawl finishes with failures or remains non-terminal", false)
|
|
1887
3285
|
.option("--out <path>", "Optional local JSON output path")
|
|
3286
|
+
.option("--log-path <path>", "Optional JSONL log path with timestamps, trace id, and Sales Navigator slice metadata")
|
|
1888
3287
|
.option("--dry-run", "Preview the adaptive crawl plan without exporting anything", false)
|
|
1889
3288
|
.action(async (options) => {
|
|
1890
3289
|
const queryUrl = z.string().url().optional().parse(options.queryUrl);
|
|
@@ -1897,7 +3296,30 @@ program
|
|
|
1897
3296
|
const probeProfiles = z.coerce.number().int().min(1).max(2500).parse(options.probeProfiles);
|
|
1898
3297
|
const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
|
|
1899
3298
|
const agentBusyMaxWaits = z.coerce.number().int().min(0).max(120).parse(options.agentBusyMaxWaits);
|
|
3299
|
+
const idlePollSeconds = z.coerce.number().int().min(0).max(300).parse(options.idlePollSeconds);
|
|
3300
|
+
const idleMaxPolls = z.coerce.number().int().min(0).max(10000).parse(options.idleMaxPolls);
|
|
3301
|
+
const parallelExports = z.coerce.number().int().min(1).max(10).parse(options.parallelExports);
|
|
1900
3302
|
const effectiveDryRun = Boolean(options.dryRun || shouldBypassAuth());
|
|
3303
|
+
const logger = await createWorkflowLogger({
|
|
3304
|
+
logPath: options.logPath ?? buildSalesNavigatorCrawlLogPath(jobId ?? queryUrl ?? "salesnav-crawl")
|
|
3305
|
+
});
|
|
3306
|
+
await logger.log("salesnav.crawl.command.started", {
|
|
3307
|
+
queryUrl: queryUrl ?? null,
|
|
3308
|
+
jobId: jobId ?? null,
|
|
3309
|
+
maxResultsPerSearch,
|
|
3310
|
+
numberOfProfiles,
|
|
3311
|
+
slicePreset: options.slicePreset,
|
|
3312
|
+
maxSplitDepth,
|
|
3313
|
+
maxSlices,
|
|
3314
|
+
maxRetries,
|
|
3315
|
+
probeProfiles,
|
|
3316
|
+
agentBusyWaitSeconds,
|
|
3317
|
+
agentBusyMaxWaits,
|
|
3318
|
+
idlePollSeconds,
|
|
3319
|
+
idleMaxPolls,
|
|
3320
|
+
parallelExports,
|
|
3321
|
+
dryRun: effectiveDryRun
|
|
3322
|
+
});
|
|
1901
3323
|
if (effectiveDryRun) {
|
|
1902
3324
|
if (jobId) {
|
|
1903
3325
|
throw new Error("--dry-run does not support --job-id. Use --query-url instead.");
|
|
@@ -1909,6 +3331,8 @@ program
|
|
|
1909
3331
|
status: "ok",
|
|
1910
3332
|
dryRun: true,
|
|
1911
3333
|
mode: "adaptive",
|
|
3334
|
+
traceId: logger.traceId,
|
|
3335
|
+
logPath: logger.logPath,
|
|
1912
3336
|
dimensionPreset: "human-resources-adaptive",
|
|
1913
3337
|
query: (() => {
|
|
1914
3338
|
const preview = buildSalesNavigatorCrawlPreview({
|
|
@@ -1938,6 +3362,15 @@ program
|
|
|
1938
3362
|
};
|
|
1939
3363
|
})()
|
|
1940
3364
|
};
|
|
3365
|
+
await logger.log("salesnav.crawl.dry-run.preview", {
|
|
3366
|
+
sourceQueryUrl: payload.query.sourceQueryUrl,
|
|
3367
|
+
root: summarizeSalesNavigatorQuery(payload.query.rootQueryUrl, payload.query.rootAppliedFilters),
|
|
3368
|
+
dimensionOrder: payload.query.dimensionOrder,
|
|
3369
|
+
firstSplitQueries: payload.query.firstSplitQueries.map((attempt) => ({
|
|
3370
|
+
splitTrail: attempt.splitTrail,
|
|
3371
|
+
...summarizeSalesNavigatorQuery(attempt.slicedQueryUrl, attempt.appliedFilters)
|
|
3372
|
+
}))
|
|
3373
|
+
});
|
|
1941
3374
|
if (options.out) {
|
|
1942
3375
|
await writeJsonFile(options.out, payload);
|
|
1943
3376
|
}
|
|
@@ -1962,23 +3395,57 @@ program
|
|
|
1962
3395
|
slicePreset: options.slicePreset,
|
|
1963
3396
|
maxResultsPerSearch,
|
|
1964
3397
|
numberOfProfiles,
|
|
3398
|
+
rawPayload: {
|
|
3399
|
+
workflow: "salesnav:crawl",
|
|
3400
|
+
traceId: logger.traceId,
|
|
3401
|
+
command: {
|
|
3402
|
+
sourceQueryUrl: queryUrl,
|
|
3403
|
+
slicePreset: options.slicePreset,
|
|
3404
|
+
maxResultsPerSearch,
|
|
3405
|
+
numberOfProfiles,
|
|
3406
|
+
maxSplitDepth,
|
|
3407
|
+
maxSlices,
|
|
3408
|
+
maxRetries,
|
|
3409
|
+
probeProfiles,
|
|
3410
|
+
agentBusyWaitSeconds,
|
|
3411
|
+
agentBusyMaxWaits,
|
|
3412
|
+
idlePollSeconds,
|
|
3413
|
+
idleMaxPolls,
|
|
3414
|
+
parallelExports
|
|
3415
|
+
}
|
|
3416
|
+
},
|
|
1965
3417
|
rootSlice: {
|
|
1966
3418
|
slicedQueryUrl: seed.slicedQueryUrl,
|
|
1967
3419
|
appliedFilters: seed.appliedFilters,
|
|
1968
3420
|
depth: seed.depth,
|
|
1969
|
-
splitTrail: seed.splitTrail
|
|
3421
|
+
splitTrail: seed.splitTrail,
|
|
3422
|
+
rawPayload: {
|
|
3423
|
+
workflow: "salesnav:crawl",
|
|
3424
|
+
traceId: logger.traceId
|
|
3425
|
+
}
|
|
1970
3426
|
}
|
|
1971
|
-
});
|
|
3427
|
+
}, logger.traceId);
|
|
1972
3428
|
session = created.session;
|
|
1973
3429
|
createResult = {
|
|
1974
3430
|
resumed: created.value.resumed,
|
|
1975
3431
|
job: created.value.job
|
|
1976
3432
|
};
|
|
1977
3433
|
resolvedJobId = created.value.job.id;
|
|
3434
|
+
await logger.log("salesnav.crawl.job.ready", {
|
|
3435
|
+
jobId: resolvedJobId,
|
|
3436
|
+
resumed: created.value.resumed,
|
|
3437
|
+
sourceQueryUrl: queryUrl,
|
|
3438
|
+
rootSlice: summarizeSalesNavigatorQuery(seed.slicedQueryUrl, seed.appliedFilters)
|
|
3439
|
+
});
|
|
1978
3440
|
}
|
|
1979
3441
|
else {
|
|
1980
|
-
const status = await getSalesNavigatorCrawlStatus(session, resolvedJobId);
|
|
3442
|
+
const status = await getSalesNavigatorCrawlStatus(session, resolvedJobId, logger.traceId);
|
|
1981
3443
|
session = status.session;
|
|
3444
|
+
await logger.log("salesnav.crawl.job.resumed", {
|
|
3445
|
+
jobId: resolvedJobId,
|
|
3446
|
+
sourceQueryUrl: status.value.job.sourceQueryUrl,
|
|
3447
|
+
status: status.value.job.status
|
|
3448
|
+
});
|
|
1982
3449
|
}
|
|
1983
3450
|
if (!resolvedJobId) {
|
|
1984
3451
|
throw new Error("Failed to determine Sales Navigator crawl job id.");
|
|
@@ -1989,12 +3456,19 @@ program
|
|
|
1989
3456
|
maxRetries,
|
|
1990
3457
|
probeProfiles,
|
|
1991
3458
|
agentBusyWaitSeconds,
|
|
1992
|
-
agentBusyMaxWaits
|
|
3459
|
+
agentBusyMaxWaits,
|
|
3460
|
+
idlePollSeconds,
|
|
3461
|
+
idleMaxPolls,
|
|
3462
|
+
parallelExports,
|
|
3463
|
+
traceId: logger.traceId,
|
|
3464
|
+
logger
|
|
1993
3465
|
});
|
|
1994
3466
|
const payload = {
|
|
1995
3467
|
status: "ok",
|
|
1996
3468
|
dryRun: false,
|
|
1997
3469
|
mode: "durable",
|
|
3470
|
+
traceId: logger.traceId,
|
|
3471
|
+
logPath: logger.logPath,
|
|
1998
3472
|
jobId: resolvedJobId,
|
|
1999
3473
|
resumed: createResult?.resumed ?? true,
|
|
2000
3474
|
sourceQueryUrl: crawl.job.sourceQueryUrl,
|
|
@@ -2017,10 +3491,20 @@ program
|
|
|
2017
3491
|
: null,
|
|
2018
3492
|
lastOutcome: crawl.lastOutcome
|
|
2019
3493
|
};
|
|
3494
|
+
await logger.log("salesnav.crawl.command.completed", {
|
|
3495
|
+
jobId: resolvedJobId,
|
|
3496
|
+
status: crawl.job.status,
|
|
3497
|
+
claimedSlices: crawl.claimedSlices,
|
|
3498
|
+
truncated: crawl.truncated,
|
|
3499
|
+
lastOutcome: crawl.lastOutcome
|
|
3500
|
+
});
|
|
2020
3501
|
if (options.out) {
|
|
2021
3502
|
await writeJsonFile(options.out, payload);
|
|
2022
3503
|
}
|
|
2023
3504
|
printOutput(payload);
|
|
3505
|
+
if ((crawl.job.status !== "completed" || crawl.truncated) && !options.allowPartialSuccess) {
|
|
3506
|
+
throw new Error(`Sales Navigator crawl did not finish cleanly. status=${crawl.job.status} truncated=${crawl.truncated} failedSlices=${crawl.job.failedSlices} runningSlices=${crawl.job.runningSlices} queuedSlices=${crawl.job.queuedSlices}`);
|
|
3507
|
+
}
|
|
2024
3508
|
});
|
|
2025
3509
|
program
|
|
2026
3510
|
.command("salesnav:crawl:status")
|
|
@@ -2081,7 +3565,13 @@ program
|
|
|
2081
3565
|
appliedFilters: item.appliedFilters,
|
|
2082
3566
|
maxResultsPerSearch,
|
|
2083
3567
|
numberOfProfiles,
|
|
2084
|
-
slicePreset: options.slicePreset
|
|
3568
|
+
slicePreset: options.slicePreset,
|
|
3569
|
+
rawPayload: {
|
|
3570
|
+
workflow: "salesnav:export",
|
|
3571
|
+
sourceQueryUrl: item.sourceQueryUrl,
|
|
3572
|
+
slicedQueryUrl: item.slicedQueryUrl,
|
|
3573
|
+
appliedFilters: item.appliedFilters
|
|
3574
|
+
}
|
|
2085
3575
|
});
|
|
2086
3576
|
exported.push(result);
|
|
2087
3577
|
}
|