salesprompter-cli 0.1.19 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,11 +1,13 @@
1
1
  #!/usr/bin/env node
2
2
  import { spawn } from "node:child_process";
3
- import { access } from "node:fs/promises";
3
+ import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
4
4
  import { createRequire } from "node:module";
5
+ import os from "node:os";
5
6
  import path from "node:path";
6
7
  import { emitKeypressEvents } from "node:readline";
7
8
  import { createInterface } from "node:readline/promises";
8
9
  import { setTimeout as delay } from "node:timers/promises";
10
+ import { createClient } from "@supabase/supabase-js";
9
11
  import { Command } from "commander";
10
12
  import { z } from "zod";
11
13
  import { clearAuthSession, loginWithBrowserConnect, loginWithDeviceFlow, loginWithToken, requireAuthSession, shouldBypassAuth, verifySession } from "./auth.js";
@@ -18,9 +20,11 @@ import { analyzeHistoricalQueries } from "./historical-queries.js";
18
20
  import { buildHistoricalVendorIcp, buildVendorIcp } from "./icp-templates.js";
19
21
  import { InstantlySyncProvider } from "./instantly.js";
20
22
  import { crawlLinkedInProductCategory } from "./linkedin-products.js";
23
+ import { claimValidatedSalesNavigatorSessionCookieForCli } from "./linkedin-session.js";
21
24
  import { buildLeadlistsFunnelQueries } from "./leadlists-funnel.js";
22
25
  import { readJsonFile, splitCsv, writeJsonFile, writeTextFile } from "./io.js";
23
- import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
26
+ import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, deriveSalesNavigatorTitleQuerySeeds, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
27
+ import { buildSalesNavigatorHistoricalBackfillPlan, ensureSalesNavigatorPeopleCount, resolveSalesNavigatorHistoricalBackfillConfig, resolveSalesNavigatorHistoricalBackfillResumeState, resolveSalesNavigatorHistoricalBackfillOrgId, salesNavigatorHistoricalBackfillDefaults } from "./salesnav-backfill.js";
24
28
  const require = createRequire(import.meta.url);
25
29
  const { version: packageVersion } = require("../package.json");
26
30
  const program = new Command();
@@ -50,12 +54,31 @@ const LinkedInProductIngestResponseSchema = z.object({
50
54
  upserted: z.number().int().nonnegative(),
51
55
  totalInCatalog: z.number().int().nonnegative().optional()
52
56
  });
57
// Shape of the diagnostics object the API may attach to a Sales Navigator
// export launch: the agents that were considered, which of them were running
// or busy, and the capabilities of the agent that was ultimately selected.
const SalesNavigatorLaunchDiagnosticsSchema = z.object({
  // Agent ids in the order they were considered for launch.
  orderedCandidateAgentIds: z.array(z.string().min(1)),
  runningAgentIds: z.array(z.string().min(1)),
  busyAgentIds: z.array(z.string().min(1)),
  // The agent that was chosen for this launch.
  selectedAgent: z.object({
    id: z.string().min(1),
    name: z.string().min(1),
    // null when the agent reports no parallelism limit.
    maxParallelism: z.number().int().nullable(),
    fileMgmt: z.string().min(1).nullable(),
    hasWebhook: z.boolean(),
    hasStoredSessionCookie: z.boolean(),
    storedIdentityCount: z.number().int().nonnegative(),
    supportsDirectSessionInjection: z.boolean()
  })
});
53
72
  const SalesNavigatorExportStartResponseSchema = z.object({
54
73
  status: z.literal("accepted"),
55
74
  runId: z.string().min(1),
56
75
  exportStatus: z.literal("pending"),
57
76
  agentId: z.string().min(1),
58
77
  containerId: z.string().min(1),
78
+ selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
79
+ selectedSessionUserEmail: z.string().min(1).nullable().optional(),
80
+ selectedSessionUserHandle: z.string().min(1).nullable().optional(),
81
+ launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
59
82
  sourceQueryUrl: z.string().url(),
60
83
  slicedQueryUrl: z.string().url(),
61
84
  previousContainerId: z.string().min(1).nullable().optional()
@@ -79,6 +102,10 @@ const SalesNavigatorExportRunSchema = z.object({
79
102
  resultCsvUrl: z.string().url().nullable().optional(),
80
103
  agentId: z.string().min(1),
81
104
  containerId: z.string().min(1),
105
+ selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
106
+ selectedSessionUserEmail: z.string().min(1).nullable().optional(),
107
+ selectedSessionUserHandle: z.string().min(1).nullable().optional(),
108
+ launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
82
109
  sourceQueryUrl: z.string().url(),
83
110
  slicedQueryUrl: z.string().url(),
84
111
  createdAt: z.string().datetime(),
@@ -99,6 +126,10 @@ const SalesNavigatorExportResponseSchema = z.object({
99
126
  resultCsvUrl: z.string().url().nullable().optional(),
100
127
  agentId: z.string().min(1),
101
128
  containerId: z.string().min(1),
129
+ selectedSessionCookieSha256: z.string().min(1).nullable().optional(),
130
+ selectedSessionUserEmail: z.string().min(1).nullable().optional(),
131
+ selectedSessionUserHandle: z.string().min(1).nullable().optional(),
132
+ launchDiagnostics: SalesNavigatorLaunchDiagnosticsSchema.nullable().optional(),
102
133
  sourceQueryUrl: z.string().url(),
103
134
  slicedQueryUrl: z.string().url()
104
135
  });
@@ -188,6 +219,12 @@ function printOutput(value) {
188
219
  const space = runtimeOutputOptions.json ? undefined : 2;
189
220
  process.stdout.write(`${JSON.stringify(value, null, space)}\n`);
190
221
  }
222
// Emit a human-readable progress line on stderr. Suppressed entirely when the
// CLI is in JSON or quiet output mode so machine-readable output stays clean.
function writeProgress(message) {
  const { json, quiet } = runtimeOutputOptions;
  if (!json && !quiet) {
    process.stderr.write(`${message}\n`);
  }
}
191
228
  function applyGlobalOutputOptions(actionCommand) {
192
229
  const globalOptions = actionCommand.optsWithGlobals();
193
230
  runtimeOutputOptions.json = Boolean(globalOptions.json);
@@ -687,6 +724,430 @@ async function fetchWorkspaceLeadSearch(session, requestBody) {
687
724
  function buildLinkedInProductsOutputPath(categorySlug) {
688
725
  return `./data/linkedin-products-${categorySlug}.json`;
689
726
  }
727
// Default local JSON artifact path for a product-category → Sales Navigator run.
function buildLinkedInProductCategorySalesNavigatorOutputPath(categorySlug) {
  return ["./data/salesnav-product-category-", categorySlug, ".json"].join("");
}
730
// Job statuses that mean a Sales Navigator crawl has finished, whether fully
// successfully or with some failed slices.
const SALES_NAVIGATOR_TERMINAL_JOB_STATUSES = new Set(["completed", "completed_with_failures"]);
// Returns true when `status` is one of the terminal crawl-job statuses above.
function isSalesNavigatorCrawlJobTerminal(status) {
  return SALES_NAVIGATOR_TERMINAL_JOB_STATUSES.has(status);
}
734
// Build a reasonably unique trace id: `<prefix>-<epoch ms>-<8 base36 chars>`.
// Not cryptographically secure; used only for log correlation.
function buildWorkflowTraceId(prefix) {
  const timestamp = Date.now();
  const randomSuffix = Math.random().toString(36).slice(2, 10);
  return [prefix, timestamp, randomSuffix].join("-");
}
737
// Derive the JSONL workflow log path from the user input, falling back to a
// fixed slug when the input slugifies to an empty string.
function buildSalesNavigatorWorkflowLogPath(input) {
  let slug = slugify(input);
  if (!slug) {
    slug = "salesnav-product-category";
  }
  return `./data/${slug}-salesnav.log.jsonl`;
}
741
// Derive the JSONL crawl log path from the user input, falling back to a
// fixed slug when the input slugifies to an empty string.
function buildSalesNavigatorCrawlLogPath(input) {
  let slug = slugify(input);
  if (!slug) {
    slug = "salesnav-crawl";
  }
  return `./data/${slug}-crawl.log.jsonl`;
}
745
// Extract the `query` search parameter from a Sales Navigator URL.
// Returns null when the URL cannot be parsed or has no `query` param.
// Note: URLSearchParams already percent-decodes once; the extra
// decodeURIComponent handles double-encoded values, and a malformed escape
// simply returns the once-decoded string unchanged.
function decodeSalesNavigatorQueryParam(url) {
  let rawQuery;
  try {
    rawQuery = new URL(url).searchParams.get("query");
  } catch {
    return null; // not a parseable URL
  }
  if (!rawQuery) {
    return null;
  }
  try {
    return decodeURIComponent(rawQuery);
  } catch {
    return rawQuery; // malformed escape sequence — keep the once-decoded value
  }
}
762
// Create a JSONL file logger bound to a trace id. Ensures the log directory
// exists up front; each `log` call appends one JSON line and mirrors the
// event name to stderr via writeProgress.
async function createWorkflowLogger(options) {
  const { logPath } = options;
  const traceId = options.traceId ?? buildWorkflowTraceId("salesprompter-cli");
  // Create the directory before the first append so appendFile cannot fail
  // on a missing parent.
  await mkdir(path.dirname(logPath), { recursive: true });
  const log = async (event, metadata = {}) => {
    const timestamp = new Date().toISOString();
    const entry = { timestamp, traceId, event, metadata };
    await appendFile(logPath, `${JSON.stringify(entry)}\n`, "utf8");
    writeProgress(`[${timestamp}] ${event}`);
  };
  return { traceId, logPath, log };
}
781
// Bundle a query URL with its decoded `query` parameter and applied filters
// for logging purposes.
function summarizeSalesNavigatorQuery(url, appliedFilters) {
  const decodedQuery = decodeSalesNavigatorQueryParam(url);
  return { url, decodedQuery, appliedFilters };
}
788
// Placeholder heuristic: root slices are never pre-split today. Both
// parameters are intentionally unused but kept so the call signature stays
// stable for future heuristics.
function shouldPreSplitSalesNavigatorRootSlice(slice, maxSplitDepth) {
  void [slice, maxSplitDepth];
  return false;
}
793
// Header fragment carrying the workflow trace id; empty when no trace id
// (falsy) so spreading it into a headers object is always safe.
function buildTraceHeaders(traceId) {
  if (!traceId) {
    return {};
  }
  return { "X-Salesprompter-Trace-Id": traceId };
}
796
// Flatten one crawl result into the per-title summary used by the workflow
// report. A crawl counts as successful only when the job completed AND the
// crawl was not truncated by slice limits.
function buildSalesNavigatorWorkflowCrawlSummary(crawl) {
  const { job, truncated } = crawl;
  const { status, importedPeople, exportedSlices, failedSlices, queuedSlices, runningSlices } = job;
  return {
    jobStatus: status,
    importedPeople,
    exportedSlices,
    failedSlices,
    queuedSlices,
    runningSlices,
    truncated,
    successful: status === "completed" && !truncated
  };
}
809
// Aggregate per-title crawl summaries into one workflow-level summary.
// The workflow status degrades to "completed_with_failures" as soon as any
// single crawl was not fully successful.
function buildSalesNavigatorWorkflowSummary(crawls) {
  const totals = {
    workflowStatus: "completed",
    totalImportedPeople: 0,
    totalExportedSlices: 0,
    totalFailedSlices: 0,
    completedTitles: 0,
    completedWithFailuresTitles: 0,
    runningTitles: 0,
    truncatedTitles: 0
  };
  for (const crawl of crawls) {
    const titleSummary = crawl.summary;
    totals.totalImportedPeople += titleSummary.importedPeople;
    totals.totalExportedSlices += titleSummary.exportedSlices;
    totals.totalFailedSlices += titleSummary.failedSlices;
    if (titleSummary.truncated) {
      totals.truncatedTitles += 1;
    }
    if (titleSummary.jobStatus === "completed") {
      totals.completedTitles += 1;
    } else if (titleSummary.jobStatus === "completed_with_failures") {
      totals.completedWithFailuresTitles += 1;
    } else {
      // Anything non-terminal counts as still running.
      totals.runningTitles += 1;
    }
    if (!titleSummary.successful) {
      totals.workflowStatus = "completed_with_failures";
    }
  }
  return totals;
}
841
// Render a one-line failure message with the workflow counters, space-separated.
function buildSalesNavigatorWorkflowFailureMessage(summary) {
  const counters = [
    `completedTitles=${summary.completedTitles}`,
    `completedWithFailuresTitles=${summary.completedWithFailuresTitles}`,
    `runningTitles=${summary.runningTitles}`,
    `truncatedTitles=${summary.truncatedTitles}`,
    `totalFailedSlices=${summary.totalFailedSlices}`
  ].join(" ");
  return `Sales Navigator workflow completed with failures. ${counters}`;
}
851
// Sanity-check a generated seed query: every applied filter type and every
// filter value text must appear (case-insensitively) in the decoded `query`
// parameter of the seed URL. Returns the list of filters that failed the
// check, formatted as "type:missingValue1,missingValue2" (or "type:*" when
// only the type itself is absent).
function validateSalesNavigatorSeedQuery(seed) {
  const decodedQuery = decodeSalesNavigatorQueryParam(seed.queryUrl);
  const haystack = decodedQuery?.toLowerCase() ?? "";
  const missingFilters = [];
  for (const filter of seed.appliedFilters) {
    const missingValues = filter.values.filter((value) => !haystack.includes(value.text.toLowerCase()));
    if (!haystack.includes(filter.type.toLowerCase()) || missingValues.length > 0) {
      const valueList = missingValues.map((value) => value.text).join(",") || "*";
      missingFilters.push(`${filter.type}:${valueList}`);
    }
  }
  return {
    valid: missingFilters.length === 0,
    missingFilters,
    decodedQuery
  };
}
867
/**
 * End-to-end workflow: crawl a LinkedIn product category, derive one Sales
 * Navigator query seed per intended-role title, optionally upload the product
 * catalog, then run a durable Sales Navigator crawl per title.
 *
 * Every phase is logged to a JSONL file (via createWorkflowLogger) and the
 * final report is written to `outPath` as JSON. When `options.dryRun` is set,
 * the workflow stops after producing preview queries — no auth, upload, or
 * crawl is performed.
 *
 * Returns `{ outPath, payload }` where `payload` is the written report.
 * Throws after logging `workflow.failed` on any error.
 */
async function runSalesNavigatorFromProductCategoryWorkflow(options) {
  const logger = await createWorkflowLogger({
    logPath: options.logPath ?? buildSalesNavigatorWorkflowLogPath(options.input)
  });
  // Record the full effective configuration up front for reproducibility.
  await logger.log("workflow.started", {
    input: options.input,
    maxPages: options.maxPages,
    productLimit: options.productLimit ?? null,
    titleLimit: options.titleLimit ?? null,
    maxResultsPerSearch: options.maxResultsPerSearch,
    numberOfProfiles: options.numberOfProfiles,
    slicePreset: options.slicePreset,
    maxSplitDepth: options.maxSplitDepth,
    maxSlicesPerTitle: options.maxSlicesPerTitle,
    maxRetries: options.maxRetries,
    probeProfiles: options.probeProfiles,
    agentBusyWaitSeconds: options.agentBusyWaitSeconds,
    agentBusyMaxWaits: options.agentBusyMaxWaits,
    idlePollSeconds: options.idlePollSeconds,
    idleMaxPolls: options.idleMaxPolls,
    parallelExports: options.parallelExports,
    skipProductUpload: options.skipProductUpload,
    dryRun: options.dryRun
  });
  try {
    // Phase 1: scrape the LinkedIn product category pages.
    const scrape = await crawlLinkedInProductCategory({
      input: options.input,
      maxPages: options.maxPages,
      limit: options.productLimit
    });
    await logger.log("linkedin.category.scraped", {
      source: scrape.source,
      totalPagesFetched: scrape.totalPagesFetched,
      discoveredProducts: scrape.items.length,
      productNames: scrape.items.map((item) => item.productName)
    });
    // Phase 2: derive one Sales Navigator query seed per intended-role title.
    const titleSeeds = deriveSalesNavigatorTitleQuerySeeds({
      sourceProductUrl: scrape.source.productUrl,
      items: scrape.items,
      titleLimit: options.titleLimit
    });
    if (titleSeeds.length === 0) {
      throw new Error(`No intended-role job titles were found while crawling the LinkedIn product category ${scrape.source.category.name}.`);
    }
    const outPath = options.outPath ?? buildLinkedInProductCategorySalesNavigatorOutputPath(scrape.source.category.slug);
    // Build a first-split preview for every seed (used both for the dry-run
    // report and for the preview log event below).
    const previewQueries = titleSeeds.map((seed) => {
      const preview = buildSalesNavigatorCrawlPreview({
        sourceQueryUrl: seed.queryUrl,
        maxResultsPerSearch: options.maxResultsPerSearch,
        numberOfProfiles: options.numberOfProfiles,
        slicePreset: options.slicePreset
      });
      return {
        title: seed.title,
        queryUrl: seed.queryUrl,
        appliedFilters: seed.appliedFilters,
        sourceProduct: seed.sourceProduct,
        matchedProductCount: seed.matchedProductCount,
        firstSplitQueries: preview.firstSplit.map((attempt) => ({
          slicedQueryUrl: attempt.slicedQueryUrl,
          appliedFilters: attempt.appliedFilters,
          // Reduce each split-trail value to a stable, loggable subset.
          splitTrail: formatSalesNavigatorSplitTrail(attempt.splitTrail.map((entry) => ({
            ...entry,
            value: {
              id: entry.value.id,
              text: entry.value.text,
              selectionType: entry.value.selectionType
            }
          })))
        }))
      };
    });
    await logger.log("salesnav.title-seeds.derived", {
      titleCount: titleSeeds.length,
      titles: titleSeeds.map((seed) => ({
        title: seed.title,
        sourceProduct: seed.sourceProduct,
        matchedProductCount: seed.matchedProductCount,
        ...summarizeSalesNavigatorQuery(seed.queryUrl, seed.appliedFilters)
      }))
    });
    // Phase 3: validate only the first seed as a smoke test for the whole
    // batch — all seeds are generated by the same code path.
    const firstSeedValidation = validateSalesNavigatorSeedQuery(titleSeeds[0]);
    await logger.log("salesnav.first-query.validated", {
      title: titleSeeds[0]?.title ?? null,
      valid: firstSeedValidation.valid,
      missingFilters: firstSeedValidation.missingFilters,
      decodedQuery: firstSeedValidation.decodedQuery
    });
    if (!firstSeedValidation.valid) {
      throw new Error(`Generated Sales Navigator seed query for "${titleSeeds[0]?.title ?? "unknown"}" is missing expected filters: ${firstSeedValidation.missingFilters.join(", ")}.`);
    }
    await logger.log("salesnav.first-split.preview", {
      titles: previewQueries.map((query) => ({
        title: query.title,
        sourceProduct: query.sourceProduct,
        matchedProductCount: query.matchedProductCount,
        ...summarizeSalesNavigatorQuery(query.queryUrl, query.appliedFilters),
        firstSplitQueries: query.firstSplitQueries.map((split) => ({
          splitTrail: split.splitTrail,
          ...summarizeSalesNavigatorQuery(split.slicedQueryUrl, split.appliedFilters)
        }))
      }))
    });
    // Dry run: write the preview report and stop before any remote mutation.
    if (options.dryRun) {
      const payload = {
        status: "ok",
        dryRun: true,
        mode: "linkedin-product-category-to-salesnav",
        traceId: logger.traceId,
        logPath: logger.logPath,
        source: scrape.source,
        totalPagesFetched: scrape.totalPagesFetched,
        discoveredProducts: scrape.items.length,
        titleCount: titleSeeds.length,
        // Zeroed summary so the dry-run payload shape matches a real run.
        summary: {
          workflowStatus: "completed",
          totalImportedPeople: 0,
          totalExportedSlices: 0,
          totalFailedSlices: 0,
          completedTitles: 0,
          completedWithFailuresTitles: 0,
          runningTitles: 0,
          truncatedTitles: 0
        },
        uploaded: null,
        queries: previewQueries
      };
      await writeJsonFile(outPath, payload);
      await logger.log("workflow.completed", {
        outPath,
        dryRun: true,
        discoveredProducts: payload.discoveredProducts,
        titleCount: payload.titleCount
      });
      return { outPath, payload };
    }
    // Phase 4: authenticate, then optionally upload the scraped catalog.
    // `session` is rebound whenever a downstream call refreshes auth.
    let session = await requireAuthSession();
    let uploaded = null;
    if (!options.skipProductUpload) {
      await logger.log("linkedin.catalog.upload.started", {
        itemCount: scrape.items.length
      });
      uploaded = await uploadLinkedInProductsCatalog(session, {
        source: {
          input: scrape.source.input,
          kind: scrape.source.kind,
          query: scrape.source.query,
          companyUrl: scrape.source.companyUrl,
          productUrl: scrape.source.productUrl,
          category: scrape.source.category
        },
        items: scrape.items
      }, 100, logger.traceId);
      await logger.log("linkedin.catalog.upload.completed", uploaded);
    }
    // Phase 5: run one durable crawl job per title seed, sequentially.
    const crawls = [];
    for (const seed of titleSeeds) {
      writeProgress(`Starting durable Sales Navigator crawl for intended role "${seed.title}".`);
      const rootSlice = createSalesNavigatorCrawlSeed({
        sourceQueryUrl: seed.queryUrl,
        maxResultsPerSearch: options.maxResultsPerSearch,
        numberOfProfiles: options.numberOfProfiles,
        slicePreset: options.slicePreset
      });
      const created = await createOrResumeSalesNavigatorCrawlJob(session, {
        sourceQueryUrl: seed.queryUrl,
        slicePreset: options.slicePreset,
        maxResultsPerSearch: options.maxResultsPerSearch,
        numberOfProfiles: options.numberOfProfiles,
        // rawPayload is persisted server-side for later inspection/resume.
        rawPayload: {
          workflow: "linkedin-product-category-to-salesnav",
          traceId: logger.traceId,
          source: scrape.source,
          titleSeed: {
            title: seed.title,
            queryUrl: seed.queryUrl,
            appliedFilters: seed.appliedFilters,
            sourceProduct: seed.sourceProduct,
            matchedProductCount: seed.matchedProductCount
          }
        },
        rootSlice: {
          slicedQueryUrl: rootSlice.slicedQueryUrl,
          appliedFilters: rootSlice.appliedFilters,
          depth: rootSlice.depth,
          splitTrail: rootSlice.splitTrail,
          rawPayload: {
            traceId: logger.traceId,
            title: seed.title,
            sourceProduct: seed.sourceProduct,
            matchedProductCount: seed.matchedProductCount,
            source: scrape.source
          }
        }
      }, logger.traceId);
      session = created.session;
      await logger.log("salesnav.crawl.job.ready", {
        title: seed.title,
        sourceProduct: seed.sourceProduct,
        matchedProductCount: seed.matchedProductCount,
        resumed: created.value.resumed,
        jobId: created.value.job.id,
        rootSlice: {
          depth: rootSlice.depth,
          splitTrail: formatSalesNavigatorSplitTrail(rootSlice.splitTrail),
          ...summarizeSalesNavigatorQuery(rootSlice.slicedQueryUrl, rootSlice.appliedFilters)
        }
      });
      const crawl = await executeSalesNavigatorCrawlJob(session, created.value.job.id, {
        maxSplitDepth: options.maxSplitDepth,
        maxSlices: options.maxSlicesPerTitle,
        maxRetries: options.maxRetries,
        probeProfiles: options.probeProfiles,
        agentBusyWaitSeconds: options.agentBusyWaitSeconds,
        agentBusyMaxWaits: options.agentBusyMaxWaits,
        idlePollSeconds: options.idlePollSeconds,
        idleMaxPolls: options.idleMaxPolls,
        parallelExports: options.parallelExports,
        traceId: logger.traceId,
        logger
      });
      session = crawl.session;
      const crawlSummary = buildSalesNavigatorWorkflowCrawlSummary(crawl);
      await logger.log("salesnav.crawl.job.finished", {
        title: seed.title,
        jobId: created.value.job.id,
        summary: crawlSummary,
        lastOutcome: crawl.lastOutcome
      });
      crawls.push({
        title: seed.title,
        sourceProduct: seed.sourceProduct,
        matchedProductCount: seed.matchedProductCount,
        queryUrl: seed.queryUrl,
        jobId: created.value.job.id,
        resumed: created.value.resumed,
        claimedSlices: crawl.claimedSlices,
        truncated: crawl.truncated,
        activeSlice: crawl.activeSlice
          ? {
              id: crawl.activeSlice.id,
              slicedQueryUrl: crawl.activeSlice.slicedQueryUrl,
              depth: crawl.activeSlice.depth,
              splitTrail: formatSalesNavigatorSplitTrail(crawl.activeSlice.splitTrail)
            }
          : null,
        lastOutcome: crawl.lastOutcome,
        job: crawl.job,
        summary: crawlSummary
      });
    }
    // Phase 6: aggregate, persist the report, and log completion.
    const summary = buildSalesNavigatorWorkflowSummary(crawls);
    const payload = {
      status: "ok",
      dryRun: false,
      mode: "linkedin-product-category-to-salesnav",
      traceId: logger.traceId,
      logPath: logger.logPath,
      source: scrape.source,
      totalPagesFetched: scrape.totalPagesFetched,
      discoveredProducts: scrape.items.length,
      titleCount: titleSeeds.length,
      summary,
      uploaded,
      crawls
    };
    await writeJsonFile(outPath, payload);
    await logger.log("workflow.completed", {
      outPath,
      dryRun: false,
      uploaded,
      crawlCount: crawls.length,
      summary
    });
    return { outPath, payload };
  }
  catch (error) {
    // Log the failure with stack (when available), then rethrow unchanged so
    // the CLI's top-level handler decides the exit behavior.
    await logger.log("workflow.failed", {
      message: error instanceof Error ? error.message : String(error),
      stack: error instanceof Error ? error.stack ?? null : null
    });
    throw error;
  }
}
690
1151
  function collectStringOptionValue(value, previous = []) {
691
1152
  return [...previous, value];
692
1153
  }
@@ -696,6 +1157,7 @@ class SalesNavigatorExportRequestError extends Error {
696
1157
  runId;
697
1158
  agentId;
698
1159
  containerId;
1160
+ launchDiagnostics;
699
1161
  statusCode;
700
1162
  constructor(message, options) {
701
1163
  super(message);
@@ -706,8 +1168,10 @@ class SalesNavigatorExportRequestError extends Error {
706
1168
  this.runId = options.runId;
707
1169
  this.agentId = options.agentId;
708
1170
  this.containerId = options.containerId;
1171
+ this.launchDiagnostics = options.launchDiagnostics ?? null;
709
1172
  }
710
1173
  }
1174
// Abort the export kickoff POST if the API has not accepted it within 90 seconds.
const SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS = 90_000;
711
1175
  async function withRefreshableAuthSession(session, run, contextLabel = "Salesprompter session expired during crawl. Refreshing login...") {
712
1176
  let currentSession = session;
713
1177
  let authRefreshCount = 0;
@@ -748,7 +1212,7 @@ async function fetchCliJson(session, request, schema) {
748
1212
  return schema.parse(parsed);
749
1213
  });
750
1214
  }
751
- async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100) {
1215
+ async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100, traceId) {
752
1216
  let imported = 0;
753
1217
  let upserted = 0;
754
1218
  for (let startIndex = 0; startIndex < payload.items.length; startIndex += batchSize) {
@@ -757,7 +1221,8 @@ async function uploadLinkedInProductsCatalog(session, payload, batchSize = 100)
757
1221
  method: "POST",
758
1222
  headers: {
759
1223
  "Content-Type": "application/json",
760
- Authorization: `Bearer ${session.accessToken}`
1224
+ Authorization: `Bearer ${session.accessToken}`,
1225
+ ...buildTraceHeaders(traceId)
761
1226
  },
762
1227
  body: JSON.stringify({
763
1228
  source: payload.source,
@@ -788,17 +1253,149 @@ function serializeSalesNavigatorFiltersForApi(filters) {
788
1253
  }))
789
1254
  }));
790
1255
  }
791
- async function runSalesNavigatorExport(session, payload) {
792
- const started = await startSalesNavigatorExport(session, payload);
793
- const completed = await waitForSalesNavigatorExportRunCompletion(started.session, started.value.runId);
794
- return mapCompletedSalesNavigatorExportRun(completed.value.run);
1256
// Serialize a crawl slice into a flat raw-payload object. `extra` entries are
// spread first, so the slice's own fields always win on key collisions. Retry
// counters are normalized to null when absent.
function buildSalesNavigatorSliceRawPayload(slice, extra = {}) {
  const {
    sourceQueryUrl,
    slicedQueryUrl,
    appliedFilters,
    depth,
    splitTrail,
    slicePreset,
    maxResultsPerSearch,
    numberOfProfiles
  } = slice;
  return {
    ...extra,
    sourceQueryUrl,
    slicedQueryUrl,
    appliedFilters,
    depth,
    splitTrail,
    slicePreset,
    maxResultsPerSearch,
    numberOfProfiles,
    retryCount: slice.retryCount ?? null,
    cookieRetryCount: slice.cookieRetryCount ?? null,
    resultRetryCount: slice.resultRetryCount ?? null
  };
}
1272
// Build the raw payload reported for a crawl slice, annotated with the trace
// id and the slice/job identifiers.
//
// The original implementation hand-copied every slice field into a temporary
// object before delegating; `slice` already carries exactly the fields
// buildSalesNavigatorSliceRawPayload reads (including the retry counters,
// which the callee normalizes with `?? null` itself), so it can be passed
// through directly with identical behavior.
function buildSalesNavigatorCrawlReportRawPayload(slice, traceId, extra = {}) {
  return buildSalesNavigatorSliceRawPayload(slice, {
    traceId: traceId ?? null,
    sliceId: slice.id,
    jobId: slice.jobId,
    ...extra
  });
}
1292
// Render launch diagnostics as a single "; "-separated human-readable string,
// or null when no diagnostics were provided. Optional fragments are only
// included when their underlying data is present.
function describeSalesNavigatorLaunchDiagnostics(diagnostics) {
  if (!diagnostics) {
    return null;
  }
  const { selectedAgent, runningAgentIds, busyAgentIds } = diagnostics;
  const parts = [`agent ${selectedAgent.name} (${selectedAgent.id})`];
  if (runningAgentIds.length > 0) {
    parts.push(`running: ${runningAgentIds.join(", ")}`);
  }
  if (busyAgentIds.length > 0) {
    parts.push(`busy fallback: ${busyAgentIds.join(", ")}`);
  }
  if (selectedAgent.maxParallelism !== null) {
    parts.push(`parallelism ${selectedAgent.maxParallelism}`);
  }
  if (selectedAgent.fileMgmt) {
    parts.push(`file mgmt ${selectedAgent.fileMgmt}`);
  }
  parts.push(selectedAgent.hasWebhook ? "webhook on" : "webhook off");
  parts.push(selectedAgent.hasStoredSessionCookie
    ? "stored phantom cookie present"
    : "stored phantom cookie cleared at launch");
  return parts.join("; ");
}
1317
// Print a stderr progress line describing which Phantombuster agent (and
// optionally which session user) a launch selected. Suppressed in JSON/quiet
// output modes and when there are no diagnostics to report.
function writeSalesNavigatorLaunchDiagnosticsProgress(diagnostics, selectedSessionUserEmail) {
  if (!diagnostics || runtimeOutputOptions.json || runtimeOutputOptions.quiet) {
    return;
  }
  const details = describeSalesNavigatorLaunchDiagnostics(diagnostics);
  let line = `Phantombuster launch selected ${diagnostics.selectedAgent.id}`;
  if (selectedSessionUserEmail) {
    line += ` using ${selectedSessionUserEmail}`;
  }
  line += ".";
  if (details) {
    line += ` ${details}`;
  }
  process.stderr.write(`${line}\n`);
}
796
- async function startSalesNavigatorExport(session, payload) {
1325
/**
 * Run one Sales Navigator export end-to-end: start it, poll until completion,
 * and return the mapped, schema-validated result.
 *
 * `traceId` (optional) is forwarded to every API call for correlation.
 * `logOptions` may carry `{ logger, metadata }`; all logging is optional —
 * every call goes through `logOptions.logger?.log(...)`.
 *
 * Rethrows any error after logging `salesnav.export.failed` (with extra
 * request details when the error is a SalesNavigatorExportRequestError).
 */
async function runSalesNavigatorExport(session, payload, traceId, logOptions = {}) {
  // Metadata common to every log event for this export.
  const baseMetadata = {
    sourceQueryUrl: payload.sourceQueryUrl,
    slicedQueryUrl: payload.slicedQueryUrl,
    slicePreset: payload.slicePreset,
    maxResultsPerSearch: payload.maxResultsPerSearch,
    numberOfProfiles: payload.numberOfProfiles,
    filterTypes: payload.appliedFilters.map((filter) => filter.type),
    ...logOptions.metadata
  };
  await logOptions.logger?.log("salesnav.export.started", baseMetadata);
  try {
    // Kick off the export and record the server's acceptance details,
    // including which session cookie/agent the backend selected.
    const started = await startSalesNavigatorExport(session, payload, traceId);
    await logOptions.logger?.log("salesnav.export.accepted", {
      ...baseMetadata,
      runId: started.value.runId,
      agentId: started.value.agentId,
      containerId: started.value.containerId,
      previousContainerId: started.value.previousContainerId ?? null,
      selectedSessionCookieSha256: started.value.selectedSessionCookieSha256 ?? null,
      selectedSessionUserEmail: started.value.selectedSessionUserEmail ?? null,
      selectedSessionUserHandle: started.value.selectedSessionUserHandle ?? null,
      launchDiagnostics: started.value.launchDiagnostics ?? null
    });
    writeSalesNavigatorLaunchDiagnosticsProgress(started.value.launchDiagnostics ?? null, started.value.selectedSessionUserEmail ?? null);
    // Poll the run until it reaches a terminal state. Note the refreshed
    // session from `started` is used, not the caller's original session.
    const completed = await waitForSalesNavigatorExportRunCompletion(started.session, started.value.runId, {}, traceId, {
      logger: logOptions.logger,
      metadata: baseMetadata
    });
    await logOptions.logger?.log("salesnav.export.completed", {
      ...baseMetadata,
      runId: completed.value.run.id,
      status: completed.value.run.status,
      resultClassification: completed.value.run.resultClassification,
      totalResults: completed.value.run.totalResults ?? null,
      imported: completed.value.run.imported,
      upserted: completed.value.run.upserted,
      updatedAt: completed.value.run.updatedAt,
      finishedAt: completed.value.run.finishedAt ?? null
    });
    const mapped = mapCompletedSalesNavigatorExportRun(completed.value.run);
    // Prefer diagnostics from the completed run; fall back to the ones
    // captured at launch time when the run record has none.
    return SalesNavigatorExportResponseSchema.parse({
      ...mapped,
      launchDiagnostics: mapped.launchDiagnostics ?? started.value.launchDiagnostics ?? null,
    });
  }
  catch (error) {
    await logOptions.logger?.log("salesnav.export.failed", {
      ...baseMetadata,
      name: error instanceof Error ? error.name : "Error",
      message: error instanceof Error ? error.message : String(error),
      // Request-level errors carry extra context worth logging.
      ...(error instanceof SalesNavigatorExportRequestError
        ? {
            runId: error.runId ?? null,
            agentId: error.agentId ?? null,
            containerId: error.containerId ?? null,
            errorCode: error.errorCode ?? null,
            totalResults: error.totalResults ?? null,
            launchDiagnostics: error.launchDiagnostics ?? null,
            statusCode: error.statusCode
          }
        : {})
    });
    throw error;
  }
}
1391
+ async function startSalesNavigatorExport(session, payload, traceId) {
797
1392
  return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/export`, {
798
1393
  method: "POST",
1394
+ signal: AbortSignal.timeout(SALES_NAVIGATOR_EXPORT_START_TIMEOUT_MS),
799
1395
  headers: {
800
1396
  "Content-Type": "application/json",
801
- Authorization: `Bearer ${currentSession.accessToken}`
1397
+ Authorization: `Bearer ${currentSession.accessToken}`,
1398
+ ...buildTraceHeaders(traceId)
802
1399
  },
803
1400
  body: JSON.stringify({
804
1401
  ...payload,
@@ -806,11 +1403,12 @@ async function startSalesNavigatorExport(session, payload) {
806
1403
  })
807
1404
  }), SalesNavigatorExportStartResponseSchema);
808
1405
  }
809
- async function getSalesNavigatorExportRunStatus(session, runId) {
1406
+ async function getSalesNavigatorExportRunStatus(session, runId, traceId) {
810
1407
  return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/export-runs/${runId}?refresh=1`, {
811
1408
  method: "GET",
812
1409
  headers: {
813
- Authorization: `Bearer ${currentSession.accessToken}`
1410
+ Authorization: `Bearer ${currentSession.accessToken}`,
1411
+ ...buildTraceHeaders(traceId)
814
1412
  }
815
1413
  }), SalesNavigatorExportRunStatusResponseSchema);
816
1414
  }
@@ -831,7 +1429,8 @@ function mapCompletedSalesNavigatorExportRun(run) {
831
1429
  totalResults: run.totalResults ?? null,
832
1430
  runId: run.id,
833
1431
  agentId: run.agentId,
834
- containerId: run.containerId
1432
+ containerId: run.containerId,
1433
+ launchDiagnostics: run.launchDiagnostics ?? null
835
1434
  });
836
1435
  }
837
1436
  return SalesNavigatorExportResponseSchema.parse({
@@ -844,18 +1443,35 @@ function mapCompletedSalesNavigatorExportRun(run) {
844
1443
  resultCsvUrl: run.resultCsvUrl ?? null,
845
1444
  agentId: run.agentId,
846
1445
  containerId: run.containerId,
1446
+ selectedSessionCookieSha256: run.selectedSessionCookieSha256 ?? null,
1447
+ selectedSessionUserEmail: run.selectedSessionUserEmail ?? null,
1448
+ selectedSessionUserHandle: run.selectedSessionUserHandle ?? null,
1449
+ launchDiagnostics: run.launchDiagnostics ?? null,
847
1450
  sourceQueryUrl: run.sourceQueryUrl,
848
1451
  slicedQueryUrl: run.slicedQueryUrl
849
1452
  });
850
1453
  }
851
- async function waitForSalesNavigatorExportRunCompletion(session, runId, options = {}) {
1454
+ async function waitForSalesNavigatorExportRunCompletion(session, runId, options = {}, traceId, logOptions = {}) {
852
1455
  const timeoutSeconds = options.timeoutSeconds ?? 960;
853
1456
  const pollIntervalMs = options.pollIntervalMs ?? 5000;
854
1457
  const deadline = Date.now() + timeoutSeconds * 1000;
855
1458
  let currentSession = session;
1459
+ let pollCount = 0;
856
1460
  while (Date.now() < deadline) {
857
- const status = await getSalesNavigatorExportRunStatus(currentSession, runId);
1461
+ const status = await getSalesNavigatorExportRunStatus(currentSession, runId, traceId);
858
1462
  currentSession = status.session;
1463
+ pollCount += 1;
1464
+ await logOptions.logger?.log("salesnav.export.polled", {
1465
+ runId,
1466
+ pollCount,
1467
+ status: status.value.run.status,
1468
+ resultClassification: status.value.run.resultClassification,
1469
+ totalResults: status.value.run.totalResults ?? null,
1470
+ imported: status.value.run.imported,
1471
+ upserted: status.value.run.upserted,
1472
+ updatedAt: status.value.run.updatedAt,
1473
+ ...logOptions.metadata
1474
+ });
859
1475
  if (status.value.run.status !== "pending") {
860
1476
  return status;
861
1477
  }
@@ -871,12 +1487,16 @@ function isSalesNavigatorAgentBusyError(error) {
871
1487
  return /parallel executions limit/i.test(message);
872
1488
  }
873
1489
// Classifies an error as a LinkedIn / Sales Navigator session problem.
// Structured errors are matched on their errorCode first; anything else
// falls back to pattern-matching the error message (connection failures,
// upsell walls, invalid-session and rate-limit signals).
function isSalesNavigatorSessionError(error) {
    const sessionErrorCodes = new Set([
        "invalid_session",
        "phantombuster_cant_connect_profile",
        "salesnav_upsell_detected",
        "linkedin_session_invalid"
    ]);
    if (error instanceof SalesNavigatorExportRequestError && sessionErrorCodes.has(error.errorCode ?? "")) {
        return true;
    }
    const message = error instanceof Error ? error.message : String(error);
    return /can't connect profile|sales navigator account|upsell|linkedin session invalid|linkedin_rate_limited|too many requests|rate.?limit|invalid session cookie/i.test(message);
}
881
1501
  function isSalesNavigatorResultArtifactError(error) {
882
1502
  if (error instanceof SalesNavigatorExportRequestError && error.errorCode === "phantombuster_result_invalid") {
@@ -899,13 +1519,16 @@ function isRefreshableAuthError(error) {
899
1519
  const message = error instanceof Error ? error.message : String(error);
900
1520
  return /token expired|session expired|not logged in|missing bearer token/i.test(message);
901
1521
  }
902
- async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
1522
+ async function runSalesNavigatorExportWithAgentWait(session, payload, options, traceId) {
903
1523
  let busyWaitCount = 0;
904
1524
  let currentSession = session;
905
1525
  let authRefreshCount = 0;
906
1526
  while (true) {
907
1527
  try {
908
- return await runSalesNavigatorExport(currentSession, payload);
1528
+ return await runSalesNavigatorExport(currentSession, payload, traceId, {
1529
+ logger: options.logger,
1530
+ metadata: options.logMetadata
1531
+ });
909
1532
  }
910
1533
  catch (error) {
911
1534
  if (isRefreshableAuthError(error)) {
@@ -916,6 +1539,12 @@ async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
916
1539
  if (!runtimeOutputOptions.quiet) {
917
1540
  process.stderr.write("Salesprompter session expired during crawl. Refreshing login...\n");
918
1541
  }
1542
+ await options.logger?.log("salesnav.export.auth.refresh", {
1543
+ authRefreshCount,
1544
+ waitSeconds: options.waitSeconds,
1545
+ maxWaits: options.maxWaits,
1546
+ ...options.logMetadata
1547
+ });
919
1548
  await ensureInteractiveAuthSession(currentSession.apiBaseUrl);
920
1549
  currentSession = await requireAuthSession();
921
1550
  continue;
@@ -928,6 +1557,12 @@ async function runSalesNavigatorExportWithAgentWait(session, payload, options) {
928
1557
  if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
929
1558
  process.stderr.write(`Sales Navigator export agent is busy. Waiting ${options.waitSeconds}s before retrying...\n`);
930
1559
  }
1560
+ await options.logger?.log("salesnav.export.agent.busy", {
1561
+ busyWaitCount,
1562
+ waitSeconds: options.waitSeconds,
1563
+ maxWaits: options.maxWaits,
1564
+ ...options.logMetadata
1565
+ });
931
1566
  await delay(options.waitSeconds * 1000);
932
1567
  continue;
933
1568
  }
@@ -940,6 +1575,14 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
940
1575
  options.probeProfiles < attempt.numberOfProfiles &&
941
1576
  attempt.depth < options.maxSplitDepth;
942
1577
  const probeProfiles = shouldProbe ? Math.max(1, options.probeProfiles) : attempt.numberOfProfiles;
1578
+ const logMetadata = {
1579
+ crawlJobId: context?.crawlJobId ?? null,
1580
+ crawlSliceId: context?.crawlSliceId ?? null,
1581
+ sliceDepth: attempt.depth,
1582
+ splitTrail: formatSalesNavigatorSplitTrail(attempt.splitTrail),
1583
+ sourceQueryUrl: attempt.sourceQueryUrl,
1584
+ slicedQueryUrl: attempt.slicedQueryUrl
1585
+ };
943
1586
  const probeResult = await runSalesNavigatorExportWithAgentWait(session, {
944
1587
  sourceQueryUrl: attempt.sourceQueryUrl,
945
1588
  slicedQueryUrl: attempt.slicedQueryUrl,
@@ -948,11 +1591,23 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
948
1591
  numberOfProfiles: probeProfiles,
949
1592
  slicePreset: attempt.slicePreset,
950
1593
  crawlJobId: context?.crawlJobId,
951
- crawlSliceId: context?.crawlSliceId
1594
+ crawlSliceId: context?.crawlSliceId,
1595
+ rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
1596
+ traceId: context?.traceId ?? null,
1597
+ phase: shouldProbe ? "probe" : "full_export",
1598
+ requestedProfiles: probeProfiles,
1599
+ crawlJobId: context?.crawlJobId ?? null,
1600
+ crawlSliceId: context?.crawlSliceId ?? null
1601
+ })
952
1602
  }, {
953
1603
  waitSeconds: options.agentBusyWaitSeconds,
954
- maxWaits: options.agentBusyMaxWaits
955
- });
1604
+ maxWaits: options.agentBusyMaxWaits,
1605
+ logger: options.logger,
1606
+ logMetadata: {
1607
+ ...logMetadata,
1608
+ phase: shouldProbe ? "probe" : "full_export"
1609
+ }
1610
+ }, context?.traceId);
956
1611
  if (!shouldProbe) {
957
1612
  return probeResult;
958
1613
  }
@@ -960,6 +1615,10 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
960
1615
  if (totalResults === null || totalResults > attempt.maxResultsPerSearch) {
961
1616
  return probeResult;
962
1617
  }
1618
+ const splitTriggerResults = Math.min(attempt.maxResultsPerSearch, SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS);
1619
+ if (totalResults > splitTriggerResults) {
1620
+ throw new SalesNavigatorSliceTooBroadError(`Sales Navigator slice produced ${totalResults} results, exceeding the split trigger of ${splitTriggerResults}.`, { totalResults });
1621
+ }
963
1622
  return await runSalesNavigatorExportWithAgentWait(session, {
964
1623
  sourceQueryUrl: attempt.sourceQueryUrl,
965
1624
  slicedQueryUrl: attempt.slicedQueryUrl,
@@ -968,11 +1627,27 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
968
1627
  numberOfProfiles: attempt.numberOfProfiles,
969
1628
  slicePreset: attempt.slicePreset,
970
1629
  crawlJobId: context?.crawlJobId,
971
- crawlSliceId: context?.crawlSliceId
1630
+ crawlSliceId: context?.crawlSliceId,
1631
+ rawPayload: buildSalesNavigatorSliceRawPayload(attempt, {
1632
+ traceId: context?.traceId ?? null,
1633
+ phase: "full_export_after_probe",
1634
+ requestedProfiles: attempt.numberOfProfiles,
1635
+ crawlJobId: context?.crawlJobId ?? null,
1636
+ crawlSliceId: context?.crawlSliceId ?? null,
1637
+ probeProfiles,
1638
+ probeTotalResults: totalResults
1639
+ })
972
1640
  }, {
973
1641
  waitSeconds: options.agentBusyWaitSeconds,
974
- maxWaits: options.agentBusyMaxWaits
975
- });
1642
+ maxWaits: options.agentBusyMaxWaits,
1643
+ logger: options.logger,
1644
+ logMetadata: {
1645
+ ...logMetadata,
1646
+ phase: "full_export_after_probe",
1647
+ probeProfiles,
1648
+ probeTotalResults: totalResults
1649
+ }
1650
+ }, context?.traceId);
976
1651
  }
977
1652
  function buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice) {
978
1653
  return {
@@ -987,12 +1662,13 @@ function buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice) {
987
1662
  splitTrail: slice.splitTrail
988
1663
  };
989
1664
  }
990
- async function createOrResumeSalesNavigatorCrawlJob(session, payload) {
1665
+ async function createOrResumeSalesNavigatorCrawlJob(session, payload, traceId) {
991
1666
  return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/crawls`, {
992
1667
  method: "POST",
993
1668
  headers: {
994
1669
  "Content-Type": "application/json",
995
- Authorization: `Bearer ${currentSession.accessToken}`
1670
+ Authorization: `Bearer ${currentSession.accessToken}`,
1671
+ ...buildTraceHeaders(traceId)
996
1672
  },
997
1673
  body: JSON.stringify({
998
1674
  ...payload,
@@ -1003,28 +1679,31 @@ async function createOrResumeSalesNavigatorCrawlJob(session, payload) {
1003
1679
  })
1004
1680
  }), SalesNavigatorCrawlCreateResponseSchema);
1005
1681
  }
1006
- async function getSalesNavigatorCrawlStatus(session, jobId) {
1682
// Retrieves the latest state of a Sales Navigator crawl job and validates the
// JSON response against the crawl-status schema. `traceId` is optional and,
// when given, is forwarded through the request trace headers.
async function getSalesNavigatorCrawlStatus(session, jobId, traceId) {
    const requestStatus = (currentSession) => {
        const url = `${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}`;
        return fetch(url, {
            method: "GET",
            headers: {
                Authorization: `Bearer ${currentSession.accessToken}`,
                ...buildTraceHeaders(traceId)
            }
        });
    };
    return await fetchCliJson(session, requestStatus, SalesNavigatorCrawlStatusResponseSchema);
}
1014
- async function claimNextSalesNavigatorCrawlSlice(session, jobId) {
1691
// Asks the server to assign the next available slice of a Sales Navigator
// crawl job to this client (POST /claim-next) and validates the response
// against the claim schema. `traceId` is optional and forwarded via the
// shared trace headers.
async function claimNextSalesNavigatorCrawlSlice(session, jobId, traceId) {
    const requestClaim = (currentSession) => {
        const url = `${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}/claim-next`;
        return fetch(url, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${currentSession.accessToken}`,
                ...buildTraceHeaders(traceId)
            }
        });
    };
    return await fetchCliJson(session, requestClaim, SalesNavigatorCrawlClaimResponseSchema);
}
1022
- async function reportSalesNavigatorCrawlSlice(session, jobId, payload) {
1700
+ async function reportSalesNavigatorCrawlSlice(session, jobId, payload, traceId) {
1023
1701
  return await fetchCliJson(session, (currentSession) => fetch(`${currentSession.apiBaseUrl}/api/cli/salesnav/crawls/${jobId}/report`, {
1024
1702
  method: "POST",
1025
1703
  headers: {
1026
1704
  "Content-Type": "application/json",
1027
- Authorization: `Bearer ${currentSession.accessToken}`
1705
+ Authorization: `Bearer ${currentSession.accessToken}`,
1706
+ ...buildTraceHeaders(traceId)
1028
1707
  },
1029
1708
  body: JSON.stringify({
1030
1709
  ...payload,
@@ -1039,17 +1718,125 @@ function nextSalesNavigatorSplitDimension(slice, maxSplitDepth) {
1039
1718
  if (slice.depth >= maxSplitDepth) {
1040
1719
  return null;
1041
1720
  }
1042
- return DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS[slice.depth] ?? null;
1721
+ const usedDimensionKeys = new Set(slice.splitTrail.map((entry) => entry.key));
1722
+ const orderedDimensions = getLearnedSalesNavigatorDimensionOrder();
1723
+ return orderedDimensions.find((dimension) => !usedDimensionKeys.has(dimension.key)) ?? null;
1043
1724
  }
1044
1725
  const SALES_NAVIGATOR_COOKIE_RETRY_LIMIT = 8;
1045
1726
  const SALES_NAVIGATOR_RESULT_RETRY_LIMIT = 3;
1727
+ const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
1728
+ const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
1729
+ let salesNavigatorFilterImpactModel = null;
1730
+ let salesNavigatorFilterImpactLoaded = false;
1731
// Resolves the Salesprompter config directory.
// A non-empty (after trimming) SALESPROMPTER_CONFIG_DIR environment variable
// overrides the default of ~/.config/salesprompter.
function getSalesprompterConfigDir() {
    const override = process.env.SALESPROMPTER_CONFIG_DIR?.trim() ?? "";
    return override.length > 0 ? override : path.join(os.homedir(), ".config", "salesprompter");
}
1738
// Absolute path of the JSON file that stores the learned Sales Navigator
// filter-impact statistics inside the Salesprompter config directory.
function getSalesNavigatorFilterImpactPath() {
    const configDir = getSalesprompterConfigDir();
    return path.join(configDir, "salesnav-filter-impact.json");
}
1741
// Lazily loads the persisted filter-impact model from disk at most once per
// process. A missing, unreadable, malformed, or wrong-version file leaves the
// in-memory model as null; subsequent calls return the cached result.
async function loadSalesNavigatorFilterImpactModel() {
    if (!salesNavigatorFilterImpactLoaded) {
        // Mark loaded up front so a failed read is not retried on every call.
        salesNavigatorFilterImpactLoaded = true;
        try {
            const raw = await readFile(getSalesNavigatorFilterImpactPath(), "utf8");
            const candidate = JSON.parse(raw);
            const isValidModel = Boolean(candidate) &&
                candidate.version === 1 &&
                Boolean(candidate.dimensions) &&
                typeof candidate.dimensions === "object";
            if (isValidModel) {
                salesNavigatorFilterImpactModel = candidate;
            }
        }
        catch {
            // Any read/parse failure means "no model yet".
            salesNavigatorFilterImpactModel = null;
        }
    }
    return salesNavigatorFilterImpactModel;
}
1759
// Writes the in-memory filter-impact model to disk as pretty-printed JSON
// (trailing newline included), creating the config directory if it does not
// exist. No-op when no model has been built yet.
async function persistSalesNavigatorFilterImpactModel() {
    const model = salesNavigatorFilterImpactModel;
    if (!model) {
        return;
    }
    const filePath = getSalesNavigatorFilterImpactPath();
    const serialized = `${JSON.stringify(model, null, 2)}\n`;
    await mkdir(path.dirname(filePath), { recursive: true });
    await writeFile(filePath, serialized, "utf8");
}
1767
// Orders the crawl split dimensions using the learned filter-impact model.
// Dimensions with enough observations ("reliable") sort by ascending average
// result count and rank ahead of unreliable ones; all remaining comparisons
// (ties, unreliable-vs-unreliable) fall back to the default dimension order.
// Without a model, the default order is returned unchanged.
function getLearnedSalesNavigatorDimensionOrder() {
    const model = salesNavigatorFilterImpactModel;
    if (!model) {
        return DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS;
    }
    const defaultIndex = new Map(DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.map((dimension, index) => [dimension.key, index]));
    const statsFor = (dimension) => model.dimensions[dimension.key];
    const isReliable = (stats) => (stats?.observations ?? 0) >= SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS;
    const compareDimensions = (left, right) => {
        const leftStats = statsFor(left);
        const rightStats = statsFor(right);
        const leftReliable = isReliable(leftStats);
        const rightReliable = isReliable(rightStats);
        if (leftReliable && rightReliable) {
            const delta = (leftStats?.avgResults ?? Number.POSITIVE_INFINITY) -
                (rightStats?.avgResults ?? Number.POSITIVE_INFINITY);
            if (delta !== 0) {
                return delta;
            }
        }
        else if (leftReliable !== rightReliable) {
            // A reliable dimension always outranks an unreliable one.
            return leftReliable ? -1 : 1;
        }
        // Stable fallback: keep the default ordering.
        return (defaultIndex.get(left.key) ?? 0) - (defaultIndex.get(right.key) ?? 0);
    };
    return [...DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS].sort(compareDimensions);
}
1791
// Folds a single (split-dimension, totalResults) observation into the
// persisted filter-impact model and emits a telemetry event describing the
// update.
//
// Skips silently when:
//   - `totalResults` is not a finite number (`Number.isFinite` already
//     rejects null/undefined/non-numbers, so the previous explicit nullish
//     checks were redundant), or
//   - the slice has no split trail, i.e. no dimension was learned.
//
// `options` may carry { logger, outcome }; both are optional.
async function recordSalesNavigatorFilterImpactObservation(slice, totalResults, options) {
    if (!Number.isFinite(totalResults)) {
        return;
    }
    const learnedDimension = slice.splitTrail.at(-1)?.key ?? null;
    if (!learnedDimension) {
        return;
    }
    await loadSalesNavigatorFilterImpactModel();
    // One shared timestamp so `updatedAt` and `lastObservedAt` agree for the
    // observation being recorded (previously two Date calls could differ).
    const observedAt = new Date().toISOString();
    if (!salesNavigatorFilterImpactModel) {
        salesNavigatorFilterImpactModel = {
            version: 1,
            updatedAt: observedAt,
            dimensions: {}
        };
    }
    const previous = salesNavigatorFilterImpactModel.dimensions[learnedDimension];
    const observations = (previous?.observations ?? 0) + 1;
    const sumResults = (previous?.sumResults ?? 0) + totalResults;
    const avgResults = sumResults / observations;
    salesNavigatorFilterImpactModel.dimensions[learnedDimension] = {
        observations,
        sumResults,
        avgResults,
        lastObservedAt: observedAt
    };
    salesNavigatorFilterImpactModel.updatedAt = observedAt;
    await persistSalesNavigatorFilterImpactModel();
    await options?.logger?.log("salesnav.filter_impact.updated", {
        dimensionKey: learnedDimension,
        observations,
        avgResults,
        totalResults,
        outcome: options?.outcome ?? null
    });
}
1046
1827
  function buildSalesNavigatorSplitChildren(slice, dimension) {
1047
1828
  const attempt = buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice);
1048
1829
  return expandSalesNavigatorCrawlAttempt(attempt, dimension).map((child) => ({
1049
1830
  slicedQueryUrl: child.slicedQueryUrl,
1050
1831
  appliedFilters: child.appliedFilters,
1051
1832
  depth: child.depth,
1052
- splitTrail: child.splitTrail
1833
+ splitTrail: child.splitTrail,
1834
+ rawPayload: buildSalesNavigatorSliceRawPayload(child, {
1835
+ parentSliceId: slice.id,
1836
+ parentSlicedQueryUrl: slice.slicedQueryUrl,
1837
+ splitDimensionKey: child.splitTrail.at(-1)?.key ?? null,
1838
+ splitDimensionFilterType: child.splitTrail.at(-1)?.filterType ?? null
1839
+ })
1053
1840
  }));
1054
1841
  }
1055
1842
  function buildSalesNavigatorSliceFailureReport(slice, error, options) {
@@ -1114,81 +1901,398 @@ function buildSalesNavigatorSliceFailureReport(slice, error, options) {
1114
1901
  function formatSalesNavigatorSplitTrail(splitTrail) {
1115
1902
  return splitTrail.map((entry) => `${entry.key}:${entry.value.text}`);
1116
1903
  }
1117
- async function executeSalesNavigatorCrawlJob(session, jobId, options) {
1904
// Preflight: verifies that a validated Sales Navigator session cookie can be
// claimed for `queryUrl` before export work is scheduled. Emits
// started/completed/failed telemetry events and never throws — any failure is
// reported as { ready: false, error } instead.
async function ensureSalesNavigatorSessionPoolReady(queryUrl, options) {
    const { logger, source } = options;
    try {
        await logger?.log("salesnav.session_pool.preflight.started", { source, queryUrl });
        const claimed = await claimValidatedSalesNavigatorSessionCookieForCli({
            queryUrl,
            source,
            env: process.env
        });
        await logger?.log("salesnav.session_pool.preflight.completed", {
            source,
            queryUrl,
            // A nullish claim result is logged as "skipped" rather than "ok".
            status: claimed ? "ok" : "skipped",
            selectedSessionUserEmail: claimed?.userEmail ?? null,
            selectedSessionUserHandle: claimed?.userHandle ?? null,
            selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null
        });
        return { ready: true };
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        await logger?.log("salesnav.session_pool.preflight.failed", {
            source,
            queryUrl,
            error: message
        });
        return { ready: false, error: message };
    }
}
1940
+ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, options) {
1118
1941
  let currentSession = session;
1119
- let claimedSlices = 0;
1120
- const seenSliceIds = new Set();
1121
- let activeSlice = null;
1122
- let job = null;
1123
- let lastOutcome = null;
1124
- while (true) {
1125
- if (claimedSlices >= options.maxSlices && lastOutcome?.outcome !== "retryable_failed") {
1126
- break;
1127
- }
1128
- const claimed = await claimNextSalesNavigatorCrawlSlice(currentSession, jobId);
1129
- currentSession = claimed.session;
1130
- job = claimed.value.job;
1131
- if (!claimed.value.slice) {
1132
- break;
1133
- }
1134
- const slice = claimed.value.slice;
1135
- activeSlice = slice;
1136
- const isNewSlice = !seenSliceIds.has(slice.id);
1137
- if (isNewSlice) {
1138
- seenSliceIds.add(slice.id);
1139
- claimedSlices += 1;
1140
- }
1141
- if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
1142
- process.stderr.write(`Processing Sales Navigator slice ${claimedSlices}/${options.maxSlices}: ${slice.slicedQueryUrl}\n`);
1143
- }
1144
- try {
1145
- const result = await runSalesNavigatorCrawlAttempt(currentSession, buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice), {
1146
- maxSplitDepth: options.maxSplitDepth,
1147
- probeProfiles: options.probeProfiles,
1148
- agentBusyWaitSeconds: options.agentBusyWaitSeconds,
1149
- agentBusyMaxWaits: options.agentBusyMaxWaits
1150
- }, {
1151
- crawlJobId: jobId,
1152
- crawlSliceId: slice.id
1153
- });
1942
+ await options.logger?.log("salesnav.crawl.slice.claimed", {
1943
+ jobId,
1944
+ sliceId: slice.id,
1945
+ isNewSlice: true,
1946
+ claimedSlices: options.claimedSlices,
1947
+ depth: slice.depth,
1948
+ retryCount: slice.retryCount,
1949
+ cookieRetryCount: slice.cookieRetryCount,
1950
+ resultRetryCount: slice.resultRetryCount,
1951
+ splitTrail: formatSalesNavigatorSplitTrail(slice.splitTrail),
1952
+ ...summarizeSalesNavigatorQuery(slice.slicedQueryUrl, slice.appliedFilters)
1953
+ });
1954
+ if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
1955
+ process.stderr.write(`Processing Sales Navigator slice ${options.claimedSlices}: ${slice.slicedQueryUrl}\n`);
1956
+ }
1957
+ if (shouldPreSplitSalesNavigatorRootSlice(slice, options.maxSplitDepth)) {
1958
+ const nextDimension = nextSalesNavigatorSplitDimension(slice, options.maxSplitDepth);
1959
+ if (nextDimension) {
1960
+ const children = buildSalesNavigatorSplitChildren(slice, nextDimension);
1154
1961
  const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
1155
1962
  sliceId: slice.id,
1156
- outcome: "exported",
1157
- totalResults: result.totalResults ?? null,
1158
- exportRunId: result.runId,
1159
- importedPeople: result.imported,
1160
- upsertedPeople: result.upserted
1161
- });
1963
+ outcome: "split",
1964
+ error: `Pre-splitting broad Sales Navigator title query by ${nextDimension.key} before the first export attempt.`,
1965
+ errorCode: "presplit_root_title_query",
1966
+ children,
1967
+ rawPayload: buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
1968
+ phase: "presplit",
1969
+ reason: "broad_root_title_query",
1970
+ nextDimensionKey: nextDimension.key,
1971
+ nextDimensionFilterType: nextDimension.filterType,
1972
+ childCount: children.length
1973
+ })
1974
+ }, options.traceId);
1162
1975
  currentSession = reported.session;
1163
- job = reported.value.job;
1164
- lastOutcome = {
1976
+ await options.logger?.log("salesnav.crawl.slice.presplit", {
1977
+ jobId,
1978
+ sliceId: slice.id,
1979
+ nextDimension: nextDimension.key,
1980
+ childCount: children.length,
1981
+ splitTrail: formatSalesNavigatorSplitTrail(slice.splitTrail),
1982
+ ...summarizeSalesNavigatorQuery(slice.slicedQueryUrl, slice.appliedFilters),
1983
+ childQueries: children.map((child) => ({
1984
+ splitTrail: formatSalesNavigatorSplitTrail(child.splitTrail),
1985
+ ...summarizeSalesNavigatorQuery(child.slicedQueryUrl, child.appliedFilters)
1986
+ }))
1987
+ });
1988
+ return {
1989
+ session: currentSession,
1990
+ job: reported.value.job,
1991
+ activeSlice: slice,
1992
+ lastOutcome: {
1993
+ outcome: "split",
1994
+ error: `Pre-split by ${nextDimension.key}`,
1995
+ errorCode: "presplit_root_title_query",
1996
+ totalResults: null
1997
+ },
1998
+ forceSessionPoolRecheck: false
1999
+ };
2000
+ }
2001
+ }
2002
+ try {
2003
+ const result = await runSalesNavigatorCrawlAttempt(currentSession, buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice), {
2004
+ maxSplitDepth: options.maxSplitDepth,
2005
+ probeProfiles: options.probeProfiles,
2006
+ agentBusyWaitSeconds: options.agentBusyWaitSeconds,
2007
+ agentBusyMaxWaits: options.agentBusyMaxWaits,
2008
+ logger: options.logger
2009
+ }, {
2010
+ crawlJobId: jobId,
2011
+ crawlSliceId: slice.id,
2012
+ traceId: options.traceId
2013
+ });
2014
+ const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, {
2015
+ sliceId: slice.id,
2016
+ outcome: "exported",
2017
+ totalResults: result.totalResults ?? null,
2018
+ exportRunId: result.runId,
2019
+ importedPeople: result.imported,
2020
+ upsertedPeople: result.upserted,
2021
+ rawPayload: buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
2022
+ phase: "exported",
2023
+ export: {
2024
+ runId: result.runId,
2025
+ totalResults: result.totalResults ?? null,
2026
+ imported: result.imported,
2027
+ upserted: result.upserted,
2028
+ resultJsonUrl: result.resultJsonUrl ?? null,
2029
+ resultCsvUrl: result.resultCsvUrl ?? null,
2030
+ selectedSessionCookieSha256: result.selectedSessionCookieSha256 ?? null,
2031
+ selectedSessionUserEmail: result.selectedSessionUserEmail ?? null,
2032
+ selectedSessionUserHandle: result.selectedSessionUserHandle ?? null,
2033
+ launchDiagnostics: result.launchDiagnostics ?? null
2034
+ }
2035
+ })
2036
+ }, options.traceId);
2037
+ currentSession = reported.session;
2038
+ await recordSalesNavigatorFilterImpactObservation(slice, result.totalResults ?? null, {
2039
+ logger: options.logger,
2040
+ outcome: "exported"
2041
+ });
2042
+ await options.logger?.log("salesnav.crawl.slice.exported", {
2043
+ jobId,
2044
+ sliceId: slice.id,
2045
+ exportRunId: result.runId,
2046
+ totalResults: result.totalResults ?? null,
2047
+ imported: result.imported,
2048
+ upserted: result.upserted,
2049
+ selectedAgentId: result.launchDiagnostics?.selectedAgent.id ?? result.agentId,
2050
+ selectedSessionUserEmail: result.selectedSessionUserEmail ?? null
2051
+ });
2052
+ return {
2053
+ session: currentSession,
2054
+ job: reported.value.job,
2055
+ activeSlice: slice,
2056
+ lastOutcome: {
1165
2057
  outcome: "exported",
1166
2058
  runId: result.runId,
1167
2059
  totalResults: result.totalResults ?? null
1168
- };
1169
- }
1170
- catch (error) {
1171
- const payload = buildSalesNavigatorSliceFailureReport(slice, error, {
1172
- maxSplitDepth: options.maxSplitDepth,
1173
- maxRetries: options.maxRetries
1174
- });
1175
- const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, payload);
1176
- currentSession = reported.session;
1177
- job = reported.value.job;
1178
- lastOutcome = {
2060
+ },
2061
+ forceSessionPoolRecheck: false
2062
+ };
2063
+ }
2064
+ catch (error) {
2065
+ const payload = buildSalesNavigatorSliceFailureReport(slice, error, {
2066
+ maxSplitDepth: options.maxSplitDepth,
2067
+ maxRetries: options.maxRetries
2068
+ });
2069
+ payload.rawPayload = buildSalesNavigatorCrawlReportRawPayload(slice, options.traceId, {
2070
+ phase: payload.outcome,
2071
+ error: error instanceof Error
2072
+ ? {
2073
+ name: error.name,
2074
+ message: error.message,
2075
+ ...(error instanceof SalesNavigatorExportRequestError
2076
+ ? {
2077
+ launchDiagnostics: error.launchDiagnostics ?? null,
2078
+ agentId: error.agentId ?? null,
2079
+ containerId: error.containerId ?? null
2080
+ }
2081
+ : {})
2082
+ }
2083
+ : {
2084
+ name: "Error",
2085
+ message: String(error)
2086
+ }
2087
+ });
2088
+ const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, payload, options.traceId);
2089
+ currentSession = reported.session;
2090
+ await recordSalesNavigatorFilterImpactObservation(slice, payload.totalResults ?? null, {
2091
+ logger: options.logger,
2092
+ outcome: payload.outcome
2093
+ });
2094
+ await options.logger?.log("salesnav.crawl.slice.reported", {
2095
+ jobId,
2096
+ sliceId: slice.id,
2097
+ outcome: payload.outcome,
2098
+ error: payload.error ?? null,
2099
+ errorCode: payload.errorCode ?? null,
2100
+ totalResults: payload.totalResults ?? null,
2101
+ exportRunId: payload.exportRunId ?? null,
2102
+ childCount: payload.children?.length ?? 0
2103
+ });
2104
+ return {
2105
+ session: currentSession,
2106
+ job: reported.value.job,
2107
+ activeSlice: slice,
2108
+ lastOutcome: {
1179
2109
  outcome: payload.outcome,
1180
2110
  runId: payload.exportRunId,
1181
2111
  error: payload.error,
1182
2112
  errorCode: payload.errorCode,
1183
2113
  totalResults: payload.totalResults
1184
- };
2114
+ },
2115
+ forceSessionPoolRecheck: payload.errorCode === "invalid_session"
2116
+ };
2117
+ }
2118
+ }
2119
+ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
2120
+ await loadSalesNavigatorFilterImpactModel();
2121
+ let currentSession = session;
2122
+ let claimedSlices = 0;
2123
+ const seenSliceIds = new Set();
2124
+ let activeSlice = null;
2125
+ let job = null;
2126
+ let idlePollCount = 0;
2127
+ let lastOutcome = null;
2128
+ const parallelExports = Math.max(1, options.parallelExports);
2129
+ const inFlight = new Map();
2130
+ let nextSlot = 0;
2131
+ let noMoreClaimableWork = false;
2132
+ let sessionPoolFailures = 0;
2133
+ let nextSessionPoolRetryAt = 0;
2134
+ let lastSessionPoolReadyAt = 0;
2135
+ const sessionPoolReadinessCooldownMs = 120_000;
2136
+ while (true) {
2137
+ while (!noMoreClaimableWork && inFlight.size < parallelExports) {
2138
+ if (claimedSlices >= options.maxSlices) {
2139
+ break;
2140
+ }
2141
+ if (inFlight.size === 0) {
2142
+ const now = Date.now();
2143
+ if (now < nextSessionPoolRetryAt) {
2144
+ await delay(Math.max(0, nextSessionPoolRetryAt - now));
2145
+ continue;
2146
+ }
2147
+ if (now - lastSessionPoolReadyAt >= sessionPoolReadinessCooldownMs) {
2148
+ const readiness = await ensureSalesNavigatorSessionPoolReady(job?.sourceQueryUrl ?? "https://www.linkedin.com/sales/search/people", {
2149
+ logger: options.logger,
2150
+ source: "cli_salesnav_crawl_preflight"
2151
+ });
2152
+ if (!readiness.ready) {
2153
+ sessionPoolFailures += 1;
2154
+ idlePollCount += 1;
2155
+ const waitSeconds = Math.min(120, 10 * Math.max(1, sessionPoolFailures));
2156
+ nextSessionPoolRetryAt = Date.now() + waitSeconds * 1000;
2157
+ await options.logger?.log("salesnav.crawl.session_pool.waiting", {
2158
+ jobId,
2159
+ idlePollCount,
2160
+ idleMaxPolls: options.idleMaxPolls,
2161
+ sessionPoolFailures,
2162
+ waitSeconds,
2163
+ error: readiness.error
2164
+ });
2165
+ if (idlePollCount >= options.idleMaxPolls) {
2166
+ lastOutcome = {
2167
+ outcome: "terminal_failed",
2168
+ error: readiness.error ??
2169
+ `Sales Navigator session pool stayed unavailable for ${options.idleMaxPolls} checks.`,
2170
+ errorCode: "blocked_no_valid_salesnav_session"
2171
+ };
2172
+ noMoreClaimableWork = true;
2173
+ break;
2174
+ }
2175
+ continue;
2176
+ }
2177
+ sessionPoolFailures = 0;
2178
+ nextSessionPoolRetryAt = 0;
2179
+ lastSessionPoolReadyAt = Date.now();
2180
+ }
2181
+ }
2182
+ const claimed = await claimNextSalesNavigatorCrawlSlice(currentSession, jobId, options.traceId);
2183
+ currentSession = claimed.session;
2184
+ job = claimed.value.job;
2185
+ if (!claimed.value.slice) {
2186
+ const shouldWaitForRemoteWork = !isSalesNavigatorCrawlJobTerminal(job.status) &&
2187
+ options.idleMaxPolls > 0 &&
2188
+ job.runningSlices > 0;
2189
+ if (shouldWaitForRemoteWork && inFlight.size === 0) {
2190
+ if (idlePollCount >= options.idleMaxPolls) {
2191
+ lastOutcome = {
2192
+ outcome: "terminal_failed",
2193
+ error: `Sales Navigator crawl job ${jobId} stayed non-terminal without a claimable slice after ${options.idleMaxPolls} polls.`,
2194
+ errorCode: "crawl_idle_timeout"
2195
+ };
2196
+ await options.logger?.log("salesnav.crawl.job.stalled", {
2197
+ jobId,
2198
+ status: job.status,
2199
+ queuedSlices: job.queuedSlices,
2200
+ runningSlices: job.runningSlices,
2201
+ idlePollCount,
2202
+ idleMaxPolls: options.idleMaxPolls
2203
+ });
2204
+ noMoreClaimableWork = true;
2205
+ break;
2206
+ }
2207
+ idlePollCount += 1;
2208
+ await options.logger?.log("salesnav.crawl.job.waiting", {
2209
+ jobId,
2210
+ status: job.status,
2211
+ queuedSlices: job.queuedSlices,
2212
+ runningSlices: job.runningSlices,
2213
+ idlePollCount,
2214
+ idlePollSeconds: options.idlePollSeconds
2215
+ });
2216
+ if (!runtimeOutputOptions.json && !runtimeOutputOptions.quiet) {
2217
+ process.stderr.write(`Sales Navigator crawl job ${jobId} has no claimable slice yet. Waiting ${options.idlePollSeconds}s for remote work to settle...\n`);
2218
+ }
2219
+ await delay(options.idlePollSeconds * 1000);
2220
+ const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
2221
+ currentSession = status.session;
2222
+ job = status.value.job;
2223
+ await options.logger?.log("salesnav.crawl.job.status.polled", {
2224
+ jobId,
2225
+ status: job.status,
2226
+ queuedSlices: job.queuedSlices,
2227
+ runningSlices: job.runningSlices,
2228
+ exportedSlices: job.exportedSlices,
2229
+ failedSlices: job.failedSlices,
2230
+ importedPeople: job.importedPeople,
2231
+ idlePollCount
2232
+ });
2233
+ if (isSalesNavigatorCrawlJobTerminal(job.status)) {
2234
+ noMoreClaimableWork = true;
2235
+ break;
2236
+ }
2237
+ continue;
2238
+ }
2239
+ if (!shouldWaitForRemoteWork) {
2240
+ noMoreClaimableWork = true;
2241
+ }
2242
+ break;
2243
+ }
2244
+ const slice = claimed.value.slice;
2245
+ idlePollCount = 0;
2246
+ activeSlice = slice;
2247
+ const isNewSlice = !seenSliceIds.has(slice.id);
2248
+ if (isNewSlice) {
2249
+ seenSliceIds.add(slice.id);
2250
+ claimedSlices += 1;
2251
+ }
2252
+ const claimedSliceNumber = claimedSlices;
2253
+ const slot = nextSlot++;
2254
+ inFlight.set(slot, processSalesNavigatorClaimedCrawlSlice(currentSession, jobId, slice, {
2255
+ maxSplitDepth: options.maxSplitDepth,
2256
+ maxRetries: options.maxRetries,
2257
+ probeProfiles: options.probeProfiles,
2258
+ agentBusyWaitSeconds: options.agentBusyWaitSeconds,
2259
+ agentBusyMaxWaits: options.agentBusyMaxWaits,
2260
+ claimedSlices: claimedSliceNumber,
2261
+ traceId: options.traceId,
2262
+ logger: options.logger
2263
+ }).then((value) => ({ slot, value })));
2264
+ }
2265
+ if (inFlight.size === 0) {
2266
+ break;
2267
+ }
2268
+ const completed = await Promise.race(inFlight.values());
2269
+ inFlight.delete(completed.slot);
2270
+ currentSession = completed.value.session;
2271
+ job = completed.value.job;
2272
+ activeSlice = completed.value.activeSlice;
2273
+ lastOutcome = completed.value.lastOutcome;
2274
+ if (completed.value.forceSessionPoolRecheck) {
2275
+ lastSessionPoolReadyAt = 0;
2276
+ nextSessionPoolRetryAt = 0;
1185
2277
  }
1186
2278
  }
1187
2279
  if (!job) {
1188
- const status = await getSalesNavigatorCrawlStatus(currentSession, jobId);
2280
+ const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
1189
2281
  currentSession = status.session;
1190
2282
  job = status.value.job;
1191
2283
  }
2284
+ await options.logger?.log("salesnav.crawl.job.completed", {
2285
+ jobId,
2286
+ status: job.status,
2287
+ queuedSlices: job.queuedSlices,
2288
+ runningSlices: job.runningSlices,
2289
+ exportedSlices: job.exportedSlices,
2290
+ failedSlices: job.failedSlices,
2291
+ importedPeople: job.importedPeople,
2292
+ claimedSlices,
2293
+ truncated: claimedSlices >= options.maxSlices && (job.queuedSlices > 0 || job.runningSlices > 0),
2294
+ lastOutcome
2295
+ });
1192
2296
  return {
1193
2297
  session: currentSession,
1194
2298
  job,
@@ -1198,22 +2302,6 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
1198
2302
  lastOutcome
1199
2303
  };
1200
2304
  }
1201
- async function searchReferenceCompanyLeads(reference, icp, limit) {
1202
- if (shouldBypassAuth()) {
1203
- const fallbackTargetDomain = reference.domain ?? `${reference.slug}.com`;
1204
- const result = await leadProvider.generateLeads(icp, limit, {
1205
- companyDomain: fallbackTargetDomain,
1206
- companyName: reference.companyName
1207
- });
1208
- return result.leads;
1209
- }
1210
- const session = await requireAuthSession();
1211
- return await fetchWorkspaceLeadSearch(session, {
1212
- mode: "reference-company",
1213
- icp,
1214
- limit
1215
- });
1216
- }
1217
2305
  async function searchTargetCompanyLeads(reference, limit) {
1218
2306
  if (shouldBypassAuth()) {
1219
2307
  const fallbackTargetDomain = reference.domain ?? `${reference.slug}.com`;
@@ -1231,16 +2319,90 @@ async function searchTargetCompanyLeads(reference, limit) {
1231
2319
  limit
1232
2320
  });
1233
2321
  }
1234
- async function runReferenceCompanyWizard(rl) {
1235
- writeWizardSection("Reference company", "Paste the website or LinkedIn company page for the company you sell for.");
1236
- const reference = parseCompanyReference(await promptText(rl, "Which company are you selling for?", {
2322
+ async function runProductMarketWizard(rl) {
2323
+ writeWizardSection("Find leads from a product market", "Start from a company website, LinkedIn company page, product page, or category page. I will turn that into intended job titles and durable Sales Navigator crawls.");
2324
+ const input = await promptText(rl, "What company website or LinkedIn page should I start from?", {
2325
+ required: true
2326
+ });
2327
+ const productLimit = z.coerce.number().int().min(1).max(5000).parse(await promptText(rl, "How many products should I inspect?", { defaultValue: "25", required: true }));
2328
+ const titleLimit = z.coerce.number().int().min(1).max(1000).parse(await promptText(rl, "How many job titles should I turn into Sales Navigator crawls?", {
2329
+ defaultValue: "5",
2330
+ required: true
2331
+ }));
2332
+ writeWizardLine();
2333
+ const dryRun = shouldBypassAuth();
2334
+ if (dryRun) {
2335
+ writeWizardLine("Auth bypass is enabled, so I will preview the crawl plan instead of launching Phantombuster.");
2336
+ writeWizardLine();
2337
+ }
2338
+ const result = await runSalesNavigatorFromProductCategoryWorkflow({
2339
+ input,
2340
+ maxPages: 25,
2341
+ productLimit,
2342
+ titleLimit,
2343
+ maxResultsPerSearch: 2500,
2344
+ numberOfProfiles: 2500,
2345
+ slicePreset: "wizard-linkedin-product-category",
2346
+ maxSplitDepth: DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.length,
2347
+ maxSlicesPerTitle: 1000,
2348
+ maxRetries: 3,
2349
+ probeProfiles: 100,
2350
+ agentBusyWaitSeconds: 30,
2351
+ agentBusyMaxWaits: 20,
2352
+ idlePollSeconds: 10,
2353
+ idleMaxPolls: 180,
2354
+ parallelExports: 3,
2355
+ skipProductUpload: false,
2356
+ dryRun
2357
+ });
2358
+ writeWizardLine(`LinkedIn product category: ${result.payload.source.category.name}.`);
2359
+ writeWizardLine(`Inspected ${result.payload.discoveredProducts} product${result.payload.discoveredProducts === 1 ? "" : "s"} and derived ${result.payload.titleCount} intended job title${result.payload.titleCount === 1 ? "" : "s"}.`);
2360
+ if (result.payload.dryRun) {
2361
+ const firstQuery = result.payload.queries?.[0];
2362
+ writeWizardLine(`Saved preview to ${result.outPath}.`);
2363
+ writeWizardLine(`Saved logs to ${result.payload.logPath}.`);
2364
+ if (firstQuery) {
2365
+ writeWizardLine(`First Sales Navigator title search: ${firstQuery.title}.`);
2366
+ }
2367
+ }
2368
+ else {
2369
+ if (result.payload.uploaded) {
2370
+ writeWizardLine(`Uploaded ${result.payload.uploaded.upserted} LinkedIn product record${result.payload.uploaded.upserted === 1 ? "" : "s"} to Salesprompter.`);
2371
+ }
2372
+ writeWizardLine(`Finished ${result.payload.crawls?.length ?? 0} durable Sales Navigator crawl${result.payload.crawls?.length === 1 ? "" : "s"}.`);
2373
+ writeWizardLine(`Imported ${result.payload.summary.totalImportedPeople} people across ${result.payload.summary.totalExportedSlices} exported slice${result.payload.summary.totalExportedSlices === 1 ? "" : "s"}.`);
2374
+ if (result.payload.summary.workflowStatus !== "completed") {
2375
+ writeWizardLine(`Some title crawls still failed: ${result.payload.summary.completedWithFailuresTitles} completed with failures, ${result.payload.summary.runningTitles} still non-terminal, ${result.payload.summary.truncatedTitles} truncated.`);
2376
+ }
2377
+ writeWizardLine(`Saved crawl summary to ${result.outPath}.`);
2378
+ writeWizardLine(`Saved logs to ${result.payload.logPath}.`);
2379
+ }
2380
+ writeWizardLine();
2381
+ writeWizardLine("Equivalent raw command:");
2382
+ const commandArgs = [
2383
+ "salesprompter",
2384
+ "salesnav:from-product-category",
2385
+ "--input",
2386
+ input,
2387
+ "--product-limit",
2388
+ String(productLimit),
2389
+ "--title-limit",
2390
+ String(titleLimit)
2391
+ ];
2392
+ if (dryRun) {
2393
+ commandArgs.push("--dry-run");
2394
+ }
2395
+ writeWizardLine(` ${buildCommandLine(commandArgs)}`);
2396
+ }
2397
+ async function runVendorShortcutWizard(rl) {
2398
+ writeWizardSection("Built-in Deel shortcut", "Use the built-in Deel ICP template and search your workspace lead data.");
2399
+ const reference = parseCompanyReference(await promptText(rl, "Which company shortcut should I use?", {
1237
2400
  required: true
1238
2401
  }));
1239
2402
  writeWizardLine();
1240
2403
  if (reference.vendorTemplate !== "deel") {
1241
- throw new Error("Automatic company-to-ICP matching is available for Deel right now. Try deel.com or the Deel LinkedIn company page.");
2404
+ throw new Error("The built-in shortcut only supports Deel right now. Use deel.com or the Deel LinkedIn company page.");
1242
2405
  }
1243
- writeWizardSection("Find matching leads", `Using the built-in ${reference.companyName} profile to search your workspace data.`);
1244
2406
  const market = await promptChoice(rl, "Where do you want to search?", [
1245
2407
  { value: "dach", label: "DACH", description: "Germany, Austria, Switzerland" },
1246
2408
  { value: "europe", label: "Europe" },
@@ -1252,7 +2414,16 @@ async function runReferenceCompanyWizard(rl) {
1252
2414
  const icpPath = `./data/${reference.slug}-icp-${market}.json`;
1253
2415
  const leadPath = buildQualifiedLeadsPath(`${reference.slug}-${market}`);
1254
2416
  await writeJsonFile(icpPath, icp);
1255
- const leads = await searchReferenceCompanyLeads(reference, icp, leadCount);
2417
+ const leads = shouldBypassAuth()
2418
+ ? (await leadProvider.generateLeads(icp, leadCount, {
2419
+ companyDomain: reference.domain ?? `${reference.slug}.com`,
2420
+ companyName: reference.companyName
2421
+ })).leads
2422
+ : await fetchWorkspaceLeadSearch(await requireAuthSession(), {
2423
+ mode: "reference-company",
2424
+ icp,
2425
+ limit: leadCount
2426
+ });
1256
2427
  await writeJsonFile(leadPath, leads);
1257
2428
  writeWizardLine(`Saved ICP to ${icpPath}.`);
1258
2429
  if (leads.length === 0) {
@@ -1350,7 +2521,7 @@ async function runWizard(options) {
1350
2521
  throw new Error("wizard does not support --json or --quiet.");
1351
2522
  }
1352
2523
  writeWizardLine("Salesprompter");
1353
- writeWizardLine("Start with a company website or LinkedIn page. I will guide you from there.");
2524
+ writeWizardLine("Start with a company website, LinkedIn product page, or category URL. I will guide you from there.");
1354
2525
  writeWizardLine();
1355
2526
  await ensureWizardSession(options);
1356
2527
  const rl = createInterface({
@@ -1359,11 +2530,17 @@ async function runWizard(options) {
1359
2530
  });
1360
2531
  try {
1361
2532
  const flow = await promptChoice(rl, "What do you want help with?", [
2533
+ {
2534
+ value: "product-market",
2535
+ label: "Find leads from a product market",
2536
+ description: "Start from a company, product, or LinkedIn category and crawl Sales Navigator",
2537
+ aliases: ["product market", "linkedin products", "category", "sales navigator", "crawl"]
2538
+ },
1362
2539
  {
1363
2540
  value: "reference-company",
1364
- label: "Find leads like one of my customers",
1365
- description: "Example: I sell for Deel and want similar companies and people",
1366
- aliases: ["customer", "reference company", "similar companies", "icp", "who to target"]
2541
+ label: "Use the built-in Deel shortcut",
2542
+ description: "Generate the saved Deel ICP and search workspace leads",
2543
+ aliases: ["deel", "shortcut", "vendor template", "quick deel"]
1367
2544
  },
1368
2545
  {
1369
2546
  value: "target-company",
@@ -1377,10 +2554,14 @@ async function runWizard(options) {
1377
2554
  description: "Use a saved leads file to fill an Instantly campaign",
1378
2555
  aliases: ["instantly", "outreach", "send leads", "campaign"]
1379
2556
  }
1380
- ], "reference-company");
2557
+ ], "product-market");
1381
2558
  writeWizardLine();
2559
+ if (flow === "product-market") {
2560
+ await runProductMarketWizard(rl);
2561
+ return;
2562
+ }
1382
2563
  if (flow === "reference-company") {
1383
- await runReferenceCompanyWizard(rl);
2564
+ await runVendorShortcutWizard(rl);
1384
2565
  return;
1385
2566
  }
1386
2567
  if (flow === "target-company") {
@@ -1529,7 +2710,7 @@ async function fetchHistoricalQueryRows(tables) {
1529
2710
  }
1530
2711
  program
1531
2712
  .name("salesprompter")
1532
- .description("Sales workflow CLI for ICP definition, lead generation, enrichment, scoring, and sync.")
2713
+ .description("Sales workflow CLI for LinkedIn product discovery, Sales Navigator crawling, lead enrichment, scoring, and sync.")
1533
2714
  .version(packageVersion)
1534
2715
  .option("--json", "Emit compact machine-readable JSON output", false)
1535
2716
  .option("--quiet", "Suppress successful stdout output", false);
@@ -1870,6 +3051,219 @@ program
1870
3051
  uploaded
1871
3052
  });
1872
3053
  });
3054
+ program
3055
+ .command("salesnav:from-product-category")
3056
+ .description("Crawl a LinkedIn product category, derive intended-role title searches, then run durable Sales Navigator crawls that export through Phantombuster into Salesprompter.")
3057
+ .requiredOption("--input <value>", "Company domain, LinkedIn company URL, LinkedIn product URL, LinkedIn category URL, or LinkedIn product search URL")
3058
+ .option("--max-pages <number>", "Maximum LinkedIn category pages to fetch", "25")
3059
+ .option("--product-limit <number>", "Optional cap on the number of LinkedIn products to inspect")
3060
+ .option("--title-limit <number>", "Optional cap on the number of intended-role titles to crawl")
3061
+ .option("--max-results-per-search <number>", "Maximum Sales Navigator results allowed for one slice before splitting again. Current live export cap is 2500.", "2500")
3062
+ .option("--number-of-profiles <number>", "Profiles to request from Phantombuster per finished Sales Navigator slice. Current live export cap is 2500.", "2500")
3063
+ .option("--slice-preset <name>", "Slice preset label stored with every durable crawl job", "linkedin-product-category")
3064
+ .option("--max-split-depth <number>", "Maximum number of adaptive split dimensions to use", "6")
3065
+ .option("--max-slices-per-title <number>", "Safety cap for total claimed slices per intended-role title", "1000")
3066
+ .option("--max-retries <number>", "Retries for non-splitting export failures", "3")
3067
+ .option("--probe-profiles <number>", "Profiles to scrape while probing whether a slice is still too broad", "100")
3068
+ .option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
3069
+ .option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the slice", "20")
3070
+ .option("--idle-poll-seconds <number>", "Seconds to wait before polling durable crawl status when remote slices are still running", "10")
3071
+ .option("--idle-max-polls <number>", "How many no-claim status polls to tolerate before the crawl is considered stalled", "180")
3072
+ .option("--parallel-exports <number>", "How many Sales Navigator slices to export concurrently per title crawl", "3")
3073
+ .option("--allow-partial-success", "Exit 0 even when one or more durable title crawls finish with failures", false)
3074
+ .option("--skip-product-upload", "Do not upload the crawled LinkedIn product catalog before starting Sales Navigator crawls", false)
3075
+ .option("--out <path>", "Optional local JSON output path")
3076
+ .option("--log-path <path>", "Optional JSONL log path with timestamps, trace id, and Sales Navigator query metadata")
3077
+ .option("--dry-run", "Preview the derived intended-role title queries without creating crawl jobs", false)
3078
+ .action(async (options) => {
3079
+ const maxPages = z.coerce.number().int().min(1).max(500).parse(options.maxPages);
3080
+ const productLimit = options.productLimit === undefined
3081
+ ? undefined
3082
+ : z.coerce.number().int().min(1).max(5000).parse(options.productLimit);
3083
+ const titleLimit = options.titleLimit === undefined
3084
+ ? undefined
3085
+ : z.coerce.number().int().min(1).max(1000).parse(options.titleLimit);
3086
+ const maxResultsPerSearch = z.coerce.number().int().min(1).max(2500).parse(options.maxResultsPerSearch);
3087
+ const numberOfProfiles = z.coerce.number().int().min(1).max(2500).parse(options.numberOfProfiles);
3088
+ const maxSplitDepth = z.coerce.number().int().min(1).max(6).parse(options.maxSplitDepth);
3089
+ const maxSlicesPerTitle = z.coerce.number().int().min(1).max(10000).parse(options.maxSlicesPerTitle);
3090
+ const maxRetries = z.coerce.number().int().min(0).max(5).parse(options.maxRetries);
3091
+ const probeProfiles = z.coerce.number().int().min(1).max(2500).parse(options.probeProfiles);
3092
+ const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
3093
+ const agentBusyMaxWaits = z.coerce.number().int().min(0).max(120).parse(options.agentBusyMaxWaits);
3094
+ const idlePollSeconds = z.coerce.number().int().min(0).max(300).parse(options.idlePollSeconds);
3095
+ const idleMaxPolls = z.coerce.number().int().min(0).max(10000).parse(options.idleMaxPolls);
3096
+ const parallelExports = z.coerce.number().int().min(1).max(10).parse(options.parallelExports);
3097
+ const result = await runSalesNavigatorFromProductCategoryWorkflow({
3098
+ input: options.input,
3099
+ maxPages,
3100
+ productLimit,
3101
+ titleLimit,
3102
+ maxResultsPerSearch,
3103
+ numberOfProfiles,
3104
+ slicePreset: options.slicePreset,
3105
+ maxSplitDepth,
3106
+ maxSlicesPerTitle,
3107
+ maxRetries,
3108
+ probeProfiles,
3109
+ agentBusyWaitSeconds,
3110
+ agentBusyMaxWaits,
3111
+ idlePollSeconds,
3112
+ idleMaxPolls,
3113
+ parallelExports,
3114
+ skipProductUpload: Boolean(options.skipProductUpload),
3115
+ outPath: options.out,
3116
+ logPath: options.logPath,
3117
+ dryRun: Boolean(options.dryRun || shouldBypassAuth())
3118
+ });
3119
+ printOutput({
3120
+ ...result.payload,
3121
+ out: result.outPath
3122
+ });
3123
+ if (!result.payload.dryRun && result.payload.summary.workflowStatus !== "completed" && !options.allowPartialSuccess) {
3124
+ throw new Error(buildSalesNavigatorWorkflowFailureMessage(result.payload.summary));
3125
+ }
3126
+ });
3127
+ program
3128
+ .command("salesnav:ensure-count")
3129
+ .description("Ensure the workspace has at least the target number of Sales Navigator people rows by importing historical BigQuery windows directly.")
3130
+ .option("--target-count <number>", "Minimum linkedin_sales_nav_people rows to guarantee", "200000")
3131
+ .option("--scope <scope>", "Historical scope: all-sales-people|hr-function-included", "all-sales-people")
3132
+ .option("--org-id <id>", "Workspace org id. Defaults to the active CLI org.")
3133
+ .option("--start-offset <number>", "BigQuery offset override. By default the CLI resumes from prior historical backfill runs.")
3134
+ .option("--window-size <number>", "How many historical contacts to request from BigQuery per window", String(salesNavigatorHistoricalBackfillDefaults.windowSize))
3135
+ .option("--max-windows <number>", "Maximum number of BigQuery windows to import in this invocation", "10")
3136
+ .option("--page-size <number>", "BigQuery page size per API read", String(salesNavigatorHistoricalBackfillDefaults.pageSize))
3137
+ .option("--upsert-batch-size <number>", "Supabase upsert batch size", String(salesNavigatorHistoricalBackfillDefaults.upsertBatchSize))
3138
+ .option("--min-upsert-batch-size <number>", "Smallest batch size allowed after timeout-driven splitting", String(salesNavigatorHistoricalBackfillDefaults.minUpsertBatchSize))
3139
+ .option("--max-upsert-retries <number>", "How many retry rounds to tolerate for timeout-prone writes", String(salesNavigatorHistoricalBackfillDefaults.maxUpsertRetries))
3140
+ .option("--retry-delay-ms <number>", "Base retry delay in milliseconds for write retries", String(salesNavigatorHistoricalBackfillDefaults.retryDelayMs))
3141
+ .option("--out <path>", "Optional local JSON output path")
3142
+ .option("--dry-run", "Preview the historical import plan without touching BigQuery or Supabase", false)
3143
+ .action(async (options) => {
3144
+ const targetCount = z.coerce.number().int().min(1).parse(options.targetCount);
3145
+ const scope = z.enum(["all-sales-people", "hr-function-included"]).parse(options.scope);
3146
+ const explicitStartOffset = typeof options.startOffset === "string" && options.startOffset.trim().length > 0
3147
+ ? z.coerce.number().int().min(0).parse(options.startOffset)
3148
+ : null;
3149
+ const windowSize = z.coerce.number().int().min(1).parse(options.windowSize);
3150
+ const maxWindows = z.coerce.number().int().min(1).max(100).parse(options.maxWindows);
3151
+ const pageSize = z.coerce.number().int().min(1).parse(options.pageSize);
3152
+ const upsertBatchSize = z.coerce.number().int().min(1).parse(options.upsertBatchSize);
3153
+ const minUpsertBatchSize = z.coerce.number().int().min(1).parse(options.minUpsertBatchSize);
3154
+ const maxUpsertRetries = z.coerce.number().int().min(0).parse(options.maxUpsertRetries);
3155
+ const retryDelayMs = z.coerce.number().int().min(0).parse(options.retryDelayMs);
3156
+ if (minUpsertBatchSize > upsertBatchSize) {
3157
+ throw new Error("--min-upsert-batch-size must be less than or equal to --upsert-batch-size.");
3158
+ }
3159
+ if (Boolean(options.dryRun)) {
3160
+ const plan = buildSalesNavigatorHistoricalBackfillPlan({
3161
+ targetCount,
3162
+ currentCount: null,
3163
+ startOffset: explicitStartOffset ?? 0,
3164
+ windowSize,
3165
+ maxWindows
3166
+ });
3167
+ const payload = {
3168
+ status: "ok",
3169
+ dryRun: true,
3170
+ mode: "historical-bigquery-backfill",
3171
+ orgId: options.orgId ?? null,
3172
+ scope,
3173
+ targetCount,
3174
+ resumedFromHistory: false,
3175
+ plan
3176
+ };
3177
+ if (options.out) {
3178
+ await writeJsonFile(options.out, payload);
3179
+ }
3180
+ printOutput(payload);
3181
+ return;
3182
+ }
3183
+ let sessionOrgId = null;
3184
+ if (!shouldBypassAuth()) {
3185
+ const session = await requireAuthSession();
3186
+ sessionOrgId = session.user.orgId ?? null;
3187
+ }
3188
+ const orgId = resolveSalesNavigatorHistoricalBackfillOrgId({
3189
+ explicitOrgId: options.orgId,
3190
+ env: process.env,
3191
+ sessionOrgId
3192
+ });
3193
+ const config = resolveSalesNavigatorHistoricalBackfillConfig(process.env);
3194
+ const supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey, {
3195
+ auth: { persistSession: false }
3196
+ });
3197
+ const resumeState = explicitStartOffset === null
3198
+ ? await resolveSalesNavigatorHistoricalBackfillResumeState({
3199
+ supabase,
3200
+ orgId,
3201
+ scope,
3202
+ windowSize,
3203
+ fallbackOffset: 0
3204
+ })
3205
+ : {
3206
+ startOffset: explicitStartOffset,
3207
+ resumedFromHistory: false,
3208
+ matchedHistoryRows: 0,
3209
+ reason: "fallback"
3210
+ };
3211
+ if (resumeState.resumedFromHistory) {
3212
+ writeProgress(`Resuming historical Sales Navigator backfill from offset ${resumeState.startOffset} based on prior CLI runs.`);
3213
+ }
3214
+ else if (explicitStartOffset !== null) {
3215
+ writeProgress(`Using explicit historical Sales Navigator backfill offset ${explicitStartOffset}.`);
3216
+ }
3217
+ const summary = await ensureSalesNavigatorPeopleCount({
3218
+ config,
3219
+ orgId,
3220
+ targetCount,
3221
+ scope,
3222
+ startOffset: resumeState.startOffset,
3223
+ resumedFromHistory: resumeState.resumedFromHistory,
3224
+ windowSize,
3225
+ maxWindows,
3226
+ pageSize,
3227
+ upsertBatchSize,
3228
+ minUpsertBatchSize,
3229
+ maxUpsertRetries,
3230
+ retryDelayMs,
3231
+ onProgress: (event) => {
3232
+ if (event.type === "window-start") {
3233
+ writeProgress(`Starting historical Sales Navigator backfill window ${event.windowIndex + 1}: offset ${event.offset}, limit ${event.limit}.`);
3234
+ return;
3235
+ }
3236
+ if (event.type === "window-progress") {
3237
+ writeProgress(`Historical window ${event.windowIndex + 1}: ${event.processed}/${event.totalResults} rows imported (${event.percent}%).`);
3238
+ return;
3239
+ }
3240
+ writeProgress(`Historical window ${event.windowIndex + 1} complete: count ${event.countBefore} -> ${event.countAfter} (delta ${event.countDelta}).`);
3241
+ }
3242
+ });
3243
+ const payload = {
3244
+ status: summary.status,
3245
+ dryRun: false,
3246
+ mode: "historical-bigquery-backfill",
3247
+ orgId: summary.orgId,
3248
+ scope: summary.scope,
3249
+ targetCount: summary.targetCount,
3250
+ initialCount: summary.initialCount,
3251
+ currentCount: summary.currentCount,
3252
+ resumedFromHistory: summary.resumedFromHistory,
3253
+ startOffset: summary.startOffset,
3254
+ nextOffset: summary.nextOffset,
3255
+ exhausted: summary.exhausted,
3256
+ completedWindows: summary.completedWindows,
3257
+ windows: summary.windows
3258
+ };
3259
+ if (options.out) {
3260
+ await writeJsonFile(options.out, payload);
3261
+ }
3262
+ if (summary.status !== "ok") {
3263
+ throw new Error(`Historical Sales Navigator backfill stopped at ${summary.currentCount} rows before reaching the target ${summary.targetCount}.`);
3264
+ }
3265
+ printOutput(payload);
3266
+ });
1873
3267
  program
1874
3268
  .command("salesnav:crawl")
1875
3269
  .description("Adaptively split broad LinkedIn Sales Navigator people searches into exportable slices and store every finished slice through Salesprompter.")
@@ -1884,7 +3278,12 @@ program
1884
3278
  .option("--probe-profiles <number>", "Profiles to scrape while probing whether a slice is still too broad", "100")
1885
3279
  .option("--agent-busy-wait-seconds <number>", "Seconds to wait before retrying when the export agent is already busy", "30")
1886
3280
  .option("--agent-busy-max-waits <number>", "How many busy-agent waits to tolerate before failing the slice", "20")
3281
+ .option("--idle-poll-seconds <number>", "Seconds to wait before polling durable crawl status when remote slices are still running", "10")
3282
+ .option("--idle-max-polls <number>", "How many no-claim status polls to tolerate before the crawl is considered stalled", "180")
3283
+ .option("--parallel-exports <number>", "How many Sales Navigator slices to export concurrently in this invocation", "3")
3284
+ .option("--allow-partial-success", "Exit 0 even when the durable crawl finishes with failures or remains non-terminal", false)
1887
3285
  .option("--out <path>", "Optional local JSON output path")
3286
+ .option("--log-path <path>", "Optional JSONL log path with timestamps, trace id, and Sales Navigator slice metadata")
1888
3287
  .option("--dry-run", "Preview the adaptive crawl plan without exporting anything", false)
1889
3288
  .action(async (options) => {
1890
3289
  const queryUrl = z.string().url().optional().parse(options.queryUrl);
@@ -1897,7 +3296,30 @@ program
1897
3296
  const probeProfiles = z.coerce.number().int().min(1).max(2500).parse(options.probeProfiles);
1898
3297
  const agentBusyWaitSeconds = z.coerce.number().int().min(1).max(300).parse(options.agentBusyWaitSeconds);
1899
3298
  const agentBusyMaxWaits = z.coerce.number().int().min(0).max(120).parse(options.agentBusyMaxWaits);
3299
+ const idlePollSeconds = z.coerce.number().int().min(0).max(300).parse(options.idlePollSeconds);
3300
+ const idleMaxPolls = z.coerce.number().int().min(0).max(10000).parse(options.idleMaxPolls);
3301
+ const parallelExports = z.coerce.number().int().min(1).max(10).parse(options.parallelExports);
1900
3302
  const effectiveDryRun = Boolean(options.dryRun || shouldBypassAuth());
3303
+ const logger = await createWorkflowLogger({
3304
+ logPath: options.logPath ?? buildSalesNavigatorCrawlLogPath(jobId ?? queryUrl ?? "salesnav-crawl")
3305
+ });
3306
+ await logger.log("salesnav.crawl.command.started", {
3307
+ queryUrl: queryUrl ?? null,
3308
+ jobId: jobId ?? null,
3309
+ maxResultsPerSearch,
3310
+ numberOfProfiles,
3311
+ slicePreset: options.slicePreset,
3312
+ maxSplitDepth,
3313
+ maxSlices,
3314
+ maxRetries,
3315
+ probeProfiles,
3316
+ agentBusyWaitSeconds,
3317
+ agentBusyMaxWaits,
3318
+ idlePollSeconds,
3319
+ idleMaxPolls,
3320
+ parallelExports,
3321
+ dryRun: effectiveDryRun
3322
+ });
1901
3323
  if (effectiveDryRun) {
1902
3324
  if (jobId) {
1903
3325
  throw new Error("--dry-run does not support --job-id. Use --query-url instead.");
@@ -1909,6 +3331,8 @@ program
1909
3331
  status: "ok",
1910
3332
  dryRun: true,
1911
3333
  mode: "adaptive",
3334
+ traceId: logger.traceId,
3335
+ logPath: logger.logPath,
1912
3336
  dimensionPreset: "human-resources-adaptive",
1913
3337
  query: (() => {
1914
3338
  const preview = buildSalesNavigatorCrawlPreview({
@@ -1938,6 +3362,15 @@ program
1938
3362
  };
1939
3363
  })()
1940
3364
  };
3365
+ await logger.log("salesnav.crawl.dry-run.preview", {
3366
+ sourceQueryUrl: payload.query.sourceQueryUrl,
3367
+ root: summarizeSalesNavigatorQuery(payload.query.rootQueryUrl, payload.query.rootAppliedFilters),
3368
+ dimensionOrder: payload.query.dimensionOrder,
3369
+ firstSplitQueries: payload.query.firstSplitQueries.map((attempt) => ({
3370
+ splitTrail: attempt.splitTrail,
3371
+ ...summarizeSalesNavigatorQuery(attempt.slicedQueryUrl, attempt.appliedFilters)
3372
+ }))
3373
+ });
1941
3374
  if (options.out) {
1942
3375
  await writeJsonFile(options.out, payload);
1943
3376
  }
@@ -1962,23 +3395,57 @@ program
1962
3395
  slicePreset: options.slicePreset,
1963
3396
  maxResultsPerSearch,
1964
3397
  numberOfProfiles,
3398
+ rawPayload: {
3399
+ workflow: "salesnav:crawl",
3400
+ traceId: logger.traceId,
3401
+ command: {
3402
+ sourceQueryUrl: queryUrl,
3403
+ slicePreset: options.slicePreset,
3404
+ maxResultsPerSearch,
3405
+ numberOfProfiles,
3406
+ maxSplitDepth,
3407
+ maxSlices,
3408
+ maxRetries,
3409
+ probeProfiles,
3410
+ agentBusyWaitSeconds,
3411
+ agentBusyMaxWaits,
3412
+ idlePollSeconds,
3413
+ idleMaxPolls,
3414
+ parallelExports
3415
+ }
3416
+ },
1965
3417
  rootSlice: {
1966
3418
  slicedQueryUrl: seed.slicedQueryUrl,
1967
3419
  appliedFilters: seed.appliedFilters,
1968
3420
  depth: seed.depth,
1969
- splitTrail: seed.splitTrail
3421
+ splitTrail: seed.splitTrail,
3422
+ rawPayload: {
3423
+ workflow: "salesnav:crawl",
3424
+ traceId: logger.traceId
3425
+ }
1970
3426
  }
1971
- });
3427
+ }, logger.traceId);
1972
3428
  session = created.session;
1973
3429
  createResult = {
1974
3430
  resumed: created.value.resumed,
1975
3431
  job: created.value.job
1976
3432
  };
1977
3433
  resolvedJobId = created.value.job.id;
3434
+ await logger.log("salesnav.crawl.job.ready", {
3435
+ jobId: resolvedJobId,
3436
+ resumed: created.value.resumed,
3437
+ sourceQueryUrl: queryUrl,
3438
+ rootSlice: summarizeSalesNavigatorQuery(seed.slicedQueryUrl, seed.appliedFilters)
3439
+ });
1978
3440
  }
1979
3441
  else {
1980
- const status = await getSalesNavigatorCrawlStatus(session, resolvedJobId);
3442
+ const status = await getSalesNavigatorCrawlStatus(session, resolvedJobId, logger.traceId);
1981
3443
  session = status.session;
3444
+ await logger.log("salesnav.crawl.job.resumed", {
3445
+ jobId: resolvedJobId,
3446
+ sourceQueryUrl: status.value.job.sourceQueryUrl,
3447
+ status: status.value.job.status
3448
+ });
1982
3449
  }
1983
3450
  if (!resolvedJobId) {
1984
3451
  throw new Error("Failed to determine Sales Navigator crawl job id.");
@@ -1989,12 +3456,19 @@ program
1989
3456
  maxRetries,
1990
3457
  probeProfiles,
1991
3458
  agentBusyWaitSeconds,
1992
- agentBusyMaxWaits
3459
+ agentBusyMaxWaits,
3460
+ idlePollSeconds,
3461
+ idleMaxPolls,
3462
+ parallelExports,
3463
+ traceId: logger.traceId,
3464
+ logger
1993
3465
  });
1994
3466
  const payload = {
1995
3467
  status: "ok",
1996
3468
  dryRun: false,
1997
3469
  mode: "durable",
3470
+ traceId: logger.traceId,
3471
+ logPath: logger.logPath,
1998
3472
  jobId: resolvedJobId,
1999
3473
  resumed: createResult?.resumed ?? true,
2000
3474
  sourceQueryUrl: crawl.job.sourceQueryUrl,
@@ -2017,10 +3491,20 @@ program
2017
3491
  : null,
2018
3492
  lastOutcome: crawl.lastOutcome
2019
3493
  };
3494
+ await logger.log("salesnav.crawl.command.completed", {
3495
+ jobId: resolvedJobId,
3496
+ status: crawl.job.status,
3497
+ claimedSlices: crawl.claimedSlices,
3498
+ truncated: crawl.truncated,
3499
+ lastOutcome: crawl.lastOutcome
3500
+ });
2020
3501
  if (options.out) {
2021
3502
  await writeJsonFile(options.out, payload);
2022
3503
  }
2023
3504
  printOutput(payload);
3505
+ if ((crawl.job.status !== "completed" || crawl.truncated) && !options.allowPartialSuccess) {
3506
+ throw new Error(`Sales Navigator crawl did not finish cleanly. status=${crawl.job.status} truncated=${crawl.truncated} failedSlices=${crawl.job.failedSlices} runningSlices=${crawl.job.runningSlices} queuedSlices=${crawl.job.queuedSlices}`);
3507
+ }
2024
3508
  });
2025
3509
  program
2026
3510
  .command("salesnav:crawl:status")
@@ -2081,7 +3565,13 @@ program
2081
3565
  appliedFilters: item.appliedFilters,
2082
3566
  maxResultsPerSearch,
2083
3567
  numberOfProfiles,
2084
- slicePreset: options.slicePreset
3568
+ slicePreset: options.slicePreset,
3569
+ rawPayload: {
3570
+ workflow: "salesnav:export",
3571
+ sourceQueryUrl: item.sourceQueryUrl,
3572
+ slicedQueryUrl: item.slicedQueryUrl,
3573
+ appliedFilters: item.appliedFilters
3574
+ }
2085
3575
  });
2086
3576
  exported.push(result);
2087
3577
  }