salesprompter-cli 0.1.20 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,7 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { spawn } from "node:child_process";
3
- import { access, appendFile, mkdir } from "node:fs/promises";
3
+ import { access, appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
4
4
  import { createRequire } from "node:module";
5
+ import os from "node:os";
5
6
  import path from "node:path";
6
7
  import { emitKeypressEvents } from "node:readline";
7
8
  import { createInterface } from "node:readline/promises";
@@ -13,12 +14,15 @@ import { clearAuthSession, loginWithBrowserConnect, loginWithDeviceFlow, loginWi
13
14
  import { buildBigQueryLeadLookupSql, executeBigQuerySql, normalizeBigQueryLeadRows, runBigQueryQuery, runBigQueryRows } from "./bigquery.js";
14
15
  import { AccountProfileSchema, EnrichedLeadSchema, IcpSchema, LeadSchema, ScoredLeadSchema, SyncTargetSchema } from "./domain.js";
15
16
  import { auditDomainDecisions, buildDomainfinderBacklogQueries, buildDomainfinderCandidatesSql, buildDomainfinderInputSql, buildDomainfinderWritebackSql, buildExistingDomainRepairSql, buildExistingDomainAuditQueries, compareDomainSelectionStrategies, selectBestDomains } from "./domainfinder.js";
17
+ import { buildDeelSalesNavCsvHeader, buildDeelSalesNavCsvLines, isDeelRelevantSalesNavTitle, normalizeDeelSalesNavRow } from "./deel-salesnav.js";
18
+ import { buildDeelOutreachExportSql, buildDeelOutreachPack, normalizeDeelOutreachRows } from "./deel-outreach.js";
16
19
  import { buildDirectPathLeadExportSql, normalizeDirectPathRows, segmentDirectPathRows } from "./direct-path.js";
17
20
  import { AccountLeadProvider, DryRunSyncProvider, HeuristicCompanyProvider, HeuristicEnrichmentProvider, HeuristicPeopleSearchProvider, HeuristicScoringProvider, RoutedSyncProvider } from "./engine.js";
18
21
  import { analyzeHistoricalQueries } from "./historical-queries.js";
19
22
  import { buildHistoricalVendorIcp, buildVendorIcp } from "./icp-templates.js";
20
23
  import { InstantlySyncProvider } from "./instantly.js";
21
24
  import { crawlLinkedInProductCategory } from "./linkedin-products.js";
25
+ import { claimValidatedSalesNavigatorSessionCookieForCli } from "./linkedin-session.js";
22
26
  import { buildLeadlistsFunnelQueries } from "./leadlists-funnel.js";
23
27
  import { readJsonFile, splitCsv, writeJsonFile, writeTextFile } from "./io.js";
24
28
  import { buildSalesNavigatorCrawlPreview, createSalesNavigatorCrawlSeed, DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS, buildSalesNavigatorPeopleSlice, deriveSalesNavigatorTitleQuerySeeds, expandSalesNavigatorCrawlAttempt, SalesNavigatorSliceTooBroadError } from "./sales-navigator.js";
@@ -357,6 +361,24 @@ function slugify(value) {
357
361
  .replace(/^-+|-+$/g, "")
358
362
  .replace(/-{2,}/g, "-");
359
363
  }
364
+ function resolveSalesNavigatorSupabaseConfig(env = process.env) {
365
+ const supabaseUrl = env.SALESPROMPTER_SUPABASE_URL?.trim() || env.NEXT_PUBLIC_SUPABASE_URL?.trim() || "";
366
+ const supabaseServiceRoleKey = env.SUPABASE_SERVICE_ROLE_KEY?.trim() || "";
367
+ const missing = [];
368
+ if (supabaseUrl.length === 0) {
369
+ missing.push("SALESPROMPTER_SUPABASE_URL or NEXT_PUBLIC_SUPABASE_URL");
370
+ }
371
+ if (supabaseServiceRoleKey.length === 0) {
372
+ missing.push("SUPABASE_SERVICE_ROLE_KEY");
373
+ }
374
+ if (missing.length > 0) {
375
+ throw new Error(`Missing required environment variables for Sales Navigator Supabase export: ${missing.join(", ")}`);
376
+ }
377
+ return {
378
+ supabaseUrl,
379
+ supabaseServiceRoleKey
380
+ };
381
+ }
360
382
  function normalizeDomainInput(value) {
361
383
  return value
362
384
  .trim()
@@ -439,6 +461,26 @@ function parseCompanyReference(value) {
439
461
  })
440
462
  };
441
463
  }
464
+ function chunkArray(values, size) {
465
+ const chunks = [];
466
+ for (let index = 0; index < values.length; index += size) {
467
+ chunks.push(values.slice(index, index + size));
468
+ }
469
+ return chunks;
470
+ }
471
+ function extractSalesNavContactId(url) {
472
+ if (!url) {
473
+ return null;
474
+ }
475
+ try {
476
+ const parsed = new URL(url);
477
+ const segments = parsed.pathname.split("/").filter((segment) => segment.length > 0);
478
+ return segments.length > 0 ? segments[segments.length - 1] : null;
479
+ }
480
+ catch {
481
+ return null;
482
+ }
483
+ }
442
484
  function writeWizardLine(message = "") {
443
485
  process.stdout.write(`${message}\n`);
444
486
  }
@@ -783,29 +825,10 @@ function summarizeSalesNavigatorQuery(url, appliedFilters) {
783
825
  appliedFilters
784
826
  };
785
827
  }
786
- function extractSalesNavigatorFilterTypes(url, appliedFilters) {
787
- const filterTypes = new Set(appliedFilters.map((filter) => filter.type));
788
- const decodedQuery = decodeSalesNavigatorQueryParam(url) ?? "";
789
- for (const match of decodedQuery.matchAll(/type:([A-Z_]+)/g)) {
790
- const value = match[1]?.trim();
791
- if (value) {
792
- filterTypes.add(value);
793
- }
794
- }
795
- return [...filterTypes];
796
- }
797
828
  function shouldPreSplitSalesNavigatorRootSlice(slice, maxSplitDepth) {
798
- if (slice.depth !== 0 || slice.splitTrail.length > 0) {
799
- return false;
800
- }
801
- if (!nextSalesNavigatorSplitDimension(slice, maxSplitDepth)) {
802
- return false;
803
- }
804
- const filterTypes = new Set(extractSalesNavigatorFilterTypes(slice.slicedQueryUrl, slice.appliedFilters));
805
- if (!filterTypes.has("CURRENT_TITLE")) {
806
- return false;
807
- }
808
- return !DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.some((dimension) => filterTypes.has(dimension.filterType));
829
+ void slice;
830
+ void maxSplitDepth;
831
+ return false;
809
832
  }
810
833
  function buildTraceHeaders(traceId) {
811
834
  return traceId ? { "X-Salesprompter-Trace-Id": traceId } : {};
@@ -1632,6 +1655,10 @@ async function runSalesNavigatorCrawlAttempt(session, attempt, options, context)
1632
1655
  if (totalResults === null || totalResults > attempt.maxResultsPerSearch) {
1633
1656
  return probeResult;
1634
1657
  }
1658
+ const splitTriggerResults = Math.min(attempt.maxResultsPerSearch, SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS);
1659
+ if (totalResults > splitTriggerResults) {
1660
+ throw new SalesNavigatorSliceTooBroadError(`Sales Navigator slice produced ${totalResults} results, exceeding the split trigger of ${splitTriggerResults}.`, { totalResults });
1661
+ }
1635
1662
  return await runSalesNavigatorExportWithAgentWait(session, {
1636
1663
  sourceQueryUrl: attempt.sourceQueryUrl,
1637
1664
  slicedQueryUrl: attempt.slicedQueryUrl,
@@ -1731,10 +1758,112 @@ function nextSalesNavigatorSplitDimension(slice, maxSplitDepth) {
1731
1758
  if (slice.depth >= maxSplitDepth) {
1732
1759
  return null;
1733
1760
  }
1734
- return DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS[slice.depth] ?? null;
1761
+ const usedDimensionKeys = new Set(slice.splitTrail.map((entry) => entry.key));
1762
+ const orderedDimensions = getLearnedSalesNavigatorDimensionOrder();
1763
+ return orderedDimensions.find((dimension) => !usedDimensionKeys.has(dimension.key)) ?? null;
1735
1764
  }
1736
1765
  const SALES_NAVIGATOR_COOKIE_RETRY_LIMIT = 8;
1737
1766
  const SALES_NAVIGATOR_RESULT_RETRY_LIMIT = 3;
1767
+ const SALES_NAVIGATOR_SPLIT_TRIGGER_RESULTS = 1500;
1768
+ const SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS = 3;
1769
+ let salesNavigatorFilterImpactModel = null;
1770
+ let salesNavigatorFilterImpactLoaded = false;
1771
+ function getSalesprompterConfigDir() {
1772
+ const override = process.env.SALESPROMPTER_CONFIG_DIR?.trim();
1773
+ if (override !== undefined && override.length > 0) {
1774
+ return override;
1775
+ }
1776
+ return path.join(os.homedir(), ".config", "salesprompter");
1777
+ }
1778
+ function getSalesNavigatorFilterImpactPath() {
1779
+ return path.join(getSalesprompterConfigDir(), "salesnav-filter-impact.json");
1780
+ }
1781
+ async function loadSalesNavigatorFilterImpactModel() {
1782
+ if (salesNavigatorFilterImpactLoaded) {
1783
+ return salesNavigatorFilterImpactModel;
1784
+ }
1785
+ salesNavigatorFilterImpactLoaded = true;
1786
+ const filePath = getSalesNavigatorFilterImpactPath();
1787
+ try {
1788
+ const content = await readFile(filePath, "utf8");
1789
+ const parsed = JSON.parse(content);
1790
+ if (parsed && parsed.version === 1 && parsed.dimensions && typeof parsed.dimensions === "object") {
1791
+ salesNavigatorFilterImpactModel = parsed;
1792
+ }
1793
+ }
1794
+ catch {
1795
+ salesNavigatorFilterImpactModel = null;
1796
+ }
1797
+ return salesNavigatorFilterImpactModel;
1798
+ }
1799
+ async function persistSalesNavigatorFilterImpactModel() {
1800
+ if (!salesNavigatorFilterImpactModel) {
1801
+ return;
1802
+ }
1803
+ const filePath = getSalesNavigatorFilterImpactPath();
1804
+ await mkdir(path.dirname(filePath), { recursive: true });
1805
+ await writeFile(filePath, `${JSON.stringify(salesNavigatorFilterImpactModel, null, 2)}\n`, "utf8");
1806
+ }
1807
+ function getLearnedSalesNavigatorDimensionOrder() {
1808
+ const model = salesNavigatorFilterImpactModel;
1809
+ if (!model) {
1810
+ return DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS;
1811
+ }
1812
+ const defaultIndex = new Map(DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS.map((dimension, index) => [dimension.key, index]));
1813
+ return [...DEFAULT_SALES_NAVIGATOR_CRAWL_DIMENSIONS].sort((left, right) => {
1814
+ const leftStats = model.dimensions[left.key];
1815
+ const rightStats = model.dimensions[right.key];
1816
+ const leftReliable = (leftStats?.observations ?? 0) >= SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS;
1817
+ const rightReliable = (rightStats?.observations ?? 0) >= SALES_NAVIGATOR_FILTER_IMPACT_MIN_OBSERVATIONS;
1818
+ if (leftReliable && rightReliable) {
1819
+ const delta = (leftStats?.avgResults ?? Number.POSITIVE_INFINITY) -
1820
+ (rightStats?.avgResults ?? Number.POSITIVE_INFINITY);
1821
+ if (delta !== 0) {
1822
+ return delta;
1823
+ }
1824
+ }
1825
+ else if (leftReliable !== rightReliable) {
1826
+ return leftReliable ? -1 : 1;
1827
+ }
1828
+ return (defaultIndex.get(left.key) ?? 0) - (defaultIndex.get(right.key) ?? 0);
1829
+ });
1830
+ }
1831
+ async function recordSalesNavigatorFilterImpactObservation(slice, totalResults, options) {
1832
+ if (totalResults === null || totalResults === undefined || !Number.isFinite(totalResults)) {
1833
+ return;
1834
+ }
1835
+ const learnedDimension = slice.splitTrail.at(-1)?.key ?? null;
1836
+ if (!learnedDimension) {
1837
+ return;
1838
+ }
1839
+ await loadSalesNavigatorFilterImpactModel();
1840
+ if (!salesNavigatorFilterImpactModel) {
1841
+ salesNavigatorFilterImpactModel = {
1842
+ version: 1,
1843
+ updatedAt: new Date().toISOString(),
1844
+ dimensions: {}
1845
+ };
1846
+ }
1847
+ const previous = salesNavigatorFilterImpactModel.dimensions[learnedDimension];
1848
+ const observations = (previous?.observations ?? 0) + 1;
1849
+ const sumResults = (previous?.sumResults ?? 0) + totalResults;
1850
+ const avgResults = sumResults / observations;
1851
+ salesNavigatorFilterImpactModel.dimensions[learnedDimension] = {
1852
+ observations,
1853
+ sumResults,
1854
+ avgResults,
1855
+ lastObservedAt: new Date().toISOString()
1856
+ };
1857
+ salesNavigatorFilterImpactModel.updatedAt = new Date().toISOString();
1858
+ await persistSalesNavigatorFilterImpactModel();
1859
+ await options?.logger?.log("salesnav.filter_impact.updated", {
1860
+ dimensionKey: learnedDimension,
1861
+ observations,
1862
+ avgResults,
1863
+ totalResults,
1864
+ outcome: options?.outcome ?? null
1865
+ });
1866
+ }
1738
1867
  function buildSalesNavigatorSplitChildren(slice, dimension) {
1739
1868
  const attempt = buildSalesNavigatorCrawlAttemptFromClaimedSlice(slice);
1740
1869
  return expandSalesNavigatorCrawlAttempt(attempt, dimension).map((child) => ({
@@ -1812,6 +1941,42 @@ function buildSalesNavigatorSliceFailureReport(slice, error, options) {
1812
1941
  function formatSalesNavigatorSplitTrail(splitTrail) {
1813
1942
  return splitTrail.map((entry) => `${entry.key}:${entry.value.text}`);
1814
1943
  }
1944
+ async function ensureSalesNavigatorSessionPoolReady(queryUrl, options) {
1945
+ try {
1946
+ await options.logger?.log("salesnav.session_pool.preflight.started", {
1947
+ source: options.source,
1948
+ queryUrl
1949
+ });
1950
+ const claimed = await claimValidatedSalesNavigatorSessionCookieForCli({
1951
+ queryUrl,
1952
+ source: options.source,
1953
+ env: process.env
1954
+ });
1955
+ await options.logger?.log("salesnav.session_pool.preflight.completed", {
1956
+ source: options.source,
1957
+ queryUrl,
1958
+ status: claimed ? "ok" : "skipped",
1959
+ selectedSessionUserEmail: claimed?.userEmail ?? null,
1960
+ selectedSessionUserHandle: claimed?.userHandle ?? null,
1961
+ selectedSessionCookieSha256: claimed?.sessionCookieSha256 ?? null
1962
+ });
1963
+ return {
1964
+ ready: true
1965
+ };
1966
+ }
1967
+ catch (error) {
1968
+ const message = error instanceof Error ? error.message : String(error);
1969
+ await options.logger?.log("salesnav.session_pool.preflight.failed", {
1970
+ source: options.source,
1971
+ queryUrl,
1972
+ error: message
1973
+ });
1974
+ return {
1975
+ ready: false,
1976
+ error: message
1977
+ };
1978
+ }
1979
+ }
1815
1980
  async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, options) {
1816
1981
  let currentSession = session;
1817
1982
  await options.logger?.log("salesnav.crawl.slice.claimed", {
@@ -1869,7 +2034,8 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
1869
2034
  error: `Pre-split by ${nextDimension.key}`,
1870
2035
  errorCode: "presplit_root_title_query",
1871
2036
  totalResults: null
1872
- }
2037
+ },
2038
+ forceSessionPoolRecheck: false
1873
2039
  };
1874
2040
  }
1875
2041
  }
@@ -1909,6 +2075,10 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
1909
2075
  })
1910
2076
  }, options.traceId);
1911
2077
  currentSession = reported.session;
2078
+ await recordSalesNavigatorFilterImpactObservation(slice, result.totalResults ?? null, {
2079
+ logger: options.logger,
2080
+ outcome: "exported"
2081
+ });
1912
2082
  await options.logger?.log("salesnav.crawl.slice.exported", {
1913
2083
  jobId,
1914
2084
  sliceId: slice.id,
@@ -1927,7 +2097,8 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
1927
2097
  outcome: "exported",
1928
2098
  runId: result.runId,
1929
2099
  totalResults: result.totalResults ?? null
1930
- }
2100
+ },
2101
+ forceSessionPoolRecheck: false
1931
2102
  };
1932
2103
  }
1933
2104
  catch (error) {
@@ -1956,6 +2127,10 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
1956
2127
  });
1957
2128
  const reported = await reportSalesNavigatorCrawlSlice(currentSession, jobId, payload, options.traceId);
1958
2129
  currentSession = reported.session;
2130
+ await recordSalesNavigatorFilterImpactObservation(slice, payload.totalResults ?? null, {
2131
+ logger: options.logger,
2132
+ outcome: payload.outcome
2133
+ });
1959
2134
  await options.logger?.log("salesnav.crawl.slice.reported", {
1960
2135
  jobId,
1961
2136
  sliceId: slice.id,
@@ -1976,11 +2151,13 @@ async function processSalesNavigatorClaimedCrawlSlice(session, jobId, slice, opt
1976
2151
  error: payload.error,
1977
2152
  errorCode: payload.errorCode,
1978
2153
  totalResults: payload.totalResults
1979
- }
2154
+ },
2155
+ forceSessionPoolRecheck: payload.errorCode === "invalid_session"
1980
2156
  };
1981
2157
  }
1982
2158
  }
1983
2159
  async function executeSalesNavigatorCrawlJob(session, jobId, options) {
2160
+ await loadSalesNavigatorFilterImpactModel();
1984
2161
  let currentSession = session;
1985
2162
  let claimedSlices = 0;
1986
2163
  const seenSliceIds = new Set();
@@ -1992,11 +2169,56 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
1992
2169
  const inFlight = new Map();
1993
2170
  let nextSlot = 0;
1994
2171
  let noMoreClaimableWork = false;
2172
+ let sessionPoolFailures = 0;
2173
+ let nextSessionPoolRetryAt = 0;
2174
+ let lastSessionPoolReadyAt = 0;
2175
+ const sessionPoolReadinessCooldownMs = 120_000;
1995
2176
  while (true) {
1996
2177
  while (!noMoreClaimableWork && inFlight.size < parallelExports) {
1997
2178
  if (claimedSlices >= options.maxSlices) {
1998
2179
  break;
1999
2180
  }
2181
+ if (inFlight.size === 0) {
2182
+ const now = Date.now();
2183
+ if (now < nextSessionPoolRetryAt) {
2184
+ await delay(Math.max(0, nextSessionPoolRetryAt - now));
2185
+ continue;
2186
+ }
2187
+ if (now - lastSessionPoolReadyAt >= sessionPoolReadinessCooldownMs) {
2188
+ const readiness = await ensureSalesNavigatorSessionPoolReady(job?.sourceQueryUrl ?? "https://www.linkedin.com/sales/search/people", {
2189
+ logger: options.logger,
2190
+ source: "cli_salesnav_crawl_preflight"
2191
+ });
2192
+ if (!readiness.ready) {
2193
+ sessionPoolFailures += 1;
2194
+ idlePollCount += 1;
2195
+ const waitSeconds = Math.min(120, 10 * Math.max(1, sessionPoolFailures));
2196
+ nextSessionPoolRetryAt = Date.now() + waitSeconds * 1000;
2197
+ await options.logger?.log("salesnav.crawl.session_pool.waiting", {
2198
+ jobId,
2199
+ idlePollCount,
2200
+ idleMaxPolls: options.idleMaxPolls,
2201
+ sessionPoolFailures,
2202
+ waitSeconds,
2203
+ error: readiness.error
2204
+ });
2205
+ if (idlePollCount >= options.idleMaxPolls) {
2206
+ lastOutcome = {
2207
+ outcome: "terminal_failed",
2208
+ error: readiness.error ??
2209
+ `Sales Navigator session pool stayed unavailable for ${options.idleMaxPolls} checks.`,
2210
+ errorCode: "blocked_no_valid_salesnav_session"
2211
+ };
2212
+ noMoreClaimableWork = true;
2213
+ break;
2214
+ }
2215
+ continue;
2216
+ }
2217
+ sessionPoolFailures = 0;
2218
+ nextSessionPoolRetryAt = 0;
2219
+ lastSessionPoolReadyAt = Date.now();
2220
+ }
2221
+ }
2000
2222
  const claimed = await claimNextSalesNavigatorCrawlSlice(currentSession, jobId, options.traceId);
2001
2223
  currentSession = claimed.session;
2002
2224
  job = claimed.value.job;
@@ -2089,6 +2311,10 @@ async function executeSalesNavigatorCrawlJob(session, jobId, options) {
2089
2311
  job = completed.value.job;
2090
2312
  activeSlice = completed.value.activeSlice;
2091
2313
  lastOutcome = completed.value.lastOutcome;
2314
+ if (completed.value.forceSessionPoolRecheck) {
2315
+ lastSessionPoolReadyAt = 0;
2316
+ nextSessionPoolRetryAt = 0;
2317
+ }
2092
2318
  }
2093
2319
  if (!job) {
2094
2320
  const status = await getSalesNavigatorCrawlStatus(currentSession, jobId, options.traceId);
@@ -3078,6 +3304,257 @@ program
3078
3304
  }
3079
3305
  printOutput(payload);
3080
3306
  });
3307
+ program
3308
+ .command("salesnav:deel-locale-export")
3309
+ .description("Export the Supabase Sales Navigator Deel corpus into German-vs-English outreach backlog files.")
3310
+ .option("--org-id <id>", "Workspace org id. Defaults to the active CLI org.")
3311
+ .option("--limit <number>", "Maximum number of Supabase rows to process", "250000")
3312
+ .option("--page-size <number>", "Supabase page size per request", "1000")
3313
+ .option("--title-filter <mode>", "deel-hr|all", "deel-hr")
3314
+ .requiredOption("--out-dir <path>", "Output directory for summary and locale CSV files")
3315
+ .action(async (options) => {
3316
+ const limit = z.coerce.number().int().min(1).max(500000).parse(options.limit);
3317
+ const pageSize = z.coerce.number().int().min(1).max(1000).parse(options.pageSize);
3318
+ const titleFilter = z.enum(["deel-hr", "all"]).parse(options.titleFilter);
3319
+ let sessionOrgId = null;
3320
+ if (!shouldBypassAuth()) {
3321
+ const session = await requireAuthSession();
3322
+ sessionOrgId = session.user.orgId ?? null;
3323
+ }
3324
+ const orgId = resolveSalesNavigatorHistoricalBackfillOrgId({
3325
+ explicitOrgId: options.orgId,
3326
+ env: process.env,
3327
+ sessionOrgId
3328
+ });
3329
+ const config = resolveSalesNavigatorSupabaseConfig(process.env);
3330
+ const supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey, {
3331
+ auth: { persistSession: false }
3332
+ });
3333
+ const countResponse = await supabase
3334
+ .from("linkedin_sales_nav_people")
3335
+ .select("id", { count: "exact", head: true })
3336
+ .eq("org_id", orgId);
3337
+ if (countResponse.error) {
3338
+ throw new Error(`Failed to count linkedin_sales_nav_people rows: ${countResponse.error.message}`);
3339
+ }
3340
+ const totalInOrg = countResponse.count ?? 0;
3341
+ const totalToProcess = Math.min(totalInOrg, limit);
3342
+ const baseSlug = `deel-salesnav-${slugify(orgId) || "workspace"}`;
3343
+ const deCsvPath = path.join(options.outDir, `${baseSlug}-de.csv`);
3344
+ const enCsvPath = path.join(options.outDir, `${baseSlug}-en.csv`);
3345
+ const summaryPath = path.join(options.outDir, `${baseSlug}-summary.json`);
3346
+ const samplesPath = path.join(options.outDir, `${baseSlug}-samples.json`);
3347
+ await mkdir(options.outDir, { recursive: true });
3348
+ await writeFile(deCsvPath, `${buildDeelSalesNavCsvHeader()}\n`, "utf8");
3349
+ await writeFile(enCsvPath, `${buildDeelSalesNavCsvHeader()}\n`, "utf8");
3350
+ const localeCounts = { de: 0, en: 0 };
3351
+ let titleMatchedCount = 0;
3352
+ let titleFilteredOutCount = 0;
3353
+ const fieldCounts = {
3354
+ firstName: 0,
3355
+ lastName: 0,
3356
+ fullName: 0,
3357
+ companyName: 0,
3358
+ companyNameCleaned: 0,
3359
+ preferredProfileUrl: 0,
3360
+ linkedinProfileUrl: 0,
3361
+ companyLinkedInHandle: 0,
3362
+ location: 0,
3363
+ companyLocation: 0,
3364
+ searchQuery: 0
3365
+ };
3366
+ const signalFieldCounts = {
3367
+ location: 0,
3368
+ companyLocation: 0,
3369
+ searchQuery: 0,
3370
+ none: 0
3371
+ };
3372
+ const titleCounts = new Map();
3373
+ const samples = {
3374
+ de: [],
3375
+ en: []
3376
+ };
3377
+ const selectFields = [
3378
+ "id",
3379
+ "org_id",
3380
+ "run_id",
3381
+ "sales_nav_profile_url",
3382
+ "linkedin_profile_url",
3383
+ "default_profile_url",
3384
+ "full_name",
3385
+ "first_name",
3386
+ "last_name",
3387
+ "company_name",
3388
+ "company_url",
3389
+ "regular_company_url",
3390
+ "title",
3391
+ "industry",
3392
+ "location",
3393
+ "company_location",
3394
+ "search_query",
3395
+ "scraped_at"
3396
+ ].join(", ");
3397
+ let processed = 0;
3398
+ let lastSeenId = null;
3399
+ while (processed < totalToProcess) {
3400
+ let query = supabase
3401
+ .from("linkedin_sales_nav_people")
3402
+ .select(selectFields)
3403
+ .eq("org_id", orgId)
3404
+ .order("id", { ascending: true })
3405
+ .limit(Math.min(pageSize, totalToProcess - processed));
3406
+ if (lastSeenId) {
3407
+ query = query.gt("id", lastSeenId);
3408
+ }
3409
+ const response = await query;
3410
+ if (response.error) {
3411
+ throw new Error(`Failed to read linkedin_sales_nav_people rows after ${lastSeenId ?? "start"}: ${response.error.message}`);
3412
+ }
3413
+ const pageRows = (response.data ?? []);
3414
+ if (pageRows.length === 0) {
3415
+ break;
3416
+ }
3417
+ const relevantRows = pageRows.filter((row) => {
3418
+ if (titleFilter === "all") {
3419
+ titleMatchedCount += 1;
3420
+ return true;
3421
+ }
3422
+ const matches = isDeelRelevantSalesNavTitle(row.title);
3423
+ if (matches) {
3424
+ titleMatchedCount += 1;
3425
+ return true;
3426
+ }
3427
+ titleFilteredOutCount += 1;
3428
+ return false;
3429
+ });
3430
+ const preparedRows = relevantRows.map((row) => normalizeDeelSalesNavRow(row));
3431
+ const deRows = preparedRows.filter((row) => row.language === "de");
3432
+ const enRows = preparedRows.filter((row) => row.language === "en");
3433
+ if (deRows.length > 0) {
3434
+ await appendFile(deCsvPath, `${buildDeelSalesNavCsvLines(deRows)}\n`, "utf8");
3435
+ }
3436
+ if (enRows.length > 0) {
3437
+ await appendFile(enCsvPath, `${buildDeelSalesNavCsvLines(enRows)}\n`, "utf8");
3438
+ }
3439
+ for (const row of preparedRows) {
3440
+ localeCounts[row.language] += 1;
3441
+ if (row.signalFields.length === 0) {
3442
+ signalFieldCounts.none += 1;
3443
+ }
3444
+ else {
3445
+ for (const field of row.signalFields) {
3446
+ if (field === "location") {
3447
+ signalFieldCounts.location += 1;
3448
+ }
3449
+ else if (field === "companyLocation") {
3450
+ signalFieldCounts.companyLocation += 1;
3451
+ }
3452
+ else if (field === "searchQuery") {
3453
+ signalFieldCounts.searchQuery += 1;
3454
+ }
3455
+ }
3456
+ }
3457
+ if (row.firstName)
3458
+ fieldCounts.firstName += 1;
3459
+ if (row.lastName)
3460
+ fieldCounts.lastName += 1;
3461
+ if (row.fullName)
3462
+ fieldCounts.fullName += 1;
3463
+ if (row.companyName)
3464
+ fieldCounts.companyName += 1;
3465
+ if (row.companyNameCleaned)
3466
+ fieldCounts.companyNameCleaned += 1;
3467
+ if (row.preferredProfileUrl)
3468
+ fieldCounts.preferredProfileUrl += 1;
3469
+ if (row.linkedinProfileUrl)
3470
+ fieldCounts.linkedinProfileUrl += 1;
3471
+ if (row.companyLinkedInHandle)
3472
+ fieldCounts.companyLinkedInHandle += 1;
3473
+ if (row.location)
3474
+ fieldCounts.location += 1;
3475
+ if (row.companyLocation)
3476
+ fieldCounts.companyLocation += 1;
3477
+ if (row.searchQuery)
3478
+ fieldCounts.searchQuery += 1;
3479
+ if (row.title) {
3480
+ titleCounts.set(row.title, (titleCounts.get(row.title) ?? 0) + 1);
3481
+ }
3482
+ if (samples[row.language].length < 25) {
3483
+ samples[row.language].push(row);
3484
+ }
3485
+ }
3486
+ processed += pageRows.length;
3487
+ lastSeenId = pageRows[pageRows.length - 1]?.id ?? lastSeenId;
3488
+ const completedPages = Math.ceil(processed / pageSize);
3489
+ if (completedPages === 1 || processed === totalToProcess || completedPages % 10 === 0) {
3490
+ writeProgress(`Processed ${processed}/${totalToProcess} Deel Sales Navigator rows for org ${orgId}; kept ${titleMatchedCount} after ${titleFilter} title filtering.`);
3491
+ }
3492
+ }
3493
+ const keptTotal = localeCounts.de + localeCounts.en;
3494
+ const percentage = (count, base = keptTotal) => base > 0 ? Number(((count / base) * 100).toFixed(2)) : 0;
3495
+ const payload = {
3496
+ status: "ok",
3497
+ vendor: "deel",
3498
+ source: "salesnav-supabase",
3499
+ recommendedRouting: "separate-campaigns-by-language",
3500
+ orgId,
3501
+ totalInOrg,
3502
+ scanned: processed,
3503
+ titleFilter,
3504
+ keptAfterTitleFilter: keptTotal,
3505
+ titleMatchedCount,
3506
+ titleFilteredOutCount,
3507
+ truncatedByLimit: totalInOrg > processed,
3508
+ localeCounts,
3509
+ localePercentages: {
3510
+ de: percentage(localeCounts.de),
3511
+ en: percentage(localeCounts.en)
3512
+ },
3513
+ fieldCoverage: {
3514
+ firstName: { count: fieldCounts.firstName, percentage: percentage(fieldCounts.firstName) },
3515
+ lastName: { count: fieldCounts.lastName, percentage: percentage(fieldCounts.lastName) },
3516
+ fullName: { count: fieldCounts.fullName, percentage: percentage(fieldCounts.fullName) },
3517
+ companyName: { count: fieldCounts.companyName, percentage: percentage(fieldCounts.companyName) },
3518
+ companyNameCleaned: {
3519
+ count: fieldCounts.companyNameCleaned,
3520
+ percentage: percentage(fieldCounts.companyNameCleaned)
3521
+ },
3522
+ preferredProfileUrl: {
3523
+ count: fieldCounts.preferredProfileUrl,
3524
+ percentage: percentage(fieldCounts.preferredProfileUrl)
3525
+ },
3526
+ linkedinProfileUrl: {
3527
+ count: fieldCounts.linkedinProfileUrl,
3528
+ percentage: percentage(fieldCounts.linkedinProfileUrl)
3529
+ },
3530
+ companyLinkedInHandle: {
3531
+ count: fieldCounts.companyLinkedInHandle,
3532
+ percentage: percentage(fieldCounts.companyLinkedInHandle)
3533
+ },
3534
+ location: { count: fieldCounts.location, percentage: percentage(fieldCounts.location) },
3535
+ companyLocation: {
3536
+ count: fieldCounts.companyLocation,
3537
+ percentage: percentage(fieldCounts.companyLocation)
3538
+ },
3539
+ searchQuery: { count: fieldCounts.searchQuery, percentage: percentage(fieldCounts.searchQuery) }
3540
+ },
3541
+ signalFieldCounts,
3542
+ topTitles: [...titleCounts.entries()].sort((a, b) => b[1] - a[1]).slice(0, 20),
3543
+ campaignRecommendation: {
3544
+ de: "Only rows with clear DACH signals should enter the German Deel campaign.",
3545
+ en: "Route everything else to a separate English Deel campaign. English is the safer fallback for ambiguous locales."
3546
+ },
3547
+ files: {
3548
+ deCsv: deCsvPath,
3549
+ enCsv: enCsvPath,
3550
+ summary: summaryPath,
3551
+ samples: samplesPath
3552
+ }
3553
+ };
3554
+ await writeJsonFile(summaryPath, payload);
3555
+ await writeJsonFile(samplesPath, samples);
3556
+ printOutput(payload);
3557
+ });
3081
3558
  program
3082
3559
  .command("salesnav:crawl")
3083
3560
  .description("Adaptively split broad LinkedIn Sales Navigator people searches into exportable slices and store every finished slice through Salesprompter.")
@@ -3547,6 +4024,94 @@ program
3547
4024
  sqlOut: options.sqlOut ?? null
3548
4025
  });
3549
4026
  });
4027
+ program
4028
+ .command("leadlists:deel-outreach:bq")
4029
+ .description("Build Instantly-ready Deel outreach batches from leadPool_new with lead-list provenance, split into German vs English.")
4030
+ .option("--market <market>", "global|europe|dach", "global")
4031
+ .option("--limit <number>", "Max rows to export", "200000")
4032
+ .option("--min-email-score <number>", "Minimum email score to keep", "70")
4033
+ .requiredOption("--out-dir <path>", "Output directory for raw rows, packs, and locale batches")
4034
+ .option("--sql-out <path>", "Optional file path for the generated SQL")
4035
+ .option("--campaign-id <id>", "Fallback Instantly campaign id for all locales")
4036
+ .option("--campaign-id-de <id>", "Instantly campaign id for German/DACH leads")
4037
+ .option("--campaign-id-en <id>", "Instantly campaign id for English/non-DACH leads")
4038
+ .option("--apply", "Create leads in Instantly instead of export-only mode", false)
4039
+ .option("--allow-duplicates", "Do not skip emails already present in the Instantly campaign", false)
4040
+ .action(async (options) => {
4041
+ const market = z.enum(["global", "europe", "dach"]).parse(options.market);
4042
+ const limit = z.coerce.number().int().min(1).max(500000).parse(options.limit);
4043
+ const minEmailScore = z.coerce.number().int().min(0).max(100).parse(options.minEmailScore);
4044
+ const sql = buildDeelOutreachExportSql({ market, limit, minEmailScore });
4045
+ if (options.sqlOut) {
4046
+ await writeTextFile(options.sqlOut, `${sql}\n`);
4047
+ }
4048
+ const rows = await runBigQueryRows(sql, { maxRows: limit });
4049
+ const normalizedRows = normalizeDeelOutreachRows(rows);
4050
+ const pack = buildDeelOutreachPack(market, normalizedRows);
4051
+ const baseSlug = `deel-outreach-${market}`;
4052
+ const rawPath = path.join(options.outDir, `${baseSlug}-raw.json`);
4053
+ const packPath = path.join(options.outDir, `${baseSlug}-pack.json`);
4054
+ const allPath = path.join(options.outDir, `${baseSlug}-all.json`);
4055
+ const dePath = path.join(options.outDir, `${baseSlug}-de.json`);
4056
+ const enPath = path.join(options.outDir, `${baseSlug}-en.json`);
4057
+ await writeJsonFile(rawPath, normalizedRows);
4058
+ await writeJsonFile(packPath, pack);
4059
+ await writeJsonFile(allPath, [...pack.locales.de, ...pack.locales.en]);
4060
+ await writeJsonFile(dePath, pack.locales.de);
4061
+ await writeJsonFile(enPath, pack.locales.en);
4062
+ const syncResults = [];
4063
+ const routes = [
4064
+ {
4065
+ locale: "de",
4066
+ campaignId: options.campaignIdDe ?? options.campaignId,
4067
+ leads: pack.locales.de
4068
+ },
4069
+ {
4070
+ locale: "en",
4071
+ campaignId: options.campaignIdEn ?? options.campaignId,
4072
+ leads: pack.locales.en
4073
+ }
4074
+ ];
4075
+ for (const route of routes) {
4076
+ if (!route.campaignId || route.leads.length === 0) {
4077
+ continue;
4078
+ }
4079
+ const result = await syncProvider.sync("instantly", route.leads, {
4080
+ apply: Boolean(options.apply),
4081
+ instantlyCampaignId: route.campaignId,
4082
+ allowDuplicates: Boolean(options.allowDuplicates)
4083
+ });
4084
+ syncResults.push({
4085
+ locale: route.locale,
4086
+ campaignId: route.campaignId,
4087
+ synced: result.synced,
4088
+ skipped: result.skipped ?? 0,
4089
+ dryRun: result.dryRun,
4090
+ provider: result.provider ?? "instantly"
4091
+ });
4092
+ }
4093
+ printOutput({
4094
+ status: "ok",
4095
+ vendor: "deel",
4096
+ market,
4097
+ limit,
4098
+ minEmailScore,
4099
+ rowCount: normalizedRows.length,
4100
+ hitLimit: normalizedRows.length === limit,
4101
+ localeCounts: pack.summary.localeCounts,
4102
+ segmentCounts: pack.summary.segmentCounts,
4103
+ averageEmailScoreByLocale: pack.summary.averageEmailScoreByLocale,
4104
+ recommendedRouting: "separate-campaigns-by-language",
4105
+ outDir: options.outDir,
4106
+ raw: rawPath,
4107
+ pack: packPath,
4108
+ all: allPath,
4109
+ german: dePath,
4110
+ english: enPath,
4111
+ syncResults,
4112
+ sqlOut: options.sqlOut ?? null
4113
+ });
4114
+ });
3550
4115
  program
3551
4116
  .command("leadlists:funnel:bq")
3552
4117
  .description("Build an upstream lead-list funnel report for a vendor/market.")