salesprompter-cli 0.1.19 → 0.1.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,710 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import { setTimeout as delay } from "node:timers/promises";
3
+ import { BigQuery } from "@google-cloud/bigquery";
4
+ import { createClient } from "@supabase/supabase-js";
5
// --- Paging and batching defaults -------------------------------------------
// Exposed as `windowSize` through salesNavigatorHistoricalBackfillDefaults.
const DEFAULT_WINDOW_SIZE = 100_000;
// Fallback for `options.pageSize` (rows requested per BigQuery result page).
const DEFAULT_PAGE_SIZE = 500;
// Fallback for `options.upsertBatchSize` (rows per Supabase upsert call).
const DEFAULT_UPSERT_BATCH_SIZE = 50;
// Fallback for `options.minUpsertBatchSize`; batches at or below this size are
// retried with backoff instead of being split further.
const DEFAULT_MIN_UPSERT_BATCH_SIZE = 10;

// --- Retry defaults ---------------------------------------------------------
// Fallback for `options.maxUpsertRetries`.
const DEFAULT_MAX_UPSERT_RETRIES = 8;
// Fallback for `options.retryDelayMs`; the upsert retry doubles it per attempt.
const DEFAULT_RETRY_DELAY_MS = 1_000;
// Attempts made by queryExactSalesNavigatorPeopleCount before it gives up.
const DEFAULT_MAX_COUNT_RETRIES = 5;
// Fixed wait between count attempts, in milliseconds.
const DEFAULT_COUNT_RETRY_DELAY_MS = 5_000;

// --- Identifiers written to linkedin_sales_nav_export_runs -------------------
const HISTORICAL_BACKFILL_SOURCE_QUERY_URL = "bigquery://SalesPrompter/salesnav-people";
const HISTORICAL_BACKFILL_SLICE_PRESET = "historical-bigquery-salesnav-backfill";
const HISTORICAL_BACKFILL_AGENT_ID = "historical-bigquery-backfill";
16
/**
 * Coerces a number or numeric string into an integer, truncating toward zero.
 *
 * @param {unknown} value - Candidate value; only numbers and strings are considered.
 * @returns {number | null} The truncated integer, or `null` for blank strings,
 *   non-finite numbers, unparseable strings and every other type.
 */
function toFiniteInteger(value) {
    let candidate;
    switch (typeof value) {
        case "number":
            candidate = value;
            break;
        case "string": {
            const text = value.trim();
            if (text === "") {
                // Number("") would be 0; a blank string must not count as a value.
                return null;
            }
            candidate = Number(text);
            break;
        }
        default:
            return null;
    }
    return Number.isFinite(candidate) ? Math.trunc(candidate) : null;
}
30
/**
 * Reads and validates the environment needed for the historical Sales
 * Navigator backfill.
 *
 * @param {Record<string, string | undefined>} [env=process.env] - Environment to read.
 * @returns {{ supabaseUrl: string, supabaseServiceRoleKey: string, bigQueryProjectId: string, bigQueryCredentials: object }}
 *   Trimmed Supabase settings, the resolved BigQuery project id and the parsed
 *   service-account credentials.
 * @throws {Error} When a required variable is missing, when
 *   GOOGLE_SERVICE_ACCOUNT_KEY is not valid JSON or is not a JSON object, or
 *   when no project id can be resolved.
 */
export function resolveSalesNavigatorHistoricalBackfillConfig(env = process.env) {
    const supabaseUrl = env.SALESPROMPTER_SUPABASE_URL?.trim() || env.NEXT_PUBLIC_SUPABASE_URL?.trim() || "";
    const supabaseServiceRoleKey = env.SUPABASE_SERVICE_ROLE_KEY?.trim() || "";
    const rawServiceAccountKey = env.GOOGLE_SERVICE_ACCOUNT_KEY?.trim() || "";
    const bigQueryProjectId = env.BIGQUERY_PROJECT_ID?.trim() || "";

    // Collect every missing variable so the caller sees them all at once.
    const missing = [];
    if (supabaseUrl.length === 0) {
        missing.push("SALESPROMPTER_SUPABASE_URL or NEXT_PUBLIC_SUPABASE_URL");
    }
    if (supabaseServiceRoleKey.length === 0) {
        missing.push("SUPABASE_SERVICE_ROLE_KEY");
    }
    if (rawServiceAccountKey.length === 0) {
        missing.push("GOOGLE_SERVICE_ACCOUNT_KEY");
    }
    if (missing.length > 0) {
        throw new Error(`Missing required environment variables for historical Sales Navigator backfill: ${missing.join(", ")}`);
    }

    let bigQueryCredentials;
    try {
        // Strips a trailing literal backslash + "n" pair (two characters, not a
        // real newline) before parsing.
        bigQueryCredentials = JSON.parse(rawServiceAccountKey.replace(/\\n$/, ""));
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        throw new Error(`GOOGLE_SERVICE_ACCOUNT_KEY is not valid JSON: ${message}`, { cause: error });
    }
    // JSON.parse also accepts `null`, primitives and arrays. None of those can
    // be service-account credentials, and `null` would otherwise crash below
    // with an unhelpful TypeError.
    if (bigQueryCredentials === null || typeof bigQueryCredentials !== "object" || Array.isArray(bigQueryCredentials)) {
        throw new Error("GOOGLE_SERVICE_ACCOUNT_KEY must be a JSON object.");
    }

    const projectIdFromCredentials = typeof bigQueryCredentials.project_id === "string"
        ? bigQueryCredentials.project_id
        : "";
    // An explicit BIGQUERY_PROJECT_ID wins over the id embedded in the key.
    const resolvedProjectId = bigQueryProjectId || projectIdFromCredentials;
    if (resolvedProjectId.length === 0) {
        throw new Error("BIGQUERY_PROJECT_ID is required when GOOGLE_SERVICE_ACCOUNT_KEY does not contain project_id.");
    }
    return {
        supabaseUrl,
        supabaseServiceRoleKey,
        bigQueryProjectId: resolvedProjectId,
        bigQueryCredentials
    };
}
70
/**
 * Picks the organisation id for the backfill.
 *
 * Precedence: `options.explicitOrgId`, then `options.env.SALESPROMPTER_ORG_ID`,
 * then `options.sessionOrgId`. Each candidate is trimmed, and blank values are
 * skipped.
 *
 * @param {{ explicitOrgId?: string, env?: Record<string, string | undefined>, sessionOrgId?: string }} options
 * @returns {string} The first non-blank org id.
 * @throws {Error} When none of the three sources yields a value.
 */
export function resolveSalesNavigatorHistoricalBackfillOrgId(options) {
    // `||` short-circuits, so later sources are only read when earlier ones are blank.
    const orgId = options.explicitOrgId?.trim()
        || options.env?.SALESPROMPTER_ORG_ID?.trim()
        || options.sessionOrgId?.trim();
    if (!orgId) {
        throw new Error("Sales Navigator historical backfill requires --org-id, SALESPROMPTER_ORG_ID, or an authenticated CLI session with an active org.");
    }
    return orgId;
}
85
/**
 * Describes the windows a backfill run would cover, without executing anything.
 *
 * @param {{ targetCount: number, currentCount: number | null, startOffset: number, windowSize: number, maxWindows: number }} options
 * @returns {{ targetCount: number, currentCount: number | null, startOffset: number, windowSize: number, maxWindows: number, remainingToTarget: number | null, windows: Array<{ index: number, offset: number, limit: number }> }}
 *   The inputs echoed back, plus `remainingToTarget` (`null` when
 *   `currentCount` is `null`, never negative otherwise) and one
 *   `{ index, offset, limit }` entry per planned window.
 */
export function buildSalesNavigatorHistoricalBackfillPlan(options) {
    const { targetCount, currentCount, startOffset, windowSize, maxWindows } = options;

    let remainingToTarget = null;
    if (currentCount !== null) {
        remainingToTarget = Math.max(0, targetCount - currentCount);
    }

    // Window i covers rows [startOffset + i * windowSize, ... + windowSize).
    const windows = [];
    let index = 0;
    while (index < maxWindows) {
        windows.push({ index, offset: startOffset + index * windowSize, limit: windowSize });
        index += 1;
    }

    return {
        targetCount,
        currentCount,
        startOffset,
        windowSize,
        maxWindows,
        remainingToTarget,
        windows
    };
}
106
/**
 * Works out where a backfill should resume, given previously recorded runs.
 *
 * Only rows whose `raw_payload.scope` equals `options.scope`, whose
 * `raw_payload.limit` equals `options.windowSize`, and whose
 * `raw_payload.offset` is not below `options.fallbackOffset` are considered.
 * For each offset the run with the latest `finished_at` wins; on a tie the
 * later row in `options.rows` wins.
 *
 * @param {{ rows: Array<{ status?: string, result_classification?: string | null, finished_at?: string | null, raw_payload?: object | null }>, scope: string, windowSize: number, fallbackOffset: number }} options
 * @returns {{ startOffset: number, resumedFromHistory: boolean, matchedHistoryRows: number, reason: "fallback" | "fill-gap" | "retry-failed-window" | "next-unprocessed-window" }}
 *   `matchedHistoryRows` is the number of distinct offsets that matched, not
 *   the number of raw rows.
 */
export function inferSalesNavigatorHistoricalBackfillResumeState(options) {
    const { scope, windowSize, fallbackOffset } = options;
    const newestRunByOffset = new Map();

    for (const run of options.rows) {
        const payload = run.raw_payload ?? {};
        if (payload.scope !== scope) {
            continue;
        }
        const windowOffset = toFiniteInteger(payload.offset);
        const windowLimit = toFiniteInteger(payload.limit);
        const isRelevant = windowOffset !== null
            && windowLimit === windowSize
            && !(windowOffset < fallbackOffset);
        if (!isRelevant) {
            continue;
        }

        // Missing or unparseable timestamps sort before every real one.
        let finishedAtMs = Number.NEGATIVE_INFINITY;
        if (run.finished_at) {
            const parsed = Date.parse(run.finished_at);
            if (Number.isFinite(parsed)) {
                finishedAtMs = parsed;
            }
        }

        const known = newestRunByOffset.get(windowOffset);
        if (known && known.finishedAtMs > finishedAtMs) {
            continue;
        }

        // A missing result_classification is treated as "success".
        const succeeded = run.status === "finished"
            && (run.result_classification ?? "success") === "success";
        newestRunByOffset.set(windowOffset, {
            status: succeeded ? "success" : "failed",
            finishedAtMs
        });
    }

    if (newestRunByOffset.size === 0) {
        return {
            startOffset: fallbackOffset,
            resumedFromHistory: false,
            matchedHistoryRows: 0,
            reason: "fallback"
        };
    }

    const resumeAt = (startOffset, reason) => ({
        startOffset,
        resumedFromHistory: true,
        matchedHistoryRows: newestRunByOffset.size,
        reason
    });

    const highestOffset = Math.max(...newestRunByOffset.keys());
    // Walk the window grid from the fallback offset and stop at the first
    // window that is missing or whose latest run did not succeed.
    let cursor = fallbackOffset;
    while (cursor <= highestOffset) {
        const outcome = newestRunByOffset.get(cursor)?.status;
        if (outcome === undefined) {
            return resumeAt(cursor, "fill-gap");
        }
        if (outcome !== "success") {
            return resumeAt(cursor, "retry-failed-window");
        }
        cursor += windowSize;
    }
    return resumeAt(highestOffset + windowSize, "next-unprocessed-window");
}
168
/**
 * Loads the recorded backfill runs for an organisation from
 * `linkedin_sales_nav_export_runs` and derives the resume state from them.
 *
 * Rows are fetched in pages of 1000, ordered by `finished_at` ascending, up to
 * 20 pages. Any further rows are not read.
 *
 * @param {{ supabase: object, orgId: string, scope: string, windowSize: number, fallbackOffset: number }} options
 * @returns {Promise<object>} The result of inferSalesNavigatorHistoricalBackfillResumeState.
 * @throws {Error} When a Supabase page request reports an error.
 */
export async function resolveSalesNavigatorHistoricalBackfillResumeState(options) {
    const PAGE_SIZE = 1000;
    const MAX_PAGES = 20;
    const history = [];

    let page = 0;
    while (page < MAX_PAGES) {
        const firstRow = page * PAGE_SIZE;
        const lastRow = firstRow + PAGE_SIZE - 1;
        const { error, data } = await options.supabase
            .from("linkedin_sales_nav_export_runs")
            .select("status,result_classification,finished_at,raw_payload")
            .eq("org_id", options.orgId)
            .eq("slice_preset", HISTORICAL_BACKFILL_SLICE_PRESET)
            .order("finished_at", { ascending: true })
            .range(firstRow, lastRow);
        if (error) {
            throw new Error(`Failed to load historical Sales Navigator backfill runs: ${error.message}`);
        }
        const fetched = Array.isArray(data) ? data : [];
        history.push(...fetched);
        // A short page means there is nothing left to fetch.
        if (fetched.length < PAGE_SIZE) {
            break;
        }
        page += 1;
    }

    const { scope, windowSize, fallbackOffset } = options;
    return inferSalesNavigatorHistoricalBackfillResumeState({
        rows: history,
        scope,
        windowSize,
        fallbackOffset
    });
}
197
/**
 * Builds the BigQuery standard-SQL statement for one backfill window.
 *
 * The query keeps the newest lead list per `leadListId` (people searches
 * only), joins contacts to those lead lists, left-joins company data, keeps
 * one row per `contactId`, and orders by `contactId` so that LIMIT/OFFSET
 * windows are taken over a stable ordering.
 *
 * @param {number} limit - Window size; truncated to an integer for the LIMIT clause.
 * @param {number} offset - Rows to skip; an OFFSET clause is emitted only when this is > 0.
 * @param {string} scope - `"hr-function-included"` adds the Human Resources
 *   function filter on the lead-list query; any other value adds no filter.
 * @returns {string} The SQL text.
 */
export function buildSalesNavigatorHistoricalBackfillQuery(limit, offset, scope) {
    const paginationParts = [`LIMIT ${Math.trunc(limit)}`];
    if (offset > 0) {
        paginationParts.push(`OFFSET ${Math.trunc(offset)}`);
    }
    // Each clause goes on its own line after the final ORDER BY.
    const paginationClause = paginationParts.map((part) => `\n${part}`).join("");

    let leadListFilterClause = "";
    if (scope === "hr-function-included") {
        leadListFilterClause =
            "\nAND REGEXP_CONTAINS(LOWER(CAST(query AS STRING)), r'type%3afunction[^)]*id%3a12[^)]*selectiontype%3aincluded')"
            + "\nAND NOT REGEXP_CONTAINS(LOWER(CAST(query AS STRING)), r'id%3a12[^)]*selectiontype%3aexcluded')";
    }

    return `WITH hr_leadlists AS (
SELECT
CAST(leadListId AS STRING) AS leadListId,
CAST(query AS STRING) AS leadListQuery,
CAST(leadList_container_ts AS STRING) AS leadListContainerTs
FROM \`icpidentifier.SalesPrompter.leadLists_raw\`
WHERE query IS NOT NULL
AND LOWER(CAST(query AS STRING)) LIKE '%/sales/search/people%'${leadListFilterClause}
QUALIFY ROW_NUMBER() OVER (
PARTITION BY CAST(leadListId AS STRING)
ORDER BY leadList_container_ts DESC
) = 1
), contacts AS (
SELECT
CAST(leadListId AS STRING) AS leadListId,
CAST(contactId AS STRING) AS contactId,
CAST(companyId AS STRING) AS companyId,
CAST(firstName AS STRING) AS firstName,
CAST(lastName AS STRING) AS lastName,
CAST(jobTitle AS STRING) AS jobTitle,
CAST(contact_companyName AS STRING) AS contactCompanyName,
CAST(contact_location AS STRING) AS contactLocation,
CAST(contact_companyLocation AS STRING) AS contactCompanyLocation,
CAST(contact_summary AS STRING) AS contactSummary,
CAST(contact_titleDescription AS STRING) AS contactTitleDescription,
CAST(tenureAtCompany AS STRING) AS tenureAtCompany,
CAST(tenureAtPosition AS STRING) AS tenureAtPosition,
CAST(contact_ts AS STRING) AS contactTs,
CAST(isPremium AS STRING) AS isPremium,
CAST(isOpenLink AS STRING) AS isOpenLink
FROM \`icpidentifier.SalesPrompter.linkedin_contacts\`
WHERE contactId IS NOT NULL
), companies AS (
SELECT
CAST(id AS STRING) AS companyId,
CAST(handle AS STRING) AS handle,
CAST(name AS STRING) AS name,
CAST(companySize AS STRING) AS companySize,
CAST(countryCode AS STRING) AS countryCode,
CAST(headquarters AS STRING) AS headquarters,
CAST(domain AS STRING) AS domain,
CAST(domain_linkedin AS STRING) AS domainLinkedin,
CAST(website_linkedin AS STRING) AS websiteLinkedin,
CAST(industry AS STRING) AS industry,
CAST(timestamp AS STRING) AS companyTs
FROM \`icpidentifier.SalesPrompter.linkedin_companies\`
)
SELECT
contacts.contactId,
contacts.companyId,
contacts.firstName,
contacts.lastName,
contacts.jobTitle,
contacts.contactCompanyName,
contacts.contactLocation,
contacts.contactCompanyLocation,
contacts.contactSummary,
contacts.contactTitleDescription,
contacts.tenureAtCompany,
contacts.tenureAtPosition,
contacts.contactTs,
contacts.isPremium,
contacts.isOpenLink,
hr_leadlists.leadListId,
hr_leadlists.leadListQuery,
hr_leadlists.leadListContainerTs,
companies.handle AS companyHandle,
companies.name AS companyName,
companies.companySize,
companies.countryCode,
companies.headquarters,
companies.domain,
companies.domainLinkedin,
companies.websiteLinkedin,
companies.industry,
companies.companyTs
FROM contacts
JOIN hr_leadlists USING (leadListId)
LEFT JOIN companies USING (companyId)
QUALIFY ROW_NUMBER() OVER (
PARTITION BY contacts.contactId
ORDER BY contacts.contactTs DESC, hr_leadlists.leadListContainerTs DESC, companies.companyTs DESC
) = 1
ORDER BY contacts.contactId${paginationClause}`;
}
290
/**
 * Makes a string safe to store: removes NUL characters, replaces unpaired
 * UTF-16 surrogates with U+FFFD, and normalises the result to NFC.
 *
 * @param {string} value - Text to clean.
 * @returns {string} The cleaned, NFC-normalised text.
 */
function sanitizeText(value) {
    // A high surrogate not followed by a low one, or a low surrogate not
    // preceded by a high one. Without the `u` flag the pattern works on
    // UTF-16 code units, which is what is needed to see lone surrogates.
    const loneSurrogate = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;
    // NULs are removed first, so surrogates separated only by a NUL pair up.
    const withoutNul = value.replace(/\u0000/g, "");
    return withoutNul.replace(loneSurrogate, "\uFFFD").normalize("NFC");
}
314
/**
 * Recursively applies sanitizeText to every string inside a value.
 *
 * Arrays and objects are rebuilt rather than mutated; objects are rebuilt from
 * their own enumerable string-keyed entries. All other values are returned
 * unchanged.
 *
 * @param {unknown} value - Any JSON-like value.
 * @returns {unknown} A sanitised copy for arrays and objects, the sanitised
 *   string for strings, or the input itself otherwise.
 */
function sanitizeValue(value) {
    if (typeof value === "string") {
        return sanitizeText(value);
    }
    if (value === null || typeof value !== "object") {
        return value;
    }
    if (Array.isArray(value)) {
        return value.map((item) => sanitizeValue(item));
    }
    const cleanedEntries = Object.entries(value).map(([key, entry]) => [key, sanitizeValue(entry)]);
    return Object.fromEntries(cleanedEntries);
}
326
/**
 * Converts a value to a sanitised, trimmed string, using `null` for "no value".
 *
 * @param {unknown} value - Any value; non-strings are stringified with `String()`.
 * @returns {string | null} The cleaned text, or `null` when the input is
 *   `null`/`undefined` or is empty after trimming.
 */
function toNullableString(value) {
    if (value === null || value === undefined) {
        return null;
    }
    const text = sanitizeText(String(value)).trim();
    return text === "" ? null : text;
}
333
/**
 * Parses a textual boolean.
 *
 * @param {unknown} value - Any value; it is stringified, trimmed and lower-cased.
 * @returns {boolean | null} `true` for "true", `false` for "false" (in any
 *   letter case, ignoring surrounding whitespace), `null` for everything else,
 *   including `null` and `undefined`.
 */
function toNullableBoolean(value) {
    if (value === null || value === undefined) {
        return null;
    }
    switch (String(value).trim().toLowerCase()) {
        case "true":
            return true;
        case "false":
            return false;
        default:
            return null;
    }
}
346
/**
 * Normalises a timestamp-like value to an ISO-8601 UTC string.
 *
 * @param {unknown} value - Any value; it is first passed through toNullableString.
 * @returns {string | null} `Date#toISOString()` output, or `null` when the
 *   value is blank or cannot be parsed as a date.
 */
function toNullableIsoTimestamp(value) {
    const text = toNullableString(value);
    if (text === null) {
        return null;
    }
    // new Date(string) parses exactly as Date.parse(string) does.
    const parsed = new Date(text);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString();
}
354
/**
 * Builds the Sales Navigator lead URL for a contact id.
 *
 * @param {string} contactId - Appended to the URL as-is (no encoding is applied).
 * @returns {string} `https://www.linkedin.com/sales/lead/<contactId>`.
 */
function buildSalesNavigatorProfileUrl(contactId) {
    const leadBaseUrl = "https://www.linkedin.com/sales/lead/";
    return `${leadBaseUrl}${contactId}`;
}
357
/**
 * Maps one BigQuery result row to a `linkedin_sales_nav_people` record.
 *
 * Text fields are sanitised and trimmed, with blanks becoming `null`. Fields
 * the BigQuery row does not provide are set to `null`. `scraped_at` falls back
 * to the current time when the row's `contactTs` cannot be parsed.
 *
 * @param {{ row: object, orgId: string, containerId: string }} options
 * @returns {object} The record to upsert; `raw_payload` holds a sanitised copy of the row.
 * @throws {Error} When the row has no usable `contactId`.
 */
function mapHistoricalContactRowToPerson(options) {
    const { row, orgId, containerId } = options;
    const text = (field) => toNullableString(row[field]);

    const contactId = text("contactId");
    if (!contactId) {
        throw new Error("Historical backfill row is missing contactId");
    }
    const companyId = text("companyId");
    const firstName = text("firstName");
    const lastName = text("lastName");
    // Prefer the joined company record's name over the name stored on the contact.
    const companyName = text("companyName") ?? text("contactCompanyName");
    const fullName = [firstName, lastName].filter(Boolean).join(" ");

    return {
        org_id: orgId,
        run_id: null,
        phantom_container_id: containerId,
        sales_nav_profile_url: buildSalesNavigatorProfileUrl(contactId),
        linkedin_profile_url: null,
        default_profile_url: null,
        full_name: fullName || null,
        first_name: firstName,
        last_name: lastName,
        company_name: companyName,
        company_id: companyId,
        company_url: companyId ? `https://www.linkedin.com/company/${companyId}` : null,
        regular_company_url: text("websiteLinkedin"),
        title: text("jobTitle"),
        industry: text("industry"),
        summary: text("contactSummary"),
        title_description: text("contactTitleDescription"),
        location: text("contactLocation"),
        company_location: text("contactCompanyLocation"),
        duration_in_role: text("tenureAtPosition"),
        duration_in_company: text("tenureAtCompany"),
        connection_degree: null,
        shared_connections_count: null,
        profile_image_url: null,
        vmid: null,
        search_query: text("leadListQuery"),
        search_account_profile_id: null,
        search_account_profile_name: null,
        scraped_at: toNullableIsoTimestamp(row.contactTs) ?? new Date().toISOString(),
        is_premium: toNullableBoolean(row.isPremium),
        is_open_link: toNullableBoolean(row.isOpenLink),
        raw_payload: sanitizeValue(row)
    };
}
401
/**
 * Decides whether an upsert failure is worth retrying, judged by its message.
 *
 * @param {Error} error - The failure; only `error.message` is inspected.
 * @returns {boolean} `true` when the lower-cased message contains one of the
 *   listed timeout, connection, network or rate-limit phrases.
 */
function isRetryableUpsertError(error) {
    const transientMarkers = [
        "statement timeout",
        "connection terminated",
        "connection closed",
        "fetch failed",
        "network",
        "temporarily unavailable",
        "rate limit"
    ];
    const message = error.message.toLowerCase();
    return transientMarkers.some((marker) => message.includes(marker));
}
411
/**
 * Upserts one batch of people into `linkedin_sales_nav_people`, updating rows
 * that collide on `(org_id, sales_nav_profile_url)`.
 *
 * @param {object} supabase - Supabase client.
 * @param {object[]} batch - Records to upsert.
 * @returns {Promise<void>}
 * @throws {Error} When Supabase reports an error for the request.
 */
async function upsertHistoricalPeopleBatch(supabase, batch) {
    const upsertOptions = {
        onConflict: "org_id,sales_nav_profile_url",
        ignoreDuplicates: false
    };
    const { error } = await supabase.from("linkedin_sales_nav_people").upsert(batch, upsertOptions);
    if (error) {
        throw new Error(`Failed to upsert historical backfill batch: ${error.message}`);
    }
}
420
/**
 * Upserts a batch, recovering from transient failures.
 *
 * On a retryable error a batch larger than `options.minUpsertBatchSize` is
 * split in half and each half is processed in turn, each starting from
 * attempt 0. A batch at or below the minimum is retried after
 * `options.retryDelayMs * 2 ** attempt` ms, until `options.maxUpsertRetries`
 * is reached.
 *
 * @param {object} supabase - Supabase client.
 * @param {object[]} batch - Records to upsert.
 * @param {{ minUpsertBatchSize: number, maxUpsertRetries: number, retryDelayMs: number }} options
 * @param {number} [attempt=0] - Retry attempts already used for this batch.
 * @returns {Promise<void>}
 * @throws The last error, when it is not retryable or the retries are exhausted.
 */
async function upsertHistoricalPeopleBatchWithRetry(supabase, batch, options, attempt = 0) {
    for (let currentAttempt = attempt; ; currentAttempt += 1) {
        try {
            await upsertHistoricalPeopleBatch(supabase, batch);
            return;
        }
        catch (error) {
            const retryable = error instanceof Error && isRetryableUpsertError(error);
            if (!retryable) {
                throw error;
            }
            if (batch.length > options.minUpsertBatchSize) {
                // Smaller statements are more likely to fit within a timeout.
                const splitAt = Math.ceil(batch.length / 2);
                const halves = [batch.slice(0, splitAt), batch.slice(splitAt)];
                for (const half of halves) {
                    await upsertHistoricalPeopleBatchWithRetry(supabase, half, options, 0);
                }
                return;
            }
            if (currentAttempt >= options.maxUpsertRetries) {
                throw error;
            }
            await delay(options.retryDelayMs * 2 ** currentAttempt);
        }
    }
}
442
/**
 * Reads the exact number of `linkedin_sales_nav_people` rows for an
 * organisation, retrying when the request reports an error or returns no
 * finite count.
 *
 * Up to DEFAULT_MAX_COUNT_RETRIES attempts are made, with
 * DEFAULT_COUNT_RETRY_DELAY_MS between them. No wait follows the final failed
 * attempt, so the function fails promptly once the retries are used up.
 *
 * @param {object} supabase - Supabase client.
 * @param {string} orgId - Organisation whose rows are counted.
 * @returns {Promise<number>} The exact row count.
 * @throws {Error} When every attempt fails; the message carries the last
 *   reported error text.
 */
async function queryExactSalesNavigatorPeopleCount(supabase, orgId) {
    let lastMessage = "unknown count error";
    for (let attempt = 0; attempt < DEFAULT_MAX_COUNT_RETRIES; attempt += 1) {
        // head: true asks for the count only; no rows are returned.
        const result = await supabase
            .from("linkedin_sales_nav_people")
            .select("*", { head: true, count: "exact" })
            .eq("org_id", orgId);
        if (!result.error && Number.isFinite(result.count)) {
            return result.count;
        }
        lastMessage = result.error?.message || lastMessage;
        const isLastAttempt = attempt === DEFAULT_MAX_COUNT_RETRIES - 1;
        if (!isLastAttempt) {
            await delay(DEFAULT_COUNT_RETRY_DELAY_MS);
        }
    }
    throw new Error(`Failed to read exact linkedin_sales_nav_people count: ${lastMessage}`);
}
457
/**
 * Records a backfill window in `linkedin_sales_nav_export_runs`.
 *
 * Despite the name this is an upsert: a row that collides on
 * `(org_id, phantom_container_id)` is updated.
 *
 * @param {object} supabase - Supabase client.
 * @param {object} row - The run record to store.
 * @returns {Promise<void>}
 * @throws {Error} When Supabase reports an error for the request.
 */
async function insertHistoricalBackfillRun(supabase, row) {
    const upsertOptions = {
        onConflict: "org_id,phantom_container_id",
        ignoreDuplicates: false
    };
    const { error } = await supabase.from("linkedin_sales_nav_export_runs").upsert(row, upsertOptions);
    if (error) {
        throw new Error(`Failed to record historical backfill run: ${error.message}`);
    }
}
466
/**
 * Runs one backfill window: executes the BigQuery query for the window, pages
 * through its results and upserts every row into Supabase.
 *
 * Emits a `window-start` event before querying, and a `window-progress` event
 * whenever the processed count is a multiple of 5000 or equals the reported
 * total.
 *
 * @param {object} options - `bigQuery`, `supabase`, `orgId`, `scope`, `offset`,
 *   `limit`, `pageSize`, `upsertBatchSize`, `minUpsertBatchSize`,
 *   `maxUpsertRetries`, `retryDelayMs`, `containerId`, `windowIndex` and an
 *   optional `onProgress` callback.
 * @returns {Promise<{ processed: number, totalResults: number }>} Rows upserted,
 *   and the total reported by BigQuery (or the first page's length when no
 *   total is reported).
 */
async function runSalesNavigatorHistoricalBackfillWindow(options) {
    const { windowIndex, offset, limit, containerId } = options;
    options.onProgress?.({
        type: "window-start",
        windowIndex,
        offset,
        limit,
        containerId
    });

    const [queryJob] = await options.bigQuery.createQueryJob({
        query: buildSalesNavigatorHistoricalBackfillQuery(limit, offset, options.scope),
        useLegacySql: false
    });
    await queryJob.promise();

    const [firstRows, nextQuery, response] = await queryJob.getQueryResults({
        maxResults: options.pageSize
    });
    const totalResults = Number(response?.totalRows ?? firstRows.length);
    let processed = 0;

    const reportProgress = () => {
        options.onProgress?.({
            type: "window-progress",
            windowIndex,
            offset,
            limit,
            processed,
            totalResults,
            percent: Number(((processed / Math.max(totalResults, 1)) * 100).toFixed(2)),
            containerId
        });
    };

    // Maps and upserts one page of rows, `upsertBatchSize` rows at a time.
    const upsertPage = async (pageRows) => {
        let cursor = 0;
        while (cursor < pageRows.length) {
            const people = pageRows
                .slice(cursor, cursor + options.upsertBatchSize)
                .map((row) => mapHistoricalContactRowToPerson({
                    orgId: options.orgId,
                    containerId,
                    row
                }));
            await upsertHistoricalPeopleBatchWithRetry(options.supabase, people, {
                minUpsertBatchSize: options.minUpsertBatchSize,
                maxUpsertRetries: options.maxUpsertRetries,
                retryDelayMs: options.retryDelayMs
            });
            processed += people.length;
            if (processed % 5000 === 0 || processed === totalResults) {
                reportProgress();
            }
            cursor += options.upsertBatchSize;
        }
    };

    await upsertPage(firstRows ?? []);

    let pageToken = nextQuery?.pageToken ?? null;
    while (pageToken) {
        const [rows, next, apiResponse] = await queryJob.getQueryResults({
            maxResults: options.pageSize,
            pageToken
        });
        await upsertPage(rows ?? []);
        // Prefer the token from the next-query object; fall back to the one on
        // the raw API response.
        pageToken = next?.pageToken || apiResponse?.pageToken || null;
    }

    return { processed, totalResults };
}
532
/**
 * Backfills Sales Navigator people from BigQuery into Supabase, window by
 * window, until the organisation holds at least `options.targetCount` rows,
 * the source is exhausted, or `options.maxWindows` windows have run.
 *
 * Every window, successful or failed, is recorded in
 * `linkedin_sales_nav_export_runs`. When a window fails, the failure is
 * recorded and the original error is rethrown.
 *
 * @param {object} options - `config` (from
 *   resolveSalesNavigatorHistoricalBackfillConfig), `orgId`, `scope`,
 *   `targetCount`, `startOffset`, `windowSize`, `maxWindows`, optional
 *   `pageSize`, `upsertBatchSize`, `minUpsertBatchSize`, `maxUpsertRetries`,
 *   `retryDelayMs`, `resumedFromHistory` and `onProgress`.
 * @returns {Promise<object>} A summary with `status` ("ok" when the target is
 *   reached, otherwise "incomplete"), counts, offsets, the `exhausted` flag and
 *   per-window details.
 */
export async function ensureSalesNavigatorPeopleCount(options) {
    const { config, orgId, scope, targetCount } = options;
    const supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey, {
        auth: { persistSession: false }
    });
    const bigQuery = new BigQuery({
        projectId: config.bigQueryProjectId,
        credentials: config.bigQueryCredentials
    });
    const initialCount = await queryExactSalesNavigatorPeopleCount(supabase, orgId);
    const windows = [];

    // Builds the summary object returned to the caller.
    const summarize = (status, currentCount, nextOffset, exhausted) => ({
        status,
        orgId,
        scope,
        targetCount,
        initialCount,
        currentCount,
        resumedFromHistory: Boolean(options.resumedFromHistory),
        startOffset: options.startOffset,
        nextOffset,
        exhausted,
        completedWindows: windows.length,
        windows
    });

    if (initialCount >= targetCount) {
        return summarize("ok", initialCount, options.startOffset, false);
    }

    // Builds the export-run record shared by the success and failure paths.
    const buildRunRecord = (window, outcome) => ({
        org_id: orgId,
        source_query_url: window.sourceQueryUrl,
        sliced_query_url: window.slicedQueryUrl,
        slice_preset: HISTORICAL_BACKFILL_SLICE_PRESET,
        applied_filters: scope === "hr-function-included"
            ? [
                {
                    type: "FUNCTION",
                    values: [{ id: "12", text: "Human Resources", selectionType: "INCLUDED" }]
                }
            ]
            : [],
        max_results_per_search: 2500,
        number_of_profiles: 2500,
        phantom_agent_id: HISTORICAL_BACKFILL_AGENT_ID,
        phantom_container_id: window.containerId,
        phantom_result_json_url: null,
        phantom_result_csv_url: null,
        phantom_total_results: outcome.totalResults,
        phantom_output: outcome.output,
        exported_results: outcome.exportedResults,
        status: outcome.status,
        result_classification: outcome.classification,
        error_message: outcome.errorMessage,
        finished_at: new Date().toISOString(),
        raw_payload: {
            source: "bigquery",
            scope,
            offset: window.offset,
            limit: options.windowSize,
            ...outcome.extraPayload
        }
    });

    let currentCount = initialCount;
    let exhausted = false;
    let nextOffset = options.startOffset;
    // One seed per invocation keeps container ids unique across runs.
    const runSeed = `${Date.now()}-${randomUUID().slice(0, 8)}`;

    for (let windowIndex = 0; windowIndex < options.maxWindows; windowIndex += 1) {
        const countBefore = currentCount;
        const containerId = `salesprompter-cli-salesnav-backfill-${runSeed}-window-${windowIndex + 1}-offset-${nextOffset}`;
        const sourceQueryUrl = `${HISTORICAL_BACKFILL_SOURCE_QUERY_URL}?scope=${encodeURIComponent(scope)}&offset=${nextOffset}&limit=${options.windowSize}`;
        const window = {
            containerId,
            sourceQueryUrl,
            slicedQueryUrl: sourceQueryUrl,
            offset: nextOffset
        };
        try {
            const { processed, totalResults } = await runSalesNavigatorHistoricalBackfillWindow({
                bigQuery,
                supabase,
                orgId,
                scope,
                offset: nextOffset,
                limit: options.windowSize,
                pageSize: options.pageSize ?? DEFAULT_PAGE_SIZE,
                upsertBatchSize: options.upsertBatchSize ?? DEFAULT_UPSERT_BATCH_SIZE,
                minUpsertBatchSize: options.minUpsertBatchSize ?? DEFAULT_MIN_UPSERT_BATCH_SIZE,
                maxUpsertRetries: options.maxUpsertRetries ?? DEFAULT_MAX_UPSERT_RETRIES,
                retryDelayMs: options.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS,
                containerId,
                onProgress: options.onProgress,
                windowIndex
            });
            currentCount = await queryExactSalesNavigatorPeopleCount(supabase, orgId);
            const countDelta = currentCount - countBefore;
            windows.push({
                containerId,
                offset: nextOffset,
                limit: options.windowSize,
                processed,
                totalResults,
                countBefore,
                countAfter: currentCount,
                countDelta
            });
            await insertHistoricalBackfillRun(supabase, buildRunRecord(window, {
                totalResults,
                output: `Historical BigQuery ${scope} backfill complete (${processed}/${totalResults})`,
                exportedResults: processed,
                status: "finished",
                classification: "success",
                errorMessage: null,
                extraPayload: { countDelta }
            }));
            options.onProgress?.({
                type: "window-complete",
                windowIndex,
                offset: nextOffset,
                limit: options.windowSize,
                processed,
                totalResults,
                countBefore,
                countAfter: currentCount,
                countDelta,
                containerId
            });
            // A short window means the source has no further rows.
            if (processed < options.windowSize) {
                exhausted = true;
            }
            // NOTE(review): on this break `nextOffset` still points at the
            // window just processed - confirm that callers expect that.
            if (currentCount >= targetCount || exhausted) {
                break;
            }
            nextOffset += options.windowSize;
        }
        catch (error) {
            const message = error instanceof Error ? error.message : String(error);
            await insertHistoricalBackfillRun(supabase, buildRunRecord(window, {
                totalResults: 0,
                output: `Historical BigQuery ${scope} backfill failed`,
                exportedResults: 0,
                status: "failed",
                classification: "transient_failure",
                errorMessage: message,
                extraPayload: {}
            }));
            throw error;
        }
    }

    return summarize(currentCount >= targetCount ? "ok" : "incomplete", currentCount, nextOffset, exhausted);
}
703
/**
 * Default tuning values for the historical backfill, exported so callers can
 * display them or fall back to them.
 */
export const salesNavigatorHistoricalBackfillDefaults = {
    // Windowing and paging.
    windowSize: DEFAULT_WINDOW_SIZE,
    pageSize: DEFAULT_PAGE_SIZE,
    // Upsert batching and retry.
    upsertBatchSize: DEFAULT_UPSERT_BATCH_SIZE,
    minUpsertBatchSize: DEFAULT_MIN_UPSERT_BATCH_SIZE,
    maxUpsertRetries: DEFAULT_MAX_UPSERT_RETRIES,
    retryDelayMs: DEFAULT_RETRY_DELAY_MS
};