@pagebridge/core 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,485 +0,0 @@
1
- import type { DrizzleClient } from "@pagebridge/db";
2
- import {
3
- searchAnalytics,
4
- queryAnalytics,
5
- syncLog,
6
- pageIndexStatus,
7
- } from "@pagebridge/db";
8
- import { and, eq, gte, lte } from "drizzle-orm";
9
- import type { SanityClient } from "@sanity/client";
10
- import type { GSCClient, IndexStatusResult } from "./gsc-client.js";
11
-
12
/** Input accepted by `SyncEngine.sync`. */
export interface SyncOptions {
  /** Site to query; also stored as the `siteId` of every persisted row. */
  siteUrl: string;
  /** Start of the reporting window. `SyncEngine.sync` falls back to 90 days ago. */
  startDate?: Date;
  /** End of the reporting window. `SyncEngine.sync` falls back to 3 days ago. */
  endDate?: Date;
  /** Dimensions to request. `SyncEngine.sync` falls back to `["page", "date"]`. */
  dimensions?: Array<"page" | "query" | "date">;
}
18
-
19
/** Outcome reported by `SyncEngine.sync` after a successful run. */
export interface SyncResult {
  /** Distinct page values seen in the fetched rows. */
  pages: string[];
  /** Number of rows returned by the search analytics fetch. */
  rowsProcessed: number;
  /** Identifier of the sync log entry written for this run. */
  syncLogId: string;
}
24
-
25
/** Counters reported by `SyncEngine.syncIndexStatus`. */
export interface IndexStatusSyncResult {
  /** Pages whose index status was freshly inspected and stored. */
  checked: number;
  /** Freshly inspected pages whose verdict was `"PASS"`. */
  indexed: number;
  /** Freshly inspected pages with any other verdict. */
  notIndexed: number;
  /** Pages not inspected because a recent status was cached, or whose inspection failed. */
  skipped: number;
}
31
-
32
/** Collaborators injected into `SyncEngine`. */
export interface SyncEngineOptions {
  /** Client used to fetch search analytics and inspect URLs. */
  gsc: GSCClient;
  /** Database client used for the analytics, index-status and sync-log tables. */
  db: DrizzleClient;
  /** Sanity client used to read site documents and write snapshots. */
  sanity: SanityClient;
}
37
-
38
/**
 * Moves search data between three injected collaborators: it fetches rows
 * and index inspections through the GSC client, persists them through the
 * database client, and publishes per-page snapshot documents through the
 * Sanity client.
 */
export class SyncEngine {
  private readonly gsc: GSCClient;
  private readonly db: DrizzleClient;
  private readonly sanity: SanityClient;

  /**
   * @param options - The GSC, database and Sanity clients used by every method.
   */
  constructor(options: SyncEngineOptions) {
    this.gsc = options.gsc;
    this.db = options.db;
    this.sanity = options.sanity;
  }

  /**
   * Fetches search analytics rows for a site and upserts them into the
   * `searchAnalytics` table (rows with a date) and the `queryAnalytics`
   * table (rows with both a query and a date). The run is recorded in
   * `syncLog` as "running", then "completed" or "failed".
   *
   * @param options - Site and optional window/dimensions; the window
   *   defaults to 90 days ago through 3 days ago and the dimensions to
   *   `["page", "date"]`.
   * @returns The distinct pages seen, the number of fetched rows and the
   *   id of the sync log entry.
   * @throws Rethrows any error raised while fetching or persisting, after
   *   attempting to mark the sync log entry as failed.
   */
  async sync(options: SyncOptions): Promise<SyncResult> {
    const {
      siteUrl,
      startDate = daysAgo(90),
      endDate = daysAgo(3),
      dimensions = ["page", "date"],
    } = options;

    const syncLogId = `${siteUrl}:${Date.now()}`;

    await this.db.insert(syncLog).values({
      id: syncLogId,
      siteId: siteUrl,
      startedAt: new Date(),
      status: "running",
    });

    try {
      const rows = await this.gsc.fetchSearchAnalytics({
        siteUrl,
        startDate,
        endDate,
        dimensions,
      });

      const pages = new Set<string>();

      for (const row of rows) {
        pages.add(row.page);

        // The same four metrics are written on insert and on conflict.
        const metrics = {
          clicks: row.clicks,
          impressions: row.impressions,
          ctr: row.ctr,
          position: row.position,
        };

        if (row.date) {
          await this.db
            .insert(searchAnalytics)
            .values({
              id: `${siteUrl}:${row.page}:${row.date}`,
              siteId: siteUrl,
              page: row.page,
              date: row.date,
              ...metrics,
            })
            .onConflictDoUpdate({
              target: searchAnalytics.id,
              set: { ...metrics, fetchedAt: new Date() },
            });
        }

        if (row.query && row.date) {
          await this.db
            .insert(queryAnalytics)
            .values({
              id: `${siteUrl}:${row.page}:${row.query}:${row.date}`,
              siteId: siteUrl,
              page: row.page,
              query: row.query,
              date: row.date,
              ...metrics,
            })
            .onConflictDoUpdate({
              target: queryAnalytics.id,
              set: { ...metrics },
            });
        }
      }

      await this.db
        .update(syncLog)
        .set({
          status: "completed",
          completedAt: new Date(),
          rowsProcessed: rows.length,
        })
        .where(eq(syncLog.id, syncLogId));

      return {
        pages: Array.from(pages),
        rowsProcessed: rows.length,
        syncLogId,
      };
    } catch (error) {
      // Recording the failure is best-effort: if this write fails as well,
      // the caller must still receive the error that caused the sync to fail.
      try {
        await this.db
          .update(syncLog)
          .set({
            status: "failed",
            completedAt: new Date(),
            error: error instanceof Error ? error.message : String(error),
          })
          .where(eq(syncLog.id, syncLogId));
      } catch (logError) {
        console.error(
          `Failed to record sync failure for ${syncLogId}:`,
          logError,
        );
      }

      throw error;
    }
  }

  /**
   * Creates or updates one `gscSnapshot` Sanity document per matched page
   * and period ("last7", "last28", "last90"), built from the aggregated
   * metrics, top queries and stored index status of that page.
   *
   * Matches without a `sanityId`, and pages with no stored analytics rows
   * in the window, are skipped.
   *
   * NOTE(review): every window ends 3 days ago and starts N days ago, so
   * "last7" spans roughly four days of data — confirm this is intended.
   *
   * @param siteId - Sanity id of the site document the snapshots reference.
   * @param matches - GSC URLs paired with the Sanity document they map to.
   * @param siteUrl - Site URL used to look up stored rows; read from the
   *   `gscSite` document when omitted.
   * @throws Error when no site URL is given and none can be read from Sanity.
   */
  async writeSnapshots(
    siteId: string,
    matches: { gscUrl: string; sanityId: string | undefined }[],
    siteUrl?: string,
  ): Promise<void> {
    let resolvedSiteUrl = siteUrl;
    if (!resolvedSiteUrl) {
      const siteDoc = await this.sanity.fetch<{ siteUrl: string } | null>(
        `*[_type == "gscSite" && _id == $siteId][0]{ siteUrl }`,
        { siteId },
      );
      resolvedSiteUrl = siteDoc?.siteUrl;
    }
    if (!resolvedSiteUrl) {
      throw new Error(`Could not find siteUrl for site ID: ${siteId}`);
    }

    const periods = ["last7", "last28", "last90"] as const;
    const periodDays = { last7: 7, last28: 28, last90: 90 };

    for (const period of periods) {
      const startDate = daysAgo(periodDays[period]);
      const endDate = daysAgo(3);

      for (const match of matches) {
        if (!match.sanityId) continue;

        const metrics = await this.getAggregatedMetrics(
          resolvedSiteUrl,
          match.gscUrl,
          startDate,
          endDate,
        );
        if (!metrics) continue;

        const topQueries = await this.getTopQueries(
          resolvedSiteUrl,
          match.gscUrl,
          startDate,
          endDate,
        );

        const indexStatusData = await this.getIndexStatus(
          resolvedSiteUrl,
          match.gscUrl,
        );

        // Look for a snapshot of the same site/page/period to update in place.
        const existingSnapshot = await this.sanity.fetch(
          `*[_type == "gscSnapshot" && site._ref == $siteId && page == $page && period == $period][0]._id`,
          { siteId, page: match.gscUrl, period },
        );

        const snapshotData = {
          _type: "gscSnapshot" as const,
          site: { _type: "reference" as const, _ref: siteId },
          page: match.gscUrl,
          linkedDocument: { _type: "reference" as const, _ref: match.sanityId },
          period,
          clicks: metrics.clicks,
          impressions: metrics.impressions,
          ctr: metrics.ctr,
          position: metrics.position,
          topQueries,
          fetchedAt: new Date().toISOString(),
          indexStatus: indexStatusData
            ? {
                verdict: mapVerdictToSanity(indexStatusData.verdict),
                coverageState: indexStatusData.coverageState,
                lastCrawlTime:
                  indexStatusData.lastCrawlTime?.toISOString() ?? null,
                robotsTxtState: indexStatusData.robotsTxtState,
                pageFetchState: indexStatusData.pageFetchState,
              }
            : undefined,
        };

        if (existingSnapshot) {
          await this.sanity.patch(existingSnapshot).set(snapshotData).commit();
        } else {
          await this.sanity.create(snapshotData);
        }
      }
    }
  }

  /**
   * Inspects each page through the GSC client and upserts the result into
   * `pageIndexStatus`. Pages whose stored status is younger than 24 hours
   * are not inspected again. A failed inspection is logged and counted as
   * skipped; it does not abort the run.
   *
   * @param siteUrl - Site the pages belong to.
   * @param pages - Page URLs to inspect.
   * @returns Counters for inspected, indexed, not-indexed and skipped pages.
   */
  async syncIndexStatus(
    siteUrl: string,
    pages: string[],
  ): Promise<IndexStatusSyncResult> {
    const result: IndexStatusSyncResult = {
      checked: 0,
      indexed: 0,
      notIndexed: 0,
      skipped: 0,
    };

    const CACHE_DURATION_MS = 24 * 60 * 60 * 1000; // 24 hours

    for (const page of pages) {
      const id = `${siteUrl}:${page}`;

      const existing = await this.db
        .select({ fetchedAt: pageIndexStatus.fetchedAt })
        .from(pageIndexStatus)
        .where(eq(pageIndexStatus.id, id))
        .limit(1);

      const lastFetchedAt = existing[0]?.fetchedAt;
      if (
        lastFetchedAt &&
        Date.now() - lastFetchedAt.getTime() < CACHE_DURATION_MS
      ) {
        result.skipped++;
        continue;
      }

      try {
        const status = await this.gsc.inspectUrl(siteUrl, page);

        const statusFields = {
          verdict: status.verdict,
          coverageState: status.coverageState,
          indexingState: status.indexingState,
          pageFetchState: status.pageFetchState,
          lastCrawlTime: status.lastCrawlTime,
          robotsTxtState: status.robotsTxtState,
        };

        await this.db
          .insert(pageIndexStatus)
          .values({
            id,
            siteId: siteUrl,
            page,
            ...statusFields,
            fetchedAt: new Date(),
          })
          .onConflictDoUpdate({
            target: pageIndexStatus.id,
            set: { ...statusFields, fetchedAt: new Date() },
          });

        result.checked++;
        if (status.verdict === "PASS") {
          result.indexed++;
        } else {
          result.notIndexed++;
        }
      } catch (error) {
        console.error(`Failed to check index status for ${page}:`, error);
        result.skipped++;
      } finally {
        // Small delay to respect rate limits (600/min = 100ms between
        // requests). Applied after failures too, so a run of failing
        // inspections cannot fire requests back-to-back.
        await delay(100);
      }
    }

    return result;
  }

  /**
   * Reads the stored index status of a page.
   *
   * @param siteUrl - Site the page belongs to.
   * @param page - Page URL.
   * @returns The stored status, or `null` when the page has no stored row.
   */
  async getIndexStatus(
    siteUrl: string,
    page: string,
  ): Promise<IndexStatusResult | null> {
    const id = `${siteUrl}:${page}`;
    const [row] = await this.db
      .select()
      .from(pageIndexStatus)
      .where(eq(pageIndexStatus.id, id))
      .limit(1);

    if (!row) return null;

    return {
      verdict: row.verdict as IndexStatusResult["verdict"],
      coverageState: row.coverageState,
      indexingState: row.indexingState,
      pageFetchState: row.pageFetchState,
      lastCrawlTime: row.lastCrawlTime,
      robotsTxtState: row.robotsTxtState,
    };
  }

  /**
   * Aggregates the stored `searchAnalytics` rows of one page over a date
   * window. Clicks and impressions are summed, ctr is recomputed as
   * clicks / impressions (0 when there are no impressions), and position
   * is the plain mean of the per-row positions (not impression-weighted).
   *
   * @returns The aggregate, or `undefined` when no rows fall in the window.
   */
  private async getAggregatedMetrics(
    siteId: string,
    page: string,
    startDate: Date,
    endDate: Date,
  ): Promise<
    | { clicks: number; impressions: number; ctr: number; position: number }
    | undefined
  > {
    const rows = await this.db
      .select({
        clicks: searchAnalytics.clicks,
        impressions: searchAnalytics.impressions,
        position: searchAnalytics.position,
      })
      .from(searchAnalytics)
      .where(
        and(
          eq(searchAnalytics.siteId, siteId),
          eq(searchAnalytics.page, page),
          gte(searchAnalytics.date, formatDate(startDate)),
          lte(searchAnalytics.date, formatDate(endDate)),
        ),
      );

    if (rows.length === 0) return undefined;

    let clicks = 0;
    let impressions = 0;
    let positionSum = 0;
    for (const row of rows) {
      clicks += row.clicks ?? 0;
      impressions += row.impressions ?? 0;
      positionSum += row.position ?? 0;
    }

    return {
      clicks,
      impressions,
      ctr: impressions > 0 ? clicks / impressions : 0,
      position: positionSum / rows.length,
    };
  }

  /**
   * Returns up to ten queries for a page over a date window, ranked by
   * total clicks. Stored rows are per query and date, so they are grouped
   * by query first: clicks and impressions are summed and position is the
   * mean of the per-row positions.
   *
   * Every row in the window is read before ranking; capping the row count
   * ahead of the grouping would rank an arbitrary sample instead.
   */
  private async getTopQueries(
    siteId: string,
    page: string,
    startDate: Date,
    endDate: Date,
  ): Promise<
    { query: string; clicks: number; impressions: number; position: number }[]
  > {
    const rows = await this.db
      .select({
        query: queryAnalytics.query,
        clicks: queryAnalytics.clicks,
        impressions: queryAnalytics.impressions,
        position: queryAnalytics.position,
      })
      .from(queryAnalytics)
      .where(
        and(
          eq(queryAnalytics.siteId, siteId),
          eq(queryAnalytics.page, page),
          gte(queryAnalytics.date, formatDate(startDate)),
          lte(queryAnalytics.date, formatDate(endDate)),
        ),
      );

    const totalsByQuery = new Map<
      string,
      { clicks: number; impressions: number; positionSum: number; rows: number }
    >();

    for (const row of rows) {
      let totals = totalsByQuery.get(row.query);
      if (!totals) {
        totals = { clicks: 0, impressions: 0, positionSum: 0, rows: 0 };
        totalsByQuery.set(row.query, totals);
      }
      totals.clicks += row.clicks ?? 0;
      totals.impressions += row.impressions ?? 0;
      totals.positionSum += row.position ?? 0;
      totals.rows += 1;
    }

    return Array.from(totalsByQuery.entries())
      .map(([query, totals]) => ({
        query,
        clicks: totals.clicks,
        impressions: totals.impressions,
        position: totals.positionSum / totals.rows,
      }))
      .sort((a, b) => b.clicks - a.clicks)
      .slice(0, 10);
  }
}
458
-
459
/**
 * Returns the instant exactly `days` whole days before now.
 *
 * The arithmetic is done on the UTC calendar because callers turn the
 * result into a date string via `toISOString()`, which is UTC as well.
 * Local-time arithmetic can shift the instant by an hour across a
 * daylight-saving change and push it over a UTC day boundary.
 *
 * @param days - Number of days to go back.
 * @returns A new `Date` that many days in the past.
 */
function daysAgo(days: number): Date {
  const date = new Date();
  date.setUTCDate(date.getUTCDate() - days);
  return date;
}
464
-
465
/**
 * Formats a date as the calendar-date part of its ISO-8601 UTC
 * representation (everything before the `T`), e.g. `"2024-01-05"`.
 *
 * @throws RangeError when `date` is an invalid date (from `toISOString`).
 */
function formatDate(date: Date): string {
  const iso = date.toISOString();
  return iso.slice(0, iso.indexOf("T"));
}
468
-
469
/**
 * Returns a promise that resolves (with no value) once a timer of `ms`
 * milliseconds has fired.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
472
-
473
/**
 * Translates an index-inspection verdict into the value stored on a
 * snapshot: `"PASS"` becomes `"indexed"`, `"NEUTRAL"` becomes
 * `"excluded"`, and everything else (including `"FAIL"`) becomes
 * `"not_indexed"`.
 */
function mapVerdictToSanity(
  verdict: string,
): "indexed" | "not_indexed" | "excluded" {
  if (verdict === "PASS") {
    return "indexed";
  }
  if (verdict === "NEUTRAL") {
    return "excluded";
  }
  return "not_indexed";
}
@@ -1,160 +0,0 @@
1
- import type { SanityClient } from "@sanity/client";
2
- import type { DrizzleClient } from "@pagebridge/db";
3
- import { queryAnalytics } from "@pagebridge/db";
4
- import { and, eq, gte, lte, sql, desc } from "drizzle-orm";
5
- import type { DecaySignal } from "./decay-detector.js";
6
- import type { MatchResult } from "./url-matcher.js";
7
-
8
/** Aggregated search metrics for one query, attached to a refresh task. */
export interface QueryContext {
  /** The search query text. */
  query: string;
  /** Impressions summed over the lookup window. */
  impressions: number;
  /** Clicks summed over the lookup window. */
  clicks: number;
  /** Position averaged over the lookup window. */
  position: number;
}
14
-
15
/** Collaborators injected into `TaskGenerator`. */
export interface TaskGeneratorOptions {
  /** Sanity client used to look up sites and tasks and to write tasks. */
  sanity: SanityClient;
  /** Optional database client; when present, new tasks are enriched with top queries. */
  db?: DrizzleClient;
}
19
-
20
/**
 * Creates and updates `gscRefreshTask` documents in Sanity. When a
 * database client is supplied, new tasks also carry the page's top
 * search queries.
 */
export class TaskGenerator {
  private sanity: SanityClient;
  private db?: DrizzleClient;

  /**
   * @param options - Either an options object, or (older signature) a
   *   bare Sanity client. The two are told apart by the presence of a
   *   `fetch` member.
   */
  constructor(options: TaskGeneratorOptions | SanityClient) {
    if ("fetch" in options) {
      this.sanity = options;
    } else {
      this.sanity = options.sanity;
      this.db = options.db;
    }
  }

  /**
   * Creates one open refresh task per decaying document.
   *
   * A signal is skipped when its page has no match with a `sanityId`,
   * when the document already has an "open" or "in_progress" task, or
   * when an earlier signal in this same call already targeted the
   * document. The last rule means the one-task-per-document guarantee
   * does not depend on the existing-task query observing a task created
   * moments earlier.
   *
   * @param siteId - Sanity id of the site document the tasks reference.
   * @param signals - Decay signals, one per affected page.
   * @param matches - GSC URLs paired with Sanity documents; when a URL
   *   occurs more than once, the first occurrence is used.
   * @param siteUrl - Site URL used for the query lookup; read from the
   *   `gscSite` document when omitted and a database client is present.
   * @returns The number of tasks created.
   */
  async createTasks(
    siteId: string,
    signals: DecaySignal[],
    matches: MatchResult[],
    siteUrl?: string,
  ): Promise<number> {
    let created = 0;

    let resolvedSiteUrl = siteUrl;
    if (!resolvedSiteUrl && this.db) {
      const siteDoc = await this.sanity.fetch<{ siteUrl: string } | null>(
        `*[_type == "gscSite" && _id == $siteId][0]{ siteUrl }`,
        { siteId },
      );
      resolvedSiteUrl = siteDoc?.siteUrl;
    }

    // Index matches once instead of scanning the array for every signal.
    // Only the first match per URL is kept, which is what a linear
    // `find` would have returned.
    const firstMatchByUrl = new Map<string, MatchResult>();
    for (const match of matches) {
      if (!firstMatchByUrl.has(match.gscUrl)) {
        firstMatchByUrl.set(match.gscUrl, match);
      }
    }

    // Documents already considered during this call.
    const handledDocIds = new Set<string>();

    for (const signal of signals) {
      const sanityId = firstMatchByUrl.get(signal.page)?.sanityId;
      if (!sanityId) continue;

      if (handledDocIds.has(sanityId)) continue;
      handledDocIds.add(sanityId);

      const existingTask = await this.sanity.fetch(
        `*[_type == "gscRefreshTask" && linkedDocument._ref == $docId && status in ["open", "in_progress"]][0]._id`,
        { docId: sanityId },
      );

      if (existingTask) continue;

      let queryContext: QueryContext[] | undefined;
      if (this.db && resolvedSiteUrl) {
        queryContext = await this.getTopQueries(resolvedSiteUrl, signal.page);
      }

      await this.sanity.create({
        _type: "gscRefreshTask",
        site: { _type: "reference", _ref: siteId },
        linkedDocument: { _type: "reference", _ref: sanityId },
        reason: signal.reason,
        severity: signal.severity,
        status: "open",
        metrics: {
          positionBefore: signal.metrics.positionBefore,
          positionNow: signal.metrics.positionNow,
          positionDelta: signal.metrics.positionDelta,
          ctrBefore: signal.metrics.ctrBefore,
          ctrNow: signal.metrics.ctrNow,
          impressions: signal.metrics.impressions,
        },
        // Only attach the field when there is at least one query to show.
        ...(queryContext && queryContext.length > 0 && { queryContext }),
        createdAt: new Date().toISOString(),
      });

      created++;
    }

    return created;
  }

  /**
   * Returns the queries with the most impressions for a page over the
   * 28 days up to now, aggregated per query in the database.
   *
   * @param siteId - Value matched against the stored `siteId` column.
   * @param page - Page URL.
   * @param limit - Maximum number of queries to return (default 5).
   * @returns The aggregated queries, or an empty list without a database client.
   */
  private async getTopQueries(
    siteId: string,
    page: string,
    limit = 5,
  ): Promise<QueryContext[]> {
    if (!this.db) return [];

    const endDate = new Date();
    const startDate = new Date();
    startDate.setDate(startDate.getDate() - 28);

    const results = await this.db
      .select({
        query: queryAnalytics.query,
        totalClicks: sql<number>`sum(${queryAnalytics.clicks})`,
        totalImpressions: sql<number>`sum(${queryAnalytics.impressions})`,
        avgPosition: sql<number>`avg(${queryAnalytics.position})`,
      })
      .from(queryAnalytics)
      .where(
        and(
          eq(queryAnalytics.siteId, siteId),
          eq(queryAnalytics.page, page),
          gte(queryAnalytics.date, formatDate(startDate)),
          lte(queryAnalytics.date, formatDate(endDate)),
        ),
      )
      .groupBy(queryAnalytics.query)
      .orderBy(desc(sql`sum(${queryAnalytics.impressions})`))
      .limit(limit);

    // Aggregates are coerced with Number(); anything non-numeric becomes 0.
    return results.map((r) => ({
      query: r.query,
      clicks: Number(r.totalClicks) || 0,
      impressions: Number(r.totalImpressions) || 0,
      position: Number(r.avgPosition) || 0,
    }));
  }

  /**
   * Patches the status of a task document.
   *
   * Besides `status`, the patch sets `snoozedUntil` when snoozing with a
   * non-zero `snoozeDays`, `resolvedAt` when the status is "done" or
   * "dismissed", and `notes` when non-empty notes are given.
   *
   * @param taskId - Sanity id of the task document.
   * @param status - New status.
   * @param options - Optional snooze length in days and free-text notes.
   */
  async updateTaskStatus(
    taskId: string,
    status: "open" | "snoozed" | "in_progress" | "done" | "dismissed",
    options?: { snoozeDays?: number; notes?: string },
  ): Promise<void> {
    const patch: Record<string, unknown> = { status };

    if (status === "snoozed" && options?.snoozeDays) {
      const until = new Date();
      until.setDate(until.getDate() + options.snoozeDays);
      patch.snoozedUntil = until.toISOString();
    }

    if (status === "done" || status === "dismissed") {
      patch.resolvedAt = new Date().toISOString();
    }

    if (options?.notes) {
      patch.notes = options.notes;
    }

    await this.sanity.patch(taskId).set(patch).commit();
  }
}
157
-
158
/**
 * Formats a date as the calendar-date part of its ISO-8601 UTC
 * representation (everything before the `T`), e.g. `"2024-01-05"`.
 *
 * @throws RangeError when `date` is an invalid date (from `toISOString`).
 */
function formatDate(date: Date): string {
  const iso = date.toISOString();
  return iso.slice(0, iso.indexOf("T"));
}