@pagebridge/cli 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ > @pagebridge/cli@0.0.1 build F:\Code\pagebridge\oss\apps\cli
2
+ > tsc
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Soma Somorjai
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,164 @@
1
+ # @pagebridge/cli
2
+
3
+ Command-line interface for PageBridge. Syncs Google Search Console data to Sanity CMS, detects content decay, and generates refresh tasks.
4
+
5
+ ## Installation
6
+
7
+ The CLI is a private workspace package. Build and run it from the monorepo root:
8
+
9
+ ```bash
10
+ # Build the CLI
11
+ pnpm build --filter=@pagebridge/cli
12
+
13
+ # Run commands
14
+ pnpm --filter @pagebridge/cli start <command>
15
+
16
+ # Or use the binary name directly after building
17
+ ./apps/cli/dist/index.js <command>
18
+ ```
19
+
20
+ ## Commands
21
+
22
+ ### sync
23
+
24
+ Sync Google Search Console data and optionally generate refresh tasks for decaying content.
25
+
26
+ ```bash
27
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com
28
+ ```
29
+
30
+ Options:
31
+
32
+ | Option | Description | Default |
33
+ |--------|-------------|---------|
34
+ | `--site <url>` | GSC site URL (required) | - |
35
+ | `--dry-run` | Preview changes without writing to Sanity | false |
36
+ | `--skip-tasks` | Only sync data, skip task generation | false |
37
+ | `--check-index` | Check Google index status for pages | false |
38
+ | `--quiet-period <days>` | Days to ignore recently published content | 45 |
39
+
40
+ Examples:
41
+
42
+ ```bash
43
+ # Basic sync
44
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com
45
+
46
+ # Preview what would be synced
47
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --dry-run
48
+
49
+ # Sync data only, no refresh tasks
50
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --skip-tasks
51
+
52
+ # Include index status checks
53
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --check-index
54
+
55
+ # Use a shorter quiet period (30 days)
56
+ pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --quiet-period 30
57
+ ```
58
+
59
+ ### list-sites
60
+
61
+ List all Google Search Console properties accessible by the service account.
62
+
63
+ ```bash
64
+ pnpm --filter @pagebridge/cli start list-sites
65
+ ```
66
+
67
+ Output:
68
+
69
+ ```
70
+ Available GSC Sites:
71
+ - sc-domain:example.com
72
+ - https://www.example.com/
73
+ - sc-domain:another-site.com
74
+ ```
75
+
76
+ ## Environment Variables
77
+
78
+ Create a `.env` file in the repository root with:
79
+
80
+ ```bash
81
+ # Google Service Account (required)
82
+ # JSON stringified credentials from Google Cloud Console
83
+ GOOGLE_SERVICE_ACCOUNT='{"type":"service_account","project_id":"...","private_key":"..."}'
84
+
85
+ # PostgreSQL Database (required)
86
+ DATABASE_URL=postgresql://user:password@localhost:5432/content_keep
87
+
88
+ # Sanity Configuration (required)
89
+ SANITY_PROJECT_ID=your-project-id
90
+ SANITY_DATASET=production
91
+ SANITY_TOKEN=your-write-token
92
+
93
+ # Site URL for URL matching (required)
94
+ SITE_URL=https://example.com
95
+ ```
96
+
97
+ ## Workflow
98
+
99
+ The `sync` command performs these steps:
100
+
101
+ 1. **Validate environment** - Checks all required variables are set
102
+ 2. **Find or create gscSite** - Ensures a Sanity document exists for the site
103
+ 3. **Fetch GSC data** - Retrieves 90 days of search analytics (skipping last 3 days for data stability)
104
+ 4. **Store metrics** - Writes page and query metrics to PostgreSQL
105
+ 5. **Match URLs** - Maps GSC pages to Sanity documents by slug
106
+ 6. **Write snapshots** - Creates gscSnapshot documents in Sanity with metrics and top queries
107
+ 7. **Check index status** (optional) - Queries Google URL Inspection API
108
+ 8. **Detect decay** - Analyzes metrics for decay patterns
109
+ 9. **Generate tasks** - Creates gscRefreshTask documents for pages showing decay
110
+
111
+ ## Programmatic Usage
112
+
113
+ The CLI uses `@pagebridge/core` under the hood. For programmatic access:
114
+
115
+ ```typescript
116
+ import { GSCClient, SyncEngine, DecayDetector, TaskGenerator } from '@pagebridge/core';
117
+ import { createDb } from '@pagebridge/db';
118
+ import { createClient } from '@sanity/client';
119
+
120
+ const gscClient = new GSCClient({
121
+ credentials: JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT),
122
+ });
123
+
124
+ const db = createDb(process.env.DATABASE_URL);
125
+
126
+ const sanityClient = createClient({
127
+ projectId: process.env.SANITY_PROJECT_ID,
128
+ dataset: process.env.SANITY_DATASET,
129
+ token: process.env.SANITY_TOKEN,
130
+ apiVersion: '2024-01-01',
131
+ useCdn: false,
132
+ });
133
+
134
+ const engine = new SyncEngine({ gsc: gscClient, db, sanity: sanityClient });
135
+ const result = await engine.sync({
136
+ siteUrl: 'sc-domain:example.com',
137
+ siteId: 'sanity-site-id',
138
+ });
139
+ ```
140
+
141
+ ## Dependencies
142
+
143
+ - `@pagebridge/core` - Business logic
144
+ - `@pagebridge/db` - Database operations
145
+ - `@sanity/client` - Sanity API
146
+ - `commander` - CLI framework
147
+ - `dotenv` - Environment variable loading
148
+
149
+ ## Development
150
+
151
+ ```bash
152
+ # Watch mode
153
+ pnpm --filter @pagebridge/cli dev
154
+
155
+ # Build
156
+ pnpm --filter @pagebridge/cli build
157
+
158
+ # Type check
159
+ pnpm --filter @pagebridge/cli check-types
160
+ ```
161
+
162
+ ## License
163
+
164
+ MIT
@@ -0,0 +1,3 @@
1
+ import { Command } from "commander";
2
+ export declare const diagnoseCommand: Command;
3
+ //# sourceMappingURL=diagnose.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"diagnose.d.ts","sourceRoot":"","sources":["../../src/commands/diagnose.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AASpC,eAAO,MAAM,eAAe,SAyFxB,CAAC"}
@@ -0,0 +1,105 @@
1
+ import { Command } from "commander";
2
+ import postgres from "postgres";
3
+ import { createDbWithClient, unmatchDiagnostics, eq, desc, } from "@pagebridge/db";
4
/**
 * `diagnose` command: prints the stored diagnostics for GSC URLs that were
 * not matched, grouped by unmatch reason.
 *
 * Options:
 *   --site <url>       GSC site URL whose diagnostics are listed (required).
 *   --reason <reason>  Only show rows whose `unmatchReason` equals this value.
 *   --limit <n>        Maximum number of rows to show (positive integer, default 20).
 *   --json             Print the rows as JSON instead of the grouped report.
 *
 * Exits with status 1 when DATABASE_URL is missing or --limit is invalid.
 * The Postgres client is always closed before the action returns.
 */
export const diagnoseCommand = new Command("diagnose")
    .description("View diagnostics for unmatched URLs")
    .requiredOption("--site <url>", "GSC site URL (e.g., sc-domain:example.com)")
    .option("--reason <reason>", "Filter by unmatch reason")
    .option("--limit <n>", "Limit number of results", "20")
    .option("--json", "Output as JSON")
    .action(async (options) => {
        if (!process.env.DATABASE_URL) {
            console.error("Missing required environment variable: DATABASE_URL");
            process.exit(1);
        }
        // Validate before opening a connection so a bad value never reaches the query.
        const limit = Number.parseInt(options.limit, 10);
        if (!Number.isInteger(limit) || limit < 1) {
            console.error(`Invalid --limit value: ${options.limit} (expected a positive integer)`);
            process.exit(1);
        }
        const sql = postgres(process.env.DATABASE_URL);
        const db = createDbWithClient(sql);
        try {
            const siteRows = db
                .select()
                .from(unmatchDiagnostics)
                .where(eq(unmatchDiagnostics.siteId, options.site))
                .orderBy(desc(unmatchDiagnostics.lastSeenAt));
            let results;
            if (options.reason) {
                // The reason filter runs in memory, so the limit has to be applied
                // after filtering; otherwise it could cut off matching rows.
                const rows = await siteRows;
                results = rows
                    .filter((row) => row.unmatchReason === options.reason)
                    .slice(0, limit);
            }
            else {
                results = await siteRows.limit(limit);
            }
            if (options.json) {
                console.log(JSON.stringify(results, null, 2));
                return;
            }
            if (results.length === 0) {
                const scope = options.reason ? ` with reason "${options.reason}"` : "";
                console.log(`No unmatched URLs found for ${options.site}${scope}`);
                console.log(`Run 'sync --site ${options.site}' first to generate diagnostics.`);
                return;
            }
            // Group by reason
            const byReason = new Map();
            for (const r of results) {
                const existing = byReason.get(r.unmatchReason) ?? [];
                existing.push(r);
                byReason.set(r.unmatchReason, existing);
            }
            console.log(`\nUnmatched URL Diagnostics for ${options.site}\n`);
            console.log(`Total: ${results.length} unmatched URLs\n`);
            for (const [reason, items] of byReason) {
                console.log(`${getReasonEmoji(reason)} ${getReasonDescription(reason)} (${items.length}):`);
                console.log();
                for (const item of items) {
                    console.log(` ${item.gscUrl}`);
                    if (item.extractedSlug) {
                        console.log(` Extracted slug: "${item.extractedSlug}"`);
                    }
                    if (item.similarSlugs) {
                        // similarSlugs is stored as a JSON string by the sync command.
                        try {
                            const similar = JSON.parse(item.similarSlugs);
                            if (Array.isArray(similar) && similar.length > 0) {
                                console.log(` Similar slugs in Sanity:`);
                                for (const s of similar) {
                                    console.log(` - ${s}`);
                                }
                            }
                        }
                        catch {
                            // Best effort: a malformed value only hides the hint list.
                        }
                    }
                    console.log();
                }
            }
            console.log(`\nTo fix unmatched URLs:`);
            console.log(` 1. Check if the Sanity document exists with the correct slug`);
            console.log(` 2. Verify the document type is in the contentTypes list`);
            console.log(` 3. Ensure the slug field name matches your configuration`);
            console.log(` 4. If using a path prefix, verify it matches your URL structure`);
        }
        finally {
            await sql.end();
        }
    });
78
/**
 * Returns the short bracketed tag printed in front of an unmatch reason.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} The tag for a known reason, or "[?]" for anything else.
 */
function getReasonEmoji(reason) {
    const tags = new Map([
        ["matched", "[OK]"],
        ["no_slug_extracted", "[SLUG]"],
        ["no_matching_document", "[DOC]"],
        ["outside_path_prefix", "[PREFIX]"],
    ]);
    return tags.get(reason) ?? "[?]";
}
92
/**
 * Returns the human-readable sentence for an unmatch reason.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} The description for a known reason; unknown codes are
 *   echoed back as "Unknown reason: <code>".
 */
function getReasonDescription(reason) {
    const descriptions = new Map([
        ["matched", "Successfully matched"],
        ["no_slug_extracted", "Could not extract slug from URL"],
        ["no_matching_document", "No Sanity document with matching slug"],
        ["outside_path_prefix", "URL outside configured path prefix"],
    ]);
    if (descriptions.has(reason)) {
        return descriptions.get(reason);
    }
    return `Unknown reason: ${reason}`;
}
@@ -0,0 +1,3 @@
1
+ import { Command } from "commander";
2
+ export declare const listSitesCommand: Command;
3
+ //# sourceMappingURL=list-sites.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"list-sites.d.ts","sourceRoot":"","sources":["../../src/commands/list-sites.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,eAAO,MAAM,gBAAgB,SA2CzB,CAAC"}
@@ -0,0 +1,38 @@
1
+ import { Command } from "commander";
2
+ import { GSCClient } from "@pagebridge/core";
3
/**
 * `list-sites` command: prints every Google Search Console property the
 * configured service account can access.
 *
 * Reads the service-account credentials as JSON from GOOGLE_SERVICE_ACCOUNT.
 * Exits with status 1 when the variable is missing or is not valid JSON, and
 * sets a non-zero exit code when the listing request itself fails so that
 * scripts and CI can detect the failure.
 */
export const listSitesCommand = new Command("list-sites")
    .description("List all sites the service account has access to")
    .action(async () => {
        if (!process.env.GOOGLE_SERVICE_ACCOUNT) {
            console.error("❌ Missing GOOGLE_SERVICE_ACCOUNT environment variable");
            process.exit(1);
        }
        let credentials;
        try {
            credentials = JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT);
        }
        catch {
            console.error("❌ Failed to parse GOOGLE_SERVICE_ACCOUNT as JSON");
            process.exit(1);
        }
        console.log(`🔑 Using service account: ${credentials.client_email}`);
        const gsc = new GSCClient({ credentials });
        try {
            const sites = await gsc.listSites();
            if (sites.length === 0) {
                console.log("\n⚠️ No sites found. The service account has no access to any GSC properties.");
                console.log("\nTo fix this:");
                console.log("1. Go to Google Search Console → Settings → Users and permissions");
                console.log(`2. Add user: ${credentials.client_email}`);
                console.log("3. Set permission level to 'Full'");
            }
            else {
                console.log(`\n✅ Found ${sites.length} site(s):\n`);
                for (const site of sites) {
                    console.log(` ${site}`);
                }
                console.log('\nUse one of these exact values with: pnpm sync --site "<value>"');
            }
        }
        catch (error) {
            console.error("❌ Failed to list sites:", error);
            // Previously the command still exited 0 after a failed request.
            process.exitCode = 1;
        }
    });
@@ -0,0 +1,3 @@
1
+ import { Command } from "commander";
2
+ export declare const syncCommand: Command;
3
+ //# sourceMappingURL=sync.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sync.d.ts","sourceRoot":"","sources":["../../src/commands/sync.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAgCpC,eAAO,MAAM,WAAW,SAmUpB,CAAC"}
@@ -0,0 +1,323 @@
1
+ import { Command } from "commander";
2
+ import { createClient as createSanityClient } from "@sanity/client";
3
+ import postgres from "postgres";
4
+ import { GSCClient, SyncEngine, DecayDetector, URLMatcher, TaskGenerator, } from "@pagebridge/core";
5
+ import { createDbWithClient, unmatchDiagnostics } from "@pagebridge/db";
6
/**
 * Returns a Date that lies `days` calendar days before the current moment,
 * keeping the current local time of day.
 *
 * @param {number} days - Number of days to go back (negative values go forward).
 * @returns {Date} A new Date instance.
 */
function daysAgo(days) {
    const target = new Date();
    const dayOfMonth = target.getDate() - days;
    // setDate rolls over month and year boundaries for out-of-range values.
    target.setDate(dayOfMonth);
    return target;
}
11
/**
 * Creates a small stopwatch helper for optional debug timing output.
 *
 * @param {boolean} debug - When truthy, `end` logs the elapsed time; otherwise
 *   `end` does nothing.
 * @returns {{ start: () => number, end: (label: string, startTime: number) => void }}
 *   `start` returns a `performance.now()` timestamp to pass back to `end`.
 */
function createTimer(debug) {
    const start = () => performance.now();
    const end = (label, startTime) => {
        if (!debug) {
            return;
        }
        const elapsed = ((performance.now() - startTime) / 1000).toFixed(2);
        console.log(`[DEBUG] ${label} completed in ${elapsed}s`);
    };
    return { start, end };
}
22
/**
 * `sync` command: pulls Google Search Console data for a site, matches the
 * pages to Sanity documents, records diagnostics for unmatched URLs, and
 * (unless disabled) detects decay and creates refresh tasks.
 *
 * Options:
 *   --site <url>           GSC site URL (required).
 *   --dry-run              Preview only: nothing is created or patched in Sanity.
 *   --skip-tasks           Skip decay detection and task generation.
 *   --check-index          Also check index status for the matched pages.
 *   --quiet-period <days>  Non-negative integer passed to decay detection (default 45).
 *   --diagnose             Print grouped diagnostics for unmatched URLs.
 *   --diagnose-url <url>   Print diagnostics for one specific URL.
 *   --debug                Log timing for each step.
 *
 * Exits with status 1 when a required environment variable is missing, when
 * --quiet-period is invalid, or when GOOGLE_SERVICE_ACCOUNT is not valid JSON.
 * A failure during the sync sets exit code 1 and still closes the Postgres
 * client.
 */
export const syncCommand = new Command("sync")
    .description("Sync GSC data and generate refresh tasks")
    .requiredOption("--site <url>", "GSC site URL (e.g., sc-domain:example.com)")
    .option("--dry-run", "Preview changes without writing to Sanity")
    .option("--skip-tasks", "Only sync data, do not generate tasks")
    .option("--check-index", "Check Google index status for matched pages")
    .option("--quiet-period <days>", "Ignore pages published within N days", "45")
    .option("--diagnose", "Show detailed diagnostics for unmatched URLs")
    .option("--diagnose-url <url>", "Diagnose why a specific URL is not matching")
    .option("--debug", "Enable debug logging with timing information")
    .action(async (options) => {
        const timer = createTimer(options.debug);
        const syncStartTime = timer.start();
        const requiredEnvVars = [
            "GOOGLE_SERVICE_ACCOUNT",
            "DATABASE_URL",
            "SANITY_PROJECT_ID",
            "SANITY_DATASET",
            "SANITY_TOKEN",
            "SITE_URL",
        ];
        for (const envVar of requiredEnvVars) {
            if (!process.env[envVar]) {
                console.error(`Missing required environment variable: ${envVar}`);
                process.exit(1);
            }
        }
        // Validate up front so a typo does not surface as NaN after the whole sync has run.
        const quietPeriodDays = Number.parseInt(options.quietPeriod, 10);
        if (!Number.isInteger(quietPeriodDays) || quietPeriodDays < 0) {
            console.error(`Invalid --quiet-period value: ${options.quietPeriod} (expected a non-negative integer)`);
            process.exit(1);
        }
        let credentials;
        try {
            credentials = JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT);
        }
        catch {
            console.error("Failed to parse GOOGLE_SERVICE_ACCOUNT as JSON");
            process.exit(1);
        }
        let t = timer.start();
        const sanity = createSanityClient({
            projectId: process.env.SANITY_PROJECT_ID,
            dataset: process.env.SANITY_DATASET,
            token: process.env.SANITY_TOKEN,
            apiVersion: "2024-01-01",
            useCdn: false,
        });
        const sql = postgres(process.env.DATABASE_URL);
        const db = createDbWithClient(sql);
        const gsc = new GSCClient({ credentials });
        timer.end("Client initialization", t);
        console.log(`Starting sync for ${options.site}...`);
        try {
            // Find or create the gscSite document in Sanity
            t = timer.start();
            let siteDoc = await sanity.fetch(`*[_type == "gscSite" && siteUrl == $siteUrl][0]{
_id,
pathPrefix,
contentTypes,
slugField
}`, { siteUrl: options.site });
            if (!siteDoc) {
                if (options.dryRun) {
                    // A dry run must not write to Sanity; fall back to the defaults below.
                    console.log(`Dry run: no gscSite document found for ${options.site}; using default configuration`);
                    siteDoc = {};
                }
                else {
                    console.log(`Creating gscSite document for ${options.site}...`);
                    siteDoc = await sanity.create({
                        _type: "gscSite",
                        siteUrl: options.site,
                        enabled: true,
                        contentTypes: ["post", "page"],
                        slugField: "slug",
                    });
                }
            }
            timer.end("Fetch gscSite document", t);
            // Undefined only for a dry run without an existing document; every
            // use of siteId below is skipped in dry-run mode.
            const siteId = siteDoc._id;
            // Use configuration from gscSite document
            const contentTypes = siteDoc.contentTypes ?? ["post", "page"];
            const slugField = siteDoc.slugField ?? "slug";
            const pathPrefix = siteDoc.pathPrefix ?? undefined;
            console.log(`Configuration:`);
            console.log(` Content types: ${contentTypes.join(", ")}`);
            console.log(` Slug field: ${slugField}`);
            console.log(` Path prefix: ${pathPrefix ?? "(none)"}`);
            const syncEngine = new SyncEngine({ gsc, db, sanity });
            const matcher = new URLMatcher(sanity, {
                contentTypes,
                slugField,
                baseUrl: process.env.SITE_URL,
                pathPrefix,
            });
            t = timer.start();
            const { pages, rowsProcessed } = await syncEngine.sync({
                siteUrl: options.site,
                startDate: daysAgo(90),
                endDate: daysAgo(3),
            });
            timer.end("GSC data sync", t);
            console.log(`Processed ${rowsProcessed} rows for ${pages.length} pages`);
            t = timer.start();
            const matches = await matcher.matchUrls(pages);
            timer.end("URL matching", t);
            const matched = matches.filter((m) => !!m.sanityId);
            const unmatched = matches.filter((m) => !m.sanityId);
            console.log(`Matched ${matched.length}/${pages.length} URLs to Sanity documents`);
            // Store diagnostics for unmatched URLs
            if (unmatched.length > 0) {
                console.log(`${unmatched.length} unmatched URLs`);
                t = timer.start();
                await storeUnmatchedDiagnostics(db, options.site, unmatched);
                if (!options.dryRun) {
                    // Update gscSite with unmatched count
                    await sanity
                        .patch(siteId)
                        .set({
                            unmatchedCount: unmatched.length,
                            lastDiagnosticsAt: new Date().toISOString(),
                        })
                        .commit();
                }
                timer.end("Store unmatched diagnostics", t);
                // Show detailed diagnostics if --diagnose flag is set
                if (options.diagnose) {
                    printUnmatchedDiagnostics(unmatched);
                }
                else if (unmatched.length <= 10) {
                    unmatched.forEach((u) => console.log(` - ${u.gscUrl}`));
                    console.log(`\n Run with --diagnose for detailed diagnostics`);
                }
                else {
                    console.log(` Run with --diagnose to see detailed diagnostics`);
                }
            }
            // Handle --diagnose-url for a specific URL
            if (options.diagnoseUrl) {
                const targetUrl = options.diagnoseUrl;
                const [result] = await matcher.matchUrls([targetUrl]);
                printUrlDiagnostics(targetUrl, result);
            }
            // Check index status if requested
            if (options.checkIndex && matched.length > 0) {
                console.log(`\nChecking index status for ${matched.length} pages...`);
                t = timer.start();
                const matchedUrls = matched.map((m) => m.gscUrl);
                const indexResult = await syncEngine.syncIndexStatus(options.site, matchedUrls);
                timer.end("Index status check", t);
                console.log(` Indexed: ${indexResult.indexed}, Not indexed: ${indexResult.notIndexed}, Skipped: ${indexResult.skipped}`);
            }
            if (!options.skipTasks) {
                t = timer.start();
                const publishedDates = await getPublishedDates(sanity, matched);
                const detector = new DecayDetector(db);
                const signals = await detector.detectDecay(options.site, publishedDates, {
                    enabled: true,
                    days: quietPeriodDays,
                });
                timer.end("Decay detection", t);
                console.log(`Detected ${signals.length} decay signals`);
                if (options.dryRun) {
                    console.log("\nWould create the following tasks:");
                    signals.forEach((s) => {
                        console.log(` [${s.severity.toUpperCase()}] ${s.page}`);
                        console.log(` Reason: ${s.reason}`);
                        console.log(` Position: ${s.metrics.positionBefore} -> ${s.metrics.positionNow}`);
                    });
                }
                else {
                    t = timer.start();
                    const taskGenerator = new TaskGenerator(sanity);
                    const created = await taskGenerator.createTasks(siteId, signals, matches);
                    timer.end("Task generation", t);
                    console.log(`Created ${created} new refresh tasks`);
                }
            }
            if (!options.dryRun) {
                t = timer.start();
                await syncEngine.writeSnapshots(siteId, matched);
                timer.end("Write Sanity snapshots", t);
                console.log(`Updated Sanity snapshots`);
            }
            timer.end("Total sync", syncStartTime);
            console.log(`\nSync complete!`);
        }
        catch (error) {
            console.error("Sync failed:", error);
            // process.exit() here would skip the finally block and leave the
            // Postgres client open; exitCode lets the cleanup run first.
            process.exitCode = 1;
        }
        finally {
            await sql.end();
        }
    });
/** Upserts one unmatchDiagnostics row per unmatched URL, keyed by "<site>:<gscUrl>". */
async function storeUnmatchedDiagnostics(db, site, unmatched) {
    for (const u of unmatched) {
        // Same column values are used for a fresh insert and for the conflict update.
        const fields = {
            extractedSlug: u.extractedSlug ?? null,
            unmatchReason: u.unmatchReason,
            normalizedUrl: u.diagnostics?.normalizedUrl ?? null,
            pathAfterPrefix: u.diagnostics?.pathAfterPrefix ?? null,
            configuredPrefix: u.diagnostics?.configuredPrefix ?? null,
            similarSlugs: u.diagnostics?.similarSlugs
                ? JSON.stringify(u.diagnostics.similarSlugs)
                : null,
            availableSlugsCount: u.diagnostics?.availableSlugsCount ?? null,
            lastSeenAt: new Date(),
        };
        await db
            .insert(unmatchDiagnostics)
            .values({
                id: `${site}:${u.gscUrl}`,
                siteId: site,
                gscUrl: u.gscUrl,
                ...fields,
            })
            .onConflictDoUpdate({
                target: unmatchDiagnostics.id,
                set: fields,
            });
    }
}
/** Prints unmatched URLs grouped by reason, showing at most five URLs per group. */
function printUnmatchedDiagnostics(unmatched) {
    console.log(`\nUnmatched URL Diagnostics:\n`);
    // Group by reason
    const byReason = new Map();
    for (const u of unmatched) {
        const existing = byReason.get(u.unmatchReason) ?? [];
        existing.push(u);
        byReason.set(u.unmatchReason, existing);
    }
    for (const [reason, urls] of byReason) {
        console.log(` ${getReasonEmoji(reason)} ${getReasonDescription(reason)} (${urls.length}):`);
        for (const u of urls.slice(0, 5)) {
            console.log(` ${u.gscUrl}`);
            if (u.extractedSlug) {
                console.log(` Extracted slug: "${u.extractedSlug}"`);
            }
            if (u.diagnostics?.similarSlugs?.length) {
                console.log(` Similar slugs in Sanity:`);
                for (const similar of u.diagnostics.similarSlugs) {
                    console.log(` - ${similar}`);
                }
            }
        }
        if (urls.length > 5) {
            console.log(` ... and ${urls.length - 5} more`);
        }
        console.log();
    }
}
/** Prints the match result and matcher diagnostics for a single URL. */
function printUrlDiagnostics(targetUrl, result) {
    console.log(`\nDiagnostics for: ${targetUrl}\n`);
    if (!result) {
        return;
    }
    console.log(` Matched: ${result.sanityId ? "Yes" : "No"}`);
    console.log(` Reason: ${getReasonDescription(result.unmatchReason)}`);
    if (result.extractedSlug) {
        console.log(` Extracted slug: "${result.extractedSlug}"`);
    }
    if (result.matchedSlug) {
        console.log(` Matched to Sanity slug: "${result.matchedSlug}"`);
    }
    if (result.diagnostics) {
        console.log(` Normalized URL: ${result.diagnostics.normalizedUrl}`);
        console.log(` Path after prefix: ${result.diagnostics.pathAfterPrefix}`);
        console.log(` Configured prefix: ${result.diagnostics.configuredPrefix ?? "(none)"}`);
        console.log(` Available Sanity slugs: ${result.diagnostics.availableSlugsCount}`);
        if (result.diagnostics.similarSlugs?.length) {
            console.log(` Similar slugs in Sanity:`);
            for (const similar of result.diagnostics.similarSlugs) {
                console.log(` - ${similar}`);
            }
        }
    }
}
283
/**
 * Looks up the publication date of every matched Sanity document and returns
 * it keyed by GSC URL.
 *
 * The date is `publishedAt` when the document has one, otherwise `_createdAt`.
 * When several URLs map to the same document, each of them gets an entry.
 * No request is made when there are no document ids to look up.
 *
 * @param {{ fetch: Function }} sanity - Client whose `fetch(query, params)`
 *   resolves to the matching documents.
 * @param {Array<{ gscUrl: string, sanityId?: string }>} matches - URL matches;
 *   entries without a `sanityId` are ignored.
 * @returns {Promise<Map<string, Date>>} Map from GSC URL to publication date.
 */
async function getPublishedDates(sanity, matches) {
    // Index URLs by document id once instead of scanning `matches` per document.
    const urlsById = new Map();
    for (const match of matches) {
        if (!match.sanityId) {
            continue;
        }
        const urls = urlsById.get(match.sanityId);
        if (urls) {
            urls.push(match.gscUrl);
        }
        else {
            urlsById.set(match.sanityId, [match.gscUrl]);
        }
    }
    const map = new Map();
    if (urlsById.size === 0) {
        return map;
    }
    const ids = [...urlsById.keys()];
    const docs = await sanity.fetch(`*[_id in $ids]{ _id, _createdAt, publishedAt }`, { ids });
    for (const doc of docs) {
        const publishedOn = new Date(doc.publishedAt ?? doc._createdAt);
        for (const gscUrl of urlsById.get(doc._id) ?? []) {
            map.set(gscUrl, publishedOn);
        }
    }
    return map;
}
296
/**
 * Maps an unmatch reason code to the bracketed tag shown in sync output.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} The tag for a known reason, or "[?]" for anything else.
 */
function getReasonEmoji(reason) {
    if (reason === "matched") {
        return "[OK]";
    }
    if (reason === "no_slug_extracted") {
        return "[SLUG]";
    }
    if (reason === "no_matching_document") {
        return "[DOC]";
    }
    if (reason === "outside_path_prefix") {
        return "[PREFIX]";
    }
    return "[?]";
}
310
/**
 * Returns the human-readable sentence for an unmatch reason.
 *
 * Unrecognised codes are echoed back ("Unknown reason: <code>") so the output
 * shows which value was actually received. A missing reason (null/undefined)
 * yields the plain "Unknown reason".
 *
 * @param {string | null | undefined} reason - Unmatch reason code.
 * @returns {string} Description text.
 */
function getReasonDescription(reason) {
    switch (reason) {
        case "matched":
            return "Successfully matched";
        case "no_slug_extracted":
            return "Could not extract slug from URL";
        case "no_matching_document":
            return "No Sanity document with matching slug";
        case "outside_path_prefix":
            return "URL outside configured path prefix";
        default:
            return reason == null ? "Unknown reason" : `Unknown reason: ${reason}`;
    }
}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map