@pagebridge/cli 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -0
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/dist/commands/diagnose.d.ts +3 -0
- package/dist/commands/diagnose.d.ts.map +1 -0
- package/dist/commands/diagnose.js +105 -0
- package/dist/commands/list-sites.d.ts +3 -0
- package/dist/commands/list-sites.d.ts.map +1 -0
- package/dist/commands/list-sites.js +38 -0
- package/dist/commands/sync.d.ts +3 -0
- package/dist/commands/sync.d.ts.map +1 -0
- package/dist/commands/sync.js +323 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +19 -0
- package/eslint.config.js +3 -0
- package/package.json +32 -0
- package/src/commands/diagnose.ts +129 -0
- package/src/commands/list-sites.ts +47 -0
- package/src/commands/sync.ts +406 -0
- package/src/index.ts +26 -0
- package/tsconfig.json +9 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Soma Somorjai
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# @pagebridge/cli
|
|
2
|
+
|
|
3
|
+
Command-line interface for PageBridge. Syncs Google Search Console data to Sanity CMS, detects content decay, and generates refresh tasks.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
The CLI is a private workspace package. Build and run it from the monorepo root:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Build the CLI
|
|
11
|
+
pnpm build --filter=@pagebridge/cli
|
|
12
|
+
|
|
13
|
+
# Run commands
|
|
14
|
+
pnpm --filter @pagebridge/cli start <command>
|
|
15
|
+
|
|
16
|
+
# Or run the built entry point directly after building
|
|
17
|
+
./apps/cli/dist/index.js <command>
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Commands
|
|
21
|
+
|
|
22
|
+
### sync
|
|
23
|
+
|
|
24
|
+
Sync Google Search Console data and optionally generate refresh tasks for decaying content.
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Options:
|
|
31
|
+
|
|
32
|
+
| Option | Description | Default |
|
|
33
|
+
|--------|-------------|---------|
|
|
34
|
+
| `--site <url>` | GSC site URL (required) | - |
|
|
35
|
+
| `--dry-run` | Preview changes without writing to Sanity | false |
|
|
36
|
+
| `--skip-tasks` | Only sync data, skip task generation | false |
|
|
37
|
+
| `--check-index` | Check Google index status for pages | false |
|
|
38
|
+
| `--quiet-period <days>` | Days to ignore recently published content | 45 |
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Basic sync
|
|
44
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com
|
|
45
|
+
|
|
46
|
+
# Preview what would be synced
|
|
47
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --dry-run
|
|
48
|
+
|
|
49
|
+
# Sync data only, no refresh tasks
|
|
50
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --skip-tasks
|
|
51
|
+
|
|
52
|
+
# Include index status checks
|
|
53
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --check-index
|
|
54
|
+
|
|
55
|
+
# Use a shorter quiet period (30 days)
|
|
56
|
+
pnpm --filter @pagebridge/cli start sync --site sc-domain:example.com --quiet-period 30
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### list-sites
|
|
60
|
+
|
|
61
|
+
List all Google Search Console properties accessible by the service account.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pnpm --filter @pagebridge/cli start list-sites
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Output:
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
Available GSC Sites:
|
|
71
|
+
- sc-domain:example.com
|
|
72
|
+
- https://www.example.com/
|
|
73
|
+
- sc-domain:another-site.com
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Environment Variables
|
|
77
|
+
|
|
78
|
+
Create a `.env` file in the repository root with:
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Google Service Account (required)
|
|
82
|
+
# JSON stringified credentials from Google Cloud Console
|
|
83
|
+
GOOGLE_SERVICE_ACCOUNT='{"type":"service_account","project_id":"...","private_key":"..."}'
|
|
84
|
+
|
|
85
|
+
# PostgreSQL Database (required)
|
|
86
|
+
DATABASE_URL=postgresql://user:password@localhost:5432/content_keep
|
|
87
|
+
|
|
88
|
+
# Sanity Configuration (required)
|
|
89
|
+
SANITY_PROJECT_ID=your-project-id
|
|
90
|
+
SANITY_DATASET=production
|
|
91
|
+
SANITY_TOKEN=your-write-token
|
|
92
|
+
|
|
93
|
+
# Site URL for URL matching (required)
|
|
94
|
+
SITE_URL=https://example.com
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Workflow
|
|
98
|
+
|
|
99
|
+
The `sync` command performs these steps:
|
|
100
|
+
|
|
101
|
+
1. **Validate environment** - Checks all required variables are set
|
|
102
|
+
2. **Find or create gscSite** - Ensures a Sanity document exists for the site
|
|
103
|
+
3. **Fetch GSC data** - Retrieves search analytics for the window from 90 days ago to 3 days ago (the most recent 3 days are skipped for data stability)
|
|
104
|
+
4. **Store metrics** - Writes page and query metrics to PostgreSQL
|
|
105
|
+
5. **Match URLs** - Maps GSC pages to Sanity documents by slug
|
|
106
|
+
6. **Write snapshots** - Creates gscSnapshot documents in Sanity with metrics and top queries
|
|
107
|
+
7. **Check index status** (optional) - Queries Google URL Inspection API
|
|
108
|
+
8. **Detect decay** - Analyzes metrics for decay patterns
|
|
109
|
+
9. **Generate tasks** - Creates gscRefreshTask documents for pages showing decay
|
|
110
|
+
|
|
111
|
+
## Programmatic Usage
|
|
112
|
+
|
|
113
|
+
The CLI uses `@pagebridge/core` under the hood. For programmatic access:
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
import { GSCClient, SyncEngine, DecayDetector, TaskGenerator } from '@pagebridge/core';
|
|
117
|
+
import { createDb } from '@pagebridge/db';
|
|
118
|
+
import { createClient } from '@sanity/client';
|
|
119
|
+
|
|
120
|
+
const gscClient = new GSCClient({
|
|
121
|
+
credentials: JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT),
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
const db = createDb(process.env.DATABASE_URL);
|
|
125
|
+
|
|
126
|
+
const sanityClient = createClient({
|
|
127
|
+
projectId: process.env.SANITY_PROJECT_ID,
|
|
128
|
+
dataset: process.env.SANITY_DATASET,
|
|
129
|
+
token: process.env.SANITY_TOKEN,
|
|
130
|
+
apiVersion: '2024-01-01',
|
|
131
|
+
useCdn: false,
|
|
132
|
+
});
|
|
133
|
+
|
|
134
|
+
const engine = new SyncEngine({ gsc: gscClient, db, sanity: sanityClient });
|
|
135
|
+
const result = await engine.sync({
|
|
136
|
+
siteUrl: 'sc-domain:example.com',
|
|
137
|
+
siteId: 'sanity-site-id',
|
|
138
|
+
});
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Dependencies
|
|
142
|
+
|
|
143
|
+
- `@pagebridge/core` - Business logic
|
|
144
|
+
- `@pagebridge/db` - Database operations
|
|
145
|
+
- `@sanity/client` - Sanity API
|
|
146
|
+
- `commander` - CLI framework
|
|
147
|
+
- `dotenv` - Environment variable loading
|
|
148
|
+
|
|
149
|
+
## Development
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
# Watch mode
|
|
153
|
+
pnpm --filter @pagebridge/cli dev
|
|
154
|
+
|
|
155
|
+
# Build
|
|
156
|
+
pnpm --filter @pagebridge/cli build
|
|
157
|
+
|
|
158
|
+
# Type check
|
|
159
|
+
pnpm --filter @pagebridge/cli check-types
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
|
|
164
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"diagnose.d.ts","sourceRoot":"","sources":["../../src/commands/diagnose.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AASpC,eAAO,MAAM,eAAe,SAyFxB,CAAC"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import postgres from "postgres";
|
|
3
|
+
import { createDbWithClient, unmatchDiagnostics, eq, desc, } from "@pagebridge/db";
|
|
4
|
+
/**
 * `diagnose` command: prints the stored diagnostics for GSC URLs that could
 * not be matched to a Sanity document, grouped by unmatch reason.
 *
 * Options:
 *   --site <url>      GSC site URL whose diagnostics are listed (required).
 *   --reason <reason> Only show entries with this unmatch reason.
 *   --limit <n>       Maximum number of entries to show (positive integer, default 20).
 *   --json            Print the raw rows as JSON instead of the grouped report.
 *
 * Requires the DATABASE_URL environment variable. The Postgres client is
 * always closed before the action returns.
 */
export const diagnoseCommand = new Command("diagnose")
    .description("View diagnostics for unmatched URLs")
    .requiredOption("--site <url>", "GSC site URL (e.g., sc-domain:example.com)")
    .option("--reason <reason>", "Filter by unmatch reason")
    .option("--limit <n>", "Limit number of results", "20")
    .option("--json", "Output as JSON")
    .action(async (options) => {
        if (!process.env.DATABASE_URL) {
            console.error("Missing required environment variable: DATABASE_URL");
            process.exit(1);
        }
        // Reject garbage such as "--limit abc" up front instead of handing NaN to the query builder.
        const limit = Number.parseInt(options.limit, 10);
        if (!Number.isInteger(limit) || limit <= 0) {
            console.error(`Invalid --limit value: ${options.limit} (expected a positive integer)`);
            process.exit(1);
        }
        const sql = postgres(process.env.DATABASE_URL);
        const db = createDbWithClient(sql);
        try {
            const baseQuery = db
                .select()
                .from(unmatchDiagnostics)
                .where(eq(unmatchDiagnostics.siteId, options.site))
                .orderBy(desc(unmatchDiagnostics.lastSeenAt));
            let results;
            if (options.reason) {
                // The reason filter is applied in memory (only `eq` and `desc` are
                // imported here), so the limit has to be applied after filtering.
                const rows = await baseQuery;
                results = rows
                    .filter((row) => row.unmatchReason === options.reason)
                    .slice(0, limit);
            }
            else {
                results = await baseQuery.limit(limit);
            }
            if (options.json) {
                console.log(JSON.stringify(results, null, 2));
                return;
            }
            if (results.length === 0) {
                if (options.reason) {
                    console.log(`No unmatched URLs with reason "${options.reason}" found for ${options.site}`);
                }
                else {
                    console.log(`No unmatched URLs found for ${options.site}`);
                }
                console.log(`Run 'sync --site ${options.site}' first to generate diagnostics.`);
                return;
            }
            // Group by reason
            const byReason = new Map();
            for (const r of results) {
                const existing = byReason.get(r.unmatchReason) ?? [];
                existing.push(r);
                byReason.set(r.unmatchReason, existing);
            }
            console.log(`\nUnmatched URL Diagnostics for ${options.site}\n`);
            console.log(`Total: ${results.length} unmatched URLs\n`);
            for (const [reason, items] of byReason) {
                console.log(`${getReasonEmoji(reason)} ${getReasonDescription(reason)} (${items.length}):`);
                console.log();
                for (const item of items) {
                    console.log(` ${item.gscUrl}`);
                    if (item.extractedSlug) {
                        console.log(` Extracted slug: "${item.extractedSlug}"`);
                    }
                    if (item.similarSlugs) {
                        try {
                            // similarSlugs is stored as a JSON string by the sync command.
                            const similar = JSON.parse(item.similarSlugs);
                            if (Array.isArray(similar) && similar.length > 0) {
                                console.log(` Similar slugs in Sanity:`);
                                for (const s of similar) {
                                    console.log(` - ${s}`);
                                }
                            }
                        }
                        catch {
                            // Best effort: a corrupt similarSlugs value must not abort the report.
                        }
                    }
                    console.log();
                }
            }
            console.log(`\nTo fix unmatched URLs:`);
            console.log(` 1. Check if the Sanity document exists with the correct slug`);
            console.log(` 2. Verify the document type is in the contentTypes list`);
            console.log(` 3. Ensure the slug field name matches your configuration`);
            console.log(` 4. If using a path prefix, verify it matches your URL structure`);
        }
        finally {
            await sql.end();
        }
    });
|
|
78
|
+
/**
 * Returns the short bracketed tag shown in front of an unmatch reason.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} The tag for a known reason, or "[?]" for anything else.
 */
function getReasonEmoji(reason) {
    const tagsByReason = new Map([
        ["matched", "[OK]"],
        ["no_slug_extracted", "[SLUG]"],
        ["no_matching_document", "[DOC]"],
        ["outside_path_prefix", "[PREFIX]"],
    ]);
    return tagsByReason.get(reason) ?? "[?]";
}
|
|
92
|
+
/**
 * Returns the human-readable description of an unmatch reason.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} The description for a known reason; unknown reasons are
 *   echoed back as "Unknown reason: <reason>".
 */
function getReasonDescription(reason) {
    const descriptionsByReason = new Map([
        ["matched", "Successfully matched"],
        ["no_slug_extracted", "Could not extract slug from URL"],
        ["no_matching_document", "No Sanity document with matching slug"],
        ["outside_path_prefix", "URL outside configured path prefix"],
    ]);
    if (descriptionsByReason.has(reason)) {
        return descriptionsByReason.get(reason);
    }
    return `Unknown reason: ${reason}`;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"list-sites.d.ts","sourceRoot":"","sources":["../../src/commands/list-sites.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAGpC,eAAO,MAAM,gBAAgB,SA2CzB,CAAC"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { GSCClient } from "@pagebridge/core";
|
|
3
|
+
/**
 * `list-sites` command: prints every Google Search Console property the
 * configured service account can access.
 *
 * Reads the service-account credentials as JSON from the
 * GOOGLE_SERVICE_ACCOUNT environment variable. Exits with a non-zero status
 * when the variable is missing, is not a JSON object, or the listing fails.
 */
export const listSitesCommand = new Command("list-sites")
    .description("List all sites the service account has access to")
    .action(async () => {
        if (!process.env.GOOGLE_SERVICE_ACCOUNT) {
            console.error("❌ Missing GOOGLE_SERVICE_ACCOUNT environment variable");
            process.exit(1);
        }
        let credentials;
        try {
            credentials = JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT);
        }
        catch {
            console.error("❌ Failed to parse GOOGLE_SERVICE_ACCOUNT as JSON");
            process.exit(1);
        }
        // Valid JSON such as `null` or `"text"` would otherwise crash on `.client_email` below.
        if (credentials === null || typeof credentials !== "object") {
            console.error("❌ GOOGLE_SERVICE_ACCOUNT must be a JSON object");
            process.exit(1);
        }
        console.log(`🔑 Using service account: ${credentials.client_email}`);
        try {
            const gsc = new GSCClient({ credentials });
            const sites = await gsc.listSites();
            if (sites.length === 0) {
                console.log("\n⚠️ No sites found. The service account has no access to any GSC properties.");
                console.log("\nTo fix this:");
                console.log("1. Go to Google Search Console → Settings → Users and permissions");
                console.log(`2. Add user: ${credentials.client_email}`);
                console.log("3. Set permission level to 'Full'");
            }
            else {
                console.log(`\n✅ Found ${sites.length} site(s):\n`);
                for (const site of sites) {
                    console.log(` ${site}`);
                }
                console.log('\nUse one of these exact values with: pnpm sync --site "<value>"');
            }
        }
        catch (error) {
            console.error("❌ Failed to list sites:", error);
            // Report the failure to the calling shell/CI instead of exiting with status 0.
            process.exitCode = 1;
        }
    });
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sync.d.ts","sourceRoot":"","sources":["../../src/commands/sync.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAgCpC,eAAO,MAAM,WAAW,SAmUpB,CAAC"}
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { createClient as createSanityClient } from "@sanity/client";
|
|
3
|
+
import postgres from "postgres";
|
|
4
|
+
import { GSCClient, SyncEngine, DecayDetector, URLMatcher, TaskGenerator, } from "@pagebridge/core";
|
|
5
|
+
import { createDbWithClient, unmatchDiagnostics } from "@pagebridge/db";
|
|
6
|
+
/**
 * Returns a Date that lies `days` calendar days before the current moment,
 * keeping the current local time of day.
 *
 * @param {number} days - Number of days to go back.
 * @returns {Date} A new Date shifted into the past.
 */
function daysAgo(days) {
    const shifted = new Date();
    const targetDayOfMonth = shifted.getDate() - days;
    // setDate rolls over month and year boundaries for out-of-range day numbers.
    shifted.setDate(targetDayOfMonth);
    return shifted;
}
|
|
11
|
+
/**
 * Creates a small stopwatch used for debug timing output.
 *
 * @param {boolean} debug - When truthy, `end` logs the elapsed time; otherwise it is a no-op.
 * @returns {{ start: () => number, end: (label: string, startTime: number) => void }}
 *   `start` returns the current `performance.now()` reading; `end` logs how
 *   many seconds have passed since `startTime`, with two decimals.
 */
function createTimer(debug) {
    const start = () => performance.now();
    const end = (label, startTime) => {
        if (!debug) {
            return;
        }
        const elapsedSeconds = (performance.now() - startTime) / 1000;
        console.log(`[DEBUG] ${label} completed in ${elapsedSeconds.toFixed(2)}s`);
    };
    return { start, end };
}
|
|
22
|
+
/**
 * `sync` command: pulls Google Search Console data for a site, matches the
 * pages to Sanity documents, records diagnostics for unmatched URLs and,
 * unless disabled, detects decay and creates refresh tasks.
 *
 * Options:
 *   --site <url>           GSC site URL (required).
 *   --dry-run              Do not write anything to Sanity (no gscSite creation
 *                          or patch, no tasks, no snapshots). Unmatched-URL
 *                          diagnostics are still stored in the database.
 *   --skip-tasks           Skip decay detection and task generation.
 *   --check-index          Check index status for matched pages.
 *   --quiet-period <days>  Non-negative integer passed to decay detection (default 45).
 *   --diagnose             Print detailed diagnostics for unmatched URLs.
 *   --diagnose-url <url>   Print matching diagnostics for one URL.
 *   --debug                Print timing information for each step.
 *
 * Requires GOOGLE_SERVICE_ACCOUNT, DATABASE_URL, SANITY_PROJECT_ID,
 * SANITY_DATASET, SANITY_TOKEN and SITE_URL. On failure the error is logged,
 * the exit code is set to 1 and the Postgres client is still closed.
 */
export const syncCommand = new Command("sync")
    .description("Sync GSC data and generate refresh tasks")
    .requiredOption("--site <url>", "GSC site URL (e.g., sc-domain:example.com)")
    .option("--dry-run", "Preview changes without writing to Sanity")
    .option("--skip-tasks", "Only sync data, do not generate tasks")
    .option("--check-index", "Check Google index status for matched pages")
    .option("--quiet-period <days>", "Ignore pages published within N days", "45")
    .option("--diagnose", "Show detailed diagnostics for unmatched URLs")
    .option("--diagnose-url <url>", "Diagnose why a specific URL is not matching")
    .option("--debug", "Enable debug logging with timing information")
    .action(async (options) => {
        const timer = createTimer(options.debug);
        const syncStartTime = timer.start();
        const requiredEnvVars = [
            "GOOGLE_SERVICE_ACCOUNT",
            "DATABASE_URL",
            "SANITY_PROJECT_ID",
            "SANITY_DATASET",
            "SANITY_TOKEN",
            "SITE_URL",
        ];
        for (const envVar of requiredEnvVars) {
            if (!process.env[envVar]) {
                console.error(`Missing required environment variable: ${envVar}`);
                process.exit(1);
            }
        }
        // Validate user input before any connection is opened.
        const quietPeriodDays = Number.parseInt(options.quietPeriod, 10);
        if (!Number.isInteger(quietPeriodDays) || quietPeriodDays < 0) {
            console.error(`Invalid --quiet-period value: ${options.quietPeriod} (expected a non-negative integer)`);
            process.exit(1);
        }
        let credentials;
        try {
            credentials = JSON.parse(process.env.GOOGLE_SERVICE_ACCOUNT);
        }
        catch {
            console.error("Failed to parse GOOGLE_SERVICE_ACCOUNT as JSON");
            process.exit(1);
        }
        let t = timer.start();
        const sanity = createSanityClient({
            projectId: process.env.SANITY_PROJECT_ID,
            dataset: process.env.SANITY_DATASET,
            token: process.env.SANITY_TOKEN,
            apiVersion: "2024-01-01",
            useCdn: false,
        });
        const sql = postgres(process.env.DATABASE_URL);
        const db = createDbWithClient(sql);
        const gsc = new GSCClient({ credentials });
        timer.end("Client initialization", t);
        console.log(`Starting sync for ${options.site}...`);
        try {
            // Find or create the gscSite document in Sanity
            t = timer.start();
            let siteDoc = await sanity.fetch(`*[_type == "gscSite" && siteUrl == $siteUrl][0]{
      _id,
      pathPrefix,
      contentTypes,
      slugField
    }`, { siteUrl: options.site });
            if (!siteDoc) {
                if (options.dryRun) {
                    // A dry run must not write to Sanity, so fall back to the default settings.
                    console.log(`No gscSite document found for ${options.site}; dry run continues with default settings.`);
                    siteDoc = {};
                }
                else {
                    console.log(`Creating gscSite document for ${options.site}...`);
                    siteDoc = await sanity.create({
                        _type: "gscSite",
                        siteUrl: options.site,
                        enabled: true,
                        contentTypes: ["post", "page"],
                        slugField: "slug",
                    });
                }
            }
            timer.end("Fetch gscSite document", t);
            // Undefined only in a dry run without an existing gscSite document,
            // where nothing below uses it.
            const siteId = siteDoc._id;
            // Use configuration from gscSite document
            const contentTypes = siteDoc.contentTypes ?? ["post", "page"];
            const slugField = siteDoc.slugField ?? "slug";
            const pathPrefix = siteDoc.pathPrefix ?? undefined;
            console.log(`Configuration:`);
            console.log(` Content types: ${contentTypes.join(", ")}`);
            console.log(` Slug field: ${slugField}`);
            console.log(` Path prefix: ${pathPrefix ?? "(none)"}`);
            const syncEngine = new SyncEngine({ gsc, db, sanity });
            const matcher = new URLMatcher(sanity, {
                contentTypes,
                slugField,
                baseUrl: process.env.SITE_URL,
                pathPrefix,
            });
            t = timer.start();
            const { pages, rowsProcessed } = await syncEngine.sync({
                siteUrl: options.site,
                startDate: daysAgo(90),
                endDate: daysAgo(3),
            });
            timer.end("GSC data sync", t);
            console.log(`Processed ${rowsProcessed} rows for ${pages.length} pages`);
            t = timer.start();
            const matches = await matcher.matchUrls(pages);
            timer.end("URL matching", t);
            const matched = matches.filter((m) => !!m.sanityId);
            const unmatched = matches.filter((m) => !m.sanityId);
            console.log(`Matched ${matched.length}/${pages.length} URLs to Sanity documents`);
            if (unmatched.length > 0) {
                console.log(`${unmatched.length} unmatched URLs`);
                t = timer.start();
                await storeUnmatchedDiagnostics(db, options.site, unmatched);
                if (!options.dryRun) {
                    // Update gscSite with unmatched count (a Sanity write, so skipped in dry runs)
                    await sanity
                        .patch(siteId)
                        .set({
                        unmatchedCount: unmatched.length,
                        lastDiagnosticsAt: new Date().toISOString(),
                    })
                        .commit();
                }
                timer.end("Store unmatched diagnostics", t);
                printUnmatchedSummary(unmatched, options.diagnose);
            }
            // Handle --diagnose-url for a specific URL
            if (options.diagnoseUrl) {
                const [result] = await matcher.matchUrls([options.diagnoseUrl]);
                printUrlDiagnosis(options.diagnoseUrl, result);
            }
            // Check index status if requested
            if (options.checkIndex && matched.length > 0) {
                console.log(`\nChecking index status for ${matched.length} pages...`);
                t = timer.start();
                const matchedUrls = matched.map((m) => m.gscUrl);
                const indexResult = await syncEngine.syncIndexStatus(options.site, matchedUrls);
                timer.end("Index status check", t);
                console.log(` Indexed: ${indexResult.indexed}, Not indexed: ${indexResult.notIndexed}, Skipped: ${indexResult.skipped}`);
            }
            if (!options.skipTasks) {
                t = timer.start();
                const publishedDates = await getPublishedDates(sanity, matched);
                const detector = new DecayDetector(db);
                const signals = await detector.detectDecay(options.site, publishedDates, {
                    enabled: true,
                    days: quietPeriodDays,
                });
                timer.end("Decay detection", t);
                console.log(`Detected ${signals.length} decay signals`);
                if (options.dryRun) {
                    console.log("\nWould create the following tasks:");
                    for (const s of signals) {
                        console.log(` [${s.severity.toUpperCase()}] ${s.page}`);
                        console.log(` Reason: ${s.reason}`);
                        console.log(` Position: ${s.metrics.positionBefore} -> ${s.metrics.positionNow}`);
                    }
                }
                else {
                    t = timer.start();
                    const taskGenerator = new TaskGenerator(sanity);
                    const created = await taskGenerator.createTasks(siteId, signals, matches);
                    timer.end("Task generation", t);
                    console.log(`Created ${created} new refresh tasks`);
                }
            }
            if (!options.dryRun) {
                t = timer.start();
                await syncEngine.writeSnapshots(siteId, matched);
                timer.end("Write Sanity snapshots", t);
                console.log(`Updated Sanity snapshots`);
            }
            timer.end("Total sync", syncStartTime);
            console.log(`\nSync complete!`);
        }
        catch (error) {
            console.error("Sync failed:", error);
            // Set the exit code instead of calling process.exit(), which would
            // terminate immediately and skip the `finally` that closes the database client.
            process.exitCode = 1;
        }
        finally {
            await sql.end();
        }
    });
/**
 * Upserts one diagnostics row per unmatched URL, keyed by "<site>:<gscUrl>".
 * Rows are written one after another.
 */
async function storeUnmatchedDiagnostics(db, siteUrl, unmatched) {
    for (const u of unmatched) {
        // The same field values are used for a fresh insert and for the update on conflict.
        const fields = {
            extractedSlug: u.extractedSlug ?? null,
            unmatchReason: u.unmatchReason,
            normalizedUrl: u.diagnostics?.normalizedUrl ?? null,
            pathAfterPrefix: u.diagnostics?.pathAfterPrefix ?? null,
            configuredPrefix: u.diagnostics?.configuredPrefix ?? null,
            similarSlugs: u.diagnostics?.similarSlugs
                ? JSON.stringify(u.diagnostics.similarSlugs)
                : null,
            availableSlugsCount: u.diagnostics?.availableSlugsCount ?? null,
            lastSeenAt: new Date(),
        };
        await db
            .insert(unmatchDiagnostics)
            .values({
            id: `${siteUrl}:${u.gscUrl}`,
            siteId: siteUrl,
            gscUrl: u.gscUrl,
            ...fields,
        })
            .onConflictDoUpdate({
            target: unmatchDiagnostics.id,
            set: fields,
        });
    }
}
/**
 * Prints the unmatched URLs: a grouped report (up to 5 URLs per reason) when
 * `detailed` is truthy, the plain list when there are at most 10, otherwise only a hint.
 */
function printUnmatchedSummary(unmatched, detailed) {
    if (!detailed) {
        if (unmatched.length <= 10) {
            for (const u of unmatched) {
                console.log(` - ${u.gscUrl}`);
            }
            console.log(`\n Run with --diagnose for detailed diagnostics`);
        }
        else {
            console.log(` Run with --diagnose to see detailed diagnostics`);
        }
        return;
    }
    console.log(`\nUnmatched URL Diagnostics:\n`);
    // Group by reason
    const byReason = new Map();
    for (const u of unmatched) {
        const existing = byReason.get(u.unmatchReason) ?? [];
        existing.push(u);
        byReason.set(u.unmatchReason, existing);
    }
    for (const [reason, urls] of byReason) {
        console.log(` ${getReasonEmoji(reason)} ${getReasonDescription(reason)} (${urls.length}):`);
        for (const u of urls.slice(0, 5)) {
            console.log(` ${u.gscUrl}`);
            if (u.extractedSlug) {
                console.log(` Extracted slug: "${u.extractedSlug}"`);
            }
            if (u.diagnostics?.similarSlugs?.length) {
                console.log(` Similar slugs in Sanity:`);
                for (const similar of u.diagnostics.similarSlugs) {
                    console.log(` - ${similar}`);
                }
            }
        }
        if (urls.length > 5) {
            console.log(` ... and ${urls.length - 5} more`);
        }
        console.log();
    }
}
/**
 * Prints the match result for the single URL given via --diagnose-url.
 * Only the header line is printed when the matcher returned no result.
 */
function printUrlDiagnosis(targetUrl, result) {
    console.log(`\nDiagnostics for: ${targetUrl}\n`);
    if (!result) {
        return;
    }
    console.log(` Matched: ${result.sanityId ? "Yes" : "No"}`);
    console.log(` Reason: ${getReasonDescription(result.unmatchReason)}`);
    if (result.extractedSlug) {
        console.log(` Extracted slug: "${result.extractedSlug}"`);
    }
    if (result.matchedSlug) {
        console.log(` Matched to Sanity slug: "${result.matchedSlug}"`);
    }
    if (result.diagnostics) {
        console.log(` Normalized URL: ${result.diagnostics.normalizedUrl}`);
        console.log(` Path after prefix: ${result.diagnostics.pathAfterPrefix}`);
        console.log(` Configured prefix: ${result.diagnostics.configuredPrefix ?? "(none)"}`);
        console.log(` Available Sanity slugs: ${result.diagnostics.availableSlugsCount}`);
        if (result.diagnostics.similarSlugs?.length) {
            console.log(` Similar slugs in Sanity:`);
            for (const similar of result.diagnostics.similarSlugs) {
                console.log(` - ${similar}`);
            }
        }
    }
}
|
|
283
|
+
/**
 * Looks up the publication date of every matched Sanity document.
 *
 * Each match whose `sanityId` resolves to a fetched document gets an entry,
 * including several GSC URLs that point at the same document. The date is
 * `publishedAt` when present and parseable, otherwise `_createdAt`.
 *
 * @param {{ fetch: Function }} sanity - Sanity client; only `fetch(query, params)` is used.
 * @param {Array<{ gscUrl: string, sanityId?: string }>} matches - URL match results.
 * @returns {Promise<Map<string, Date>>} Map from GSC URL to publication date.
 */
async function getPublishedDates(sanity, matches) {
    const publishedDates = new Map();
    const ids = [...new Set(matches.map((m) => m.sanityId).filter(Boolean))];
    if (ids.length === 0) {
        // Nothing to look up; avoid a pointless round trip to Sanity.
        return publishedDates;
    }
    const docs = await sanity.fetch(`*[_id in $ids]{ _id, _createdAt, publishedAt }`, { ids });
    // Index once by id instead of scanning `matches` for every document.
    const docsById = new Map(docs.map((doc) => [doc._id, doc]));
    for (const match of matches) {
        const doc = docsById.get(match.sanityId);
        if (!doc) {
            continue;
        }
        let date = new Date(doc.publishedAt ?? doc._createdAt);
        if (Number.isNaN(date.getTime())) {
            // Unparseable publishedAt: fall back to the creation timestamp.
            date = new Date(doc._createdAt);
        }
        publishedDates.set(match.gscUrl, date);
    }
    return publishedDates;
}
|
|
296
|
+
/**
 * Maps an unmatch reason code to the short bracketed tag used in console output.
 *
 * @param {string} reason - Unmatch reason code.
 * @returns {string} Tag for the reason; "[?]" when the code is not recognised.
 */
function getReasonEmoji(reason) {
    if (reason === "matched") {
        return "[OK]";
    }
    if (reason === "no_slug_extracted") {
        return "[SLUG]";
    }
    if (reason === "no_matching_document") {
        return "[DOC]";
    }
    if (reason === "outside_path_prefix") {
        return "[PREFIX]";
    }
    return "[?]";
}
|
|
310
|
+
/**
 * Returns the human-readable description of an unmatch reason.
 *
 * Unrecognised reason codes are included in the message so that new or
 * misspelled codes are visible in the output; a missing reason (null or
 * undefined) yields the plain "Unknown reason".
 *
 * @param {string | null | undefined} reason - Unmatch reason code.
 * @returns {string} Description text for console output.
 */
function getReasonDescription(reason) {
    switch (reason) {
        case "matched":
            return "Successfully matched";
        case "no_slug_extracted":
            return "Could not extract slug from URL";
        case "no_matching_document":
            return "No Sanity document with matching slug";
        case "outside_path_prefix":
            return "URL outside configured path prefix";
        default:
            return reason == null ? "Unknown reason" : `Unknown reason: ${reason}`;
    }
}
|
package/dist/index.d.ts
ADDED