@gscdump/cli 0.3.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -56
- package/dist/index.mjs +2225 -1079
- package/package.json +12 -9
package/dist/index.mjs
CHANGED
|
@@ -1,43 +1,82 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import "node:module";
|
|
3
2
|
import process from "node:process";
|
|
4
3
|
import { defineCommand, runMain } from "citty";
|
|
5
|
-
import
|
|
4
|
+
import { defaultAnalyzerRegistry } from "@gscdump/analysis/registry";
|
|
5
|
+
import { AnalyzerCapabilityError, analyzeFromSource, createEngineQuerySource, createGscApiQuerySource } from "@gscdump/analysis";
|
|
6
|
+
import { cancel, isCancel, multiselect, select, text } from "@clack/prompts";
|
|
7
|
+
import { daysAgo, fetchSitemap, formatErrorForCli, getDateRange, googleSearchConsole, progressBar } from "gscdump";
|
|
8
|
+
import fs, { readFile, rm } from "node:fs/promises";
|
|
6
9
|
import { createServer } from "node:http";
|
|
7
10
|
import path from "node:path";
|
|
8
|
-
import { cancel, isCancel, multiselect, select, text } from "@clack/prompts";
|
|
9
11
|
import { OAuth2Client } from "google-auth-library";
|
|
10
12
|
import os from "node:os";
|
|
11
13
|
import { consola } from "consola";
|
|
12
|
-
import {
|
|
13
|
-
import {
|
|
14
|
+
import { createNodeHarness } from "@gscdump/engine/node-harness";
|
|
15
|
+
import { TABLE_DIMS, transformGscRow } from "@gscdump/engine/ingest";
|
|
16
|
+
import { allTables, inferTable } from "@gscdump/engine/schema";
|
|
17
|
+
import { Buffer } from "node:buffer";
|
|
18
|
+
import { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore } from "@gscdump/engine/entities";
|
|
19
|
+
import { createGscMcpServer } from "@gscdump/mcp/server";
|
|
14
20
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
var
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
get: all[name],
|
|
25
|
-
enumerable: true
|
|
26
|
-
});
|
|
21
|
+
import { SearchTypes, between, country, date, device, gsc, page, query, searchAppearance } from "gscdump/query";
|
|
22
|
+
import { DuckDBInstance } from "@duckdb/node-api";
|
|
23
|
+
import { sqlEscape } from "@gscdump/engine/sql";
|
|
24
|
+
import { DEFAULT_ROLLUPS, rebuildRollups } from "@gscdump/engine/rollups";
|
|
25
|
+
import { filesystemStats } from "@gscdump/engine/filesystem";
|
|
26
|
+
/**
 * Error raised when a requested analysis cannot be served by the local
 * Parquet store.
 */
var LocalStoreUnsupportedError = class extends Error {
  /**
   * @param {string} tool - Identifier of the analysis that was requested.
   */
  constructor(tool) {
    const reason = `analysis "${tool}" is not yet implemented against the local Parquet store`;
    super(reason);
    this.name = "LocalStoreUnsupportedError";
  }
};
|
|
32
|
+
/**
 * Error raised when no data has been synced locally for a site.
 */
var LocalStoreEmptyError = class extends Error {
  /**
   * @param {string} siteUrl - Site whose local data is missing.
   */
  constructor(siteUrl) {
    const reason = `no local data synced for ${siteUrl} (run \`gscdump sync\` first)`;
    super(reason);
    this.name = "LocalStoreEmptyError";
  }
};
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
38
|
+
/**
 * Reports whether the local store lists at least one live entry for a site.
 *
 * @param {object} store - Local store exposing `engine`, `userId` and `siteIdFor`.
 * @param {string} siteUrl - Site to look up.
 * @returns {Promise<boolean>} True when `engine.listLive` returns a non-empty list.
 */
async function hasLocalData(store, siteUrl) {
  const scope = {
    userId: store.userId,
    siteId: store.siteIdFor(siteUrl)
  };
  const liveEntries = await store.engine.listLive(scope);
  return liveEntries.length > 0;
}
|
|
44
|
+
/**
 * Runs an analysis against the local store's engine.
 *
 * @param {object} store - Local store exposing `engine`, `userId` and `siteIdFor`.
 * @param {string} siteUrl - Site to analyze.
 * @param {object} params - Analysis parameters; `params.type` names the analyzer.
 * @returns {Promise<object>} Whatever `analyzeFromSource` resolves to.
 * @throws {LocalStoreUnsupportedError} When the analyzer rejects with an
 *   AnalyzerCapabilityError. Any other rejection is rethrown unchanged.
 */
async function runLocalAnalysis(store, siteUrl, params) {
  const source = createEngineQuerySource({
    engine: store.engine,
    ctx: {
      userId: store.userId,
      siteId: store.siteIdFor(siteUrl)
    }
  });
  // Start the analysis outside the try block so only its rejection is translated.
  const pending = analyzeFromSource(source, params, defaultAnalyzerRegistry);
  try {
    return await pending;
  } catch (e) {
    if (e instanceof AnalyzerCapabilityError) throw new LocalStoreUnsupportedError(params.type);
    throw e;
  }
}
|
|
56
|
+
/**
 * Runs an analysis against the live GSC API.
 *
 * @param {object} client - GSC API client handed to `createGscApiQuerySource`.
 * @param {string} siteUrl - Site to analyze.
 * @param {object} params - Analysis parameters; `params.type` names the analyzer.
 * @returns {Promise<object>} Whatever `analyzeFromSource` resolves to.
 * @throws {LocalStoreUnsupportedError} When the analyzer raises an
 *   AnalyzerCapabilityError. The error class is kept for existing callers, but
 *   the message now describes the live source and the original error is
 *   attached as `cause`. Any other error is rethrown unchanged.
 */
async function runLiveAnalysis(client, siteUrl, params) {
  const source = createGscApiQuerySource({
    client,
    siteUrl
  });
  try {
    return await analyzeFromSource(source, params, defaultAnalyzerRegistry);
  } catch (e) {
    if (!(e instanceof AnalyzerCapabilityError)) throw e;
    const unsupported = new LocalStoreUnsupportedError(params.type);
    // The constructor's default text talks about the local Parquet store,
    // which is wrong when the user explicitly asked for the live API.
    unsupported.message = `analysis "${params.type}" is not supported by the live GSC API source`;
    unsupported.cause = e;
    throw unsupported;
  }
}
|
|
36
65
|
// Directory that holds the CLI's config.json; defaults to ~/.config/gscdump.
// NOTE(review): declared with `let`, so presumably reassigned elsewhere in the file — not visible here.
let configDir = path.join(os.homedir(), ".config", "gscdump");
|
|
37
66
|
/**
 * Returns the directory currently used for the CLI's configuration files.
 *
 * @returns {string} The current value of the module-level `configDir`.
 */
function getConfigDir() {
  const currentDir = configDir;
  return currentDir;
}
|
|
40
|
-
|
|
69
|
+
/**
 * Builds the default data directory path, `<home>/.gscdump/data`.
 *
 * @returns {string} Path under the current user's home directory.
 */
function defaultDataDir() {
  const segments = [os.homedir(), ".gscdump", "data"];
  return path.join(...segments);
}
|
|
72
|
+
/**
 * Resolves the data directory for a loaded config.
 *
 * @param {object} config - Loaded CLI config; `dataDir` is optional.
 * @returns {string} `config.dataDir` when set (not null/undefined), otherwise
 *   the default data directory, with a leading `~` expanded.
 */
function resolveDataDir(config) {
  const configured = config.dataDir;
  const rawDir = configured ?? defaultDataDir();
  return expandTilde(rawDir);
}
|
|
75
|
+
/**
 * Expands a leading `~` or `~/` to the current user's home directory.
 *
 * @param {string} p - Path that may start with a tilde.
 * @returns {string} The expanded path; other inputs (including `~user`) are
 *   returned unchanged.
 */
function expandTilde(p) {
  if (p === "~") return os.homedir();
  const isHomeRelative = p.startsWith("~/");
  if (!isHomeRelative) return p;
  const remainder = p.slice(2);
  return path.join(os.homedir(), remainder);
}
|
|
41
80
|
/**
 * Reads and parses `config.json` from the config directory.
 *
 * @returns {Promise<object>} The parsed config, or an empty object when the
 *   file cannot be read or parsed (any failure is treated as "no config").
 */
async function loadConfig() {
  const configFile = path.join(configDir, "config.json");
  try {
    const raw = await fs.readFile(configFile, "utf-8");
    return JSON.parse(raw);
  } catch {
    return {};
  }
}
|
|
@@ -51,28 +90,14 @@ async function saveConfig(config) {
|
|
|
51
90
|
/**
 * Returns the full path of the CLI's `config.json`.
 *
 * @returns {string} `config.json` inside the current config directory.
 */
function getConfigPath() {
  const fileName = "config.json";
  return path.join(configDir, fileName);
}
|
|
54
|
-
|
|
55
|
-
//#endregion
|
|
56
|
-
//#region src/utils.ts
|
|
57
93
|
// Version string printed next to the product name in the splash banner.
const VERSION = "1.0.0";
|
|
58
94
|
// Shared consola logger for the CLI; messages are tagged "gscdump".
const logger = consola.withTag("gscdump");
|
|
59
|
-
|
|
60
|
-
* Handles GSC API errors with helpful messages and suggestions.
|
|
61
|
-
* Exits process with code 1.
|
|
62
|
-
*/
|
|
63
|
-
function handleGscError(error) {
|
|
95
|
+
/**
 * Prints a formatted GSC error to stderr, padded by blank lines, then exits
 * the process with status 1.
 *
 * @param {unknown} error - Error passed through `formatErrorForCli`.
 */
function gscErrorHandler(error) {
  const printBlankLine = () => console.error();
  printBlankLine();
  console.error(formatErrorForCli(error));
  printBlankLine();
  process.exit(1);
}
|
|
69
|
-
/**
|
|
70
|
-
* Creates a .catch() handler for GSC API errors.
|
|
71
|
-
* Use: somePromise.catch(gscErrorHandler)
|
|
72
|
-
*/
|
|
73
|
-
function gscErrorHandler(error) {
|
|
74
|
-
return handleGscError(error);
|
|
75
|
-
}
|
|
76
101
|
const gradientColors = [
|
|
77
102
|
(s) => `\x1B[38;2;52;211;153m${s}\x1B[0m`,
|
|
78
103
|
(s) => `\x1B[38;2;45;212;191m${s}\x1B[0m`,
|
|
@@ -80,9 +105,9 @@ const gradientColors = [
|
|
|
80
105
|
(s) => `\x1B[38;2;56;189;248m${s}\x1B[0m`,
|
|
81
106
|
(s) => `\x1B[38;2;96;165;250m${s}\x1B[0m`
|
|
82
107
|
];
|
|
83
|
-
function applyGradient(text
|
|
84
|
-
return [...text
|
|
85
|
-
const colorIndex = Math.floor(i / text
|
|
108
|
+
/**
 * Colors each character of `text` with an entry from `gradientColors`, moving
 * through the palette from the start of the string to the end.
 *
 * @param {string} text - Text to colorize.
 * @returns {string} The text with each character wrapped by its color function.
 */
function applyGradient(text) {
  const painted = [];
  let i = 0;
  for (const char of text) {
    const colorIndex = Math.floor(i / text.length * gradientColors.length);
    // Clamp so the last characters never index past the palette.
    const paint = gradientColors[Math.min(colorIndex, gradientColors.length - 1)];
    painted.push(paint(char));
    i += 1;
  }
  return painted.join("");
}
|
|
@@ -91,15 +116,27 @@ function showSplash() {
|
|
|
91
116
|
console.log(` ${applyGradient("GSC Dump")} v${VERSION}`);
|
|
92
117
|
console.log();
|
|
93
118
|
}
|
|
94
|
-
function progressBar(current, total, label, width = 30) {
|
|
95
|
-
const percent = Math.min(current / total, 1);
|
|
96
|
-
const filled = Math.round(width * percent);
|
|
97
|
-
const empty = width - filled;
|
|
98
|
-
return ` ${`\x1B[36m${"█".repeat(filled)}\x1B[0m\x1B[90m${"░".repeat(empty)}\x1B[0m`} \x1B[90m${current}/${total}\x1B[0m ${label}`;
|
|
99
|
-
}
|
|
100
119
|
/**
 * Writes a carriage return followed by the ANSI "erase to end of line"
 * sequence to stdout.
 */
function clearLine() {
  const eraseSequence = "\r\x1B[K";
  process.stdout.write(eraseSequence);
}
|
|
122
|
+
/**
 * Formats how long ago a timestamp was, relative to now.
 *
 * @param {number} ms - Timestamp in milliseconds since the epoch.
 * @returns {string} "just now" under a minute (including future timestamps),
 *   otherwise whole minutes, hours or days followed by " ago".
 */
function formatAge(ms) {
  const MINUTE = 6e4;
  const HOUR = 36e5;
  const DAY = 864e5;
  const elapsed = Date.now() - ms;
  if (elapsed < MINUTE) return "just now";
  let unit = DAY;
  let suffix = "d";
  if (elapsed < HOUR) {
    unit = MINUTE;
    suffix = "m";
  } else if (elapsed < DAY) {
    unit = HOUR;
    suffix = "h";
  }
  return `${Math.floor(elapsed / unit)}${suffix} ago`;
}
|
|
129
|
+
/**
 * Processes every item with at most `concurrency` processor calls in flight.
 *
 * Workers pull the next unclaimed index from a shared cursor, so items are
 * started in order. A rejection from `processor` rejects the returned promise.
 *
 * @param {Array} items - Items to process.
 * @param {number} concurrency - Maximum parallel workers. Values below 1,
 *   NaN or undefined are treated as 1 so that items are never silently skipped.
 * @param {(item: any, index: number) => Promise<void>} processor - Called once per item.
 * @returns {Promise<void>} Resolves once every item has been processed.
 */
async function runWithConcurrency(items, concurrency, processor) {
  if (items.length === 0) return;
  const floored = Math.floor(concurrency);
  const requested = Number.isNaN(floored) ? 1 : floored;
  // Always run at least one worker; never more workers than items.
  const workerCount = Math.min(items.length, Math.max(1, requested));
  let next = 0;
  const worker = async () => {
    while (next < items.length) {
      const index = next++;
      await processor(items[index], index);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
}
|
|
103
140
|
function toCSV(data, columns) {
|
|
104
141
|
return [columns.join(","), ...data.map((row) => columns.map((col) => {
|
|
105
142
|
const val = row[col];
|
|
@@ -140,21 +177,7 @@ function exportToCSV(output) {
|
|
|
140
177
|
])}`);
|
|
141
178
|
return sections.join("\n\n");
|
|
142
179
|
}
|
|
143
|
-
|
|
144
|
-
//#endregion
|
|
145
|
-
//#region src/auth.ts
|
|
146
|
-
var auth_exports = /* @__PURE__ */ __exportAll({
|
|
147
|
-
authenticate: () => authenticate,
|
|
148
|
-
authenticateCloud: () => authenticateCloud,
|
|
149
|
-
clearCloudTokens: () => clearCloudTokens,
|
|
150
|
-
clearTokens: () => clearTokens,
|
|
151
|
-
getAuth: () => getAuth,
|
|
152
|
-
getAuthCredentials: () => getAuthCredentials,
|
|
153
|
-
loadCloudTokens: () => loadCloudTokens,
|
|
154
|
-
loadTokens: () => loadTokens,
|
|
155
|
-
saveCloudTokens: () => saveCloudTokens,
|
|
156
|
-
saveTokens: () => saveTokens
|
|
157
|
-
});
|
|
180
|
+
// Matches the `redirect_uri=<value>` query parameter (value runs up to the next `&`) in an auth URL.
const REDIRECT_URI_RE = /redirect_uri=[^&]+/;
|
|
158
181
|
/**
 * Returns the full path of the stored OAuth tokens file.
 *
 * @returns {string} `tokens.json` inside the current config directory.
 */
function getTokensPath() {
  const directory = getConfigDir();
  return path.join(directory, "tokens.json");
}
|
|
@@ -253,7 +276,7 @@ async function getAuthCodeViaLoopback(authUrl) {
|
|
|
253
276
|
return;
|
|
254
277
|
}
|
|
255
278
|
resolvedRedirectUri = `http://127.0.0.1:${addr.port}`;
|
|
256
|
-
const fullAuthUrl = authUrl.replace(
|
|
279
|
+
const fullAuthUrl = authUrl.replace(REDIRECT_URI_RE, `redirect_uri=${encodeURIComponent(resolvedRedirectUri)}`);
|
|
257
280
|
console.log();
|
|
258
281
|
console.log(" \x1B[1mOpening browser for authorization...\x1B[0m");
|
|
259
282
|
console.log(` \x1B[90mIf browser doesn't open, visit:\x1B[0m`);
|
|
@@ -318,156 +341,400 @@ async function authenticate(credentials, interactive) {
|
|
|
318
341
|
logger.success(`Tokens saved to ${getTokensPath()}`);
|
|
319
342
|
return oauth2Client;
|
|
320
343
|
}
|
|
321
|
-
function
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
async function loadCloudTokens() {
|
|
325
|
-
return fs.readFile(getCloudTokensPath(), "utf-8").then((data) => JSON.parse(data)).catch(() => null);
|
|
326
|
-
}
|
|
327
|
-
async function saveCloudTokens(tokens) {
|
|
328
|
-
await fs.mkdir(getConfigDir(), {
|
|
329
|
-
recursive: true,
|
|
330
|
-
mode: 448
|
|
331
|
-
});
|
|
332
|
-
await fs.writeFile(getCloudTokensPath(), JSON.stringify(tokens, null, 2), { mode: 384 });
|
|
344
|
+
/**
 * Obtains credentials and authenticates with them.
 *
 * @param {object} [opts] - Options.
 * @param {boolean} [opts.interactive=true] - Passed to both the credential
 *   lookup and the authentication step.
 * @returns {Promise<object>} Whatever `authenticate` resolves to.
 */
async function getAuth(opts = {}) {
  // Mirror a destructuring default: only `undefined` falls back to true.
  const interactive = opts.interactive === void 0 ? true : opts.interactive;
  const credentials = await getAuthCredentials(interactive);
  return authenticate(credentials, interactive);
}
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
logger.success("Logged out from cloud");
|
|
348
|
+
/**
 * Creates the local store by delegating to `createNodeHarness`.
 *
 * @param {object} opts - Options forwarded unchanged to `createNodeHarness`.
 * @returns {object} The harness returned by `createNodeHarness`.
 */
function createLocalStore(opts) {
  const harness = createNodeHarness(opts);
  return harness;
}
|
|
338
|
-
async function
|
|
339
|
-
const
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
logger.error("No cloud tokens. Run gscdump init to authenticate.");
|
|
352
|
-
process.exit(1);
|
|
353
|
-
}
|
|
354
|
-
const initRes = await fetch(`${cloudUrl}/api/cli/auth/init`, { method: "POST" }).then((r) => r.json()).catch((e) => {
|
|
355
|
-
logger.error(`Failed to connect to ${cloudUrl}: ${e.message}`);
|
|
356
|
-
process.exit(1);
|
|
357
|
-
});
|
|
358
|
-
console.log();
|
|
359
|
-
console.log(" \x1B[1mOpen this URL in your browser:\x1B[0m");
|
|
360
|
-
console.log(` \x1B[36m${initRes.authUrl}\x1B[0m`);
|
|
361
|
-
console.log();
|
|
362
|
-
console.log(` \x1B[90mCode: ${initRes.code}\x1B[0m`);
|
|
363
|
-
console.log();
|
|
364
|
-
logger.info("Waiting for authorization...");
|
|
365
|
-
const pollInterval = 2e3;
|
|
366
|
-
const maxAttempts = Math.ceil(initRes.expiresIn * 1e3 / pollInterval);
|
|
367
|
-
for (let i = 0; i < maxAttempts; i++) {
|
|
368
|
-
await new Promise((r) => setTimeout(r, pollInterval));
|
|
369
|
-
const pollRes = await fetch(`${cloudUrl}/api/cli/auth/poll?code=${initRes.code}`).then((r) => r.json()).catch(() => ({ status: "error" }));
|
|
370
|
-
if (pollRes.status === "complete" && pollRes.tokens) {
|
|
371
|
-
await saveCloudTokens(pollRes.tokens);
|
|
372
|
-
logger.success("Authenticated via cloud.gscdump.com");
|
|
373
|
-
const oauth2Client = new OAuth2Client();
|
|
374
|
-
oauth2Client.setCredentials({
|
|
375
|
-
access_token: pollRes.tokens.accessToken,
|
|
376
|
-
refresh_token: pollRes.tokens.refreshToken,
|
|
377
|
-
expiry_date: pollRes.tokens.expiresAt
|
|
378
|
-
});
|
|
379
|
-
return oauth2Client;
|
|
380
|
-
}
|
|
381
|
-
if (pollRes.status === "error") {
|
|
382
|
-
logger.error("Authorization failed");
|
|
351
|
+
/**
 * Builds the shared context used by CLI commands.
 *
 * @param {object} [opts] - Options.
 * @param {boolean} [opts.needsAuth=false] - Authenticate and create a GSC client.
 * @param {boolean} [opts.needsStore=false] - Create the local store.
 * @param {boolean} [opts.interactive=false] - Forwarded to `getAuth`.
 * @returns {Promise<object>} `{ config, auth, client, store, loadSites, resolveSite }`;
 *   `auth`/`client` are null without `needsAuth`, `store` is null without `needsStore`.
 */
async function createCommandContext(opts = {}) {
  const { needsAuth = false, needsStore = false, interactive = false } = opts;
  const config = await loadConfig();

  let auth = null;
  if (needsAuth) {
    auth = await getAuth({
      interactive,
      config
    });
  }
  const client = auth ? googleSearchConsole(auth) : null;

  let store = null;
  if (needsStore) store = createLocalStore({ dataDir: resolveDataDir(config) });

  /** Lists the sites the account can use, skipping unverified ones. */
  async function loadSites() {
    if (!client) throw new Error("loadSites requires needsAuth: true");
    const onFetchError = (e) => {
      logger.error(`Failed to fetch sites: ${e.message}`);
      process.exit(1);
    };
    const listed = await client.sites().catch(onFetchError);
    const usable = listed.filter((s) => s.siteUrl && s.permissionLevel !== "siteUnverifiedUser");
    return usable.map((s) => ({
      siteUrl: s.siteUrl,
      permissionLevel: s.permissionLevel || "unknown"
    }));
  }

  /**
   * Picks a site URL: a match for `target` (or the configured default), the
   * only available site, or an interactive selection.
   */
  async function resolveSite(target) {
    const hint = target ?? config.defaultSite;
    const sites = await loadSites();
    if (sites.length === 0) {
      logger.error("No verified sites found");
      process.exit(1);
    }
    if (hint) {
      const matchesHint = (s) => s.siteUrl === hint || s.siteUrl.includes(hint);
      const match = sites.find(matchesHint);
      if (match) return match.siteUrl;
    }
    if (sites.length === 1) return sites[0].siteUrl;
    const options = sites.map((s) => ({
      value: s.siteUrl,
      label: s.siteUrl
    }));
    const selected = await select({
      message: "Select a site",
      options
    });
    if (isCancel(selected)) {
      cancel("Cancelled");
      process.exit(0);
    }
    return selected;
  }

  return {
    config,
    auth,
    client,
    store,
    loadSites,
    resolveSite
  };
}
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
404
|
+
// Analyzer ids from the default registry; each one becomes an `analyze` sub-command.
const ANALYSIS_TOOLS = defaultAnalyzerRegistry.listAnalyzerIds();
|
|
405
|
+
/**
 * Extra command-line arguments per analysis tool, keyed by tool id.
 * Every extra argument is a string option with a description.
 */
const TOOL_EXTRA_ARGS = (() => {
  const stringArg = (description) => ({
    type: "string",
    description
  });
  // Tools comparing two periods share this pair; build fresh objects for each tool.
  const previousPeriodArgs = () => ({
    "prev-start": stringArg("Previous period start date (required)"),
    "prev-end": stringArg("Previous period end date (required)")
  });
  return {
    brand: { "brand-terms": stringArg("Comma-separated brand terms (required)") },
    movers: previousPeriodArgs(),
    decay: previousPeriodArgs(),
    concentration: { dimension: stringArg("Dimension: pages or keywords (default: pages)") },
    seasonality: { metric: stringArg("Metric: clicks or impressions (default: clicks)") },
    clustering: { "cluster-by": stringArg("Cluster by: prefix, intent, or both (default: both)") },
    trends: {
      "dimension": stringArg("Dimension: pages or keywords (default: pages)"),
      "weeks": stringArg("Rolling window size in weeks (default: 28)"),
      "min-weeks": stringArg("Minimum weeks with data to include an entity (default: weeks/4)")
    }
  };
})();
|
|
457
|
+
/**
 * Translates parsed CLI arguments into analysis parameters.
 *
 * @param {string} tool - Analyzer id, stored as `type`.
 * @param {object} args - Parsed CLI arguments (kebab-case option names).
 * @returns {object} Always has `type`, `startDate`, `endDate` and `limit`
 *   (undefined when the argument is missing or empty); optional parameters are
 *   added only when their argument is truthy.
 */
function buildParams(tool, args) {
  const textOrUndefined = (value) => value ? String(value) : void 0;
  const params = {
    type: tool,
    startDate: textOrUndefined(args.start),
    endDate: textOrUndefined(args.end),
    limit: args.limit ? Number(args.limit) : void 0
  };

  const brandTerms = args["brand-terms"];
  if (brandTerms) {
    params.brandTerms = String(brandTerms)
      .split(",")
      .map((t) => t.trim())
      .filter(Boolean);
  }

  // [CLI argument, parameter name] pairs, copied in this order.
  const stringOptions = [
    ["prev-start", "prevStartDate"],
    ["prev-end", "prevEndDate"],
    ["dimension", "dimension"],
    ["metric", "metric"],
    ["cluster-by", "clusterBy"]
  ];
  for (const [argName, paramName] of stringOptions) {
    if (args[argName]) params[paramName] = String(args[argName]);
  }

  const numericOptions = [
    ["weeks", "weeks"],
    ["min-weeks", "minWeeksWithData"]
  ];
  for (const [argName, paramName] of numericOptions) {
    if (args[argName]) params[paramName] = Number(args[argName]);
  }
  return params;
}
|
|
474
|
+
/**
 * Creates the `analyze <tool>` sub-command for one analyzer.
 *
 * The command runs against the local store unless `--live` is passed, in which
 * case it queries the GSC API. Results are printed as JSON or handed to
 * `renderResults`.
 *
 * @param {string} tool - Analyzer id; also the command name.
 * @returns {object} The command definition produced by `defineCommand`.
 */
function makeToolCommand(tool) {
  const extraArgs = TOOL_EXTRA_ARGS[tool] || {};

  // Logs an error and exits with status 1.
  const fail = (message) => {
    logger.error(message);
    process.exit(1);
  };

  const onLocalFailure = (e) => {
    if (e instanceof LocalStoreUnsupportedError) fail(`${e.message}. Pass --live to run against the GSC API.`);
    if (e instanceof LocalStoreEmptyError) fail(`${e.message}`);
    fail(`Local analysis failed: ${e.message}`);
  };

  const args = {
    site: {
      type: "string",
      alias: "s",
      description: "Site URL"
    },
    start: {
      type: "string",
      description: "Start date (YYYY-MM-DD)"
    },
    end: {
      type: "string",
      description: "End date (YYYY-MM-DD)"
    },
    limit: {
      type: "string",
      alias: "l",
      default: "100",
      description: "Max results"
    },
    format: {
      type: "string",
      alias: "f",
      default: "table",
      description: "Output: table, json, csv"
    },
    json: {
      type: "boolean",
      default: false,
      description: "Output as JSON"
    },
    live: {
      type: "boolean",
      default: false,
      description: "Force live GSC API; bypass local Parquet store"
    },
    ...extraArgs
  };

  return defineCommand({
    meta: {
      name: tool,
      description: `Run ${tool} analysis`
    },
    args,
    async run({ args }) {
      const ctx = await createCommandContext({
        needsAuth: true,
        needsStore: !args.live
      });
      const siteUrl = await ctx.resolveSite(args.site);
      logger.info(`Running ${tool} analysis...`);
      const params = buildParams(tool, args);
      const format = args.json ? "json" : String(args.format);

      let result;
      if (args.live) {
        result = await runLiveAnalysis(ctx.client, siteUrl, params).catch((e) => fail(`Analysis failed: ${e.message}`));
      } else {
        const store = ctx.store;
        // A failing lookup is treated the same as "nothing synced".
        const synced = await hasLocalData(store, siteUrl).catch(() => false);
        if (!synced) fail(`No local data for ${siteUrl}. Run \`gscdump sync\` first, or pass --live.`);
        result = await runLocalAnalysis(store, siteUrl, params).catch(onLocalFailure);
      }

      if (format === "json") {
        console.log(JSON.stringify(result, null, 2));
        return;
      }
      renderResults(result.results, result.results.length, format);
    }
  });
}
|
|
565
|
+
// Block characters used to draw sparklines, ordered from lowest to highest level.
const SPARK_CHARS = [..."▁▂▃▄▅▆▇█"];
|
|
575
|
+
// Placeholder drawn in a sparkline where a row has no value for a bucket.
const SPARK_GAP = "·";
|
|
576
|
+
/**
 * Columns rendered as percentages, mapped to how the raw value is converted:
 * "ratio_to_pct" → (value - 1) * 100, "direct" → value * 100,
 * "scaled" → value used as-is.
 */
const PERCENT_COLS = Object.fromEntries([
  ["growthRatio", "ratio_to_pct"],
  ["brandShare", "direct"],
  ["topNConcentration", "direct"],
  ["declinePercent", "direct"],
  ["ctr", "direct"],
  ["share", "direct"],
  ["vsAverage", "ratio_to_pct"],
  ["clicksChangePercent", "scaled"],
  ["impressionsChangePercent", "scaled"]
]);
|
|
587
|
+
/**
 * Formats a numeric value as a signed whole-number percentage.
 *
 * @param {number} val - Raw value.
 * @param {string} style - "ratio_to_pct" treats `val` as a ratio around 1
 *   ((val - 1) * 100), "direct" treats it as a fraction (val * 100), anything
 *   else uses `val` as an already-scaled percentage.
 * @returns {string} e.g. "+25%", "-12%" or "0%"; an empty string when the
 *   result is not finite. Values that round to zero are shown as "0%" without
 *   a sign (previously "-0%" or "+0%").
 */
function formatPct(val, style) {
  let pct;
  if (style === "ratio_to_pct") pct = (val - 1) * 100;
  else if (style === "direct") pct = val * 100;
  else pct = val;
  if (!Number.isFinite(pct)) return "";
  const digits = pct.toFixed(0);
  // toFixed keeps the sign of tiny negatives ("-0"); derive the sign from the rounded value instead.
  const rounded = Number(digits);
  if (rounded === 0) return "0%";
  return `${rounded > 0 ? "+" : ""}${digits}%`;
}
|
|
592
|
+
/**
 * Checks whether an array looks like a time series, judged by its first element.
 *
 * @param {Array} arr - Candidate array.
 * @returns {boolean} True when the first element is an object with a bucket
 *   key (week, date or month) and a metric key (clicks, impressions or value).
 */
function isTimeSeries(arr) {
  if (arr.length === 0) return false;
  const sample = arr[0];
  if (sample === null || typeof sample !== "object") return false;
  const ownKeys = new Set(Object.keys(sample));
  const hasAny = (names) => names.some((name) => ownKeys.has(name));
  return hasAny(["week", "date", "month"]) && hasAny(["clicks", "impressions", "value"]);
}
|
|
601
|
+
/**
 * Chooses the bucket key of a time-series element.
 *
 * @param {object} first - First element of the series.
 * @returns {string} "week" or "date" when present (in that priority),
 *   otherwise "month".
 */
function pickBucketKey(first) {
  const preferred = ["week", "date"].find((key) => key in first);
  return preferred ?? "month";
}
|
|
606
|
+
/**
 * Chooses the metric key of a time-series element.
 *
 * @param {object} first - First element of the series.
 * @returns {string} "clicks" or "impressions" when present (in that
 *   priority), otherwise "value".
 */
function pickMetricKey(first) {
  const preferred = ["clicks", "impressions"].find((key) => key in first);
  return preferred ?? "value";
}
|
|
611
|
+
/**
 * Renders one sparkline per result row for a column holding time series,
 * aligned on the union of all buckets seen across rows.
 *
 * @param {Array<object>} results - Result rows.
 * @param {string} col - Column whose values are time-series arrays.
 * @returns {string[]} One string per row: "" when the row has no time series,
 *   otherwise one character per bucket (a gap marker where the row has no
 *   value). Each row is scaled to its own minimum and maximum.
 */
function computeAlignedSparklines(results, col) {
  const bucketUniverse = /* @__PURE__ */ new Set();

  // First pass: index each row's series by bucket and collect every bucket.
  const seriesByRow = [];
  for (const row of results) {
    const series = row[col];
    if (!Array.isArray(series) || !isTimeSeries(series)) {
      seriesByRow.push(null);
      continue;
    }
    const bucketKey = pickBucketKey(series[0]);
    const metricKey = pickMetricKey(series[0]);
    const byBucket = /* @__PURE__ */ new Map();
    for (const point of series) {
      const bucket = String(point[bucketKey]);
      const amount = Number(point[metricKey] ?? 0);
      bucketUniverse.add(bucket);
      byBucket.set(bucket, amount);
    }
    seriesByRow.push(byBucket);
  }

  const timeline = [...bucketUniverse].sort();

  // Second pass: draw each row against the shared timeline.
  const draw = (byBucket) => {
    if (!byBucket) return "";
    const aligned = timeline.map((bucket) => byBucket.has(bucket) ? byBucket.get(bucket) : null);
    const present = aligned.filter((v) => v != null);
    if (present.length === 0) return SPARK_GAP.repeat(aligned.length);
    const low = Math.min(...present);
    const span = Math.max(...present) - low;
    let line = "";
    for (const v of aligned) {
      if (v == null) line += SPARK_GAP;
      else if (span === 0) line += SPARK_CHARS[0];
      else line += SPARK_CHARS[Math.round((v - low) / span * (SPARK_CHARS.length - 1))];
    }
    return line;
  };
  return seriesByRow.map(draw);
}
|
|
650
|
+
/**
 * Decides how a result column should be rendered.
 *
 * @param {string} col - Column name.
 * @param {Array} values - All values of that column, one per row.
 * @returns {"text"|"series"|"pct"|"float"|"int"} "series" when the first
 *   non-null value is a time-series array; "pct" for all-numeric columns
 *   listed in PERCENT_COLS; "float"/"int" for other all-numeric columns;
 *   "text" otherwise (including columns with no values).
 */
function classifyCol(col, values) {
  const sample = values.find((v) => v != null);
  if (sample == null) return "text";
  if (Array.isArray(sample) && isTimeSeries(sample)) return "series";
  const allNumericOrEmpty = values.every((v) => v == null || typeof v === "number");
  if (!allNumericOrEmpty) return "text";
  // Own-property check: `in` would also match inherited names such as
  // "constructor" or "toString" and misclassify those columns as percentages.
  if (Object.prototype.hasOwnProperty.call(PERCENT_COLS, col)) return "pct";
  const hasFraction = values.some((v) => typeof v === "number" && !Number.isInteger(v));
  return hasFraction ? "float" : "int";
}
|
|
658
|
+
/**
 * Formats one table cell according to its column kind.
 *
 * @param {*} val - Cell value.
 * @param {string} col - Column name (selects the percent style for "pct").
 * @param {string} kind - Column kind: "int", "float", "pct" or anything else.
 * @returns {string} "" for null/undefined; numbers are formatted per kind;
 *   other arrays are summarized as "[N items]" and objects are JSON-encoded.
 */
function formatCellKinded(val, col, kind) {
  if (val == null) return "";
  const isNumber = typeof val === "number";
  switch (kind) {
    case "int":
      return String(val);
    case "float":
      return isNumber ? val.toFixed(2) : String(val);
    case "pct":
      return isNumber ? formatPct(val, PERCENT_COLS[col]) : String(val);
    default:
      break;
  }
  if (Array.isArray(val)) {
    const plural = val.length === 1 ? "" : "s";
    return `[${val.length} item${plural}]`;
  }
  return typeof val === "object" ? JSON.stringify(val) : String(val);
}
|
|
667
|
+
/**
 * Builds a single sparkline when the result rows themselves form a time
 * series (every row has the same bucket key and metric key).
 *
 * @param {Array<object>} results - Result rows.
 * @returns {{spark: string, label: string}|null} The sparkline (rows ordered
 *   by bucket) and a label, or null when there are fewer than two rows, no
 *   shared bucket/metric key, or no finite metric value. Rows whose metric is
 *   not a finite number are drawn as a gap so the sparkline keeps one cell
 *   per row.
 */
function computeRowSeriesSparkline(results) {
  if (results.length < 2) return null;
  const first = results[0];
  const bucketKey = ["week", "date", "month"].find((key) => key in first) ?? null;
  if (!bucketKey) return null;
  const metricKey = ["value", "clicks", "impressions"].find((key) => key in first) ?? null;
  if (!metricKey) return null;
  for (const r of results) if (!(bucketKey in r) || !(metricKey in r)) return null;
  const values = [...results]
    .sort((a, b) => String(a[bucketKey]).localeCompare(String(b[bucketKey])))
    .map((r) => Number(r[metricKey] ?? 0));
  const finite = values.filter((v) => Number.isFinite(v));
  if (finite.length === 0) return null;
  const min = Math.min(...finite);
  const range = Math.max(...finite) - min;
  const spark = values.map((v) => {
    // A non-finite value would otherwise index SPARK_CHARS with NaN and vanish on join.
    if (!Number.isFinite(v)) return SPARK_GAP;
    if (range === 0) return SPARK_CHARS[0];
    return SPARK_CHARS[Math.round((v - min) / range * (SPARK_CHARS.length - 1))];
  }).join("");
  return {
    spark,
    // At least two rows reach this point, so the bucket name is always plural.
    label: `${results.length} ${bucketKey}s of ${metricKey}`
  };
}
|
|
688
|
+
/**
 * Prints analysis results as CSV or as an aligned text table.
 *
 * @param {Array<object>} results - Rows to print; columns come from the first row.
 * @param {number} total - Total number of matching rows; a note is logged when
 *   it exceeds the number of rows shown.
 * @param {string} format - "csv" prints CSV (when there are rows); any other
 *   value prints the table.
 */
function renderResults(results, total, format) {
  const rowCount = results.length;
  if (rowCount > 0 && format === "csv") {
    console.log(toCSV(results, Object.keys(results[0])));
    return;
  }
  if (rowCount === 0) {
    logger.warn("No results found");
    return;
  }

  const cols = Object.keys(results[0]);
  const kinds = cols.map((c) => classifyCol(c, results.map((r) => r[c])));

  // Series columns are drawn as sparklines computed once per column.
  const sparklineByCol = {};
  for (let i = 0; i < cols.length; i++) {
    if (kinds[i] === "series") sparklineByCol[cols[i]] = computeAlignedSparklines(results, cols[i]);
  }

  const cellText = (row, rowIdx, colIdx) => {
    const name = cols[colIdx];
    const kind = kinds[colIdx];
    return kind === "series" ? sparklineByCol[name][rowIdx] : formatCellKinded(row[name], name, kind);
  };

  // Column widths are sized from the header and the first 20 rows only.
  const sampleSize = Math.min(rowCount, 20);
  const widths = cols.map((name, colIdx) => {
    let widest = name.length;
    for (let rowIdx = 0; rowIdx < sampleSize; rowIdx++) {
      widest = Math.max(widest, cellText(results[rowIdx], rowIdx, colIdx).length);
    }
    return widest;
  });

  const printRow = (cells) => console.log(` ${cells.map((cell, i) => cell.padEnd(widths[i])).join(" ")}`);

  console.log();
  printRow(cols);
  console.log(` ${widths.map((w) => "─".repeat(w)).join(" ")}`);
  results.forEach((row, rowIdx) => {
    printRow(cols.map((_, colIdx) => cellText(row, rowIdx, colIdx)));
  });

  const rowSeriesSparkline = computeRowSeriesSparkline(results);
  if (rowSeriesSparkline) {
    console.log();
    console.log(` trend: ${rowSeriesSparkline.spark} (${rowSeriesSparkline.label})`);
  }
  console.log();
  logger.success(`${rowCount} results`);
  if (total > rowCount) logger.info(`Total: ${total} (showing ${rowCount})`);
}
|
|
732
|
+
/**
 * The `analyze` command: one sub-command per analyzer id in ANALYSIS_TOOLS,
 * each built by makeToolCommand.
 */
const analyzeCommand = (() => {
  const entries = ANALYSIS_TOOLS.map((tool) => [tool, makeToolCommand(tool)]);
  const subCommands = Object.fromEntries(entries);
  return defineCommand({
    meta: {
      name: "analyze",
      description: "SEO analysis tools"
    },
    subCommands
  });
})();
|
|
472
739
|
const authCommand = defineCommand({
|
|
473
740
|
meta: {
|
|
@@ -475,295 +742,620 @@ const authCommand = defineCommand({
|
|
|
475
742
|
description: "Manage authentication"
|
|
476
743
|
},
|
|
477
744
|
subCommands: {
|
|
478
|
-
status:
|
|
479
|
-
|
|
745
|
+
status: defineCommand({
|
|
746
|
+
meta: {
|
|
747
|
+
name: "status",
|
|
748
|
+
description: "Show current authentication status"
|
|
749
|
+
},
|
|
750
|
+
async run() {
|
|
751
|
+
const tokens = await loadTokens();
|
|
752
|
+
if (!tokens) {
|
|
753
|
+
logger.warn("Not authenticated");
|
|
754
|
+
logger.info("Run gscdump init to authenticate");
|
|
755
|
+
return;
|
|
756
|
+
}
|
|
757
|
+
const hasAccess = !!tokens.access_token;
|
|
758
|
+
const hasRefresh = !!tokens.refresh_token;
|
|
759
|
+
const expiry = tokens.expiry_date ? new Date(tokens.expiry_date) : null;
|
|
760
|
+
const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
|
|
761
|
+
logger.success("Authenticated");
|
|
762
|
+
console.log();
|
|
763
|
+
console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
|
|
764
|
+
console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
|
|
765
|
+
if (expiry) {
|
|
766
|
+
const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
|
|
767
|
+
console.log(` Expires: ${expiry.toISOString()} (${status})`);
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
}),
|
|
771
|
+
logout: defineCommand({
|
|
772
|
+
meta: {
|
|
773
|
+
name: "logout",
|
|
774
|
+
description: "Clear stored OAuth tokens"
|
|
775
|
+
},
|
|
776
|
+
async run() {
|
|
777
|
+
await clearTokens();
|
|
778
|
+
}
|
|
779
|
+
})
|
|
480
780
|
}
|
|
481
781
|
});
|
|
482
|
-
|
|
483
|
-
//#endregion
|
|
484
|
-
//#region src/commands/config.ts
|
|
485
|
-
const showCommand = defineCommand({
|
|
782
|
+
const configCommand = defineCommand({
|
|
486
783
|
meta: {
|
|
487
|
-
name: "
|
|
488
|
-
description: "
|
|
784
|
+
name: "config",
|
|
785
|
+
description: "Manage configuration"
|
|
489
786
|
},
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
787
|
+
subCommands: {
|
|
788
|
+
show: defineCommand({
|
|
789
|
+
meta: {
|
|
790
|
+
name: "show",
|
|
791
|
+
description: "Show current config"
|
|
792
|
+
},
|
|
793
|
+
async run() {
|
|
794
|
+
const config = await loadConfig();
|
|
795
|
+
const configPath = getConfigPath();
|
|
796
|
+
logger.info(`Config: ${configPath}`);
|
|
797
|
+
console.log();
|
|
798
|
+
if (Object.keys(config).length === 0) {
|
|
799
|
+
logger.warn("No config set");
|
|
800
|
+
return;
|
|
801
|
+
}
|
|
802
|
+
console.log(JSON.stringify(config, null, 2));
|
|
803
|
+
}
|
|
804
|
+
}),
|
|
805
|
+
set: defineCommand({
|
|
806
|
+
meta: {
|
|
807
|
+
name: "set",
|
|
808
|
+
description: "Set a config value"
|
|
809
|
+
},
|
|
810
|
+
args: {
|
|
811
|
+
key: {
|
|
812
|
+
type: "positional",
|
|
813
|
+
description: "Config key (defaultSite, defaultPeriod, defaultFormat, defaultDb)",
|
|
814
|
+
required: true
|
|
815
|
+
},
|
|
816
|
+
value: {
|
|
817
|
+
type: "positional",
|
|
818
|
+
description: "Value to set",
|
|
819
|
+
required: true
|
|
820
|
+
}
|
|
821
|
+
},
|
|
822
|
+
async run({ args }) {
|
|
823
|
+
const validKeys = [
|
|
824
|
+
"defaultSite",
|
|
825
|
+
"defaultPeriod",
|
|
826
|
+
"defaultFormat",
|
|
827
|
+
"defaultDb"
|
|
828
|
+
];
|
|
829
|
+
if (!validKeys.includes(args.key)) {
|
|
830
|
+
logger.error(`Invalid key: ${args.key}`);
|
|
831
|
+
logger.info(`Valid keys: ${validKeys.join(", ")}`);
|
|
832
|
+
process.exit(1);
|
|
833
|
+
}
|
|
834
|
+
const config = await loadConfig();
|
|
835
|
+
config[args.key] = args.value;
|
|
836
|
+
await saveConfig(config);
|
|
837
|
+
logger.success(`Set ${args.key} = ${args.value}`);
|
|
838
|
+
}
|
|
839
|
+
}),
|
|
840
|
+
unset: defineCommand({
|
|
841
|
+
meta: {
|
|
842
|
+
name: "unset",
|
|
843
|
+
description: "Remove a config value"
|
|
844
|
+
},
|
|
845
|
+
args: { key: {
|
|
846
|
+
type: "positional",
|
|
847
|
+
description: "Config key to remove",
|
|
848
|
+
required: true
|
|
849
|
+
} },
|
|
850
|
+
async run({ args }) {
|
|
851
|
+
const config = await loadConfig();
|
|
852
|
+
delete config[args.key];
|
|
853
|
+
await saveConfig(config);
|
|
854
|
+
logger.success(`Removed ${args.key}`);
|
|
855
|
+
}
|
|
856
|
+
}),
|
|
857
|
+
path: defineCommand({
|
|
858
|
+
meta: {
|
|
859
|
+
name: "path",
|
|
860
|
+
description: "Show config file path"
|
|
861
|
+
},
|
|
862
|
+
run() {
|
|
863
|
+
console.log(getConfigPath());
|
|
864
|
+
}
|
|
865
|
+
})
|
|
500
866
|
}
|
|
501
867
|
});
|
|
502
|
-
const
|
|
868
|
+
const DEFAULT_OUT = "./gscdump-export";
|
|
869
|
+
const dumpCommand = defineCommand({
|
|
503
870
|
meta: {
|
|
504
|
-
name: "
|
|
505
|
-
description: "
|
|
871
|
+
name: "dump",
|
|
872
|
+
description: "Export live Parquet files from the local store to a directory"
|
|
506
873
|
},
|
|
507
874
|
args: {
|
|
508
|
-
|
|
509
|
-
type: "
|
|
510
|
-
|
|
511
|
-
|
|
875
|
+
site: {
|
|
876
|
+
type: "string",
|
|
877
|
+
alias: "s",
|
|
878
|
+
description: "Site URL (e.g., sc-domain:example.com)"
|
|
512
879
|
},
|
|
513
|
-
|
|
514
|
-
type: "
|
|
515
|
-
|
|
516
|
-
|
|
880
|
+
out: {
|
|
881
|
+
type: "string",
|
|
882
|
+
alias: "o",
|
|
883
|
+
default: DEFAULT_OUT,
|
|
884
|
+
description: `Output directory (default: ${DEFAULT_OUT})`
|
|
885
|
+
},
|
|
886
|
+
compact: {
|
|
887
|
+
type: "boolean",
|
|
888
|
+
default: false,
|
|
889
|
+
description: "Compact every closed month into a single file before exporting"
|
|
890
|
+
},
|
|
891
|
+
quiet: {
|
|
892
|
+
type: "boolean",
|
|
893
|
+
alias: "q",
|
|
894
|
+
default: false,
|
|
895
|
+
description: "Suppress progress output"
|
|
517
896
|
}
|
|
518
897
|
},
|
|
519
898
|
async run({ args }) {
|
|
520
|
-
const
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
899
|
+
const ctx = await createCommandContext({
|
|
900
|
+
needsAuth: true,
|
|
901
|
+
needsStore: true
|
|
902
|
+
});
|
|
903
|
+
const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
|
|
904
|
+
const store = ctx.store;
|
|
905
|
+
const outDir = path.resolve(String(args.out));
|
|
906
|
+
if (args.compact) await compactClosedMonths(store, siteUrl, args.quiet);
|
|
907
|
+
const entries = await listLiveEntries(store, siteUrl);
|
|
908
|
+
if (entries.length === 0) {
|
|
909
|
+
logger.warn(`No data for ${siteUrl}. Run \`gscdump sync\` first.`);
|
|
910
|
+
process.exit(0);
|
|
530
911
|
}
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
});
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
},
|
|
542
|
-
args: { key: {
|
|
543
|
-
type: "positional",
|
|
544
|
-
description: "Config key to remove",
|
|
545
|
-
required: true
|
|
546
|
-
} },
|
|
547
|
-
async run({ args }) {
|
|
548
|
-
const config = await loadConfig();
|
|
549
|
-
delete config[args.key];
|
|
550
|
-
await saveConfig(config);
|
|
551
|
-
logger.success(`Removed ${args.key}`);
|
|
552
|
-
}
|
|
553
|
-
});
|
|
554
|
-
const pathCommand = defineCommand({
|
|
555
|
-
meta: {
|
|
556
|
-
name: "path",
|
|
557
|
-
description: "Show config file path"
|
|
558
|
-
},
|
|
559
|
-
run() {
|
|
560
|
-
console.log(getConfigPath());
|
|
561
|
-
}
|
|
562
|
-
});
|
|
563
|
-
const configCommand = defineCommand({
|
|
564
|
-
meta: {
|
|
565
|
-
name: "config",
|
|
566
|
-
description: "Manage configuration"
|
|
567
|
-
},
|
|
568
|
-
subCommands: {
|
|
569
|
-
show: showCommand,
|
|
570
|
-
set: setCommand,
|
|
571
|
-
unset: unsetCommand,
|
|
572
|
-
path: pathCommand
|
|
912
|
+
await fs.mkdir(outDir, { recursive: true });
|
|
913
|
+
let copied = 0;
|
|
914
|
+
for (const entry of entries) {
|
|
915
|
+
const bytes = await store.engine.readObject(entry.objectKey);
|
|
916
|
+
const target = path.join(outDir, entry.objectKey);
|
|
917
|
+
await fs.mkdir(path.dirname(target), { recursive: true });
|
|
918
|
+
await fs.writeFile(target, Buffer.from(bytes));
|
|
919
|
+
copied++;
|
|
920
|
+
}
|
|
921
|
+
if (!args.quiet) logger.success(`Exported ${copied} file(s) to ${outDir}`);
|
|
573
922
|
}
|
|
574
923
|
});
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
924
|
+
async function listLiveEntries(store, siteUrl) {
|
|
925
|
+
const siteId = store.siteIdFor(siteUrl);
|
|
926
|
+
return (await Promise.all(allTables().map((table) => store.engine.listLive({
|
|
927
|
+
userId: store.userId,
|
|
928
|
+
siteId,
|
|
929
|
+
table
|
|
930
|
+
})))).flat();
|
|
931
|
+
}
|
|
932
|
+
async function compactClosedMonths(store, siteUrl, quiet) {
|
|
933
|
+
const siteId = store.siteIdFor(siteUrl);
|
|
934
|
+
for (const table of allTables()) {
|
|
935
|
+
if (!quiet) logger.info(`Compacting ${table} (raw→d7→d30→d90)`);
|
|
936
|
+
await store.engine.compactTiered({
|
|
937
|
+
userId: store.userId,
|
|
938
|
+
siteId,
|
|
939
|
+
table
|
|
940
|
+
});
|
|
590
941
|
}
|
|
591
942
|
}
|
|
592
|
-
const
|
|
943
|
+
const INSPECTION_QPD_PER_PROPERTY = 2e3;
|
|
944
|
+
const INDEXING_NOT_FOUND_RE = /\b404\b|NOT_FOUND/i;
|
|
945
|
+
async function readUrlList(opts) {
|
|
946
|
+
if (opts.file) return (await readFile(opts.file, "utf8")).split("\n").map((l) => l.trim()).filter(Boolean);
|
|
947
|
+
const chunks = [];
|
|
948
|
+
for await (const chunk of process.stdin) chunks.push(chunk);
|
|
949
|
+
return Buffer.concat(chunks).toString("utf8").split("\n").map((l) => l.trim()).filter(Boolean);
|
|
950
|
+
}
|
|
951
|
+
const inspectSubCommand = defineCommand({
|
|
593
952
|
meta: {
|
|
594
|
-
name: "
|
|
595
|
-
description: "
|
|
953
|
+
name: "inspect",
|
|
954
|
+
description: "Run URL Inspection for a list of URLs and persist results to the local entity store"
|
|
596
955
|
},
|
|
597
956
|
args: {
|
|
598
957
|
site: {
|
|
599
958
|
type: "string",
|
|
600
959
|
alias: "s",
|
|
960
|
+
required: true,
|
|
601
961
|
description: "Site URL (e.g., sc-domain:example.com)"
|
|
602
962
|
},
|
|
603
|
-
|
|
604
|
-
type: "string",
|
|
605
|
-
alias: "o",
|
|
606
|
-
description: "Output file path (default: stdout)"
|
|
607
|
-
},
|
|
608
|
-
format: {
|
|
963
|
+
file: {
|
|
609
964
|
type: "string",
|
|
610
965
|
alias: "f",
|
|
611
|
-
|
|
612
|
-
description: "Output format: json or csv"
|
|
613
|
-
},
|
|
614
|
-
start: {
|
|
615
|
-
type: "string",
|
|
616
|
-
description: "Start date (YYYY-MM-DD)"
|
|
617
|
-
},
|
|
618
|
-
end: {
|
|
619
|
-
type: "string",
|
|
620
|
-
description: "End date (YYYY-MM-DD)"
|
|
621
|
-
},
|
|
622
|
-
days: {
|
|
623
|
-
type: "string",
|
|
624
|
-
alias: "d",
|
|
625
|
-
default: "28",
|
|
626
|
-
description: "Number of days to fetch (default: 28)"
|
|
966
|
+
description: "Path to a file with one URL per line. If omitted, reads from stdin."
|
|
627
967
|
},
|
|
628
|
-
|
|
968
|
+
limit: {
|
|
629
969
|
type: "string",
|
|
630
|
-
|
|
631
|
-
description: "Data types: pages,keywords,countries,devices"
|
|
970
|
+
description: `Max URLs to inspect this run (default: ${INSPECTION_QPD_PER_PROPERTY}, the per-property GSC daily quota)`
|
|
632
971
|
},
|
|
633
|
-
|
|
972
|
+
concurrency: {
|
|
634
973
|
type: "string",
|
|
635
|
-
alias: "
|
|
636
|
-
default: "
|
|
637
|
-
description: "
|
|
974
|
+
alias: "c",
|
|
975
|
+
default: "4",
|
|
976
|
+
description: "Concurrent in-flight inspect calls (default: 4)"
|
|
638
977
|
},
|
|
639
978
|
quiet: {
|
|
640
979
|
type: "boolean",
|
|
641
980
|
alias: "q",
|
|
642
981
|
default: false,
|
|
643
982
|
description: "Suppress progress output"
|
|
644
|
-
},
|
|
645
|
-
interactive: {
|
|
646
|
-
type: "boolean",
|
|
647
|
-
alias: "i",
|
|
648
|
-
default: false,
|
|
649
|
-
description: "Interactive mode - prompts for options"
|
|
650
983
|
}
|
|
651
984
|
},
|
|
652
985
|
async run({ args }) {
|
|
653
|
-
const
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
986
|
+
const ctx = await createCommandContext({
|
|
987
|
+
needsAuth: true,
|
|
988
|
+
needsStore: true
|
|
989
|
+
});
|
|
990
|
+
const client = ctx.client;
|
|
991
|
+
const store = ctx.store;
|
|
992
|
+
const siteUrl = String(args.site);
|
|
993
|
+
const limit = args.limit ? Number.parseInt(String(args.limit), 10) : INSPECTION_QPD_PER_PROPERTY;
|
|
994
|
+
const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
|
|
995
|
+
const quiet = Boolean(args.quiet);
|
|
996
|
+
const urls = (await readUrlList({ file: args.file ? String(args.file) : void 0 })).slice(0, limit);
|
|
997
|
+
if (urls.length === 0) {
|
|
998
|
+
logger.warn("No URLs to inspect.");
|
|
999
|
+
return;
|
|
1000
|
+
}
|
|
1001
|
+
if (urls.length === limit && limit < INSPECTION_QPD_PER_PROPERTY) logger.info(`Capping at --limit ${limit}`);
|
|
1002
|
+
if (urls.length === INSPECTION_QPD_PER_PROPERTY) logger.info(`Hit per-property daily inspection quota (${INSPECTION_QPD_PER_PROPERTY}); remaining URLs will be queued for tomorrow.`);
|
|
1003
|
+
const inspector = createInspectionStore({ dataSource: store.dataSource });
|
|
1004
|
+
let completed = 0;
|
|
1005
|
+
let failed = 0;
|
|
1006
|
+
const records = [];
|
|
1007
|
+
const failures = [];
|
|
1008
|
+
await runWithConcurrency(urls, concurrency, async (url) => {
|
|
1009
|
+
const result = await client.inspect(siteUrl, url).catch((err) => err);
|
|
1010
|
+
if (result instanceof Error) {
|
|
1011
|
+
failed++;
|
|
1012
|
+
failures.push({
|
|
1013
|
+
url,
|
|
1014
|
+
error: result.message
|
|
1015
|
+
});
|
|
1016
|
+
} else {
|
|
1017
|
+
const ix = result.inspectionResult;
|
|
1018
|
+
const indexStatus = ix?.indexStatusResult;
|
|
1019
|
+
records.push({
|
|
1020
|
+
url,
|
|
1021
|
+
inspectedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1022
|
+
indexStatus: indexStatus?.verdict ?? void 0,
|
|
1023
|
+
lastCrawlTime: indexStatus?.lastCrawlTime ?? void 0,
|
|
1024
|
+
googleCanonical: indexStatus?.googleCanonical ?? void 0,
|
|
1025
|
+
userCanonical: indexStatus?.userCanonical ?? void 0,
|
|
1026
|
+
coverageState: indexStatus?.coverageState ?? void 0,
|
|
1027
|
+
robotsTxtState: indexStatus?.robotsTxtState ?? void 0,
|
|
1028
|
+
indexingState: indexStatus?.indexingState ?? void 0,
|
|
1029
|
+
pageFetchState: indexStatus?.pageFetchState ?? void 0,
|
|
1030
|
+
mobileUsabilityVerdict: ix?.mobileUsabilityResult?.verdict ?? void 0,
|
|
1031
|
+
richResultsVerdict: ix?.richResultsResult?.verdict ?? void 0,
|
|
1032
|
+
raw: ix
|
|
1033
|
+
});
|
|
665
1034
|
}
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
1035
|
+
completed++;
|
|
1036
|
+
if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, `${url.slice(0, 60)}`)}`);
|
|
1037
|
+
});
|
|
1038
|
+
if (!quiet) process.stdout.write("\n");
|
|
1039
|
+
await inspector.writeBatch({
|
|
1040
|
+
userId: store.userId,
|
|
1041
|
+
siteId: store.siteIdFor(siteUrl)
|
|
1042
|
+
}, records);
|
|
1043
|
+
if (!quiet) {
|
|
1044
|
+
logger.success(`Inspected ${records.length}/${urls.length} URL(s)`);
|
|
1045
|
+
if (failed > 0) {
|
|
1046
|
+
logger.warn(`${failed} failed:`);
|
|
1047
|
+
for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
|
|
1048
|
+
if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
|
|
677
1049
|
}
|
|
678
|
-
siteUrl = selected;
|
|
679
1050
|
}
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
1051
|
+
if (failed > 0) process.exit(1);
|
|
1052
|
+
}
|
|
1053
|
+
});
|
|
1054
|
+
const showSubCommand = defineCommand({
|
|
1055
|
+
meta: {
|
|
1056
|
+
name: "show",
|
|
1057
|
+
description: "Print the latest inspection record for a URL from the local entity store"
|
|
1058
|
+
},
|
|
1059
|
+
args: {
|
|
1060
|
+
site: {
|
|
1061
|
+
type: "string",
|
|
1062
|
+
alias: "s",
|
|
1063
|
+
required: true,
|
|
1064
|
+
description: "Site URL"
|
|
1065
|
+
},
|
|
1066
|
+
url: {
|
|
1067
|
+
type: "positional",
|
|
1068
|
+
required: true,
|
|
1069
|
+
description: "URL to look up"
|
|
1070
|
+
},
|
|
1071
|
+
json: {
|
|
1072
|
+
type: "boolean",
|
|
1073
|
+
default: false,
|
|
1074
|
+
description: "Output as JSON"
|
|
1075
|
+
}
|
|
1076
|
+
},
|
|
1077
|
+
async run({ args }) {
|
|
1078
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
1079
|
+
const record = await createInspectionStore({ dataSource: store.dataSource }).getLatest({
|
|
1080
|
+
userId: store.userId,
|
|
1081
|
+
siteId: store.siteIdFor(String(args.site))
|
|
1082
|
+
}, String(args.url));
|
|
1083
|
+
if (!record) {
|
|
1084
|
+
logger.warn(`No inspection record for ${args.url}`);
|
|
1085
|
+
process.exit(1);
|
|
1086
|
+
}
|
|
1087
|
+
if (args.json) {
|
|
1088
|
+
console.log(JSON.stringify(record, null, 2));
|
|
1089
|
+
return;
|
|
1090
|
+
}
|
|
1091
|
+
console.log();
|
|
1092
|
+
console.log(` \x1B[1m${record.url}\x1B[0m`);
|
|
1093
|
+
console.log(` Inspected: ${record.inspectedAt}`);
|
|
1094
|
+
if (record.indexStatus) console.log(` Index: ${record.indexStatus}`);
|
|
1095
|
+
if (record.lastCrawlTime) console.log(` Last crawl: ${record.lastCrawlTime}`);
|
|
1096
|
+
if (record.googleCanonical) console.log(` Canonical: ${record.googleCanonical}`);
|
|
1097
|
+
if (record.coverageState) console.log(` Coverage: ${record.coverageState}`);
|
|
1098
|
+
if (record.mobileUsabilityVerdict) console.log(` Mobile: ${record.mobileUsabilityVerdict}`);
|
|
1099
|
+
if (record.richResultsVerdict) console.log(` Rich results: ${record.richResultsVerdict}`);
|
|
1100
|
+
console.log();
|
|
1101
|
+
}
|
|
1102
|
+
});
|
|
1103
|
+
const sitemapsSnapshotSubCommand = defineCommand({
|
|
1104
|
+
meta: {
|
|
1105
|
+
name: "snapshot",
|
|
1106
|
+
description: "Fetch current sitemap state from GSC and persist to the local entity store"
|
|
1107
|
+
},
|
|
1108
|
+
args: {
|
|
1109
|
+
site: {
|
|
1110
|
+
type: "string",
|
|
1111
|
+
alias: "s",
|
|
1112
|
+
required: true,
|
|
1113
|
+
description: "Site URL (e.g., sc-domain:example.com)"
|
|
1114
|
+
},
|
|
1115
|
+
quiet: {
|
|
1116
|
+
type: "boolean",
|
|
1117
|
+
alias: "q",
|
|
1118
|
+
default: false,
|
|
1119
|
+
description: "Suppress progress output"
|
|
1120
|
+
},
|
|
1121
|
+
json: {
|
|
1122
|
+
type: "boolean",
|
|
1123
|
+
default: false,
|
|
1124
|
+
description: "Emit the snapshot JSON to stdout"
|
|
1125
|
+
}
|
|
1126
|
+
},
|
|
1127
|
+
async run({ args }) {
|
|
1128
|
+
const ctx = await createCommandContext({
|
|
1129
|
+
needsAuth: true,
|
|
1130
|
+
needsStore: true
|
|
1131
|
+
});
|
|
1132
|
+
const client = ctx.client;
|
|
1133
|
+
const store = ctx.store;
|
|
1134
|
+
const siteUrl = String(args.site);
|
|
1135
|
+
const quiet = Boolean(args.quiet);
|
|
1136
|
+
const apiSitemaps = await client.sitemaps.list(siteUrl);
|
|
1137
|
+
const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1138
|
+
const records = apiSitemaps.filter((s) => typeof s.path === "string").map((s) => ({
|
|
1139
|
+
path: s.path,
|
|
1140
|
+
capturedAt,
|
|
1141
|
+
lastDownloaded: s.lastDownloaded ?? void 0,
|
|
1142
|
+
lastSubmitted: s.lastSubmitted ?? void 0,
|
|
1143
|
+
type: s.type ?? void 0,
|
|
1144
|
+
isPending: s.isPending ?? void 0,
|
|
1145
|
+
isSitemapsIndex: s.isSitemapsIndex ?? void 0,
|
|
1146
|
+
errors: s.errors ?? void 0,
|
|
1147
|
+
warnings: s.warnings ?? void 0,
|
|
1148
|
+
contents: s.contents?.map((c) => ({
|
|
1149
|
+
type: c.type ?? void 0,
|
|
1150
|
+
submitted: c.submitted ?? void 0,
|
|
1151
|
+
indexed: c.indexed ?? void 0
|
|
1152
|
+
})),
|
|
1153
|
+
raw: s
|
|
1154
|
+
}));
|
|
1155
|
+
await createSitemapStore({ dataSource: store.dataSource }).writeSnapshot({
|
|
1156
|
+
userId: store.userId,
|
|
1157
|
+
siteId: store.siteIdFor(siteUrl)
|
|
1158
|
+
}, records);
|
|
1159
|
+
if (args.json) {
|
|
1160
|
+
console.log(JSON.stringify({
|
|
1161
|
+
site: siteUrl,
|
|
1162
|
+
capturedAt,
|
|
1163
|
+
records
|
|
1164
|
+
}, null, 2));
|
|
1165
|
+
return;
|
|
1166
|
+
}
|
|
1167
|
+
if (!quiet) {
|
|
1168
|
+
logger.success(`Captured ${records.length} sitemap(s) for ${siteUrl}`);
|
|
1169
|
+
for (const r of records) {
|
|
1170
|
+
const errors = r.errors && r.errors !== "0" ? ` \x1B[31merr=${r.errors}\x1B[0m` : "";
|
|
1171
|
+
const warnings = r.warnings && r.warnings !== "0" ? ` \x1B[33mwarn=${r.warnings}\x1B[0m` : "";
|
|
1172
|
+
const downloaded = r.lastDownloaded ? ` last=${r.lastDownloaded}` : "";
|
|
1173
|
+
console.log(` ${r.path}${downloaded}${errors}${warnings}`);
|
|
1174
|
+
}
|
|
1175
|
+
}
|
|
1176
|
+
}
|
|
1177
|
+
});
|
|
1178
|
+
const sitemapsShowSubCommand = defineCommand({
|
|
1179
|
+
meta: {
|
|
1180
|
+
name: "show",
|
|
1181
|
+
description: "Print the latest captured sitemap state for a feedpath"
|
|
1182
|
+
},
|
|
1183
|
+
args: {
|
|
1184
|
+
site: {
|
|
1185
|
+
type: "string",
|
|
1186
|
+
alias: "s",
|
|
1187
|
+
required: true,
|
|
1188
|
+
description: "Site URL"
|
|
1189
|
+
},
|
|
1190
|
+
path: {
|
|
1191
|
+
type: "positional",
|
|
1192
|
+
required: true,
|
|
1193
|
+
description: "Sitemap path (feedpath)"
|
|
1194
|
+
},
|
|
1195
|
+
json: {
|
|
1196
|
+
type: "boolean",
|
|
1197
|
+
default: false,
|
|
1198
|
+
description: "Output as JSON"
|
|
1199
|
+
}
|
|
1200
|
+
},
|
|
1201
|
+
async run({ args }) {
|
|
1202
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
1203
|
+
const record = await createSitemapStore({ dataSource: store.dataSource }).getLatest({
|
|
1204
|
+
userId: store.userId,
|
|
1205
|
+
siteId: store.siteIdFor(String(args.site))
|
|
1206
|
+
}, String(args.path));
|
|
1207
|
+
if (!record) {
|
|
1208
|
+
logger.warn(`No sitemap record for ${args.path}`);
|
|
1209
|
+
process.exit(1);
|
|
1210
|
+
}
|
|
1211
|
+
if (args.json) {
|
|
1212
|
+
console.log(JSON.stringify(record, null, 2));
|
|
1213
|
+
return;
|
|
1214
|
+
}
|
|
1215
|
+
console.log();
|
|
1216
|
+
console.log(` \x1B[1m${record.path}\x1B[0m`);
|
|
1217
|
+
console.log(` Captured: ${record.capturedAt}`);
|
|
1218
|
+
if (record.lastDownloaded) console.log(` Downloaded: ${record.lastDownloaded}`);
|
|
1219
|
+
if (record.lastSubmitted) console.log(` Submitted: ${record.lastSubmitted}`);
|
|
1220
|
+
if (record.type) console.log(` Type: ${record.type}`);
|
|
1221
|
+
if (record.errors) console.log(` Errors: ${record.errors}`);
|
|
1222
|
+
if (record.warnings) console.log(` Warnings: ${record.warnings}`);
|
|
1223
|
+
if (record.contents?.length) {
|
|
1224
|
+
console.log(` Contents:`);
|
|
1225
|
+
for (const c of record.contents) {
|
|
1226
|
+
const bits = [
|
|
1227
|
+
c.type,
|
|
1228
|
+
c.submitted && `submitted=${c.submitted}`,
|
|
1229
|
+
c.indexed && `indexed=${c.indexed}`
|
|
1230
|
+
].filter(Boolean).join(" ");
|
|
1231
|
+
console.log(` ${bits}`);
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
console.log();
|
|
1235
|
+
}
|
|
1236
|
+
});
|
|
1237
|
+
const indexingSubCommand = defineCommand({
|
|
1238
|
+
meta: {
|
|
1239
|
+
name: "indexing",
|
|
1240
|
+
description: "Snapshot Indexing API metadata per URL"
|
|
1241
|
+
},
|
|
1242
|
+
subCommands: { snapshot: defineCommand({
|
|
1243
|
+
meta: {
|
|
1244
|
+
name: "snapshot",
|
|
1245
|
+
description: "Fetch Indexing API metadata (latest update/remove per URL) and persist to the local entity store"
|
|
1246
|
+
},
|
|
1247
|
+
args: {
|
|
1248
|
+
site: {
|
|
1249
|
+
type: "string",
|
|
1250
|
+
alias: "s",
|
|
1251
|
+
required: true,
|
|
1252
|
+
description: "Site URL (e.g., sc-domain:example.com)"
|
|
1253
|
+
},
|
|
1254
|
+
file: {
|
|
1255
|
+
type: "string",
|
|
1256
|
+
alias: "f",
|
|
1257
|
+
description: "Path to a file with one URL per line. If omitted, reads from stdin."
|
|
1258
|
+
},
|
|
1259
|
+
concurrency: {
|
|
1260
|
+
type: "string",
|
|
1261
|
+
alias: "c",
|
|
1262
|
+
default: "4",
|
|
1263
|
+
description: "Concurrent in-flight getMetadata calls (default: 4)"
|
|
1264
|
+
},
|
|
1265
|
+
quiet: {
|
|
1266
|
+
type: "boolean",
|
|
1267
|
+
alias: "q",
|
|
1268
|
+
default: false,
|
|
1269
|
+
description: "Suppress progress output"
|
|
693
1270
|
}
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
1271
|
+
},
|
|
1272
|
+
async run({ args }) {
|
|
1273
|
+
const ctx = await createCommandContext({
|
|
1274
|
+
needsAuth: true,
|
|
1275
|
+
needsStore: true
|
|
697
1276
|
});
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
1277
|
+
const client = ctx.client;
|
|
1278
|
+
const store = ctx.store;
|
|
1279
|
+
const siteUrl = String(args.site);
|
|
1280
|
+
const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
|
|
1281
|
+
const quiet = Boolean(args.quiet);
|
|
1282
|
+
const urls = await readUrlList({ file: args.file ? String(args.file) : void 0 });
|
|
1283
|
+
if (urls.length === 0) {
|
|
1284
|
+
logger.warn("No URLs to fetch metadata for.");
|
|
1285
|
+
return;
|
|
701
1286
|
}
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
1287
|
+
const records = [];
|
|
1288
|
+
const failures = [];
|
|
1289
|
+
let completed = 0;
|
|
1290
|
+
await runWithConcurrency(urls, concurrency, async (url) => {
|
|
1291
|
+
const result = await client.indexing.getMetadata(url).catch((err) => err);
|
|
1292
|
+
if (result instanceof Error) if (INDEXING_NOT_FOUND_RE.test(result.message)) records.push({
|
|
1293
|
+
url,
|
|
1294
|
+
capturedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
1295
|
+
});
|
|
1296
|
+
else failures.push({
|
|
1297
|
+
url,
|
|
1298
|
+
error: result.message
|
|
1299
|
+
});
|
|
1300
|
+
else records.push({
|
|
1301
|
+
url,
|
|
1302
|
+
capturedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1303
|
+
latestUpdateAt: result.latestUpdate?.notifyTime ?? void 0,
|
|
1304
|
+
latestRemoveAt: result.latestRemove?.notifyTime ?? void 0,
|
|
1305
|
+
raw: result
|
|
1306
|
+
});
|
|
1307
|
+
completed++;
|
|
1308
|
+
if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, url.slice(0, 60))}`);
|
|
719
1309
|
});
|
|
720
|
-
if (
|
|
721
|
-
|
|
722
|
-
|
|
1310
|
+
if (!quiet) process.stdout.write("\n");
|
|
1311
|
+
await createIndexingMetadataStore({ dataSource: store.dataSource }).writeBatch({
|
|
1312
|
+
userId: store.userId,
|
|
1313
|
+
siteId: store.siteIdFor(siteUrl)
|
|
1314
|
+
}, records);
|
|
1315
|
+
if (!quiet) {
|
|
1316
|
+
logger.success(`Captured metadata for ${records.length}/${urls.length} URL(s)`);
|
|
1317
|
+
if (failures.length > 0) {
|
|
1318
|
+
logger.warn(`${failures.length} failed:`);
|
|
1319
|
+
for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
|
|
1320
|
+
if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
|
|
1321
|
+
}
|
|
723
1322
|
}
|
|
724
|
-
|
|
725
|
-
}
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
1323
|
+
if (failures.length > 0) process.exit(1);
|
|
1324
|
+
}
|
|
1325
|
+
}) }
|
|
1326
|
+
});
|
|
1327
|
+
const entitiesCommand = defineCommand({
|
|
1328
|
+
meta: {
|
|
1329
|
+
name: "entities",
|
|
1330
|
+
description: "Manage local entity snapshots (URL inspections, sitemaps, indexing metadata)"
|
|
1331
|
+
},
|
|
1332
|
+
subCommands: {
|
|
1333
|
+
inspect: inspectSubCommand,
|
|
1334
|
+
show: showSubCommand,
|
|
1335
|
+
sitemaps: defineCommand({
|
|
1336
|
+
meta: {
|
|
1337
|
+
name: "sitemaps",
|
|
1338
|
+
description: "Snapshot and inspect sitemap state per site"
|
|
733
1339
|
},
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
let currentStep = 0;
|
|
738
|
-
for (const dataType of dataTypes) {
|
|
739
|
-
currentStep++;
|
|
740
|
-
if (!args.quiet) {
|
|
741
|
-
clearLine();
|
|
742
|
-
process.stdout.write(progressBar(currentStep, totalSteps, dataType));
|
|
1340
|
+
subCommands: {
|
|
1341
|
+
snapshot: sitemapsSnapshotSubCommand,
|
|
1342
|
+
show: sitemapsShowSubCommand
|
|
743
1343
|
}
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
const rows = [];
|
|
747
|
-
for await (const batch of client.query(siteUrl, builder)) rows.push(...batch);
|
|
748
|
-
output[dataType] = {
|
|
749
|
-
total: rows.length,
|
|
750
|
-
data: rows
|
|
751
|
-
};
|
|
752
|
-
}
|
|
753
|
-
if (!args.quiet) {
|
|
754
|
-
clearLine();
|
|
755
|
-
logger.success(`Exported ${dataTypes.join(", ")} for ${siteUrl}`);
|
|
756
|
-
}
|
|
757
|
-
const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
|
|
758
|
-
if (args.output) {
|
|
759
|
-
await fs.writeFile(String(args.output), content);
|
|
760
|
-
if (!args.quiet) logger.info(`Written to ${args.output}`);
|
|
761
|
-
} else console.log(content);
|
|
1344
|
+
}),
|
|
1345
|
+
indexing: indexingSubCommand
|
|
762
1346
|
}
|
|
763
1347
|
});
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
1348
|
+
const ENV_LINE_RE = /^([^=]+)=(.*)$/;
|
|
1349
|
+
async function promptDataDir(existing) {
|
|
1350
|
+
const fallback = existing ?? defaultDataDir();
|
|
1351
|
+
const answer = await text({
|
|
1352
|
+
message: "Where should Parquet data be stored?",
|
|
1353
|
+
placeholder: fallback,
|
|
1354
|
+
defaultValue: fallback
|
|
1355
|
+
});
|
|
1356
|
+
if (isCancel(answer)) process.exit(1);
|
|
1357
|
+
return String(answer) || fallback;
|
|
1358
|
+
}
|
|
767
1359
|
async function loadEnvFile() {
|
|
768
1360
|
const envPath = path.join(process.cwd(), ".env");
|
|
769
1361
|
const content = await fs.readFile(envPath, "utf-8").catch(() => null);
|
|
@@ -772,7 +1364,7 @@ async function loadEnvFile() {
|
|
|
772
1364
|
for (const line of content.split("\n")) {
|
|
773
1365
|
const trimmed = line.trim();
|
|
774
1366
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
775
|
-
const match = trimmed.match(
|
|
1367
|
+
const match = trimmed.match(ENV_LINE_RE);
|
|
776
1368
|
if (match) {
|
|
777
1369
|
const key = match[1].trim();
|
|
778
1370
|
let value = match[2].trim();
|
|
@@ -785,7 +1377,7 @@ async function loadEnvFile() {
|
|
|
785
1377
|
const initCommand = defineCommand({
|
|
786
1378
|
meta: {
|
|
787
1379
|
name: "init",
|
|
788
|
-
description: "Set up GSCDump
|
|
1380
|
+
description: "Set up GSCDump authentication"
|
|
789
1381
|
},
|
|
790
1382
|
args: { force: {
|
|
791
1383
|
type: "boolean",
|
|
@@ -794,8 +1386,8 @@ const initCommand = defineCommand({
|
|
|
794
1386
|
} },
|
|
795
1387
|
async run({ args }) {
|
|
796
1388
|
const config = await loadConfig();
|
|
797
|
-
if (config.
|
|
798
|
-
logger.info(
|
|
1389
|
+
if (config.clientId && config.clientSecret && !args.force) {
|
|
1390
|
+
logger.info("Already configured");
|
|
799
1391
|
logger.info("Run with --force to reconfigure");
|
|
800
1392
|
return;
|
|
801
1393
|
}
|
|
@@ -808,9 +1400,9 @@ const initCommand = defineCommand({
|
|
|
808
1400
|
if (envFile.GOOGLE_ACCESS_TOKEN) process.env.GOOGLE_ACCESS_TOKEN = envFile.GOOGLE_ACCESS_TOKEN;
|
|
809
1401
|
await saveConfig({
|
|
810
1402
|
...config,
|
|
811
|
-
mode: "local",
|
|
812
1403
|
clientId: envFile.GOOGLE_CLIENT_ID,
|
|
813
|
-
clientSecret: envFile.GOOGLE_CLIENT_SECRET
|
|
1404
|
+
clientSecret: envFile.GOOGLE_CLIENT_SECRET,
|
|
1405
|
+
dataDir: config.dataDir ?? defaultDataDir()
|
|
814
1406
|
});
|
|
815
1407
|
const creds = (await authenticate({
|
|
816
1408
|
clientId: envFile.GOOGLE_CLIENT_ID,
|
|
@@ -829,422 +1421,79 @@ const initCommand = defineCommand({
|
|
|
829
1421
|
console.log(" \x1B[1mWelcome to GSCDump!\x1B[0m");
|
|
830
1422
|
console.log(" \x1B[90mGoogle Search Console data extraction CLI\x1B[0m");
|
|
831
1423
|
console.log();
|
|
832
|
-
const
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
value: "local",
|
|
840
|
-
label: "Local",
|
|
841
|
-
hint: "Use your own Google OAuth credentials"
|
|
842
|
-
}]
|
|
1424
|
+
const dataDir = await promptDataDir(config.dataDir);
|
|
1425
|
+
const credentials = await getAuthCredentials(true);
|
|
1426
|
+
await saveConfig({
|
|
1427
|
+
...config,
|
|
1428
|
+
dataDir,
|
|
1429
|
+
clientId: credentials.clientId,
|
|
1430
|
+
clientSecret: credentials.clientSecret
|
|
843
1431
|
});
|
|
844
|
-
|
|
845
|
-
if (mode === "cloud") {
|
|
846
|
-
const cloudUrl = config.cloudUrl || DEFAULT_CLOUD_URL;
|
|
847
|
-
await saveConfig({
|
|
848
|
-
...config,
|
|
849
|
-
mode: "cloud",
|
|
850
|
-
cloudUrl
|
|
851
|
-
});
|
|
852
|
-
await authenticateCloud(cloudUrl, true);
|
|
853
|
-
} else {
|
|
854
|
-
await saveConfig({
|
|
855
|
-
...config,
|
|
856
|
-
mode: "local"
|
|
857
|
-
});
|
|
858
|
-
await authenticate(await getAuthCredentials(true), true);
|
|
859
|
-
}
|
|
1432
|
+
await authenticate(credentials, true);
|
|
860
1433
|
console.log();
|
|
861
1434
|
logger.success("Setup complete! Run gscdump to get started.");
|
|
862
1435
|
}
|
|
863
1436
|
});
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
}
|
|
887
|
-
}
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
}
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
}
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
url: input.url,
|
|
920
|
-
error: e.message
|
|
921
|
-
}));
|
|
922
|
-
}
|
|
923
|
-
async function batchRequestIndexing$1(input, ctx) {
|
|
924
|
-
const results = await batchRequestIndexing(ctx.client, input.urls, {
|
|
925
|
-
type: input.type || "URL_UPDATED",
|
|
926
|
-
delayMs: input.delayMs || 100
|
|
927
|
-
});
|
|
928
|
-
return {
|
|
929
|
-
results,
|
|
930
|
-
success: results.length,
|
|
931
|
-
failed: 0
|
|
932
|
-
};
|
|
933
|
-
}
|
|
934
|
-
async function batchInspectUrls$1(input, ctx) {
|
|
935
|
-
const results = await batchInspectUrls(ctx.client, input.siteUrl, input.urls, { delayMs: input.delayMs || 200 });
|
|
936
|
-
return {
|
|
937
|
-
results,
|
|
938
|
-
indexed: results.filter((r) => r.isIndexed).length,
|
|
939
|
-
notIndexed: results.filter((r) => !r.isIndexed).length
|
|
940
|
-
};
|
|
941
|
-
}
|
|
942
|
-
|
|
943
|
-
//#endregion
|
|
944
|
-
//#region src/mcp/handlers/query.ts
|
|
945
|
-
const DIMENSION_MAP$1 = {
|
|
946
|
-
page,
|
|
947
|
-
query,
|
|
948
|
-
date,
|
|
949
|
-
country,
|
|
950
|
-
device,
|
|
951
|
-
searchAppearance
|
|
952
|
-
};
|
|
953
|
-
async function customQuery(input, ctx) {
|
|
954
|
-
const dimensions = input.dimensions.filter((d) => d in DIMENSION_MAP$1).map((d) => DIMENSION_MAP$1[d]);
|
|
955
|
-
if (dimensions.length === 0) throw new Error("At least one valid dimension required");
|
|
956
|
-
const builder = gsc.select(...dimensions).where(between(date, input.period.start, input.period.end)).limit(input.rowLimit || 25e3);
|
|
957
|
-
const rows = [];
|
|
958
|
-
for await (const batch of ctx.client.query(input.siteUrl, builder)) rows.push(...batch);
|
|
959
|
-
return {
|
|
960
|
-
total: rows.length,
|
|
961
|
-
data: rows
|
|
962
|
-
};
|
|
963
|
-
}
|
|
964
|
-
|
|
965
|
-
//#endregion
|
|
966
|
-
//#region src/mcp/handlers/sites.ts
|
|
967
|
-
async function listSites(_input, ctx) {
|
|
968
|
-
return fetchSites(ctx.client);
|
|
969
|
-
}
|
|
970
|
-
async function listSitesWithSitemaps(_input, ctx) {
|
|
971
|
-
return fetchSitesWithSitemaps(ctx.client);
|
|
972
|
-
}
|
|
973
|
-
async function listSitemaps(input, ctx) {
|
|
974
|
-
return fetchSitemaps(ctx.client, input.siteUrl);
|
|
975
|
-
}
|
|
976
|
-
async function getSitemap(input, ctx) {
|
|
977
|
-
return fetchSitemap(ctx.client, input.siteUrl, input.feedpath);
|
|
978
|
-
}
|
|
979
|
-
async function submitSitemap$1(input, ctx) {
|
|
980
|
-
await submitSitemap(ctx.client, input.siteUrl, input.feedpath);
|
|
981
|
-
return { success: true };
|
|
982
|
-
}
|
|
983
|
-
async function deleteSitemap$1(input, ctx) {
|
|
984
|
-
await deleteSitemap(ctx.client, input.siteUrl, input.feedpath);
|
|
985
|
-
return { success: true };
|
|
986
|
-
}
|
|
987
|
-
|
|
988
|
-
//#endregion
|
|
989
|
-
//#region src/mcp/types.ts
|
|
990
|
-
const periodSchema = z.object({
|
|
991
|
-
start: z.string().describe("Start date (YYYY-MM-DD)"),
|
|
992
|
-
end: z.string().describe("End date (YYYY-MM-DD)")
|
|
993
|
-
}).describe("Date range for the query");
|
|
994
|
-
const siteUrlSchema = z.string().describe("GSC property URL (e.g., sc-domain:example.com or https://example.com/)");
|
|
995
|
-
const queryOptionsSchema = z.object({
|
|
996
|
-
type: z.enum([
|
|
997
|
-
"web",
|
|
998
|
-
"image",
|
|
999
|
-
"video",
|
|
1000
|
-
"news",
|
|
1001
|
-
"discover",
|
|
1002
|
-
"googleNews"
|
|
1003
|
-
]).optional().describe("Data type"),
|
|
1004
|
-
dataState: z.enum(["final", "all"]).optional().describe("Data state: final (settled) or all (includes fresh)"),
|
|
1005
|
-
aggregationType: z.enum(["byPage", "byProperty"]).optional().describe("Aggregation: byPage or byProperty")
|
|
1006
|
-
}).optional();
|
|
1007
|
-
const listSitesInput = z.object({});
|
|
1008
|
-
const listSitemapsInput = z.object({ siteUrl: siteUrlSchema });
|
|
1009
|
-
const fetchAnalyticsInput = z.object({
|
|
1010
|
-
siteUrl: siteUrlSchema,
|
|
1011
|
-
period: periodSchema,
|
|
1012
|
-
comparePrevious: z.boolean().optional().describe("Include previous period comparison"),
|
|
1013
|
-
options: queryOptionsSchema
|
|
1014
|
-
});
|
|
1015
|
-
const fetchPageInput = z.object({
|
|
1016
|
-
siteUrl: siteUrlSchema,
|
|
1017
|
-
period: periodSchema,
|
|
1018
|
-
url: z.string().describe("Page URL to fetch details for")
|
|
1019
|
-
});
|
|
1020
|
-
const fetchKeywordInput = z.object({
|
|
1021
|
-
siteUrl: siteUrlSchema,
|
|
1022
|
-
period: periodSchema,
|
|
1023
|
-
keyword: z.string().describe("Keyword to fetch details for")
|
|
1024
|
-
});
|
|
1025
|
-
const inspectUrlInput = z.object({
|
|
1026
|
-
siteUrl: siteUrlSchema,
|
|
1027
|
-
inspectionUrl: z.string().describe("URL to inspect")
|
|
1028
|
-
});
|
|
1029
|
-
const requestIndexingInput = z.object({
|
|
1030
|
-
url: z.string().describe("URL to request indexing for"),
|
|
1031
|
-
type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type")
|
|
1032
|
-
});
|
|
1033
|
-
const getIndexingStatusInput = z.object({ url: z.string().describe("URL to get indexing status for") });
|
|
1034
|
-
const customQueryInput = z.object({
|
|
1035
|
-
siteUrl: siteUrlSchema,
|
|
1036
|
-
period: periodSchema,
|
|
1037
|
-
dimensions: z.array(z.enum([
|
|
1038
|
-
"date",
|
|
1039
|
-
"query",
|
|
1040
|
-
"page",
|
|
1041
|
-
"country",
|
|
1042
|
-
"device",
|
|
1043
|
-
"searchAppearance"
|
|
1044
|
-
])).describe("Dimensions to group by"),
|
|
1045
|
-
rowLimit: z.number().optional().describe("Max rows (default 25000)"),
|
|
1046
|
-
options: queryOptionsSchema
|
|
1047
|
-
});
|
|
1048
|
-
const sitemapInput = z.object({
|
|
1049
|
-
siteUrl: siteUrlSchema,
|
|
1050
|
-
feedpath: z.string().describe("Sitemap URL (e.g., https://example.com/sitemap.xml)")
|
|
1051
|
-
});
|
|
1052
|
-
const batchRequestIndexingInput = z.object({
|
|
1053
|
-
urls: z.array(z.string()).describe("URLs to request indexing for"),
|
|
1054
|
-
type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type"),
|
|
1055
|
-
delayMs: z.number().optional().describe("Delay between requests in ms (default 100)")
|
|
1056
|
-
});
|
|
1057
|
-
const batchInspectUrlsInput = z.object({
|
|
1058
|
-
siteUrl: siteUrlSchema,
|
|
1059
|
-
urls: z.array(z.string()).describe("URLs to inspect"),
|
|
1060
|
-
delayMs: z.number().optional().describe("Delay between requests in ms (default 200)")
|
|
1437
|
+
const inspectCommand = defineCommand({
|
|
1438
|
+
meta: {
|
|
1439
|
+
name: "inspect",
|
|
1440
|
+
description: "Inspect a specific URL's indexing status"
|
|
1441
|
+
},
|
|
1442
|
+
args: {
|
|
1443
|
+
site: {
|
|
1444
|
+
type: "string",
|
|
1445
|
+
alias: "s",
|
|
1446
|
+
required: true,
|
|
1447
|
+
description: "Site URL (e.g., sc-domain:example.com)"
|
|
1448
|
+
},
|
|
1449
|
+
url: {
|
|
1450
|
+
type: "positional",
|
|
1451
|
+
required: true,
|
|
1452
|
+
description: "URL to inspect"
|
|
1453
|
+
},
|
|
1454
|
+
json: {
|
|
1455
|
+
type: "boolean",
|
|
1456
|
+
default: false,
|
|
1457
|
+
description: "Output as JSON"
|
|
1458
|
+
}
|
|
1459
|
+
},
|
|
1460
|
+
async run({ args }) {
|
|
1461
|
+
const result = await (await createCommandContext({ needsAuth: true })).client.inspect(args.site, args.url).catch((e) => {
|
|
1462
|
+
logger.error(`Inspection failed: ${e.message}`);
|
|
1463
|
+
process.exit(1);
|
|
1464
|
+
});
|
|
1465
|
+
const indexStatus = (result?.inspectionResult)?.indexStatusResult;
|
|
1466
|
+
if (args.json) {
|
|
1467
|
+
console.log(JSON.stringify({
|
|
1468
|
+
url: args.url,
|
|
1469
|
+
verdict: indexStatus?.verdict || null,
|
|
1470
|
+
coverageState: indexStatus?.coverageState || null,
|
|
1471
|
+
indexingState: indexStatus?.indexingState || null,
|
|
1472
|
+
lastCrawlTime: indexStatus?.lastCrawlTime || null,
|
|
1473
|
+
isIndexed: indexStatus?.verdict === "PASS",
|
|
1474
|
+
raw: result
|
|
1475
|
+
}, null, 2));
|
|
1476
|
+
return;
|
|
1477
|
+
}
|
|
1478
|
+
console.log();
|
|
1479
|
+
console.log(` \x1B[1mURL:\x1B[0m ${args.url}`);
|
|
1480
|
+
console.log();
|
|
1481
|
+
const verdictColor = indexStatus?.verdict === "PASS" ? "\x1B[32m" : "\x1B[31m";
|
|
1482
|
+
console.log(` Verdict: ${verdictColor}${indexStatus?.verdict || "N/A"}\x1B[0m`);
|
|
1483
|
+
if (indexStatus?.coverageState) console.log(` Coverage: ${indexStatus.coverageState}`);
|
|
1484
|
+
if (indexStatus?.indexingState) console.log(` Indexing: ${indexStatus.indexingState}`);
|
|
1485
|
+
if (indexStatus?.lastCrawlTime) console.log(` Last Crawl: ${indexStatus.lastCrawlTime}`);
|
|
1486
|
+
if (indexStatus?.robotsTxtState) console.log(` Robots.txt: ${indexStatus.robotsTxtState}`);
|
|
1487
|
+
if (indexStatus?.pageFetchState) console.log(` Page Fetch: ${indexStatus.pageFetchState}`);
|
|
1488
|
+
if (indexStatus?.googleCanonical) console.log(` Google Canon: ${indexStatus.googleCanonical}`);
|
|
1489
|
+
if (indexStatus?.userCanonical) console.log(` User Canon: ${indexStatus.userCanonical}`);
|
|
1490
|
+
console.log();
|
|
1491
|
+
}
|
|
1061
1492
|
});
|
|
1062
|
-
|
|
1063
|
-
//#endregion
|
|
1064
|
-
//#region src/mcp/server/index.ts
|
|
1065
|
-
function createGscMcpServer(options) {
|
|
1066
|
-
const { name = "gscdump", version = "1.0.0", getAuth: getAuth$1 } = options;
|
|
1067
|
-
const server = new McpServer({
|
|
1068
|
-
name,
|
|
1069
|
-
version
|
|
1070
|
-
});
|
|
1071
|
-
const auth = async () => Promise.resolve(getAuth$1());
|
|
1072
|
-
const getContext = async () => {
|
|
1073
|
-
const a = await auth();
|
|
1074
|
-
return {
|
|
1075
|
-
auth: a,
|
|
1076
|
-
client: googleSearchConsole(a)
|
|
1077
|
-
};
|
|
1078
|
-
};
|
|
1079
|
-
server.registerTool("list-sites", {
|
|
1080
|
-
description: "List all Google Search Console sites the user has access to",
|
|
1081
|
-
inputSchema: listSitesInput.shape
|
|
1082
|
-
}, async (args) => {
|
|
1083
|
-
const result = await listSites(args, await getContext());
|
|
1084
|
-
return { content: [{
|
|
1085
|
-
type: "text",
|
|
1086
|
-
text: JSON.stringify(result, null, 2)
|
|
1087
|
-
}] };
|
|
1088
|
-
});
|
|
1089
|
-
server.registerTool("list-sites-with-sitemaps", {
|
|
1090
|
-
description: "List all GSC sites with their sitemaps",
|
|
1091
|
-
inputSchema: listSitesInput.shape
|
|
1092
|
-
}, async (args) => {
|
|
1093
|
-
const result = await listSitesWithSitemaps(args, await getContext());
|
|
1094
|
-
return { content: [{
|
|
1095
|
-
type: "text",
|
|
1096
|
-
text: JSON.stringify(result, null, 2)
|
|
1097
|
-
}] };
|
|
1098
|
-
});
|
|
1099
|
-
server.registerTool("list-sitemaps", {
|
|
1100
|
-
description: "List sitemaps for a specific site",
|
|
1101
|
-
inputSchema: listSitemapsInput.shape
|
|
1102
|
-
}, async (args) => {
|
|
1103
|
-
const result = await listSitemaps(args, await getContext());
|
|
1104
|
-
return { content: [{
|
|
1105
|
-
type: "text",
|
|
1106
|
-
text: JSON.stringify(result, null, 2)
|
|
1107
|
-
}] };
|
|
1108
|
-
});
|
|
1109
|
-
server.registerTool("get-sitemap", {
|
|
1110
|
-
description: "Get details for a specific sitemap",
|
|
1111
|
-
inputSchema: sitemapInput.shape
|
|
1112
|
-
}, async (args) => {
|
|
1113
|
-
const result = await getSitemap(args, await getContext());
|
|
1114
|
-
return { content: [{
|
|
1115
|
-
type: "text",
|
|
1116
|
-
text: JSON.stringify(result, null, 2)
|
|
1117
|
-
}] };
|
|
1118
|
-
});
|
|
1119
|
-
server.registerTool("submit-sitemap", {
|
|
1120
|
-
description: "Submit a sitemap to Google Search Console",
|
|
1121
|
-
inputSchema: sitemapInput.shape
|
|
1122
|
-
}, async (args) => {
|
|
1123
|
-
const result = await submitSitemap$1(args, await getContext());
|
|
1124
|
-
return { content: [{
|
|
1125
|
-
type: "text",
|
|
1126
|
-
text: JSON.stringify(result, null, 2)
|
|
1127
|
-
}] };
|
|
1128
|
-
});
|
|
1129
|
-
server.registerTool("delete-sitemap", {
|
|
1130
|
-
description: "Delete a sitemap from Google Search Console",
|
|
1131
|
-
inputSchema: sitemapInput.shape
|
|
1132
|
-
}, async (args) => {
|
|
1133
|
-
const result = await deleteSitemap$1(args, await getContext());
|
|
1134
|
-
return { content: [{
|
|
1135
|
-
type: "text",
|
|
1136
|
-
text: JSON.stringify(result, null, 2)
|
|
1137
|
-
}] };
|
|
1138
|
-
});
|
|
1139
|
-
server.registerTool("fetch-pages", {
|
|
1140
|
-
description: "Fetch page analytics data for a site",
|
|
1141
|
-
inputSchema: fetchAnalyticsInput.shape
|
|
1142
|
-
}, async (args) => {
|
|
1143
|
-
const result = await fetchPages(args, await getContext());
|
|
1144
|
-
return { content: [{
|
|
1145
|
-
type: "text",
|
|
1146
|
-
text: JSON.stringify(result, null, 2)
|
|
1147
|
-
}] };
|
|
1148
|
-
});
|
|
1149
|
-
server.registerTool("fetch-keywords", {
|
|
1150
|
-
description: "Fetch keyword/query analytics data for a site",
|
|
1151
|
-
inputSchema: fetchAnalyticsInput.shape
|
|
1152
|
-
}, async (args) => {
|
|
1153
|
-
const result = await fetchKeywords(args, await getContext());
|
|
1154
|
-
return { content: [{
|
|
1155
|
-
type: "text",
|
|
1156
|
-
text: JSON.stringify(result, null, 2)
|
|
1157
|
-
}] };
|
|
1158
|
-
});
|
|
1159
|
-
server.registerTool("fetch-countries", {
|
|
1160
|
-
description: "Fetch country analytics data for a site",
|
|
1161
|
-
inputSchema: fetchAnalyticsInput.shape
|
|
1162
|
-
}, async (args) => {
|
|
1163
|
-
const result = await fetchCountries(args, await getContext());
|
|
1164
|
-
return { content: [{
|
|
1165
|
-
type: "text",
|
|
1166
|
-
text: JSON.stringify(result, null, 2)
|
|
1167
|
-
}] };
|
|
1168
|
-
});
|
|
1169
|
-
server.registerTool("fetch-devices", {
|
|
1170
|
-
description: "Fetch device analytics data for a site",
|
|
1171
|
-
inputSchema: fetchAnalyticsInput.shape
|
|
1172
|
-
}, async (args) => {
|
|
1173
|
-
const result = await fetchDevices(args, await getContext());
|
|
1174
|
-
return { content: [{
|
|
1175
|
-
type: "text",
|
|
1176
|
-
text: JSON.stringify(result, null, 2)
|
|
1177
|
-
}] };
|
|
1178
|
-
});
|
|
1179
|
-
server.registerTool("custom-query", {
|
|
1180
|
-
description: "Run a custom search analytics query with specified dimensions",
|
|
1181
|
-
inputSchema: customQueryInput.shape
|
|
1182
|
-
}, async (args) => {
|
|
1183
|
-
const result = await customQuery(args, await getContext());
|
|
1184
|
-
return { content: [{
|
|
1185
|
-
type: "text",
|
|
1186
|
-
text: JSON.stringify(result, null, 2)
|
|
1187
|
-
}] };
|
|
1188
|
-
});
|
|
1189
|
-
server.registerTool("inspect-url", {
|
|
1190
|
-
description: "Inspect a URL in Google Search Console to check its indexing status",
|
|
1191
|
-
inputSchema: inspectUrlInput.shape
|
|
1192
|
-
}, async (args) => {
|
|
1193
|
-
const result = await inspectUrl$1(args, await getContext());
|
|
1194
|
-
return { content: [{
|
|
1195
|
-
type: "text",
|
|
1196
|
-
text: JSON.stringify(result, null, 2)
|
|
1197
|
-
}] };
|
|
1198
|
-
});
|
|
1199
|
-
server.registerTool("request-indexing", {
|
|
1200
|
-
description: "Request Google to index or remove a URL via the Indexing API",
|
|
1201
|
-
inputSchema: requestIndexingInput.shape
|
|
1202
|
-
}, async (args) => {
|
|
1203
|
-
const result = await requestIndexing$1(args, await getContext());
|
|
1204
|
-
return { content: [{
|
|
1205
|
-
type: "text",
|
|
1206
|
-
text: JSON.stringify(result, null, 2)
|
|
1207
|
-
}] };
|
|
1208
|
-
});
|
|
1209
|
-
server.registerTool("get-indexing-status", {
|
|
1210
|
-
description: "Get indexing status metadata for a URL",
|
|
1211
|
-
inputSchema: getIndexingStatusInput.shape
|
|
1212
|
-
}, async (args) => {
|
|
1213
|
-
const result = await getIndexingStatus(args, await getContext());
|
|
1214
|
-
return { content: [{
|
|
1215
|
-
type: "text",
|
|
1216
|
-
text: JSON.stringify(result, null, 2)
|
|
1217
|
-
}] };
|
|
1218
|
-
});
|
|
1219
|
-
server.registerTool("batch-request-indexing", {
|
|
1220
|
-
description: "Batch request indexing for multiple URLs with rate limiting",
|
|
1221
|
-
inputSchema: batchRequestIndexingInput.shape
|
|
1222
|
-
}, async (args) => {
|
|
1223
|
-
const result = await batchRequestIndexing$1(args, await getContext());
|
|
1224
|
-
return { content: [{
|
|
1225
|
-
type: "text",
|
|
1226
|
-
text: JSON.stringify(result, null, 2)
|
|
1227
|
-
}] };
|
|
1228
|
-
});
|
|
1229
|
-
server.registerTool("batch-inspect-urls", {
|
|
1230
|
-
description: "Batch inspect multiple URLs to check their indexing status",
|
|
1231
|
-
inputSchema: batchInspectUrlsInput.shape
|
|
1232
|
-
}, async (args) => {
|
|
1233
|
-
const result = await batchInspectUrls$1(args, await getContext());
|
|
1234
|
-
return { content: [{
|
|
1235
|
-
type: "text",
|
|
1236
|
-
text: JSON.stringify(result, null, 2)
|
|
1237
|
-
}] };
|
|
1238
|
-
});
|
|
1239
|
-
return server;
|
|
1240
|
-
}
|
|
1241
|
-
|
|
1242
|
-
//#endregion
|
|
1243
|
-
//#region src/commands/mcp.ts
|
|
1244
1493
|
async function checkAuth() {
|
|
1245
1494
|
if ((process.env.GOOGLE_ACCESS_TOKEN || process.env.GOOGLE_REFRESH_TOKEN) && process.env.GOOGLE_CLIENT_ID && process.env.GOOGLE_CLIENT_SECRET) return { ok: true };
|
|
1246
1495
|
const config = await loadConfig();
|
|
1247
|
-
if (!config.
|
|
1496
|
+
if (!config.clientId && !config.clientSecret) return {
|
|
1248
1497
|
ok: false,
|
|
1249
1498
|
error: `GSCDump not configured.
|
|
1250
1499
|
|
|
@@ -1256,20 +1505,9 @@ Or provide env vars: GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET, GOOGLE_ACCESS_TOKEN
|
|
|
1256
1505
|
|
|
1257
1506
|
Then restart your MCP client.`
|
|
1258
1507
|
};
|
|
1259
|
-
if (
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
error: `Cloud authentication expired or missing.
|
|
1263
|
-
|
|
1264
|
-
Run this command to re-authenticate:
|
|
1265
|
-
|
|
1266
|
-
npx @gscdump/cli init
|
|
1267
|
-
|
|
1268
|
-
Then restart your MCP client.`
|
|
1269
|
-
};
|
|
1270
|
-
} else if (!await loadTokens()) return {
|
|
1271
|
-
ok: false,
|
|
1272
|
-
error: `Local authentication missing.
|
|
1508
|
+
if (!await loadTokens()) return {
|
|
1509
|
+
ok: false,
|
|
1510
|
+
error: `Authentication missing.
|
|
1273
1511
|
|
|
1274
1512
|
Run this command to authenticate:
|
|
1275
1513
|
|
|
@@ -1299,10 +1537,15 @@ const mcpCommand = defineCommand({
|
|
|
1299
1537
|
await server.connect(transport);
|
|
1300
1538
|
}
|
|
1301
1539
|
});
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1540
|
+
const DIMENSIONS = [
|
|
1541
|
+
"page",
|
|
1542
|
+
"query",
|
|
1543
|
+
"date",
|
|
1544
|
+
"country",
|
|
1545
|
+
"device",
|
|
1546
|
+
"searchAppearance"
|
|
1547
|
+
];
|
|
1548
|
+
const DIM_COLUMNS = {
|
|
1306
1549
|
page,
|
|
1307
1550
|
query,
|
|
1308
1551
|
date,
|
|
@@ -1310,10 +1553,38 @@ const DIMENSION_MAP = {
|
|
|
1310
1553
|
device,
|
|
1311
1554
|
searchAppearance
|
|
1312
1555
|
};
|
|
1556
|
+
async function runLiveQuery(client, siteUrl, opts) {
|
|
1557
|
+
const allRows = [];
|
|
1558
|
+
let startRow = 0;
|
|
1559
|
+
while (true) {
|
|
1560
|
+
const rows = ((await client._rawQuery(siteUrl, {
|
|
1561
|
+
startDate: opts.startDate,
|
|
1562
|
+
endDate: opts.endDate,
|
|
1563
|
+
dimensions: opts.dimensions,
|
|
1564
|
+
rowLimit: opts.rowLimit,
|
|
1565
|
+
startRow
|
|
1566
|
+
})).rows || []).map((row) => {
|
|
1567
|
+
const result = {
|
|
1568
|
+
clicks: row.clicks ?? 0,
|
|
1569
|
+
impressions: row.impressions ?? 0,
|
|
1570
|
+
ctr: row.ctr ?? 0,
|
|
1571
|
+
position: row.position ?? 0
|
|
1572
|
+
};
|
|
1573
|
+
opts.dimensions.forEach((dim, i) => {
|
|
1574
|
+
result[dim] = row.keys?.[i];
|
|
1575
|
+
});
|
|
1576
|
+
return result;
|
|
1577
|
+
});
|
|
1578
|
+
allRows.push(...rows);
|
|
1579
|
+
if (rows.length < opts.rowLimit) break;
|
|
1580
|
+
startRow += rows.length;
|
|
1581
|
+
}
|
|
1582
|
+
return { rows: allRows };
|
|
1583
|
+
}
|
|
1313
1584
|
const queryCommand = defineCommand({
|
|
1314
1585
|
meta: {
|
|
1315
1586
|
name: "query",
|
|
1316
|
-
description: "Run
|
|
1587
|
+
description: "Run a search analytics query (local Parquet by default, --live hits GSC API)"
|
|
1317
1588
|
},
|
|
1318
1589
|
args: {
|
|
1319
1590
|
site: {
|
|
@@ -1324,7 +1595,7 @@ const queryCommand = defineCommand({
|
|
|
1324
1595
|
dimensions: {
|
|
1325
1596
|
type: "string",
|
|
1326
1597
|
alias: "d",
|
|
1327
|
-
description:
|
|
1598
|
+
description: `Dimensions: ${DIMENSIONS.join(",")}`
|
|
1328
1599
|
},
|
|
1329
1600
|
start: {
|
|
1330
1601
|
type: "string",
|
|
@@ -1351,6 +1622,19 @@ const queryCommand = defineCommand({
|
|
|
1351
1622
|
default: "json",
|
|
1352
1623
|
description: "Output format: json or csv"
|
|
1353
1624
|
},
|
|
1625
|
+
sql: {
|
|
1626
|
+
type: "string",
|
|
1627
|
+
description: "Raw DuckDB SQL using {{FILES}} as the file list placeholder (bypasses builder)"
|
|
1628
|
+
},
|
|
1629
|
+
table: {
|
|
1630
|
+
type: "string",
|
|
1631
|
+
description: "Analytics table for --sql (default: pages)"
|
|
1632
|
+
},
|
|
1633
|
+
live: {
|
|
1634
|
+
type: "boolean",
|
|
1635
|
+
default: false,
|
|
1636
|
+
description: "Bypass local store; hit the GSC API directly"
|
|
1637
|
+
},
|
|
1354
1638
|
quiet: {
|
|
1355
1639
|
type: "boolean",
|
|
1356
1640
|
alias: "q",
|
|
@@ -1365,291 +1649,1150 @@ const queryCommand = defineCommand({
|
|
|
1365
1649
|
}
|
|
1366
1650
|
},
|
|
1367
1651
|
async run({ args }) {
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1652
|
+
if (args.sql) {
|
|
1653
|
+
await runRawSqlMode({
|
|
1654
|
+
sql: String(args.sql),
|
|
1655
|
+
site: args.site ? String(args.site) : void 0,
|
|
1656
|
+
table: args.table ? String(args.table) : "pages",
|
|
1657
|
+
output: args.output ? String(args.output) : void 0,
|
|
1658
|
+
quiet: Boolean(args.quiet)
|
|
1659
|
+
});
|
|
1660
|
+
return;
|
|
1661
|
+
}
|
|
1662
|
+
const dimNames = await resolveDimensions(args);
|
|
1663
|
+
const { startDate, endDate } = await resolveRange(args);
|
|
1664
|
+
const rowLimit = Number.parseInt(String(args.limit), 10);
|
|
1665
|
+
const format = String(args.format);
|
|
1666
|
+
const ctx = await createCommandContext({
|
|
1667
|
+
needsAuth: true,
|
|
1668
|
+
needsStore: !args.live,
|
|
1669
|
+
interactive: Boolean(args.interactive)
|
|
1670
|
+
});
|
|
1671
|
+
const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
|
|
1672
|
+
if (args.live) {
|
|
1673
|
+
if (!args.quiet) logger.info(`Querying ${siteUrl} via live GSC API...`);
|
|
1674
|
+
const result = await runLiveQuery(ctx.client, siteUrl, {
|
|
1675
|
+
startDate,
|
|
1676
|
+
endDate,
|
|
1677
|
+
dimensions: dimNames,
|
|
1678
|
+
rowLimit
|
|
1679
|
+
}).catch((e) => {
|
|
1680
|
+
logger.error(`Query failed: ${e.message}`);
|
|
1379
1681
|
process.exit(1);
|
|
1380
|
-
}
|
|
1381
|
-
const selected = await select({
|
|
1382
|
-
message: "Select a site",
|
|
1383
|
-
options: verified.map((s) => ({
|
|
1384
|
-
value: s.siteUrl,
|
|
1385
|
-
label: s.siteUrl
|
|
1386
|
-
})),
|
|
1387
|
-
initialValue: siteUrl || verified[0]?.siteUrl
|
|
1388
1682
|
});
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
})),
|
|
1404
|
-
initialValues: ["page", "query"]
|
|
1683
|
+
await writeOutput({
|
|
1684
|
+
output: {
|
|
1685
|
+
siteUrl,
|
|
1686
|
+
dimensions: dimNames,
|
|
1687
|
+
dateRange: {
|
|
1688
|
+
start: startDate,
|
|
1689
|
+
end: endDate
|
|
1690
|
+
},
|
|
1691
|
+
total: result.rows.length,
|
|
1692
|
+
data: result.rows
|
|
1693
|
+
},
|
|
1694
|
+
format,
|
|
1695
|
+
path: args.output ? String(args.output) : void 0,
|
|
1696
|
+
quiet: Boolean(args.quiet)
|
|
1405
1697
|
});
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1698
|
+
return;
|
|
1699
|
+
}
|
|
1700
|
+
if (!args.quiet) logger.info(`Querying ${siteUrl} from local Parquet store...`);
|
|
1701
|
+
const state = buildLocalState(dimNames, startDate, endDate, rowLimit);
|
|
1702
|
+
const store = ctx.store;
|
|
1703
|
+
const table = inferTable(dimNames);
|
|
1704
|
+
await assertRangeCovered(store, siteUrl, table, startDate, endDate);
|
|
1705
|
+
const result = await store.engine.query({
|
|
1706
|
+
userId: store.userId,
|
|
1707
|
+
siteId: store.siteIdFor(siteUrl),
|
|
1708
|
+
table
|
|
1709
|
+
}, state).catch((e) => {
|
|
1710
|
+
logger.error(`Query failed: ${e.message}`);
|
|
1711
|
+
process.exit(1);
|
|
1712
|
+
});
|
|
1713
|
+
await writeOutput({
|
|
1714
|
+
output: {
|
|
1715
|
+
siteUrl,
|
|
1716
|
+
dimensions: dimNames,
|
|
1717
|
+
dateRange: {
|
|
1718
|
+
start: startDate,
|
|
1719
|
+
end: endDate
|
|
1720
|
+
},
|
|
1721
|
+
total: result.rows.length,
|
|
1722
|
+
data: result.rows
|
|
1723
|
+
},
|
|
1724
|
+
format,
|
|
1725
|
+
path: args.output ? String(args.output) : void 0,
|
|
1726
|
+
quiet: Boolean(args.quiet)
|
|
1727
|
+
});
|
|
1728
|
+
}
|
|
1729
|
+
});
|
|
1730
|
+
async function resolveDimensions(args) {
|
|
1731
|
+
if (args.dimensions) return String(args.dimensions).split(",").filter((d) => DIMENSIONS.includes(d));
|
|
1732
|
+
if (args.interactive) {
|
|
1733
|
+
const selected = await multiselect({
|
|
1734
|
+
message: "Select dimensions",
|
|
1735
|
+
options: DIMENSIONS.map((d) => ({
|
|
1736
|
+
value: d,
|
|
1737
|
+
label: d
|
|
1738
|
+
})),
|
|
1739
|
+
initialValues: ["page", "query"]
|
|
1740
|
+
});
|
|
1741
|
+
if (isCancel(selected)) {
|
|
1742
|
+
cancel("Cancelled");
|
|
1743
|
+
process.exit(0);
|
|
1744
|
+
}
|
|
1745
|
+
return selected;
|
|
1746
|
+
}
|
|
1747
|
+
return ["page", "query"];
|
|
1748
|
+
}
|
|
1749
|
+
async function resolveRange(args) {
|
|
1750
|
+
if (args.start && args.end) return {
|
|
1751
|
+
startDate: String(args.start),
|
|
1752
|
+
endDate: String(args.end)
|
|
1753
|
+
};
|
|
1754
|
+
if (args.interactive) {
|
|
1755
|
+
const startInput = await text({
|
|
1756
|
+
message: "Start date (YYYY-MM-DD)",
|
|
1757
|
+
placeholder: daysAgo(28)
|
|
1758
|
+
});
|
|
1759
|
+
if (isCancel(startInput)) {
|
|
1760
|
+
cancel("Cancelled");
|
|
1761
|
+
process.exit(0);
|
|
1762
|
+
}
|
|
1763
|
+
const endInput = await text({
|
|
1764
|
+
message: "End date (YYYY-MM-DD)",
|
|
1765
|
+
placeholder: daysAgo(3)
|
|
1766
|
+
});
|
|
1767
|
+
if (isCancel(endInput)) {
|
|
1768
|
+
cancel("Cancelled");
|
|
1769
|
+
process.exit(0);
|
|
1770
|
+
}
|
|
1771
|
+
return {
|
|
1772
|
+
startDate: String(startInput) || daysAgo(28),
|
|
1773
|
+
endDate: String(endInput) || daysAgo(3)
|
|
1774
|
+
};
|
|
1775
|
+
}
|
|
1776
|
+
return {
|
|
1777
|
+
startDate: daysAgo(31),
|
|
1778
|
+
endDate: daysAgo(3)
|
|
1779
|
+
};
|
|
1780
|
+
}
|
|
1781
|
+
function buildLocalState(dimNames, startDate, endDate, rowLimit) {
|
|
1782
|
+
const dims = dimNames.map((d) => DIM_COLUMNS[d]).filter((c) => Boolean(c));
|
|
1783
|
+
return gsc.select(...dims).where(between(date, startDate, endDate)).limit(rowLimit).getState();
|
|
1784
|
+
}
|
|
1785
|
+
async function assertRangeCovered(store, siteUrl, table, startDate, endDate) {
|
|
1786
|
+
const wm = (await store.engine.getWatermarks({
|
|
1787
|
+
userId: store.userId,
|
|
1788
|
+
siteId: store.siteIdFor(siteUrl),
|
|
1789
|
+
table
|
|
1790
|
+
}))[0];
|
|
1791
|
+
if (!wm) {
|
|
1792
|
+
logger.error(`No data synced for ${siteUrl} / ${table}. Run \`gscdump sync\` first, or pass --live.`);
|
|
1793
|
+
process.exit(1);
|
|
1794
|
+
}
|
|
1795
|
+
if (endDate > wm.newestDateSynced) {
|
|
1796
|
+
logger.error(`Requested end=${endDate} is newer than last sync (${wm.newestDateSynced}). Run \`gscdump sync\` first, or pass --live.`);
|
|
1797
|
+
process.exit(1);
|
|
1798
|
+
}
|
|
1799
|
+
if (startDate < wm.oldestDateSynced) {
|
|
1800
|
+
logger.error(`Requested start=${startDate} is older than first sync (${wm.oldestDateSynced}). Run \`gscdump sync --start=${startDate}\` first, or pass --live.`);
|
|
1801
|
+
process.exit(1);
|
|
1802
|
+
}
|
|
1803
|
+
}
|
|
1804
|
+
async function runRawSqlMode(opts) {
|
|
1805
|
+
if (!isKnownTable$1(opts.table)) {
|
|
1806
|
+
logger.error(`Unknown table "${opts.table}". Known: ${allTables().join(", ")}`);
|
|
1807
|
+
process.exit(1);
|
|
1808
|
+
}
|
|
1809
|
+
const ctx = await createCommandContext({
|
|
1810
|
+
needsAuth: true,
|
|
1811
|
+
needsStore: true
|
|
1812
|
+
});
|
|
1813
|
+
const siteUrl = await ctx.resolveSite(opts.site);
|
|
1814
|
+
const store = ctx.store;
|
|
1815
|
+
if (!opts.quiet) logger.info(`Running raw SQL over table "${opts.table}" for ${siteUrl}`);
|
|
1816
|
+
const { rows, sql } = await store.runRawSql({
|
|
1817
|
+
sql: opts.sql,
|
|
1818
|
+
siteUrl,
|
|
1819
|
+
table: opts.table
|
|
1820
|
+
}).catch((e) => {
|
|
1821
|
+
logger.error(`SQL failed: ${e.message}`);
|
|
1822
|
+
process.exit(1);
|
|
1823
|
+
});
|
|
1824
|
+
const payload = JSON.stringify({
|
|
1825
|
+
sql,
|
|
1826
|
+
total: rows.length,
|
|
1827
|
+
data: rows
|
|
1828
|
+
}, null, 2);
|
|
1829
|
+
if (opts.output) {
|
|
1830
|
+
await fs.writeFile(opts.output, payload);
|
|
1831
|
+
if (!opts.quiet) logger.info(`Written to ${opts.output}`);
|
|
1832
|
+
} else console.log(payload);
|
|
1833
|
+
}
|
|
1834
|
+
async function writeOutput(opts) {
|
|
1835
|
+
const content = opts.format === "csv" ? exportToCSV(opts.output) : JSON.stringify(opts.output, null, 2);
|
|
1836
|
+
if (opts.path) {
|
|
1837
|
+
await fs.writeFile(opts.path, content);
|
|
1838
|
+
if (!opts.quiet) logger.info(`Written to ${opts.path}`);
|
|
1839
|
+
} else console.log(content);
|
|
1840
|
+
}
|
|
1841
|
+
function isKnownTable$1(name) {
|
|
1842
|
+
return allTables().includes(name);
|
|
1843
|
+
}
|
|
1844
|
+
function requireSite(target) {
|
|
1845
|
+
if (!target) {
|
|
1846
|
+
logger.error("Site URL required (-s)");
|
|
1847
|
+
process.exit(1);
|
|
1848
|
+
}
|
|
1849
|
+
return target;
|
|
1850
|
+
}
|
|
1851
|
+
const sitemapsCommand = defineCommand({
|
|
1852
|
+
meta: {
|
|
1853
|
+
name: "sitemaps",
|
|
1854
|
+
description: "Manage sitemaps"
|
|
1855
|
+
},
|
|
1856
|
+
subCommands: {
|
|
1857
|
+
list: defineCommand({
|
|
1858
|
+
meta: {
|
|
1859
|
+
name: "list",
|
|
1860
|
+
description: "List sitemaps for a site"
|
|
1861
|
+
},
|
|
1862
|
+
args: {
|
|
1863
|
+
site: {
|
|
1864
|
+
type: "string",
|
|
1865
|
+
alias: "s",
|
|
1866
|
+
description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
|
|
1867
|
+
},
|
|
1868
|
+
json: {
|
|
1869
|
+
type: "boolean",
|
|
1870
|
+
default: false,
|
|
1871
|
+
description: "Output as JSON"
|
|
1872
|
+
}
|
|
1873
|
+
},
|
|
1874
|
+
async run({ args }) {
|
|
1875
|
+
const config = await loadConfig();
|
|
1876
|
+
const siteUrl = requireSite(args.site || config.defaultSite);
|
|
1877
|
+
const sitemaps = (await (await createCommandContext({ needsAuth: true })).client.sitemaps.list(siteUrl).catch((e) => {
|
|
1878
|
+
logger.error(`Failed to fetch sitemaps: ${e.message}`);
|
|
1879
|
+
process.exit(1);
|
|
1880
|
+
})).map((sm) => ({
|
|
1881
|
+
path: sm.path,
|
|
1882
|
+
type: sm.type || void 0,
|
|
1883
|
+
isPending: sm.isPending || false,
|
|
1884
|
+
errors: Number(sm.errors) || 0,
|
|
1885
|
+
warnings: Number(sm.warnings) || 0,
|
|
1886
|
+
lastDownloaded: sm.lastDownloaded || null
|
|
1887
|
+
}));
|
|
1888
|
+
if (args.json) {
|
|
1889
|
+
console.log(JSON.stringify(sitemaps, null, 2));
|
|
1890
|
+
return;
|
|
1891
|
+
}
|
|
1892
|
+
if (sitemaps.length === 0) {
|
|
1893
|
+
logger.warn("No sitemaps found");
|
|
1894
|
+
return;
|
|
1895
|
+
}
|
|
1896
|
+
logger.success(`Found ${sitemaps.length} sitemaps:`);
|
|
1897
|
+
console.log();
|
|
1898
|
+
for (const sm of sitemaps) {
|
|
1899
|
+
const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
|
|
1900
|
+
const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
|
|
1901
|
+
const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
|
|
1902
|
+
console.log(` ${sm.path}${pending}${errors}${warnings}`);
|
|
1903
|
+
}
|
|
1409
1904
|
}
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1905
|
+
}),
|
|
1906
|
+
get: defineCommand({
|
|
1907
|
+
meta: {
|
|
1908
|
+
name: "get",
|
|
1909
|
+
description: "Get details for a specific sitemap"
|
|
1910
|
+
},
|
|
1911
|
+
args: {
|
|
1912
|
+
site: {
|
|
1913
|
+
type: "string",
|
|
1914
|
+
alias: "s",
|
|
1915
|
+
required: true,
|
|
1916
|
+
description: "Site URL"
|
|
1917
|
+
},
|
|
1918
|
+
url: {
|
|
1919
|
+
type: "positional",
|
|
1920
|
+
required: true,
|
|
1921
|
+
description: "Sitemap URL"
|
|
1922
|
+
},
|
|
1923
|
+
json: {
|
|
1924
|
+
type: "boolean",
|
|
1925
|
+
default: false,
|
|
1926
|
+
description: "Output as JSON"
|
|
1927
|
+
}
|
|
1928
|
+
},
|
|
1929
|
+
async run({ args }) {
|
|
1930
|
+
const client = (await createCommandContext({ needsAuth: true })).client;
|
|
1931
|
+
const sitemap = await fetchSitemap(client, args.site, args.url).catch(gscErrorHandler);
|
|
1932
|
+
if (args.json) {
|
|
1933
|
+
console.log(JSON.stringify(sitemap, null, 2));
|
|
1934
|
+
return;
|
|
1935
|
+
}
|
|
1936
|
+
console.log();
|
|
1937
|
+
console.log(` \x1B[1mPath:\x1B[0m ${sitemap.path}`);
|
|
1938
|
+
console.log(` \x1B[1mType:\x1B[0m ${sitemap.type || "sitemap"}`);
|
|
1939
|
+
console.log(` \x1B[1mLast Submitted:\x1B[0m ${sitemap.lastSubmitted || "N/A"}`);
|
|
1940
|
+
console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
|
|
1941
|
+
console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
|
|
1942
|
+
console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
|
|
1943
|
+
console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
|
|
1944
|
+
if (sitemap.contents?.length) {
|
|
1945
|
+
console.log();
|
|
1946
|
+
console.log(" \x1B[1mContents:\x1B[0m");
|
|
1947
|
+
for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
|
|
1948
|
+
}
|
|
1425
1949
|
}
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1950
|
+
}),
|
|
1951
|
+
submit: defineCommand({
|
|
1952
|
+
meta: {
|
|
1953
|
+
name: "submit",
|
|
1954
|
+
description: "Submit a sitemap to GSC"
|
|
1955
|
+
},
|
|
1956
|
+
args: {
|
|
1957
|
+
site: {
|
|
1958
|
+
type: "string",
|
|
1959
|
+
alias: "s",
|
|
1960
|
+
required: true,
|
|
1961
|
+
description: "Site URL"
|
|
1962
|
+
},
|
|
1963
|
+
url: {
|
|
1964
|
+
type: "positional",
|
|
1965
|
+
required: true,
|
|
1966
|
+
description: "Sitemap URL to submit"
|
|
1967
|
+
}
|
|
1968
|
+
},
|
|
1969
|
+
async run({ args }) {
|
|
1970
|
+
await (await createCommandContext({ needsAuth: true })).client.sitemaps.submit(args.site, args.url).catch((e) => {
|
|
1971
|
+
logger.error(`Submit failed: ${e.message}`);
|
|
1972
|
+
process.exit(1);
|
|
1973
|
+
});
|
|
1974
|
+
logger.success(`Submitted sitemap: ${args.url}`);
|
|
1433
1975
|
}
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1976
|
+
}),
|
|
1977
|
+
delete: defineCommand({
|
|
1978
|
+
meta: {
|
|
1979
|
+
name: "delete",
|
|
1980
|
+
description: "Delete a sitemap from GSC"
|
|
1981
|
+
},
|
|
1982
|
+
args: {
|
|
1983
|
+
site: {
|
|
1984
|
+
type: "string",
|
|
1985
|
+
alias: "s",
|
|
1986
|
+
required: true,
|
|
1987
|
+
description: "Site URL"
|
|
1988
|
+
},
|
|
1989
|
+
url: {
|
|
1990
|
+
type: "positional",
|
|
1991
|
+
required: true,
|
|
1992
|
+
description: "Sitemap URL to delete"
|
|
1993
|
+
}
|
|
1994
|
+
},
|
|
1995
|
+
async run({ args }) {
|
|
1996
|
+
await (await createCommandContext({ needsAuth: true })).client.sitemaps.delete(args.site, args.url).catch((e) => {
|
|
1997
|
+
logger.error(`Delete failed: ${e.message}`);
|
|
1998
|
+
process.exit(1);
|
|
1999
|
+
});
|
|
2000
|
+
logger.success(`Deleted sitemap: ${args.url}`);
|
|
1450
2001
|
}
|
|
2002
|
+
})
|
|
2003
|
+
}
|
|
2004
|
+
});
|
|
2005
|
+
const sitesCommand = defineCommand({
|
|
2006
|
+
meta: {
|
|
2007
|
+
name: "sites",
|
|
2008
|
+
description: "List available GSC sites"
|
|
2009
|
+
},
|
|
2010
|
+
args: { json: {
|
|
2011
|
+
type: "boolean",
|
|
2012
|
+
default: false,
|
|
2013
|
+
description: "Output as JSON for scripting"
|
|
2014
|
+
} },
|
|
2015
|
+
async run({ args }) {
|
|
2016
|
+
const sites = await (await createCommandContext({ needsAuth: true })).loadSites();
|
|
2017
|
+
if (args.json) {
|
|
2018
|
+
console.log(JSON.stringify(sites, null, 2));
|
|
2019
|
+
return;
|
|
2020
|
+
}
|
|
2021
|
+
if (sites.length === 0) {
|
|
2022
|
+
logger.warn("No verified sites found");
|
|
2023
|
+
return;
|
|
2024
|
+
}
|
|
2025
|
+
logger.success(`Found ${sites.length} sites:`);
|
|
2026
|
+
console.log();
|
|
2027
|
+
for (const site of sites) {
|
|
2028
|
+
const perm = site.permissionLevel === "siteOwner" ? "\x1B[32m" : "\x1B[90m";
|
|
2029
|
+
console.log(` ${site.siteUrl} ${perm}(${site.permissionLevel})\x1B[0m`);
|
|
1451
2030
|
}
|
|
1452
|
-
if (!args.quiet) {
|
|
1453
|
-
clearLine();
|
|
1454
|
-
logger.success(`Fetched ${rows.length} rows`);
|
|
1455
|
-
}
|
|
1456
|
-
const output = {
|
|
1457
|
-
siteUrl,
|
|
1458
|
-
dimensions: dimensions.map((d) => String(d)),
|
|
1459
|
-
dateRange: {
|
|
1460
|
-
start: startDate,
|
|
1461
|
-
end: endDate
|
|
1462
|
-
},
|
|
1463
|
-
total: rows.length,
|
|
1464
|
-
data: rows
|
|
1465
|
-
};
|
|
1466
|
-
const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
|
|
1467
|
-
if (args.output) {
|
|
1468
|
-
await fs.writeFile(String(args.output), content);
|
|
1469
|
-
if (!args.quiet) logger.info(`Written to ${args.output}`);
|
|
1470
|
-
} else console.log(content);
|
|
1471
2031
|
}
|
|
1472
2032
|
});
|
|
1473
|
-
|
|
1474
|
-
//#endregion
|
|
1475
|
-
//#region src/commands/sitemaps.ts
|
|
1476
|
-
const listCommand = defineCommand({
|
|
2033
|
+
const compactCommand = defineCommand({
|
|
1477
2034
|
meta: {
|
|
1478
|
-
name: "
|
|
1479
|
-
description: "
|
|
2035
|
+
name: "compact",
|
|
2036
|
+
description: "Run tiered compaction (raw→d7 at 7d, d7→d30 at 30d, d30→d90 at 90d)"
|
|
1480
2037
|
},
|
|
1481
2038
|
args: {
|
|
1482
|
-
site: {
|
|
2039
|
+
"site": {
|
|
1483
2040
|
type: "string",
|
|
1484
2041
|
alias: "s",
|
|
1485
|
-
|
|
1486
|
-
description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
|
|
2042
|
+
description: "Restrict to a single site (default: all sites with local data)"
|
|
1487
2043
|
},
|
|
1488
|
-
|
|
2044
|
+
"raw-days": {
|
|
2045
|
+
type: "string",
|
|
2046
|
+
description: "Override raw→d7 age threshold in days (default: 7)"
|
|
2047
|
+
},
|
|
2048
|
+
"d7-days": {
|
|
2049
|
+
type: "string",
|
|
2050
|
+
description: "Override d7→d30 age threshold in days (default: 30)"
|
|
2051
|
+
},
|
|
2052
|
+
"d30-days": {
|
|
2053
|
+
type: "string",
|
|
2054
|
+
description: "Override d30→d90 age threshold in days (default: 90)"
|
|
2055
|
+
},
|
|
2056
|
+
"quiet": {
|
|
1489
2057
|
type: "boolean",
|
|
2058
|
+
alias: "q",
|
|
1490
2059
|
default: false,
|
|
1491
|
-
description: "
|
|
2060
|
+
description: "Suppress progress output"
|
|
1492
2061
|
}
|
|
1493
2062
|
},
|
|
1494
2063
|
async run({ args }) {
|
|
1495
|
-
const
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
if (
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
const
|
|
1509
|
-
const
|
|
1510
|
-
|
|
2064
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
2065
|
+
const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
|
|
2066
|
+
const quiet = Boolean(args.quiet);
|
|
2067
|
+
const thresholds = {};
|
|
2068
|
+
if (args["raw-days"]) thresholds.raw = Number(args["raw-days"]);
|
|
2069
|
+
if (args["d7-days"]) thresholds.d7 = Number(args["d7-days"]);
|
|
2070
|
+
if (args["d30-days"]) thresholds.d30 = Number(args["d30-days"]);
|
|
2071
|
+
for (const table of allTables()) {
|
|
2072
|
+
const entries = await store.engine.listLive({
|
|
2073
|
+
userId: store.userId,
|
|
2074
|
+
siteId,
|
|
2075
|
+
table
|
|
2076
|
+
});
|
|
2077
|
+
const siteIds = new Set(entries.map((e) => e.siteId));
|
|
2078
|
+
for (const targetSite of siteIds) {
|
|
2079
|
+
if (!quiet) logger.info(`Compacting ${table} [${targetSite ?? "-"}] (raw→d7→d30→d90)`);
|
|
2080
|
+
await store.engine.compactTiered({
|
|
2081
|
+
userId: store.userId,
|
|
2082
|
+
siteId: targetSite,
|
|
2083
|
+
table
|
|
2084
|
+
}, thresholds);
|
|
2085
|
+
}
|
|
1511
2086
|
}
|
|
2087
|
+
if (!quiet) logger.success(`compact: done`);
|
|
1512
2088
|
}
|
|
1513
2089
|
});
|
|
1514
|
-
|
|
2090
|
+
/**
 * Pack the live Parquet partitions of every table into one `.duckdb` file.
 *
 * For each table returned by `allTables()` the live manifest entries are listed,
 * their object keys are resolved against `opts.dataDir`, and the files are
 * materialised into a DuckDB table of the same name via `read_parquet`.
 * Tables with no live entries are skipped.
 *
 * @param {object} opts
 * @param {string} opts.outPath - Destination file; resolved to an absolute path.
 * @param {boolean} [opts.force] - Remove an existing file at `outPath` first.
 * @param {object} opts.engine - Storage engine exposing `listLive(scope)`.
 * @param {string} opts.dataDir - Directory the manifest object keys are relative to.
 * @param {string} opts.userId - Scope passed through to `listLive`.
 * @param {string} [opts.siteId] - Optional site scope passed through to `listLive`.
 * @returns {Promise<{outPath: string, tables: Array<{table: string, files: number, rows: number}>, totalRows: number}>}
 */
async function exportToDuckDB(opts) {
	const outPath = path.resolve(opts.outPath);
	if (opts.force) await rm(outPath, { force: true });
	const instance = await DuckDBInstance.create(outPath);
	const tables = [];
	let conn;
	try {
		// Connect inside the try so a failed connect still releases the instance
		// (and with it the handle on the output file).
		conn = await instance.connect();
		for (const table of allTables()) {
			const entries = await opts.engine.listLive({
				userId: opts.userId,
				siteId: opts.siteId,
				table
			});
			if (entries.length === 0) continue;
			// Paths are embedded as SQL string literals, hence the escaping.
			const fileList = entries.map((e) => path.join(opts.dataDir, e.objectKey)).map((p) => `'${sqlEscape(p)}'`).join(", ");
			await conn.run(`CREATE OR REPLACE TABLE ${table} AS SELECT * FROM read_parquet([${fileList}], union_by_name=true)`);
			const rows = (await conn.runAndReadAll(`SELECT count(*)::BIGINT AS n FROM ${table}`)).getRowObjects();
			// The count comes back as a BIGINT; normalise it to a plain number.
			const rowCount = Number(rows[0]?.n ?? 0);
			tables.push({
				table,
				files: entries.length,
				rows: rowCount
			});
		}
	} finally {
		// Nested so the instance is closed even if closing the connection throws.
		try {
			conn?.closeSync();
		} finally {
			instance.closeSync();
		}
	}
	return {
		outPath,
		tables,
		totalRows: tables.reduce((acc, t) => acc + t.rows, 0)
	};
}
|
|
2124
|
+
const exportCommand = defineCommand({
|
|
1515
2125
|
meta: {
|
|
1516
|
-
name: "
|
|
1517
|
-
description: "
|
|
2126
|
+
name: "export",
|
|
2127
|
+
description: "Pack live Parquet partitions into a single .duckdb file for portable distribution (browser attach, CDN serving, etc.)"
|
|
1518
2128
|
},
|
|
1519
2129
|
args: {
|
|
1520
|
-
|
|
2130
|
+
out: {
|
|
1521
2131
|
type: "string",
|
|
1522
|
-
alias: "s",
|
|
1523
2132
|
required: true,
|
|
1524
|
-
description: "
|
|
2133
|
+
description: "Output path for the .duckdb file"
|
|
1525
2134
|
},
|
|
1526
|
-
|
|
1527
|
-
type: "
|
|
1528
|
-
|
|
1529
|
-
description: "Sitemap URL"
|
|
2135
|
+
site: {
|
|
2136
|
+
type: "string",
|
|
2137
|
+
description: "Limit export to a single site URL (omit to include all)"
|
|
1530
2138
|
},
|
|
1531
|
-
|
|
2139
|
+
force: {
|
|
1532
2140
|
type: "boolean",
|
|
1533
2141
|
default: false,
|
|
1534
|
-
description: "
|
|
2142
|
+
description: "Overwrite the output file if it already exists"
|
|
1535
2143
|
}
|
|
1536
2144
|
},
|
|
1537
2145
|
async run({ args }) {
|
|
1538
|
-
const
|
|
1539
|
-
|
|
1540
|
-
|
|
2146
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
2147
|
+
const siteId = args.site ? store.siteIdFor(args.site) : void 0;
|
|
2148
|
+
const result = await exportToDuckDB({
|
|
2149
|
+
engine: store.engine,
|
|
2150
|
+
dataDir: store.dataDir,
|
|
2151
|
+
userId: store.userId,
|
|
2152
|
+
siteId,
|
|
2153
|
+
outPath: args.out,
|
|
2154
|
+
force: args.force
|
|
2155
|
+
});
|
|
2156
|
+
if (result.tables.length === 0) {
|
|
2157
|
+
console.log(`\n No data to export. Run \`gscdump sync\` first.`);
|
|
1541
2158
|
return;
|
|
1542
2159
|
}
|
|
1543
|
-
console.log();
|
|
1544
|
-
console.log(
|
|
1545
|
-
console.log(
|
|
1546
|
-
console.log(` \x1B[
|
|
1547
|
-
console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
|
|
1548
|
-
console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
|
|
1549
|
-
console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
|
|
1550
|
-
console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
|
|
1551
|
-
if (sitemap.contents?.length) {
|
|
1552
|
-
console.log();
|
|
1553
|
-
console.log(" \x1B[1mContents:\x1B[0m");
|
|
1554
|
-
for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
|
|
1555
|
-
}
|
|
2160
|
+
for (const t of result.tables) console.log(` ${t.table.padEnd(15)} ${String(t.files).padStart(4)} parquet → ${t.table} (${t.rows.toLocaleString()} rows)`);
|
|
2161
|
+
console.log(`\n Exported ${result.tables.length} table(s), ${result.totalRows.toLocaleString()} rows → ${result.outPath}`);
|
|
2162
|
+
console.log(`\n Attach from DuckDB: \x1B[36mATTACH '${result.outPath}' AS gsc (READ_ONLY); SELECT * FROM gsc.pages LIMIT 10;\x1B[0m`);
|
|
2163
|
+
console.log(` Attach in a browser: use DuckDB-WASM registerFileBuffer + \x1B[36mATTACH 'gsc.duckdb' AS gsc (READ_ONLY)\x1B[0m`);
|
|
1556
2164
|
}
|
|
1557
2165
|
});
|
|
1558
|
-
const
|
|
2166
|
+
const DEFAULT_GRACE_HOURS = 24;
|
|
2167
|
+
const gcCommand = defineCommand({
|
|
1559
2168
|
meta: {
|
|
1560
|
-
name: "
|
|
1561
|
-
description: "
|
|
2169
|
+
name: "gc",
|
|
2170
|
+
description: "Delete orphaned object-store files not referenced by any manifest entry"
|
|
1562
2171
|
},
|
|
1563
2172
|
args: {
|
|
1564
|
-
|
|
2173
|
+
"grace-hours": {
|
|
2174
|
+
type: "string",
|
|
2175
|
+
default: String(DEFAULT_GRACE_HOURS),
|
|
2176
|
+
description: `Spare orphans younger than this (default: ${DEFAULT_GRACE_HOURS}h)`
|
|
2177
|
+
},
|
|
2178
|
+
"site": {
|
|
1565
2179
|
type: "string",
|
|
1566
2180
|
alias: "s",
|
|
1567
|
-
|
|
1568
|
-
description: "Site URL"
|
|
2181
|
+
description: "Restrict to a single site (default: all sites)"
|
|
1569
2182
|
},
|
|
1570
|
-
|
|
1571
|
-
type: "
|
|
1572
|
-
|
|
1573
|
-
|
|
2183
|
+
"quiet": {
|
|
2184
|
+
type: "boolean",
|
|
2185
|
+
alias: "q",
|
|
2186
|
+
default: false,
|
|
2187
|
+
description: "Suppress progress output"
|
|
1574
2188
|
}
|
|
1575
2189
|
},
|
|
1576
2190
|
async run({ args }) {
|
|
1577
|
-
|
|
1578
|
-
|
|
2191
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
2192
|
+
const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
|
|
2193
|
+
const quiet = Boolean(args.quiet);
|
|
2194
|
+
const graceMs = Number(args["grace-hours"]) * 36e5;
|
|
2195
|
+
const result = await store.engine.gcOrphans({
|
|
2196
|
+
userId: store.userId,
|
|
2197
|
+
siteId
|
|
2198
|
+
}, graceMs);
|
|
2199
|
+
if (!quiet) logger.success(`gc: deleted ${result.deleted} orphan file(s)`);
|
|
1579
2200
|
}
|
|
1580
2201
|
});
|
|
1581
|
-
const
|
|
2202
|
+
/**
 * `rollups` command group. Its only subcommand, `rebuild`, regenerates the
 * rollups listed in `DEFAULT_ROLLUPS` for one site (`--site`) or for every
 * site that has live entries in any table.
 */
const rollupsCommand = defineCommand({
	meta: {
		name: "rollups",
		description: "Manage post-sync rollups"
	},
	subCommands: {
		rebuild: defineCommand({
			meta: {
				name: "rebuild",
				description: "Rebuild post-sync rollups (daily totals, weekly totals, top-N tables) for a site"
			},
			args: {
				site: {
					type: "string",
					alias: "s",
					description: "Restrict to a single site (default: all sites with local data)"
				},
				quiet: {
					type: "boolean",
					alias: "q",
					default: false,
					description: "Suppress progress output"
				}
			},
			async run({ args }) {
				const { store } = await createCommandContext({ needsStore: true });
				const verbose = !args.quiet;
				const requestedSiteId = args.site ? store.siteIdFor(String(args.site)) : void 0;

				// Either the one requested site, or every site seen in any table's live entries.
				const targets = /* @__PURE__ */ new Set();
				if (requestedSiteId) {
					targets.add(requestedSiteId);
				} else {
					for (const table of allTables()) {
						const live = await store.engine.listLive({
							userId: store.userId,
							table
						});
						for (const entry of live) {
							if (entry.siteId) targets.add(entry.siteId);
						}
					}
				}
				if (targets.size === 0) {
					logger.warn("No sites with local data. Run `gscdump sync` first.");
					return;
				}

				let byteTotal = 0;
				for (const siteId of targets) {
					if (verbose) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)`);
					const rebuilt = await rebuildRollups({
						engine: store.engine,
						dataSource: store.dataSource,
						ctx: {
							userId: store.userId,
							siteId
						},
						defs: DEFAULT_ROLLUPS
					});
					for (const rollup of rebuilt) {
						byteTotal += rollup.bytes;
						if (!verbose) continue;
						console.log(` ${rollup.id.padEnd(20)} ${(rollup.bytes / 1024).toFixed(1).padStart(8)} KB ${rollup.objectKey}`);
					}
				}
				if (verbose) logger.success(`Rebuilt rollups across ${targets.size} site(s) — total ${(byteTotal / 1024).toFixed(1)} KB`);
			}
		})
	}
});
|
|
2263
|
+
const statsCommand = defineCommand({
|
|
1582
2264
|
meta: {
|
|
1583
|
-
name: "
|
|
1584
|
-
description: "
|
|
2265
|
+
name: "stats",
|
|
2266
|
+
description: "Show row/byte counts per table and on-disk footprint"
|
|
1585
2267
|
},
|
|
1586
2268
|
args: {
|
|
2269
|
+
json: {
|
|
2270
|
+
type: "boolean",
|
|
2271
|
+
default: false,
|
|
2272
|
+
description: "Output as JSON"
|
|
2273
|
+
},
|
|
1587
2274
|
site: {
|
|
1588
2275
|
type: "string",
|
|
1589
|
-
|
|
1590
|
-
required: true,
|
|
1591
|
-
description: "Site URL"
|
|
1592
|
-
},
|
|
1593
|
-
url: {
|
|
1594
|
-
type: "positional",
|
|
1595
|
-
required: true,
|
|
1596
|
-
description: "Sitemap URL to delete"
|
|
2276
|
+
description: "Limit to one site URL (sc-domain:example.com, https://example.com/, ...)"
|
|
1597
2277
|
}
|
|
1598
2278
|
},
|
|
1599
2279
|
async run({ args }) {
|
|
1600
|
-
|
|
1601
|
-
|
|
2280
|
+
const store = (await createCommandContext({ needsStore: true })).store;
|
|
2281
|
+
const siteId = args.site ? store.siteIdFor(args.site) : void 0;
|
|
2282
|
+
const perTable = await Promise.all(allTables().map(async (table) => {
|
|
2283
|
+
const all = await store.engine.listAll({
|
|
2284
|
+
userId: store.userId,
|
|
2285
|
+
siteId,
|
|
2286
|
+
table
|
|
2287
|
+
});
|
|
2288
|
+
return {
|
|
2289
|
+
table,
|
|
2290
|
+
live: all.filter((e) => e.retiredAt === void 0),
|
|
2291
|
+
retired: all.filter((e) => e.retiredAt !== void 0)
|
|
2292
|
+
};
|
|
2293
|
+
}));
|
|
2294
|
+
const watermarks = await store.engine.getWatermarks({
|
|
2295
|
+
userId: store.userId,
|
|
2296
|
+
siteId
|
|
2297
|
+
});
|
|
2298
|
+
const disk = await filesystemStats(store.dataDir).catch(() => ({
|
|
2299
|
+
files: 0,
|
|
2300
|
+
bytes: 0
|
|
2301
|
+
}));
|
|
2302
|
+
if (args.json) {
|
|
2303
|
+
const payload = {
|
|
2304
|
+
dataDir: store.dataDir,
|
|
2305
|
+
disk,
|
|
2306
|
+
tables: perTable.map(({ table, live, retired }) => ({
|
|
2307
|
+
table,
|
|
2308
|
+
liveFiles: live.length,
|
|
2309
|
+
liveRows: sumRows(live),
|
|
2310
|
+
liveBytes: sumBytes(live),
|
|
2311
|
+
retiredFiles: retired.length,
|
|
2312
|
+
retiredBytes: sumBytes(retired),
|
|
2313
|
+
watermarks: watermarks.filter((w) => w.table === table).map((w) => ({
|
|
2314
|
+
siteId: w.siteId ?? null,
|
|
2315
|
+
newestDateSynced: w.newestDateSynced,
|
|
2316
|
+
oldestDateSynced: w.oldestDateSynced,
|
|
2317
|
+
lastSyncAt: w.lastSyncAt
|
|
2318
|
+
}))
|
|
2319
|
+
}))
|
|
2320
|
+
};
|
|
2321
|
+
console.log(JSON.stringify(payload, null, 2));
|
|
2322
|
+
return;
|
|
2323
|
+
}
|
|
2324
|
+
console.log();
|
|
2325
|
+
console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
|
|
2326
|
+
console.log(` \x1B[90mDisk: ${disk.files} file(s), ${formatBytes(disk.bytes)}\x1B[0m`);
|
|
2327
|
+
console.log();
|
|
2328
|
+
const totalRows = perTable.reduce((acc, t) => acc + sumRows(t.live), 0);
|
|
2329
|
+
const totalBytes = perTable.reduce((acc, t) => acc + sumBytes(t.live), 0);
|
|
2330
|
+
const totalFiles = perTable.reduce((acc, t) => acc + t.live.length, 0);
|
|
2331
|
+
const totalRetiredFiles = perTable.reduce((acc, t) => acc + t.retired.length, 0);
|
|
2332
|
+
const totalRetiredBytes = perTable.reduce((acc, t) => acc + sumBytes(t.retired), 0);
|
|
2333
|
+
for (const { table, live, retired } of perTable) {
|
|
2334
|
+
const rows = sumRows(live).toLocaleString();
|
|
2335
|
+
const bytes = formatBytes(sumBytes(live));
|
|
2336
|
+
const retiredSuffix = retired.length > 0 ? ` \x1B[90m(+${retired.length} retired, ${formatBytes(sumBytes(retired))})\x1B[0m` : "";
|
|
2337
|
+
console.log(` ${table.padEnd(15)} \x1B[36m${String(live.length).padStart(4)}\x1B[0m files, ${rows.padStart(10)} rows, ${bytes}${retiredSuffix}`);
|
|
2338
|
+
}
|
|
2339
|
+
console.log();
|
|
2340
|
+
console.log(` \x1B[1mTotal:\x1B[0m ${totalFiles} files, ${totalRows.toLocaleString()} rows, ${formatBytes(totalBytes)} live`);
|
|
2341
|
+
if (totalRetiredFiles > 0) console.log(` \x1B[90mRetired: ${totalRetiredFiles} files, ${formatBytes(totalRetiredBytes)} awaiting GC\x1B[0m`);
|
|
2342
|
+
if (watermarks.length > 0) {
|
|
2343
|
+
console.log();
|
|
2344
|
+
console.log(` \x1B[1mSync watermarks:\x1B[0m`);
|
|
2345
|
+
for (const w of sortWatermarks(watermarks)) {
|
|
2346
|
+
const scope = w.siteId ? `${w.table}@${w.siteId}` : w.table;
|
|
2347
|
+
console.log(` ${scope.padEnd(24)} \x1B[36m${w.oldestDateSynced}\x1B[0m → \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
|
|
2348
|
+
}
|
|
2349
|
+
}
|
|
2350
|
+
console.log();
|
|
1602
2351
|
}
|
|
1603
2352
|
});
|
|
1604
|
-
|
|
2353
|
+
/**
 * Return a sorted copy of the watermark list: by table name first, then by
 * site id (a missing site id sorts as the empty string). The input is not mutated.
 *
 * @param {Iterable<{table: string, siteId?: string}>} ws
 * @returns {Array<{table: string, siteId?: string}>}
 */
function sortWatermarks(ws) {
	const byTableThenSite = (left, right) => {
		if (left.table === right.table) return (left.siteId ?? "").localeCompare(right.siteId ?? "");
		return left.table.localeCompare(right.table);
	};
	const ordered = [...ws];
	ordered.sort(byTableThenSite);
	return ordered;
}
|
|
2359
|
+
/**
 * Add up the `rowCount` of every manifest entry.
 *
 * @param {Array<{rowCount: number}>} entries
 * @returns {number} Total row count; 0 for an empty list.
 */
function sumRows(entries) {
	let total = 0;
	entries.forEach((entry) => {
		total += entry.rowCount;
	});
	return total;
}
|
|
2362
|
+
/**
 * Add up the `bytes` of every manifest entry.
 *
 * @param {Array<{bytes: number}>} entries
 * @returns {number} Total size in bytes; 0 for an empty list.
 */
function sumBytes(entries) {
	let total = 0;
	entries.forEach((entry) => {
		total += entry.bytes;
	});
	return total;
}
|
|
2365
|
+
/**
 * Render a byte count with a binary-scaled unit: plain `B` below 1024, then
 * `KB` / `MB` with one decimal, and `GB` with two decimals.
 *
 * @param {number} n - Size in bytes.
 * @returns {string} Human-readable size such as `"1.5 KB"`.
 */
function formatBytes(n) {
	const STEP = 1024;
	if (n < STEP) return `${n} B`;
	const kilo = n / STEP;
	if (n < STEP * STEP) return `${kilo.toFixed(1)} KB`;
	const mega = kilo / STEP;
	if (n < STEP * STEP * STEP) return `${mega.toFixed(1)} MB`;
	return `${(mega / STEP).toFixed(2)} GB`;
}
|
|
2371
|
+
const storeCommand = defineCommand({
|
|
1605
2372
|
meta: {
|
|
1606
|
-
name: "
|
|
1607
|
-
description: "Manage
|
|
2373
|
+
name: "store",
|
|
2374
|
+
description: "Manage the local DuckDB/Parquet store"
|
|
1608
2375
|
},
|
|
1609
2376
|
subCommands: {
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
2377
|
+
stats: statsCommand,
|
|
2378
|
+
compact: compactCommand,
|
|
2379
|
+
gc: gcCommand,
|
|
2380
|
+
export: exportCommand,
|
|
2381
|
+
rollups: rollupsCommand
|
|
1614
2382
|
}
|
|
1615
2383
|
});
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
2384
|
+
// Tables synced when --tables is not given.
const DEFAULT_TABLES = ["pages", "keywords", "countries", "devices"];
// Search types synced when --types is not given.
const DEFAULT_TYPES = ["web"];
// Every search type the CLI accepts; shown in the --types help and error text.
const ALL_SEARCH_TYPES = Object.values(SearchTypes);
// Default for --days, as advertised in its help text.
const DEFAULT_PENDING_DAYS = 3;
// Default for --concurrency (in-flight day fetches per table).
const DEFAULT_CONCURRENCY = 8;
// NOTE(review): not used in the visible part of the file; presumably the minimum
// number of days probed before a search type is recorded as empty — confirm.
const EMPTY_TYPE_PROBE_MIN_DAYS = 7;
// Search types that are never skipped, even when a stored marker says they are empty.
const EMPTY_TYPE_PROTECTED = ["web"];
|
|
2396
|
+
/**
 * Create a progress reporter that redraws a progress bar on stdout every 100 ms.
 *
 * @param {number} total - Number of steps expected; passed to `progressBar`.
 * @param {boolean} quiet - When truthy, a no-op tracker is returned and nothing is drawn.
 * @returns {{tick: (label: string) => void, done: () => void}}
 *   `tick` counts one completed step and remembers its label for the next redraw;
 *   `done` stops the redraw timer and clears the current line.
 */
function createProgressTracker(total, quiet) {
	if (quiet) {
		return {
			tick() {},
			done() {}
		};
	}
	const state = {
		completed: 0,
		label: ""
	};
	const draw = () => {
		clearLine();
		process.stdout.write(progressBar(state.completed, total, state.label));
	};
	// Drawing is decoupled from tick() so frequent ticks do not flood the terminal.
	let handle = setInterval(draw, 100);
	return {
		tick(label) {
			state.completed += 1;
			state.label = label;
		},
		done() {
			if (handle) {
				clearInterval(handle);
				handle = null;
			}
			clearLine();
		}
	};
}
|
|
2423
|
+
/**
 * Sync one table / search-type pair for a list of dates.
 *
 * Dates already recorded as "done" are skipped unless `force` is set. Every
 * other date is marked "inflight", fetched and written via `runOneDate`, and
 * then marked "done" or "failed". A failing date never aborts the others.
 *
 * @param {object} store - Local store (`userId`, `siteIdFor`, `engine`).
 * @param {string} siteUrl - GSC site URL.
 * @param {string} table - Table name; also the key into `TABLE_DIMS`.
 * @param {string} searchType - GSC search type.
 * @param {string[]} dates - Dates to process.
 * @param {object} client - GSC client handed through to `runOneDate`.
 * @param {number} concurrency - Limit passed to `runWithConcurrency`.
 * @param {boolean} force - Re-sync dates already marked "done".
 * @param {{tick: (label: string) => void}} progress - Receives one tick per date.
 * @returns {Promise<{rows: number, skipped: number, failed: number}>}
 */
async function syncTable(store, siteUrl, table, searchType, dates, client, concurrency, force, progress) {
	const dims = TABLE_DIMS[table];
	const siteId = store.siteIdFor(siteUrl);
	let totalRows = 0;
	let skipped = 0;
	let failed = 0;
	const priorStates = await store.engine.getSyncStates({
		userId: store.userId,
		siteId,
		table,
		searchType
	});
	const stateByDate = new Map(priorStates.map((s) => [s.date, s]));
	// "web" is the default type, so its label omits the type suffix.
	const label = searchType === "web" ? table : `${table}/${searchType}`;
	await runWithConcurrency(dates, concurrency, async (date) => {
		const prior = stateByDate.get(date);
		if (!force && prior?.state === "done") {
			skipped++;
			progress.tick(`${label} ${date} (skip)`);
			return;
		}
		const scope = {
			userId: store.userId,
			siteId,
			table,
			date,
			searchType
		};
		await store.engine.setSyncState(scope, "inflight");
		// Turn a rejection into a value so one bad date cannot reject the whole batch.
		const result = await runOneDate(store, client, siteUrl, table, searchType, dims, date).catch((err) => ({
			kind: "error",
			error: err
		}));
		if (result.kind === "error") {
			// A rejection is not guaranteed to be an Error: null/undefined would throw
			// on `.message`, and a plain string would record `undefined`.
			const message = result.error?.message ?? String(result.error);
			await store.engine.setSyncState(scope, "failed", { error: message });
			failed++;
			progress.tick(`${label} ${date} (fail)`);
			return;
		}
		await store.engine.setSyncState(scope, "done");
		totalRows += result.rows;
		progress.tick(`${label} ${date}`);
	});
	return {
		rows: totalRows,
		skipped,
		failed
	};
}
|
|
2472
|
+
/**
 * Fetch every row GSC reports for one table, search type and date, then write
 * them to the store as that day's data.
 *
 * Pages of up to 25,000 rows are requested until a short page is returned.
 * Rows for which `transformGscRow` returns a falsy value are dropped.
 *
 * @param {object} store - Local store (`userId`, `siteIdFor`, `engine.writeDay`).
 * @param {object} client - GSC client exposing `_rawQuery(siteUrl, query)`.
 * @param {string} siteUrl - GSC site URL.
 * @param {string} table - Target table.
 * @param {string} searchType - GSC search type.
 * @param {string[]} dims - Dimensions to request.
 * @param {string} date - Day to fetch; used as both start and end date.
 * @returns {Promise<{kind: "ok", rows: number}>} Number of rows written.
 */
async function runOneDate(store, client, siteUrl, table, searchType, dims, date) {
	const pageSize = 25e3;
	const collected = [];
	let offset = 0;
	let page;
	do {
		const response = await client._rawQuery(siteUrl, {
			startDate: date,
			endDate: date,
			dimensions: dims,
			searchType,
			rowLimit: pageSize,
			startRow: offset
		});
		page = response.rows || [];
		for (const raw of page) {
			// Fill in fields the API omitted before handing the row to the transformer.
			const normalized = {
				keys: raw.keys ?? [],
				clicks: raw.clicks ?? 0,
				impressions: raw.impressions ?? 0,
				ctr: raw.ctr ?? 0,
				position: raw.position ?? 0
			};
			const mapped = transformGscRow(table, normalized);
			if (mapped) collected.push(mapped.row);
		}
		offset += page.length;
		// A full page means there may be more rows; a short page is the last one.
	} while (page.length >= pageSize);

	await store.engine.writeDay({
		userId: store.userId,
		siteId: store.siteIdFor(siteUrl),
		table,
		date,
		searchType
	}, collected);
	return {
		kind: "ok",
		rows: collected.length
	};
}
|
|
2511
|
+
const syncCommand = defineCommand({
|
|
1620
2512
|
meta: {
|
|
1621
|
-
name: "
|
|
1622
|
-
description: "
|
|
2513
|
+
name: "sync",
|
|
2514
|
+
description: "Sync GSC data to local Parquet store"
|
|
2515
|
+
},
|
|
2516
|
+
args: {
|
|
2517
|
+
"site": {
|
|
2518
|
+
type: "string",
|
|
2519
|
+
alias: "s",
|
|
2520
|
+
description: "Site URL"
|
|
2521
|
+
},
|
|
2522
|
+
"start": {
|
|
2523
|
+
type: "string",
|
|
2524
|
+
description: "Start date (YYYY-MM-DD) for historical sync"
|
|
2525
|
+
},
|
|
2526
|
+
"end": {
|
|
2527
|
+
type: "string",
|
|
2528
|
+
description: "End date (YYYY-MM-DD); defaults to 3 days ago"
|
|
2529
|
+
},
|
|
2530
|
+
"days": {
|
|
2531
|
+
type: "string",
|
|
2532
|
+
description: `Number of days back to sync (default: ${DEFAULT_PENDING_DAYS})`
|
|
2533
|
+
},
|
|
2534
|
+
"tables": {
|
|
2535
|
+
type: "string",
|
|
2536
|
+
alias: "t",
|
|
2537
|
+
description: `Tables to sync (default: ${DEFAULT_TABLES.join(",")}); comma-separated`
|
|
2538
|
+
},
|
|
2539
|
+
"types": {
|
|
2540
|
+
type: "string",
|
|
2541
|
+
description: `GSC search types to sync (default: ${DEFAULT_TYPES.join(",")}); comma-separated. Allowed: ${ALL_SEARCH_TYPES.join(",")}.`
|
|
2542
|
+
},
|
|
2543
|
+
"force-types": {
|
|
2544
|
+
type: "boolean",
|
|
2545
|
+
default: false,
|
|
2546
|
+
description: "Ignore stored empty-type markers and re-probe every requested type"
|
|
2547
|
+
},
|
|
2548
|
+
"no-rollups": {
|
|
2549
|
+
type: "boolean",
|
|
2550
|
+
default: false,
|
|
2551
|
+
description: "Skip the post-sync rollup rebuild (daily/weekly totals, top-N tables)"
|
|
2552
|
+
},
|
|
2553
|
+
"full": {
|
|
2554
|
+
type: "boolean",
|
|
2555
|
+
description: "Sync the last 450 days (full GSC history)"
|
|
2556
|
+
},
|
|
2557
|
+
"quiet": {
|
|
2558
|
+
type: "boolean",
|
|
2559
|
+
alias: "q",
|
|
2560
|
+
default: false,
|
|
2561
|
+
description: "Suppress progress output"
|
|
2562
|
+
},
|
|
2563
|
+
"force": {
|
|
2564
|
+
type: "boolean",
|
|
2565
|
+
default: false,
|
|
2566
|
+
description: "Re-sync dates already marked done (default: skip them for idempotent resume)"
|
|
2567
|
+
},
|
|
2568
|
+
"status": {
|
|
2569
|
+
type: "boolean",
|
|
2570
|
+
default: false,
|
|
2571
|
+
description: "Print watermarks + sync-state summary instead of syncing"
|
|
2572
|
+
},
|
|
2573
|
+
"json": {
|
|
2574
|
+
type: "boolean",
|
|
2575
|
+
default: false,
|
|
2576
|
+
description: "With --status: emit JSON"
|
|
2577
|
+
},
|
|
2578
|
+
"concurrency": {
|
|
2579
|
+
type: "string",
|
|
2580
|
+
alias: "c",
|
|
2581
|
+
description: `Concurrent in-flight day fetches per table (default: ${DEFAULT_CONCURRENCY})`
|
|
2582
|
+
},
|
|
2583
|
+
"serial-tables": {
|
|
2584
|
+
type: "boolean",
|
|
2585
|
+
default: false,
|
|
2586
|
+
description: "Run tables sequentially (default: run all tables in parallel)"
|
|
2587
|
+
}
|
|
1623
2588
|
},
|
|
1624
|
-
args: { json: {
|
|
1625
|
-
type: "boolean",
|
|
1626
|
-
default: false,
|
|
1627
|
-
description: "Output as JSON for scripting"
|
|
1628
|
-
} },
|
|
1629
2589
|
async run({ args }) {
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
permission: site.permissionLevel || "unknown"
|
|
1633
|
-
}));
|
|
1634
|
-
if (args.json) {
|
|
1635
|
-
console.log(JSON.stringify(sites, null, 2));
|
|
2590
|
+
if (args.status) {
|
|
2591
|
+
await printSyncStatus(await loadConfig(), args.site ? String(args.site) : void 0, Boolean(args.json));
|
|
1636
2592
|
return;
|
|
1637
2593
|
}
|
|
1638
|
-
|
|
1639
|
-
|
|
2594
|
+
const ctx = await createCommandContext({
|
|
2595
|
+
needsAuth: true,
|
|
2596
|
+
needsStore: true
|
|
2597
|
+
});
|
|
2598
|
+
const client = ctx.client;
|
|
2599
|
+
const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
|
|
2600
|
+
const tables = args.tables ? String(args.tables).split(",").map((t) => t.trim()).filter(isKnownTable) : DEFAULT_TABLES;
|
|
2601
|
+
const requestedTypes = args.types ? String(args.types).split(",").map((t) => t.trim()).filter(isKnownSearchType) : DEFAULT_TYPES;
|
|
2602
|
+
if (requestedTypes.length === 0) {
|
|
2603
|
+
logger.error(`No valid search types specified. Allowed: ${ALL_SEARCH_TYPES.join(",")}`);
|
|
2604
|
+
process.exit(1);
|
|
2605
|
+
}
|
|
2606
|
+
const siteId = ctx.store.siteIdFor(siteUrl);
|
|
2607
|
+
const emptyTypesStore = createEmptyTypesStore({ dataSource: ctx.store.dataSource });
|
|
2608
|
+
const emptyTypesDoc = await emptyTypesStore.load({
|
|
2609
|
+
userId: ctx.store.userId,
|
|
2610
|
+
siteId
|
|
2611
|
+
});
|
|
2612
|
+
const forceTypes = Boolean(args["force-types"]);
|
|
2613
|
+
const skippedTypes = [];
|
|
2614
|
+
const types = [];
|
|
2615
|
+
for (const t of requestedTypes) {
|
|
2616
|
+
if (!forceTypes && emptyTypesDoc.emptyTypes.includes(t) && !EMPTY_TYPE_PROTECTED.includes(t)) {
|
|
2617
|
+
skippedTypes.push(t);
|
|
2618
|
+
continue;
|
|
2619
|
+
}
|
|
2620
|
+
types.push(t);
|
|
2621
|
+
}
|
|
2622
|
+
if (types.length === 0) {
|
|
2623
|
+
logger.warn(`All requested types (${requestedTypes.join(", ")}) are marked empty for this site. Pass --force-types to re-probe.`);
|
|
1640
2624
|
return;
|
|
1641
2625
|
}
|
|
1642
|
-
logger.
|
|
1643
|
-
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
2626
|
+
if (skippedTypes.length > 0 && !args.quiet) logger.info(`Skipping ${skippedTypes.join(", ")} (marked empty for this site; pass --force-types to re-probe).`);
|
|
2627
|
+
const endDate = args.end ? String(args.end) : daysAgo(DEFAULT_PENDING_DAYS);
|
|
2628
|
+
let startDate;
|
|
2629
|
+
if (args.start) startDate = String(args.start);
|
|
2630
|
+
else if (args.full) startDate = daysAgo(450);
|
|
2631
|
+
else if (args.days) startDate = daysAgo(Number.parseInt(String(args.days), 10) + DEFAULT_PENDING_DAYS - 1);
|
|
2632
|
+
else startDate = daysAgo(DEFAULT_PENDING_DAYS + DEFAULT_PENDING_DAYS - 1);
|
|
2633
|
+
const dates = getDateRange(startDate, endDate);
|
|
2634
|
+
if (dates.length === 0) {
|
|
2635
|
+
logger.error(`No dates to sync (start=${startDate}, end=${endDate})`);
|
|
2636
|
+
process.exit(1);
|
|
2637
|
+
}
|
|
2638
|
+
const store = ctx.store;
|
|
2639
|
+
if (!args.quiet) {
|
|
2640
|
+
logger.info(`Syncing ${siteUrl} (${tables.join(", ")}) [${types.join(", ")}] → ${store.dataDir}`);
|
|
2641
|
+
logger.info(`Range: ${startDate} → ${endDate} (${dates.length} days)`);
|
|
2642
|
+
}
|
|
2643
|
+
const concurrency = args.concurrency ? Math.max(1, Number.parseInt(String(args.concurrency), 10) || DEFAULT_CONCURRENCY) : DEFAULT_CONCURRENCY;
|
|
2644
|
+
const serialTables = Boolean(args["serial-tables"]);
|
|
2645
|
+
const start = Date.now();
|
|
2646
|
+
const totals = {};
|
|
2647
|
+
const jobs = [];
|
|
2648
|
+
for (const table of tables) for (const type of types) {
|
|
2649
|
+
const label = type === "web" ? table : `${table}/${type}`;
|
|
2650
|
+
jobs.push({
|
|
2651
|
+
table,
|
|
2652
|
+
type,
|
|
2653
|
+
label
|
|
2654
|
+
});
|
|
2655
|
+
}
|
|
2656
|
+
const progress = createProgressTracker(dates.length * jobs.length, Boolean(args.quiet));
|
|
2657
|
+
if (serialTables) for (const job of jobs) totals[job.label] = await syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress);
|
|
2658
|
+
else {
|
|
2659
|
+
const results = await Promise.all(jobs.map((job) => syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress)));
|
|
2660
|
+
jobs.forEach((job, i) => {
|
|
2661
|
+
totals[job.label] = results[i];
|
|
2662
|
+
});
|
|
2663
|
+
}
|
|
2664
|
+
progress.done();
|
|
2665
|
+
const seconds = ((Date.now() - start) / 1e3).toFixed(1);
|
|
2666
|
+
if (!args.quiet) {
|
|
2667
|
+
logger.success(`Synced ${siteUrl} in ${seconds}s`);
|
|
2668
|
+
for (const [t, n] of Object.entries(totals)) {
|
|
2669
|
+
const suffix = [n.skipped > 0 ? `${n.skipped} skipped` : null, n.failed > 0 ? `\x1B[31m${n.failed} failed\x1B[0m` : null].filter(Boolean).join(", ");
|
|
2670
|
+
const tail = suffix ? ` (${suffix})` : "";
|
|
2671
|
+
console.log(` ${t}: ${n.rows.toLocaleString()} rows${tail}`);
|
|
2672
|
+
}
|
|
2673
|
+
console.log();
|
|
2674
|
+
}
|
|
2675
|
+
const anyFailed = Object.values(totals).some((t) => t.failed > 0);
|
|
2676
|
+
const rowsByType = /* @__PURE__ */ new Map();
|
|
2677
|
+
const failedByType = /* @__PURE__ */ new Map();
|
|
2678
|
+
for (const job of jobs) {
|
|
2679
|
+
const t = totals[job.label];
|
|
2680
|
+
rowsByType.set(job.type, (rowsByType.get(job.type) ?? 0) + t.rows);
|
|
2681
|
+
failedByType.set(job.type, (failedByType.get(job.type) ?? 0) + t.failed);
|
|
2682
|
+
}
|
|
2683
|
+
if (!forceTypes && dates.length >= EMPTY_TYPE_PROBE_MIN_DAYS) {
|
|
2684
|
+
const toMark = [];
|
|
2685
|
+
for (const type of types) {
|
|
2686
|
+
if (EMPTY_TYPE_PROTECTED.includes(type)) continue;
|
|
2687
|
+
if ((failedByType.get(type) ?? 0) > 0) continue;
|
|
2688
|
+
if ((rowsByType.get(type) ?? 0) === 0) toMark.push(type);
|
|
2689
|
+
}
|
|
2690
|
+
if (toMark.length > 0) {
|
|
2691
|
+
await emptyTypesStore.mark({
|
|
2692
|
+
userId: store.userId,
|
|
2693
|
+
siteId
|
|
2694
|
+
}, toMark);
|
|
2695
|
+
if (!args.quiet) logger.info(`Marked empty for future syncs: ${toMark.join(", ")} (0 rows across ${dates.length} days; pass --force-types to re-probe).`);
|
|
2696
|
+
}
|
|
2697
|
+
}
|
|
2698
|
+
if (forceTypes && emptyTypesDoc.emptyTypes.length > 0) {
|
|
2699
|
+
const toClear = [];
|
|
2700
|
+
for (const type of types) if (emptyTypesDoc.emptyTypes.includes(type) && (rowsByType.get(type) ?? 0) > 0) toClear.push(type);
|
|
2701
|
+
if (toClear.length > 0) {
|
|
2702
|
+
await emptyTypesStore.clear({
|
|
2703
|
+
userId: store.userId,
|
|
2704
|
+
siteId
|
|
2705
|
+
}, toClear);
|
|
2706
|
+
if (!args.quiet) logger.info(`Cleared empty markers for: ${toClear.join(", ")} (re-probe found data).`);
|
|
2707
|
+
}
|
|
1647
2708
|
}
|
|
2709
|
+
const noRollups = Boolean(args["no-rollups"]);
|
|
2710
|
+
const anyRowsSynced = Object.values(totals).some((t) => t.rows > 0);
|
|
2711
|
+
if (!noRollups && anyRowsSynced) {
|
|
2712
|
+
if (!args.quiet) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)…`);
|
|
2713
|
+
const rollupStart = Date.now();
|
|
2714
|
+
const results = await rebuildRollups({
|
|
2715
|
+
engine: store.engine,
|
|
2716
|
+
dataSource: store.dataSource,
|
|
2717
|
+
ctx: {
|
|
2718
|
+
userId: store.userId,
|
|
2719
|
+
siteId
|
|
2720
|
+
},
|
|
2721
|
+
defs: DEFAULT_ROLLUPS
|
|
2722
|
+
}).catch((err) => {
|
|
2723
|
+
logger.warn(`Rollup rebuild failed: ${err.message}`);
|
|
2724
|
+
return [];
|
|
2725
|
+
});
|
|
2726
|
+
if (!args.quiet && results.length > 0) {
|
|
2727
|
+
const kb = results.reduce((a, r) => a + r.bytes, 0) / 1024;
|
|
2728
|
+
const ms = Date.now() - rollupStart;
|
|
2729
|
+
logger.success(`Rebuilt ${results.length} rollup(s) in ${ms}ms — ${kb.toFixed(1)} KB`);
|
|
2730
|
+
}
|
|
2731
|
+
}
|
|
2732
|
+
if (anyFailed) process.exit(1);
|
|
1648
2733
|
}
|
|
1649
2734
|
});
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
2735
|
+
/**
 * Report whether `name` is one of the table names returned by allTables().
 *
 * @param {string} name - candidate table name.
 * @returns {boolean} true when the name is listed by allTables().
 */
function isKnownTable(name) {
  const knownTables = allTables();
  return knownTables.includes(name);
}
|
|
2738
|
+
/**
 * Report whether `name` appears in ALL_SEARCH_TYPES.
 *
 * @param {string} name - candidate search type.
 * @returns {boolean} true when the name is listed in ALL_SEARCH_TYPES.
 */
function isKnownSearchType(name) {
  const isListed = ALL_SEARCH_TYPES.includes(name);
  return isListed;
}
|
|
2741
|
+
/**
 * Print a summary of the local store's sync state: watermarks per table,
 * plus any sync-state entries that are "inflight" or "failed".
 *
 * @param {object} config - CLI config; only used to resolve the data directory.
 * @param {string|undefined} siteFilter - site to restrict the summary to, or
 *   undefined to query without a site id.
 * @param {boolean} asJson - when true, emit a single JSON document instead of
 *   the formatted text report.
 * @returns {Promise<void>}
 */
async function printSyncStatus(config, siteFilter, asJson) {
  const store = createLocalStore({ dataDir: resolveDataDir(config) });
  const siteId = siteFilter ? store.siteIdFor(siteFilter) : void 0;
  const watermarks = await store.engine.getWatermarks({
    userId: store.userId,
    siteId
  });
  const states = await store.engine.getSyncStates({
    userId: store.userId,
    siteId
  });
  const failed = states.filter((s) => s.state === "failed");
  const inflight = states.filter((s) => s.state === "inflight");
  if (asJson) {
    console.log(JSON.stringify({
      dataDir: store.dataDir,
      siteFilter: siteFilter ?? null,
      watermarks,
      failed,
      inflight
    }, null, 2));
    return;
  }
  console.log();
  console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
  if (siteFilter) console.log(` \x1B[90mSite: ${siteFilter}\x1B[0m`);
  console.log();
  if (watermarks.length === 0) {
    console.log(` No sync watermarks. Run \`gscdump sync\` to ingest data.`);
    // Only stop here when there is nothing else to report: failed or inflight
    // entries can exist without any watermark and must still be shown, as
    // they already are in the JSON output.
    if (inflight.length === 0 && failed.length === 0) {
      console.log();
      return;
    }
  } else {
    console.log(` \x1B[1mWatermarks:\x1B[0m`);
    // Sort a copy by table, then by site id, so the listing is stable.
    const sorted = [...watermarks].sort((a, b) => {
      if (a.table !== b.table) return a.table.localeCompare(b.table);
      return (a.siteId ?? "").localeCompare(b.siteId ?? "");
    });
    for (const w of sorted) {
      const scope = w.siteId ? `${w.table}@${w.siteId}` : w.table;
      console.log(` ${scope.padEnd(28)} \x1B[36m${w.oldestDateSynced}\x1B[0m → \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
    }
  }
  if (inflight.length > 0) {
    console.log();
    console.log(` \x1B[33m${inflight.length} inflight:\x1B[0m`);
    for (const s of inflight) console.log(` ${s.table}${s.siteId ? `@${s.siteId}` : ""} ${s.date} (attempt ${s.attempts}, started ${formatAge(s.updatedAt)})`);
  }
  if (failed.length > 0) {
    console.log();
    console.log(` \x1B[31m${failed.length} failed:\x1B[0m`);
    for (const s of failed) console.log(` ${s.table}${s.siteId ? `@${s.siteId}` : ""} ${s.date}: ${s.error ?? "unknown"}`);
    console.log();
    console.log(` Re-run \`gscdump sync --force\` to retry failed dates.`);
  }
  console.log();
}
|
|
1653
2796
|
runMain(defineCommand({
|
|
1654
2797
|
meta: {
|
|
1655
2798
|
name: "gscdump",
|
|
@@ -1662,6 +2805,11 @@ runMain(defineCommand({
|
|
|
1662
2805
|
query: queryCommand,
|
|
1663
2806
|
sites: sitesCommand,
|
|
1664
2807
|
sitemaps: sitemapsCommand,
|
|
2808
|
+
sync: syncCommand,
|
|
2809
|
+
store: storeCommand,
|
|
2810
|
+
inspect: inspectCommand,
|
|
2811
|
+
entities: entitiesCommand,
|
|
2812
|
+
analyze: analyzeCommand,
|
|
1665
2813
|
auth: authCommand,
|
|
1666
2814
|
config: configCommand,
|
|
1667
2815
|
mcp: mcpCommand
|
|
@@ -1670,6 +2818,4 @@ runMain(defineCommand({
|
|
|
1670
2818
|
if (!process.argv.includes("mcp")) showSplash();
|
|
1671
2819
|
}
|
|
1672
2820
|
}));
|
|
1673
|
-
|
|
1674
|
-
//#endregion
|
|
1675
|
-
export { getAuth as a, loadTokens as c, clearTokens as i, saveCloudTokens as l, authenticateCloud as n, getAuthCredentials as o, clearCloudTokens as r, loadCloudTokens as s, authenticate as t, saveTokens as u };
|
|
2821
|
+
export {};
|