@gscdump/cli 0.4.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +82 -56
  2. package/dist/index.mjs +2050 -2365
  3. package/package.json +12 -9
package/dist/index.mjs CHANGED
@@ -1,38 +1,102 @@
1
1
  #!/usr/bin/env node
2
- import process$1 from "node:process";
2
+ import process from "node:process";
3
3
  import { defineCommand, runMain } from "citty";
4
- import { cancel, confirm, isCancel, multiselect, select, text } from "@clack/prompts";
5
- import fs from "node:fs/promises";
4
+ import { defaultAnalyzerRegistry } from "@gscdump/analysis/registry";
5
+ import { AnalyzerCapabilityError, analyzeFromSource, createEngineQuerySource, createGscApiQuerySource } from "@gscdump/analysis";
6
+ import { cancel, isCancel, multiselect, select, text } from "@clack/prompts";
7
+ import { daysAgo, fetchSitemap, formatErrorForCli, getDateRange, googleSearchConsole, progressBar } from "gscdump";
8
+ import fs, { readFile, rm } from "node:fs/promises";
6
9
  import { createServer } from "node:http";
7
10
  import path from "node:path";
8
11
  import { OAuth2Client } from "google-auth-library";
9
- import { consola } from "consola";
10
- import { batchInspectUrls, batchRequestIndexing, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, getIndexingMetadata, googleSearchConsole, inspectUrl, requestIndexing, submitSitemap } from "gscdump";
11
12
  import os from "node:os";
12
- import { between, country, date, device, gsc, page, query, searchAppearance } from "gscdump/query";
13
+ import { consola } from "consola";
14
+ import { createNodeHarness } from "@gscdump/engine/node-harness";
15
+ import { TABLE_DIMS, transformGscRow } from "@gscdump/engine/ingest";
16
+ import { allTables, inferTable } from "@gscdump/engine/schema";
17
+ import { Buffer } from "node:buffer";
18
+ import { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore } from "@gscdump/engine/entities";
19
+ import { createGscMcpServer } from "@gscdump/mcp/server";
13
20
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
14
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
15
- import { z } from "zod";
16
-
17
- //#region src/utils.ts
21
+ import { SearchTypes, between, country, date, device, gsc, page, query, searchAppearance } from "gscdump/query";
22
+ import { DuckDBInstance } from "@duckdb/node-api";
23
+ import { sqlEscape } from "@gscdump/engine/sql";
24
+ import { DEFAULT_ROLLUPS, rebuildRollups } from "@gscdump/engine/rollups";
25
+ import { filesystemStats } from "@gscdump/engine/filesystem";
26
+ var LocalStoreUnsupportedError = class extends Error {
27
+ constructor(tool) {
28
+ super(`analysis "${tool}" is not yet implemented against the local Parquet store`);
29
+ this.name = "LocalStoreUnsupportedError";
30
+ }
31
+ };
32
+ var LocalStoreEmptyError = class extends Error {
33
+ constructor(siteUrl) {
34
+ super(`no local data synced for ${siteUrl} (run \`gscdump sync\` first)`);
35
+ this.name = "LocalStoreEmptyError";
36
+ }
37
+ };
38
+ async function hasLocalData(store, siteUrl) {
39
+ return (await store.engine.listLive({
40
+ userId: store.userId,
41
+ siteId: store.siteIdFor(siteUrl)
42
+ })).length > 0;
43
+ }
44
+ async function runLocalAnalysis(store, siteUrl, params) {
45
+ return analyzeFromSource(createEngineQuerySource({
46
+ engine: store.engine,
47
+ ctx: {
48
+ userId: store.userId,
49
+ siteId: store.siteIdFor(siteUrl)
50
+ }
51
+ }), params, defaultAnalyzerRegistry).catch((e) => {
52
+ if (e instanceof AnalyzerCapabilityError) throw new LocalStoreUnsupportedError(params.type);
53
+ throw e;
54
+ });
55
+ }
56
+ async function runLiveAnalysis(client, siteUrl, params) {
57
+ return analyzeFromSource(createGscApiQuerySource({
58
+ client,
59
+ siteUrl
60
+ }), params, defaultAnalyzerRegistry).catch((e) => {
61
+ if (e instanceof AnalyzerCapabilityError) throw new LocalStoreUnsupportedError(params.type);
62
+ throw e;
63
+ });
64
+ }
65
+ let configDir = path.join(os.homedir(), ".config", "gscdump");
66
+ function getConfigDir() {
67
+ return configDir;
68
+ }
69
+ function defaultDataDir() {
70
+ return path.join(os.homedir(), ".gscdump", "data");
71
+ }
72
+ function resolveDataDir(config) {
73
+ return expandTilde(config.dataDir ?? defaultDataDir());
74
+ }
75
+ function expandTilde(p) {
76
+ if (p === "~") return os.homedir();
77
+ if (p.startsWith("~/")) return path.join(os.homedir(), p.slice(2));
78
+ return p;
79
+ }
80
+ async function loadConfig() {
81
+ return fs.readFile(path.join(configDir, "config.json"), "utf-8").then((data) => JSON.parse(data)).catch(() => ({}));
82
+ }
83
+ async function saveConfig(config) {
84
+ await fs.mkdir(configDir, {
85
+ recursive: true,
86
+ mode: 448
87
+ });
88
+ await fs.writeFile(path.join(configDir, "config.json"), JSON.stringify(config, null, 2), { mode: 384 });
89
+ }
90
+ function getConfigPath() {
91
+ return path.join(configDir, "config.json");
92
+ }
18
93
  const VERSION = "1.0.0";
19
94
  const logger = consola.withTag("gscdump");
20
- /**
21
- * Handles GSC API errors with helpful messages and suggestions.
22
- * Exits process with code 1.
23
- */
24
- function handleGscError(error) {
95
+ function gscErrorHandler(error) {
25
96
  console.error();
26
97
  console.error(formatErrorForCli(error));
27
98
  console.error();
28
- process$1.exit(1);
29
- }
30
- /**
31
- * Creates a .catch() handler for GSC API errors.
32
- * Use: somePromise.catch(gscErrorHandler)
33
- */
34
- function gscErrorHandler(error) {
35
- return handleGscError(error);
99
+ process.exit(1);
36
100
  }
37
101
  const gradientColors = [
38
102
  (s) => `\x1B[38;2;52;211;153m${s}\x1B[0m`,
@@ -41,9 +105,9 @@ const gradientColors = [
41
105
  (s) => `\x1B[38;2;56;189;248m${s}\x1B[0m`,
42
106
  (s) => `\x1B[38;2;96;165;250m${s}\x1B[0m`
43
107
  ];
44
- function applyGradient(text$1) {
45
- return [...text$1].map((char, i) => {
46
- const colorIndex = Math.floor(i / text$1.length * gradientColors.length);
108
+ function applyGradient(text) {
109
+ return [...text].map((char, i) => {
110
+ const colorIndex = Math.floor(i / text.length * gradientColors.length);
47
111
  return gradientColors[Math.min(colorIndex, gradientColors.length - 1)](char);
48
112
  }).join("");
49
113
  }
@@ -52,14 +116,26 @@ function showSplash() {
52
116
  console.log(` ${applyGradient("GSC Dump")} v${VERSION}`);
53
117
  console.log();
54
118
  }
55
- function progressBar(current, total, label, width = 30) {
56
- const percent = Math.min(current / total, 1);
57
- const filled = Math.round(width * percent);
58
- const empty = width - filled;
59
- return ` ${`\x1B[36m${"█".repeat(filled)}\x1B[0m\x1B[90m${"░".repeat(empty)}\x1B[0m`} \x1B[90m${current}/${total}\x1B[0m ${label}`;
60
- }
61
119
  function clearLine() {
62
- process$1.stdout.write("\r\x1B[K");
120
+ process.stdout.write("\r\x1B[K");
121
+ }
122
+ function formatAge(ms) {
123
+ const delta = Date.now() - ms;
124
+ if (delta < 6e4) return "just now";
125
+ if (delta < 36e5) return `${Math.floor(delta / 6e4)}m ago`;
126
+ if (delta < 864e5) return `${Math.floor(delta / 36e5)}h ago`;
127
+ return `${Math.floor(delta / 864e5)}d ago`;
128
+ }
129
+ async function runWithConcurrency(items, concurrency, processor) {
130
+ const cursor = { i: 0 };
131
+ async function worker() {
132
+ while (true) {
133
+ const i = cursor.i++;
134
+ if (i >= items.length) return;
135
+ await processor(items[i], i);
136
+ }
137
+ }
138
+ await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, worker));
63
139
  }
64
140
  function toCSV(data, columns) {
65
141
  return [columns.join(","), ...data.map((row) => columns.map((col) => {
@@ -101,104 +177,7 @@ function exportToCSV(output) {
101
177
  ])}`);
102
178
  return sections.join("\n\n");
103
179
  }
104
-
105
- //#endregion
106
- //#region src/cloud.ts
107
- async function cloudFetch(url, options) {
108
- const res = await fetch(url, options);
109
- if (res.status === 401) {
110
- logger.error("CLI session expired or revoked. Run gscdump init --force to re-authenticate.");
111
- process$1.exit(1);
112
- }
113
- if (!res.ok) {
114
- const body = await res.json().catch(() => ({ message: res.statusText }));
115
- throw new Error(body.message || `HTTP ${res.status}: ${res.statusText}`);
116
- }
117
- return res.json();
118
- }
119
- function buildUrl(base, path$1, query$1) {
120
- const url = new URL(path$1, base);
121
- if (query$1) {
122
- for (const [k, v] of Object.entries(query$1)) if (v !== void 0 && v !== "") url.searchParams.set(k, v);
123
- }
124
- return url.toString();
125
- }
126
- function createCloudClient(cloudUrl, sessionId) {
127
- const headers = {
128
- "x-cli-session": sessionId,
129
- "content-type": "application/json"
130
- };
131
- return {
132
- me: () => cloudFetch(buildUrl(cloudUrl, "/api/cli/me"), { headers }),
133
- availableSites: () => cloudFetch(buildUrl(cloudUrl, "/api/cli/sites/available"), { headers }),
134
- registerSite: (siteUrl) => cloudFetch(buildUrl(cloudUrl, "/api/sites/register"), {
135
- method: "POST",
136
- headers,
137
- body: JSON.stringify({ siteUrl })
138
- }),
139
- syncStatus: (siteId) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/sync-status`), { headers }),
140
- data: (siteId, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/data`, params), { headers }),
141
- sitemaps: (siteId) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/sitemaps`), { headers }),
142
- analysis: (siteId, tool, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/analysis/${tool}`, params), { headers }),
143
- query: (siteId, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/query`, params), { headers }),
144
- indexing: (siteId, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/indexing`, params), { headers }),
145
- indexingDiagnostics: (siteId) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/indexing/diagnostics`), { headers }),
146
- indexingUrls: (siteId, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/indexing/urls`, params), { headers }),
147
- indexPercent: (siteId, params) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/index-percent`, params), { headers }),
148
- sitemapAction: (siteId, body) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/sitemaps`), {
149
- method: "POST",
150
- headers,
151
- body: JSON.stringify(body)
152
- }),
153
- triggerSync: (siteId) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}/sync`), {
154
- method: "POST",
155
- headers
156
- }),
157
- deleteSite: (siteId) => cloudFetch(buildUrl(cloudUrl, `/api/sites/${siteId}`), {
158
- method: "DELETE",
159
- headers
160
- }),
161
- bulkRegister: (siteUrls) => cloudFetch(buildUrl(cloudUrl, "/api/sites/bulk-register"), {
162
- method: "POST",
163
- headers,
164
- body: JSON.stringify({ siteUrls })
165
- }),
166
- analysisPost: (siteId, body) => cloudFetch(buildUrl(cloudUrl, `/api/cli/sites/${siteId}/analysis`), {
167
- method: "POST",
168
- headers,
169
- body: JSON.stringify(body)
170
- }),
171
- detail: (siteId, body) => cloudFetch(buildUrl(cloudUrl, `/api/cli/sites/${siteId}/detail`), {
172
- method: "POST",
173
- headers,
174
- body: JSON.stringify(body)
175
- })
176
- };
177
- }
178
-
179
- //#endregion
180
- //#region src/config.ts
181
- let configDir = path.join(os.homedir(), ".config", "gscdump");
182
- function getConfigDir() {
183
- return configDir;
184
- }
185
- const DEFAULT_CLOUD_URL = "https://cloud.gscdump.com";
186
- async function loadConfig() {
187
- return fs.readFile(path.join(configDir, "config.json"), "utf-8").then((data) => JSON.parse(data)).catch(() => ({}));
188
- }
189
- async function saveConfig(config) {
190
- await fs.mkdir(configDir, {
191
- recursive: true,
192
- mode: 448
193
- });
194
- await fs.writeFile(path.join(configDir, "config.json"), JSON.stringify(config, null, 2), { mode: 384 });
195
- }
196
- function getConfigPath() {
197
- return path.join(configDir, "config.json");
198
- }
199
-
200
- //#endregion
201
- //#region src/auth.ts
180
+ const REDIRECT_URI_RE = /redirect_uri=[^&]+/;
202
181
  function getTokensPath() {
203
182
  return path.join(getConfigDir(), "tokens.json");
204
183
  }
@@ -217,8 +196,8 @@ async function clearTokens() {
217
196
  logger.success("Logged out, tokens cleared");
218
197
  }
219
198
  async function getAuthCredentials(interactive) {
220
- const envClientId = process$1.env.GOOGLE_CLIENT_ID;
221
- const envClientSecret = process$1.env.GOOGLE_CLIENT_SECRET;
199
+ const envClientId = process.env.GOOGLE_CLIENT_ID;
200
+ const envClientSecret = process.env.GOOGLE_CLIENT_SECRET;
222
201
  if (envClientId && envClientSecret) {
223
202
  logger.success("Using OAuth2 credentials from environment");
224
203
  return {
@@ -233,7 +212,7 @@ async function getAuthCredentials(interactive) {
233
212
  };
234
213
  if (!interactive) {
235
214
  logger.error("GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET required for non-interactive mode");
236
- process$1.exit(1);
215
+ process.exit(1);
237
216
  }
238
217
  console.log();
239
218
  console.log(" \x1B[1mOAuth 2.0 Setup Required\x1B[0m");
@@ -250,12 +229,12 @@ async function getAuthCredentials(interactive) {
250
229
  placeholder: "your-client-id.googleusercontent.com",
251
230
  validate: (v) => v ? void 0 : "Required"
252
231
  });
253
- if (isCancel(clientIdResult)) process$1.exit(1);
232
+ if (isCancel(clientIdResult)) process.exit(1);
254
233
  const clientSecretResult = await text({
255
234
  message: "Enter your Google OAuth Client Secret:",
256
235
  validate: (v) => v ? void 0 : "Required"
257
236
  });
258
- if (isCancel(clientSecretResult)) process$1.exit(1);
237
+ if (isCancel(clientSecretResult)) process.exit(1);
259
238
  console.log();
260
239
  logger.info("Tip: Set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET env vars to skip prompts");
261
240
  return {
@@ -297,7 +276,7 @@ async function getAuthCodeViaLoopback(authUrl) {
297
276
  return;
298
277
  }
299
278
  resolvedRedirectUri = `http://127.0.0.1:${addr.port}`;
300
- const fullAuthUrl = authUrl.replace(/redirect_uri=[^&]+/, `redirect_uri=${encodeURIComponent(resolvedRedirectUri)}`);
279
+ const fullAuthUrl = authUrl.replace(REDIRECT_URI_RE, `redirect_uri=${encodeURIComponent(resolvedRedirectUri)}`);
301
280
  console.log();
302
281
  console.log(" \x1B[1mOpening browser for authorization...\x1B[0m");
303
282
  console.log(` \x1B[90mIf browser doesn't open, visit:\x1B[0m`);
@@ -316,8 +295,8 @@ async function getAuthCodeViaLoopback(authUrl) {
316
295
  }
317
296
  async function authenticate(credentials, interactive) {
318
297
  const oauth2Client = new OAuth2Client(credentials.clientId, credentials.clientSecret, "http://127.0.0.1");
319
- const envAccessToken = process$1.env.GOOGLE_ACCESS_TOKEN;
320
- const envRefreshToken = process$1.env.GOOGLE_REFRESH_TOKEN;
298
+ const envAccessToken = process.env.GOOGLE_ACCESS_TOKEN;
299
+ const envRefreshToken = process.env.GOOGLE_REFRESH_TOKEN;
321
300
  if (envAccessToken || envRefreshToken) {
322
301
  oauth2Client.setCredentials({
323
302
  access_token: envAccessToken,
@@ -347,7 +326,7 @@ async function authenticate(credentials, interactive) {
347
326
  }
348
327
  if (!interactive) {
349
328
  logger.error("No saved tokens. Run interactively first to authenticate.");
350
- process$1.exit(1);
329
+ process.exit(1);
351
330
  }
352
331
  const authUrl = oauth2Client.generateAuthUrl({
353
332
  access_type: "offline",
@@ -362,207 +341,73 @@ async function authenticate(credentials, interactive) {
362
341
  logger.success(`Tokens saved to ${getTokensPath()}`);
363
342
  return oauth2Client;
364
343
  }
365
- function getCloudTokensPath() {
366
- return path.join(getConfigDir(), "cloud-tokens.json");
367
- }
368
- async function loadCloudTokens() {
369
- return fs.readFile(getCloudTokensPath(), "utf-8").then((data) => JSON.parse(data)).catch(() => null);
370
- }
371
- async function saveCloudTokens(tokens) {
372
- await fs.mkdir(getConfigDir(), {
373
- recursive: true,
374
- mode: 448
375
- });
376
- await fs.writeFile(getCloudTokensPath(), JSON.stringify(tokens, null, 2), { mode: 384 });
377
- }
378
- async function clearCloudTokens() {
379
- await fs.rm(getCloudTokensPath()).catch(() => {});
380
- logger.success("Logged out from cloud");
381
- }
382
- async function authenticateCloud(cloudUrl, interactive) {
383
- const existingTokens = await loadCloudTokens();
384
- if (existingTokens) {
385
- const oauth2Client = new OAuth2Client();
386
- oauth2Client.setCredentials({
387
- access_token: existingTokens.accessToken,
388
- refresh_token: existingTokens.refreshToken,
389
- expiry_date: existingTokens.expiresAt
390
- });
391
- logger.success("Using cloud credentials");
392
- return oauth2Client;
393
- }
394
- if (!interactive) {
395
- logger.error("No cloud tokens. Run gscdump init to authenticate.");
396
- process$1.exit(1);
397
- }
398
- const initRes = await fetch(`${cloudUrl}/api/cli/auth/init`, { method: "POST" }).then((r) => r.json()).catch((e) => {
399
- logger.error(`Failed to connect to ${cloudUrl}: ${e.message}`);
400
- process$1.exit(1);
401
- });
402
- console.log();
403
- console.log(" \x1B[1mOpen this URL in your browser:\x1B[0m");
404
- console.log(` \x1B[36m${initRes.authUrl}\x1B[0m`);
405
- console.log();
406
- console.log(` \x1B[90mCode: ${initRes.code}\x1B[0m`);
407
- console.log();
408
- logger.info("Waiting for authorization...");
409
- const pollInterval = 2e3;
410
- const maxAttempts = Math.ceil(initRes.expiresIn * 1e3 / pollInterval);
411
- for (let i = 0; i < maxAttempts; i++) {
412
- await new Promise((r) => setTimeout(r, pollInterval));
413
- const pollRes = await fetch(`${cloudUrl}/api/cli/auth/poll?code=${initRes.code}`).then((r) => r.json()).catch(() => ({ status: "error" }));
414
- if (pollRes.status === "complete" && pollRes.tokens) {
415
- await saveCloudTokens({
416
- ...pollRes.tokens,
417
- sessionId: pollRes.sessionId,
418
- user: pollRes.user
419
- });
420
- if (pollRes.user?.email) logger.success(`Authenticated as ${pollRes.user.email}`);
421
- else logger.success("Authenticated via cloud.gscdump.com");
422
- const oauth2Client = new OAuth2Client();
423
- oauth2Client.setCredentials({
424
- access_token: pollRes.tokens.accessToken,
425
- refresh_token: pollRes.tokens.refreshToken,
426
- expiry_date: pollRes.tokens.expiresAt
427
- });
428
- return oauth2Client;
429
- }
430
- if (pollRes.status === "error") {
431
- logger.error("Authorization failed");
432
- process$1.exit(1);
433
- }
434
- }
435
- logger.error("Authorization timed out");
436
- process$1.exit(1);
437
- }
438
344
  async function getAuth(opts = {}) {
439
- const { interactive = true, config: providedConfig } = opts;
440
- const config = providedConfig || await loadConfig();
441
- if (!config.mode) {
442
- if (!interactive) {
443
- logger.error("Not configured. Run gscdump init first.");
444
- process$1.exit(1);
445
- }
446
- logger.warn("GSCDump not configured");
447
- logger.info("Run: gscdump init");
448
- process$1.exit(1);
449
- }
450
- if (config.mode === "cloud") return authenticateCloud(config.cloudUrl || DEFAULT_CLOUD_URL, interactive);
345
+ const { interactive = true } = opts;
451
346
  return authenticate(await getAuthCredentials(interactive), interactive);
452
347
  }
453
- async function getCloudClient() {
454
- const config = await loadConfig();
455
- if (config.mode !== "cloud") return null;
456
- const tokens = await loadCloudTokens();
457
- if (!tokens?.sessionId) return null;
458
- return createCloudClient(config.cloudUrl || DEFAULT_CLOUD_URL, tokens.sessionId);
348
+ function createLocalStore(opts) {
349
+ return createNodeHarness(opts);
459
350
  }
460
-
461
- //#endregion
462
- //#region src/commands/analysis.ts
463
- const ANALYSIS_TOOLS = [
464
- "striking-distance",
465
- "opportunity",
466
- "movers",
467
- "decay",
468
- "zero-click",
469
- "brand",
470
- "cannibalization",
471
- "clustering",
472
- "concentration",
473
- "seasonality"
474
- ];
475
- async function resolveSiteId(cloud, siteUrl) {
351
+ async function createCommandContext(opts = {}) {
352
+ const { needsAuth = false, needsStore = false, interactive = false } = opts;
476
353
  const config = await loadConfig();
477
- const target = siteUrl || config.defaultSite;
478
- const me = await cloud.me().catch((e) => {
479
- logger.error(`Failed to fetch profile: ${e.message}`);
480
- process$1.exit(1);
481
- });
482
- if (me.sites.length === 0) {
483
- logger.error("No registered sites. Run gscdump register first.");
484
- process$1.exit(1);
485
- }
486
- const match = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
487
- if (match) return match.siteId;
488
- if (me.sites.length === 1) return me.sites[0].siteId;
489
- const selected = await select({
490
- message: "Select a site",
491
- options: me.sites.map((s) => ({
492
- value: s.siteId,
493
- label: s.siteUrl
494
- }))
495
- });
496
- if (isCancel(selected)) {
497
- cancel("Cancelled");
498
- process$1.exit(0);
499
- }
500
- return selected;
501
- }
502
- function extractResults(data) {
503
- if (Array.isArray(data.results)) return {
504
- results: data.results,
505
- total: data.meta?.total ?? data.results.length
506
- };
507
- if (Array.isArray(data.keywords)) return {
508
- results: data.keywords,
509
- total: data.totalCount ?? data.keywords.length
354
+ const auth = needsAuth ? await getAuth({
355
+ interactive,
356
+ config
357
+ }) : null;
358
+ const client = auth ? googleSearchConsole(auth) : null;
359
+ const store = needsStore ? createLocalStore({ dataDir: resolveDataDir(config) }) : null;
360
+ const loadSites = async () => {
361
+ if (!client) throw new Error("loadSites requires needsAuth: true");
362
+ return (await client.sites().catch((e) => {
363
+ logger.error(`Failed to fetch sites: ${e.message}`);
364
+ process.exit(1);
365
+ })).filter((s) => s.siteUrl && s.permissionLevel !== "siteUnverifiedUser").map((s) => ({
366
+ siteUrl: s.siteUrl,
367
+ permissionLevel: s.permissionLevel || "unknown"
368
+ }));
510
369
  };
511
- if (Array.isArray(data.clusters)) {
512
- const clusters = data.clusters;
513
- return {
514
- results: clusters,
515
- total: data.meta?.totalClusters ?? clusters.length
516
- };
517
- }
518
- if (Array.isArray(data.rising)) {
519
- const rows = [...data.rising.map((r) => ({
520
- ...r,
521
- direction: "rising"
522
- })), ...(data.declining || []).map((r) => ({
523
- ...r,
524
- direction: "declining"
525
- }))];
526
- return {
527
- results: rows,
528
- total: rows.length
529
- };
530
- }
531
- if (Array.isArray(data.brand)) {
532
- const rows = [...data.brand.map((r) => ({
533
- ...r,
534
- segment: "brand"
535
- })), ...(data.nonBrand || []).map((r) => ({
536
- ...r,
537
- segment: "non-brand"
538
- }))];
539
- return {
540
- results: rows,
541
- total: rows.length
542
- };
543
- }
544
- if (Array.isArray(data.monthlyBreakdown)) return {
545
- results: data.monthlyBreakdown,
546
- total: data.monthlyBreakdown.length
370
+ const resolveSite = async (target) => {
371
+ const hint = target ?? config.defaultSite;
372
+ const sites = await loadSites();
373
+ if (sites.length === 0) {
374
+ logger.error("No verified sites found");
375
+ process.exit(1);
376
+ }
377
+ if (hint) {
378
+ const match = sites.find((s) => s.siteUrl === hint || s.siteUrl.includes(hint));
379
+ if (match) return match.siteUrl;
380
+ }
381
+ if (sites.length === 1) return sites[0].siteUrl;
382
+ const selected = await select({
383
+ message: "Select a site",
384
+ options: sites.map((s) => ({
385
+ value: s.siteUrl,
386
+ label: s.siteUrl
387
+ }))
388
+ });
389
+ if (isCancel(selected)) {
390
+ cancel("Cancelled");
391
+ process.exit(0);
392
+ }
393
+ return selected;
547
394
  };
548
- if (data.giniCoefficient !== void 0) {
549
- const { meta: _m, ...rest } = data;
550
- return {
551
- results: [rest],
552
- total: 1
553
- };
554
- }
555
395
  return {
556
- results: [],
557
- total: 0
396
+ config,
397
+ auth,
398
+ client,
399
+ store,
400
+ loadSites,
401
+ resolveSite
558
402
  };
559
403
  }
404
+ const ANALYSIS_TOOLS = defaultAnalyzerRegistry.listAnalyzerIds();
560
405
  const TOOL_EXTRA_ARGS = {
561
- "brand": { "brand-terms": {
406
+ brand: { "brand-terms": {
562
407
  type: "string",
563
408
  description: "Comma-separated brand terms (required)"
564
409
  } },
565
- "movers": {
410
+ movers: {
566
411
  "prev-start": {
567
412
  type: "string",
568
413
  description: "Previous period start date (required)"
@@ -572,7 +417,7 @@ const TOOL_EXTRA_ARGS = {
572
417
  description: "Previous period end date (required)"
573
418
  }
574
419
  },
575
- "decay": {
420
+ decay: {
576
421
  "prev-start": {
577
422
  type: "string",
578
423
  description: "Previous period start date (required)"
@@ -582,33 +427,49 @@ const TOOL_EXTRA_ARGS = {
582
427
  description: "Previous period end date (required)"
583
428
  }
584
429
  },
585
- "concentration": { dimension: {
430
+ concentration: { dimension: {
586
431
  type: "string",
587
432
  description: "Dimension: pages or keywords (default: pages)"
588
433
  } },
589
- "seasonality": { metric: {
434
+ seasonality: { metric: {
590
435
  type: "string",
591
436
  description: "Metric: clicks or impressions (default: clicks)"
592
437
  } },
593
- "clustering": { "cluster-by": {
438
+ clustering: { "cluster-by": {
594
439
  type: "string",
595
440
  description: "Cluster by: prefix, intent, or both (default: both)"
596
- } }
441
+ } },
442
+ trends: {
443
+ "dimension": {
444
+ type: "string",
445
+ description: "Dimension: pages or keywords (default: pages)"
446
+ },
447
+ "weeks": {
448
+ type: "string",
449
+ description: "Rolling window size in weeks (default: 28)"
450
+ },
451
+ "min-weeks": {
452
+ type: "string",
453
+ description: "Minimum weeks with data to include an entity (default: weeks/4)"
454
+ }
455
+ }
597
456
  };
598
- function buildBody(tool, args) {
599
- const body = {
457
+ function buildParams(tool, args) {
458
+ const params = {
600
459
  type: tool,
601
460
  startDate: args.start ? String(args.start) : void 0,
602
461
  endDate: args.end ? String(args.end) : void 0,
603
462
  limit: args.limit ? Number(args.limit) : void 0
604
463
  };
605
- if (args["brand-terms"]) body.brandTerms = String(args["brand-terms"]).split(",").map((t) => t.trim()).filter(Boolean);
606
- if (args["prev-start"]) body.prevStartDate = String(args["prev-start"]);
607
- if (args["prev-end"]) body.prevEndDate = String(args["prev-end"]);
608
- if (args.dimension) body.dimension = String(args.dimension);
609
- if (args.metric) body.metric = String(args.metric);
610
- if (args["cluster-by"]) body.clusterBy = String(args["cluster-by"]);
611
- return body;
464
+ if (args["brand-terms"]) params.brandTerms = String(args["brand-terms"]).split(",").map((t) => t.trim()).filter(Boolean);
465
+ if (args["prev-start"]) params.prevStartDate = String(args["prev-start"]);
466
+ if (args["prev-end"]) params.prevEndDate = String(args["prev-end"]);
467
+ if (args.dimension) params.dimension = String(args.dimension);
468
+ if (args.metric) params.metric = String(args.metric);
469
+ if (args["cluster-by"]) params.clusterBy = String(args["cluster-by"]);
470
+ if (args.weeks) params.weeks = Number(args.weeks);
471
+ if (args["min-weeks"]) params.minWeeksWithData = Number(args["min-weeks"]);
472
+ return params;
612
473
  }
613
474
  function makeToolCommand(tool) {
614
475
  const extraArgs = TOOL_EXTRA_ARGS[tool] || {};
@@ -648,129 +509,232 @@ function makeToolCommand(tool) {
648
509
  default: false,
649
510
  description: "Output as JSON"
650
511
  },
512
+ live: {
513
+ type: "boolean",
514
+ default: false,
515
+ description: "Force live GSC API; bypass local Parquet store"
516
+ },
651
517
  ...extraArgs
652
518
  },
653
519
  async run({ args }) {
654
- const cloud = await getCloudClient();
655
- if (!cloud) {
656
- logger.error("Analysis requires cloud mode. Run gscdump init to set up cloud mode.");
657
- process$1.exit(1);
658
- }
659
- const siteId = await resolveSiteId(cloud, args.site);
660
- logger.info(`Running ${tool} analysis...`);
661
- const body = buildBody(tool, args);
662
- const data = await cloud.analysisPost(siteId, body).catch((e) => {
663
- logger.error(`Analysis failed: ${e.message}`);
664
- process$1.exit(1);
520
+ const ctx = await createCommandContext({
521
+ needsAuth: true,
522
+ needsStore: !args.live
665
523
  });
524
+ const siteUrl = await ctx.resolveSite(args.site);
525
+ logger.info(`Running ${tool} analysis...`);
526
+ const params = buildParams(tool, args);
666
527
  const format = args.json ? "json" : String(args.format);
667
- if (format === "json") {
668
- console.log(JSON.stringify(data, null, 2));
669
- return;
670
- }
671
- const { results, total } = extractResults(data);
672
- if (format === "csv" && results.length > 0) {
673
- const cols$1 = Object.keys(results[0]);
674
- console.log(toCSV(results, cols$1));
528
+ if (!args.live) {
529
+ const store = ctx.store;
530
+ if (!await hasLocalData(store, siteUrl).catch(() => false)) {
531
+ logger.error(`No local data for ${siteUrl}. Run \`gscdump sync\` first, or pass --live.`);
532
+ process.exit(1);
533
+ }
534
+ const localResult = await runLocalAnalysis(store, siteUrl, params).catch((e) => {
535
+ if (e instanceof LocalStoreUnsupportedError) {
536
+ logger.error(`${e.message}. Pass --live to run against the GSC API.`);
537
+ process.exit(1);
538
+ }
539
+ if (e instanceof LocalStoreEmptyError) {
540
+ logger.error(`${e.message}`);
541
+ process.exit(1);
542
+ }
543
+ logger.error(`Local analysis failed: ${e.message}`);
544
+ process.exit(1);
545
+ });
546
+ if (format === "json") {
547
+ console.log(JSON.stringify(localResult, null, 2));
548
+ return;
549
+ }
550
+ renderResults(localResult.results, localResult.results.length, format);
675
551
  return;
676
552
  }
677
- if (results.length === 0) {
678
- logger.warn("No results found");
553
+ const result = await runLiveAnalysis(ctx.client, siteUrl, params).catch((e) => {
554
+ logger.error(`Analysis failed: ${e.message}`);
555
+ process.exit(1);
556
+ });
557
+ if (format === "json") {
558
+ console.log(JSON.stringify(result, null, 2));
679
559
  return;
680
560
  }
681
- const cols = Object.keys(results[0]);
682
- const widths = cols.map((c) => Math.max(c.length, ...results.map((r) => String(r[c] ?? "").length).slice(0, 20)));
683
- console.log();
684
- console.log(` ${cols.map((c, i) => c.padEnd(widths[i])).join(" ")}`);
685
- console.log(` ${cols.map((_, i) => "─".repeat(widths[i])).join(" ")}`);
686
- for (const row of results) console.log(` ${cols.map((c, i) => {
687
- const val = row[c];
688
- return (typeof val === "number" ? Number.isInteger(val) ? String(val) : val.toFixed(2) : String(val ?? "")).padEnd(widths[i]);
689
- }).join(" ")}`);
690
- console.log();
691
- logger.success(`${results.length} results`);
692
- if (total > results.length) logger.info(`Total: ${total} (showing ${results.length})`);
561
+ renderResults(result.results, result.results.length, format);
693
562
  }
694
563
  });
695
564
  }
696
- const analysisCommand = defineCommand({
697
- meta: {
698
- name: "analysis",
699
- description: "SEO analysis tools (cloud mode only)"
700
- },
701
- subCommands: Object.fromEntries(ANALYSIS_TOOLS.map((tool) => [tool, makeToolCommand(tool)]))
702
- });
703
-
704
- //#endregion
705
- //#region src/commands/auth.ts
706
- const statusCommand$2 = defineCommand({
707
- meta: {
708
- name: "status",
709
- description: "Show current authentication status"
710
- },
711
- async run() {
712
- const config = await loadConfig();
565
+ const SPARK_CHARS = [
566
+ "▁",
567
+ "▂",
568
+ "▃",
569
+ "▄",
570
+ "▅",
571
+ "▆",
572
+ "▇",
573
+ "█"
574
+ ];
575
+ const SPARK_GAP = "·";
576
+ const PERCENT_COLS = {
577
+ growthRatio: "ratio_to_pct",
578
+ brandShare: "direct",
579
+ topNConcentration: "direct",
580
+ declinePercent: "direct",
581
+ ctr: "direct",
582
+ share: "direct",
583
+ vsAverage: "ratio_to_pct",
584
+ clicksChangePercent: "scaled",
585
+ impressionsChangePercent: "scaled"
586
+ };
587
+ function formatPct(val, style) {
588
+ const pct = style === "ratio_to_pct" ? (val - 1) * 100 : style === "direct" ? val * 100 : val;
589
+ if (!Number.isFinite(pct)) return "";
590
+ return `${pct > 0 ? "+" : ""}${pct.toFixed(0)}%`;
591
+ }
592
+ function isTimeSeries(arr) {
593
+ if (arr.length === 0) return false;
594
+ const first = arr[0];
595
+ if (typeof first !== "object" || first === null) return false;
596
+ const keys = Object.keys(first);
597
+ const hasBucket = keys.includes("week") || keys.includes("date") || keys.includes("month");
598
+ const hasMetric = keys.includes("clicks") || keys.includes("impressions") || keys.includes("value");
599
+ return hasBucket && hasMetric;
600
+ }
601
+ function pickBucketKey(first) {
602
+ if ("week" in first) return "week";
603
+ if ("date" in first) return "date";
604
+ return "month";
605
+ }
606
+ function pickMetricKey(first) {
607
+ if ("clicks" in first) return "clicks";
608
+ if ("impressions" in first) return "impressions";
609
+ return "value";
610
+ }
611
+ function computeAlignedSparklines(results, col) {
612
+ const allBuckets = /* @__PURE__ */ new Set();
613
+ const perRow = [];
614
+ let bucketKey = "week";
615
+ let metricKey = "clicks";
616
+ for (const r of results) {
617
+ const arr = r[col];
618
+ if (!Array.isArray(arr) || !isTimeSeries(arr)) {
619
+ perRow.push(null);
620
+ continue;
621
+ }
622
+ const first = arr[0];
623
+ bucketKey = pickBucketKey(first);
624
+ metricKey = pickMetricKey(first);
625
+ const m = /* @__PURE__ */ new Map();
626
+ for (const item of arr) {
627
+ const rec = item;
628
+ const key = String(rec[bucketKey]);
629
+ const val = Number(rec[metricKey] ?? 0);
630
+ allBuckets.add(key);
631
+ m.set(key, val);
632
+ }
633
+ perRow.push(m);
634
+ }
635
+ const sorted = [...allBuckets].sort();
636
+ return perRow.map((m) => {
637
+ if (!m) return "";
638
+ const values = sorted.map((b) => m.has(b) ? m.get(b) : null);
639
+ const nonNull = values.filter((v) => v != null);
640
+ if (nonNull.length === 0) return SPARK_GAP.repeat(values.length);
641
+ const min = Math.min(...nonNull);
642
+ const range = Math.max(...nonNull) - min;
643
+ return values.map((v) => {
644
+ if (v == null) return SPARK_GAP;
645
+ if (range === 0) return SPARK_CHARS[0];
646
+ return SPARK_CHARS[Math.round((v - min) / range * (SPARK_CHARS.length - 1))];
647
+ }).join("");
648
+ });
649
+ }
650
+ function classifyCol(col, values) {
651
+ const firstNonNull = values.find((v) => v != null);
652
+ if (firstNonNull == null) return "text";
653
+ if (Array.isArray(firstNonNull) && isTimeSeries(firstNonNull)) return "series";
654
+ if (col in PERCENT_COLS && values.every((v) => v == null || typeof v === "number")) return "pct";
655
+ if (values.every((v) => v == null || typeof v === "number")) return values.some((v) => typeof v === "number" && !Number.isInteger(v)) ? "float" : "int";
656
+ return "text";
657
+ }
658
+ function formatCellKinded(val, col, kind) {
659
+ if (val == null) return "";
660
+ if (kind === "int") return typeof val === "number" ? String(val) : String(val);
661
+ if (kind === "float") return typeof val === "number" ? val.toFixed(2) : String(val);
662
+ if (kind === "pct") return typeof val === "number" ? formatPct(val, PERCENT_COLS[col]) : String(val);
663
+ if (Array.isArray(val)) return `[${val.length} item${val.length === 1 ? "" : "s"}]`;
664
+ if (typeof val === "object") return JSON.stringify(val);
665
+ return String(val);
666
+ }
667
+ function computeRowSeriesSparkline(results) {
668
+ if (results.length < 2) return null;
669
+ const first = results[0];
670
+ const bucketKey = "week" in first ? "week" : "date" in first ? "date" : "month" in first ? "month" : null;
671
+ if (!bucketKey) return null;
672
+ const metricKey = "value" in first ? "value" : "clicks" in first ? "clicks" : "impressions" in first ? "impressions" : null;
673
+ if (!metricKey) return null;
674
+ for (const r of results) if (!(bucketKey in r) || !(metricKey in r)) return null;
675
+ const values = [...results].sort((a, b) => String(a[bucketKey]).localeCompare(String(b[bucketKey]))).map((r) => Number(r[metricKey] ?? 0));
676
+ const nonNull = values.filter((v) => Number.isFinite(v));
677
+ if (nonNull.length === 0) return null;
678
+ const min = Math.min(...nonNull);
679
+ const range = Math.max(...nonNull) - min;
680
+ return {
681
+ spark: values.map((v) => {
682
+ if (range === 0) return SPARK_CHARS[0];
683
+ return SPARK_CHARS[Math.round((v - min) / range * (SPARK_CHARS.length - 1))];
684
+ }).join(""),
685
+ label: `${results.length} ${bucketKey}${results.length === 1 ? "" : "s"} of ${metricKey}`
686
+ };
687
+ }
688
+ function renderResults(results, total, format) {
689
+ if (format === "csv" && results.length > 0) {
690
+ const cols = Object.keys(results[0]);
691
+ console.log(toCSV(results, cols));
692
+ return;
693
+ }
694
+ if (results.length === 0) {
695
+ logger.warn("No results found");
696
+ return;
697
+ }
698
+ const cols = Object.keys(results[0]);
699
+ const kinds = cols.map((c) => classifyCol(c, results.map((r) => r[c])));
700
+ const sparklineByCol = {};
701
+ cols.forEach((c, i) => {
702
+ if (kinds[i] === "series") sparklineByCol[c] = computeAlignedSparklines(results, c);
703
+ });
704
+ const cellText = (row, rowIdx, colIdx) => {
705
+ const c = cols[colIdx];
706
+ const k = kinds[colIdx];
707
+ if (k === "series") return sparklineByCol[c][rowIdx];
708
+ return formatCellKinded(row[c], c, k);
709
+ };
710
+ const widths = cols.map((c, i) => {
711
+ let w = c.length;
712
+ const limit = Math.min(results.length, 20);
713
+ for (let j = 0; j < limit; j++) {
714
+ const len = cellText(results[j], j, i).length;
715
+ if (len > w) w = len;
716
+ }
717
+ return w;
718
+ });
719
+ console.log();
720
+ console.log(` ${cols.map((c, i) => c.padEnd(widths[i])).join(" ")}`);
721
+ console.log(` ${cols.map((_, i) => "─".repeat(widths[i])).join(" ")}`);
722
+ for (let r = 0; r < results.length; r++) console.log(` ${cols.map((_, i) => cellText(results[r], r, i).padEnd(widths[i])).join(" ")}`);
723
+ const rowSeriesSparkline = computeRowSeriesSparkline(results);
724
+ if (rowSeriesSparkline) {
713
725
  console.log();
714
- console.log(` Mode: ${config.mode ? `\x1B[36m${config.mode}\x1B[0m` : "\x1B[33mnot configured\x1B[0m"}`);
715
- if (!config.mode) {
716
- logger.info("Run gscdump init to configure");
717
- return;
718
- }
719
- if (config.mode === "cloud") {
720
- console.log(` Cloud: \x1B[36m${config.cloudUrl}\x1B[0m`);
721
- const tokens = await loadCloudTokens();
722
- if (!tokens) {
723
- logger.warn("Not authenticated");
724
- logger.info("Run gscdump init --force to re-authenticate");
725
- return;
726
- }
727
- const hasSession = !!tokens.sessionId;
728
- const hasAccess = !!tokens.accessToken;
729
- const hasRefresh = !!tokens.refreshToken;
730
- const expiry = tokens.expiresAt ? new Date(tokens.expiresAt) : null;
731
- const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
732
- logger.success("Authenticated");
733
- console.log();
734
- if (tokens.user?.email) console.log(` User: \x1B[36m${tokens.user.email}\x1B[0m`);
735
- if (tokens.user?.publicId) console.log(` User ID: \x1B[90m${tokens.user.publicId}\x1B[0m`);
736
- console.log(` Session: ${hasSession ? "\x1B[32mactive\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
737
- console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
738
- console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
739
- if (expiry) {
740
- const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
741
- console.log(` Expires: ${expiry.toISOString()} (${status})`);
742
- }
743
- } else {
744
- const tokens = await loadTokens();
745
- if (!tokens) {
746
- logger.warn("Not authenticated");
747
- logger.info("Run gscdump init --force to re-authenticate");
748
- return;
749
- }
750
- const hasAccess = !!tokens.access_token;
751
- const hasRefresh = !!tokens.refresh_token;
752
- const expiry = tokens.expiry_date ? new Date(tokens.expiry_date) : null;
753
- const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
754
- logger.success("Authenticated");
755
- console.log();
756
- console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
757
- console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
758
- if (expiry) {
759
- const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
760
- console.log(` Expires: ${expiry.toISOString()} (${status})`);
761
- }
762
- }
726
+ console.log(` trend: ${rowSeriesSparkline.spark} (${rowSeriesSparkline.label})`);
763
727
  }
764
- });
765
- const logoutCommand = defineCommand({
728
+ console.log();
729
+ logger.success(`${results.length} results`);
730
+ if (total > results.length) logger.info(`Total: ${total} (showing ${results.length})`);
731
+ }
732
+ const analyzeCommand = defineCommand({
766
733
  meta: {
767
- name: "logout",
768
- description: "Clear stored OAuth tokens"
734
+ name: "analyze",
735
+ description: "SEO analysis tools"
769
736
  },
770
- async run() {
771
- if ((await loadConfig()).mode === "cloud") await clearCloudTokens();
772
- else await clearTokens();
773
- }
737
+ subCommands: Object.fromEntries(ANALYSIS_TOOLS.map((tool) => [tool, makeToolCommand(tool)]))
774
738
  });
775
739
  const authCommand = defineCommand({
776
740
  meta: {
@@ -778,162 +742,134 @@ const authCommand = defineCommand({
778
742
  description: "Manage authentication"
779
743
  },
780
744
  subCommands: {
781
- status: statusCommand$2,
782
- logout: logoutCommand
745
+ status: defineCommand({
746
+ meta: {
747
+ name: "status",
748
+ description: "Show current authentication status"
749
+ },
750
+ async run() {
751
+ const tokens = await loadTokens();
752
+ if (!tokens) {
753
+ logger.warn("Not authenticated");
754
+ logger.info("Run gscdump init to authenticate");
755
+ return;
756
+ }
757
+ const hasAccess = !!tokens.access_token;
758
+ const hasRefresh = !!tokens.refresh_token;
759
+ const expiry = tokens.expiry_date ? new Date(tokens.expiry_date) : null;
760
+ const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
761
+ logger.success("Authenticated");
762
+ console.log();
763
+ console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
764
+ console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
765
+ if (expiry) {
766
+ const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
767
+ console.log(` Expires: ${expiry.toISOString()} (${status})`);
768
+ }
769
+ }
770
+ }),
771
+ logout: defineCommand({
772
+ meta: {
773
+ name: "logout",
774
+ description: "Clear stored OAuth tokens"
775
+ },
776
+ async run() {
777
+ await clearTokens();
778
+ }
779
+ })
783
780
  }
784
781
  });
785
-
786
- //#endregion
787
- //#region src/commands/config.ts
788
- const showCommand = defineCommand({
782
+ const configCommand = defineCommand({
789
783
  meta: {
790
- name: "show",
791
- description: "Show current config"
784
+ name: "config",
785
+ description: "Manage configuration"
792
786
  },
793
- async run() {
794
- const config = await loadConfig();
795
- const configPath = getConfigPath();
796
- logger.info(`Config: ${configPath}`);
797
- console.log();
798
- if (Object.keys(config).length === 0) {
799
- logger.warn("No config set");
800
- return;
801
- }
802
- console.log(JSON.stringify(config, null, 2));
787
+ subCommands: {
788
+ show: defineCommand({
789
+ meta: {
790
+ name: "show",
791
+ description: "Show current config"
792
+ },
793
+ async run() {
794
+ const config = await loadConfig();
795
+ const configPath = getConfigPath();
796
+ logger.info(`Config: ${configPath}`);
797
+ console.log();
798
+ if (Object.keys(config).length === 0) {
799
+ logger.warn("No config set");
800
+ return;
801
+ }
802
+ console.log(JSON.stringify(config, null, 2));
803
+ }
804
+ }),
805
+ set: defineCommand({
806
+ meta: {
807
+ name: "set",
808
+ description: "Set a config value"
809
+ },
810
+ args: {
811
+ key: {
812
+ type: "positional",
813
+ description: "Config key (defaultSite, defaultPeriod, defaultFormat, defaultDb)",
814
+ required: true
815
+ },
816
+ value: {
817
+ type: "positional",
818
+ description: "Value to set",
819
+ required: true
820
+ }
821
+ },
822
+ async run({ args }) {
823
+ const validKeys = [
824
+ "defaultSite",
825
+ "defaultPeriod",
826
+ "defaultFormat",
827
+ "defaultDb"
828
+ ];
829
+ if (!validKeys.includes(args.key)) {
830
+ logger.error(`Invalid key: ${args.key}`);
831
+ logger.info(`Valid keys: ${validKeys.join(", ")}`);
832
+ process.exit(1);
833
+ }
834
+ const config = await loadConfig();
835
+ config[args.key] = args.value;
836
+ await saveConfig(config);
837
+ logger.success(`Set ${args.key} = ${args.value}`);
838
+ }
839
+ }),
840
+ unset: defineCommand({
841
+ meta: {
842
+ name: "unset",
843
+ description: "Remove a config value"
844
+ },
845
+ args: { key: {
846
+ type: "positional",
847
+ description: "Config key to remove",
848
+ required: true
849
+ } },
850
+ async run({ args }) {
851
+ const config = await loadConfig();
852
+ delete config[args.key];
853
+ await saveConfig(config);
854
+ logger.success(`Removed ${args.key}`);
855
+ }
856
+ }),
857
+ path: defineCommand({
858
+ meta: {
859
+ name: "path",
860
+ description: "Show config file path"
861
+ },
862
+ run() {
863
+ console.log(getConfigPath());
864
+ }
865
+ })
803
866
  }
804
867
  });
805
- const setCommand = defineCommand({
868
+ const DEFAULT_OUT = "./gscdump-export";
869
+ const dumpCommand = defineCommand({
806
870
  meta: {
807
- name: "set",
808
- description: "Set a config value"
809
- },
810
- args: {
811
- key: {
812
- type: "positional",
813
- description: "Config key (defaultSite, defaultPeriod, defaultFormat, defaultDb)",
814
- required: true
815
- },
816
- value: {
817
- type: "positional",
818
- description: "Value to set",
819
- required: true
820
- }
821
- },
822
- async run({ args }) {
823
- const validKeys = [
824
- "defaultSite",
825
- "defaultPeriod",
826
- "defaultFormat",
827
- "defaultDb"
828
- ];
829
- if (!validKeys.includes(args.key)) {
830
- logger.error(`Invalid key: ${args.key}`);
831
- logger.info(`Valid keys: ${validKeys.join(", ")}`);
832
- process$1.exit(1);
833
- }
834
- const config = await loadConfig();
835
- config[args.key] = args.value;
836
- await saveConfig(config);
837
- logger.success(`Set ${args.key} = ${args.value}`);
838
- }
839
- });
840
- const unsetCommand = defineCommand({
841
- meta: {
842
- name: "unset",
843
- description: "Remove a config value"
844
- },
845
- args: { key: {
846
- type: "positional",
847
- description: "Config key to remove",
848
- required: true
849
- } },
850
- async run({ args }) {
851
- const config = await loadConfig();
852
- delete config[args.key];
853
- await saveConfig(config);
854
- logger.success(`Removed ${args.key}`);
855
- }
856
- });
857
- const pathCommand = defineCommand({
858
- meta: {
859
- name: "path",
860
- description: "Show config file path"
861
- },
862
- run() {
863
- console.log(getConfigPath());
864
- }
865
- });
866
- const configCommand = defineCommand({
867
- meta: {
868
- name: "config",
869
- description: "Manage configuration"
870
- },
871
- subCommands: {
872
- show: showCommand,
873
- set: setCommand,
874
- unset: unsetCommand,
875
- path: pathCommand
876
- }
877
- });
878
-
879
- //#endregion
880
- //#region src/commands/dump.ts
881
- const DUMP_DATA_TYPES = [
882
- "pages",
883
- "keywords",
884
- "countries",
885
- "devices"
886
- ];
887
- function getDimensions(dataType) {
888
- switch (dataType) {
889
- case "pages": return [page, date];
890
- case "keywords": return [query, date];
891
- case "countries": return [country, date];
892
- case "devices": return [device, date];
893
- }
894
- }
895
- function getDimensionNames(dataType) {
896
- switch (dataType) {
897
- case "pages": return "page,date";
898
- case "keywords": return "query,date";
899
- case "countries": return "country,date";
900
- case "devices": return "device,date";
901
- }
902
- }
903
- async function resolveCloudSite$3(cloud, target) {
904
- const me = await cloud.me().catch((e) => {
905
- logger.error(`Failed to fetch sites: ${e.message}`);
906
- process$1.exit(1);
907
- });
908
- if (me.sites.length === 0) {
909
- logger.error("No registered sites. Run gscdump register first.");
910
- process$1.exit(1);
911
- }
912
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
913
- if (!site) if (me.sites.length === 1) site = me.sites[0];
914
- else {
915
- const selected = await select({
916
- message: "Select a site",
917
- options: me.sites.map((s) => ({
918
- value: s.siteId,
919
- label: s.siteUrl
920
- }))
921
- });
922
- if (isCancel(selected)) {
923
- cancel("Cancelled");
924
- process$1.exit(0);
925
- }
926
- site = me.sites.find((s) => s.siteId === selected);
927
- }
928
- return {
929
- siteId: site.siteId,
930
- siteUrl: site.siteUrl
931
- };
932
- }
933
- const dumpCommand = defineCommand({
934
- meta: {
935
- name: "dump",
936
- description: "Export search analytics data via GSC API"
871
+ name: "dump",
872
+ description: "Export live Parquet files from the local store to a directory"
937
873
  },
938
874
  args: {
939
875
  site: {
@@ -941,325 +877,197 @@ const dumpCommand = defineCommand({
941
877
  alias: "s",
942
878
  description: "Site URL (e.g., sc-domain:example.com)"
943
879
  },
944
- output: {
880
+ out: {
945
881
  type: "string",
946
882
  alias: "o",
947
- description: "Output file path (default: stdout)"
948
- },
949
- format: {
950
- type: "string",
951
- alias: "f",
952
- default: "json",
953
- description: "Output format: json or csv"
954
- },
955
- start: {
956
- type: "string",
957
- description: "Start date (YYYY-MM-DD)"
958
- },
959
- end: {
960
- type: "string",
961
- description: "End date (YYYY-MM-DD)"
962
- },
963
- days: {
964
- type: "string",
965
- alias: "d",
966
- default: "28",
967
- description: "Number of days to fetch (default: 28)"
968
- },
969
- types: {
970
- type: "string",
971
- alias: "t",
972
- description: "Data types: pages,keywords,countries,devices"
883
+ default: DEFAULT_OUT,
884
+ description: `Output directory (default: ${DEFAULT_OUT})`
973
885
  },
974
- limit: {
975
- type: "string",
976
- alias: "l",
977
- default: "25000",
978
- description: "Max rows per data type"
886
+ compact: {
887
+ type: "boolean",
888
+ default: false,
889
+ description: "Compact every closed month into a single file before exporting"
979
890
  },
980
891
  quiet: {
981
892
  type: "boolean",
982
893
  alias: "q",
983
894
  default: false,
984
895
  description: "Suppress progress output"
985
- },
986
- interactive: {
987
- type: "boolean",
988
- alias: "i",
989
- default: false,
990
- description: "Interactive mode - prompts for options"
991
896
  }
992
897
  },
993
898
  async run({ args }) {
994
- const config = await loadConfig();
995
- let startDate;
996
- let endDate;
997
- if (args.start && args.end) {
998
- startDate = String(args.start);
999
- endDate = String(args.end);
1000
- } else if (args.interactive) {
1001
- const startInput = await text({
1002
- message: "Start date (YYYY-MM-DD)",
1003
- placeholder: (/* @__PURE__ */ new Date(Date.now() - Number(args.days) * 864e5)).toISOString().split("T")[0]
1004
- });
1005
- if (isCancel(startInput)) {
1006
- cancel("Cancelled");
1007
- process$1.exit(0);
1008
- }
1009
- const endInput = await text({
1010
- message: "End date (YYYY-MM-DD)",
1011
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0]
1012
- });
1013
- if (isCancel(endInput)) {
1014
- cancel("Cancelled");
1015
- process$1.exit(0);
1016
- }
1017
- startDate = String(startInput) || (/* @__PURE__ */ new Date(Date.now() - Number(args.days) * 864e5)).toISOString().split("T")[0];
1018
- endDate = String(endInput) || (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
1019
- } else {
1020
- const days = Number.parseInt(String(args.days), 10);
1021
- endDate = (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
1022
- startDate = (/* @__PURE__ */ new Date(Date.now() - (days + 3) * 864e5)).toISOString().split("T")[0];
1023
- }
1024
- let dataTypes;
1025
- if (args.types) dataTypes = String(args.types).split(",").filter((t) => DUMP_DATA_TYPES.includes(t));
1026
- else if (args.interactive) {
1027
- const selected = await multiselect({
1028
- message: "Select data types to export",
1029
- options: DUMP_DATA_TYPES.map((t) => ({
1030
- value: t,
1031
- label: t
1032
- })),
1033
- initialValues: ["pages", "keywords"]
1034
- });
1035
- if (isCancel(selected)) {
1036
- cancel("Cancelled");
1037
- process$1.exit(0);
1038
- }
1039
- dataTypes = selected;
1040
- } else dataTypes = ["pages", "keywords"];
1041
- const rowLimit = Number.parseInt(String(args.limit), 10);
1042
- const format = String(args.format);
1043
- const cloud = await getCloudClient();
1044
- if (cloud) {
1045
- const { siteId, siteUrl: siteUrl$1 } = await resolveCloudSite$3(cloud, args.site || config.defaultSite);
1046
- const output$1 = {
1047
- siteUrl: siteUrl$1,
1048
- dateRange: {
1049
- start: startDate,
1050
- end: endDate
1051
- },
1052
- exportedAt: (/* @__PURE__ */ new Date()).toISOString()
1053
- };
1054
- const totalSteps$1 = dataTypes.length;
1055
- let currentStep$1 = 0;
1056
- for (const dataType of dataTypes) {
1057
- currentStep$1++;
1058
- if (!args.quiet) {
1059
- clearLine();
1060
- process$1.stdout.write(progressBar(currentStep$1, totalSteps$1, dataType));
1061
- }
1062
- const dimensions = getDimensionNames(dataType);
1063
- const result = await cloud.query(siteId, {
1064
- startDate,
1065
- endDate,
1066
- dimensions,
1067
- rowLimit: String(rowLimit)
1068
- }).catch((e) => {
1069
- logger.error(`Query failed: ${e.message}`);
1070
- process$1.exit(1);
1071
- });
1072
- output$1[dataType] = {
1073
- total: result.rows.length,
1074
- data: result.rows
1075
- };
1076
- }
1077
- if (!args.quiet) {
1078
- clearLine();
1079
- logger.success(`Exported ${dataTypes.join(", ")} for ${siteUrl$1}`);
1080
- }
1081
- const content$1 = format === "csv" ? exportToCSV(output$1) : JSON.stringify(output$1, null, 2);
1082
- if (args.output) {
1083
- await fs.writeFile(String(args.output), content$1);
1084
- if (!args.quiet) logger.info(`Written to ${args.output}`);
1085
- } else console.log(content$1);
1086
- return;
1087
- }
1088
- const client = googleSearchConsole(await getAuth({
1089
- interactive: false,
1090
- config
1091
- }));
1092
- let siteUrl = String(args.site || config.defaultSite || "");
1093
- if (!siteUrl || args.interactive) {
1094
- const verified = (await client.sites()).filter((s) => s.permissionLevel !== "siteUnverifiedUser");
1095
- if (verified.length === 0) {
1096
- logger.error("No verified sites found");
1097
- process$1.exit(1);
1098
- }
1099
- const selected = await select({
1100
- message: "Select a site",
1101
- options: verified.map((s) => ({
1102
- value: s.siteUrl,
1103
- label: s.siteUrl
1104
- })),
1105
- initialValue: siteUrl || verified[0]?.siteUrl
1106
- });
1107
- if (isCancel(selected)) {
1108
- cancel("Cancelled");
1109
- process$1.exit(0);
1110
- }
1111
- siteUrl = selected;
1112
- }
1113
- const output = {
1114
- siteUrl,
1115
- dateRange: {
1116
- start: startDate,
1117
- end: endDate
1118
- },
1119
- exportedAt: (/* @__PURE__ */ new Date()).toISOString()
1120
- };
1121
- const totalSteps = dataTypes.length;
1122
- let currentStep = 0;
1123
- for (const dataType of dataTypes) {
1124
- currentStep++;
1125
- if (!args.quiet) {
1126
- clearLine();
1127
- process$1.stdout.write(progressBar(currentStep, totalSteps, dataType));
1128
- }
1129
- const dimensions = getDimensions(dataType);
1130
- const builder = gsc.select(...dimensions).where(between(date, startDate, endDate)).limit(rowLimit);
1131
- const rows = [];
1132
- for await (const batch of client.query(siteUrl, builder)) rows.push(...batch);
1133
- output[dataType] = {
1134
- total: rows.length,
1135
- data: rows
1136
- };
1137
- }
1138
- if (!args.quiet) {
1139
- clearLine();
1140
- logger.success(`Exported ${dataTypes.join(", ")} for ${siteUrl}`);
1141
- }
1142
- const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
1143
- if (args.output) {
1144
- await fs.writeFile(String(args.output), content);
1145
- if (!args.quiet) logger.info(`Written to ${args.output}`);
1146
- } else console.log(content);
899
+ const ctx = await createCommandContext({
900
+ needsAuth: true,
901
+ needsStore: true
902
+ });
903
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
904
+ const store = ctx.store;
905
+ const outDir = path.resolve(String(args.out));
906
+ if (args.compact) await compactClosedMonths(store, siteUrl, args.quiet);
907
+ const entries = await listLiveEntries(store, siteUrl);
908
+ if (entries.length === 0) {
909
+ logger.warn(`No data for ${siteUrl}. Run \`gscdump sync\` first.`);
910
+ process.exit(0);
911
+ }
912
+ await fs.mkdir(outDir, { recursive: true });
913
+ let copied = 0;
914
+ for (const entry of entries) {
915
+ const bytes = await store.engine.readObject(entry.objectKey);
916
+ const target = path.join(outDir, entry.objectKey);
917
+ await fs.mkdir(path.dirname(target), { recursive: true });
918
+ await fs.writeFile(target, Buffer.from(bytes));
919
+ copied++;
920
+ }
921
+ if (!args.quiet) logger.success(`Exported ${copied} file(s) to ${outDir}`);
1147
922
  }
1148
923
  });
1149
-
1150
- //#endregion
1151
- //#region src/commands/indexing.ts
1152
- async function resolveCloudSite$2(cloud, target) {
1153
- const me = await cloud.me().catch((e) => {
1154
- logger.error(`Failed to fetch sites: ${e.message}`);
1155
- process$1.exit(1);
1156
- });
1157
- if (me.sites.length === 0) {
1158
- logger.error("No registered sites. Run gscdump register first.");
1159
- process$1.exit(1);
1160
- }
1161
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
1162
- if (!site) if (me.sites.length === 1) site = me.sites[0];
1163
- else {
1164
- const selected = await select({
1165
- message: "Select a site",
1166
- options: me.sites.map((s) => ({
1167
- value: s.siteId,
1168
- label: s.siteUrl
1169
- }))
924
+ async function listLiveEntries(store, siteUrl) {
925
+ const siteId = store.siteIdFor(siteUrl);
926
+ return (await Promise.all(allTables().map((table) => store.engine.listLive({
927
+ userId: store.userId,
928
+ siteId,
929
+ table
930
+ })))).flat();
931
+ }
932
+ async function compactClosedMonths(store, siteUrl, quiet) {
933
+ const siteId = store.siteIdFor(siteUrl);
934
+ for (const table of allTables()) {
935
+ if (!quiet) logger.info(`Compacting ${table} (raw→d7→d30→d90)`);
936
+ await store.engine.compactTiered({
937
+ userId: store.userId,
938
+ siteId,
939
+ table
1170
940
  });
1171
- if (isCancel(selected)) {
1172
- cancel("Cancelled");
1173
- process$1.exit(0);
1174
- }
1175
- site = me.sites.find((s) => s.siteId === selected);
1176
941
  }
1177
- return {
1178
- siteId: site.siteId,
1179
- siteUrl: site.siteUrl
1180
- };
1181
942
  }
1182
- const statusCommand$1 = defineCommand({
943
// Default cap on URLs inspected per run; the `--limit` help text describes
// it as the per-property GSC daily quota.
const INSPECTION_QPD_PER_PROPERTY = 2000;
// Matched against Indexing API error messages: a standalone "404" or the
// text "NOT_FOUND" (case-insensitive).
const INDEXING_NOT_FOUND_RE = /\b404\b|NOT_FOUND/i;

/**
 * Read a newline-separated URL list from a file or, when no file is given,
 * from standard input.
 *
 * Every line is trimmed and blank lines are dropped; order and duplicates
 * are preserved.
 *
 * @param {{ file?: string }} opts - `file` is the path to read; when falsy, stdin is consumed to EOF.
 * @returns {Promise<string[]>} The non-empty, trimmed lines.
 */
async function readUrlList(opts) {
  let raw;
  if (opts.file) {
    raw = await readFile(opts.file, "utf8");
  } else {
    const parts = [];
    for await (const part of process.stdin) parts.push(part);
    raw = Buffer.concat(parts).toString("utf8");
  }
  const urls = [];
  for (const line of raw.split("\n")) {
    const url = line.trim();
    if (url) urls.push(url);
  }
  return urls;
}
951
/**
 * `inspect` sub-command: run URL Inspection for a list of URLs (read from
 * `--file` or stdin) and persist the results through the inspection store.
 *
 * Flow: validate `--limit`, read and cap the URL list, call `client.inspect`
 * for each URL, write all successful records with a single `writeBatch`
 * call, then print a summary.
 *
 * Exit codes: the process exits with 1 when `--limit` is not a positive
 * integer, or (after the successful records have been written) when at
 * least one URL failed.
 */
const inspectSubCommand = defineCommand({
  meta: {
    name: "inspect",
    description: "Run URL Inspection for a list of URLs and persist results to the local entity store"
  },
  args: {
    site: {
      type: "string",
      alias: "s",
      required: true,
      description: "Site URL (e.g., sc-domain:example.com)"
    },
    file: {
      type: "string",
      alias: "f",
      description: "Path to a file with one URL per line. If omitted, reads from stdin."
    },
    limit: {
      type: "string",
      description: `Max URLs to inspect this run (default: ${INSPECTION_QPD_PER_PROPERTY}, the per-property GSC daily quota)`
    },
    concurrency: {
      type: "string",
      alias: "c",
      default: "4",
      description: "Concurrent in-flight inspect calls (default: 4)"
    },
    quiet: {
      type: "boolean",
      alias: "q",
      default: false,
      description: "Suppress progress output"
    }
  },
  async run({ args }) {
    // Reject a malformed --limit up front. Previously NaN produced an empty
    // slice (reported as "No URLs to inspect.") and a negative value silently
    // dropped URLs from the END of the list.
    let limit = INSPECTION_QPD_PER_PROPERTY;
    if (args.limit) {
      limit = Number.parseInt(String(args.limit), 10);
      if (!Number.isInteger(limit) || limit < 1) {
        logger.error(`Invalid --limit "${args.limit}": expected a positive integer`);
        process.exit(1);
      }
    }
    const { client, store } = await createCommandContext({
      needsAuth: true,
      needsStore: true
    });
    const siteUrl = String(args.site);
    const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
    const quiet = Boolean(args.quiet);
    const allUrls = await readUrlList({ file: args.file ? String(args.file) : undefined });
    const urls = allUrls.slice(0, limit);
    if (urls.length === 0) {
      logger.warn("No URLs to inspect.");
      return;
    }
    // Only announce a cap when URLs were actually left out; comparing the
    // capped length to the limit alone also fired for lists that fit exactly.
    const truncated = allUrls.length > urls.length;
    if (truncated && limit < INSPECTION_QPD_PER_PROPERTY) logger.info(`Capping at --limit ${limit}`);
    // NOTE(review): no queueing of the remaining URLs is visible in this
    // command — confirm the "queued for tomorrow" wording against the rest of the CLI.
    if (truncated && urls.length === INSPECTION_QPD_PER_PROPERTY) logger.info(`Hit per-property daily inspection quota (${INSPECTION_QPD_PER_PROPERTY}); remaining URLs will be queued for tomorrow.`);
    const inspector = createInspectionStore({ dataSource: store.dataSource });
    let completed = 0;
    const records = [];
    const failures = [];
    await runWithConcurrency(urls, concurrency, async (url) => {
      try {
        const result = await client.inspect(siteUrl, url);
        const ix = result?.inspectionResult;
        const indexStatus = ix?.indexStatusResult;
        records.push({
          url,
          inspectedAt: new Date().toISOString(),
          indexStatus: indexStatus?.verdict ?? undefined,
          lastCrawlTime: indexStatus?.lastCrawlTime ?? undefined,
          googleCanonical: indexStatus?.googleCanonical ?? undefined,
          userCanonical: indexStatus?.userCanonical ?? undefined,
          coverageState: indexStatus?.coverageState ?? undefined,
          robotsTxtState: indexStatus?.robotsTxtState ?? undefined,
          indexingState: indexStatus?.indexingState ?? undefined,
          pageFetchState: indexStatus?.pageFetchState ?? undefined,
          mobileUsabilityVerdict: ix?.mobileUsabilityResult?.verdict ?? undefined,
          richResultsVerdict: ix?.richResultsResult?.verdict ?? undefined,
          raw: ix
        });
      } catch (err) {
        // Any rejection is a failure, including non-Error values, which the
        // previous `instanceof Error` check recorded as successful inspections.
        failures.push({
          url,
          error: err instanceof Error ? err.message : String(err)
        });
      }
      completed++;
      if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, url.slice(0, 60))}`);
    });
    if (!quiet) process.stdout.write("\n");
    // Successful records are persisted even when some URLs failed.
    await inspector.writeBatch({
      userId: store.userId,
      siteId: store.siteIdFor(siteUrl)
    }, records);
    if (!quiet) {
      logger.success(`Inspected ${records.length}/${urls.length} URL(s)`);
      if (failures.length > 0) {
        logger.warn(`${failures.length} failed:`);
        for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
        if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
      }
    }
    if (failures.length > 0) process.exit(1);
  }
});
1252
- const diagnosticsCommand = defineCommand({
1054
+ const showSubCommand = defineCommand({
1253
1055
  meta: {
1254
- name: "diagnostics",
1255
- description: "Show indexing issue diagnostics (cloud mode)"
1056
+ name: "show",
1057
+ description: "Print the latest inspection record for a URL from the local entity store"
1256
1058
  },
1257
1059
  args: {
1258
1060
  site: {
1259
1061
  type: "string",
1260
1062
  alias: "s",
1063
+ required: true,
1261
1064
  description: "Site URL"
1262
1065
  },
1066
+ url: {
1067
+ type: "positional",
1068
+ required: true,
1069
+ description: "URL to look up"
1070
+ },
1263
1071
  json: {
1264
1072
  type: "boolean",
1265
1073
  default: false,
@@ -1267,175 +1075,122 @@ const diagnosticsCommand = defineCommand({
1267
1075
  }
1268
1076
  },
1269
1077
  async run({ args }) {
1270
- const cloud = await getCloudClient();
1271
- if (!cloud) {
1272
- logger.error("Indexing diagnostics requires cloud mode. Run gscdump init to set up.");
1273
- process$1.exit(1);
1078
+ const store = (await createCommandContext({ needsStore: true })).store;
1079
+ const record = await createInspectionStore({ dataSource: store.dataSource }).getLatest({
1080
+ userId: store.userId,
1081
+ siteId: store.siteIdFor(String(args.site))
1082
+ }, String(args.url));
1083
+ if (!record) {
1084
+ logger.warn(`No inspection record for ${args.url}`);
1085
+ process.exit(1);
1274
1086
  }
1275
- const config = await loadConfig();
1276
- const { siteId, siteUrl } = await resolveCloudSite$2(cloud, args.site || config.defaultSite);
1277
- const data = await cloud.indexingDiagnostics(siteId).catch((e) => {
1278
- logger.error(`Failed to fetch diagnostics: ${e.message}`);
1279
- process$1.exit(1);
1280
- });
1281
1087
  if (args.json) {
1282
- console.log(JSON.stringify(data, null, 2));
1088
+ console.log(JSON.stringify(record, null, 2));
1283
1089
  return;
1284
1090
  }
1285
1091
  console.log();
1286
- console.log(` \x1B[1m${siteUrl}\x1B[0m — Indexing Diagnostics`);
1287
- console.log();
1288
- console.log(` Total: \x1B[36m${data.summary.totalUrls.toLocaleString()}\x1B[0m URLs, \x1B[32m${data.summary.indexed.toLocaleString()}\x1B[0m indexed (${data.summary.indexedPercent}%)`);
1289
- console.log();
1290
- if (data.issues.length === 0) {
1291
- logger.success("No indexing issues found!");
1292
- return;
1293
- }
1294
- console.log(" \x1B[1mIssues\x1B[0m");
1295
- for (const issue of data.issues) {
1296
- const color = issue.severity === "error" ? "\x1B[31m" : issue.severity === "warning" ? "\x1B[33m" : "\x1B[90m";
1297
- console.log(` ${color}${issue.severity.toUpperCase().padEnd(7)}\x1B[0m ${issue.label} — \x1B[36m${issue.count.toLocaleString()}\x1B[0m URLs`);
1298
- }
1092
+ console.log(` \x1B[1m${record.url}\x1B[0m`);
1093
+ console.log(` Inspected: ${record.inspectedAt}`);
1094
+ if (record.indexStatus) console.log(` Index: ${record.indexStatus}`);
1095
+ if (record.lastCrawlTime) console.log(` Last crawl: ${record.lastCrawlTime}`);
1096
+ if (record.googleCanonical) console.log(` Canonical: ${record.googleCanonical}`);
1097
+ if (record.coverageState) console.log(` Coverage: ${record.coverageState}`);
1098
+ if (record.mobileUsabilityVerdict) console.log(` Mobile: ${record.mobileUsabilityVerdict}`);
1099
+ if (record.richResultsVerdict) console.log(` Rich results: ${record.richResultsVerdict}`);
1299
1100
  console.log();
1300
1101
  }
1301
1102
  });
1302
- const urlsCommand = defineCommand({
1103
/**
 * `sitemaps snapshot` sub-command: list the site's sitemaps through the
 * authenticated client, normalise each entry that has a string `path` into
 * a record stamped with one shared `capturedAt` time, and persist the set
 * with `createSitemapStore(...).writeSnapshot`.
 *
 * With `--json` the snapshot is printed as JSON and nothing else is logged;
 * otherwise a per-sitemap summary is logged unless `--quiet` is set.
 */
const sitemapsSnapshotSubCommand = defineCommand({
  meta: {
    name: "snapshot",
    description: "Fetch current sitemap state from GSC and persist to the local entity store"
  },
  args: {
    site: {
      type: "string",
      alias: "s",
      required: true,
      description: "Site URL (e.g., sc-domain:example.com)"
    },
    quiet: {
      type: "boolean",
      alias: "q",
      default: false,
      description: "Suppress progress output"
    },
    json: {
      type: "boolean",
      default: false,
      description: "Emit the snapshot JSON to stdout"
    }
  },
  async run({ args }) {
    const { client, store } = await createCommandContext({
      needsAuth: true,
      needsStore: true
    });
    const siteUrl = String(args.site);
    const quiet = Boolean(args.quiet);
    const apiSitemaps = await client.sitemaps.list(siteUrl);
    const capturedAt = new Date().toISOString();

    // Entries without a string path cannot be keyed, so they are dropped.
    const hasPath = (sitemap) => typeof sitemap.path === "string";
    // null/absent API fields are normalised to undefined on the record.
    const toContent = (content) => ({
      type: content.type ?? undefined,
      submitted: content.submitted ?? undefined,
      indexed: content.indexed ?? undefined
    });
    const toRecord = (sitemap) => ({
      path: sitemap.path,
      capturedAt,
      lastDownloaded: sitemap.lastDownloaded ?? undefined,
      lastSubmitted: sitemap.lastSubmitted ?? undefined,
      type: sitemap.type ?? undefined,
      isPending: sitemap.isPending ?? undefined,
      isSitemapsIndex: sitemap.isSitemapsIndex ?? undefined,
      errors: sitemap.errors ?? undefined,
      warnings: sitemap.warnings ?? undefined,
      contents: sitemap.contents?.map((content) => toContent(content)),
      raw: sitemap
    });
    const records = apiSitemaps.filter((sitemap) => hasPath(sitemap)).map((sitemap) => toRecord(sitemap));

    const sitemapStore = createSitemapStore({ dataSource: store.dataSource });
    await sitemapStore.writeSnapshot({
      userId: store.userId,
      siteId: store.siteIdFor(siteUrl)
    }, records);

    if (args.json) {
      const payload = {
        site: siteUrl,
        capturedAt,
        records
      };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }
    if (quiet) return;

    logger.success(`Captured ${records.length} sitemap(s) for ${siteUrl}`);
    for (const r of records) {
      // Counts are compared against the string "0"; a truthy non-"0" value is highlighted.
      const errors = r.errors && r.errors !== "0" ? ` \x1B[31merr=${r.errors}\x1B[0m` : "";
      const warnings = r.warnings && r.warnings !== "0" ? ` \x1B[33mwarn=${r.warnings}\x1B[0m` : "";
      const downloaded = r.lastDownloaded ? ` last=${r.lastDownloaded}` : "";
      console.log(` ${r.path}${downloaded}${errors}${warnings}`);
    }
  }
});
1378
- const inspectCommand = defineCommand({
1178
+ const sitemapsShowSubCommand = defineCommand({
1379
1179
  meta: {
1380
- name: "inspect",
1381
- description: "Inspect a specific URL's indexing status (local mode)"
1180
+ name: "show",
1181
+ description: "Print the latest captured sitemap state for a feedpath"
1382
1182
  },
1383
1183
  args: {
1384
1184
  site: {
1385
1185
  type: "string",
1386
1186
  alias: "s",
1387
1187
  required: true,
1388
- description: "Site URL (e.g., sc-domain:example.com)"
1188
+ description: "Site URL"
1389
1189
  },
1390
- url: {
1190
+ path: {
1391
1191
  type: "positional",
1392
1192
  required: true,
1393
- description: "URL to inspect"
1394
- },
1395
- json: {
1396
- type: "boolean",
1397
- default: false,
1398
- description: "Output as JSON"
1399
- }
1400
- },
1401
- async run({ args }) {
1402
- const result = await inspectUrl(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch((e) => {
1403
- logger.error(`Inspection failed: ${e.message}`);
1404
- process$1.exit(1);
1405
- });
1406
- if (args.json) {
1407
- console.log(JSON.stringify(result, null, 2));
1408
- return;
1409
- }
1410
- const indexStatus = result.inspectionResult?.indexStatusResult;
1411
- console.log();
1412
- console.log(` \x1B[1mURL:\x1B[0m ${args.url}`);
1413
- console.log();
1414
- if (indexStatus) {
1415
- const verdict = indexStatus.verdict;
1416
- const verdictColor = verdict === "PASS" ? "\x1B[32m" : "\x1B[31m";
1417
- console.log(` Verdict: ${verdictColor}${verdict}\x1B[0m`);
1418
- if (indexStatus.coverageState) console.log(` Coverage: ${indexStatus.coverageState}`);
1419
- if (indexStatus.robotsTxtState) console.log(` Robots.txt: ${indexStatus.robotsTxtState}`);
1420
- if (indexStatus.indexingState) console.log(` Indexing: ${indexStatus.indexingState}`);
1421
- if (indexStatus.lastCrawlTime) console.log(` Last Crawl: ${indexStatus.lastCrawlTime}`);
1422
- if (indexStatus.pageFetchState) console.log(` Page Fetch: ${indexStatus.pageFetchState}`);
1423
- if (indexStatus.googleCanonical) console.log(` Google Canon: ${indexStatus.googleCanonical}`);
1424
- if (indexStatus.userCanonical) console.log(` User Canon: ${indexStatus.userCanonical}`);
1425
- } else console.log(JSON.stringify(result, null, 2));
1426
- console.log();
1427
- }
1428
- });
1429
- const indexPercentCommand = defineCommand({
1430
- meta: {
1431
- name: "index-percent",
1432
- description: "Show index percent, invisible URLs, and orphan pages (cloud mode)"
1433
- },
1434
- args: {
1435
- site: {
1436
- type: "string",
1437
- alias: "s",
1438
- description: "Site URL"
1193
+ description: "Sitemap path (feedpath)"
1439
1194
  },
1440
1195
  json: {
1441
1196
  type: "boolean",
@@ -1444,81 +1199,172 @@ const indexPercentCommand = defineCommand({
1444
1199
  }
1445
1200
  },
1446
1201
  async run({ args }) {
1447
- const cloud = await getCloudClient();
1448
- if (!cloud) {
1449
- logger.error("Index percent requires cloud mode. Run gscdump init to set up.");
1450
- process$1.exit(1);
1202
+ const store = (await createCommandContext({ needsStore: true })).store;
1203
+ const record = await createSitemapStore({ dataSource: store.dataSource }).getLatest({
1204
+ userId: store.userId,
1205
+ siteId: store.siteIdFor(String(args.site))
1206
+ }, String(args.path));
1207
+ if (!record) {
1208
+ logger.warn(`No sitemap record for ${args.path}`);
1209
+ process.exit(1);
1451
1210
  }
1452
- const config = await loadConfig();
1453
- const { siteId, siteUrl } = await resolveCloudSite$2(cloud, args.site || config.defaultSite);
1454
- const data = await cloud.indexPercent(siteId).catch((e) => {
1455
- logger.error(`Failed to fetch index percent: ${e.message}`);
1456
- process$1.exit(1);
1457
- });
1458
1211
  if (args.json) {
1459
- console.log(JSON.stringify(data, null, 2));
1212
+ console.log(JSON.stringify(record, null, 2));
1460
1213
  return;
1461
1214
  }
1462
- const s = data.summary;
1463
- console.log();
1464
- console.log(` \x1B[1m${siteUrl}\x1B[0m — Index Percent`);
1465
1215
  console.log();
1466
- console.log(` Index Percent: \x1B[36m${s.currentPercent}%\x1B[0m`);
1467
- console.log(` Sitemap URLs: ${s.totalSitemapUrls.toLocaleString()}`);
1468
- console.log(` Visible in Search: \x1B[32m${s.visibleUrls.toLocaleString()}\x1B[0m`);
1469
- if (s.change7d !== null) {
1470
- const color = s.change7d > 0 ? "\x1B[32m+" : s.change7d < 0 ? "\x1B[31m" : "\x1B[90m";
1471
- console.log(` 7d change: ${color}${s.change7d.toFixed(1)}%\x1B[0m`);
1472
- }
1473
- if (s.change28d !== null) {
1474
- const color = s.change28d > 0 ? "\x1B[32m+" : s.change28d < 0 ? "\x1B[31m" : "\x1B[90m";
1475
- console.log(` 28d change: ${color}${s.change28d.toFixed(1)}%\x1B[0m`);
1476
- }
1477
- if (data.invisibleCount > 0) {
1478
- console.log();
1479
- console.log(` \x1B[1mInvisible URLs\x1B[0m (\x1B[33m${data.invisibleCount}\x1B[0m — in sitemap but no search traffic)`);
1480
- for (const u of data.invisibleUrls.slice(0, 10)) console.log(` ${u.url}`);
1481
- if (data.invisibleCount > 10) console.log(` \x1B[90m... and ${data.invisibleCount - 10} more\x1B[0m`);
1482
- }
1483
- if (data.orphanCount > 0) {
1484
- console.log();
1485
- console.log(` \x1B[1mOrphan Pages\x1B[0m (\x1B[33m${data.orphanCount}\x1B[0m — has traffic but not in sitemap)`);
1486
- for (const u of data.orphanPages.slice(0, 10)) console.log(` ${u.url} \x1B[90m(${u.clicks} clicks)\x1B[0m`);
1487
- if (data.orphanCount > 10) console.log(` \x1B[90m... and ${data.orphanCount - 10} more\x1B[0m`);
1488
- }
1489
- if (data.sitemaps.length > 0) {
1490
- console.log();
1491
- console.log(" \x1B[1mSitemaps\x1B[0m");
1492
- for (const sm of data.sitemaps) console.log(` ${sm.path} \x1B[90m(${sm.urlCount.toLocaleString()} URLs)\x1B[0m`);
1216
+ console.log(` \x1B[1m${record.path}\x1B[0m`);
1217
+ console.log(` Captured: ${record.capturedAt}`);
1218
+ if (record.lastDownloaded) console.log(` Downloaded: ${record.lastDownloaded}`);
1219
+ if (record.lastSubmitted) console.log(` Submitted: ${record.lastSubmitted}`);
1220
+ if (record.type) console.log(` Type: ${record.type}`);
1221
+ if (record.errors) console.log(` Errors: ${record.errors}`);
1222
+ if (record.warnings) console.log(` Warnings: ${record.warnings}`);
1223
+ if (record.contents?.length) {
1224
+ console.log(` Contents:`);
1225
+ for (const c of record.contents) {
1226
+ const bits = [
1227
+ c.type,
1228
+ c.submitted && `submitted=${c.submitted}`,
1229
+ c.indexed && `indexed=${c.indexed}`
1230
+ ].filter(Boolean).join(" ");
1231
+ console.log(` ${bits}`);
1232
+ }
1493
1233
  }
1494
1234
  console.log();
1495
1235
  }
1496
1236
  });
1497
- const indexingCommand = defineCommand({
1237
/**
 * `indexing` command group. Its only sub-command, `snapshot`, fetches
 * Indexing API metadata for each URL (read from `--file` or stdin) and
 * persists the records with `createIndexingMetadataStore(...).writeBatch`.
 *
 * Per URL:
 * - success: a record with `latestUpdateAt` / `latestRemoveAt` taken from
 *   the response's `latestUpdate.notifyTime` / `latestRemove.notifyTime`;
 * - failure whose message matches `INDEXING_NOT_FOUND_RE`: a bare record
 *   (url + capturedAt only);
 * - any other failure: collected and reported; the process then exits with
 *   code 1 after the successful records have been written.
 */
const indexingSubCommand = defineCommand({
  meta: {
    name: "indexing",
    description: "Snapshot Indexing API metadata per URL"
  },
  subCommands: {
    snapshot: defineCommand({
      meta: {
        name: "snapshot",
        description: "Fetch Indexing API metadata (latest update/remove per URL) and persist to the local entity store"
      },
      args: {
        site: {
          type: "string",
          alias: "s",
          required: true,
          description: "Site URL (e.g., sc-domain:example.com)"
        },
        file: {
          type: "string",
          alias: "f",
          description: "Path to a file with one URL per line. If omitted, reads from stdin."
        },
        concurrency: {
          type: "string",
          alias: "c",
          default: "4",
          description: "Concurrent in-flight getMetadata calls (default: 4)"
        },
        quiet: {
          type: "boolean",
          alias: "q",
          default: false,
          description: "Suppress progress output"
        }
      },
      async run({ args }) {
        const { client, store } = await createCommandContext({
          needsAuth: true,
          needsStore: true
        });
        const siteUrl = String(args.site);
        const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
        const quiet = Boolean(args.quiet);
        const urls = await readUrlList({ file: args.file ? String(args.file) : undefined });
        if (urls.length === 0) {
          logger.warn("No URLs to fetch metadata for.");
          return;
        }
        const records = [];
        const failures = [];
        let completed = 0;
        await runWithConcurrency(urls, concurrency, async (url) => {
          try {
            const metadata = await client.indexing.getMetadata(url);
            records.push({
              url,
              capturedAt: new Date().toISOString(),
              latestUpdateAt: metadata?.latestUpdate?.notifyTime ?? undefined,
              latestRemoveAt: metadata?.latestRemove?.notifyTime ?? undefined,
              raw: metadata
            });
          } catch (err) {
            // Every rejection is classified here, including non-Error values
            // that the previous `instanceof Error` check stored as successes.
            const message = err instanceof Error ? err.message : String(err);
            if (INDEXING_NOT_FOUND_RE.test(message)) {
              // A not-found answer is still recorded, just without notify times.
              records.push({
                url,
                capturedAt: new Date().toISOString()
              });
            } else {
              failures.push({
                url,
                error: message
              });
            }
          }
          completed++;
          if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, url.slice(0, 60))}`);
        });
        if (!quiet) process.stdout.write("\n");
        await createIndexingMetadataStore({ dataSource: store.dataSource }).writeBatch({
          userId: store.userId,
          siteId: store.siteIdFor(siteUrl)
        }, records);
        if (!quiet) {
          logger.success(`Captured metadata for ${records.length}/${urls.length} URL(s)`);
          if (failures.length > 0) {
            logger.warn(`${failures.length} failed:`);
            for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
            if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
          }
        }
        if (failures.length > 0) process.exit(1);
      }
    })
  }
});
1327
/**
 * Top-level `entities` command. It only routes to sub-commands:
 * `inspect`, `show`, the nested `sitemaps` group (`snapshot` / `show`)
 * and `indexing`.
 */
const entitiesCommand = (() => {
  // Built inside the closure so no extra module-level binding is introduced.
  const sitemapsGroup = defineCommand({
    meta: {
      name: "sitemaps",
      description: "Snapshot and inspect sitemap state per site"
    },
    subCommands: {
      snapshot: sitemapsSnapshotSubCommand,
      show: sitemapsShowSubCommand
    }
  });
  return defineCommand({
    meta: {
      name: "entities",
      description: "Manage local entity snapshots (URL inspections, sitemaps, indexing metadata)"
    },
    subCommands: {
      inspect: inspectSubCommand,
      show: showSubCommand,
      sitemaps: sitemapsGroup,
      indexing: indexingSubCommand
    }
  });
})();
1348
// One `KEY=value` line of a .env file: group 1 is everything before the
// first "=", group 2 is the rest of the line (which may itself contain "=").
const ENV_LINE_RE = /^([^=]+)=(.*)$/;

/**
 * Ask the user where Parquet data should be stored.
 *
 * The suggested directory is `existing` when it is not null/undefined,
 * otherwise `defaultDataDir()`. Cancelling the prompt exits the process
 * with code 1; an empty answer falls back to the suggestion.
 *
 * @param {string | null | undefined} existing - Previously configured data directory, if any.
 * @returns {Promise<string>} The chosen directory path.
 */
async function promptDataDir(existing) {
  const suggested = existing ?? defaultDataDir();
  const reply = await text({
    message: "Where should Parquet data be stored?",
    placeholder: suggested,
    defaultValue: suggested
  });
  if (isCancel(reply)) {
    process.exit(1);
  }
  const chosen = String(reply);
  return chosen === "" ? suggested : chosen;
}
1359
+ async function loadEnvFile() {
1360
+ const envPath = path.join(process.cwd(), ".env");
1361
+ const content = await fs.readFile(envPath, "utf-8").catch(() => null);
1516
1362
  if (!content) return null;
1517
1363
  const env = {};
1518
1364
  for (const line of content.split("\n")) {
1519
1365
  const trimmed = line.trim();
1520
1366
  if (!trimmed || trimmed.startsWith("#")) continue;
1521
- const match = trimmed.match(/^([^=]+)=(.*)$/);
1367
+ const match = trimmed.match(ENV_LINE_RE);
1522
1368
  if (match) {
1523
1369
  const key = match[1].trim();
1524
1370
  let value = match[2].trim();
@@ -1531,7 +1377,7 @@ async function loadEnvFile() {
1531
1377
  const initCommand = defineCommand({
1532
1378
  meta: {
1533
1379
  name: "init",
1534
- description: "Set up GSCDump (choose cloud or local mode)"
1380
+ description: "Set up GSCDump authentication"
1535
1381
  },
1536
1382
  args: { force: {
1537
1383
  type: "boolean",
@@ -1540,23 +1386,23 @@ const initCommand = defineCommand({
1540
1386
  } },
1541
1387
  async run({ args }) {
1542
1388
  const config = await loadConfig();
1543
- if (config.mode && !args.force) {
1544
- logger.info(`Already configured in ${config.mode} mode`);
1389
+ if (config.clientId && config.clientSecret && !args.force) {
1390
+ logger.info("Already configured");
1545
1391
  logger.info("Run with --force to reconfigure");
1546
1392
  return;
1547
1393
  }
1548
1394
  const envFile = await loadEnvFile();
1549
1395
  if (envFile?.GOOGLE_CLIENT_ID && envFile?.GOOGLE_CLIENT_SECRET && envFile?.GOOGLE_REFRESH_TOKEN) {
1550
1396
  logger.info("Found .env file with Google credentials");
1551
- process$1.env.GOOGLE_CLIENT_ID = envFile.GOOGLE_CLIENT_ID;
1552
- process$1.env.GOOGLE_CLIENT_SECRET = envFile.GOOGLE_CLIENT_SECRET;
1553
- process$1.env.GOOGLE_REFRESH_TOKEN = envFile.GOOGLE_REFRESH_TOKEN;
1554
- if (envFile.GOOGLE_ACCESS_TOKEN) process$1.env.GOOGLE_ACCESS_TOKEN = envFile.GOOGLE_ACCESS_TOKEN;
1397
+ process.env.GOOGLE_CLIENT_ID = envFile.GOOGLE_CLIENT_ID;
1398
+ process.env.GOOGLE_CLIENT_SECRET = envFile.GOOGLE_CLIENT_SECRET;
1399
+ process.env.GOOGLE_REFRESH_TOKEN = envFile.GOOGLE_REFRESH_TOKEN;
1400
+ if (envFile.GOOGLE_ACCESS_TOKEN) process.env.GOOGLE_ACCESS_TOKEN = envFile.GOOGLE_ACCESS_TOKEN;
1555
1401
  await saveConfig({
1556
1402
  ...config,
1557
- mode: "local",
1558
1403
  clientId: envFile.GOOGLE_CLIENT_ID,
1559
- clientSecret: envFile.GOOGLE_CLIENT_SECRET
1404
+ clientSecret: envFile.GOOGLE_CLIENT_SECRET,
1405
+ dataDir: config.dataDir ?? defaultDataDir()
1560
1406
  });
1561
1407
  const creds = (await authenticate({
1562
1408
  clientId: envFile.GOOGLE_CLIENT_ID,
@@ -1575,621 +1421,79 @@ const initCommand = defineCommand({
1575
1421
  console.log(" \x1B[1mWelcome to GSCDump!\x1B[0m");
1576
1422
  console.log(" \x1B[90mGoogle Search Console data extraction CLI\x1B[0m");
1577
1423
  console.log();
1578
- const mode = await select({
1579
- message: "Choose your setup mode:",
1580
- options: [{
1581
- value: "cloud",
1582
- label: "Cloud (Recommended)",
1583
- hint: "Easy setup via cloud.gscdump.com - no API keys needed"
1584
- }, {
1585
- value: "local",
1586
- label: "Local",
1587
- hint: "Use your own Google OAuth credentials"
1588
- }]
1424
+ const dataDir = await promptDataDir(config.dataDir);
1425
+ const credentials = await getAuthCredentials(true);
1426
+ await saveConfig({
1427
+ ...config,
1428
+ dataDir,
1429
+ clientId: credentials.clientId,
1430
+ clientSecret: credentials.clientSecret
1589
1431
  });
1590
- if (isCancel(mode)) process$1.exit(1);
1591
- if (mode === "cloud") {
1592
- const cloudUrl = config.cloudUrl || DEFAULT_CLOUD_URL;
1593
- await saveConfig({
1594
- ...config,
1595
- mode: "cloud",
1596
- cloudUrl
1597
- });
1598
- await authenticateCloud(cloudUrl, true);
1599
- } else {
1600
- await saveConfig({
1601
- ...config,
1602
- mode: "local"
1603
- });
1604
- await authenticate(await getAuthCredentials(true), true);
1605
- }
1432
+ await authenticate(credentials, true);
1606
1433
  console.log();
1607
1434
  logger.success("Setup complete! Run gscdump to get started.");
1608
1435
  }
1609
1436
  });
1610
-
1611
- //#endregion
1612
- //#region src/mcp/handlers/analytics.ts
1613
- async function collectRows(ctx, siteUrl, builder) {
1614
- const rows = [];
1615
- for await (const batch of ctx.client.query(siteUrl, builder)) rows.push(...batch);
1616
- return rows;
1617
- }
1618
- async function fetchPages(input, ctx) {
1619
- const builder = gsc.select(page, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
1620
- const rows = await collectRows(ctx, input.siteUrl, builder);
1621
- return {
1622
- total: rows.length,
1623
- data: rows
1624
- };
1625
- }
1626
- async function fetchKeywords(input, ctx) {
1627
- const builder = gsc.select(query, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
1628
- const rows = await collectRows(ctx, input.siteUrl, builder);
1629
- return {
1630
- total: rows.length,
1631
- data: rows
1632
- };
1633
- }
1634
- async function fetchCountries(input, ctx) {
1635
- const builder = gsc.select(country, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
1636
- const rows = await collectRows(ctx, input.siteUrl, builder);
1637
- return {
1638
- total: rows.length,
1639
- data: rows
1640
- };
1641
- }
1642
- async function fetchDevices(input, ctx) {
1643
- const builder = gsc.select(device, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
1644
- const rows = await collectRows(ctx, input.siteUrl, builder);
1645
- return {
1646
- total: rows.length,
1647
- data: rows
1648
- };
1649
- }
1650
-
1651
- //#endregion
1652
- //#region src/mcp/handlers/indexing.ts
1653
- async function inspectUrl$1(input, ctx) {
1654
- return inspectUrl(ctx.client, input.siteUrl, input.inspectionUrl);
1655
- }
1656
- async function requestIndexing$1(input, ctx) {
1657
- return requestIndexing(ctx.client, input.url, { type: input.type || "URL_UPDATED" }).catch((e) => ({
1658
- url: input.url,
1659
- type: input.type || "URL_UPDATED",
1660
- error: e.message
1661
- }));
1662
- }
1663
- async function getIndexingStatus(input, ctx) {
1664
- return getIndexingMetadata(ctx.client, input.url).catch((e) => ({
1665
- url: input.url,
1666
- error: e.message
1667
- }));
1668
- }
1669
- async function batchRequestIndexing$1(input, ctx) {
1670
- const results = await batchRequestIndexing(ctx.client, input.urls, {
1671
- type: input.type || "URL_UPDATED",
1672
- delayMs: input.delayMs || 100
1673
- });
1674
- return {
1675
- results,
1676
- success: results.length,
1677
- failed: 0
1678
- };
1679
- }
1680
- async function batchInspectUrls$1(input, ctx) {
1681
- const results = await batchInspectUrls(ctx.client, input.siteUrl, input.urls, { delayMs: input.delayMs || 200 });
1682
- return {
1683
- results,
1684
- indexed: results.filter((r) => r.isIndexed).length,
1685
- notIndexed: results.filter((r) => !r.isIndexed).length
1686
- };
1687
- }
1688
-
1689
- //#endregion
1690
- //#region src/mcp/handlers/query.ts
1691
- const DIMENSION_MAP$1 = {
1692
- page,
1693
- query,
1694
- date,
1695
- country,
1696
- device,
1697
- searchAppearance
1698
- };
1699
- async function customQuery(input, ctx) {
1700
- const dimensions = input.dimensions.filter((d) => d in DIMENSION_MAP$1).map((d) => DIMENSION_MAP$1[d]);
1701
- if (dimensions.length === 0) throw new Error("At least one valid dimension required");
1702
- const builder = gsc.select(...dimensions).where(between(date, input.period.start, input.period.end)).limit(input.rowLimit || 25e3);
1703
- const rows = [];
1704
- for await (const batch of ctx.client.query(input.siteUrl, builder)) rows.push(...batch);
1705
- return {
1706
- total: rows.length,
1707
- data: rows
1708
- };
1709
- }
1710
-
1711
- //#endregion
1712
- //#region src/mcp/handlers/sites.ts
1713
- async function listSites(_input, ctx) {
1714
- return fetchSites(ctx.client);
1715
- }
1716
- async function listSitesWithSitemaps(_input, ctx) {
1717
- return fetchSitesWithSitemaps(ctx.client);
1718
- }
1719
- async function listSitemaps(input, ctx) {
1720
- return fetchSitemaps(ctx.client, input.siteUrl);
1721
- }
1722
- async function getSitemap(input, ctx) {
1723
- return fetchSitemap(ctx.client, input.siteUrl, input.feedpath);
1724
- }
1725
- async function submitSitemap$1(input, ctx) {
1726
- await submitSitemap(ctx.client, input.siteUrl, input.feedpath);
1727
- return { success: true };
1728
- }
1729
- async function deleteSitemap$1(input, ctx) {
1730
- await deleteSitemap(ctx.client, input.siteUrl, input.feedpath);
1731
- return { success: true };
1732
- }
1733
-
1734
- //#endregion
1735
- //#region src/mcp/types.ts
1736
- const periodSchema = z.object({
1737
- start: z.string().describe("Start date (YYYY-MM-DD)"),
1738
- end: z.string().describe("End date (YYYY-MM-DD)")
1739
- }).describe("Date range for the query");
1740
- const siteUrlSchema = z.string().describe("GSC property URL (e.g., sc-domain:example.com or https://example.com/)");
1741
- const queryOptionsSchema = z.object({
1742
- type: z.enum([
1743
- "web",
1744
- "image",
1745
- "video",
1746
- "news",
1747
- "discover",
1748
- "googleNews"
1749
- ]).optional().describe("Data type"),
1750
- dataState: z.enum(["final", "all"]).optional().describe("Data state: final (settled) or all (includes fresh)"),
1751
- aggregationType: z.enum(["byPage", "byProperty"]).optional().describe("Aggregation: byPage or byProperty")
1752
- }).optional();
1753
- const listSitesInput = z.object({});
1754
- const listSitemapsInput = z.object({ siteUrl: siteUrlSchema });
1755
- const fetchAnalyticsInput = z.object({
1756
- siteUrl: siteUrlSchema,
1757
- period: periodSchema,
1758
- comparePrevious: z.boolean().optional().describe("Include previous period comparison"),
1759
- options: queryOptionsSchema
1760
- });
1761
- const fetchPageInput = z.object({
1762
- siteUrl: siteUrlSchema,
1763
- period: periodSchema,
1764
- url: z.string().describe("Page URL to fetch details for")
1765
- });
1766
- const fetchKeywordInput = z.object({
1767
- siteUrl: siteUrlSchema,
1768
- period: periodSchema,
1769
- keyword: z.string().describe("Keyword to fetch details for")
1770
- });
1771
- const inspectUrlInput = z.object({
1772
- siteUrl: siteUrlSchema,
1773
- inspectionUrl: z.string().describe("URL to inspect")
1774
- });
1775
- const requestIndexingInput = z.object({
1776
- url: z.string().describe("URL to request indexing for"),
1777
- type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type")
1778
- });
1779
- const getIndexingStatusInput = z.object({ url: z.string().describe("URL to get indexing status for") });
1780
- const customQueryInput = z.object({
1781
- siteUrl: siteUrlSchema,
1782
- period: periodSchema,
1783
- dimensions: z.array(z.enum([
1784
- "date",
1785
- "query",
1786
- "page",
1787
- "country",
1788
- "device",
1789
- "searchAppearance"
1790
- ])).describe("Dimensions to group by"),
1791
- rowLimit: z.number().optional().describe("Max rows (default 25000)"),
1792
- options: queryOptionsSchema
1793
- });
1794
- const sitemapInput = z.object({
1795
- siteUrl: siteUrlSchema,
1796
- feedpath: z.string().describe("Sitemap URL (e.g., https://example.com/sitemap.xml)")
1797
- });
1798
- const batchRequestIndexingInput = z.object({
1799
- urls: z.array(z.string()).describe("URLs to request indexing for"),
1800
- type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type"),
1801
- delayMs: z.number().optional().describe("Delay between requests in ms (default 100)")
1802
- });
1803
- const batchInspectUrlsInput = z.object({
1804
- siteUrl: siteUrlSchema,
1805
- urls: z.array(z.string()).describe("URLs to inspect"),
1806
- delayMs: z.number().optional().describe("Delay between requests in ms (default 200)")
1807
- });
1808
-
1809
- //#endregion
1810
- //#region src/mcp/server/index.ts
1811
- function createGscMcpServer(options) {
1812
- const { name = "gscdump", version = "1.0.0", getAuth: getAuth$1, cloudClient } = options;
1813
- const server = new McpServer({
1814
- name,
1815
- version
1816
- });
1817
- const auth = async () => Promise.resolve(getAuth$1());
1818
- const getContext = async () => {
1819
- const a = await auth();
1820
- return {
1821
- auth: a,
1822
- client: googleSearchConsole(a)
1823
- };
1824
- };
1825
- server.registerTool("list-sites", {
1826
- description: "List all Google Search Console sites the user has access to",
1827
- inputSchema: listSitesInput.shape
1828
- }, async (args) => {
1829
- const result = await listSites(args, await getContext());
1830
- return { content: [{
1831
- type: "text",
1832
- text: JSON.stringify(result, null, 2)
1833
- }] };
1834
- });
1835
- server.registerTool("list-sites-with-sitemaps", {
1836
- description: "List all GSC sites with their sitemaps",
1837
- inputSchema: listSitesInput.shape
1838
- }, async (args) => {
1839
- const result = await listSitesWithSitemaps(args, await getContext());
1840
- return { content: [{
1841
- type: "text",
1842
- text: JSON.stringify(result, null, 2)
1843
- }] };
1844
- });
1845
- server.registerTool("list-sitemaps", {
1846
- description: "List sitemaps for a specific site",
1847
- inputSchema: listSitemapsInput.shape
1848
- }, async (args) => {
1849
- const result = await listSitemaps(args, await getContext());
1850
- return { content: [{
1851
- type: "text",
1852
- text: JSON.stringify(result, null, 2)
1853
- }] };
1854
- });
1855
- server.registerTool("get-sitemap", {
1856
- description: "Get details for a specific sitemap",
1857
- inputSchema: sitemapInput.shape
1858
- }, async (args) => {
1859
- const result = await getSitemap(args, await getContext());
1860
- return { content: [{
1861
- type: "text",
1862
- text: JSON.stringify(result, null, 2)
1863
- }] };
1864
- });
1865
- server.registerTool("submit-sitemap", {
1866
- description: "Submit a sitemap to Google Search Console",
1867
- inputSchema: sitemapInput.shape
1868
- }, async (args) => {
1869
- const result = await submitSitemap$1(args, await getContext());
1870
- return { content: [{
1871
- type: "text",
1872
- text: JSON.stringify(result, null, 2)
1873
- }] };
1874
- });
1875
- server.registerTool("delete-sitemap", {
1876
- description: "Delete a sitemap from Google Search Console",
1877
- inputSchema: sitemapInput.shape
1878
- }, async (args) => {
1879
- const result = await deleteSitemap$1(args, await getContext());
1880
- return { content: [{
1881
- type: "text",
1882
- text: JSON.stringify(result, null, 2)
1883
- }] };
1884
- });
1885
- server.registerTool("fetch-pages", {
1886
- description: "Fetch page analytics data for a site",
1887
- inputSchema: fetchAnalyticsInput.shape
1888
- }, async (args) => {
1889
- const result = await fetchPages(args, await getContext());
1890
- return { content: [{
1891
- type: "text",
1892
- text: JSON.stringify(result, null, 2)
1893
- }] };
1894
- });
1895
- server.registerTool("fetch-keywords", {
1896
- description: "Fetch keyword/query analytics data for a site",
1897
- inputSchema: fetchAnalyticsInput.shape
1898
- }, async (args) => {
1899
- const result = await fetchKeywords(args, await getContext());
1900
- return { content: [{
1901
- type: "text",
1902
- text: JSON.stringify(result, null, 2)
1903
- }] };
1904
- });
1905
- server.registerTool("fetch-countries", {
1906
- description: "Fetch country analytics data for a site",
1907
- inputSchema: fetchAnalyticsInput.shape
1908
- }, async (args) => {
1909
- const result = await fetchCountries(args, await getContext());
1910
- return { content: [{
1911
- type: "text",
1912
- text: JSON.stringify(result, null, 2)
1913
- }] };
1914
- });
1915
- server.registerTool("fetch-devices", {
1916
- description: "Fetch device analytics data for a site",
1917
- inputSchema: fetchAnalyticsInput.shape
1918
- }, async (args) => {
1919
- const result = await fetchDevices(args, await getContext());
1920
- return { content: [{
1921
- type: "text",
1922
- text: JSON.stringify(result, null, 2)
1923
- }] };
1924
- });
1925
- server.registerTool("custom-query", {
1926
- description: "Run a custom search analytics query with specified dimensions",
1927
- inputSchema: customQueryInput.shape
1928
- }, async (args) => {
1929
- const result = await customQuery(args, await getContext());
1930
- return { content: [{
1931
- type: "text",
1932
- text: JSON.stringify(result, null, 2)
1933
- }] };
1934
- });
1935
- server.registerTool("inspect-url", {
1936
- description: "Inspect a URL in Google Search Console to check its indexing status",
1937
- inputSchema: inspectUrlInput.shape
1938
- }, async (args) => {
1939
- const result = await inspectUrl$1(args, await getContext());
1940
- return { content: [{
1941
- type: "text",
1942
- text: JSON.stringify(result, null, 2)
1943
- }] };
1944
- });
1945
- server.registerTool("request-indexing", {
1946
- description: "Request Google to index or remove a URL via the Indexing API",
1947
- inputSchema: requestIndexingInput.shape
1948
- }, async (args) => {
1949
- const result = await requestIndexing$1(args, await getContext());
1950
- return { content: [{
1951
- type: "text",
1952
- text: JSON.stringify(result, null, 2)
1953
- }] };
1954
- });
1955
- server.registerTool("get-indexing-status", {
1956
- description: "Get indexing status metadata for a URL",
1957
- inputSchema: getIndexingStatusInput.shape
1958
- }, async (args) => {
1959
- const result = await getIndexingStatus(args, await getContext());
1960
- return { content: [{
1961
- type: "text",
1962
- text: JSON.stringify(result, null, 2)
1963
- }] };
1964
- });
1965
- server.registerTool("batch-request-indexing", {
1966
- description: "Batch request indexing for multiple URLs with rate limiting",
1967
- inputSchema: batchRequestIndexingInput.shape
1968
- }, async (args) => {
1969
- const result = await batchRequestIndexing$1(args, await getContext());
1970
- return { content: [{
1971
- type: "text",
1972
- text: JSON.stringify(result, null, 2)
1973
- }] };
1974
- });
1975
- server.registerTool("batch-inspect-urls", {
1976
- description: "Batch inspect multiple URLs to check their indexing status",
1977
- inputSchema: batchInspectUrlsInput.shape
1978
- }, async (args) => {
1979
- const result = await batchInspectUrls$1(args, await getContext());
1980
- return { content: [{
1981
- type: "text",
1982
- text: JSON.stringify(result, null, 2)
1983
- }] };
1984
- });
1985
- if (cloudClient) {
1986
- const siteIdSchema = z.object({ siteId: z.string().describe("Site ID from gscdump platform (use cloud-list-sites to find)") });
1987
- const analysisSchema = z.object({
1988
- siteId: z.string().describe("Site ID from gscdump platform"),
1989
- tool: z.enum([
1990
- "striking-distance",
1991
- "opportunity",
1992
- "movers",
1993
- "decay",
1994
- "zero-click",
1995
- "brand",
1996
- "cannibalization",
1997
- "clustering",
1998
- "concentration",
1999
- "seasonality"
2000
- ]).describe("Analysis tool to run"),
2001
- startDate: z.string().optional().describe("Start date (YYYY-MM-DD)"),
2002
- endDate: z.string().optional().describe("End date (YYYY-MM-DD)"),
2003
- limit: z.number().optional().describe("Max results")
2004
- });
2005
- server.registerTool("cloud-list-sites", {
2006
- description: "List registered sites on gscdump.com with sync status and progress",
2007
- inputSchema: z.object({}).shape
2008
- }, async () => {
2009
- const result = await cloudClient.me();
2010
- return { content: [{
2011
- type: "text",
2012
- text: JSON.stringify(result, null, 2)
2013
- }] };
2014
- });
2015
- server.registerTool("cloud-sync-status", {
2016
- description: "Get detailed sync status for a site on gscdump.com",
2017
- inputSchema: siteIdSchema.shape
2018
- }, async ({ siteId }) => {
2019
- const result = await cloudClient.syncStatus(siteId);
2020
- return { content: [{
2021
- type: "text",
2022
- text: JSON.stringify(result, null, 2)
2023
- }] };
2024
- });
2025
- server.registerTool("cloud-sitemaps", {
2026
- description: "Get sitemap health data for a site from gscdump.com (includes URL counts, error tracking, history)",
2027
- inputSchema: siteIdSchema.shape
2028
- }, async ({ siteId }) => {
2029
- const result = await cloudClient.sitemaps(siteId);
2030
- return { content: [{
2031
- type: "text",
2032
- text: JSON.stringify(result, null, 2)
2033
- }] };
2034
- });
2035
- server.registerTool("cloud-analysis", {
2036
- description: "Run SEO analysis on synced data (striking-distance, opportunity, movers, decay, zero-click, brand, cannibalization, clustering, concentration, seasonality)",
2037
- inputSchema: analysisSchema.shape
2038
- }, async ({ siteId, tool, startDate, endDate, limit }) => {
2039
- const params = {};
2040
- if (startDate) params.startDate = startDate;
2041
- if (endDate) params.endDate = endDate;
2042
- if (limit) params.limit = String(limit);
2043
- const result = await cloudClient.analysis(siteId, tool, params);
2044
- return { content: [{
2045
- type: "text",
2046
- text: JSON.stringify(result, null, 2)
2047
- }] };
2048
- });
2049
- server.registerTool("cloud-register-site", {
2050
- description: "Register a site for syncing on gscdump.com",
2051
- inputSchema: z.object({ siteUrl: z.string().describe("Site URL to register (e.g., example.com)") }).shape
2052
- }, async ({ siteUrl }) => {
2053
- const result = await cloudClient.registerSite(siteUrl);
2054
- return { content: [{
2055
- type: "text",
2056
- text: JSON.stringify(result, null, 2)
2057
- }] };
2058
- });
2059
- server.registerTool("cloud-query", {
2060
- description: "Live GSC query via gscdump.com platform (bypasses synced data, queries Google directly)",
2061
- inputSchema: z.object({
2062
- siteId: z.string().describe("Site ID from gscdump platform"),
2063
- startDate: z.string().describe("Start date (YYYY-MM-DD)"),
2064
- endDate: z.string().describe("End date (YYYY-MM-DD)"),
2065
- dimensions: z.string().optional().describe("Comma-separated: page,query,country,device,date,searchAppearance"),
2066
- rowLimit: z.number().optional().describe("Max rows (default 1000, max 25000)")
2067
- }).shape
2068
- }, async ({ siteId, startDate, endDate, dimensions, rowLimit }) => {
2069
- const params = {
2070
- startDate,
2071
- endDate
2072
- };
2073
- if (dimensions) params.dimensions = dimensions;
2074
- if (rowLimit) params.rowLimit = String(rowLimit);
2075
- const result = await cloudClient.query(siteId, params);
2076
- return { content: [{
2077
- type: "text",
2078
- text: JSON.stringify(result, null, 2)
2079
- }] };
2080
- });
2081
- server.registerTool("cloud-indexing", {
2082
- description: "Get indexing status trend and summary for a site on gscdump.com",
2083
- inputSchema: z.object({
2084
- siteId: z.string().describe("Site ID from gscdump platform"),
2085
- days: z.number().optional().describe("Days of trend data (default 28, max 90)")
2086
- }).shape
2087
- }, async ({ siteId, days }) => {
2088
- const params = {};
2089
- if (days) params.days = String(days);
2090
- const result = await cloudClient.indexing(siteId, params);
2091
- return { content: [{
2092
- type: "text",
2093
- text: JSON.stringify(result, null, 2)
2094
- }] };
2095
- });
2096
- server.registerTool("cloud-indexing-diagnostics", {
2097
- description: "Get indexing issue diagnostics with counts and severity for a site",
2098
- inputSchema: siteIdSchema.shape
2099
- }, async ({ siteId }) => {
2100
- const result = await cloudClient.indexingDiagnostics(siteId);
2101
- return { content: [{
2102
- type: "text",
2103
- text: JSON.stringify(result, null, 2)
2104
- }] };
2105
- });
2106
- server.registerTool("cloud-indexing-urls", {
2107
- description: "Get paginated URL list with indexing status, verdict, and coverage details",
2108
- inputSchema: z.object({
2109
- siteId: z.string().describe("Site ID from gscdump platform"),
2110
- status: z.enum([
2111
- "indexed",
2112
- "not_indexed",
2113
- "pending"
2114
- ]).optional().describe("Filter by status"),
2115
- issue: z.string().optional().describe("Filter by issue type"),
2116
- search: z.string().optional().describe("Search URLs"),
2117
- limit: z.number().optional().describe("Max results (default 100, max 500)"),
2118
- offset: z.number().optional().describe("Pagination offset")
2119
- }).shape
2120
- }, async ({ siteId, status, issue, search, limit, offset }) => {
2121
- const params = {};
2122
- if (status) params.status = status;
2123
- if (issue) params.issue = issue;
2124
- if (search) params.search = search;
2125
- if (limit) params.limit = String(limit);
2126
- if (offset) params.offset = String(offset);
2127
- const result = await cloudClient.indexingUrls(siteId, params);
2128
- return { content: [{
2129
- type: "text",
2130
- text: JSON.stringify(result, null, 2)
2131
- }] };
2132
- });
2133
- server.registerTool("cloud-index-percent", {
2134
- description: "Get index percent trend, invisible URLs (in sitemap but no traffic), and orphan pages (traffic but not in sitemap)",
2135
- inputSchema: siteIdSchema.shape
2136
- }, async ({ siteId }) => {
2137
- const result = await cloudClient.indexPercent(siteId);
2138
- return { content: [{
2139
- type: "text",
2140
- text: JSON.stringify(result, null, 2)
2141
- }] };
2142
- });
2143
- server.registerTool("cloud-trigger-sync", {
2144
- description: "Trigger a fresh data sync for a site on gscdump.com",
2145
- inputSchema: siteIdSchema.shape
2146
- }, async ({ siteId }) => {
2147
- const result = await cloudClient.triggerSync(siteId);
2148
- return { content: [{
2149
- type: "text",
2150
- text: JSON.stringify(result, null, 2)
2151
- }] };
2152
- });
2153
- server.registerTool("cloud-delete-site", {
2154
- description: "Unregister a site from gscdump.com (stops syncing, removes pending jobs)",
2155
- inputSchema: siteIdSchema.shape
2156
- }, async ({ siteId }) => {
2157
- const result = await cloudClient.deleteSite(siteId);
2158
- return { content: [{
2159
- type: "text",
2160
- text: JSON.stringify(result, null, 2)
2161
- }] };
2162
- });
2163
- server.registerTool("cloud-sitemap-action", {
2164
- description: "Submit, delete, or refresh sitemaps via gscdump.com",
2165
- inputSchema: z.object({
2166
- siteId: z.string().describe("Site ID from gscdump platform"),
2167
- action: z.enum([
2168
- "submit",
2169
- "delete",
2170
- "refresh"
2171
- ]).describe("Action to perform"),
2172
- sitemapUrl: z.string().optional().describe("Sitemap URL (required for submit/delete)")
2173
- }).shape
2174
- }, async ({ siteId, action, sitemapUrl }) => {
2175
- const body = { action };
2176
- if (sitemapUrl) body.sitemapUrl = sitemapUrl;
2177
- const result = await cloudClient.sitemapAction(siteId, body);
2178
- return { content: [{
2179
- type: "text",
2180
- text: JSON.stringify(result, null, 2)
2181
- }] };
1437
+ const inspectCommand = defineCommand({
1438
+ meta: {
1439
+ name: "inspect",
1440
+ description: "Inspect a specific URL's indexing status"
1441
+ },
1442
+ args: {
1443
+ site: {
1444
+ type: "string",
1445
+ alias: "s",
1446
+ required: true,
1447
+ description: "Site URL (e.g., sc-domain:example.com)"
1448
+ },
1449
+ url: {
1450
+ type: "positional",
1451
+ required: true,
1452
+ description: "URL to inspect"
1453
+ },
1454
+ json: {
1455
+ type: "boolean",
1456
+ default: false,
1457
+ description: "Output as JSON"
1458
+ }
1459
+ },
1460
+ async run({ args }) {
1461
+ const result = await (await createCommandContext({ needsAuth: true })).client.inspect(args.site, args.url).catch((e) => {
1462
+ logger.error(`Inspection failed: ${e.message}`);
1463
+ process.exit(1);
2182
1464
  });
1465
+ const indexStatus = (result?.inspectionResult)?.indexStatusResult;
1466
+ if (args.json) {
1467
+ console.log(JSON.stringify({
1468
+ url: args.url,
1469
+ verdict: indexStatus?.verdict || null,
1470
+ coverageState: indexStatus?.coverageState || null,
1471
+ indexingState: indexStatus?.indexingState || null,
1472
+ lastCrawlTime: indexStatus?.lastCrawlTime || null,
1473
+ isIndexed: indexStatus?.verdict === "PASS",
1474
+ raw: result
1475
+ }, null, 2));
1476
+ return;
1477
+ }
1478
+ console.log();
1479
+ console.log(` \x1B[1mURL:\x1B[0m ${args.url}`);
1480
+ console.log();
1481
+ const verdictColor = indexStatus?.verdict === "PASS" ? "\x1B[32m" : "\x1B[31m";
1482
+ console.log(` Verdict: ${verdictColor}${indexStatus?.verdict || "N/A"}\x1B[0m`);
1483
+ if (indexStatus?.coverageState) console.log(` Coverage: ${indexStatus.coverageState}`);
1484
+ if (indexStatus?.indexingState) console.log(` Indexing: ${indexStatus.indexingState}`);
1485
+ if (indexStatus?.lastCrawlTime) console.log(` Last Crawl: ${indexStatus.lastCrawlTime}`);
1486
+ if (indexStatus?.robotsTxtState) console.log(` Robots.txt: ${indexStatus.robotsTxtState}`);
1487
+ if (indexStatus?.pageFetchState) console.log(` Page Fetch: ${indexStatus.pageFetchState}`);
1488
+ if (indexStatus?.googleCanonical) console.log(` Google Canon: ${indexStatus.googleCanonical}`);
1489
+ if (indexStatus?.userCanonical) console.log(` User Canon: ${indexStatus.userCanonical}`);
1490
+ console.log();
2183
1491
  }
2184
- return server;
2185
- }
2186
-
2187
- //#endregion
2188
- //#region src/commands/mcp.ts
1492
+ });
2189
1493
  async function checkAuth() {
2190
- if ((process$1.env.GOOGLE_ACCESS_TOKEN || process$1.env.GOOGLE_REFRESH_TOKEN) && process$1.env.GOOGLE_CLIENT_ID && process$1.env.GOOGLE_CLIENT_SECRET) return { ok: true };
1494
+ if ((process.env.GOOGLE_ACCESS_TOKEN || process.env.GOOGLE_REFRESH_TOKEN) && process.env.GOOGLE_CLIENT_ID && process.env.GOOGLE_CLIENT_SECRET) return { ok: true };
2191
1495
  const config = await loadConfig();
2192
- if (!config.mode) return {
1496
+ if (!config.clientId && !config.clientSecret) return {
2193
1497
  ok: false,
2194
1498
  error: `GSCDump not configured.
2195
1499
 
@@ -2201,20 +1505,9 @@ Or provide env vars: GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET, GOOGLE_ACCESS_TOKEN
2201
1505
 
2202
1506
  Then restart your MCP client.`
2203
1507
  };
2204
- if (config.mode === "cloud") {
2205
- if (!await loadCloudTokens()) return {
2206
- ok: false,
2207
- error: `Cloud authentication expired or missing.
2208
-
2209
- Run this command to re-authenticate:
2210
-
2211
- npx @gscdump/cli init
2212
-
2213
- Then restart your MCP client.`
2214
- };
2215
- } else if (!await loadTokens()) return {
1508
+ if (!await loadTokens()) return {
2216
1509
  ok: false,
2217
- error: `Local authentication missing.
1510
+ error: `Authentication missing.
2218
1511
 
2219
1512
  Run this command to authenticate:
2220
1513
 
@@ -2232,23 +1525,27 @@ const mcpCommand = defineCommand({
2232
1525
  async run() {
2233
1526
  const authCheck = await checkAuth();
2234
1527
  if (!authCheck.ok) {
2235
- process$1.stderr.write(`\n${authCheck.error}\n\n`);
2236
- process$1.exit(1);
1528
+ process.stderr.write(`\n${authCheck.error}\n\n`);
1529
+ process.exit(1);
2237
1530
  }
2238
1531
  const server = createGscMcpServer({
2239
1532
  name: "gscdump",
2240
1533
  version: VERSION,
2241
- getAuth: () => getAuth({ interactive: false }),
2242
- cloudClient: await getCloudClient()
1534
+ getAuth: () => getAuth({ interactive: false })
2243
1535
  });
2244
1536
  const transport = new StdioServerTransport();
2245
1537
  await server.connect(transport);
2246
1538
  }
2247
1539
  });
2248
-
2249
- //#endregion
2250
- //#region src/commands/query.ts
2251
- const DIMENSION_MAP = {
1540
+ const DIMENSIONS = [
1541
+ "page",
1542
+ "query",
1543
+ "date",
1544
+ "country",
1545
+ "device",
1546
+ "searchAppearance"
1547
+ ];
1548
+ const DIM_COLUMNS = {
2252
1549
  page,
2253
1550
  query,
2254
1551
  date,
@@ -2256,40 +1553,38 @@ const DIMENSION_MAP = {
2256
1553
  device,
2257
1554
  searchAppearance
2258
1555
  };
2259
- async function resolveCloudSite$1(cloud, target) {
2260
- const me = await cloud.me().catch((e) => {
2261
- logger.error(`Failed to fetch sites: ${e.message}`);
2262
- process$1.exit(1);
2263
- });
2264
- if (me.sites.length === 0) {
2265
- logger.error("No registered sites. Run gscdump register first.");
2266
- process$1.exit(1);
2267
- }
2268
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
2269
- if (!site) if (me.sites.length === 1) site = me.sites[0];
2270
- else {
2271
- const selected = await select({
2272
- message: "Select a site",
2273
- options: me.sites.map((s) => ({
2274
- value: s.siteId,
2275
- label: s.siteUrl
2276
- }))
1556
+ async function runLiveQuery(client, siteUrl, opts) {
1557
+ const allRows = [];
1558
+ let startRow = 0;
1559
+ while (true) {
1560
+ const rows = ((await client._rawQuery(siteUrl, {
1561
+ startDate: opts.startDate,
1562
+ endDate: opts.endDate,
1563
+ dimensions: opts.dimensions,
1564
+ rowLimit: opts.rowLimit,
1565
+ startRow
1566
+ })).rows || []).map((row) => {
1567
+ const result = {
1568
+ clicks: row.clicks ?? 0,
1569
+ impressions: row.impressions ?? 0,
1570
+ ctr: row.ctr ?? 0,
1571
+ position: row.position ?? 0
1572
+ };
1573
+ opts.dimensions.forEach((dim, i) => {
1574
+ result[dim] = row.keys?.[i];
1575
+ });
1576
+ return result;
2277
1577
  });
2278
- if (isCancel(selected)) {
2279
- cancel("Cancelled");
2280
- process$1.exit(0);
2281
- }
2282
- site = me.sites.find((s) => s.siteId === selected);
1578
+ allRows.push(...rows);
1579
+ if (rows.length < opts.rowLimit) break;
1580
+ startRow += rows.length;
2283
1581
  }
2284
- return {
2285
- siteId: site.siteId,
2286
- siteUrl: site.siteUrl
2287
- };
1582
+ return { rows: allRows };
2288
1583
  }
2289
1584
  const queryCommand = defineCommand({
2290
1585
  meta: {
2291
1586
  name: "query",
2292
- description: "Run custom search analytics queries"
1587
+ description: "Run a search analytics query (local Parquet by default, --live hits GSC API)"
2293
1588
  },
2294
1589
  args: {
2295
1590
  site: {
@@ -2300,7 +1595,7 @@ const queryCommand = defineCommand({
2300
1595
  dimensions: {
2301
1596
  type: "string",
2302
1597
  alias: "d",
2303
- description: "Dimensions: page,query,date,country,device,searchAppearance"
1598
+ description: `Dimensions: ${DIMENSIONS.join(",")}`
2304
1599
  },
2305
1600
  start: {
2306
1601
  type: "string",
@@ -2327,6 +1622,19 @@ const queryCommand = defineCommand({
2327
1622
  default: "json",
2328
1623
  description: "Output format: json or csv"
2329
1624
  },
1625
+ sql: {
1626
+ type: "string",
1627
+ description: "Raw DuckDB SQL using {{FILES}} as the file list placeholder (bypasses builder)"
1628
+ },
1629
+ table: {
1630
+ type: "string",
1631
+ description: "Analytics table for --sql (default: pages)"
1632
+ },
1633
+ live: {
1634
+ type: "boolean",
1635
+ default: false,
1636
+ description: "Bypass local store; hit the GSC API directly"
1637
+ },
2330
1638
  quiet: {
2331
1639
  type: "boolean",
2332
1640
  alias: "q",
@@ -2341,70 +1649,70 @@ const queryCommand = defineCommand({
2341
1649
  }
2342
1650
  },
2343
1651
  async run({ args }) {
2344
- const config = await loadConfig();
2345
- let dimNames;
2346
- if (args.dimensions) dimNames = String(args.dimensions).split(",").filter((d) => d in DIMENSION_MAP);
2347
- else if (args.interactive) {
2348
- const selected = await multiselect({
2349
- message: "Select dimensions",
2350
- options: Object.keys(DIMENSION_MAP).map((d) => ({
2351
- value: d,
2352
- label: d
2353
- })),
2354
- initialValues: ["page", "query"]
2355
- });
2356
- if (isCancel(selected)) {
2357
- cancel("Cancelled");
2358
- process$1.exit(0);
2359
- }
2360
- dimNames = selected;
2361
- } else dimNames = ["page", "query"];
2362
- let startDate;
2363
- let endDate;
2364
- if (args.start && args.end) {
2365
- startDate = String(args.start);
2366
- endDate = String(args.end);
2367
- } else if (args.interactive) {
2368
- const startInput = await text({
2369
- message: "Start date (YYYY-MM-DD)",
2370
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 28 * 864e5)).toISOString().split("T")[0]
1652
+ if (args.sql) {
1653
+ await runRawSqlMode({
1654
+ sql: String(args.sql),
1655
+ site: args.site ? String(args.site) : void 0,
1656
+ table: args.table ? String(args.table) : "pages",
1657
+ output: args.output ? String(args.output) : void 0,
1658
+ quiet: Boolean(args.quiet)
2371
1659
  });
2372
- if (isCancel(startInput)) {
2373
- cancel("Cancelled");
2374
- process$1.exit(0);
2375
- }
2376
- const endInput = await text({
2377
- message: "End date (YYYY-MM-DD)",
2378
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0]
2379
- });
2380
- if (isCancel(endInput)) {
2381
- cancel("Cancelled");
2382
- process$1.exit(0);
2383
- }
2384
- startDate = String(startInput) || (/* @__PURE__ */ new Date(Date.now() - 28 * 864e5)).toISOString().split("T")[0];
2385
- endDate = String(endInput) || (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
2386
- } else {
2387
- endDate = (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
2388
- startDate = (/* @__PURE__ */ new Date(Date.now() - 31 * 864e5)).toISOString().split("T")[0];
1660
+ return;
2389
1661
  }
1662
+ const dimNames = await resolveDimensions(args);
1663
+ const { startDate, endDate } = await resolveRange(args);
2390
1664
  const rowLimit = Number.parseInt(String(args.limit), 10);
2391
1665
  const format = String(args.format);
2392
- const cloud = await getCloudClient();
2393
- if (cloud) {
2394
- const { siteId, siteUrl: siteUrl$1 } = await resolveCloudSite$1(cloud, args.site || config.defaultSite);
2395
- if (!args.quiet) logger.info(`Querying ${siteUrl$1}...`);
2396
- const result = await cloud.query(siteId, {
1666
+ const ctx = await createCommandContext({
1667
+ needsAuth: true,
1668
+ needsStore: !args.live,
1669
+ interactive: Boolean(args.interactive)
1670
+ });
1671
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
1672
+ if (args.live) {
1673
+ if (!args.quiet) logger.info(`Querying ${siteUrl} via live GSC API...`);
1674
+ const result = await runLiveQuery(ctx.client, siteUrl, {
2397
1675
  startDate,
2398
1676
  endDate,
2399
- dimensions: dimNames.join(","),
2400
- rowLimit: String(rowLimit)
1677
+ dimensions: dimNames,
1678
+ rowLimit
2401
1679
  }).catch((e) => {
2402
1680
  logger.error(`Query failed: ${e.message}`);
2403
- process$1.exit(1);
1681
+ process.exit(1);
2404
1682
  });
2405
- if (!args.quiet) logger.success(`Fetched ${result.rows.length} rows`);
2406
- const output$1 = {
2407
- siteUrl: siteUrl$1,
1683
+ await writeOutput({
1684
+ output: {
1685
+ siteUrl,
1686
+ dimensions: dimNames,
1687
+ dateRange: {
1688
+ start: startDate,
1689
+ end: endDate
1690
+ },
1691
+ total: result.rows.length,
1692
+ data: result.rows
1693
+ },
1694
+ format,
1695
+ path: args.output ? String(args.output) : void 0,
1696
+ quiet: Boolean(args.quiet)
1697
+ });
1698
+ return;
1699
+ }
1700
+ if (!args.quiet) logger.info(`Querying ${siteUrl} from local Parquet store...`);
1701
+ const state = buildLocalState(dimNames, startDate, endDate, rowLimit);
1702
+ const store = ctx.store;
1703
+ const table = inferTable(dimNames);
1704
+ await assertRangeCovered(store, siteUrl, table, startDate, endDate);
1705
+ const result = await store.engine.query({
1706
+ userId: store.userId,
1707
+ siteId: store.siteIdFor(siteUrl),
1708
+ table
1709
+ }, state).catch((e) => {
1710
+ logger.error(`Query failed: ${e.message}`);
1711
+ process.exit(1);
1712
+ });
1713
+ await writeOutput({
1714
+ output: {
1715
+ siteUrl,
2408
1716
  dimensions: dimNames,
2409
1717
  dateRange: {
2410
1718
  start: startDate,
@@ -2412,700 +1720,1079 @@ const queryCommand = defineCommand({
2412
1720
  },
2413
1721
  total: result.rows.length,
2414
1722
  data: result.rows
2415
- };
2416
- const content$1 = format === "csv" ? exportToCSV(output$1) : JSON.stringify(output$1, null, 2);
2417
- if (args.output) {
2418
- await fs.writeFile(String(args.output), content$1);
2419
- if (!args.quiet) logger.info(`Written to ${args.output}`);
2420
- } else console.log(content$1);
2421
- return;
2422
- }
2423
- const client = googleSearchConsole(await getAuth({
2424
- interactive: false,
2425
- config
2426
- }));
2427
- const dimensions = dimNames.map((d) => DIMENSION_MAP[d]);
2428
- let siteUrl = String(args.site || config.defaultSite || "");
2429
- if (!siteUrl || args.interactive) {
2430
- const verified = (await client.sites()).filter((s) => s.permissionLevel !== "siteUnverifiedUser");
2431
- if (verified.length === 0) {
2432
- logger.error("No verified sites found");
2433
- process$1.exit(1);
2434
- }
2435
- const selected = await select({
2436
- message: "Select a site",
2437
- options: verified.map((s) => ({
2438
- value: s.siteUrl,
2439
- label: s.siteUrl
2440
- })),
2441
- initialValue: siteUrl || verified[0]?.siteUrl
2442
- });
2443
- if (isCancel(selected)) {
2444
- cancel("Cancelled");
2445
- process$1.exit(0);
2446
- }
2447
- siteUrl = selected;
2448
- }
2449
- const builder = gsc.select(...dimensions).where(between(date, startDate, endDate)).limit(rowLimit);
2450
- if (!args.quiet) logger.info(`Querying ${siteUrl}...`);
2451
- const rows = [];
2452
- for await (const batch of client.query(siteUrl, builder)) {
2453
- rows.push(...batch);
2454
- if (!args.quiet) {
2455
- clearLine();
2456
- process$1.stdout.write(progressBar(rows.length, rowLimit, `${rows.length} rows`));
2457
- }
2458
- }
2459
- if (!args.quiet) {
2460
- clearLine();
2461
- logger.success(`Fetched ${rows.length} rows`);
2462
- }
2463
- const output = {
2464
- siteUrl,
2465
- dimensions: dimNames,
2466
- dateRange: {
2467
- start: startDate,
2468
- end: endDate
2469
1723
  },
2470
- total: rows.length,
2471
- data: rows
2472
- };
2473
- const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
2474
- if (args.output) {
2475
- await fs.writeFile(String(args.output), content);
2476
- if (!args.quiet) logger.info(`Written to ${args.output}`);
2477
- } else console.log(content);
1724
+ format,
1725
+ path: args.output ? String(args.output) : void 0,
1726
+ quiet: Boolean(args.quiet)
1727
+ });
2478
1728
  }
2479
1729
  });
2480
-
2481
- //#endregion
2482
- //#region src/commands/register.ts
2483
- const registerCommand = defineCommand({
2484
- meta: {
2485
- name: "register",
2486
- description: "Register site(s) for syncing (cloud mode only)"
2487
- },
2488
- args: { site: {
2489
- type: "positional",
2490
- description: "Site URL(s) to register (space-separated for bulk)",
2491
- required: false
2492
- } },
2493
- async run({ args }) {
2494
- const cloud = await getCloudClient();
2495
- if (!cloud) {
2496
- logger.error("Register requires cloud mode. Run gscdump init to set up cloud mode.");
2497
- process$1.exit(1);
2498
- }
2499
- const rawArgs = process$1.argv.slice(3).filter((a) => !a.startsWith("-"));
2500
- const siteUrls = rawArgs.length > 0 ? rawArgs : args.site ? [args.site] : [];
2501
- if (siteUrls.length > 1) {
2502
- logger.info(`Registering ${siteUrls.length} sites...`);
2503
- const result$1 = await cloud.bulkRegister(siteUrls).catch((e) => {
2504
- logger.error(`Bulk registration failed: ${e.message}`);
2505
- process$1.exit(1);
2506
- });
2507
- console.log();
2508
- for (const r of result$1.results) {
2509
- const icon = r.status === "registered" ? "\x1B[32m✓\x1B[0m" : r.status === "already_exists" ? "\x1B[33m~\x1B[0m" : "\x1B[31m✗\x1B[0m";
2510
- const detail = r.status === "registered" ? `ID: ${r.siteId}` : r.status === "already_exists" ? "already registered" : r.error || r.status;
2511
- console.log(` ${icon} ${r.siteUrl} — ${detail}`);
2512
- }
2513
- console.log();
2514
- const s = result$1.summary;
2515
- logger.success(`${s.registered} registered, ${s.alreadyExists} existing, ${s.notFound} not found, ${s.errors} errors`);
2516
- return;
2517
- }
2518
- let siteUrl = siteUrls[0];
2519
- if (!siteUrl) {
2520
- const available = await cloud.availableSites().catch((e) => {
2521
- logger.error(`Failed to fetch available sites: ${e.message}`);
2522
- process$1.exit(1);
2523
- });
2524
- const unregistered = available.filter((s) => !s.registered);
2525
- if (unregistered.length === 0) {
2526
- if (available.length > 0) logger.info("All available sites are already registered");
2527
- else logger.warn("No GSC sites found for this account");
2528
- return;
2529
- }
2530
- const selected = await select({
2531
- message: "Select a site to register for syncing",
2532
- options: unregistered.map((s) => ({
2533
- value: s.siteUrl,
2534
- label: s.siteUrl,
2535
- hint: s.permissionLevel
2536
- }))
2537
- });
2538
- if (isCancel(selected)) {
2539
- cancel("Cancelled");
2540
- process$1.exit(0);
2541
- }
2542
- siteUrl = selected;
1730
+ async function resolveDimensions(args) {
1731
+ if (args.dimensions) return String(args.dimensions).split(",").filter((d) => DIMENSIONS.includes(d));
1732
+ if (args.interactive) {
1733
+ const selected = await multiselect({
1734
+ message: "Select dimensions",
1735
+ options: DIMENSIONS.map((d) => ({
1736
+ value: d,
1737
+ label: d
1738
+ })),
1739
+ initialValues: ["page", "query"]
1740
+ });
1741
+ if (isCancel(selected)) {
1742
+ cancel("Cancelled");
1743
+ process.exit(0);
2543
1744
  }
2544
- logger.info(`Registering ${siteUrl}...`);
2545
- const result = await cloud.registerSite(siteUrl).catch((e) => {
2546
- logger.error(`Registration failed: ${e.message}`);
2547
- process$1.exit(1);
1745
+ return selected;
1746
+ }
1747
+ return ["page", "query"];
1748
+ }
1749
+ async function resolveRange(args) {
1750
+ if (args.start && args.end) return {
1751
+ startDate: String(args.start),
1752
+ endDate: String(args.end)
1753
+ };
1754
+ if (args.interactive) {
1755
+ const startInput = await text({
1756
+ message: "Start date (YYYY-MM-DD)",
1757
+ placeholder: daysAgo(28)
2548
1758
  });
2549
- if (result.existing) logger.info(`Site already registered (${result.status})`);
2550
- else logger.success(`Site registered! Sync queued.`);
2551
- console.log();
2552
- console.log(` Site ID: \x1B[36m${result.siteId}\x1B[0m`);
2553
- console.log(` Status: ${result.status}`);
2554
- console.log();
2555
- logger.info("Run gscdump sync status to check sync progress");
1759
+ if (isCancel(startInput)) {
1760
+ cancel("Cancelled");
1761
+ process.exit(0);
1762
+ }
1763
+ const endInput = await text({
1764
+ message: "End date (YYYY-MM-DD)",
1765
+ placeholder: daysAgo(3)
1766
+ });
1767
+ if (isCancel(endInput)) {
1768
+ cancel("Cancelled");
1769
+ process.exit(0);
1770
+ }
1771
+ return {
1772
+ startDate: String(startInput) || daysAgo(28),
1773
+ endDate: String(endInput) || daysAgo(3)
1774
+ };
2556
1775
  }
2557
- });
2558
-
2559
- //#endregion
2560
- //#region src/commands/sitemaps.ts
2561
- const listCommand = defineCommand({
1776
+ return {
1777
+ startDate: daysAgo(31),
1778
+ endDate: daysAgo(3)
1779
+ };
1780
+ }
1781
+ function buildLocalState(dimNames, startDate, endDate, rowLimit) {
1782
+ const dims = dimNames.map((d) => DIM_COLUMNS[d]).filter((c) => Boolean(c));
1783
+ return gsc.select(...dims).where(between(date, startDate, endDate)).limit(rowLimit).getState();
1784
+ }
1785
+ async function assertRangeCovered(store, siteUrl, table, startDate, endDate) {
1786
+ const wm = (await store.engine.getWatermarks({
1787
+ userId: store.userId,
1788
+ siteId: store.siteIdFor(siteUrl),
1789
+ table
1790
+ }))[0];
1791
+ if (!wm) {
1792
+ logger.error(`No data synced for ${siteUrl} / ${table}. Run \`gscdump sync\` first, or pass --live.`);
1793
+ process.exit(1);
1794
+ }
1795
+ if (endDate > wm.newestDateSynced) {
1796
+ logger.error(`Requested end=${endDate} is newer than last sync (${wm.newestDateSynced}). Run \`gscdump sync\` first, or pass --live.`);
1797
+ process.exit(1);
1798
+ }
1799
+ if (startDate < wm.oldestDateSynced) {
1800
+ logger.error(`Requested start=${startDate} is older than first sync (${wm.oldestDateSynced}). Run \`gscdump sync --start=${startDate}\` first, or pass --live.`);
1801
+ process.exit(1);
1802
+ }
1803
+ }
1804
+ async function runRawSqlMode(opts) {
1805
+ if (!isKnownTable$1(opts.table)) {
1806
+ logger.error(`Unknown table "${opts.table}". Known: ${allTables().join(", ")}`);
1807
+ process.exit(1);
1808
+ }
1809
+ const ctx = await createCommandContext({
1810
+ needsAuth: true,
1811
+ needsStore: true
1812
+ });
1813
+ const siteUrl = await ctx.resolveSite(opts.site);
1814
+ const store = ctx.store;
1815
+ if (!opts.quiet) logger.info(`Running raw SQL over table "${opts.table}" for ${siteUrl}`);
1816
+ const { rows, sql } = await store.runRawSql({
1817
+ sql: opts.sql,
1818
+ siteUrl,
1819
+ table: opts.table
1820
+ }).catch((e) => {
1821
+ logger.error(`SQL failed: ${e.message}`);
1822
+ process.exit(1);
1823
+ });
1824
+ const payload = JSON.stringify({
1825
+ sql,
1826
+ total: rows.length,
1827
+ data: rows
1828
+ }, null, 2);
1829
+ if (opts.output) {
1830
+ await fs.writeFile(opts.output, payload);
1831
+ if (!opts.quiet) logger.info(`Written to ${opts.output}`);
1832
+ } else console.log(payload);
1833
+ }
1834
+ async function writeOutput(opts) {
1835
+ const content = opts.format === "csv" ? exportToCSV(opts.output) : JSON.stringify(opts.output, null, 2);
1836
+ if (opts.path) {
1837
+ await fs.writeFile(opts.path, content);
1838
+ if (!opts.quiet) logger.info(`Written to ${opts.path}`);
1839
+ } else console.log(content);
1840
+ }
1841
+ function isKnownTable$1(name) {
1842
+ return allTables().includes(name);
1843
+ }
1844
+ function requireSite(target) {
1845
+ if (!target) {
1846
+ logger.error("Site URL required (-s)");
1847
+ process.exit(1);
1848
+ }
1849
+ return target;
1850
+ }
1851
+ const sitemapsCommand = defineCommand({
2562
1852
  meta: {
2563
- name: "list",
2564
- description: "List sitemaps for a site"
2565
- },
2566
- args: {
2567
- site: {
2568
- type: "string",
2569
- alias: "s",
2570
- description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
2571
- },
2572
- json: {
2573
- type: "boolean",
2574
- default: false,
2575
- description: "Output as JSON"
2576
- }
1853
+ name: "sitemaps",
1854
+ description: "Manage sitemaps"
2577
1855
  },
2578
- async run({ args }) {
2579
- const cloud = await getCloudClient();
2580
- if (cloud) {
2581
- const config = await loadConfig();
2582
- const target = args.site || config.defaultSite;
2583
- const me = await cloud.me().catch((e) => {
2584
- logger.error(`Failed to fetch sites: ${e.message}`);
2585
- process$1.exit(1);
2586
- });
2587
- if (me.sites.length === 0) {
2588
- logger.warn("No registered sites. Run gscdump register first.");
2589
- return;
2590
- }
2591
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
2592
- if (!site) if (me.sites.length === 1) site = me.sites[0];
2593
- else {
2594
- const selected = await select({
2595
- message: "Select a site",
2596
- options: me.sites.map((s) => ({
2597
- value: s.siteId,
2598
- label: s.siteUrl
2599
- }))
2600
- });
2601
- if (isCancel(selected)) {
2602
- cancel("Cancelled");
2603
- process$1.exit(0);
1856
+ subCommands: {
1857
+ list: defineCommand({
1858
+ meta: {
1859
+ name: "list",
1860
+ description: "List sitemaps for a site"
1861
+ },
1862
+ args: {
1863
+ site: {
1864
+ type: "string",
1865
+ alias: "s",
1866
+ description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
1867
+ },
1868
+ json: {
1869
+ type: "boolean",
1870
+ default: false,
1871
+ description: "Output as JSON"
1872
+ }
1873
+ },
1874
+ async run({ args }) {
1875
+ const config = await loadConfig();
1876
+ const siteUrl = requireSite(args.site || config.defaultSite);
1877
+ const sitemaps = (await (await createCommandContext({ needsAuth: true })).client.sitemaps.list(siteUrl).catch((e) => {
1878
+ logger.error(`Failed to fetch sitemaps: ${e.message}`);
1879
+ process.exit(1);
1880
+ })).map((sm) => ({
1881
+ path: sm.path,
1882
+ type: sm.type || void 0,
1883
+ isPending: sm.isPending || false,
1884
+ errors: Number(sm.errors) || 0,
1885
+ warnings: Number(sm.warnings) || 0,
1886
+ lastDownloaded: sm.lastDownloaded || null
1887
+ }));
1888
+ if (args.json) {
1889
+ console.log(JSON.stringify(sitemaps, null, 2));
1890
+ return;
1891
+ }
1892
+ if (sitemaps.length === 0) {
1893
+ logger.warn("No sitemaps found");
1894
+ return;
1895
+ }
1896
+ logger.success(`Found ${sitemaps.length} sitemaps:`);
1897
+ console.log();
1898
+ for (const sm of sitemaps) {
1899
+ const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
1900
+ const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
1901
+ const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
1902
+ console.log(` ${sm.path}${pending}${errors}${warnings}`);
2604
1903
  }
2605
- site = me.sites.find((s) => s.siteId === selected);
2606
- }
2607
- const data = await cloud.sitemaps(site.siteId).catch((e) => {
2608
- logger.error(`Failed to fetch sitemaps: ${e.message}`);
2609
- process$1.exit(1);
2610
- });
2611
- if (args.json) {
2612
- console.log(JSON.stringify(data, null, 2));
2613
- return;
2614
1904
  }
2615
- if (data.sitemaps.length === 0) {
2616
- logger.warn("No sitemaps found");
2617
- return;
1905
+ }),
1906
+ get: defineCommand({
1907
+ meta: {
1908
+ name: "get",
1909
+ description: "Get details for a specific sitemap"
1910
+ },
1911
+ args: {
1912
+ site: {
1913
+ type: "string",
1914
+ alias: "s",
1915
+ required: true,
1916
+ description: "Site URL"
1917
+ },
1918
+ url: {
1919
+ type: "positional",
1920
+ required: true,
1921
+ description: "Sitemap URL"
1922
+ },
1923
+ json: {
1924
+ type: "boolean",
1925
+ default: false,
1926
+ description: "Output as JSON"
1927
+ }
1928
+ },
1929
+ async run({ args }) {
1930
+ const client = (await createCommandContext({ needsAuth: true })).client;
1931
+ const sitemap = await fetchSitemap(client, args.site, args.url).catch(gscErrorHandler);
1932
+ if (args.json) {
1933
+ console.log(JSON.stringify(sitemap, null, 2));
1934
+ return;
1935
+ }
1936
+ console.log();
1937
+ console.log(` \x1B[1mPath:\x1B[0m ${sitemap.path}`);
1938
+ console.log(` \x1B[1mType:\x1B[0m ${sitemap.type || "sitemap"}`);
1939
+ console.log(` \x1B[1mLast Submitted:\x1B[0m ${sitemap.lastSubmitted || "N/A"}`);
1940
+ console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
1941
+ console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
1942
+ console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
1943
+ console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
1944
+ if (sitemap.contents?.length) {
1945
+ console.log();
1946
+ console.log(" \x1B[1mContents:\x1B[0m");
1947
+ for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
1948
+ }
2618
1949
  }
2619
- logger.success(`${data.sitemaps.length} sitemaps:`);
2620
- console.log();
2621
- for (const sm of data.sitemaps) {
2622
- const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
2623
- const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
2624
- const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
2625
- const urls = sm.urlCount ? ` \x1B[36m${sm.urlCount.toLocaleString()} URLs\x1B[0m` : "";
2626
- console.log(` ${sm.path}${urls}${pending}${errors}${warnings}`);
1950
+ }),
1951
+ submit: defineCommand({
1952
+ meta: {
1953
+ name: "submit",
1954
+ description: "Submit a sitemap to GSC"
1955
+ },
1956
+ args: {
1957
+ site: {
1958
+ type: "string",
1959
+ alias: "s",
1960
+ required: true,
1961
+ description: "Site URL"
1962
+ },
1963
+ url: {
1964
+ type: "positional",
1965
+ required: true,
1966
+ description: "Sitemap URL to submit"
1967
+ }
1968
+ },
1969
+ async run({ args }) {
1970
+ await (await createCommandContext({ needsAuth: true })).client.sitemaps.submit(args.site, args.url).catch((e) => {
1971
+ logger.error(`Submit failed: ${e.message}`);
1972
+ process.exit(1);
1973
+ });
1974
+ logger.success(`Submitted sitemap: ${args.url}`);
2627
1975
  }
2628
- if (data.history.length > 0) {
2629
- console.log();
2630
- console.log(" \x1B[1mRecent History\x1B[0m");
2631
- for (const h of data.history.slice(0, 7)) {
2632
- const errStr = h.errors > 0 ? ` \x1B[31m${h.errors} err\x1B[0m` : "";
2633
- console.log(` ${h.date}: ${h.urlCount.toLocaleString()} URLs${errStr}`);
1976
+ }),
1977
+ delete: defineCommand({
1978
+ meta: {
1979
+ name: "delete",
1980
+ description: "Delete a sitemap from GSC"
1981
+ },
1982
+ args: {
1983
+ site: {
1984
+ type: "string",
1985
+ alias: "s",
1986
+ required: true,
1987
+ description: "Site URL"
1988
+ },
1989
+ url: {
1990
+ type: "positional",
1991
+ required: true,
1992
+ description: "Sitemap URL to delete"
2634
1993
  }
1994
+ },
1995
+ async run({ args }) {
1996
+ await (await createCommandContext({ needsAuth: true })).client.sitemaps.delete(args.site, args.url).catch((e) => {
1997
+ logger.error(`Delete failed: ${e.message}`);
1998
+ process.exit(1);
1999
+ });
2000
+ logger.success(`Deleted sitemap: ${args.url}`);
2635
2001
  }
2636
- return;
2637
- }
2638
- if (!args.site) {
2639
- logger.error("Site URL required (-s)");
2640
- process$1.exit(1);
2641
- }
2642
- const sitemaps = await fetchSitemaps(googleSearchConsole(await getAuth({ interactive: false })), args.site).catch(gscErrorHandler);
2002
+ })
2003
+ }
2004
+ });
2005
+ const sitesCommand = defineCommand({
2006
+ meta: {
2007
+ name: "sites",
2008
+ description: "List available GSC sites"
2009
+ },
2010
+ args: { json: {
2011
+ type: "boolean",
2012
+ default: false,
2013
+ description: "Output as JSON for scripting"
2014
+ } },
2015
+ async run({ args }) {
2016
+ const sites = await (await createCommandContext({ needsAuth: true })).loadSites();
2643
2017
  if (args.json) {
2644
- console.log(JSON.stringify(sitemaps, null, 2));
2018
+ console.log(JSON.stringify(sites, null, 2));
2645
2019
  return;
2646
2020
  }
2647
- if (sitemaps.length === 0) {
2648
- logger.warn("No sitemaps found");
2021
+ if (sites.length === 0) {
2022
+ logger.warn("No verified sites found");
2649
2023
  return;
2650
2024
  }
2651
- logger.success(`Found ${sitemaps.length} sitemaps:`);
2025
+ logger.success(`Found ${sites.length} sites:`);
2652
2026
  console.log();
2653
- for (const sm of sitemaps) {
2654
- const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
2655
- const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
2656
- const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
2657
- console.log(` ${sm.path}${pending}${errors}${warnings}`);
2027
+ for (const site of sites) {
2028
+ const perm = site.permissionLevel === "siteOwner" ? "\x1B[32m" : "\x1B[90m";
2029
+ console.log(` ${site.siteUrl} ${perm}(${site.permissionLevel})\x1B[0m`);
2658
2030
  }
2659
2031
  }
2660
2032
  });
2661
- const getCommand = defineCommand({
2033
+ const compactCommand = defineCommand({
2662
2034
  meta: {
2663
- name: "get",
2664
- description: "Get details for a specific sitemap"
2035
+ name: "compact",
2036
+ description: "Run tiered compaction (raw→d7 at 7d, d7→d30 at 30d, d30→d90 at 90d)"
2665
2037
  },
2666
2038
  args: {
2667
- site: {
2039
+ "site": {
2668
2040
  type: "string",
2669
2041
  alias: "s",
2670
- required: true,
2671
- description: "Site URL"
2042
+ description: "Restrict to a single site (default: all sites with local data)"
2672
2043
  },
2673
- url: {
2674
- type: "positional",
2675
- required: true,
2676
- description: "Sitemap URL"
2044
+ "raw-days": {
2045
+ type: "string",
2046
+ description: "Override raw→d7 age threshold in days (default: 7)"
2677
2047
  },
2678
- json: {
2048
+ "d7-days": {
2049
+ type: "string",
2050
+ description: "Override d7→d30 age threshold in days (default: 30)"
2051
+ },
2052
+ "d30-days": {
2053
+ type: "string",
2054
+ description: "Override d30→d90 age threshold in days (default: 90)"
2055
+ },
2056
+ "quiet": {
2679
2057
  type: "boolean",
2058
+ alias: "q",
2680
2059
  default: false,
2681
- description: "Output as JSON"
2060
+ description: "Suppress progress output"
2682
2061
  }
2683
2062
  },
2684
2063
  async run({ args }) {
2685
- const sitemap = await fetchSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
2686
- if (args.json) {
2687
- console.log(JSON.stringify(sitemap, null, 2));
2688
- return;
2689
- }
2690
- console.log();
2691
- console.log(` \x1B[1mPath:\x1B[0m ${sitemap.path}`);
2692
- console.log(` \x1B[1mType:\x1B[0m ${sitemap.type || "sitemap"}`);
2693
- console.log(` \x1B[1mLast Submitted:\x1B[0m ${sitemap.lastSubmitted || "N/A"}`);
2694
- console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
2695
- console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
2696
- console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
2697
- console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
2698
- if (sitemap.contents?.length) {
2699
- console.log();
2700
- console.log(" \x1B[1mContents:\x1B[0m");
2701
- for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
2064
+ const store = (await createCommandContext({ needsStore: true })).store;
2065
+ const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2066
+ const quiet = Boolean(args.quiet);
2067
+ const thresholds = {};
2068
+ if (args["raw-days"]) thresholds.raw = Number(args["raw-days"]);
2069
+ if (args["d7-days"]) thresholds.d7 = Number(args["d7-days"]);
2070
+ if (args["d30-days"]) thresholds.d30 = Number(args["d30-days"]);
2071
+ for (const table of allTables()) {
2072
+ const entries = await store.engine.listLive({
2073
+ userId: store.userId,
2074
+ siteId,
2075
+ table
2076
+ });
2077
+ const siteIds = new Set(entries.map((e) => e.siteId));
2078
+ for (const targetSite of siteIds) {
2079
+ if (!quiet) logger.info(`Compacting ${table} [${targetSite ?? "-"}] (raw→d7→d30→d90)`);
2080
+ await store.engine.compactTiered({
2081
+ userId: store.userId,
2082
+ siteId: targetSite,
2083
+ table
2084
+ }, thresholds);
2085
+ }
2702
2086
  }
2087
+ if (!quiet) logger.success(`compact: done`);
2703
2088
  }
2704
2089
  });
2705
- const submitCommand = defineCommand({
2090
+ async function exportToDuckDB(opts) {
2091
+ const outPath = path.resolve(opts.outPath);
2092
+ if (opts.force) await rm(outPath, { force: true });
2093
+ const instance = await DuckDBInstance.create(outPath);
2094
+ const conn = await instance.connect();
2095
+ const tables = [];
2096
+ try {
2097
+ for (const table of allTables()) {
2098
+ const entries = await opts.engine.listLive({
2099
+ userId: opts.userId,
2100
+ siteId: opts.siteId,
2101
+ table
2102
+ });
2103
+ if (entries.length === 0) continue;
2104
+ const fileList = entries.map((e) => path.join(opts.dataDir, e.objectKey)).map((p) => `'${sqlEscape(p)}'`).join(", ");
2105
+ await conn.run(`CREATE OR REPLACE TABLE ${table} AS SELECT * FROM read_parquet([${fileList}], union_by_name=true)`);
2106
+ const rows = (await conn.runAndReadAll(`SELECT count(*)::BIGINT AS n FROM ${table}`)).getRowObjects();
2107
+ const rowCount = Number(rows[0]?.n ?? 0);
2108
+ tables.push({
2109
+ table,
2110
+ files: entries.length,
2111
+ rows: rowCount
2112
+ });
2113
+ }
2114
+ } finally {
2115
+ conn.closeSync();
2116
+ instance.closeSync();
2117
+ }
2118
+ return {
2119
+ outPath,
2120
+ tables,
2121
+ totalRows: tables.reduce((acc, t) => acc + t.rows, 0)
2122
+ };
2123
+ }
2124
+ const exportCommand = defineCommand({
2706
2125
  meta: {
2707
- name: "submit",
2708
- description: "Submit a sitemap to GSC"
2126
+ name: "export",
2127
+ description: "Pack live Parquet partitions into a single .duckdb file for portable distribution (browser attach, CDN serving, etc.)"
2709
2128
  },
2710
2129
  args: {
2711
- site: {
2130
+ out: {
2712
2131
  type: "string",
2713
- alias: "s",
2714
2132
  required: true,
2715
- description: "Site URL"
2133
+ description: "Output path for the .duckdb file"
2716
2134
  },
2717
- url: {
2718
- type: "positional",
2719
- required: true,
2720
- description: "Sitemap URL to submit"
2135
+ site: {
2136
+ type: "string",
2137
+ description: "Limit export to a single site URL (omit to include all)"
2138
+ },
2139
+ force: {
2140
+ type: "boolean",
2141
+ default: false,
2142
+ description: "Overwrite the output file if it already exists"
2721
2143
  }
2722
2144
  },
2723
2145
  async run({ args }) {
2724
- const cloud = await getCloudClient();
2725
- if (cloud) {
2726
- const config = await loadConfig();
2727
- const me = await cloud.me().catch((e) => {
2728
- logger.error(`Failed to fetch sites: ${e.message}`);
2729
- process$1.exit(1);
2730
- });
2731
- const target = args.site || config.defaultSite;
2732
- const site = me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target));
2733
- if (!site) {
2734
- logger.error(`Site not found: ${target}`);
2735
- process$1.exit(1);
2736
- }
2737
- await cloud.sitemapAction(site.siteId, {
2738
- action: "submit",
2739
- sitemapUrl: args.url
2740
- }).catch((e) => {
2741
- logger.error(`Submit failed: ${e.message}`);
2742
- process$1.exit(1);
2743
- });
2744
- logger.success(`Submitted sitemap: ${args.url}`);
2146
+ const store = (await createCommandContext({ needsStore: true })).store;
2147
+ const siteId = args.site ? store.siteIdFor(args.site) : void 0;
2148
+ const result = await exportToDuckDB({
2149
+ engine: store.engine,
2150
+ dataDir: store.dataDir,
2151
+ userId: store.userId,
2152
+ siteId,
2153
+ outPath: args.out,
2154
+ force: args.force
2155
+ });
2156
+ if (result.tables.length === 0) {
2157
+ console.log(`\n No data to export. Run \`gscdump sync\` first.`);
2745
2158
  return;
2746
2159
  }
2747
- await submitSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
2748
- logger.success(`Submitted sitemap: ${args.url}`);
2160
+ for (const t of result.tables) console.log(` ${t.table.padEnd(15)} ${String(t.files).padStart(4)} parquet → ${t.table} (${t.rows.toLocaleString()} rows)`);
2161
+ console.log(`\n Exported ${result.tables.length} table(s), ${result.totalRows.toLocaleString()} rows → ${result.outPath}`);
2162
+ console.log(`\n Attach from DuckDB: \x1B[36mATTACH '${result.outPath}' AS gsc (READ_ONLY); SELECT * FROM gsc.pages LIMIT 10;\x1B[0m`);
2163
+ console.log(` Attach in a browser: use DuckDB-WASM registerFileBuffer + \x1B[36mATTACH 'gsc.duckdb' AS gsc (READ_ONLY)\x1B[0m`);
2749
2164
  }
2750
2165
  });
2751
- const deleteCommand = defineCommand({
2166
+ const DEFAULT_GRACE_HOURS = 24;
2167
+ const gcCommand = defineCommand({
2752
2168
  meta: {
2753
- name: "delete",
2754
- description: "Delete a sitemap from GSC"
2169
+ name: "gc",
2170
+ description: "Delete orphaned object-store files not referenced by any manifest entry"
2755
2171
  },
2756
2172
  args: {
2757
- site: {
2173
+ "grace-hours": {
2174
+ type: "string",
2175
+ default: String(DEFAULT_GRACE_HOURS),
2176
+ description: `Spare orphans younger than this (default: ${DEFAULT_GRACE_HOURS}h)`
2177
+ },
2178
+ "site": {
2758
2179
  type: "string",
2759
2180
  alias: "s",
2760
- required: true,
2761
- description: "Site URL"
2181
+ description: "Restrict to a single site (default: all sites)"
2762
2182
  },
2763
- url: {
2764
- type: "positional",
2765
- required: true,
2766
- description: "Sitemap URL to delete"
2183
+ "quiet": {
2184
+ type: "boolean",
2185
+ alias: "q",
2186
+ default: false,
2187
+ description: "Suppress progress output"
2767
2188
  }
2768
2189
  },
2769
2190
  async run({ args }) {
2770
- const cloud = await getCloudClient();
2771
- if (cloud) {
2772
- const config = await loadConfig();
2773
- const me = await cloud.me().catch((e) => {
2774
- logger.error(`Failed to fetch sites: ${e.message}`);
2775
- process$1.exit(1);
2776
- });
2777
- const target = args.site || config.defaultSite;
2778
- const site = me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target));
2779
- if (!site) {
2780
- logger.error(`Site not found: ${target}`);
2781
- process$1.exit(1);
2782
- }
2783
- await cloud.sitemapAction(site.siteId, {
2784
- action: "delete",
2785
- sitemapUrl: args.url
2786
- }).catch((e) => {
2787
- logger.error(`Delete failed: ${e.message}`);
2788
- process$1.exit(1);
2789
- });
2790
- logger.success(`Deleted sitemap: ${args.url}`);
2791
- return;
2792
- }
2793
- await deleteSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
2794
- logger.success(`Deleted sitemap: ${args.url}`);
2191
+ const store = (await createCommandContext({ needsStore: true })).store;
2192
+ const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2193
+ const quiet = Boolean(args.quiet);
2194
+ const graceMs = Number(args["grace-hours"]) * 36e5;
2195
+ const result = await store.engine.gcOrphans({
2196
+ userId: store.userId,
2197
+ siteId
2198
+ }, graceMs);
2199
+ if (!quiet) logger.success(`gc: deleted ${result.deleted} orphan file(s)`);
2795
2200
  }
2796
2201
  });
2797
- const refreshCommand = defineCommand({
2202
+ const rollupsCommand = defineCommand({
2798
2203
  meta: {
2799
- name: "refresh",
2800
- description: "Refresh sitemap data from GSC (cloud mode)"
2204
+ name: "rollups",
2205
+ description: "Manage post-sync rollups"
2801
2206
  },
2802
- args: { site: {
2803
- type: "string",
2804
- alias: "s",
2805
- description: "Site URL"
2806
- } },
2807
- async run({ args }) {
2808
- const cloud = await getCloudClient();
2809
- if (!cloud) {
2810
- logger.error("Sitemap refresh requires cloud mode. Run gscdump init to set up.");
2811
- process$1.exit(1);
2812
- }
2813
- const config = await loadConfig();
2814
- const me = await cloud.me().catch((e) => {
2815
- logger.error(`Failed to fetch sites: ${e.message}`);
2816
- process$1.exit(1);
2817
- });
2818
- const target = args.site || config.defaultSite;
2819
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : me.sites.length === 1 ? me.sites[0] : void 0;
2820
- if (!site) {
2821
- const selected = await select({
2822
- message: "Select a site",
2823
- options: me.sites.map((s) => ({
2824
- value: s.siteId,
2825
- label: s.siteUrl
2826
- }))
2827
- });
2828
- if (isCancel(selected)) {
2829
- cancel("Cancelled");
2830
- process$1.exit(0);
2207
+ subCommands: { rebuild: defineCommand({
2208
+ meta: {
2209
+ name: "rebuild",
2210
+ description: "Rebuild post-sync rollups (daily totals, weekly totals, top-N tables) for a site"
2211
+ },
2212
+ args: {
2213
+ site: {
2214
+ type: "string",
2215
+ alias: "s",
2216
+ description: "Restrict to a single site (default: all sites with local data)"
2217
+ },
2218
+ quiet: {
2219
+ type: "boolean",
2220
+ alias: "q",
2221
+ default: false,
2222
+ description: "Suppress progress output"
2831
2223
  }
2832
- site = me.sites.find((s) => s.siteId === selected);
2833
- }
2834
- const result = await cloud.sitemapAction(site.siteId, { action: "refresh" }).catch((e) => {
2835
- logger.error(`Refresh failed: ${e.message}`);
2836
- process$1.exit(1);
2837
- });
2838
- logger.success(`Refreshed sitemaps (${result.sitemapCount} found)`);
2839
- }
2840
- });
2841
- const sitemapsCommand = defineCommand({
2842
- meta: {
2843
- name: "sitemaps",
2844
- description: "Manage sitemaps"
2845
- },
2846
- subCommands: {
2847
- list: listCommand,
2848
- get: getCommand,
2849
- submit: submitCommand,
2850
- delete: deleteCommand,
2851
- refresh: refreshCommand
2852
- }
2853
- });
2854
-
2855
- //#endregion
2856
- //#region src/commands/sites.ts
2857
- const sitesCommand = defineCommand({
2858
- meta: {
2859
- name: "sites",
2860
- description: "List available GSC sites"
2861
- },
2862
- args: { json: {
2863
- type: "boolean",
2864
- default: false,
2865
- description: "Output as JSON for scripting"
2866
- } },
2867
- async run({ args }) {
2868
- const cloud = await getCloudClient();
2869
- if (cloud) {
2870
- const me = await cloud.me().catch((e) => {
2871
- logger.error(`Failed to fetch sites: ${e.message}`);
2872
- process.exit(1);
2873
- });
2874
- if (args.json) {
2875
- console.log(JSON.stringify(me.sites, null, 2));
2876
- return;
2224
+ },
2225
+ async run({ args }) {
2226
+ const store = (await createCommandContext({ needsStore: true })).store;
2227
+ const explicitSiteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2228
+ const quiet = Boolean(args.quiet);
2229
+ const allSiteIds = /* @__PURE__ */ new Set();
2230
+ if (explicitSiteId) allSiteIds.add(explicitSiteId);
2231
+ else for (const table of allTables()) {
2232
+ const entries = await store.engine.listLive({
2233
+ userId: store.userId,
2234
+ table
2235
+ });
2236
+ for (const e of entries) if (e.siteId) allSiteIds.add(e.siteId);
2877
2237
  }
2878
- if (me.sites.length === 0) {
2879
- logger.warn("No registered sites. Run gscdump register to add a site.");
2238
+ if (allSiteIds.size === 0) {
2239
+ logger.warn("No sites with local data. Run `gscdump sync` first.");
2880
2240
  return;
2881
2241
  }
2882
- logger.success(`${me.sites.length} registered sites:`);
2883
- console.log();
2884
- for (const site of me.sites) {
2885
- const statusColor = site.syncStatus === "synced" ? "\x1B[32m" : site.syncStatus === "syncing" ? "\x1B[33m" : site.syncStatus === "error" ? "\x1B[31m" : "\x1B[90m";
2886
- console.log(` ${site.siteUrl} ${statusColor}(${site.syncStatus || "pending"})\x1B[0m`);
2887
- if (site.syncProgress.percent > 0 && site.syncProgress.percent < 100) console.log(` ${progressBar(site.syncProgress.percent, 100, `${site.syncProgress.percent}%`, 20)}`);
2888
- if (site.oldestDateSynced && site.newestDateSynced) console.log(` \x1B[90m${site.oldestDateSynced} → ${site.newestDateSynced}\x1B[0m`);
2242
+ let totalBytes = 0;
2243
+ for (const siteId of allSiteIds) {
2244
+ if (!quiet) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)`);
2245
+ const results = await rebuildRollups({
2246
+ engine: store.engine,
2247
+ dataSource: store.dataSource,
2248
+ ctx: {
2249
+ userId: store.userId,
2250
+ siteId
2251
+ },
2252
+ defs: DEFAULT_ROLLUPS
2253
+ });
2254
+ for (const r of results) {
2255
+ totalBytes += r.bytes;
2256
+ if (!quiet) console.log(` ${r.id.padEnd(20)} ${(r.bytes / 1024).toFixed(1).padStart(8)} KB ${r.objectKey}`);
2257
+ }
2889
2258
  }
2890
- return;
2891
- }
2892
- const sites = (await fetchSites(googleSearchConsole(await getAuth({ interactive: false }))).catch(gscErrorHandler)).filter((site) => site.siteUrl && site.permissionLevel !== "siteUnverifiedUser").map((site) => ({
2893
- url: site.siteUrl,
2894
- permission: site.permissionLevel || "unknown"
2895
- }));
2896
- if (args.json) {
2897
- console.log(JSON.stringify(sites, null, 2));
2898
- return;
2259
+ if (!quiet) logger.success(`Rebuilt rollups across ${allSiteIds.size} site(s) — total ${(totalBytes / 1024).toFixed(1)} KB`);
2899
2260
  }
2900
- if (sites.length === 0) {
2901
- logger.warn("No verified sites found");
2902
- return;
2903
- }
2904
- logger.success(`Found ${sites.length} sites:`);
2905
- console.log();
2906
- for (const site of sites) {
2907
- const perm = site.permission === "siteOwner" ? "\x1B[32m" : "\x1B[90m";
2908
- console.log(` ${site.url} ${perm}(${site.permission})\x1B[0m`);
2909
- }
2910
- }
2261
+ }) }
2911
2262
  });
2912
-
2913
- //#endregion
2914
- //#region src/commands/sync.ts
2915
- async function resolveCloudSite(cloud, target) {
2916
- const me = await cloud.me().catch((e) => {
2917
- logger.error(`Failed to fetch sites: ${e.message}`);
2918
- process$1.exit(1);
2919
- });
2920
- if (me.sites.length === 0) {
2921
- logger.error("No registered sites. Run gscdump register first.");
2922
- process$1.exit(1);
2923
- }
2924
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
2925
- if (!site) if (me.sites.length === 1) site = me.sites[0];
2926
- else {
2927
- const selected = await select({
2928
- message: "Select a site",
2929
- options: me.sites.map((s) => ({
2930
- value: s.siteId,
2931
- label: s.siteUrl,
2932
- hint: s.syncStatus || "unknown"
2933
- }))
2934
- });
2935
- if (isCancel(selected)) {
2936
- cancel("Cancelled");
2937
- process$1.exit(0);
2938
- }
2939
- site = me.sites.find((s) => s.siteId === selected);
2940
- }
2941
- return {
2942
- siteId: site.siteId,
2943
- siteUrl: site.siteUrl
2944
- };
2945
- }
2946
- function requireCloud(cloud) {
2947
- if (!cloud) {
2948
- logger.error("Sync requires cloud mode. Run gscdump init to set up cloud mode.");
2949
- process$1.exit(1);
2950
- }
2951
- }
2952
- const statusCommand = defineCommand({
2263
+ const statsCommand = defineCommand({
2953
2264
  meta: {
2954
- name: "status",
2955
- description: "Check sync status for a site"
2265
+ name: "stats",
2266
+ description: "Show row/byte counts per table and on-disk footprint"
2956
2267
  },
2957
2268
  args: {
2958
- site: {
2959
- type: "string",
2960
- alias: "s",
2961
- description: "Site URL"
2962
- },
2963
2269
  json: {
2964
2270
  type: "boolean",
2965
2271
  default: false,
2966
2272
  description: "Output as JSON"
2273
+ },
2274
+ site: {
2275
+ type: "string",
2276
+ description: "Limit to one site URL (sc-domain:example.com, https://example.com/, ...)"
2967
2277
  }
2968
2278
  },
2969
2279
  async run({ args }) {
2970
- const cloud = await getCloudClient();
2971
- requireCloud(cloud);
2972
- const config = await loadConfig();
2973
- const { siteId } = await resolveCloudSite(cloud, args.site || config.defaultSite);
2974
- const status = await cloud.syncStatus(siteId).catch((e) => {
2975
- logger.error(`Failed to fetch sync status: ${e.message}`);
2976
- process$1.exit(1);
2280
+ const store = (await createCommandContext({ needsStore: true })).store;
2281
+ const siteId = args.site ? store.siteIdFor(args.site) : void 0;
2282
+ const perTable = await Promise.all(allTables().map(async (table) => {
2283
+ const all = await store.engine.listAll({
2284
+ userId: store.userId,
2285
+ siteId,
2286
+ table
2287
+ });
2288
+ return {
2289
+ table,
2290
+ live: all.filter((e) => e.retiredAt === void 0),
2291
+ retired: all.filter((e) => e.retiredAt !== void 0)
2292
+ };
2293
+ }));
2294
+ const watermarks = await store.engine.getWatermarks({
2295
+ userId: store.userId,
2296
+ siteId
2977
2297
  });
2298
+ const disk = await filesystemStats(store.dataDir).catch(() => ({
2299
+ files: 0,
2300
+ bytes: 0
2301
+ }));
2978
2302
  if (args.json) {
2979
- console.log(JSON.stringify(status, null, 2));
2303
+ const payload = {
2304
+ dataDir: store.dataDir,
2305
+ disk,
2306
+ tables: perTable.map(({ table, live, retired }) => ({
2307
+ table,
2308
+ liveFiles: live.length,
2309
+ liveRows: sumRows(live),
2310
+ liveBytes: sumBytes(live),
2311
+ retiredFiles: retired.length,
2312
+ retiredBytes: sumBytes(retired),
2313
+ watermarks: watermarks.filter((w) => w.table === table).map((w) => ({
2314
+ siteId: w.siteId ?? null,
2315
+ newestDateSynced: w.newestDateSynced,
2316
+ oldestDateSynced: w.oldestDateSynced,
2317
+ lastSyncAt: w.lastSyncAt
2318
+ }))
2319
+ }))
2320
+ };
2321
+ console.log(JSON.stringify(payload, null, 2));
2980
2322
  return;
2981
2323
  }
2982
2324
  console.log();
2983
- console.log(` \x1B[1m${status.siteUrl}\x1B[0m`);
2325
+ console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
2326
+ console.log(` \x1B[90mDisk: ${disk.files} file(s), ${formatBytes(disk.bytes)}\x1B[0m`);
2984
2327
  console.log();
2985
- const statusColor = status.syncStatus === "synced" ? "\x1B[32m" : status.isSyncing ? "\x1B[33m" : status.syncStatus === "error" ? "\x1B[31m" : "\x1B[90m";
2986
- console.log(` Status: ${statusColor}${status.syncStatus}\x1B[0m`);
2987
- console.log(` Progress: ${progressBar(status.progress, 100, `${status.progress}%`)}`);
2988
- console.log(` Days: \x1B[36m${status.daysSynced}\x1B[0m / ${status.daysAvailable} synced`);
2989
- if (status.oldestDateSynced) console.log(` Range: ${status.oldestDateSynced} \x1B[90m→\x1B[0m ${status.newestDateSynced}`);
2328
+ const totalRows = perTable.reduce((acc, t) => acc + sumRows(t.live), 0);
2329
+ const totalBytes = perTable.reduce((acc, t) => acc + sumBytes(t.live), 0);
2330
+ const totalFiles = perTable.reduce((acc, t) => acc + t.live.length, 0);
2331
+ const totalRetiredFiles = perTable.reduce((acc, t) => acc + t.retired.length, 0);
2332
+ const totalRetiredBytes = perTable.reduce((acc, t) => acc + sumBytes(t.retired), 0);
2333
+ for (const { table, live, retired } of perTable) {
2334
+ const rows = sumRows(live).toLocaleString();
2335
+ const bytes = formatBytes(sumBytes(live));
2336
+ const retiredSuffix = retired.length > 0 ? ` \x1B[90m(+${retired.length} retired, ${formatBytes(sumBytes(retired))})\x1B[0m` : "";
2337
+ console.log(` ${table.padEnd(15)} \x1B[36m${String(live.length).padStart(4)}\x1B[0m files, ${rows.padStart(10)} rows, ${bytes}${retiredSuffix}`);
2338
+ }
2990
2339
  console.log();
2991
- console.log(" \x1B[1mJobs\x1B[0m");
2992
- console.log(` Queued: ${status.jobs.queued}`);
2993
- console.log(` Processing: ${status.jobs.processing}`);
2994
- console.log(` Completed: \x1B[32m${status.jobs.completed}\x1B[0m`);
2995
- if (status.jobs.failed > 0) console.log(` Failed: \x1B[31m${status.jobs.failed}\x1B[0m`);
2996
- const tableNames = Object.keys(status.tables);
2997
- if (tableNames.length > 0) {
2340
+ console.log(` \x1B[1mTotal:\x1B[0m ${totalFiles} files, ${totalRows.toLocaleString()} rows, ${formatBytes(totalBytes)} live`);
2341
+ if (totalRetiredFiles > 0) console.log(` \x1B[90mRetired: ${totalRetiredFiles} files, ${formatBytes(totalRetiredBytes)} awaiting GC\x1B[0m`);
2342
+ if (watermarks.length > 0) {
2998
2343
  console.log();
2999
- console.log(" \x1B[1mTables\x1B[0m");
3000
- for (const name of tableNames) {
3001
- const t = status.tables[name];
3002
- const rows = t.totalRows > 0 ? ` (${t.totalRows.toLocaleString()} rows)` : "";
3003
- console.log(` ${name}: \x1B[32m${t.completed}\x1B[0m done, ${t.queued} queued${t.failed > 0 ? `, \x1B[31m${t.failed} failed\x1B[0m` : ""}${rows}`);
2344
+ console.log(` \x1B[1mSync watermarks:\x1B[0m`);
2345
+ for (const w of sortWatermarks(watermarks)) {
2346
+ const scope = w.siteId ? `${w.table}@${w.siteId}` : w.table;
2347
+ console.log(` ${scope.padEnd(24)} \x1B[36m${w.oldestDateSynced}\x1B[0m \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
3004
2348
  }
3005
2349
  }
3006
- if (status.failedJobs.length > 0) {
3007
- console.log();
3008
- console.log(" \x1B[31mFailed Jobs\x1B[0m");
3009
- for (const j of status.failedJobs.slice(0, 5)) console.log(` ${j.date} ${j.tableName}: ${j.error}`);
3010
- if (status.failedJobs.length > 5) console.log(` \x1B[90m... and ${status.failedJobs.length - 5} more\x1B[0m`);
3011
- }
3012
2350
  console.log();
3013
2351
  }
3014
2352
  });
3015
- const triggerCommand = defineCommand({
2353
/**
 * Return a sorted copy of a watermark list.
 *
 * Ordering is by `table` first and by `siteId` second; a missing `siteId`
 * is compared as the empty string. The input array is not modified.
 *
 * @param {Array<{table: string, siteId?: string}>} ws - watermarks to order
 * @returns {Array} a new array holding the same objects in sorted order
 */
function sortWatermarks(ws) {
	const siteKey = (w) => w.siteId ?? "";
	const ordered = ws.slice();
	ordered.sort((left, right) => {
		return left.table === right.table ? siteKey(left).localeCompare(siteKey(right)) : left.table.localeCompare(right.table);
	});
	return ordered;
}
2359
/**
 * Add up the `rowCount` field of every entry.
 *
 * An entry whose `rowCount` is missing or not a finite number contributes 0,
 * so a single incomplete entry cannot turn the whole total into NaN.
 *
 * @param {Array<{rowCount?: number}>} entries - entries to total
 * @returns {number} the summed row count (0 for an empty list)
 */
function sumRows(entries) {
	return entries.reduce((acc, e) => acc + (Number.isFinite(e.rowCount) ? e.rowCount : 0), 0);
}
2362
/**
 * Add up the `bytes` field of every entry.
 *
 * An entry whose `bytes` is missing or not a finite number contributes 0,
 * so a single incomplete entry cannot turn the whole total into NaN.
 *
 * @param {Array<{bytes?: number}>} entries - entries to total
 * @returns {number} the summed byte count (0 for an empty list)
 */
function sumBytes(entries) {
	return entries.reduce((acc, e) => acc + (Number.isFinite(e.bytes) ? e.bytes : 0), 0);
}
2365
/**
 * Format a byte count with binary (1024-based) units.
 *
 * Values below 1024 are printed as-is with a ` B` suffix; KB and MB use one
 * decimal place and GB uses two. GB is the largest unit.
 *
 * The unit is chosen from the *rounded* text rather than the raw value, so a
 * count just under a boundary rolls over to the next unit ("1.0 MB") instead
 * of printing an out-of-range figure such as "1024.0 KB".
 *
 * @param {number} n - byte count
 * @returns {string} human-readable size
 */
function formatBytes(n) {
	if (n < 1024) return `${n} B`;
	const kb = n / 1024;
	const kbText = kb.toFixed(1);
	// Compare the rounded figure: 1023.96 KB rounds to "1024.0" and must be promoted.
	if (Number(kbText) < 1024) return `${kbText} KB`;
	const mb = kb / 1024;
	const mbText = mb.toFixed(1);
	if (Number(mbText) < 1024) return `${mbText} MB`;
	return `${(mb / 1024).toFixed(2)} GB`;
}
2371
+ const storeCommand = defineCommand({
3016
2372
  meta: {
3017
- name: "trigger",
3018
- description: "Trigger a fresh sync for a site"
2373
+ name: "store",
2374
+ description: "Manage the local DuckDB/Parquet store"
3019
2375
  },
3020
- args: { site: {
3021
- type: "string",
3022
- alias: "s",
3023
- description: "Site URL"
3024
- } },
3025
- async run({ args }) {
3026
- const cloud = await getCloudClient();
3027
- requireCloud(cloud);
3028
- const config = await loadConfig();
3029
- const { siteId, siteUrl } = await resolveCloudSite(cloud, args.site || config.defaultSite);
3030
- const result = await cloud.triggerSync(siteId).catch((e) => {
3031
- logger.error(`Failed to trigger sync: ${e.message}`);
3032
- process$1.exit(1);
3033
- });
3034
- logger.success(`Sync triggered for ${siteUrl}`);
3035
- console.log(` ${result.message}`);
3036
- console.log();
2376
+ subCommands: {
2377
+ stats: statsCommand,
2378
+ compact: compactCommand,
2379
+ gc: gcCommand,
2380
+ export: exportCommand,
2381
+ rollups: rollupsCommand
3037
2382
  }
3038
2383
  });
2384
// Tables the sync command processes when --tables is not passed.
const DEFAULT_TABLES = [
	"pages",
	"keywords",
	"countries",
	"devices"
];
// Search types the sync command processes when --types is not passed.
const DEFAULT_TYPES = ["web"];
// Every value of SearchTypes; --types entries are validated against this list.
const ALL_SEARCH_TYPES = Object.values(SearchTypes);
// The sync command's default end date is this many days ago; the same number
// is used as the default window length when --days is not passed.
const DEFAULT_PENDING_DAYS = 3;
// Fallback for --concurrency when it is absent or does not parse to a number.
const DEFAULT_CONCURRENCY = 8;
// The sync command only marks a zero-row search type as empty when the synced
// range covers at least this many dates.
const EMPTY_TYPE_PROBE_MIN_DAYS = 7;
// Search types the sync command never skips as empty and never marks as empty.
const EMPTY_TYPE_PROTECTED = ["web"];
2396
/**
 * Create a progress reporter that redraws a progress bar on stdout.
 *
 * When `quiet` is true the returned `tick` and `done` are no-ops and no timer
 * is started. Otherwise the bar is redrawn every 100 ms from the latest
 * counter and label; `tick` only updates that state, it does not draw.
 *
 * @param {number} total - number of ticks that represents 100%
 * @param {boolean} quiet - suppress all output
 * @returns {{tick: (label: string) => void, done: () => void}} `tick` records
 *   one completed unit and its label; `done` stops redrawing and clears the line
 */
function createProgressTracker(total, quiet) {
	if (quiet) return {
		tick: () => {},
		done: () => {}
	};
	let current = 0;
	let lastLabel = "";
	const render = () => {
		clearLine();
		process.stdout.write(progressBar(current, total, lastLabel));
	};
	let timer = setInterval(render, 100);
	// Cosmetic timer: it must not keep the process alive on its own if done()
	// is never reached (for example when a sync job rejects).
	timer.unref();
	return {
		tick: (label) => {
			current++;
			lastLabel = label;
		},
		done: () => {
			if (timer) {
				clearInterval(timer);
				timer = null;
			}
			clearLine();
		}
	};
}
2423
/**
 * Sync one table / search-type pair for a site across a list of dates.
 *
 * Previously recorded sync states are loaded once. Each date is then handled
 * by `runWithConcurrency`: a date already in state "done" is skipped unless
 * `force` is set; otherwise the date is marked "inflight", fetched and written
 * via `runOneDate`, and finally marked "done" or "failed".
 *
 * A failure for one date does not stop the others. The failure reason stored
 * with the "failed" state is the thrown value's `message` when it has a string
 * one, otherwise the value converted with `String()`, so non-Error throws
 * (strings, null, ...) are recorded instead of crashing the run.
 *
 * @param {object} store - local store (`userId`, `siteIdFor`, `engine`)
 * @param {string} siteUrl - site URL to query
 * @param {string} table - table name, a key of `TABLE_DIMS`
 * @param {string} searchType - search type to query
 * @param {string[]} dates - dates to process
 * @param {object} client - API client passed through to `runOneDate`
 * @param {number} concurrency - limit passed to `runWithConcurrency`
 * @param {boolean} force - re-sync dates already marked "done"
 * @param {{tick: (label: string) => void}} progress - ticked once per date
 * @returns {Promise<{rows: number, skipped: number, failed: number}>} totals
 */
async function syncTable(store, siteUrl, table, searchType, dates, client, concurrency, force, progress) {
	const dims = TABLE_DIMS[table];
	const siteId = store.siteIdFor(siteUrl);
	let totalRows = 0;
	let skipped = 0;
	let failed = 0;
	const priorStates = await store.engine.getSyncStates({
		userId: store.userId,
		siteId,
		table,
		searchType
	});
	const stateByDate = new Map(priorStates.map((s) => [s.date, s]));
	const label = searchType === "web" ? table : `${table}/${searchType}`;
	// Anything can be thrown; only trust `.message` when it really is a string.
	const describeError = (err) => typeof err?.message === "string" ? err.message : String(err);
	await runWithConcurrency(dates, concurrency, async (date) => {
		const prior = stateByDate.get(date);
		if (!force && prior?.state === "done") {
			skipped++;
			progress.tick(`${label} ${date} (skip)`);
			return;
		}
		const scope = {
			userId: store.userId,
			siteId,
			table,
			date,
			searchType
		};
		await store.engine.setSyncState(scope, "inflight");
		const result = await runOneDate(store, client, siteUrl, table, searchType, dims, date).catch((err) => ({
			kind: "error",
			error: err
		}));
		if (result.kind === "error") {
			await store.engine.setSyncState(scope, "failed", { error: describeError(result.error) });
			failed++;
			progress.tick(`${label} ${date} (fail)`);
			return;
		}
		await store.engine.setSyncState(scope, "done");
		totalRows += result.rows;
		progress.tick(`${label} ${date}`);
	});
	return {
		rows: totalRows,
		skipped,
		failed
	};
}
2472
/**
 * Fetch every row for a single date and write them to the store as one day.
 *
 * The API is queried in pages of 25,000 rows, advancing `startRow` by the
 * number of rows received, until a page comes back shorter than the page
 * size. Each API row is passed through `transformGscRow` with missing fields
 * defaulted (`keys` to `[]`, the metrics to 0); rows for which it returns a
 * falsy value are dropped. The collected rows are then handed to
 * `store.engine.writeDay` in a single call.
 *
 * @param {object} store - local store (`userId`, `siteIdFor`, `engine`)
 * @param {object} client - API client exposing `_rawQuery(siteUrl, query)`
 * @param {string} siteUrl - site URL to query
 * @param {string} table - target table name
 * @param {string} searchType - search type to query
 * @param {string[]} dims - dimensions to request
 * @param {string} date - the single day to fetch (used as start and end date)
 * @returns {Promise<{kind: "ok", rows: number}>} number of rows written
 */
async function runOneDate(store, client, siteUrl, table, searchType, dims, date) {
	const pageSize = 25e3;
	const collected = [];
	let offset = 0;
	let received;
	do {
		const response = await client._rawQuery(siteUrl, {
			startDate: date,
			endDate: date,
			dimensions: dims,
			searchType,
			rowLimit: pageSize,
			startRow: offset
		});
		const page = response.rows || [];
		for (const apiRow of page) {
			const normalized = {
				keys: apiRow.keys ?? [],
				clicks: apiRow.clicks ?? 0,
				impressions: apiRow.impressions ?? 0,
				ctr: apiRow.ctr ?? 0,
				position: apiRow.position ?? 0
			};
			const transformed = transformGscRow(table, normalized);
			if (transformed) collected.push(transformed.row);
		}
		received = page.length;
		offset += received;
	} while (received >= pageSize);
	await store.engine.writeDay({
		userId: store.userId,
		siteId: store.siteIdFor(siteUrl),
		table,
		date,
		searchType
	}, collected);
	return {
		kind: "ok",
		rows: collected.length
	};
}
3039
2511
  const syncCommand = defineCommand({
3040
2512
  meta: {
3041
2513
  name: "sync",
3042
- description: "Sync status and management (cloud mode only)"
2514
+ description: "Sync GSC data to local Parquet store"
3043
2515
  },
3044
- subCommands: {
3045
- status: statusCommand,
3046
- trigger: triggerCommand
3047
- }
3048
- });
3049
-
3050
- //#endregion
3051
- //#region src/commands/unregister.ts
3052
- const unregisterCommand = defineCommand({
3053
- meta: {
3054
- name: "unregister",
3055
- description: "Unregister a site from syncing (cloud mode only)"
2516
+ args: {
2517
+ "site": {
2518
+ type: "string",
2519
+ alias: "s",
2520
+ description: "Site URL"
2521
+ },
2522
+ "start": {
2523
+ type: "string",
2524
+ description: "Start date (YYYY-MM-DD) for historical sync"
2525
+ },
2526
+ "end": {
2527
+ type: "string",
2528
+ description: "End date (YYYY-MM-DD); defaults to 3 days ago"
2529
+ },
2530
+ "days": {
2531
+ type: "string",
2532
+ description: `Number of days back to sync (default: ${DEFAULT_PENDING_DAYS})`
2533
+ },
2534
+ "tables": {
2535
+ type: "string",
2536
+ alias: "t",
2537
+ description: `Tables to sync (default: ${DEFAULT_TABLES.join(",")}); comma-separated`
2538
+ },
2539
+ "types": {
2540
+ type: "string",
2541
+ description: `GSC search types to sync (default: ${DEFAULT_TYPES.join(",")}); comma-separated. Allowed: ${ALL_SEARCH_TYPES.join(",")}.`
2542
+ },
2543
+ "force-types": {
2544
+ type: "boolean",
2545
+ default: false,
2546
+ description: "Ignore stored empty-type markers and re-probe every requested type"
2547
+ },
2548
+ "no-rollups": {
2549
+ type: "boolean",
2550
+ default: false,
2551
+ description: "Skip the post-sync rollup rebuild (daily/weekly totals, top-N tables)"
2552
+ },
2553
+ "full": {
2554
+ type: "boolean",
2555
+ description: "Sync the last 450 days (full GSC history)"
2556
+ },
2557
+ "quiet": {
2558
+ type: "boolean",
2559
+ alias: "q",
2560
+ default: false,
2561
+ description: "Suppress progress output"
2562
+ },
2563
+ "force": {
2564
+ type: "boolean",
2565
+ default: false,
2566
+ description: "Re-sync dates already marked done (default: skip them for idempotent resume)"
2567
+ },
2568
+ "status": {
2569
+ type: "boolean",
2570
+ default: false,
2571
+ description: "Print watermarks + sync-state summary instead of syncing"
2572
+ },
2573
+ "json": {
2574
+ type: "boolean",
2575
+ default: false,
2576
+ description: "With --status: emit JSON"
2577
+ },
2578
+ "concurrency": {
2579
+ type: "string",
2580
+ alias: "c",
2581
+ description: `Concurrent in-flight day fetches per table (default: ${DEFAULT_CONCURRENCY})`
2582
+ },
2583
+ "serial-tables": {
2584
+ type: "boolean",
2585
+ default: false,
2586
+ description: "Run tables sequentially (default: run all tables in parallel)"
2587
+ }
3056
2588
  },
3057
- args: { site: {
3058
- type: "positional",
3059
- description: "Site URL to unregister",
3060
- required: false
3061
- } },
3062
2589
  async run({ args }) {
3063
- const cloud = await getCloudClient();
3064
- if (!cloud) {
3065
- logger.error("Unregister requires cloud mode. Run gscdump init to set up.");
3066
- process$1.exit(1);
2590
+ if (args.status) {
2591
+ await printSyncStatus(await loadConfig(), args.site ? String(args.site) : void 0, Boolean(args.json));
2592
+ return;
3067
2593
  }
3068
- const config = await loadConfig();
3069
- const target = args.site || config.defaultSite;
3070
- const me = await cloud.me().catch((e) => {
3071
- logger.error(`Failed to fetch sites: ${e.message}`);
3072
- process$1.exit(1);
2594
+ const ctx = await createCommandContext({
2595
+ needsAuth: true,
2596
+ needsStore: true
2597
+ });
2598
+ const client = ctx.client;
2599
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
2600
+ const tables = args.tables ? String(args.tables).split(",").map((t) => t.trim()).filter(isKnownTable) : DEFAULT_TABLES;
2601
+ const requestedTypes = args.types ? String(args.types).split(",").map((t) => t.trim()).filter(isKnownSearchType) : DEFAULT_TYPES;
2602
+ if (requestedTypes.length === 0) {
2603
+ logger.error(`No valid search types specified. Allowed: ${ALL_SEARCH_TYPES.join(",")}`);
2604
+ process.exit(1);
2605
+ }
2606
+ const siteId = ctx.store.siteIdFor(siteUrl);
2607
+ const emptyTypesStore = createEmptyTypesStore({ dataSource: ctx.store.dataSource });
2608
+ const emptyTypesDoc = await emptyTypesStore.load({
2609
+ userId: ctx.store.userId,
2610
+ siteId
3073
2611
  });
3074
- if (me.sites.length === 0) {
3075
- logger.warn("No registered sites.");
2612
+ const forceTypes = Boolean(args["force-types"]);
2613
+ const skippedTypes = [];
2614
+ const types = [];
2615
+ for (const t of requestedTypes) {
2616
+ if (!forceTypes && emptyTypesDoc.emptyTypes.includes(t) && !EMPTY_TYPE_PROTECTED.includes(t)) {
2617
+ skippedTypes.push(t);
2618
+ continue;
2619
+ }
2620
+ types.push(t);
2621
+ }
2622
+ if (types.length === 0) {
2623
+ logger.warn(`All requested types (${requestedTypes.join(", ")}) are marked empty for this site. Pass --force-types to re-probe.`);
3076
2624
  return;
3077
2625
  }
3078
- let site = target ? me.sites.find((s) => s.siteUrl === target || s.siteUrl.includes(target)) : void 0;
3079
- if (!site) {
3080
- const selected = await select({
3081
- message: "Select a site to unregister",
3082
- options: me.sites.map((s) => ({
3083
- value: s.siteId,
3084
- label: s.siteUrl,
3085
- hint: s.syncStatus || "unknown"
3086
- }))
2626
+ if (skippedTypes.length > 0 && !args.quiet) logger.info(`Skipping ${skippedTypes.join(", ")} (marked empty for this site; pass --force-types to re-probe).`);
2627
+ const endDate = args.end ? String(args.end) : daysAgo(DEFAULT_PENDING_DAYS);
2628
+ let startDate;
2629
+ if (args.start) startDate = String(args.start);
2630
+ else if (args.full) startDate = daysAgo(450);
2631
+ else if (args.days) startDate = daysAgo(Number.parseInt(String(args.days), 10) + DEFAULT_PENDING_DAYS - 1);
2632
+ else startDate = daysAgo(DEFAULT_PENDING_DAYS + DEFAULT_PENDING_DAYS - 1);
2633
+ const dates = getDateRange(startDate, endDate);
2634
+ if (dates.length === 0) {
2635
+ logger.error(`No dates to sync (start=${startDate}, end=${endDate})`);
2636
+ process.exit(1);
2637
+ }
2638
+ const store = ctx.store;
2639
+ if (!args.quiet) {
2640
+ logger.info(`Syncing ${siteUrl} (${tables.join(", ")}) [${types.join(", ")}] → ${store.dataDir}`);
2641
+ logger.info(`Range: ${startDate} → ${endDate} (${dates.length} days)`);
2642
+ }
2643
+ const concurrency = args.concurrency ? Math.max(1, Number.parseInt(String(args.concurrency), 10) || DEFAULT_CONCURRENCY) : DEFAULT_CONCURRENCY;
2644
+ const serialTables = Boolean(args["serial-tables"]);
2645
+ const start = Date.now();
2646
+ const totals = {};
2647
+ const jobs = [];
2648
+ for (const table of tables) for (const type of types) {
2649
+ const label = type === "web" ? table : `${table}/${type}`;
2650
+ jobs.push({
2651
+ table,
2652
+ type,
2653
+ label
3087
2654
  });
3088
- if (isCancel(selected)) {
3089
- cancel("Cancelled");
3090
- process$1.exit(0);
2655
+ }
2656
+ const progress = createProgressTracker(dates.length * jobs.length, Boolean(args.quiet));
2657
+ if (serialTables) for (const job of jobs) totals[job.label] = await syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress);
2658
+ else {
2659
+ const results = await Promise.all(jobs.map((job) => syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress)));
2660
+ jobs.forEach((job, i) => {
2661
+ totals[job.label] = results[i];
2662
+ });
2663
+ }
2664
+ progress.done();
2665
+ const seconds = ((Date.now() - start) / 1e3).toFixed(1);
2666
+ if (!args.quiet) {
2667
+ logger.success(`Synced ${siteUrl} in ${seconds}s`);
2668
+ for (const [t, n] of Object.entries(totals)) {
2669
+ const suffix = [n.skipped > 0 ? `${n.skipped} skipped` : null, n.failed > 0 ? `\x1B[31m${n.failed} failed\x1B[0m` : null].filter(Boolean).join(", ");
2670
+ const tail = suffix ? ` (${suffix})` : "";
2671
+ console.log(` ${t}: ${n.rows.toLocaleString()} rows${tail}`);
3091
2672
  }
3092
- site = me.sites.find((s) => s.siteId === selected);
2673
+ console.log();
3093
2674
  }
3094
- const confirmed = await confirm({ message: `Unregister ${site.siteUrl}? This will stop syncing and delete pending jobs.` });
3095
- if (isCancel(confirmed) || !confirmed) {
3096
- cancel("Cancelled");
3097
- process$1.exit(0);
2675
+ const anyFailed = Object.values(totals).some((t) => t.failed > 0);
2676
+ const rowsByType = /* @__PURE__ */ new Map();
2677
+ const failedByType = /* @__PURE__ */ new Map();
2678
+ for (const job of jobs) {
2679
+ const t = totals[job.label];
2680
+ rowsByType.set(job.type, (rowsByType.get(job.type) ?? 0) + t.rows);
2681
+ failedByType.set(job.type, (failedByType.get(job.type) ?? 0) + t.failed);
2682
+ }
2683
+ if (!forceTypes && dates.length >= EMPTY_TYPE_PROBE_MIN_DAYS) {
2684
+ const toMark = [];
2685
+ for (const type of types) {
2686
+ if (EMPTY_TYPE_PROTECTED.includes(type)) continue;
2687
+ if ((failedByType.get(type) ?? 0) > 0) continue;
2688
+ if ((rowsByType.get(type) ?? 0) === 0) toMark.push(type);
2689
+ }
2690
+ if (toMark.length > 0) {
2691
+ await emptyTypesStore.mark({
2692
+ userId: store.userId,
2693
+ siteId
2694
+ }, toMark);
2695
+ if (!args.quiet) logger.info(`Marked empty for future syncs: ${toMark.join(", ")} (0 rows across ${dates.length} days; pass --force-types to re-probe).`);
2696
+ }
3098
2697
  }
3099
- const result = await cloud.deleteSite(site.siteId).catch((e) => {
3100
- logger.error(`Failed to unregister: ${e.message}`);
3101
- process$1.exit(1);
3102
- });
3103
- logger.success(`Unregistered ${result.siteUrl}`);
2698
+ if (forceTypes && emptyTypesDoc.emptyTypes.length > 0) {
2699
+ const toClear = [];
2700
+ for (const type of types) if (emptyTypesDoc.emptyTypes.includes(type) && (rowsByType.get(type) ?? 0) > 0) toClear.push(type);
2701
+ if (toClear.length > 0) {
2702
+ await emptyTypesStore.clear({
2703
+ userId: store.userId,
2704
+ siteId
2705
+ }, toClear);
2706
+ if (!args.quiet) logger.info(`Cleared empty markers for: ${toClear.join(", ")} (re-probe found data).`);
2707
+ }
2708
+ }
2709
+ const noRollups = Boolean(args["no-rollups"]);
2710
+ const anyRowsSynced = Object.values(totals).some((t) => t.rows > 0);
2711
+ if (!noRollups && anyRowsSynced) {
2712
+ if (!args.quiet) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)…`);
2713
+ const rollupStart = Date.now();
2714
+ const results = await rebuildRollups({
2715
+ engine: store.engine,
2716
+ dataSource: store.dataSource,
2717
+ ctx: {
2718
+ userId: store.userId,
2719
+ siteId
2720
+ },
2721
+ defs: DEFAULT_ROLLUPS
2722
+ }).catch((err) => {
2723
+ logger.warn(`Rollup rebuild failed: ${err.message}`);
2724
+ return [];
2725
+ });
2726
+ if (!args.quiet && results.length > 0) {
2727
+ const kb = results.reduce((a, r) => a + r.bytes, 0) / 1024;
2728
+ const ms = Date.now() - rollupStart;
2729
+ logger.success(`Rebuilt ${results.length} rollup(s) in ${ms}ms — ${kb.toFixed(1)} KB`);
2730
+ }
2731
+ }
2732
+ if (anyFailed) process.exit(1);
3104
2733
  }
3105
2734
  });
3106
-
3107
- //#endregion
3108
- //#region src/index.ts
2735
/**
 * Report whether `name` is one of the tables returned by `allTables()`.
 *
 * @param {string} name - candidate table name
 * @returns {boolean} true when the name is in the list
 */
function isKnownTable(name) {
	const knownTables = allTables();
	return knownTables.includes(name);
}
2738
/**
 * Report whether `name` is one of the values in `ALL_SEARCH_TYPES`.
 *
 * @param {string} name - candidate search type
 * @returns {boolean} true when the name is in the list
 */
function isKnownSearchType(name) {
	const isListed = ALL_SEARCH_TYPES.includes(name);
	return isListed;
}
2741
/**
 * Print the local store's sync watermarks plus any inflight or failed
 * sync-state records, either as JSON or as a formatted report.
 *
 * The store is opened with `createLocalStore` on the data directory resolved
 * from `config`. When `siteFilter` is given, both queries are limited to the
 * site id the store derives from it.
 *
 * @param {object} config - loaded CLI configuration, passed to `resolveDataDir`
 * @param {string|undefined} siteFilter - optional site URL to restrict to
 * @param {boolean} asJson - print a single JSON document instead of the report
 * @returns {Promise<void>}
 */
async function printSyncStatus(config, siteFilter, asJson) {
	const store = createLocalStore({ dataDir: resolveDataDir(config) });
	const siteId = siteFilter ? store.siteIdFor(siteFilter) : void 0;
	const watermarks = await store.engine.getWatermarks({
		userId: store.userId,
		siteId
	});
	const states = await store.engine.getSyncStates({
		userId: store.userId,
		siteId
	});
	const failed = states.filter((s) => s.state === "failed");
	const inflight = states.filter((s) => s.state === "inflight");
	if (asJson) {
		console.log(JSON.stringify({
			dataDir: store.dataDir,
			siteFilter: siteFilter ?? null,
			watermarks,
			failed,
			inflight
		}, null, 2));
		return;
	}
	console.log();
	console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
	if (siteFilter) console.log(` \x1B[90mSite: ${siteFilter}\x1B[0m`);
	console.log();
	if (watermarks.length === 0) {
		console.log(` No sync watermarks. Run \`gscdump sync\` to ingest data.`);
		console.log();
		return;
	}
	console.log(` \x1B[1mWatermarks:\x1B[0m`);
	// Same table-then-site ordering the store stats report uses.
	for (const w of sortWatermarks(watermarks)) {
		const scope = w.siteId ? `${w.table}@${w.siteId}` : w.table;
		console.log(` ${scope.padEnd(28)} \x1B[36m${w.oldestDateSynced}\x1B[0m → \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
	}
	if (inflight.length > 0) {
		console.log();
		console.log(` \x1B[33m${inflight.length} inflight:\x1B[0m`);
		for (const s of inflight) console.log(` ${s.table}${s.siteId ? `@${s.siteId}` : ""} ${s.date} (attempt ${s.attempts}, started ${formatAge(s.updatedAt)})`);
	}
	if (failed.length > 0) {
		console.log();
		console.log(` \x1B[31m${failed.length} failed:\x1B[0m`);
		for (const s of failed) console.log(` ${s.table}${s.siteId ? `@${s.siteId}` : ""} ${s.date}: ${s.error ?? "unknown"}`);
		console.log();
		// A plain sync skips only dates already marked "done", so failed dates
		// are retried without --force.
		console.log(` Re-run \`gscdump sync\` over the same date range to retry failed dates (--force is only needed to re-fetch dates already marked done).`);
	}
	console.log();
}
3109
2796
  runMain(defineCommand({
3110
2797
  meta: {
3111
2798
  name: "gscdump",
@@ -3118,19 +2805,17 @@ runMain(defineCommand({
3118
2805
  query: queryCommand,
3119
2806
  sites: sitesCommand,
3120
2807
  sitemaps: sitemapsCommand,
3121
- register: registerCommand,
3122
- unregister: unregisterCommand,
3123
2808
  sync: syncCommand,
3124
- indexing: indexingCommand,
3125
- analysis: analysisCommand,
2809
+ store: storeCommand,
2810
+ inspect: inspectCommand,
2811
+ entities: entitiesCommand,
2812
+ analyze: analyzeCommand,
3126
2813
  auth: authCommand,
3127
2814
  config: configCommand,
3128
2815
  mcp: mcpCommand
3129
2816
  },
3130
2817
  setup() {
3131
- if (!process$1.argv.includes("mcp")) showSplash();
2818
+ if (!process.argv.includes("mcp")) showSplash();
3132
2819
  }
3133
2820
  }));
3134
-
3135
- //#endregion
3136
- export { };
2821
+ export {};