@gscdump/cli 0.3.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +82 -56
  2. package/dist/index.mjs +2225 -1079
  3. package/package.json +12 -9
package/dist/index.mjs CHANGED
@@ -1,43 +1,82 @@
1
1
  #!/usr/bin/env node
2
- import "node:module";
3
2
  import process from "node:process";
4
3
  import { defineCommand, runMain } from "citty";
5
- import fs from "node:fs/promises";
4
+ import { defaultAnalyzerRegistry } from "@gscdump/analysis/registry";
5
+ import { AnalyzerCapabilityError, analyzeFromSource, createEngineQuerySource, createGscApiQuerySource } from "@gscdump/analysis";
6
+ import { cancel, isCancel, multiselect, select, text } from "@clack/prompts";
7
+ import { daysAgo, fetchSitemap, formatErrorForCli, getDateRange, googleSearchConsole, progressBar } from "gscdump";
8
+ import fs, { readFile, rm } from "node:fs/promises";
6
9
  import { createServer } from "node:http";
7
10
  import path from "node:path";
8
- import { cancel, isCancel, multiselect, select, text } from "@clack/prompts";
9
11
  import { OAuth2Client } from "google-auth-library";
10
12
  import os from "node:os";
11
13
  import { consola } from "consola";
12
- import { batchInspectUrls, batchRequestIndexing, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, getIndexingMetadata, googleSearchConsole, inspectUrl, requestIndexing, submitSitemap } from "gscdump";
13
- import { between, country, date, device, gsc, page, query, searchAppearance } from "gscdump/query";
14
+ import { createNodeHarness } from "@gscdump/engine/node-harness";
15
+ import { TABLE_DIMS, transformGscRow } from "@gscdump/engine/ingest";
16
+ import { allTables, inferTable } from "@gscdump/engine/schema";
17
+ import { Buffer } from "node:buffer";
18
+ import { createEmptyTypesStore, createIndexingMetadataStore, createInspectionStore, createSitemapStore } from "@gscdump/engine/entities";
19
+ import { createGscMcpServer } from "@gscdump/mcp/server";
14
20
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
15
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
16
- import { z } from "zod";
17
-
18
- //#region rolldown:runtime
19
- var __defProp = Object.defineProperty;
20
- var __exportAll = (all, symbols) => {
21
- let target = {};
22
- for (var name in all) {
23
- __defProp(target, name, {
24
- get: all[name],
25
- enumerable: true
26
- });
21
+ import { SearchTypes, between, country, date, device, gsc, page, query, searchAppearance } from "gscdump/query";
22
+ import { DuckDBInstance } from "@duckdb/node-api";
23
+ import { sqlEscape } from "@gscdump/engine/sql";
24
+ import { DEFAULT_ROLLUPS, rebuildRollups } from "@gscdump/engine/rollups";
25
+ import { filesystemStats } from "@gscdump/engine/filesystem";
26
+ var LocalStoreUnsupportedError = class extends Error {
27
+ constructor(tool) {
28
+ super(`analysis "${tool}" is not yet implemented against the local Parquet store`);
29
+ this.name = "LocalStoreUnsupportedError";
27
30
  }
28
- if (symbols) {
29
- __defProp(target, Symbol.toStringTag, { value: "Module" });
31
+ };
32
+ var LocalStoreEmptyError = class extends Error {
33
+ constructor(siteUrl) {
34
+ super(`no local data synced for ${siteUrl} (run \`gscdump sync\` first)`);
35
+ this.name = "LocalStoreEmptyError";
30
36
  }
31
- return target;
32
37
  };
33
-
34
- //#endregion
35
- //#region src/config.ts
38
/**
 * Reports whether the local store holds any live entries for a site.
 *
 * @param {object} store - Object exposing `engine.listLive`, `userId` and `siteIdFor`.
 * @param {string} siteUrl - Site URL; converted to a site id via `store.siteIdFor`.
 * @returns {Promise<boolean>} `true` when `engine.listLive` yields a non-empty list.
 */
async function hasLocalData(store, siteUrl) {
	const scope = {
		userId: store.userId,
		siteId: store.siteIdFor(siteUrl)
	};
	const liveEntries = await store.engine.listLive(scope);
	return liveEntries.length > 0;
}
44
/**
 * Runs an analyzer against the local store through an engine-backed query source.
 *
 * @param {object} store - Object exposing `engine`, `userId` and `siteIdFor`.
 * @param {string} siteUrl - Site URL; converted to a site id via `store.siteIdFor`.
 * @param {object} params - Analyzer parameters; `params.type` names the analyzer.
 * @returns {Promise<object>} Whatever `analyzeFromSource` resolves to.
 * @throws {LocalStoreUnsupportedError} When the analysis rejects with `AnalyzerCapabilityError`.
 */
async function runLocalAnalysis(store, siteUrl, params) {
	const source = createEngineQuerySource({
		engine: store.engine,
		ctx: {
			userId: store.userId,
			siteId: store.siteIdFor(siteUrl)
		}
	});
	// Translate the analyzer's capability error into the CLI's own error type;
	// every other failure is passed through untouched.
	const translateFailure = (err) => {
		if (err instanceof AnalyzerCapabilityError) throw new LocalStoreUnsupportedError(params.type);
		throw err;
	};
	return analyzeFromSource(source, params, defaultAnalyzerRegistry).catch(translateFailure);
}
56
/**
 * Runs an analyzer against the live Search Console API.
 *
 * @param {object} client - GSC client handed to `createGscApiQuerySource`.
 * @param {string} siteUrl - Site URL to query.
 * @param {object} params - Analyzer parameters; `params.type` names the analyzer.
 * @returns {Promise<object>} Whatever `analyzeFromSource` resolves to.
 * @throws {LocalStoreUnsupportedError} When the analysis rejects with
 *   `AnalyzerCapabilityError`. The error class is kept so existing `instanceof`
 *   checks still work, but the message names the live API rather than the local
 *   Parquet store, and the original error is attached as `cause`.
 */
async function runLiveAnalysis(client, siteUrl, params) {
	const source = createGscApiQuerySource({
		client,
		siteUrl
	});
	return analyzeFromSource(source, params, defaultAnalyzerRegistry).catch((e) => {
		if (e instanceof AnalyzerCapabilityError) {
			const unsupported = new LocalStoreUnsupportedError(params.type);
			// The class's default message talks about the local store, which is
			// wrong on this code path and would mislead a user who passed --live.
			unsupported.message = `analysis "${params.type}" is not supported against the live GSC API`;
			unsupported.cause = e;
			throw unsupported;
		}
		throw e;
	});
}
36
65
  let configDir = path.join(os.homedir(), ".config", "gscdump");
37
66
  function getConfigDir() {
38
67
  return configDir;
39
68
  }
40
- const DEFAULT_CLOUD_URL = "https://cloud.gscdump.com";
69
+ function defaultDataDir() {
70
+ return path.join(os.homedir(), ".gscdump", "data");
71
+ }
72
+ function resolveDataDir(config) {
73
+ return expandTilde(config.dataDir ?? defaultDataDir());
74
+ }
75
/**
 * Expands a leading `~` in a path to the current user's home directory.
 *
 * Only the bare `~` and the `~<separator>` prefix are expanded; `~user/...`
 * and every other path are returned unchanged. On platforms whose path
 * separator is `\`, the `~\` prefix is expanded as well.
 *
 * @param {string} p - Path that may start with `~`.
 * @returns {string} The path with the home directory substituted, or `p` as given.
 */
function expandTilde(p) {
	if (p === "~") return os.homedir();
	// `~\` is only treated as a home prefix where `\` is the separator; on POSIX
	// a backslash is an ordinary filename character and must be left alone.
	const hasHomePrefix = p.startsWith("~/") || path.sep === "\\" && p.startsWith("~\\");
	if (hasHomePrefix) return path.join(os.homedir(), p.slice(2));
	return p;
}
41
80
  async function loadConfig() {
42
81
  return fs.readFile(path.join(configDir, "config.json"), "utf-8").then((data) => JSON.parse(data)).catch(() => ({}));
43
82
  }
@@ -51,28 +90,14 @@ async function saveConfig(config) {
51
90
  function getConfigPath() {
52
91
  return path.join(configDir, "config.json");
53
92
  }
54
-
55
- //#endregion
56
- //#region src/utils.ts
57
93
  const VERSION = "1.0.0";
58
94
  const logger = consola.withTag("gscdump");
59
- /**
60
- * Handles GSC API errors with helpful messages and suggestions.
61
- * Exits process with code 1.
62
- */
63
- function handleGscError(error) {
95
+ function gscErrorHandler(error) {
64
96
  console.error();
65
97
  console.error(formatErrorForCli(error));
66
98
  console.error();
67
99
  process.exit(1);
68
100
  }
69
- /**
70
- * Creates a .catch() handler for GSC API errors.
71
- * Use: somePromise.catch(gscErrorHandler)
72
- */
73
- function gscErrorHandler(error) {
74
- return handleGscError(error);
75
- }
76
101
  const gradientColors = [
77
102
  (s) => `\x1B[38;2;52;211;153m${s}\x1B[0m`,
78
103
  (s) => `\x1B[38;2;45;212;191m${s}\x1B[0m`,
@@ -80,9 +105,9 @@ const gradientColors = [
80
105
  (s) => `\x1B[38;2;56;189;248m${s}\x1B[0m`,
81
106
  (s) => `\x1B[38;2;96;165;250m${s}\x1B[0m`
82
107
  ];
83
- function applyGradient(text$1) {
84
- return [...text$1].map((char, i) => {
85
- const colorIndex = Math.floor(i / text$1.length * gradientColors.length);
108
+ function applyGradient(text) {
109
+ return [...text].map((char, i) => {
110
+ const colorIndex = Math.floor(i / text.length * gradientColors.length);
86
111
  return gradientColors[Math.min(colorIndex, gradientColors.length - 1)](char);
87
112
  }).join("");
88
113
  }
@@ -91,15 +116,27 @@ function showSplash() {
91
116
  console.log(` ${applyGradient("GSC Dump")} v${VERSION}`);
92
117
  console.log();
93
118
  }
94
- function progressBar(current, total, label, width = 30) {
95
- const percent = Math.min(current / total, 1);
96
- const filled = Math.round(width * percent);
97
- const empty = width - filled;
98
- return ` ${`\x1B[36m${"█".repeat(filled)}\x1B[0m\x1B[90m${"░".repeat(empty)}\x1B[0m`} \x1B[90m${current}/${total}\x1B[0m ${label}`;
99
- }
100
119
  function clearLine() {
101
120
  process.stdout.write("\r\x1B[K");
102
121
  }
122
/**
 * Formats the time elapsed since a timestamp as a short relative label.
 *
 * @param {number} ms - Timestamp in milliseconds, compared against `Date.now()`.
 * @returns {string} `"just now"` under one minute (including future timestamps),
 *   then `"<n>m ago"`, `"<n>h ago"` or `"<n>d ago"`; `"unknown"` when the
 *   elapsed time is not a finite number (e.g. `ms` is `undefined` or `NaN`).
 */
function formatAge(ms) {
	const delta = Date.now() - ms;
	// Without this guard every comparison below is false for NaN and the
	// function would fall through to "NaNd ago".
	if (!Number.isFinite(delta)) return "unknown";
	if (delta < 6e4) return "just now";
	if (delta < 36e5) return `${Math.floor(delta / 6e4)}m ago`;
	if (delta < 864e5) return `${Math.floor(delta / 36e5)}h ago`;
	return `${Math.floor(delta / 864e5)}d ago`;
}
129
/**
 * Processes every item with at most `concurrency` processor calls in flight.
 *
 * Workers pull the next unclaimed index from a shared cursor, so items are
 * started in order even though they may finish out of order.
 *
 * @param {Array} items - Items to process.
 * @param {number} concurrency - Maximum number of simultaneous workers. Values
 *   below 1 or not parseable as a number are treated as 1, so the items are
 *   still processed rather than silently skipped.
 * @param {(item: any, index: number) => Promise<any>} processor - Called once per item.
 * @returns {Promise<void>} Resolves when all items are done; rejects with the
 *   first processor failure (`Promise.all` semantics).
 */
async function runWithConcurrency(items, concurrency, processor) {
	if (items.length === 0) return;
	const requested = Math.floor(Number(concurrency));
	// NaN, 0 and negative requests would otherwise create zero workers.
	const workerCount = Number.isNaN(requested) ? 1 : Math.min(Math.max(requested, 1), items.length);
	let next = 0;
	async function worker() {
		while (next < items.length) {
			const i = next++;
			await processor(items[i], i);
		}
	}
	await Promise.all(Array.from({ length: workerCount }, () => worker()));
}
103
140
  function toCSV(data, columns) {
104
141
  return [columns.join(","), ...data.map((row) => columns.map((col) => {
105
142
  const val = row[col];
@@ -140,21 +177,7 @@ function exportToCSV(output) {
140
177
  ])}`);
141
178
  return sections.join("\n\n");
142
179
  }
143
-
144
- //#endregion
145
- //#region src/auth.ts
146
- var auth_exports = /* @__PURE__ */ __exportAll({
147
- authenticate: () => authenticate,
148
- authenticateCloud: () => authenticateCloud,
149
- clearCloudTokens: () => clearCloudTokens,
150
- clearTokens: () => clearTokens,
151
- getAuth: () => getAuth,
152
- getAuthCredentials: () => getAuthCredentials,
153
- loadCloudTokens: () => loadCloudTokens,
154
- loadTokens: () => loadTokens,
155
- saveCloudTokens: () => saveCloudTokens,
156
- saveTokens: () => saveTokens
157
- });
180
+ const REDIRECT_URI_RE = /redirect_uri=[^&]+/;
158
181
  function getTokensPath() {
159
182
  return path.join(getConfigDir(), "tokens.json");
160
183
  }
@@ -253,7 +276,7 @@ async function getAuthCodeViaLoopback(authUrl) {
253
276
  return;
254
277
  }
255
278
  resolvedRedirectUri = `http://127.0.0.1:${addr.port}`;
256
- const fullAuthUrl = authUrl.replace(/redirect_uri=[^&]+/, `redirect_uri=${encodeURIComponent(resolvedRedirectUri)}`);
279
+ const fullAuthUrl = authUrl.replace(REDIRECT_URI_RE, `redirect_uri=${encodeURIComponent(resolvedRedirectUri)}`);
257
280
  console.log();
258
281
  console.log(" \x1B[1mOpening browser for authorization...\x1B[0m");
259
282
  console.log(` \x1B[90mIf browser doesn't open, visit:\x1B[0m`);
@@ -318,156 +341,400 @@ async function authenticate(credentials, interactive) {
318
341
  logger.success(`Tokens saved to ${getTokensPath()}`);
319
342
  return oauth2Client;
320
343
  }
321
- function getCloudTokensPath() {
322
- return path.join(getConfigDir(), "cloud-tokens.json");
323
- }
324
- async function loadCloudTokens() {
325
- return fs.readFile(getCloudTokensPath(), "utf-8").then((data) => JSON.parse(data)).catch(() => null);
326
- }
327
- async function saveCloudTokens(tokens) {
328
- await fs.mkdir(getConfigDir(), {
329
- recursive: true,
330
- mode: 448
331
- });
332
- await fs.writeFile(getCloudTokensPath(), JSON.stringify(tokens, null, 2), { mode: 384 });
344
+ async function getAuth(opts = {}) {
345
+ const { interactive = true } = opts;
346
+ return authenticate(await getAuthCredentials(interactive), interactive);
333
347
  }
334
- async function clearCloudTokens() {
335
- await fs.rm(getCloudTokensPath()).catch(() => {});
336
- logger.success("Logged out from cloud");
348
+ function createLocalStore(opts) {
349
+ return createNodeHarness(opts);
337
350
  }
338
- async function authenticateCloud(cloudUrl, interactive) {
339
- const existingTokens = await loadCloudTokens();
340
- if (existingTokens) {
341
- const oauth2Client = new OAuth2Client();
342
- oauth2Client.setCredentials({
343
- access_token: existingTokens.accessToken,
344
- refresh_token: existingTokens.refreshToken,
345
- expiry_date: existingTokens.expiresAt
346
- });
347
- logger.success("Using cloud credentials");
348
- return oauth2Client;
349
- }
350
- if (!interactive) {
351
- logger.error("No cloud tokens. Run gscdump init to authenticate.");
352
- process.exit(1);
353
- }
354
- const initRes = await fetch(`${cloudUrl}/api/cli/auth/init`, { method: "POST" }).then((r) => r.json()).catch((e) => {
355
- logger.error(`Failed to connect to ${cloudUrl}: ${e.message}`);
356
- process.exit(1);
357
- });
358
- console.log();
359
- console.log(" \x1B[1mOpen this URL in your browser:\x1B[0m");
360
- console.log(` \x1B[36m${initRes.authUrl}\x1B[0m`);
361
- console.log();
362
- console.log(` \x1B[90mCode: ${initRes.code}\x1B[0m`);
363
- console.log();
364
- logger.info("Waiting for authorization...");
365
- const pollInterval = 2e3;
366
- const maxAttempts = Math.ceil(initRes.expiresIn * 1e3 / pollInterval);
367
- for (let i = 0; i < maxAttempts; i++) {
368
- await new Promise((r) => setTimeout(r, pollInterval));
369
- const pollRes = await fetch(`${cloudUrl}/api/cli/auth/poll?code=${initRes.code}`).then((r) => r.json()).catch(() => ({ status: "error" }));
370
- if (pollRes.status === "complete" && pollRes.tokens) {
371
- await saveCloudTokens(pollRes.tokens);
372
- logger.success("Authenticated via cloud.gscdump.com");
373
- const oauth2Client = new OAuth2Client();
374
- oauth2Client.setCredentials({
375
- access_token: pollRes.tokens.accessToken,
376
- refresh_token: pollRes.tokens.refreshToken,
377
- expiry_date: pollRes.tokens.expiresAt
378
- });
379
- return oauth2Client;
380
- }
381
- if (pollRes.status === "error") {
382
- logger.error("Authorization failed");
351
+ async function createCommandContext(opts = {}) {
352
+ const { needsAuth = false, needsStore = false, interactive = false } = opts;
353
+ const config = await loadConfig();
354
+ const auth = needsAuth ? await getAuth({
355
+ interactive,
356
+ config
357
+ }) : null;
358
+ const client = auth ? googleSearchConsole(auth) : null;
359
+ const store = needsStore ? createLocalStore({ dataDir: resolveDataDir(config) }) : null;
360
+ const loadSites = async () => {
361
+ if (!client) throw new Error("loadSites requires needsAuth: true");
362
+ return (await client.sites().catch((e) => {
363
+ logger.error(`Failed to fetch sites: ${e.message}`);
383
364
  process.exit(1);
384
- }
385
- }
386
- logger.error("Authorization timed out");
387
- process.exit(1);
388
- }
389
- async function getAuth(opts = {}) {
390
- const { interactive = true, config: providedConfig } = opts;
391
- const config = providedConfig || await loadConfig();
392
- if (!config.mode) {
393
- if (!interactive) {
394
- logger.error("Not configured. Run gscdump init first.");
365
+ })).filter((s) => s.siteUrl && s.permissionLevel !== "siteUnverifiedUser").map((s) => ({
366
+ siteUrl: s.siteUrl,
367
+ permissionLevel: s.permissionLevel || "unknown"
368
+ }));
369
+ };
370
+ const resolveSite = async (target) => {
371
+ const hint = target ?? config.defaultSite;
372
+ const sites = await loadSites();
373
+ if (sites.length === 0) {
374
+ logger.error("No verified sites found");
395
375
  process.exit(1);
396
376
  }
397
- logger.warn("GSCDump not configured");
398
- logger.info("Run: gscdump init");
399
- process.exit(1);
400
- }
401
- if (config.mode === "cloud") return authenticateCloud(config.cloudUrl || DEFAULT_CLOUD_URL, interactive);
402
- return authenticate(await getAuthCredentials(interactive), interactive);
377
+ if (hint) {
378
+ const match = sites.find((s) => s.siteUrl === hint || s.siteUrl.includes(hint));
379
+ if (match) return match.siteUrl;
380
+ }
381
+ if (sites.length === 1) return sites[0].siteUrl;
382
+ const selected = await select({
383
+ message: "Select a site",
384
+ options: sites.map((s) => ({
385
+ value: s.siteUrl,
386
+ label: s.siteUrl
387
+ }))
388
+ });
389
+ if (isCancel(selected)) {
390
+ cancel("Cancelled");
391
+ process.exit(0);
392
+ }
393
+ return selected;
394
+ };
395
+ return {
396
+ config,
397
+ auth,
398
+ client,
399
+ store,
400
+ loadSites,
401
+ resolveSite
402
+ };
403
403
  }
404
-
405
- //#endregion
406
- //#region src/commands/auth.ts
407
- const statusCommand = defineCommand({
408
- meta: {
409
- name: "status",
410
- description: "Show current authentication status"
404
+ const ANALYSIS_TOOLS = defaultAnalyzerRegistry.listAnalyzerIds();
405
+ const TOOL_EXTRA_ARGS = {
406
+ brand: { "brand-terms": {
407
+ type: "string",
408
+ description: "Comma-separated brand terms (required)"
409
+ } },
410
+ movers: {
411
+ "prev-start": {
412
+ type: "string",
413
+ description: "Previous period start date (required)"
414
+ },
415
+ "prev-end": {
416
+ type: "string",
417
+ description: "Previous period end date (required)"
418
+ }
411
419
  },
412
- async run() {
413
- const config = await loadConfig();
414
- console.log();
415
- console.log(` Mode: ${config.mode ? `\x1B[36m${config.mode}\x1B[0m` : "\x1B[33mnot configured\x1B[0m"}`);
416
- if (!config.mode) {
417
- logger.info("Run gscdump init to configure");
418
- return;
420
+ decay: {
421
+ "prev-start": {
422
+ type: "string",
423
+ description: "Previous period start date (required)"
424
+ },
425
+ "prev-end": {
426
+ type: "string",
427
+ description: "Previous period end date (required)"
428
+ }
429
+ },
430
+ concentration: { dimension: {
431
+ type: "string",
432
+ description: "Dimension: pages or keywords (default: pages)"
433
+ } },
434
+ seasonality: { metric: {
435
+ type: "string",
436
+ description: "Metric: clicks or impressions (default: clicks)"
437
+ } },
438
+ clustering: { "cluster-by": {
439
+ type: "string",
440
+ description: "Cluster by: prefix, intent, or both (default: both)"
441
+ } },
442
+ trends: {
443
+ "dimension": {
444
+ type: "string",
445
+ description: "Dimension: pages or keywords (default: pages)"
446
+ },
447
+ "weeks": {
448
+ type: "string",
449
+ description: "Rolling window size in weeks (default: 28)"
450
+ },
451
+ "min-weeks": {
452
+ type: "string",
453
+ description: "Minimum weeks with data to include an entity (default: weeks/4)"
419
454
  }
420
- if (config.mode === "cloud") {
421
- console.log(` Cloud: \x1B[36m${config.cloudUrl}\x1B[0m`);
422
- const tokens = await loadCloudTokens();
423
- if (!tokens) {
424
- logger.warn("Not authenticated");
425
- logger.info("Run gscdump init --force to re-authenticate");
455
+ }
456
+ };
457
/**
 * Translates parsed CLI arguments into the parameter object handed to an analyzer.
 *
 * `type`, `startDate`, `endDate` and `limit` are always present as keys
 * (`undefined` when the flag was not given); the tool-specific fields are
 * only added when their flag is set.
 *
 * @param {string} tool - Analyzer id; stored as `type`.
 * @param {object} args - Parsed CLI arguments keyed by flag name.
 * @returns {object} Analyzer parameters.
 * @throws {Error} When `--limit`, `--weeks` or `--min-weeks` is not a finite number.
 */
function buildParams(tool, args) {
	// Reject non-numeric input up front instead of forwarding NaN to the analyzer.
	const numericArg = (flag) => {
		const parsed = Number(args[flag]);
		if (!Number.isFinite(parsed)) throw new Error(`Invalid --${flag} value "${args[flag]}": expected a number`);
		return parsed;
	};
	const params = {
		type: tool,
		startDate: args.start ? String(args.start) : void 0,
		endDate: args.end ? String(args.end) : void 0,
		limit: args.limit ? numericArg("limit") : void 0
	};
	if (args["brand-terms"]) params.brandTerms = String(args["brand-terms"]).split(",").map((t) => t.trim()).filter(Boolean);
	if (args["prev-start"]) params.prevStartDate = String(args["prev-start"]);
	if (args["prev-end"]) params.prevEndDate = String(args["prev-end"]);
	if (args.dimension) params.dimension = String(args.dimension);
	if (args.metric) params.metric = String(args.metric);
	if (args["cluster-by"]) params.clusterBy = String(args["cluster-by"]);
	if (args.weeks) params.weeks = numericArg("weeks");
	if (args["min-weeks"]) params.minWeeksWithData = numericArg("min-weeks");
	return params;
}
474
+ function makeToolCommand(tool) {
475
+ const extraArgs = TOOL_EXTRA_ARGS[tool] || {};
476
+ return defineCommand({
477
+ meta: {
478
+ name: tool,
479
+ description: `Run ${tool} analysis`
480
+ },
481
+ args: {
482
+ site: {
483
+ type: "string",
484
+ alias: "s",
485
+ description: "Site URL"
486
+ },
487
+ start: {
488
+ type: "string",
489
+ description: "Start date (YYYY-MM-DD)"
490
+ },
491
+ end: {
492
+ type: "string",
493
+ description: "End date (YYYY-MM-DD)"
494
+ },
495
+ limit: {
496
+ type: "string",
497
+ alias: "l",
498
+ default: "100",
499
+ description: "Max results"
500
+ },
501
+ format: {
502
+ type: "string",
503
+ alias: "f",
504
+ default: "table",
505
+ description: "Output: table, json, csv"
506
+ },
507
+ json: {
508
+ type: "boolean",
509
+ default: false,
510
+ description: "Output as JSON"
511
+ },
512
+ live: {
513
+ type: "boolean",
514
+ default: false,
515
+ description: "Force live GSC API; bypass local Parquet store"
516
+ },
517
+ ...extraArgs
518
+ },
519
+ async run({ args }) {
520
+ const ctx = await createCommandContext({
521
+ needsAuth: true,
522
+ needsStore: !args.live
523
+ });
524
+ const siteUrl = await ctx.resolveSite(args.site);
525
+ logger.info(`Running ${tool} analysis...`);
526
+ const params = buildParams(tool, args);
527
+ const format = args.json ? "json" : String(args.format);
528
+ if (!args.live) {
529
+ const store = ctx.store;
530
+ if (!await hasLocalData(store, siteUrl).catch(() => false)) {
531
+ logger.error(`No local data for ${siteUrl}. Run \`gscdump sync\` first, or pass --live.`);
532
+ process.exit(1);
533
+ }
534
+ const localResult = await runLocalAnalysis(store, siteUrl, params).catch((e) => {
535
+ if (e instanceof LocalStoreUnsupportedError) {
536
+ logger.error(`${e.message}. Pass --live to run against the GSC API.`);
537
+ process.exit(1);
538
+ }
539
+ if (e instanceof LocalStoreEmptyError) {
540
+ logger.error(`${e.message}`);
541
+ process.exit(1);
542
+ }
543
+ logger.error(`Local analysis failed: ${e.message}`);
544
+ process.exit(1);
545
+ });
546
+ if (format === "json") {
547
+ console.log(JSON.stringify(localResult, null, 2));
548
+ return;
549
+ }
550
+ renderResults(localResult.results, localResult.results.length, format);
426
551
  return;
427
552
  }
428
- const hasAccess = !!tokens.accessToken;
429
- const hasRefresh = !!tokens.refreshToken;
430
- const expiry = tokens.expiresAt ? new Date(tokens.expiresAt) : null;
431
- const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
432
- logger.success("Authenticated");
433
- console.log();
434
- console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
435
- console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
436
- if (expiry) {
437
- const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
438
- console.log(` Expires: ${expiry.toISOString()} (${status})`);
439
- }
440
- } else {
441
- const tokens = await loadTokens();
442
- if (!tokens) {
443
- logger.warn("Not authenticated");
444
- logger.info("Run gscdump init --force to re-authenticate");
553
+ const result = await runLiveAnalysis(ctx.client, siteUrl, params).catch((e) => {
554
+ logger.error(`Analysis failed: ${e.message}`);
555
+ process.exit(1);
556
+ });
557
+ if (format === "json") {
558
+ console.log(JSON.stringify(result, null, 2));
445
559
  return;
446
560
  }
447
- const hasAccess = !!tokens.access_token;
448
- const hasRefresh = !!tokens.refresh_token;
449
- const expiry = tokens.expiry_date ? new Date(tokens.expiry_date) : null;
450
- const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
451
- logger.success("Authenticated");
452
- console.log();
453
- console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
454
- console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
455
- if (expiry) {
456
- const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
457
- console.log(` Expires: ${expiry.toISOString()} (${status})`);
458
- }
561
+ renderResults(result.results, result.results.length, format);
562
+ }
563
+ });
564
+ }
565
// Sparkline glyphs from lowest to highest level: the eight consecutive block
// elements U+2581..U+2588. Every entry must be exactly one character wide,
// otherwise sparklines lose columns and table alignment breaks.
const SPARK_CHARS = [
	"▁",
	"▂",
	"▃",
	"▄",
	"▅",
	"▆",
	"▇",
	"█"
];
// Placeholder drawn for a bucket that has no value.
const SPARK_GAP = "·";
576
+ const PERCENT_COLS = {
577
+ growthRatio: "ratio_to_pct",
578
+ brandShare: "direct",
579
+ topNConcentration: "direct",
580
+ declinePercent: "direct",
581
+ ctr: "direct",
582
+ share: "direct",
583
+ vsAverage: "ratio_to_pct",
584
+ clicksChangePercent: "scaled",
585
+ impressionsChangePercent: "scaled"
586
+ };
587
/**
 * Formats a number as a signed whole-number percentage.
 *
 * @param {number} val - Raw value.
 * @param {string} style - `"ratio_to_pct"` treats `val` as a ratio around 1
 *   (1.25 -> +25%), `"direct"` treats it as a fraction (0.12 -> +12%), and any
 *   other style uses `val` as an already-scaled percentage.
 * @returns {string} e.g. `"+25%"`, `"-8%"`, `"0%"`; an empty string when the
 *   computed percentage is not finite.
 */
function formatPct(val, style) {
	const pct = style === "ratio_to_pct" ? (val - 1) * 100 : style === "direct" ? val * 100 : val;
	if (!Number.isFinite(pct)) return "";
	const digits = pct.toFixed(0);
	// Anything that rounds to zero is shown unsigned, avoiding "-0%" / "+0%".
	if (Number(digits) === 0) return "0%";
	return `${pct > 0 ? "+" : ""}${digits}%`;
}
592
/**
 * Decides whether an array looks like a time series, judging by its first element only.
 *
 * @param {Array} arr - Candidate array.
 * @returns {boolean} `true` when the first element is a non-null object that has
 *   a bucket key (`week`, `date` or `month`) and a metric key (`clicks`,
 *   `impressions` or `value`) among its own enumerable keys.
 */
function isTimeSeries(arr) {
	if (arr.length === 0) return false;
	const head = arr[0];
	if (head === null || typeof head !== "object") return false;
	const fields = new Set(Object.keys(head));
	const bucketPresent = ["week", "date", "month"].some((name) => fields.has(name));
	if (!bucketPresent) return false;
	return ["clicks", "impressions", "value"].some((name) => fields.has(name));
}
601
/**
 * Chooses the time-bucket key of a series element.
 *
 * @param {object} first - First element of a series.
 * @returns {string} `"week"` if present, otherwise `"date"` if present, otherwise `"month"`.
 */
function pickBucketKey(first) {
	for (const candidate of ["week", "date"]) {
		if (candidate in first) return candidate;
	}
	return "month";
}
606
/**
 * Chooses the metric key of a series element.
 *
 * @param {object} first - First element of a series.
 * @returns {string} `"clicks"` if present, otherwise `"impressions"` if present, otherwise `"value"`.
 */
function pickMetricKey(first) {
	for (const candidate of ["clicks", "impressions"]) {
		if (candidate in first) return candidate;
	}
	return "value";
}
611
+ function computeAlignedSparklines(results, col) {
612
+ const allBuckets = /* @__PURE__ */ new Set();
613
+ const perRow = [];
614
+ let bucketKey = "week";
615
+ let metricKey = "clicks";
616
+ for (const r of results) {
617
+ const arr = r[col];
618
+ if (!Array.isArray(arr) || !isTimeSeries(arr)) {
619
+ perRow.push(null);
620
+ continue;
621
+ }
622
+ const first = arr[0];
623
+ bucketKey = pickBucketKey(first);
624
+ metricKey = pickMetricKey(first);
625
+ const m = /* @__PURE__ */ new Map();
626
+ for (const item of arr) {
627
+ const rec = item;
628
+ const key = String(rec[bucketKey]);
629
+ const val = Number(rec[metricKey] ?? 0);
630
+ allBuckets.add(key);
631
+ m.set(key, val);
459
632
  }
633
+ perRow.push(m);
460
634
  }
461
- });
462
- const logoutCommand = defineCommand({
635
+ const sorted = [...allBuckets].sort();
636
+ return perRow.map((m) => {
637
+ if (!m) return "";
638
+ const values = sorted.map((b) => m.has(b) ? m.get(b) : null);
639
+ const nonNull = values.filter((v) => v != null);
640
+ if (nonNull.length === 0) return SPARK_GAP.repeat(values.length);
641
+ const min = Math.min(...nonNull);
642
+ const range = Math.max(...nonNull) - min;
643
+ return values.map((v) => {
644
+ if (v == null) return SPARK_GAP;
645
+ if (range === 0) return SPARK_CHARS[0];
646
+ return SPARK_CHARS[Math.round((v - min) / range * (SPARK_CHARS.length - 1))];
647
+ }).join("");
648
+ });
649
+ }
650
/**
 * Classifies a result column so its cells can be formatted consistently.
 *
 * @param {string} col - Column name; looked up in `PERCENT_COLS`.
 * @param {Array} values - All values of that column, one per row.
 * @returns {string} `"series"` when the first non-null value is a time-series
 *   array; `"pct"`, `"float"` or `"int"` when every non-null value is a number;
 *   `"text"` otherwise (including an all-null column).
 */
function classifyCol(col, values) {
	const sample = values.find((v) => v != null);
	if (sample == null) return "text";
	if (Array.isArray(sample) && isTimeSeries(sample)) return "series";
	const numericOnly = values.every((v) => v == null || typeof v === "number");
	if (!numericOnly) return "text";
	if (col in PERCENT_COLS) return "pct";
	const hasFraction = values.some((v) => typeof v === "number" && !Number.isInteger(v));
	return hasFraction ? "float" : "int";
}
658
/**
 * Renders one table cell according to the kind assigned to its column.
 *
 * @param {*} val - Cell value.
 * @param {string} col - Column name; selects the percent style from `PERCENT_COLS`.
 * @param {string} kind - Column kind: `"int"`, `"float"`, `"pct"` or anything else.
 * @returns {string} Empty string for null/undefined; two decimals for numeric
 *   floats; a formatted percentage for numeric pct cells; `"[n items]"` for
 *   arrays and JSON for other objects in untyped columns; `String(val)` otherwise.
 */
function formatCellKinded(val, col, kind) {
	if (val == null) return "";
	const numeric = typeof val === "number";
	switch (kind) {
		case "int":
			return String(val);
		case "float":
			return numeric ? val.toFixed(2) : String(val);
		case "pct":
			return numeric ? formatPct(val, PERCENT_COLS[col]) : String(val);
		default:
			break;
	}
	if (Array.isArray(val)) {
		const plural = val.length === 1 ? "" : "s";
		return `[${val.length} item${plural}]`;
	}
	return typeof val === "object" ? JSON.stringify(val) : String(val);
}
667
/**
 * Builds a single trend sparkline when the result rows themselves form a time series.
 *
 * The bucket key (`week`, `date` or `month`) and metric key (`value`, `clicks`
 * or `impressions`) are picked from the first row, and every row must carry
 * both. Rows are ordered by the string form of their bucket before plotting.
 *
 * @param {Array<object>} results - Result rows.
 * @returns {{spark: string, label: string} | null} The sparkline and a caption,
 *   or `null` when there are fewer than two rows, the rows are not a
 *   consistent series, or no metric value is finite.
 */
function computeRowSeriesSparkline(results) {
	if (results.length < 2) return null;
	const first = results[0];
	const bucketKey = "week" in first ? "week" : "date" in first ? "date" : "month" in first ? "month" : null;
	if (!bucketKey) return null;
	const metricKey = "value" in first ? "value" : "clicks" in first ? "clicks" : "impressions" in first ? "impressions" : null;
	if (!metricKey) return null;
	for (const r of results) if (!(bucketKey in r) || !(metricKey in r)) return null;
	const values = [...results].sort((a, b) => String(a[bucketKey]).localeCompare(String(b[bucketKey]))).map((r) => Number(r[metricKey] ?? 0));
	const finite = values.filter((v) => Number.isFinite(v));
	if (finite.length === 0) return null;
	const min = Math.min(...finite);
	const range = Math.max(...finite) - min;
	return {
		spark: values.map((v) => {
			// Non-finite values are left out of min/max, so they have no valid
			// level; draw the gap glyph to keep one character per bucket.
			if (!Number.isFinite(v)) return SPARK_GAP;
			if (range === 0) return SPARK_CHARS[0];
			return SPARK_CHARS[Math.round((v - min) / range * (SPARK_CHARS.length - 1))];
		}).join(""),
		// At least two rows are guaranteed here, so the bucket name is always plural.
		label: `${results.length} ${bucketKey}s of ${metricKey}`
	};
}
688
/**
 * Prints analysis results to the console as CSV or as an aligned text table.
 *
 * Columns are taken from the keys of the first row. Time-series columns are
 * drawn as sparklines aligned across rows, and a trailing "trend" line is
 * added when the rows themselves form a time series.
 *
 * @param {Array<object>} results - Rows to print.
 * @param {number} total - Total number of available rows; a "showing" note is
 *   logged when it exceeds `results.length`.
 * @param {string} format - `"csv"` prints CSV (when there are rows); any other
 *   value prints the table. An empty result set always logs a warning instead.
 * @returns {void}
 */
function renderResults(results, total, format) {
	if (format === "csv" && results.length > 0) {
		console.log(toCSV(results, Object.keys(results[0])));
		return;
	}
	if (results.length === 0) {
		logger.warn("No results found");
		return;
	}
	const cols = Object.keys(results[0]);
	const kinds = cols.map((c) => classifyCol(c, results.map((r) => r[c])));
	const sparklineByCol = {};
	cols.forEach((c, i) => {
		if (kinds[i] === "series") sparklineByCol[c] = computeAlignedSparklines(results, c);
	});
	// Format every cell exactly once; the same strings feed both the width
	// calculation and the printed rows.
	const cells = results.map((row, rowIdx) => cols.map((c, colIdx) => {
		if (kinds[colIdx] === "series") return sparklineByCol[c][rowIdx];
		return formatCellKinded(row[c], c, kinds[colIdx]);
	}));
	// Widths account for every row, so a wide cell far down the table cannot
	// push the columns after it out of alignment.
	const widths = cols.map((c, i) => cells.reduce((w, row) => Math.max(w, row[i].length), c.length));
	console.log();
	console.log(` ${cols.map((c, i) => c.padEnd(widths[i])).join(" ")}`);
	console.log(` ${cols.map((_, i) => "─".repeat(widths[i])).join(" ")}`);
	for (const row of cells) console.log(` ${row.map((text, i) => text.padEnd(widths[i])).join(" ")}`);
	const rowSeriesSparkline = computeRowSeriesSparkline(results);
	if (rowSeriesSparkline) {
		console.log();
		console.log(` trend: ${rowSeriesSparkline.spark} (${rowSeriesSparkline.label})`);
	}
	console.log();
	logger.success(`${results.length} results`);
	if (total > results.length) logger.info(`Total: ${total} (showing ${results.length})`);
}
732
+ const analyzeCommand = defineCommand({
463
733
  meta: {
464
- name: "logout",
465
- description: "Clear stored OAuth tokens"
734
+ name: "analyze",
735
+ description: "SEO analysis tools"
466
736
  },
467
- async run() {
468
- if ((await loadConfig()).mode === "cloud") await clearCloudTokens();
469
- else await clearTokens();
470
- }
737
+ subCommands: Object.fromEntries(ANALYSIS_TOOLS.map((tool) => [tool, makeToolCommand(tool)]))
471
738
  });
472
739
  const authCommand = defineCommand({
473
740
  meta: {
@@ -475,295 +742,620 @@ const authCommand = defineCommand({
475
742
  description: "Manage authentication"
476
743
  },
477
744
  subCommands: {
478
- status: statusCommand,
479
- logout: logoutCommand
745
+ status: defineCommand({
746
+ meta: {
747
+ name: "status",
748
+ description: "Show current authentication status"
749
+ },
750
+ async run() {
751
+ const tokens = await loadTokens();
752
+ if (!tokens) {
753
+ logger.warn("Not authenticated");
754
+ logger.info("Run gscdump init to authenticate");
755
+ return;
756
+ }
757
+ const hasAccess = !!tokens.access_token;
758
+ const hasRefresh = !!tokens.refresh_token;
759
+ const expiry = tokens.expiry_date ? new Date(tokens.expiry_date) : null;
760
+ const isExpired = expiry && expiry < /* @__PURE__ */ new Date();
761
+ logger.success("Authenticated");
762
+ console.log();
763
+ console.log(` Access token: ${hasAccess ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
764
+ console.log(` Refresh token: ${hasRefresh ? "\x1B[32mpresent\x1B[0m" : "\x1B[31mmissing\x1B[0m"}`);
765
+ if (expiry) {
766
+ const status = isExpired ? "\x1B[33mexpired\x1B[0m" : "\x1B[32mvalid\x1B[0m";
767
+ console.log(` Expires: ${expiry.toISOString()} (${status})`);
768
+ }
769
+ }
770
+ }),
771
+ logout: defineCommand({
772
+ meta: {
773
+ name: "logout",
774
+ description: "Clear stored OAuth tokens"
775
+ },
776
+ async run() {
777
+ await clearTokens();
778
+ }
779
+ })
480
780
  }
481
781
  });
482
-
483
- //#endregion
484
- //#region src/commands/config.ts
485
- const showCommand = defineCommand({
782
+ const configCommand = defineCommand({
486
783
  meta: {
487
- name: "show",
488
- description: "Show current config"
784
+ name: "config",
785
+ description: "Manage configuration"
489
786
  },
490
- async run() {
491
- const config = await loadConfig();
492
- const configPath = getConfigPath();
493
- logger.info(`Config: ${configPath}`);
494
- console.log();
495
- if (Object.keys(config).length === 0) {
496
- logger.warn("No config set");
497
- return;
498
- }
499
- console.log(JSON.stringify(config, null, 2));
787
+ subCommands: {
788
+ show: defineCommand({
789
+ meta: {
790
+ name: "show",
791
+ description: "Show current config"
792
+ },
793
+ async run() {
794
+ const config = await loadConfig();
795
+ const configPath = getConfigPath();
796
+ logger.info(`Config: ${configPath}`);
797
+ console.log();
798
+ if (Object.keys(config).length === 0) {
799
+ logger.warn("No config set");
800
+ return;
801
+ }
802
+ console.log(JSON.stringify(config, null, 2));
803
+ }
804
+ }),
805
+ set: defineCommand({
806
+ meta: {
807
+ name: "set",
808
+ description: "Set a config value"
809
+ },
810
+ args: {
811
+ key: {
812
+ type: "positional",
813
+ description: "Config key (defaultSite, defaultPeriod, defaultFormat, defaultDb)",
814
+ required: true
815
+ },
816
+ value: {
817
+ type: "positional",
818
+ description: "Value to set",
819
+ required: true
820
+ }
821
+ },
822
+ async run({ args }) {
823
+ const validKeys = [
824
+ "defaultSite",
825
+ "defaultPeriod",
826
+ "defaultFormat",
827
+ "defaultDb"
828
+ ];
829
+ if (!validKeys.includes(args.key)) {
830
+ logger.error(`Invalid key: ${args.key}`);
831
+ logger.info(`Valid keys: ${validKeys.join(", ")}`);
832
+ process.exit(1);
833
+ }
834
+ const config = await loadConfig();
835
+ config[args.key] = args.value;
836
+ await saveConfig(config);
837
+ logger.success(`Set ${args.key} = ${args.value}`);
838
+ }
839
+ }),
840
+ unset: defineCommand({
841
+ meta: {
842
+ name: "unset",
843
+ description: "Remove a config value"
844
+ },
845
+ args: { key: {
846
+ type: "positional",
847
+ description: "Config key to remove",
848
+ required: true
849
+ } },
850
+ async run({ args }) {
851
+ const config = await loadConfig();
852
+ delete config[args.key];
853
+ await saveConfig(config);
854
+ logger.success(`Removed ${args.key}`);
855
+ }
856
+ }),
857
+ path: defineCommand({
858
+ meta: {
859
+ name: "path",
860
+ description: "Show config file path"
861
+ },
862
+ run() {
863
+ console.log(getConfigPath());
864
+ }
865
+ })
500
866
  }
501
867
  });
502
- const setCommand = defineCommand({
868
+ const DEFAULT_OUT = "./gscdump-export";
869
+ const dumpCommand = defineCommand({
503
870
  meta: {
504
- name: "set",
505
- description: "Set a config value"
871
+ name: "dump",
872
+ description: "Export live Parquet files from the local store to a directory"
506
873
  },
507
874
  args: {
508
- key: {
509
- type: "positional",
510
- description: "Config key (defaultSite, defaultPeriod, defaultFormat, defaultDb)",
511
- required: true
875
+ site: {
876
+ type: "string",
877
+ alias: "s",
878
+ description: "Site URL (e.g., sc-domain:example.com)"
512
879
  },
513
- value: {
514
- type: "positional",
515
- description: "Value to set",
516
- required: true
880
+ out: {
881
+ type: "string",
882
+ alias: "o",
883
+ default: DEFAULT_OUT,
884
+ description: `Output directory (default: ${DEFAULT_OUT})`
885
+ },
886
+ compact: {
887
+ type: "boolean",
888
+ default: false,
889
+ description: "Compact every closed month into a single file before exporting"
890
+ },
891
+ quiet: {
892
+ type: "boolean",
893
+ alias: "q",
894
+ default: false,
895
+ description: "Suppress progress output"
517
896
  }
518
897
  },
519
898
  async run({ args }) {
520
- const validKeys = [
521
- "defaultSite",
522
- "defaultPeriod",
523
- "defaultFormat",
524
- "defaultDb"
525
- ];
526
- if (!validKeys.includes(args.key)) {
527
- logger.error(`Invalid key: ${args.key}`);
528
- logger.info(`Valid keys: ${validKeys.join(", ")}`);
529
- process.exit(1);
899
+ const ctx = await createCommandContext({
900
+ needsAuth: true,
901
+ needsStore: true
902
+ });
903
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
904
+ const store = ctx.store;
905
+ const outDir = path.resolve(String(args.out));
906
+ if (args.compact) await compactClosedMonths(store, siteUrl, args.quiet);
907
+ const entries = await listLiveEntries(store, siteUrl);
908
+ if (entries.length === 0) {
909
+ logger.warn(`No data for ${siteUrl}. Run \`gscdump sync\` first.`);
910
+ process.exit(0);
530
911
  }
531
- const config = await loadConfig();
532
- config[args.key] = args.value;
533
- await saveConfig(config);
534
- logger.success(`Set ${args.key} = ${args.value}`);
535
- }
536
- });
537
- const unsetCommand = defineCommand({
538
- meta: {
539
- name: "unset",
540
- description: "Remove a config value"
541
- },
542
- args: { key: {
543
- type: "positional",
544
- description: "Config key to remove",
545
- required: true
546
- } },
547
- async run({ args }) {
548
- const config = await loadConfig();
549
- delete config[args.key];
550
- await saveConfig(config);
551
- logger.success(`Removed ${args.key}`);
552
- }
553
- });
554
- const pathCommand = defineCommand({
555
- meta: {
556
- name: "path",
557
- description: "Show config file path"
558
- },
559
- run() {
560
- console.log(getConfigPath());
561
- }
562
- });
563
- const configCommand = defineCommand({
564
- meta: {
565
- name: "config",
566
- description: "Manage configuration"
567
- },
568
- subCommands: {
569
- show: showCommand,
570
- set: setCommand,
571
- unset: unsetCommand,
572
- path: pathCommand
912
+ await fs.mkdir(outDir, { recursive: true });
913
+ let copied = 0;
914
+ for (const entry of entries) {
915
+ const bytes = await store.engine.readObject(entry.objectKey);
916
+ const target = path.join(outDir, entry.objectKey);
917
+ await fs.mkdir(path.dirname(target), { recursive: true });
918
+ await fs.writeFile(target, Buffer.from(bytes));
919
+ copied++;
920
+ }
921
+ if (!args.quiet) logger.success(`Exported ${copied} file(s) to ${outDir}`);
573
922
  }
574
923
  });
575
-
576
- //#endregion
577
- //#region src/commands/dump.ts
578
- const DUMP_DATA_TYPES = [
579
- "pages",
580
- "keywords",
581
- "countries",
582
- "devices"
583
- ];
584
- function getDimensions(dataType) {
585
- switch (dataType) {
586
- case "pages": return [page, date];
587
- case "keywords": return [query, date];
588
- case "countries": return [country, date];
589
- case "devices": return [device, date];
924
+ async function listLiveEntries(store, siteUrl) {
925
+ const siteId = store.siteIdFor(siteUrl);
926
+ return (await Promise.all(allTables().map((table) => store.engine.listLive({
927
+ userId: store.userId,
928
+ siteId,
929
+ table
930
+ })))).flat();
931
+ }
932
+ async function compactClosedMonths(store, siteUrl, quiet) {
933
+ const siteId = store.siteIdFor(siteUrl);
934
+ for (const table of allTables()) {
935
+ if (!quiet) logger.info(`Compacting ${table} (raw→d7→d30→d90)`);
936
+ await store.engine.compactTiered({
937
+ userId: store.userId,
938
+ siteId,
939
+ table
940
+ });
590
941
  }
591
942
  }
592
- const dumpCommand = defineCommand({
943
+ const INSPECTION_QPD_PER_PROPERTY = 2e3;
944
+ const INDEXING_NOT_FOUND_RE = /\b404\b|NOT_FOUND/i;
945
+ async function readUrlList(opts) {
946
+ if (opts.file) return (await readFile(opts.file, "utf8")).split("\n").map((l) => l.trim()).filter(Boolean);
947
+ const chunks = [];
948
+ for await (const chunk of process.stdin) chunks.push(chunk);
949
+ return Buffer.concat(chunks).toString("utf8").split("\n").map((l) => l.trim()).filter(Boolean);
950
+ }
951
+ const inspectSubCommand = defineCommand({
593
952
  meta: {
594
- name: "dump",
595
- description: "Export search analytics data via GSC API"
953
+ name: "inspect",
954
+ description: "Run URL Inspection for a list of URLs and persist results to the local entity store"
596
955
  },
597
956
  args: {
598
957
  site: {
599
958
  type: "string",
600
959
  alias: "s",
960
+ required: true,
601
961
  description: "Site URL (e.g., sc-domain:example.com)"
602
962
  },
603
- output: {
604
- type: "string",
605
- alias: "o",
606
- description: "Output file path (default: stdout)"
607
- },
608
- format: {
963
+ file: {
609
964
  type: "string",
610
965
  alias: "f",
611
- default: "json",
612
- description: "Output format: json or csv"
613
- },
614
- start: {
615
- type: "string",
616
- description: "Start date (YYYY-MM-DD)"
617
- },
618
- end: {
619
- type: "string",
620
- description: "End date (YYYY-MM-DD)"
621
- },
622
- days: {
623
- type: "string",
624
- alias: "d",
625
- default: "28",
626
- description: "Number of days to fetch (default: 28)"
966
+ description: "Path to a file with one URL per line. If omitted, reads from stdin."
627
967
  },
628
- types: {
968
+ limit: {
629
969
  type: "string",
630
- alias: "t",
631
- description: "Data types: pages,keywords,countries,devices"
970
+ description: `Max URLs to inspect this run (default: ${INSPECTION_QPD_PER_PROPERTY}, the per-property GSC daily quota)`
632
971
  },
633
- limit: {
972
+ concurrency: {
634
973
  type: "string",
635
- alias: "l",
636
- default: "25000",
637
- description: "Max rows per data type"
974
+ alias: "c",
975
+ default: "4",
976
+ description: "Concurrent in-flight inspect calls (default: 4)"
638
977
  },
639
978
  quiet: {
640
979
  type: "boolean",
641
980
  alias: "q",
642
981
  default: false,
643
982
  description: "Suppress progress output"
644
- },
645
- interactive: {
646
- type: "boolean",
647
- alias: "i",
648
- default: false,
649
- description: "Interactive mode - prompts for options"
650
983
  }
651
984
  },
652
985
  async run({ args }) {
653
- const config = await loadConfig();
654
- const { getAuth: getAuth$1 } = await Promise.resolve().then(() => auth_exports);
655
- const client = googleSearchConsole(await getAuth$1({
656
- interactive: false,
657
- config
658
- }));
659
- let siteUrl = String(args.site || config.defaultSite || "");
660
- if (!siteUrl || args.interactive) {
661
- const verified = (await client.sites()).filter((s) => s.permissionLevel !== "siteUnverifiedUser");
662
- if (verified.length === 0) {
663
- logger.error("No verified sites found");
664
- process.exit(1);
986
+ const ctx = await createCommandContext({
987
+ needsAuth: true,
988
+ needsStore: true
989
+ });
990
+ const client = ctx.client;
991
+ const store = ctx.store;
992
+ const siteUrl = String(args.site);
993
+ const limit = args.limit ? Number.parseInt(String(args.limit), 10) : INSPECTION_QPD_PER_PROPERTY;
994
+ const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
995
+ const quiet = Boolean(args.quiet);
996
+ const urls = (await readUrlList({ file: args.file ? String(args.file) : void 0 })).slice(0, limit);
997
+ if (urls.length === 0) {
998
+ logger.warn("No URLs to inspect.");
999
+ return;
1000
+ }
1001
+ if (urls.length === limit && limit < INSPECTION_QPD_PER_PROPERTY) logger.info(`Capping at --limit ${limit}`);
1002
+ if (urls.length === INSPECTION_QPD_PER_PROPERTY) logger.info(`Hit per-property daily inspection quota (${INSPECTION_QPD_PER_PROPERTY}); remaining URLs will be queued for tomorrow.`);
1003
+ const inspector = createInspectionStore({ dataSource: store.dataSource });
1004
+ let completed = 0;
1005
+ let failed = 0;
1006
+ const records = [];
1007
+ const failures = [];
1008
+ await runWithConcurrency(urls, concurrency, async (url) => {
1009
+ const result = await client.inspect(siteUrl, url).catch((err) => err);
1010
+ if (result instanceof Error) {
1011
+ failed++;
1012
+ failures.push({
1013
+ url,
1014
+ error: result.message
1015
+ });
1016
+ } else {
1017
+ const ix = result.inspectionResult;
1018
+ const indexStatus = ix?.indexStatusResult;
1019
+ records.push({
1020
+ url,
1021
+ inspectedAt: (/* @__PURE__ */ new Date()).toISOString(),
1022
+ indexStatus: indexStatus?.verdict ?? void 0,
1023
+ lastCrawlTime: indexStatus?.lastCrawlTime ?? void 0,
1024
+ googleCanonical: indexStatus?.googleCanonical ?? void 0,
1025
+ userCanonical: indexStatus?.userCanonical ?? void 0,
1026
+ coverageState: indexStatus?.coverageState ?? void 0,
1027
+ robotsTxtState: indexStatus?.robotsTxtState ?? void 0,
1028
+ indexingState: indexStatus?.indexingState ?? void 0,
1029
+ pageFetchState: indexStatus?.pageFetchState ?? void 0,
1030
+ mobileUsabilityVerdict: ix?.mobileUsabilityResult?.verdict ?? void 0,
1031
+ richResultsVerdict: ix?.richResultsResult?.verdict ?? void 0,
1032
+ raw: ix
1033
+ });
665
1034
  }
666
- const selected = await select({
667
- message: "Select a site",
668
- options: verified.map((s) => ({
669
- value: s.siteUrl,
670
- label: s.siteUrl
671
- })),
672
- initialValue: siteUrl || verified[0]?.siteUrl
673
- });
674
- if (isCancel(selected)) {
675
- cancel("Cancelled");
676
- process.exit(0);
1035
+ completed++;
1036
+ if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, `${url.slice(0, 60)}`)}`);
1037
+ });
1038
+ if (!quiet) process.stdout.write("\n");
1039
+ await inspector.writeBatch({
1040
+ userId: store.userId,
1041
+ siteId: store.siteIdFor(siteUrl)
1042
+ }, records);
1043
+ if (!quiet) {
1044
+ logger.success(`Inspected ${records.length}/${urls.length} URL(s)`);
1045
+ if (failed > 0) {
1046
+ logger.warn(`${failed} failed:`);
1047
+ for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
1048
+ if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
677
1049
  }
678
- siteUrl = selected;
679
1050
  }
680
- let startDate;
681
- let endDate;
682
- if (args.start && args.end) {
683
- startDate = String(args.start);
684
- endDate = String(args.end);
685
- } else if (args.interactive) {
686
- const startInput = await text({
687
- message: "Start date (YYYY-MM-DD)",
688
- placeholder: (/* @__PURE__ */ new Date(Date.now() - Number(args.days) * 864e5)).toISOString().split("T")[0]
689
- });
690
- if (isCancel(startInput)) {
691
- cancel("Cancelled");
692
- process.exit(0);
1051
+ if (failed > 0) process.exit(1);
1052
+ }
1053
+ });
1054
+ const showSubCommand = defineCommand({
1055
+ meta: {
1056
+ name: "show",
1057
+ description: "Print the latest inspection record for a URL from the local entity store"
1058
+ },
1059
+ args: {
1060
+ site: {
1061
+ type: "string",
1062
+ alias: "s",
1063
+ required: true,
1064
+ description: "Site URL"
1065
+ },
1066
+ url: {
1067
+ type: "positional",
1068
+ required: true,
1069
+ description: "URL to look up"
1070
+ },
1071
+ json: {
1072
+ type: "boolean",
1073
+ default: false,
1074
+ description: "Output as JSON"
1075
+ }
1076
+ },
1077
+ async run({ args }) {
1078
+ const store = (await createCommandContext({ needsStore: true })).store;
1079
+ const record = await createInspectionStore({ dataSource: store.dataSource }).getLatest({
1080
+ userId: store.userId,
1081
+ siteId: store.siteIdFor(String(args.site))
1082
+ }, String(args.url));
1083
+ if (!record) {
1084
+ logger.warn(`No inspection record for ${args.url}`);
1085
+ process.exit(1);
1086
+ }
1087
+ if (args.json) {
1088
+ console.log(JSON.stringify(record, null, 2));
1089
+ return;
1090
+ }
1091
+ console.log();
1092
+ console.log(` \x1B[1m${record.url}\x1B[0m`);
1093
+ console.log(` Inspected: ${record.inspectedAt}`);
1094
+ if (record.indexStatus) console.log(` Index: ${record.indexStatus}`);
1095
+ if (record.lastCrawlTime) console.log(` Last crawl: ${record.lastCrawlTime}`);
1096
+ if (record.googleCanonical) console.log(` Canonical: ${record.googleCanonical}`);
1097
+ if (record.coverageState) console.log(` Coverage: ${record.coverageState}`);
1098
+ if (record.mobileUsabilityVerdict) console.log(` Mobile: ${record.mobileUsabilityVerdict}`);
1099
+ if (record.richResultsVerdict) console.log(` Rich results: ${record.richResultsVerdict}`);
1100
+ console.log();
1101
+ }
1102
+ });
1103
+ const sitemapsSnapshotSubCommand = defineCommand({
1104
+ meta: {
1105
+ name: "snapshot",
1106
+ description: "Fetch current sitemap state from GSC and persist to the local entity store"
1107
+ },
1108
+ args: {
1109
+ site: {
1110
+ type: "string",
1111
+ alias: "s",
1112
+ required: true,
1113
+ description: "Site URL (e.g., sc-domain:example.com)"
1114
+ },
1115
+ quiet: {
1116
+ type: "boolean",
1117
+ alias: "q",
1118
+ default: false,
1119
+ description: "Suppress progress output"
1120
+ },
1121
+ json: {
1122
+ type: "boolean",
1123
+ default: false,
1124
+ description: "Emit the snapshot JSON to stdout"
1125
+ }
1126
+ },
1127
+ async run({ args }) {
1128
+ const ctx = await createCommandContext({
1129
+ needsAuth: true,
1130
+ needsStore: true
1131
+ });
1132
+ const client = ctx.client;
1133
+ const store = ctx.store;
1134
+ const siteUrl = String(args.site);
1135
+ const quiet = Boolean(args.quiet);
1136
+ const apiSitemaps = await client.sitemaps.list(siteUrl);
1137
+ const capturedAt = (/* @__PURE__ */ new Date()).toISOString();
1138
+ const records = apiSitemaps.filter((s) => typeof s.path === "string").map((s) => ({
1139
+ path: s.path,
1140
+ capturedAt,
1141
+ lastDownloaded: s.lastDownloaded ?? void 0,
1142
+ lastSubmitted: s.lastSubmitted ?? void 0,
1143
+ type: s.type ?? void 0,
1144
+ isPending: s.isPending ?? void 0,
1145
+ isSitemapsIndex: s.isSitemapsIndex ?? void 0,
1146
+ errors: s.errors ?? void 0,
1147
+ warnings: s.warnings ?? void 0,
1148
+ contents: s.contents?.map((c) => ({
1149
+ type: c.type ?? void 0,
1150
+ submitted: c.submitted ?? void 0,
1151
+ indexed: c.indexed ?? void 0
1152
+ })),
1153
+ raw: s
1154
+ }));
1155
+ await createSitemapStore({ dataSource: store.dataSource }).writeSnapshot({
1156
+ userId: store.userId,
1157
+ siteId: store.siteIdFor(siteUrl)
1158
+ }, records);
1159
+ if (args.json) {
1160
+ console.log(JSON.stringify({
1161
+ site: siteUrl,
1162
+ capturedAt,
1163
+ records
1164
+ }, null, 2));
1165
+ return;
1166
+ }
1167
+ if (!quiet) {
1168
+ logger.success(`Captured ${records.length} sitemap(s) for ${siteUrl}`);
1169
+ for (const r of records) {
1170
+ const errors = r.errors && r.errors !== "0" ? ` \x1B[31merr=${r.errors}\x1B[0m` : "";
1171
+ const warnings = r.warnings && r.warnings !== "0" ? ` \x1B[33mwarn=${r.warnings}\x1B[0m` : "";
1172
+ const downloaded = r.lastDownloaded ? ` last=${r.lastDownloaded}` : "";
1173
+ console.log(` ${r.path}${downloaded}${errors}${warnings}`);
1174
+ }
1175
+ }
1176
+ }
1177
+ });
1178
+ const sitemapsShowSubCommand = defineCommand({
1179
+ meta: {
1180
+ name: "show",
1181
+ description: "Print the latest captured sitemap state for a feedpath"
1182
+ },
1183
+ args: {
1184
+ site: {
1185
+ type: "string",
1186
+ alias: "s",
1187
+ required: true,
1188
+ description: "Site URL"
1189
+ },
1190
+ path: {
1191
+ type: "positional",
1192
+ required: true,
1193
+ description: "Sitemap path (feedpath)"
1194
+ },
1195
+ json: {
1196
+ type: "boolean",
1197
+ default: false,
1198
+ description: "Output as JSON"
1199
+ }
1200
+ },
1201
+ async run({ args }) {
1202
+ const store = (await createCommandContext({ needsStore: true })).store;
1203
+ const record = await createSitemapStore({ dataSource: store.dataSource }).getLatest({
1204
+ userId: store.userId,
1205
+ siteId: store.siteIdFor(String(args.site))
1206
+ }, String(args.path));
1207
+ if (!record) {
1208
+ logger.warn(`No sitemap record for ${args.path}`);
1209
+ process.exit(1);
1210
+ }
1211
+ if (args.json) {
1212
+ console.log(JSON.stringify(record, null, 2));
1213
+ return;
1214
+ }
1215
+ console.log();
1216
+ console.log(` \x1B[1m${record.path}\x1B[0m`);
1217
+ console.log(` Captured: ${record.capturedAt}`);
1218
+ if (record.lastDownloaded) console.log(` Downloaded: ${record.lastDownloaded}`);
1219
+ if (record.lastSubmitted) console.log(` Submitted: ${record.lastSubmitted}`);
1220
+ if (record.type) console.log(` Type: ${record.type}`);
1221
+ if (record.errors) console.log(` Errors: ${record.errors}`);
1222
+ if (record.warnings) console.log(` Warnings: ${record.warnings}`);
1223
+ if (record.contents?.length) {
1224
+ console.log(` Contents:`);
1225
+ for (const c of record.contents) {
1226
+ const bits = [
1227
+ c.type,
1228
+ c.submitted && `submitted=${c.submitted}`,
1229
+ c.indexed && `indexed=${c.indexed}`
1230
+ ].filter(Boolean).join(" ");
1231
+ console.log(` ${bits}`);
1232
+ }
1233
+ }
1234
+ console.log();
1235
+ }
1236
+ });
1237
+ const indexingSubCommand = defineCommand({
1238
+ meta: {
1239
+ name: "indexing",
1240
+ description: "Snapshot Indexing API metadata per URL"
1241
+ },
1242
+ subCommands: { snapshot: defineCommand({
1243
+ meta: {
1244
+ name: "snapshot",
1245
+ description: "Fetch Indexing API metadata (latest update/remove per URL) and persist to the local entity store"
1246
+ },
1247
+ args: {
1248
+ site: {
1249
+ type: "string",
1250
+ alias: "s",
1251
+ required: true,
1252
+ description: "Site URL (e.g., sc-domain:example.com)"
1253
+ },
1254
+ file: {
1255
+ type: "string",
1256
+ alias: "f",
1257
+ description: "Path to a file with one URL per line. If omitted, reads from stdin."
1258
+ },
1259
+ concurrency: {
1260
+ type: "string",
1261
+ alias: "c",
1262
+ default: "4",
1263
+ description: "Concurrent in-flight getMetadata calls (default: 4)"
1264
+ },
1265
+ quiet: {
1266
+ type: "boolean",
1267
+ alias: "q",
1268
+ default: false,
1269
+ description: "Suppress progress output"
693
1270
  }
694
- const endInput = await text({
695
- message: "End date (YYYY-MM-DD)",
696
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0]
1271
+ },
1272
+ async run({ args }) {
1273
+ const ctx = await createCommandContext({
1274
+ needsAuth: true,
1275
+ needsStore: true
697
1276
  });
698
- if (isCancel(endInput)) {
699
- cancel("Cancelled");
700
- process.exit(0);
1277
+ const client = ctx.client;
1278
+ const store = ctx.store;
1279
+ const siteUrl = String(args.site);
1280
+ const concurrency = Math.max(1, Number.parseInt(String(args.concurrency), 10) || 4);
1281
+ const quiet = Boolean(args.quiet);
1282
+ const urls = await readUrlList({ file: args.file ? String(args.file) : void 0 });
1283
+ if (urls.length === 0) {
1284
+ logger.warn("No URLs to fetch metadata for.");
1285
+ return;
701
1286
  }
702
- startDate = String(startInput) || (/* @__PURE__ */ new Date(Date.now() - Number(args.days) * 864e5)).toISOString().split("T")[0];
703
- endDate = String(endInput) || (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
704
- } else {
705
- const days = Number.parseInt(String(args.days), 10);
706
- endDate = (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
707
- startDate = (/* @__PURE__ */ new Date(Date.now() - (days + 3) * 864e5)).toISOString().split("T")[0];
708
- }
709
- let dataTypes;
710
- if (args.types) dataTypes = String(args.types).split(",").filter((t) => DUMP_DATA_TYPES.includes(t));
711
- else if (args.interactive) {
712
- const selected = await multiselect({
713
- message: "Select data types to export",
714
- options: DUMP_DATA_TYPES.map((t) => ({
715
- value: t,
716
- label: t
717
- })),
718
- initialValues: ["pages", "keywords"]
1287
+ const records = [];
1288
+ const failures = [];
1289
+ let completed = 0;
1290
+ await runWithConcurrency(urls, concurrency, async (url) => {
1291
+ const result = await client.indexing.getMetadata(url).catch((err) => err);
1292
+ if (result instanceof Error) if (INDEXING_NOT_FOUND_RE.test(result.message)) records.push({
1293
+ url,
1294
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1295
+ });
1296
+ else failures.push({
1297
+ url,
1298
+ error: result.message
1299
+ });
1300
+ else records.push({
1301
+ url,
1302
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString(),
1303
+ latestUpdateAt: result.latestUpdate?.notifyTime ?? void 0,
1304
+ latestRemoveAt: result.latestRemove?.notifyTime ?? void 0,
1305
+ raw: result
1306
+ });
1307
+ completed++;
1308
+ if (!quiet) process.stdout.write(`\r${progressBar(completed, urls.length, url.slice(0, 60))}`);
719
1309
  });
720
- if (isCancel(selected)) {
721
- cancel("Cancelled");
722
- process.exit(0);
1310
+ if (!quiet) process.stdout.write("\n");
1311
+ await createIndexingMetadataStore({ dataSource: store.dataSource }).writeBatch({
1312
+ userId: store.userId,
1313
+ siteId: store.siteIdFor(siteUrl)
1314
+ }, records);
1315
+ if (!quiet) {
1316
+ logger.success(`Captured metadata for ${records.length}/${urls.length} URL(s)`);
1317
+ if (failures.length > 0) {
1318
+ logger.warn(`${failures.length} failed:`);
1319
+ for (const f of failures.slice(0, 5)) console.log(` ${f.url}: ${f.error}`);
1320
+ if (failures.length > 5) console.log(` ... and ${failures.length - 5} more`);
1321
+ }
723
1322
  }
724
- dataTypes = selected;
725
- } else dataTypes = ["pages", "keywords"];
726
- const rowLimit = Number.parseInt(String(args.limit), 10);
727
- const format = String(args.format);
728
- const output = {
729
- siteUrl,
730
- dateRange: {
731
- start: startDate,
732
- end: endDate
1323
+ if (failures.length > 0) process.exit(1);
1324
+ }
1325
+ }) }
1326
+ });
1327
+ const entitiesCommand = defineCommand({
1328
+ meta: {
1329
+ name: "entities",
1330
+ description: "Manage local entity snapshots (URL inspections, sitemaps, indexing metadata)"
1331
+ },
1332
+ subCommands: {
1333
+ inspect: inspectSubCommand,
1334
+ show: showSubCommand,
1335
+ sitemaps: defineCommand({
1336
+ meta: {
1337
+ name: "sitemaps",
1338
+ description: "Snapshot and inspect sitemap state per site"
733
1339
  },
734
- exportedAt: (/* @__PURE__ */ new Date()).toISOString()
735
- };
736
- const totalSteps = dataTypes.length;
737
- let currentStep = 0;
738
- for (const dataType of dataTypes) {
739
- currentStep++;
740
- if (!args.quiet) {
741
- clearLine();
742
- process.stdout.write(progressBar(currentStep, totalSteps, dataType));
1340
+ subCommands: {
1341
+ snapshot: sitemapsSnapshotSubCommand,
1342
+ show: sitemapsShowSubCommand
743
1343
  }
744
- const dimensions = getDimensions(dataType);
745
- const builder = gsc.select(...dimensions).where(between(date, startDate, endDate)).limit(rowLimit);
746
- const rows = [];
747
- for await (const batch of client.query(siteUrl, builder)) rows.push(...batch);
748
- output[dataType] = {
749
- total: rows.length,
750
- data: rows
751
- };
752
- }
753
- if (!args.quiet) {
754
- clearLine();
755
- logger.success(`Exported ${dataTypes.join(", ")} for ${siteUrl}`);
756
- }
757
- const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
758
- if (args.output) {
759
- await fs.writeFile(String(args.output), content);
760
- if (!args.quiet) logger.info(`Written to ${args.output}`);
761
- } else console.log(content);
1344
+ }),
1345
+ indexing: indexingSubCommand
762
1346
  }
763
1347
  });
764
-
765
- //#endregion
766
- //#region src/commands/init.ts
1348
+ const ENV_LINE_RE = /^([^=]+)=(.*)$/;
1349
+ async function promptDataDir(existing) {
1350
+ const fallback = existing ?? defaultDataDir();
1351
+ const answer = await text({
1352
+ message: "Where should Parquet data be stored?",
1353
+ placeholder: fallback,
1354
+ defaultValue: fallback
1355
+ });
1356
+ if (isCancel(answer)) process.exit(1);
1357
+ return String(answer) || fallback;
1358
+ }
767
1359
  async function loadEnvFile() {
768
1360
  const envPath = path.join(process.cwd(), ".env");
769
1361
  const content = await fs.readFile(envPath, "utf-8").catch(() => null);
@@ -772,7 +1364,7 @@ async function loadEnvFile() {
772
1364
  for (const line of content.split("\n")) {
773
1365
  const trimmed = line.trim();
774
1366
  if (!trimmed || trimmed.startsWith("#")) continue;
775
- const match = trimmed.match(/^([^=]+)=(.*)$/);
1367
+ const match = trimmed.match(ENV_LINE_RE);
776
1368
  if (match) {
777
1369
  const key = match[1].trim();
778
1370
  let value = match[2].trim();
@@ -785,7 +1377,7 @@ async function loadEnvFile() {
785
1377
  const initCommand = defineCommand({
786
1378
  meta: {
787
1379
  name: "init",
788
- description: "Set up GSCDump (choose cloud or local mode)"
1380
+ description: "Set up GSCDump authentication"
789
1381
  },
790
1382
  args: { force: {
791
1383
  type: "boolean",
@@ -794,8 +1386,8 @@ const initCommand = defineCommand({
794
1386
  } },
795
1387
  async run({ args }) {
796
1388
  const config = await loadConfig();
797
- if (config.mode && !args.force) {
798
- logger.info(`Already configured in ${config.mode} mode`);
1389
+ if (config.clientId && config.clientSecret && !args.force) {
1390
+ logger.info("Already configured");
799
1391
  logger.info("Run with --force to reconfigure");
800
1392
  return;
801
1393
  }
@@ -808,9 +1400,9 @@ const initCommand = defineCommand({
808
1400
  if (envFile.GOOGLE_ACCESS_TOKEN) process.env.GOOGLE_ACCESS_TOKEN = envFile.GOOGLE_ACCESS_TOKEN;
809
1401
  await saveConfig({
810
1402
  ...config,
811
- mode: "local",
812
1403
  clientId: envFile.GOOGLE_CLIENT_ID,
813
- clientSecret: envFile.GOOGLE_CLIENT_SECRET
1404
+ clientSecret: envFile.GOOGLE_CLIENT_SECRET,
1405
+ dataDir: config.dataDir ?? defaultDataDir()
814
1406
  });
815
1407
  const creds = (await authenticate({
816
1408
  clientId: envFile.GOOGLE_CLIENT_ID,
@@ -829,422 +1421,79 @@ const initCommand = defineCommand({
829
1421
  console.log(" \x1B[1mWelcome to GSCDump!\x1B[0m");
830
1422
  console.log(" \x1B[90mGoogle Search Console data extraction CLI\x1B[0m");
831
1423
  console.log();
832
- const mode = await select({
833
- message: "Choose your setup mode:",
834
- options: [{
835
- value: "cloud",
836
- label: "Cloud (Recommended)",
837
- hint: "Easy setup via cloud.gscdump.com - no API keys needed"
838
- }, {
839
- value: "local",
840
- label: "Local",
841
- hint: "Use your own Google OAuth credentials"
842
- }]
1424
+ const dataDir = await promptDataDir(config.dataDir);
1425
+ const credentials = await getAuthCredentials(true);
1426
+ await saveConfig({
1427
+ ...config,
1428
+ dataDir,
1429
+ clientId: credentials.clientId,
1430
+ clientSecret: credentials.clientSecret
843
1431
  });
844
- if (isCancel(mode)) process.exit(1);
845
- if (mode === "cloud") {
846
- const cloudUrl = config.cloudUrl || DEFAULT_CLOUD_URL;
847
- await saveConfig({
848
- ...config,
849
- mode: "cloud",
850
- cloudUrl
851
- });
852
- await authenticateCloud(cloudUrl, true);
853
- } else {
854
- await saveConfig({
855
- ...config,
856
- mode: "local"
857
- });
858
- await authenticate(await getAuthCredentials(true), true);
859
- }
1432
+ await authenticate(credentials, true);
860
1433
  console.log();
861
1434
  logger.success("Setup complete! Run gscdump to get started.");
862
1435
  }
863
1436
  });
864
-
865
- //#endregion
866
- //#region src/mcp/handlers/analytics.ts
867
- async function collectRows(ctx, siteUrl, builder) {
868
- const rows = [];
869
- for await (const batch of ctx.client.query(siteUrl, builder)) rows.push(...batch);
870
- return rows;
871
- }
872
- async function fetchPages(input, ctx) {
873
- const builder = gsc.select(page, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
874
- const rows = await collectRows(ctx, input.siteUrl, builder);
875
- return {
876
- total: rows.length,
877
- data: rows
878
- };
879
- }
880
- async function fetchKeywords(input, ctx) {
881
- const builder = gsc.select(query, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
882
- const rows = await collectRows(ctx, input.siteUrl, builder);
883
- return {
884
- total: rows.length,
885
- data: rows
886
- };
887
- }
888
- async function fetchCountries(input, ctx) {
889
- const builder = gsc.select(country, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
890
- const rows = await collectRows(ctx, input.siteUrl, builder);
891
- return {
892
- total: rows.length,
893
- data: rows
894
- };
895
- }
896
- async function fetchDevices(input, ctx) {
897
- const builder = gsc.select(device, date).where(between(date, input.period.start, input.period.end)).limit(25e3);
898
- const rows = await collectRows(ctx, input.siteUrl, builder);
899
- return {
900
- total: rows.length,
901
- data: rows
902
- };
903
- }
904
-
905
- //#endregion
906
- //#region src/mcp/handlers/indexing.ts
907
- async function inspectUrl$1(input, ctx) {
908
- return inspectUrl(ctx.client, input.siteUrl, input.inspectionUrl);
909
- }
910
- async function requestIndexing$1(input, ctx) {
911
- return requestIndexing(ctx.client, input.url, { type: input.type || "URL_UPDATED" }).catch((e) => ({
912
- url: input.url,
913
- type: input.type || "URL_UPDATED",
914
- error: e.message
915
- }));
916
- }
917
- async function getIndexingStatus(input, ctx) {
918
- return getIndexingMetadata(ctx.client, input.url).catch((e) => ({
919
- url: input.url,
920
- error: e.message
921
- }));
922
- }
923
- async function batchRequestIndexing$1(input, ctx) {
924
- const results = await batchRequestIndexing(ctx.client, input.urls, {
925
- type: input.type || "URL_UPDATED",
926
- delayMs: input.delayMs || 100
927
- });
928
- return {
929
- results,
930
- success: results.length,
931
- failed: 0
932
- };
933
- }
934
- async function batchInspectUrls$1(input, ctx) {
935
- const results = await batchInspectUrls(ctx.client, input.siteUrl, input.urls, { delayMs: input.delayMs || 200 });
936
- return {
937
- results,
938
- indexed: results.filter((r) => r.isIndexed).length,
939
- notIndexed: results.filter((r) => !r.isIndexed).length
940
- };
941
- }
942
-
943
- //#endregion
944
- //#region src/mcp/handlers/query.ts
945
- const DIMENSION_MAP$1 = {
946
- page,
947
- query,
948
- date,
949
- country,
950
- device,
951
- searchAppearance
952
- };
953
- async function customQuery(input, ctx) {
954
- const dimensions = input.dimensions.filter((d) => d in DIMENSION_MAP$1).map((d) => DIMENSION_MAP$1[d]);
955
- if (dimensions.length === 0) throw new Error("At least one valid dimension required");
956
- const builder = gsc.select(...dimensions).where(between(date, input.period.start, input.period.end)).limit(input.rowLimit || 25e3);
957
- const rows = [];
958
- for await (const batch of ctx.client.query(input.siteUrl, builder)) rows.push(...batch);
959
- return {
960
- total: rows.length,
961
- data: rows
962
- };
963
- }
964
-
965
- //#endregion
966
- //#region src/mcp/handlers/sites.ts
967
- async function listSites(_input, ctx) {
968
- return fetchSites(ctx.client);
969
- }
970
- async function listSitesWithSitemaps(_input, ctx) {
971
- return fetchSitesWithSitemaps(ctx.client);
972
- }
973
- async function listSitemaps(input, ctx) {
974
- return fetchSitemaps(ctx.client, input.siteUrl);
975
- }
976
- async function getSitemap(input, ctx) {
977
- return fetchSitemap(ctx.client, input.siteUrl, input.feedpath);
978
- }
979
- async function submitSitemap$1(input, ctx) {
980
- await submitSitemap(ctx.client, input.siteUrl, input.feedpath);
981
- return { success: true };
982
- }
983
- async function deleteSitemap$1(input, ctx) {
984
- await deleteSitemap(ctx.client, input.siteUrl, input.feedpath);
985
- return { success: true };
986
- }
987
-
988
- //#endregion
989
- //#region src/mcp/types.ts
990
- const periodSchema = z.object({
991
- start: z.string().describe("Start date (YYYY-MM-DD)"),
992
- end: z.string().describe("End date (YYYY-MM-DD)")
993
- }).describe("Date range for the query");
994
- const siteUrlSchema = z.string().describe("GSC property URL (e.g., sc-domain:example.com or https://example.com/)");
995
- const queryOptionsSchema = z.object({
996
- type: z.enum([
997
- "web",
998
- "image",
999
- "video",
1000
- "news",
1001
- "discover",
1002
- "googleNews"
1003
- ]).optional().describe("Data type"),
1004
- dataState: z.enum(["final", "all"]).optional().describe("Data state: final (settled) or all (includes fresh)"),
1005
- aggregationType: z.enum(["byPage", "byProperty"]).optional().describe("Aggregation: byPage or byProperty")
1006
- }).optional();
1007
- const listSitesInput = z.object({});
1008
- const listSitemapsInput = z.object({ siteUrl: siteUrlSchema });
1009
- const fetchAnalyticsInput = z.object({
1010
- siteUrl: siteUrlSchema,
1011
- period: periodSchema,
1012
- comparePrevious: z.boolean().optional().describe("Include previous period comparison"),
1013
- options: queryOptionsSchema
1014
- });
1015
- const fetchPageInput = z.object({
1016
- siteUrl: siteUrlSchema,
1017
- period: periodSchema,
1018
- url: z.string().describe("Page URL to fetch details for")
1019
- });
1020
- const fetchKeywordInput = z.object({
1021
- siteUrl: siteUrlSchema,
1022
- period: periodSchema,
1023
- keyword: z.string().describe("Keyword to fetch details for")
1024
- });
1025
- const inspectUrlInput = z.object({
1026
- siteUrl: siteUrlSchema,
1027
- inspectionUrl: z.string().describe("URL to inspect")
1028
- });
1029
- const requestIndexingInput = z.object({
1030
- url: z.string().describe("URL to request indexing for"),
1031
- type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type")
1032
- });
1033
- const getIndexingStatusInput = z.object({ url: z.string().describe("URL to get indexing status for") });
1034
- const customQueryInput = z.object({
1035
- siteUrl: siteUrlSchema,
1036
- period: periodSchema,
1037
- dimensions: z.array(z.enum([
1038
- "date",
1039
- "query",
1040
- "page",
1041
- "country",
1042
- "device",
1043
- "searchAppearance"
1044
- ])).describe("Dimensions to group by"),
1045
- rowLimit: z.number().optional().describe("Max rows (default 25000)"),
1046
- options: queryOptionsSchema
1047
- });
1048
- const sitemapInput = z.object({
1049
- siteUrl: siteUrlSchema,
1050
- feedpath: z.string().describe("Sitemap URL (e.g., https://example.com/sitemap.xml)")
1051
- });
1052
- const batchRequestIndexingInput = z.object({
1053
- urls: z.array(z.string()).describe("URLs to request indexing for"),
1054
- type: z.enum(["URL_UPDATED", "URL_DELETED"]).optional().describe("Notification type"),
1055
- delayMs: z.number().optional().describe("Delay between requests in ms (default 100)")
1056
- });
1057
- const batchInspectUrlsInput = z.object({
1058
- siteUrl: siteUrlSchema,
1059
- urls: z.array(z.string()).describe("URLs to inspect"),
1060
- delayMs: z.number().optional().describe("Delay between requests in ms (default 200)")
1437
/**
 * `inspect` command: asks the authenticated client for the inspection result
 * of one URL within a site, then prints it either as a JSON document
 * (`--json`) or as a short colored report on stdout.
 */
const inspectCommand = defineCommand({
  meta: {
    name: "inspect",
    description: "Inspect a specific URL's indexing status"
  },
  args: {
    site: {
      type: "string",
      alias: "s",
      required: true,
      description: "Site URL (e.g., sc-domain:example.com)"
    },
    url: {
      type: "positional",
      required: true,
      description: "URL to inspect"
    },
    json: {
      type: "boolean",
      default: false,
      description: "Output as JSON"
    }
  },
  async run({ args }) {
    const { client } = await createCommandContext({ needsAuth: true });
    // Any inspection failure is reported and ends the process with code 1.
    const onFailure = (e) => {
      logger.error(`Inspection failed: ${e.message}`);
      process.exit(1);
    };
    const result = await client.inspect(args.site, args.url).catch(onFailure);
    const indexStatus = result?.inspectionResult?.indexStatusResult;
    const passed = indexStatus?.verdict === "PASS";

    if (args.json) {
      const summary = {
        url: args.url,
        verdict: indexStatus?.verdict || null,
        coverageState: indexStatus?.coverageState || null,
        indexingState: indexStatus?.indexingState || null,
        lastCrawlTime: indexStatus?.lastCrawlTime || null,
        isIndexed: passed,
        raw: result
      };
      console.log(JSON.stringify(summary, null, 2));
      return;
    }

    console.log();
    console.log(` \x1B[1mURL:\x1B[0m ${args.url}`);
    console.log();
    // Green for a PASS verdict, red for anything else (including a missing one).
    const verdictColor = passed ? "\x1B[32m" : "\x1B[31m";
    console.log(` Verdict: ${verdictColor}${indexStatus?.verdict || "N/A"}\x1B[0m`);

    // Optional report lines, printed in this order and only when a value is present.
    const details = [
      [" Coverage: ", indexStatus?.coverageState],
      [" Indexing: ", indexStatus?.indexingState],
      [" Last Crawl: ", indexStatus?.lastCrawlTime],
      [" Robots.txt: ", indexStatus?.robotsTxtState],
      [" Page Fetch: ", indexStatus?.pageFetchState],
      [" Google Canon: ", indexStatus?.googleCanonical],
      [" User Canon: ", indexStatus?.userCanonical]
    ];
    for (const [label, value] of details) {
      if (value) console.log(`${label}${value}`);
    }
    console.log();
  }
});
1062
-
1063
- //#endregion
1064
- //#region src/mcp/server/index.ts
1065
- function createGscMcpServer(options) {
1066
- const { name = "gscdump", version = "1.0.0", getAuth: getAuth$1 } = options;
1067
- const server = new McpServer({
1068
- name,
1069
- version
1070
- });
1071
- const auth = async () => Promise.resolve(getAuth$1());
1072
- const getContext = async () => {
1073
- const a = await auth();
1074
- return {
1075
- auth: a,
1076
- client: googleSearchConsole(a)
1077
- };
1078
- };
1079
- server.registerTool("list-sites", {
1080
- description: "List all Google Search Console sites the user has access to",
1081
- inputSchema: listSitesInput.shape
1082
- }, async (args) => {
1083
- const result = await listSites(args, await getContext());
1084
- return { content: [{
1085
- type: "text",
1086
- text: JSON.stringify(result, null, 2)
1087
- }] };
1088
- });
1089
- server.registerTool("list-sites-with-sitemaps", {
1090
- description: "List all GSC sites with their sitemaps",
1091
- inputSchema: listSitesInput.shape
1092
- }, async (args) => {
1093
- const result = await listSitesWithSitemaps(args, await getContext());
1094
- return { content: [{
1095
- type: "text",
1096
- text: JSON.stringify(result, null, 2)
1097
- }] };
1098
- });
1099
- server.registerTool("list-sitemaps", {
1100
- description: "List sitemaps for a specific site",
1101
- inputSchema: listSitemapsInput.shape
1102
- }, async (args) => {
1103
- const result = await listSitemaps(args, await getContext());
1104
- return { content: [{
1105
- type: "text",
1106
- text: JSON.stringify(result, null, 2)
1107
- }] };
1108
- });
1109
- server.registerTool("get-sitemap", {
1110
- description: "Get details for a specific sitemap",
1111
- inputSchema: sitemapInput.shape
1112
- }, async (args) => {
1113
- const result = await getSitemap(args, await getContext());
1114
- return { content: [{
1115
- type: "text",
1116
- text: JSON.stringify(result, null, 2)
1117
- }] };
1118
- });
1119
- server.registerTool("submit-sitemap", {
1120
- description: "Submit a sitemap to Google Search Console",
1121
- inputSchema: sitemapInput.shape
1122
- }, async (args) => {
1123
- const result = await submitSitemap$1(args, await getContext());
1124
- return { content: [{
1125
- type: "text",
1126
- text: JSON.stringify(result, null, 2)
1127
- }] };
1128
- });
1129
- server.registerTool("delete-sitemap", {
1130
- description: "Delete a sitemap from Google Search Console",
1131
- inputSchema: sitemapInput.shape
1132
- }, async (args) => {
1133
- const result = await deleteSitemap$1(args, await getContext());
1134
- return { content: [{
1135
- type: "text",
1136
- text: JSON.stringify(result, null, 2)
1137
- }] };
1138
- });
1139
- server.registerTool("fetch-pages", {
1140
- description: "Fetch page analytics data for a site",
1141
- inputSchema: fetchAnalyticsInput.shape
1142
- }, async (args) => {
1143
- const result = await fetchPages(args, await getContext());
1144
- return { content: [{
1145
- type: "text",
1146
- text: JSON.stringify(result, null, 2)
1147
- }] };
1148
- });
1149
- server.registerTool("fetch-keywords", {
1150
- description: "Fetch keyword/query analytics data for a site",
1151
- inputSchema: fetchAnalyticsInput.shape
1152
- }, async (args) => {
1153
- const result = await fetchKeywords(args, await getContext());
1154
- return { content: [{
1155
- type: "text",
1156
- text: JSON.stringify(result, null, 2)
1157
- }] };
1158
- });
1159
- server.registerTool("fetch-countries", {
1160
- description: "Fetch country analytics data for a site",
1161
- inputSchema: fetchAnalyticsInput.shape
1162
- }, async (args) => {
1163
- const result = await fetchCountries(args, await getContext());
1164
- return { content: [{
1165
- type: "text",
1166
- text: JSON.stringify(result, null, 2)
1167
- }] };
1168
- });
1169
- server.registerTool("fetch-devices", {
1170
- description: "Fetch device analytics data for a site",
1171
- inputSchema: fetchAnalyticsInput.shape
1172
- }, async (args) => {
1173
- const result = await fetchDevices(args, await getContext());
1174
- return { content: [{
1175
- type: "text",
1176
- text: JSON.stringify(result, null, 2)
1177
- }] };
1178
- });
1179
- server.registerTool("custom-query", {
1180
- description: "Run a custom search analytics query with specified dimensions",
1181
- inputSchema: customQueryInput.shape
1182
- }, async (args) => {
1183
- const result = await customQuery(args, await getContext());
1184
- return { content: [{
1185
- type: "text",
1186
- text: JSON.stringify(result, null, 2)
1187
- }] };
1188
- });
1189
- server.registerTool("inspect-url", {
1190
- description: "Inspect a URL in Google Search Console to check its indexing status",
1191
- inputSchema: inspectUrlInput.shape
1192
- }, async (args) => {
1193
- const result = await inspectUrl$1(args, await getContext());
1194
- return { content: [{
1195
- type: "text",
1196
- text: JSON.stringify(result, null, 2)
1197
- }] };
1198
- });
1199
- server.registerTool("request-indexing", {
1200
- description: "Request Google to index or remove a URL via the Indexing API",
1201
- inputSchema: requestIndexingInput.shape
1202
- }, async (args) => {
1203
- const result = await requestIndexing$1(args, await getContext());
1204
- return { content: [{
1205
- type: "text",
1206
- text: JSON.stringify(result, null, 2)
1207
- }] };
1208
- });
1209
- server.registerTool("get-indexing-status", {
1210
- description: "Get indexing status metadata for a URL",
1211
- inputSchema: getIndexingStatusInput.shape
1212
- }, async (args) => {
1213
- const result = await getIndexingStatus(args, await getContext());
1214
- return { content: [{
1215
- type: "text",
1216
- text: JSON.stringify(result, null, 2)
1217
- }] };
1218
- });
1219
- server.registerTool("batch-request-indexing", {
1220
- description: "Batch request indexing for multiple URLs with rate limiting",
1221
- inputSchema: batchRequestIndexingInput.shape
1222
- }, async (args) => {
1223
- const result = await batchRequestIndexing$1(args, await getContext());
1224
- return { content: [{
1225
- type: "text",
1226
- text: JSON.stringify(result, null, 2)
1227
- }] };
1228
- });
1229
- server.registerTool("batch-inspect-urls", {
1230
- description: "Batch inspect multiple URLs to check their indexing status",
1231
- inputSchema: batchInspectUrlsInput.shape
1232
- }, async (args) => {
1233
- const result = await batchInspectUrls$1(args, await getContext());
1234
- return { content: [{
1235
- type: "text",
1236
- text: JSON.stringify(result, null, 2)
1237
- }] };
1238
- });
1239
- return server;
1240
- }
1241
-
1242
- //#endregion
1243
- //#region src/commands/mcp.ts
1244
1493
  async function checkAuth() {
1245
1494
  if ((process.env.GOOGLE_ACCESS_TOKEN || process.env.GOOGLE_REFRESH_TOKEN) && process.env.GOOGLE_CLIENT_ID && process.env.GOOGLE_CLIENT_SECRET) return { ok: true };
1246
1495
  const config = await loadConfig();
1247
- if (!config.mode) return {
1496
+ if (!config.clientId && !config.clientSecret) return {
1248
1497
  ok: false,
1249
1498
  error: `GSCDump not configured.
1250
1499
 
@@ -1256,20 +1505,9 @@ Or provide env vars: GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET, GOOGLE_ACCESS_TOKEN
1256
1505
 
1257
1506
  Then restart your MCP client.`
1258
1507
  };
1259
- if (config.mode === "cloud") {
1260
- if (!await loadCloudTokens()) return {
1261
- ok: false,
1262
- error: `Cloud authentication expired or missing.
1263
-
1264
- Run this command to re-authenticate:
1265
-
1266
- npx @gscdump/cli init
1267
-
1268
- Then restart your MCP client.`
1269
- };
1270
- } else if (!await loadTokens()) return {
1271
- ok: false,
1272
- error: `Local authentication missing.
1508
+ if (!await loadTokens()) return {
1509
+ ok: false,
1510
+ error: `Authentication missing.
1273
1511
 
1274
1512
  Run this command to authenticate:
1275
1513
 
@@ -1299,10 +1537,15 @@ const mcpCommand = defineCommand({
1299
1537
  await server.connect(transport);
1300
1538
  }
1301
1539
  });
1302
-
1303
- //#endregion
1304
- //#region src/commands/query.ts
1305
- const DIMENSION_MAP = {
1540
// Dimension names accepted by `query --dimensions`, in the order they are
// offered in the interactive picker and listed in the flag's help text.
const DIMENSIONS = ["page", "query", "date", "country", "device", "searchAppearance"];
1548
+ const DIM_COLUMNS = {
1306
1549
  page,
1307
1550
  query,
1308
1551
  date,
@@ -1310,10 +1553,38 @@ const DIMENSION_MAP = {
1310
1553
  device,
1311
1554
  searchAppearance
1312
1555
  };
1556
/**
 * Runs a search-analytics query through `client._rawQuery`, requesting pages
 * of `opts.rowLimit` rows and advancing `startRow` until a page comes back
 * empty or shorter than the page size.
 *
 * Each response row is flattened into an object holding the four metrics
 * (missing ones default to 0) plus one property per requested dimension,
 * taken positionally from `row.keys`.
 *
 * @param {object} client - Object exposing `_rawQuery(siteUrl, body)`.
 * @param {string} siteUrl - Property to query.
 * @param {{startDate: string, endDate: string, dimensions: string[], rowLimit: number}} opts
 * @returns {Promise<{rows: object[]}>} All rows from every fetched page.
 * @throws {RangeError} If `opts.rowLimit` is not a positive integer.
 */
async function runLiveQuery(client, siteUrl, opts) {
  const { startDate, endDate, dimensions, rowLimit } = opts;
  // A NaN or non-positive page size makes `page.length < rowLimit` permanently
  // false, so the loop below would never end. Reject it before any request.
  if (!Number.isInteger(rowLimit) || rowLimit <= 0) {
    throw new RangeError(`Invalid row limit "${rowLimit}": expected a positive integer`);
  }

  const allRows = [];
  let startRow = 0;
  while (true) {
    const response = await client._rawQuery(siteUrl, {
      startDate,
      endDate,
      dimensions,
      rowLimit,
      startRow
    });
    const page = (response.rows || []).map((row) => {
      const result = {
        clicks: row.clicks ?? 0,
        impressions: row.impressions ?? 0,
        ctr: row.ctr ?? 0,
        position: row.position ?? 0
      };
      dimensions.forEach((dim, i) => {
        result[dim] = row.keys?.[i];
      });
      return result;
    });
    for (const row of page) allRows.push(row);
    // An empty page can never advance `startRow`; a short page is the last one.
    if (page.length === 0 || page.length < rowLimit) break;
    startRow += page.length;
  }
  return { rows: allRows };
}
1313
1584
  const queryCommand = defineCommand({
1314
1585
  meta: {
1315
1586
  name: "query",
1316
- description: "Run custom search analytics queries"
1587
+ description: "Run a search analytics query (local Parquet by default, --live hits GSC API)"
1317
1588
  },
1318
1589
  args: {
1319
1590
  site: {
@@ -1324,7 +1595,7 @@ const queryCommand = defineCommand({
1324
1595
  dimensions: {
1325
1596
  type: "string",
1326
1597
  alias: "d",
1327
- description: "Dimensions: page,query,date,country,device,searchAppearance"
1598
+ description: `Dimensions: ${DIMENSIONS.join(",")}`
1328
1599
  },
1329
1600
  start: {
1330
1601
  type: "string",
@@ -1351,6 +1622,19 @@ const queryCommand = defineCommand({
1351
1622
  default: "json",
1352
1623
  description: "Output format: json or csv"
1353
1624
  },
1625
+ sql: {
1626
+ type: "string",
1627
+ description: "Raw DuckDB SQL using {{FILES}} as the file list placeholder (bypasses builder)"
1628
+ },
1629
+ table: {
1630
+ type: "string",
1631
+ description: "Analytics table for --sql (default: pages)"
1632
+ },
1633
+ live: {
1634
+ type: "boolean",
1635
+ default: false,
1636
+ description: "Bypass local store; hit the GSC API directly"
1637
+ },
1354
1638
  quiet: {
1355
1639
  type: "boolean",
1356
1640
  alias: "q",
@@ -1365,291 +1649,1150 @@ const queryCommand = defineCommand({
1365
1649
  }
1366
1650
  },
1367
1651
  async run({ args }) {
1368
- const config = await loadConfig();
1369
- const { getAuth: getAuth$1 } = await Promise.resolve().then(() => auth_exports);
1370
- const client = googleSearchConsole(await getAuth$1({
1371
- interactive: false,
1372
- config
1373
- }));
1374
- let siteUrl = String(args.site || config.defaultSite || "");
1375
- if (!siteUrl || args.interactive) {
1376
- const verified = (await client.sites()).filter((s) => s.permissionLevel !== "siteUnverifiedUser");
1377
- if (verified.length === 0) {
1378
- logger.error("No verified sites found");
1652
+ if (args.sql) {
1653
+ await runRawSqlMode({
1654
+ sql: String(args.sql),
1655
+ site: args.site ? String(args.site) : void 0,
1656
+ table: args.table ? String(args.table) : "pages",
1657
+ output: args.output ? String(args.output) : void 0,
1658
+ quiet: Boolean(args.quiet)
1659
+ });
1660
+ return;
1661
+ }
1662
+ const dimNames = await resolveDimensions(args);
1663
+ const { startDate, endDate } = await resolveRange(args);
1664
+ const rowLimit = Number.parseInt(String(args.limit), 10);
1665
+ const format = String(args.format);
1666
+ const ctx = await createCommandContext({
1667
+ needsAuth: true,
1668
+ needsStore: !args.live,
1669
+ interactive: Boolean(args.interactive)
1670
+ });
1671
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
1672
+ if (args.live) {
1673
+ if (!args.quiet) logger.info(`Querying ${siteUrl} via live GSC API...`);
1674
+ const result = await runLiveQuery(ctx.client, siteUrl, {
1675
+ startDate,
1676
+ endDate,
1677
+ dimensions: dimNames,
1678
+ rowLimit
1679
+ }).catch((e) => {
1680
+ logger.error(`Query failed: ${e.message}`);
1379
1681
  process.exit(1);
1380
- }
1381
- const selected = await select({
1382
- message: "Select a site",
1383
- options: verified.map((s) => ({
1384
- value: s.siteUrl,
1385
- label: s.siteUrl
1386
- })),
1387
- initialValue: siteUrl || verified[0]?.siteUrl
1388
1682
  });
1389
- if (isCancel(selected)) {
1390
- cancel("Cancelled");
1391
- process.exit(0);
1392
- }
1393
- siteUrl = selected;
1394
- }
1395
- let dimensions;
1396
- if (args.dimensions) dimensions = String(args.dimensions).split(",").filter((d) => d in DIMENSION_MAP).map((d) => DIMENSION_MAP[d]);
1397
- else if (args.interactive) {
1398
- const selected = await multiselect({
1399
- message: "Select dimensions",
1400
- options: Object.keys(DIMENSION_MAP).map((d) => ({
1401
- value: d,
1402
- label: d
1403
- })),
1404
- initialValues: ["page", "query"]
1683
+ await writeOutput({
1684
+ output: {
1685
+ siteUrl,
1686
+ dimensions: dimNames,
1687
+ dateRange: {
1688
+ start: startDate,
1689
+ end: endDate
1690
+ },
1691
+ total: result.rows.length,
1692
+ data: result.rows
1693
+ },
1694
+ format,
1695
+ path: args.output ? String(args.output) : void 0,
1696
+ quiet: Boolean(args.quiet)
1405
1697
  });
1406
- if (isCancel(selected)) {
1407
- cancel("Cancelled");
1408
- process.exit(0);
1698
+ return;
1699
+ }
1700
+ if (!args.quiet) logger.info(`Querying ${siteUrl} from local Parquet store...`);
1701
+ const state = buildLocalState(dimNames, startDate, endDate, rowLimit);
1702
+ const store = ctx.store;
1703
+ const table = inferTable(dimNames);
1704
+ await assertRangeCovered(store, siteUrl, table, startDate, endDate);
1705
+ const result = await store.engine.query({
1706
+ userId: store.userId,
1707
+ siteId: store.siteIdFor(siteUrl),
1708
+ table
1709
+ }, state).catch((e) => {
1710
+ logger.error(`Query failed: ${e.message}`);
1711
+ process.exit(1);
1712
+ });
1713
+ await writeOutput({
1714
+ output: {
1715
+ siteUrl,
1716
+ dimensions: dimNames,
1717
+ dateRange: {
1718
+ start: startDate,
1719
+ end: endDate
1720
+ },
1721
+ total: result.rows.length,
1722
+ data: result.rows
1723
+ },
1724
+ format,
1725
+ path: args.output ? String(args.output) : void 0,
1726
+ quiet: Boolean(args.quiet)
1727
+ });
1728
+ }
1729
+ });
1730
/**
 * Determines which dimension names the query should group by.
 *
 * Order of precedence:
 *   1. `args.dimensions` — a comma-separated list. Entries are trimmed, so
 *      `"page, query"` works; names not in `DIMENSIONS` are dropped and
 *      duplicates are collapsed, keeping the first occurrence.
 *   2. `args.interactive` — a multiselect prompt; cancelling exits with code 0.
 *   3. Otherwise the default `["page", "query"]`.
 *
 * @param {{dimensions?: string, interactive?: boolean}} args - Parsed CLI args.
 * @returns {Promise<string[]>} Dimension names.
 */
async function resolveDimensions(args) {
  if (args.dimensions) {
    const requested = String(args.dimensions)
      .split(",")
      .map((name) => name.trim())
      .filter((name) => DIMENSIONS.includes(name));
    // A Set keeps insertion order, so this only removes repeated names.
    return [...new Set(requested)];
  }
  if (args.interactive) {
    const selected = await multiselect({
      message: "Select dimensions",
      options: DIMENSIONS.map((d) => ({
        value: d,
        label: d
      })),
      initialValues: ["page", "query"]
    });
    if (isCancel(selected)) {
      cancel("Cancelled");
      process.exit(0);
    }
    return selected;
  }
  return ["page", "query"];
}
1749
/**
 * Determines the query's date range.
 *
 * Each bound given on the command line (`args.start`, `args.end`) is used as
 * is. A bound that was not given is prompted for in interactive mode
 * (defaults: 28 and 3 days ago) or defaulted otherwise (31 and 3 days ago).
 * Passing only one of the two flags therefore keeps that flag instead of
 * discarding it.
 *
 * @param {{start?: string, end?: string, interactive?: boolean}} args - Parsed CLI args.
 * @returns {Promise<{startDate: string, endDate: string}>}
 */
async function resolveRange(args) {
  const givenStart = args.start ? String(args.start) : void 0;
  const givenEnd = args.end ? String(args.end) : void 0;
  if (givenStart && givenEnd) return {
    startDate: givenStart,
    endDate: givenEnd
  };

  // Prompts for one date; cancelling exits with code 0. An empty answer falls
  // back to the placeholder value. The type check covers a prompt that
  // resolves to `undefined` for empty input, which `String()` would otherwise
  // turn into the literal text "undefined".
  const askDate = async (message, fallback) => {
    const answer = await text({
      message,
      placeholder: fallback
    });
    if (isCancel(answer)) {
      cancel("Cancelled");
      process.exit(0);
    }
    const typed = typeof answer === "string" ? answer.trim() : "";
    return typed || fallback;
  };

  if (args.interactive) {
    const startDate = givenStart ?? await askDate("Start date (YYYY-MM-DD)", daysAgo(28));
    const endDate = givenEnd ?? await askDate("End date (YYYY-MM-DD)", daysAgo(3));
    return {
      startDate,
      endDate
    };
  }
  return {
    startDate: givenStart ?? daysAgo(31),
    endDate: givenEnd ?? daysAgo(3)
  };
}
1781
/**
 * Builds the query state handed to the local engine: selects the columns for
 * the given dimension names (names without an entry in `DIM_COLUMNS` are
 * skipped), restricts rows to the date range and applies the row limit.
 *
 * @param {string[]} dimNames - Dimension names to select.
 * @param {string} startDate - First date of the range.
 * @param {string} endDate - Last date of the range.
 * @param {number} rowLimit - Value passed to the builder's `limit()`.
 * @returns {*} Whatever the builder's `getState()` returns.
 */
function buildLocalState(dimNames, startDate, endDate, rowLimit) {
  const columns = [];
  for (const name of dimNames) {
    const column = DIM_COLUMNS[name];
    if (column) columns.push(column);
  }
  const selection = gsc.select(...columns);
  const ranged = selection.where(between(date, startDate, endDate));
  return ranged.limit(rowLimit).getState();
}
1785
/**
 * Checks that the requested date range lies inside what has been synced for
 * this site and table, using the first watermark the engine returns. On any
 * failure it logs an error explaining how to fix it and exits with code 1.
 *
 * Dates are compared as strings with `<` / `>`.
 *
 * @param {object} store - Provides `engine.getWatermarks`, `userId` and `siteIdFor`.
 * @param {string} siteUrl - Site the query targets.
 * @param {string} table - Table the query targets.
 * @param {string} startDate - Requested range start.
 * @param {string} endDate - Requested range end.
 * @returns {Promise<void>}
 */
async function assertRangeCovered(store, siteUrl, table, startDate, endDate) {
  const abort = (message) => {
    logger.error(message);
    process.exit(1);
  };
  const scope = {
    userId: store.userId,
    siteId: store.siteIdFor(siteUrl),
    table
  };
  const watermarks = await store.engine.getWatermarks(scope);
  const wm = watermarks[0];
  if (!wm) abort(`No data synced for ${siteUrl} / ${table}. Run \`gscdump sync\` first, or pass --live.`);
  if (endDate > wm.newestDateSynced) abort(`Requested end=${endDate} is newer than last sync (${wm.newestDateSynced}). Run \`gscdump sync\` first, or pass --live.`);
  if (startDate < wm.oldestDateSynced) abort(`Requested start=${startDate} is older than first sync (${wm.oldestDateSynced}). Run \`gscdump sync --start=${startDate}\` first, or pass --live.`);
}
1804
/**
 * Handles `query --sql`: validates the table name, runs the caller's SQL
 * through the store for the resolved site, and emits the result as a
 * pretty-printed JSON document (`sql`, `total`, `data`), written to
 * `opts.output` when set and to stdout otherwise.
 *
 * An unknown table or a failing statement is logged and ends the process
 * with exit code 1.
 *
 * @param {{sql: string, site?: string, table: string, output?: string, quiet: boolean}} opts
 * @returns {Promise<void>}
 */
async function runRawSqlMode(opts) {
  const { table, output, quiet } = opts;
  if (!isKnownTable$1(table)) {
    logger.error(`Unknown table "${table}". Known: ${allTables().join(", ")}`);
    process.exit(1);
  }

  const ctx = await createCommandContext({
    needsAuth: true,
    needsStore: true
  });
  const siteUrl = await ctx.resolveSite(opts.site);
  const { store } = ctx;
  if (!quiet) logger.info(`Running raw SQL over table "${table}" for ${siteUrl}`);

  const onFailure = (e) => {
    logger.error(`SQL failed: ${e.message}`);
    process.exit(1);
  };
  const { rows, sql } = await store.runRawSql({
    sql: opts.sql,
    siteUrl,
    table
  }).catch(onFailure);

  const payload = JSON.stringify({
    sql,
    total: rows.length,
    data: rows
  }, null, 2);
  if (!output) {
    console.log(payload);
    return;
  }
  await fs.writeFile(output, payload);
  if (!quiet) logger.info(`Written to ${output}`);
}
1834
/**
 * Serializes a query result and emits it.
 *
 * `opts.format === "csv"` goes through `exportToCSV`; any other format is
 * pretty-printed JSON. The text is written to `opts.path` when one is given
 * (followed by an info log unless `opts.quiet`), otherwise printed to stdout.
 *
 * @param {{output: object, format: string, path?: string, quiet: boolean}} opts
 * @returns {Promise<void>}
 */
async function writeOutput(opts) {
  const { output, format, path: target, quiet } = opts;
  let content;
  if (format === "csv") content = exportToCSV(output);
  else content = JSON.stringify(output, null, 2);

  if (!target) {
    console.log(content);
    return;
  }
  await fs.writeFile(target, content);
  if (!quiet) logger.info(`Written to ${target}`);
}
1841
/**
 * Tells whether `name` is one of the table names reported by `allTables()`.
 *
 * @param {string} name - Candidate table name.
 * @returns {boolean}
 */
function isKnownTable$1(name) {
  const known = allTables();
  return known.includes(name);
}
1844
/**
 * Returns `target` when it is set; otherwise logs that a site URL is required
 * and exits the process with code 1.
 *
 * @param {string|undefined} target - Site URL from the flag or the config.
 * @returns {string} The same value that was passed in.
 */
function requireSite(target) {
  if (target) return target;
  logger.error("Site URL required (-s)");
  process.exit(1);
  return target;
}
1851
+ const sitemapsCommand = defineCommand({
1852
+ meta: {
1853
+ name: "sitemaps",
1854
+ description: "Manage sitemaps"
1855
+ },
1856
+ subCommands: {
1857
+ list: defineCommand({
1858
+ meta: {
1859
+ name: "list",
1860
+ description: "List sitemaps for a site"
1861
+ },
1862
+ args: {
1863
+ site: {
1864
+ type: "string",
1865
+ alias: "s",
1866
+ description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
1867
+ },
1868
+ json: {
1869
+ type: "boolean",
1870
+ default: false,
1871
+ description: "Output as JSON"
1872
+ }
1873
+ },
1874
+ async run({ args }) {
1875
+ const config = await loadConfig();
1876
+ const siteUrl = requireSite(args.site || config.defaultSite);
1877
+ const sitemaps = (await (await createCommandContext({ needsAuth: true })).client.sitemaps.list(siteUrl).catch((e) => {
1878
+ logger.error(`Failed to fetch sitemaps: ${e.message}`);
1879
+ process.exit(1);
1880
+ })).map((sm) => ({
1881
+ path: sm.path,
1882
+ type: sm.type || void 0,
1883
+ isPending: sm.isPending || false,
1884
+ errors: Number(sm.errors) || 0,
1885
+ warnings: Number(sm.warnings) || 0,
1886
+ lastDownloaded: sm.lastDownloaded || null
1887
+ }));
1888
+ if (args.json) {
1889
+ console.log(JSON.stringify(sitemaps, null, 2));
1890
+ return;
1891
+ }
1892
+ if (sitemaps.length === 0) {
1893
+ logger.warn("No sitemaps found");
1894
+ return;
1895
+ }
1896
+ logger.success(`Found ${sitemaps.length} sitemaps:`);
1897
+ console.log();
1898
+ for (const sm of sitemaps) {
1899
+ const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
1900
+ const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
1901
+ const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
1902
+ console.log(` ${sm.path}${pending}${errors}${warnings}`);
1903
+ }
1409
1904
  }
1410
- dimensions = selected.map((d) => DIMENSION_MAP[d]);
1411
- } else dimensions = [page, query];
1412
- let startDate;
1413
- let endDate;
1414
- if (args.start && args.end) {
1415
- startDate = String(args.start);
1416
- endDate = String(args.end);
1417
- } else if (args.interactive) {
1418
- const startInput = await text({
1419
- message: "Start date (YYYY-MM-DD)",
1420
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 28 * 864e5)).toISOString().split("T")[0]
1421
- });
1422
- if (isCancel(startInput)) {
1423
- cancel("Cancelled");
1424
- process.exit(0);
1905
+ }),
1906
+ get: defineCommand({
1907
+ meta: {
1908
+ name: "get",
1909
+ description: "Get details for a specific sitemap"
1910
+ },
1911
+ args: {
1912
+ site: {
1913
+ type: "string",
1914
+ alias: "s",
1915
+ required: true,
1916
+ description: "Site URL"
1917
+ },
1918
+ url: {
1919
+ type: "positional",
1920
+ required: true,
1921
+ description: "Sitemap URL"
1922
+ },
1923
+ json: {
1924
+ type: "boolean",
1925
+ default: false,
1926
+ description: "Output as JSON"
1927
+ }
1928
+ },
1929
+ async run({ args }) {
1930
+ const client = (await createCommandContext({ needsAuth: true })).client;
1931
+ const sitemap = await fetchSitemap(client, args.site, args.url).catch(gscErrorHandler);
1932
+ if (args.json) {
1933
+ console.log(JSON.stringify(sitemap, null, 2));
1934
+ return;
1935
+ }
1936
+ console.log();
1937
+ console.log(` \x1B[1mPath:\x1B[0m ${sitemap.path}`);
1938
+ console.log(` \x1B[1mType:\x1B[0m ${sitemap.type || "sitemap"}`);
1939
+ console.log(` \x1B[1mLast Submitted:\x1B[0m ${sitemap.lastSubmitted || "N/A"}`);
1940
+ console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
1941
+ console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
1942
+ console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
1943
+ console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
1944
+ if (sitemap.contents?.length) {
1945
+ console.log();
1946
+ console.log(" \x1B[1mContents:\x1B[0m");
1947
+ for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
1948
+ }
1425
1949
  }
1426
- const endInput = await text({
1427
- message: "End date (YYYY-MM-DD)",
1428
- placeholder: (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0]
1429
- });
1430
- if (isCancel(endInput)) {
1431
- cancel("Cancelled");
1432
- process.exit(0);
1950
+ }),
1951
+ submit: defineCommand({
1952
+ meta: {
1953
+ name: "submit",
1954
+ description: "Submit a sitemap to GSC"
1955
+ },
1956
+ args: {
1957
+ site: {
1958
+ type: "string",
1959
+ alias: "s",
1960
+ required: true,
1961
+ description: "Site URL"
1962
+ },
1963
+ url: {
1964
+ type: "positional",
1965
+ required: true,
1966
+ description: "Sitemap URL to submit"
1967
+ }
1968
+ },
1969
+ async run({ args }) {
1970
+ await (await createCommandContext({ needsAuth: true })).client.sitemaps.submit(args.site, args.url).catch((e) => {
1971
+ logger.error(`Submit failed: ${e.message}`);
1972
+ process.exit(1);
1973
+ });
1974
+ logger.success(`Submitted sitemap: ${args.url}`);
1433
1975
  }
1434
- startDate = String(startInput) || (/* @__PURE__ */ new Date(Date.now() - 28 * 864e5)).toISOString().split("T")[0];
1435
- endDate = String(endInput) || (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
1436
- } else {
1437
- endDate = (/* @__PURE__ */ new Date(Date.now() - 3 * 864e5)).toISOString().split("T")[0];
1438
- startDate = (/* @__PURE__ */ new Date(Date.now() - 31 * 864e5)).toISOString().split("T")[0];
1439
- }
1440
- const rowLimit = Number.parseInt(String(args.limit), 10);
1441
- const format = String(args.format);
1442
- const builder = gsc.select(...dimensions).where(between(date, startDate, endDate)).limit(rowLimit);
1443
- if (!args.quiet) logger.info(`Querying ${siteUrl}...`);
1444
- const rows = [];
1445
- for await (const batch of client.query(siteUrl, builder)) {
1446
- rows.push(...batch);
1447
- if (!args.quiet) {
1448
- clearLine();
1449
- process.stdout.write(progressBar(rows.length, rowLimit, `${rows.length} rows`));
1976
+ }),
1977
+ delete: defineCommand({
1978
+ meta: {
1979
+ name: "delete",
1980
+ description: "Delete a sitemap from GSC"
1981
+ },
1982
+ args: {
1983
+ site: {
1984
+ type: "string",
1985
+ alias: "s",
1986
+ required: true,
1987
+ description: "Site URL"
1988
+ },
1989
+ url: {
1990
+ type: "positional",
1991
+ required: true,
1992
+ description: "Sitemap URL to delete"
1993
+ }
1994
+ },
1995
+ async run({ args }) {
1996
+ await (await createCommandContext({ needsAuth: true })).client.sitemaps.delete(args.site, args.url).catch((e) => {
1997
+ logger.error(`Delete failed: ${e.message}`);
1998
+ process.exit(1);
1999
+ });
2000
+ logger.success(`Deleted sitemap: ${args.url}`);
1450
2001
  }
2002
+ })
2003
+ }
2004
+ });
2005
+ const sitesCommand = defineCommand({
2006
+ meta: {
2007
+ name: "sites",
2008
+ description: "List available GSC sites"
2009
+ },
2010
+ args: { json: {
2011
+ type: "boolean",
2012
+ default: false,
2013
+ description: "Output as JSON for scripting"
2014
+ } },
2015
+ async run({ args }) {
2016
+ const sites = await (await createCommandContext({ needsAuth: true })).loadSites();
2017
+ if (args.json) {
2018
+ console.log(JSON.stringify(sites, null, 2));
2019
+ return;
2020
+ }
2021
+ if (sites.length === 0) {
2022
+ logger.warn("No verified sites found");
2023
+ return;
2024
+ }
2025
+ logger.success(`Found ${sites.length} sites:`);
2026
+ console.log();
2027
+ for (const site of sites) {
2028
+ const perm = site.permissionLevel === "siteOwner" ? "\x1B[32m" : "\x1B[90m";
2029
+ console.log(` ${site.siteUrl} ${perm}(${site.permissionLevel})\x1B[0m`);
1451
2030
  }
1452
- if (!args.quiet) {
1453
- clearLine();
1454
- logger.success(`Fetched ${rows.length} rows`);
1455
- }
1456
- const output = {
1457
- siteUrl,
1458
- dimensions: dimensions.map((d) => String(d)),
1459
- dateRange: {
1460
- start: startDate,
1461
- end: endDate
1462
- },
1463
- total: rows.length,
1464
- data: rows
1465
- };
1466
- const content = format === "csv" ? exportToCSV(output) : JSON.stringify(output, null, 2);
1467
- if (args.output) {
1468
- await fs.writeFile(String(args.output), content);
1469
- if (!args.quiet) logger.info(`Written to ${args.output}`);
1470
- } else console.log(content);
1471
2031
  }
1472
2032
  });
1473
-
1474
- //#endregion
1475
- //#region src/commands/sitemaps.ts
1476
- const listCommand = defineCommand({
2033
+ const compactCommand = defineCommand({
1477
2034
  meta: {
1478
- name: "list",
1479
- description: "List sitemaps for a site"
2035
+ name: "compact",
2036
+ description: "Run tiered compaction (raw→d7 at 7d, d7→d30 at 30d, d30→d90 at 90d)"
1480
2037
  },
1481
2038
  args: {
1482
- site: {
2039
+ "site": {
1483
2040
  type: "string",
1484
2041
  alias: "s",
1485
- required: true,
1486
- description: "Site URL (e.g., sc-domain:example.com or https://example.com/)"
2042
+ description: "Restrict to a single site (default: all sites with local data)"
1487
2043
  },
1488
- json: {
2044
+ "raw-days": {
2045
+ type: "string",
2046
+ description: "Override raw→d7 age threshold in days (default: 7)"
2047
+ },
2048
+ "d7-days": {
2049
+ type: "string",
2050
+ description: "Override d7→d30 age threshold in days (default: 30)"
2051
+ },
2052
+ "d30-days": {
2053
+ type: "string",
2054
+ description: "Override d30→d90 age threshold in days (default: 90)"
2055
+ },
2056
+ "quiet": {
1489
2057
  type: "boolean",
2058
+ alias: "q",
1490
2059
  default: false,
1491
- description: "Output as JSON"
2060
+ description: "Suppress progress output"
1492
2061
  }
1493
2062
  },
1494
2063
  async run({ args }) {
1495
- const sitemaps = await fetchSitemaps(googleSearchConsole(await getAuth({ interactive: false })), args.site).catch(gscErrorHandler);
1496
- if (args.json) {
1497
- console.log(JSON.stringify(sitemaps, null, 2));
1498
- return;
1499
- }
1500
- if (sitemaps.length === 0) {
1501
- logger.warn("No sitemaps found");
1502
- return;
1503
- }
1504
- logger.success(`Found ${sitemaps.length} sitemaps:`);
1505
- console.log();
1506
- for (const sm of sitemaps) {
1507
- const pending = sm.isPending ? " \x1B[33m(pending)\x1B[0m" : "";
1508
- const errors = sm.errors ? ` \x1B[31m${sm.errors} errors\x1B[0m` : "";
1509
- const warnings = sm.warnings ? ` \x1B[33m${sm.warnings} warnings\x1B[0m` : "";
1510
- console.log(` ${sm.path}${pending}${errors}${warnings}`);
2064
+ const store = (await createCommandContext({ needsStore: true })).store;
2065
+ const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2066
+ const quiet = Boolean(args.quiet);
2067
+ const thresholds = {};
2068
+ if (args["raw-days"]) thresholds.raw = Number(args["raw-days"]);
2069
+ if (args["d7-days"]) thresholds.d7 = Number(args["d7-days"]);
2070
+ if (args["d30-days"]) thresholds.d30 = Number(args["d30-days"]);
2071
+ for (const table of allTables()) {
2072
+ const entries = await store.engine.listLive({
2073
+ userId: store.userId,
2074
+ siteId,
2075
+ table
2076
+ });
2077
+ const siteIds = new Set(entries.map((e) => e.siteId));
2078
+ for (const targetSite of siteIds) {
2079
+ if (!quiet) logger.info(`Compacting ${table} [${targetSite ?? "-"}] (raw→d7→d30→d90)`);
2080
+ await store.engine.compactTiered({
2081
+ userId: store.userId,
2082
+ siteId: targetSite,
2083
+ table
2084
+ }, thresholds);
2085
+ }
1511
2086
  }
2087
+ if (!quiet) logger.success(`compact: done`);
1512
2088
  }
1513
2089
  });
1514
- const getCommand = defineCommand({
2090
+ async function exportToDuckDB(opts) {
2091
+ const outPath = path.resolve(opts.outPath);
2092
+ if (opts.force) await rm(outPath, { force: true });
2093
+ const instance = await DuckDBInstance.create(outPath);
2094
+ const conn = await instance.connect();
2095
+ const tables = [];
2096
+ try {
2097
+ for (const table of allTables()) {
2098
+ const entries = await opts.engine.listLive({
2099
+ userId: opts.userId,
2100
+ siteId: opts.siteId,
2101
+ table
2102
+ });
2103
+ if (entries.length === 0) continue;
2104
+ const fileList = entries.map((e) => path.join(opts.dataDir, e.objectKey)).map((p) => `'${sqlEscape(p)}'`).join(", ");
2105
+ await conn.run(`CREATE OR REPLACE TABLE ${table} AS SELECT * FROM read_parquet([${fileList}], union_by_name=true)`);
2106
+ const rows = (await conn.runAndReadAll(`SELECT count(*)::BIGINT AS n FROM ${table}`)).getRowObjects();
2107
+ const rowCount = Number(rows[0]?.n ?? 0);
2108
+ tables.push({
2109
+ table,
2110
+ files: entries.length,
2111
+ rows: rowCount
2112
+ });
2113
+ }
2114
+ } finally {
2115
+ conn.closeSync();
2116
+ instance.closeSync();
2117
+ }
2118
+ return {
2119
+ outPath,
2120
+ tables,
2121
+ totalRows: tables.reduce((acc, t) => acc + t.rows, 0)
2122
+ };
2123
+ }
2124
+ const exportCommand = defineCommand({
1515
2125
  meta: {
1516
- name: "get",
1517
- description: "Get details for a specific sitemap"
2126
+ name: "export",
2127
+ description: "Pack live Parquet partitions into a single .duckdb file for portable distribution (browser attach, CDN serving, etc.)"
1518
2128
  },
1519
2129
  args: {
1520
- site: {
2130
+ out: {
1521
2131
  type: "string",
1522
- alias: "s",
1523
2132
  required: true,
1524
- description: "Site URL"
2133
+ description: "Output path for the .duckdb file"
1525
2134
  },
1526
- url: {
1527
- type: "positional",
1528
- required: true,
1529
- description: "Sitemap URL"
2135
+ site: {
2136
+ type: "string",
2137
+ description: "Limit export to a single site URL (omit to include all)"
1530
2138
  },
1531
- json: {
2139
+ force: {
1532
2140
  type: "boolean",
1533
2141
  default: false,
1534
- description: "Output as JSON"
2142
+ description: "Overwrite the output file if it already exists"
1535
2143
  }
1536
2144
  },
1537
2145
  async run({ args }) {
1538
- const sitemap = await fetchSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
1539
- if (args.json) {
1540
- console.log(JSON.stringify(sitemap, null, 2));
2146
+ const store = (await createCommandContext({ needsStore: true })).store;
2147
+ const siteId = args.site ? store.siteIdFor(args.site) : void 0;
2148
+ const result = await exportToDuckDB({
2149
+ engine: store.engine,
2150
+ dataDir: store.dataDir,
2151
+ userId: store.userId,
2152
+ siteId,
2153
+ outPath: args.out,
2154
+ force: args.force
2155
+ });
2156
+ if (result.tables.length === 0) {
2157
+ console.log(`\n No data to export. Run \`gscdump sync\` first.`);
1541
2158
  return;
1542
2159
  }
1543
- console.log();
1544
- console.log(` \x1B[1mPath:\x1B[0m ${sitemap.path}`);
1545
- console.log(` \x1B[1mType:\x1B[0m ${sitemap.type || "sitemap"}`);
1546
- console.log(` \x1B[1mLast Submitted:\x1B[0m ${sitemap.lastSubmitted || "N/A"}`);
1547
- console.log(` \x1B[1mLast Downloaded:\x1B[0m ${sitemap.lastDownloaded || "N/A"}`);
1548
- console.log(` \x1B[1mPending:\x1B[0m ${sitemap.isPending ? "Yes" : "No"}`);
1549
- console.log(` \x1B[1mErrors:\x1B[0m ${sitemap.errors || 0}`);
1550
- console.log(` \x1B[1mWarnings:\x1B[0m ${sitemap.warnings || 0}`);
1551
- if (sitemap.contents?.length) {
1552
- console.log();
1553
- console.log(" \x1B[1mContents:\x1B[0m");
1554
- for (const c of sitemap.contents) console.log(` ${c.type}: ${c.submitted} submitted, ${c.indexed} indexed`);
1555
- }
2160
+ for (const t of result.tables) console.log(` ${t.table.padEnd(15)} ${String(t.files).padStart(4)} parquet → ${t.table} (${t.rows.toLocaleString()} rows)`);
2161
+ console.log(`\n Exported ${result.tables.length} table(s), ${result.totalRows.toLocaleString()} rows → ${result.outPath}`);
2162
+ console.log(`\n Attach from DuckDB: \x1B[36mATTACH '${result.outPath}' AS gsc (READ_ONLY); SELECT * FROM gsc.pages LIMIT 10;\x1B[0m`);
2163
+ console.log(` Attach in a browser: use DuckDB-WASM registerFileBuffer + \x1B[36mATTACH 'gsc.duckdb' AS gsc (READ_ONLY)\x1B[0m`);
1556
2164
  }
1557
2165
  });
1558
- const submitCommand = defineCommand({
2166
+ const DEFAULT_GRACE_HOURS = 24;
2167
+ const gcCommand = defineCommand({
1559
2168
  meta: {
1560
- name: "submit",
1561
- description: "Submit a sitemap to GSC"
2169
+ name: "gc",
2170
+ description: "Delete orphaned object-store files not referenced by any manifest entry"
1562
2171
  },
1563
2172
  args: {
1564
- site: {
2173
+ "grace-hours": {
2174
+ type: "string",
2175
+ default: String(DEFAULT_GRACE_HOURS),
2176
+ description: `Spare orphans younger than this (default: ${DEFAULT_GRACE_HOURS}h)`
2177
+ },
2178
+ "site": {
1565
2179
  type: "string",
1566
2180
  alias: "s",
1567
- required: true,
1568
- description: "Site URL"
2181
+ description: "Restrict to a single site (default: all sites)"
1569
2182
  },
1570
- url: {
1571
- type: "positional",
1572
- required: true,
1573
- description: "Sitemap URL to submit"
2183
+ "quiet": {
2184
+ type: "boolean",
2185
+ alias: "q",
2186
+ default: false,
2187
+ description: "Suppress progress output"
1574
2188
  }
1575
2189
  },
1576
2190
  async run({ args }) {
1577
- await submitSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
1578
- logger.success(`Submitted sitemap: ${args.url}`);
2191
+ const store = (await createCommandContext({ needsStore: true })).store;
2192
+ const siteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2193
+ const quiet = Boolean(args.quiet);
2194
+ const graceMs = Number(args["grace-hours"]) * 36e5;
2195
+ const result = await store.engine.gcOrphans({
2196
+ userId: store.userId,
2197
+ siteId
2198
+ }, graceMs);
2199
+ if (!quiet) logger.success(`gc: deleted ${result.deleted} orphan file(s)`);
1579
2200
  }
1580
2201
  });
1581
- const deleteCommand = defineCommand({
2202
+ const rollupsCommand = defineCommand({
2203
+ meta: {
2204
+ name: "rollups",
2205
+ description: "Manage post-sync rollups"
2206
+ },
2207
+ subCommands: { rebuild: defineCommand({
2208
+ meta: {
2209
+ name: "rebuild",
2210
+ description: "Rebuild post-sync rollups (daily totals, weekly totals, top-N tables) for a site"
2211
+ },
2212
+ args: {
2213
+ site: {
2214
+ type: "string",
2215
+ alias: "s",
2216
+ description: "Restrict to a single site (default: all sites with local data)"
2217
+ },
2218
+ quiet: {
2219
+ type: "boolean",
2220
+ alias: "q",
2221
+ default: false,
2222
+ description: "Suppress progress output"
2223
+ }
2224
+ },
2225
+ async run({ args }) {
2226
+ const store = (await createCommandContext({ needsStore: true })).store;
2227
+ const explicitSiteId = args.site ? store.siteIdFor(String(args.site)) : void 0;
2228
+ const quiet = Boolean(args.quiet);
2229
+ const allSiteIds = /* @__PURE__ */ new Set();
2230
+ if (explicitSiteId) allSiteIds.add(explicitSiteId);
2231
+ else for (const table of allTables()) {
2232
+ const entries = await store.engine.listLive({
2233
+ userId: store.userId,
2234
+ table
2235
+ });
2236
+ for (const e of entries) if (e.siteId) allSiteIds.add(e.siteId);
2237
+ }
2238
+ if (allSiteIds.size === 0) {
2239
+ logger.warn("No sites with local data. Run `gscdump sync` first.");
2240
+ return;
2241
+ }
2242
+ let totalBytes = 0;
2243
+ for (const siteId of allSiteIds) {
2244
+ if (!quiet) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)`);
2245
+ const results = await rebuildRollups({
2246
+ engine: store.engine,
2247
+ dataSource: store.dataSource,
2248
+ ctx: {
2249
+ userId: store.userId,
2250
+ siteId
2251
+ },
2252
+ defs: DEFAULT_ROLLUPS
2253
+ });
2254
+ for (const r of results) {
2255
+ totalBytes += r.bytes;
2256
+ if (!quiet) console.log(` ${r.id.padEnd(20)} ${(r.bytes / 1024).toFixed(1).padStart(8)} KB ${r.objectKey}`);
2257
+ }
2258
+ }
2259
+ if (!quiet) logger.success(`Rebuilt rollups across ${allSiteIds.size} site(s) — total ${(totalBytes / 1024).toFixed(1)} KB`);
2260
+ }
2261
+ }) }
2262
+ });
2263
+ const statsCommand = defineCommand({
1582
2264
  meta: {
1583
- name: "delete",
1584
- description: "Delete a sitemap from GSC"
2265
+ name: "stats",
2266
+ description: "Show row/byte counts per table and on-disk footprint"
1585
2267
  },
1586
2268
  args: {
2269
+ json: {
2270
+ type: "boolean",
2271
+ default: false,
2272
+ description: "Output as JSON"
2273
+ },
1587
2274
  site: {
1588
2275
  type: "string",
1589
- alias: "s",
1590
- required: true,
1591
- description: "Site URL"
1592
- },
1593
- url: {
1594
- type: "positional",
1595
- required: true,
1596
- description: "Sitemap URL to delete"
2276
+ description: "Limit to one site URL (sc-domain:example.com, https://example.com/, ...)"
1597
2277
  }
1598
2278
  },
1599
2279
  async run({ args }) {
1600
- await deleteSitemap(googleSearchConsole(await getAuth({ interactive: false })), args.site, args.url).catch(gscErrorHandler);
1601
- logger.success(`Deleted sitemap: ${args.url}`);
2280
+ const store = (await createCommandContext({ needsStore: true })).store;
2281
+ const siteId = args.site ? store.siteIdFor(args.site) : void 0;
2282
+ const perTable = await Promise.all(allTables().map(async (table) => {
2283
+ const all = await store.engine.listAll({
2284
+ userId: store.userId,
2285
+ siteId,
2286
+ table
2287
+ });
2288
+ return {
2289
+ table,
2290
+ live: all.filter((e) => e.retiredAt === void 0),
2291
+ retired: all.filter((e) => e.retiredAt !== void 0)
2292
+ };
2293
+ }));
2294
+ const watermarks = await store.engine.getWatermarks({
2295
+ userId: store.userId,
2296
+ siteId
2297
+ });
2298
+ const disk = await filesystemStats(store.dataDir).catch(() => ({
2299
+ files: 0,
2300
+ bytes: 0
2301
+ }));
2302
+ if (args.json) {
2303
+ const payload = {
2304
+ dataDir: store.dataDir,
2305
+ disk,
2306
+ tables: perTable.map(({ table, live, retired }) => ({
2307
+ table,
2308
+ liveFiles: live.length,
2309
+ liveRows: sumRows(live),
2310
+ liveBytes: sumBytes(live),
2311
+ retiredFiles: retired.length,
2312
+ retiredBytes: sumBytes(retired),
2313
+ watermarks: watermarks.filter((w) => w.table === table).map((w) => ({
2314
+ siteId: w.siteId ?? null,
2315
+ newestDateSynced: w.newestDateSynced,
2316
+ oldestDateSynced: w.oldestDateSynced,
2317
+ lastSyncAt: w.lastSyncAt
2318
+ }))
2319
+ }))
2320
+ };
2321
+ console.log(JSON.stringify(payload, null, 2));
2322
+ return;
2323
+ }
2324
+ console.log();
2325
+ console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
2326
+ console.log(` \x1B[90mDisk: ${disk.files} file(s), ${formatBytes(disk.bytes)}\x1B[0m`);
2327
+ console.log();
2328
+ const totalRows = perTable.reduce((acc, t) => acc + sumRows(t.live), 0);
2329
+ const totalBytes = perTable.reduce((acc, t) => acc + sumBytes(t.live), 0);
2330
+ const totalFiles = perTable.reduce((acc, t) => acc + t.live.length, 0);
2331
+ const totalRetiredFiles = perTable.reduce((acc, t) => acc + t.retired.length, 0);
2332
+ const totalRetiredBytes = perTable.reduce((acc, t) => acc + sumBytes(t.retired), 0);
2333
+ for (const { table, live, retired } of perTable) {
2334
+ const rows = sumRows(live).toLocaleString();
2335
+ const bytes = formatBytes(sumBytes(live));
2336
+ const retiredSuffix = retired.length > 0 ? ` \x1B[90m(+${retired.length} retired, ${formatBytes(sumBytes(retired))})\x1B[0m` : "";
2337
+ console.log(` ${table.padEnd(15)} \x1B[36m${String(live.length).padStart(4)}\x1B[0m files, ${rows.padStart(10)} rows, ${bytes}${retiredSuffix}`);
2338
+ }
2339
+ console.log();
2340
+ console.log(` \x1B[1mTotal:\x1B[0m ${totalFiles} files, ${totalRows.toLocaleString()} rows, ${formatBytes(totalBytes)} live`);
2341
+ if (totalRetiredFiles > 0) console.log(` \x1B[90mRetired: ${totalRetiredFiles} files, ${formatBytes(totalRetiredBytes)} awaiting GC\x1B[0m`);
2342
+ if (watermarks.length > 0) {
2343
+ console.log();
2344
+ console.log(` \x1B[1mSync watermarks:\x1B[0m`);
2345
+ for (const w of sortWatermarks(watermarks)) {
2346
+ const scope = w.siteId ? `${w.table}@${w.siteId}` : w.table;
2347
+ console.log(` ${scope.padEnd(24)} \x1B[36m${w.oldestDateSynced}\x1B[0m → \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
2348
+ }
2349
+ }
2350
+ console.log();
1602
2351
  }
1603
2352
  });
1604
- const sitemapsCommand = defineCommand({
2353
+ function sortWatermarks(ws) {
2354
+ return [...ws].sort((a, b) => {
2355
+ if (a.table !== b.table) return a.table.localeCompare(b.table);
2356
+ return (a.siteId ?? "").localeCompare(b.siteId ?? "");
2357
+ });
2358
+ }
2359
+ function sumRows(entries) {
2360
+ return entries.reduce((acc, e) => acc + e.rowCount, 0);
2361
+ }
2362
+ function sumBytes(entries) {
2363
+ return entries.reduce((acc, e) => acc + e.bytes, 0);
2364
+ }
2365
+ function formatBytes(n) {
2366
+ if (n < 1024) return `${n} B`;
2367
+ if (n < 1024 * 1024) return `${(n / 1024).toFixed(1)} KB`;
2368
+ if (n < 1024 * 1024 * 1024) return `${(n / 1024 / 1024).toFixed(1)} MB`;
2369
+ return `${(n / 1024 / 1024 / 1024).toFixed(2)} GB`;
2370
+ }
2371
+ const storeCommand = defineCommand({
1605
2372
  meta: {
1606
- name: "sitemaps",
1607
- description: "Manage sitemaps"
2373
+ name: "store",
2374
+ description: "Manage the local DuckDB/Parquet store"
1608
2375
  },
1609
2376
  subCommands: {
1610
- list: listCommand,
1611
- get: getCommand,
1612
- submit: submitCommand,
1613
- delete: deleteCommand
2377
+ stats: statsCommand,
2378
+ compact: compactCommand,
2379
+ gc: gcCommand,
2380
+ export: exportCommand,
2381
+ rollups: rollupsCommand
1614
2382
  }
1615
2383
  });
1616
-
1617
- //#endregion
1618
- //#region src/commands/sites.ts
1619
- const sitesCommand = defineCommand({
2384
+ const DEFAULT_TABLES = [
2385
+ "pages",
2386
+ "keywords",
2387
+ "countries",
2388
+ "devices"
2389
+ ];
2390
+ const DEFAULT_TYPES = ["web"];
2391
+ const ALL_SEARCH_TYPES = Object.values(SearchTypes);
2392
+ const DEFAULT_PENDING_DAYS = 3;
2393
+ const DEFAULT_CONCURRENCY = 8;
2394
+ const EMPTY_TYPE_PROBE_MIN_DAYS = 7;
2395
+ const EMPTY_TYPE_PROTECTED = ["web"];
2396
+ function createProgressTracker(total, quiet) {
2397
+ if (quiet) return {
2398
+ tick: () => {},
2399
+ done: () => {}
2400
+ };
2401
+ let current = 0;
2402
+ let lastLabel = "";
2403
+ let timer = null;
2404
+ const render = () => {
2405
+ clearLine();
2406
+ process.stdout.write(progressBar(current, total, lastLabel));
2407
+ };
2408
+ timer = setInterval(render, 100);
2409
+ return {
2410
+ tick: (label) => {
2411
+ current++;
2412
+ lastLabel = label;
2413
+ },
2414
+ done: () => {
2415
+ if (timer) {
2416
+ clearInterval(timer);
2417
+ timer = null;
2418
+ }
2419
+ clearLine();
2420
+ }
2421
+ };
2422
+ }
2423
+ async function syncTable(store, siteUrl, table, searchType, dates, client, concurrency, force, progress) {
2424
+ const dims = TABLE_DIMS[table];
2425
+ const siteId = store.siteIdFor(siteUrl);
2426
+ let totalRows = 0;
2427
+ let skipped = 0;
2428
+ let failed = 0;
2429
+ const priorStates = await store.engine.getSyncStates({
2430
+ userId: store.userId,
2431
+ siteId,
2432
+ table,
2433
+ searchType
2434
+ });
2435
+ const stateByDate = new Map(priorStates.map((s) => [s.date, s]));
2436
+ const label = searchType === "web" ? table : `${table}/${searchType}`;
2437
+ await runWithConcurrency(dates, concurrency, async (date) => {
2438
+ const prior = stateByDate.get(date);
2439
+ if (!force && prior?.state === "done") {
2440
+ skipped++;
2441
+ progress.tick(`${label} ${date} (skip)`);
2442
+ return;
2443
+ }
2444
+ const scope = {
2445
+ userId: store.userId,
2446
+ siteId,
2447
+ table,
2448
+ date,
2449
+ searchType
2450
+ };
2451
+ await store.engine.setSyncState(scope, "inflight");
2452
+ const result = await runOneDate(store, client, siteUrl, table, searchType, dims, date).catch((err) => ({
2453
+ kind: "error",
2454
+ error: err
2455
+ }));
2456
+ if (result.kind === "error") {
2457
+ await store.engine.setSyncState(scope, "failed", { error: result.error.message });
2458
+ failed++;
2459
+ progress.tick(`${label} ${date} (fail)`);
2460
+ return;
2461
+ }
2462
+ await store.engine.setSyncState(scope, "done");
2463
+ totalRows += result.rows;
2464
+ progress.tick(`${label} ${date}`);
2465
+ });
2466
+ return {
2467
+ rows: totalRows,
2468
+ skipped,
2469
+ failed
2470
+ };
2471
+ }
2472
+ async function runOneDate(store, client, siteUrl, table, searchType, dims, date) {
2473
+ const rowLimit = 25e3;
2474
+ const rows = [];
2475
+ let startRow = 0;
2476
+ while (true) {
2477
+ const batch = (await client._rawQuery(siteUrl, {
2478
+ startDate: date,
2479
+ endDate: date,
2480
+ dimensions: dims,
2481
+ searchType,
2482
+ rowLimit,
2483
+ startRow
2484
+ })).rows || [];
2485
+ for (const apiRow of batch) {
2486
+ const transformed = transformGscRow(table, {
2487
+ keys: apiRow.keys ?? [],
2488
+ clicks: apiRow.clicks ?? 0,
2489
+ impressions: apiRow.impressions ?? 0,
2490
+ ctr: apiRow.ctr ?? 0,
2491
+ position: apiRow.position ?? 0
2492
+ });
2493
+ if (transformed) rows.push(transformed.row);
2494
+ }
2495
+ if (batch.length < rowLimit) break;
2496
+ startRow += batch.length;
2497
+ }
2498
+ const writeCtx = {
2499
+ userId: store.userId,
2500
+ siteId: store.siteIdFor(siteUrl),
2501
+ table,
2502
+ date,
2503
+ searchType
2504
+ };
2505
+ await store.engine.writeDay(writeCtx, rows);
2506
+ return {
2507
+ kind: "ok",
2508
+ rows: rows.length
2509
+ };
2510
+ }
2511
+ const syncCommand = defineCommand({
1620
2512
  meta: {
1621
- name: "sites",
1622
- description: "List available GSC sites"
2513
+ name: "sync",
2514
+ description: "Sync GSC data to local Parquet store"
2515
+ },
2516
+ args: {
2517
+ "site": {
2518
+ type: "string",
2519
+ alias: "s",
2520
+ description: "Site URL"
2521
+ },
2522
+ "start": {
2523
+ type: "string",
2524
+ description: "Start date (YYYY-MM-DD) for historical sync"
2525
+ },
2526
+ "end": {
2527
+ type: "string",
2528
+ description: "End date (YYYY-MM-DD); defaults to 3 days ago"
2529
+ },
2530
+ "days": {
2531
+ type: "string",
2532
+ description: `Number of days back to sync (default: ${DEFAULT_PENDING_DAYS})`
2533
+ },
2534
+ "tables": {
2535
+ type: "string",
2536
+ alias: "t",
2537
+ description: `Tables to sync (default: ${DEFAULT_TABLES.join(",")}); comma-separated`
2538
+ },
2539
+ "types": {
2540
+ type: "string",
2541
+ description: `GSC search types to sync (default: ${DEFAULT_TYPES.join(",")}); comma-separated. Allowed: ${ALL_SEARCH_TYPES.join(",")}.`
2542
+ },
2543
+ "force-types": {
2544
+ type: "boolean",
2545
+ default: false,
2546
+ description: "Ignore stored empty-type markers and re-probe every requested type"
2547
+ },
2548
+ "no-rollups": {
2549
+ type: "boolean",
2550
+ default: false,
2551
+ description: "Skip the post-sync rollup rebuild (daily/weekly totals, top-N tables)"
2552
+ },
2553
+ "full": {
2554
+ type: "boolean",
2555
+ description: "Sync the last 450 days (full GSC history)"
2556
+ },
2557
+ "quiet": {
2558
+ type: "boolean",
2559
+ alias: "q",
2560
+ default: false,
2561
+ description: "Suppress progress output"
2562
+ },
2563
+ "force": {
2564
+ type: "boolean",
2565
+ default: false,
2566
+ description: "Re-sync dates already marked done (default: skip them for idempotent resume)"
2567
+ },
2568
+ "status": {
2569
+ type: "boolean",
2570
+ default: false,
2571
+ description: "Print watermarks + sync-state summary instead of syncing"
2572
+ },
2573
+ "json": {
2574
+ type: "boolean",
2575
+ default: false,
2576
+ description: "With --status: emit JSON"
2577
+ },
2578
+ "concurrency": {
2579
+ type: "string",
2580
+ alias: "c",
2581
+ description: `Concurrent in-flight day fetches per table (default: ${DEFAULT_CONCURRENCY})`
2582
+ },
2583
+ "serial-tables": {
2584
+ type: "boolean",
2585
+ default: false,
2586
+ description: "Run tables sequentially (default: run all tables in parallel)"
2587
+ }
1623
2588
  },
1624
- args: { json: {
1625
- type: "boolean",
1626
- default: false,
1627
- description: "Output as JSON for scripting"
1628
- } },
1629
2589
  async run({ args }) {
1630
- const sites = (await fetchSites(googleSearchConsole(await getAuth({ interactive: false }))).catch(gscErrorHandler)).filter((site) => site.siteUrl && site.permissionLevel !== "siteUnverifiedUser").map((site) => ({
1631
- url: site.siteUrl,
1632
- permission: site.permissionLevel || "unknown"
1633
- }));
1634
- if (args.json) {
1635
- console.log(JSON.stringify(sites, null, 2));
2590
+ if (args.status) {
2591
+ await printSyncStatus(await loadConfig(), args.site ? String(args.site) : void 0, Boolean(args.json));
1636
2592
  return;
1637
2593
  }
1638
- if (sites.length === 0) {
1639
- logger.warn("No verified sites found");
2594
+ const ctx = await createCommandContext({
2595
+ needsAuth: true,
2596
+ needsStore: true
2597
+ });
2598
+ const client = ctx.client;
2599
+ const siteUrl = await ctx.resolveSite(args.site ? String(args.site) : void 0);
2600
+ const tables = args.tables ? String(args.tables).split(",").map((t) => t.trim()).filter(isKnownTable) : DEFAULT_TABLES;
2601
+ const requestedTypes = args.types ? String(args.types).split(",").map((t) => t.trim()).filter(isKnownSearchType) : DEFAULT_TYPES;
2602
+ if (requestedTypes.length === 0) {
2603
+ logger.error(`No valid search types specified. Allowed: ${ALL_SEARCH_TYPES.join(",")}`);
2604
+ process.exit(1);
2605
+ }
2606
+ const siteId = ctx.store.siteIdFor(siteUrl);
2607
+ const emptyTypesStore = createEmptyTypesStore({ dataSource: ctx.store.dataSource });
2608
+ const emptyTypesDoc = await emptyTypesStore.load({
2609
+ userId: ctx.store.userId,
2610
+ siteId
2611
+ });
2612
+ const forceTypes = Boolean(args["force-types"]);
2613
+ const skippedTypes = [];
2614
+ const types = [];
2615
+ for (const t of requestedTypes) {
2616
+ if (!forceTypes && emptyTypesDoc.emptyTypes.includes(t) && !EMPTY_TYPE_PROTECTED.includes(t)) {
2617
+ skippedTypes.push(t);
2618
+ continue;
2619
+ }
2620
+ types.push(t);
2621
+ }
2622
+ if (types.length === 0) {
2623
+ logger.warn(`All requested types (${requestedTypes.join(", ")}) are marked empty for this site. Pass --force-types to re-probe.`);
1640
2624
  return;
1641
2625
  }
1642
- logger.success(`Found ${sites.length} sites:`);
1643
- console.log();
1644
- for (const site of sites) {
1645
- const perm = site.permission === "siteOwner" ? "\x1B[32m" : "\x1B[90m";
1646
- console.log(` ${site.url} ${perm}(${site.permission})\x1B[0m`);
2626
+ if (skippedTypes.length > 0 && !args.quiet) logger.info(`Skipping ${skippedTypes.join(", ")} (marked empty for this site; pass --force-types to re-probe).`);
2627
+ const endDate = args.end ? String(args.end) : daysAgo(DEFAULT_PENDING_DAYS);
2628
+ let startDate;
2629
+ if (args.start) startDate = String(args.start);
2630
+ else if (args.full) startDate = daysAgo(450);
2631
+ else if (args.days) startDate = daysAgo(Number.parseInt(String(args.days), 10) + DEFAULT_PENDING_DAYS - 1);
2632
+ else startDate = daysAgo(DEFAULT_PENDING_DAYS + DEFAULT_PENDING_DAYS - 1);
2633
+ const dates = getDateRange(startDate, endDate);
2634
+ if (dates.length === 0) {
2635
+ logger.error(`No dates to sync (start=${startDate}, end=${endDate})`);
2636
+ process.exit(1);
2637
+ }
2638
+ const store = ctx.store;
2639
+ if (!args.quiet) {
2640
+ logger.info(`Syncing ${siteUrl} (${tables.join(", ")}) [${types.join(", ")}] → ${store.dataDir}`);
2641
+ logger.info(`Range: ${startDate} → ${endDate} (${dates.length} days)`);
2642
+ }
2643
+ const concurrency = args.concurrency ? Math.max(1, Number.parseInt(String(args.concurrency), 10) || DEFAULT_CONCURRENCY) : DEFAULT_CONCURRENCY;
2644
+ const serialTables = Boolean(args["serial-tables"]);
2645
+ const start = Date.now();
2646
+ const totals = {};
2647
+ const jobs = [];
2648
+ for (const table of tables) for (const type of types) {
2649
+ const label = type === "web" ? table : `${table}/${type}`;
2650
+ jobs.push({
2651
+ table,
2652
+ type,
2653
+ label
2654
+ });
2655
+ }
2656
+ const progress = createProgressTracker(dates.length * jobs.length, Boolean(args.quiet));
2657
+ if (serialTables) for (const job of jobs) totals[job.label] = await syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress);
2658
+ else {
2659
+ const results = await Promise.all(jobs.map((job) => syncTable(store, siteUrl, job.table, job.type, dates, client, concurrency, args.force, progress)));
2660
+ jobs.forEach((job, i) => {
2661
+ totals[job.label] = results[i];
2662
+ });
2663
+ }
2664
+ progress.done();
2665
+ const seconds = ((Date.now() - start) / 1e3).toFixed(1);
2666
+ if (!args.quiet) {
2667
+ logger.success(`Synced ${siteUrl} in ${seconds}s`);
2668
+ for (const [t, n] of Object.entries(totals)) {
2669
+ const suffix = [n.skipped > 0 ? `${n.skipped} skipped` : null, n.failed > 0 ? `\x1B[31m${n.failed} failed\x1B[0m` : null].filter(Boolean).join(", ");
2670
+ const tail = suffix ? ` (${suffix})` : "";
2671
+ console.log(` ${t}: ${n.rows.toLocaleString()} rows${tail}`);
2672
+ }
2673
+ console.log();
2674
+ }
2675
+ const anyFailed = Object.values(totals).some((t) => t.failed > 0);
2676
+ const rowsByType = /* @__PURE__ */ new Map();
2677
+ const failedByType = /* @__PURE__ */ new Map();
2678
+ for (const job of jobs) {
2679
+ const t = totals[job.label];
2680
+ rowsByType.set(job.type, (rowsByType.get(job.type) ?? 0) + t.rows);
2681
+ failedByType.set(job.type, (failedByType.get(job.type) ?? 0) + t.failed);
2682
+ }
2683
+ if (!forceTypes && dates.length >= EMPTY_TYPE_PROBE_MIN_DAYS) {
2684
+ const toMark = [];
2685
+ for (const type of types) {
2686
+ if (EMPTY_TYPE_PROTECTED.includes(type)) continue;
2687
+ if ((failedByType.get(type) ?? 0) > 0) continue;
2688
+ if ((rowsByType.get(type) ?? 0) === 0) toMark.push(type);
2689
+ }
2690
+ if (toMark.length > 0) {
2691
+ await emptyTypesStore.mark({
2692
+ userId: store.userId,
2693
+ siteId
2694
+ }, toMark);
2695
+ if (!args.quiet) logger.info(`Marked empty for future syncs: ${toMark.join(", ")} (0 rows across ${dates.length} days; pass --force-types to re-probe).`);
2696
+ }
2697
+ }
2698
+ if (forceTypes && emptyTypesDoc.emptyTypes.length > 0) {
2699
+ const toClear = [];
2700
+ for (const type of types) if (emptyTypesDoc.emptyTypes.includes(type) && (rowsByType.get(type) ?? 0) > 0) toClear.push(type);
2701
+ if (toClear.length > 0) {
2702
+ await emptyTypesStore.clear({
2703
+ userId: store.userId,
2704
+ siteId
2705
+ }, toClear);
2706
+ if (!args.quiet) logger.info(`Cleared empty markers for: ${toClear.join(", ")} (re-probe found data).`);
2707
+ }
1647
2708
  }
2709
+ const noRollups = Boolean(args["no-rollups"]);
2710
+ const anyRowsSynced = Object.values(totals).some((t) => t.rows > 0);
2711
+ if (!noRollups && anyRowsSynced) {
2712
+ if (!args.quiet) logger.info(`Rebuilding rollups for [${siteId}] (${DEFAULT_ROLLUPS.length} rollups)…`);
2713
+ const rollupStart = Date.now();
2714
+ const results = await rebuildRollups({
2715
+ engine: store.engine,
2716
+ dataSource: store.dataSource,
2717
+ ctx: {
2718
+ userId: store.userId,
2719
+ siteId
2720
+ },
2721
+ defs: DEFAULT_ROLLUPS
2722
+ }).catch((err) => {
2723
+ logger.warn(`Rollup rebuild failed: ${err.message}`);
2724
+ return [];
2725
+ });
2726
+ if (!args.quiet && results.length > 0) {
2727
+ const kb = results.reduce((a, r) => a + r.bytes, 0) / 1024;
2728
+ const ms = Date.now() - rollupStart;
2729
+ logger.success(`Rebuilt ${results.length} rollup(s) in ${ms}ms — ${kb.toFixed(1)} KB`);
2730
+ }
2731
+ }
2732
+ if (anyFailed) process.exit(1);
1648
2733
  }
1649
2734
  });
1650
-
1651
- //#endregion
1652
- //#region src/index.ts
2735
/**
 * Report whether `name` appears in the list returned by `allTables()`.
 *
 * @param {string} name - Candidate table name (e.g. one entry of a comma-separated `--tables` value).
 * @returns {boolean} `true` when `allTables()` includes the name.
 */
function isKnownTable(name) {
  const knownTables = allTables();
  return knownTables.includes(name);
}
2738
/**
 * Report whether `name` is one of the entries in `ALL_SEARCH_TYPES`.
 *
 * @param {string} name - Candidate search type (e.g. one entry of a comma-separated `--types` value).
 * @returns {boolean} `true` when `ALL_SEARCH_TYPES` includes the name.
 */
function isKnownSearchType(name) {
  const allowedTypes = ALL_SEARCH_TYPES;
  return allowedTypes.includes(name);
}
2741
/**
 * Print the local store's sync status: per-table watermarks plus any
 * in-flight or failed sync states.
 *
 * @param {object} config - Loaded CLI config; handed to `resolveDataDir` to locate the store.
 * @param {string} [siteFilter] - Optional site URL. When set, lookups are scoped to
 *   `store.siteIdFor(siteFilter)`; otherwise `siteId` is passed as `undefined`.
 * @param {boolean} asJson - When true, print a single JSON document
 *   (`dataDir`, `siteFilter`, `watermarks`, `failed`, `inflight`) and nothing else.
 * @returns {Promise<void>} Resolves once the report has been written to stdout.
 */
async function printSyncStatus(config, siteFilter, asJson) {
  const store = createLocalStore({ dataDir: resolveDataDir(config) });
  const siteId = siteFilter ? store.siteIdFor(siteFilter) : void 0;
  const watermarks = await store.engine.getWatermarks({
    userId: store.userId,
    siteId
  });
  const states = await store.engine.getSyncStates({
    userId: store.userId,
    siteId
  });
  const failed = states.filter((s) => s.state === "failed");
  const inflight = states.filter((s) => s.state === "inflight");
  if (asJson) {
    console.log(JSON.stringify({
      dataDir: store.dataDir,
      siteFilter: siteFilter ?? null,
      watermarks,
      failed,
      inflight
    }, null, 2));
    return;
  }
  // Shared label for watermark and state rows: "table@siteId", or just "table".
  const scopeOf = (entry) => entry.siteId ? `${entry.table}@${entry.siteId}` : entry.table;
  console.log();
  console.log(` \x1B[1m${store.dataDir}\x1B[0m`);
  if (siteFilter) console.log(` \x1B[90mSite: ${siteFilter}\x1B[0m`);
  console.log();
  if (watermarks.length === 0) {
    // No early return here: inflight/failed states must still be reported
    // below, otherwise a sync that failed on every date shows no diagnostics.
    console.log(` No sync watermarks. Run \`gscdump sync\` to ingest data.`);
  } else {
    console.log(` \x1B[1mWatermarks:\x1B[0m`);
    // Sort a copy (by table, then site id) so the caller's array is not mutated.
    const sorted = [...watermarks].sort((a, b) => {
      if (a.table !== b.table) return a.table.localeCompare(b.table);
      return (a.siteId ?? "").localeCompare(b.siteId ?? "");
    });
    for (const w of sorted) {
      console.log(` ${scopeOf(w).padEnd(28)} \x1B[36m${w.oldestDateSynced}\x1B[0m → \x1B[36m${w.newestDateSynced}\x1B[0m \x1B[90m(last ${formatAge(w.lastSyncAt)})\x1B[0m`);
    }
  }
  if (inflight.length > 0) {
    console.log();
    console.log(` \x1B[33m${inflight.length} inflight:\x1B[0m`);
    for (const s of inflight) console.log(` ${scopeOf(s)} ${s.date} (attempt ${s.attempts}, started ${formatAge(s.updatedAt)})`);
  }
  if (failed.length > 0) {
    console.log();
    console.log(` \x1B[31m${failed.length} failed:\x1B[0m`);
    for (const s of failed) console.log(` ${scopeOf(s)} ${s.date}: ${s.error ?? "unknown"}`);
    console.log();
    console.log(` Re-run \`gscdump sync --force\` to retry failed dates.`);
  }
  console.log();
}
1653
2796
  runMain(defineCommand({
1654
2797
  meta: {
1655
2798
  name: "gscdump",
@@ -1662,6 +2805,11 @@ runMain(defineCommand({
1662
2805
  query: queryCommand,
1663
2806
  sites: sitesCommand,
1664
2807
  sitemaps: sitemapsCommand,
2808
+ sync: syncCommand,
2809
+ store: storeCommand,
2810
+ inspect: inspectCommand,
2811
+ entities: entitiesCommand,
2812
+ analyze: analyzeCommand,
1665
2813
  auth: authCommand,
1666
2814
  config: configCommand,
1667
2815
  mcp: mcpCommand
@@ -1670,6 +2818,4 @@ runMain(defineCommand({
1670
2818
  if (!process.argv.includes("mcp")) showSplash();
1671
2819
  }
1672
2820
  }));
1673
-
1674
- //#endregion
1675
- export { getAuth as a, loadTokens as c, clearTokens as i, saveCloudTokens as l, authenticateCloud as n, getAuthCredentials as o, clearCloudTokens as r, loadCloudTokens as s, authenticate as t, saveTokens as u };
2821
+ export {};