gscdump 0.8.1 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,6 +1,17 @@
1
1
  import { $Fetch, FetchOptions } from "ofetch";
2
2
  import { indexing_v3 } from "@googleapis/indexing/build/v3";
3
3
  import { searchconsole_v1 } from "@googleapis/searchconsole/build/v1";
4
+ /**
5
+ * Batch runner with optional concurrency, inter-call delay, and progress.
6
+ * Used by batchRequestIndexing / batchInspectUrls. Defaults to sequential
7
+ * (concurrency = 1) because the underlying APIs rate-limit aggressively;
8
+ * callers that know their quota headroom can opt into parallelism.
+ *
+ * @param items - Inputs to process; each one is handed to `operation`
+ * together with its index.
+ * @param operation - Async worker invoked as `operation(item, index)`.
+ * @param options - Optional tuning: `delayMs` (the inter-call delay,
+ * presumably in milliseconds), `concurrency` (defaults to 1), and
+ * `onProgress`, which receives `(result, index, total)`.
+ * @returns A promise for the collected results. NOTE(review): result ordering
+ * under concurrency > 1 is not visible from this declaration - confirm
+ * against the implementation.
9
+ */
10
+ declare function runSequentialBatch<I, R>(items: I[], operation: (item: I, index: number) => Promise<R>, options?: {
11
+ delayMs?: number;
12
+ concurrency?: number;
13
+ onProgress?: (result: R, index: number, total: number) => void;
14
+ }): Promise<R[]>;
4
15
  type ApiSite = searchconsole_v1.Schema$WmxSite;
5
16
  type ApiSitemap = searchconsole_v1.Schema$WmxSitemap;
6
17
  type ApiSitemapContent = searchconsole_v1.Schema$WmxSitemapContent;
@@ -375,6 +386,17 @@ declare function verifySite(client: GoogleSearchConsoleClient, siteUrl: string,
375
386
  * List all verified WebResources for the authed user.
376
387
  */
377
388
  declare function listVerifiedSites(client: GoogleSearchConsoleClient): Promise<VerificationWebResource[]>;
389
+ /**
390
+ * Fetch a single verified WebResource by id.
+ * Thin wrapper that forwards to `client.verification.get(id)`.
+ *
+ * @param client - Search Console client whose `verification` API is used.
+ * @param id - Identifier of the WebResource to fetch.
+ * @returns The WebResource returned by the verification API.
391
+ */
392
+ declare function getVerifiedSite(client: GoogleSearchConsoleClient, id: string): Promise<VerificationWebResource>;
393
+ /**
394
+ * Drop the calling user's verified ownership of a WebResource. The placed
395
+ * verification token (meta tag / file / DNS record) MUST be removed first,
396
+ * otherwise Google may auto-re-verify and the call will fail. Other owners
397
+ * on the property are unaffected.
+ * Thin wrapper that forwards to `client.verification.delete(id)`.
+ *
+ * @param client - Search Console client whose `verification` API is used.
+ * @param id - Identifier of the WebResource to unverify.
398
+ */
399
+ declare function unverifySite(client: GoogleSearchConsoleClient, id: string): Promise<void>;
378
400
  interface GscdumpApiOptions {
379
401
  /** API key (gsd_user_xxx or gsd_prod_xxx) */
380
402
  apiKey: string;
@@ -539,4 +561,29 @@ declare const INDEXING_EFFECTIVE_LIMIT = 1800;
539
561
  declare function hasGscReadScope(scopes: string | null | undefined): boolean;
540
562
  declare function hasGscWriteScope(scopes: string | null | undefined): boolean;
541
563
  declare function hasIndexingScope(scopes: string | null | undefined): boolean;
542
- export { ApiSite, ApiSitemap, ApiSitemapContent, Auth, AuthClient, AuthOptions, BackfillProgress, CallOptions, DAYS_PER_RANGE, DataRow, DimensionFilter, DimensionFilterGroup, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, GoogleSearchConsoleClient, GoogleSearchConsoleClientOptions, GscError, GscErrorKind, GscdumpApiOptions, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, IndexStatusResult, IndexingIssueType, IndexingMetadata, IndexingNotificationType, IndexingResult, InspectUrlIndexResponse, InspectUrlResult, MS_PER_DAY, MobileUsabilityResult, Period, PublishUrlNotificationResponse, RequiredNonNullable, ResolvedAnalyticsRange, RichResultsResult, SearchAnalyticsQuery, SearchAnalyticsResponse, Site, SiteAnalytics, UrlInspectionResult, UrlNotificationMetadata, VerificationMethod, VerificationSite, VerificationSiteType, VerificationToken, VerificationWebResource, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, verificationMethodsFor, verifySite };
564
+ interface DiscoverSitemapOptions {
565
+ /** User-Agent sent on the discovery requests. Defaults to "gscdump sitemap fetcher". */
566
+ userAgent?: string;
567
+ /** AbortSignal threaded through fetches; defaults to a 10s timeout per call. When a signal is supplied it is reused for every request and no default timeout is added on top of it. */
568
+ signal?: AbortSignal;
569
+ }
570
+ /**
571
+ * Try to discover a sitemap for `domain` by checking robots.txt for a
572
+ * `Sitemap:` directive, then a small set of common paths. Returns the first
573
+ * URL that responds with a 2xx, or `null`.
+ *
+ * Requests are sent to `https://<domain>`, so pass a bare host name without
+ * a scheme. Candidates are probed with HEAD requests; the fallback paths are
+ * `/sitemap.xml` and `/sitemap_index.xml`. A request that fails at the
+ * network level is treated as a miss rather than thrown.
+ *
+ * @param domain - Host name to probe, e.g. `example.com`.
+ * @param options - Optional User-Agent and AbortSignal overrides.
574
+ */
575
+ declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
576
+ interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
577
+ /** Maximum nested sitemap-index depth to follow. Default 3. The starting sitemap counts as depth 0; sitemaps nested deeper than `maxDepth` are skipped without an error. */
578
+ maxDepth?: number;
579
+ /** Stop after this many URLs (across all nested sitemaps). Default unlimited. The returned array never exceeds this length. */
580
+ limit?: number;
581
+ }
582
+ /**
583
+ * Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
584
+ * Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
585
+ * de-duplicated. The XML parser is regex-based — it handles the common
586
+ * `<loc>https://...</loc>` shape but doesn't validate the schema.
+ *
+ * @param sitemapUrl - URL of the sitemap or sitemap index to start from.
+ * @param options - Request and traversal options (User-Agent, AbortSignal,
+ * `maxDepth`, `limit`).
+ * @returns The collected URLs in the order they were first encountered.
+ * @throws Error when a fetched sitemap responds with a non-2xx status.
587
+ */
588
+ declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
589
+ export { ApiSite, ApiSitemap, ApiSitemapContent, Auth, AuthClient, AuthOptions, BackfillProgress, CallOptions, DAYS_PER_RANGE, DataRow, DimensionFilter, DimensionFilterGroup, DiscoverSitemapOptions, FetchSitemapUrlsOptions, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, GoogleSearchConsoleClient, GoogleSearchConsoleClientOptions, GscError, GscErrorKind, GscdumpApiOptions, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, IndexStatusResult, IndexingIssueType, IndexingMetadata, IndexingNotificationType, IndexingResult, InspectUrlIndexResponse, InspectUrlResult, MS_PER_DAY, MobileUsabilityResult, Period, PublishUrlNotificationResponse, RequiredNonNullable, ResolvedAnalyticsRange, RichResultsResult, SearchAnalyticsQuery, SearchAnalyticsResponse, Site, SiteAnalytics, UrlInspectionResult, UrlNotificationMetadata, VerificationMethod, VerificationSite, VerificationSiteType, VerificationToken, VerificationWebResource, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
package/dist/index.mjs CHANGED
@@ -143,6 +143,12 @@ async function verifySite(client, siteUrl, method) {
143
143
  async function listVerifiedSites(client) {
144
144
  return client.verification.list();
145
145
  }
146
/** Fetches one verified WebResource by forwarding `id` to `client.verification.get(id)`. */
+ async function getVerifiedSite(client, id) {
147
+ return client.verification.get(id);
148
+ }
149
/** Drops the caller's verification of a WebResource by forwarding `id` to `client.verification.delete(id)`. */
+ async function unverifySite(client, id) {
150
+ return client.verification.delete(id);
151
+ }
146
152
  const MS_PER_DAY = 864e5;
147
153
  function toIsoDate(d) {
148
154
  return d.toISOString().slice(0, 10);
@@ -923,4 +929,71 @@ function hasIndexingScope(scopes) {
923
929
  if (!scopes) return false;
924
930
  return scopes.includes("googleapis.com/auth/indexing");
925
931
  }
926
- export { DAYS_PER_RANGE, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, MS_PER_DAY, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, verificationMethodsFor, verifySite };
932
/** Per-request timeout in milliseconds, used only when the caller supplies no AbortSignal. */
const FETCH_TIMEOUT_MS = 1e4;
/** Conventional sitemap locations probed when robots.txt yields nothing usable. */
const COMMON_PATHS = ["/sitemap.xml", "/sitemap_index.xml"];
/** Matches a single `Sitemap: <url>` robots.txt directive (case-insensitive); applied one line at a time. */
const SITEMAP_DIRECTIVE_RE = /^Sitemap:\s*(\S+)/im;
/**
 * Try to discover a sitemap for `domain`.
 *
 * Order of attempts:
 *   1. every `Sitemap:` directive in `https://<domain>/robots.txt`, in file order;
 *   2. each entry of `COMMON_PATHS` under `https://<domain>`.
 * Each candidate is probed with a HEAD request and the first one answering
 * with a 2xx status wins. Discovery is best-effort: a request that fails at
 * the network level (or is aborted) counts as a miss instead of throwing.
 *
 * @param domain Bare host name (no scheme), e.g. `example.com`.
 * @param options Optional `userAgent` (default "gscdump sitemap fetcher") and
 *   `signal`. A supplied signal is reused for every request; otherwise each
 *   request gets its own `FETCH_TIMEOUT_MS` timeout.
 * @returns The absolute URL of the first responding sitemap, or `null`.
 */
async function discoverSitemap(domain, options = {}) {
	const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
	const baseUrl = `https://${domain}`;
	const headers = { "User-Agent": userAgent };
	const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
	// HEAD probe shared by both discovery phases so every request carries the
	// same User-Agent; any rejection is treated as "not found".
	const responds = async (url) => {
		const res = await fetch(url, {
			method: "HEAD",
			headers,
			signal: signalFor()
		}).catch(() => null);
		return Boolean(res?.ok);
	};
	const robotsRes = await fetch(`${baseUrl}/robots.txt`, {
		headers,
		signal: signalFor()
	}).catch(() => null);
	if (robotsRes?.ok) {
		const robotsTxt = await robotsRes.text().catch(() => "");
		// Scan line by line: robots.txt may declare several sitemaps, and matching
		// per line keeps `\s*` from running past the end of an empty directive.
		for (const line of robotsTxt.split(/\r?\n/)) {
			const declared = line.match(SITEMAP_DIRECTIVE_RE)?.[1];
			if (!declared) continue;
			let candidate;
			try {
				// Resolves relative values against the site root; absolute URLs pass through.
				candidate = new URL(declared, baseUrl).href;
			} catch {
				continue;
			}
			if (await responds(candidate)) return candidate;
		}
	}
	for (const path of COMMON_PATHS) {
		const url = `${baseUrl}${path}`;
		if (await responds(url)) return url;
	}
	return null;
}
962
/**
 * Matches one `<loc>` element. Group 1 holds the content of a CDATA-wrapped
 * value, group 2 holds plain character data (still entity-escaped).
 */
const LOC_RE = /<loc>(?:\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*|([^<]+))<\/loc>/gi;
/** Detects a sitemap-index document by its root element name. */
const SITEMAPINDEX_RE = /<sitemapindex\b/i;
/** The five predefined XML entities. */
const XML_ENTITIES = new Map([
	["amp", "&"],
	["lt", "<"],
	["gt", ">"],
	["quot", "\""],
	["apos", "'"]
]);
/** Decodes predefined and numeric XML character references in a single pass (so `&amp;lt;` becomes `&lt;`, not `<`). */
function decodeXmlEntities(value) {
	return value.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
		if (ref.startsWith("#")) {
			const codePoint = ref[1] === "x" ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
			// Leave out-of-range references untouched instead of throwing.
			return codePoint <= 1114111 ? String.fromCodePoint(codePoint) : whole;
		}
		return XML_ENTITIES.get(ref) ?? whole;
	});
}
/**
 * Fetch a sitemap (or sitemap index) and return the `<loc>` URLs it lists.
 *
 * Sitemap-index documents are followed recursively; the starting sitemap is
 * depth 0 and anything deeper than `maxDepth` is skipped. Each sitemap URL is
 * fetched at most once, so duplicate or self-referencing index entries cannot
 * trigger repeated downloads. Page URLs are entity-decoded, trimmed and
 * de-duplicated, and returned in first-seen order.
 *
 * @param sitemapUrl URL of the sitemap or sitemap index to start from.
 * @param options `userAgent` (default "gscdump sitemap fetcher"), `signal`
 *   (reused for every request; otherwise a `FETCH_TIMEOUT_MS` timeout per
 *   request), `maxDepth` (default 3) and `limit` (default unlimited).
 * @returns The collected page URLs, never more than `limit`.
 * @throws Error `Fetch <url> failed: <status>` when any fetched sitemap
 *   responds with a non-2xx status; fetch rejections propagate unchanged.
 */
async function fetchSitemapUrls(sitemapUrl, options = {}) {
	const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
	const maxDepth = options.maxDepth ?? 3;
	const limit = options.limit;
	const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
	const visitedSitemaps = /* @__PURE__ */ new Set();
	const seen = /* @__PURE__ */ new Set();
	const out = [];
	const limitReached = () => limit != null && out.length >= limit;
	const visit = async (url, depth) => {
		if (limitReached() || depth > maxDepth) return;
		// Guard against cycles and repeated index entries.
		if (visitedSitemaps.has(url)) return;
		visitedSitemaps.add(url);
		const res = await fetch(url, {
			headers: { "User-Agent": userAgent },
			signal: signalFor()
		});
		if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
		const text = await res.text();
		const locs = [];
		for (const m of text.matchAll(LOC_RE)) {
			// CDATA content is literal; plain character data must be unescaped.
			const loc = (m[1] ?? decodeXmlEntities(m[2])).trim();
			if (loc) locs.push(loc);
		}
		if (SITEMAPINDEX_RE.test(text)) {
			for (const child of locs) {
				if (limitReached()) return;
				await visit(child, depth + 1);
			}
			return;
		}
		for (const loc of locs) {
			if (seen.has(loc)) continue;
			seen.add(loc);
			out.push(loc);
			if (limitReached()) return;
		}
	};
	await visit(sitemapUrl, 0);
	return out;
}
999
+ export { DAYS_PER_RANGE, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, MS_PER_DAY, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
@@ -10,4 +10,17 @@ interface DiscoverSitemapOptions {
10
10
  * URL that responds with a 2xx, or `null`.
11
11
  */
12
12
  declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
13
- export { DiscoverSitemapOptions, discoverSitemap };
13
+ interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
14
+ /** Maximum nested sitemap-index depth to follow. Default 3. The starting sitemap counts as depth 0; sitemaps nested deeper than `maxDepth` are skipped without an error. */
15
+ maxDepth?: number;
16
+ /** Stop after this many URLs (across all nested sitemaps). Default unlimited. The returned array never exceeds this length. */
17
+ limit?: number;
18
+ }
19
+ /**
20
+ * Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
21
+ * Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
22
+ * de-duplicated. The XML parser is regex-based — it handles the common
23
+ * `<loc>https://...</loc>` shape but doesn't validate the schema.
+ *
+ * @param sitemapUrl - URL of the sitemap or sitemap index to start from.
+ * @param options - Request and traversal options (User-Agent, AbortSignal,
+ * `maxDepth`, `limit`).
+ * @returns The collected URLs in the order they were first encountered.
+ * @throws Error when a fetched sitemap responds with a non-2xx status.
24
+ */
25
+ declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
26
+ export { DiscoverSitemapOptions, FetchSitemapUrlsOptions, discoverSitemap, fetchSitemapUrls };
package/dist/sitemap.mjs CHANGED
@@ -28,4 +28,41 @@ async function discoverSitemap(domain, options = {}) {
28
28
  }
29
29
  return null;
30
30
  }
31
- export { discoverSitemap };
31
/**
 * Matches one `<loc>` element. Group 1 holds the content of a CDATA-wrapped
 * value, group 2 holds plain character data (still entity-escaped).
 */
const LOC_RE = /<loc>(?:\s*<!\[CDATA\[([\s\S]*?)\]\]>\s*|([^<]+))<\/loc>/gi;
/** Detects a sitemap-index document by its root element name. */
const SITEMAPINDEX_RE = /<sitemapindex\b/i;
/** The five predefined XML entities. */
const XML_ENTITIES = new Map([
	["amp", "&"],
	["lt", "<"],
	["gt", ">"],
	["quot", "\""],
	["apos", "'"]
]);
/** Decodes predefined and numeric XML character references in a single pass (so `&amp;lt;` becomes `&lt;`, not `<`). */
function decodeXmlEntities(value) {
	return value.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref) => {
		if (ref.startsWith("#")) {
			const codePoint = ref[1] === "x" ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
			// Leave out-of-range references untouched instead of throwing.
			return codePoint <= 1114111 ? String.fromCodePoint(codePoint) : whole;
		}
		return XML_ENTITIES.get(ref) ?? whole;
	});
}
/**
 * Fetch a sitemap (or sitemap index) and return the `<loc>` URLs it lists.
 *
 * Sitemap-index documents are followed recursively; the starting sitemap is
 * depth 0 and anything deeper than `maxDepth` is skipped. Each sitemap URL is
 * fetched at most once, so duplicate or self-referencing index entries cannot
 * trigger repeated downloads. Page URLs are entity-decoded, trimmed and
 * de-duplicated, and returned in first-seen order.
 *
 * @param sitemapUrl URL of the sitemap or sitemap index to start from.
 * @param options `userAgent` (default "gscdump sitemap fetcher"), `signal`
 *   (reused for every request; otherwise a `FETCH_TIMEOUT_MS` timeout per
 *   request), `maxDepth` (default 3) and `limit` (default unlimited).
 * @returns The collected page URLs, never more than `limit`.
 * @throws Error `Fetch <url> failed: <status>` when any fetched sitemap
 *   responds with a non-2xx status; fetch rejections propagate unchanged.
 */
async function fetchSitemapUrls(sitemapUrl, options = {}) {
	const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
	const maxDepth = options.maxDepth ?? 3;
	const limit = options.limit;
	const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
	const visitedSitemaps = /* @__PURE__ */ new Set();
	const seen = /* @__PURE__ */ new Set();
	const out = [];
	const limitReached = () => limit != null && out.length >= limit;
	const visit = async (url, depth) => {
		if (limitReached() || depth > maxDepth) return;
		// Guard against cycles and repeated index entries.
		if (visitedSitemaps.has(url)) return;
		visitedSitemaps.add(url);
		const res = await fetch(url, {
			headers: { "User-Agent": userAgent },
			signal: signalFor()
		});
		if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
		const text = await res.text();
		const locs = [];
		for (const m of text.matchAll(LOC_RE)) {
			// CDATA content is literal; plain character data must be unescaped.
			const loc = (m[1] ?? decodeXmlEntities(m[2])).trim();
			if (loc) locs.push(loc);
		}
		if (SITEMAPINDEX_RE.test(text)) {
			for (const child of locs) {
				if (limitReached()) return;
				await visit(child, depth + 1);
			}
			return;
		}
		for (const loc of locs) {
			if (seen.has(loc)) continue;
			seen.add(loc);
			out.push(loc);
			if (limitReached()) return;
		}
	};
	await visit(sitemapUrl, 0);
	return out;
}
68
+ export { discoverSitemap, fetchSitemapUrls };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gscdump",
3
3
  "type": "module",
4
- "version": "0.8.1",
4
+ "version": "0.8.2",
5
5
  "description": "Google Search Console API wrapper with typed query builder, streaming pagination, and SEO analysis functions",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",