gscdump 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,6 +1,17 @@
1
1
  import { $Fetch, FetchOptions } from "ofetch";
2
2
  import { indexing_v3 } from "@googleapis/indexing/build/v3";
3
3
  import { searchconsole_v1 } from "@googleapis/searchconsole/build/v1";
4
+ /**
5
+ * Batch runner with optional concurrency, inter-call delay, and progress.
6
+ * Used by batchRequestIndexing / batchInspectUrls. Defaults to sequential
7
+ * (concurrency = 1) because the underlying APIs rate-limit aggressively;
8
+ * callers that know their quota headroom can opt into parallelism.
9
+ */
10
+ declare function runSequentialBatch<I, R>(items: I[], operation: (item: I, index: number) => Promise<R>, options?: {
11
+ delayMs?: number;
12
+ concurrency?: number;
13
+ onProgress?: (result: R, index: number, total: number) => void;
14
+ }): Promise<R[]>;
4
15
  type ApiSite = searchconsole_v1.Schema$WmxSite;
5
16
  type ApiSitemap = searchconsole_v1.Schema$WmxSitemap;
6
17
  type ApiSitemapContent = searchconsole_v1.Schema$WmxSitemapContent;
@@ -162,6 +173,21 @@ interface GSCQueryBuilder<D extends Dimension[] = [], C = object> {
162
173
  toBody: () => SearchAnalyticsQuery;
163
174
  getState: () => BuilderState;
164
175
  }
176
+ type VerificationMethod = 'META' | 'FILE' | 'DNS_TXT' | 'DNS_CNAME' | 'ANALYTICS' | 'TAG_MANAGER';
177
+ type VerificationSiteType = 'SITE' | 'INET_DOMAIN' | 'ANDROID_APP';
178
+ interface VerificationSite {
179
+ type: VerificationSiteType;
180
+ identifier: string;
181
+ }
182
+ interface VerificationToken {
183
+ method: string;
184
+ token: string;
185
+ }
186
+ interface VerificationWebResource {
187
+ id?: string;
188
+ site: VerificationSite;
189
+ owners?: string[];
190
+ }
165
191
  /**
166
192
  * Compatible interface with OAuth2Client from google-auth-library
167
193
  */
@@ -193,8 +219,30 @@ interface CallOptions {
193
219
  interface GoogleSearchConsoleClient {
194
220
  /** Query search analytics with builder, returns async generator yielding typed row batches */
195
221
  query: <D extends Dimension[], C>(siteUrl: string, builder: GSCQueryBuilder<D, C>, opts?: CallOptions) => AsyncGenerator<GSCRow<D, C>[]>;
196
- /** List all sites */
197
- sites: (opts?: CallOptions) => Promise<ApiSite[]>;
222
+ /**
223
+ * List all sites. Also exposes write ops as `client.sites.add(siteUrl)` and
224
+ * `client.sites.delete(siteUrl)`. Calling `client.sites()` is equivalent to
225
+ * `client.sites.list()`.
226
+ */
227
+ sites: ((opts?: CallOptions) => Promise<ApiSite[]>) & {
228
+ list: (opts?: CallOptions) => Promise<ApiSite[]>; /** Add a property in unverified state. Caller must verify ownership separately. */
229
+ add: (siteUrl: string, opts?: CallOptions) => Promise<void>; /** Remove a property from the user's account. */
230
+ delete: (siteUrl: string, opts?: CallOptions) => Promise<void>;
231
+ };
232
+ /** Site Verification API (siteverification.googleapis.com). Required to flip a property from unverified to verified. */
233
+ verification: {
234
+ /** Returns the token to place on the site/DNS, plus the resolved method. */getToken: (params: {
235
+ site: VerificationSite;
236
+ verificationMethod: VerificationMethod;
237
+ }, opts?: CallOptions) => Promise<VerificationToken>; /** Triggers Google to fetch + validate; returns the verified WebResource. */
238
+ insert: (params: {
239
+ site: VerificationSite;
240
+ verificationMethod: VerificationMethod;
241
+ }, opts?: CallOptions) => Promise<VerificationWebResource>;
242
+ list: (opts?: CallOptions) => Promise<VerificationWebResource[]>;
243
+ get: (id: string, opts?: CallOptions) => Promise<VerificationWebResource>;
244
+ delete: (id: string, opts?: CallOptions) => Promise<void>;
245
+ };
198
246
  /** Inspect a URL */
199
247
  inspect: (siteUrl: string, url: string, opts?: CallOptions) => Promise<InspectUrlIndexResponse>;
200
248
  /** Sitemap operations */
@@ -251,6 +299,7 @@ declare function getIndexingMetadata(client: GoogleSearchConsoleClient, url: str
251
299
  declare function batchRequestIndexing(client: GoogleSearchConsoleClient, urls: string[], options?: {
252
300
  type?: IndexingNotificationType;
253
301
  delayMs?: number;
302
+ concurrency?: number;
254
303
  onProgress?: (result: IndexingResult, index: number, total: number) => void;
255
304
  }): Promise<IndexingResult[]>;
256
305
  interface InspectUrlResult {
@@ -271,6 +320,7 @@ declare function inspectUrl(client: GoogleSearchConsoleClient, siteUrl: string,
271
320
  */
272
321
  declare function batchInspectUrls(client: GoogleSearchConsoleClient, siteUrl: string, urls: string[], options?: {
273
322
  delayMs?: number;
323
+ concurrency?: number;
274
324
  onProgress?: (result: InspectUrlResult, index: number, total: number) => void;
275
325
  }): Promise<InspectUrlResult[]>;
276
326
  /**
@@ -299,6 +349,54 @@ declare function submitSitemap(client: GoogleSearchConsoleClient, siteUrl: strin
299
349
  * Deletes a sitemap from Google Search Console.
300
350
  */
301
351
  declare function deleteSitemap(client: GoogleSearchConsoleClient, siteUrl: string, feedpath: string): Promise<void>;
352
+ /**
353
+ * Add a property to the user's Search Console account.
354
+ *
355
+ * Note: this only registers the property in an unverified state. Ownership
356
+ * must be proven via the Site Verification API (see `verifySite`) before any
357
+ * data is accessible.
358
+ */
359
+ declare function addSite(client: GoogleSearchConsoleClient, siteUrl: string): Promise<void>;
360
+ /**
361
+ * Remove a property from the user's Search Console account.
362
+ */
363
+ declare function deleteSite(client: GoogleSearchConsoleClient, siteUrl: string): Promise<void>;
364
+ /**
365
+ * Resolve a Search Console site URL (`https://example.com/` or
366
+ * `sc-domain:example.com`) to the Site Verification API's site shape.
367
+ */
368
+ declare function siteUrlToVerificationSite(siteUrl: string): VerificationSite;
369
+ /**
370
+ * Methods valid for a given site shape. SITE properties can use META/FILE/
371
+ * ANALYTICS/TAG_MANAGER; INET_DOMAIN must use DNS_TXT or DNS_CNAME.
372
+ */
373
+ declare function verificationMethodsFor(site: VerificationSite): VerificationMethod[];
374
+ /**
375
+ * Get the verification token Google expects to find on the site or DNS.
376
+ */
377
+ declare function getVerificationToken(client: GoogleSearchConsoleClient, siteUrl: string, method: VerificationMethod): Promise<VerificationToken & {
378
+ site: VerificationSite;
379
+ }>;
380
+ /**
381
+ * Trigger Google to validate the placed token. Caller is responsible for
382
+ * having placed the token (HTML tag / file / DNS record) before calling.
383
+ */
384
+ declare function verifySite(client: GoogleSearchConsoleClient, siteUrl: string, method: VerificationMethod): Promise<VerificationWebResource>;
385
+ /**
386
+ * List all verified WebResources for the authed user.
387
+ */
388
+ declare function listVerifiedSites(client: GoogleSearchConsoleClient): Promise<VerificationWebResource[]>;
389
+ /**
390
+ * Fetch a single verified WebResource by id.
391
+ */
392
+ declare function getVerifiedSite(client: GoogleSearchConsoleClient, id: string): Promise<VerificationWebResource>;
393
+ /**
394
+ * Drop the calling user's verified ownership of a WebResource. The placed
395
+ * verification token (meta tag / file / DNS record) MUST be removed first,
396
+ * otherwise Google may auto-re-verify and the call will fail. Other owners
397
+ * on the property are unaffected.
398
+ */
399
+ declare function unverifySite(client: GoogleSearchConsoleClient, id: string): Promise<void>;
302
400
  interface GscdumpApiOptions {
303
401
  /** API key (gsd_user_xxx or gsd_prod_xxx) */
304
402
  apiKey: string;
@@ -463,4 +561,29 @@ declare const INDEXING_EFFECTIVE_LIMIT = 1800;
463
561
  declare function hasGscReadScope(scopes: string | null | undefined): boolean;
464
562
  declare function hasGscWriteScope(scopes: string | null | undefined): boolean;
465
563
  declare function hasIndexingScope(scopes: string | null | undefined): boolean;
466
- export { ApiSite, ApiSitemap, ApiSitemapContent, Auth, AuthClient, AuthOptions, BackfillProgress, CallOptions, DAYS_PER_RANGE, DataRow, DimensionFilter, DimensionFilterGroup, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, GoogleSearchConsoleClient, GoogleSearchConsoleClientOptions, GscError, GscErrorKind, GscdumpApiOptions, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, IndexStatusResult, IndexingIssueType, IndexingMetadata, IndexingNotificationType, IndexingResult, InspectUrlIndexResponse, InspectUrlResult, MS_PER_DAY, MobileUsabilityResult, Period, PublishUrlNotificationResponse, RequiredNonNullable, ResolvedAnalyticsRange, RichResultsResult, SearchAnalyticsQuery, SearchAnalyticsResponse, Site, SiteAnalytics, UrlInspectionResult, UrlNotificationMetadata, addDays, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, progressBar, requestIndexing, rowWithMetricDefaults, storageError, submitSitemap, toIsoDate };
564
+ interface DiscoverSitemapOptions {
565
+ /** User-Agent sent on the discovery requests. */
566
+ userAgent?: string;
567
+ /** AbortSignal threaded through fetches; defaults to a 10s timeout per call. */
568
+ signal?: AbortSignal;
569
+ }
570
+ /**
571
+ * Try to discover a sitemap for `domain` by checking robots.txt for a
572
+ * `Sitemap:` directive, then a small set of common paths. Returns the first
573
+ * URL that responds with a 2xx, or `null`.
574
+ */
575
+ declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
576
+ interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
577
+ /** Maximum nested sitemap-index depth to follow. Default 3. */
578
+ maxDepth?: number;
579
+ /** Stop after this many URLs (across all nested sitemaps). Default unlimited. */
580
+ limit?: number;
581
+ }
582
+ /**
583
+ * Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
584
+ * Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
585
+ * de-duplicated. The XML parser is regex-based — it handles the common
586
+ * `<loc>https://...</loc>` shape but doesn't validate the schema.
587
+ */
588
+ declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
589
+ export { ApiSite, ApiSitemap, ApiSitemapContent, Auth, AuthClient, AuthOptions, BackfillProgress, CallOptions, DAYS_PER_RANGE, DataRow, DimensionFilter, DimensionFilterGroup, DiscoverSitemapOptions, FetchSitemapUrlsOptions, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, GoogleSearchConsoleClient, GoogleSearchConsoleClientOptions, GscError, GscErrorKind, GscdumpApiOptions, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, IndexStatusResult, IndexingIssueType, IndexingMetadata, IndexingNotificationType, IndexingResult, InspectUrlIndexResponse, InspectUrlResult, MS_PER_DAY, MobileUsabilityResult, Period, PublishUrlNotificationResponse, RequiredNonNullable, ResolvedAnalyticsRange, RichResultsResult, SearchAnalyticsQuery, SearchAnalyticsResponse, Site, SiteAnalytics, UrlInspectionResult, UrlNotificationMetadata, VerificationMethod, VerificationSite, VerificationSiteType, VerificationToken, VerificationWebResource, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
package/dist/index.mjs CHANGED
@@ -1,13 +1,30 @@
1
1
  import { ofetch } from "ofetch";
2
2
  async function runSequentialBatch(items, operation, options = {}) {
3
- const { delayMs = 0, onProgress } = options;
4
- const results = [];
5
- for (let i = 0; i < items.length; i++) {
6
- const result = await operation(items[i], i);
7
- results.push(result);
8
- onProgress?.(result, i, items.length);
9
- if (i < items.length - 1 && delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
3
+ const { delayMs = 0, concurrency = 1, onProgress } = options;
4
+ const results = Array.from({ length: items.length });
5
+ let completed = 0;
6
+ if (concurrency <= 1) {
7
+ for (let i = 0; i < items.length; i++) {
8
+ const result = await operation(items[i], i);
9
+ results[i] = result;
10
+ onProgress?.(result, i, items.length);
11
+ if (i < items.length - 1 && delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
12
+ }
13
+ return results;
10
14
  }
15
+ const cursor = { i: 0 };
16
+ const worker = async () => {
17
+ while (true) {
18
+ const i = cursor.i++;
19
+ if (i >= items.length) return;
20
+ const result = await operation(items[i], i);
21
+ results[i] = result;
22
+ completed++;
23
+ onProgress?.(result, completed - 1, items.length);
24
+ if (delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
25
+ }
26
+ };
27
+ await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, worker));
11
28
  return results;
12
29
  }
13
30
  async function requestIndexing(client, url, options = {}) {
@@ -26,9 +43,10 @@ async function getIndexingMetadata(client, url) {
26
43
  }));
27
44
  }
28
45
  async function batchRequestIndexing(client, urls, options = {}) {
29
- const { type = "URL_UPDATED", delayMs = 100, onProgress } = options;
46
+ const { type = "URL_UPDATED", delayMs = 100, concurrency, onProgress } = options;
30
47
  return runSequentialBatch(urls, (url) => requestIndexing(client, url, { type }), {
31
48
  delayMs,
49
+ concurrency,
32
50
  onProgress
33
51
  });
34
52
  }
@@ -40,7 +58,7 @@ async function inspectUrl(client, siteUrl, inspectionUrl) {
40
58
  };
41
59
  }
42
60
  async function batchInspectUrls(client, siteUrl, urls, options = {}) {
43
- const { delayMs = 200, onProgress } = options;
61
+ const { delayMs = 200, concurrency, onProgress } = options;
44
62
  return runSequentialBatch(urls, async (url) => {
45
63
  const { inspection, isIndexed } = await inspectUrl(client, siteUrl, url);
46
64
  return {
@@ -50,6 +68,7 @@ async function batchInspectUrls(client, siteUrl, urls, options = {}) {
50
68
  };
51
69
  }, {
52
70
  delayMs,
71
+ concurrency,
53
72
  onProgress
54
73
  });
55
74
  }
@@ -78,6 +97,58 @@ async function submitSitemap(client, siteUrl, feedpath) {
78
97
  async function deleteSitemap(client, siteUrl, feedpath) {
79
98
  return client.sitemaps.delete(siteUrl, feedpath);
80
99
  }
100
+ async function addSite(client, siteUrl) {
101
+ return client.sites.add(siteUrl);
102
+ }
103
+ async function deleteSite(client, siteUrl) {
104
+ return client.sites.delete(siteUrl);
105
+ }
106
+ const SC_DOMAIN_PREFIX = "sc-domain:";
107
+ function siteUrlToVerificationSite(siteUrl) {
108
+ if (siteUrl.startsWith(SC_DOMAIN_PREFIX)) return {
109
+ type: "INET_DOMAIN",
110
+ identifier: siteUrl.slice(10)
111
+ };
112
+ return {
113
+ type: "SITE",
114
+ identifier: siteUrl
115
+ };
116
+ }
117
+ function verificationMethodsFor(site) {
118
+ if (site.type === "INET_DOMAIN") return ["DNS_TXT", "DNS_CNAME"];
119
+ return [
120
+ "META",
121
+ "FILE",
122
+ "ANALYTICS",
123
+ "TAG_MANAGER"
124
+ ];
125
+ }
126
+ async function getVerificationToken(client, siteUrl, method) {
127
+ const site = siteUrlToVerificationSite(siteUrl);
128
+ return {
129
+ ...await client.verification.getToken({
130
+ site,
131
+ verificationMethod: method
132
+ }),
133
+ site
134
+ };
135
+ }
136
+ async function verifySite(client, siteUrl, method) {
137
+ const site = siteUrlToVerificationSite(siteUrl);
138
+ return client.verification.insert({
139
+ site,
140
+ verificationMethod: method
141
+ });
142
+ }
143
+ async function listVerifiedSites(client) {
144
+ return client.verification.list();
145
+ }
146
+ async function getVerifiedSite(client, id) {
147
+ return client.verification.get(id);
148
+ }
149
+ async function unverifySite(client, id) {
150
+ return client.verification.delete(id);
151
+ }
81
152
  const MS_PER_DAY = 864e5;
82
153
  function toIsoDate(d) {
83
154
  return d.toISOString().slice(0, 10);
@@ -377,11 +448,38 @@ function gscdumpApi(options) {
377
448
  startRow += rows.length;
378
449
  }
379
450
  },
380
- sites: async (opts) => {
381
- return (await fetch("/api/sites", { signal: opts?.signal })).sites.map((s) => ({
382
- siteUrl: s.gscSiteUrl,
383
- permissionLevel: s.permissionLevel || "siteOwner"
384
- }));
451
+ sites: (() => {
452
+ const list = async (opts) => {
453
+ return (await fetch("/api/sites", { signal: opts?.signal })).sites.map((s) => ({
454
+ siteUrl: s.gscSiteUrl,
455
+ permissionLevel: s.permissionLevel || "siteOwner"
456
+ }));
457
+ };
458
+ const unsupported = (op) => () => {
459
+ throw new Error(`sites.${op} not available via gscdump API. Use googleSearchConsole() with OAuth credentials.`);
460
+ };
461
+ return Object.assign(list, {
462
+ list,
463
+ add: unsupported("add"),
464
+ delete: unsupported("delete")
465
+ });
466
+ })(),
467
+ verification: {
468
+ getToken: () => {
469
+ throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
470
+ },
471
+ insert: () => {
472
+ throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
473
+ },
474
+ list: () => {
475
+ throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
476
+ },
477
+ get: () => {
478
+ throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
479
+ },
480
+ delete: () => {
481
+ throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
482
+ }
385
483
  },
386
484
  inspect: () => {
387
485
  throw new Error("URL inspection not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
@@ -413,6 +511,7 @@ function gscdumpApi(options) {
413
511
  }
414
512
  const GSC_API = "https://searchconsole.googleapis.com";
415
513
  const INDEXING_API = "https://indexing.googleapis.com";
514
+ const SITE_VERIFICATION_API = "https://www.googleapis.com/siteVerification/v1";
416
515
  function createAuth(options) {
417
516
  let credentials = { refresh_token: options.refreshToken };
418
517
  return {
@@ -529,8 +628,42 @@ function googleSearchConsole(auth, options = {}) {
529
628
  startRow += rows.length;
530
629
  }
531
630
  },
532
- sites: async (opts) => {
533
- return (await fetch(`${GSC_API}/webmasters/v3/sites`, { signal: opts?.signal })).siteEntry || [];
631
+ sites: (() => {
632
+ const list = async (opts) => {
633
+ return (await fetch(`${GSC_API}/webmasters/v3/sites`, { signal: opts?.signal })).siteEntry || [];
634
+ };
635
+ return Object.assign(list, {
636
+ list,
637
+ add: (siteUrl, opts) => fetch(`${GSC_API}/webmasters/v3/sites/${encodeURIComponent(siteUrl)}`, {
638
+ method: "PUT",
639
+ signal: opts?.signal
640
+ }),
641
+ delete: (siteUrl, opts) => fetch(`${GSC_API}/webmasters/v3/sites/${encodeURIComponent(siteUrl)}`, {
642
+ method: "DELETE",
643
+ signal: opts?.signal
644
+ })
645
+ });
646
+ })(),
647
+ verification: {
648
+ getToken: (params, opts) => fetch(`${SITE_VERIFICATION_API}/token`, {
649
+ method: "POST",
650
+ body: params,
651
+ signal: opts?.signal
652
+ }),
653
+ insert: (params, opts) => fetch(`${SITE_VERIFICATION_API}/webResource`, {
654
+ method: "POST",
655
+ query: { verificationMethod: params.verificationMethod },
656
+ body: { site: params.site },
657
+ signal: opts?.signal
658
+ }),
659
+ list: async (opts) => {
660
+ return (await fetch(`${SITE_VERIFICATION_API}/webResource`, { signal: opts?.signal })).items || [];
661
+ },
662
+ get: (id, opts) => fetch(`${SITE_VERIFICATION_API}/webResource/${encodeURIComponent(id)}`, { signal: opts?.signal }),
663
+ delete: (id, opts) => fetch(`${SITE_VERIFICATION_API}/webResource/${encodeURIComponent(id)}`, {
664
+ method: "DELETE",
665
+ signal: opts?.signal
666
+ })
534
667
  },
535
668
  inspect: (siteUrl, url, opts) => fetch(`${GSC_API}/v1/urlInspection/index:inspect`, {
536
669
  method: "POST",
@@ -796,4 +929,71 @@ function hasIndexingScope(scopes) {
796
929
  if (!scopes) return false;
797
930
  return scopes.includes("googleapis.com/auth/indexing");
798
931
  }
799
- export { DAYS_PER_RANGE, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, MS_PER_DAY, addDays, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSitemap, fetchSitemap, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, progressBar, requestIndexing, rowWithMetricDefaults, storageError, submitSitemap, toIsoDate };
932
+ const FETCH_TIMEOUT_MS = 1e4;
933
+ const COMMON_PATHS = ["/sitemap.xml", "/sitemap_index.xml"];
934
+ const SITEMAP_DIRECTIVE_RE = /^Sitemap:\s*(\S+)/im;
935
+ async function discoverSitemap(domain, options = {}) {
936
+ const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
937
+ const baseUrl = `https://${domain}`;
938
+ const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
939
+ const robotsRes = await fetch(`${baseUrl}/robots.txt`, {
940
+ headers: { "User-Agent": userAgent },
941
+ signal: signalFor()
942
+ }).catch(() => null);
943
+ if (robotsRes?.ok) {
944
+ const match = (await robotsRes.text()).match(SITEMAP_DIRECTIVE_RE);
945
+ if (match?.[1]) {
946
+ if ((await fetch(match[1], {
947
+ method: "HEAD",
948
+ signal: signalFor()
949
+ }).catch(() => null))?.ok) return match[1];
950
+ }
951
+ }
952
+ for (const path of COMMON_PATHS) {
953
+ const url = `${baseUrl}${path}`;
954
+ if ((await fetch(url, {
955
+ method: "HEAD",
956
+ headers: { "User-Agent": userAgent },
957
+ signal: signalFor()
958
+ }).catch(() => null))?.ok) return url;
959
+ }
960
+ return null;
961
+ }
962
+ const LOC_RE = /<loc>([^<]+)<\/loc>/gi;
963
+ const SITEMAPINDEX_RE = /<sitemapindex\b/i;
964
+ async function fetchSitemapUrls(sitemapUrl, options = {}) {
965
+ const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
966
+ const maxDepth = options.maxDepth ?? 3;
967
+ const limit = options.limit;
968
+ const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
969
+ const seen = /* @__PURE__ */ new Set();
970
+ const out = [];
971
+ const visit = async (url, depth) => {
972
+ if (limit != null && out.length >= limit) return;
973
+ if (depth > maxDepth) return;
974
+ const res = await fetch(url, {
975
+ headers: { "User-Agent": userAgent },
976
+ signal: signalFor()
977
+ });
978
+ if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
979
+ const text = await res.text();
980
+ const isIndex = SITEMAPINDEX_RE.test(text);
981
+ const matches = [...text.matchAll(LOC_RE)].map((m) => m[1].trim()).filter(Boolean);
982
+ if (isIndex) {
983
+ for (const child of matches) {
984
+ if (limit != null && out.length >= limit) return;
985
+ await visit(child, depth + 1);
986
+ }
987
+ return;
988
+ }
989
+ for (const u of matches) {
990
+ if (seen.has(u)) continue;
991
+ seen.add(u);
992
+ out.push(u);
993
+ if (limit != null && out.length >= limit) return;
994
+ }
995
+ };
996
+ await visit(sitemapUrl, 0);
997
+ return out;
998
+ }
999
+ export { DAYS_PER_RANGE, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, MS_PER_DAY, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
@@ -10,4 +10,17 @@ interface DiscoverSitemapOptions {
10
10
  * URL that responds with a 2xx, or `null`.
11
11
  */
12
12
  declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
13
- export { DiscoverSitemapOptions, discoverSitemap };
13
+ interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
14
+ /** Maximum nested sitemap-index depth to follow. Default 3. */
15
+ maxDepth?: number;
16
+ /** Stop after this many URLs (across all nested sitemaps). Default unlimited. */
17
+ limit?: number;
18
+ }
19
+ /**
20
+ * Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
21
+ * Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
22
+ * de-duplicated. The XML parser is regex-based — it handles the common
23
+ * `<loc>https://...</loc>` shape but doesn't validate the schema.
24
+ */
25
+ declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
26
+ export { DiscoverSitemapOptions, FetchSitemapUrlsOptions, discoverSitemap, fetchSitemapUrls };
package/dist/sitemap.mjs CHANGED
@@ -28,4 +28,41 @@ async function discoverSitemap(domain, options = {}) {
28
28
  }
29
29
  return null;
30
30
  }
31
- export { discoverSitemap };
31
+ const LOC_RE = /<loc>([^<]+)<\/loc>/gi;
32
+ const SITEMAPINDEX_RE = /<sitemapindex\b/i;
33
+ async function fetchSitemapUrls(sitemapUrl, options = {}) {
34
+ const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
35
+ const maxDepth = options.maxDepth ?? 3;
36
+ const limit = options.limit;
37
+ const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
38
+ const seen = /* @__PURE__ */ new Set();
39
+ const out = [];
40
+ const visit = async (url, depth) => {
41
+ if (limit != null && out.length >= limit) return;
42
+ if (depth > maxDepth) return;
43
+ const res = await fetch(url, {
44
+ headers: { "User-Agent": userAgent },
45
+ signal: signalFor()
46
+ });
47
+ if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
48
+ const text = await res.text();
49
+ const isIndex = SITEMAPINDEX_RE.test(text);
50
+ const matches = [...text.matchAll(LOC_RE)].map((m) => m[1].trim()).filter(Boolean);
51
+ if (isIndex) {
52
+ for (const child of matches) {
53
+ if (limit != null && out.length >= limit) return;
54
+ await visit(child, depth + 1);
55
+ }
56
+ return;
57
+ }
58
+ for (const u of matches) {
59
+ if (seen.has(u)) continue;
60
+ seen.add(u);
61
+ out.push(u);
62
+ if (limit != null && out.length >= limit) return;
63
+ }
64
+ };
65
+ await visit(sitemapUrl, 0);
66
+ return out;
67
+ }
68
+ export { discoverSitemap, fetchSitemapUrls };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "gscdump",
3
3
  "type": "module",
4
- "version": "0.8.0",
4
+ "version": "0.8.2",
5
5
  "description": "Google Search Console API wrapper with typed query builder, streaming pagination, and SEO analysis functions",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -102,7 +102,7 @@
102
102
  "dayjs": "^1.11.20",
103
103
  "defu": "^6.1.7",
104
104
  "ofetch": "^1.5.1",
105
- "ufo": "^1.6.3"
105
+ "ufo": "^1.6.4"
106
106
  },
107
107
  "devDependencies": {
108
108
  "@googleapis/indexing": "^6.0.1",