gscdump 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +126 -3
- package/dist/index.mjs +217 -17
- package/dist/sitemap.d.mts +14 -1
- package/dist/sitemap.mjs +38 -1
- package/package.json +2 -2
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
import { $Fetch, FetchOptions } from "ofetch";
|
|
2
2
|
import { indexing_v3 } from "@googleapis/indexing/build/v3";
|
|
3
3
|
import { searchconsole_v1 } from "@googleapis/searchconsole/build/v1";
|
|
4
|
+
/**
|
|
5
|
+
* Batch runner with optional concurrency, inter-call delay, and progress.
|
|
6
|
+
* Used by batchRequestIndexing / batchInspectUrls. Defaults to sequential
|
|
7
|
+
* (concurrency = 1) because the underlying APIs rate-limit aggressively;
|
|
8
|
+
* callers that know their quota headroom can opt into parallelism.
|
|
9
|
+
*/
|
|
10
|
+
declare function runSequentialBatch<I, R>(items: I[], operation: (item: I, index: number) => Promise<R>, options?: {
|
|
11
|
+
delayMs?: number;
|
|
12
|
+
concurrency?: number;
|
|
13
|
+
onProgress?: (result: R, index: number, total: number) => void;
|
|
14
|
+
}): Promise<R[]>;
|
|
4
15
|
type ApiSite = searchconsole_v1.Schema$WmxSite;
|
|
5
16
|
type ApiSitemap = searchconsole_v1.Schema$WmxSitemap;
|
|
6
17
|
type ApiSitemapContent = searchconsole_v1.Schema$WmxSitemapContent;
|
|
@@ -162,6 +173,21 @@ interface GSCQueryBuilder<D extends Dimension[] = [], C = object> {
|
|
|
162
173
|
toBody: () => SearchAnalyticsQuery;
|
|
163
174
|
getState: () => BuilderState;
|
|
164
175
|
}
|
|
176
|
+
type VerificationMethod = 'META' | 'FILE' | 'DNS_TXT' | 'DNS_CNAME' | 'ANALYTICS' | 'TAG_MANAGER';
|
|
177
|
+
type VerificationSiteType = 'SITE' | 'INET_DOMAIN' | 'ANDROID_APP';
|
|
178
|
+
interface VerificationSite {
|
|
179
|
+
type: VerificationSiteType;
|
|
180
|
+
identifier: string;
|
|
181
|
+
}
|
|
182
|
+
interface VerificationToken {
|
|
183
|
+
method: string;
|
|
184
|
+
token: string;
|
|
185
|
+
}
|
|
186
|
+
interface VerificationWebResource {
|
|
187
|
+
id?: string;
|
|
188
|
+
site: VerificationSite;
|
|
189
|
+
owners?: string[];
|
|
190
|
+
}
|
|
165
191
|
/**
|
|
166
192
|
* Compatible interface with OAuth2Client from google-auth-library
|
|
167
193
|
*/
|
|
@@ -193,8 +219,30 @@ interface CallOptions {
|
|
|
193
219
|
interface GoogleSearchConsoleClient {
|
|
194
220
|
/** Query search analytics with builder, returns async generator yielding typed row batches */
|
|
195
221
|
query: <D extends Dimension[], C>(siteUrl: string, builder: GSCQueryBuilder<D, C>, opts?: CallOptions) => AsyncGenerator<GSCRow<D, C>[]>;
|
|
196
|
-
/**
|
|
197
|
-
|
|
222
|
+
/**
|
|
223
|
+
* List all sites. Also exposes write ops as `client.sites.add(siteUrl)` and
|
|
224
|
+
* `client.sites.delete(siteUrl)`. Calling `client.sites()` is equivalent to
|
|
225
|
+
* `client.sites.list()`.
|
|
226
|
+
*/
|
|
227
|
+
sites: ((opts?: CallOptions) => Promise<ApiSite[]>) & {
|
|
228
|
+
list: (opts?: CallOptions) => Promise<ApiSite[]>; /** Add a property in unverified state. Caller must verify ownership separately. */
|
|
229
|
+
add: (siteUrl: string, opts?: CallOptions) => Promise<void>; /** Remove a property from the user's account. */
|
|
230
|
+
delete: (siteUrl: string, opts?: CallOptions) => Promise<void>;
|
|
231
|
+
};
|
|
232
|
+
/** Site Verification API (siteverification.googleapis.com). Required to flip a property from unverified to verified. */
|
|
233
|
+
verification: {
|
|
234
|
+
/** Returns the token to place on the site/DNS, plus the resolved method. */getToken: (params: {
|
|
235
|
+
site: VerificationSite;
|
|
236
|
+
verificationMethod: VerificationMethod;
|
|
237
|
+
}, opts?: CallOptions) => Promise<VerificationToken>; /** Triggers Google to fetch + validate; returns the verified WebResource. */
|
|
238
|
+
insert: (params: {
|
|
239
|
+
site: VerificationSite;
|
|
240
|
+
verificationMethod: VerificationMethod;
|
|
241
|
+
}, opts?: CallOptions) => Promise<VerificationWebResource>;
|
|
242
|
+
list: (opts?: CallOptions) => Promise<VerificationWebResource[]>;
|
|
243
|
+
get: (id: string, opts?: CallOptions) => Promise<VerificationWebResource>;
|
|
244
|
+
delete: (id: string, opts?: CallOptions) => Promise<void>;
|
|
245
|
+
};
|
|
198
246
|
/** Inspect a URL */
|
|
199
247
|
inspect: (siteUrl: string, url: string, opts?: CallOptions) => Promise<InspectUrlIndexResponse>;
|
|
200
248
|
/** Sitemap operations */
|
|
@@ -251,6 +299,7 @@ declare function getIndexingMetadata(client: GoogleSearchConsoleClient, url: str
|
|
|
251
299
|
declare function batchRequestIndexing(client: GoogleSearchConsoleClient, urls: string[], options?: {
|
|
252
300
|
type?: IndexingNotificationType;
|
|
253
301
|
delayMs?: number;
|
|
302
|
+
concurrency?: number;
|
|
254
303
|
onProgress?: (result: IndexingResult, index: number, total: number) => void;
|
|
255
304
|
}): Promise<IndexingResult[]>;
|
|
256
305
|
interface InspectUrlResult {
|
|
@@ -271,6 +320,7 @@ declare function inspectUrl(client: GoogleSearchConsoleClient, siteUrl: string,
|
|
|
271
320
|
*/
|
|
272
321
|
declare function batchInspectUrls(client: GoogleSearchConsoleClient, siteUrl: string, urls: string[], options?: {
|
|
273
322
|
delayMs?: number;
|
|
323
|
+
concurrency?: number;
|
|
274
324
|
onProgress?: (result: InspectUrlResult, index: number, total: number) => void;
|
|
275
325
|
}): Promise<InspectUrlResult[]>;
|
|
276
326
|
/**
|
|
@@ -299,6 +349,54 @@ declare function submitSitemap(client: GoogleSearchConsoleClient, siteUrl: strin
|
|
|
299
349
|
* Deletes a sitemap from Google Search Console.
|
|
300
350
|
*/
|
|
301
351
|
declare function deleteSitemap(client: GoogleSearchConsoleClient, siteUrl: string, feedpath: string): Promise<void>;
|
|
352
|
+
/**
|
|
353
|
+
* Add a property to the user's Search Console account.
|
|
354
|
+
*
|
|
355
|
+
* Note: this only registers the property in an unverified state. Ownership
|
|
356
|
+
* must be proven via the Site Verification API (see `verifySite`) before any
|
|
357
|
+
* data is accessible.
|
|
358
|
+
*/
|
|
359
|
+
declare function addSite(client: GoogleSearchConsoleClient, siteUrl: string): Promise<void>;
|
|
360
|
+
/**
|
|
361
|
+
* Remove a property from the user's Search Console account.
|
|
362
|
+
*/
|
|
363
|
+
declare function deleteSite(client: GoogleSearchConsoleClient, siteUrl: string): Promise<void>;
|
|
364
|
+
/**
|
|
365
|
+
* Resolve a Search Console site URL (`https://example.com/` or
|
|
366
|
+
* `sc-domain:example.com`) to the Site Verification API's site shape.
|
|
367
|
+
*/
|
|
368
|
+
declare function siteUrlToVerificationSite(siteUrl: string): VerificationSite;
|
|
369
|
+
/**
|
|
370
|
+
* Methods valid for a given site shape. SITE properties can use META/FILE/
|
|
371
|
+
* ANALYTICS/TAG_MANAGER; INET_DOMAIN must use DNS_TXT or DNS_CNAME.
|
|
372
|
+
*/
|
|
373
|
+
declare function verificationMethodsFor(site: VerificationSite): VerificationMethod[];
|
|
374
|
+
/**
|
|
375
|
+
* Get the verification token Google expects to find on the site or DNS.
|
|
376
|
+
*/
|
|
377
|
+
declare function getVerificationToken(client: GoogleSearchConsoleClient, siteUrl: string, method: VerificationMethod): Promise<VerificationToken & {
|
|
378
|
+
site: VerificationSite;
|
|
379
|
+
}>;
|
|
380
|
+
/**
|
|
381
|
+
* Trigger Google to validate the placed token. Caller is responsible for
|
|
382
|
+
* having placed the token (HTML tag / file / DNS record) before calling.
|
|
383
|
+
*/
|
|
384
|
+
declare function verifySite(client: GoogleSearchConsoleClient, siteUrl: string, method: VerificationMethod): Promise<VerificationWebResource>;
|
|
385
|
+
/**
|
|
386
|
+
* List all verified WebResources for the authed user.
|
|
387
|
+
*/
|
|
388
|
+
declare function listVerifiedSites(client: GoogleSearchConsoleClient): Promise<VerificationWebResource[]>;
|
|
389
|
+
/**
|
|
390
|
+
* Fetch a single verified WebResource by id.
|
|
391
|
+
*/
|
|
392
|
+
declare function getVerifiedSite(client: GoogleSearchConsoleClient, id: string): Promise<VerificationWebResource>;
|
|
393
|
+
/**
|
|
394
|
+
* Drop the calling user's verified ownership of a WebResource. The placed
|
|
395
|
+
* verification token (meta tag / file / DNS record) MUST be removed first,
|
|
396
|
+
* otherwise Google may auto-re-verify and the call will fail. Other owners
|
|
397
|
+
* on the property are unaffected.
|
|
398
|
+
*/
|
|
399
|
+
declare function unverifySite(client: GoogleSearchConsoleClient, id: string): Promise<void>;
|
|
302
400
|
interface GscdumpApiOptions {
|
|
303
401
|
/** API key (gsd_user_xxx or gsd_prod_xxx) */
|
|
304
402
|
apiKey: string;
|
|
@@ -463,4 +561,29 @@ declare const INDEXING_EFFECTIVE_LIMIT = 1800;
|
|
|
463
561
|
declare function hasGscReadScope(scopes: string | null | undefined): boolean;
|
|
464
562
|
declare function hasGscWriteScope(scopes: string | null | undefined): boolean;
|
|
465
563
|
declare function hasIndexingScope(scopes: string | null | undefined): boolean;
|
|
466
|
-
|
|
564
|
+
interface DiscoverSitemapOptions {
|
|
565
|
+
/** User-Agent sent on the discovery requests. */
|
|
566
|
+
userAgent?: string;
|
|
567
|
+
/** AbortSignal threaded through fetches; defaults to a 10s timeout per call. */
|
|
568
|
+
signal?: AbortSignal;
|
|
569
|
+
}
|
|
570
|
+
/**
|
|
571
|
+
* Try to discover a sitemap for `domain` by checking robots.txt for a
|
|
572
|
+
* `Sitemap:` directive, then a small set of common paths. Returns the first
|
|
573
|
+
* URL that responds with a 2xx, or `null`.
|
|
574
|
+
*/
|
|
575
|
+
declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
|
|
576
|
+
interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
|
|
577
|
+
/** Maximum nested sitemap-index depth to follow. Default 3. */
|
|
578
|
+
maxDepth?: number;
|
|
579
|
+
/** Stop after this many URLs (across all nested sitemaps). Default unlimited. */
|
|
580
|
+
limit?: number;
|
|
581
|
+
}
|
|
582
|
+
/**
|
|
583
|
+
* Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
|
|
584
|
+
* Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
|
|
585
|
+
* de-duplicated. The XML parser is regex-based — it handles the common
|
|
586
|
+
* `<loc>https://...</loc>` shape but doesn't validate the schema.
|
|
587
|
+
*/
|
|
588
|
+
declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
|
|
589
|
+
export { ApiSite, ApiSitemap, ApiSitemapContent, Auth, AuthClient, AuthOptions, BackfillProgress, CallOptions, DAYS_PER_RANGE, DataRow, DimensionFilter, DimensionFilterGroup, DiscoverSitemapOptions, FetchSitemapUrlsOptions, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, GoogleSearchConsoleClient, GoogleSearchConsoleClientOptions, GscError, GscErrorKind, GscdumpApiOptions, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, IndexStatusResult, IndexingIssueType, IndexingMetadata, IndexingNotificationType, IndexingResult, InspectUrlIndexResponse, InspectUrlResult, MS_PER_DAY, MobileUsabilityResult, Period, PublishUrlNotificationResponse, RequiredNonNullable, ResolvedAnalyticsRange, RichResultsResult, SearchAnalyticsQuery, SearchAnalyticsResponse, Site, SiteAnalytics, UrlInspectionResult, UrlNotificationMetadata, VerificationMethod, VerificationSite, VerificationSiteType, VerificationToken, VerificationWebResource, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
|
package/dist/index.mjs
CHANGED
|
@@ -1,13 +1,30 @@
|
|
|
1
1
|
import { ofetch } from "ofetch";
|
|
2
2
|
async function runSequentialBatch(items, operation, options = {}) {
|
|
3
|
-
const { delayMs = 0, onProgress } = options;
|
|
4
|
-
const results =
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
3
|
+
const { delayMs = 0, concurrency = 1, onProgress } = options;
|
|
4
|
+
const results = Array.from({ length: items.length });
|
|
5
|
+
let completed = 0;
|
|
6
|
+
if (concurrency <= 1) {
|
|
7
|
+
for (let i = 0; i < items.length; i++) {
|
|
8
|
+
const result = await operation(items[i], i);
|
|
9
|
+
results[i] = result;
|
|
10
|
+
onProgress?.(result, i, items.length);
|
|
11
|
+
if (i < items.length - 1 && delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
|
|
12
|
+
}
|
|
13
|
+
return results;
|
|
10
14
|
}
|
|
15
|
+
const cursor = { i: 0 };
|
|
16
|
+
const worker = async () => {
|
|
17
|
+
while (true) {
|
|
18
|
+
const i = cursor.i++;
|
|
19
|
+
if (i >= items.length) return;
|
|
20
|
+
const result = await operation(items[i], i);
|
|
21
|
+
results[i] = result;
|
|
22
|
+
completed++;
|
|
23
|
+
onProgress?.(result, completed - 1, items.length);
|
|
24
|
+
if (delayMs > 0) await new Promise((r) => setTimeout(r, delayMs));
|
|
25
|
+
}
|
|
26
|
+
};
|
|
27
|
+
await Promise.all(Array.from({ length: Math.min(concurrency, items.length) }, worker));
|
|
11
28
|
return results;
|
|
12
29
|
}
|
|
13
30
|
async function requestIndexing(client, url, options = {}) {
|
|
@@ -26,9 +43,10 @@ async function getIndexingMetadata(client, url) {
|
|
|
26
43
|
}));
|
|
27
44
|
}
|
|
28
45
|
async function batchRequestIndexing(client, urls, options = {}) {
|
|
29
|
-
const { type = "URL_UPDATED", delayMs = 100, onProgress } = options;
|
|
46
|
+
const { type = "URL_UPDATED", delayMs = 100, concurrency, onProgress } = options;
|
|
30
47
|
return runSequentialBatch(urls, (url) => requestIndexing(client, url, { type }), {
|
|
31
48
|
delayMs,
|
|
49
|
+
concurrency,
|
|
32
50
|
onProgress
|
|
33
51
|
});
|
|
34
52
|
}
|
|
@@ -40,7 +58,7 @@ async function inspectUrl(client, siteUrl, inspectionUrl) {
|
|
|
40
58
|
};
|
|
41
59
|
}
|
|
42
60
|
async function batchInspectUrls(client, siteUrl, urls, options = {}) {
|
|
43
|
-
const { delayMs = 200, onProgress } = options;
|
|
61
|
+
const { delayMs = 200, concurrency, onProgress } = options;
|
|
44
62
|
return runSequentialBatch(urls, async (url) => {
|
|
45
63
|
const { inspection, isIndexed } = await inspectUrl(client, siteUrl, url);
|
|
46
64
|
return {
|
|
@@ -50,6 +68,7 @@ async function batchInspectUrls(client, siteUrl, urls, options = {}) {
|
|
|
50
68
|
};
|
|
51
69
|
}, {
|
|
52
70
|
delayMs,
|
|
71
|
+
concurrency,
|
|
53
72
|
onProgress
|
|
54
73
|
});
|
|
55
74
|
}
|
|
@@ -78,6 +97,58 @@ async function submitSitemap(client, siteUrl, feedpath) {
|
|
|
78
97
|
async function deleteSitemap(client, siteUrl, feedpath) {
|
|
79
98
|
return client.sitemaps.delete(siteUrl, feedpath);
|
|
80
99
|
}
|
|
100
|
+
async function addSite(client, siteUrl) {
|
|
101
|
+
return client.sites.add(siteUrl);
|
|
102
|
+
}
|
|
103
|
+
async function deleteSite(client, siteUrl) {
|
|
104
|
+
return client.sites.delete(siteUrl);
|
|
105
|
+
}
|
|
106
|
+
const SC_DOMAIN_PREFIX = "sc-domain:";
|
|
107
|
+
function siteUrlToVerificationSite(siteUrl) {
|
|
108
|
+
if (siteUrl.startsWith(SC_DOMAIN_PREFIX)) return {
|
|
109
|
+
type: "INET_DOMAIN",
|
|
110
|
+
identifier: siteUrl.slice(10)
|
|
111
|
+
};
|
|
112
|
+
return {
|
|
113
|
+
type: "SITE",
|
|
114
|
+
identifier: siteUrl
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
function verificationMethodsFor(site) {
|
|
118
|
+
if (site.type === "INET_DOMAIN") return ["DNS_TXT", "DNS_CNAME"];
|
|
119
|
+
return [
|
|
120
|
+
"META",
|
|
121
|
+
"FILE",
|
|
122
|
+
"ANALYTICS",
|
|
123
|
+
"TAG_MANAGER"
|
|
124
|
+
];
|
|
125
|
+
}
|
|
126
|
+
async function getVerificationToken(client, siteUrl, method) {
|
|
127
|
+
const site = siteUrlToVerificationSite(siteUrl);
|
|
128
|
+
return {
|
|
129
|
+
...await client.verification.getToken({
|
|
130
|
+
site,
|
|
131
|
+
verificationMethod: method
|
|
132
|
+
}),
|
|
133
|
+
site
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
async function verifySite(client, siteUrl, method) {
|
|
137
|
+
const site = siteUrlToVerificationSite(siteUrl);
|
|
138
|
+
return client.verification.insert({
|
|
139
|
+
site,
|
|
140
|
+
verificationMethod: method
|
|
141
|
+
});
|
|
142
|
+
}
|
|
143
|
+
async function listVerifiedSites(client) {
|
|
144
|
+
return client.verification.list();
|
|
145
|
+
}
|
|
146
|
+
async function getVerifiedSite(client, id) {
|
|
147
|
+
return client.verification.get(id);
|
|
148
|
+
}
|
|
149
|
+
async function unverifySite(client, id) {
|
|
150
|
+
return client.verification.delete(id);
|
|
151
|
+
}
|
|
81
152
|
const MS_PER_DAY = 864e5;
|
|
82
153
|
function toIsoDate(d) {
|
|
83
154
|
return d.toISOString().slice(0, 10);
|
|
@@ -377,11 +448,38 @@ function gscdumpApi(options) {
|
|
|
377
448
|
startRow += rows.length;
|
|
378
449
|
}
|
|
379
450
|
},
|
|
380
|
-
sites:
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
451
|
+
sites: (() => {
|
|
452
|
+
const list = async (opts) => {
|
|
453
|
+
return (await fetch("/api/sites", { signal: opts?.signal })).sites.map((s) => ({
|
|
454
|
+
siteUrl: s.gscSiteUrl,
|
|
455
|
+
permissionLevel: s.permissionLevel || "siteOwner"
|
|
456
|
+
}));
|
|
457
|
+
};
|
|
458
|
+
const unsupported = (op) => () => {
|
|
459
|
+
throw new Error(`sites.${op} not available via gscdump API. Use googleSearchConsole() with OAuth credentials.`);
|
|
460
|
+
};
|
|
461
|
+
return Object.assign(list, {
|
|
462
|
+
list,
|
|
463
|
+
add: unsupported("add"),
|
|
464
|
+
delete: unsupported("delete")
|
|
465
|
+
});
|
|
466
|
+
})(),
|
|
467
|
+
verification: {
|
|
468
|
+
getToken: () => {
|
|
469
|
+
throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
470
|
+
},
|
|
471
|
+
insert: () => {
|
|
472
|
+
throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
473
|
+
},
|
|
474
|
+
list: () => {
|
|
475
|
+
throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
476
|
+
},
|
|
477
|
+
get: () => {
|
|
478
|
+
throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
479
|
+
},
|
|
480
|
+
delete: () => {
|
|
481
|
+
throw new Error("Site Verification API not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
482
|
+
}
|
|
385
483
|
},
|
|
386
484
|
inspect: () => {
|
|
387
485
|
throw new Error("URL inspection not available via gscdump API. Use googleSearchConsole() with OAuth credentials.");
|
|
@@ -413,6 +511,7 @@ function gscdumpApi(options) {
|
|
|
413
511
|
}
|
|
414
512
|
const GSC_API = "https://searchconsole.googleapis.com";
|
|
415
513
|
const INDEXING_API = "https://indexing.googleapis.com";
|
|
514
|
+
const SITE_VERIFICATION_API = "https://www.googleapis.com/siteVerification/v1";
|
|
416
515
|
function createAuth(options) {
|
|
417
516
|
let credentials = { refresh_token: options.refreshToken };
|
|
418
517
|
return {
|
|
@@ -529,8 +628,42 @@ function googleSearchConsole(auth, options = {}) {
|
|
|
529
628
|
startRow += rows.length;
|
|
530
629
|
}
|
|
531
630
|
},
|
|
532
|
-
sites:
|
|
533
|
-
|
|
631
|
+
sites: (() => {
|
|
632
|
+
const list = async (opts) => {
|
|
633
|
+
return (await fetch(`${GSC_API}/webmasters/v3/sites`, { signal: opts?.signal })).siteEntry || [];
|
|
634
|
+
};
|
|
635
|
+
return Object.assign(list, {
|
|
636
|
+
list,
|
|
637
|
+
add: (siteUrl, opts) => fetch(`${GSC_API}/webmasters/v3/sites/${encodeURIComponent(siteUrl)}`, {
|
|
638
|
+
method: "PUT",
|
|
639
|
+
signal: opts?.signal
|
|
640
|
+
}),
|
|
641
|
+
delete: (siteUrl, opts) => fetch(`${GSC_API}/webmasters/v3/sites/${encodeURIComponent(siteUrl)}`, {
|
|
642
|
+
method: "DELETE",
|
|
643
|
+
signal: opts?.signal
|
|
644
|
+
})
|
|
645
|
+
});
|
|
646
|
+
})(),
|
|
647
|
+
verification: {
|
|
648
|
+
getToken: (params, opts) => fetch(`${SITE_VERIFICATION_API}/token`, {
|
|
649
|
+
method: "POST",
|
|
650
|
+
body: params,
|
|
651
|
+
signal: opts?.signal
|
|
652
|
+
}),
|
|
653
|
+
insert: (params, opts) => fetch(`${SITE_VERIFICATION_API}/webResource`, {
|
|
654
|
+
method: "POST",
|
|
655
|
+
query: { verificationMethod: params.verificationMethod },
|
|
656
|
+
body: { site: params.site },
|
|
657
|
+
signal: opts?.signal
|
|
658
|
+
}),
|
|
659
|
+
list: async (opts) => {
|
|
660
|
+
return (await fetch(`${SITE_VERIFICATION_API}/webResource`, { signal: opts?.signal })).items || [];
|
|
661
|
+
},
|
|
662
|
+
get: (id, opts) => fetch(`${SITE_VERIFICATION_API}/webResource/${encodeURIComponent(id)}`, { signal: opts?.signal }),
|
|
663
|
+
delete: (id, opts) => fetch(`${SITE_VERIFICATION_API}/webResource/${encodeURIComponent(id)}`, {
|
|
664
|
+
method: "DELETE",
|
|
665
|
+
signal: opts?.signal
|
|
666
|
+
})
|
|
534
667
|
},
|
|
535
668
|
inspect: (siteUrl, url, opts) => fetch(`${GSC_API}/v1/urlInspection/index:inspect`, {
|
|
536
669
|
method: "POST",
|
|
@@ -796,4 +929,71 @@ function hasIndexingScope(scopes) {
|
|
|
796
929
|
if (!scopes) return false;
|
|
797
930
|
return scopes.includes("googleapis.com/auth/indexing");
|
|
798
931
|
}
|
|
799
|
-
|
|
932
|
+
const FETCH_TIMEOUT_MS = 1e4;
|
|
933
|
+
const COMMON_PATHS = ["/sitemap.xml", "/sitemap_index.xml"];
|
|
934
|
+
const SITEMAP_DIRECTIVE_RE = /^Sitemap:\s*(\S+)/im;
|
|
935
|
+
async function discoverSitemap(domain, options = {}) {
|
|
936
|
+
const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
|
|
937
|
+
const baseUrl = `https://${domain}`;
|
|
938
|
+
const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
|
|
939
|
+
const robotsRes = await fetch(`${baseUrl}/robots.txt`, {
|
|
940
|
+
headers: { "User-Agent": userAgent },
|
|
941
|
+
signal: signalFor()
|
|
942
|
+
}).catch(() => null);
|
|
943
|
+
if (robotsRes?.ok) {
|
|
944
|
+
const match = (await robotsRes.text()).match(SITEMAP_DIRECTIVE_RE);
|
|
945
|
+
if (match?.[1]) {
|
|
946
|
+
if ((await fetch(match[1], {
|
|
947
|
+
method: "HEAD",
|
|
948
|
+
signal: signalFor()
|
|
949
|
+
}).catch(() => null))?.ok) return match[1];
|
|
950
|
+
}
|
|
951
|
+
}
|
|
952
|
+
for (const path of COMMON_PATHS) {
|
|
953
|
+
const url = `${baseUrl}${path}`;
|
|
954
|
+
if ((await fetch(url, {
|
|
955
|
+
method: "HEAD",
|
|
956
|
+
headers: { "User-Agent": userAgent },
|
|
957
|
+
signal: signalFor()
|
|
958
|
+
}).catch(() => null))?.ok) return url;
|
|
959
|
+
}
|
|
960
|
+
return null;
|
|
961
|
+
}
|
|
962
|
+
const LOC_RE = /<loc>([^<]+)<\/loc>/gi;
|
|
963
|
+
const SITEMAPINDEX_RE = /<sitemapindex\b/i;
|
|
964
|
+
async function fetchSitemapUrls(sitemapUrl, options = {}) {
|
|
965
|
+
const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
|
|
966
|
+
const maxDepth = options.maxDepth ?? 3;
|
|
967
|
+
const limit = options.limit;
|
|
968
|
+
const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
|
|
969
|
+
const seen = /* @__PURE__ */ new Set();
|
|
970
|
+
const out = [];
|
|
971
|
+
const visit = async (url, depth) => {
|
|
972
|
+
if (limit != null && out.length >= limit) return;
|
|
973
|
+
if (depth > maxDepth) return;
|
|
974
|
+
const res = await fetch(url, {
|
|
975
|
+
headers: { "User-Agent": userAgent },
|
|
976
|
+
signal: signalFor()
|
|
977
|
+
});
|
|
978
|
+
if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
|
|
979
|
+
const text = await res.text();
|
|
980
|
+
const isIndex = SITEMAPINDEX_RE.test(text);
|
|
981
|
+
const matches = [...text.matchAll(LOC_RE)].map((m) => m[1].trim()).filter(Boolean);
|
|
982
|
+
if (isIndex) {
|
|
983
|
+
for (const child of matches) {
|
|
984
|
+
if (limit != null && out.length >= limit) return;
|
|
985
|
+
await visit(child, depth + 1);
|
|
986
|
+
}
|
|
987
|
+
return;
|
|
988
|
+
}
|
|
989
|
+
for (const u of matches) {
|
|
990
|
+
if (seen.has(u)) continue;
|
|
991
|
+
seen.add(u);
|
|
992
|
+
out.push(u);
|
|
993
|
+
if (limit != null && out.length >= limit) return;
|
|
994
|
+
}
|
|
995
|
+
};
|
|
996
|
+
await visit(sitemapUrl, 0);
|
|
997
|
+
return out;
|
|
998
|
+
}
|
|
999
|
+
export { DAYS_PER_RANGE, GSC_FINALIZED_LAG_DAYS, GSC_FRESHEST_LAG_DAYS, GSC_QUOTAS, GSC_RETENTION_MONTHS, INDEXING_DAILY_LIMIT, INDEXING_EFFECTIVE_LIMIT, INDEXING_ISSUE_FILTERS, INDEXING_ISSUE_LABELS, INDEXING_ISSUE_SEVERITY, MS_PER_DAY, addDays, addSite, batchInspectUrls, batchRequestIndexing, classifyError, countDays, createAuth, createFetch, daysAgo, deleteSite, deleteSitemap, discoverSitemap, fetchSitemap, fetchSitemapUrls, fetchSitemaps, fetchSites, fetchSitesWithSitemaps, formatErrorForCli, generateGscDateRange, getBackfillProgress, getDateRange, getFreshestGscDate, getIndexingMetadata, getLatestGscDate, getNextDate, getOldestGscDate, getPendingDates, getPreviousDate, getPstDate, getVerificationToken, getVerifiedSite, googleSearchConsole, groupIntoRanges, gscdumpApi, hasGscReadScope, hasGscWriteScope, hasIndexingScope, inspectUrl, isPermissionDeniedError, isValidGscDate, listVerifiedSites, progressBar, requestIndexing, rowWithMetricDefaults, runSequentialBatch, siteUrlToVerificationSite, storageError, submitSitemap, toIsoDate, unverifySite, verificationMethodsFor, verifySite };
|
package/dist/sitemap.d.mts
CHANGED
|
@@ -10,4 +10,17 @@ interface DiscoverSitemapOptions {
|
|
|
10
10
|
* URL that responds with a 2xx, or `null`.
|
|
11
11
|
*/
|
|
12
12
|
declare function discoverSitemap(domain: string, options?: DiscoverSitemapOptions): Promise<string | null>;
|
|
13
|
-
|
|
13
|
+
interface FetchSitemapUrlsOptions extends DiscoverSitemapOptions {
|
|
14
|
+
/** Maximum nested sitemap-index depth to follow. Default 3. */
|
|
15
|
+
maxDepth?: number;
|
|
16
|
+
/** Stop after this many URLs (across all nested sitemaps). Default unlimited. */
|
|
17
|
+
limit?: number;
|
|
18
|
+
}
|
|
19
|
+
/**
|
|
20
|
+
* Fetch a sitemap (or sitemap index) and return the list of `<loc>` URLs.
|
|
21
|
+
* Sitemap-index files are followed up to `maxDepth` levels. Duplicates are
|
|
22
|
+
* de-duplicated. The XML parser is regex-based — it handles the common
|
|
23
|
+
* `<loc>https://...</loc>` shape but doesn't validate the schema.
|
|
24
|
+
*/
|
|
25
|
+
declare function fetchSitemapUrls(sitemapUrl: string, options?: FetchSitemapUrlsOptions): Promise<string[]>;
|
|
26
|
+
export { DiscoverSitemapOptions, FetchSitemapUrlsOptions, discoverSitemap, fetchSitemapUrls };
|
package/dist/sitemap.mjs
CHANGED
|
@@ -28,4 +28,41 @@ async function discoverSitemap(domain, options = {}) {
|
|
|
28
28
|
}
|
|
29
29
|
return null;
|
|
30
30
|
}
|
|
31
|
-
|
|
31
|
+
const LOC_RE = /<loc>([^<]+)<\/loc>/gi;
|
|
32
|
+
const SITEMAPINDEX_RE = /<sitemapindex\b/i;
|
|
33
|
+
async function fetchSitemapUrls(sitemapUrl, options = {}) {
|
|
34
|
+
const userAgent = options.userAgent ?? "gscdump sitemap fetcher";
|
|
35
|
+
const maxDepth = options.maxDepth ?? 3;
|
|
36
|
+
const limit = options.limit;
|
|
37
|
+
const signalFor = () => options.signal ?? AbortSignal.timeout(FETCH_TIMEOUT_MS);
|
|
38
|
+
const seen = /* @__PURE__ */ new Set();
|
|
39
|
+
const out = [];
|
|
40
|
+
const visit = async (url, depth) => {
|
|
41
|
+
if (limit != null && out.length >= limit) return;
|
|
42
|
+
if (depth > maxDepth) return;
|
|
43
|
+
const res = await fetch(url, {
|
|
44
|
+
headers: { "User-Agent": userAgent },
|
|
45
|
+
signal: signalFor()
|
|
46
|
+
});
|
|
47
|
+
if (!res.ok) throw new Error(`Fetch ${url} failed: ${res.status}`);
|
|
48
|
+
const text = await res.text();
|
|
49
|
+
const isIndex = SITEMAPINDEX_RE.test(text);
|
|
50
|
+
const matches = [...text.matchAll(LOC_RE)].map((m) => m[1].trim()).filter(Boolean);
|
|
51
|
+
if (isIndex) {
|
|
52
|
+
for (const child of matches) {
|
|
53
|
+
if (limit != null && out.length >= limit) return;
|
|
54
|
+
await visit(child, depth + 1);
|
|
55
|
+
}
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
for (const u of matches) {
|
|
59
|
+
if (seen.has(u)) continue;
|
|
60
|
+
seen.add(u);
|
|
61
|
+
out.push(u);
|
|
62
|
+
if (limit != null && out.length >= limit) return;
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
await visit(sitemapUrl, 0);
|
|
66
|
+
return out;
|
|
67
|
+
}
|
|
68
|
+
export { discoverSitemap, fetchSitemapUrls };
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "gscdump",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.8.
|
|
4
|
+
"version": "0.8.2",
|
|
5
5
|
"description": "Google Search Console API wrapper with typed query builder, streaming pagination, and SEO analysis functions",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -102,7 +102,7 @@
|
|
|
102
102
|
"dayjs": "^1.11.20",
|
|
103
103
|
"defu": "^6.1.7",
|
|
104
104
|
"ofetch": "^1.5.1",
|
|
105
|
-
"ufo": "^1.6.
|
|
105
|
+
"ufo": "^1.6.4"
|
|
106
106
|
},
|
|
107
107
|
"devDependencies": {
|
|
108
108
|
"@googleapis/indexing": "^6.0.1",
|