@nitpicker/report-google-sheets 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/CHANGELOG.md +8 -0
  2. package/LICENSE +191 -0
  3. package/README.md +13 -0
  4. package/lib/archive.d.ts +6 -0
  5. package/lib/archive.js +24 -0
  6. package/lib/data/add-to-summary.d.ts +8 -0
  7. package/lib/data/add-to-summary.js +10 -0
  8. package/lib/data/create-compares.d.ts +19 -0
  9. package/lib/data/create-compares.js +64 -0
  10. package/lib/data/create-discrepancies.d.ts +19 -0
  11. package/lib/data/create-discrepancies.js +64 -0
  12. package/lib/data/create-image-list.d.ts +13 -0
  13. package/lib/data/create-image-list.js +63 -0
  14. package/lib/data/create-links.d.ts +15 -0
  15. package/lib/data/create-links.js +109 -0
  16. package/lib/data/create-page-list.d.ts +29 -0
  17. package/lib/data/create-page-list.js +380 -0
  18. package/lib/data/create-referrers-relational-table.d.ts +18 -0
  19. package/lib/data/create-referrers-relational-table.js +96 -0
  20. package/lib/data/create-resources-relational-table.d.ts +14 -0
  21. package/lib/data/create-resources-relational-table.js +84 -0
  22. package/lib/data/create-resources.d.ts +13 -0
  23. package/lib/data/create-resources.js +45 -0
  24. package/lib/data/create-violations.d.ts +12 -0
  25. package/lib/data/create-violations.js +42 -0
  26. package/lib/debug.d.ts +6 -0
  27. package/lib/debug.js +6 -0
  28. package/lib/index.d.ts +1 -0
  29. package/lib/index.js +1 -0
  30. package/lib/load-config.d.ts +6 -0
  31. package/lib/load-config.js +14 -0
  32. package/lib/report.d.ts +44 -0
  33. package/lib/report.js +180 -0
  34. package/lib/reports/get-plugin-reports.d.ts +7 -0
  35. package/lib/reports/get-plugin-reports.js +19 -0
  36. package/lib/sheets/create-cell-data.d.ts +1 -0
  37. package/lib/sheets/create-cell-data.js +1 -0
  38. package/lib/sheets/create-sheets.d.ts +74 -0
  39. package/lib/sheets/create-sheets.js +420 -0
  40. package/lib/sheets/default-cell-format.d.ts +8 -0
  41. package/lib/sheets/default-cell-format.js +8 -0
  42. package/lib/sheets/format.d.ts +2 -0
  43. package/lib/sheets/format.js +12 -0
  44. package/lib/sheets/types.d.ts +91 -0
  45. package/lib/sheets/types.js +1 -0
  46. package/lib/types.d.ts +11 -0
  47. package/lib/types.js +1 -0
  48. package/lib/utils/array-duplicated.d.ts +5 -0
  49. package/lib/utils/array-duplicated.js +15 -0
  50. package/lib/utils/get-rank-top.d.ts +6 -0
  51. package/lib/utils/get-rank-top.js +36 -0
  52. package/lib/utils/has-prop-filter.d.ts +18 -0
  53. package/lib/utils/has-prop-filter.js +19 -0
  54. package/lib/utils/is-bitmap-image.d.ts +6 -0
  55. package/lib/utils/is-bitmap-image.js +10 -0
  56. package/lib/utils/non-null-filter.d.ts +13 -0
  57. package/lib/utils/non-null-filter.js +15 -0
  58. package/package.json +45 -0
  59. package/src/__tests__/api/create-sheets.api.ts +234 -0
  60. package/src/__tests__/api/helpers.ts +148 -0
  61. package/src/__tests__/api/sheets.api.ts +217 -0
  62. package/src/archive.ts +29 -0
  63. package/src/data/add-to-summary.ts +10 -0
  64. package/src/data/create-discrepancies.ts +81 -0
  65. package/src/data/create-image-list.ts +74 -0
  66. package/src/data/create-links.ts +134 -0
  67. package/src/data/create-page-list.ts +472 -0
  68. package/src/data/create-referrers-relational-table.ts +115 -0
  69. package/src/data/create-resources-relational-table.ts +104 -0
  70. package/src/data/create-resources.ts +51 -0
  71. package/src/data/create-violations.spec.ts +95 -0
  72. package/src/data/create-violations.ts +47 -0
  73. package/src/debug.ts +7 -0
  74. package/src/index.ts +1 -0
  75. package/src/load-config.spec.ts +37 -0
  76. package/src/load-config.ts +17 -0
  77. package/src/report.ts +231 -0
  78. package/src/reports/get-plugin-reports.spec.ts +42 -0
  79. package/src/reports/get-plugin-reports.ts +24 -0
  80. package/src/sheets/create-cell-data.ts +1 -0
  81. package/src/sheets/create-sheets.ts +523 -0
  82. package/src/sheets/default-cell-format.spec.ts +13 -0
  83. package/src/sheets/default-cell-format.ts +8 -0
  84. package/src/sheets/format.spec.ts +17 -0
  85. package/src/sheets/format.ts +14 -0
  86. package/src/sheets/types.ts +106 -0
  87. package/src/types.ts +11 -0
  88. package/src/utils/has-prop-filter.spec.ts +25 -0
  89. package/src/utils/has-prop-filter.ts +21 -0
  90. package/src/utils/non-null-filter.spec.ts +27 -0
  91. package/src/utils/non-null-filter.ts +15 -0
  92. package/tsconfig.json +11 -0
  93. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,29 @@
1
+ import type { CreateSheet } from '../sheets/types.js';
2
+ /**
3
+ * Creates the "Page List" sheet configuration -- the primary sitemap-style report.
4
+ *
5
+ * This is the most complex sheet, combining crawler metadata with analyze
6
+ * plugin data into a comprehensive per-page inventory:
7
+ *
8
+ * - **URL decomposition**: Protocol, domain, and up to 10 path segments
9
+ * for hierarchical filtering in the spreadsheet.
10
+ * - **Title shortening**: Directory index titles are subtracted from child
11
+ * page titles to produce concise display titles (e.g. removing the site
12
+ * name suffix). The `indexTitles` map accumulates these across pages.
13
+ * - **Link quality**: Internal/external link counts with bad-link breakdowns
14
+ * (missing status or status >= 400, excluding 401 which is often auth-protected).
15
+ * - **SEO metadata**: description, keywords, canonical, alternate, OGP, etc.
16
+ * - **Plugin columns**: Dynamic columns from analyze plugin `pageData`.
17
+ *
18
+ * Conditional formatting highlights:
19
+ * - Bad links (non-zero count)
20
+ * - Missing language attribute
21
+ * - Low internal referrer count (fewer than 2; possible orphan pages)
22
+ * - Suspicious path names (copy, dummy, underscore prefixed)
23
+ * - HTTP protocol (non-HTTPS)
24
+ * - Error-like titles and status codes outside the 200-399 range
25
+ *
26
+ * Unused path columns (beyond the deepest URL) are hidden automatically.
27
+ * @param reports - Analyze plugin reports; those that carry `pageData` contribute extra columns.
28
+ */
29
+ export declare const createPageList: CreateSheet;
@@ -0,0 +1,380 @@
1
+ import { decodeURISafely } from '@d-zero/shared/decode-uri-safely';
2
+ import { pLog, reportLog } from '../debug.js';
3
+ import { createCellData } from '../sheets/create-cell-data.js';
4
+ import { defaultCellFormat } from '../sheets/default-cell-format.js';
5
+ import { booleanFormatError } from '../sheets/format.js';
6
+ import { nonNullFilter } from '../utils/non-null-filter.js';
7
+ const log = pLog.extend('PageList'); // Debug logger for this sheet, derived from the shared `pLog` with the 'PageList' label.
/**
 * Title of each directory's index page, keyed by directory name.
 * Used to shorten the titles of pages that live in (or directly below) that directory.
 *
 * NOTE(review): declared at module level, so entries persist across
 * `createPageList()` calls within one process -- confirm that is intended.
 */
const indexTitles = new Map();
/**
 * Referrer lists of the index pages seen so far, keyed by directory name.
 * Each value is an array of `{ basename, referrers }` records.
 */
const indexRefs = new Map();
/** Number of `pathN` columns reserved for URL path segments. */
const PATH_COLUMN_COUNT = 10;
/** Header names `path1` .. `path10`, shared by header creation and formatting. */
const PATH_HEADERS = Array.from({ length: PATH_COLUMN_COUNT }, (_, i) => `path${i + 1}`);

/**
 * Tells whether an anchor counts as a bad link: its status is missing (falsy),
 * or it is >= 400 and not 401.
 * @param anchor
 */
const isBadAnchor = (anchor) => !anchor.status || (anchor.status >= 400 && anchor.status !== 401);

/**
 * Formats one bad anchor as a single line of a "Bad Links" cell note.
 * When the written `href` differs from the resolved `url`, both are shown.
 * @param anchor
 */
const describeBadAnchor = (anchor) => {
	const target = anchor.href === anchor.url ? anchor.url : `${anchor.href} => ${anchor.url}`;
	return `${anchor.textContent} (${anchor.status} ${anchor.statusText} ${target})`;
};

/**
 * Builds a conditional-format rule that applies `booleanFormatError`
 * when the given condition holds.
 * @param type Condition type name passed through to the sheet API.
 * @param values Condition operands, sent as `userEnteredValue` strings.
 */
const errorRule = (type, ...values) => ({
	booleanRule: {
		condition: {
			type,
			values: values.map((userEnteredValue) => ({ userEnteredValue })),
		},
		format: booleanFormatError,
	},
});

/**
 * Builds one plugin-provided cell.
 * @param cell The plugin's `{ value, note }` entry for this column, if any.
 * @param option The plugin's per-cell text options for this column, or `null`.
 */
const createPluginCell = (cell, option) => {
	const textFormat = option
		? {
				// `bold` is only ever set to true; the other flags are copied whenever present.
				...(option.bold ? { bold: true } : {}),
				...(option.fontFamily != null ? { fontFamily: `${option.fontFamily}` } : {}),
				...(option.fontSize != null ? { fontSize: +option.fontSize } : {}),
				// `option.color` is currently not applied to the cell.
				...(option.italic != null ? { italic: !!option.italic } : {}),
				...(option.strike != null ? { strikethrough: !!option.strike } : {}),
				...(option.underline != null ? { underline: !!option.underline } : {}),
			}
		: {};
	// A note is only produced when options exist; the cell's own note wins over the option's.
	const note = option ? cell?.note || `${option.note || ''}` : undefined;
	return createCellData({ value: cell?.value, textFormat, note, ifNull: false }, defaultCellFormat);
};

/**
 * Creates the "Page List" sheet configuration -- the primary sitemap-style report.
 *
 * One row is produced per internal target page, made of:
 *
 * - **Titles**: a shortened display title (the directory index title and any
 *   `|` separators are removed) plus the full title.
 * - **URL decomposition**: URL, protocol, domain and up to 10 path segments;
 *   the query string is appended to the last segment.
 * - **Link quality**: internal/external link counts and bad-link counts
 *   (missing status, or status >= 400 except 401) with details in cell notes.
 * - **Referrers**: internal referrer count; for index pages the count is
 *   aggregated over all index pages recorded for the same directory.
 * - **SEO metadata**: description, keywords, robots flags, canonical,
 *   alternate, Twitter card and Open Graph fields.
 * - **Plugin columns**: one cell per header of every report carrying
 *   `pageData`. A report without data for the page still contributes empty
 *   cells so that later plugin columns stay aligned with their headers.
 *
 * `updateSheet` freezes the first row/column, highlights bad links, a missing
 * language ("N/A"), fewer than 2 internal referrers, suspicious path names,
 * error-like titles, the `http:` protocol and status codes outside 200-399,
 * and hides the `pathN` columns deeper than the deepest page seen.
 *
 * @param reports Analyze plugin reports; those with `pageData` add columns.
 * @returns Sheet configuration with `name`, `createHeaders`, `eachPage` and `updateSheet`.
 */
export const createPageList = (reports) => {
	const reportPageData = reports
		.map((r) => (r.pageData ? { name: r.name, pageData: r.pageData } : null))
		.filter(nonNullFilter);
	// Deepest URL depth seen by eachPage(); read later by updateSheet().
	let maxDepth = 0;
	return {
		name: 'Page List',
		createHeaders() {
			return [
				'Title',
				'Full Title',
				'URL',
				'Protocol',
				'Domain',
				...PATH_HEADERS,
				'Status Code',
				'Redirect From',
				'Language',
				'Internal Links',
				'Internal Bad Links',
				'External Links',
				'External Bad Links',
				'Internal Referrers',
				'description',
				'keywords',
				'noindex',
				'nofollow',
				'noarchive',
				'canonical',
				'alternate',
				'twitter:card',
				'og:site_name',
				'og:url',
				'og:title',
				'og:description',
				'og:type',
				'og:image',
				...reports.flatMap((report) => report.pageData ? Object.values(report.pageData.headers) : []),
			];
		},
		async eachPage(page) {
			if (!page.isInternalPage() || !page.isTarget) {
				return;
			}
			const url = page.url;
			const href = url.href;
			maxDepth = Math.max(url.depth, maxDepth);

			// Path segments, with the query string glued onto the last one.
			const paths = [...url.paths];
			if (paths.length > 0 && url.query) {
				paths[paths.length - 1] = `${paths[paths.length - 1]}?${url.query}`;
			}
			const pathValues = PATH_HEADERS.map((_, i) => i < paths.length ? `/${decodeURISafely(paths[i])}` : null);

			// Link totals and bad-link descriptions, split by internal/external.
			const anchors = await page.getAnchors();
			const internal = { total: 0, bad: [] };
			const external = { total: 0, bad: [] };
			for (const anchor of anchors) {
				const bucket = anchor.isExternal ? external : internal;
				bucket.total += 1;
				if (isBadAnchor(anchor)) {
					bucket.bad.push(describeBadAnchor(anchor));
				}
			}
			const refers = await page.getReferrers();

			// Shorten the title by removing the relevant directory index title.
			let title = page.title;
			const dirname = url.dirname || '/';
			const parentDir = `/${url.paths.slice(0, -2).join('/')}`;
			const dirTitle = url.isIndex
				? indexTitles.get(parentDir) || indexTitles.get(dirname)
				: indexTitles.get(dirname) || indexTitles.get(parentDir);
			if (dirTitle && typeof title === 'string' && title.includes(dirTitle)) {
				title = title.replace(dirTitle, '').replace(/\|/g, '').trim();
				if (!title) {
					title = page.title;
				}
			}

			// Looked up BEFORE registering this page, so the first index page of a
			// directory reports only its own referrers.
			const parentRefs = indexRefs.get(dirname);
			if (url.isIndex) {
				indexTitles.set(dirname, page.title);
				const entry = { basename: url.basename, referrers: refers };
				if (parentRefs) {
					parentRefs.push(entry);
				}
				else {
					indexRefs.set(dirname, [entry]);
				}
			}

			// Indentation of the title cell reflects the page's depth in the tree.
			const isRoot = url.dirname == null;
			const depth = isRoot ? 0 : url.depth - (url.isIndex ? 1 : 0);
			const cell = (value) => createCellData({ value }, defaultCellFormat);
			const row = [
				createCellData({
					value: title,
					cellFormat: { padding: { left: Math.max(depth, 0) * 20 + 3 } },
					note: `Full-title:\n${page.title}`,
				}, defaultCellFormat),
				cell(page.title),
				createCellData({ value: href, textFormat: { link: { uri: href } } }, defaultCellFormat),
				cell(url.protocol),
				cell(url.hostname),
				...pathValues.map((segment) => cell(segment)),
				cell(page.status || -1),
				createCellData({
					value: page.redirectFrom.length,
					note: page.redirectFrom.map((r) => r.url).join('\n'),
				}, defaultCellFormat),
				cell(page.lang || 'N/A'),
				cell(internal.total),
				createCellData({ value: internal.bad.length, note: internal.bad.join('\n') }, defaultCellFormat),
				cell(external.total),
				createCellData({ value: external.bad.length, note: external.bad.join('\n') }, defaultCellFormat),
				// Passed as a function so that the value is computed when createCellData
				// invokes it, reading whatever `parentRefs` holds at that moment.
				createCellData(() => {
					if (url.isIndex && parentRefs) {
						return {
							value: parentRefs.reduce((sum, ref) => sum + ref.referrers.length, 0),
							note: parentRefs
								.map((ref) => `[[/${ref.basename || ''}]]\n${ref.referrers
								.map((r) => r.url)
								.join('\n')}`)
								.join('\n\n'),
						};
					}
					return {
						value: refers.length,
						note: refers.map((ref) => ref.url).join('\n'),
					};
				}, defaultCellFormat),
				cell(page.description),
				cell(page.keywords),
				cell(!!page.noindex),
				cell(!!page.nofollow),
				cell(!!page.noarchive),
				cell(page.canonical),
				cell(page.alternate),
				cell(page.twitter_card),
				cell(page.og_site_name),
				createCellData({
					value: page.og_url,
					textFormat: { link: { uri: page.og_url } },
				}, defaultCellFormat),
				cell(page.og_title),
				cell(page.og_description),
				cell(page.og_type),
				cell(page.og_image),
			];

			for (const report of reportPageData) {
				const { headers, data: tables, options: optionTables } = report.pageData;
				const tableData = tables[href];
				const options = tableData && optionTables ? optionTables[href] : null;
				if (tableData) {
					reportLog('Add %s to table from %s', href, report.name);
				}
				else {
					reportLog("%s didn't have table of %s", report.name, href);
				}
				// Always emit one cell per header, even without data, so the columns
				// of the following reports line up with createHeaders().
				for (const key of Object.keys(headers)) {
					const option = options ? options[key] || null : null;
					row.push(createPluginCell(tableData ? tableData[key] : undefined, option));
				}
			}
			return [row];
		},
		async updateSheet(sheet) {
			const cols = (...names) => names.map((name) => sheet.getColNumByHeaderName(name));
			await sheet.frozen(1, 1);
			await sheet.conditionalFormat(cols('Internal Bad Links', 'External Bad Links'), errorRule('NUMBER_NOT_EQ', '0'));
			await sheet.conditionalFormat(cols('Language'), errorRule('TEXT_EQ', 'N/A'));
			await sheet.conditionalFormat(cols('Internal Referrers'), errorRule('NUMBER_LESS', '2'));
			await sheet.conditionalFormat(cols(...PATH_HEADERS), errorRule('CUSTOM_FORMULA', '=REGEXMATCH(INDIRECT(ADDRESS(ROW(),COLUMN())), "(?i)(^/_|_$|_copy|-copy|copy_|copy-|dummy)")'));
			await sheet.conditionalFormat(cols('Title'), errorRule('CUSTOM_FORMULA', '=REGEXMATCH(INDIRECT(ADDRESS(ROW(),COLUMN())), "(?i)(^| )(401|403|404|500|501|502|503)")'));
			await sheet.conditionalFormat(cols('Protocol'), errorRule('TEXT_EQ', 'http:'));
			await sheet.conditionalFormat(cols('Status Code'), errorRule('NUMBER_NOT_BETWEEN', '200', '399'));
			// Hide the path columns that no crawled page is deep enough to fill.
			for (let i = maxDepth + 1; i <= PATH_COLUMN_COUNT; i++) {
				const name = `path${i}`;
				log('Hide col %s', name);
				await sheet.hideCol(sheet.getColNumByHeaderName(name));
			}
		},
	};
};
@@ -0,0 +1,18 @@
1
+ import type { CreateSheet } from '../sheets/types.js';
2
+ /**
3
+ * Creates the "Referrers Relational Table" sheet configuration.
4
+ *
5
+ * Produces a normalized many-to-many table linking each page to all
6
+ * pages that reference it (referrers). Each row represents one
7
+ * referrer-to-page relationship with the referrer's text content
8
+ * and the page's HTTP status code (-1 when missing), status text and content type.
9
+ *
10
+ * This relational format (as opposed to the denormalized referrer
11
+ * column in "Links") enables pivot-table analysis and filtering
12
+ * in Google Sheets -- e.g. "which pages link to this 404 page?"
13
+ *
14
+ * Redirect chains are noted: when the referrer originally linked to
15
+ * a different URL that redirected to the current page, a
16
+ * `[REDIRECTED FROM]` note is attached to the "Link (To)" cell.
17
+ */
18
+ export declare const createReferrersRelationalTable: CreateSheet;
@@ -0,0 +1,96 @@
1
+ import { pLog } from '../debug.js';
2
+ import { createCellData } from '../sheets/create-cell-data.js';
3
+ import { defaultCellFormat } from '../sheets/default-cell-format.js';
4
+ import { booleanFormatError } from '../sheets/format.js';
/** Debug logger for the "Referrers Relational Table" sheet. */
const log = pLog.extend('ReferrersRelationalTable');
/**
 * Builds the sheet configuration for "Referrers Relational Table".
 *
 * Every row pairs a crawled page ("Link (To)") with one of its referrers
 * ("Referrer (From)"), together with the referrer's text content and the
 * page's status code, status text and content type. Keeping one row per
 * relationship makes the data usable for filtering and pivot tables.
 *
 * When the referrer linked through a URL other than the page's own
 * (`ref.through` differs from `page.url.href`), the "Link (To)" cell gets a
 * `[REDIRECTED FROM] <through>` note.
 *
 * `updateSheet` freezes the leading rows/columns via `sheet.frozen(2, 1)` and
 * marks "Link Status Code" with the error format when it is >= 400 or
 * outside 200-399.
 *
 * @returns Sheet configuration with `name`, `createHeaders`, `eachPage` and `updateSheet`.
 */
export const createReferrersRelationalTable = () => {
	return {
		name: 'Referrers Relational Table',
		createHeaders() {
			const headers = [
				'Link (To)',
				'Referrer (From)',
				'Referrer Content',
				'Link Status Code',
				'Link Status Text',
				'Link Content Type',
			];
			return headers;
		},
		async eachPage(page, num, total) {
			// Progress is logged as a rounded percentage of processed pages.
			const percent = Math.round((num / total) * 100);
			log('Create relational table (%d%% %d/%d)', percent, num, total);
			const referrers = await page.getReferrers();
			return referrers.map((ref) => {
				const content = ref.textContent || '__NO_TEXT_CONTENT__';
				const from = `${ref.url}${ref.hash ? `#${ref.hash}` : ''}`;
				const redirected = page.url.href !== ref.through;
				const linkCell = createCellData({
					value: page.url.href,
					textFormat: { link: { uri: page.url.href } },
					note: redirected ? `[REDIRECTED FROM] ${ref.through}` : undefined,
				}, defaultCellFormat);
				const referrerCell = createCellData({
					value: from,
					textFormat: { link: { uri: from } },
				}, defaultCellFormat);
				return [
					linkCell,
					referrerCell,
					createCellData({ value: content }, defaultCellFormat),
					// A missing status is reported as -1.
					createCellData({ value: page.status || -1 }, defaultCellFormat),
					createCellData({ value: page.statusText || '' }, defaultCellFormat),
					createCellData({ value: page.contentType || '' }, defaultCellFormat),
				];
			});
		},
		async updateSheet(sheet) {
			await sheet.frozen(2, 1);
			// Both rules target the same column and are applied in this order.
			const conditions = [
				{
					type: 'NUMBER_GREATER_THAN_EQ',
					values: [{ userEnteredValue: '400' }],
				},
				{
					type: 'NUMBER_NOT_BETWEEN',
					values: [{ userEnteredValue: '200' }, { userEnteredValue: '399' }],
				},
			];
			for (const condition of conditions) {
				await sheet.conditionalFormat([sheet.getColNumByHeaderName('Link Status Code')], {
					booleanRule: {
						condition,
						format: booleanFormatError,
					},
				});
			}
		},
	};
};
@@ -0,0 +1,14 @@
1
+ import type { CreateSheet } from '../sheets/types.js';
2
+ /**
3
+ * Creates the "Resources Relational Table" sheet configuration.
4
+ *
5
+ * Produces a normalized many-to-many table linking each network
6
+ * resource (CSS, JS, images, fonts, etc.) to the pages that
7
+ * reference it. Each row represents one page-to-resource
8
+ * relationship with the resource's HTTP status, content type, and size.
9
+ *
10
+ * Unlike the "Resources" sheet, which shows one row per resource
11
+ * with a referrer count, this relational table enables filtering
12
+ * and pivot analysis -- e.g. "which pages load this broken CSS file?"
13
+ */
14
+ export declare const createResourcesRelationalTable: CreateSheet;
@@ -0,0 +1,84 @@
1
+ import { pLog } from '../debug.js';
2
+ import { createCellData } from '../sheets/create-cell-data.js';
3
+ import { defaultCellFormat } from '../sheets/default-cell-format.js';
4
+ import { booleanFormatError } from '../sheets/format.js';
/**
 * Debug logger for the "Resources Relational Table" sheet.
 * Uses its own label so its output is not mixed into the
 * 'ReferrersRelationalTable' logger of the referrers sheet.
 */
const log = pLog.extend('ResourcesRelationalTable');
/**
 * Builds the sheet configuration for "Resources Relational Table".
 *
 * Every row pairs one referring page ("Referred Page (From)") with one
 * network resource ("Resource (To)") and carries the resource's status code,
 * status text, content type and content length. One row per relationship
 * makes the data usable for filtering and pivot tables -- e.g. "which pages
 * load this broken CSS file?"
 *
 * `updateSheet` freezes the leading rows/columns via `sheet.frozen(2, 1)` and
 * marks "Resource Status Code" with the error format when it is >= 400 or
 * outside 200-399.
 *
 * @returns Sheet configuration with `name`, `createHeaders`, `eachResource` and `updateSheet`.
 */
export const createResourcesRelationalTable = () => {
	return {
		name: 'Resources Relational Table',
		createHeaders() {
			return [
				'Referred Page (From)',
				'Resource (To)',
				'Resource Status Code',
				'Resource Status Text',
				'Resource Content Type',
				'Resource Size',
			];
		},
		async eachResource(resource) {
			log(`Read: Resource referrers (Search: ${resource.url})`);
			const rows = [];
			const referrers = await resource.getReferrers();
			// Each referrer is used directly as both the cell value and its link target.
			for (const referrerUrl of referrers) {
				rows.push([
					createCellData({
						value: referrerUrl,
						textFormat: { link: { uri: referrerUrl } },
					}, defaultCellFormat),
					createCellData({ value: resource.url }, defaultCellFormat),
					createCellData({ value: resource.status }, defaultCellFormat),
					createCellData({ value: resource.statusText }, defaultCellFormat),
					createCellData({ value: resource.contentType }, defaultCellFormat),
					createCellData({ value: resource.contentLength }, defaultCellFormat),
				]);
			}
			return rows;
		},
		async updateSheet(sheet) {
			await sheet.frozen(2, 1);
			// Both rules target the same column and are applied in this order.
			const conditions = [
				{
					type: 'NUMBER_GREATER_THAN_EQ',
					values: [{ userEnteredValue: '400' }],
				},
				{
					type: 'NUMBER_NOT_BETWEEN',
					values: [{ userEnteredValue: '200' }, { userEnteredValue: '399' }],
				},
			];
			for (const condition of conditions) {
				await sheet.conditionalFormat([sheet.getColNumByHeaderName('Resource Status Code')], {
					booleanRule: {
						condition,
						format: booleanFormatError,
					},
				});
			}
		},
	};
};
@@ -0,0 +1,13 @@
1
+ import type { CreateSheet } from '../sheets/types.js';
2
+ /**
3
+ * Creates the "Resources" sheet configuration.
4
+ *
5
+ * Lists all network resources (CSS, JS, images, fonts, etc.) discovered
6
+ * during crawling, with one row per unique resource URL. Each row includes
7
+ * HTTP status, content type, size, and the number of pages that reference
8
+ * the resource (with those page URLs listed in the cell note).
9
+ *
10
+ * Uses `eachResource` (Phase 3) to iterate over the archive's resource
11
+ * table rather than iterating page by page.
12
+ */
13
+ export declare const createResources: CreateSheet;