lint-wiki-dumps 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,59 @@
1
+ [![npm version](https://badge.fury.io/js/lint-wiki-dumps.svg)](https://www.npmjs.com/package/lint-wiki-dumps)
2
+ [![CodeQL](https://github.com/bhsd-harry/lint-wiki-dumps/actions/workflows/codeql.yml/badge.svg)](https://github.com/bhsd-harry/lint-wiki-dumps/actions/workflows/codeql.yml)
3
+ [![Codacy Badge](https://app.codacy.com/project/badge/Grade/12aacc9d0f3e4629ae96114f7c40cf31)](https://app.codacy.com/gh/bhsd-harry/lint-wiki-dumps/dashboard)
4
+
5
+ # Lint-Wiki-Dumps
6
+
7
+ This is a tool for linting Wikitext articles from Wikipedia with the help of [WikiLint](https://www.npmjs.com/package/wikilint). It will download the latest dump of a specified Wikipedia language edition and then lint the articles in the dump.
8
+
9
+ ## Installation
10
+
11
+ To run this tool, you need to have [curl](https://curl.se/) installed on your system. You can install this tool via npm:
12
+
13
+ ```sh
14
+ npm i lint-wiki-dumps
15
+ ```
16
+
17
+ You can also install its optional dependency [vscode-css-languageservice](https://npmjs.com/package/vscode-css-languageservice) to lint inline CSS in Wikitext:
18
+
19
+ ```sh
20
+ npm i vscode-css-languageservice
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ```sh
26
+ npx lint-wiki-dumps <language> <path to download directory>
27
+ # For example:
28
+ npx lint-wiki-dumps zh-yue ~/Downloads/dumps
29
+ ```
30
+
31
+ or execute the Bash script `scan.sh` directly:
32
+
33
+ ```sh
34
+ bash scan.sh <language> <path to download>
35
+ # For example:
36
+ bash scan.sh zh-yue ~/Downloads/dumps
37
+ ```
38
+
39
+ ## Advanced Usage
40
+
41
+ If you have already downloaded the dump, you can scan the dump directly and generate JSON reports:
42
+
43
+ ```sh
44
+ node parser.js <language> <path to dump>
45
+ # For example:
46
+ node parser.js zh-yue ~/Downloads/dumps/zh_yuewiki-latest-pages-articles.xml.bz2
47
+ ```
48
+
49
+ To generate HTML reports, you can use the following command:
50
+
51
+ ```sh
52
+ node report.js <language>
53
+ # For example:
54
+ node report.js zh-yue
55
+ ```
56
+
57
+ ## Report
58
+
59
+ The tool will generate reports in two formats: JSON and HTML. The JSON report will be saved in the `results` folder, while the HTML report will be available at `reports/index.html`.
package/package.json ADDED
@@ -0,0 +1,71 @@
1
+ {
2
+ "name": "lint-wiki-dumps",
3
+ "version": "0.0.0",
4
+ "description": "Lint Wikipedia dumps",
5
+ "keywords": [
6
+ "lint",
7
+ "wikipedia"
8
+ ],
9
+ "homepage": "https://github.com/bhsd-harry/lint-wiki-dumps#readme",
10
+ "bugs": {
11
+ "url": "https://github.com/bhsd-harry/lint-wiki-dumps/issues"
12
+ },
13
+ "license": "GPL-3.0",
14
+ "author": "Bhsd",
15
+ "files": [
16
+ "scan.sh",
17
+ "*.js",
18
+ "reports/*.html",
19
+ "reports/dist/",
20
+ "reports/*.css"
21
+ ],
22
+ "bin": {
23
+ "lint-wiki-dumps": "scan.sh"
24
+ },
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/bhsd-harry/lint-wiki-dumps.git"
28
+ },
29
+ "scripts": {
30
+ "prepublishOnly": "npm run build",
31
+ "build": "tsc && mv dist/* . && tsc --project reports/tsconfig.json",
32
+ "lint": "tsc --noEmit && tsc --project reports/tsconfig.json --noEmit && eslint --cache ."
33
+ },
34
+ "dependencies": {
35
+ "@bhsd/common": "^0.9.0",
36
+ "chalk": "^4.1.2",
37
+ "unbzip2-stream": "^1.4.3",
38
+ "wikilint": "^2.18.3",
39
+ "xml-stream": "^0.4.5"
40
+ },
41
+ "optionalDependencies": {
42
+ "vscode-css-languageservice": "^6.3.2"
43
+ },
44
+ "devDependencies": {
45
+ "@stylistic/eslint-plugin": "^3.1.0",
46
+ "@stylistic/stylelint-plugin": "^3.1.2",
47
+ "@types/mocha": "^10.0.10",
48
+ "@types/node": "^22.13.1",
49
+ "@types/unbzip2-stream": "^1.4.3",
50
+ "@typescript-eslint/eslint-plugin": "^8.23.0",
51
+ "@typescript-eslint/parser": "^8.23.0",
52
+ "esbuild": "^0.25.0",
53
+ "eslint": "^8.57.1",
54
+ "eslint-plugin-es-x": "^8.4.1",
55
+ "eslint-plugin-eslint-comments": "^3.2.0",
56
+ "eslint-plugin-jsdoc": "^50.6.3",
57
+ "eslint-plugin-json-es": "^1.6.0",
58
+ "eslint-plugin-markdown": "4.0.1",
59
+ "eslint-plugin-n": "^17.15.1",
60
+ "eslint-plugin-promise": "^7.2.1",
61
+ "eslint-plugin-regexp": "^2.7.0",
62
+ "eslint-plugin-unicorn": "^56.0.1",
63
+ "http-server": "^14.1.1",
64
+ "mocha": "^11.1.0",
65
+ "stylelint": "^16.14.1",
66
+ "typescript": "^5.7.3"
67
+ },
68
+ "engines": {
69
+ "node": ">=18.17.0"
70
+ }
71
+ }
package/parser.js ADDED
@@ -0,0 +1,100 @@
"use strict";
// Compiled CommonJS output (from TypeScript). Streams a bzip2-compressed
// MediaWiki XML dump, lints every mainspace wikitext page with WikiLint,
// and incrementally writes the results to results/<site>.json.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const os_1 = __importDefault(require("os"));
const perf_hooks_1 = require("perf_hooks");
const chalk_1 = __importDefault(require("chalk"));
const unbzip2_stream_1 = __importDefault(require("unbzip2-stream"));
const xml_stream_1 = __importDefault(require("xml-stream"));
const wikilint_1 = __importDefault(require("wikilint"));
const common_1 = require("@bhsd/common");
// Usage: node parser.js <site> <dump path> [<max pages>] [<restart title>]
const n = Number(process.argv[4]) || Infinity, [, , site, file, , restart] = process.argv;
wikilint_1.default.config = `${site}wiki`;
if (!fs_1.default.existsSync('results')) {
    fs_1.default.mkdirSync('results');
}
// Stream-decompress the dump; `~` is expanded manually since it is a shell-ism.
const stream = new xml_stream_1.default(fs_1.default.createReadStream(file.replace(/^~/u, os_1.default.homedir())).pipe((0, unbzip2_stream_1.default)())), output = path_1.default.join('results', `${site}.json`);
let old;
try {
    // Previous report, reused for pages untouched since the recorded timestamp.
    old = require(`./${output}`); // eslint-disable-line @typescript-eslint/no-require-imports
}
catch { }
const time = old?.['#timestamp'], last = time && new Date(time), results = fs_1.default.createWriteStream(output, { flags: restart ? 'a' : 'w' }), ignore = new Set(['no-arg', 'url-encoding', 'h1', 'var-anchor']);
let i = 0, latest = last, failed = 0, comma = restart ? ',' : '', stopping = false, restarted = !restart, worst;
stream.preserve('text', true);
if (!restart) {
    results.write('{');
}
results.on('close', () => {
    process.exit(); // eslint-disable-line n/no-process-exit
});
/** Finish the report: log a summary, write the trailing timestamp entry and close the file. */
const stop = () => {
    stopping = true;
    console.timeEnd('parse');
    console.log(chalk_1.default.green(`Parsed ${i} pages`));
    if (failed) {
        console.error(chalk_1.default.red(`${failed} pages failed to parse`));
    }
    if (worst) {
        console.info(chalk_1.default.yellow(`Worst page: ${worst.title} (${worst.duration.toFixed(3)} ms)`));
    }
    // `latest` may still be undefined when no page was parsed and there was no
    // previous run; fall back to null so the output remains valid JSON
    // (JSON.stringify(undefined) would emit the bare token `undefined`).
    results.write(`${comma}\n"#timestamp": ${JSON.stringify(latest ?? null)}\n}`);
    results.close();
};
/** Append one page's lint errors as a `"title": [...]` member of the JSON report. */
const newEntry = (title, errors) => {
    results.write(`${comma}\n${JSON.stringify(title)}: ${JSON.stringify(errors, null, '\t')}`);
    comma ||= ',';
};
console.time('parse');
stream.on('endElement: page', ({ title, ns, revision: { model, timestamp, text: { $text } } }) => {
    if (i === n) {
        // Reached the requested page limit; finalize exactly once.
        if (!stopping) {
            stop();
        }
    }
    else if (restarted && model === 'wikitext' && $text && ns === '0') {
        (0, common_1.refreshStdout)(`${i++} ${title}`);
        const date = new Date(timestamp);
        if (last && date <= last) {
            // Unchanged since the previous run: copy the earlier result over.
            const previous = old[title];
            if (previous) {
                newEntry(title, previous);
            }
        }
        else {
            latest = !latest || date > latest ? date : latest;
            try {
                const start = perf_hooks_1.performance.now(), errors = wikilint_1.default.parse($text).lint()
                    .filter(({ severity, rule }) => severity === 'error' && !ignore.has(rule)), duration = perf_hooks_1.performance.now() - start;
                if (errors.length > 0) {
                    // Drop bulky/ignored fields and attach the offending source excerpts.
                    newEntry(title, errors.map(({ severity, suggestions, fix, ...e }) => ({
                        ...e,
                        ...suggestions && {
                            suggestions: suggestions.map(action => ({
                                ...action,
                                original: $text.slice(...action.range),
                            })),
                        },
                        ...fix && { fix: { ...fix, original: $text.slice(...fix.range) } },
                        excerpt: $text.slice(e.startIndex, e.endIndex),
                    })));
                }
                if (!worst || duration > worst.duration) {
                    worst = { title, duration };
                }
            }
            catch (e) {
                console.error(chalk_1.default.red(`Error parsing ${title}`), e);
                failed++;
            }
        }
    }
    else if (title === restart) {
        // Resuming: skip everything until the restart page is seen again.
        restarted = true;
    }
});
stream.on('end', stop);
package/report.js ADDED
@@ -0,0 +1,71 @@
"use strict";
// Compiled CommonJS output (from TypeScript). Converts results/<site>.json
// reports into the `window.data=...` scripts consumed by the static HTML pages.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const crypto_1 = require("crypto");
const chalk_1 = __importDefault(require("chalk"));
const [, , lang] = process.argv;
/** Create a directory (and any missing parents) if it does not already exist. */
const mkdir = (dir) => {
    fs_1.default.mkdirSync(dir, { recursive: true });
};
const dataDir = path_1.default.join('reports', 'data');
mkdir(dataDir);
/** Serialize `data` as a script that assigns it to window.data on load. */
const writeJS = (data, file) => {
    fs_1.default.writeFileSync(path_1.default.join(dataDir, `${file}.js`), `window.data=${JSON.stringify(data)}`);
};
// Only JSON reports count; stray files (e.g. editor backups) are skipped.
const dir = fs_1.default.readdirSync('results').filter(f => f.endsWith('.json')), summary = [], MAX = 100, BATCH = 200;
for (const file of dir) {
    try {
        // eslint-disable-next-line @typescript-eslint/no-require-imports
        const data = require(`./results/${file}`);
        const site = file.slice(0, -5); // strip ".json"
        summary.push(site);
        delete data['#timestamp'];
        if (!lang || lang === site) {
            const siteDir = path_1.default.join(dataDir, site);
            mkdir(siteDir);
            // Per-wiki overview: number of affected pages per rule.
            const values = Object.values(data), rules = [...new Set(values.flat().map(({ rule }) => rule))].sort((a, b) => a.localeCompare(b)), wiki = [];
            for (const rule of rules) {
                wiki.push([rule, values.filter(errors => errors.some(({ rule: r }) => r === rule)).length]);
                // Per-rule listing, paginated in batches of BATCH articles.
                const articles = Object.entries(data).filter(([, errors]) => errors.some(({ rule: r }) => r === rule))
                    .sort(([a], [b]) => a.localeCompare(b))
                    .map(([page, errors]) => {
                    const { startLine, startCol, message, excerpt } = errors.find(({ rule: r }) => r === rule);
                    return [page, startLine + 1, startCol + 1, message, excerpt.slice(0, MAX * 0.8)];
                }), batches = Math.ceil(articles.length / BATCH);
                for (let i = 0; i < batches; i++) {
                    writeJS({
                        articles: articles.slice(i * BATCH, (i + 1) * BATCH),
                        batches,
                    }, path_1.default.join(site, `${rule}-${i}`));
                }
            }
            writeJS(wiki, path_1.default.join(site, 'index'));
            // Per-article listing, addressed by a short hash of the page title;
            // must match the SHA-256 prefix computed client-side in article.js.
            const articlesDir = path_1.default.join(siteDir, 'pages');
            mkdir(articlesDir);
            for (const [page, errors] of Object.entries(data)) {
                const hash = (0, crypto_1.createHash)('sha256').update(page).digest('hex')
                    .slice(0, 8), info = errors.map(({ startLine, startCol, rule, message, excerpt, }) => [
                    rule,
                    startLine + 1,
                    startCol + 1,
                    message,
                    excerpt.slice(0, MAX),
                ]);
                writeJS(info, path_1.default.join(site, 'pages', hash));
            }
        }
    }
    catch (e) {
        // Keep the underlying error so malformed reports are diagnosable.
        console.error(chalk_1.default.red(`Failed to read ${file}`), e);
    }
}
writeJS(summary, 'index');
@@ -0,0 +1,47 @@
1
+ <!DOCTYPE html>
2
+ <html dir="ltr" lang="en-US">
3
+ <head>
4
+ <title>Diagnostic Report for Wikipedia</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
7
+ <link rel="stylesheet" href="reports.css">
8
+ <style>td:nth-child(-n+3){text-align:center}</style>
9
+ <script defer="" src="dist/article.js"></script>
10
+ </head>
11
+ <body>
12
+ <main>
13
+ <article>
14
+ <div id="container">
15
+ <div>
16
+ <a href="./index.html">Homepage</a>
17
+ </div>
18
+ <div>
19
+ <a id="wiki" href="./wiki.html">Wikipedia</a>
20
+ </div>
21
+ </div>
22
+ <h2>
23
+ Diagnostic Report for
24
+ <a id="article">Article</a>
25
+ </h2>
26
+ <table style="table-layout:fixed;width:100%">
27
+ <colgroup>
28
+ <col span="1">
29
+ <col span="1" style="width:calc(1.6em + 7ch)">
30
+ <col span="1" style="width:calc(1.6em + 7ch)">
31
+ <col span="1" style="width:20%">
32
+ <col span="1" style="width:50%">
33
+ </colgroup>
34
+ <tbody>
35
+ <tr>
36
+ <th>Description</th>
37
+ <th>Line</th>
38
+ <th>Column</th>
39
+ <th>Detail</th>
40
+ <th>Notice</th>
41
+ </tr>
42
+ </tbody>
43
+ </table>
44
+ </article>
45
+ </main>
46
+ </body>
47
+ </html>
@@ -0,0 +1,30 @@
"use strict";
// Article report page: loads the per-page data script (addressed by a
// SHA-256 prefix of the title) and renders one table row per lint error.
(async () => {
    const params = new URLSearchParams(location.search);
    const page = params.get('page');
    const lang = params.get('lang');
    // Same 8-hex-digit prefix that report.js uses to name the data file.
    const digest = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(page));
    const hash = [...new Uint8Array(digest)].slice(0, 4)
        .map(byte => byte.toString(16).padStart(2, '0'))
        .join('');
    const title = document.querySelector('title');
    const heading = document.getElementById('article');
    const wikiLink = document.getElementById('wiki');
    const tableBody = document.querySelector('tbody');
    const loader = document.createElement('script');
    title.textContent = title.textContent.replace('Wikipedia', page);
    heading.textContent = page;
    heading.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
    wikiLink.textContent = `${lang}wiki`;
    wikiLink.href += `?lang=${lang}`;
    loader.src = `./data/${lang}/pages/${hash}.js`;
    loader.addEventListener('load', () => {
        for (const [rule, startLine, startCol, message, excerpt] of data) {
            const row = document.createElement('tr');
            const descCell = document.createElement('td');
            const lineCell = document.createElement('td');
            const colCell = document.createElement('td');
            const detailCell = document.createElement('td');
            const noticeCell = document.createElement('td');
            const ruleLink = document.createElement('a');
            ruleLink.textContent = rule;
            ruleLink.href = `./rule.html?lang=${lang}&rule=${rule}`;
            descCell.className = 'excerpt';
            descCell.append(ruleLink);
            lineCell.textContent = String(startLine);
            colCell.textContent = String(startCol);
            detailCell.textContent = message;
            detailCell.className = 'excerpt';
            noticeCell.textContent = excerpt;
            noticeCell.className = 'excerpt mono';
            row.append(descCell, lineCell, colCell, detailCell, noticeCell);
            tableBody.append(row);
        }
    });
    document.head.append(loader);
})();
@@ -0,0 +1,15 @@
"use strict";
// Homepage: loads the list of available wikis and links to each wiki report.
(() => {
    const container = document.getElementById('container');
    const loader = document.createElement('script');
    loader.src = './data/index.js';
    loader.addEventListener('load', () => {
        const links = window.data.map(lang => {
            const wrapper = document.createElement('div');
            const anchor = document.createElement('a');
            anchor.href = `./wiki.html?lang=${lang}`;
            anchor.innerText = `${lang}.wikipedia.org`;
            wrapper.append(anchor);
            return wrapper;
        });
        container.append(...links);
    });
    document.head.append(loader);
})();
@@ -0,0 +1,48 @@
"use strict";
// Rule report page: renders one batch (200 articles) of pages violating a
// given lint rule, with prev/next pagination links.
(() => {
    const params = new URLSearchParams(location.search);
    const lang = params.get('lang');
    const rule = params.get('rule');
    const batch = Math.floor(Number(params.get('start') || 0) / 200);
    const endStr = String((batch + 1) * 200);
    const prev = document.getElementById('prev');
    const next = document.getElementById('next');
    const start = document.getElementById('start');
    const end = document.getElementById('end');
    const title = document.querySelector('title');
    const h2 = document.querySelector('h2');
    const wiki = document.getElementById('wiki');
    const tbody = document.querySelector('tbody');
    const loader = document.createElement('script');
    h2.textContent = h2.textContent.replace('Wikipedia', `${lang}.wikipedia.org: ${rule}`);
    title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    wiki.textContent = `${lang}wiki`;
    wiki.href += `?lang=${lang}`;
    if (batch === 0) {
        prev.removeAttribute('href');
    }
    else {
        start.textContent = String(batch * 200 + 1);
        end.textContent = endStr;
        params.set('start', String((batch - 1) * 200));
        prev.href = `${location.pathname}?${params}`;
    }
    params.set('start', endStr);
    next.href = `${location.pathname}?${params}`;
    loader.src = `./data/${lang}/${rule}-${batch}.js`;
    loader.addEventListener('load', () => {
        if (data.batches === batch + 1) {
            // Last batch: disable "next" and show the true number of entries.
            next.removeAttribute('href');
            end.textContent = String(batch * 200 + data.articles.length);
        }
        for (const [page, startLine, startCol, message, excerpt] of data.articles) {
            const row = document.createElement('tr');
            const articleCell = document.createElement('td');
            const editCell = document.createElement('td');
            const lineCell = document.createElement('td');
            const colCell = document.createElement('td');
            const detailCell = document.createElement('td');
            const noticeCell = document.createElement('td');
            const moreCell = document.createElement('td');
            const articleLink = document.createElement('a');
            const editLink = document.createElement('a');
            const moreLink = document.createElement('a');
            articleLink.textContent = page;
            articleLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
            articleCell.className = 'excerpt';
            articleCell.append(articleLink);
            editLink.textContent = 'edit';
            editLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?action=edit`;
            editCell.append(editLink);
            lineCell.textContent = String(startLine);
            colCell.textContent = String(startCol);
            detailCell.textContent = message;
            detailCell.className = 'excerpt';
            noticeCell.textContent = excerpt;
            noticeCell.className = 'excerpt mono';
            moreLink.textContent = 'more';
            moreLink.href = `./article.html?lang=${lang}&page=${encodeURIComponent(page)}`;
            moreCell.append(moreLink);
            row.append(articleCell, editCell, lineCell, colCell, detailCell, noticeCell, moreCell);
            tbody.append(row);
        }
    });
    document.head.append(loader);
})();
@@ -0,0 +1,19 @@
"use strict";
// Wiki report page: lists every lint rule with the number of affected pages.
(() => {
    const lang = new URLSearchParams(location.search).get('lang');
    const title = document.querySelector('title');
    const heading = document.querySelector('h2');
    const tableBody = document.querySelector('tbody');
    const loader = document.createElement('script');
    heading.textContent = heading.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    loader.src = `./data/${lang}/index.js`;
    loader.addEventListener('load', () => {
        for (const [rule, count] of data) {
            const row = document.createElement('tr');
            const descCell = document.createElement('td');
            const pagesCell = document.createElement('td');
            const ruleLink = document.createElement('a');
            ruleLink.textContent = rule;
            ruleLink.href = `./rule.html?lang=${lang}&rule=${rule}`;
            descCell.append(ruleLink);
            pagesCell.textContent = String(count);
            row.append(descCell, pagesCell);
            tableBody.append(row);
        }
    });
    document.head.append(loader);
})();
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE html>
2
+ <html dir="ltr" lang="en-US">
3
+ <head>
4
+ <title>Diagnostic Report for Wikipedia Dumps</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
7
+ <link rel="stylesheet" href="reports.css">
8
+ <script defer="" src="dist/index.js"></script>
9
+ </head>
10
+ <body>
11
+ <main>
12
+ <article>
13
+ <h2>Please choose the language edition.</h2>
14
+ <div id="container"></div>
15
+ </article>
16
+ </main>
17
+ </body>
18
+ </html>
@@ -0,0 +1,79 @@
1
+ html, body, div, article, main {
2
+ margin: 0;
3
+ padding: 0;
4
+ border: 0;
5
+ font: inherit;
6
+ vertical-align: baseline;
7
+ }
8
+ :root {
9
+ font-size: 14px;
10
+ font-family: sans-serif;
11
+ }
12
+ body {
13
+ display: flex;
14
+ flex-direction: column;
15
+ align-items: center;
16
+ background-color: #f0f0f0;
17
+ line-height: 1.6;
18
+ }
19
+ main {
20
+ display: flex;
21
+ width: 90vw;
22
+ max-width: 1200px;
23
+ margin: 3rem 0;
24
+ background-color: #fff;
25
+ box-shadow: 0 0 .5rem rgba(0, 0, 0, .1);
26
+ border-radius: .5rem;
27
+ }
28
+ article {
29
+ display: block;
30
+ width: 100%;
31
+ padding: 1rem 2rem 2rem;
32
+ }
33
+ h2 {
34
+ font-size: 1.3em;
35
+ margin: .5rem 0;
36
+ font-weight: bold;
37
+ text-align: center;
38
+ }
39
+ #container, #nav {
40
+ font-size: 1.3em;
41
+ text-align: center;
42
+ }
43
+ #container > div {
44
+ margin: .1em 0;
45
+ }
46
+ table {
47
+ margin: 1em auto;
48
+ background-color: #f8f9fa;
49
+ color: #202122;
50
+ border: 1px solid #a2a9b1;
51
+ border-collapse: collapse;
52
+ }
53
+ th, td {
54
+ border: 1px solid #a2a9b1;
55
+ padding: .4em .8em;
56
+ }
57
+ th {
58
+ background-color: #eaecf0;
59
+ text-align: center;
60
+ }
61
+ .excerpt {
62
+ overflow: hidden;
63
+ white-space: nowrap;
64
+ text-overflow: ellipsis;
65
+ }
66
+ .mono {
67
+ font-family: monospace;
68
+ }
69
+
70
+ @media screen and (max-width: 720px) {
71
+ main {
72
+ width: 100%;
73
+ margin: 0;
74
+ border-radius: 0;
75
+ }
76
+ article {
77
+ padding: 1rem;
78
+ }
79
+ }
@@ -0,0 +1,55 @@
1
+ <!DOCTYPE html>
2
+ <html dir="ltr" lang="en-US">
3
+ <head>
4
+ <title>Diagnostic Report for Wikipedia</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
7
+ <link rel="stylesheet" href="reports.css">
8
+ <style>td:nth-child(-n+4):not(:first-child),td:last-child{text-align:center}</style>
9
+ <script defer="" src="dist/rule.js"></script>
10
+ </head>
11
+ <body>
12
+ <main>
13
+ <article>
14
+ <div id="container">
15
+ <div>
16
+ <a href="./index.html">Homepage</a>
17
+ </div>
18
+ <div>
19
+ <a id="wiki" href="./wiki.html">Wikipedia</a>
20
+ </div>
21
+ </div>
22
+ <h2>Diagnostic Report for Wikipedia</h2>
23
+ <div id="nav">
24
+ <a href="#" id="prev" title="Previous">←</a>
25
+ <span id="start">0</span>
26
+ ~
27
+ <span id="end">200</span>
28
+ <a href="#" id="next" title="Next">→</a>
29
+ </div>
30
+ <table style="table-layout:fixed;width:100%">
31
+ <colgroup>
32
+ <col span="1">
33
+ <col span="1" style="width:calc(1.6em + 5ch)">
34
+ <col span="1" style="width:calc(1.6em + 7ch)">
35
+ <col span="1" style="width:calc(1.6em + 7ch)">
36
+ <col span="1" style="width:20%">
37
+ <col span="1" style="width:40%">
38
+ <col span="1" style="width:calc(1.6em + 5ch)">
39
+ </colgroup>
40
+ <tbody>
41
+ <tr>
42
+ <th>Article</th>
43
+ <th>Edit</th>
44
+ <th>Line</th>
45
+ <th>Column</th>
46
+ <th>Detail</th>
47
+ <th>Notice</th>
48
+ <th>More</th>
49
+ </tr>
50
+ </tbody>
51
+ </table>
52
+ </article>
53
+ </main>
54
+ </body>
55
+ </html>
@@ -0,0 +1,28 @@
1
+ <!DOCTYPE html>
2
+ <html dir="ltr" lang="en-US">
3
+ <head>
4
+ <title>Diagnostic Report for Wikipedia</title>
5
+ <meta charset="utf-8">
6
+ <meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
7
+ <link rel="stylesheet" href="reports.css">
8
+ <script defer="" src="dist/wiki.js"></script>
9
+ </head>
10
+ <body>
11
+ <main>
12
+ <article>
13
+ <div id="container">
14
+ <a href="./index.html">Homepage</a>
15
+ </div>
16
+ <h2>Diagnostic Report for Wikipedia</h2>
17
+ <table style="text-align:center">
18
+ <tbody>
19
+ <tr>
20
+ <th>Description</th>
21
+ <th>Pages</th>
22
+ </tr>
23
+ </tbody>
24
+ </table>
25
+ </article>
26
+ </main>
27
+ </body>
28
+ </html>
package/scan.sh ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Download (unless skipped) and lint a Wikipedia dump, then build HTML reports.
# Usage: scan.sh <language> <download dir> [<max pages>] [<restart title>]
# Portable shebang: bash is not always at /usr/local/bin (Linux has /bin/bash).
if (( $# < 2 ))
then
	echo 'Usage: npx lint-wiki-dumps <language> <path to download>'
	echo 'Example: npx lint-wiki-dumps zh-yue ~/Downloads/dumps'
	exit 1
fi
site="${1}wiki" # example: zh-yuewiki
target="${1//-/_}wiki" # example: zh_yuewiki (dump database names use underscores)
file="${target}-latest-pages-articles.xml.bz2"
if (( $# < 3 ))
then
	# --fail prevents saving an HTML error page as the dump on HTTP errors.
	curl --fail --output-dir "$2" -O "https://dumps.wikimedia.org/$target/latest/$file"
	npx getParserConfig "$site" "https://$1.wikipedia.org/w/"
fi
node parser.js "$1" "$2/$file" "$3" "$4" && node report.js "$1"