lint-wiki-dumps 0.0.0
- package/LICENSE +674 -0
- package/README.md +59 -0
- package/package.json +71 -0
- package/parser.js +100 -0
- package/report.js +71 -0
- package/reports/article.html +47 -0
- package/reports/dist/article.js +30 -0
- package/reports/dist/index.js +15 -0
- package/reports/dist/rule.js +48 -0
- package/reports/dist/wiki.js +19 -0
- package/reports/index.html +18 -0
- package/reports/reports.css +79 -0
- package/reports/rule.html +55 -0
- package/reports/wiki.html +28 -0
- package/scan.sh +16 -0
package/README.md
ADDED
@@ -0,0 +1,59 @@
[](https://www.npmjs.com/package/lint-wiki-dumps)
[](https://github.com/bhsd-harry/lint-wiki-dumps/actions/workflows/codeql.yml)
[](https://app.codacy.com/gh/bhsd-harry/lint-wiki-dumps/dashboard)

# Lint-Wiki-Dumps

This is a tool for linting Wikitext articles from Wikipedia with the help of [WikiLint](https://www.npmjs.com/package/wikilint). It will download the latest dump of a specified Wikipedia language edition and then lint the articles in the dump.

## Installation

To run this tool, you need to have [curl](https://curl.se/) installed on your system. You can install this tool via npm:

```sh
npm i lint-wiki-dumps
```

You can also install its optional dependency [vscode-css-languageservice](https://npmjs.com/package/vscode-css-languageservice) to lint inline CSS in Wikitext:

```sh
npm i vscode-css-languageservice
```

## Usage

```sh
npx lint-wiki-dumps <language> <path to download directory>
# For example:
npx lint-wiki-dumps zh-yue ~/Downloads/dumps
```

or execute the Bash script `scan.sh` directly:

```sh
bash scan.sh <language> <path to download>
# For example:
bash scan.sh zh-yue ~/Downloads/dumps
```

## Advanced Usage

If you have already downloaded the dump, you can scan the dump directly and generate JSON reports:

```sh
node parser.js <language> <path to dump>
# For example:
node parser.js zh-yue ~/Downloads/dumps/zh-yuewiki-latest-pages-articles.xml.bz2
```

To generate HTML reports, you can use the following command:

```sh
node report.js <language>
# For example:
node report.js zh-yue
```

## Report

The tool will generate reports in two formats: JSON and HTML. The JSON report will be saved in the `results` folder, while the HTML report will be available at `reports/index.html`.
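The JSON report written by `parser.js` (shown below) is a single object keyed by page title, plus a `#timestamp` bookkeeping entry. A minimal Node sketch for consuming it, assuming a report for `zh-yue` has already been generated:

```js
// Sketch: tally errors per lint rule from a generated JSON report.
// Assumes results/zh-yue.json exists (output of `node parser.js zh-yue ...`).
const report = require('./results/zh-yue.json');
delete report['#timestamp']; // bookkeeping key, not an article
const counts = {};
for (const errors of Object.values(report)) {
    for (const { rule } of errors) {
        counts[rule] = (counts[rule] ?? 0) + 1;
    }
}
console.table(counts);
```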
package/package.json
ADDED
@@ -0,0 +1,71 @@
{
  "name": "lint-wiki-dumps",
  "version": "0.0.0",
  "description": "Lint Wikipedia dumps",
  "keywords": [
    "lint",
    "wikipedia"
  ],
  "homepage": "https://github.com/bhsd-harry/lint-wiki-dumps#readme",
  "bugs": {
    "url": "https://github.com/bhsd-harry/lint-wiki-dumps/issues"
  },
  "license": "GPL-3.0",
  "author": "Bhsd",
  "files": [
    "scan.sh",
    "*.js",
    "reports/*.html",
    "reports/dist/",
    "reports/*.css"
  ],
  "bin": {
    "lint-wiki-dumps": "scan.sh"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/bhsd-harry/lint-wiki-dumps.git"
  },
  "scripts": {
    "prepublishOnly": "npm run build",
    "build": "tsc && mv dist/* . && tsc --project reports/tsconfig.json",
    "lint": "tsc --noEmit && tsc --project reports/tsconfig.json --noEmit && eslint --cache ."
  },
  "dependencies": {
    "@bhsd/common": "^0.9.0",
    "chalk": "^4.1.2",
    "unbzip2-stream": "^1.4.3",
    "wikilint": "^2.18.3",
    "xml-stream": "^0.4.5"
  },
  "optionalDependencies": {
    "vscode-css-languageservice": "^6.3.2"
  },
  "devDependencies": {
    "@stylistic/eslint-plugin": "^3.1.0",
    "@stylistic/stylelint-plugin": "^3.1.2",
    "@types/mocha": "^10.0.10",
    "@types/node": "^22.13.1",
    "@types/unbzip2-stream": "^1.4.3",
    "@typescript-eslint/eslint-plugin": "^8.23.0",
    "@typescript-eslint/parser": "^8.23.0",
    "esbuild": "^0.25.0",
    "eslint": "^8.57.1",
    "eslint-plugin-es-x": "^8.4.1",
    "eslint-plugin-eslint-comments": "^3.2.0",
    "eslint-plugin-jsdoc": "^50.6.3",
    "eslint-plugin-json-es": "^1.6.0",
    "eslint-plugin-markdown": "4.0.1",
    "eslint-plugin-n": "^17.15.1",
    "eslint-plugin-promise": "^7.2.1",
    "eslint-plugin-regexp": "^2.7.0",
    "eslint-plugin-unicorn": "^56.0.1",
    "http-server": "^14.1.1",
    "mocha": "^11.1.0",
    "stylelint": "^16.14.1",
    "typescript": "^5.7.3"
  },
  "engines": {
    "node": ">=18.17.0"
  }
}
package/parser.js
ADDED
@@ -0,0 +1,100 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const os_1 = __importDefault(require("os"));
const perf_hooks_1 = require("perf_hooks");
const chalk_1 = __importDefault(require("chalk"));
const unbzip2_stream_1 = __importDefault(require("unbzip2-stream"));
const xml_stream_1 = __importDefault(require("xml-stream"));
const wikilint_1 = __importDefault(require("wikilint"));
const common_1 = require("@bhsd/common");
const n = Number(process.argv[4]) || Infinity, [, , site, file, , restart] = process.argv;
wikilint_1.default.config = `${site}wiki`;
if (!fs_1.default.existsSync('results')) {
    fs_1.default.mkdirSync('results');
}
const stream = new xml_stream_1.default(fs_1.default.createReadStream(file.replace(/^~/u, os_1.default.homedir())).pipe((0, unbzip2_stream_1.default)())), output = path_1.default.join('results', `${site}.json`);
let old;
try {
    old = require(`./${output}`); // eslint-disable-line @typescript-eslint/no-require-imports
}
catch { }
const time = old?.['#timestamp'], last = time && new Date(time), results = fs_1.default.createWriteStream(output, { flags: restart ? 'a' : 'w' }), ignore = new Set(['no-arg', 'url-encoding', 'h1', 'var-anchor']);
let i = 0, latest = last, failed = 0, comma = restart ? ',' : '', stopping = false, restarted = !restart, worst;
stream.preserve('text', true);
if (!restart) {
    results.write('{');
}
results.on('close', () => {
    process.exit(); // eslint-disable-line n/no-process-exit
});
const stop = () => {
    stopping = true;
    console.timeEnd('parse');
    console.log(chalk_1.default.green(`Parsed ${i} pages`));
    if (failed) {
        console.error(chalk_1.default.red(`${failed} pages failed to parse`));
    }
    if (worst) {
        console.info(chalk_1.default.yellow(`Worst page: ${worst.title} (${worst.duration.toFixed(3)} ms)`));
    }
    results.write(`${comma}\n"#timestamp": ${JSON.stringify(latest)}\n}`);
    results.close();
};
const newEntry = (title, errors) => {
    results.write(`${comma}\n${JSON.stringify(title)}: ${JSON.stringify(errors, null, '\t')}`);
    comma ||= ',';
};
console.time('parse');
stream.on('endElement: page', ({ title, ns, revision: { model, timestamp, text: { $text } } }) => {
    if (i === n) {
        if (!stopping) {
            stop();
        }
    }
    else if (restarted && model === 'wikitext' && $text && ns === '0') {
        (0, common_1.refreshStdout)(`${i++} ${title}`);
        const date = new Date(timestamp);
        if (last && date <= last) {
            const previous = old[title];
            if (previous) {
                newEntry(title, previous);
            }
        }
        else {
            latest = !latest || date > latest ? date : latest;
            try {
                const start = perf_hooks_1.performance.now(), errors = wikilint_1.default.parse($text).lint()
                    .filter(({ severity, rule }) => severity === 'error' && !ignore.has(rule)), duration = perf_hooks_1.performance.now() - start;
                if (errors.length > 0) {
                    newEntry(title, errors.map(({ severity, suggestions, fix, ...e }) => ({
                        ...e,
                        ...suggestions && {
                            suggestions: suggestions.map(action => ({
                                ...action,
                                original: $text.slice(...action.range),
                            })),
                        },
                        ...fix && { fix: { ...fix, original: $text.slice(...fix.range) } },
                        excerpt: $text.slice(e.startIndex, e.endIndex),
                    })));
                }
                if (!worst || duration > worst.duration) {
                    worst = { title, duration };
                }
            }
            catch (e) {
                console.error(chalk_1.default.red(`Error parsing ${title}`), e);
                failed++;
            }
        }
    }
    else if (title === restart) {
        restarted = true;
    }
});
stream.on('end', stop);
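A standalone sketch of the WikiLint calls that drive this script; the sample wikitext and its lint outcome are illustrative, but `Parser.config`, `parse()`, `lint()`, and the `severity`/`rule`/`startLine`/`startCol`/`message` fields all appear above:

```js
// Sketch: lint one wikitext string the same way parser.js does.
const Parser = require('wikilint');
Parser.config = 'zh-yuewiki'; // the `${site}wiki` config set above
const errors = Parser.parse('{{unclosed template').lint()
    .filter(({ severity }) => severity === 'error');
for (const { rule, message, startLine, startCol } of errors) {
    console.log(`${rule} (${startLine + 1}:${startCol + 1}): ${message}`);
}
```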
package/report.js
ADDED
@@ -0,0 +1,71 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const crypto_1 = require("crypto");
const chalk_1 = __importDefault(require("chalk"));
const [, , lang] = process.argv;
const mkdir = (dir) => {
    if (!fs_1.default.existsSync(dir)) {
        fs_1.default.mkdirSync(dir);
    }
};
const dataDir = path_1.default.join('reports', 'data');
mkdir(dataDir);
const writeJS = (data, file) => {
    fs_1.default.writeFileSync(path_1.default.join(dataDir, `${file}.js`), `window.data=${JSON.stringify(data)}`);
};
const dir = fs_1.default.readdirSync('results'), summary = [], MAX = 100;
for (const file of dir) {
    try {
        // eslint-disable-next-line @typescript-eslint/no-require-imports
        const data = require(`./results/${file}`);
        const site = file.slice(0, -5);
        summary.push(site);
        delete data['#timestamp'];
        if (!lang || lang === site) {
            const siteDir = path_1.default.join(dataDir, site);
            mkdir(siteDir);
            // wiki
            const values = Object.values(data), rules = [...new Set(values.flat().map(({ rule }) => rule))].sort((a, b) => a.localeCompare(b)), wiki = [];
            for (const rule of rules) {
                wiki.push([rule, values.filter(errors => errors.some(({ rule: r }) => r === rule)).length]);
                // rule
                const articles = Object.entries(data).filter(([, errors]) => errors.some(({ rule: r }) => r === rule))
                    .sort(([a], [b]) => a.localeCompare(b))
                    .map(([page, errors]) => {
                    const { startLine, startCol, message, excerpt } = errors.find(({ rule: r }) => r === rule);
                    return [page, startLine + 1, startCol + 1, message, excerpt.slice(0, MAX * 0.8)];
                }), batches = Math.ceil(articles.length / 200);
                for (let i = 0; i < batches; i++) {
                    writeJS({
                        articles: articles.slice(i * 200, (i + 1) * 200),
                        batches,
                    }, path_1.default.join(site, `${rule}-${i}`));
                }
            }
            writeJS(wiki, path_1.default.join(site, 'index'));
            // article
            const articlesDir = path_1.default.join(siteDir, 'pages');
            mkdir(articlesDir);
            for (const [page, errors] of Object.entries(data)) {
                const hash = (0, crypto_1.createHash)('sha256').update(page).digest('hex')
                    .slice(0, 8), info = errors.map(({ startLine, startCol, rule, message, excerpt, }) => [
                    rule,
                    startLine + 1,
                    startCol + 1,
                    message,
                    excerpt.slice(0, MAX),
                ]);
                writeJS(info, path_1.default.join(site, 'pages', hash));
            }
        }
    }
    catch {
        console.error(chalk_1.default.red(`Failed to read ${file}`));
    }
}
writeJS(summary, 'index');
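Each per-page data file is named after the first 8 hex characters of the SHA-256 hash of the page title; `reports/dist/article.js` below recomputes the same prefix in the browser via WebCrypto. A quick sketch of the Node side (the page title is illustrative):

```js
// Sketch: derive the per-page data file name used by report.js.
const { createHash } = require('crypto');
const page = 'Example article';
const hash = createHash('sha256').update(page).digest('hex').slice(0, 8);
console.log(`reports/data/zh-yue/pages/${hash}.js`); // loaded by article.html
```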
package/reports/article.html
ADDED
@@ -0,0 +1,47 @@
<!DOCTYPE html>
<html dir="ltr" lang="en-US">
<head>
	<title>Diagnostic Report for Wikipedia</title>
	<meta charset="utf-8">
	<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
	<link rel="stylesheet" href="reports.css">
	<style>td:nth-child(-n+3){text-align:center}</style>
	<script defer="" src="dist/article.js"></script>
</head>
<body>
	<main>
		<article>
			<div id="container">
				<div>
					<a href="./index.html">Homepage</a>
				</div>
				<div>
					<a id="wiki" href="./wiki.html">Wikipedia</a>
				</div>
			</div>
			<h2>
				Diagnostic Report for
				<a id="article">Article</a>
			</h2>
			<table style="table-layout:fixed;width:100%">
				<colgroup>
					<col span="1">
					<col span="1" style="width:calc(1.6em + 7ch)">
					<col span="1" style="width:calc(1.6em + 7ch)">
					<col span="1" style="width:20%">
					<col span="1" style="width:50%">
				</colgroup>
				<tbody>
					<tr>
						<th>Description</th>
						<th>Line</th>
						<th>Column</th>
						<th>Detail</th>
						<th>Notice</th>
					</tr>
				</tbody>
			</table>
		</article>
	</main>
</body>
</html>
package/reports/dist/article.js
ADDED
@@ -0,0 +1,30 @@
"use strict";
(async () => {
    const search = new URLSearchParams(location.search), page = search.get('page'), lang = search.get('lang'), buffer = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(page)), hash = [...new Uint8Array(buffer)].slice(0, 4)
        .map(b => b.toString(16).padStart(2, '0'))
        .join(''), title = document.querySelector('title'), h2 = document.getElementById('article'), wiki = document.getElementById('wiki'), tbody = document.querySelector('tbody'), script = document.createElement('script');
    title.textContent = title.textContent.replace('Wikipedia', page);
    h2.textContent = page;
    h2.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
    wiki.textContent = `${lang}wiki`;
    wiki.href += `?lang=${lang}`;
    script.src = `./data/${lang}/pages/${hash}.js`;
    script.addEventListener('load', () => {
        for (const entry of data) {
            const [rule, startLine, startCol, message, excerpt] = entry, tr = document.createElement('tr'), description = document.createElement('td'), line = document.createElement('td'), column = document.createElement('td'), detail = document.createElement('td'), notice = document.createElement('td'), descriptionLink = document.createElement('a');
            descriptionLink.textContent = rule;
            descriptionLink.href = `./rule.html?lang=${lang}&rule=${rule}`;
            description.className = 'excerpt';
            description.append(descriptionLink);
            line.textContent = String(startLine);
            column.textContent = String(startCol);
            detail.textContent = message;
            detail.className = 'excerpt';
            notice.textContent = excerpt;
            notice.className = 'excerpt mono';
            tr.append(description, line, column, detail, notice);
            tbody.append(tr);
        }
    });
    document.head.append(script);
})();
package/reports/dist/index.js
ADDED
@@ -0,0 +1,15 @@
"use strict";
(() => {
    const container = document.getElementById('container'), script = document.createElement('script');
    script.src = './data/index.js';
    script.addEventListener('load', () => {
        container.append(...window.data.map(lang => {
            const div = document.createElement('div'), a = document.createElement('a');
            a.href = `./wiki.html?lang=${lang}`;
            a.innerText = `${lang}.wikipedia.org`;
            div.append(a);
            return div;
        }));
    });
    document.head.append(script);
})();
package/reports/dist/rule.js
ADDED
@@ -0,0 +1,48 @@
"use strict";
(() => {
    const search = new URLSearchParams(location.search), lang = search.get('lang'), rule = search.get('rule'), batch = Math.floor(Number(search.get('start') || 0) / 200), endStr = String((batch + 1) * 200), prev = document.getElementById('prev'), next = document.getElementById('next'), start = document.getElementById('start'), end = document.getElementById('end'), title = document.querySelector('title'), h2 = document.querySelector('h2'), wiki = document.getElementById('wiki'), tbody = document.querySelector('tbody'), script = document.createElement('script');
    h2.textContent = h2.textContent.replace('Wikipedia', `${lang}.wikipedia.org: ${rule}`);
    title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    wiki.textContent = `${lang}wiki`;
    wiki.href += `?lang=${lang}`;
    if (batch === 0) {
        prev.removeAttribute('href');
    }
    else {
        start.textContent = String(batch * 200 + 1);
        end.textContent = endStr;
        search.set('start', String((batch - 1) * 200));
        prev.href = `${location.pathname}?${search}`;
    }
    search.set('start', endStr);
    next.href = `${location.pathname}?${search}`;
    script.src = `./data/${lang}/${rule}-${batch}.js`;
    script.addEventListener('load', () => {
        if (data.batches === batch + 1) {
            next.removeAttribute('href');
            end.textContent = String(batch * 200 + data.articles.length);
        }
        for (const [page, startLine, startCol, message, excerpt] of data.articles) {
            const tr = document.createElement('tr'), article = document.createElement('td'), edit = document.createElement('td'), line = document.createElement('td'), column = document.createElement('td'), detail = document.createElement('td'), notice = document.createElement('td'), more = document.createElement('td'), articleLink = document.createElement('a'), editLink = document.createElement('a'), moreLink = document.createElement('a');
            articleLink.textContent = page;
            articleLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
            article.className = 'excerpt';
            article.append(articleLink);
            editLink.textContent = 'edit';
            editLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?action=edit`;
            edit.append(editLink);
            line.textContent = String(startLine);
            column.textContent = String(startCol);
            detail.textContent = message;
            detail.className = 'excerpt';
            notice.textContent = excerpt;
            notice.className = 'excerpt mono';
            moreLink.textContent = 'more';
            moreLink.href = `./article.html?lang=${lang}&page=${encodeURIComponent(page)}`;
            more.append(moreLink);
            tr.append(article, edit, line, column, detail, notice, more);
            tbody.append(tr);
        }
    });
    document.head.append(script);
})();
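The batch files this script loads are written by `report.js` above, 200 articles per batch, with the payload assigned to `window.data`. A sketch of the expected shape (the rule name and row values are illustrative):

```js
// Sketch: contents of a batch file such as reports/data/zh-yue/<rule>-0.js.
window.data = {
    // each row: [page, startLine, startCol, message, excerpt]
    articles: [
        ['Example article', 3, 5, 'example lint message', '<excerpt>'],
    ],
    batches: 1, // total number of 200-article batches for this rule
};
```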
package/reports/dist/wiki.js
ADDED
@@ -0,0 +1,19 @@
"use strict";
(() => {
    const lang = new URLSearchParams(location.search).get('lang'), script = document.createElement('script'), title = document.querySelector('title'), h2 = document.querySelector('h2'), tbody = document.querySelector('tbody');
    h2.textContent = h2.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
    script.src = `./data/${lang}/index.js`;
    script.addEventListener('load', () => {
        for (const [rule, count] of data) {
            const tr = document.createElement('tr'), description = document.createElement('td'), pages = document.createElement('td'), a = document.createElement('a');
            a.textContent = rule;
            a.href = `./rule.html?lang=${lang}&rule=${rule}`;
            description.append(a);
            pages.textContent = String(count);
            tr.append(description, pages);
            tbody.append(tr);
        }
    });
    document.head.append(script);
})();
package/reports/index.html
ADDED
@@ -0,0 +1,18 @@
<!DOCTYPE html>
<html dir="ltr" lang="en-US">
<head>
	<title>Diagnostic Report for Wikipedia Dumps</title>
	<meta charset="utf-8">
	<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
	<link rel="stylesheet" href="reports.css">
	<script defer="" src="dist/index.js"></script>
</head>
<body>
	<main>
		<article>
			<h2>Please choose the language edition.</h2>
			<div id="container"></div>
		</article>
	</main>
</body>
</html>
package/reports/reports.css
ADDED
@@ -0,0 +1,79 @@
html, body, div, article, main {
	margin: 0;
	padding: 0;
	border: 0;
	font: inherit;
	vertical-align: baseline;
}
:root {
	font-size: 14px;
	font-family: sans-serif;
}
body {
	display: flex;
	flex-direction: column;
	align-items: center;
	background-color: #f0f0f0;
	line-height: 1.6;
}
main {
	display: flex;
	width: 90vw;
	max-width: 1200px;
	margin: 3rem 0;
	background-color: #fff;
	box-shadow: 0 0 .5rem rgba(0, 0, 0, .1);
	border-radius: .5rem;
}
article {
	display: block;
	width: 100%;
	padding: 1rem 2rem 2rem;
}
h2 {
	font-size: 1.3em;
	margin: .5rem 0;
	font-weight: bold;
	text-align: center;
}
#container, #nav {
	font-size: 1.3em;
	text-align: center;
}
#container > div {
	margin: .1em 0;
}
table {
	margin: 1em auto;
	background-color: #f8f9fa;
	color: #202122;
	border: 1px solid #a2a9b1;
	border-collapse: collapse;
}
th, td {
	border: 1px solid #a2a9b1;
	padding: .4em .8em;
}
th {
	background-color: #eaecf0;
	text-align: center;
}
.excerpt {
	overflow: hidden;
	white-space: nowrap;
	text-overflow: ellipsis;
}
.mono {
	font-family: monospace;
}

@media screen and (max-width: 720px) {
	main {
		width: 100%;
		margin: 0;
		border-radius: 0;
	}
	article {
		padding: 1rem;
	}
}
package/reports/rule.html
ADDED
@@ -0,0 +1,55 @@
<!DOCTYPE html>
<html dir="ltr" lang="en-US">
<head>
	<title>Diagnostic Report for Wikipedia</title>
	<meta charset="utf-8">
	<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
	<link rel="stylesheet" href="reports.css">
	<style>td:nth-child(-n+4):not(:first-child),td:last-child{text-align:center}</style>
	<script defer="" src="dist/rule.js"></script>
</head>
<body>
	<main>
		<article>
			<div id="container">
				<div>
					<a href="./index.html">Homepage</a>
				</div>
				<div>
					<a id="wiki" href="./wiki.html">Wikipedia</a>
				</div>
			</div>
			<h2>Diagnostic Report for Wikipedia</h2>
			<div id="nav">
				<a href="#" id="prev" title="Previous">←</a>
				<span id="start">0</span>
				~
				<span id="end">200</span>
				<a href="#" id="next" title="Next">→</a>
			</div>
			<table style="table-layout:fixed;width:100%">
				<colgroup>
					<col span="1">
					<col span="1" style="width:calc(1.6em + 5ch)">
					<col span="1" style="width:calc(1.6em + 7ch)">
					<col span="1" style="width:calc(1.6em + 7ch)">
					<col span="1" style="width:20%">
					<col span="1" style="width:40%">
					<col span="1" style="width:calc(1.6em + 5ch)">
				</colgroup>
				<tbody>
					<tr>
						<th>Article</th>
						<th>Edit</th>
						<th>Line</th>
						<th>Column</th>
						<th>Detail</th>
						<th>Notice</th>
						<th>More</th>
					</tr>
				</tbody>
			</table>
		</article>
	</main>
</body>
</html>
package/reports/wiki.html
ADDED
@@ -0,0 +1,28 @@
<!DOCTYPE html>
<html dir="ltr" lang="en-US">
<head>
	<title>Diagnostic Report for Wikipedia</title>
	<meta charset="utf-8">
	<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
	<link rel="stylesheet" href="reports.css">
	<script defer="" src="dist/wiki.js"></script>
</head>
<body>
	<main>
		<article>
			<div id="container">
				<a href="./index.html">Homepage</a>
			</div>
			<h2>Diagnostic Report for Wikipedia</h2>
			<table style="text-align:center">
				<tbody>
					<tr>
						<th>Description</th>
						<th>Pages</th>
					</tr>
				</tbody>
			</table>
		</article>
	</main>
</body>
</html>
package/scan.sh
ADDED
@@ -0,0 +1,16 @@
#!/usr/bin/env bash
if (( $# < 2 ))
then
	echo 'Usage: npx lint-wiki-dumps <language> <path to download>'
	echo 'Example: npx lint-wiki-dumps zh-yue ~/Downloads/dumps'
	exit 1
fi
site="${1}wiki" # example: zh-yuewiki
target="${1//-/_}wiki" # example: zh_yuewiki
file="${target}-latest-pages-articles.xml.bz2"
if (( $# < 3 ))
then
	curl --output-dir "$2" -O "https://dumps.wikimedia.org/$target/latest/$file"
	npx getParserConfig "$site" "https://$1.wikipedia.org/w/"
fi
node parser.js "$1" "$2/$file" "$3" "$4" && node report.js "$1"
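For example, `bash scan.sh zh-yue ~/Downloads/dumps` sets `site=zh-yuewiki`, `target=zh_yuewiki`, and `file=zh_yuewiki-latest-pages-articles.xml.bz2`, downloads `https://dumps.wikimedia.org/zh_yuewiki/latest/zh_yuewiki-latest-pages-articles.xml.bz2` into `~/Downloads/dumps`, fetches the parser configuration for `zh-yuewiki`, and then runs `parser.js` and `report.js` on the result.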
|