lint-wiki-dumps 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/download.sh +16 -0
- package/filter.js +12 -0
- package/package.json +12 -10
- package/parser-parallel.js +54 -19
- package/parser.js +15 -52
- package/processor.js +38 -37
- package/report.js +6 -5
- package/reports/article.html +2 -1
- package/reports/dist/article.js +2 -30
- package/reports/dist/article.js.map +7 -0
- package/reports/dist/index.js +2 -15
- package/reports/dist/index.js.map +7 -0
- package/reports/dist/rule.js +2 -49
- package/reports/dist/rule.js.map +7 -0
- package/reports/dist/wiki.js +2 -19
- package/reports/dist/wiki.js.map +7 -0
- package/reports/index.html +1 -0
- package/reports/reports.css +2 -0
- package/reports/rule.html +2 -1
- package/reports/wiki.html +1 -0
- package/scan.sh +12 -6
- package/server.js +38 -0
- package/util.js +69 -0
- package/scan-parallel.sh +0 -23
package/README.md
CHANGED
@@ -28,7 +28,7 @@ npx lint-wiki-dumps <language> <path to download> [path to HTML output]
|
|
28
28
|
npx lint-wiki-dumps zh-yue ~/Downloads/dumps
|
29
29
|
```
|
30
30
|
|
31
|
-
or execute the Bash script `scan.sh`
|
31
|
+
or execute the Bash script `scan.sh` directly:
|
32
32
|
|
33
33
|
```sh
|
34
34
|
bash scan.sh <language> <path to download> [path to HTML output]
|
package/download.sh
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env bash
# Download the latest "pages-articles" dump(s) listed under
# https://dumps.wikimedia.org/<name>/latest/.
#
# Usage: download.sh <name> <output directory>
#   $1  directory name under dumps.wikimedia.org; it is also the prefix of
#       the dump file names ("<name>-latest-pages-articles…")
#   $2  directory handed to `curl --output-dir`
#
# Exit status:
#   0  the multi-part dump files selected by filter.js were requested
#   1  no multi-part files were selected, so the single combined dump was
#      requested instead
#      NOTE(review): presumably the caller relies on this status to tell the
#      two cases apart - confirm before changing it.
#   2  wrong number of arguments

if (( $# < 2 ))
then
	echo "Usage: $0 <name> <output directory>" >&2
	exit 2
fi

# Resolve filter.js next to this script, so the working directory of the
# caller does not matter.
script_dir=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

# Prefer GNU sed under its `gsed` name when it is installed; plain `sed`
# handles the simple substitutions below just as well.
if command -v gsed > /dev/null 2>&1
then
	sed_cmd=gsed
else
	sed_cmd=sed
fi

# Keeps its trailing slash: file names are appended to it directly.
path="https://dumps.wikimedia.org/$1/latest/"

# Scrape the directory index for numbered part files and turn each
# `href="…">` attribute into an absolute URL.
files=$( \
	curl -s "$path" \
	| grep -o "href=\"$1-latest-pages-articles[0-9].*\.bz2\">" \
	| "$sed_cmd" "s|href=\"|$path|;s|\">||" \
)

# $files is left unquoted on purpose: each URL must become its own argument.
filtered=$(node "$script_dir/filter.js" $files)

if (( ${#filtered} < 2 ))
then
	# Nothing usable was listed: fall back to the single combined dump.
	file="${path}$1-latest-pages-articles.xml.bz2"
	curl --output-dir "$2" -O "$file"
	exit 1
else
	# $filtered is a space-separated URL list and is split on purpose.
	curl --output-dir "$2" --remote-name-all $filtered
fi
|
package/filter.js
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
const files = process.argv.slice(2)
|
4
|
+
.map(file => [
|
5
|
+
file,
|
6
|
+
.../\.xml-p(\d+)p(\d+)\.bz2$/u.exec(file).slice(1).map(Number),
|
7
|
+
])
|
8
|
+
.sort(([, a1, a2], [, b1, b2]) => a1 - b1 || a2 - b2)
|
9
|
+
.filter(([, a], i, arr) => a !== arr[i + 1]?.[1])
|
10
|
+
.map(([file]) => file)
|
11
|
+
.join(' ');
|
12
|
+
console.log(files);
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "lint-wiki-dumps",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.2.0",
|
4
4
|
"description": "Lint Wikipedia dumps",
|
5
5
|
"keywords": [
|
6
6
|
"lint",
|
@@ -13,29 +13,31 @@
|
|
13
13
|
"license": "GPL-3.0",
|
14
14
|
"author": "Bhsd",
|
15
15
|
"files": [
|
16
|
-
"
|
16
|
+
"*.sh",
|
17
|
+
"!bump.sh",
|
17
18
|
"*.js",
|
18
19
|
"reports/*.html",
|
19
20
|
"reports/dist/",
|
20
21
|
"reports/*.css"
|
21
22
|
],
|
22
23
|
"bin": {
|
23
|
-
"lint-wiki-dumps": "scan
|
24
|
+
"lint-wiki-dumps": "scan.sh"
|
24
25
|
},
|
25
26
|
"repository": {
|
26
27
|
"type": "git",
|
27
28
|
"url": "git+https://github.com/bhsd-harry/lint-wiki-dumps.git"
|
28
29
|
},
|
29
30
|
"scripts": {
|
31
|
+
"start": "node server.js",
|
30
32
|
"prepublishOnly": "npm run build",
|
31
|
-
"build": "tsc && mv dist/* . &&
|
33
|
+
"build": "tsc && mv dist/* . && esbuild reports/src/*.ts --charset=utf8 --bundle --minify --target=es2019 --sourcemap --outdir=reports/dist && rm reports/dist/common.js*",
|
32
34
|
"lint": "tsc --noEmit && tsc --project reports/tsconfig.json --noEmit && eslint --cache ."
|
33
35
|
},
|
34
36
|
"dependencies": {
|
35
|
-
"@bhsd/common": "^0.9.
|
37
|
+
"@bhsd/common": "^0.9.3",
|
36
38
|
"chalk": "^4.1.2",
|
37
39
|
"unbzip2-stream": "^1.4.3",
|
38
|
-
"wikilint": "^2.
|
40
|
+
"wikilint": "^2.20.0",
|
39
41
|
"xml-stream": "^0.4.5"
|
40
42
|
},
|
41
43
|
"optionalDependencies": {
|
@@ -47,9 +49,9 @@
|
|
47
49
|
"@types/mocha": "^10.0.10",
|
48
50
|
"@types/node": "^22.13.1",
|
49
51
|
"@types/unbzip2-stream": "^1.4.3",
|
50
|
-
"@typescript-eslint/eslint-plugin": "^8.
|
51
|
-
"@typescript-eslint/parser": "^8.
|
52
|
-
"esbuild": "^0.25.
|
52
|
+
"@typescript-eslint/eslint-plugin": "^8.29.0",
|
53
|
+
"@typescript-eslint/parser": "^8.29.0",
|
54
|
+
"esbuild": "^0.25.2",
|
53
55
|
"eslint": "^8.57.1",
|
54
56
|
"eslint-plugin-es-x": "^8.4.1",
|
55
57
|
"eslint-plugin-eslint-comments": "^3.2.0",
|
@@ -63,7 +65,7 @@
|
|
63
65
|
"http-server": "^14.1.1",
|
64
66
|
"mocha": "^11.1.0",
|
65
67
|
"stylelint": "^16.14.1",
|
66
|
-
"typescript": "^5.
|
68
|
+
"typescript": "^5.8.2"
|
67
69
|
},
|
68
70
|
"engines": {
|
69
71
|
"node": ">=18.17.0"
|
package/parser-parallel.js
CHANGED
@@ -9,11 +9,21 @@ const path_1 = __importDefault(require("path"));
|
|
9
9
|
const os_1 = __importDefault(require("os"));
|
10
10
|
const chalk_1 = __importDefault(require("chalk"));
|
11
11
|
const common_1 = require("@bhsd/common");
|
12
|
+
const util_1 = require("./util");
|
12
13
|
const processor_1 = require("./processor");
|
13
|
-
const [, , site, dir] = process.argv, target =
|
14
|
+
const [, , site, dir, refresh] = process.argv, target = (0, util_1.normalize)(site);
|
14
15
|
if (cluster_1.default.isPrimary) {
|
15
|
-
(0,
|
16
|
-
const
|
16
|
+
(0, util_1.init)();
|
17
|
+
const tempFiles = [];
|
18
|
+
for (const file of fs_1.default.readdirSync(util_1.resultDir)) {
|
19
|
+
if (file.startsWith(`${target}-p`) && file.endsWith('.json')) {
|
20
|
+
const oldName = path_1.default.join(util_1.resultDir, file), newName = path_1.default.join(util_1.resultDir, `temp${file.slice(target.length)}`);
|
21
|
+
(0, util_1.reading)(oldName);
|
22
|
+
tempFiles.push(newName);
|
23
|
+
fs_1.default.renameSync(oldName, newName);
|
24
|
+
}
|
25
|
+
}
|
26
|
+
const dumpDir = (0, util_1.replaceTilde)(dir), prefix = `${target}wiki`, files = fs_1.default.readdirSync(dumpDir).filter(file => file.endsWith('.bz2') && file.startsWith(prefix))
|
17
27
|
.map(file => {
|
18
28
|
const filePath = path_1.default.join(dumpDir, file);
|
19
29
|
return [filePath, fs_1.default.statSync(filePath).size];
|
@@ -22,40 +32,51 @@ if (cluster_1.default.isPrimary) {
|
|
22
32
|
// eslint-disable-next-line n/no-unsupported-features/node-builtins
|
23
33
|
workers = new Array(Math.min(os_1.default.availableParallelism(), files.length)).fill(undefined)
|
24
34
|
.map(() => cluster_1.default.fork());
|
25
|
-
let i = 0, n = 0;
|
35
|
+
let i = 0, n = 0, m = 0;
|
26
36
|
console.time('parse');
|
27
37
|
for (; i < workers.length; i++) {
|
28
38
|
const worker = workers[i];
|
29
|
-
|
39
|
+
// eslint-disable-next-line @typescript-eslint/no-loop-func
|
40
|
+
worker.on('message', ([count, total]) => {
|
30
41
|
n += count;
|
42
|
+
m += total;
|
31
43
|
if (i < files.length) {
|
32
|
-
worker.send(
|
44
|
+
worker.send(files[i][0]);
|
33
45
|
i++;
|
34
46
|
}
|
35
47
|
else {
|
36
48
|
worker.disconnect();
|
37
49
|
}
|
38
|
-
}).send(
|
50
|
+
}).send(files[i][0]);
|
39
51
|
}
|
40
52
|
process.on('exit', () => {
|
41
53
|
console.timeEnd('parse');
|
42
|
-
console.log(chalk_1.default.green(`Parsed ${n} pages in total`));
|
54
|
+
console.log(chalk_1.default.green(`Parsed ${n} / ${m} pages in total`));
|
55
|
+
for (const file of tempFiles) {
|
56
|
+
fs_1.default.unlinkSync(file);
|
57
|
+
}
|
43
58
|
});
|
44
59
|
}
|
45
60
|
else {
|
46
|
-
|
47
|
-
const
|
61
|
+
const getStartEnd = (f) => {
|
62
|
+
const p2 = f.lastIndexOf('p');
|
63
|
+
return [Number(f.slice(6, p2)), Number(f.slice(p2 + 1, -5))];
|
64
|
+
};
|
65
|
+
const tempFiles = fs_1.default.readdirSync(util_1.resultDir)
|
66
|
+
.filter(file => file.startsWith('temp-p') && file.endsWith('.json')), ranges = tempFiles.map(getStartEnd), max = Math.max(...ranges.map(([, end]) => end));
|
67
|
+
let start, end, last, data;
|
68
|
+
process.on('message', (file) => {
|
69
|
+
const filename = `${target}${file.slice(file.lastIndexOf('-'), -4)}.json`, tempPath = (0, util_1.getTempPath)(filename), results = (0, util_1.getWriteStream)(tempPath, () => {
|
70
|
+
fs_1.default.renameSync(tempPath, path_1.default.join(util_1.resultDir, filename));
|
71
|
+
process.send([processor.parsed, i]);
|
72
|
+
}), processor = new processor_1.Processor(site, results, refresh);
|
48
73
|
let i = 0;
|
49
|
-
results.write('{');
|
50
|
-
results.on('close', () => {
|
51
|
-
process.send(i);
|
52
|
-
});
|
53
74
|
const stop = () => {
|
54
|
-
processor.stop(`parse ${file}`,
|
75
|
+
processor.stop(`parse ${file}`, `${i} pages from ${file}`);
|
55
76
|
};
|
56
77
|
const lint = ($text, ns, title, date, retry = 0) => {
|
57
78
|
try {
|
58
|
-
processor.lint($text, ns, title, date);
|
79
|
+
processor.lint($text, ns, title, date, last, data);
|
59
80
|
return true;
|
60
81
|
}
|
61
82
|
catch (e) {
|
@@ -78,10 +99,24 @@ else {
|
|
78
99
|
}
|
79
100
|
};
|
80
101
|
console.time(`parse ${file}`);
|
81
|
-
const stream = (0,
|
82
|
-
stream.on('endElement: page', ({ title, ns, revision: { model, timestamp, text: { $text } } }) => {
|
83
|
-
if (
|
102
|
+
const stream = (0, util_1.getXmlStream)(file);
|
103
|
+
stream.on('endElement: page', ({ title, ns, id, revision: { model, timestamp, text: { $text } } }) => {
|
104
|
+
if ((0, util_1.isArticle)($text, ns, model)) {
|
84
105
|
(0, common_1.refreshStdout)(`${i++} ${title}`);
|
106
|
+
const pageid = Number(id);
|
107
|
+
if (start === undefined || end === undefined || pageid < start || pageid > end) {
|
108
|
+
const cur = pageid <= max && ranges.findIndex(([a, b]) => a <= pageid && b >= pageid);
|
109
|
+
if (cur === false || cur === -1) {
|
110
|
+
start = undefined;
|
111
|
+
end = undefined;
|
112
|
+
last = undefined;
|
113
|
+
}
|
114
|
+
else {
|
115
|
+
[start, end] = ranges[cur];
|
116
|
+
data = fs_1.default.readFileSync(path_1.default.join(util_1.resultDir, tempFiles[cur]), 'utf8');
|
117
|
+
last = (0, util_1.getTimestamp)(data);
|
118
|
+
}
|
119
|
+
}
|
85
120
|
lint($text, ns, title, new Date(timestamp));
|
86
121
|
}
|
87
122
|
});
|
package/parser.js
CHANGED
@@ -5,64 +5,27 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
6
6
|
const fs_1 = __importDefault(require("fs"));
|
7
7
|
const path_1 = __importDefault(require("path"));
|
8
|
-
const os_1 = __importDefault(require("os"));
|
9
8
|
const common_1 = require("@bhsd/common");
|
9
|
+
const util_1 = require("./util");
|
10
10
|
const processor_1 = require("./processor");
|
11
|
-
const
|
12
|
-
|
13
|
-
|
14
|
-
return undefined;
|
15
|
-
}
|
16
|
-
const i = data.indexOf('"#timestamp": "') + 15;
|
17
|
-
return data.slice(i, data.indexOf('"', i));
|
18
|
-
};
|
19
|
-
const getErrors = (page) => {
|
20
|
-
if (!data) {
|
21
|
-
return undefined;
|
22
|
-
}
|
23
|
-
const str = JSON.stringify(page), i = data.indexOf(`${str}: [`);
|
24
|
-
if (i === -1) {
|
25
|
-
return undefined;
|
26
|
-
}
|
27
|
-
const j = i + str.length + 2;
|
28
|
-
return JSON.parse(data.slice(j, data.indexOf('\n]', j) + 2));
|
29
|
-
};
|
30
|
-
(0, processor_1.init)();
|
31
|
-
const time = getTimestamp(), last = time && new Date(time), results = fs_1.default.createWriteStream(path_1.default.join(processor_1.resultDir, `${site}.json`), { flags: restart ? 'a' : 'w' }), processor = new processor_1.Processor(site, results, last);
|
32
|
-
let i = 0, stopping = false, restarted = !restart;
|
33
|
-
if (!restart) {
|
34
|
-
results.write('{');
|
11
|
+
const [, , site, file, refresh] = process.argv, filename = `${(0, util_1.normalize)(site)}.json`, filePath = path_1.default.join(util_1.resultDir, filename), tempPath = (0, util_1.getTempPath)(filename), data = fs_1.default.existsSync(filePath) && fs_1.default.readFileSync(filePath, 'utf8');
|
12
|
+
if (data) {
|
13
|
+
(0, util_1.reading)(filePath);
|
35
14
|
}
|
36
|
-
|
15
|
+
(0, util_1.init)();
|
16
|
+
const last = (0, util_1.getTimestamp)(data), results = (0, util_1.getWriteStream)(tempPath, () => {
|
17
|
+
fs_1.default.renameSync(tempPath, filePath);
|
37
18
|
process.exit(); // eslint-disable-line n/no-process-exit
|
38
|
-
});
|
39
|
-
|
40
|
-
stopping = true;
|
41
|
-
processor.stop('parse', `Parsed ${i} pages`);
|
42
|
-
};
|
19
|
+
}), processor = new processor_1.Processor(site, results, refresh, last);
|
20
|
+
let i = 0;
|
43
21
|
console.time('parse');
|
44
|
-
const stream = (0,
|
22
|
+
const stream = (0, util_1.getXmlStream)((0, util_1.replaceTilde)(file));
|
45
23
|
stream.on('endElement: page', ({ title, ns, revision: { model, timestamp, text: { $text } } }) => {
|
46
|
-
if (
|
47
|
-
if (!stopping) {
|
48
|
-
stop();
|
49
|
-
}
|
50
|
-
}
|
51
|
-
else if (restarted && model === 'wikitext' && $text && ns === '0') {
|
24
|
+
if ((0, util_1.isArticle)($text, ns, model)) {
|
52
25
|
(0, common_1.refreshStdout)(`${i++} ${title}`);
|
53
|
-
|
54
|
-
if (last && date <= last) {
|
55
|
-
const previous = getErrors(title);
|
56
|
-
if (previous) {
|
57
|
-
processor.newEntry(title, previous);
|
58
|
-
}
|
59
|
-
}
|
60
|
-
else {
|
61
|
-
processor.lint($text, ns, title, date);
|
62
|
-
}
|
63
|
-
}
|
64
|
-
else if (title === restart) {
|
65
|
-
restarted = true;
|
26
|
+
processor.lint($text, ns, title, new Date(timestamp), last, data);
|
66
27
|
}
|
67
28
|
});
|
68
|
-
stream.on('end',
|
29
|
+
stream.on('end', () => {
|
30
|
+
processor.stop('parse', `${i} pages`);
|
31
|
+
});
|
package/processor.js
CHANGED
@@ -3,56 +3,38 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
4
|
};
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
-
exports.Processor =
|
7
|
-
const fs_1 = __importDefault(require("fs"));
|
8
|
-
const path_1 = __importDefault(require("path"));
|
9
|
-
const perf_hooks_1 = require("perf_hooks");
|
6
|
+
exports.Processor = void 0;
|
10
7
|
const cluster_1 = __importDefault(require("cluster"));
|
11
8
|
const chalk_1 = __importDefault(require("chalk"));
|
12
|
-
const unbzip2_stream_1 = __importDefault(require("unbzip2-stream"));
|
13
|
-
const xml_stream_1 = __importDefault(require("xml-stream"));
|
14
9
|
const wikilint_1 = __importDefault(require("wikilint"));
|
15
|
-
|
10
|
+
const util_1 = require("./util");
|
16
11
|
const ignore = new Set(['no-arg', 'url-encoding', 'h1', 'var-anchor']);
|
17
|
-
const init = () => {
|
18
|
-
if (!fs_1.default.existsSync(exports.resultDir)) {
|
19
|
-
fs_1.default.mkdirSync(exports.resultDir);
|
20
|
-
}
|
21
|
-
};
|
22
|
-
exports.init = init;
|
23
|
-
const getXmlStream = (file) => {
|
24
|
-
const stream = new xml_stream_1.default(fs_1.default.createReadStream(file).pipe((0, unbzip2_stream_1.default)()));
|
25
|
-
stream.preserve('text', true);
|
26
|
-
return stream;
|
27
|
-
};
|
28
|
-
exports.getXmlStream = getXmlStream;
|
29
12
|
class Processor {
|
13
|
+
parsed = 0;
|
30
14
|
#failed = 0;
|
31
15
|
#comma = '';
|
32
|
-
#worst;
|
33
16
|
#results;
|
17
|
+
#refresh;
|
34
18
|
#latest;
|
35
19
|
/** @param site site nickname */
|
36
|
-
constructor(site, results, latest) {
|
20
|
+
constructor(site, results, refresh, latest) {
|
37
21
|
wikilint_1.default.config = `${site}wiki`;
|
38
22
|
this.#results = results;
|
23
|
+
this.#refresh = Boolean(refresh);
|
39
24
|
this.#latest = latest;
|
40
25
|
}
|
41
26
|
/**
|
42
27
|
* Stop the processing and log the results.
|
43
28
|
* @param timer timer name
|
44
|
-
* @param msg message to log
|
29
|
+
* @param msg additional message to log
|
45
30
|
*/
|
46
|
-
stop(timer, msg) {
|
31
|
+
stop(timer, msg = '') {
|
47
32
|
console.log();
|
48
33
|
console.timeEnd(timer);
|
49
|
-
console.log(chalk_1.default.green(msg));
|
34
|
+
console.log(chalk_1.default.green(`Parsed ${this.parsed} / ${msg}`));
|
50
35
|
if (this.#failed) {
|
51
36
|
console.error(chalk_1.default.red(`${this.#failed} pages failed to parse`));
|
52
37
|
}
|
53
|
-
if (this.#worst) {
|
54
|
-
console.info(chalk_1.default.yellow(`Worst page: ${this.#worst.title} (${this.#worst.duration.toFixed(3)} ms)`));
|
55
|
-
}
|
56
38
|
this.#results.write(`${this.#comma}\n"#timestamp": ${JSON.stringify(this.#latest)}\n}`);
|
57
39
|
this.#results.end();
|
58
40
|
}
|
@@ -62,7 +44,7 @@ class Processor {
|
|
62
44
|
* @param errors lint errors
|
63
45
|
*/
|
64
46
|
newEntry(title, errors) {
|
65
|
-
this.#results.write(`${this.#comma}\n${JSON.stringify(title)}: ${JSON.stringify(errors, null, '\t')}`);
|
47
|
+
this.#results.write(`${this.#comma}\n${JSON.stringify(title)}: ${typeof errors === 'string' ? errors : JSON.stringify(errors, null, '\t')}`);
|
66
48
|
this.#comma ||= ',';
|
67
49
|
}
|
68
50
|
/**
|
@@ -71,31 +53,50 @@ class Processor {
|
|
71
53
|
* @param ns page namespace
|
72
54
|
* @param title page title
|
73
55
|
* @param date page revision date
|
56
|
+
* @param last last revision date
|
57
|
+
* @param data previous results
|
74
58
|
* @throws `RangeError` maximum heap size exceeded
|
75
59
|
*/
|
76
|
-
lint($text, ns, title, date) {
|
60
|
+
lint($text, ns, title, date, last, data) {
|
77
61
|
if (!this.#latest || date > this.#latest) {
|
78
62
|
this.#latest = date;
|
79
63
|
}
|
64
|
+
if (last && date <= last) {
|
65
|
+
const previous = (0, util_1.getErrors)(data, title);
|
66
|
+
if (!previous) {
|
67
|
+
return;
|
68
|
+
}
|
69
|
+
else if (!this.#refresh) {
|
70
|
+
this.newEntry(title, previous);
|
71
|
+
return;
|
72
|
+
}
|
73
|
+
}
|
80
74
|
try {
|
81
|
-
const
|
82
|
-
.filter(({ severity, rule }) => severity === 'error' && !ignore.has(rule))
|
75
|
+
const errors = wikilint_1.default.parse($text, ns === '828').lint()
|
76
|
+
.filter(({ severity, rule }) => severity === 'error' && !ignore.has(rule));
|
77
|
+
this.parsed++;
|
83
78
|
if (errors.length > 0) {
|
84
|
-
this.newEntry(title, errors.map(({ severity, suggestions, fix,
|
79
|
+
this.newEntry(title, errors.map(({ severity, suggestions, fix,
|
80
|
+
/* DISABLED */
|
81
|
+
code, startIndex, endLine, endCol, endIndex,
|
82
|
+
/* DISABLED END */
|
83
|
+
...e }) => ({
|
85
84
|
...e,
|
85
|
+
// eslint-disable-next-line @stylistic/multiline-comment-style
|
86
|
+
/* DISABLED
|
87
|
+
|
86
88
|
...suggestions && {
|
87
89
|
suggestions: suggestions.map(action => ({
|
88
90
|
...action,
|
89
91
|
original: $text.slice(...action.range),
|
90
92
|
})),
|
91
93
|
},
|
92
|
-
...fix && {
|
93
|
-
|
94
|
+
...fix && {fix: {...fix, original: $text.slice(...fix.range)}},
|
95
|
+
|
96
|
+
*/
|
97
|
+
excerpt: $text.slice(startIndex, endIndex).slice(0, util_1.MAX),
|
94
98
|
})));
|
95
99
|
}
|
96
|
-
if (!this.#worst || duration > this.#worst.duration) {
|
97
|
-
this.#worst = { title, duration };
|
98
|
-
}
|
99
100
|
}
|
100
101
|
catch (e) {
|
101
102
|
if (cluster_1.default.isWorker && e instanceof RangeError && e.message === 'Maximum heap size exceeded') {
|
package/report.js
CHANGED
@@ -7,7 +7,7 @@ const fs_1 = __importDefault(require("fs"));
|
|
7
7
|
const path_1 = __importDefault(require("path"));
|
8
8
|
const crypto_1 = require("crypto");
|
9
9
|
const chalk_1 = __importDefault(require("chalk"));
|
10
|
-
const
|
10
|
+
const util_1 = require("./util");
|
11
11
|
const { argv } = process, [, , lang] = argv, defaultOurDir = path_1.default.join(__dirname, 'reports');
|
12
12
|
let [, , , outDir] = argv;
|
13
13
|
const mkdir = (dir, empty) => {
|
@@ -38,7 +38,7 @@ const initJS = (file) => {
|
|
38
38
|
return stream;
|
39
39
|
};
|
40
40
|
const compare = (a, b) => a.localeCompare(b);
|
41
|
-
const
|
41
|
+
const dir = fs_1.default.readdirSync(util_1.resultDir), summary = new Set(), ruleRecords = new Map(), wiki = {}, siteDir = path_1.default.join(dataDir, lang), articlesDir = path_1.default.join(siteDir, 'pages');
|
42
42
|
let latest;
|
43
43
|
mkdir(siteDir, true);
|
44
44
|
mkdir(articlesDir);
|
@@ -46,12 +46,13 @@ for (const file of dir) {
|
|
46
46
|
if (!file.endsWith('.json')) {
|
47
47
|
continue;
|
48
48
|
}
|
49
|
-
const fileDir = path_1.default.join(resultDir, file);
|
49
|
+
const fileDir = path_1.default.join(util_1.resultDir, file);
|
50
50
|
if (!fs_1.default.existsSync(fileDir)) {
|
51
51
|
console.error(chalk_1.default.red(`Failed to read ${file}`));
|
52
52
|
continue;
|
53
53
|
}
|
54
|
-
const k = file.search(
|
54
|
+
const k = file.search(/-(?:p\d+){2}\.json$/u), site = (k === -1 ? file.slice(0, -5) : file.slice(0, k))
|
55
|
+
.replaceAll('_', '-');
|
55
56
|
summary.add(site);
|
56
57
|
if (lang !== site) {
|
57
58
|
continue;
|
@@ -82,7 +83,7 @@ for (const file of dir) {
|
|
82
83
|
ruleRecords.set(rule, ruleRecord);
|
83
84
|
}
|
84
85
|
ruleRecord[1].push(page);
|
85
|
-
ruleRecord[0] += `${JSON.stringify([page, line, col, message, excerpt.slice(0,
|
86
|
+
ruleRecord[0] += `${JSON.stringify([page, line, col, message, excerpt.slice(0, util_1.MAX * 0.8)], null, '\t')},`;
|
86
87
|
}
|
87
88
|
}
|
88
89
|
writeJS(info, path_1.default.join(site, 'pages', hash));
|
package/reports/article.html
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
<title>Diagnostic Report for Wikipedia</title>
|
5
5
|
<meta charset="utf-8">
|
6
6
|
<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
|
7
|
+
<link rel="icon" href="data:image/png;base64,iVBORw0KGgo=">
|
7
8
|
<link rel="stylesheet" href="reports.css">
|
8
9
|
<style>td:nth-child(-n+3){text-align:center}</style>
|
9
10
|
<script defer="" src="dist/article.js"></script>
|
@@ -23,7 +24,7 @@
|
|
23
24
|
Diagnostic Report for
|
24
25
|
<a id="article">Article</a>
|
25
26
|
</h2>
|
26
|
-
<table style="table-layout:fixed;width:100
|
27
|
+
<table style="table-layout:fixed;width:100%;min-width:900px">
|
27
28
|
<colgroup>
|
28
29
|
<col span="1">
|
29
30
|
<col span="1" style="width:calc(1.6em + 7ch)">
|
package/reports/dist/article.js
CHANGED
@@ -1,30 +1,2 @@
|
|
1
|
-
"use strict";
|
2
|
-
|
3
|
-
const search = new URLSearchParams(location.search), page = search.get('page'), lang = search.get('lang'), buffer = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(page)), hash = [...new Uint8Array(buffer)].slice(0, 4)
|
4
|
-
.map(b => b.toString(16).padStart(2, '0'))
|
5
|
-
.join(''), title = document.querySelector('title'), h2 = document.getElementById('article'), wiki = document.getElementById('wiki'), tbody = document.querySelector('tbody'), script = document.createElement('script');
|
6
|
-
title.textContent = title.textContent.replace('Wikipedia', page);
|
7
|
-
h2.textContent = page;
|
8
|
-
h2.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
|
9
|
-
wiki.textContent = `${lang}wiki`;
|
10
|
-
wiki.href += `?lang=${lang}`;
|
11
|
-
script.src = `./data/${lang}/pages/${hash}.js`;
|
12
|
-
script.addEventListener('load', () => {
|
13
|
-
for (const entry of data) {
|
14
|
-
const [rule, startLine, startCol, message, excerpt] = entry, tr = document.createElement('tr'), description = document.createElement('td'), line = document.createElement('td'), column = document.createElement('td'), detail = document.createElement('td'), notice = document.createElement('td'), descriptionLink = document.createElement('a');
|
15
|
-
descriptionLink.textContent = rule;
|
16
|
-
descriptionLink.href = `./rule.html?lang=${lang}&rule=${rule}`;
|
17
|
-
description.className = 'excerpt';
|
18
|
-
description.append(descriptionLink);
|
19
|
-
line.textContent = String(startLine);
|
20
|
-
column.textContent = String(startCol);
|
21
|
-
detail.textContent = message;
|
22
|
-
detail.className = 'excerpt';
|
23
|
-
notice.textContent = excerpt;
|
24
|
-
notice.className = 'excerpt mono';
|
25
|
-
tr.append(description, line, column, detail, notice);
|
26
|
-
tbody.append(tr);
|
27
|
-
}
|
28
|
-
});
|
29
|
-
document.head.append(script);
|
30
|
-
})();
|
1
|
+
"use strict";(()=>{var c=(n,e)=>{n.href=e,e.startsWith("https://")&&(n.target="_blank",n.rel="noopener")},l=(n,e)=>{let t=document.createElement("script");t.src=n,t.addEventListener("load",e),document.head.append(t)},d=n=>n==="mediawiki"?"www.mediawiki.org":`${n}.wikipedia.org`,g=(n,e)=>{let t=document.querySelector(n);return t.textContent=t.textContent.replace("Wikipedia",e),t},a=(n,e,t)=>{let r=document.getElementById(n);typeof e=="function"?r.href=e(r.href):e?c(r,e):r.removeAttribute("href"),t!==void 0&&(r.textContent=t)},m=(n,e,t,r)=>{let i=document.createElement(n),o=document.createElement("a");return o.textContent=e,c(o,t),i.append(o),r&&(i.className=r),i},s=(n,e)=>{let t=document.createElement("td");return t.textContent=n,e&&(t.className=e),t},p=(...n)=>{let e=document.createElement("tr");e.append(...n),document.querySelector("tbody").append(e)},u=(n,e,t,r)=>[s(String(n)),s(String(e)),s(t,"excerpt"),s(r,"excerpt mono")];(async()=>{let n=new URLSearchParams(location.search),e=n.get("page"),t=n.get("lang"),r=await crypto.subtle.digest("SHA-256",new TextEncoder().encode(e)),i=[...new Uint8Array(r)].slice(0,4).map(o=>o.toString(16).padStart(2,"0")).join("");g("title",e),a("article",`https://${d(t)}/wiki/${encodeURIComponent(e)}?redirect=no`,e),a("wiki",o=>`${o}?lang=${t}`,`${t}wiki`),l(`./data/${t}/pages/${i}.js`,()=>{for(let[o,E,f,x,L]of data)p(m("td",o,`./rule.html?lang=${t}&rule=${o}`,"excerpt"),...u(E,f,x,L))})})();})();
|
2
|
+
//# sourceMappingURL=article.js.map
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"version": 3,
|
3
|
+
"sources": ["../src/common.ts", "../src/article.ts"],
|
4
|
+
"sourcesContent": ["const armorLink = (a: HTMLAnchorElement, href: string): void => {\n\ta.href = href;\n\tif (href.startsWith('https://')) {\n\t\ta.target = '_blank';\n\t\ta.rel = 'noopener';\n\t}\n};\n\nexport const load = (src: string, callback: () => void): void => {\n\tconst script = document.createElement('script');\n\tscript.src = src;\n\tscript.addEventListener('load', callback);\n\tdocument.head.append(script);\n};\n\nexport const getHost = (lang: string): string => lang === 'mediawiki' ? 'www.mediawiki.org' : `${lang}.wikipedia.org`;\n\nexport const update = (tag: string, replace: string): Element => {\n\tconst ele = document.querySelector(tag)!;\n\tele.textContent = ele.textContent!.replace('Wikipedia', replace);\n\treturn ele;\n};\n\nexport const updateLink = (id: string, href: string | ((s: string) => string) | false, text?: string): void => {\n\tconst a = document.getElementById(id) as HTMLAnchorElement;\n\tif (typeof href === 'function') {\n\t\ta.href = href(a.href);\n\t} else if (href) {\n\t\tarmorLink(a, href);\n\t} else {\n\t\ta.removeAttribute('href');\n\t}\n\tif (text !== undefined) {\n\t\ta.textContent = text;\n\t}\n};\n\nexport const addLink = (tag: string, text: string, href: string, className?: string): HTMLElement => {\n\tconst container = document.createElement(tag),\n\t\ta = document.createElement('a');\n\ta.textContent = text;\n\tarmorLink(a, href);\n\tcontainer.append(a);\n\tif (className) {\n\t\tcontainer.className = className;\n\t}\n\treturn container;\n};\n\nexport const createTd = (text: string, className?: string): HTMLTableCellElement => {\n\tconst td = document.createElement('td');\n\ttd.textContent = text;\n\tif (className) {\n\t\ttd.className = className;\n\t}\n\treturn td;\n};\n\nexport const insertRow = (...tds: HTMLElement[]): void => {\n\tconst tr = document.createElement('tr');\n\ttr.append(...tds);\n\tdocument.querySelector('tbody')!.append(tr);\n};\n\nexport const getErrorInfo = (\n\tstartLine: number,\n\tstartCol: 
number,\n\tmessage: string,\n\texcerpt: string,\n): [HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement] => [\n\tcreateTd(String(startLine)),\n\tcreateTd(String(startCol)),\n\tcreateTd(message, 'excerpt'),\n\tcreateTd(excerpt, 'excerpt mono'),\n];\n", "import {load, getHost, update, updateLink, addLink, insertRow, getErrorInfo} from './common';\n\ndeclare const data: [string, number, number, string, string][];\n\n(async () => {\n\tconst search = new URLSearchParams(location.search),\n\t\tpage = search.get('page')!,\n\t\tlang = search.get('lang')!,\n\t\tbuffer = await crypto.subtle.digest('SHA-256', new TextEncoder().encode(page)),\n\t\thash = [...new Uint8Array(buffer)].slice(0, 4)\n\t\t\t.map(b => b.toString(16).padStart(2, '0'))\n\t\t\t.join('');\n\tupdate('title', page);\n\tupdateLink('article', `https://${getHost(lang)}/wiki/${encodeURIComponent(page)}?redirect=no`, page);\n\tupdateLink('wiki', s => `${s}?lang=${lang}`, `${lang}wiki`);\n\tload(`./data/${lang}/pages/${hash}.js`, () => {\n\t\tfor (const [rule, startLine, startCol, message, excerpt] of data) {\n\t\t\tinsertRow(\n\t\t\t\taddLink('td', rule, `./rule.html?lang=${lang}&rule=${rule}`, 'excerpt'),\n\t\t\t\t...getErrorInfo(startLine, startCol, message, excerpt),\n\t\t\t);\n\t\t}\n\t});\n})();\n"],
|
5
|
+
"mappings": "mBAAA,IAAMA,EAAY,CAACC,EAAsBC,IAAuB,CAC/DD,EAAE,KAAOC,EACLA,EAAK,WAAW,UAAU,IAC7BD,EAAE,OAAS,SACXA,EAAE,IAAM,WAEV,EAEaE,EAAO,CAACC,EAAaC,IAA+B,CAChE,IAAMC,EAAS,SAAS,cAAc,QAAQ,EAC9CA,EAAO,IAAMF,EACbE,EAAO,iBAAiB,OAAQD,CAAQ,EACxC,SAAS,KAAK,OAAOC,CAAM,CAC5B,EAEaC,EAAWC,GAAyBA,IAAS,YAAc,oBAAsB,GAAGA,CAAI,iBAExFC,EAAS,CAACC,EAAaC,IAA6B,CAChE,IAAMC,EAAM,SAAS,cAAcF,CAAG,EACtC,OAAAE,EAAI,YAAcA,EAAI,YAAa,QAAQ,YAAaD,CAAO,EACxDC,CACR,EAEaC,EAAa,CAACC,EAAYZ,EAAgDa,IAAwB,CAC9G,IAAMd,EAAI,SAAS,eAAea,CAAE,EAChC,OAAOZ,GAAS,WACnBD,EAAE,KAAOC,EAAKD,EAAE,IAAI,EACVC,EACVF,EAAUC,EAAGC,CAAI,EAEjBD,EAAE,gBAAgB,MAAM,EAErBc,IAAS,SACZd,EAAE,YAAcc,EAElB,EAEaC,EAAU,CAACN,EAAaK,EAAcb,EAAce,IAAoC,CACpG,IAAMC,EAAY,SAAS,cAAcR,CAAG,EAC3CT,EAAI,SAAS,cAAc,GAAG,EAC/B,OAAAA,EAAE,YAAcc,EAChBf,EAAUC,EAAGC,CAAI,EACjBgB,EAAU,OAAOjB,CAAC,EACdgB,IACHC,EAAU,UAAYD,GAEhBC,CACR,EAEaC,EAAW,CAACJ,EAAcE,IAA6C,CACnF,IAAMG,EAAK,SAAS,cAAc,IAAI,EACtC,OAAAA,EAAG,YAAcL,EACbE,IACHG,EAAG,UAAYH,GAETG,CACR,EAEaC,EAAY,IAAIC,IAA6B,CACzD,IAAMC,EAAK,SAAS,cAAc,IAAI,EACtCA,EAAG,OAAO,GAAGD,CAAG,EAChB,SAAS,cAAc,OAAO,EAAG,OAAOC,CAAE,CAC3C,EAEaC,EAAe,CAC3BC,EACAC,EACAC,EACAC,IAC8F,CAC9FT,EAAS,OAAOM,CAAS,CAAC,EAC1BN,EAAS,OAAOO,CAAQ,CAAC,EACzBP,EAASQ,EAAS,SAAS,EAC3BR,EAASS,EAAS,cAAc,CACjC,GCtEC,SAAY,CACZ,IAAMC,EAAS,IAAI,gBAAgB,SAAS,MAAM,EACjDC,EAAOD,EAAO,IAAI,MAAM,EACxBE,EAAOF,EAAO,IAAI,MAAM,EACxBG,EAAS,MAAM,OAAO,OAAO,OAAO,UAAW,IAAI,YAAY,EAAE,OAAOF,CAAI,CAAC,EAC7EG,EAAO,CAAC,GAAG,IAAI,WAAWD,CAAM,CAAC,EAAE,MAAM,EAAG,CAAC,EAC3C,IAAIE,GAAKA,EAAE,SAAS,EAAE,EAAE,SAAS,EAAG,GAAG,CAAC,EACxC,KAAK,EAAE,EACVC,EAAO,QAASL,CAAI,EACpBM,EAAW,UAAW,WAAWC,EAAQN,CAAI,CAAC,SAAS,mBAAmBD,CAAI,CAAC,eAAgBA,CAAI,EACnGM,EAAW,OAAQE,GAAK,GAAGA,CAAC,SAASP,CAAI,GAAI,GAAGA,CAAI,MAAM,EAC1DQ,EAAK,UAAUR,CAAI,UAAUE,CAAI,MAAO,IAAM,CAC7C,OAAW,CAACO,EAAMC,EAAWC,EAAUC,EAASC,CAAO,IAAK,KAC3DC,EACCC,EAAQ,KAAMN,EAAM,oBAAoBT,CAAI,SAASS,CAAI,GAAI,SAAS,EACtE,GAAGO,EAAaN,EAAWC,EAAUC,EAASC,CAAO,CACtD,CAEF,CAAC,CACF,GAAG",
|
6
|
+
"names": ["armorLink", "a", "href", "load", "src", "callback", "script", "getHost", "lang", "update", "tag", "replace", "ele", "updateLink", "id", "text", "addLink", "className", "container", "createTd", "td", "insertRow", "tds", "tr", "getErrorInfo", "startLine", "startCol", "message", "excerpt", "search", "page", "lang", "buffer", "hash", "b", "update", "updateLink", "getHost", "s", "load", "rule", "startLine", "startCol", "message", "excerpt", "insertRow", "addLink", "getErrorInfo"]
|
7
|
+
}
|
package/reports/dist/index.js
CHANGED
@@ -1,15 +1,2 @@
|
|
1
|
-
"use strict";
|
2
|
-
|
3
|
-
const container = document.getElementById('container'), script = document.createElement('script');
|
4
|
-
script.src = './data/index.js';
|
5
|
-
script.addEventListener('load', () => {
|
6
|
-
container.append(...window.data.map(lang => {
|
7
|
-
const div = document.createElement('div'), a = document.createElement('a');
|
8
|
-
a.href = `./wiki.html?lang=${lang}`;
|
9
|
-
a.innerText = `${lang}.wikipedia.org`;
|
10
|
-
div.append(a);
|
11
|
-
return div;
|
12
|
-
}));
|
13
|
-
});
|
14
|
-
document.head.append(script);
|
15
|
-
})();
|
1
|
+
"use strict";(()=>{var a=(e,t)=>{e.href=t,t.startsWith("https://")&&(e.target="_blank",e.rel="noopener")},s=(e,t)=>{let n=document.createElement("script");n.src=e,n.addEventListener("load",t),document.head.append(n)};var d=(e,t,n,i)=>{let r=document.createElement(e),o=document.createElement("a");return o.textContent=t,a(o,n),r.append(o),i&&(r.className=i),r};s("./data/index.js",()=>{document.getElementById("container").append(...data.map(e=>d("div",`${e}wiki`,`./wiki.html?lang=${e}`)))});})();
|
2
|
+
//# sourceMappingURL=index.js.map
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"version": 3,
|
3
|
+
"sources": ["../src/common.ts", "../src/index.ts"],
|
4
|
+
"sourcesContent": ["const armorLink = (a: HTMLAnchorElement, href: string): void => {\n\ta.href = href;\n\tif (href.startsWith('https://')) {\n\t\ta.target = '_blank';\n\t\ta.rel = 'noopener';\n\t}\n};\n\nexport const load = (src: string, callback: () => void): void => {\n\tconst script = document.createElement('script');\n\tscript.src = src;\n\tscript.addEventListener('load', callback);\n\tdocument.head.append(script);\n};\n\nexport const getHost = (lang: string): string => lang === 'mediawiki' ? 'www.mediawiki.org' : `${lang}.wikipedia.org`;\n\nexport const update = (tag: string, replace: string): Element => {\n\tconst ele = document.querySelector(tag)!;\n\tele.textContent = ele.textContent!.replace('Wikipedia', replace);\n\treturn ele;\n};\n\nexport const updateLink = (id: string, href: string | ((s: string) => string) | false, text?: string): void => {\n\tconst a = document.getElementById(id) as HTMLAnchorElement;\n\tif (typeof href === 'function') {\n\t\ta.href = href(a.href);\n\t} else if (href) {\n\t\tarmorLink(a, href);\n\t} else {\n\t\ta.removeAttribute('href');\n\t}\n\tif (text !== undefined) {\n\t\ta.textContent = text;\n\t}\n};\n\nexport const addLink = (tag: string, text: string, href: string, className?: string): HTMLElement => {\n\tconst container = document.createElement(tag),\n\t\ta = document.createElement('a');\n\ta.textContent = text;\n\tarmorLink(a, href);\n\tcontainer.append(a);\n\tif (className) {\n\t\tcontainer.className = className;\n\t}\n\treturn container;\n};\n\nexport const createTd = (text: string, className?: string): HTMLTableCellElement => {\n\tconst td = document.createElement('td');\n\ttd.textContent = text;\n\tif (className) {\n\t\ttd.className = className;\n\t}\n\treturn td;\n};\n\nexport const insertRow = (...tds: HTMLElement[]): void => {\n\tconst tr = document.createElement('tr');\n\ttr.append(...tds);\n\tdocument.querySelector('tbody')!.append(tr);\n};\n\nexport const getErrorInfo = (\n\tstartLine: number,\n\tstartCol: 
number,\n\tmessage: string,\n\texcerpt: string,\n): [HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement] => [\n\tcreateTd(String(startLine)),\n\tcreateTd(String(startCol)),\n\tcreateTd(message, 'excerpt'),\n\tcreateTd(excerpt, 'excerpt mono'),\n];\n", "import {load, addLink} from './common';\n\ndeclare const data: string[];\n\nload('./data/index.js', () => {\n\tdocument.getElementById('container')!.append(...data.map(\n\t\tlang => addLink('div', `${lang}wiki`, `./wiki.html?lang=${lang}`),\n\t));\n});\n"],
|
5
|
+
"mappings": "mBAAA,IAAMA,EAAY,CAACC,EAAsBC,IAAuB,CAC/DD,EAAE,KAAOC,EACLA,EAAK,WAAW,UAAU,IAC7BD,EAAE,OAAS,SACXA,EAAE,IAAM,WAEV,EAEaE,EAAO,CAACC,EAAaC,IAA+B,CAChE,IAAMC,EAAS,SAAS,cAAc,QAAQ,EAC9CA,EAAO,IAAMF,EACbE,EAAO,iBAAiB,OAAQD,CAAQ,EACxC,SAAS,KAAK,OAAOC,CAAM,CAC5B,EAwBO,IAAMC,EAAU,CAACC,EAAaC,EAAcC,EAAcC,IAAoC,CACpG,IAAMC,EAAY,SAAS,cAAcJ,CAAG,EAC3CK,EAAI,SAAS,cAAc,GAAG,EAC/B,OAAAA,EAAE,YAAcJ,EAChBK,EAAUD,EAAGH,CAAI,EACjBE,EAAU,OAAOC,CAAC,EACdF,IACHC,EAAU,UAAYD,GAEhBC,CACR,EC3CAG,EAAK,kBAAmB,IAAM,CAC7B,SAAS,eAAe,WAAW,EAAG,OAAO,GAAG,KAAK,IACpDC,GAAQC,EAAQ,MAAO,GAAGD,CAAI,OAAQ,oBAAoBA,CAAI,EAAE,CACjE,CAAC,CACF,CAAC",
|
6
|
+
"names": ["armorLink", "a", "href", "load", "src", "callback", "script", "addLink", "tag", "text", "href", "className", "container", "a", "armorLink", "load", "lang", "addLink"]
|
7
|
+
}
|
package/reports/dist/rule.js
CHANGED
@@ -1,49 +1,2 @@
|
|
1
|
-
"use strict";
|
2
|
-
|
3
|
-
const search = new URLSearchParams(location.search), lang = search.get('lang'), rule = search.get('rule'), batch = Math.floor(Number(search.get('start') || 0) / 200), endStr = String((batch + 1) * 200), nav = document.getElementById('nav'), prev = document.getElementById('prev'), next = document.getElementById('next'), start = document.getElementById('start'), end = document.getElementById('end'), title = document.querySelector('title'), h2 = document.querySelector('h2'), wiki = document.getElementById('wiki'), table = document.querySelector('table'), tbody = document.querySelector('tbody'), script = document.createElement('script');
|
4
|
-
title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
|
5
|
-
wiki.textContent = `${lang}wiki`;
|
6
|
-
wiki.href += `?lang=${lang}`;
|
7
|
-
if (batch === 0) {
|
8
|
-
prev.removeAttribute('href');
|
9
|
-
}
|
10
|
-
else {
|
11
|
-
start.textContent = String(batch * 200 + 1);
|
12
|
-
end.textContent = endStr;
|
13
|
-
search.set('start', String((batch - 1) * 200));
|
14
|
-
prev.href = `${location.pathname}?${search}`;
|
15
|
-
}
|
16
|
-
search.set('start', endStr);
|
17
|
-
next.href = `${location.pathname}?${search}`;
|
18
|
-
script.src = `./data/${lang}/${rule}-${batch}.js`;
|
19
|
-
script.addEventListener('load', () => {
|
20
|
-
h2.textContent = `${h2.textContent.replace('Wikipedia', `${lang}.wikipedia.org: ${rule}`)} (${data.timestamp})`;
|
21
|
-
if (data.batches === batch + 1) {
|
22
|
-
next.removeAttribute('href');
|
23
|
-
end.textContent = String(batch * 200 + data.articles.length);
|
24
|
-
}
|
25
|
-
for (const [page, startLine, startCol, message, excerpt] of data.articles) {
|
26
|
-
const tr = document.createElement('tr'), article = document.createElement('td'), edit = document.createElement('td'), line = document.createElement('td'), column = document.createElement('td'), detail = document.createElement('td'), notice = document.createElement('td'), more = document.createElement('td'), articleLink = document.createElement('a'), editLink = document.createElement('a'), moreLink = document.createElement('a');
|
27
|
-
articleLink.textContent = page;
|
28
|
-
articleLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?redirect=no`;
|
29
|
-
article.className = 'excerpt';
|
30
|
-
article.append(articleLink);
|
31
|
-
editLink.textContent = 'edit';
|
32
|
-
editLink.href = `https://${lang}.wikipedia.org/wiki/${encodeURIComponent(page)}?action=edit`;
|
33
|
-
edit.append(editLink);
|
34
|
-
line.textContent = String(startLine);
|
35
|
-
column.textContent = String(startCol);
|
36
|
-
detail.textContent = message;
|
37
|
-
detail.className = 'excerpt';
|
38
|
-
notice.textContent = excerpt;
|
39
|
-
notice.className = 'excerpt mono';
|
40
|
-
moreLink.textContent = 'more';
|
41
|
-
moreLink.href = `./article.html?lang=${lang}&page=${encodeURIComponent(page)}`;
|
42
|
-
more.append(moreLink);
|
43
|
-
tr.append(article, edit, line, column, detail, notice, more);
|
44
|
-
tbody.append(tr);
|
45
|
-
}
|
46
|
-
table.after(nav.cloneNode(true));
|
47
|
-
});
|
48
|
-
document.head.append(script);
|
49
|
-
})();
|
1
|
+
"use strict";(()=>{var u=(t,e)=>{t.href=e,e.startsWith("https://")&&(t.target="_blank",t.rel="noopener")},E=(t,e)=>{let n=document.createElement("script");n.src=t,n.addEventListener("load",e),document.head.append(n)},h=t=>t==="mediawiki"?"www.mediawiki.org":`${t}.wikipedia.org`,p=(t,e)=>{let n=document.querySelector(t);return n.textContent=n.textContent.replace("Wikipedia",e),n},d=(t,e,n)=>{let r=document.getElementById(t);typeof e=="function"?r.href=e(r.href):e?u(r,e):r.removeAttribute("href"),n!==void 0&&(r.textContent=n)},m=(t,e,n,r)=>{let s=document.createElement(t),c=document.createElement("a");return c.textContent=e,u(c,n),s.append(c),r&&(s.className=r),s},l=(t,e)=>{let n=document.createElement("td");return n.textContent=t,e&&(n.className=e),n},$=(...t)=>{let e=document.createElement("tr");e.append(...t),document.querySelector("tbody").append(e)},x=(t,e,n,r)=>[l(String(t)),l(String(e)),l(n,"excerpt"),l(r,"excerpt mono")];var o=new URLSearchParams(location.search),i=o.get("lang"),f=o.get("rule"),a=Math.floor(Number(o.get("start")||0)/200),b=h(i),L=p("h2",`${i}wiki: ${f}`);p("title",`${i}wiki`);d("wiki",t=>`${t}?lang=${i}`,`${i}wiki`);o.set("start",String((a-1)*200));d("prev",a!==0&&`${location.pathname}?${o}`);document.getElementById("start").textContent=String(a*200+1);E(`./data/${i}/${f}-${a}.js`,()=>{let t=String(a*200+data.articles.length);L.textContent+=` (${data.timestamp})`,document.getElementById("end").textContent=t,o.set("start",t),d("next",data.batches!==a+1&&`${location.pathname}?${o}`),document.querySelector("table").after(document.getElementById("nav").cloneNode(!0));for(let[e,n,r,s,c]of data.articles){let g=encodeURIComponent(e);$(m("td",e,`https://${b}/wiki/${g}?redirect=no`,"excerpt"),m("td","edit",`https://${b}/wiki/${g}?action=edit`),...x(n,r,s,c),m("td","more",`./article.html?lang=${i}&page=${g}`))}});})();
|
2
|
+
//# sourceMappingURL=rule.js.map
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"version": 3,
|
3
|
+
"sources": ["../src/common.ts", "../src/rule.ts"],
|
4
|
+
"sourcesContent": ["const armorLink = (a: HTMLAnchorElement, href: string): void => {\n\ta.href = href;\n\tif (href.startsWith('https://')) {\n\t\ta.target = '_blank';\n\t\ta.rel = 'noopener';\n\t}\n};\n\nexport const load = (src: string, callback: () => void): void => {\n\tconst script = document.createElement('script');\n\tscript.src = src;\n\tscript.addEventListener('load', callback);\n\tdocument.head.append(script);\n};\n\nexport const getHost = (lang: string): string => lang === 'mediawiki' ? 'www.mediawiki.org' : `${lang}.wikipedia.org`;\n\nexport const update = (tag: string, replace: string): Element => {\n\tconst ele = document.querySelector(tag)!;\n\tele.textContent = ele.textContent!.replace('Wikipedia', replace);\n\treturn ele;\n};\n\nexport const updateLink = (id: string, href: string | ((s: string) => string) | false, text?: string): void => {\n\tconst a = document.getElementById(id) as HTMLAnchorElement;\n\tif (typeof href === 'function') {\n\t\ta.href = href(a.href);\n\t} else if (href) {\n\t\tarmorLink(a, href);\n\t} else {\n\t\ta.removeAttribute('href');\n\t}\n\tif (text !== undefined) {\n\t\ta.textContent = text;\n\t}\n};\n\nexport const addLink = (tag: string, text: string, href: string, className?: string): HTMLElement => {\n\tconst container = document.createElement(tag),\n\t\ta = document.createElement('a');\n\ta.textContent = text;\n\tarmorLink(a, href);\n\tcontainer.append(a);\n\tif (className) {\n\t\tcontainer.className = className;\n\t}\n\treturn container;\n};\n\nexport const createTd = (text: string, className?: string): HTMLTableCellElement => {\n\tconst td = document.createElement('td');\n\ttd.textContent = text;\n\tif (className) {\n\t\ttd.className = className;\n\t}\n\treturn td;\n};\n\nexport const insertRow = (...tds: HTMLElement[]): void => {\n\tconst tr = document.createElement('tr');\n\ttr.append(...tds);\n\tdocument.querySelector('tbody')!.append(tr);\n};\n\nexport const getErrorInfo = (\n\tstartLine: number,\n\tstartCol: 
number,\n\tmessage: string,\n\texcerpt: string,\n): [HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement] => [\n\tcreateTd(String(startLine)),\n\tcreateTd(String(startCol)),\n\tcreateTd(message, 'excerpt'),\n\tcreateTd(excerpt, 'excerpt mono'),\n];\n", "import {load, getHost, update, addLink, insertRow, getErrorInfo, updateLink} from './common';\n\ndeclare const data: {\n\tarticles: [string, number, number, string, string][];\n\tbatches: number;\n\ttimestamp: string;\n};\n\nconst search = new URLSearchParams(location.search),\n\tlang = search.get('lang'),\n\trule = search.get('rule'),\n\tbatch = Math.floor(Number(search.get('start') || 0) / 200),\n\thost = getHost(lang!),\n\th2 = update('h2', `${lang}wiki: ${rule}`);\nupdate('title', `${lang}wiki`);\nupdateLink('wiki', s => `${s}?lang=${lang}`, `${lang}wiki`);\nsearch.set('start', String((batch - 1) * 200));\nupdateLink('prev', batch !== 0 && `${location.pathname}?${search}`);\ndocument.getElementById('start')!.textContent = String(batch * 200 + 1);\nload(`./data/${lang}/${rule}-${batch}.js`, () => {\n\tconst endStr = String(batch * 200 + data.articles.length);\n\th2.textContent += ` (${data.timestamp})`;\n\tdocument.getElementById('end')!.textContent = endStr;\n\tsearch.set('start', endStr);\n\tupdateLink('next', data.batches !== batch + 1 && `${location.pathname}?${search}`);\n\tdocument.querySelector('table')!\n\t\t.after(document.getElementById('nav')!.cloneNode(true));\n\tfor (const [page, startLine, startCol, message, excerpt] of data.articles) {\n\t\tconst title = encodeURIComponent(page);\n\t\tinsertRow(\n\t\t\taddLink('td', page, `https://${host}/wiki/${title}?redirect=no`, 'excerpt'),\n\t\t\taddLink('td', 'edit', `https://${host}/wiki/${title}?action=edit`),\n\t\t\t...getErrorInfo(startLine, startCol, message, excerpt),\n\t\t\taddLink('td', 'more', `./article.html?lang=${lang}&page=${title}`),\n\t\t);\n\t}\n});\n"],
|
5
|
+
"mappings": "mBAAA,IAAMA,EAAY,CAACC,EAAsBC,IAAuB,CAC/DD,EAAE,KAAOC,EACLA,EAAK,WAAW,UAAU,IAC7BD,EAAE,OAAS,SACXA,EAAE,IAAM,WAEV,EAEaE,EAAO,CAACC,EAAaC,IAA+B,CAChE,IAAMC,EAAS,SAAS,cAAc,QAAQ,EAC9CA,EAAO,IAAMF,EACbE,EAAO,iBAAiB,OAAQD,CAAQ,EACxC,SAAS,KAAK,OAAOC,CAAM,CAC5B,EAEaC,EAAWC,GAAyBA,IAAS,YAAc,oBAAsB,GAAGA,CAAI,iBAExFC,EAAS,CAACC,EAAaC,IAA6B,CAChE,IAAMC,EAAM,SAAS,cAAcF,CAAG,EACtC,OAAAE,EAAI,YAAcA,EAAI,YAAa,QAAQ,YAAaD,CAAO,EACxDC,CACR,EAEaC,EAAa,CAACC,EAAYZ,EAAgDa,IAAwB,CAC9G,IAAMd,EAAI,SAAS,eAAea,CAAE,EAChC,OAAOZ,GAAS,WACnBD,EAAE,KAAOC,EAAKD,EAAE,IAAI,EACVC,EACVF,EAAUC,EAAGC,CAAI,EAEjBD,EAAE,gBAAgB,MAAM,EAErBc,IAAS,SACZd,EAAE,YAAcc,EAElB,EAEaC,EAAU,CAACN,EAAaK,EAAcb,EAAce,IAAoC,CACpG,IAAMC,EAAY,SAAS,cAAcR,CAAG,EAC3CT,EAAI,SAAS,cAAc,GAAG,EAC/B,OAAAA,EAAE,YAAcc,EAChBf,EAAUC,EAAGC,CAAI,EACjBgB,EAAU,OAAOjB,CAAC,EACdgB,IACHC,EAAU,UAAYD,GAEhBC,CACR,EAEaC,EAAW,CAACJ,EAAcE,IAA6C,CACnF,IAAMG,EAAK,SAAS,cAAc,IAAI,EACtC,OAAAA,EAAG,YAAcL,EACbE,IACHG,EAAG,UAAYH,GAETG,CACR,EAEaC,EAAY,IAAIC,IAA6B,CACzD,IAAMC,EAAK,SAAS,cAAc,IAAI,EACtCA,EAAG,OAAO,GAAGD,CAAG,EAChB,SAAS,cAAc,OAAO,EAAG,OAAOC,CAAE,CAC3C,EAEaC,EAAe,CAC3BC,EACAC,EACAC,EACAC,IAC8F,CAC9FT,EAAS,OAAOM,CAAS,CAAC,EAC1BN,EAAS,OAAOO,CAAQ,CAAC,EACzBP,EAASQ,EAAS,SAAS,EAC3BR,EAASS,EAAS,cAAc,CACjC,EClEA,IAAMC,EAAS,IAAI,gBAAgB,SAAS,MAAM,EACjDC,EAAOD,EAAO,IAAI,MAAM,EACxBE,EAAOF,EAAO,IAAI,MAAM,EACxBG,EAAQ,KAAK,MAAM,OAAOH,EAAO,IAAI,OAAO,GAAK,CAAC,EAAI,GAAG,EACzDI,EAAOC,EAAQJ,CAAK,EACpBK,EAAKC,EAAO,KAAM,GAAGN,CAAI,SAASC,CAAI,EAAE,EACzCK,EAAO,QAAS,GAAGN,CAAI,MAAM,EAC7BO,EAAW,OAAQC,GAAK,GAAGA,CAAC,SAASR,CAAI,GAAI,GAAGA,CAAI,MAAM,EAC1DD,EAAO,IAAI,QAAS,QAAQG,EAAQ,GAAK,GAAG,CAAC,EAC7CK,EAAW,OAAQL,IAAU,GAAK,GAAG,SAAS,QAAQ,IAAIH,CAAM,EAAE,EAClE,SAAS,eAAe,OAAO,EAAG,YAAc,OAAOG,EAAQ,IAAM,CAAC,EACtEO,EAAK,UAAUT,CAAI,IAAIC,CAAI,IAAIC,CAAK,MAAO,IAAM,CAChD,IAAMQ,EAAS,OAAOR,EAAQ,IAAM,KAAK,SAAS,MAAM,EACxDG,EAAG,aAAe,KAAK,KAAK,SAAS,IACrC,SAAS,eAAe,KAAK,EAAG,YAAcK,EAC9CX,EAAO,IAAI,QAASW,CAAM,EAC1BH,EAAW,OAAQ,KAAK,UAAYL,EAAQ,GAAK,GAAG,SAAS,QAAQ,IAAIH,CAAM,E
AAE,EACjF,SAAS,cAAc,OAAO,EAC5B,MAAM,SAAS,eAAe,KAAK,EAAG,UAAU,EAAI,CAAC,EACvD,OAAW,CAACY,EAAMC,EAAWC,EAAUC,EAASC,CAAO,IAAK,KAAK,SAAU,CAC1E,IAAMC,EAAQ,mBAAmBL,CAAI,EACrCM,EACCC,EAAQ,KAAMP,EAAM,WAAWR,CAAI,SAASa,CAAK,eAAgB,SAAS,EAC1EE,EAAQ,KAAM,OAAQ,WAAWf,CAAI,SAASa,CAAK,cAAc,EACjE,GAAGG,EAAaP,EAAWC,EAAUC,EAASC,CAAO,EACrDG,EAAQ,KAAM,OAAQ,uBAAuBlB,CAAI,SAASgB,CAAK,EAAE,CAClE,CACD,CACD,CAAC",
|
6
|
+
"names": ["armorLink", "a", "href", "load", "src", "callback", "script", "getHost", "lang", "update", "tag", "replace", "ele", "updateLink", "id", "text", "addLink", "className", "container", "createTd", "td", "insertRow", "tds", "tr", "getErrorInfo", "startLine", "startCol", "message", "excerpt", "search", "lang", "rule", "batch", "host", "getHost", "h2", "update", "updateLink", "s", "load", "endStr", "page", "startLine", "startCol", "message", "excerpt", "title", "insertRow", "addLink", "getErrorInfo"]
|
7
|
+
}
|
package/reports/dist/wiki.js
CHANGED
@@ -1,19 +1,2 @@
|
|
1
|
-
"use strict";
|
2
|
-
|
3
|
-
const lang = new URLSearchParams(location.search).get('lang'), script = document.createElement('script'), title = document.querySelector('title'), h2 = document.querySelector('h2'), tbody = document.querySelector('tbody');
|
4
|
-
title.textContent = title.textContent.replace('Wikipedia', `${lang}.wikipedia.org`);
|
5
|
-
script.src = `./data/${lang}/index.js`;
|
6
|
-
script.addEventListener('load', () => {
|
7
|
-
h2.textContent = `${h2.textContent.replace('Wikipedia', `${lang}.wikipedia.org`)} (${data.slice(-1)[0]})`;
|
8
|
-
for (const [rule, count] of data.slice(0, -1)) {
|
9
|
-
const tr = document.createElement('tr'), description = document.createElement('td'), pages = document.createElement('td'), a = document.createElement('a');
|
10
|
-
a.textContent = rule;
|
11
|
-
a.href = `./rule.html?lang=${lang}&rule=${rule}`;
|
12
|
-
description.append(a);
|
13
|
-
pages.textContent = String(count);
|
14
|
-
tr.append(description, pages);
|
15
|
-
tbody.append(tr);
|
16
|
-
}
|
17
|
-
});
|
18
|
-
document.head.append(script);
|
19
|
-
})();
|
1
|
+
"use strict";(()=>{var g=(t,e)=>{t.href=e,e.startsWith("https://")&&(t.target="_blank",t.rel="noopener")},l=(t,e)=>{let n=document.createElement("script");n.src=t,n.addEventListener("load",e),document.head.append(n)};var s=(t,e)=>{let n=document.querySelector(t);return n.textContent=n.textContent.replace("Wikipedia",e),n};var c=(t,e,n,a)=>{let o=document.createElement(t),i=document.createElement("a");return i.textContent=e,g(i,n),o.append(i),a&&(o.className=a),o},d=(t,e)=>{let n=document.createElement("td");return n.textContent=t,e&&(n.className=e),n},m=(...t)=>{let e=document.createElement("tr");e.append(...t),document.querySelector("tbody").append(e)};var r=new URLSearchParams(location.search).get("lang"),p=s("h2",`${r}wiki`);s("title",`${r}wiki`);l(`./data/${r}/index.js`,()=>{p.textContent+=` (${data[data.length-1]})`;for(let[t,e]of data.slice(0,-1))m(c("td",t,`./rule.html?lang=${r}&rule=${t}`),d(String(e)))});})();
|
2
|
+
//# sourceMappingURL=wiki.js.map
|
@@ -0,0 +1,7 @@
|
|
1
|
+
{
|
2
|
+
"version": 3,
|
3
|
+
"sources": ["../src/common.ts", "../src/wiki.ts"],
|
4
|
+
"sourcesContent": ["const armorLink = (a: HTMLAnchorElement, href: string): void => {\n\ta.href = href;\n\tif (href.startsWith('https://')) {\n\t\ta.target = '_blank';\n\t\ta.rel = 'noopener';\n\t}\n};\n\nexport const load = (src: string, callback: () => void): void => {\n\tconst script = document.createElement('script');\n\tscript.src = src;\n\tscript.addEventListener('load', callback);\n\tdocument.head.append(script);\n};\n\nexport const getHost = (lang: string): string => lang === 'mediawiki' ? 'www.mediawiki.org' : `${lang}.wikipedia.org`;\n\nexport const update = (tag: string, replace: string): Element => {\n\tconst ele = document.querySelector(tag)!;\n\tele.textContent = ele.textContent!.replace('Wikipedia', replace);\n\treturn ele;\n};\n\nexport const updateLink = (id: string, href: string | ((s: string) => string) | false, text?: string): void => {\n\tconst a = document.getElementById(id) as HTMLAnchorElement;\n\tif (typeof href === 'function') {\n\t\ta.href = href(a.href);\n\t} else if (href) {\n\t\tarmorLink(a, href);\n\t} else {\n\t\ta.removeAttribute('href');\n\t}\n\tif (text !== undefined) {\n\t\ta.textContent = text;\n\t}\n};\n\nexport const addLink = (tag: string, text: string, href: string, className?: string): HTMLElement => {\n\tconst container = document.createElement(tag),\n\t\ta = document.createElement('a');\n\ta.textContent = text;\n\tarmorLink(a, href);\n\tcontainer.append(a);\n\tif (className) {\n\t\tcontainer.className = className;\n\t}\n\treturn container;\n};\n\nexport const createTd = (text: string, className?: string): HTMLTableCellElement => {\n\tconst td = document.createElement('td');\n\ttd.textContent = text;\n\tif (className) {\n\t\ttd.className = className;\n\t}\n\treturn td;\n};\n\nexport const insertRow = (...tds: HTMLElement[]): void => {\n\tconst tr = document.createElement('tr');\n\ttr.append(...tds);\n\tdocument.querySelector('tbody')!.append(tr);\n};\n\nexport const getErrorInfo = (\n\tstartLine: number,\n\tstartCol: 
number,\n\tmessage: string,\n\texcerpt: string,\n): [HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement, HTMLTableCellElement] => [\n\tcreateTd(String(startLine)),\n\tcreateTd(String(startCol)),\n\tcreateTd(message, 'excerpt'),\n\tcreateTd(excerpt, 'excerpt mono'),\n];\n", "import {load, update, addLink, createTd, insertRow} from './common';\n\ndeclare const data: [...[string, number][], string];\n\nconst lang = new URLSearchParams(location.search).get('lang'),\n\th2 = update('h2', `${lang}wiki`);\nupdate('title', `${lang}wiki`);\nload(`./data/${lang}/index.js`, () => {\n\th2.textContent += ` (${data[data.length - 1] as string})`;\n\tfor (const [rule, count] of data.slice(0, -1) as [string, number][]) {\n\t\tinsertRow(\n\t\t\taddLink('td', rule, `./rule.html?lang=${lang}&rule=${rule}`),\n\t\t\tcreateTd(String(count)),\n\t\t);\n\t}\n});\n"],
|
5
|
+
"mappings": "mBAAA,IAAMA,EAAY,CAACC,EAAsBC,IAAuB,CAC/DD,EAAE,KAAOC,EACLA,EAAK,WAAW,UAAU,IAC7BD,EAAE,OAAS,SACXA,EAAE,IAAM,WAEV,EAEaE,EAAO,CAACC,EAAaC,IAA+B,CAChE,IAAMC,EAAS,SAAS,cAAc,QAAQ,EAC9CA,EAAO,IAAMF,EACbE,EAAO,iBAAiB,OAAQD,CAAQ,EACxC,SAAS,KAAK,OAAOC,CAAM,CAC5B,EAIO,IAAMC,EAAS,CAACC,EAAaC,IAA6B,CAChE,IAAMC,EAAM,SAAS,cAAcF,CAAG,EACtC,OAAAE,EAAI,YAAcA,EAAI,YAAa,QAAQ,YAAaD,CAAO,EACxDC,CACR,EAgBO,IAAMC,EAAU,CAACC,EAAaC,EAAcC,EAAcC,IAAoC,CACpG,IAAMC,EAAY,SAAS,cAAcJ,CAAG,EAC3CK,EAAI,SAAS,cAAc,GAAG,EAC/B,OAAAA,EAAE,YAAcJ,EAChBK,EAAUD,EAAGH,CAAI,EACjBE,EAAU,OAAOC,CAAC,EACdF,IACHC,EAAU,UAAYD,GAEhBC,CACR,EAEaG,EAAW,CAACN,EAAcE,IAA6C,CACnF,IAAMK,EAAK,SAAS,cAAc,IAAI,EACtC,OAAAA,EAAG,YAAcP,EACbE,IACHK,EAAG,UAAYL,GAETK,CACR,EAEaC,EAAY,IAAIC,IAA6B,CACzD,IAAMC,EAAK,SAAS,cAAc,IAAI,EACtCA,EAAG,OAAO,GAAGD,CAAG,EAChB,SAAS,cAAc,OAAO,EAAG,OAAOC,CAAE,CAC3C,EC1DA,IAAMC,EAAO,IAAI,gBAAgB,SAAS,MAAM,EAAE,IAAI,MAAM,EAC3DC,EAAKC,EAAO,KAAM,GAAGF,CAAI,MAAM,EAChCE,EAAO,QAAS,GAAGF,CAAI,MAAM,EAC7BG,EAAK,UAAUH,CAAI,YAAa,IAAM,CACrCC,EAAG,aAAe,KAAK,KAAK,KAAK,OAAS,CAAC,CAAW,IACtD,OAAW,CAACG,EAAMC,CAAK,IAAK,KAAK,MAAM,EAAG,EAAE,EAC3CC,EACCC,EAAQ,KAAMH,EAAM,oBAAoBJ,CAAI,SAASI,CAAI,EAAE,EAC3DI,EAAS,OAAOH,CAAK,CAAC,CACvB,CAEF,CAAC",
|
6
|
+
"names": ["armorLink", "a", "href", "load", "src", "callback", "script", "update", "tag", "replace", "ele", "addLink", "tag", "text", "href", "className", "container", "a", "armorLink", "createTd", "td", "insertRow", "tds", "tr", "lang", "h2", "update", "load", "rule", "count", "insertRow", "addLink", "createTd"]
|
7
|
+
}
|
package/reports/index.html
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
<title>Diagnostic Report for Wikipedia Dumps</title>
|
5
5
|
<meta charset="utf-8">
|
6
6
|
<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
|
7
|
+
<link rel="icon" href="data:image/png;base64,iVBORw0KGgo=">
|
7
8
|
<link rel="stylesheet" href="reports.css">
|
8
9
|
<script defer="" src="dist/index.js"></script>
|
9
10
|
</head>
|
package/reports/reports.css
CHANGED
@@ -29,6 +29,7 @@ article {
|
|
29
29
|
display: block;
|
30
30
|
width: 100%;
|
31
31
|
padding: 1rem 2rem 2rem;
|
32
|
+
overflow-x: auto;
|
32
33
|
}
|
33
34
|
h2 {
|
34
35
|
font-size: 1.3em;
|
@@ -65,6 +66,7 @@ th {
|
|
65
66
|
}
|
66
67
|
.mono {
|
67
68
|
font-family: monospace;
|
69
|
+
white-space: pre;
|
68
70
|
}
|
69
71
|
|
70
72
|
@media screen and (max-width: 720px) {
|
package/reports/rule.html
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
<title>Diagnostic Report for Wikipedia</title>
|
5
5
|
<meta charset="utf-8">
|
6
6
|
<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
|
7
|
+
<link rel="icon" href="data:image/png;base64,iVBORw0KGgo=">
|
7
8
|
<link rel="stylesheet" href="reports.css">
|
8
9
|
<style>td:nth-child(-n+4):not(:first-child),td:last-child{text-align:center}</style>
|
9
10
|
<script defer="" src="dist/rule.js"></script>
|
@@ -27,7 +28,7 @@
|
|
27
28
|
<span id="end">200</span>
|
28
29
|
<a href="#" id="next" title="Next">→</a>
|
29
30
|
</div>
|
30
|
-
<table style="table-layout:fixed;width:100
|
31
|
+
<table style="table-layout:fixed;width:100%;min-width:900px">
|
31
32
|
<colgroup>
|
32
33
|
<col span="1">
|
33
34
|
<col span="1" style="width:calc(1.6em + 5ch)">
|
package/reports/wiki.html
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
<title>Diagnostic Report for Wikipedia</title>
|
5
5
|
<meta charset="utf-8">
|
6
6
|
<meta name="viewport" content="initial-scale=1.0, user-scalable=yes, minimum-scale=0.25, maximum-scale=5.0, width=device-width">
|
7
|
+
<link rel="icon" href="data:image/png;base64,iVBORw0KGgo=">
|
7
8
|
<link rel="stylesheet" href="reports.css">
|
8
9
|
<script defer="" src="dist/wiki.js"></script>
|
9
10
|
</head>
|
package/scan.sh
CHANGED
@@ -5,12 +5,18 @@ then
|
|
5
5
|
echo 'Example: npx lint-wiki-dumps zh-yue ~/Downloads/dumps'
|
6
6
|
exit 1
|
7
7
|
fi
|
8
|
-
site="${1}wiki" # example: zh-yuewiki
|
9
8
|
target="${1//-/_}wiki" # example: zh_yuewiki
|
10
|
-
|
11
|
-
|
9
|
+
npx getParserConfig "${1}wiki" "https://$1.wikipedia.org/w/"
|
10
|
+
bash download.sh "$target" "$2"
|
11
|
+
if (( $? == 1 ))
|
12
12
|
then
|
13
|
-
|
14
|
-
|
13
|
+
echo 'Switching to single-threaded mode'
|
14
|
+
node parser.js "$1" "$2/$target-latest-pages-articles.xml.bz2"
|
15
|
+
else
|
16
|
+
node parser-parallel.js "$1" "$2" "$4"
|
17
|
+
fi
|
18
|
+
if (( $? == 0))
|
19
|
+
then
|
20
|
+
echo 'Starting report generation'
|
21
|
+
node report.js "$1" "$3"
|
15
22
|
fi
|
16
|
-
node parser.js "$1" "$2/$file" "$4" "$5" && node report.js "$1" "$3"
|
package/server.js
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
"use strict";
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
const http_1 = require("http");
|
7
|
+
const path_1 = __importDefault(require("path"));
|
8
|
+
const fs_1 = __importDefault(require("fs"));
|
9
|
+
// TCP port to listen on: the PORT environment variable, or 8000 when it is unset or empty.
// The radix is explicit so that the value is always read as a decimal number.
const port = Number.parseInt(process.env['PORT'] || '8000', 10);
/**
 * Read a static file that is about to be served.
 * @param {string} file path relative to the working directory, using `/` separators
 * @returns {Buffer|undefined} the file content, or `undefined` when the path is outside
 * `reports/` or cannot be read as a regular file
 */
const readReport = (file) => {
    // Only files below `reports/` are public; anything else in the working directory is not.
    if (!file.startsWith('reports/')) {
        return undefined;
    }
    try {
        return fs_1.default.readFileSync(file);
    }
    catch {
        // Missing file, directory (EISDIR), permission problem, ...: all are treated as
        // "not found" so that a bad request can never throw out of the request handler.
        return undefined;
    }
};
/**
 * Minimal static file server for the generated HTML reports.
 * Existing files below `reports/` are answered with 200 and a content type derived from the
 * file extension; every other request is redirected (301) to `/`.
 */
(0, http_1.createServer)(({ url }, res) => {
    if (!url || url === '/') {
        url = 'index.html'; // eslint-disable-line no-param-reassign
    }
    // Round-tripping the joined path through `URL` drops the query string and resolves any
    // remaining dot segments, so `file` is always a path relative to the working directory.
    const file = new URL(`http://localhost/${path_1.default.join('reports', url)}`).pathname.slice(1), ext = path_1.default.extname(file), content = readReport(file);
    if (content === undefined) {
        res.writeHead(301, { Location: '/' });
        res.end();
        return;
    }
    let contentType;
    switch (ext) {
        case '.js':
            contentType = 'text/javascript';
            break;
        case '.css':
            contentType = 'text/css';
            break;
        default:
            contentType = 'text/html';
    }
    res.writeHead(200, {
        'content-type': contentType,
        'x-content-type-options': 'nosniff',
        'cache-control': `max-age=${60 * 60 * 24}, public`,
    });
    res.end(content, 'utf8');
}).listen(port);
|
package/util.js
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
"use strict";
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
4
|
+
};
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
6
|
+
exports.normalize = exports.reading = exports.replaceTilde = exports.isArticle = exports.getErrors = exports.getTimestamp = exports.getXmlStream = exports.getWriteStream = exports.init = exports.getTempPath = exports.resultDir = exports.MAX = void 0;
|
7
|
+
const fs_1 = __importDefault(require("fs"));
|
8
|
+
const path_1 = __importDefault(require("path"));
|
9
|
+
const os_1 = __importDefault(require("os"));
|
10
|
+
const chalk_1 = __importDefault(require("chalk"));
|
11
|
+
const unbzip2_stream_1 = __importDefault(require("unbzip2-stream"));
|
12
|
+
const xml_stream_1 = __importDefault(require("xml-stream"));
|
13
|
+
exports.MAX = 100, exports.resultDir = path_1.default.join(__dirname, 'results');
|
14
|
+
const tempDir = path_1.default.join(__dirname, 'temp');
|
15
|
+
const getTempPath = (file) => path_1.default.join(tempDir, file);
|
16
|
+
exports.getTempPath = getTempPath;
|
17
|
+
/**
 * Make sure the result directory and the temporary directory exist.
 * Safe to call repeatedly.
 */
const init = () => {
    // With `recursive: true`, mkdirSync does nothing for a directory that already exists,
    // so no separate existence check (and no gap between check and creation) is needed.
    for (const dir of [exports.resultDir, tempDir]) {
        fs_1.default.mkdirSync(dir, { recursive: true });
    }
};
|
25
|
+
exports.init = init;
|
26
|
+
const getWriteStream = (file, callback) => {
|
27
|
+
const stream = fs_1.default.createWriteStream(file);
|
28
|
+
stream.write('{');
|
29
|
+
stream.on('close', callback);
|
30
|
+
return stream;
|
31
|
+
};
|
32
|
+
exports.getWriteStream = getWriteStream;
|
33
|
+
const getXmlStream = (file) => {
|
34
|
+
const readable = fs_1.default.createReadStream(file).pipe((0, unbzip2_stream_1.default)()), stream = new xml_stream_1.default(readable);
|
35
|
+
readable.on('error', e => {
|
36
|
+
console.error(chalk_1.default.red(`Error unzipping ${file}`));
|
37
|
+
throw e;
|
38
|
+
});
|
39
|
+
stream.preserve('text', true);
|
40
|
+
return stream;
|
41
|
+
};
|
42
|
+
exports.getXmlStream = getXmlStream;
|
43
|
+
/**
 * Extract the value of the `"#timestamp"` entry from the raw text of a result file.
 * @param {string|undefined} data raw file content
 * @returns {Date|undefined} the parsed timestamp, or `undefined` when `data` is empty or
 * contains no `"#timestamp"` entry
 */
const getTimestamp = (data) => {
    if (!data) {
        return undefined;
    }
    const key = '"#timestamp": "', start = data.indexOf(key);
    if (start === -1) {
        // Without this guard the offset below would point into unrelated text.
        return undefined;
    }
    const i = start + key.length;
    return new Date(data.slice(i, data.indexOf('"', i)));
};
|
50
|
+
exports.getTimestamp = getTimestamp;
|
51
|
+
/**
 * Extract the raw JSON array stored for one page from the text of a result file.
 * The entry is located by searching for the JSON-encoded page title followed by `: [`, and
 * it ends at the first `\n]` after that.
 * @param {string} data raw file content
 * @param {string} page page title
 * @returns {string|undefined} the array text from `[` through the closing `]`, or
 * `undefined` when the page has no entry or the entry is not terminated
 */
const getErrors = (data, page) => {
    const str = JSON.stringify(page), i = data.indexOf(`${str}: [`);
    if (i === -1) {
        return undefined;
    }
    // Skip the title, the colon and the space so that `j` points at the opening bracket.
    const j = i + str.length + 2, end = data.indexOf('\n]', j);
    if (end === -1) {
        // Truncated file: report "no entry" instead of an empty string.
        return undefined;
    }
    return data.slice(j, end + 2);
};
|
59
|
+
exports.getErrors = getErrors;
|
60
|
+
/**
 * Tell whether a dump entry should be handled as an article.
 * @param {*} $text page text; any falsy value counts as "no text"
 * @param {string} ns namespace value, must be the string '0'
 * @param {string} model content model, must be 'wikitext'
 * @returns {boolean}
 */
const isArticle = ($text, ns, model) => {
    if (ns !== '0' || model !== 'wikitext') {
        return false;
    }
    return Boolean($text);
};
|
61
|
+
exports.isArticle = isArticle;
|
62
|
+
/**
 * Expand a leading `~` to the current user's home directory.
 * Only a bare `~` or a `~` directly followed by a path separator is expanded, so names such
 * as `~backup` or `~otheruser/dir` are returned unchanged.
 * @param {string} str path that may start with `~`
 * @returns {string}
 */
// A replacer function is used so that `$` sequences in the home directory are inserted
// literally instead of being interpreted as replacement patterns.
const replaceTilde = (str) => str.replace(/^~(?=$|[/\\])/u, () => os_1.default.homedir());
|
63
|
+
exports.replaceTilde = replaceTilde;
|
64
|
+
const reading = (file) => {
|
65
|
+
console.log(chalk_1.default.green(`Reading ${file}`));
|
66
|
+
};
|
67
|
+
exports.reading = reading;
|
68
|
+
/**
 * Replace every hyphen in a string with an underscore (e.g. `zh-yue` becomes `zh_yue`).
 * @param {string} str
 * @returns {string}
 */
const normalize = (str) => str.split('-').join('_');
|
69
|
+
exports.normalize = normalize;
|
package/scan-parallel.sh
DELETED
@@ -1,23 +0,0 @@
|
|
1
|
-
#!/usr/local/bin/bash
|
2
|
-
if (( $# < 2 ))
|
3
|
-
then
|
4
|
-
echo 'Usage: npx lint-wiki-dumps <language> <path to download> [path to HTML output]'
|
5
|
-
echo 'Example: npx lint-wiki-dumps zh-yue ~/Downloads/dumps'
|
6
|
-
exit 1
|
7
|
-
fi
|
8
|
-
site="${1}wiki" # example: zh-yuewiki
|
9
|
-
target="${1//-/_}wiki" # example: zh_yuewiki
|
10
|
-
files=$( \
|
11
|
-
curl -s "https://dumps.wikimedia.org/$target/latest/" \
|
12
|
-
| grep -o "href=\"$target-latest-pages-articles[0-9].*\.bz2\">" \
|
13
|
-
| gsed "s|href=\"|https://dumps.wikimedia.org/$target/latest/|;s|\">||" \
|
14
|
-
)
|
15
|
-
if (( ${#files} < 2 ))
|
16
|
-
then
|
17
|
-
echo 'Switching to single-threaded mode'
|
18
|
-
bash scan.sh "$1" "$2" "$3"
|
19
|
-
else
|
20
|
-
curl --output-dir "$2" --remote-name-all $files
|
21
|
-
npx getParserConfig "$site" "https://$1.wikipedia.org/w/"
|
22
|
-
node parser-parallel.js "$1" "$2" && node report.js "$1" "$3"
|
23
|
-
fi
|