@nitpicker/analyze-textlint 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/LICENSE +191 -0
- package/README.md +13 -0
- package/lib/index.d.ts +55 -0
- package/lib/index.js +256 -0
- package/lib/list-up.d.ts +1 -0
- package/lib/list-up.js +57 -0
- package/package.json +65 -0
- package/src/index.ts +317 -0
- package/src/list-up.ts +93 -0
- package/tsconfig.json +11 -0
- package/tsconfig.tsbuildinfo +1 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
import type { Violation } from '@nitpicker/types';
|
|
2
|
+
import type { TextlintMessage, TextlintRuleSeverityLevel } from '@textlint/types';
|
|
3
|
+
|
|
4
|
+
import { definePlugin } from '@nitpicker/core';
|
|
5
|
+
import { createLinter } from 'textlint';
|
|
6
|
+
|
|
7
|
+
/**
 * Rule configuration keyed by textlint rule identifier.
 *
 * Each value is one of:
 * - `true`: enable the rule with its default options
 * - `false`: disable the rule
 * - an object: enable the rule with those rule-specific options
 */
type Rule = { [ruleId: string]: unknown };
|
|
13
|
+
|
|
14
|
+
/**
 * Options accepted by the textlint analyze plugin.
 */
type Options = {
	/**
	 * Per-rule overrides layered over the built-in default rule set.
	 * A key set to `false` switches that rule off; `true` or an options
	 * object switches it on.
	 */
	rules?: Rule;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Built-in rule set. User-supplied `rules` are spread over this object,
 * so any key here can be overridden or disabled by the caller.
 */
const defaultRules: Rule = {
	// https://github.com/textlint-ja/textlint-rule-no-nfd
	'no-nfd': true,

	// https://github.com/textlint-ja/textlint-rule-max-ten
	'max-ten': { max: 3 },

	// https://github.com/azu/textlint-rule-spellcheck-tech-word
	'spellcheck-tech-word': true,

	// https://github.com/azu/textlint-rule-web-plus-db
	'web-plus-db': true,

	// https://github.com/textlint-ja/textlint-rule-no-mix-dearu-desumasu
	// cspell:disable-next-line
	'no-mix-dearu-desumasu': {
		preferInHeader: '',
		preferInBody: '',
		preferInList: '',
		strict: false,
	},

	// https://github.com/textlint-ja/textlint-rule-no-doubled-joshi
	'no-doubled-joshi': true,

	// https://github.com/textlint-ja/textlint-rule-no-double-negative-ja
	'no-double-negative-ja': true,

	// https://github.com/textlint-ja/textlint-rule-no-hankaku-kana
	'no-hankaku-kana': true, // cspell:disable-line

	// https://github.com/textlint-ja/textlint-rule-ja-no-abusage
	'ja-no-abusage': true,
	'no-mixed-zenkaku-and-hankaku-alphabet': true, // cspell:disable-line
	'no-dropping-the-ra': true,
	'no-doubled-conjunctive-particle-ga': true,
	'no-doubled-conjunction': true,
	'ja-no-mixed-period': true,

	// https://github.com/KeitaMoromizato/textlint-rule-max-appearence-count-of-words#readme
	'max-appearence-count-of-words': true, // cspell:disable-line
	'ja-hiragana-keishikimeishi': true, // cspell:disable-line
	'ja-hiragana-fukushi': true, // cspell:disable-line
	'ja-hiragana-hojodoushi': true, // cspell:disable-line
	'ja-unnatural-alphabet': true,
	'@textlint-ja/textlint-rule-no-insert-dropping-sa': true,
	'prefer-tari-tari': true, // cspell:disable-line

	// https://github.com/textlint-ja/textlint-rule-no-synonyms
	'@textlint-ja/no-synonyms': true,
};
|
|
100
|
+
|
|
101
|
+
/**
 * Lookup table from rule identifier to the npm package that implements it.
 *
 * Unscoped rules are published as `textlint-rule-{id}`, so their entries are
 * generated from the id list below. The scoped `@textlint-ja/*` rules do not
 * fit that pattern and are spelled out explicitly.
 */
const ruleImportMap: Record<string, string> = {
	...Object.fromEntries(
		[
			'no-nfd',
			'max-ten',
			'spellcheck-tech-word',
			'web-plus-db',
			'no-mix-dearu-desumasu',
			'no-doubled-joshi',
			'no-double-negative-ja',
			'no-hankaku-kana',
			'ja-no-abusage',
			'no-mixed-zenkaku-and-hankaku-alphabet',
			'no-dropping-the-ra',
			'no-doubled-conjunctive-particle-ga',
			'no-doubled-conjunction',
			'ja-no-mixed-period',
			'max-appearence-count-of-words',
			'ja-hiragana-keishikimeishi',
			'ja-hiragana-fukushi',
			'ja-hiragana-hojodoushi',
			'ja-unnatural-alphabet',
			'prefer-tari-tari',
		].map((id): [string, string] => [id, `textlint-rule-${id}`]),
	),
	'@textlint-ja/textlint-rule-no-insert-dropping-sa':
		'@textlint-ja/textlint-rule-no-insert-dropping-sa',
	'@textlint-ja/no-synonyms': '@textlint-ja/textlint-rule-no-synonyms',
};
|
|
136
|
+
|
|
137
|
+
/**
 * Imports a package at runtime and unwraps its default export.
 *
 * The imported namespace is probed from the inside out and the first
 * candidate that is neither `null` nor `undefined` wins:
 *
 * 1. `namespace.default.default` (a default export that was wrapped twice)
 * 2. `namespace.default` (a default export wrapped once)
 * 3. the namespace object itself (no default export present)
 * @param moduleName - Specifier of the package to import.
 * @returns The unwrapped export; for rule packages this is expected to be the rule itself.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function loadModule(moduleName: string): Promise<any> {
	const namespace = await import(moduleName);
	const outerDefault = namespace.default;
	const innerDefault = outerDefault?.default;

	if (innerDefault != null) {
		return innerDefault;
	}
	if (outerDefault != null) {
		return outerDefault;
	}
	return namespace;
}
|
|
158
|
+
|
|
159
|
+
/**
 * Derives the npm package name for a rule id using the `textlint-rule-`
 * naming convention.
 *
 * - `name` becomes `textlint-rule-name`
 * - `@scope/name` becomes `@scope/textlint-rule-name`
 * - ids that already carry the `textlint-rule-` prefix are returned unchanged
 * @param ruleId - Rule identifier as written in the rule configuration.
 * @returns The package name to import for that rule.
 */
function conventionalRuleModuleName(ruleId: string): string {
	const prefix = 'textlint-rule-';

	if (ruleId.startsWith('@')) {
		const slash = ruleId.indexOf('/');
		if (slash > 0) {
			const scope = ruleId.slice(0, slash);
			const name = ruleId.slice(slash + 1);
			// The prefix belongs after the scope; putting it before `@scope`
			// would not be a valid package name.
			return name.startsWith(prefix) ? ruleId : `${scope}/${prefix}${name}`;
		}
	}

	return ruleId.startsWith(prefix) ? ruleId : `${prefix}${ruleId}`;
}

/**
 * Builds a textlint linter for the given rule configuration.
 *
 * Every rule whose value is not `false` is imported through `loadModule()`.
 * The package name comes from `ruleImportMap` when the id is listed there,
 * otherwise from the naming convention, so user-supplied scoped rule ids
 * resolve to a valid package name. A value of `true` is passed to the rule
 * as an empty options object; any other value is passed through as the
 * rule's options.
 *
 * `textlint-plugin-html` is always registered under the plugin id `html`.
 * @param rules - Merged rule configuration (defaults plus user overrides).
 * @returns The linter created by `createLinter` for the assembled descriptor.
 */
async function buildLinter(rules: Rule) {
	const { TextlintKernelDescriptor } = await import('@textlint/kernel');

	const enabledRules = Object.entries(rules).filter(([, value]) => value !== false);

	// Import all rule packages in parallel; Promise.all keeps the config order.
	const ruleDescriptors = await Promise.all(
		enabledRules.map(async ([ruleId, options]) => {
			const moduleName = ruleImportMap[ruleId] ?? conventionalRuleModuleName(ruleId);
			const rule = await loadModule(moduleName);
			return {
				ruleId,
				rule,
				options: options === true ? {} : (options as Record<string, unknown>),
			};
		}),
	);

	const htmlPlugin = await loadModule('textlint-plugin-html');

	const descriptor = new TextlintKernelDescriptor({
		rules: ruleDescriptors,
		plugins: [
			{
				pluginId: 'html',
				plugin: htmlPlugin,
			},
		],
		filterRules: [],
	});

	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	return createLinter({ descriptor: descriptor as any });
}
|
|
202
|
+
|
|
203
|
+
/**
 * Lint outcome for a single page, held before conversion to `Violation`s.
 */
type Report = {
	/** Full URL of the linted page. */
	url: string;
	/** Messages textlint reported for that page. */
	results: TextlintMessage[];
};
|
|
212
|
+
|
|
213
|
+
/**
 * Analyze plugin that lints each page's HTML with textlint.
 *
 * The built-in rule set targets Japanese text; entries in the `rules`
 * option are spread over it, so callers can override, disable, or add rules.
 *
 * ## Lazy linter initialization
 *
 * Building the linter imports every enabled rule package, so it is deferred
 * until the first `eachPage` call. The promise returned by `buildLinter` is
 * stored and reused by every later call, which means:
 * 1. Nothing is imported when `eachPage` is never invoked.
 * 2. Overlapping `eachPage` calls share one build instead of starting several.
 *
 * The stored promise is never reset, so a failed build is reported again for
 * every subsequent page rather than being retried.
 * @example
 * ```jsonc
 * // nitpicker.config.json
 * {
 *   "plugins": {
 *     "analyze": {
 *       "@nitpicker/analyze-textlint": {
 *         "rules": {
 *           "max-ten": { "max": 5 },
 *           "spellcheck-tech-word": false
 *         }
 *       }
 *     }
 *   }
 * }
 * ```
 */
export default definePlugin((options: Options) => {
	// User overrides are spread last so they win over the defaults.
	const mergedRules = { ...defaultRules, ...options.rules };
	let linterPromise: Promise<ReturnType<typeof createLinter>> | undefined;

	/** Returns the shared linter promise, starting the build on first use. */
	const getLinter = () => (linterPromise ??= buildLinter(mergedRules));

	return {
		label: 'textlint: テキスト校正',
		async eachPage({ html, url }) {
			const linter = await getLinter();

			// The `.html` suffix is appended so the file name always ends in `.html`.
			const lintResult = await linter.lintText(html, url.pathname + '.html');
			const report: Report = {
				url: url.href,
				results: lintResult.messages,
			};

			const violations = report.results.map<Violation>((msg) => ({
				validator: 'textlint',
				severity: convertSeverity(msg.severity),
				rule: msg.ruleId,
				code: '-',
				message: `${msg.message}`,
				// Line and column are appended to the page URL.
				url: `${report.url} (${msg.line}:${msg.column})`,
			}));

			return { violations };
		},
	};
});
|
|
296
|
+
|
|
297
|
+
/**
 * Translates a textlint numeric severity into Nitpicker's severity string.
 *
 * Only level `1` maps to `"warning"`. Level `2` and every other value map
 * to `"error"`.
 * @param severity - The textlint severity level of a message.
 * @returns `"warning"` for level 1, otherwise `"error"`.
 */
function convertSeverity(severity: TextlintRuleSeverityLevel): 'warning' | 'error' {
	return severity === 1 ? 'warning' : 'error';
}
|
package/src/list-up.ts
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Standalone script that lints every HTML file under the current directory (searched recursively)
|
|
3
|
+
* for orthographic variant inconsistencies across an entire site.
|
|
4
|
+
*
|
|
5
|
+
* Unlike the main analyze-textlint plugin (which lints each page independently),
|
|
6
|
+
* this script concatenates all pages into a single document before linting.
|
|
7
|
+
* This enables cross-page rules like `ja-no-orthographic-variants` to detect
|
|
8
|
+
* inconsistent word usage across different pages (e.g. using both "サーバー"
|
|
9
|
+
* and "サーバ" on different pages of the same site).
|
|
10
|
+
*
|
|
11
|
+
* The output is deduplicated and written to `result.txt` for manual review.
|
|
12
|
+
*
|
|
13
|
+
* Usage: `npx tsx list-up.ts` (from the directory containing HTML files)
|
|
14
|
+
*/
|
|
15
|
+
import type { TextlintMessage } from '@textlint/types';
|
|
16
|
+
|
|
17
|
+
import fs from 'node:fs/promises';
|
|
18
|
+
import path from 'node:path';
|
|
19
|
+
|
|
20
|
+
import { glob } from 'glob';
|
|
21
|
+
import { JSDOM } from 'jsdom';
|
|
22
|
+
import { createLinter } from 'textlint';
|
|
23
|
+
import TurndownService from 'turndown';
|
|
24
|
+
|
|
25
|
+
const turndownService = new TurndownService();

// Search relative to the working directory through the `cwd` option rather
// than embedding the absolute directory in the pattern. Glob patterns use `/`
// as the separator and treat characters such as `[` or `(` as pattern syntax,
// so an absolute path from `path.resolve` (backslashes on Windows) is not a
// reliable pattern.
const rootDir = path.resolve(process.cwd());
const files = await glob('**/*.html', { cwd: rootDir, absolute: true });
// Sort so pages are concatenated in the same order on every run.
files.sort();

// Convert each HTML file into a Markdown section headed by the page title.
const pages = await Promise.all(
	files.map(async (filePath) => {
		const rawHtml = await fs.readFile(filePath, { encoding: 'utf8' });
		// Collapse every whitespace run (including newlines) to one space
		// before conversion.
		const html = rawHtml.replaceAll(/\s+/g, ' ');
		const md = turndownService.turndown(html);

		const title = new JSDOM(html).window.document.title;

		return `# ${title}\n\n${md}`;
	}),
);

// Rules applied to the concatenated document. Each key is prefixed with
// `textlint-rule-` to form the package name that gets imported.
const rulesConfig: Record<string, unknown> = {
	/**
	 * @see https://github.com/textlint-ja/textlint-rule-no-synonyms
	 */
	// '@textlint-ja/no-synonyms': true,
	'ja-no-orthographic-variants': true,
};
|
|
50
|
+
|
|
51
|
+
/**
 * Imports a package at runtime and unwraps its default export.
 *
 * Candidates are tried in this order, skipping `null`/`undefined`:
 * `namespace.default.default`, then `namespace.default`, then the
 * namespace object itself.
 * @param moduleName - Specifier of the package to import.
 * @returns The unwrapped module export.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function loadModule(moduleName: string): Promise<any> {
	const namespace = await import(moduleName);
	const candidates = [namespace.default?.default, namespace.default];
	for (const candidate of candidates) {
		if (candidate != null) {
			return candidate;
		}
	}
	return namespace;
}
|
|
62
|
+
|
|
63
|
+
const { TextlintKernelDescriptor } = await import('@textlint/kernel');

// Import every rule that is not explicitly disabled; `true` is passed to the
// rule as an empty options object.
const enabledRules = Object.entries(rulesConfig).filter(([, value]) => value !== false);
const ruleDescriptors = await Promise.all(
	enabledRules.map(async ([ruleId, options]) => {
		const rule = await loadModule(`textlint-rule-${ruleId}`);
		const ruleOptions = options === true ? {} : (options as Record<string, unknown>);
		return { ruleId, rule, options: ruleOptions };
	}),
);

// No plugins are registered: the input is linted as Markdown (`.md` below).
const descriptor = new TextlintKernelDescriptor({
	rules: ruleDescriptors,
	plugins: [],
	filterRules: [],
});

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const linter = createLinter({ descriptor: descriptor as any });

// Lint all pages as one document so a rule sees the text of every page at once.
const combinedMarkdown = pages.join('\n\n');
const result = await linter.lintText(combinedMarkdown, '.md');

// Keep each distinct message once, in first-seen order.
const resultSet = new Set(result.messages.map((m: TextlintMessage) => m.message));
const output = Array.from(resultSet).join('\n');

await fs.writeFile('result.txt', output, { encoding: 'utf8' });
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../../tsconfig.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"composite": true,
|
|
5
|
+
"outDir": "./lib",
|
|
6
|
+
"rootDir": "./src"
|
|
7
|
+
},
|
|
8
|
+
"references": [{ "path": "../core" }],
|
|
9
|
+
"include": ["./src/**/*"],
|
|
10
|
+
"exclude": ["node_modules", "lib", "./src/**/*.spec.ts"]
|
|
11
|
+
}
|