@nitpicker/analyze-textlint 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,317 @@
1
+ import type { Violation } from '@nitpicker/types';
2
+ import type { TextlintMessage, TextlintRuleSeverityLevel } from '@textlint/types';
3
+
4
+ import { definePlugin } from '@nitpicker/core';
5
+ import { createLinter } from 'textlint';
6
+
7
/**
 * textlint rule settings keyed by rule identifier.
 *
 * Each value is one of:
 * - `true`: enable the rule with its default options
 * - `false`: disable the rule
 * - an object: enable the rule with those rule-specific options
 */
type Rule = Record<string, unknown>;

/**
 * Options accepted by the textlint text-proofreading analyze plugin.
 */
type Options = {
	/**
	 * Per-rule overrides. They are spread over the built-in default rule set,
	 * so an entry here replaces the default entry with the same key.
	 * Use `false` to switch a rule off, or `true` / an options object to
	 * switch it on.
	 */
	rules?: Rule;
};
24
+
25
/**
 * Rule set applied when the user supplies no overrides.
 *
 * Insertion order is preserved on purpose: the linter builder walks this
 * object with `Object.entries()`, so the order here is the order in which
 * rule packages are requested.
 */
const defaultRules: Rule = {
	/** @see https://github.com/textlint-ja/textlint-rule-no-nfd */
	'no-nfd': true,

	/** @see https://github.com/textlint-ja/textlint-rule-max-ten */
	'max-ten': { max: 3 },

	/** @see https://github.com/azu/textlint-rule-spellcheck-tech-word */
	'spellcheck-tech-word': true,

	/** @see https://github.com/azu/textlint-rule-web-plus-db */
	'web-plus-db': true,

	/** @see https://github.com/textlint-ja/textlint-rule-no-mix-dearu-desumasu */
	// cspell:disable-next-line
	'no-mix-dearu-desumasu': {
		preferInHeader: '',
		preferInBody: '',
		preferInList: '',
		strict: false,
	},

	/** @see https://github.com/textlint-ja/textlint-rule-no-doubled-joshi */
	'no-doubled-joshi': true,

	/** @see https://github.com/textlint-ja/textlint-rule-no-double-negative-ja */
	'no-double-negative-ja': true,

	/** @see https://github.com/textlint-ja/textlint-rule-no-hankaku-kana */
	'no-hankaku-kana': true, // cspell:disable-line

	/** @see https://github.com/textlint-ja/textlint-rule-ja-no-abusage */
	'ja-no-abusage': true,
	'no-mixed-zenkaku-and-hankaku-alphabet': true, // cspell:disable-line
	'no-dropping-the-ra': true,
	'no-doubled-conjunctive-particle-ga': true,
	'no-doubled-conjunction': true,
	'ja-no-mixed-period': true,

	/** @see https://github.com/KeitaMoromizato/textlint-rule-max-appearence-count-of-words#readme */
	'max-appearence-count-of-words': true, // cspell:disable-line
	'ja-hiragana-keishikimeishi': true, // cspell:disable-line
	'ja-hiragana-fukushi': true, // cspell:disable-line
	'ja-hiragana-hojodoushi': true, // cspell:disable-line
	'ja-unnatural-alphabet': true,
	'@textlint-ja/textlint-rule-no-insert-dropping-sa': true,
	'prefer-tari-tari': true, // cspell:disable-line

	/** @see https://github.com/textlint-ja/textlint-rule-no-synonyms */
	'@textlint-ja/no-synonyms': true,
};
100
+
101
/**
 * Lookup table from rule identifier (as used in the rule configuration)
 * to the npm package that implements the rule.
 *
 * Unscoped rules follow the `textlint-rule-{id}` naming convention; the
 * scoped `@textlint-ja/*` entries do not, which is why the package name is
 * spelled out explicitly for every default rule. `buildLinter()` consults
 * this table before falling back to the naming convention.
 */
const ruleImportMap: Record<string, string> = {
	'no-nfd': 'textlint-rule-no-nfd',
	'max-ten': 'textlint-rule-max-ten',
	'spellcheck-tech-word': 'textlint-rule-spellcheck-tech-word',
	'web-plus-db': 'textlint-rule-web-plus-db',
	'no-mix-dearu-desumasu': 'textlint-rule-no-mix-dearu-desumasu',
	'no-doubled-joshi': 'textlint-rule-no-doubled-joshi',
	'no-double-negative-ja': 'textlint-rule-no-double-negative-ja',
	'no-hankaku-kana': 'textlint-rule-no-hankaku-kana',
	'ja-no-abusage': 'textlint-rule-ja-no-abusage',
	'no-mixed-zenkaku-and-hankaku-alphabet':
		'textlint-rule-no-mixed-zenkaku-and-hankaku-alphabet',
	'no-dropping-the-ra': 'textlint-rule-no-dropping-the-ra',
	'no-doubled-conjunctive-particle-ga':
		'textlint-rule-no-doubled-conjunctive-particle-ga',
	'no-doubled-conjunction': 'textlint-rule-no-doubled-conjunction',
	'ja-no-mixed-period': 'textlint-rule-ja-no-mixed-period',
	'max-appearence-count-of-words': 'textlint-rule-max-appearence-count-of-words',
	'ja-hiragana-keishikimeishi': 'textlint-rule-ja-hiragana-keishikimeishi',
	'ja-hiragana-fukushi': 'textlint-rule-ja-hiragana-fukushi',
	'ja-hiragana-hojodoushi': 'textlint-rule-ja-hiragana-hojodoushi',
	'ja-unnatural-alphabet': 'textlint-rule-ja-unnatural-alphabet',
	// Scoped packages: the package name cannot be derived by simple prefixing.
	'@textlint-ja/textlint-rule-no-insert-dropping-sa':
		'@textlint-ja/textlint-rule-no-insert-dropping-sa',
	'prefer-tari-tari': 'textlint-rule-prefer-tari-tari',
	'@textlint-ja/no-synonyms': '@textlint-ja/textlint-rule-no-synonyms',
};
136
+
137
/**
 * Imports a package at runtime and unwraps its effective default export.
 *
 * The imported namespace is inspected from the innermost candidate outwards:
 *
 * 1. `namespace.default.default`: a default export wrapped twice
 * 2. `namespace.default`: a default export wrapped once
 * 3. `namespace`: no usable `default` property, so the namespace itself
 *
 * The first candidate that is neither `null` nor `undefined` wins.
 * @param moduleName - The npm package name (module specifier) to import.
 * @returns The unwrapped export (for rule packages, typically the rule itself).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function loadModule(moduleName: string): Promise<any> {
	const namespace = await import(moduleName);
	const outer = namespace.default;

	// No default export at all: hand back the namespace object.
	if (outer === null || outer === undefined) {
		return namespace;
	}

	// Prefer the nested default when the export was wrapped twice.
	const inner = outer.default;
	return inner ?? outer;
}
158
+
159
/**
 * Resolves the npm package name that implements a textlint rule.
 *
 * Resolution order:
 * 1. An explicit entry in `ruleImportMap`.
 * 2. Scoped ids (`@scope/name`): `@scope/textlint-rule-name`, or the id
 *    unchanged when `name` already starts with `textlint-rule-`. Prefixing
 *    the whole id (`textlint-rule-@scope/name`) would not be a valid
 *    package specifier.
 * 3. Everything else: `textlint-rule-{id}`.
 * @param ruleId - Rule identifier as written in the rule configuration.
 * @returns The module specifier to import for that rule.
 */
function resolveRuleModuleName(ruleId: string): string {
	const mapped = ruleImportMap[ruleId];
	if (mapped !== undefined && mapped !== null) {
		return mapped;
	}

	const prefix = 'textlint-rule-';
	const slashIndex = ruleId.indexOf('/');
	if (ruleId.startsWith('@') && slashIndex > 0) {
		const scope = ruleId.slice(0, slashIndex);
		const name = ruleId.slice(slashIndex + 1);
		return name.startsWith(prefix) ? ruleId : `${scope}/${prefix}${name}`;
	}

	return `${prefix}${ruleId}`;
}

/**
 * Constructs a textlint linter for the given rule set.
 *
 * Every rule whose value is not `false` is imported through `loadModule()`
 * (all imports run concurrently) and registered under its rule id. A value
 * of `true` is passed on as an empty options object; any other value is
 * passed on as the rule's options. The `textlint-plugin-html` plugin is
 * always registered under the plugin id `html` so that HTML input can be
 * linted.
 * @param rules - Merged rule configuration (defaults + user overrides).
 * @returns The linter created by textlint's `createLinter()`.
 */
async function buildLinter(rules: Rule) {
	const { TextlintKernelDescriptor } = await import('@textlint/kernel');

	const ruleDescriptors = await Promise.all(
		Object.entries(rules)
			.filter(([, value]) => value !== false)
			.map(async ([ruleId, options]) => {
				const rule = await loadModule(resolveRuleModuleName(ruleId));
				return {
					ruleId,
					rule,
					options: options === true ? {} : (options as Record<string, unknown>),
				};
			}),
	);

	const htmlPlugin = await loadModule('textlint-plugin-html');

	const descriptor = new TextlintKernelDescriptor({
		rules: ruleDescriptors,
		plugins: [
			{
				pluginId: 'html',
				plugin: htmlPlugin,
			},
		],
		filterRules: [],
	});

	// The descriptor type exported by '@textlint/kernel' is not accepted
	// as-is by createLinter's parameter type, hence the cast.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	return createLinter({ descriptor: descriptor as any });
}
202
+
203
/**
 * Intermediate lint result for a single page, held before the messages are
 * converted into `Violation` objects.
 */
type Report = {
	/** URL of the linted page. */
	url: string;
	/** Messages textlint reported for that page, unmodified. */
	results: TextlintMessage[];
};
212
+
213
/**
 * Derives the virtual file path handed to textlint for a page.
 *
 * `.html` is appended to the URL pathname so the HTML plugin is selected.
 * Directory-style paths (`/`, `/about/`) would otherwise become `/.html` or
 * `/about/.html`, i.e. a dot-file for which Node's `path.extname()` returns
 * an empty string, so the `.html` extension would be lost. Those paths are
 * mapped to `index.html` inside the directory instead.
 * @param pathname - The pathname component of the page URL.
 * @returns A path that always ends in a real `*.html` file name.
 */
function toVirtualFilePath(pathname: string): string {
	if (pathname === '' || pathname.endsWith('/')) {
		return `${pathname}index.html`;
	}
	return `${pathname}.html`;
}

/**
 * Analyze plugin that runs textlint proofreading rules against each page's
 * HTML and reports every textlint message as a violation.
 *
 * The effective rule set is `defaultRules` with the user's `rules` option
 * spread on top, so users can override, disable or add rules.
 *
 * ## Lazy linter initialization
 *
 * The linter is not built when the plugin is created. The first `eachPage`
 * call starts `buildLinter()` and stores the resulting promise; every later
 * call (including calls that arrive while the build is still in flight)
 * awaits that same promise, so the rule packages are imported only once.
 * @example
 * ```jsonc
 * // nitpicker.config.json
 * {
 *   "plugins": {
 *     "analyze": {
 *       "@nitpicker/analyze-textlint": {
 *         "rules": {
 *           "max-ten": { "max": 5 },
 *           "spellcheck-tech-word": false
 *         }
 *       }
 *     }
 *   }
 * }
 * ```
 */
export default definePlugin((options: Options) => {
	const rules = { ...defaultRules, ...options.rules };
	let linterPromise: Promise<ReturnType<typeof createLinter>> | undefined;

	/**
	 * Returns the shared linter promise, starting the build on first use.
	 */
	function getLinter() {
		if (!linterPromise) {
			linterPromise = buildLinter(rules);
		}
		return linterPromise;
	}

	return {
		label: 'textlint: テキスト校正',
		async eachPage({ html, url }) {
			const linter = await getLinter();
			const result = await linter.lintText(html, toVirtualFilePath(url.pathname));

			const report: Report = {
				url: url.href,
				results: result.messages,
			};

			// One violation per textlint message; the position inside the
			// page is appended to the URL as "(line:column)".
			const violations = report.results.map<Violation>((r) => ({
				validator: 'textlint',
				severity: convertSeverity(r.severity),
				rule: r.ruleId,
				code: '-',
				message: `${r.message}`,
				url: `${report.url} (${r.line}:${r.column})`,
			}));

			return {
				violations,
			};
		},
	};
});
296
+
297
/**
 * Translates a textlint severity level into Nitpicker's severity string.
 *
 * Only level `1` is reported as `"warning"`. Level `2` and every other
 * value are reported as `"error"`, so an unexpected level is never
 * downgraded.
 * @param severity - The severity level carried by a textlint message.
 * @returns `"warning"` for level 1, otherwise `"error"`.
 */
function convertSeverity(severity: TextlintRuleSeverityLevel) {
	return severity === 1 ? 'warning' : 'error';
}
package/src/list-up.ts ADDED
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Standalone script that lints all HTML files in the current directory
3
+ * for orthographic variant inconsistencies across an entire site.
4
+ *
5
+ * Unlike the main analyze-textlint plugin (which lints each page independently),
6
+ * this script concatenates all pages into a single document before linting.
7
+ * This enables cross-page rules like `ja-no-orthographic-variants` to detect
8
+ * inconsistent word usage across different pages (e.g. using both "サーバー"
9
+ * and "サーバ" on different pages of the same site).
10
+ *
11
+ * The output is deduplicated and written to `result.txt` for manual review.
12
+ *
13
+ * Usage: `npx tsx list-up.ts` (from the directory containing HTML files)
14
+ */
15
+ import type { TextlintMessage } from '@textlint/types';
16
+
17
+ import fs from 'node:fs/promises';
18
+ import path from 'node:path';
19
+
20
+ import { glob } from 'glob';
21
+ import { JSDOM } from 'jsdom';
22
+ import { createLinter } from 'textlint';
23
+ import TurndownService from 'turndown';
24
+
25
const turndownService = new TurndownService();

// Search relative to the working directory instead of embedding the
// directory in the pattern: glob patterns must use forward slashes (a
// backslash is an escape character), so a pattern built with path.resolve()
// breaks on Windows, and any glob-magic character in the directory name
// would be interpreted as part of the pattern. `absolute: true` keeps the
// results absolute, as they were with the absolute pattern.
const rootDir = path.resolve();
const files = await glob('**/*.html', { cwd: rootDir, absolute: true });

// Convert every HTML file into a Markdown section headed by the page title.
const pages = await Promise.all(
	files.map(async (filePath) => {
		let html = await fs.readFile(filePath, { encoding: 'utf8' });
		// Collapse every run of whitespace (including newlines) to one space
		// before both the Markdown conversion and the DOM parse.
		html = html.replaceAll(/\s+/g, ' ');
		const md = turndownService.turndown(html);

		// The DOM is parsed only to read the document title.
		const dom = new JSDOM(html);
		const title = dom.window.document.title;

		return `# ${title}\n\n${md}`;
	}),
);
42
+
43
/**
 * Rules used for the site-wide pass. Keys are rule ids; each one is loaded
 * from the package `textlint-rule-{id}`.
 */
const rulesConfig: Record<string, unknown> = {
	// Currently not enabled:
	// @see https://github.com/textlint-ja/textlint-rule-no-synonyms
	// '@textlint-ja/no-synonyms': true,
	'ja-no-orthographic-variants': true,
};
50
+
51
/**
 * Imports a package at runtime and unwraps its default export.
 *
 * Returns the first of `namespace.default.default`, `namespace.default`
 * and the namespace itself that is neither `null` nor `undefined`, which
 * covers default exports that were wrapped once or twice on import.
 * @param moduleName - npm package name to import.
 * @returns The unwrapped module export.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function loadModule(moduleName: string): Promise<any> {
	const namespace = await import(moduleName);
	const { default: outer } = namespace;
	return outer?.default ?? outer ?? namespace;
}
62
+
63
const { TextlintKernelDescriptor } = await import('@textlint/kernel');

// Load every rule that is not explicitly disabled; `true` means
// "enable with an empty options object".
const enabledRules = Object.entries(rulesConfig).filter(([, setting]) => setting !== false);
const ruleDescriptors = await Promise.all(
	enabledRules.map(async ([ruleId, setting]) => ({
		ruleId,
		rule: await loadModule(`textlint-rule-${ruleId}`),
		options: setting === true ? {} : (setting as Record<string, unknown>),
	})),
);

// NOTE(review): no plugin is registered here, and the virtual file path
// passed to lintText() below is just '.md' (Node's path.extname('.md') is
// ''). Confirm that textlint can select a Markdown processor with this
// setup.
const descriptor = new TextlintKernelDescriptor({
	rules: ruleDescriptors,
	plugins: [],
	filterRules: [],
});

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const linter = createLinter({ descriptor: descriptor as any });

// Lint all pages as one document so that rules can compare wording
// across pages.
const combined = pages.join('\n\n');
const lintResult = await linter.lintText(combined, '.md');

// Keep each distinct message once, in first-seen order, one per line.
const uniqueMessages = new Set(lintResult.messages.map((m: TextlintMessage) => m.message));

await fs.writeFile('result.txt', [...uniqueMessages].join('\n'), { encoding: 'utf8' });
package/tsconfig.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "extends": "../../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "composite": true,
5
+ "outDir": "./lib",
6
+ "rootDir": "./src"
7
+ },
8
+ "references": [{ "path": "../core" }],
9
+ "include": ["./src/**/*"],
10
+ "exclude": ["node_modules", "lib", "./src/**/*.spec.ts"]
11
+ }