mediac 1.5.0 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cmd/cmd_compress.js +1 -1
- package/cmd/cmd_decode.js +98 -0
- package/cmd/cmd_fixname.js +32 -10
- package/cmd/cmd_prefix.js +2 -0
- package/cmd/cmd_shared.js +2 -0
- package/lib/core.js +5 -0
- package/lib/encoding.js +88 -76
- package/lib/file.js +1 -1
- package/lib/helper.js +6 -0
- package/lib/messy_hanzi.txt +1 -1
- package/lib/unicode.js +12 -10
- package/lib/unicode_data.json +1 -1
- package/package.json +2 -1
- package/scripts/media_cli.js +7 -5
- package/scripts/path_test.js +14 -0
- package/scripts/unicode_test.js +5 -5
- package/scripts/zip_test.js +0 -1
- package/scripts/fix_messy.js +0 -59
package/cmd/cmd_compress.js
CHANGED
|
@@ -102,7 +102,7 @@ const handler = async function cmdCompress(argv) {
|
|
|
102
102
|
const purgeSource = argv.purge || false;
|
|
103
103
|
log.show(`${logTag} input:`, root);
|
|
104
104
|
|
|
105
|
-
const RE_THUMB = /Z4K|M4K|feature|web|thumb$/i;
|
|
105
|
+
const RE_THUMB = /Z4K|P4K|M4K|feature|web|thumb$/i;
|
|
106
106
|
const walkOpts = {
|
|
107
107
|
needStats: true,
|
|
108
108
|
entryFilter: (f) =>
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
|
|
2
|
+
import chardet from 'chardet';
|
|
3
|
+
import * as log from '../lib/debug.js';
|
|
4
|
+
import * as enc from '../lib/encoding.js';
|
|
5
|
+
import * as unicode from '../lib/unicode.js';
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
const ENC_LIST = [
|
|
9
|
+
'ISO-8859-1',
|
|
10
|
+
'UTF8',
|
|
11
|
+
'UTF-16',
|
|
12
|
+
'GBK',
|
|
13
|
+
'BIG5',
|
|
14
|
+
'SHIFT_JIS',
|
|
15
|
+
'EUC-JP',
|
|
16
|
+
'EUC-KR',
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
export { aliases, builder, command, describe, handler };
|
|
20
|
+
const command = "decode <strings...>"
|
|
21
|
+
const aliases = ["dc"]
|
|
22
|
+
const describe = 'Decode text with messy or invalid chars'
|
|
23
|
+
|
|
24
|
+
/**
 * Registers the positional argument and the options of the `decode` command.
 *
 * Fixes over the previous version:
 *  - the chain ended in a stray `.po` property access, so the function
 *    returned `undefined` instead of the configured instance;
 *  - `type: "choices"` is not an option type; the value is a string that is
 *    restricted by the separate `choices` key;
 *  - help text typo "tg." corrected to "eg.".
 *
 * @param {object} ya - command-line parser instance exposing chainable
 *   `positional()` and `option()` methods
 * @param {boolean} helpOrVersionSet - unused in this builder
 * @returns {object} the same `ya` instance, to allow further chaining
 */
const builder = function addOptions(ya, helpOrVersionSet) {
  const encodingChoices = ['utf8', 'gbk', 'shift_jis', 'big5', 'euc-kr'];
  return ya
    .positional('strings', {
      describe: 'string list to decode',
      type: 'string',
    })
    // Encoding the garbled text is assumed to have been read with.
    .option("from-enc", {
      alias: "f",
      type: "string",
      choices: [...encodingChoices],
      description: "from encoding name eg. utf8|gbk|shift_jis",
    })
    // Encoding the text should be re-interpreted as.
    .option("to-enc", {
      alias: "t",
      type: "string",
      choices: [...encodingChoices],
      description: "to encoding name eg. utf8|gbk|shift_jis",
    });
}
|
|
44
|
+
|
|
45
|
+
/**
 * Entry point of the `decode` command: tries to repair each input string by
 * re-encoding/decoding it across candidate encodings and prints the findings.
 *
 * @param {object} argv - parsed arguments; reads `strings`, `fromEnc`, `toEnc`
 * @throws {Error} when no input string was supplied
 */
const handler = async function cmdDecode(argv) {
  const logTag = "cmdDecode";
  log.info(logTag, 'Args:', argv);
  const strArgs = argv.strings;
  // Reject both a missing and an empty list. The previous `=== 0` check let
  // `undefined` through, which then failed later with an unrelated TypeError.
  if (!strArgs?.length) {
    throw new Error(`text input required`);
  }
  // A user-supplied encoding narrows the search; otherwise try the full list.
  const fromEnc = argv.fromEnc?.length > 0 ? [argv.fromEnc] : ENC_LIST;
  const toEnc = argv.toEnc?.length > 0 ? [argv.toEnc] : ENC_LIST;
  // Verbose mode lowers the score threshold so nearly all candidates are shown.
  const threhold = log.isVerbose() ? 1 : 50;
  log.show(logTag, `Input:`, strArgs);
  log.show(logTag, `fromEnc:`, JSON.stringify(fromEnc));
  log.show(logTag, `toEnc:`, JSON.stringify(toEnc));

  for (const str of strArgs) {
    log.show(logTag, 'TryDecoding:', [str]);
    const results = decodeText(str, fromEnc, toEnc, threhold);
    // Wrap in an arrow so only the result row is passed on.
    results.forEach((row) => showResults(row));
    log.show('INPUT:', [str, str.length]);
    // The last entry of the list returned by decodeText is reported as the
    // output; read it without mutating the list.
    log.show('OUPUT:', results[results.length - 1]);
    console.log();
  }
}
|
|
68
|
+
|
|
69
|
+
/**
 * Runs the encoding-repair search for one string and returns the candidate
 * list in reversed order relative to what `enc.tryDecodeText` produced.
 *
 * @param {string} str - text to decode
 * @param {string[]} fromEnc - encodings to encode with
 * @param {string[]} toEnc - encodings to decode with
 * @param {number} threhold - score threshold forwarded to `enc.tryDecodeText`
 * @returns {Array} candidate rows, reversed in place
 */
function decodeText(str, fromEnc = ENC_LIST, toEnc = ENC_LIST, threhold = 50) {
  const candidates = enc.tryDecodeText(str, fromEnc, toEnc, threhold);
  candidates.reverse();
  return candidates;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Prints diagnostics for one decode candidate row.
 *
 * @param {Array} r - candidate row; `r[0]` is the decoded string and the
 *   remaining entries are printed as-is next to it
 */
function showResults(r) {
  log.info(`-`);
  const str = r[0];
  // Helper: left-aligned label padded to 16 columns, followed by the value.
  const logField = (label, value) => {
    return log.info(label.padEnd(16, ' '), value);
  };
  log.show('Result:', str.padEnd(16, ' '), r.slice(1));

  // Charset guesses for the string's bytes, keeping confidence >= 70 only.
  const guesses = chardet
    .analyse(Buffer.from(str))
    .filter((guess) => guess.confidence >= 70);
  if (guesses.length > 0) {
    logField('Encoding', guesses);
  }

  logField('String', Array.from(str));
  logField('Unicode', Array.from(str).map((ch) => ch.codePointAt(0).toString(16)));

  const badUnicode = enc.checkBadUnicode(str);
  if (badUnicode?.length > 0) {
    log.info(`badUnicode=true`);
  }

  log.info(
    `MESSY_UNICODE=${enc.REGEX_MESSY_UNICODE.test(str)}`,
    `MESSY_CJK=${enc.REGEX_MESSY_CJK.test(str)}`,
    `MESSY_CJK_EXT=${enc.REGEX_MESSY_CJK_EXT.test(str)}`
  );
  log.info(
    `OnlyJapanese=${unicode.strOnlyJapanese(str)}`,
    `OnlyJpHan=${unicode.strOnlyJapaneseHan(str)}`,
    `HasHiraKana=${unicode.strHasHiraKana(str)}`
  );
  log.info(
    `HasHangul=${unicode.strHasHangul(str)}`,
    `OnlyHangul=${unicode.strOnlyHangul(str)}`
  );
  log.info(
    `HasChinese=${unicode.strHasChinese(str)}`,
    `OnlyChinese=${unicode.strOnlyChinese(str)}`,
    `OnlyChn3500=${enc.RE_CHARS_MOST_USED.test(str)}`
  );
}
|
package/cmd/cmd_fixname.js
CHANGED
|
@@ -12,6 +12,7 @@ import inquirer from "inquirer";
|
|
|
12
12
|
import { cpus } from "os";
|
|
13
13
|
import pMap from 'p-map';
|
|
14
14
|
import path from "path";
|
|
15
|
+
import * as core from '../lib/core.js';
|
|
15
16
|
import { asyncFilter } from '../lib/core.js';
|
|
16
17
|
import * as log from '../lib/debug.js';
|
|
17
18
|
import * as enc from '../lib/encoding.js';
|
|
@@ -43,6 +44,13 @@ const builder = function addOptions(ya, helpOrVersionSet) {
|
|
|
43
44
|
type: "boolean",
|
|
44
45
|
description: "remove special chars in filename",
|
|
45
46
|
})
|
|
47
|
+
// 使用正则表达式替换文件名中的特定字符,比如问号
|
|
48
|
+
// 如果数组只有一项,就是替换这一项为空白,即删除模式字符串
|
|
49
|
+
// 如果有两项,就是替换第一项匹配的字符串为第二项指定的字符
|
|
50
|
+
.option("replace", {
|
|
51
|
+
type: "array",
|
|
52
|
+
description: "replace regex pattern in filename [from,to]",
|
|
53
|
+
})
|
|
46
54
|
// 修复文件名乱码
|
|
47
55
|
.option("encoding", {
|
|
48
56
|
alias: "e",
|
|
@@ -78,9 +86,9 @@ const handler = async function cmdFixName(argv) {
|
|
|
78
86
|
}
|
|
79
87
|
const startMs = Date.now();
|
|
80
88
|
log.show(logTag, `Input: ${root}`);
|
|
81
|
-
if (!(argv.clean || argv.encoding || argv.tcsc)) {
|
|
82
|
-
log.error(`Error: clean|encoding|tcsc,at least one is required`);
|
|
83
|
-
throw new Error(`clean|encoding|tcsc,at least one is required`);
|
|
89
|
+
if (!(argv.clean || argv.encoding || argv.tcsc || argv.remove)) {
|
|
90
|
+
log.error(`Error: replace|clean|encoding|tcsc,at least one is required`);
|
|
91
|
+
throw new Error(`replace|clean|encoding|tcsc,at least one is required`);
|
|
84
92
|
}
|
|
85
93
|
let files = await mf.walk(root, {
|
|
86
94
|
needStats: true,
|
|
@@ -167,22 +175,34 @@ async function fixFileName(f) {
|
|
|
167
175
|
const strPath = path.resolve(f.path).split(path.sep).join(' ')
|
|
168
176
|
let oldBase = base;
|
|
169
177
|
let newDir = oldDir;
|
|
178
|
+
if (argv.replace?.[0]?.length > 0) {
|
|
179
|
+
const rFrom = argv.replace[0];
|
|
180
|
+
const rTo = argv.replace[1] || "";
|
|
181
|
+
// 执行文件名字符替换操作
|
|
182
|
+
// 按照正则表达式替换指定字符
|
|
183
|
+
// 如果rTo为空则等于删除字符
|
|
184
|
+
oldBase = oldBase.replaceAll(rFrom, rTo);
|
|
185
|
+
oldBase = oldBase.replaceAll(new RegExp(rFrom, "gu"), rTo);
|
|
186
|
+
}
|
|
170
187
|
if (argv.encoding) {
|
|
171
188
|
// 执行文件路径乱码修复操作
|
|
172
189
|
// 对路径进行中日韩文字编码修复
|
|
173
|
-
let [fs, ft] = enc.
|
|
190
|
+
let [fs, ft] = enc.decodeText(oldBase);
|
|
174
191
|
oldBase = fs.trim();
|
|
175
192
|
// 将目录路径分割,并对每个部分进行编码修复
|
|
176
193
|
const dirNamesFixed = oldDir.split(path.sep).map(s => {
|
|
177
|
-
let [rs, rt] = enc.
|
|
194
|
+
let [rs, rt] = enc.decodeText(s)
|
|
178
195
|
return rs.trim();
|
|
179
196
|
});
|
|
180
197
|
// 重新组合修复后的目录路径
|
|
181
198
|
newDir = path.join(...dirNamesFixed);
|
|
199
|
+
if (core.isUNCPath(oldDir)) {
|
|
200
|
+
newDir = "\\\\" + newDir;
|
|
201
|
+
}
|
|
182
202
|
// 显示有乱码的文件路径
|
|
183
203
|
if (enc.hasBadUnicode(strPath)) {
|
|
184
|
-
|
|
185
|
-
|
|
204
|
+
log.showGray(logTag, `BadEnc:${++badCount}`, oldPath)
|
|
205
|
+
log.fileLog(`BadEnc: ${ipx} <${oldPath}>`, logTag);
|
|
186
206
|
}
|
|
187
207
|
}
|
|
188
208
|
if (argv.clean) {
|
|
@@ -193,19 +213,21 @@ async function fixFileName(f) {
|
|
|
193
213
|
// 执行繁体转简体操作
|
|
194
214
|
oldBase = sify(oldBase)
|
|
195
215
|
}
|
|
216
|
+
// 确保文件名不含有文件系统不允许的非法字符
|
|
217
|
+
oldBase = helper.filenameSafe(oldBase);
|
|
196
218
|
// 生成修复后的新路径,包括旧基础路径和文件扩展名
|
|
197
219
|
const newName = `${oldBase}${ext}`
|
|
198
220
|
const newPath = path.join(newDir, newName);
|
|
199
221
|
if (newPath === oldPath) {
|
|
200
|
-
log.info(logTag, `Same: ${ipx} ${helper.pathShort(newPath)}`);
|
|
222
|
+
log.info(logTag, `Ignore Same: ${ipx} ${helper.pathShort(newPath)}`);
|
|
201
223
|
f.skipped = true;
|
|
202
224
|
}
|
|
203
225
|
else if (await fs.pathExists(newPath)) {
|
|
204
|
-
log.info(logTag, `Exists: ${ipx} ${helper.pathShort(newPath)}`);
|
|
226
|
+
log.info(logTag, `Ignore Exists: ${ipx} ${helper.pathShort(newPath)}`);
|
|
205
227
|
f.skipped = true;
|
|
206
228
|
}
|
|
207
229
|
else if (nameDuplicateSet.has(newPath)) {
|
|
208
|
-
log.info(logTag, `Dup: ${ipx} ${helper.pathShort(newPath)}`);
|
|
230
|
+
log.info(logTag, `Ignore Dup: ${ipx} ${helper.pathShort(newPath)}`);
|
|
209
231
|
f.skipped = true;
|
|
210
232
|
}
|
|
211
233
|
|
package/cmd/cmd_prefix.js
CHANGED
|
@@ -317,6 +317,8 @@ async function createNewNameByMode(f) {
|
|
|
317
317
|
log.info(logTag, `IgnorePrefix: ${ipx} ${helper.pathShort(f.path)}`);
|
|
318
318
|
prefix = "";
|
|
319
319
|
}
|
|
320
|
+
// 确保文件名不含有文件系统不允许的非法字符
|
|
321
|
+
oldBase = helper.filenameSafe(oldBase);
|
|
320
322
|
let fullBase = prefix.length > 0 ? (prefix + sep + oldBase) : oldBase;
|
|
321
323
|
// 去除首位空白和特殊字符
|
|
322
324
|
fullBase = fullBase.replaceAll(reStripUglyChars, "");
|
package/cmd/cmd_shared.js
CHANGED
|
@@ -35,9 +35,11 @@ async function renameOneFile(f) {
|
|
|
35
35
|
if (!await fs.pathExists(outDir)) {
|
|
36
36
|
await fs.mkdirs(outDir);
|
|
37
37
|
}
|
|
38
|
+
|
|
38
39
|
// 使用 fs 模块的 rename 方法重命名文件,并等待操作完成
|
|
39
40
|
await fs.rename(f.path, outPath);
|
|
40
41
|
// 打印重命名成功的日志信息,显示输出文件的路径
|
|
42
|
+
log.showGray(`Source: ${f.path}`);
|
|
41
43
|
log.show(`${chalk.green(`Renamed:`)} ${outPath}`);
|
|
42
44
|
log.fileLog(`Done: <${f.path}> => ${f.outName}`, "Rename");
|
|
43
45
|
return f;
|
package/lib/core.js
CHANGED
package/lib/encoding.js
CHANGED
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
import fs from 'fs-extra';
|
|
1
|
+
|
|
3
2
|
import iconv from 'iconv-lite';
|
|
4
|
-
import os from 'os';
|
|
5
|
-
import path from 'path';
|
|
6
3
|
import * as log from './debug.js';
|
|
7
|
-
import { strHasASCII, strHasHFKanaHira, strHasHiraKana, strOnlyASCII, strOnlyChinese, strOnlyJapanese } from './unicode.js';
|
|
4
|
+
import { strHasASCII, strHasHFKanaHira, strHasHiraKana, strOnlyASCII, strOnlyChinese, strOnlyHangul, strOnlyJapanese, strOnlyJapaneseHan } from './unicode.js';
|
|
8
5
|
import { CHINESE_CHARS_3500, MESSY_CJK_CHARS as MESSY_CJK_CHARS_ } from './unicode_data.js';
|
|
9
6
|
|
|
10
|
-
// https://github.com/bnoordhuis/node-iconv/
|
|
7
|
+
// https://github.com/bnoordhuis/node-iconv/
|
|
11
8
|
const ENCODING_FROM = [
|
|
12
9
|
'SHIFT_JIS',
|
|
13
10
|
'GBK',
|
|
@@ -26,51 +23,59 @@ const ENCODING_TO = [
|
|
|
26
23
|
// 'EUC-KR',
|
|
27
24
|
]
|
|
28
25
|
|
|
29
|
-
const ENCODING_TRY = ['SHIFT_JIS', 'UTF8']
|
|
30
|
-
|
|
31
26
|
export const MESSY_CJK_CHARS = MESSY_CJK_CHARS_
|
|
32
27
|
|
|
33
28
|
export const REGEX_MESSY_CJK = new RegExp(`[${MESSY_CJK_CHARS}]`, 'u')
|
|
34
29
|
|
|
35
|
-
export const REGEX_MESSY_CJK_EXT = /[\
|
|
30
|
+
export const REGEX_MESSY_CJK_EXT = /[\u8720-\u883f\u9300-\u9484]/u //生僻字: 虫字旁 金字旁
|
|
36
31
|
|
|
37
32
|
export const REGEX_MESSY_UNICODE = /[\u007f-\u00a0\u00c0-\u017f\u0400-\u1cff\u2070-\u24ff\u0e00-\u0e7f\u3400-\u4dbf\uac00-\uf8ff\ufe30-\ufe4f\ufff0-\uffff]/u
|
|
38
33
|
|
|
39
34
|
// 正则:只包含中文常用汉字,日文平假名片假名和ASCII字符
|
|
40
|
-
export const RE_CHARS_MOST_USED = new RegExp(`^[${CHINESE_CHARS_3500}\\u3000-\\u303f\\uff66-\\uff9dA-Za-z0-9\\-_]+$`, 'ui')
|
|
35
|
+
export const RE_CHARS_MOST_USED = new RegExp(`^[${CHINESE_CHARS_3500}\\u3000-\\u303f\\uff66-\\uff9dA-Za-z0-9\\-_ ]+$`, 'ui')
|
|
41
36
|
|
|
42
37
|
/**
 * Returns the distinct characters (code points) of a string, in the order
 * they first appear.
 *
 * The previous implementation spread the set into
 * `String.prototype.concat.call(...)`, which throws a TypeError for an empty
 * string because no `this` value is supplied. Joining the set handles the
 * empty case and yields the same result for every other input.
 *
 * @param {string} str - input text
 * @returns {string} the input with repeated characters removed
 */
export function charUnique(str) {
  return [...new Set(str)].join('');
}
|
|
45
40
|
|
|
46
|
-
const nowDateStr = dayjs().format("YYYYMMDDHHmmss");
|
|
47
|
-
const tempfile = path.join(os.tmpdir(), `z_mediac_log_${nowDateStr}.txt`)
|
|
48
|
-
|
|
49
41
|
export function checkBadUnicode(str) {
|
|
50
42
|
const results = []
|
|
51
43
|
if (str.includes('?') || str.includes('\ufffd')) {
|
|
52
44
|
// 乱码标志 问号和黑问号
|
|
53
45
|
results.push([true, 0, `非法字符`])
|
|
54
46
|
}
|
|
55
|
-
if (/[\u00c0-\u024f\u3100-\u312f
|
|
47
|
+
if (/[\u00c0-\u00d6\u00d8-\u024f\u3100-\u312f]/u.test(str)) {
|
|
56
48
|
// 乱码标志 拉丁字母扩展 注音符号
|
|
57
49
|
results.push([true, 2, `拉丁字母扩展`])
|
|
58
50
|
}
|
|
59
|
-
if (/[\
|
|
60
|
-
// 乱码标志
|
|
61
|
-
results.push([true,
|
|
51
|
+
if (/[\u3300-\u33ff]/u.test(str)) {
|
|
52
|
+
// 乱码标志 特殊字符
|
|
53
|
+
results.push([true, 4, `CJK特殊字符`])
|
|
62
54
|
}
|
|
63
|
-
if (/[\
|
|
64
|
-
// 乱码标志
|
|
65
|
-
results.push([true,
|
|
55
|
+
if (/[\u0370-\u1cff]/u.test(str)) {
|
|
56
|
+
// 乱码标志 小众语言符号
|
|
57
|
+
results.push([true, 3, `小众语言A`])
|
|
58
|
+
}
|
|
59
|
+
if (/[\ua000-\ua7ff\uab30-\uabff\ud7b0-\ud7ff]/u.test(str)) {
|
|
60
|
+
// 乱码标志 小众语言符号
|
|
61
|
+
results.push([true, 4, `小众语言B`])
|
|
62
|
+
}
|
|
63
|
+
if (/[\ud800-\udfff]/u.test(str)) {
|
|
64
|
+
// 乱码标志 代理对,存疑
|
|
65
|
+
results.push([true, 4, `代理对`])
|
|
66
66
|
}
|
|
67
67
|
if (/[\ue000-\uf8ff]/u.test(str)) {
|
|
68
68
|
// 乱码标志 Unicode私有区
|
|
69
69
|
results.push([true, 5, `私有区`])
|
|
70
70
|
}
|
|
71
|
+
if (/[\ufb50-\ufdff\ufe70-\ufeff]/u.test(str)) {
|
|
72
|
+
// 乱码标志 阿拉伯字符
|
|
73
|
+
results.push([true, 5, `阿拉伯字符`])
|
|
74
|
+
}
|
|
71
75
|
if (/[\uff66-\uff9d]/u.test(str)) {
|
|
76
|
+
// 暂时忽略,还比较常用
|
|
72
77
|
// 乱码标志 半角平假名片假名
|
|
73
|
-
results.push([true, 6, `半角假名`])
|
|
78
|
+
// results.push([true, 6, `半角假名`])
|
|
74
79
|
}
|
|
75
80
|
if (/[㼿]/u.test(str)) {
|
|
76
81
|
// 乱码标志 特殊生僻字
|
|
@@ -87,71 +92,76 @@ export function hasBadCJKChar(str) {
|
|
|
87
92
|
return REGEX_MESSY_CJK.test(str) || REGEX_MESSY_CJK_EXT.test(str)
|
|
88
93
|
}
|
|
89
94
|
|
|
90
|
-
export function
|
|
91
|
-
let results =
|
|
95
|
+
export function decodeText(str) {
|
|
96
|
+
let results = tryDecodeText(str)
|
|
92
97
|
results = results.filter(r => r[2] >= 0).sort((a, b) => b[2] - a[2])
|
|
93
|
-
log.
|
|
94
|
-
log.
|
|
98
|
+
log.debug('==================================')
|
|
99
|
+
log.debug(str)
|
|
95
100
|
if (results?.length > 0) {
|
|
96
101
|
for (const r of results) {
|
|
97
|
-
log.
|
|
102
|
+
log.debug(r[0], '\t\t', r.slice(1))
|
|
98
103
|
}
|
|
99
104
|
}
|
|
100
105
|
return results[0] || [str, false, 0, 'fallback'];
|
|
101
106
|
}
|
|
102
107
|
|
|
103
|
-
export function
|
|
108
|
+
export function tryDecodeText(str,
|
|
104
109
|
fromEnc = ENCODING_FROM,
|
|
105
110
|
toEnc = ENCODING_TO,
|
|
106
111
|
threhold = 10) {
|
|
107
112
|
if (str.includes('?') || str.includes('\ufffd')) {
|
|
108
|
-
return [[str, false, 0, '
|
|
113
|
+
return [[str, false, 0, '[乱码字符]'],]
|
|
109
114
|
}
|
|
110
115
|
|
|
116
|
+
fromEnc = fromEnc.map(x => x.toLowerCase())
|
|
117
|
+
toEnc = toEnc.map(x => x.toLowerCase())
|
|
118
|
+
|
|
111
119
|
let results = []
|
|
112
120
|
if (strOnlyASCII(str)) {
|
|
113
121
|
// results.push([str, false, 0])
|
|
114
|
-
return [[str, false,
|
|
122
|
+
return [[str, false, 100, '[ASCII]'],]
|
|
115
123
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
// else if (strOnlyChinese(str)) {
|
|
123
|
-
// results.push([str, false, 99, '全中文01', ''])
|
|
124
|
-
// }
|
|
125
|
-
else if (strHasHFKanaHira(str)) {
|
|
126
|
-
// 包含不用的全角半角平假名片假名
|
|
127
|
-
results.push([str, false, 65, '含半角假名0', ''])
|
|
128
|
-
}
|
|
129
|
-
else {
|
|
130
|
-
// fs.appendFileSync(tempfile, str + '\n')
|
|
131
|
-
// return [[str, false, 0, '忽略0', ''],]
|
|
132
|
-
}
|
|
133
|
-
} else {
|
|
124
|
+
const messyUnicode = REGEX_MESSY_UNICODE.test(str)
|
|
125
|
+
const messyCJK = REGEX_MESSY_CJK.test(str)
|
|
126
|
+
const messyCJKExt = REGEX_MESSY_CJK_EXT.test(str)
|
|
127
|
+
log.info('---------------------')
|
|
128
|
+
log.info('tryDecodeText', str)
|
|
129
|
+
if (messyUnicode || messyCJK || messyCJKExt) {
|
|
134
130
|
if (strOnlyChinese(str) && !REGEX_MESSY_CJK_EXT.test(str)) {
|
|
135
|
-
return [[str, false,
|
|
131
|
+
return [[str, false, 100, `[全中文]`],]
|
|
136
132
|
}
|
|
137
133
|
}
|
|
138
|
-
if ((
|
|
139
|
-
|
|
140
|
-
|
|
134
|
+
if (RE_CHARS_MOST_USED.test(str)) {
|
|
135
|
+
results.push([str, false, 100, '[常用汉字]'])
|
|
136
|
+
}
|
|
137
|
+
else if (strHasHFKanaHira(str)) {
|
|
138
|
+
// 包含不用的全角半角平假名片假名
|
|
139
|
+
results.push([str, false, 65, '[半角假名]'])
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
// fs.appendFileSync(tempfile, str + '\n')
|
|
143
|
+
return [[str, false, 0, '[无乱码]', ''],]
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
if (!!REGEX_MESSY_CJK.test(str)
|
|
147
|
+
&& (strHasHiraKana(str) || strHasASCII(str))
|
|
148
|
+
&& strOnlyJapanese(str)) {
|
|
149
|
+
results.push([str, false, 99, '[全日文1]'])
|
|
141
150
|
}
|
|
142
151
|
else if (strOnlyJapanese(str)) {
|
|
143
|
-
results.push([str, false, 80, '全日文
|
|
152
|
+
results.push([str, false, 80, '[全日文2]'])
|
|
144
153
|
}
|
|
145
|
-
|
|
146
|
-
// log.show(Array.from(str).map(c => c.codePointAt(0).toString(16)).join(' '))
|
|
154
|
+
|
|
147
155
|
for (const enc1 of fromEnc) {
|
|
148
156
|
for (const enc2 of toEnc) {
|
|
157
|
+
// 忽略解码编码相同的情况
|
|
149
158
|
if (enc1 === enc2) { continue }
|
|
150
159
|
try {
|
|
151
160
|
const strBuffer = iconv.encode(str, enc1)
|
|
152
161
|
let strDecoded = iconv.decode(strBuffer, enc2)
|
|
153
162
|
const badDecoded = checkBadUnicode(strDecoded)
|
|
154
163
|
// const strCleaned = strDecoded.replaceAll(/[\ufffd\u0020]/ugi, '')
|
|
164
|
+
log.debug(enc1, enc2, strDecoded, badDecoded)
|
|
155
165
|
// 如果含有乱码字符
|
|
156
166
|
if (badDecoded?.length > 0) {
|
|
157
167
|
for (const item of badDecoded) {
|
|
@@ -159,44 +169,46 @@ export function fixCJKEncImpl(str,
|
|
|
159
169
|
}
|
|
160
170
|
continue;
|
|
161
171
|
}
|
|
162
|
-
|
|
163
|
-
// log.showRed('========')
|
|
164
|
-
// log.showRed(str)
|
|
165
|
-
// log.showGreen(Array.from(str).map(c => c.codePointAt(0).toString(16)))
|
|
166
|
-
// log.show(strDecoded, enc1, enc2)
|
|
167
|
-
|
|
168
172
|
const onlyASCII = strOnlyASCII(strDecoded)
|
|
169
173
|
const onlyCN = strOnlyChinese(strDecoded)
|
|
170
174
|
const onlyJP = strOnlyJapanese(strDecoded)
|
|
175
|
+
const onlyJPHan = strOnlyJapaneseHan(strDecoded)
|
|
176
|
+
const onlyKR = strOnlyHangul(strDecoded)
|
|
177
|
+
const hasHiraKana = strHasHiraKana(strDecoded)
|
|
178
|
+
const hasHFHiraKana = strHasHFKanaHira(strDecoded)
|
|
171
179
|
const messyUnicode = REGEX_MESSY_UNICODE.test(strDecoded)
|
|
172
180
|
const messyCJK = REGEX_MESSY_CJK.test(strDecoded)
|
|
181
|
+
const messyCJKExt = REGEX_MESSY_CJK_EXT.test(strDecoded)
|
|
182
|
+
|
|
183
|
+
log.debug(strDecoded, 'cn', onlyCN, 'jp', onlyJP, 'jhan', onlyJPHan, 'kr', onlyKR)
|
|
184
|
+
log.debug(strDecoded, 'hk', hasHiraKana, 'hf', hasHFHiraKana, 'mu', messyUnicode, 'mc', messyCJK)
|
|
185
|
+
|
|
173
186
|
if (onlyASCII && !strDecoded.includes('?')) {
|
|
174
|
-
results.push([strDecoded, true, 99,
|
|
187
|
+
results.push([strDecoded, true, 99, `ASCII`, `${enc1}=>${enc2}`])
|
|
175
188
|
break
|
|
176
189
|
}
|
|
177
190
|
if (RE_CHARS_MOST_USED.test(strDecoded)) {
|
|
178
191
|
results.push([strDecoded, true, 99, `常用汉字`, `${enc1}=>${enc2}`])
|
|
179
192
|
break
|
|
180
193
|
}
|
|
181
|
-
log.info(strDecoded, onlyCN, onlyJP, messyUnicode, messyCJK)
|
|
182
|
-
|
|
183
194
|
if (onlyJP) {
|
|
184
|
-
|
|
195
|
+
if (!strHasHiraKana(strDecoded) && !onlyJPHan) {
|
|
196
|
+
results.push([strDecoded, true, 78, `日文字符`, `${enc1}=>${enc2}`])
|
|
197
|
+
}
|
|
185
198
|
}
|
|
186
199
|
else if (onlyCN) {
|
|
187
200
|
results.push([strDecoded, true, 76, `中文字符`, `${enc1}=>${enc2}`])
|
|
188
201
|
}
|
|
189
|
-
else if (
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
202
|
+
else if (hasHiraKana || hasHFHiraKana) {
|
|
203
|
+
results.push([strDecoded, true, 65, `含日文假名`, ` ${enc1}=>${enc2}`])
|
|
204
|
+
}
|
|
205
|
+
else if (onlyKR) {
|
|
206
|
+
results.push([strDecoded, true, 62, `韩文字符`, `${enc1}=>${enc2}`])
|
|
207
|
+
}
|
|
208
|
+
else if (messyCJK || messyCJKExt) {
|
|
209
|
+
results.push([strDecoded, true, 51, `生僻字`, `${enc1}=>${enc2}`])
|
|
210
|
+
// continue
|
|
193
211
|
}
|
|
194
|
-
// else if (messyCJK) {
|
|
195
|
-
// results.push([strDecoded, true, 51, `含特殊汉字`, `${enc1}=>${enc2}`])
|
|
196
|
-
// }
|
|
197
|
-
// else if (messyUnicode) {
|
|
198
|
-
// results.push([strDecoded, true, 52, `含特殊符号`, `${enc1}=>${enc2}`])
|
|
199
|
-
// }
|
|
200
212
|
else {
|
|
201
213
|
results.push([strDecoded, true, 60, `正常转换 ${onlyCN} ${onlyJP}`, ` ${enc1}=>${enc2}`])
|
|
202
214
|
}
|
|
@@ -206,8 +218,8 @@ export function fixCJKEncImpl(str,
|
|
|
206
218
|
}
|
|
207
219
|
}
|
|
208
220
|
}
|
|
209
|
-
results.push([str, false, 70, '
|
|
221
|
+
results.push([str, false, 70, '原始值'])
|
|
210
222
|
results = results.filter(r => r[2] >= threhold).sort((a, b) => b[2] - a[2])
|
|
211
|
-
log.
|
|
223
|
+
log.debug(results.slice(3))
|
|
212
224
|
return results;
|
|
213
225
|
}
|
package/lib/file.js
CHANGED
|
@@ -123,7 +123,7 @@ export async function walk(root, options = {}) {
|
|
|
123
123
|
}
|
|
124
124
|
}
|
|
125
125
|
startMs = Date.now();
|
|
126
|
-
files = (await pMap(files, entryMapper, { concurrency: cpus().length *
|
|
126
|
+
files = (await pMap(files, entryMapper, { concurrency: cpus().length * 4 }));
|
|
127
127
|
// files = files.sort(compareSmartBy('path'))
|
|
128
128
|
needBar && bar1.update(files.length);
|
|
129
129
|
needBar && bar1.stop();
|
package/lib/helper.js
CHANGED
|
@@ -24,6 +24,7 @@ const ARCHIVE_FORMATS = [
|
|
|
24
24
|
|
|
25
25
|
const IMAGE_FORMATS = [
|
|
26
26
|
".jpg",
|
|
27
|
+
".jpe",
|
|
27
28
|
".jpeg",
|
|
28
29
|
".png",
|
|
29
30
|
".avif",
|
|
@@ -202,6 +203,11 @@ export function pathExt(filename, toLowerCase = false) {
|
|
|
202
203
|
return toLowerCase ? ext?.toLowerCase() : ext;
|
|
203
204
|
}
|
|
204
205
|
|
|
206
|
+
const REGEX_ILLEGAL_FILENAME = /[\x00-\x1F\x7F<>:"\/\\|?*]/gm
|
|
207
|
+
export function filenameSafe(name) {
|
|
208
|
+
return name.replaceAll(REGEX_ILLEGAL_FILENAME, '')
|
|
209
|
+
}
|
|
210
|
+
|
|
205
211
|
export function getSafeDeletedDir(filepath) {
|
|
206
212
|
const dtStr = dayjs().format("YYYYMMDD");
|
|
207
213
|
const dir = path.join(pathRoot(filepath), 'Deleted_By_Mediac', dtStr);
|
package/lib/messy_hanzi.txt
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
娾僾壒鑀葊蓭儑垵貋嶅摮謷謸慠巼弝垻跁贁斒螌褩闆螁捠邫艕闁怉堢賲藵菢袌虣儤揹藣郥偝偹僃蛽翉楍倴桳撪輽埲偪楅螕聛貏邲苾袐弻煏蓽蜌廦覍徧艑脿幖儦藨謤贆諘虌彆邠儐虨臏抦昞棅偋傡剝袚缽僠瓟葧艊攃婇傪朁摻儏謲傖賶嶆蓸敇蓛箣憡偛艖詧祡袃訍蠆幨攙僝儃剷嵼幝閳譂讇锠僘謿巐仦蛼諃迧茞軙樄曟趻贂棦偁僜罉脭庱彨彲翄懘趩翀憃蹖揰搊菗椆摴貙幮藸傗滀蓫儊諔臅暷輲僢菙堾輴滣偆賰娕偨蠀嬨皉茦莿螆茐棇蓯聦婃欉謥脨趗憱顣鑹櫕缞嶉磪脺膬臎乼竴虘蟽軑軚艜蹛聸僤蓞贉簹艡欓讜儅趤壔翢艔軇瓙脦艠櫈袛藡拞菧聜偙僀嶳傎嵮蹎奌婰蜔訋蓧恎昳绖戜惵褋艓帄倲埬菄徚蕫迵戙枓脰嬻讟螙偳褍塅嵟逇敪埵軃尮偔崿詻僫讍奀峏栭厼趰誀茷傠蕟忛勫柉羳鐢蠜奿訉軓婏蕜杮鐨翂訜岎幩蕡羵僨膹偑僼鎽霻摓艂覂賵覅缹缻邞紨岪翇虙葍粰呒蜅撨峊袝偩覄褔蕧祴忋槩忓尷仠堈掆罁焹戅菒藳犵鎶輵挭拲羾莻袧褠茩苽箛唃傦脵崓僱詿柺覌輨泴悺罆茪僙椝槼螝膭蛫嶡蓘輥謴濄彍聝惈螒譀迒貥茠椃諕蠔悎傐抲蠚峆楁詥貈螛礉垎袔熇詪悙脝噷叿渹輷鍧纮垬娂翃粠葒綋翝晎闂闀葔虖軤膴謼螜虝嫮熩鍙婲搳撶螖諙諣褢褱貛寏藧巟偟葟熿艎趪詤愰婎幑褘撝蜖詯僡藱儶孈梡佸旤艧僟銈賷鐖覉忣揤偮庴塉檝螏鍓艥妀茍茤紒惎葪諅乫貑鎵忦蛺幏惤葌椷椾蕳熸瀐鐧虃弿謭蠒鐗葥諓翞螀傋弶謽茮虠嶕膲儌憿敿訆漖趭菨堦脻謯迼偼媫楬蜐蓵蠞蠘悈蛶褯兓惍巹蓳僸濅賮葏聙蟼曔僒褧奺欍崌跔艍諊僪蹫聥乬邭袓埾虡寠勮懅貗脧鎸埍菤臇覐趹傕蕝臄觼貜覠鍕晙懏擖欬偘埳惂邟囥犺鈧趷揢翗愙艐摼埪涳郀跍趶廤褲姱儈旝邼狅軖軠岲聧楏虁蹞聭蕢樻聵熴閫閸睏翋搚藞攋蠟婡崍賴葻儖懢攔讕襽郞艆蜋鎯蓢樃崀僗鐒蔂儽壨虆欙傫藟蠝鑸銇攂倰菞棃艃褵謧邌孋邐涖脷厤棙蛠蝷曆儮曞蠇壢藶觻欐讈梿覝聫褳聮螊謰蹥羷僆墚輬脼嶛膫屪廫賿蹘藔尦尞茢迾脟巤儠冧晽粦亃癛旈藰嬼翏廇蕯壠慺艛謱塿蓾艣鐪菉趢蹗鯥虂藘捛屢儢曫虊惀菕錀埨覙鏍儸攞欏曪洜嬤螞杩傌屘鏋槾澷庬莾茻軞乮蓩覒愗嚜脢槑徾嵄媺跊菛虋夣詸擟攠葞覔覛榓偭覕搣懱僶慜蠠姳慏詺謩擵懡眿蛨暯銆藦鏌怽畞莯幙艒鎿袦軜摨嬭褦螚暔擃儾怓巎抐脮埿聣抳儗儞聻伲迡堄嫟愵跈蹍蹨艌褭揑惗菍巕讘钀脌甯儜蕽挊羺鐞傉搙蹃藲鏂掱輫渒溿嗙褜奅斾蓜嶏葐梈旇翍壀脴擗鷿楄覑僄蠙聠呯帲幈缾蓱蛢輧敀娝擈菐贌巭迉桼蛣僛諆亝旂帺掑軝愭懠艩蠐邔婍忔湇罊蟿缼婜孯諐偂軡脥嗛傔儙輤蹡廧羻趬菬癄藮趫帩僺聺蛪鍥嶔菦嫀慬懄蠄笉梫菣撳埥夝剠殑葝檾掅儬宆儝藭丠媝蓲趥訅蛷髷菃翑葋鑺蝺迲奍峑婘葲烇虇崅夋衻袇袡媣儴讱屻岃訒軔梕茙峵媶搑褣螎嬫巆傇輮鍒蕠曘嗕偄輭膶虄僿褬濏譅桬儍翜翣邖挻脠軕搧羴晱訕傓僐敾樿蟺謪绱蕱袑蛥蠂屾葠訷蓡詵邥谉渖諗曋脤蜄瘮苼曻鉎鍟偗渻賸邿褷鍦乭姼枾媞煶諟鏉杸掓軗虪慡脽虒恖蕬螄柶蕼愯廀摉叜傁擻梀傃藗葰嵗鐩蕵傞摍溑褨鏁褟枱炲菭儓嬯忲惔墰墵燂罈藫貚傝僋蹚傏漟偒幍轁貣膯幐邆虅偍蕛趧蹏鍗悐揥趯屇庣蓨宨嬥覜聼邒閮聤娗脡蓪峝晍詷膧媮妵庩悇捈梌鍎迌貒剸鏄褖蓷藬尵蹪俀僓蹆軘臋袉跅媧嗗邷夞帵貦埦脕贃暀蛧迋葨詴嶶蓶鍏闈愇撱諉儰苿叞菋媦藯讏塭輼轀螡閺脗妏顐璺螉聬攚罋涹偓楃擭臒弙箼螐祦儛屼忢悞嵍邜傒翖嶲橀熻螇貕觽觿椺謵鎴蹝呬忥恄蕮虩傄翈敮蕸僊僲憸蹮妶蛝輱藖獮鍌幰攇峴晛缐僩僴壏膷鑲蠁缿宯虓翛膮虈郩涍詨斅峫翓讗藛缷偞偰僁褉嶰邤杺軐脪蛵曐侀讻詾賯诇脙鏅潃褎褏螑晇訏虗谞虛蕦諝譃呴偦詡怴欰藚吅弲梋鋗蕿蠉蜁嫙檈晅袨眴鏇袕壆瀥焄壎臐壦攳迿訙吖笌漄蕥偣綖詽虤壛巗椼褗覎葕傿椻暥嬊觾讞輰鍚岟炴傟懩傜摿暚邎宎抭偠袎覞艞讑僷曅歋擛悘毉侇迻宧巸弬栘袘訑貤蛦箷彛螔謻讉庡逘偯敼螘帠袣埶幆晹蛡跇嫕蓺槸艗貖賹藙贀虉讛訔訚崯朄讔憗懚粌偀愥绬嫈罃褮僌巊彮逌偤輶栯脜蜏哊迶邘扵乻堬旕艅褕嬩鍝偊貐忬茟喐棫艈銉蓹薁繘軉澚惌邧貟酛茒媴嫄邍妴傆褑褤軏跀缊奫蝹鄖愪賱傊藴偺菑賳傤儎鐟趲蹔蹧艁趮幘樍諎謮賾蠌庂崱蠈熷譄吒偧鍘夈惉邅讝嶃偡虥虦傽慞蔁墇枛旐罀嫬袩悊詟謺讋乽桭葴嫃樼轃纼栚瑱掙衼嗭膱軄藢迣庢袟乿偫翐袠傂崻寘覟儨劕妐幒螤蹱茽偅诪荮粙葤詶僽噣笁曯斸迬殶跓膼跩塼僎壵漴娷諈迍棳斲諑斀斵謶灂蠗茊葘趦輺蓻惾葼鍐菆棷崒踿鎺栬晬鋷罇僔袏䐠傤乯
|
package/lib/unicode.js
CHANGED
|
@@ -29,12 +29,12 @@ export const strHasASCII = (str) => REGEX_ASCII_ANY.test(str);
|
|
|
29
29
|
|
|
30
30
|
//-------------------------------------------------------------------------------
|
|
31
31
|
|
|
32
|
-
const COMMON_JP_CHARS = '絆'
|
|
33
32
|
// 导出一个正则表达式,用于判断字符串中是否包含日文字符
|
|
34
33
|
export const REGEX_JAPANESE =
|
|
35
34
|
/[\u3040-\u309f\u30a0-\u30ff\u3400-\u4dbf\u4e00-\u9fff]/u;
|
|
36
35
|
// 仅包含标点符号,日文片假名平假名,以及常用日语汉字,不包括生僻字
|
|
37
|
-
export const REGEX_JAPANESE_ONLY = new RegExp(`^[
|
|
36
|
+
export const REGEX_JAPANESE_ONLY = new RegExp(`^[ A-Za-z0-9_\.\(\)\|
|
|
37
|
+
-\\u2012-\\u2051\\u2150-\\u218f\\u2600-\\u27bf\\u3000-\\u303f\\u3040-\\u309f\\u30a0-\\u30ff\\uff01-\\uff60\uff66-\uff9d${JAPANESE_HAN}]+$`, 'u');
|
|
38
38
|
/**
|
|
39
39
|
* 判断给定字符串中是否包含日文字符
|
|
40
40
|
* @param {string} str 需要进行判断的字符串
|
|
@@ -44,7 +44,7 @@ export const strHasJapanese = (str) => REGEX_JAPANESE.test(str);
|
|
|
44
44
|
export const strOnlyJapanese = (str) => REGEX_JAPANESE_ONLY.test(str);
|
|
45
45
|
|
|
46
46
|
// 导出一个正则表达式,用于判断字符串中是否包含平假名或片假名
|
|
47
|
-
export const REGEX_HAS_HIRA_OR_KANA = /[\
|
|
47
|
+
// Matches any code point in U+3040–U+30FF (the Hiragana and Katakana blocks).
// A stray `}` inside the character class previously made a literal "}" count as kana.
export const REGEX_HAS_HIRA_OR_KANA = /[\u3040-\u30ff]/u;
|
|
48
48
|
/**
|
|
49
49
|
* 判断给定字符串中是否包含平假名或片假名
|
|
50
50
|
* @param {string} str 需要进行判断的字符串
|
|
@@ -53,7 +53,7 @@ export const REGEX_HAS_HIRA_OR_KANA = /[\p{sc=Hira}\p{sc=Kana}]/u;
|
|
|
53
53
|
export const strHasHiraKana = (str) => REGEX_HAS_HIRA_OR_KANA.test(str);
|
|
54
54
|
|
|
55
55
|
// 导出一个正则表达式,用于判断字符串是否只包含平假名或片假名
|
|
56
|
-
export const REGEX_ONLY_HIRA_OR_KANA = /^[\
|
|
56
|
+
export const REGEX_ONLY_HIRA_OR_KANA = /^[\u3040-\u30ff]+$/u;
|
|
57
57
|
/**
|
|
58
58
|
* 判断给定字符串是否只包含平假名或片假名
|
|
59
59
|
* @param {string} str 需要进行判断的字符串
|
|
@@ -66,10 +66,11 @@ export const strOnlyHiraKana = (str) => REGEX_ONLY_HIRA_OR_KANA.test(str);
|
|
|
66
66
|
*
|
|
67
67
|
* @constant {RegExp} REGEX_JAPANESE_HAN - 用于匹配日文汉字的正则表达式对象。
|
|
68
68
|
*/
|
|
69
|
-
export const REGEX_JAPANESE_HAN = new RegExp(`^[${JAPANESE_HAN}
|
|
69
|
+
export const REGEX_JAPANESE_HAN = new RegExp(`^[${JAPANESE_HAN}A-Za-z0-9_\.
|
|
70
|
+
-]+$`, "u");
|
|
70
71
|
|
|
71
72
|
/**
|
|
72
|
-
*
|
|
73
|
+
* 检查一个字符串是否只包含日文汉字,和字母数字。
|
|
73
74
|
*
|
|
74
75
|
* @param {string} str - 需要检查的字符串。
|
|
75
76
|
* @return {boolean} 如果字符串只包含日文汉字,则返回true;否则返回false。
|
|
@@ -110,7 +111,7 @@ export const REGEX_UNICODE_HAN_ONLY = /^[\p{sc=Hani}]+$/u;
|
|
|
110
111
|
*/
|
|
111
112
|
export const strOnlyHani = (str) => REGEX_UNICODE_HAN_ONLY.test(str);
|
|
112
113
|
|
|
113
|
-
//
|
|
114
|
+
// 日文半角和全角平假名片假名,一般不会用
|
|
114
115
|
export const REGEX_HF_KANA_HIRA = /[\uff66-\uff9d]/u;
|
|
115
116
|
export const strHasHFKanaHira = (str) => REGEX_HF_KANA_HIRA.test(str);
|
|
116
117
|
|
|
@@ -119,7 +120,8 @@ export const strHasHFKanaHira = (str) => REGEX_HF_KANA_HIRA.test(str);
|
|
|
119
120
|
* @type {RegExp}
|
|
120
121
|
*/
|
|
121
122
|
export const REGEX_CHINESE_HAN_7000 = new RegExp(`^[${CHINESE_CHARS_7000}]+$`, "u");
|
|
122
|
-
export const REGEX_CHINESE_ONLY = new RegExp(`^[
|
|
123
|
+
export const REGEX_CHINESE_ONLY = new RegExp(`^[ A-Za-z0-9_\.
|
|
124
|
+
-\\u00a1-\\u00b7\\u2012-\\u2051\\u2150-\\u218f\\u2600-\\u27bf\\u3000-\\u303f\\u3040-\\u309f\\u30a0-\\u30ff\\uff01-\\uff20${CHINESE_CHARS_3500}]+$`, 'u');
|
|
123
125
|
export const strOnlyChinese = (str) => REGEX_CHINESE_ONLY.test(str)
|
|
124
126
|
/**
|
|
125
127
|
* 检查一个字符串是否只包含7000个最常用中文字符。
|
|
@@ -146,7 +148,7 @@ export const strHasChineseHan3500 = (str) => REGEX_CHINESE_HAN_3500_ANY.test(str
|
|
|
146
148
|
//-------------------------------------------------------------------------------
|
|
147
149
|
|
|
148
150
|
// 定义一个正则表达式,用于匹配包含任何Unicode朝鲜语字符的字符串
|
|
149
|
-
export const REGEX_HAS_HANGUL = /[\p{sc=
|
|
151
|
+
export const REGEX_HAS_HANGUL = /[\p{sc=Hangul}]/u;
|
|
150
152
|
/**
|
|
151
153
|
* 检查字符串中是否包含朝鲜语字符
|
|
152
154
|
* @param {string} str 需要检查的字符串
|
|
@@ -155,7 +157,7 @@ export const REGEX_HAS_HANGUL = /[\p{sc=Hang}]/u;
|
|
|
155
157
|
export const strHasHangul = (str) => REGEX_HAS_HANGUL.test(str);
|
|
156
158
|
|
|
157
159
|
// 定义一个正则表达式,用于匹配仅包含Unicode朝鲜语字符的字符串
|
|
158
|
-
export const REGEX_ONLY_HANGUL = /^[\p{sc=
|
|
160
|
+
// Whole-string match: Hangul plus space, ASCII letters/digits, "_", ".", "-"
// and U+00A1–U+00B7. The earlier literal used string-style `\\uXXXX` escapes
// inside a regex literal, which produced the accidental ranges "."–"\" and
// "1"–"\" (accepting ";", "<", "?" etc.) and never matched "-" or U+00A1–U+00B7.
export const REGEX_ONLY_HANGUL = /^[ A-Za-z0-9_.\-\u00a1-\u00b7\p{sc=Hangul}]+$/u;
|
|
159
161
|
/**
|
|
160
162
|
* 检查字符串是否仅由朝鲜语字符组成
|
|
161
163
|
* @param {string} str 需要检查的字符串
|
package/lib/unicode_data.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"chinese_7000": "一丁七万丈三上下不与丐丑专且丕世丘丙业丛东丝丞丢两严丧个丫中丰串临丸丹为主丽举乂乃久么义之乌乍乎乏乐乒乓乔乖乘乙乜九乞也习乡书乩买乱乳乾了予争事二亍于亏云互亓五井亘亚些亟亡亢交亥亦产亨亩享京亭亮亲亳亵亶亸人亿什仁仂仃仄仅仆仇仉今介仍从仑仓仔仕他仗付仙仞仟仡代令以仨仪仫们仰仲仳仵件价任份仿企伉伊伋伍伎伏伐休众优伙会伛伞伟传伢伤伥伦伧伪伫伯估伴伶伸伺似伽伾佃但位低住佐佑体何佗佘余佚佛作佝佞佟你佣佤佥佩佬佯佰佳佴佶佻佼佾使侃侄侈侉例侍侏侑侔侗供依侠侣侥侦侧侨侩侪侬侮侯侵便促俄俅俊俎俏俐俑俗俘俚俜保俞俟信俣俦俨俩俪俭修俯俱俳俵俶俸俺俾倌倍倏倒倓倔倘候倚倜倞借倡倥倦倨倩倪倬倭债倻值倾偃假偈偌偎偏偕做停健偬偲偶偷偻偾偿傀傅傈傍傣傥傧储傩催傲傻僇像僖僚僦僧僬僭僮僰僳僵僻儆儇儋儒儡儿兀允元兄充兆先光克免兑兔兕兖党兜兢入全八公六兮兰共关兴兵其具典兹养兼兽冀冁内冈冉册再冒冕冗写军农冠冢冤冥冬冯冰冱冲决况冶冷冻冼冽净凄准凇凉凋凌减凑凛凝几凡凤凫凭凯凰凳凶凸凹出击凼函凿刀刁刃分切刈刊刍刎刑划刖列刘则刚创初删判刨利别刭刮到刳制刷券刹刺刻刽刿剀剁剂剃剅削剌前剐剑剔剕剖剜剞剟剡剥剧剩剪副割剽剿劁劂劈劐劓力劝办功加务劢劣动助努劫劬劭励劲劳劾势勃勇勉勋勍勐勒勖勘勚募勤勰勺勾勿匀包匆匈匍匏匐匕化北匙匜匝匠匡匣匦匪匮匹区医匾匿十千卅升午卉半华协卑卒卓单卖南博卜卞卟占卡卢卣卤卦卧卫卮卯印危即却卵卷卸卺卿厂厄厅历厉压厌厍厕厘厚厝原厢厣厥厦厨厩厮去厾县叁参又叉及友双反发叔取受变叙叛叟叠口古句另叨叩只叫召叭叮可台叱史右叵叶号司叹叻叼叽吁吃各吆合吉吊同名后吏吐向吓吕吗君吝吞吟吠吡吣否吧吨吩含听吭吮启吱吲吴吵吸吹吻吼吾呀呃呆呈告呋呐呓呔呕呖呗员呙呛呜呢呤呦周呱呲味呵呶呷呸呻呼命咀咂咄咆咋和咎咏咐咒咔咕咖咙咚咛咝咣咤咦咧咨咩咪咫咬咭咯咱咳咴咸咻咽咿哀品哂哄哆哇哈哉哌响哎哏哐哑哓哔哕哗哙哚哝哞哟哥哦哧哨哩哪哭哮哲哳哺哼哽哿唁唆唇唉唏唐唑唔唛唝唠唢唣唤唧唪唬售唯唱唳唷唼唾唿啁啃啄商啉啊啐啕啖啜啡啤啥啦啧啪啬啭啮啰啴啵啶啷啸啻啼啾喀喁喂喃善喇喈喉喊喋喏喑喔喘喙喜喝喟喤喧喱喳喵喷喹喻喽喾嗄嗅嗉嗌嗍嗑嗒嗓嗔嗖嗜嗝嗞嗟嗡嗣嗤嗥嗦嗨嗪嗫嗬嗯嗲嗳嗵嗷嗽嗾嘀嘁嘈嘉嘌嘎嘏嘘嘚嘛嘞嘟嘡嘣嘤嘧嘬嘭嘱嘲嘴嘶嘹嘻嘿噀噌噍噎噔噗噙噜噢噤器噩噪噫噬噱噶噻噼嚄嚅嚆嚎嚏嚓嚚嚣嚯嚷嚼囊囔囚四囝回囟因囡团囤囫园困囱围囵囹固国图囿圃圄圆圈圉圊圜土圣在圩圪圬圭圮圯地圳圹场圻圾址坂均坊坌坍坎坏坐坑块坚坛坜坝坞坟坠坡坤坦坨坩坪坫坭坯坳坷坻坼垂垃垄垅垆型垌垍垒垓垛垞垟垠垡垢垣垤垦垧垩垫垭垮垯垱垲垸埂埃埋城埏埒埔埕埘埙埚埝域埠埭埯埴埸培基埼埽堂堆堇堉堋堌堍堑堕堙堞堠堡堤堪堰堵塃塄塆塈塌塍塑塔塘塞塥填塬塮塾墀墁境墅墈墉墒墓墙增墟墦墨墩墼壁壅壑壕壤士壬壮声壳壶壹处备复夏夔夕外夙多夜够夤夥大天太夫夬夭央夯失头夷夸夹夺夼奁奂奄奇奈奉奋奎奏契奔奕奖套奘奚奠奢奥奭女奴奶奸她好妁如妃妄妆妇妈妊妍妒妓妖妗妙妞妣妤妥妨妩妪妫妮妯妲妹妻妾姆姊始姐姑姒姓委姗姘姚姜姝姞姣姥姨姬姹姻姽姿威娃娄娅娆娇娈娉娌娑娓娘娜娟娠娣娥娩娱娲娴娶娼婀婆婉婊婕婚婞婢婧婪婳婴婵婶婷婺婿媒媚媛媪媲媳媵媸媾嫁嫂嫉嫌嫒嫔嫖嫘嫚嫜嫠嫡嫣嫦嫩嫪嫫嫱嬉嬖嬗嬴嬷孀子孑孓孔孕字存孙孚孛孜孝孟孢季孤孥学孩孪孬孰孱孳孵孺孽宁它宄宅宇守安宋完宏宓宕宗官宙定宛宜宝实宠审客宣室宥宦宪宫宬宰害宴宵家宸容宽宾宿寂寄寅密寇富寐寒寓寝寞察寡寤寥寨寮寰寸对寺寻导寿封射将尉尊小少尔尕尖尘尚尜尝尤尥尧尬就尴尸尹尺尻尼尽尾尿局屁层屃居屈屉届屋屎屏屐屑展屙属屠屡屣履屦屯山屹屺屿岁岂岈岌岍岐岑岔岖岗岘岙岚岛岢岣岩岫岬岭岱岳岵岷岸岿峁峂峄峋峒峙峡峣峤峥峦峧峨峪峭峰峻崂崃崆崇崎崔崖崛崞崟崤崦崩崭崮崴崽嵇嵊嵋嵌嵎嵖嵘嵚嵛嵝嵩嵫嵬嵯嵴嶂嶓嶙嶝嶷巅巉巍川州巡巢工左巧巨巩巫差巯己已巳巴巷巽巾币市布帅帆师希帏帐帑帔帕帖帘帙帚帛帜帝帡带帧席帮帱帷常帻帼帽幂幄幅幌幔幕幛幞幡幢幪干平年并幸幺幻幼幽广庀庄庆庇床庋序庐庑库应底庖店庙庚府庞废庠庤庥度座庭庳庵庶康庸庹庼庾廉廊廋廒廓廖廙廛廨廪延廷建廿开弁异弃弄弇弈弊弋式弑弓引弗弘弛弟张弥弦弧弩弭弯弱弹强弼彀归当录彖彗彘彝形彤彦彧彩彪彬彭彰影彳彷役彻彼往征徂径待徇很徉徊律徐徒徕得徘徙徜御徨循徭微徵德徼徽心必忆忉忌忍忏忐忑忒忖志忘忙忝忠忡忤忧忪快忭忮忱念忸忻忽忾忿怀态怂怃怄怅怆怊怍怎怏怒怔怕怖怙怛怜思怠怡急怦性怨怩怪怫怯怵总怼怿恁恂恃恋恍恐恒恕恙恚恝恢恣恤恧恨恩恪恫恬恭息恰恳恶恸恹恺恻恼恽恿悃悄悉悌悍悒悔悖悚悛悝悟悠悢患悦您悫悬悭悯悱悲悴悸悻悼情惆惇惊惋惑惕惘惚惜惝惟惠惦惧惨惩惫惬惭惮惯惰想惴惶惹惺愀愁愆愈愉愎意愔愕愚感愠愣愤愦愧愫愿慈慊慌慎慑慕慝慢慥慧慨慭慰慵慷憋憎憔憝憧憨憩憬憷憾懂懈懊懋懑懒懦懵懿戆戈戊戋戌戍戎戏成我戒戕或戗战戚戛戟戡戢戥截戬戮戳戴户戽戾房所扁扃扅
扇扈扉扊手才扎扑扒打扔托扛扣扦执扩扪扫扬扭扮扯扰扳扶批扺扼找承技抃抄抉把抑抒抓抔投抖抗折抚抛抟抠抡抢护报抨披抬抱抵抹抻押抽抿拂拃拄担拆拇拈拉拊拌拍拎拐拒拓拔拖拗拘拙拚招拜拟拢拣拤拥拦拧拨择括拭拮拯拱拳拴拶拷拼拽拾拿持挂指挈按挎挑挖挚挛挝挞挟挠挡挢挣挤挥挦挨挪挫振挲挹挺挽捂捃捅捆捉捋捌捍捎捏捐捕捞损捡换捣捧捩捭据捯捶捷捺捻掀掂掇授掉掊掌掎掏掐排掖掘掠探掣接控推掩措掬掭掮掰掳掴掷掸掺掼掾揄揆揉揍揎描提插揖揞揠握揣揩揪揭揳援揶揸揽揾揿搀搁搂搅搋搌搏搐搒搓搔搛搜搞搠搡搦搪搬搭搴携搽摁摄摅摆摇摈摊摒摔摘摞摧摩摭摸摹摺摽撂撄撅撇撑撒撕撖撙撞撤撩撬播撮撰撵撷撸撺撼擀擂擅操擎擐擒擘擞擢擤擦攀攉攒攘攥攫攮支收攸改攻放政故效敉敌敏救敕敖教敛敝敞敢散敦敫敬数敲整敷文斋斌斐斑斓斗料斛斜斝斟斡斤斥斧斩斫断斯新方於施旁旃旄旅旆旋旌旎族旒旖旗无既日旦旧旨早旬旭旮旯旰旱时旷旸旺旻昀昂昃昆昉昊昌明昏易昔昕昙昝星映春昧昨昭是昱昴昵昶昼昽显晁晃晋晌晏晒晓晔晕晖晗晚晞晟晡晢晤晦晨普景晰晴晶晷智晾暂暄暅暇暌暑暖暗暝暧暨暮暴暹暾曈曙曛曜曝曦曩曰曲曳更曷曹曼曾替最月有朊朋服朐朔朕朗朘望朝期朦木未末本札术朱朴朵机朽杀杂权杆杈杉杌李杏材村杓杖杜杞束杠条来杧杨杪杭杯杰杲杳杵杷杻杼松板极构枇枉枋析枕林枘枚果枝枞枢枣枥枧枨枪枫枭枯枰枳枵架枷枸柁柃柄柈柏某柑柒染柔柘柙柚柜柝柞柠柢查柩柬柯柰柱柳柴柽柿栀栅标栈栉栊栋栌栎栏树栒栓栖栗栝栟校栩株栲栳样核根格栽栾桀桁桂桃桄桅框案桉桊桌桎桐桑桓桔桕桡桢档桤桥桦桧桨桩桫桴桶桷梁梃梅梆梏梓梗梢梦梧梨梭梯械梳梵梽梾检棁棂棉棋棍棒棕棘棚棠棣棨棬森棰棱棵棹棺棻棼椁椅椋植椎椐椑椒椟椠椤椭椰椴椽椿楂楔楗楚楝楞楠楣楦楫楮楯楷楸楹楼榀概榄榆榇榈榉榍榔榕榖榛榜榧榨榫榭榴榷榻槁槊槌槎槐槔槚槛槜槟槠槭槲槽槿樊樗樘樟模樨横樯樱樵樽樾橄橇橐橘橙橛橡橥橦橱橹橼檀檄檎檐檑檗檠檩檫檬檵欠次欢欣欤欧欲欸欺款歃歆歇歉歌歙止正此步武歧歪歹死歼殁殂殃殄殆殇殉殊残殍殒殓殖殚殛殡殣殪殳殴段殷殿毁毂毅毋母每毐毒毓比毕毖毗毙毛毡毪毫毯毳毵毹毽氅氆氇氍氏氐民氓气氕氖氘氙氚氛氟氡氢氤氦氧氨氩氪氮氯氰氲水永氽汀汁求汆汇汈汉汊汐汔汕汗汛汜汝汞江池污汤汨汩汪汭汰汲汴汶汹汽汾沁沂沃沄沅沆沈沉沌沏沐沓沔沘沙沚沛沟没沣沤沥沦沧沨沩沪沫沭沮沱河沸油治沼沽沾沿泃泄泅泉泊泌泐泓泔法泖泗泚泛泜泞泠泡波泣泥注泪泫泮泯泰泱泳泵泷泸泺泻泼泽泾洁洄洇洋洌洎洑洒洗洙洚洛洞洣津洧洨洪洫洮洱洲洳洴洵洹洺活洼洽派流浃浅浆浇浈浉浊测浍济浏浐浑浒浓浔浕浙浚浜浞浠浡浣浥浦浩浪浮浯浴海浸浼涂涅消涉涌涎涑涓涔涕涘涛涝涞涟涠涡涢涣涤润涧涨涩涪涫涮涯液涵涸涿淀淄淅淆淇淋淌淏淑淖淘淙淝淞淠淡淤淦淫淬淮深淳混淹添清渊渌渍渎渐渑渔渗渚渝渠渡渣渤渥温渫渭港渲渴游渺湃湄湉湍湎湔湖湘湛湜湝湟湨湫湮湲湾湿溃溅溆溇溉溏源溘溜溟溠溢溥溦溧溪溯溱溲溴溶溷溺溻溽滁滂滃滇滋滍滏滑滓滔滕滗滘滚滞滟滠满滢滤滥滦滨滩滪滫滴滹漂漆漉漏漓演漕漠漤漩漪漫漭漯漱漳漶漾潆潇潋潍潏潘潜潞潟潢潦潭潮潲潴潵潸潺潼潽澄澈澉澌澍澎澜澡澥澧澳澴澶澹澼激濂濉濑濒濞濠濡濮濯瀌瀍瀑瀚瀛瀣瀵瀹灌灏灞火灭灯灰灵灶灸灼灾灿炀炅炉炊炎炒炔炕炖炙炜炝炟炫炬炭炮炯炱炳炷炸点炻炼炽烀烁烂烃烈烊烘烙烛烜烝烟烤烦烧烨烩烫烬热烯烷烹烺烽焉焊焌焐焓焕焖焘焙焚焜焦焯焰焱然煅煊煌煎煜煞煤煦照煨煮煲煳煸煺煽熄熊熏熔熘熙熜熟熠熥熨熬熵熹燃燊燎燏燔燕燠燥燧燮燹爆爝爨爪爬爰爱爵父爷爸爹爻爽爿牁牂片版牌牍牒牖牙牚牛牝牟牡牢牦牧物牮牯牲牵特牺牾犀犁犄犊犋犍犏犒犟犨犬犯犰犴状犷犸犹狁狂狃狄狈狉狍狎狐狒狗狙狝狞狠狡狨狩独狭狮狯狰狱狲狳狴狷狸狺狻狼猁猃猄猊猎猕猖猗猛猜猝猞猡猢猥猩猪猫猬献猱猴猷猸猹猾猿獍獐獒獗獠獬獭獯獴獾玄率玉王玎玑玕玖玙玚玛玠玡玢玥玦玩玫玮环现玲玳玷玺玻珀珂珈珉珊珍珏珐珑珙珞珠珣珥珧珩班珰珲珽球琅理琇琉琎琏琐琚琛琢琤琥琦琨琪琫琬琮琯琰琳琴琵琶琼瑀瑁瑄瑕瑗瑙瑚瑛瑜瑞瑟瑢瑭瑰瑶瑾璀璁璃璆璇璈璋璎璐璘璜璞璟璠璧璨璩璪瓒瓘瓜瓞瓠瓢瓣瓤瓦瓮瓯瓴瓶瓷瓻瓿甄甍甏甑甓甘甙甚甜生甥用甩甪甫甬甭田由甲申电男甸町画甾畀畅畈畋界畎畏畔留畚畛畜略畦番畯畲畴畸畹畿疃疆疍疏疑疔疖疗疙疚疝疟疠疡疢疣疤疥疫疬疭疮疯疰疱疲疳疴疵疸疹疼疽疾痂痃痄病症痈痉痊痍痒痔痕痘痛痞痢痣痤痦痧痨痪痫痰痱痴痹痼痿瘁瘃瘅瘆瘊瘌瘐瘗瘘瘙瘛瘟瘠瘢瘤瘥瘦瘩瘪瘫瘭瘰瘳瘴瘵瘸瘼瘾瘿癀癃癌癍癔癖癜癞癣癫癯癸登白百皂的皆皇皈皋皎皑皓皖皤皮皱皲皴皿盂盅盆盈盉益盍盎盏盐监盒盔盖盗盘盛盟盥盦目盯盱盲直相盹盼盾省眄眇眈眉眊看眍眙眚真眠眢眦眨眩眬眭眯眵眶眷眸眺眼着睁睃睇睐睑睚睛睡睢督睥睦睨睫睬睹睽睾睿瞀瞄瞅瞌瞍瞎瞑瞒瞟瞠瞢瞥瞧瞩瞪瞬瞭瞰瞳瞵瞻瞽瞿矍矗矛矜矢矣知矧矩矫矬短矮石矶矸矻矽矾矿砀码砂砉砌砍砑砒研砖砗砘砚砜砝砟砣砥砧砭砮砰破砷砸砹砺砻砼砾础硁硅硇硌硎硐硒硕硖硗硚硝硪硫硬硭确硷硼碇碉碌碍碎碑碓碗碘碚碛碜
碟碡碣碥碧碰碱碲碳碴碶碹碾磁磅磉磊磋磐磔磕磙磨磬磲磴磷磺礁礅礌礓礞礤礳礴示礼社祀祁祃祆祈祉祎祓祖祗祚祛祜祝神祟祠祢祥祧票祭祯祲祷祸祺祼祾禀禁禄禅禊福禚禤禧禳禹禺离禽禾秀私秃秆秉秋种科秒秕秘租秣秤秦秧秩秫秭积称秸移秽秾稀稂稃稆程稍税稔稗稚稞稠稣稳稷稻稼稽稿穄穆穑穗穰穴究穷穸穹空穿窀突窃窄窅窈窍窑窒窕窖窗窘窜窝窟窠窣窥窦窨窬窭窳窸窿立竑竖站竞竟章竣童竦竭端竹竺竽竿笃笄笆笈笊笋笏笑笔笕笙笛笞笠笤笥符笨笪笫第笮笱笳笸笺笼笾筅筇等筋筌筏筐筑筒答策筘筚筛筜筝筠筢筮筱筲筵筷筹筻筼签简箅箍箐箓箔箕算箜管箢箦箧箨箩箪箫箬箭箱箴箸篁篆篇篌篑篓篙篚篝篡篥篦篪篮篱篷篼篾簃簇簉簋簌簏簖簟簠簦簧簪簸簿籀籁籍米籴类籼籽粉粑粒粕粗粘粜粝粞粟粤粥粪粮粱粲粳粹粼粽精糁糅糇糈糊糌糍糕糖糗糙糜糟糠糨糯糵系紊素索紧紫累絮絷綦綮縻繁繄繇纂纛纠纡红纣纤纥约级纨纩纪纫纬纭纯纰纱纲纳纴纵纶纷纸纹纺纻纽纾线绀绁绂练组绅细织终绉绊绋绌绍绎经绑绒结绔绕绗绘给绚绛络绝绞统绠绡绢绣绤绥绦继绨绩绪绫续绮绯绰绲绳维绵绶绷绸绹绺绻综绽绾绿缀缁缂缃缄缅缆缇缈缉缌缎缏缑缒缓缔缕编缗缘缙缚缛缜缝缟缠缡缢缣缤缥缦缧缨缩缪缫缬缭缮缯缰缱缲缳缴缵缶缸缺罂罄罅罐网罔罕罗罘罚罟罡罢罨罩罪置罱署罴罹罽罾羁羊羌美羑羔羚羝羞羟羡群羧羯羰羲羸羹羼羽羿翁翅翊翌翎翔翕翘翙翚翟翠翡翥翦翩翮翯翰翱翳翻翼耀老考耄者耆耋而耍耐耒耔耕耖耗耘耙耜耠耢耥耦耧耨耩耪耰耱耲耳耵耶耷耸耻耽耿聂聃聆聊聋职聍聒联聘聚聩聪聱聿肃肄肆肇肉肋肌肓肖肘肚肛肝肟肠股肢肤肥肩肪肫肭肮肯肱育肴肷肺肼肽肾肿胀胁胂胃胄胆背胍胎胖胗胙胚胛胜胝胞胡胤胥胧胨胩胪胫胬胭胯胰胱胲胳胴胶胸胺胼能脂脆脉脊脍脎脏脐脑脒脓脔脖脘脚脞脬脯脱脲脶脸脾腆腈腊腋腌腐腑腒腓腔腕腙腚腠腥腧腩腭腮腰腱腴腹腺腻腼腽腾腿膀膂膈膊膏膑膘膙膛膜膝膦膨膪膳膺膻臀臁臂臃臆臊臌臑臜臣臧自臬臭至致臻臼臾舀舂舄舅舆舌舍舐舒舔舛舜舞舟舢舣舨航舫般舰舱舳舴舵舶舷舸船舻舾艄艇艋艘艚艟艨艮良艰色艳艴艺艽艾艿节芄芈芊芋芍芎芏芑芒芗芙芜芝芟芡芥芦芨芩芪芫芬芭芮芯芰花芳芴芷芸芹芼芽芾苁苄苇苈苊苋苌苍苎苏苑苒苓苔苕苗苘苛苜苞苟苠苡苣苤若苦苫苯英苴苷苹苻茀茁茂范茄茅茆茈茉茌茎茏茑茓茔茕茗茚茛茜茝茧茨茫茬茭茯茱茳茴茵茶茸茹茼荀荃荆荇草荏荐荑荒荔荙荚荛荜荞荟荠荡荣荤荥荦荧荨荩荪荫荬荭药荷荸荻荼荽莅莆莉莎莒莓莘莙莛莜莞莠莨莩莪莫莰莱莲莳莴莶获莸莹莺莼莽菀菁菂菅菇菊菌菏菔菖菘菜菝菟菠菡菥菩菪菰菱菲菹菼菽萁萃萄萋萌萍萎萏萑萘萜萝萤营萦萧萨萩萱萸萼落葆葑葓葖著葚葛葜葡董葩葫葬葭葱葳葵葶葸葺蒂蒇蒈蒉蒋蒌蒎蒗蒙蒜蒟蒡蒯蒲蒴蒸蒹蒺蒽蒿蓁蓂蓄蓇蓉蓊蓍蓐蓑蓓蓖蓝蓟蓠蓣蓥蓦蓬蓰蓼蓿蔌蔑蔓蔗蔚蔟蔡蔫蔬蔷蔸蔹蔺蔻蔼蔽蕃蕈蕉蕊蕖蕙蕞蕤蕨蕰蕲蕴蕹蕺蕻蕾薄薅薇薏薛薜薤薨薪薮薯薰薷薹藁藉藏藐藓藕藜藠藤藩藻藿蘅蘑蘖蘘蘧蘩蘸蘼虎虏虐虑虔虚虞虢虫虬虮虱虹虺虻虼虽虾虿蚀蚁蚂蚊蚋蚌蚍蚓蚕蚜蚝蚣蚤蚧蚨蚩蚪蚬蚯蚰蚱蚴蚶蚺蛀蛄蛆蛇蛉蛊蛋蛎蛏蛐蛑蛔蛘蛙蛛蛞蛟蛤蛩蛭蛮蛰蛱蛲蛳蛴蛸蛹蛾蜀蜂蜃蜇蜈蜉蜊蜍蜎蜒蜓蜕蜗蜘蜚蜜蜞蜡蜢蜣蜥蜩蜮蜱蜴蜷蜻蜾蜿蝇蝈蝉蝌蝎蝓蝗蝙蝠蝣蝤蝥蝮蝰蝴蝶蝻蝼蝽蝾螂螃螅螈螋融螗螟螠螣螨螫螬螭螯螳螵螺螽蟀蟆蟊蟋蟑蟒蟛蟠蟥蟪蟮蟹蟾蠃蠊蠋蠓蠕蠖蠡蠢蠲蠹蠼血衄衅行衍衔街衙衡衢衣补表衩衫衬衮衰衲衷衽衾衿袁袂袄袅袆袈袋袍袒袖袗袜袢袤袪被袭袯袱袼裁裂装裆裈裉裎裒裔裕裘裙裟裢裣裤裥裨裰裱裳裴裸裹裼裾褂褊褐褒褓褙褚褛褡褥褪褫褰褴褶襁襄襕襞襟襦襻西要覃覆见观规觅视觇览觉觊觋觌觎觏觐觑角觖觚觜觞解觥触觫觯觱觳言訄訇訾詈詹誉誊誓謇警譬计订讣认讥讦讧讨让讪讫训议讯记讲讳讴讵讶讷许讹论讼讽设访诀证诂诃评诅识诈诉诊诋诌词诎诏诐译诒诓诔试诖诗诘诙诚诛诜话诞诟诠诡询诣诤该详诧诨诩诫诬语诮误诰诱诲诳说诵请诸诹诺读诼诽课诿谀谁谂调谄谅谆谇谈谊谋谌谍谎谏谐谑谒谓谔谕谖谗谙谚谛谜谝谟谠谡谢谣谤谥谦谧谨谩谪谫谬谭谮谯谰谱谲谳谴谵谶谷豁豆豇豉豌豕豚象豢豨豪豫豳豸豹豺貂貅貉貊貌貔貘贝贞负贡财责贤败账货质贩贪贫贬购贮贯贰贱贲贳贴贵贶贷贸费贺贻贼贽贾贿赁赂赃资赅赆赇赈赉赊赋赌赍赎赏赐赑赓赔赖赘赙赚赛赜赝赞赟赠赡赢赣赤赦赧赪赫赭走赳赴赵赶起趁趄超越趋趑趔趟趣趱足趴趵趸趺趼趾趿跃跄跆跋跌跎跏跐跑跖跗跚跛距跞跟跣跤跨跪跬路跳践跶跷跸跹跺跻跽踅踉踊踌踏踔踝踞踟踢踣踩踪踬踮踯踱踵踶踹踺踽蹀蹁蹂蹄蹅蹇蹈蹉蹊蹋蹐蹑蹒蹓蹙蹜蹢蹦蹩蹬蹭蹯蹰蹲蹴蹶蹼蹽蹾蹿躁躅躇躏躐躔躜躞身躬躯躲躺车轧轨轩轪轫转轭轮软轰轱轲轳轴轵轶轷轸轹轺轻轼载轾轿辀辁辂较辄辅辆辇辈辉辊辋辌辍辎辏辐辑辒输辔辕辖辗辘辙辚辛辜辞辟辣辨辩辫辰辱边辽达迁迂迄迅过迈迎运近迓返迕还这进远违连迟迢迤迥迦迨迩迪迫迭迮述迷迸迹追退送适逃逄逅逆选逊逋逍透逐逑递途逖逗通逛逝逞速造逡逢逦逭逮逯逵逶逸逻逼逾遁遂遄遇遍遏遐遑遒道遗遘遛遢遣遥遨遭遮遴遵遽避邀邂邃邈邋邑邓邕邗邙邛邝邡邢那邦邪邬邮邯邰邱
邳邴邵邶邸邹邺邻邽邾郁郄郅郇郈郊郎郏郐郑郓郗郚郛郜郝郡郢郤郦郧部郫郭郯郴郸都郾郿鄂鄄鄌鄘鄙鄞鄢鄣鄯鄱鄹酃酆酉酊酋酌配酎酏酐酒酗酚酝酞酡酢酣酤酥酦酩酪酬酮酯酰酱酲酴酵酶酷酸酹酽酾酿醅醇醉醋醌醍醐醑醒醚醛醢醪醭醮醯醴醵醺醾采釉释里重野量金釜鉴銎銮鋆鋈錾鍪鎏鏊鏖鐾鑫钆钇针钉钊钋钌钍钎钏钐钒钓钔钕钗钘钙钚钛钝钞钟钠钡钢钣钤钥钦钧钨钩钪钫钬钭钮钯钰钱钲钳钴钵钷钹钺钻钼钽钾钿铀铁铂铃铄铅铆铈铉铊铋铌铍铎铐铑铒铕铗铘铙铚铛铜铝铞铟铠铡铢铣铤铥铧铨铩铪铫铬铭铮铯铰铱铲铳铴铵银铷铸铹铺铻铼铽链铿销锁锂锃锄锅锆锇锈锉锊锋锌锎锏锐锑锒锓锔锕锖锗锘错锚锛锜锝锞锟锡锢锣锤锥锦锧锨锩锪锫锬锭键锯锰锱锲锴锵锶锷锸锹锻锽锾锿镀镁镂镃镄镅镆镇镈镉镊镋镌镍镎镏镐镑镒镓镔镖镗镘镚镛镜镝镞镠镡镢镣镤镥镦镧镨镩镪镫镬镭镯镰镱镲镳镴镵镶长门闩闪闭问闯闰闱闲闳间闵闶闷闸闹闺闻闼闽闾闿阀阁阂阃阄阅阆阇阈阉阊阋阌阍阎阏阐阑阒阔阕阖阗阘阙阚阜队阡阢阪阮阱防阳阴阵阶阻阼阽阿陀陂附际陆陇陈陉陋陌降限陔陕陛陟陡院除陧陨险陪陬陲陴陵陶陷隅隆隈隋隍随隐隔隗隘隙障隧隰隳隶隼隽难雀雁雄雅集雇雉雌雍雎雏雒雕雠雨雩雪雯雳零雷雹雾需霁霄霆震霈霉霍霎霏霓霖霜霞霪霭霰露霸霹霾青靓靖静靛非靠靡面靥革靰靳靴靶靸靺靼靽靿鞁鞅鞋鞍鞑鞒鞘鞠鞡鞣鞧鞨鞫鞭鞯鞲鞴韂韦韧韨韩韪韫韬韭音韵韶页顶顷顸项顺须顼顽顾顿颀颁颂颃预颅领颇颈颉颊颋颌颍颏颐频颓颔颖颗题颙颚颛颜额颞颟颠颡颢颤颥颦颧风飐飑飒飓飔飕飗飘飙飞食飧飨餍餐餮饔饕饥饧饨饩饪饫饬饭饮饯饰饱饲饳饴饵饶饷饸饹饺饻饼饽饿馀馁馃馄馅馆馇馈馉馊馋馌馍馏馐馑馒馓馔馕首馗馘香馥馨马驭驮驯驰驱驳驴驵驶驷驸驹驺驻驼驽驾驿骀骁骂骄骅骆骇骈骊骋验骍骎骏骐骑骒骓骖骗骘骙骚骛骜骝骞骟骠骡骢骣骤骥骧骨骰骶骷骸骺骼髀髁髂髅髋髌髑髓高髡髦髫髭髯髹髻鬃鬈鬏鬓鬟鬣鬯鬲鬶鬻鬼魁魂魃魄魅魆魇魈魉魍魏魑魔鱼鱽鱾鱿鲀鲁鲂鲃鲅鲆鲇鲈鲉鲊鲋鲌鲍鲎鲏鲐鲑鲔鲙鲚鲛鲜鲞鲟鲠鲡鲢鲣鲤鲥鲦鲧鲨鲩鲪鲫鲬鲭鲮鲯鲰鲱鲲鲳鲴鲵鲷鲸鲹鲺鲻鲼鲽鲾鳀鳁鳂鳃鳄鳅鳆鳇鳈鳉鳊鳌鳍鳎鳏鳐鳑鳓鳔鳕鳖鳗鳘鳙鳚鳜鳝鳞鳟鳡鳢鳤鸟鸠鸡鸢鸣鸤鸥鸦鸨鸩鸪鸫鸬鸭鸮鸯鸰鸱鸲鸳鸵鸶鸷鸸鸹鸺鸻鸼鸽鸾鸿鹀鹁鹂鹃鹄鹅鹆鹇鹈鹉鹊鹋鹌鹎鹏鹐鹑鹕鹗鹘鹚鹛鹜鹝鹞鹟鹠鹡鹣鹤鹦鹧鹨鹩鹪鹫鹬鹭鹮鹰鹱鹲鹳鹿麂麇麈麋麒麓麝麟麦麸麻麽麾黄黇黉黍黎黏黑黔默黛黜黝黟黠黢黥黧黩黯黹黻黼黾鼋鼍鼎鼐鼒鼓鼗鼙鼠鼢鼩鼫鼬鼯鼱鼷鼹鼻鼾齁齉齐齑齿龀龁龃龄龅龆龇龈龉龊龋龌龙龚龛龟龠",
|
|
3
3
|
"chinese_3500": "一丁七万丈三上下不与丐丑专且世丘丙业丛东丝丢两严丧个中丰串临丸丹为主丽举乃久么义之乌乍乎乏乐乒乓乔乖乘乙九乞也习乡书买乱乳乾了予争事二于亏云互五井亚些亡交亥亦产亩享京亭亮亲人亿什仁仅仆仇今介仍从仑仓仔他仗付仙代令以仪们仰仲件价任份仿企伊伍伏伐休众优伙会伞伟传伤伦伪伯估伴伶伸伺似佃但位低住佑体何余佛作你佣佩佳使侄侈例侍供依侠侣侥侦侧侨侮侯侵便促俄俊俏俐俗俘保信俩俭修俯俱俺倍倒倔倘候倚借倡倦债值倾假偎偏做停健偶偷偿傀傅傍储催傲傻像僚僧僵僻儒儡儿允元兄充兆先光克免兑兔党兜兢入全八公六兰共关兴兵其具典养兼兽冀内冈册再冒冕冗写军农冠冤冬冯冰冲决况冶冷冻净凄准凉凌减凑凛凝几凡凤凫凭凯凰凳凶凸凹出击函凿刀刁刃分切刊刑划列刘则刚创初删判刨利别刮到制刷券刹刺刻刽剂剃削前剑剔剖剥剧剩剪副割剿劈力劝办功加务劣动助努劫励劲劳势勃勇勉勋勒勘募勤勺勾勿匀包匆匈匕化北匙匠匣匪匹区医匾匿十千升午半华协卑卒卓单卖南博卜占卡卢卤卦卧卫印危即却卵卷卸卿厂厅历厉压厌厕厘厚原厢厦厨去县叁参又叉及友双反发叔取受变叙叛叠口古句另叨只叫召叭叮可台史右叶号司叹叼叽吁吃各吆合吉吊同名后吏吐向吓吕吗君吝吞吟吠否吧吨吩含听吭吮启吱吴吵吸吹吻吼呀呆呈告呐呕员呛呜呢周味呵呻呼命咆和咏咐咒咕咖咙咧咨咪咬咱咳咸咽哀品哄哆哈响哎哑哗哟哥哨哩哪哭哮哲哺哼唁唆唇唉唐唠唤唧唬售唯唱唾啃啄商啊啡啤啥啦啰啸啼喂善喇喉喊喘喜喝喧喳喷喻嗅嗓嗜嗡嗤嗦嗽嘀嘁嘉嘱嘲嘴嘶嘹嘿器噩噪嚎嚣嚷嚼囊囚四回因团囤园困囱围固国图圃圆圈土圣在地场圾址均坊坎坏坐坑块坚坛坝坞坟坠坡坤坦坪坯坷垂垃垄型垒垛垢垦垫垮埂埃埋城域埠培基堂堆堕堡堤堪堰堵塌塑塔塘塞填境墅墓墙增墨墩壁壕壤士壮声壳壶壹处备复夏夕外多夜够大天太夫夭央夯失头夷夸夹夺奄奇奈奉奋奏契奔奕奖套奠奢奥女奴奶奸她好如妄妆妇妈妒妓妖妙妥妨妹妻姆姊始姐姑姓委姚姜姥姨姻姿威娃娄娇娘娜娩娱娶婆婉婚婴婶婿媒媚媳嫁嫂嫉嫌嫡嫩嬉子孔孕字存孙孝孟季孤学孩孵孽宁它宅宇守安宋完宏宗官宙定宛宜宝实宠审客宣室宦宪宫宰害宴宵家容宽宾宿寂寄密寇富寒寓寝寞察寡寥寨寸对寺寻导寿封射将尉尊小少尔尖尘尚尝尤就尸尺尼尽尾尿局屁层居屈屉届屋屎屏屑展属屠屡履屯山屹屿岁岂岔岖岗岛岩岭岳岸峡峦峭峰峻崇崎崔崖崩崭嵌巍川州巡巢工左巧巨巩巫差己已巴巷巾币市布帅帆师希帐帕帖帘帚帜帝带席帮常帽幅幌幔幕幢干平年并幸幻幼幽广庄庆庇床序庐库应底店庙府庞废度座庭庵庶康庸廉廊廓延廷建开异弃弄弊式弓引弛弟张弥弦弧弯弱弹强归当录形彤彩彪彬彭彰影役彻彼往征径待很徊律徐徒得徘徙御循微德徽心必忆忌忍志忘忙忠忧快忱念忽忿怀态怎怒怔怕怖怜思怠急性怨怪怯总恃恋恍恐恒恕恢恤恨恩恬恭息恰恳恶恼悄悉悍悔悟悠患悦您悬悯悲悴悼情惊惋惑惕惜惠惦惧惨惩惫惭惯惰想惶惹愁愈愉意愕愚感愤愧愿慈慌慎慕慢慧慨慰慷憋憎憔憨憾懂懈懊懒懦戈戏成我戒或战戚截戳戴户房所扁扇手才扎扑扒打扔托扛扣执扩扫扬扭扮扯扰扳扶批扼找承技抄把抑抒抓投抖抗折抚抛抠抡抢护报披抬抱抵抹押抽拂拄担拆拇拉拌拍拐拒拓拔拖拗拘拙招拜拟拢拣拥拦拧拨择括拭拯拱拳拴拷拼拾拿持挂指按挎挑挖挚挟挠挡挣挤挥挨挪挫振挺挽捂捅捆捉捌捍捎捏捐捕捞损捡换捣捧据捶捷捺捻掀掂授掉掌掏掐排掖掘掠探接控推掩措掰掷掸掺揉揍描提插揖握揣揩揪揭援揽搀搁搂搅搏搓搔搜搞搪搬搭携摄摆摇摊摔摘摧摩摸摹撇撑撒撕撞撤撩撬播撮撰撵撼擂擅操擎擒擦攀攒攘支收改攻放政故效敌敏救教敛敞敢散敦敬数敲整敷文斋斑斗料斜斟斤斥斧斩断斯新方施旁旅旋族旗无既日旦旧旨早旬旭旱时旷旺昂昆昌明昏易昔昙星映春昧昨昭是昵昼显晃晋晌晒晓晕晚晤晦晨普景晰晴晶智晾暂暇暑暖暗暮暴曙曲更曹曼曾替最月有朋服朗望朝期朦木未末本术朱朴朵机朽杀杂权杆杈杉李杏材村杖杜束杠条来杨杭杯杰松板极构枉析枕林枚果枝枢枣枪枫枯架枷柄柏某柑柒染柔柜柠查柬柱柳柴柿栅标栈栋栏树栓栖栗校株样核根格栽桂桃桅框案桌桐桑档桥桦桨桩桶梁梅梆梗梢梦梧梨梭梯械梳检棉棋棍棒棕棘棚棠森棱棵棺椅植椎椒椭椰椿楔楚楞楣楷楼概榄榆榔榕榛榜榨榴槐槽樊樟模横樱橄橘橙橡橱檀檐檩檬欠次欢欣欧欲欺款歇歉歌止正此步武歧歪歹死歼殃殉殊残殖殴段殷殿毁毅母每毒比毕毙毛毡毫毯氏民氓气氛氢氧氨氮氯水永汁求汇汉汗汛汞江池污汤汪汰汹汽沃沈沉沐沙沛沟没沥沦沧沪沫沮河沸油治沼沽沾沿泄泉泊泌法泛泞泡波泣泥注泪泰泳泵泻泼泽洁洋洒洗洛洞津洪洲活洼洽派流浅浆浇浊测济浑浓浙浦浩浪浮浴海浸涂消涉涌涎涕涛涝涡涣涤润涧涨涩涮涯液涵淀淆淋淌淑淘淡淤淫淮深淳混淹添清渊渐渔渗渠渡渣渤温港渴游渺湃湖湘湾湿溃溅溉源溜溢溪溯溶溺滋滑滓滔滚滞满滤滥滨滩滴漂漆漏漓演漠漩漫漱漾潘潜潦潭潮澄澈澎澜澡澳激濒瀑灌火灭灯灰灵灶灸灼灾灿炉炊炎炒炕炫炬炭炮炸点炼烁烂烈烘烙烛烟烤烦烧烫热烹焊焕焙焚焦焰然煌煎煞煤照煮熄熊熏熔熙熟熬燃燎燕燥爆爪爬爱爵父爷爸爹爽片版牌牍牙牛牡牢牧物牲牵特牺犀犁犬犯状犹狂狈狐狗
狞狠狡独狭狮狰狱狸狼猎猖猛猜猩猪猫猬献猴猾猿玄率玉王玖玛玩玫环现玲玷玻珊珍珠班球琅理琉琐琢琳琴琼瑞瑟瑰璃璧瓜瓢瓣瓤瓦瓮瓶瓷甘甚甜生甥用甩甫田由甲申电男甸画畅界畏畔留畜略畦番畴畸疆疏疑疗疙疚疟疤疫疮疯疲疹疼疾病症痊痒痕痘痛痢痪痰痴痹瘟瘤瘦瘩瘪瘫瘸瘾癌癞癣登白百皂的皆皇皮皱皿盅盆盈益盏盐监盒盔盖盗盘盛盟目盯盲直相盹盼盾省眉看真眠眨眯眶眷眼着睁睛睡督睦睬睹瞄瞎瞒瞧瞪瞬瞭瞳瞻矗矛矢知矩矫短矮石矾矿码砂砌砍研砖砚砰破砸砾础硅硕硝硫硬确硼碉碌碍碎碑碗碘碟碧碰碱碳碴碾磁磅磕磨磷磺礁示礼社祈祖祝神祟祠祥票祭祷祸禀禁福离禽禾秀私秃秆秉秋种科秒秕秘租秤秦秧秩秫积称秸移秽稀程稍税稚稠稳稻稼稽稿穆穗穴究穷空穿突窃窄窍窑窒窖窗窘窜窝窟窥窿立竖站竞竟章竣童竭端竹竿笆笋笑笔笙笛笤符笨第笼等筋筏筐筑筒答策筛筝筷筹签简箍箕算管箩箫箭箱篇篓篙篡篮篱篷簇簸簿籍米类籽粉粒粗粘粟粤粥粪粮粱粹精糊糕糖糙糜糟糠糯系紊素索紧紫累絮繁纠红纤约级纪纫纬纯纱纲纳纵纷纸纹纺纽线练组绅细织终绊绍绎经绑绒结绕绘给络绝绞统绢绣继绩绪续绰绳维绵绷绸综绽绿缀缅缆缎缓缔缕编缘缚缝缠缤缨缩缭缰缴缸缺罐网罕罗罚罢罩罪置署羊美羔羞羡群羹羽翁翅翎翔翘翠翩翰翻翼耀老考者而耍耐耕耗耘耙耳耸耻耽耿聂聊聋职联聘聚聪肃肄肆肉肋肌肖肘肚肛肝肠股肢肤肥肩肪肮肯育肴肺肾肿胀胁胃胆背胎胖胚胜胞胡胧胯胰胳胶胸能脂脆脉脊脏脐脑脓脖脚脯脱脸脾腊腋腌腐腔腕腥腮腰腹腺腻腾腿膀膊膏膘膛膜膝膨膳臀臂臊臣自臭至致臼舀舅舆舌舍舒舔舞舟航般舰舱舵舶舷船艇艘良艰色艳艺艾节芋芍芒芙芜芝芥芦芬芭芯花芳芹芽苇苍苏苔苗苛苞苟若苦苫英苹茁茂范茄茅茉茎茧茫茬茴茵茶茸荆草荐荒荔荚荞荠荡荣荤荧药荷荸莉莫莱莲获莹莺莽菇菊菌菜菠菩菱菲萄萌萍萎萝萤营萧萨落著葛葡董葫葬葱葵蒂蒋蒙蒜蒲蒸蒿蓄蓉蓖蓝蓬蔑蔓蔗蔚蔫蔬蔼蔽蕉蕊蕴蕾薄薇薛薪薯藏藐藕藤藻蘑蘸虎虏虐虑虚虫虱虹虽虾蚀蚁蚂蚊蚌蚓蚕蚜蚣蚤蚪蚯蛀蛆蛇蛉蛋蛔蛙蛛蛤蛮蛹蛾蜀蜂蜈蜒蜓蜕蜗蜘蜜蜡蜻蝇蝉蝌蝎蝗蝙蝠蝴蝶螃融螟螺蟀蟆蟋蟹蠕蠢血衅行衍衔街衙衡衣补表衩衫衬衰衷袁袄袋袍袒袖袜被袭袱裁裂装裆裕裙裤裳裸裹褂褐褒褥褪襟西要覆见观规觅视览觉角解触言誉誊誓警譬计订认讥讨让训议讯记讲讳讶许讹论讼讽设访诀证评诅识诈诉诊词译试诗诚话诞诡询该详诫诬语误诱诲说诵请诸诺读诽课谁调谅谆谈谊谋谍谎谐谒谓谚谜谢谣谤谦谨谬谭谱谴谷豁豆豌象豪豫豹豺貌贝贞负贡财责贤败账货质贩贪贫贬购贮贯贰贱贴贵贷贸费贺贼贾贿赁赂赃资赊赋赌赎赏赐赔赖赘赚赛赞赠赡赢赤赦赫走赴赵赶起趁超越趋趟趣足趴趾跃跋跌跑跛距跟跨跪路跳践跷跺踊踏踢踩踪踱蹂蹄蹈蹋蹦蹬蹭蹲躁躏身躬躯躲躺车轧轨轩转轮软轰轴轻载轿较辅辆辈辉辐辑输辕辖辙辛辜辞辟辣辨辩辫辰辱边辽达迁迂迄迅过迈迎运近返还这进远违连迟迫述迷迹追退送适逃逆选逊透逐递途逗通逛逝逞速造逢逮逸逻逼逾遂遇遍遏道遗遣遥遭遮遵避邀邑邓邢那邦邪邮邻郁郊郎郑部郭都鄙酌配酒酗酝酣酥酪酬酱酵酷酸酿醇醉醋醒采释里重野量金鉴针钉钓钙钝钞钟钠钢钥钦钧钩钮钱钳钻钾铁铃铅铆铐铛铜铝铡铣铭铲银铸铺链销锁锄锅锈锉锋锌锐错锚锡锣锤锥锦锨锭键锯锰锹锻镀镇镊镐镜镣镰镶长门闪闭问闯闰闲间闷闸闹闺闻闽阀阁阅阎阐阔队阱防阳阴阵阶阻阿附际陆陈陋陌降限陕陡院除陨险陪陵陶陷隅隆随隐隔隘隙障隧隶难雀雁雄雅集雇雌雏雕雨雪雳零雷雹雾需震霉霍霎霜霞露霸霹青靖静非靠靡面革靴靶鞋鞍鞠鞭韧韩韭音韵页顶顷项顺须顽顾顿颁颂预颅领颇颈颊频颓颖颗题颜额颠颤风飒飘飞食餐饥饭饮饰饱饲饵饶饺饼饿馁馅馆馋馍馏馒首香马驮驯驰驱驳驴驶驹驻驼驾骂骄骆骇验骏骑骗骚骡骤骨髓高鬓鬼魁魂魄魏魔鱼鲁鲜鲤鲫鲸鳄鳍鳖鳞鸟鸠鸡鸣鸥鸦鸭鸯鸳鸵鸽鸿鹃鹅鹉鹊鹏鹤鹦鹰鹿麦麸麻黄黍黎黑黔默鼎鼓鼠鼻齐齿龄龙龟",
|
|
4
|
-
"japanese_han": "
|
|
4
|
+
"japanese_han": "阿嗄哀埃挨欸啀皚癌矮藹靄艾隘愛碍曖瞹礙靉安庵菴鞍諳俺岸按案暗闇鮟黯昂凹敖嗷遨熬螯鏖鰲鼇拗媼襖傲奥奧墺懊澳八巴叭芭峇捌笆釛釟抜拔跋魃把耙罷覇霸擘白百佰柏栢瓸竡粨擺拝拜敗稗班般斑搬頒瘢阪坂板版鈑半伴扮拌絆瓣邦浜幇榜膀蚌傍棒蒡磅謗勹包苞枹胞褒襃雹薄宝保堡葆飽褓鴇寳寶抱豹蚫報鉋靤暴髱鮑爆陂卑杯盃悲碑鵯北孛貝背倍悖狽被備焙琲碚鞁輩憊糒鞴唄奔犇本畚笨崩絣繃迸逼鼻嬶匕比妣彼秕俾粃筆鄙必庇陛畢婢敝閇閉弼愎賁痺蓖裨幣弊碧箆蔽幤壁嬖篦篳薜避斃臂蹕髀璧襞躄贔辺編蝙邉邊鞭扁貶褊卞弁抃汳釆峅便変遍辧辨辮辯變杓髟彪標飆驃驫表俵鰾鼈別彬梹斌賓濱檳瀕繽擯殯鬢冫氷冰兵鋲丙秉柄炳禀稟鞆餅餠并並併垪病竝癶波玻剥菠袰碆鉢撥播伯帛狛勃亳秡舶博渤愽搏箔膊蔔駁駮跛簸檗蘗卜逋哺捕補不布怖歩埔捗部埠蔀餔簿擦囃偲猜才材財裁纔采彩採綵菜蔡参參喰餐驂残蚕殘慚慙蠶惨慘孱粲燦倉滄蒼艙藏操曹漕槽艚艸草懆冊册側厠廁惻測策筴岑曽噌曾層叉扠挿插茶靫槎察岔侘差詫拆釵犲柴豺儕辿覘禅蝉嬋廛潺禪蟾壥巉纏纒躔讒産諂闡懴懺顫昌娼猖菖椙常萇甞腸嘗塲嫦膓償厂昶場敞厰廠倡鬯唱悵暢蟐抄超鈔勦晁巣朝嘲樔潮炒車俥偖屮掣徹撤抻嗔瞋臣忱辰宸陳晨塵鍖趁齔襯讖称稱瞠蟶丞成呈承乗城乘晟程筬誠酲澂澄橙懲逞騁秤吃妛蚩笞喫嗤痴鴟鵄癡魑黐弛池岻持遅馳踟遲尺叺呎侈恥耻歯褫齒彳叱斥赤勅翅敕啻飭腟熾充冲沖舂憧衝艟虫崇蟲寵銃抽仇惆紬畴愁稠酬綢儔疇籌躊讎讐丑醜臭出初樗齣除芻厨耡蒭蜍廚鋤躇雛処杵楮楚儲礎俶畜處触黜觸矗揣啜巛川穿舩船傳椽舛喘串釧窓窗瘡床牀幢闖剏創愴吹炊垂埀陲捶椎槌錘鎚春椿鰆唇純淳脣蓴醇鶉惷蠢戳綽輟齪疵縒祠茨瓷詞慈辞磁雌辭此朿次伺刺賜匆怱葱聡樅聰从従從淙叢湊輳粗麁徂促猝酢蔟醋簇蹙蹴簒竄爨崔催摧榱伜忰倅粋翆脆悴淬萃毳瘁粹翠膵邨村皴存拵忖寸吋搓瑳磋撮蹉嵯嵳挫措錯荅搭迚妲怛沓剳逹答達靼燵韃打大呆歹逮代垈岱待怠殆玳帯帶紿袋貸戴黛靆丹担単眈耽單箪褝鄲擔殫襌胆疸亶膽旦但啖啗淡蛋弾蜑誕彈憚憺澹当當襠蟷鐺党黨宕档蕩盪礑蘯刀叨朷釖陦島祷搗嶋嶌槝導擣蹈到倒悼盗椡盜道稲稻纛得悳徳地的灯登燈等嶝磴鐙低羝堤滴廸狄迪荻笛滌髢嫡敵鏑糴覿邸底抵柢牴砥觝詆弟俤帝逓第棣睇蒂遞蔕締諦甸槇槙顛巓癲典点椣點佃店淀奠殿電澱癜簟凋彫貂雕簓鯛弔吊掉釣調跌迭垤喋畳耋牒蝶疂諜疉鰈疊丁叮疔酊釘頂鼎定訂碇聢錠冬東苳鮗鼕鶇鶫董諌恫洞凍胴動棟働都兜抖蚪斗豆荳逗鬥酘痘闘竇鬪督闍毒独涜読獨牘犢讀髑黷堵睹覩賭篤杜肚妬度渡鍍蠧蠹端短段断葮椴緞鍛斷堆兌対隊碓對鐓惇敦噸蹲沌盾遁鈍楯頓遯燉多夛咄鈬奪鐸朶躱陏柁柮堕舵惰墮婀痾囮俄娥峨峩莪訛蛾額鵝鵞譌鵈厄阨扼咢姶堊悪軛鄂惡愕萼遏蕚餓諤閼鍔顎鰐鶚齶恩儿而児兒粫轜尓耳迩珥爾餌邇二弍弐貮貳発發乏伐筏罰閥罸法琺髪髮帆番幡蕃旙旛繙翻藩飜凡氾籵煩樊燔膰繁礬鐇鷭反払仮返犯汎泛范梵笵販飯範匚方坊芳枋錺防妨房肪魴彷倣紡舫訪髣放妃非飛菲扉暃緋蜚霏肥腓朏匪斐榧翡誹吠沸狒肺廃費廢癈鯡分吩芬氛竕紛雰汾枌焚墳濆粉忿憤奮糞封風峯峰烽楓蜂瘋鋒豐逢馮諷奉俸鳳縫仏缶否殕夫柎趺麸孵敷膚麩弗伏佛孚扶芙彿怫拂服苻俘畉祓罘茯郛浮匐桴符袱幅福艀蜉鳧榑箙蝠髴輻黻甫府拊斧俛鳬俯釜釡椨腑腐輔嘸撫黼父付附咐坿阜訃負赴冨副婦傅富復腹複蝮賦縛輹鮒賻覆馥鰒呷垓該改丐乢盖葢概蓋漑甘杆肝坩柑竿疳扞敢桿稈感橄鰔干淦紺骭幹杠肛岡矼缸剛棡堽綱鋼崗港槓皋羔高皐睾膏槹杲槁稾稿縞藁告誥戈咯哥割彁歌擱謌鴿挌茖革格鬲葛隔膈閣閤骼舸个各個箇給根跟艮亘亙庚畊耕羮羹哽峺耿梗更工弓公功攻供肱宮恭蚣躬熕廾汞拱鞏共貢勾佝鈎溝鉤篝狗苟枸垢冓媾搆詬遘構覯購估呱姑孤沽柧菰蛄觚辜箍鴣鶻古谷股骨罟逧詁榾鈷鼓皷穀轂瞽蠱固故凅梏雇痼錮顧瓜刮聒颪寡卦挂掛罫褂乖掴拐枴夬怪恠官冠棺蒄関観關鰥觀莞管舘館丱貫慣潅樌盥灌罐鑵鸛光洸胱广広廣欟圭皈帰珪亀傀硅袿規瑰閨槻鮭龜歸癸軌鬼詭桂桧椢貴跪匱檜瞶櫃袞滾鯀棍咼郭堝鍋国圀國幗膕馘果菓椁槨裹過哈蛤咳孩骸還海塰醢亥害駭蚶酣歛鼾含邯函凾涵寒韓罕喊汗旱岾悍捍莟漢憾撼翰頷駻瀚杭垳航絎頏蒿嚆竓毫貉豪壕濠好号昊浩耗晧皓號鎬呵喝訶禾合何劾和河曷狢核盍荷啝涸渮盒粭輅闔鞨覈寉賀褐赫壑鶴黒痕很狠恨亨恆恒桁横衡鵆吽哄軣薨轟弘宏泓洪紅虹浤紘鴻黌訌閧鬨侯喉猴篌吼后厚後逅候乎虍呼忽惚戯弧狐胡壷斛壺湖葫楜瑚槲糊蝴醐餬鵠乕虎琥滸鯱互弖戸冱冴沍怙笏粐扈瓠滬護花埖椛硴嘩糀錵華滑猾磆譁化画崋畫話劃樺徊淮槐踝懐懷坏壊壞歓懽歡讙驩桓圜寰環鐶鬟緩幻奐宦浣患喚換渙煥豢澣肓荒慌皇凰隍黄徨惶湟遑煌篁蝗簧鍠鰉恍晃晄幌滉灰恢揮暉詼輝麾徽囘回廻茴迴蛔悔毀燬卉屶会恚恵彗晦喙惠絵匯彙會賄誨慧薈諱穢繪昏婚棔葷渾琿魂混溷諢豁活火夥或貨惑禍獲霍穫蠖癨机肌枅姫迹剞屐笄飢基喞朞勣畸稘跡箕撃畿稽緝機激積磯績蹟譏韲鶏饑癪躋鷄齎羇齏羈覊及吉岌汲即皀亟佶急笈級疾脊棘極集嫉楫瘠蕀輯藉蹐籍鶺几己掎幾戟擠彑旡伎妓忌技季既紀計剤記偈寂寄悸済祭継際禝稷冀劑曁薊髻濟繋薺覬繼霽驥加伽夾佳茄迦枷
珈家浹痂笳袈葭跏嘉糘莢戛戞鋏頬甲岬胛假賈榎价架嫁價稼駕奸尖幵戔肩姦兼堅菅牋間煎蒹監箋樫緘艱殱殲鰹柬倹剪揀揃検減筧儉翦檢謇蹇瞼簡繭鹸件見建荐健剣栫剱釼腱践賎僭漸劍墹澗箭賤踐劒劔薦諫鍵餞擶濺繝艦鑑鑒江姜将將僵漿薑橿疆蒋奨奬獎講匠降絳彊醤杢交郊椒焦蛟嬌澆膠蕉礁鮫鵁驕鷦角佼狡皎脚湫絞剿僥餃撹徼矯纐攪叫教窖較酵轎鵤皆接掲階嗟椄街卩孑刧劫劼杰拮桀桝訐捷傑結睫節詰截碣竭潔羯姐解觧介戒芥屆届界畍疥借堺誡巾今斤金津矜衿筋釿襟尽侭菫僅盡緊槿瑾儘錦謹饉近勁晉晋浸進禁噤縉壗燼覲贐京茎亰荊莖旌経菁晶睛粳經兢精橸鯨驚井丼剄穽景頚憬頸警径浄徑逕淨脛竟敬痙靖境静靜瀞鏡競竸冂絅冏迥炯烱窘究糺糾赳啾鳩摎樛鬮九久灸玖韭酒韮旧臼咎疚柩柾厩救就廐舅廏舊鷲居拘狙苴疽砠娵掬裾雎駒鞠鞫局桔菊椈跼橘鵙咀沮矩挙筥蒟舉擧齟欅襷巨句拒苣具怐拠炬秬倨倶惧据距鉅聚劇踞據窶遽鋸颶醵懼娟捐涓鵑鐫卷呟捲劵巻倦狷眷雋絹羂亅决刔抉決倔崛掘桷訣厥絶覚獗蕨鴃爵譎蹶嚼矍覺攫钁君均軍桾菌鈞皸皹麕俊郡峻浚竣儁箘箟濬駿喀垰裃揩開凱剴慨楷鎧愾刊栞勘堪戡龕坎侃檻轗看瞰康慷糠鱇扛亢伉抗尻攷考拷栲犒靠苛柯珂科萪軻蝌顆殼可渇克刻客恪尅溘課剋肯墾懇齦吭坑鏗空倥箜孔恐控口叩扣冦寇釦刳枯哭桍堀窟苦庫袴酷夸誇胯跨圦快塊獪膾鱠寛款匡框筐筺抂狂誑况況昿砿絋絖鉱壙曠礦鑛窺虧奎逵隗馗揆葵魁喟愧潰餽簣饋坤昆崑菎焜褌鯤鶤悃梱壼困拡括筈蛞廓擴濶闊拉柆剌喇溂蝋辣臈臘鑞来來莱徠賚頼瀬藾癩籟婪嵐儖藍闌襤瀾籃繿蘭欄襴醂覧嬾懶覽攬欖纜燗濫爛郎狼廊琅榔瑯螂朗朖浪莨撈労牢勞癆醪老咾姥蛯潦烙酪肋仂楽樂了勒雷擂縲櫑羸罍耒塁誄磊蕾儡壘泪涙累類崚楞稜薐冷哩厘狸梨犁犂漓蜊璃竰貍糎黎罹藜離鯏蠡籬驪礼李里俚浬理裡裏豊禮鯉醴鱧力立吏朸利励例苙隶俐栃茘悧栗砺莅莉唳笠粒蛎痢詈慄暦歴綟勵篥隷檪癘隸櫟瀝礪麗櫪礫糲蠣儷癧轢轣靂倆怜連嗹廉漣蓮奩匳憐聨濂縺聯鎌簾斂臉恋楝煉練錬鏈瀲鰊戀良凉梁涼椋粮粱踉糧両兩裲魎亮喨量輌諒輛鍄簗撩聊僚寥寮遼暸燎療繚鐐鷯蓼料廖瞭毟列劣冽洌埒埓烈捩猟裂獵鬣林淋琳痳鄰隣燐霖臨鱗麟菻凛凜廩懍吝悋賃藺躙躪伶囹苓玲凌陵羚聆菱蛉鈴零綾蔆霊澪鴒齢齡櫺靈領嶺令溜畄流留琉硫旒榴瑠劉瘤璢嚠瀏鏐鰡柳鉚六澑霤餾竜隆滝嶐槞窿篭龍瀧蘢朧瓏籠聾隴壟哢婁僂楼樓螻髏簍陋漏瘻鏤芦枦炉舮鈩盧廬瀘蘆櫨爐臚艫轤鑪顱鱸鹵虜滷魯櫓艪陸淕鹿禄祿勠碌賂路漉戮蕗録轆麓露鷺閭櫚驢呂侶旅梠屡絽膂履褸縷律率葎緑慮濾鑢巒攣欒臠鑾鸞卵乱亂掠略畧擽侖倫崘崙淪棆綸輪論螺羅騾蘿邏鑼裸瘰洛珞絡落犖駱媽麻痲蟆蟇馬瑪碼罵嘛埋霾買売麦脉脈麥賣邁悗蛮瞞謾饅鬘鰻蠻満滿曼幔慢漫蔓縵鏝蘰忙芒尨氓盲茫鋩莽蟒蠎猫毛矛茅茆旄髦錨卯夘昴皃冐茂冒耄袤帽貿楙瑁貌懋麼沒没枚苺栂眉莓梅媒嵋楳煤黴毎美躾妹昧袂眛媚寐魅椚門捫悶懣們虻萌盟蒙甍儚濛曚朦檬矇艨猛孟梦夢弥祢迷袮謎彌糜縻麋禰靡瀰米弭濔冖糸汨泌祕秘密覓幎蜜冪樒謐櫁羃宀杣眠棉綿緜免勉眄娩冕湎黽緬面麪麺苗描杪眇秒渺緲藐妙廟滅蔑民岷旻罠緡皿泯敏閔愍憫名明茗冥溟暝榠銘鳴瞑螟酩命掵謬摸模膜摩磨糢謨魔抹末歿沫茉陌秣莫寞漠獏貊靺黙墨默貘驀麿牟眸鉾謀繆鴾某母牡姆拇畆畝木目凩沐牧苜募墓幕睦慕暮穆拏拿雫那吶肭娜衲納捺靹乃廼迺奈耐男南娚喃遖楠難赧嚢曩呶撓蟯鐃悩脳惱嫐瑙腦碯閙鬧臑訥餒内恁嫩能尼怩泥籾倪猊貎霓鯢麑擬昵逆匿眤溺睨膩拈年鮎黏鯰捻撚碾輦廿念唸娘嬢孃醸釀鳥嫋蔦樢嬲尿捏涅噛聶蘖囁齧囓躡鑷顳苧寧凝嚀獰檸聹佞侫濘牛忸狃紐鈕農儂濃膿弄耨奴孥駑努弩怒女衂衄暖煖虐瘧梛儺搦諾懦糯哦欧殴歐毆甌鴎謳偶嘔藕葩杷爬琶怕袙拍俳徘排牌派湃萠潘攀爿槃盤磐蟠蹣判叛畔袢滂厖旁胖抛萢咆垉庖袍匏鞄泡炮疱皰砲麭胚陪培裴賠沛佩旆珮配霈轡噴盆怦烹澎朋堋弸彭棚硼蓬篷膨鵬捧椪丕批披砒紕劈霹皮枇毘疲豼琵脾貔羆匹疋痞癖屁睥辟僻嚊甓譬闢偏篇翩胼諚駢諞片騙剽慓飃飄嫖瓢殍縹票漂暼瞥丿貧頻嬪顰品牝聘娉平凭坪岼泙苹屏瓶萍塀評憑鮃蘋坡泊溌頗婆岶迫珀破粕魄醗剖抔仆攴攵撲鯆匍脯菩葡蒲僕璞濮朴圃浦普溥樸譜蹼舖舗鋪瀑曝七妻凄栖悽戚淒萋期棲欺褄榿漆槭蹊祁圻岐其奇斉祇祈俟竒耆埼崎淇畦跂嵜棊棋祺碁碕旗齊臍騎騏麒籏纃鰭乞企杞豈起啓綮綺气気迄弃汽泣契砌氣訖棄葺噐憇器憩磧緕袷峠鞐恰洽千仟阡瓩竏粁牽僉愆鉛慳搴遷謙簽孅騫籤韆拑前虔乾鉗箝銭潛潜蕁錢黔濳凵浅淺遣譴鑓欠刋俔茜倩嵌慊塹歉槧篏籖羌椌腔槍錆蹌鎗鏘強墻薔檣牆艢搶襁繦悄敲橇磽鍬喬僑憔蕎樵橋巧愀峭殻誚鞘竅翹且切妾怯窃挈篋竊侵衾欽親駸芹秦琴禽勤擒檎懃寝寢沁青卿清傾蜻輕勍情晴檠黥頃請慶磬謦穹蛩蛬煢跫窮瓊丘邱秋蚯萩楸穐鞦鰌鰍龝囚求泅酋逎逑毬球遒裘区曲屈岨區蛆躯嶇駆駈麹髷趨驅劬渠瞿衢取娶齲去趣闃悛圈圏全泉拳惓痊湶筌楾詮蜷銓権權顴犬綣券勧勸椦缺却卻埆雀塙愨確闕鵲逡裙羣群然髯燃冉苒染穣禳穰壌壤攘譲讓蕘橈饒擾遶繞惹熱人仁壬忍荏荵棯稔綛刃刄仞仭任扨妊姙衽袵靭靱認仍日茸戎栄容絨溶蓉榕榮熔融鎔蠑冗穃柔揉糅蹂鞣肉宍如茹儒嬬孺濡襦蠕汝乳辱入杁洳溽蓐鳰褥縟阮軟蕊蕋蘂蚋瑞睿鋭叡閏閠
潤若弱嵶蒻鰯鶸撒洒灑卅颯薩塞腮顋鰓賽三傘糂繖散桑喪掻騒騷掃嫂髞色渋嗇瑟澁澀穡森僧沙刹砂殺紗莎裟鯊厦廈歃霎篩晒山彡圸刪杉芟苫衫柵珊笘閊跚煽潸羶陝閃汕疝扇善擅膳繕譱贍商傷慯殤觴賞上尚裳梢焼稍蛸燒鮹勺芍韶少劭邵哨紹奢舌蛇舍捨社舎射渉設赦摂慴歙懾攝麝申伸身呻娠深紳蔘鯵鰺什甚神榊鰰沈哂矧審瀋腎愼慎椹罧蜃滲升生声昇枡牲陞笙甥聲縄繩省剰盛剩勝聖尸失虱屍施師釶湿葹獅蓍詩鉈蝨濕鰤十石辻竍実拾食時寔塒蒔鉐實蝕鮖識史矢豕使始屎笶榁駛士氏丗世仕市示似式亊事侍室恃拭是柿舐逝視釈弑貰勢嗜筮試軾飾誓適餝噬諡螫謚釋匙収收手守首寿受狩售授痩壽綬獣獸殳抒叔枢倏書殊梳淑菽疎疏舒踈樞蔬輸孰塾熟贖鼡属暑黍署蜀鼠薯曙藷屬朮戍束述恕庶術数竪墅漱數豎樹刷衰帥蟀閂栓双霜雙孀爽誰水税睡閖吮舜順蕣瞬説妁朔槊碩爍鑠厶司私思斯絲厮嘶廝撕死巳四寺祀価泗笥耜竢覗嗣肆飼駟松枩淞菘嵩鬆悚竦慫聳宋送訟頌誦鎹捜搜溲蒐艘叟嗾薮藪籔嗽甦酥蘇蘓俗夙泝素速宿粛粟訴塑溯肅遡愬謖酸蒜算夊綏雖隋随隨膸髄髓砕祟埣遂歳碎隧穂燧穗邃孫笋隼筍損唆娑梭蓑簑簔縮所索瑣鎖他它塔獺拓搨榻撻踏鞜鞳闥胎台抬苔臺颱駘擡薹太汰泰態貪攤灘覃痰潭談壇曇餤檀壜譚罎坦袒毯炭探嘆歎湯蝪唐堂棠塘溏糖螳帑淌儻夲涛掏絛滔濤韜饕迯逃桃陶梼淘萄綯檮討套特慝疼滕縢謄藤騰籐籘剔梯啼提蹄醍題鵜体躰軆體戻剃洟悌涕逖替楴裼薙嚏嚔笹天添田沺恬畋畑畠甜填鈿碵鴫鷆鷏忝殄腆靦佻挑条迢條蓚蜩髫齠窕誂眺跳糶帖貼僣銕鐡鐵餮庁汀听町聴廰聽廳廷亭庭停渟霆甼挺梃艇囲通樋同桐童粡僮銅潼橦瞳桶筒統痛慟偸鍮亠投骰頭透綉凸禿突図徒荼途屠菟塗圖跿土吐兎兔莵汢湍猯団團慱摶槫彖推頽腿退蛻褪呑暾屯豚飩臀托託脱佗陀沱駄駝鴕騨妥楕橢柝唾蛙窪娃瓦膃襪韈瓲哇歪外弯湾蜿豌彎灣丸完玩笂頑翫宛挽婉晩椀皖碗綰輓万卍杤萬腕汪亡王网往徃枉罔惘網魍妄忘旺望危威萎逶隈微縅薇巍鰄囗韋唯帷惟圍嵬幃違維磑鮠尾委偉偽梶猥葦痿僞緯鮪未位味為畏胃尉渭爲蔚慰蝟衛衞謂魏温榲瘟饂鰛鰮匁文紋蚊聞馼刎吻紊穏穩問翁鶲蓊瓮甕倭渦萵窩蝸我沃臥幄握渥斡齷汚巫屋烏嗚誣无毋吾呉唔茣梧珸無蜈蕪鵐五午伍忤武侮牾舞廡憮鵡兀勿戊物務悟晤塢寤誤霧鶩錻夕兮吸扱汐西希昔析唏奚息悉惜晞欷淅渓晰犀稀翕溪皙僖煕熄熈熙蜥嬉膝憙熹羲錫犠蟋谿釐鯑醯曦犧席習蓆覡隰檄襲洗徙喜橲禧璽鱚匸系係咥盻郤細隙禊潟戲鬩谺瞎蝦鰕匣侠狎峡狭陜峽狹遐暇瑕轄鍜霞黠下圷夏梺嚇罅仙先掀僊銛暹繊鮮纎纖弦咸涎絃舷啣閑嫌銜嫺嫻賢癇鹹険筅尠跣銑險嶮燹顕蘚顯限県陥陷現献羨腺蜆綫線憲縣餡獻霰相香郷啌廂湘箱襄驤庠祥翔詳享想餉響饗鱶向巷象項像橡嚮哮宵消逍梟硝銷霄蕭瀟簫囂驍淆小暁筱曉篠孝肖効咲效校笑傚嘯恷些楔歇蝎蠍邪協恊挟挾脅脇偕斜携鞋頡諧襭攜纈写冩寫泄卸洩屑屓械紲渫絏緤廨懈薤邂燮褻謝瀉蟹蠏心忻芯辛欣新噺薪馨鐔信釁星惺猩腥刑行形型醒杏姓幸性倖興哘裄凶匂兄兇匈恟洶胸雄熊夐休修烋羞脩貅鮴朽秀岫袖嗅銹繍鏥吁戌胥虚須嘘墟需歔繻鬚徐栩許旭序叙恤洫敍敘勗壻婿絮煦続蓄緒聟續蓿宣軒喧愃萱暄諠玄痃旋懸選癬眩衒絢鉉削靴薛穴斈学學鷽雪膤艝轌鱈血謔熏勲勳薫燻醺旬巡恂洵荀尋循詢馴潯迅徇殉訊訓巽遜蕈圧押椏鴉錏鴨壓鐚牙芽崕崖涯睚衙唖雅襾亜亞軋訝呀烟淹焉湮煙嫣樮閹篶臙延妍芫言岩沿炎研莚嵒筵蜒塩厳閻檐顏顔嚴巌簷巖鹽奄衍偃掩眼罨演儼魘黶咽彦宴晏偐焔堰硯雁鳫厭鴈燕諺験嚥艶贋軅讌驗艷央泱殃秧鞅鴦羊佯昜洋陽揚暘楊煬瘍仰痒養癢怏恙様漾樣瀁幺夭妖殀腰邀爻尭肴姚堯揺徭搖遙遥瑤瑶銚嶢窯窰謠謡杳咬窈要葯薬曜燿藥耀鷂鑰掖椰噎耶揶爺也冶埜野叶曳曵夜頁液腋葉楪業曄謁鵺靨一弌辷伊衣医壱依猗畩壹揖欹噫醫夷沂宜怡姨痍移萓詑詒貽飴疑儀遺嶬彜頤嶷彝乙已以矣苡倚椅礒艤蟻乂弋刈亦屹佚役抑杙苅邑易奕疫衵悒益異翊翌訳逸軼意溢義肄裔詣蜴駅億毅誼憶懌殪縊檍翳翼臆鮨藝鎰繹鯣譯議饐囈鷁懿驛因姻茵音殷氤陰堙蔭慇吟垠婬寅崟淫銀霪廴尹引蚓飲隠飮隱印胤酳憖応英桜珱瑛霙鴬嬰應膺嚶罌櫻瓔鶯纓軈鷹鸚迎盈営蛍塋楹瑩蝿營螢瀛蠅贏郢影潁穎頴映暎硬庸傭雍慵壅擁廱癰永甬咏怺泳俑勇涌湧詠蛹慂踊踴鯒用攸幽悠憂優尢尤由犹油肬疣郵蚰游猶遊楢猷蕕蝣友有酉莠黝又右幼佑侑囿宥柚祐釉誘鼬込迂紆唹淤于余於盂臾兪禺舁娯桙萸隅魚嵎愉揄渝畭腴逾愚楡瑜虞漁蝓諛餘覦踰歟輿鯲与予宇羽雨俣挧禹圄圉傴與語嶼麌齬玉聿芋育郁峪浴域欲喩寓御硲裕遇飫馭愈誉預嫗毓獄瘉慾豫燠蕷諭閾鴪礇禦癒譽鬻鷸欝鬱冤悁寃渊渕淵鳶鴛元円垣爰原員袁援湲園圓源猿縁薗轅櫞遠鋺苑怨院媛掾愿願曰約月戉岳悦粤越鉞閲嶽龠躍籥暈贇云伝芸紜耘雲允隕殞褞孕運慍韵熨薀蘊韻繧匝拶紮雑襍雜咋災哉栽宰再在載縡簪攅暫賛贊鏨讃讚賍臧蔵贓奘弉葬臓臟遭糟鑿早蚤棗澡繰藻造梍慥噪燥竃譟躁竈択沢則責嘖擇澤簀仄昃戝賊怎譖譛囎増憎甑贈扎柤査渣札閘箚鮓乍咤柞炸詐搾粂斎摘齋宅窄砦債寨沾旃栴粘霑氈瞻譫展斬盞嶄輾占桟站棧湛戦綻戰張章彰樟璋鱆長掌漲丈仗杖帳脹障嶂瘴招昭爪找沼召兆笊棹詔照罩肇趙櫂遮折哲粍晢摺輒磔輙蟄謫轍者赭褶柘浙這蔗鷓着著珍珎貞眞真砧
針偵遉斟甄禎蓁榛碪箴臻鍼枕畛疹袗診軫陣振朕酖賑震鴆鎭鎮凧争征爭烝崢筝蒸鉦徴箏錚鯖拯掟整正政症幀証諍鄭證之支卮汁芝巵枝知肢祗胝脂隻梔椥稙蜘織直姪値埴執植殖跖鉄職蹠躑夂止只凪旨阯址帋沚祉咫指枳紙趾黹至志豸制帙治炙峙陟桎秩致畤痔窒貭智滞痣蛭軽稚置輊雉滯製誌幟摯膣質緻隲擲櫛穉贄躓鷙中忠柊衷終螽鍾鐘冢塚腫種踵仲狆重衆州舟周洲掫週粥駲軸肘帚菷箒呪咒宙冑昼紂胄酎晝甃皺籀驟朱侏洙茱株珠猪蛛誅潴銖諸豬瀦竹竺逐舳築燭躅丶主渚煮嘱麈囑矚佇住助杼注柱祝竚莇紵筑註貯墸箸鋳駐鑄抓専專甎磚転轉撰篆賺饌囀庄妝荘莊粧装裝壮壯状撞隹追錐騅惴畷墜綴縋錣贅諄准凖準卓拙倬捉灼斫酌啄啅濁擢濯孜咨姿茲孳滋粢觜貲資緇輜諮錙髭鯔仔姉呰梓紫滓字自恣眥眦漬子宗棕椶綜踪蹤鑁偬惣愡総總粽縦縱陬鄒諏赱走奏租卆足卒族鏃阻俎爼祖組詛鑽纂纉鑚樶嘴酔最罪醉枠尊墫遵樽鱒噂昨琢筰左佐作坐祚胙座做蓙瓧瓰瓱甅絆",
|
|
5
5
|
"japanese_kana": "ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞゟァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヷヸヹヺーヽヾヿ"
|
|
6
6
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mediac",
|
|
3
|
-
"version": "1.5.
|
|
3
|
+
"version": "1.5.2",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MediaCli is a multimedia file processing tool that utilizes ffmpeg and exiftool, among others, to compress/convert/rename/delete/organize media files, including images, videos, and audio.",
|
|
6
6
|
"main": "index.js",
|
|
@@ -75,6 +75,7 @@
|
|
|
75
75
|
"sanitize-filename": "^1.6.3",
|
|
76
76
|
"sharp": "^0.33.2",
|
|
77
77
|
"throat": "^6.0.2",
|
|
78
|
+
"upath": "^2.0.1",
|
|
78
79
|
"which": "^4.0.0",
|
|
79
80
|
"yargs": "^17.7.2"
|
|
80
81
|
},
|
package/scripts/media_cli.js
CHANGED
|
@@ -39,11 +39,11 @@ async function main() {
|
|
|
39
39
|
// https://github.com/yargs/yargs/blob/master/docs/advanced.md
|
|
40
40
|
const ya = yargs(process.argv.slice(2));
|
|
41
41
|
ya.usage("Usage: $0 <command> <input> [options]")
|
|
42
|
-
.positional("input", {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
})
|
|
42
|
+
// .positional("input", {
|
|
43
|
+
// describe: "Input folder that contains files",
|
|
44
|
+
// type: "string",
|
|
45
|
+
// normalize: true,
|
|
46
|
+
// })
|
|
47
47
|
// 测试命令,无作用
|
|
48
48
|
.command(
|
|
49
49
|
["test", "tt", "$0"],
|
|
@@ -143,6 +143,8 @@ async function main() {
|
|
|
143
143
|
.command(await import("../cmd/cmd_fixname.js"))
|
|
144
144
|
// 命令 智能解压ZIP文件,处理文件名乱码问题
|
|
145
145
|
.command(await import("../cmd/cmd_zipu.js"))
|
|
146
|
+
// 命令 乱码解析,猜测编码,输出可能正确的字符串
|
|
147
|
+
.command(await import("../cmd/cmd_decode.js"))
|
|
146
148
|
.count("verbose")
|
|
147
149
|
.alias("v", "verbose")
|
|
148
150
|
.alias("h", "help")
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import upath from 'upath';
|
|
2
|
+
import * as mf from '../lib/file.js';
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
let files = await mf.walk(process.argv[2], {
|
|
6
|
+
needStats: true,
|
|
7
|
+
});
|
|
8
|
+
for (const f of files) {
|
|
9
|
+
console.log('\n-----------------\n')
|
|
10
|
+
console.log(f.path)
|
|
11
|
+
console.log(upath.normalize(f.path))
|
|
12
|
+
console.log(upath.normalizeSafe(f.path))
|
|
13
|
+
console.log(upath.normalizeTrim(f.path))
|
|
14
|
+
}
|
package/scripts/unicode_test.js
CHANGED
|
@@ -32,13 +32,13 @@ function normalizeChars(filename = 'messy_hanzi.txt') {
|
|
|
32
32
|
const dataDir = path.join(path.dirname(__dirname), 'data')
|
|
33
33
|
const libDir = path.join(path.dirname(__dirname), 'lib')
|
|
34
34
|
// const fileChars = fs.readFileSync(path.join(dataDir, 'messy_sample.txt'), 'utf8')
|
|
35
|
-
const chars = enc.REGEX_MESSY_CJK + ''
|
|
35
|
+
const chars = enc.REGEX_MESSY_CJK + '堄拲儗儞亃僱僄僊儖'
|
|
36
36
|
const valid = []
|
|
37
37
|
// 排除1 汉字属于中国常用汉字7000字的范围
|
|
38
|
-
// 排除2 汉字属于日本常用汉字2100字的范围
|
|
38
|
+
// x排除2 汉字属于日本常用汉字2100字的范围 !jpHanzi.includes(c)
|
|
39
39
|
// 这样可以确保输出的汉字是不常用的
|
|
40
40
|
for (const c of chars) {
|
|
41
|
-
if (!c7000.includes(c) &&
|
|
41
|
+
if (!c7000.includes(c) && !/[\s]+/u.test(c)) {
|
|
42
42
|
valid.push(c)
|
|
43
43
|
}
|
|
44
44
|
}
|
|
@@ -85,10 +85,10 @@ function fixEnc(str) {
|
|
|
85
85
|
|
|
86
86
|
let fromStr = ''
|
|
87
87
|
// 这个特殊,解码出来有emoji JS转换会乱码
|
|
88
|
-
// 2024-01-10 06-00大鳳背面座位
|
|
88
|
+
// 2024-01-10 06-00大鳳背面座位
|
|
89
89
|
// 2024-01-10 06-00螟ァ魑ウ閭碁擇蠎ァ菴郊生
|
|
90
90
|
// messyStr = '2024-01-10 06-00螟ァ魑ウ閭碁擇蠎ァ菴郊生'
|
|
91
|
-
fromStr = '
|
|
91
|
+
fromStr = '│ │ DOT_像度画像です(PNG ×PX)-_49_Z4K'
|
|
92
92
|
const toStr = process.argv.length > 2 ? fixEnc(process.argv[2]) : fixEnc(fromStr)
|
|
93
93
|
showStatus(fromStr, 'BEFORE FIX')
|
|
94
94
|
showStatus(toStr, 'AFTER FIX')
|
package/scripts/zip_test.js
CHANGED
package/scripts/fix_messy.js
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { REGEX_MESSY_CJK, REGEX_MESSY_CJK_EXT, REGEX_MESSY_UNICODE, checkBadUnicode, fixCJKEncImpl, hasBadUnicode } from '../lib/encoding.js';
|
|
4
|
-
import { strOnlyChinese, strOnlyJapanese } from '../lib/unicode.js';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
import { fileURLToPath } from 'url';
|
|
10
|
-
|
|
11
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
12
|
-
const __dirname = path.dirname(__filename);
|
|
13
|
-
|
|
14
|
-
const ENC_LIST = [
|
|
15
|
-
'ISO-8859-1',
|
|
16
|
-
'UTF8',
|
|
17
|
-
'UTF-16',
|
|
18
|
-
'GBK',
|
|
19
|
-
// 'BIG5',
|
|
20
|
-
'SHIFT_JIS',
|
|
21
|
-
'EUC-JP',
|
|
22
|
-
'CP949',
|
|
23
|
-
// 'EUC-KR',
|
|
24
|
-
]
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
export function fixMessyChars(str) {
|
|
28
|
-
let results = fixCJKEncImpl(str, ENC_LIST, ENC_LIST, 10)
|
|
29
|
-
return results.reverse()
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
function showStatus(str, title = '') {
|
|
33
|
-
const print = (a, b) => console.log(a.padEnd(20, ' '), b)
|
|
34
|
-
console.log()
|
|
35
|
-
console.log(`================ ${title} ================`)
|
|
36
|
-
print('STRING', str)
|
|
37
|
-
print('STRING', Array.from(str).map(c => c.codePointAt(0).toString(16)).join(' '))
|
|
38
|
-
print('BadUnicode', checkBadUnicode(str))
|
|
39
|
-
print('hasBadUnicode', hasBadUnicode(str))
|
|
40
|
-
print('strOnlyChinese', strOnlyChinese(str))
|
|
41
|
-
print('strOnlyJapanese', strOnlyJapanese(str))
|
|
42
|
-
print('REGEX_MESSY_CJK', REGEX_MESSY_CJK.test(str))
|
|
43
|
-
print('REGEX_MESSY_CJK_EXT', REGEX_MESSY_CJK_EXT.test(str))
|
|
44
|
-
print('REGEX_MESSY_UNICODE', REGEX_MESSY_UNICODE.test(str))
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
let fromStr = process.argv[2]
|
|
48
|
-
// fromStr = '\u8c4c\uff74\u9039\u0080\u8ff9\u6a23\uff76\uff74'
|
|
49
|
-
// fromStr = '\u0030\u0036\u0033\u002e\u8782\u0080\u0032'
|
|
50
|
-
const results = fixMessyChars(fromStr)
|
|
51
|
-
const toStr = results.slice(-1)[0][0]
|
|
52
|
-
for (const r of results) {
|
|
53
|
-
console.log(r[0], '\t\t', r.slice(1))
|
|
54
|
-
}
|
|
55
|
-
console.log()
|
|
56
|
-
console.log('INPUT:', [fromStr])
|
|
57
|
-
console.log('OUPUT:', results.pop())
|
|
58
|
-
showStatus(fromStr, 'fromStr')
|
|
59
|
-
showStatus(toStr, 'toStr')
|