@bukkentime/japanese-numeral 1.1.0-rc.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +16 -0
- package/.editorconfig +9 -0
- package/.github/workflows/build.yml +42 -0
- package/LICENSE +21 -0
- package/README.md +67 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +106 -0
- package/dist/japaneseNumerics.d.ts +5 -0
- package/dist/japaneseNumerics.js +25 -0
- package/dist/oldJapaneseNumerics.d.ts +5 -0
- package/dist/oldJapaneseNumerics.js +27 -0
- package/dist/utils.d.ts +40 -0
- package/dist/utils.js +161 -0
- package/package.json +28 -0
- package/src/index.ts +114 -0
- package/src/japaneseNumerics.ts +28 -0
- package/src/oldJapaneseNumerics.ts +30 -0
- package/src/utils.ts +185 -0
- package/test/test.ts +204 -0
- package/test/utils.ts +17 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(git checkout:*)",
|
|
5
|
+
"Bash(npm test:*)",
|
|
6
|
+
"Bash(npm install:*)",
|
|
7
|
+
"Bash(npx mocha:*)",
|
|
8
|
+
"Bash(git add:*)",
|
|
9
|
+
"Bash(git commit:*)",
|
|
10
|
+
"Bash(gh api:*)",
|
|
11
|
+
"Bash(gh pr:*)",
|
|
12
|
+
"Bash(git fetch:*)",
|
|
13
|
+
"Bash(npx tsc:*)"
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
}
|
package/.editorconfig
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# This workflow runs the tests on every push and publishes the package to the npm registry when a tag starting with "v" is pushed
|
|
2
|
+
# For more information see: https://help.github.com/actions/language-and-framework-guides/publishing-nodejs-packages
|
|
3
|
+
|
|
4
|
+
name: build
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches:
|
|
9
|
+
- "*"
|
|
10
|
+
tags:
|
|
11
|
+
- "*"
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
test:
|
|
15
|
+
strategy:
|
|
16
|
+
matrix:
|
|
17
|
+
node-version: [14, 16, 18, 20]
|
|
18
|
+
runs-on: ubuntu-latest
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v3
|
|
21
|
+
- uses: actions/setup-node@v3
|
|
22
|
+
with:
|
|
23
|
+
node-version: ${{ matrix.node-version }}
|
|
24
|
+
- run: npm install
|
|
25
|
+
- run: npm test
|
|
26
|
+
- run: npm run build
|
|
27
|
+
|
|
28
|
+
publish:
|
|
29
|
+
needs: test
|
|
30
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
31
|
+
runs-on: ubuntu-latest
|
|
32
|
+
steps:
|
|
33
|
+
- uses: actions/checkout@v3
|
|
34
|
+
- uses: actions/setup-node@v3
|
|
35
|
+
with:
|
|
36
|
+
node-version: 18
|
|
37
|
+
registry-url: https://registry.npmjs.org/
|
|
38
|
+
- run: npm install
|
|
39
|
+
- run: npm run build
|
|
40
|
+
- run: npm publish --access=public
|
|
41
|
+
env:
|
|
42
|
+
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2020 Geolonia Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# @geolonia/japanese-numeral
|
|
2
|
+
|
|
3
|
+
[Build status](https://github.com/geolonia/japanese-numeral/actions?query=workflow%3Atest)
|
|
4
|
+
[npm version](https://badge.fury.io/js/%40geolonia%2Fjapanese-numeral)
|
|
5
|
+
|
|
6
|
+
Converts Japanese Kanji numeral <=> number.
|
|
7
|
+
|
|
8
|
+
## Installation
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
$ npm install @geolonia/japanese-numeral --save
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Usage
|
|
15
|
+
|
|
16
|
+
### kanji2number()
|
|
17
|
+
|
|
18
|
+
Converts Japanese Kanji numeral to number.
|
|
19
|
+
|
|
20
|
+
```javascript
|
|
21
|
+
import { kanji2number } from '@geolonia/japanese-numeral'
|
|
22
|
+
|
|
23
|
+
console.log(kanji2number('一千百十一兆一千百十一億一千百十一万一千百十一')) // 1111111111111111
|
|
24
|
+
|
|
25
|
+
// `一千` を `千` と記述しても同じ結果になる。
|
|
26
|
+
console.log(kanji2number('千百十一兆千百十一億千百十一万千百十一')) // 1111111111111111
|
|
27
|
+
|
|
28
|
+
// 漢数字のゼロ `〇` を使用することも可能。
|
|
29
|
+
console.log(kanji2number('二〇二〇')) // 2020
|
|
30
|
+
|
|
31
|
+
// 数字と漢数字が混ざった表記にも対応
|
|
32
|
+
console.log(kanji2number('2億3千430万')) // 234300000
|
|
33
|
+
console.log(kanji2number('2億3千456万7890')) // 234567890 (数字は全角でも可)
|
|
34
|
+
|
|
35
|
+
// 小数と漢数字の組み合わせにも対応(価格表記等)
|
|
36
|
+
console.log(kanji2number("8.5万")); // 85000
|
|
37
|
+
console.log(kanji2number("1.2兆")); // 1200000000000
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### number2kanji()
|
|
41
|
+
|
|
42
|
+
Converts number to Japanese Kanji numeral.
|
|
43
|
+
|
|
44
|
+
```javascript
|
|
45
|
+
import { number2kanji } from '@geolonia/japanese-numeral'
|
|
46
|
+
|
|
47
|
+
console.log(number2kanji(1111111111111111)) // 千百十一兆千百十一億千百十一万千百十一
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### findKanjiNumbers()
|
|
51
|
+
|
|
52
|
+
Finds the Japanese numeral numbers as an array.
|
|
53
|
+
|
|
54
|
+
```javascript
|
|
55
|
+
import { findKanjiNumbers } from '@geolonia/japanese-numeral'
|
|
56
|
+
|
|
57
|
+
console.log(findKanjiNumbers('今日は二千二十年十一月二十日です。')) // [ '二千二十', '十一', '二十' ]
|
|
58
|
+
console.log(findKanjiNumbers('今日は二〇二〇年十一月二十日です。')) // [ '二〇二〇', '十一', '二十' ]
|
|
59
|
+
console.log(findKanjiNumbers('わたしは二千二十億円もっています。')) // [ '二千二十億' ]
|
|
60
|
+
console.log(findKanjiNumbers('わたしは二〇二〇億円もっています。')) // [ '二〇二〇億' ]
|
|
61
|
+
console.log(findKanjiNumbers('わたしは1億2000万円もっています。')) // [ '1億2000万' ]
|
|
62
|
+
console.log(findKanjiNumbers("家賃は8.5万円です。")); // [ '8.5万' ]
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## License
|
|
66
|
+
|
|
67
|
+
[MIT](LICENSE)
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.findKanjiNumbers = exports.number2kanji = exports.kanji2number = void 0;
|
|
4
|
+
const utils_1 = require("./utils");
|
|
5
|
+
const japaneseNumerics_1 = require("./japaneseNumerics");
|
|
6
|
+
/**
 * Splits a decimal coefficient such as "8.5" into exact BigInt parts.
 *
 * The text is first passed through `zen2han`, then must consist of digits,
 * one ".", and more digits.
 *
 * @param {string} coefficient Coefficient text taken from in front of a large unit.
 * @returns {{digits: bigint, scale: bigint} | null} `digits` is the number with
 *   the dot removed, `scale` is 10 to the number of fractional digits; `null`
 *   when the text is not a plain decimal.
 */
function parseDecimalCoefficient(coefficient) {
    const parts = (0, utils_1.zen2han)(coefficient).match(/^([0-9]+)\.([0-9]+)$/);
    if (!parts) {
        return null;
    }
    const [, whole, fraction] = parts;
    const digits = BigInt(`${whole}${fraction}`);
    const scale = 10n ** BigInt(fraction.length);
    return { digits, scale };
}
|
|
17
|
+
/**
 * Converts a Japanese numeral string to a number.
 *
 * Two notations are handled:
 *  - positional (contains "〇", or consists only of the digits 〇-九): every
 *    key of the numeral table is replaced by its value and the result is
 *    handed to `Number`;
 *  - unit based (兆 / 億 / 万 plus a remainder): each large-unit coefficient is
 *    multiplied by its unit and the remainder is added.
 *
 * @param {string} japanese The numeral text.
 * @returns {number} The numeric value.
 * @throws {TypeError} When a decimal coefficient does not scale to an integer,
 *   or when the total or the remainder is not an integer.
 */
function kanji2number(japanese) {
    const notInteger = 'The attribute of kanji2number() must be a Japanese numeral as integer.';
    let text = (0, utils_1.normalize)(japanese);

    const positional = text.match('〇') || text.match(/^[〇一二三四五六七八九]+$/);
    if (positional) {
        for (const key in japaneseNumerics_1.default) {
            text = text.replace(new RegExp(key, 'g'), japaneseNumerics_1.default[key].toString());
        }
        return Number(text);
    }

    const { numbers, raw } = (0, utils_1.splitLargeNumberParts)(text);
    let total = 0;

    // Add up the parts that carry a unit of 万 or larger.
    for (const key in utils_1.largeNumbers) {
        if (!numbers[key]) {
            continue;
        }
        const decimal = parseDecimalCoefficient(raw[key]);
        if (!decimal) {
            total = total + utils_1.largeNumbers[key] * numbers[key];
            continue;
        }
        // Decimal coefficients are scaled with BigInt so the product stays exact.
        const unit = BigInt(utils_1.largeNumbers[key]);
        if (unit % decimal.scale !== 0n) {
            throw new TypeError(notInteger);
        }
        total = total + Number(decimal.digits * (unit / decimal.scale));
    }

    if (!(Number.isInteger(total) && Number.isInteger(numbers['千']))) {
        throw new TypeError(notInteger);
    }

    // Finally add the part below 万.
    return total + numbers['千'];
}
|
|
52
|
+
exports.kanji2number = kanji2number;
|
|
53
|
+
/**
 * Converts a non-negative integer to Japanese kanji numerals.
 *
 * The value is peeled apart unit by unit in the iteration order of
 * `largeNumbers`; each coefficient and the remainder are rendered by `n2kan`.
 *
 * @param {number} num Value whose string form consists only of the digits 0-9.
 * @returns {string} The kanji representation, or "〇" when nothing was emitted.
 * @throws {TypeError} When the string form of `num` contains anything other
 *   than the digits 0-9.
 */
function number2kanji(num) {
    if (!num.toString().match(/^[0-9]+$/)) {
        throw new TypeError('The attribute of number2kanji() must be integer.');
    }
    let number = num;
    let kanji = '';
    // Emit the parts that carry a unit of 万 or larger.
    for (const key in utils_1.largeNumbers) {
        const n = Math.floor(number / utils_1.largeNumbers[key]);
        if (n) {
            number = number - (n * utils_1.largeNumbers[key]);
            kanji = `${kanji}${(0, utils_1.n2kan)(n)}${key}`;
        }
    }
    // Whatever is left is below 万.
    if (number) {
        kanji = `${kanji}${(0, utils_1.n2kan)(number)}`;
    }
    return kanji || '〇';
}
|
|
73
|
+
exports.number2kanji = number2kanji;
|
|
74
|
+
/**
 * Finds the Japanese numeral expressions contained in a text.
 *
 * A candidate is an optional 兆 part, 億 part and 万 part followed by a part
 * below 万. Coefficients may be kanji digits, ASCII digits or full-width
 * digits; an Arabic decimal such as "8.5" is accepted directly in front of a
 * single unit.
 *
 * A candidate is dropped when
 *  - it is empty, consists only of Arabic digits / decimal points, or is a
 *    bare 兆, 億, 万 or 萬;
 *  - the character before it is a decimal point; or
 *  - it starts with an Arabic digit and the character before it is one too
 *    (it would be the tail of a longer number).
 *
 * @param {string} text Text to scan.
 * @returns {string[]} Matched substrings in order of appearance.
 */
function findKanjiNumbers(text) {
    // Full-width digits (U+FF10-U+FF19) and the full-width full stop (U+FF0E)
    // are written as escapes so they cannot be folded into their ASCII
    // look-alikes by Unicode normalization of this file.
    const digitClass = '0-9\uFF10-\uFF19';
    const dotClass = '.\uFF0E';
    const num = `([${digitClass}]*)|([〇一二三四五六七八九壱壹弐弍貳貮参參肆伍陸漆捌玖]*)`;
    const decimalArabicNum = `[${digitClass}]+[${dotClass}][${digitClass}]+`;
    const decimalSmallUnitPattern = `(${decimalArabicNum})(千|阡|仟|百|陌|佰|十|拾)`;
    // Decimal coefficients are valid as standalone Arabic-decimal prefixes before a single unit.
    const basePattern = `((${num})(千|阡|仟))?((${num})(百|陌|佰))?((${num})(十|拾))?(${num})?`;
    const largeUnitPattern = `((${decimalArabicNum})|(${basePattern}))`;
    const trailingPattern = `((${decimalSmallUnitPattern})|(${basePattern}))`;
    const pattern = `(((${largeUnitPattern}兆)?(${largeUnitPattern}億)?(${largeUnitPattern}(万|萬))?${trailingPattern}))`;
    const regex = new RegExp(pattern, 'g');

    const startsWithArabicDigit = /^[0-9\uFF10-\uFF19]/;
    const arabicOnly = /^[0-9\uFF10-\uFF19.\uFF0E]+$/;
    const bareUnits = ['兆', '億', '万', '萬'];

    const matches = Array.from(text.matchAll(regex), (match) => ({
        index: match.index || 0,
        value: match[1],
    }));

    return matches
        .filter(({ index, value }) => {
            const previous = index > 0 ? text[index - 1] : '';
            if (previous === '.' || previous === '\uFF0E') {
                return false;
            }
            const previousIsDigit = (previous >= '0' && previous <= '9')
                || (previous >= '\uFF10' && previous <= '\uFF19');
            if (previousIsDigit && startsWithArabicDigit.test(value)) {
                return false;
            }
            return !arabicOnly.test(value) && value.length > 0 && !bareUnits.includes(value);
        })
        .map(({ value }) => value);
}
|
|
106
|
+
exports.findKanjiNumbers = findKanjiNumbers;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Maps a single digit character to its value: the kanji digits first, then the
// full-width Arabic digits (U+FF10-U+FF19).
//
// The full-width keys are written as escapes on purpose. If they are folded to
// ASCII "0".."9" they become integer-like property names, which JavaScript
// enumerates BEFORE all other keys; code that indexes Object.keys() of this
// table by digit value would then receive "2" instead of "二".
const japaneseNumerics = {
    〇: 0,
    一: 1,
    二: 2,
    三: 3,
    四: 4,
    五: 5,
    六: 6,
    七: 7,
    八: 8,
    九: 9,
    '\uFF10': 0,
    '\uFF11': 1,
    '\uFF12': 2,
    '\uFF13': 3,
    '\uFF14': 4,
    '\uFF15': 5,
    '\uFF16': 6,
    '\uFF17': 7,
    '\uFF18': 8,
    '\uFF19': 9,
};
|
|
25
|
+
exports.default = japaneseNumerics;
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
// Replacement table from formal / old-style numeral characters to the
// everyday kanji they stand for.
const oldJapaneseNumerics = {
    // digits
    '零': '〇',
    '壱': '一',
    '壹': '一',
    '弐': '二',
    '弍': '二',
    '貳': '二',
    '貮': '二',
    '参': '三',
    '參': '三',
    '肆': '四',
    '伍': '五',
    '陸': '六',
    '漆': '七',
    '捌': '八',
    '玖': '九',
    // tens
    '拾': '十',
    '廿': '二十',
    // hundreds
    '陌': '百',
    '佰': '百',
    // thousands
    '阡': '千',
    '仟': '千',
    // ten-thousands
    '萬': '万',
};
|
|
27
|
+
exports.default = oldJapaneseNumerics;
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
type NumHash = {
|
|
2
|
+
[key: string]: number;
|
|
3
|
+
};
|
|
4
|
+
type StringHash = {
|
|
5
|
+
[key: string]: string;
|
|
6
|
+
};
|
|
7
|
+
export type SplitLargeNumberParts = {
|
|
8
|
+
numbers: NumHash;
|
|
9
|
+
raw: StringHash;
|
|
10
|
+
};
|
|
11
|
+
export declare const largeNumbers: NumHash;
|
|
12
|
+
export declare const smallNumbers: NumHash;
|
|
13
|
+
export declare function normalize(japanese: string): string;
|
|
14
|
+
/**
|
|
15
|
+
* 漢数字を兆、億、万単位に分割する
|
|
16
|
+
*/
|
|
17
|
+
export declare function splitLargeNumber(japanese: string): NumHash;
|
|
18
|
+
/**
|
|
19
|
+
* 漢数字を兆、億、万単位に分割し、各係数の生文字列も返す
|
|
20
|
+
*/
|
|
21
|
+
export declare function splitLargeNumberParts(japanese: string): SplitLargeNumberParts;
|
|
22
|
+
/**
|
|
23
|
+
* 千単位以下の漢数字を数字に変換する(例: 三千 => 3000)
|
|
24
|
+
*
|
|
25
|
+
* @param japanese
|
|
26
|
+
*/
|
|
27
|
+
export declare function kan2n(japanese: string): number;
|
|
28
|
+
/**
|
|
29
|
+
* Converts number less than 10000 to kanji.
|
|
30
|
+
*
|
|
31
|
+
* @param num
|
|
32
|
+
*/
|
|
33
|
+
export declare function n2kan(num: number): string;
|
|
34
|
+
/**
|
|
35
|
+
* Converts double-width number to number as string.
|
|
36
|
+
*
|
|
37
|
+
* @param num
|
|
38
|
+
*/
|
|
39
|
+
export declare function zen2han(str: string): string;
|
|
40
|
+
export {};
|
package/dist/utils.js
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.zen2han = exports.n2kan = exports.kan2n = exports.splitLargeNumberParts = exports.splitLargeNumber = exports.normalize = exports.smallNumbers = exports.largeNumbers = void 0;
|
|
4
|
+
const oldJapaneseNumerics_1 = require("./oldJapaneseNumerics");
|
|
5
|
+
const japaneseNumerics_1 = require("./japaneseNumerics");
|
|
6
|
+
exports.largeNumbers = { '兆': 1000000000000, '億': 100000000, '万': 10000 };
|
|
7
|
+
exports.smallNumbers = { '千': 1000, '百': 100, '十': 10 };
|
|
8
|
+
/**
 * Splits a decimal such as "8.5" into an integer digit string value and the
 * power of ten needed to scale it back.
 *
 * @param {string} japanese Text that is passed through `zen2han` before matching.
 * @returns {{digits: number, scale: number} | null} `digits` is the number with
 *   the dot removed, `scale` is 10 to the number of fractional digits; `null`
 *   when the text is not digits, ".", digits.
 */
function parseDecimalValue(japanese) {
    const parts = zen2han(japanese).match(/^([0-9]+)\.([0-9]+)$/);
    if (!parts) {
        return null;
    }
    const [, whole, fraction] = parts;
    const digits = Number(`${whole}${fraction}`);
    const scale = 10 ** fraction.length;
    return { digits, scale };
}
|
|
19
|
+
/**
 * Replaces every character listed in the old-numeral table with its modern
 * counterpart.
 *
 * @param {string} japanese Text to normalize.
 * @returns {string} The text after all replacements, applied in table order.
 */
function normalize(japanese) {
    let text = japanese;
    for (const [old, modern] of Object.entries(oldJapaneseNumerics_1.default)) {
        text = text.replace(new RegExp(old, 'g'), modern);
    }
    return text;
}
|
|
26
|
+
exports.normalize = normalize;
|
|
27
|
+
/**
 * Splits a kanji numeral into its 兆, 億 and 万 parts (plus the remainder) and
 * returns only the numeric values.
 *
 * @param {string} japanese Numeral text.
 * @returns {Object<string, number>} The `numbers` map produced by
 *   `splitLargeNumberParts`.
 */
function splitLargeNumber(japanese) {
    const { numbers } = splitLargeNumberParts(japanese);
    return numbers;
}
|
|
33
|
+
exports.splitLargeNumber = splitLargeNumber;
|
|
34
|
+
/**
 * Splits a kanji numeral into its 兆, 億 and 万 parts and also returns the raw
 * coefficient text of each part.
 *
 * Units are visited in the iteration order of `largeNumbers`. For each unit
 * the text in front of it becomes that unit's coefficient and is removed from
 * the working string. Whatever is left at the end is stored under the key
 * "千".
 *
 * @param {string} japanese Numeral text.
 * @returns {{numbers: Object<string, number>, raw: Object<string, string>}}
 *   `numbers` holds each coefficient converted by `kan2n` (0 when absent),
 *   `raw` holds the coefficient text ('' when absent).
 */
function splitLargeNumberParts(japanese) {
    const numbers = {};
    const raw = {};
    let remaining = japanese;

    for (const unit of Object.keys(exports.largeNumbers)) {
        const found = remaining.match(new RegExp(`(.+)${unit}`));
        if (!found) {
            raw[unit] = '';
            numbers[unit] = 0;
            continue;
        }
        const [whole, coefficient] = found;
        raw[unit] = coefficient;
        numbers[unit] = kan2n(coefficient);
        remaining = remaining.replace(whole, '');
    }

    raw['千'] = remaining ? remaining : '';
    numbers['千'] = remaining ? kan2n(remaining) : 0;

    return { numbers, raw };
}
|
|
64
|
+
exports.splitLargeNumberParts = splitLargeNumberParts;
|
|
65
|
+
/**
 * Converts a numeral below the 万 unit to a number (e.g. 三千 => 3000).
 *
 * Accepted shapes, checked in this order:
 *  1. ASCII digits only                      -> `Number(japanese)`
 *  2. a plain decimal (after `zen2han`)      -> `Number(...)`, may be fractional
 *  3. a decimal followed by one of 千/百/十  -> exact product, or NaN when the
 *     product would not be an integer
 *  4. anything else: 千, 百 and 十 parts are consumed in turn and the rest is
 *     read either as ASCII digits or as positional kanji digits.
 *
 * @param {string} japanese Numeral text.
 * @returns {number} The value; NaN when a character has no numeric mapping.
 */
function kan2n(japanese) {
    if (japanese.match(/^[0-9]+$/)) {
        return Number(japanese);
    }

    let rest = zen2han(japanese);

    // A bare decimal coefficient (e.g. "8.5" from "8.5万") is returned as-is so
    // that the caller can multiply it by the unit value.
    if (rest.match(/^[0-9]+\.[0-9]+$/)) {
        return Number(rest);
    }

    const decimalWithUnit = rest.match(/^([0-9]+\.[0-9]+)(千|百|十)$/);
    if (decimalWithUnit) {
        const [, coefficient, unitChar] = decimalWithUnit;
        const decimal = parseDecimalValue(coefficient);
        const unit = exports.smallNumbers[unitChar];
        // Only accept the decimal when scaling by the unit yields an integer.
        return (!decimal || unit % decimal.scale !== 0)
            ? NaN
            : decimal.digits * (unit / decimal.scale);
    }

    let total = 0;
    for (const key in exports.smallNumbers) {
        const found = rest.match(new RegExp(`(.*)${key}`));
        if (!found) {
            continue;
        }
        const [whole, coefficient] = found;
        // A unit without a coefficient counts once (千 === 一千).
        let multiplier = 1;
        if (coefficient) {
            multiplier = coefficient.match(/^[0-9]+$/)
                ? Number(coefficient)
                : japaneseNumerics_1.default[coefficient];
        }
        total += multiplier * exports.smallNumbers[key];
        rest = rest.replace(whole, '');
    }

    if (rest) {
        if (rest.match(/^[0-9]+$/)) {
            total += Number(rest);
        }
        else {
            // Positional digits: the leftmost character is the most significant.
            rest.split('').forEach((char, index, chars) => {
                total += japaneseNumerics_1.default[char] * (10 ** (chars.length - index - 1));
            });
        }
    }

    return total;
}
|
|
123
|
+
exports.kan2n = kan2n;
|
|
124
|
+
/**
 * Converts number less than 10000 to kanji.
 *
 * Each unit of `smallNumbers` is visited in its iteration order; a coefficient
 * of 1 is written as the bare unit (千, not 一千) and the remaining ones digit
 * is appended last. Zero yields an empty string.
 *
 * @param {number} num Integer in the range 0-9999.
 * @returns {string} The kanji numeral, '' for 0.
 */
function n2kan(num) {
    // Explicit digit table. Looking the digit up via Object.keys() of the shared
    // numeral table depends on property enumeration order, which puts
    // integer-like keys such as "2" ahead of "二".
    const kanjiDigits = '〇一二三四五六七八九';
    let number = num;
    let kanji = '';
    for (const key in exports.smallNumbers) {
        const n = Math.floor(number / exports.smallNumbers[key]);
        if (n) {
            number = number - (n * exports.smallNumbers[key]);
            kanji = n === 1
                ? `${kanji}${key}`
                : `${kanji}${kanjiDigits[n]}${key}`;
        }
    }
    if (number) {
        kanji = `${kanji}${kanjiDigits[number]}`;
    }
    return kanji;
}
|
|
150
|
+
exports.n2kan = n2kan;
|
|
151
|
+
/**
 * Converts full-width digits (U+FF10-U+FF19) and the full-width full stop
 * (U+FF0E) to their ASCII equivalents. Every other character is left as-is.
 *
 * @param {string} str Text that may contain full-width digits or dots.
 * @returns {string} The text with those characters replaced by 0-9 and ".".
 */
function zen2han(str) {
    // The full-width forms are written as escapes: a literal that is folded to
    // ASCII would turn these patterns into /[0-9]/ and /./ ("any character"),
    // which corrupts every input.
    return str
        .replace(/[\uFF10-\uFF19]/g, (s) => String.fromCharCode(s.charCodeAt(0) - 0xFEE0))
        .replace(/\uFF0E/g, '.');
}
|
|
161
|
+
exports.zen2han = zen2han;
|
package/package.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@bukkentime/japanese-numeral",
|
|
3
|
+
"version": "1.1.0-rc.0",
|
|
4
|
+
"description": "Converts Japanese Kanji numeral to number.",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"build": "tsc",
|
|
8
|
+
"test": "mocha -r ts-node/register test/*.ts",
|
|
9
|
+
"test:watch": "npm test -- --watch --watch-files src/**/*.ts --watch-files test/**/*.ts"
|
|
10
|
+
},
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/geolonia/japanese-numeral.git"
|
|
14
|
+
},
|
|
15
|
+
"engines": {
|
|
16
|
+
"node": ">=14.0.0"
|
|
17
|
+
},
|
|
18
|
+
"author": "Geolonia Inc.",
|
|
19
|
+
"license": "MIT",
|
|
20
|
+
"devDependencies": {
|
|
21
|
+
"@types/chai": "^4.3.5",
|
|
22
|
+
"@types/mocha": "^10.0.1",
|
|
23
|
+
"chai": "^4.3.8",
|
|
24
|
+
"mocha": "^10.2.0",
|
|
25
|
+
"ts-node": "^10.9.1",
|
|
26
|
+
"typescript": "^5.2.2"
|
|
27
|
+
}
|
|
28
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import { normalize, splitLargeNumberParts, largeNumbers, n2kan, zen2han } from './utils'
|
|
2
|
+
import japaneseNumerics from './japaneseNumerics'
|
|
3
|
+
|
|
4
|
+
function parseDecimalCoefficient(coefficient: string) {
|
|
5
|
+
const normalized = zen2han(coefficient)
|
|
6
|
+
const match = normalized.match(/^([0-9]+)\.([0-9]+)$/)
|
|
7
|
+
|
|
8
|
+
if (!match) {
|
|
9
|
+
return null
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
return {
|
|
13
|
+
digits: BigInt(`${match[1]}${match[2]}`),
|
|
14
|
+
scale: 10n ** BigInt(match[2].length),
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export function kanji2number(japanese: string) {
|
|
19
|
+
japanese = normalize(japanese)
|
|
20
|
+
|
|
21
|
+
if (japanese.match('〇') || japanese.match(/^[〇一二三四五六七八九]+$/)) {
|
|
22
|
+
for (const key in japaneseNumerics) {
|
|
23
|
+
const reg = new RegExp(key, 'g')
|
|
24
|
+
japanese = japanese.replace(reg, japaneseNumerics[key].toString())
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
return Number(japanese)
|
|
28
|
+
} else {
|
|
29
|
+
let number = 0
|
|
30
|
+
const { numbers, raw } = splitLargeNumberParts(japanese)
|
|
31
|
+
|
|
32
|
+
// 万以上の数字を数値に変換
|
|
33
|
+
for (const key in largeNumbers) {
|
|
34
|
+
if (numbers[key]) {
|
|
35
|
+
const decimal = parseDecimalCoefficient(raw[key])
|
|
36
|
+
if (decimal) {
|
|
37
|
+
const unit = BigInt(largeNumbers[key])
|
|
38
|
+
if (unit % decimal.scale !== 0n) {
|
|
39
|
+
throw new TypeError('The attribute of kanji2number() must be a Japanese numeral as integer.')
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
number = number + Number(decimal.digits * (unit / decimal.scale))
|
|
43
|
+
} else {
|
|
44
|
+
number = number + largeNumbers[key] * numbers[key]
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
if (!Number.isInteger(number) || !Number.isInteger(numbers['千'])) {
|
|
50
|
+
throw new TypeError('The attribute of kanji2number() must be a Japanese numeral as integer.')
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// 千以下の数字を足す
|
|
54
|
+
return number + numbers['千']
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
export function number2kanji(num: number) {
|
|
60
|
+
if (!num.toString().match(/^[0-9]+$/)) {
|
|
61
|
+
throw new TypeError('The attribute of number2kanji() must be integer.')
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
const kanjiNumbers = Object.keys(japaneseNumerics)
|
|
65
|
+
let number = num
|
|
66
|
+
let kanji = ''
|
|
67
|
+
|
|
68
|
+
// 万以上の数字を漢字に変換
|
|
69
|
+
for (const key in largeNumbers) {
|
|
70
|
+
const n = Math.floor(number / largeNumbers[key])
|
|
71
|
+
if (n) {
|
|
72
|
+
number = number - (n * largeNumbers[key])
|
|
73
|
+
kanji = `${kanji}${n2kan(n)}${key}`
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
if (number) {
|
|
78
|
+
kanji = `${kanji}${n2kan(number)}`
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
return kanji || '〇'
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
/**
 * Finds the Japanese numeral expressions contained in a text.
 *
 * A candidate is an optional 兆 part, 億 part and 万 part followed by a part
 * below 万. Coefficients may be kanji digits, ASCII digits or full-width
 * digits; an Arabic decimal such as "8.5" is accepted directly in front of a
 * single unit.
 *
 * A candidate is dropped when
 *  - it is empty, consists only of Arabic digits / decimal points, or is a
 *    bare 兆, 億, 万 or 萬;
 *  - the character before it is a decimal point; or
 *  - it starts with an Arabic digit and the character before it is one too
 *    (it would be the tail of a longer number).
 *
 * @param text Text to scan.
 * @returns Matched substrings in order of appearance.
 */
export function findKanjiNumbers(text: string) {
  // Full-width digits (U+FF10-U+FF19) and the full-width full stop (U+FF0E)
  // are written as escapes so they cannot be folded into their ASCII
  // look-alikes by Unicode normalization of this file.
  const digitClass = '0-9\uFF10-\uFF19'
  const dotClass = '.\uFF0E'
  const num = `([${digitClass}]*)|([〇一二三四五六七八九壱壹弐弍貳貮参參肆伍陸漆捌玖]*)`
  const decimalArabicNum = `[${digitClass}]+[${dotClass}][${digitClass}]+`
  const decimalSmallUnitPattern = `(${decimalArabicNum})(千|阡|仟|百|陌|佰|十|拾)`
  // Decimal coefficients are valid as standalone Arabic-decimal prefixes before a single unit.
  const basePattern = `((${num})(千|阡|仟))?((${num})(百|陌|佰))?((${num})(十|拾))?(${num})?`
  const largeUnitPattern = `((${decimalArabicNum})|(${basePattern}))`
  const trailingPattern = `((${decimalSmallUnitPattern})|(${basePattern}))`
  const pattern = `(((${largeUnitPattern}兆)?(${largeUnitPattern}億)?(${largeUnitPattern}(万|萬))?${trailingPattern}))`
  const regex = new RegExp(pattern, 'g')

  const startsWithArabicDigit = /^[0-9\uFF10-\uFF19]/
  const arabicOnly = /^[0-9\uFF10-\uFF19.\uFF0E]+$/
  const bareUnits = ['兆', '億', '万', '萬']

  const matches = Array.from(text.matchAll(regex), (match) => ({
    index: match.index || 0,
    value: match[1],
  }))

  return matches
    .filter(({ index, value }) => {
      const previous = index > 0 ? text[index - 1] : ''
      if (previous === '.' || previous === '\uFF0E') {
        return false
      }

      const previousIsDigit = (previous >= '0' && previous <= '9')
        || (previous >= '\uFF10' && previous <= '\uFF19')
      if (previousIsDigit && startsWithArabicDigit.test(value)) {
        return false
      }

      return !arabicOnly.test(value) && value.length > 0 && !bareUnits.includes(value)
    })
    .map(({ value }) => value)
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
type japaneseNumericsType = {
  [key: string]: number
}

// Maps a single digit character to its value: the kanji digits first, then the
// full-width Arabic digits (U+FF10-U+FF19).
//
// The full-width keys are written as escapes on purpose. If they are folded to
// ASCII "0".."9" they become integer-like property names, which JavaScript
// enumerates BEFORE all other keys; code that indexes Object.keys() of this
// table by digit value would then receive "2" instead of "二".
const japaneseNumerics: japaneseNumericsType = {
  〇: 0,
  一: 1,
  二: 2,
  三: 3,
  四: 4,
  五: 5,
  六: 6,
  七: 7,
  八: 8,
  九: 9,
  '\uFF10': 0,
  '\uFF11': 1,
  '\uFF12': 2,
  '\uFF13': 3,
  '\uFF14': 4,
  '\uFF15': 5,
  '\uFF16': 6,
  '\uFF17': 7,
  '\uFF18': 8,
  '\uFF19': 9,
}
|
|
27
|
+
|
|
28
|
+
export default japaneseNumerics
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
type oldJapaneseNumericsType = {
|
|
2
|
+
[key: string]: string
|
|
3
|
+
}
|
|
4
|
+
|
|
5
|
+
const oldJapaneseNumerics: oldJapaneseNumericsType = {
|
|
6
|
+
零: '〇',
|
|
7
|
+
壱: '一',
|
|
8
|
+
壹: '一',
|
|
9
|
+
弐: '二',
|
|
10
|
+
弍: '二',
|
|
11
|
+
貳: '二',
|
|
12
|
+
貮: '二',
|
|
13
|
+
参: '三',
|
|
14
|
+
參: '三',
|
|
15
|
+
肆: '四',
|
|
16
|
+
伍: '五',
|
|
17
|
+
陸: '六',
|
|
18
|
+
漆: '七',
|
|
19
|
+
捌: '八',
|
|
20
|
+
玖: '九',
|
|
21
|
+
拾: '十',
|
|
22
|
+
廿: '二十',
|
|
23
|
+
陌: '百',
|
|
24
|
+
佰: '百',
|
|
25
|
+
阡: '千',
|
|
26
|
+
仟: '千',
|
|
27
|
+
萬: '万',
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export default oldJapaneseNumerics
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import oldJapaneseNumerics from './oldJapaneseNumerics'
|
|
2
|
+
import japaneseNumerics from './japaneseNumerics'
|
|
3
|
+
|
|
4
|
+
type NumHash = {
|
|
5
|
+
[key: string]: number;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
type StringHash = {
|
|
9
|
+
[key: string]: string;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export type SplitLargeNumberParts = {
|
|
13
|
+
numbers: NumHash;
|
|
14
|
+
raw: StringHash;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export const largeNumbers: NumHash = { '兆': 1000000000000, '億': 100000000, '万': 10000 }
|
|
18
|
+
export const smallNumbers: NumHash = { '千': 1000, '百': 100, '十': 10 }
|
|
19
|
+
|
|
20
|
+
function parseDecimalValue(japanese: string) {
|
|
21
|
+
const normalized = zen2han(japanese)
|
|
22
|
+
const match = normalized.match(/^([0-9]+)\.([0-9]+)$/)
|
|
23
|
+
|
|
24
|
+
if (!match) {
|
|
25
|
+
return null
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
return {
|
|
29
|
+
digits: Number(`${match[1]}${match[2]}`),
|
|
30
|
+
scale: 10 ** match[2].length,
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function normalize(japanese: string) {
|
|
35
|
+
for (const key in oldJapaneseNumerics) {
|
|
36
|
+
const reg = new RegExp(key, 'g')
|
|
37
|
+
japanese = japanese.replace(reg, oldJapaneseNumerics[key])
|
|
38
|
+
}
|
|
39
|
+
return japanese
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* 漢数字を兆、億、万単位に分割する
|
|
44
|
+
*/
|
|
45
|
+
export function splitLargeNumber(japanese: string) {
|
|
46
|
+
return splitLargeNumberParts(japanese).numbers
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* 漢数字を兆、億、万単位に分割し、各係数の生文字列も返す
|
|
51
|
+
*/
|
|
52
|
+
export function splitLargeNumberParts(japanese: string): SplitLargeNumberParts {
|
|
53
|
+
let kanji = japanese
|
|
54
|
+
const numbers:NumHash = {}
|
|
55
|
+
const raw:StringHash = {}
|
|
56
|
+
for (const key in largeNumbers) {
|
|
57
|
+
const reg = new RegExp(`(.+)${key}`)
|
|
58
|
+
const match = kanji.match(reg)
|
|
59
|
+
if (match) {
|
|
60
|
+
raw[key] = match[1]
|
|
61
|
+
numbers[key] = kan2n(match[1])
|
|
62
|
+
kanji = kanji.replace(match[0], '')
|
|
63
|
+
} else {
|
|
64
|
+
raw[key] = ''
|
|
65
|
+
numbers[key] = 0
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
if (kanji) {
|
|
70
|
+
raw['千'] = kanji
|
|
71
|
+
numbers['千'] = kan2n(kanji)
|
|
72
|
+
} else {
|
|
73
|
+
raw['千'] = ''
|
|
74
|
+
numbers['千'] = 0
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return { numbers, raw }
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* 千単位以下の漢数字を数字に変換する(例: 三千 => 3000)
|
|
82
|
+
*
|
|
83
|
+
* @param japanese
|
|
84
|
+
*/
|
|
85
|
+
export function kan2n(japanese: string) {
|
|
86
|
+
if (japanese.match(/^[0-9]+$/)) {
|
|
87
|
+
return Number(japanese)
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
let kanji = zen2han(japanese)
|
|
91
|
+
|
|
92
|
+
// Allow decimal coefficients (e.g. "8.5" from "8.5万") so that
|
|
93
|
+
// kan2n returns 8.5 and the caller can multiply by the unit value.
|
|
94
|
+
// Previously this fell through to the kanji-character loop, which
|
|
95
|
+
// has no mapping for "." and silently produced an incorrect result.
|
|
96
|
+
if (kanji.match(/^[0-9]+\.[0-9]+$/)) {
|
|
97
|
+
return Number(kanji)
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
const decimalUnitMatch = kanji.match(/^([0-9]+\.[0-9]+)(千|百|十)$/)
|
|
101
|
+
if (decimalUnitMatch) {
|
|
102
|
+
const decimal = parseDecimalValue(decimalUnitMatch[1])
|
|
103
|
+
const unit = smallNumbers[decimalUnitMatch[2]]
|
|
104
|
+
|
|
105
|
+
if (!decimal || unit % decimal.scale !== 0) {
|
|
106
|
+
return NaN
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
return decimal.digits * (unit / decimal.scale)
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
let number = 0
|
|
113
|
+
for (const key in smallNumbers) {
|
|
114
|
+
const reg = new RegExp(`(.*)${key}`)
|
|
115
|
+
const match = kanji.match(reg)
|
|
116
|
+
if (match) {
|
|
117
|
+
let n = 1
|
|
118
|
+
if (match[1]) {
|
|
119
|
+
if (match[1].match(/^[0-9]+$/)) {
|
|
120
|
+
n = Number(match[1])
|
|
121
|
+
} else {
|
|
122
|
+
n = japaneseNumerics[match[1]]
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
number = number + (n * smallNumbers[key])
|
|
127
|
+
|
|
128
|
+
kanji = kanji.replace(match[0], '')
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if (kanji) {
|
|
133
|
+
if (kanji.match(/^[0-9]+$/)) {
|
|
134
|
+
number = number + Number(kanji)
|
|
135
|
+
} else {
|
|
136
|
+
for (let index = 0; index < kanji.length; index++) {
|
|
137
|
+
const char = kanji[index];
|
|
138
|
+
const digit = kanji.length - index - 1
|
|
139
|
+
number = number + japaneseNumerics[char] * (10 ** digit)
|
|
140
|
+
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return number
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
/**
 * Converts number less than 10000 to kanji.
 *
 * Each unit of `smallNumbers` is visited in its iteration order; a coefficient
 * of 1 is written as the bare unit (千, not 一千) and the remaining ones digit
 * is appended last. Zero yields an empty string.
 *
 * @param num Integer in the range 0-9999.
 * @returns The kanji numeral, '' for 0.
 */
export function n2kan(num: number) {
  // Explicit digit table. Looking the digit up via Object.keys() of the shared
  // numeral table depends on property enumeration order, which puts
  // integer-like keys such as "2" ahead of "二".
  const kanjiDigits = '〇一二三四五六七八九'
  let number = num
  let kanji = ''
  for (const key in smallNumbers) {
    const n = Math.floor(number / smallNumbers[key])
    if (n) {
      number = number - (n * smallNumbers[key])
      kanji = n === 1
        ? `${kanji}${key}`
        : `${kanji}${kanjiDigits[n]}${key}`
    }
  }

  if (number) {
    kanji = `${kanji}${kanjiDigits[number]}`
  }

  return kanji
}
|
|
175
|
+
|
|
176
|
+
/**
 * Converts full-width (double-width) digits and the full-width full stop in a
 * string to their half-width ASCII equivalents.
 *
 * Only U+FF10–U+FF19 (full-width 0-9) and U+FF0E (full-width ".") are
 * rewritten; every other character, including ASCII digits and kanji, is
 * returned unchanged.
 *
 * @param str Text that may contain full-width digits or a full-width full stop.
 * @returns `str` with those characters replaced by `0`-`9` and `.`.
 */
export function zen2han(str: string) {
  // The patterns use \u escapes rather than literal glyphs so that they cannot
  // be silently folded to ASCII by a Unicode-normalizing tool. An ASCII
  // `[0-9]` class here would shift ordinary digits by -0xFEE0 into garbage,
  // and an ASCII `/./g` would replace every character with ".".
  return str
    // Full-width digits sit exactly 0xFEE0 above their ASCII counterparts.
    .replace(/[\uFF10-\uFF19]/g, (s) => String.fromCharCode(s.charCodeAt(0) - 0xFEE0))
    .replace(/\uFF0E/g, '.')
}
|
package/test/test.ts
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { kanji2number, number2kanji, findKanjiNumbers } from '../src'
|
|
2
|
+
import { assert } from 'chai'
|
|
3
|
+
|
|
4
|
+
describe('Tests for japaneseNumeral.', () => {
|
|
5
|
+
it('Japanese numeric should be parsed as numbers.', () => {
|
|
6
|
+
assert.deepEqual(kanji2number('〇'), 0)
|
|
7
|
+
assert.deepEqual(kanji2number('零'), 0)
|
|
8
|
+
assert.deepEqual(kanji2number('一千百十一兆一千百十一億一千百十一万一千百十一'), 1111111111111111)
|
|
9
|
+
assert.deepEqual(kanji2number('一千百十一兆一千百十一億一千百十一万'), 1111111111110000)
|
|
10
|
+
assert.deepEqual(kanji2number('一千百十一兆一千百十一億一千百十一'), 1111111100001111)
|
|
11
|
+
assert.deepEqual(kanji2number('百十一'), 111)
|
|
12
|
+
assert.deepEqual(kanji2number('三億八'), 300000008)
|
|
13
|
+
assert.deepEqual(kanji2number('三百八'), 308)
|
|
14
|
+
assert.deepEqual(kanji2number('三五〇'), 350)
|
|
15
|
+
assert.deepEqual(kanji2number('三〇八'), 308)
|
|
16
|
+
assert.deepEqual(kanji2number('二〇二〇'), 2020)
|
|
17
|
+
assert.deepEqual(kanji2number('十'), 10)
|
|
18
|
+
assert.deepEqual(kanji2number('二千'), 2000)
|
|
19
|
+
assert.deepEqual(kanji2number('壱万'), 10000)
|
|
20
|
+
assert.deepEqual(kanji2number('弍万'), 20000)
|
|
21
|
+
assert.deepEqual(kanji2number('一二三四'), 1234)
|
|
22
|
+
assert.deepEqual(kanji2number('千二三四'), 1234)
|
|
23
|
+
assert.deepEqual(kanji2number('千二百三四'), 1234)
|
|
24
|
+
assert.deepEqual(kanji2number('千二百三十四'), 1234)
|
|
25
|
+
assert.deepEqual(kanji2number('壱阡陌拾壱兆壱阡陌拾壱億壱阡陌拾壱萬壱阡陌拾壱'), 1111111111111111)
|
|
26
|
+
assert.deepEqual(kanji2number('壱仟佰拾壱兆壱仟佰拾壱億壱仟佰拾壱萬壱仟佰拾壱'), 1111111111111111)
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it('Number should be converted to Japanese kanji', () => {
|
|
30
|
+
assert.deepEqual(number2kanji(0), '〇')
|
|
31
|
+
assert.deepEqual(number2kanji(1110), '千百十')
|
|
32
|
+
assert.deepEqual(number2kanji(1111111111111111), '千百十一兆千百十一億千百十一万千百十一')
|
|
33
|
+
assert.deepEqual(number2kanji(1111113111111111), '千百十一兆千百三十一億千百十一万千百十一')
|
|
34
|
+
assert.deepEqual(number2kanji(1000000000000000), '千兆')
|
|
35
|
+
assert.deepEqual(number2kanji(1200000), '百二十万')
|
|
36
|
+
assert.deepEqual(number2kanji(18), '十八')
|
|
37
|
+
assert.deepEqual(number2kanji(100100000), '一億十万')
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
it('should returns the instance of TypeError', () => {
|
|
41
|
+
// @ts-ignore
|
|
42
|
+
assert.throws(() => number2kanji('hello'), TypeError)
|
|
43
|
+
|
|
44
|
+
assert.throws(() => kanji2number('三あ八'), TypeError)
|
|
45
|
+
assert.throws(() => kanji2number('あ'), TypeError)
|
|
46
|
+
assert.throws(() => kanji2number('三五十'), TypeError)
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it('should find Japanese Kanji numbers.', () => {
|
|
50
|
+
assert.deepEqual([ '二千二十', '十一', '二十' ], findKanjiNumbers('今日は二千二十年十一月二十日です。'))
|
|
51
|
+
assert.deepEqual([ '二〇二〇', '十一', '二十' ], findKanjiNumbers('今日は二〇二〇年十一月二十日です。'))
|
|
52
|
+
assert.deepEqual([ '二千二十億' ], findKanjiNumbers('わたしは二千二十億円もっています。'))
|
|
53
|
+
assert.deepEqual([ '二〇二〇億' ], findKanjiNumbers('わたしは二〇二〇億円もっています。'))
|
|
54
|
+
assert.deepEqual([ '八百六十三' ], findKanjiNumbers('今日のランチは八百六十三円でした。'))
|
|
55
|
+
assert.deepEqual([ '八六三' ], findKanjiNumbers('今日のランチは八六三円でした。'))
|
|
56
|
+
assert.deepEqual([ '三千' ], findKanjiNumbers('今月のお小遣いは三千円です。'))
|
|
57
|
+
assert.deepEqual([ '五', '千' ], findKanjiNumbers('青森県五所川原市金木町喜良市千苅62−8'))
|
|
58
|
+
assert.deepEqual([ '1億2000万' ], findKanjiNumbers('わたしは1億2000万円もっています。'))
|
|
59
|
+
assert.deepEqual([ '六' ], findKanjiNumbers('香川県仲多度郡まんのう町勝浦字家六2094番地1'))
|
|
60
|
+
})
|
|
61
|
+
|
|
62
|
+
it('should not find Japanese Kanji numbers.', () => {
|
|
63
|
+
assert.deepEqual(0, findKanjiNumbers('栗沢町万字寿町').length)
|
|
64
|
+
assert.deepEqual(0, findKanjiNumbers('私は億ションに住んでいます').length)
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
it('should find mixed Japanese Kanji numbers.', () => {
|
|
68
|
+
assert.deepEqual([ '2千20', '十一', '二十' ], findKanjiNumbers('今日は2千20年十一月二十日です。'))
|
|
69
|
+
})
|
|
70
|
+
|
|
71
|
+
it('should find old Japanese Kanji numbers.', () => {
|
|
72
|
+
assert.deepEqual([ '壱', '弐' ], findKanjiNumbers('私が住んでいるのは壱番館の弐号室です。'))
|
|
73
|
+
assert.deepEqual([ '弍' ], findKanjiNumbers('私は、ハイツ弍号棟に住んでいます。'))
|
|
74
|
+
assert.deepEqual([ '壱阡陌拾壱兆壱億壱萬', ], findKanjiNumbers('私は、壱阡陌拾壱兆壱億壱萬円持っています。'))
|
|
75
|
+
assert.deepEqual([ '壱仟佰拾壱兆壱億壱萬', ], findKanjiNumbers('私は、壱仟佰拾壱兆壱億壱萬円持っています。'))
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
it('should convert mixed Japanese Kanji numbers to numbers.', () => {
|
|
79
|
+
assert.deepEqual(kanji2number('100万'), 1000000)
|
|
80
|
+
assert.deepEqual(kanji2number('5百'), 500)
|
|
81
|
+
assert.deepEqual(kanji2number('7十'), 70)
|
|
82
|
+
assert.deepEqual(kanji2number('4千8百'), 4800)
|
|
83
|
+
assert.deepEqual(kanji2number('4千8百万'), 48000000)
|
|
84
|
+
assert.deepEqual(kanji2number('3億4千8百万'), 348000000)
|
|
85
|
+
assert.deepEqual(kanji2number('3億4千8百万6'), 348000006)
|
|
86
|
+
assert.deepEqual(kanji2number('2百億'), 20000000000)
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
it('`4千8百21` should be converted to `4821`', () => {
|
|
90
|
+
assert.deepEqual(kanji2number('4千8百21'), 4821)
|
|
91
|
+
})
|
|
92
|
+
|
|
93
|
+
// The title repeats the exact input and expected value asserted below.
it('`1千2百35億8百21` should be converted to `123500000821`', () => {
  assert.deepEqual(kanji2number('1千2百35億8百21'), 123500000821)
})
|
|
96
|
+
|
|
97
|
+
it('`2億3千430万` should be converted to `234300000`', () => {
|
|
98
|
+
assert.deepEqual(kanji2number('2億3千430万'), 234300000)
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
it('`2億3千456万7890` should be converted to `234567890`', () => {
|
|
102
|
+
assert.deepEqual(kanji2number('2億3千456万7890'), 234567890)
|
|
103
|
+
})
|
|
104
|
+
|
|
105
|
+
it('`123` should be converted to `123`', () => {
|
|
106
|
+
assert.deepEqual(kanji2number('123'), 123)
|
|
107
|
+
})
|
|
108
|
+
|
|
109
|
+
// Issue #20: decimal Arabic-kanji numerals (e.g. 8.5万円, 25.24億円)
|
|
110
|
+
describe('findKanjiNumbers should recognize decimal Arabic-kanji numerals as single tokens', () => {
|
|
111
|
+
it('decimal + 万: common rent price like 8.5万円', () => {
|
|
112
|
+
assert.deepEqual(findKanjiNumbers('家賃は8.5万円です。'), ['8.5万'])
|
|
113
|
+
})
|
|
114
|
+
|
|
115
|
+
it('decimal + 億: property price like 25.24億円', () => {
|
|
116
|
+
assert.deepEqual(findKanjiNumbers('売上は25.24億円でした。'), ['25.24億'])
|
|
117
|
+
})
|
|
118
|
+
|
|
119
|
+
it('should handle precision in numbers like 1.20兆円', () => {
|
|
120
|
+
assert.deepEqual(findKanjiNumbers('予算は1.20兆円です。'), ['1.20兆'])
|
|
121
|
+
})
|
|
122
|
+
|
|
123
|
+
it('should not extract standalone decimals without kanji units', () => {
|
|
124
|
+
assert.deepEqual(findKanjiNumbers('値段は8.5です'), [])
|
|
125
|
+
})
|
|
126
|
+
|
|
127
|
+
it('full-width decimal: 8.5万 should be found as a single token', () => {
|
|
128
|
+
assert.deepEqual(findKanjiNumbers('家賃は8.5万円です。'), ['8.5万'])
|
|
129
|
+
})
|
|
130
|
+
|
|
131
|
+
it('should extract decimal rent and fee amounts written with small units', () => {
|
|
132
|
+
assert.deepEqual(findKanjiNumbers('家賃は8.5千円です。'), ['8.5千'])
|
|
133
|
+
assert.deepEqual(findKanjiNumbers('手数料は8.5百円です。'), ['8.5百'])
|
|
134
|
+
assert.deepEqual(findKanjiNumbers('割引は8.5十円です。'), ['8.5十'])
|
|
135
|
+
})
|
|
136
|
+
|
|
137
|
+
it('should still find kanji numerals in text like article numbers and mixed labels', () => {
|
|
138
|
+
assert.deepEqual(findKanjiNumbers('第2二十条'), ['二十'])
|
|
139
|
+
assert.deepEqual(findKanjiNumbers('3三'), ['三'])
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
it('should stop before broken decimal-large-unit strings from OCR or scraped text', () => {
|
|
143
|
+
assert.deepEqual(findKanjiNumbers('1千2.3億'), ['1千2'])
|
|
144
|
+
assert.deepEqual(findKanjiNumbers('1百2.3万'), ['1百2'])
|
|
145
|
+
})
|
|
146
|
+
})
|
|
147
|
+
|
|
148
|
+
describe('kanji2number should convert decimal Arabic-kanji numerals', () => {
|
|
149
|
+
it('decimal + 千/百/十: 8.5千 = 8,500, 8.5百 = 850, 8.5十 = 85', () => {
|
|
150
|
+
assert.deepEqual(kanji2number('8.5千'), 8500)
|
|
151
|
+
assert.deepEqual(kanji2number('8.5百'), 850)
|
|
152
|
+
assert.deepEqual(kanji2number('8.5十'), 85)
|
|
153
|
+
})
|
|
154
|
+
|
|
155
|
+
it('decimal + 万: 8.5万 = 85,000', () => {
|
|
156
|
+
assert.deepEqual(kanji2number('8.5万'), 85000)
|
|
157
|
+
})
|
|
158
|
+
|
|
159
|
+
it('decimal + 億: 25.24億 = 2,524,000,000', () => {
|
|
160
|
+
assert.deepEqual(kanji2number('25.24億'), 2524000000)
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
it('precision in numbers', () => {
|
|
164
|
+
assert.deepEqual(kanji2number('1.20兆'), 1200000000000)
|
|
165
|
+
})
|
|
166
|
+
|
|
167
|
+
it('should not suffer from floating-point drift: 0.29億 = 29,000,000', () => {
|
|
168
|
+
assert.deepEqual(kanji2number('0.29億'), 29000000)
|
|
169
|
+
})
|
|
170
|
+
|
|
171
|
+
// The input is spelled with \u escapes (full-width 8, full-width full stop,
// full-width 5) so it stays full-width even if the file is normalized;
// the half-width form is already covered by the 'decimal + 万' case.
// NOTE(review): assumes kanji2number accepts full-width input, as the title implies — confirm.
it('full-width decimal: 8.5万 = 85,000', () => {
  assert.deepEqual(kanji2number('\uFF18\uFF0E\uFF15万'), 85000)
})
|
|
174
|
+
|
|
175
|
+
it('should support tiny but integral decimal coefficients like 0.0000000001兆', () => {
|
|
176
|
+
assert.deepEqual(kanji2number('0.0000000001兆'), 100)
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
it('should reject non-integral results like 0.00009万', () => {
|
|
180
|
+
assert.throws(() => kanji2number('0.00009万'), TypeError)
|
|
181
|
+
})
|
|
182
|
+
|
|
183
|
+
it('should reject high-precision non-integral results like 1.0000000001万', () => {
|
|
184
|
+
assert.throws(() => kanji2number('1.0000000001万'), TypeError)
|
|
185
|
+
})
|
|
186
|
+
|
|
187
|
+
it('should reject large-coefficient non-integral results like 10000000.00000001万', () => {
|
|
188
|
+
assert.throws(() => kanji2number('10000000.00000001万'), TypeError)
|
|
189
|
+
})
|
|
190
|
+
|
|
191
|
+
it('should still reject mixed decimal-small-unit hybrids like 1.2千3百', () => {
|
|
192
|
+
assert.throws(() => kanji2number('1.2千3百'), TypeError)
|
|
193
|
+
})
|
|
194
|
+
})
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
// https://github.com/geolonia/normalize-japanese-addresses/issues/94
|
|
198
|
+
it('should find Japanese Kanji number `六` in `香川県仲多度郡まんのう町勝浦字家六2094番地1`.', () => {
|
|
199
|
+
assert.deepEqual([ '六' ], findKanjiNumbers('香川県仲多度郡まんのう町勝浦字家六2094番地1'))
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
it('should find Japanese Kanji number in `今日は2千20年十一月二十日です。`.', () => {
|
|
203
|
+
assert.deepEqual([ '2千20', '十一', '二十' ], findKanjiNumbers('今日は2千20年十一月二十日です。'))
|
|
204
|
+
})
|
package/test/utils.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { n2kan, kan2n } from '../src/utils'
|
|
2
|
+
import { assert } from 'chai'
|
|
3
|
+
|
|
4
|
+
// Table-driven checks for the small-number helpers in src/utils.
describe('Tests for utils.', () => {
  it('Small Japanese numeric should be parsed as numbers.', () => {
    // [input number, expected kanji]
    const n2kanCases: Array<[number, string]> = [
      [1111, '千百十一'],
      [3111, '三千百十一'],
      [1000, '千'],
      [5, '五'],
    ]

    for (const [input, expected] of n2kanCases) {
      assert.deepEqual(n2kan(input), expected)
    }
  });

  it('`kan2n()` should returns value as expected.', () => {
    // [input text, expected number]
    const kan2nCases: Array<[string, number]> = [
      ['三千', 3000],
      ['22', 22],
      ['123', 123],
    ]

    for (const [input, expected] of kan2nCases) {
      assert.deepEqual(kan2n(input), expected)
    }
  })
});
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "esnext",
|
|
4
|
+
"module": "commonjs",
|
|
5
|
+
"moduleResolution": "node",
|
|
6
|
+
"strict": true,
|
|
7
|
+
"skipLibCheck": true,
|
|
8
|
+
"declaration": true,
|
|
9
|
+
"pretty": true,
|
|
10
|
+
"newLine": "lf",
|
|
11
|
+
"outDir": "dist"
|
|
12
|
+
},
|
|
13
|
+
"files": [
|
|
14
|
+
"./src/index.ts"
|
|
15
|
+
]
|
|
16
|
+
}
|