japanese_address_parser 2.2.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -0
- data/CHANGELOG.md +15 -0
- data/Dockerfile +15 -0
- data/Gemfile.lock +25 -24
- data/README.md +8 -0
- data/docker-compose.yml +6 -0
- data/japanese_address_parser.gemspec +2 -2
- data/js/node_modules/.package-lock.json +7 -7
- data/js/node_modules/@geolonia/japanese-numeral/dist/index.js +1 -1
- data/js/node_modules/@geolonia/japanese-numeral/dist/oldJapaneseNumerics.js +1 -0
- data/js/node_modules/@geolonia/japanese-numeral/dist/utils.js +5 -1
- data/js/node_modules/@geolonia/japanese-numeral/package.json +1 -1
- data/js/node_modules/@geolonia/japanese-numeral/src/index.ts +1 -1
- data/js/node_modules/@geolonia/japanese-numeral/src/oldJapaneseNumerics.ts +1 -0
- data/js/node_modules/@geolonia/japanese-numeral/src/utils.ts +6 -1
- data/js/node_modules/@geolonia/japanese-numeral/test/test.ts +5 -0
- data/js/node_modules/@geolonia/normalize-japanese-addresses/README.md +21 -14
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/config.d.ts +1 -5
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/lib/cacheRegexes.d.ts +1 -0
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/main-browser.d.ts +1 -1
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/main-browser.js +94 -36
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/main-node.d.ts +1 -1
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/main-node.js +88 -35
- data/js/node_modules/@geolonia/normalize-japanese-addresses/dist/normalize.d.ts +13 -0
- data/js/node_modules/@geolonia/normalize-japanese-addresses/package.json +4 -4
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/config.ts +1 -6
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/lib/cacheRegexes.ts +45 -9
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/lib/dict.ts +2 -2
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/main-browser.ts +1 -2
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/main-node.ts +2 -3
- data/js/node_modules/@geolonia/normalize-japanese-addresses/src/normalize.ts +43 -15
- data/js/package-lock.json +14 -14
- data/lib/japanese_address_parser/data/01-01204.csv +1 -1
- data/lib/japanese_address_parser/data/01-01206.csv +2 -2
- data/lib/japanese_address_parser/data/01-01210.csv +1 -1
- data/lib/japanese_address_parser/data/01-01224.csv +1 -1
- data/lib/japanese_address_parser/data/01-01230.csv +1 -1
- data/lib/japanese_address_parser/data/01-01303.csv +1 -1
- data/lib/japanese_address_parser/data/01-01407.csv +3 -3
- data/lib/japanese_address_parser/data/01-01610.csv +1 -1
- data/lib/japanese_address_parser/data/01-01632.csv +1 -1
- data/lib/japanese_address_parser/data/01-01637.csv +1 -1
- data/lib/japanese_address_parser/data/01-01646.csv +1 -1
- data/lib/japanese_address_parser/data/01-01691.csv +1 -1
- data/lib/japanese_address_parser/data/03-03202.csv +1 -1
- data/lib/japanese_address_parser/data/03-03205.csv +29 -29
- data/lib/japanese_address_parser/data/03-03209.csv +56 -56
- data/lib/japanese_address_parser/data/03-03213.csv +1 -1
- data/lib/japanese_address_parser/data/03-03302.csv +1 -1
- data/lib/japanese_address_parser/data/03-03366.csv +1 -1
- data/lib/japanese_address_parser/data/03-03483.csv +1 -1
- data/lib/japanese_address_parser/data/03-03524.csv +1 -1
- data/lib/japanese_address_parser/data/04-04101.csv +6 -6
- data/lib/japanese_address_parser/data/04-04206.csv +32 -32
- data/lib/japanese_address_parser/data/04-04207.csv +41 -41
- data/lib/japanese_address_parser/data/06-06203.csv +1 -1
- data/lib/japanese_address_parser/data/07-07207.csv +1 -1
- data/lib/japanese_address_parser/data/07-07301.csv +2 -2
- data/lib/japanese_address_parser/data/11-11201.csv +3 -3
- data/lib/japanese_address_parser/data/12-12211.csv +1 -1
- data/lib/japanese_address_parser/data/12-12215.csv +2 -2
- data/lib/japanese_address_parser/data/13-13103.csv +2 -2
- data/lib/japanese_address_parser/data/13-13104.csv +3 -3
- data/lib/japanese_address_parser/data/13-13116.csv +5 -5
- data/lib/japanese_address_parser/data/14-14402.csv +2 -2
- data/lib/japanese_address_parser/data/15-15210.csv +5 -5
- data/lib/japanese_address_parser/data/15-15222.csv +4 -4
- data/lib/japanese_address_parser/data/15-15226.csv +1 -1
- data/lib/japanese_address_parser/data/16-16207.csv +1 -1
- data/lib/japanese_address_parser/data/16-16323.csv +2 -2
- data/lib/japanese_address_parser/data/17-17201.csv +26 -26
- data/lib/japanese_address_parser/data/17-17202.csv +1 -1
- data/lib/japanese_address_parser/data/17-17206.csv +29 -29
- data/lib/japanese_address_parser/data/19-19209.csv +1 -1
- data/lib/japanese_address_parser/data/19-19213.csv +2 -2
- data/lib/japanese_address_parser/data/20-20210.csv +1 -1
- data/lib/japanese_address_parser/data/20-20385.csv +14 -0
- data/lib/japanese_address_parser/data/20.csv +1 -13
- data/lib/japanese_address_parser/data/22-22135.csv +1 -1
- data/lib/japanese_address_parser/data/22-22209.csv +1 -1
- data/lib/japanese_address_parser/data/22-22210.csv +1 -1
- data/lib/japanese_address_parser/data/22-22211.csv +1 -1
- data/lib/japanese_address_parser/data/22-22325.csv +2 -2
- data/lib/japanese_address_parser/data/22-22341.csv +1 -1
- data/lib/japanese_address_parser/data/23-23105.csv +5 -5
- data/lib/japanese_address_parser/data/24-24205.csv +1 -1
- data/lib/japanese_address_parser/data/25-25202.csv +1 -1
- data/lib/japanese_address_parser/data/26-26102.csv +14 -14
- data/lib/japanese_address_parser/data/26-26103.csv +1 -1
- data/lib/japanese_address_parser/data/26-26104.csv +9 -9
- data/lib/japanese_address_parser/data/26-26105.csv +4 -4
- data/lib/japanese_address_parser/data/26-26106.csv +15 -15
- data/lib/japanese_address_parser/data/26-26107.csv +1 -1
- data/lib/japanese_address_parser/data/27-27145.csv +1 -1
- data/lib/japanese_address_parser/data/27-27202.csv +1 -1
- data/lib/japanese_address_parser/data/27-27208.csv +2 -2
- data/lib/japanese_address_parser/data/27-27211.csv +1 -1
- data/lib/japanese_address_parser/data/27-27227.csv +2 -2
- data/lib/japanese_address_parser/data/28-28109.csv +41 -41
- data/lib/japanese_address_parser/data/28-28201.csv +1 -1
- data/lib/japanese_address_parser/data/28-28204.csv +1 -1
- data/lib/japanese_address_parser/data/28-28208.csv +7 -7
- data/lib/japanese_address_parser/data/28-28217.csv +3 -3
- data/lib/japanese_address_parser/data/28-28226.csv +1 -1
- data/lib/japanese_address_parser/data/29-29201.csv +50 -50
- data/lib/japanese_address_parser/data/29-29203.csv +1 -1
- data/lib/japanese_address_parser/data/30-30304.csv +3 -3
- data/lib/japanese_address_parser/data/34-34210.csv +2 -2
- data/lib/japanese_address_parser/data/35-35201.csv +1 -1
- data/lib/japanese_address_parser/data/35-35213.csv +2 -2
- data/lib/japanese_address_parser/data/36-36201.csv +15 -15
- data/lib/japanese_address_parser/data/36-36202.csv +14 -14
- data/lib/japanese_address_parser/data/36-36207.csv +3 -3
- data/lib/japanese_address_parser/data/36-36208.csv +1 -1
- data/lib/japanese_address_parser/data/36-36468.csv +23 -23
- data/lib/japanese_address_parser/data/38-38202.csv +1 -1
- data/lib/japanese_address_parser/data/38-38203.csv +1 -1
- data/lib/japanese_address_parser/data/38-38210.csv +1 -1
- data/lib/japanese_address_parser/data/38-38422.csv +2 -2
- data/lib/japanese_address_parser/data/39-39206.csv +1 -1
- data/lib/japanese_address_parser/data/39-39210.csv +1 -1
- data/lib/japanese_address_parser/data/39-39212.csv +3 -3
- data/lib/japanese_address_parser/data/39-39386.csv +2 -2
- data/lib/japanese_address_parser/data/39-39412.csv +1 -1
- data/lib/japanese_address_parser/data/40-40131.csv +7 -7
- data/lib/japanese_address_parser/data/41-41201.csv +1 -1
- data/lib/japanese_address_parser/data/42-42214.csv +1 -1
- data/lib/japanese_address_parser/data/43-43101.csv +2 -2
- data/lib/japanese_address_parser/data/43-43204.csv +1 -1
- data/lib/japanese_address_parser/data/43-43205.csv +2 -2
- data/lib/japanese_address_parser/data/43-43215.csv +3 -3
- data/lib/japanese_address_parser/data/44-44204.csv +1 -1
- data/lib/japanese_address_parser/data/45-45201.csv +2 -2
- data/lib/japanese_address_parser/data/45-45202.csv +1 -1
- data/lib/japanese_address_parser/data/45-45203.csv +2 -2
- data/lib/japanese_address_parser/data/46-46201.csv +2 -2
- data/lib/japanese_address_parser/data/46-46210.csv +1 -1
- data/lib/japanese_address_parser/data/46-46215.csv +1 -1
- data/lib/japanese_address_parser/data/46-46216.csv +1 -1
- data/lib/japanese_address_parser/data/46-46217.csv +4 -4
- data/lib/japanese_address_parser/data/46-46218.csv +3 -3
- data/lib/japanese_address_parser/data/46-46221.csv +1 -1
- data/lib/japanese_address_parser/models/prefecture.rb +2 -13
- data/lib/japanese_address_parser/version.rb +1 -1
- metadata +10 -7
@@ -19,6 +19,17 @@
|
|
19
19
|
PERFORMANCE OF THIS SOFTWARE.
|
20
20
|
***************************************************************************** */
|
21
21
|
|
22
|
+
var __assign = function() {
|
23
|
+
__assign = Object.assign || function __assign(t) {
|
24
|
+
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
25
|
+
s = arguments[i];
|
26
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];
|
27
|
+
}
|
28
|
+
return t;
|
29
|
+
};
|
30
|
+
return __assign.apply(this, arguments);
|
31
|
+
};
|
32
|
+
|
22
33
|
function __awaiter(thisArg, _arguments, P, generator) {
|
23
34
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
24
35
|
return new (P || (P = Promise))(function (resolve, reject) {
|
@@ -82,6 +93,7 @@
|
|
82
93
|
壱: '一',
|
83
94
|
壹: '一',
|
84
95
|
弐: '二',
|
96
|
+
弍: '二',
|
85
97
|
貳: '二',
|
86
98
|
貮: '二',
|
87
99
|
参: '三',
|
@@ -208,7 +220,11 @@
|
|
208
220
|
number = number + Number(kanji);
|
209
221
|
}
|
210
222
|
else {
|
211
|
-
|
223
|
+
for (let index = 0; index < kanji.length; index++) {
|
224
|
+
const char = kanji[index];
|
225
|
+
const digit = kanji.length - index - 1;
|
226
|
+
number = number + japaneseNumerics_1.default[char] * (10 ** digit);
|
227
|
+
}
|
212
228
|
}
|
213
229
|
}
|
214
230
|
return number;
|
@@ -308,7 +324,7 @@
|
|
308
324
|
}
|
309
325
|
exports.number2kanji = number2kanji;
|
310
326
|
function findKanjiNumbers(text) {
|
311
|
-
const num = '([0-90-9]*)|([
|
327
|
+
const num = '([0-90-9]*)|([〇一二三四五六七八九壱壹弐弍貳貮参參肆伍陸漆捌玖]*)';
|
312
328
|
const basePattern = `((${num})(千|阡|仟))?((${num})(百|陌|佰))?((${num})(十|拾))?(${num})?`;
|
313
329
|
const pattern = `((${basePattern}兆)?(${basePattern}億)?(${basePattern}(万|萬))?${basePattern})`;
|
314
330
|
const regex = new RegExp(pattern, 'g');
|
@@ -330,6 +346,11 @@
|
|
330
346
|
exports.findKanjiNumbers = findKanjiNumbers;
|
331
347
|
});
|
332
348
|
|
349
|
+
var currentConfig = {
|
350
|
+
japaneseAddressesApi: 'https://geolonia.github.io/japanese-addresses/api/ja',
|
351
|
+
townCacheSize: 1000,
|
352
|
+
};
|
353
|
+
|
333
354
|
var kan2num = function (string) {
|
334
355
|
var kanjiNumbers = dist.findKanjiNumbers(string);
|
335
356
|
for (var i = 0; i < kanjiNumbers.length; i++) {
|
@@ -380,8 +401,8 @@
|
|
380
401
|
};
|
381
402
|
|
382
403
|
// JIS 第2水準 => 第1水準 及び 旧字体 => 新字体
|
383
|
-
var JIS_OLD_KANJI = '
|
384
|
-
var JIS_NEW_KANJI = '
|
404
|
+
var JIS_OLD_KANJI = '亞,圍,壹,榮,驛,應,櫻,假,會,懷,覺,樂,陷,歡,氣,戲,據,挾,區,徑,溪,輕,藝,儉,圈,權,嚴,恆,國,齋,雜,蠶,殘,兒,實,釋,從,縱,敍,燒,條,剩,壤,釀,眞,盡,醉,髓,聲,竊,淺,錢,禪,爭,插,騷,屬,對,滯,擇,單,斷,癡,鑄,敕,鐵,傳,黨,鬪,屆,腦,廢,發,蠻,拂,邊,瓣,寶,沒,滿,藥,餘,樣,亂,兩,禮,靈,爐,灣,惡,醫,飮,營,圓,歐,奧,價,繪,擴,學,罐,勸,觀,歸,犧,擧,狹,驅,莖,經,繼,缺,劍,檢,顯,廣,鑛,碎,劑,參,慘,絲,辭,舍,壽,澁,肅,將,證,乘,疊,孃,觸,寢,圖,穗,樞,齊,攝,戰,潛,雙,莊,裝,藏,續,體,臺,澤,膽,彈,蟲,廳,鎭,點,燈,盜,獨,貳,霸,賣,髮,祕,佛,變,辯,豐,飜,默,與,譽,謠,覽,獵,勵,齡,勞,壓,爲,隱,衞,鹽,毆,穩,畫,壞,殼,嶽,卷,關,顏,僞,舊,峽,曉,勳,惠,螢,鷄,縣,險,獻,驗,效,號,濟,册,棧,贊,齒,濕,寫,收,獸,處,稱,奬,淨,繩,讓,囑,愼,粹,隨,數,靜,專,踐,纖,壯,搜,總,臟,墮,帶,瀧,擔,團,遲,晝,聽,遞,轉,當,稻,讀,惱,拜,麥,拔,濱,竝,辨,舖,襃,萬,譯,豫,搖,來,龍,壘,隸,戀,樓,鰺,鶯,蠣,攪,竈,灌,諫,頸,礦,蘂,靱,賤,壺,礪,檮,濤,邇,蠅,檜,儘,藪,籠,彌'.split(/,/);
|
405
|
+
var JIS_NEW_KANJI = '亜,囲,壱,栄,駅,応,桜,仮,会,懐,覚,楽,陥,歓,気,戯,拠,挟,区,径,渓,軽,芸,倹,圏,権,厳,恒,国,斎,雑,蚕,残,児,実,釈,従,縦,叙,焼,条,剰,壌,醸,真,尽,酔,髄,声,窃,浅,銭,禅,争,挿,騒,属,対,滞,択,単,断,痴,鋳,勅,鉄,伝,党,闘,届,脳,廃,発,蛮,払,辺,弁,宝,没,満,薬,余,様,乱,両,礼,霊,炉,湾,悪,医,飲,営,円,欧,奥,価,絵,拡,学,缶,勧,観,帰,犠,挙,狭,駆,茎,経,継,欠,剣,検,顕,広,鉱,砕,剤,参,惨,糸,辞,舎,寿,渋,粛,将,証,乗,畳,嬢,触,寝,図,穂,枢,斉,摂,戦,潜,双,荘,装,蔵,続,体,台,沢,胆,弾,虫,庁,鎮,点,灯,盗,独,弐,覇,売,髪,秘,仏,変,弁,豊,翻,黙,与,誉,謡,覧,猟,励,齢,労,圧,為,隠,衛,塩,殴,穏,画,壊,殻,岳,巻,関,顔,偽,旧,峡,暁,勲,恵,蛍,鶏,県,険,献,験,効,号,済,冊,桟,賛,歯,湿,写,収,獣,処,称,奨,浄,縄,譲,嘱,慎,粋,随,数,静,専,践,繊,壮,捜,総,臓,堕,帯,滝,担,団,遅,昼,聴,逓,転,当,稲,読,悩,拝,麦,抜,浜,並,弁,舗,褒,万,訳,予,揺,来,竜,塁,隷,恋,楼,鯵,鴬,蛎,撹,竃,潅,諌,頚,砿,蕊,靭,賎,壷,砺,梼,涛,迩,蝿,桧,侭,薮,篭,弥'.split(/,/);
|
385
406
|
var JIS_KANJI_REGEX_PATTERNS = JIS_OLD_KANJI.map(function (old, i) {
|
386
407
|
var pattern = old + "|" + JIS_NEW_KANJI[i];
|
387
408
|
return [pattern, old, JIS_NEW_KANJI[i]];
|
@@ -428,11 +449,6 @@
|
|
428
449
|
return _str;
|
429
450
|
};
|
430
451
|
|
431
|
-
var currentConfig = {
|
432
|
-
japaneseAddressesApi: 'https://geolonia.github.io/japanese-addresses/api/ja',
|
433
|
-
townCacheSize: 1000,
|
434
|
-
};
|
435
|
-
|
436
452
|
var iterator = function (Yallist) {
|
437
453
|
Yallist.prototype[Symbol.iterator] = function* () {
|
438
454
|
for (let walker = this.head; walker; walker = walker.next) {
|
@@ -1281,8 +1297,16 @@
|
|
1281
1297
|
}
|
1282
1298
|
});
|
1283
1299
|
}); };
|
1300
|
+
// 十六町 のように漢数字と町が連結しているか
|
1301
|
+
var isKanjiNumberFollewedByCho = function (targetTownName) {
|
1302
|
+
var xCho = targetTownName.match(/.町/g);
|
1303
|
+
if (!xCho)
|
1304
|
+
return false;
|
1305
|
+
var kanjiNumbers = dist.findKanjiNumbers(xCho[0]);
|
1306
|
+
return kanjiNumbers.length > 0;
|
1307
|
+
};
|
1284
1308
|
var getTownRegexPatterns = function (pref, city) { return __awaiter(void 0, void 0, void 0, function () {
|
1285
|
-
var cachedResult, towns, patterns;
|
1309
|
+
var cachedResult, pre_towns, townSet, towns, isKyoto, _i, pre_towns_1, town, originalTown, townAbbr, patterns;
|
1286
1310
|
return __generator(this, function (_a) {
|
1287
1311
|
switch (_a.label) {
|
1288
1312
|
case 0:
|
@@ -1290,11 +1314,32 @@
|
|
1290
1314
|
if (typeof cachedResult !== 'undefined') {
|
1291
1315
|
return [2 /*return*/, cachedResult];
|
1292
1316
|
}
|
1293
|
-
return [4 /*yield*/, getTowns(pref, city)
|
1294
|
-
// 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート
|
1295
|
-
];
|
1317
|
+
return [4 /*yield*/, getTowns(pref, city)];
|
1296
1318
|
case 1:
|
1297
|
-
|
1319
|
+
pre_towns = _a.sent();
|
1320
|
+
townSet = new Set(pre_towns.map(function (town) { return town.town; }));
|
1321
|
+
towns = [];
|
1322
|
+
isKyoto = city.match(/^京都市/);
|
1323
|
+
// 町丁目に「○○町」が含まれるケースへの対応
|
1324
|
+
// 通常は「○○町」のうち「町」の省略を許容し同義語として扱うが、まれに自治体内に「○○町」と「○○」が共存しているケースがある。
|
1325
|
+
// この場合は町の省略は許容せず、入力された住所は書き分けられているものとして正規化を行う。
|
1326
|
+
// 更に、「愛知県名古屋市瑞穂区十六町1丁目」漢数字を含むケースだと丁目や番地・号の正規化が不可能になる。このようなケースも除外。
|
1327
|
+
for (_i = 0, pre_towns_1 = pre_towns; _i < pre_towns_1.length; _i++) {
|
1328
|
+
town = pre_towns_1[_i];
|
1329
|
+
towns.push(town);
|
1330
|
+
originalTown = town.town;
|
1331
|
+
if (originalTown.indexOf('町') === -1)
|
1332
|
+
continue;
|
1333
|
+
townAbbr = originalTown.replace(/(?!^町)町/g, '') // NOTE: 冒頭の「町」は明らかに省略するべきではないので、除外
|
1334
|
+
;
|
1335
|
+
if (!isKyoto && // 京都は通り名削除の処理があるため、意図しないマッチになるケースがある。これを除く
|
1336
|
+
!townSet.has(townAbbr) &&
|
1337
|
+
!townSet.has("\u5927\u5B57" + townAbbr) && // 大字は省略されるため、大字〇〇と〇〇町がコンフリクトする。このケースを除外
|
1338
|
+
!isKanjiNumberFollewedByCho(originalTown)) {
|
1339
|
+
// エイリアスとして町なしのパターンを登録
|
1340
|
+
towns.push(__assign(__assign({}, town), { originalTown: originalTown, town: townAbbr }));
|
1341
|
+
}
|
1342
|
+
}
|
1298
1343
|
// 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート
|
1299
1344
|
towns.sort(function (a, b) {
|
1300
1345
|
var aLen = a.town.length;
|
@@ -1335,12 +1380,7 @@
|
|
1335
1380
|
var _pattern = "(" + patterns.join('|') + ")((\u4E01|\u753A)\u76EE?|\u756A(\u753A|\u4E01)|\u6761|\u8ED2|\u7DDA|\u306E\u753A?|\u5730\u5272|\u53F7|[-\uFF0D\uFE63\u2212\u2010\u2043\u2011\u2012\u2013\u2014\uFE58\u2015\u23AF\u23E4\u30FC\uFF70\u2500\u2501])";
|
1336
1381
|
return _pattern; // デバッグのときにめんどくさいので変数に入れる。
|
1337
1382
|
}));
|
1338
|
-
|
1339
|
-
return [town, ".*" + pattern];
|
1340
|
-
}
|
1341
|
-
else {
|
1342
|
-
return [town, "^" + pattern];
|
1343
|
-
}
|
1383
|
+
return [town, pattern];
|
1344
1384
|
});
|
1345
1385
|
cachedTownRegexes.set(pref + "-" + city, patterns);
|
1346
1386
|
return [2 /*return*/, patterns];
|
@@ -1383,35 +1423,48 @@
|
|
1383
1423
|
|
1384
1424
|
var browser = self.fetch || (self.fetch = require$$0.default || require$$0);
|
1385
1425
|
|
1426
|
+
var config$1 = currentConfig;
|
1386
1427
|
var defaultOption = {
|
1387
1428
|
level: 3,
|
1388
1429
|
};
|
1430
|
+
/**
|
1431
|
+
* @internal
|
1432
|
+
*/
|
1389
1433
|
var __internals = {
|
1390
1434
|
// default fetch
|
1391
1435
|
fetch: function (input) {
|
1392
|
-
var fileURL = new URL("" +
|
1436
|
+
var fileURL = new URL("" + config$1.japaneseAddressesApi + input);
|
1393
1437
|
return browser(fileURL.toString());
|
1394
1438
|
},
|
1395
1439
|
};
|
1396
1440
|
var normalizeTownName = function (addr, pref, city) { return __awaiter(void 0, void 0, void 0, function () {
|
1397
|
-
var townPatterns,
|
1398
|
-
return __generator(this, function (
|
1399
|
-
switch (
|
1441
|
+
var townPatterns, regexPrefixes, _i, regexPrefixes_1, regexPrefix, _a, townPatterns_1, _b, town, pattern, regex, match;
|
1442
|
+
return __generator(this, function (_c) {
|
1443
|
+
switch (_c.label) {
|
1400
1444
|
case 0:
|
1401
1445
|
addr = addr.trim().replace(/^大字/, '');
|
1402
1446
|
return [4 /*yield*/, getTownRegexPatterns(pref, city)];
|
1403
1447
|
case 1:
|
1404
|
-
townPatterns =
|
1405
|
-
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
|
1448
|
+
townPatterns = _c.sent();
|
1449
|
+
regexPrefixes = ['^'];
|
1450
|
+
if (city.match(/^京都市/)) {
|
1451
|
+
// 京都は通り名削除のために後方一致を使う
|
1452
|
+
regexPrefixes.push('.*');
|
1453
|
+
}
|
1454
|
+
for (_i = 0, regexPrefixes_1 = regexPrefixes; _i < regexPrefixes_1.length; _i++) {
|
1455
|
+
regexPrefix = regexPrefixes_1[_i];
|
1456
|
+
for (_a = 0, townPatterns_1 = townPatterns; _a < townPatterns_1.length; _a++) {
|
1457
|
+
_b = townPatterns_1[_a], town = _b[0], pattern = _b[1];
|
1458
|
+
regex = new RegExp("" + regexPrefix + pattern);
|
1459
|
+
match = addr.match(regex);
|
1460
|
+
if (match) {
|
1461
|
+
return [2 /*return*/, {
|
1462
|
+
town: town.originalTown || town.town,
|
1463
|
+
addr: addr.substr(match[0].length),
|
1464
|
+
lat: town.lat,
|
1465
|
+
lng: town.lng,
|
1466
|
+
}];
|
1467
|
+
}
|
1415
1468
|
}
|
1416
1469
|
}
|
1417
1470
|
return [2 /*return*/];
|
@@ -1426,6 +1479,7 @@
|
|
1426
1479
|
switch (_e.label) {
|
1427
1480
|
case 0:
|
1428
1481
|
addr = address
|
1482
|
+
.normalize('NFC')
|
1429
1483
|
.replace(/ /g, ' ')
|
1430
1484
|
.replace(/ +/g, ' ')
|
1431
1485
|
.replace(/([0-9A-Za-z]+)/g, function (match) {
|
@@ -1533,6 +1587,10 @@
|
|
1533
1587
|
addr = normalized.addr;
|
1534
1588
|
lat = parseFloat(normalized.lat);
|
1535
1589
|
lng = parseFloat(normalized.lng);
|
1590
|
+
if (Number.isNaN(lat) || Number.isNaN(lng)) {
|
1591
|
+
lat = null;
|
1592
|
+
lng = null;
|
1593
|
+
}
|
1536
1594
|
}
|
1537
1595
|
addr = addr
|
1538
1596
|
.replace(/^-/, '')
|
@@ -1591,7 +1649,7 @@
|
|
1591
1649
|
});
|
1592
1650
|
};
|
1593
1651
|
|
1594
|
-
var config =
|
1652
|
+
var config = config$1;
|
1595
1653
|
var normalize = normalize$1;
|
1596
1654
|
|
1597
1655
|
exports.config = config;
|
@@ -27,6 +27,17 @@ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
|
27
27
|
PERFORMANCE OF THIS SOFTWARE.
|
28
28
|
***************************************************************************** */
|
29
29
|
|
30
|
+
var __assign = function() {
|
31
|
+
__assign = Object.assign || function __assign(t) {
|
32
|
+
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
33
|
+
s = arguments[i];
|
34
|
+
for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];
|
35
|
+
}
|
36
|
+
return t;
|
37
|
+
};
|
38
|
+
return __assign.apply(this, arguments);
|
39
|
+
};
|
40
|
+
|
30
41
|
function __awaiter(thisArg, _arguments, P, generator) {
|
31
42
|
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
32
43
|
return new (P || (P = Promise))(function (resolve, reject) {
|
@@ -65,6 +76,11 @@ function __generator(thisArg, body) {
|
|
65
76
|
}
|
66
77
|
}
|
67
78
|
|
79
|
+
var currentConfig = {
|
80
|
+
japaneseAddressesApi: 'https://geolonia.github.io/japanese-addresses/api/ja',
|
81
|
+
townCacheSize: 1000,
|
82
|
+
};
|
83
|
+
|
68
84
|
var kan2num = function (string) {
|
69
85
|
var kanjiNumbers = japaneseNumeral.findKanjiNumbers(string);
|
70
86
|
for (var i = 0; i < kanjiNumbers.length; i++) {
|
@@ -115,8 +131,8 @@ var patchAddr = function (pref, city, town, addr) {
|
|
115
131
|
};
|
116
132
|
|
117
133
|
// JIS 第2水準 => 第1水準 及び 旧字体 => 新字体
|
118
|
-
var JIS_OLD_KANJI = '
|
119
|
-
var JIS_NEW_KANJI = '
|
134
|
+
var JIS_OLD_KANJI = '亞,圍,壹,榮,驛,應,櫻,假,會,懷,覺,樂,陷,歡,氣,戲,據,挾,區,徑,溪,輕,藝,儉,圈,權,嚴,恆,國,齋,雜,蠶,殘,兒,實,釋,從,縱,敍,燒,條,剩,壤,釀,眞,盡,醉,髓,聲,竊,淺,錢,禪,爭,插,騷,屬,對,滯,擇,單,斷,癡,鑄,敕,鐵,傳,黨,鬪,屆,腦,廢,發,蠻,拂,邊,瓣,寶,沒,滿,藥,餘,樣,亂,兩,禮,靈,爐,灣,惡,醫,飮,營,圓,歐,奧,價,繪,擴,學,罐,勸,觀,歸,犧,擧,狹,驅,莖,經,繼,缺,劍,檢,顯,廣,鑛,碎,劑,參,慘,絲,辭,舍,壽,澁,肅,將,證,乘,疊,孃,觸,寢,圖,穗,樞,齊,攝,戰,潛,雙,莊,裝,藏,續,體,臺,澤,膽,彈,蟲,廳,鎭,點,燈,盜,獨,貳,霸,賣,髮,祕,佛,變,辯,豐,飜,默,與,譽,謠,覽,獵,勵,齡,勞,壓,爲,隱,衞,鹽,毆,穩,畫,壞,殼,嶽,卷,關,顏,僞,舊,峽,曉,勳,惠,螢,鷄,縣,險,獻,驗,效,號,濟,册,棧,贊,齒,濕,寫,收,獸,處,稱,奬,淨,繩,讓,囑,愼,粹,隨,數,靜,專,踐,纖,壯,搜,總,臟,墮,帶,瀧,擔,團,遲,晝,聽,遞,轉,當,稻,讀,惱,拜,麥,拔,濱,竝,辨,舖,襃,萬,譯,豫,搖,來,龍,壘,隸,戀,樓,鰺,鶯,蠣,攪,竈,灌,諫,頸,礦,蘂,靱,賤,壺,礪,檮,濤,邇,蠅,檜,儘,藪,籠,彌'.split(/,/);
|
135
|
+
var JIS_NEW_KANJI = '亜,囲,壱,栄,駅,応,桜,仮,会,懐,覚,楽,陥,歓,気,戯,拠,挟,区,径,渓,軽,芸,倹,圏,権,厳,恒,国,斎,雑,蚕,残,児,実,釈,従,縦,叙,焼,条,剰,壌,醸,真,尽,酔,髄,声,窃,浅,銭,禅,争,挿,騒,属,対,滞,択,単,断,痴,鋳,勅,鉄,伝,党,闘,届,脳,廃,発,蛮,払,辺,弁,宝,没,満,薬,余,様,乱,両,礼,霊,炉,湾,悪,医,飲,営,円,欧,奥,価,絵,拡,学,缶,勧,観,帰,犠,挙,狭,駆,茎,経,継,欠,剣,検,顕,広,鉱,砕,剤,参,惨,糸,辞,舎,寿,渋,粛,将,証,乗,畳,嬢,触,寝,図,穂,枢,斉,摂,戦,潜,双,荘,装,蔵,続,体,台,沢,胆,弾,虫,庁,鎮,点,灯,盗,独,弐,覇,売,髪,秘,仏,変,弁,豊,翻,黙,与,誉,謡,覧,猟,励,齢,労,圧,為,隠,衛,塩,殴,穏,画,壊,殻,岳,巻,関,顔,偽,旧,峡,暁,勲,恵,蛍,鶏,県,険,献,験,効,号,済,冊,桟,賛,歯,湿,写,収,獣,処,称,奨,浄,縄,譲,嘱,慎,粋,随,数,静,専,践,繊,壮,捜,総,臓,堕,帯,滝,担,団,遅,昼,聴,逓,転,当,稲,読,悩,拝,麦,抜,浜,並,弁,舗,褒,万,訳,予,揺,来,竜,塁,隷,恋,楼,鯵,鴬,蛎,撹,竃,潅,諌,頚,砿,蕊,靭,賎,壷,砺,梼,涛,迩,蝿,桧,侭,薮,篭,弥'.split(/,/);
|
120
136
|
var JIS_KANJI_REGEX_PATTERNS = JIS_OLD_KANJI.map(function (old, i) {
|
121
137
|
var pattern = old + "|" + JIS_NEW_KANJI[i];
|
122
138
|
return [pattern, old, JIS_NEW_KANJI[i]];
|
@@ -163,11 +179,6 @@ var toRegexPattern = function (string) {
|
|
163
179
|
return _str;
|
164
180
|
};
|
165
181
|
|
166
|
-
var currentConfig = {
|
167
|
-
japaneseAddressesApi: 'https://geolonia.github.io/japanese-addresses/api/ja',
|
168
|
-
townCacheSize: 1000,
|
169
|
-
};
|
170
|
-
|
171
182
|
var cachedTownRegexes = new LRU__default['default']({
|
172
183
|
max: currentConfig.townCacheSize,
|
173
184
|
maxAge: 60 * 60 * 24 * 7 * 1000, // 7日間
|
@@ -249,8 +260,16 @@ var getTowns = function (pref, city) { return __awaiter(void 0, void 0, void 0,
|
|
249
260
|
}
|
250
261
|
});
|
251
262
|
}); };
|
263
|
+
// 十六町 のように漢数字と町が連結しているか
|
264
|
+
var isKanjiNumberFollewedByCho = function (targetTownName) {
|
265
|
+
var xCho = targetTownName.match(/.町/g);
|
266
|
+
if (!xCho)
|
267
|
+
return false;
|
268
|
+
var kanjiNumbers = japaneseNumeral.findKanjiNumbers(xCho[0]);
|
269
|
+
return kanjiNumbers.length > 0;
|
270
|
+
};
|
252
271
|
var getTownRegexPatterns = function (pref, city) { return __awaiter(void 0, void 0, void 0, function () {
|
253
|
-
var cachedResult, towns, patterns;
|
272
|
+
var cachedResult, pre_towns, townSet, towns, isKyoto, _i, pre_towns_1, town, originalTown, townAbbr, patterns;
|
254
273
|
return __generator(this, function (_a) {
|
255
274
|
switch (_a.label) {
|
256
275
|
case 0:
|
@@ -258,11 +277,32 @@ var getTownRegexPatterns = function (pref, city) { return __awaiter(void 0, void
|
|
258
277
|
if (typeof cachedResult !== 'undefined') {
|
259
278
|
return [2 /*return*/, cachedResult];
|
260
279
|
}
|
261
|
-
return [4 /*yield*/, getTowns(pref, city)
|
262
|
-
// 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート
|
263
|
-
];
|
280
|
+
return [4 /*yield*/, getTowns(pref, city)];
|
264
281
|
case 1:
|
265
|
-
|
282
|
+
pre_towns = _a.sent();
|
283
|
+
townSet = new Set(pre_towns.map(function (town) { return town.town; }));
|
284
|
+
towns = [];
|
285
|
+
isKyoto = city.match(/^京都市/);
|
286
|
+
// 町丁目に「○○町」が含まれるケースへの対応
|
287
|
+
// 通常は「○○町」のうち「町」の省略を許容し同義語として扱うが、まれに自治体内に「○○町」と「○○」が共存しているケースがある。
|
288
|
+
// この場合は町の省略は許容せず、入力された住所は書き分けられているものとして正規化を行う。
|
289
|
+
// 更に、「愛知県名古屋市瑞穂区十六町1丁目」漢数字を含むケースだと丁目や番地・号の正規化が不可能になる。このようなケースも除外。
|
290
|
+
for (_i = 0, pre_towns_1 = pre_towns; _i < pre_towns_1.length; _i++) {
|
291
|
+
town = pre_towns_1[_i];
|
292
|
+
towns.push(town);
|
293
|
+
originalTown = town.town;
|
294
|
+
if (originalTown.indexOf('町') === -1)
|
295
|
+
continue;
|
296
|
+
townAbbr = originalTown.replace(/(?!^町)町/g, '') // NOTE: 冒頭の「町」は明らかに省略するべきではないので、除外
|
297
|
+
;
|
298
|
+
if (!isKyoto && // 京都は通り名削除の処理があるため、意図しないマッチになるケースがある。これを除く
|
299
|
+
!townSet.has(townAbbr) &&
|
300
|
+
!townSet.has("\u5927\u5B57" + townAbbr) && // 大字は省略されるため、大字〇〇と〇〇町がコンフリクトする。このケースを除外
|
301
|
+
!isKanjiNumberFollewedByCho(originalTown)) {
|
302
|
+
// エイリアスとして町なしのパターンを登録
|
303
|
+
towns.push(__assign(__assign({}, town), { originalTown: originalTown, town: townAbbr }));
|
304
|
+
}
|
305
|
+
}
|
266
306
|
// 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート
|
267
307
|
towns.sort(function (a, b) {
|
268
308
|
var aLen = a.town.length;
|
@@ -303,12 +343,7 @@ var getTownRegexPatterns = function (pref, city) { return __awaiter(void 0, void
|
|
303
343
|
var _pattern = "(" + patterns.join('|') + ")((\u4E01|\u753A)\u76EE?|\u756A(\u753A|\u4E01)|\u6761|\u8ED2|\u7DDA|\u306E\u753A?|\u5730\u5272|\u53F7|[-\uFF0D\uFE63\u2212\u2010\u2043\u2011\u2012\u2013\u2014\uFE58\u2015\u23AF\u23E4\u30FC\uFF70\u2500\u2501])";
|
304
344
|
return _pattern; // デバッグのときにめんどくさいので変数に入れる。
|
305
345
|
}));
|
306
|
-
|
307
|
-
return [town, ".*" + pattern];
|
308
|
-
}
|
309
|
-
else {
|
310
|
-
return [town, "^" + pattern];
|
311
|
-
}
|
346
|
+
return [town, pattern];
|
312
347
|
});
|
313
348
|
cachedTownRegexes.set(pref + "-" + city, patterns);
|
314
349
|
return [2 /*return*/, patterns];
|
@@ -340,35 +375,48 @@ var getSameNamedPrefectureCityRegexPatterns = function (prefs, prefList) {
|
|
340
375
|
return cachedSameNamedPrefectureCityRegexPatterns;
|
341
376
|
};
|
342
377
|
|
378
|
+
var config$1 = currentConfig;
|
343
379
|
var defaultOption = {
|
344
380
|
level: 3,
|
345
381
|
};
|
382
|
+
/**
|
383
|
+
* @internal
|
384
|
+
*/
|
346
385
|
var __internals = {
|
347
386
|
// default fetch
|
348
387
|
fetch: function (input) {
|
349
|
-
var fileURL = new URL("" +
|
388
|
+
var fileURL = new URL("" + config$1.japaneseAddressesApi + input);
|
350
389
|
return unfetch__default['default'](fileURL.toString());
|
351
390
|
},
|
352
391
|
};
|
353
392
|
var normalizeTownName = function (addr, pref, city) { return __awaiter(void 0, void 0, void 0, function () {
|
354
|
-
var townPatterns,
|
355
|
-
return __generator(this, function (
|
356
|
-
switch (
|
393
|
+
var townPatterns, regexPrefixes, _i, regexPrefixes_1, regexPrefix, _a, townPatterns_1, _b, town, pattern, regex, match;
|
394
|
+
return __generator(this, function (_c) {
|
395
|
+
switch (_c.label) {
|
357
396
|
case 0:
|
358
397
|
addr = addr.trim().replace(/^大字/, '');
|
359
398
|
return [4 /*yield*/, getTownRegexPatterns(pref, city)];
|
360
399
|
case 1:
|
361
|
-
townPatterns =
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
400
|
+
townPatterns = _c.sent();
|
401
|
+
regexPrefixes = ['^'];
|
402
|
+
if (city.match(/^京都市/)) {
|
403
|
+
// 京都は通り名削除のために後方一致を使う
|
404
|
+
regexPrefixes.push('.*');
|
405
|
+
}
|
406
|
+
for (_i = 0, regexPrefixes_1 = regexPrefixes; _i < regexPrefixes_1.length; _i++) {
|
407
|
+
regexPrefix = regexPrefixes_1[_i];
|
408
|
+
for (_a = 0, townPatterns_1 = townPatterns; _a < townPatterns_1.length; _a++) {
|
409
|
+
_b = townPatterns_1[_a], town = _b[0], pattern = _b[1];
|
410
|
+
regex = new RegExp("" + regexPrefix + pattern);
|
411
|
+
match = addr.match(regex);
|
412
|
+
if (match) {
|
413
|
+
return [2 /*return*/, {
|
414
|
+
town: town.originalTown || town.town,
|
415
|
+
addr: addr.substr(match[0].length),
|
416
|
+
lat: town.lat,
|
417
|
+
lng: town.lng,
|
418
|
+
}];
|
419
|
+
}
|
372
420
|
}
|
373
421
|
}
|
374
422
|
return [2 /*return*/];
|
@@ -383,6 +431,7 @@ var normalize$1 = function (address, option) {
|
|
383
431
|
switch (_e.label) {
|
384
432
|
case 0:
|
385
433
|
addr = address
|
434
|
+
.normalize('NFC')
|
386
435
|
.replace(/ /g, ' ')
|
387
436
|
.replace(/ +/g, ' ')
|
388
437
|
.replace(/([0-9A-Za-z]+)/g, function (match) {
|
@@ -490,6 +539,10 @@ var normalize$1 = function (address, option) {
|
|
490
539
|
addr = normalized.addr;
|
491
540
|
lat = parseFloat(normalized.lat);
|
492
541
|
lng = parseFloat(normalized.lng);
|
542
|
+
if (Number.isNaN(lat) || Number.isNaN(lng)) {
|
543
|
+
lat = null;
|
544
|
+
lng = null;
|
545
|
+
}
|
493
546
|
}
|
494
547
|
addr = addr
|
495
548
|
.replace(/^-/, '')
|
@@ -551,7 +604,7 @@ var normalize$1 = function (address, option) {
|
|
551
604
|
var fetchOrReadFile = function (input) { return __awaiter(void 0, void 0, void 0, function () {
|
552
605
|
var fileURL, filePath_1;
|
553
606
|
return __generator(this, function (_a) {
|
554
|
-
fileURL = new URL("" +
|
607
|
+
fileURL = new URL("" + config$1.japaneseAddressesApi + input);
|
555
608
|
if (fileURL.protocol === 'http:' || fileURL.protocol === 'https:') {
|
556
609
|
return [2 /*return*/, unfetch__default['default'](fileURL.toString())];
|
557
610
|
}
|
@@ -577,7 +630,7 @@ var fetchOrReadFile = function (input) { return __awaiter(void 0, void 0, void 0
|
|
577
630
|
});
|
578
631
|
}); };
|
579
632
|
__internals.fetch = fetchOrReadFile;
|
580
|
-
var config =
|
633
|
+
var config = config$1;
|
581
634
|
var normalize = normalize$1;
|
582
635
|
|
583
636
|
exports.config = config;
|
@@ -1,3 +1,13 @@
|
|
1
|
+
/**
|
2
|
+
* normalize {@link Normalizer} の動作オプション。
|
3
|
+
*/
|
4
|
+
export interface Config {
|
5
|
+
/** 住所データを URL 形式で指定。 file:// 形式で指定するとローカルファイルを参照できます。 */
|
6
|
+
japaneseAddressesApi: string;
|
7
|
+
/** 町丁目のデータを何件までキャッシュするか。デフォルト 1,000 */
|
8
|
+
townCacheSize: number;
|
9
|
+
}
|
10
|
+
export declare const config: Config;
|
1
11
|
/**
|
2
12
|
* 住所の正規化結果として戻されるオブジェクト
|
3
13
|
*/
|
@@ -48,6 +58,9 @@ export declare type Normalizer = (input: string, option?: Option) => Promise<Nor
|
|
48
58
|
export declare type FetchLike = (input: string) => Promise<Response | {
|
49
59
|
json: () => Promise<unknown>;
|
50
60
|
}>;
|
61
|
+
/**
|
62
|
+
* @internal
|
63
|
+
*/
|
51
64
|
export declare const __internals: {
|
52
65
|
fetch: FetchLike;
|
53
66
|
};
|
@@ -1,17 +1,17 @@
|
|
1
1
|
{
|
2
2
|
"name": "@geolonia/normalize-japanese-addresses",
|
3
|
-
"version": "2.5.
|
3
|
+
"version": "2.5.8",
|
4
4
|
"description": "",
|
5
5
|
"browser": "./dist/main-browser.js",
|
6
6
|
"main": "./dist/main-node.js",
|
7
|
-
"types": "./dist/
|
7
|
+
"types": "./dist/normalize.d.ts",
|
8
8
|
"scripts": {
|
9
9
|
"test": "npm run test:main && npm run test:addresses && npm run test:node",
|
10
10
|
"test:main": "jest test/main.test.ts",
|
11
11
|
"test:addresses": "jest test/addresses.test.ts",
|
12
12
|
"test:node": "curl -sL https://github.com/geolonia/japanese-addresses/archive/refs/heads/master.tar.gz | tar xvfz - -C ./test > /dev/null 2>&1 && jest test/fs.test.ts",
|
13
13
|
"test:generate-test-data": "npx ts-node -O '{\"module\":\"commonjs\"}' test/build-test-data.ts > test/addresses.csv",
|
14
|
-
"lint": "eslint \"src/**/*.ts\" --fix",
|
14
|
+
"lint": "eslint \"src/**/*.ts\" \"test/**/*.test.ts\" --fix",
|
15
15
|
"build": "rm -rf ./dist && rollup -c ./rollup.config.ts"
|
16
16
|
},
|
17
17
|
"author": "",
|
@@ -51,7 +51,7 @@
|
|
51
51
|
}
|
52
52
|
},
|
53
53
|
"dependencies": {
|
54
|
-
"@geolonia/japanese-numeral": "^0.1.
|
54
|
+
"@geolonia/japanese-numeral": "^0.1.16",
|
55
55
|
"isomorphic-unfetch": "^3.1.0",
|
56
56
|
"lru-cache": "^6.0.0"
|
57
57
|
}
|
@@ -1,9 +1,4 @@
|
|
1
|
-
|
2
|
-
japaneseAddressesApi: string
|
3
|
-
|
4
|
-
/** 町丁目のデータを何件までキャッシュするか。デフォルト 1,000 */
|
5
|
-
townCacheSize: number
|
6
|
-
}
|
1
|
+
import { Config } from './normalize'
|
7
2
|
|
8
3
|
export const currentConfig: Config = {
|
9
4
|
japaneseAddressesApi: 'https://geolonia.github.io/japanese-addresses/api/ja',
|
@@ -1,12 +1,14 @@
|
|
1
1
|
import { toRegexPattern } from './dict'
|
2
2
|
import { kan2num } from './kan2num'
|
3
|
-
import { currentConfig } from '../config'
|
4
3
|
import LRU from 'lru-cache'
|
4
|
+
import { currentConfig } from '../config'
|
5
5
|
import { __internals } from '../normalize'
|
6
|
+
import { findKanjiNumbers } from '@geolonia/japanese-numeral'
|
6
7
|
|
7
8
|
type PrefectureList = { [key: string]: string[] }
|
8
9
|
interface SingleTown {
|
9
10
|
town: string
|
11
|
+
originalTown?: string
|
10
12
|
koaza: string
|
11
13
|
lat: string
|
12
14
|
lng: string
|
@@ -22,8 +24,9 @@ let cachedPrefecturePatterns: [string, string][] | undefined = undefined
|
|
22
24
|
const cachedCityPatterns: { [key: string]: [string, string][] } = {}
|
23
25
|
let cachedPrefectures: PrefectureList | undefined = undefined
|
24
26
|
const cachedTowns: { [key: string]: TownList } = {}
|
25
|
-
let cachedSameNamedPrefectureCityRegexPatterns:
|
26
|
-
|
27
|
+
let cachedSameNamedPrefectureCityRegexPatterns:
|
28
|
+
| [string, string][]
|
29
|
+
| undefined = undefined
|
27
30
|
|
28
31
|
export const getPrefectures = async () => {
|
29
32
|
if (typeof cachedPrefectures !== 'undefined') {
|
@@ -90,13 +93,50 @@ export const getTowns = async (pref: string, city: string) => {
|
|
90
93
|
return (cachedTowns[cacheKey] = towns)
|
91
94
|
}
|
92
95
|
|
96
|
+
// 十六町 のように漢数字と町が連結しているか
|
97
|
+
const isKanjiNumberFollewedByCho = (targetTownName: string) => {
|
98
|
+
const xCho = targetTownName.match(/.町/g)
|
99
|
+
if (!xCho) return false
|
100
|
+
const kanjiNumbers = findKanjiNumbers(xCho[0])
|
101
|
+
return kanjiNumbers.length > 0
|
102
|
+
}
|
103
|
+
|
93
104
|
export const getTownRegexPatterns = async (pref: string, city: string) => {
|
94
105
|
const cachedResult = cachedTownRegexes.get(`${pref}-${city}`)
|
95
106
|
if (typeof cachedResult !== 'undefined') {
|
96
107
|
return cachedResult
|
97
108
|
}
|
98
109
|
|
99
|
-
const
|
110
|
+
const pre_towns = await getTowns(pref, city)
|
111
|
+
const townSet = new Set(pre_towns.map((town) => town.town))
|
112
|
+
const towns = []
|
113
|
+
|
114
|
+
const isKyoto = city.match(/^京都市/)
|
115
|
+
|
116
|
+
// 町丁目に「○○町」が含まれるケースへの対応
|
117
|
+
// 通常は「○○町」のうち「町」の省略を許容し同義語として扱うが、まれに自治体内に「○○町」と「○○」が共存しているケースがある。
|
118
|
+
// この場合は町の省略は許容せず、入力された住所は書き分けられているものとして正規化を行う。
|
119
|
+
// 更に、「愛知県名古屋市瑞穂区十六町1丁目」漢数字を含むケースだと丁目や番地・号の正規化が不可能になる。このようなケースも除外。
|
120
|
+
for (const town of pre_towns) {
|
121
|
+
towns.push(town)
|
122
|
+
|
123
|
+
const originalTown = town.town
|
124
|
+
if (originalTown.indexOf('町') === -1) continue
|
125
|
+
const townAbbr = originalTown.replace(/(?!^町)町/g, '') // NOTE: 冒頭の「町」は明らかに省略するべきではないので、除外
|
126
|
+
if (
|
127
|
+
!isKyoto && // 京都は通り名削除の処理があるため、意図しないマッチになるケースがある。これを除く
|
128
|
+
!townSet.has(townAbbr) &&
|
129
|
+
!townSet.has(`大字${townAbbr}`) && // 大字は省略されるため、大字〇〇と〇〇町がコンフリクトする。このケースを除外
|
130
|
+
!isKanjiNumberFollewedByCho(originalTown)
|
131
|
+
) {
|
132
|
+
// エイリアスとして町なしのパターンを登録
|
133
|
+
towns.push({
|
134
|
+
...town,
|
135
|
+
originalTown,
|
136
|
+
town: townAbbr,
|
137
|
+
})
|
138
|
+
}
|
139
|
+
}
|
100
140
|
|
101
141
|
// 少ない文字数の地名に対してミスマッチしないように文字の長さ順にソート
|
102
142
|
towns.sort((a, b) => {
|
@@ -153,11 +193,7 @@ export const getTownRegexPatterns = async (pref: string, city: string) => {
|
|
153
193
|
),
|
154
194
|
)
|
155
195
|
|
156
|
-
|
157
|
-
return [town, `.*${pattern}`]
|
158
|
-
} else {
|
159
|
-
return [town, `^${pattern}`]
|
160
|
-
}
|
196
|
+
return [town, pattern]
|
161
197
|
}) as [SingleTown, string][]
|
162
198
|
|
163
199
|
cachedTownRegexes.set(`${pref}-${city}`, patterns)
|