jageocoder 2.1.2__tar.gz → 2.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jageocoder-2.1.2 → jageocoder-2.1.4}/PKG-INFO +5 -5
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/__init__.py +1 -1
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/aza_master.py +31 -127
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/itaiji.py +5 -4
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/node.py +42 -28
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/tree.py +12 -5
- {jageocoder-2.1.2 → jageocoder-2.1.4}/pyproject.toml +18 -15
- jageocoder-2.1.2/setup.py +0 -46
- {jageocoder-2.1.2 → jageocoder-2.1.4}/LICENSE +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/README.md +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/__main__.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/address.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/aliases.json +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/dataset.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/exceptions.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/itaiji_dic.json +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/module.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/result.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/rtree.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/strlib.py +0 -0
- {jageocoder-2.1.2 → jageocoder-2.1.4}/jageocoder/trie.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: jageocoder
|
|
3
|
-
Version: 2.1.2
|
|
3
|
+
Version: 2.1.4
|
|
4
4
|
Summary: A Japanese-address geocoder for Python.
|
|
5
5
|
Home-page: https://github.com/t-sagara/jageocoder/
|
|
6
6
|
License: The MIT License
|
|
@@ -18,17 +18,17 @@ Classifier: Programming Language :: Python :: 3.8
|
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.9
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
-
Classifier: Programming Language :: Python :: 3
|
|
22
|
-
Requires-Dist: Werkzeug (>=2.2.3)
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
22
|
Requires-Dist: certifi (>=2023.7.22)
|
|
24
|
-
Requires-Dist: cryptography (>=41.0.4)
|
|
23
|
+
Requires-Dist: cryptography (>=42.0.4)
|
|
25
24
|
Requires-Dist: deprecated (>=1.2.13,<2.0.0)
|
|
26
25
|
Requires-Dist: docopt (>=0.6.2,<0.7.0)
|
|
27
26
|
Requires-Dist: geographiclib (>=2.0,<3.0)
|
|
27
|
+
Requires-Dist: idna (>=3.7)
|
|
28
28
|
Requires-Dist: jaconv (>=0.3.4,<0.4.0)
|
|
29
29
|
Requires-Dist: marisa-trie (>=0.7.8,<0.8.0)
|
|
30
30
|
Requires-Dist: portabletab (>=0.3.3)
|
|
31
|
-
Requires-Dist: pycapnp (>=1.3.0,<2.0.0)
|
|
31
|
+
Requires-Dist: pycapnp
|
|
32
32
|
Requires-Dist: rtree (>=1.0.0,<2.0.0)
|
|
33
33
|
Requires-Dist: tqdm (>=4.00.0,<5.0.0)
|
|
34
34
|
Requires-Dist: urllib3 (>=2.0.6)
|
|
@@ -19,7 +19,7 @@ running the following steps.
|
|
|
19
19
|
>>> jageocoder.searchNode('<Japanese-address>')
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
__version__ = '2.1.2' # The package version
|
|
22
|
+
__version__ = '2.1.4' # The package version
|
|
23
23
|
__dictionary_version__ = '20230927' # Compatible dictionary version
|
|
24
24
|
__author__ = 'Takeshi Sagara <sagara@info-proto.com>'
|
|
25
25
|
|
|
@@ -26,80 +26,15 @@ class AzaMaster(BaseTable):
|
|
|
26
26
|
Standardized names for retrieval
|
|
27
27
|
aza_class: int
|
|
28
28
|
町字区分コード
|
|
29
|
-
1:大字・町, 2:丁目, 3
|
|
30
|
-
pref: str
|
|
31
|
-
都道府県名
|
|
32
|
-
pref_kana: str
|
|
33
|
-
都道府県名_カナ
|
|
34
|
-
pref_eng: str
|
|
35
|
-
都道府県名_英字
|
|
36
|
-
county: str
|
|
37
|
-
郡名
|
|
38
|
-
county_kana: str
|
|
39
|
-
郡名_カナ
|
|
40
|
-
county_eng: str
|
|
41
|
-
郡名_英字
|
|
42
|
-
city: str
|
|
43
|
-
市区町村名
|
|
44
|
-
city_kana: str
|
|
45
|
-
市区町村名_カナ
|
|
46
|
-
city_eng: str
|
|
47
|
-
市区町村名_英字
|
|
48
|
-
ward: str
|
|
49
|
-
政令市区名
|
|
50
|
-
ward_kana: str
|
|
51
|
-
政令市区名_カナ
|
|
52
|
-
ward_eng: str
|
|
53
|
-
政令市区名_英字
|
|
54
|
-
oaza: str
|
|
55
|
-
大字・町名
|
|
56
|
-
oaza_kana: str
|
|
57
|
-
大字・町名_カナ
|
|
58
|
-
oaza_eng: str
|
|
59
|
-
大字・町名_英字
|
|
60
|
-
chome: str
|
|
61
|
-
丁目名
|
|
62
|
-
chome_kana: str
|
|
63
|
-
丁目名_カナ
|
|
64
|
-
chome_num: str
|
|
65
|
-
丁目名_数字
|
|
66
|
-
koaza: str
|
|
67
|
-
小字名
|
|
68
|
-
koaza_kana: str
|
|
69
|
-
小字名_カナ
|
|
70
|
-
koaza_eng: str
|
|
71
|
-
小字名_英字
|
|
29
|
+
1:大字・町, 2:丁目, 3:小字, 4:なし, 5:道路方式の道路名
|
|
72
30
|
is_jukyo: bool
|
|
73
31
|
住居表示フラグ
|
|
74
|
-
|
|
75
|
-
住居表示方式コード
|
|
76
|
-
1:街区方式, 2:道路方式, 0:住居表示でない
|
|
77
|
-
is_oaza_alias: bool
|
|
78
|
-
大字・町_通称フラグ
|
|
79
|
-
is_koaza_alias: bool
|
|
80
|
-
小字_通称フラグ
|
|
81
|
-
is_oaza_gaiji: bool
|
|
82
|
-
大字・町_外字フラグ
|
|
83
|
-
is_koaza_gaiji: bool
|
|
84
|
-
小字_外字フラグ
|
|
85
|
-
status: int
|
|
86
|
-
状態フラグ
|
|
87
|
-
0:自治体確認待ち, 1:地方自治法の町字に該当, 2:地方自治法の町字に非該当, 3:不明
|
|
32
|
+
1:住居表示実施, 0:住居表示非実施, 2:実施・非実施区域が併存
|
|
88
33
|
start_count_type: int
|
|
89
34
|
起番フラグ
|
|
90
35
|
1:起番, 2:非起番, 0:登記情報に存在しない
|
|
91
|
-
valid_from: date
|
|
92
|
-
効力発生日
|
|
93
|
-
valid_to: date
|
|
94
|
-
廃止日
|
|
95
|
-
reference_code: int
|
|
96
|
-
原典資料コード
|
|
97
|
-
1:自治体資料, 11:位置参照情報・自治体資料, 12:位置参照情報・街区レベル,
|
|
98
|
-
13:位置参照情報・1/2500地形図, 10:位置参照情報・その他資料, 0:その他資料
|
|
99
36
|
postcode: str
|
|
100
37
|
郵便番号(セミコロン区切り)
|
|
101
|
-
note: str
|
|
102
|
-
備考
|
|
103
38
|
"""
|
|
104
39
|
|
|
105
40
|
__tablename__ = "aza_master"
|
|
@@ -123,44 +58,13 @@ class AzaMaster(BaseTable):
|
|
|
123
58
|
def from_csvrow(self, row: dict) -> dict:
|
|
124
59
|
names = self.get_names_from_csvrow(row)
|
|
125
60
|
aza_master_row = {
|
|
126
|
-
"code": row["
|
|
61
|
+
"code": row["lg_code"][0:5] + row["machiaza_id"],
|
|
127
62
|
"names": json.dumps(names, ensure_ascii=False),
|
|
128
63
|
"namesIndex": self.__class__.standardize_aza_name(names),
|
|
129
|
-
"azaClass": row.get("
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# "county": row.get("郡名", ""),
|
|
134
|
-
# "county_kana": row.get("郡名_カナ", ""),
|
|
135
|
-
# "county_eng": row.get("郡名_英字", ""),
|
|
136
|
-
# "city": row.get("市区町村名", ""),
|
|
137
|
-
# "city_kana": row.get("市区町村名_カナ", ""),
|
|
138
|
-
# "city_eng": row.get("市区町村名_英字", ""),
|
|
139
|
-
# "ward": row.get("政令市区名", ""),
|
|
140
|
-
# "ward_kana": row.get("政令市区名_カナ", ""),
|
|
141
|
-
# "ward_eng": row.get("政令市区名_英字", ""),
|
|
142
|
-
# "oaza": row.get("大字・町名", ""),
|
|
143
|
-
# "oaza_kana": row.get("大字・町名_カナ", ""),
|
|
144
|
-
# "oaza_eng": row.get("大字・町名_英字", ""),
|
|
145
|
-
# "chome": row.get("丁目名", ""),
|
|
146
|
-
# "chome_kana": row.get("丁目名_カナ", ""),
|
|
147
|
-
# "chome_num": row.get("丁目名_数字", ""),
|
|
148
|
-
# "koaza": row.get("小字名", ""),
|
|
149
|
-
# "koaza_kana": row.get("小字名_カナ", ""),
|
|
150
|
-
# "koaza_eng": row.get("小字名_英字", ""),
|
|
151
|
-
"isJukyo": row.get("住居表示フラグ", "") == "1",
|
|
152
|
-
# "jukyo_code": row.get("住居表示方式コード"),
|
|
153
|
-
# "is_oaza_alias": row.get("大字・町_通称フラグ", "") == "1",
|
|
154
|
-
# "is_koaza_alias": row.get("小字_通称フラグ", "") == "1",
|
|
155
|
-
# "is_oaza_gaiji": row.get("大字・町_外字フラグ", "") == "1",
|
|
156
|
-
# "is_koaza_gaiji": row.get("小字_外字フラグ", "") == "1",
|
|
157
|
-
# "status": row.get("状態フラグ"),
|
|
158
|
-
"startCountType": row.get("起番フラグ"),
|
|
159
|
-
# "valid_from": row.get("効力発生日"),
|
|
160
|
-
# "valid_to": row.get("廃止日"),
|
|
161
|
-
# "reference_code": row.get("原典資料コード"),
|
|
162
|
-
"postcode": row.get("郵便番号"),
|
|
163
|
-
# "note": row.get("備考"),
|
|
64
|
+
"azaClass": row.get("machiaza_type"),
|
|
65
|
+
"isJukyo": row.get("rsdt_addr_flg", "") == "1",
|
|
66
|
+
"startCountType": row.get("wake_num_flg"),
|
|
67
|
+
"postcode": row.get("post_code"),
|
|
164
68
|
}
|
|
165
69
|
for key in ("azaClass", "jukyoCode", "status",
|
|
166
70
|
"startCountType", "referenceCode",):
|
|
@@ -187,75 +91,75 @@ class AzaMaster(BaseTable):
|
|
|
187
91
|
return aza_master_row
|
|
188
92
|
|
|
189
93
|
def get_names_from_csvrow(self, row: dict) -> list:
|
|
190
|
-
code = row["
|
|
94
|
+
code = row["lg_code"][0:5] + row["machiaza_id"]
|
|
191
95
|
names = []
|
|
192
|
-
pref = row[
|
|
96
|
+
pref = row["pref"]
|
|
193
97
|
if pref:
|
|
194
98
|
names.append([
|
|
195
99
|
AddressLevel.PREF,
|
|
196
100
|
pref,
|
|
197
|
-
row[
|
|
198
|
-
row[
|
|
101
|
+
row["pref_kana"],
|
|
102
|
+
row["pref_roma"],
|
|
199
103
|
code[0:2]])
|
|
200
104
|
|
|
201
|
-
county = row[
|
|
105
|
+
county = row["county"]
|
|
202
106
|
if county:
|
|
203
107
|
names.append([
|
|
204
108
|
AddressLevel.COUNTY,
|
|
205
109
|
county,
|
|
206
|
-
row[
|
|
207
|
-
row[
|
|
110
|
+
row["county_kana"],
|
|
111
|
+
row["county_roma"],
|
|
208
112
|
code[0:3]])
|
|
209
113
|
|
|
210
|
-
city = row[
|
|
211
|
-
ward = row[
|
|
114
|
+
city = row["city"]
|
|
115
|
+
ward = row["ward"]
|
|
212
116
|
if ward:
|
|
213
117
|
names.append([
|
|
214
118
|
AddressLevel.CITY,
|
|
215
119
|
city,
|
|
216
|
-
row[
|
|
217
|
-
row[
|
|
120
|
+
row["city_kana"],
|
|
121
|
+
row["city_roma"],
|
|
218
122
|
code[0:3]])
|
|
219
123
|
|
|
220
124
|
names.append([
|
|
221
125
|
AddressLevel.WARD,
|
|
222
126
|
ward,
|
|
223
|
-
row[
|
|
224
|
-
row[
|
|
127
|
+
row["ward_kana"],
|
|
128
|
+
row["ward_roma"],
|
|
225
129
|
code[0:5]])
|
|
226
130
|
else:
|
|
227
131
|
names.append([
|
|
228
132
|
AddressLevel.CITY,
|
|
229
133
|
city,
|
|
230
|
-
row[
|
|
231
|
-
row[
|
|
134
|
+
row["city_kana"],
|
|
135
|
+
row["city_roma"],
|
|
232
136
|
code[0:5]])
|
|
233
137
|
|
|
234
|
-
oaza = row[
|
|
138
|
+
oaza = row["oaza_cho"]
|
|
235
139
|
if oaza:
|
|
236
140
|
names.append([
|
|
237
141
|
AddressLevel.OAZA,
|
|
238
142
|
oaza,
|
|
239
|
-
row[
|
|
240
|
-
row[
|
|
143
|
+
row["oaza_cho_kana"],
|
|
144
|
+
row["oaza_cho_roma"],
|
|
241
145
|
code[0:9]])
|
|
242
146
|
|
|
243
|
-
chome = row[
|
|
147
|
+
chome = row["chome"]
|
|
244
148
|
if chome:
|
|
245
149
|
names.append([
|
|
246
150
|
AddressLevel.AZA,
|
|
247
151
|
chome,
|
|
248
|
-
row[
|
|
249
|
-
row[
|
|
152
|
+
row["chome_kana"],
|
|
153
|
+
row["chome_number"] + 'chome',
|
|
250
154
|
code])
|
|
251
155
|
|
|
252
|
-
aza = row[
|
|
156
|
+
aza = row["koaza"]
|
|
253
157
|
if aza:
|
|
254
158
|
names.append([
|
|
255
159
|
AddressLevel.AZA,
|
|
256
160
|
aza,
|
|
257
|
-
row[
|
|
258
|
-
row[
|
|
161
|
+
row["koaza_kana"],
|
|
162
|
+
row["koaza_roma"],
|
|
259
163
|
code])
|
|
260
164
|
|
|
261
165
|
return names
|
|
@@ -122,11 +122,11 @@ class Converter(object):
|
|
|
122
122
|
# Patterns that cannot be omitted as AZA names
|
|
123
123
|
hyphens = re.escape(strlib.hyphen)
|
|
124
124
|
self.re_not_ommisible_aza_patterns = re.compile(
|
|
125
|
-
'(' +
|
|
125
|
+
r'([^。、,.0-9a-zA-Z\t\n\r\f\v]{,15}?)(' +
|
|
126
126
|
rf'{numbers}[条線丁区番号{hyphens}]|' +
|
|
127
127
|
rf'[{self.chiban_heads}]{numbers}|' +
|
|
128
128
|
rf'{numbers}$' +
|
|
129
|
-
')'
|
|
129
|
+
r')'
|
|
130
130
|
)
|
|
131
131
|
|
|
132
132
|
# Patterns that do not follow behind nodes at that level
|
|
@@ -475,11 +475,12 @@ class Converter(object):
|
|
|
475
475
|
int
|
|
476
476
|
Number of characters that can be omitted.
|
|
477
477
|
"""
|
|
478
|
-
m = self.re_not_ommisible_aza_patterns.
|
|
478
|
+
m = self.re_not_ommisible_aza_patterns.match(string[pos:])
|
|
479
479
|
if m is None:
|
|
480
480
|
return 0
|
|
481
481
|
|
|
482
|
-
n =
|
|
482
|
+
n = len(m.group(1))
|
|
483
|
+
# n = string[pos:].find(m.group(0))
|
|
483
484
|
return n
|
|
484
485
|
|
|
485
486
|
candidates = []
|
|
@@ -507,12 +507,12 @@ class AddressNode(object):
|
|
|
507
507
|
return new_node
|
|
508
508
|
|
|
509
509
|
def search_child_with_criteria(
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
510
|
+
self,
|
|
511
|
+
pattern: str,
|
|
512
|
+
min_candidate: Optional[str] = None,
|
|
513
|
+
gt_candidate: Optional[str] = None,
|
|
514
|
+
max_level: Optional[int] = None,
|
|
515
|
+
) -> List[AddressNode]:
|
|
516
516
|
"""
|
|
517
517
|
Search for children nodes that satisfy the specified conditions.
|
|
518
518
|
|
|
@@ -528,8 +528,6 @@ class AddressNode(object):
|
|
|
528
528
|
that satisfies the condition as the name of a child node.
|
|
529
529
|
max_level: int, optional
|
|
530
530
|
Maximum level of child nodes; unlimited if None.
|
|
531
|
-
require_coordinates: bool [False]
|
|
532
|
-
If set to True, the child node must have valid coordinates.
|
|
533
531
|
|
|
534
532
|
Returns
|
|
535
533
|
-------
|
|
@@ -537,8 +535,7 @@ class AddressNode(object):
|
|
|
537
535
|
A list of all child nodes that satisfy the specified condition.
|
|
538
536
|
"""
|
|
539
537
|
logger.debug((
|
|
540
|
-
"Called with self:'{}'({}), pattern:{}, min:'{}', gt:'{}', "
|
|
541
|
-
"max_level: {}, require_coordinates: {}."
|
|
538
|
+
"Called with self:'{}'({}), pattern:{}, min:'{}', gt:'{}', max_level: {}."
|
|
542
539
|
).format(
|
|
543
540
|
self.name,
|
|
544
541
|
self.id,
|
|
@@ -546,7 +543,6 @@ class AddressNode(object):
|
|
|
546
543
|
min_candidate,
|
|
547
544
|
gt_candidate,
|
|
548
545
|
max_level,
|
|
549
|
-
require_coordinates
|
|
550
546
|
))
|
|
551
547
|
re_pattern = re.compile(pattern)
|
|
552
548
|
address_node = self.table.get_record(pos=self.id)
|
|
@@ -598,12 +594,10 @@ class AddressNode(object):
|
|
|
598
594
|
|
|
599
595
|
if re_pattern.match(candidate.name_index) and \
|
|
600
596
|
(max_level is None or candidate.level <= max_level):
|
|
601
|
-
if
|
|
602
|
-
children.append(candidate)
|
|
603
|
-
else:
|
|
597
|
+
if candidate.y > 90.0:
|
|
604
598
|
candidate = candidate.add_dummy_coordinates()
|
|
605
|
-
|
|
606
|
-
|
|
599
|
+
|
|
600
|
+
children.append(candidate)
|
|
607
601
|
|
|
608
602
|
next_pos = candidate.sibling_id
|
|
609
603
|
|
|
@@ -686,7 +680,7 @@ class AddressNode(object):
|
|
|
686
680
|
min_candidate=min_candidate,
|
|
687
681
|
gt_candidate=gt_candidate,
|
|
688
682
|
max_level=max_level,
|
|
689
|
-
|
|
683
|
+
)
|
|
690
684
|
|
|
691
685
|
# Check if the index begins with an extra character of
|
|
692
686
|
# the current node.
|
|
@@ -702,12 +696,17 @@ class AddressNode(object):
|
|
|
702
696
|
if len(candidates) > 0:
|
|
703
697
|
new_candidates = []
|
|
704
698
|
for candidate in candidates:
|
|
699
|
+
if candidate.node.id == self.id:
|
|
700
|
+
new_candidates.append(candidate)
|
|
701
|
+
continue
|
|
702
|
+
|
|
705
703
|
new_candidate = Result(
|
|
706
704
|
candidate.node,
|
|
707
705
|
index[0] + candidate.matched,
|
|
708
706
|
l_optional_prefix + candidate.nchars)
|
|
709
707
|
new_candidates.append(new_candidate)
|
|
710
708
|
|
|
709
|
+
new_candidates.append(Result(self, "", 0))
|
|
711
710
|
return new_candidates
|
|
712
711
|
|
|
713
712
|
return []
|
|
@@ -738,6 +737,7 @@ class AddressNode(object):
|
|
|
738
737
|
|
|
739
738
|
if len(new_candidates) > 0:
|
|
740
739
|
candidates += new_candidates
|
|
740
|
+
candidates.append(Result(self, "", 0))
|
|
741
741
|
|
|
742
742
|
# Processes the region's own rules.
|
|
743
743
|
parent_node = self.get_parent()
|
|
@@ -757,10 +757,11 @@ class AddressNode(object):
|
|
|
757
757
|
"child:{} match {} chars".format(child, offset))
|
|
758
758
|
processed_nodes.add(child.id)
|
|
759
759
|
logger.debug(f"{child.name}({child.id}) marked as processed")
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
760
|
+
new_candidates = child.search_recursive(
|
|
761
|
+
tree=tree,
|
|
762
|
+
index=rest_index,
|
|
763
|
+
processed_nodes=processed_nodes)
|
|
764
|
+
for cand in new_candidates:
|
|
764
765
|
candidates.append(Result(
|
|
765
766
|
cand[0],
|
|
766
767
|
optional_prefix +
|
|
@@ -768,12 +769,15 @@ class AddressNode(object):
|
|
|
768
769
|
l_optional_prefix +
|
|
769
770
|
len(child.name_index) + len(cand[1])))
|
|
770
771
|
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
tree, index, processed_nodes
|
|
774
|
-
)
|
|
772
|
+
if len(new_candidates) > 0:
|
|
773
|
+
candidates.append(Result(self, "", 0))
|
|
775
774
|
|
|
776
|
-
# Search for
|
|
775
|
+
# Search for nodes with possible address changes.
|
|
776
|
+
candidates += self.check_redirect(
|
|
777
|
+
tree, index, processed_nodes
|
|
778
|
+
)
|
|
779
|
+
|
|
780
|
+
# Search for subnodes with queries excludes Aza-name candidates.
|
|
777
781
|
omissible_index = None
|
|
778
782
|
aza_skip = tree.get_config('aza_skip')
|
|
779
783
|
if len(candidates) == 0 or \
|
|
@@ -818,6 +822,7 @@ class AddressNode(object):
|
|
|
818
822
|
processed_nodes=processed_nodes)
|
|
819
823
|
tree.set_config(aza_skip=aza_skip)
|
|
820
824
|
if sub_candidates[0].matched != '':
|
|
825
|
+
added = 0
|
|
821
826
|
for cand in sub_candidates:
|
|
822
827
|
if cand.node.level < AddressLevel.BLOCK and \
|
|
823
828
|
cand.node.name_index not in \
|
|
@@ -831,6 +836,10 @@ class AddressNode(object):
|
|
|
831
836
|
optional_prefix +
|
|
832
837
|
index[0:azalen] + cand.matched,
|
|
833
838
|
l_optional_prefix + cand.nchars))
|
|
839
|
+
added += 1
|
|
840
|
+
|
|
841
|
+
if added > 0:
|
|
842
|
+
candidates.append(Result(self, "", 0))
|
|
834
843
|
|
|
835
844
|
if False and len(candidates) == 0:
|
|
836
845
|
# Search common names
|
|
@@ -879,6 +888,7 @@ class AddressNode(object):
|
|
|
879
888
|
processed_nodes: Set[int]
|
|
880
889
|
) -> List[Result]:
|
|
881
890
|
auto_redirect = tree.get_config('auto_redirect')
|
|
891
|
+
require_coordinates = tree.get_config('require_coordinates')
|
|
882
892
|
if auto_redirect is False:
|
|
883
893
|
return []
|
|
884
894
|
|
|
@@ -895,9 +905,12 @@ class AddressNode(object):
|
|
|
895
905
|
processed_nodes.add(self.id)
|
|
896
906
|
logger.debug(
|
|
897
907
|
f"{self.name}({self.id}) marked as processed")
|
|
898
|
-
tree.set_config(auto_redirect=False)
|
|
908
|
+
tree.set_config(auto_redirect=False, require_coordinates=False)
|
|
899
909
|
redirect_results = tree.search_by_trie(ref)
|
|
900
|
-
tree.set_config(
|
|
910
|
+
tree.set_config(
|
|
911
|
+
auto_redirect=auto_redirect,
|
|
912
|
+
require_coordinates=require_coordinates
|
|
913
|
+
)
|
|
901
914
|
for node_id, val in redirect_results.items():
|
|
902
915
|
if node_id in processed_nodes:
|
|
903
916
|
continue
|
|
@@ -1087,6 +1100,7 @@ class AddressNode(object):
|
|
|
1087
1100
|
|
|
1088
1101
|
if pos == 0:
|
|
1089
1102
|
break
|
|
1103
|
+
|
|
1090
1104
|
else: # strict mode
|
|
1091
1105
|
# Consider omittable only those portions that can be omitted
|
|
1092
1106
|
# with by Aza-master.
|
|
@@ -1223,10 +1223,10 @@ class AddressTree(object):
|
|
|
1223
1223
|
|
|
1224
1224
|
if node.y > 90.0 and self.get_config('require_coordinates'):
|
|
1225
1225
|
node = node.add_dummy_coordinates()
|
|
1226
|
-
if node.y > 90.0:
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1226
|
+
# if node.y > 90.0:
|
|
1227
|
+
# logger.debug("Node {}({}) has no coordinates.".format(
|
|
1228
|
+
# node.name, node.id))
|
|
1229
|
+
# continue
|
|
1230
1230
|
|
|
1231
1231
|
if min_key == '' and node.level <= AddressLevel.WARD:
|
|
1232
1232
|
# To make the process quicker, once a node higher
|
|
@@ -1315,9 +1315,16 @@ class AddressTree(object):
|
|
|
1315
1315
|
|
|
1316
1316
|
if inside != 1:
|
|
1317
1317
|
msg = "Node {}({}) is not in the target area."
|
|
1318
|
-
logger.debug(msg.format(
|
|
1318
|
+
logger.debug(msg.format(
|
|
1319
|
+
cand.node.name, cand.node.id))
|
|
1319
1320
|
continue
|
|
1320
1321
|
|
|
1322
|
+
if self.get_config("require_coordinates") and cand.node.y > 90.0:
|
|
1323
|
+
logger.debug("Node {}({}) has no coordinates.".format(
|
|
1324
|
+
cand.node.name, cand.node.id
|
|
1325
|
+
))
|
|
1326
|
+
continue
|
|
1327
|
+
|
|
1321
1328
|
_len = offset + cand.nchars
|
|
1322
1329
|
_part = offset + len(cand.matched)
|
|
1323
1330
|
msg = "candidate: {} ({})"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "jageocoder"
|
|
3
|
-
version = "2.1.2"
|
|
3
|
+
version = "2.1.4"
|
|
4
4
|
description = "A Japanese-address geocoder for Python."
|
|
5
5
|
authors = ["Takeshi Sagara <sagara@info-proto.com>"]
|
|
6
6
|
repository = "https://github.com/t-sagara/jageocoder/"
|
|
@@ -21,28 +21,31 @@ include = ["itaiji_dic.json", "islands.json"]
|
|
|
21
21
|
|
|
22
22
|
[tool.poetry.dependencies]
|
|
23
23
|
python = "^3.7"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
docopt = "^0.6.2"
|
|
24
|
+
certifi = ">=2023.7.22"
|
|
25
|
+
cryptography = ">=42.0.4"
|
|
27
26
|
deprecated = "^1.2.13"
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
docopt = "^0.6.2"
|
|
28
|
+
geographiclib = "^2.0"
|
|
29
|
+
idna = ">=3.7"
|
|
30
|
+
jaconv = "^0.3.4"
|
|
31
|
+
marisa-trie = "^0.7.8"
|
|
32
|
+
pycapnp = "*"
|
|
30
33
|
portabletab = ">=0.3.3"
|
|
31
|
-
tqdm = "^4.00.0"
|
|
32
34
|
rtree = "^1.0.0"
|
|
33
|
-
|
|
34
|
-
certifi = ">=2023.7.22"
|
|
35
|
-
cryptography = ">=41.0.4"
|
|
35
|
+
tqdm = "^4.00.0"
|
|
36
36
|
urllib3 = ">=2.0.6"
|
|
37
37
|
|
|
38
38
|
[tool.poetry.group.dev.dependencies]
|
|
39
|
-
pytest = "^7.2.1"
|
|
40
|
-
flask = "^2.2.5"
|
|
41
|
-
flask-cors = "^3.0.10"
|
|
42
|
-
sphinx = ">=5.0.0,<6.0.0"
|
|
43
|
-
sphinx-rtd-theme = "^1.2.0"
|
|
44
39
|
twine = "^4.0.2"
|
|
45
40
|
|
|
41
|
+
|
|
42
|
+
[tool.poetry.group.doc.dependencies]
|
|
43
|
+
sphinx-rtd-theme = "^1.2.0"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
[tool.poetry.group.test.dependencies]
|
|
47
|
+
pytest = "^7.2.1"
|
|
48
|
+
|
|
46
49
|
[build-system]
|
|
47
50
|
requires = ["poetry-core"]
|
|
48
51
|
build-backend = "poetry.core.masonry.api"
|
jageocoder-2.1.2/setup.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
from setuptools import setup
|
|
3
|
-
|
|
4
|
-
packages = \
|
|
5
|
-
['jageocoder']
|
|
6
|
-
|
|
7
|
-
package_data = \
|
|
8
|
-
{'': ['*']}
|
|
9
|
-
|
|
10
|
-
install_requires = \
|
|
11
|
-
['Werkzeug>=2.2.3',
|
|
12
|
-
'certifi>=2023.7.22',
|
|
13
|
-
'cryptography>=41.0.4',
|
|
14
|
-
'deprecated>=1.2.13,<2.0.0',
|
|
15
|
-
'docopt>=0.6.2,<0.7.0',
|
|
16
|
-
'geographiclib>=2.0,<3.0',
|
|
17
|
-
'jaconv>=0.3.4,<0.4.0',
|
|
18
|
-
'marisa-trie>=0.7.8,<0.8.0',
|
|
19
|
-
'portabletab>=0.3.3',
|
|
20
|
-
'pycapnp>=1.3.0,<2.0.0',
|
|
21
|
-
'rtree>=1.0.0,<2.0.0',
|
|
22
|
-
'tqdm>=4.00.0,<5.0.0',
|
|
23
|
-
'urllib3>=2.0.6']
|
|
24
|
-
|
|
25
|
-
entry_points = \
|
|
26
|
-
{'console_scripts': ['jageocoder = jageocoder.__main__:main']}
|
|
27
|
-
|
|
28
|
-
setup_kwargs = {
|
|
29
|
-
'name': 'jageocoder',
|
|
30
|
-
'version': '2.1.2',
|
|
31
|
-
'description': 'A Japanese-address geocoder for Python.',
|
|
32
|
-
'long_description': '# jageocoder - A Python Japanese geocoder\n\n日本語版は README_ja.md をお読みください。\n\nThis is a Python port of the Japanese-address geocoder used in CSIS at the University of Tokyo\'s ["Address Matching Service"](https://newspat.csis.u-tokyo.ac.jp/geocode/modules/addmatch/index.php?content_id=1) and [GSI Maps](https://maps.gsi.go.jp/).\n\n# Getting Started\n\nThis package provides address-geocoding functionality for Python programs. The basic usage is to specify a dictionary with `init()` then call `search()` to get geocoding results.\n\n```python\npython\n>>> import jageocoder\n>>> jageocoder.init()\n>>> jageocoder.search(\'新宿区西新宿2-8-1\')\n{\'matched\': \'新宿区西新宿2-8-\', \'candidates\': [{\'id\': 5961406, \'name\': \'8番\', \'x\': 139.691778, \'y\': 35.689627, \'level\': 7, \'note\': None, \'fullname\': [\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\', \'8番\']}]}\n```\n\n# How to install\n\n## Prerequisites\n\nRequires Python 3.7.x or later.\n\nAll other required packages will be installed automatically.\n\n## Install instructions\n\n- Install the package with `pip install jageocoder`\n- Download an address database file compatible with that version from \n [here](https://www.info-proto.com/static/jageocoder/latest/v2/)\n- Install the dictionary with `install-dictionary` command\n\n```sh\npip install jageocoder\nwget https://www.info-proto.com/static/jageocoder/latest/v2/jukyo_all_v20.zip\njageocoder install-dictionary jukyo_all_v20.zip\n```\n\nThe dictionary database will be installed under\n`{sys.prefix}/jageocoder/db2/` by default,\nhowever if the user doesn\'t have write permission there,\n`{site.USER_DATA}/jageocoder/db2/` instead.\n\nIf you need to know the location of the dictionary directory,\nperform `get-db-dir` command as follows. 
(Or call\n`jageocoder.get_db_dir()` in your script)\n\n```sh\njageocoder get-db-dir\n```\n\nIf you prefer to create it in another location, set the environment\nvariable `JAGEOCODER_DB2_DIR` before executing `install_dictionary()`\nto specify the directory.\n\n```sh\nexport JAGEOCODER_DB2_DIR=\'/usr/local/share/jageocoder/db2\'\ninstall-dictionary <db-file>\n```\n\n## Uninstall instructions\n\nRemove the directory containing the database, or perform \n`uninstall-dictionary` command as follows.\n\n```sh\njageocoder uninstall-dictionary\n```\n\nThen, uninstall the package with `pip` command.\n\n```sh\npip uninstall jageocoder\n```\n\n# How to use\n\n## Use from the command line\n\nWe assume that jageocoder will be embedded in applications\nas a library and used by calling the API, but for testing purposes,\nyou can check the geocoding results with the following command.\n\n```sh\njageocoder search 新宿区西新宿2-8-1\n```\n\nYou can check the list of available commands with `--help`.\n\n```sh\njageocoder --help\n```\n\n## Using API\n\nFirst, import jageocoder and initialize it with `init()`.\n\n```\n>>> import jageocoder\n>>> jageocoder.init()\n```\n\n### Search for latitude and longitude by address\n\nUse `search()` to search for the address you want to check the longitude and latitude of.\n\nThe `search()` function returns a dict with `matched` as\nthe matched string and `candidates` as the list of search results.\n(The results are formatted for better viewing)\n\nEach element of `candidates` contains the information of an address node (AddressNode).\n\n```\n>>> jageocoder.search(\'新宿区西新宿2-8-1\')\n{\n \'matched\': \'新宿区西新宿2-8-\',\n \'candidates\': [{\n \'id\': 12299846, \'name\': \'8番\',\n \'x\': 139.691778, \'y\': 35.689627, \'level\': 7, \'note\': None,\n \'fullname\': [\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\', \'8番\']\n }]\n}\n```\n\nThe meaning of the items is as follows\n\n- id: ID in the database\n- name: Address notation\n- x: longitude\n- y: latitude\n- level: Address 
level (1:Prefecture, 2:County, 3:City and 23 district,\n 4:Ward, 5:Oaza, 6:Aza and Chome, 7:Block, 8:Building)\n- note: Notes such as city codes\n- fullname: List of address notations from the prefecture level to this node\n\n### Search for addresses by longitude and latitude\n\nNote: This method is not available in v2 series.\n\n### Explore the attribute information of an address\n\nUse `searchNode()` to retrieve information about an address.\n\nThis function returns a list of type `jageocoder.result.Result` .\nYou can access the address node from node element of the Result object.\n\n```\n>>> results = jageocoder.searchNode(\'新宿区西新宿2-8-1\')\n>>> len(results)\n1\n>>> results[0].matched\n\'新宿区西新宿2-8-\'\n>>> type(results[0].node)\n<class \'jageocoder.node.AddressNode\'>\n>>> node = results[0].node\n>>> node.get_fullname()\n[\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\', \'8番\']\n```\n\n#### Get GeoJSON representation\n\nYou can use the `as_geojson()` method of the Result and AddressNode\nobjects to obtain the GeoJSON representation.\n\n```\n>>> results[0].as_geojson()\n{\'type\': \'Feature\', \'geometry\': {\'type\': \'Point\', \'coordinates\': [139.691778, 35.689627]}, \'properties\': {\'id\': 12299851, \'name\': \'8番\', \'level\': 7, \'note\': None, \'fullname\': [\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\', \'8番\'], \'matched\': \'新宿区西新宿2-8-\'}}\n>>> results[0].node.as_geojson()\n{\'type\': \'Feature\', \'geometry\': {\'type\': \'Point\', \'coordinates\': [139.691778, 35.689627]}, \'properties\': {\'id\': 12299851, \'name\': \'8番\', \'level\': 7, \'note\': None, \'fullname\': [\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\', \'8番\']}}\n```\n\n#### Get the local government codes\n\nThere are two types of local government codes: JISX0402 (5-digit) and\nLocal Government Code (6-digit).\n\nYou can also obtain the prefecture code JISX0401 (2 digits).\n\n```\n>>> node.get_city_jiscode() # 5-digit code\n\'13104\'\n>>> node.get_city_local_authority_code() # 6-digit code\n\'131041\'\n>>> 
node.get_pref_jiscode() # prefecture code\n\'13\'\n```\n\n#### Get link URLs to maps\n\nGenerate URLs to link to GSI and Google maps.\n\n```\n>>> node.get_gsimap_link()\n\'https://maps.gsi.go.jp/#16/35.689627/139.691778/\'\n>>> node.get_googlemap_link()\n\'https://maps.google.com/maps?q=35.689627,139.691778&z=16\'\n```\n\n#### Traverse the parent node\n\nA "parent node" is a node that represents a level above the address.\nGet the node by attribute `parent`.\n\nNow the `node` points to \'8番\', so the parent node will be \'二丁目\'.\n\n```\n>>> parent = node.parent\n>>> parent.get_fullname()\n[\'東京都\', \'新宿区\', \'西新宿\', \'二丁目\']\n>>> parent.x, parent.y\n(139.691774, 35.68945)\n```\n\n#### Traverse the child nodes\n\nA "child node" is a node that represents a level below the address.\nGet the node by attribute `children`.\n\nThere is one parent node, but there are multiple child nodes.\nThe actual return is a SQL query object, but it can be looped through\nwith an iterator or cast to a list.\n\nNow the `parent` points to \'二丁目\', so the child node will be\nthe block number (○番) contained therein.\n\n```\n>>> parent.children\n<sqlalchemy.orm.dynamic.AppenderQuery object at 0x7fbc08404b38>\n>>> [child.name for child in parent.children]\n[\'10番\', \'11番\', \'1番\', \'2番\', \'3番\', \'4番\', \'5番\', \'6番\', \'7番\', \'8番\', \'9番\']\n```\n\n## Create your own dictionary\n\nConsider using [jageocoder-converter](https://github.com/t-sagara/jageocoder-converter).\n\n## ToDos\n\n- Supporting address changes\n\n The functionality to handle address changes due to municipal consolidation, etc.\n has already been implemented in the C++ version, but will be implemented\n in this package in the future.\n\n## Contributing\n\nAddress notation varies. 
So suggestions for logic improvements are welcome.\nPlease submit an issue with examples of address notations in use and how they should be parsed.\n\n## Authors\n\n* **Takeshi SAGARA** - [Info-proto Co.,Ltd.](https://www.info-proto.com/)\n\n## License\n\nThis project is licensed under [the MIT License](https://opensource.org/licenses/mit-license.php).\n\nThis is not the scope of the dictionary data license. Please follow the license of the respective dictionary data.\n\n## Acknowledgements\n\nWe would like to thank CSIS for allowing us to provide address matching services\non their institutional website for over 20 years.\n\nWe would also like to thank Professor Asanobu Kitamoto of NII for providing us\nwith a large sample of areas using the older address system and for his many help\nin confirming the results of our analysis.\n',
|
|
33
|
-
'author': 'Takeshi Sagara',
|
|
34
|
-
'author_email': 'sagara@info-proto.com',
|
|
35
|
-
'maintainer': 'None',
|
|
36
|
-
'maintainer_email': 'None',
|
|
37
|
-
'url': 'https://github.com/t-sagara/jageocoder/',
|
|
38
|
-
'packages': packages,
|
|
39
|
-
'package_data': package_data,
|
|
40
|
-
'install_requires': install_requires,
|
|
41
|
-
'entry_points': entry_points,
|
|
42
|
-
'python_requires': '>=3.7,<4.0',
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
setup(**setup_kwargs)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|