jageocoder 2.1.7.dev2__tar.gz → 2.1.7rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/PKG-INFO +1 -1
  2. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/__init__.py +1 -1
  3. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/node.py +25 -1
  4. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/remote.py +24 -0
  5. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/rtree.py +138 -110
  6. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/tree.py +51 -13
  7. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/pyproject.toml +1 -1
  8. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/LICENSE +0 -0
  9. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/README.md +0 -0
  10. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/__main__.py +0 -0
  11. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/address.py +0 -0
  12. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/aza_master.py +0 -0
  13. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/dataset.py +0 -0
  14. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/exceptions.py +0 -0
  15. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/itaiji.py +0 -0
  16. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/itaiji_dic.json +0 -0
  17. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/module.py +0 -0
  18. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/result.py +0 -0
  19. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/strlib.py +0 -0
  20. {jageocoder-2.1.7.dev2 → jageocoder-2.1.7rc1}/jageocoder/trie.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: jageocoder
3
- Version: 2.1.7.dev2
3
+ Version: 2.1.7rc1
4
4
  Summary: A Japanese-address geocoder for Python.
5
5
  Home-page: https://github.com/t-sagara/jageocoder/
6
6
  License: The MIT License
@@ -19,7 +19,7 @@ running the following steps.
19
19
  >>> jageocoder.searchNode('<Japanese-address>')
20
20
  """
21
21
 
22
- __version__ = '2.1.7.dev2' # The package version
22
+ __version__ = '2.1.7.rc1' # The package version
23
23
  __dictionary_version__ = '20230927' # Compatible dictionary version
24
24
  __author__ = 'Takeshi Sagara <sagara@info-proto.com>'
25
25
 
@@ -70,6 +70,30 @@ class AddressNodeTable(PortableTab.BaseTable):
70
70
  node.table = self
71
71
  return node
72
72
 
73
+ def search_ids_on(
74
+ self,
75
+ attr: str,
76
+ value: str,
77
+ ) -> list:
78
+ """
79
+ Search id list from the table on the specified attribute.
80
+
81
+ Parameters
82
+ ---------
83
+ attr: str
84
+ The name of target attribute.
85
+ value: str
86
+ The target value.
87
+
88
+ Returns
89
+ -------
90
+ List[int]
91
+ List of node ids.
92
+ """
93
+ trie = self.open_trie_on(attr)
94
+ positions = trie.get(value, [])
95
+ return [p[0] for p in positions]
96
+
73
97
  def create_indexes(self) -> None:
74
98
  """
75
99
  Create TRIE index on "name" and "note" columns.
@@ -514,7 +538,7 @@ class AddressNode(object):
514
538
  """
515
539
  new_node = copy.copy(self)
516
540
  for child in self.iter_children():
517
- if child.y <= 90.0:
541
+ if child.has_valid_coordinate_values():
518
542
  new_node.x, new_node.y = child.x, child.y
519
543
  logger.debug((
520
544
  "Node {}({}) has no coordinates. "
@@ -184,6 +184,30 @@ class RemoteNodeTable(object):
184
184
 
185
185
  return nodes
186
186
 
187
+ def search_ids_on(
188
+ self,
189
+ attr: str,
190
+ value: str,
191
+ ) -> list:
192
+ """
193
+ Search id from the table on the specified attribute on the remote server.
194
+
195
+ Parameters
196
+ ---------
197
+ attr: str
198
+ The name of target attribute.
199
+ value: str
200
+ The target value.
201
+
202
+ Returns
203
+ -------
204
+ List[int]
205
+ List of node ids.
206
+ """
207
+ nodes = self.search_records_on(attr, value)
208
+ ids = [node.id for node in nodes]
209
+ return ids
210
+
187
211
 
188
212
  class RemoteTree(AddressTree):
189
213
  """
@@ -15,6 +15,16 @@ from jageocoder.node import AddressNode, AddressNodeTable
15
15
  logger = getLogger(__name__)
16
16
 
17
17
 
18
+ class NodeDist(object):
19
+
20
+ def __init__(self, dist: float, node: AddressNode) -> None:
21
+ self.dist = dist
22
+ self.node = node
23
+
24
+ def __repr__(self) -> str:
25
+ return f"NodeDist({self.dist}, {self.node})"
26
+
27
+
18
28
  class DelaunayTriangle(ABC):
19
29
 
20
30
  @classmethod
@@ -131,8 +141,8 @@ class DelaunayTriangle(ABC):
131
141
  cls,
132
142
  x: float,
133
143
  y: float,
134
- nodes: List[AddressNode]
135
- ) -> List[AddressNode]:
144
+ nodes: List[NodeDist]
145
+ ) -> List[NodeDist]:
136
146
  """
137
147
  Select the 3 nodes that make the smallest triangle
138
148
  surrounding the target point.
@@ -143,13 +153,14 @@ class DelaunayTriangle(ABC):
143
153
  The longitude of the target point.
144
154
  y: float
145
155
  The latitude of the target point.
146
- nodes: List[AddressNode]
147
- The candidate nodes.
156
+ nodes: List[NodeDist]
157
+ The candidate list of (distance, node).
148
158
 
149
159
  Returns
150
160
  -------
151
- List[AddressNode]
152
- Up to 3 nodes surrounding the target point.
161
+ List[NodeDist]
162
+ Up to 3 nodes surrounding the target point
163
+ and their distance.
153
164
  """
154
165
  def kval(t: Tuple[int, int, int]) -> int:
155
166
  sval = sorted(t)
@@ -161,9 +172,9 @@ class DelaunayTriangle(ABC):
161
172
  for p2 in range(p1 + 1, len(nodes)):
162
173
  if cls.p_contained_triangle(
163
174
  (x, y),
164
- (nodes[p0].x, nodes[p0].y),
165
- (nodes[p1].x, nodes[p1].y),
166
- (nodes[p2].x, nodes[p2].y)
175
+ (nodes[p0].node.x, nodes[p0].node.y),
176
+ (nodes[p1].node.x, nodes[p1].node.y),
177
+ (nodes[p2].node.x, nodes[p2].node.y)
167
178
  ):
168
179
  triangle = [p0, p1, p2]
169
180
  break
@@ -187,10 +198,10 @@ class DelaunayTriangle(ABC):
187
198
  continue
188
199
 
189
200
  if cls.p_contained_circumcircle(
190
- (nodes[i].x, nodes[i].y),
191
- (nodes[triangle[0]].x, nodes[triangle[0]].y),
192
- (nodes[triangle[1]].x, nodes[triangle[1]].y),
193
- (nodes[triangle[2]].x, nodes[triangle[2]].y)
201
+ (nodes[i].node.x, nodes[i].node.y),
202
+ (nodes[triangle[0]].node.x, nodes[triangle[0]].node.y),
203
+ (nodes[triangle[1]].node.x, nodes[triangle[1]].node.y),
204
+ (nodes[triangle[2]].node.x, nodes[triangle[2]].node.y)
194
205
  ):
195
206
  new_triangle = None
196
207
  for j in range(3):
@@ -202,9 +213,9 @@ class DelaunayTriangle(ABC):
202
213
 
203
214
  if cls.p_contained_triangle(
204
215
  (x, y),
205
- (nodes[tt[0]].x, nodes[tt[0]].y),
206
- (nodes[tt[1]].x, nodes[tt[1]].y),
207
- (nodes[tt[2]].x, nodes[tt[2]].y)
216
+ (nodes[tt[0]].node.x, nodes[tt[0]].node.y),
217
+ (nodes[tt[1]].node.x, nodes[tt[1]].node.y),
218
+ (nodes[tt[2]].node.x, nodes[tt[2]].node.y)
208
219
  ):
209
220
  new_triangle = tt
210
221
  break
@@ -309,6 +320,9 @@ class Index(object):
309
320
  node_table: AddressNodeTable = self._tree.address_nodes
310
321
 
311
322
  max_id = node_table.count_records()
323
+ registered_coordinates = set()
324
+
325
+ logger.info("Building RTree for reverse geocoding...")
312
326
  id = AddressNode.ROOT_NODE_ID
313
327
  with tqdm(total=max_id, mininterval=0.5, ascii=True) as pbar:
314
328
  prev_id = 0
@@ -317,22 +331,70 @@ class Index(object):
317
331
  prev_id = id
318
332
 
319
333
  node = node_table.get_record(pos=id)
320
- if node.level > AddressLevel.AZA:
321
- id = node.sibling_id
322
- continue
323
- elif node.level < AddressLevel.OAZA:
334
+ if node.level <= AddressLevel.WARD:
335
+ registered_coordinates.clear()
324
336
  id += 1
325
337
  continue
326
- elif not node.has_valid_coordinate_values():
327
- node = node.add_dummy_coordinates()
338
+
339
+ if node.sibling_id == node.id + 1:
340
+ # The node has no child nodes
341
+
328
342
  if not node.has_valid_coordinate_values():
329
343
  id += 1
330
344
  continue
331
345
 
332
- file_idx.insert(
333
- id=id,
334
- coordinates=(node.x, node.y, node.x, node.y)
335
- )
346
+ key = (node.x, node.y)
347
+ if key in registered_coordinates:
348
+ id += 1
349
+ continue
350
+
351
+ file_idx.insert(
352
+ id=id,
353
+ coordinates=(node.x, node.y, node.x, node.y),
354
+ )
355
+ registered_coordinates.add(key)
356
+ id += 1
357
+ continue
358
+
359
+ # The node has 1 or more child nodes
360
+ if node.level == AddressLevel.BLOCK:
361
+ # Get BDR of child nodes
362
+ bdr = None
363
+ for child_id in range(node.id + 1, node.sibling_id):
364
+ child_node = node_table.get_record(child_id)
365
+ if not child_node.has_valid_coordinate_values():
366
+ continue
367
+
368
+ if bdr is None:
369
+ bdr = (child_node.x, child_node.y,
370
+ child_node.x, child_node.y)
371
+ else:
372
+ bdr = (
373
+ min(child_node.x, bdr[0]),
374
+ min(child_node.y, bdr[1]),
375
+ max(child_node.x, bdr[2]),
376
+ max(child_node.y, bdr[3]),
377
+ )
378
+
379
+ if bdr:
380
+ file_idx.insert(
381
+ id=id,
382
+ coordinates=bdr,
383
+ )
384
+ else:
385
+ # All child nodes have invalid coordinate values
386
+ key = (node.x, node.y)
387
+ if node.has_valid_coordinate_values() and \
388
+ key not in registered_coordinates:
389
+ file_idx.insert(
390
+ id=id,
391
+ coordinates=(node.x, node.y, node.x, node.y),
392
+ )
393
+ registered_coordinates.add(key)
394
+
395
+ id = node.sibling_id
396
+ continue
397
+
336
398
  id += 1
337
399
 
338
400
  return file_idx
@@ -366,20 +428,28 @@ class Index(object):
366
428
  """
367
429
  node_table = self._tree.address_nodes
368
430
  node = node_table.get_record(pos=node_table.count_records() // 2)
369
- while node.level < AddressLevel.OAZA:
370
- node = node_table.get_record(pos=node.id + 1)
371
431
 
372
- while node.level > AddressLevel.AZA:
373
- node = node.parent
432
+ while True:
433
+ while node.level < AddressLevel.BLOCK:
434
+ node = node_table.get_record(pos=node.id + 1)
374
435
 
375
- return node.id in self.idx.nearest((node.x, node.y, node.x, node.y), 2)
436
+ while node.level > AddressLevel.BLOCK:
437
+ node = node.parent
438
+
439
+ if node.has_valid_coordinate_values():
440
+ break
441
+
442
+ node = node_table.get_record(pos=node.sibling_id)
443
+
444
+ results = tuple(self.idx.nearest((node.x, node.y, node.x, node.y), 20))
445
+ return len(results) > 0 and node.id in results
376
446
 
377
447
  def _sort_by_dist(
378
448
  self,
379
449
  lon: float,
380
450
  lat: float,
381
- id_list: Iterable[int]
382
- ) -> List[AddressNode]:
451
+ nodes: Iterable[AddressNode],
452
+ ) -> List[NodeDist]:
383
453
  """
384
454
  Sort nodes by real(projected) distance from the target point.
385
455
 
@@ -389,22 +459,24 @@ class Index(object):
389
459
  The longitude of the target point.
390
460
  lat: float
391
461
  The latitude of the target point.
392
- id_list: Iterable[int]
393
- The list of node-id.
462
+ nodes: Iterable[AddressNode]
463
+ The list of candidate node.
394
464
 
395
465
  Returns
396
466
  -------
397
- List[AddressNode]
398
- The sorted list of address nodes.
467
+ List[NodeDist]
468
+ The sorted list of (distance, address node).
399
469
  """
400
470
  results = []
401
- for node_id in id_list:
402
- node = self._tree.get_address_node(id=node_id)
471
+ for node in nodes:
472
+ if not node.has_valid_coordinate_values():
473
+ continue
474
+
403
475
  dist = self.distance(node.x, node.y, lon, lat)
404
- results.append((node, dist))
476
+ results.append(NodeDist(dist, node))
405
477
 
406
- results.sort(key=lambda x: x[1])
407
- return [x[0] for x in results]
478
+ results.sort(key=lambda x: x.dist)
479
+ return results
408
480
 
409
481
  def nearest(
410
482
  self,
@@ -436,84 +508,43 @@ class Index(object):
436
508
  """
437
509
  level = level or AddressLevel.AZA
438
510
 
439
- # Search nodes by Rtree Index
440
- nodes = []
441
- ancestors = set()
442
- max_level = 0
443
- for node in self._sort_by_dist(x, y, self.idx.nearest((x, y, x, y), 10)):
444
- if node.id in ancestors:
445
- continue
446
-
447
- if not node.has_valid_coordinate_values():
448
- node = node.add_dummy_coordinates()
449
-
450
- nodes.append(node)
451
- max_level = max(max_level, node.level)
452
- # Ancestor nodes of registering node are excluded.
453
- cur = node.parent
454
- while cur is not None:
455
- nodes = [node for node in nodes if node.id != cur.id]
456
- ancestors.add(cur.id)
457
- cur = cur.parent
458
-
459
- if level > max_level:
460
- # Search points in the higher levels
461
- local_idx = index.Rtree() # Create local rtree on memory
462
- for node in nodes:
463
- child_id = node.id
464
- while child_id < node.sibling_id:
465
- child_node = self._tree.get_address_node(id=child_id)
466
- if child_node.level > level:
467
- child_id = child_node.parent.sibling_id
468
- continue
469
- elif not child_node.has_valid_coordinate_values():
470
- child_node = child_node.add_dummy_coordinates()
471
- if not child_node.has_valid_coordinate_values():
472
- child_id += 1
473
- continue
474
-
475
- local_idx.insert(
476
- id=child_id,
477
- coordinates=(
478
- child_node.x, child_node.y,
479
- child_node.x, child_node.y))
480
- child_id += 1
481
-
482
- nodes = []
483
- ancestors = set()
484
- for node in self._sort_by_dist(x, y, local_idx.nearest((x, y, x, y), 20)):
485
- if node.id in ancestors:
486
- continue
487
-
488
- if not node.has_valid_coordinate_values():
489
- node = node.add_dummy_coordinates()
490
-
491
- nodes.append(node)
492
- # Ancestor nodes of registering node are excluded.
493
- cur = node.parent
494
- while cur is not None:
495
- nodes = [node for node in nodes if node.id != cur.id]
496
- ancestors.add(cur.id)
497
- cur = cur.parent
511
+ # Retrieve top k-nearest nodes using the R-tree index.
512
+ # If the node registered in the index is an intermediate node,
513
+ # expand its leaf nodes.
514
+ candidates = []
515
+ nearests = self.idx.nearest((x, y, x, y), 20, objects=True)
516
+ for item in nearests:
517
+ node = self._tree.get_node_by_id(item.id)
518
+ if item.bbox[0] == item.bbox[2] and item.bbox[1] == item.bbox[3]:
519
+ candidates.append(node)
520
+ else:
521
+ for child_id in range(node.id + 1, node.sibling_id):
522
+ child_node = self._tree.get_node_by_id(child_id)
523
+ if child_node.sibling_id == child_id + 1 and \
524
+ child_node.has_valid_coordinate_values():
525
+ candidates.append(child_node)
526
+
527
+ node_dists = self._sort_by_dist(x, y, candidates)
498
528
 
499
529
  # Select the 3 nodes that make the smallest triangle
500
530
  # surrounding the target point
501
- if len(nodes) == 0:
531
+ if len(node_dists) == 0:
502
532
  return []
503
533
 
504
- if self.distance(x, y, nodes[0].x, nodes[0].y) < 1.0e-02:
534
+ if len(node_dists) <= 3 or node_dists[0].dist < 1.0e-02:
505
535
  # If the distance between the nearest point and the search point is
506
536
  # less than 1 cm, it returns three points in order of distance.
507
537
  # This is because the nearest point may not be included in
508
538
  # the search results due to a calculation error.
509
- nodes = nodes[0:3]
539
+ node_dists = node_dists[0:3]
510
540
  else:
511
- nodes = DelaunayTriangle.select(x, y, nodes)
541
+ node_dists = DelaunayTriangle.select(x, y, node_dists)
512
542
 
513
543
  # Convert nodes to the dict format.
514
544
  results = []
515
545
  registered = set()
516
- for node in nodes:
546
+ for v in node_dists:
547
+ dist, node = v.dist, v.node
517
548
  while node.level > level:
518
549
  node = node.parent
519
550
 
@@ -522,11 +553,8 @@ class Index(object):
522
553
 
523
554
  results.append({
524
555
  "candidate": node.as_dict() if as_dict else node,
525
- "dist": self.distance(x, y, node.x, node.y)
556
+ "dist": dist
526
557
  })
527
558
  registered.add(node.id)
528
559
 
529
- # Sort by distance
530
- results = sorted(results, key=lambda r: r['dist'])
531
-
532
560
  return results
@@ -1,17 +1,14 @@
1
1
  from collections import OrderedDict
2
- import csv
3
- import json
4
2
  from logging import getLogger
5
3
  import os
6
4
  from pathlib import Path
7
5
  import re
8
6
  import site
9
7
  import sys
10
- from typing import Any, Union, List, Set, NoReturn, Optional, TextIO
8
+ from typing import Any, Union, List, Set, Optional
11
9
 
12
10
  from deprecated import deprecated
13
11
 
14
- import jageocoder
15
12
  from jageocoder.address import AddressLevel
16
13
  from jageocoder.aza_master import AzaMaster
17
14
  from jageocoder.exceptions import AddressTreeException
@@ -285,11 +282,13 @@ class AddressTree(object):
285
282
  node_id: int
286
283
  The target node id.
287
284
 
288
- Return
289
- ------
285
+ Returns
286
+ -------
290
287
  AddressNode
291
288
  """
292
- return self.address_nodes.get_record(node_id)
289
+ node = self.address_nodes.get_record(node_id)
290
+ node.tree = self
291
+ return node
293
292
 
294
293
  def search_nodes_by_codes(
295
294
  self,
@@ -316,6 +315,31 @@ class AddressTree(object):
316
315
 
317
316
  return nodes
318
317
 
318
+ def search_ids_by_codes(
319
+ self,
320
+ category: str,
321
+ value: str) -> List[AddressNode]:
322
+ """
323
+ Search node ids by category and value.
324
+
325
+ Parameters
326
+ ----------
327
+ category: str
328
+ Category name such as 'jisx0402' or 'postcode'.
329
+ value: str
330
+ Target value.
331
+
332
+ Returns
333
+ -------
334
+ List[int]
335
+ """
336
+ ids = []
337
+ pattern = '{}:{}'.format(category, value)
338
+ ids = self.address_nodes.search_ids_on(
339
+ attr="note", value=pattern) # exact match
340
+
341
+ return ids
342
+
319
343
  @deprecated("Use 'node.get_fullname()' instead of this method.")
320
344
  def get_node_fullname(self, node: Union[AddressNode, int]) -> List[str]:
321
345
  if isinstance(node, int):
@@ -825,7 +849,7 @@ class AddressTree(object):
825
849
  key = index[0:offset]
826
850
  rest_index = index[offset:]
827
851
  for node_id in trie_node.nodes:
828
- node = self.get_address_node(id=node_id)
852
+ node = self.get_node_by_id(node_id=node_id)
829
853
 
830
854
  if not node.has_valid_coordinate_values() \
831
855
  and self.get_config('require_coordinates'):
@@ -965,6 +989,7 @@ class AddressTree(object):
965
989
 
966
990
  return results
967
991
 
992
+ @deprecated(reason="Use 'get_node_by_id'.", version="2.1.7")
968
993
  def get_address_node(self, id: int) -> AddressNode:
969
994
  """
970
995
  Get address node from the tree by its id.
@@ -1171,17 +1196,30 @@ class AddressTree(object):
1171
1196
  """
1172
1197
  if len(id) == 12:
1173
1198
  # jisx0402(5digits) + aza_id(7digits)
1174
- candidates = self.search_nodes_by_codes(
1199
+ citynode = self.search_by_citycode(code=id[0:5])
1200
+ if len(citynode) == 0:
1201
+ return []
1202
+
1203
+ citynode = citynode[0]
1204
+ candidates = self.search_ids_by_codes(
1175
1205
  category="aza_id",
1176
1206
  value=id[-7:])
1177
- nodes = [x for x in candidates if x.get_city_jiscode() == id[0:5]]
1207
+ nodes = [self.address_nodes.get_record(x)
1208
+ for x in candidates
1209
+ if x >= citynode.id and x < citynode.sibling_id]
1178
1210
  elif len(id) == 13:
1179
1211
  # lasdec(6digits) + aza_id(7digits)
1180
- candidates = self.search_nodes_by_codes(
1212
+ citynode = self.search_by_citycode(code=id[0:6])
1213
+ if len(citynode) == 0:
1214
+ return []
1215
+
1216
+ citynode = citynode[0]
1217
+ candidates = self.search_ids_by_codes(
1181
1218
  category="aza_id",
1182
1219
  value=id[-7:])
1183
- nodes = [x for x in candidates
1184
- if x.get_city_local_authority_code() == id[0:6]]
1220
+ nodes = [self.address_nodes.get_record(x)
1221
+ for x in candidates
1222
+ if x >= citynode.id and x < citynode.sibling_id]
1185
1223
  else:
1186
1224
  nodes = self.search_nodes_by_codes(
1187
1225
  category="aza_id",
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "jageocoder"
3
- version = "2.1.7.dev2"
3
+ version = "2.1.7.rc1"
4
4
  description = "A Japanese-address geocoder for Python."
5
5
  authors = ["Takeshi Sagara <sagara@info-proto.com>"]
6
6
  repository = "https://github.com/t-sagara/jageocoder/"
File without changes
File without changes