jageocoder 2.1.7.dev3__tar.gz → 2.1.7rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/PKG-INFO +1 -1
  2. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/__init__.py +1 -1
  3. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/rtree.py +82 -151
  4. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/tree.py +1 -1
  5. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/pyproject.toml +1 -1
  6. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/LICENSE +0 -0
  7. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/README.md +0 -0
  8. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/__main__.py +0 -0
  9. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/address.py +0 -0
  10. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/aza_master.py +0 -0
  11. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/dataset.py +0 -0
  12. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/exceptions.py +0 -0
  13. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/itaiji.py +0 -0
  14. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/itaiji_dic.json +0 -0
  15. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/module.py +0 -0
  16. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/node.py +0 -0
  17. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/remote.py +0 -0
  18. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/result.py +0 -0
  19. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/strlib.py +0 -0
  20. {jageocoder-2.1.7.dev3 → jageocoder-2.1.7rc1}/jageocoder/trie.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: jageocoder
3
- Version: 2.1.7.dev3
3
+ Version: 2.1.7rc1
4
4
  Summary: A Japanese-address geocoder for Python.
5
5
  Home-page: https://github.com/t-sagara/jageocoder/
6
6
  License: The MIT License
@@ -19,7 +19,7 @@ running the following steps.
19
19
  >>> jageocoder.searchNode('<Japanese-address>')
20
20
  """
21
21
 
22
- __version__ = '2.1.7.dev3' # The package version
22
+ __version__ = '2.1.7.rc1' # The package version
23
23
  __dictionary_version__ = '20230927' # Compatible dictionary version
24
24
  __author__ = 'Takeshi Sagara <sagara@info-proto.com>'
25
25
 
@@ -1,5 +1,4 @@
1
1
  from abc import ABC
2
- import copy
3
2
  from logging import getLogger
4
3
  import os
5
4
  from typing import Iterable, List, Optional, Tuple
@@ -321,6 +320,7 @@ class Index(object):
321
320
  node_table: AddressNodeTable = self._tree.address_nodes
322
321
 
323
322
  max_id = node_table.count_records()
323
+ registered_coordinates = set()
324
324
 
325
325
  logger.info("Building RTree for reverse geocoding...")
326
326
  id = AddressNode.ROOT_NODE_ID
@@ -331,18 +331,69 @@ class Index(object):
331
331
  prev_id = id
332
332
 
333
333
  node = node_table.get_record(pos=id)
334
- if node.level <= AddressLevel.WARD or node.level > AddressLevel.BLOCK:
334
+ if node.level <= AddressLevel.WARD:
335
+ registered_coordinates.clear()
335
336
  id += 1
336
337
  continue
337
338
 
338
- if not node.has_valid_coordinate_values():
339
- node = node.add_dummy_coordinates()
339
+ if node.sibling_id == node.id + 1:
340
+ # The node has no child nodes
341
+
342
+ if not node.has_valid_coordinate_values():
343
+ id += 1
344
+ continue
345
+
346
+ key = (node.x, node.y)
347
+ if key in registered_coordinates:
348
+ id += 1
349
+ continue
340
350
 
341
- if node.has_valid_coordinate_values():
342
351
  file_idx.insert(
343
352
  id=id,
344
353
  coordinates=(node.x, node.y, node.x, node.y),
345
354
  )
355
+ registered_coordinates.add(key)
356
+ id += 1
357
+ continue
358
+
359
+ # The node has 1 or more child nodes
360
+ if node.level == AddressLevel.BLOCK:
361
+ # Get BDR of child nodes
362
+ bdr = None
363
+ for child_id in range(node.id + 1, node.sibling_id):
364
+ child_node = node_table.get_record(child_id)
365
+ if not child_node.has_valid_coordinate_values():
366
+ continue
367
+
368
+ if bdr is None:
369
+ bdr = (child_node.x, child_node.y,
370
+ child_node.x, child_node.y)
371
+ else:
372
+ bdr = (
373
+ min(child_node.x, bdr[0]),
374
+ min(child_node.y, bdr[1]),
375
+ max(child_node.x, bdr[2]),
376
+ max(child_node.y, bdr[3]),
377
+ )
378
+
379
+ if bdr:
380
+ file_idx.insert(
381
+ id=id,
382
+ coordinates=bdr,
383
+ )
384
+ else:
385
+ # All child nodes have invalid coordinate values
386
+ key = (node.x, node.y)
387
+ if node.has_valid_coordinate_values() and \
388
+ key not in registered_coordinates:
389
+ file_idx.insert(
390
+ id=id,
391
+ coordinates=(node.x, node.y, node.x, node.y),
392
+ )
393
+ registered_coordinates.add(key)
394
+
395
+ id = node.sibling_id
396
+ continue
346
397
 
347
398
  id += 1
348
399
 
@@ -397,9 +448,8 @@ class Index(object):
397
448
  self,
398
449
  lon: float,
399
450
  lat: float,
400
- id_list: Iterable[int],
401
- node_map: Optional[dict] = None,
402
- ) -> Tuple[List[NodeDist], dict]:
451
+ nodes: Iterable[AddressNode],
452
+ ) -> List[NodeDist]:
403
453
  """
404
454
  Sort nodes by real(projected) distance from the target point.
405
455
 
@@ -409,31 +459,24 @@ class Index(object):
409
459
  The longitude of the target point.
410
460
  lat: float
411
461
  The latitude of the target point.
412
- id_list: Iterable[int]
413
- The list of node-id.
462
+ nodes: Iterable[AddressNode]
463
+ The list of candidate node.
414
464
 
415
465
  Returns
416
466
  -------
417
- Tuple[List[NodeDist], dict]
418
- The sorted list of (distance, address node) and a node map.
467
+ List[NodeDist]
468
+ The sorted list of (distance, address node).
419
469
  """
420
- node_map = node_map or {}
421
470
  results = []
422
- for node_id in set(id_list):
423
- node = self._tree.get_node_by_id(node_id=node_id)
471
+ for node in nodes:
424
472
  if not node.has_valid_coordinate_values():
425
- node = node.add_dummy_coordinates()
473
+ continue
426
474
 
427
- key = (node.x, node.y)
428
- if key in node_map:
429
- node_map[key].append(node)
430
- else:
431
- node_map[key] = [node]
432
- dist = self.distance(node.x, node.y, lon, lat)
433
- results.append(NodeDist(dist, node))
475
+ dist = self.distance(node.x, node.y, lon, lat)
476
+ results.append(NodeDist(dist, node))
434
477
 
435
478
  results.sort(key=lambda x: x.dist)
436
- return (results, node_map)
479
+ return results
437
480
 
438
481
  def nearest(
439
482
  self,
@@ -463,117 +506,25 @@ class Index(object):
463
506
  [{"candidate":AddressNode or dict, "dist":float}]
464
507
  Returns the results of retrieval up to 3 nodes.
465
508
  """
466
-
467
- def _remove_parent_nodes(
468
- candidates: Iterable[NodeDist]
469
- ) -> List[NodeDist]:
470
- ancestors = set()
471
- max_level = 0
472
- if len(candidates) == 0:
473
- return []
474
-
475
- nodes = []
476
- for v in candidates:
477
- dist, node = v.dist, v.node
478
- if node.id in ancestors:
479
- continue
480
-
481
- if not node.has_valid_coordinate_values():
482
- node = node.add_dummy_coordinates()
483
-
484
- nodes.append(NodeDist(dist, node))
485
- max_level = max(max_level, node.level)
486
-
487
- # List ancestor nodes of registering node.
488
- cur = node.parent
489
- while cur is not None:
490
- # nodes = [node for node in nodes if node.id != cur.id]
491
- ancestors.add(cur.id)
492
- cur = cur.parent
493
-
494
- # Exclude ancestor nodes
495
- nodes = [node for node in nodes if node.node.id not in ancestors]
496
- return nodes
497
-
498
- def _get_k_nearest_child_nodes(
499
- aza_node_dists: List[NodeDist],
500
- *,
501
- candidates: Optional[List[NodeDist]] = None,
502
- node_map: Optional[dict] = None,
503
- k: Optional[int] = 20,
504
- min_k: Optional[int] = 0,
505
- max_dist: Optional[float] = 500.0,
506
- ) -> Tuple[List[NodeDist], dict]:
507
- candidates = candidates or []
508
- node_map = node_map or {}
509
- for v in aza_node_dists:
510
- dist, node = v.dist, v.node
511
- child_id = node.id + 1
512
- for child_id in range(node.id + 1, node.sibling_id):
513
- child_node = self._tree.get_node_by_id(
514
- node_id=child_id)
515
- if child_node.level > level:
516
- continue
517
-
518
- if not child_node.has_valid_coordinate_values():
519
- if child_node.level == level:
520
- continue
521
-
522
- child_node = child_node.add_dummy_coordinates()
523
- if not child_node.has_valid_coordinate_values():
524
- continue
525
-
526
- key = (child_node.x, child_node.y)
527
- if key in node_map:
528
- # A node with the same coordinates are already registered
529
- node_map[key].append(child_node)
530
- continue
531
-
532
- dist = self.distance(x, y, child_node.x, child_node.y)
533
- i = len(candidates)
534
- while i > 0:
535
- if dist >= candidates[i - 1].dist:
536
- break
537
-
538
- i -= 1
539
-
540
- if i < min_k or (i < k and dist <= max_dist):
541
- candidates.insert(i, NodeDist(dist, child_node))
542
- node_map[key] = [child_node]
543
- n = len(candidates)
544
- if n > k:
545
- delnode = candidates[k].node
546
- del candidates[k]
547
- delkey = (delnode.x, delnode.y)
548
- node_map[delkey].remove(delnode)
549
- if len(node_map[delkey]) == 0:
550
- del node_map[delkey]
551
-
552
- elif n > min_k and candidates[min_k].dist > max_dist:
553
- delnode = candidates[min_k].node
554
- del candidates[min_k]
555
- delkey = (delnode.x, delnode.y)
556
- node_map[delkey].remove(delnode)
557
- if len(node_map[delkey]) == 0:
558
- del node_map[delkey]
559
-
560
- return (candidates, node_map)
561
-
562
509
  level = level or AddressLevel.AZA
563
510
 
564
511
  # Retrieve top k-nearest nodes using the R-tree index.
565
- nearests = self.idx.nearest((x, y, x, y), 20)
566
- node_dists, node_map = self._sort_by_dist(x, y, nearests)
567
- node_dists = _remove_parent_nodes(node_dists)
568
-
569
- if level > AddressLevel.BLOCK:
570
- candidates, node_map = _get_k_nearest_child_nodes(
571
- node_dists,
572
- candidates=copy.copy(node_dists),
573
- node_map=node_map,
574
- min_k=1)
512
+ # If the node registered in the index is an intermediate node,
513
+ # expand its leaf nodes.
514
+ candidates = []
515
+ nearests = self.idx.nearest((x, y, x, y), 20, objects=True)
516
+ for item in nearests:
517
+ node = self._tree.get_node_by_id(item.id)
518
+ if item.bbox[0] == item.bbox[2] and item.bbox[1] == item.bbox[3]:
519
+ candidates.append(node)
520
+ else:
521
+ for child_id in range(node.id + 1, node.sibling_id):
522
+ child_node = self._tree.get_node_by_id(child_id)
523
+ if child_node.sibling_id == child_id + 1 and \
524
+ child_node.has_valid_coordinate_values():
525
+ candidates.append(child_node)
575
526
 
576
- node_dists = _remove_parent_nodes(candidates)
527
+ node_dists = self._sort_by_dist(x, y, candidates)
577
528
 
578
529
  # Select the 3 nodes that make the smallest triangle
579
530
  # surrounding the target point
@@ -589,17 +540,6 @@ class Index(object):
589
540
  else:
590
541
  node_dists = DelaunayTriangle.select(x, y, node_dists)
591
542
 
592
- # Restore nodes with the same coordinates
593
- if node_map is not None:
594
- _node_dists = []
595
- for v in node_dists:
596
- dist, node = v.dist, v.node
597
- key = (node.x, node.y)
598
- for n in node_map[key]:
599
- _node_dists.append(NodeDist(dist, n))
600
-
601
- node_dists = _node_dists
602
-
603
543
  # Convert nodes to the dict format.
604
544
  results = []
605
545
  registered = set()
@@ -607,23 +547,14 @@ class Index(object):
607
547
  dist, node = v.dist, v.node
608
548
  while node.level > level:
609
549
  node = node.parent
610
- dist = None
611
- if not node.has_valid_coordinate_values():
612
- node.x, node.y = v.node.x, v.node.y
613
550
 
614
551
  if node.id in registered:
615
552
  continue
616
553
 
617
- if dist is None:
618
- dist = self.distance(x, y, node.x, node.y)
619
-
620
554
  results.append({
621
555
  "candidate": node.as_dict() if as_dict else node,
622
556
  "dist": dist
623
557
  })
624
558
  registered.add(node.id)
625
559
 
626
- # Sort by distance
627
- results = sorted(results, key=lambda r: r['dist'])
628
-
629
560
  return results
@@ -849,7 +849,7 @@ class AddressTree(object):
849
849
  key = index[0:offset]
850
850
  rest_index = index[offset:]
851
851
  for node_id in trie_node.nodes:
852
- node = self.get_address_node(id=node_id)
852
+ node = self.get_node_by_id(node_id=node_id)
853
853
 
854
854
  if not node.has_valid_coordinate_values() \
855
855
  and self.get_config('require_coordinates'):
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "jageocoder"
3
- version = "2.1.7.dev3"
3
+ version = "2.1.7.rc1"
4
4
  description = "A Japanese-address geocoder for Python."
5
5
  authors = ["Takeshi Sagara <sagara@info-proto.com>"]
6
6
  repository = "https://github.com/t-sagara/jageocoder/"
File without changes
File without changes