allocator 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- allocator/__init__.py +154 -0
- allocator/api/__init__.py +32 -0
- allocator/api/cluster.py +126 -0
- allocator/api/distance.py +225 -0
- allocator/api/route.py +256 -0
- allocator/api/types.py +52 -0
- allocator/cli/__init__.py +1 -0
- allocator/cli/cluster_cmd.py +104 -0
- allocator/cli/main.py +170 -0
- allocator/cli/route_cmd.py +164 -0
- allocator/core/__init__.py +1 -0
- allocator/core/algorithms.py +200 -0
- allocator/core/routing.py +242 -0
- allocator/distances/__init__.py +17 -0
- allocator/distances/euclidean.py +80 -0
- allocator/distances/external_apis.py +165 -0
- allocator/distances/factory.py +66 -0
- allocator/distances/haversine.py +43 -0
- allocator/io/__init__.py +1 -0
- allocator/io/data_handler.py +174 -0
- allocator/py.typed +2 -0
- allocator/utils.py +37 -0
- allocator/viz/__init__.py +17 -0
- allocator/viz/plotting.py +206 -0
- allocator-1.0.0.dist-info/METADATA +132 -0
- allocator-1.0.0.dist-info/RECORD +28 -0
- allocator-1.0.0.dist-info/WHEEL +4 -0
- allocator-1.0.0.dist-info/entry_points.txt +3 -0
allocator/__init__.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Allocator v1.0: Optimally Allocate Geographically Distributed Tasks
|
|
3
|
+
|
|
4
|
+
A modern Python package for geographic task allocation, clustering, and routing optimization.
|
|
5
|
+
|
|
6
|
+
Key Features:
|
|
7
|
+
- Cluster geographic points into balanced groups
|
|
8
|
+
- Find optimal routes through locations (TSP solving)
|
|
9
|
+
- Assign points to closest workers/centers
|
|
10
|
+
- Multiple distance metrics (euclidean, haversine, OSRM, Google Maps)
|
|
11
|
+
- Clean API with structured results and rich metadata
|
|
12
|
+
- Unified CLI with beautiful terminal output
|
|
13
|
+
|
|
14
|
+
Quick Start:
|
|
15
|
+
>>> import allocator
|
|
16
|
+
>>> import pandas as pd
|
|
17
|
+
>>>
|
|
18
|
+
>>> # Create sample data
|
|
19
|
+
>>> data = pd.DataFrame({
|
|
20
|
+
... 'longitude': [101.0, 101.1, 101.2],
|
|
21
|
+
... 'latitude': [13.0, 13.1, 13.2]
|
|
22
|
+
... })
|
|
23
|
+
>>>
|
|
24
|
+
>>> # Cluster locations
|
|
25
|
+
>>> result = allocator.cluster(data, n_clusters=2)
|
|
26
|
+
>>> print(result.labels)
|
|
27
|
+
>>>
|
|
28
|
+
>>> # Find optimal route
|
|
29
|
+
>>> route = allocator.shortest_path(data, method='ortools')
|
|
30
|
+
>>> print(route.route)
|
|
31
|
+
|
|
32
|
+
For more examples: https://geosensing.github.io/allocator/
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
import logging
|
|
36
|
+
import sys
|
|
37
|
+
|
|
38
|
+
# Import modern API
|
|
39
|
+
from .api import (
|
|
40
|
+
ClusterResult,
|
|
41
|
+
ComparisonResult,
|
|
42
|
+
RouteResult,
|
|
43
|
+
SortResult,
|
|
44
|
+
assign_to_closest,
|
|
45
|
+
cluster,
|
|
46
|
+
distance_assignment,
|
|
47
|
+
kmeans,
|
|
48
|
+
shortest_path,
|
|
49
|
+
sort_by_distance,
|
|
50
|
+
tsp_christofides,
|
|
51
|
+
tsp_google,
|
|
52
|
+
tsp_ortools,
|
|
53
|
+
tsp_osrm,
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Import utilities for advanced users
|
|
57
|
+
from .distances import (
|
|
58
|
+
euclidean_distance_matrix,
|
|
59
|
+
get_distance_matrix,
|
|
60
|
+
google_distance_matrix,
|
|
61
|
+
haversine_distance_matrix,
|
|
62
|
+
latlon2xy,
|
|
63
|
+
osrm_distance_matrix,
|
|
64
|
+
xy2latlog,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Import visualization functions
|
|
68
|
+
from .viz.plotting import plot_assignments, plot_clusters, plot_comparison, plot_route
|
|
69
|
+
|
|
70
|
+
# Version
|
|
71
|
+
__version__ = "1.0.0"
|
|
72
|
+
|
|
73
|
+
# Export public API.
# The entries below are kept in ASCII-sorted order: the capitalised result
# types come first, followed by the lower-case functions. Because of the
# sorting, clustering, distance, routing, plotting and logging helpers are
# interleaved rather than grouped by topic.
__all__ = [
    "ClusterResult",
    "ComparisonResult",
    "RouteResult",
    "SortResult",
    "assign_to_closest",
    "cluster",
    "distance_assignment",
    "euclidean_distance_matrix",
    "get_distance_matrix",
    "get_logger",
    "google_distance_matrix",
    "haversine_distance_matrix",
    "kmeans",
    "latlon2xy",
    "osrm_distance_matrix",
    "plot_assignments",
    "plot_clusters",
    "plot_comparison",
    "plot_route",
    "setup_logging",
    "shortest_path",
    "sort_by_distance",
    "tsp_christofides",
    "tsp_google",
    "tsp_ortools",
    "tsp_osrm",
    "xy2latlog",
]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def setup_logging(level=logging.INFO):
    """
    Configure the ``allocator`` logger to write to standard output.

    Every call starts from a clean slate: handlers already attached to the
    ``allocator`` logger are detached before the new console handler is
    installed, so calling this repeatedly never duplicates output.

    Args:
        level: Logging level (DEBUG, INFO, WARNING, ERROR) applied to both
            the logger and its console handler.

    Returns:
        The configured ``allocator`` logger.
    """
    package_logger = logging.getLogger("allocator")
    package_logger.setLevel(level)

    # Iterate over a snapshot because removeHandler mutates the live list.
    for stale_handler in list(package_logger.handlers):
        package_logger.removeHandler(stale_handler)

    # Single console handler: timestamp - logger name - level - message.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    package_logger.addHandler(stdout_handler)

    return package_logger
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_logger(name):
    """
    Get a logger instance for a specific module.

    The returned logger always sits below the ``allocator`` logger, so it
    inherits whatever level and handlers are configured there.

    Args:
        name: Module name (typically __name__). Either a short name such as
            ``"cluster"`` or an already qualified one such as
            ``"allocator.api.cluster"``.

    Returns:
        Logger instance named ``allocator.<name>``, or ``<name>`` unchanged
        when it is already inside the ``allocator`` namespace.
    """
    package = "allocator"
    qualified = str(name)

    # Modules inside the package pass __name__, which already starts with
    # "allocator."; prefixing it again would produce "allocator.allocator.…".
    if qualified == package or qualified.startswith(f"{package}."):
        return logging.getLogger(qualified)

    return logging.getLogger(f"{package}.{qualified}")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Set up default logging.
# This runs as a side effect of importing the package: with the default
# argument it puts the "allocator" logger at INFO level and attaches a
# stdout handler to it.
setup_logging()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Public API for allocator package.
|
|
3
|
+
|
|
4
|
+
This module provides a modern, Pythonic interface to the allocator package.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .cluster import cluster, kmeans
|
|
8
|
+
from .distance import assign_to_closest, distance_assignment, sort_by_distance
|
|
9
|
+
from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
|
|
10
|
+
from .types import ClusterResult, ComparisonResult, RouteResult, SortResult
|
|
11
|
+
|
|
12
|
+
# Names re-exported from the .cluster, .distance, .route and .types
# submodules. The list is kept in ASCII-sorted order (capitalised result
# types first, then functions), so entries are not grouped by topic.
__all__ = [
    "ClusterResult",
    "ComparisonResult",
    "RouteResult",
    "SortResult",
    "assign_to_closest",
    "cluster",
    "distance_assignment",
    "kmeans",
    "shortest_path",
    "sort_by_distance",
    "tsp_christofides",
    "tsp_google",
    "tsp_ortools",
    "tsp_osrm",
]
|
allocator/api/cluster.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modern clustering API for allocator package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
from ..core.algorithms import kmeans_cluster as _kmeans_cluster
|
|
13
|
+
from ..io.data_handler import DataHandler
|
|
14
|
+
from .types import ClusterResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def cluster(
    data: str | pd.DataFrame | np.ndarray | list,
    n_clusters: int = 3,
    method: str = "kmeans",
    distance: str = "euclidean",
    random_state: int | None = None,
    **kwargs,
) -> ClusterResult:
    """
    Cluster geographic data points with the requested method.

    The input is first passed through ``DataHandler.load_data`` and the
    resulting frame is handed to the method-specific implementation.

    Args:
        data: Input data (file path, DataFrame, numpy array, or list)
        n_clusters: Number of clusters to create
        method: Clustering method; only 'kmeans' is available
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        random_state: Random seed for reproducibility
        **kwargs: Extra arguments forwarded to the chosen method

    Returns:
        ClusterResult with labels, centroids, and metadata

    Raises:
        ValueError: If ``method`` is not a known clustering method.

    Example:
        >>> result = cluster('data.csv', n_clusters=5, method='kmeans')
        >>> print(result.labels)  # Cluster assignments
        >>> print(result.centroids)  # Cluster centers
    """
    # Loading happens before the method check, so input problems surface first.
    frame = DataHandler.load_data(data)

    if method != "kmeans":
        raise ValueError(f"Unknown clustering method: {method}. Available methods: 'kmeans'")

    return kmeans(
        frame,
        n_clusters=n_clusters,
        distance=distance,
        random_state=random_state,
        **kwargs,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def kmeans(
    data: pd.DataFrame | np.ndarray | list,
    n_clusters: int = 3,
    distance: str = "euclidean",
    max_iter: int = 300,
    random_state: int | None = None,
    **kwargs,
) -> ClusterResult:
    """
    Run k-means on geographic data and package the outcome.

    Args:
        data: Input data as a DataFrame, numpy array or list; a ``str`` or
            ``Path`` is also accepted and loaded through ``DataHandler``
        n_clusters: Number of clusters
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        max_iter: Maximum iterations
        random_state: Random seed for reproducibility
        **kwargs: Additional distance-specific arguments, forwarded to the
            core clustering routine

    Returns:
        ClusterResult with labels, centroids, iteration count, convergence
        flag, the input rows plus a ``cluster`` column, and run metadata.
        ``inertia`` is only filled in for the 'euclidean' metric; it is
        ``None`` otherwise.
    """
    # Normalise whatever we were given into a DataFrame that we own.
    if isinstance(data, np.ndarray):
        frame = DataHandler._from_numpy(data)
    elif isinstance(data, list):
        frame = DataHandler._from_list(data)
    elif isinstance(data, (str, Path)):
        frame = DataHandler.load_data(data)
    else:
        frame = data.copy()

    outcome = _kmeans_cluster(
        frame,
        n_clusters=n_clusters,
        distance_method=distance,
        max_iter=max_iter,
        random_state=random_state,
        **kwargs,
    )
    assigned = outcome["labels"]
    centers = outcome["centroids"]

    # Output frame: the input rows with their cluster assignment appended.
    labelled = frame.copy()
    labelled["cluster"] = assigned

    # Inertia = sum of squared distances from each point to its own centroid.
    inertia = None
    if distance == "euclidean":
        from ..distances import euclidean_distance_matrix

        point_coords = frame[["longitude", "latitude"]].values
        to_centers = euclidean_distance_matrix(point_coords, centers)
        squared = [to_centers[row, assigned[row]] ** 2 for row in range(len(assigned))]
        inertia = np.sum(squared)

    run_info = {
        "method": "kmeans",
        "distance": distance,
        "n_clusters": n_clusters,
        "max_iter": max_iter,
        "random_state": random_state,
    }
    return ClusterResult(
        labels=assigned,
        centroids=centers,
        n_iter=outcome["iterations"],
        inertia=inertia,
        data=labelled,
        converged=outcome["converged"],
        metadata=run_info,
    )
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modern distance-based assignment API for allocator package.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from ..core.algorithms import sort_by_distance_assignment
|
|
11
|
+
from ..io.data_handler import DataHandler
|
|
12
|
+
from .types import SortResult
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def sort_by_distance(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    by_worker: bool = False,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Rank workers for every point, or points for every worker, by distance.

    Both inputs are loaded through ``DataHandler.load_data`` before the
    ranking is delegated to the matching private helper.

    Args:
        points: Geographic points to assign (file path, DataFrame, numpy array, or list)
        workers: Worker/centroid locations (file path, DataFrame, numpy array, or list)
        by_worker: If True, sort points by worker; if False, sort workers by point
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult with assignments and distance information

    Example:
        >>> result = sort_by_distance('points.csv', 'workers.csv')
        >>> print(result.data)  # Points with worker assignments
    """
    points_df = DataHandler.load_data(points)
    workers_df = DataHandler.load_data(workers)

    # Choose the ranking direction once, then make a single call.
    ranker = _sort_points_by_worker if by_worker else _sort_workers_by_point
    return ranker(points_df, workers_df, distance=distance, **kwargs)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _sort_workers_by_point(
    points_df: pd.DataFrame, workers_df: pd.DataFrame, distance: str = "euclidean", **kwargs
) -> SortResult:
    """
    Rank every worker for each point, nearest first (default behavior).

    The result frame has one row per (point, worker) pair, emitted point by
    point in order of increasing distance. Each row carries ``point_id``,
    ``worker_id``, a 1-based ``rank``, the ``distance``, and every input
    column prefixed with ``point_`` or ``worker_``.

    Args:
        points_df: Points with 'longitude' and 'latitude' columns
        workers_df: Workers with 'longitude' and 'latitude' columns
        distance: Distance metric name passed to ``get_distance_matrix``
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult holding the ranking frame, the points-by-workers distance
        matrix, and run metadata.
    """
    from ..distances import get_distance_matrix

    # Rows index points, columns index workers.
    distance_matrix = get_distance_matrix(
        _extract_coordinates(points_df),
        _extract_coordinates(workers_df),
        method=distance,
        **kwargs,
    )

    rows = []
    for point_pos, (_, point) in enumerate(points_df.iterrows()):
        to_workers = distance_matrix[point_pos, :]

        for position, worker_pos in enumerate(np.argsort(to_workers), start=1):
            worker = workers_df.iloc[worker_pos]
            row = {
                "point_id": point_pos,
                "worker_id": worker_pos,
                "rank": position,
                "distance": to_workers[worker_pos],
            }
            # Point columns first, then worker columns, each with a prefix.
            row.update({f"point_{col}": point[col] for col in points_df.columns})
            row.update({f"worker_{col}": worker[col] for col in workers_df.columns})
            rows.append(row)

    return SortResult(
        data=pd.DataFrame(rows),
        distance_matrix=distance_matrix,
        metadata={
            "method": "sort_workers_by_point",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _sort_points_by_worker(
    points_df: pd.DataFrame, workers_df: pd.DataFrame, distance: str = "euclidean", **kwargs
) -> SortResult:
    """
    Rank every point for each worker, nearest first.

    The result frame has one row per (worker, point) pair, emitted worker by
    worker in order of increasing distance. Each row carries ``worker_id``,
    ``point_id``, a 1-based ``rank``, the ``distance``, and every input
    column prefixed with ``worker_`` or ``point_``.

    Args:
        points_df: Points with 'longitude' and 'latitude' columns
        workers_df: Workers with 'longitude' and 'latitude' columns
        distance: Distance metric name passed to ``get_distance_matrix``
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult holding the ranking frame, the points-by-workers distance
        matrix, and run metadata.
    """
    from ..distances import get_distance_matrix

    # Rows index points, columns index workers.
    distance_matrix = get_distance_matrix(
        _extract_coordinates(points_df),
        _extract_coordinates(workers_df),
        method=distance,
        **kwargs,
    )

    rows = []
    for worker_pos, (_, worker) in enumerate(workers_df.iterrows()):
        to_points = distance_matrix[:, worker_pos]

        for position, point_pos in enumerate(np.argsort(to_points), start=1):
            point = points_df.iloc[point_pos]
            row = {
                "worker_id": worker_pos,
                "point_id": point_pos,
                "rank": position,
                "distance": to_points[point_pos],
            }
            # Worker columns first, then point columns, each with a prefix.
            row.update({f"worker_{col}": worker[col] for col in workers_df.columns})
            row.update({f"point_{col}": point[col] for col in points_df.columns})
            rows.append(row)

    return SortResult(
        data=pd.DataFrame(rows),
        distance_matrix=distance_matrix,
        metadata={
            "method": "sort_points_by_worker",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def distance_assignment(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Alias for ``assign_to_closest``: give each point its nearest worker.

    All arguments are forwarded unchanged.

    Args:
        points: Geographic points to assign
        workers: Worker/centroid locations
        distance: Distance metric
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult with point assignments
    """
    assignment = assign_to_closest(points, workers, distance=distance, **kwargs)
    return assignment
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def assign_to_closest(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Assign each point to its closest worker.

    The returned frame is a copy of the loaded points with an
    ``assigned_worker`` column (the worker's row position) and one
    ``worker_<col>`` column per worker column, holding the assigned worker's
    values. Worker columns keep their original dtype, and they are present
    even when there are no points.

    Args:
        points: Geographic points to assign
        workers: Worker/centroid locations
        distance: Distance metric
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult with point assignments; ``distance_matrix`` is ``None``.

    Raises:
        ValueError: If the workers lack 'longitude'/'latitude' columns.
    """
    # Load and standardize data
    points_df = DataHandler.load_data(points)
    workers_df = DataHandler.load_data(workers)

    workers_coords = _extract_coordinates(workers_df)

    # One worker row position per point, computed by the core algorithm.
    labels = sort_by_distance_assignment(
        points_df, workers_coords, distance_method=distance, **kwargs
    )

    result_df = points_df.copy()
    result_df["assigned_worker"] = labels

    # Gather the assigned worker rows by position in a single step. Writing
    # cell by cell with ``.loc[i, ...]`` would treat the positional counter
    # as an index label, which targets the wrong rows (or appends new ones)
    # when the points frame does not have a default 0..n-1 index, and would
    # upcast integer worker columns to float.
    assigned_workers = workers_df.iloc[np.asarray(labels, dtype=int)]
    for col in workers_df.columns:
        # to_numpy() drops the workers' index so the values line up with the
        # points row by row instead of being aligned on index labels.
        result_df[f"worker_{col}"] = assigned_workers[col].to_numpy()

    return SortResult(
        data=result_df,
        distance_matrix=None,
        metadata={
            "method": "assign_to_closest",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _extract_coordinates(df: pd.DataFrame) -> np.ndarray:
|
|
221
|
+
"""Extract longitude/latitude coordinates from DataFrame."""
|
|
222
|
+
if "longitude" in df.columns and "latitude" in df.columns:
|
|
223
|
+
return df[["longitude", "latitude"]].values
|
|
224
|
+
else:
|
|
225
|
+
raise ValueError("DataFrame must contain 'longitude' and 'latitude' columns")
|