allocator 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
allocator/__init__.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ Allocator v1.0: Optimally Allocate Geographically Distributed Tasks
3
+
4
+ A modern Python package for geographic task allocation, clustering, and routing optimization.
5
+
6
+ Key Features:
7
+ - Cluster geographic points into balanced groups
8
+ - Find optimal routes through locations (TSP solving)
9
+ - Assign points to closest workers/centers
10
+ - Multiple distance metrics (euclidean, haversine, OSRM, Google Maps)
11
+ - Clean API with structured results and rich metadata
12
+ - Unified CLI with beautiful terminal output
13
+
14
+ Quick Start:
15
+ >>> import allocator
16
+ >>> import pandas as pd
17
+ >>>
18
+ >>> # Create sample data
19
+ >>> data = pd.DataFrame({
20
+ ... 'longitude': [101.0, 101.1, 101.2],
21
+ ... 'latitude': [13.0, 13.1, 13.2]
22
+ ... })
23
+ >>>
24
+ >>> # Cluster locations
25
+ >>> result = allocator.cluster(data, n_clusters=2)
26
+ >>> print(result.labels)
27
+ >>>
28
+ >>> # Find optimal route
29
+ >>> route = allocator.shortest_path(data, method='ortools')
30
+ >>> print(route.route)
31
+
32
+ For more examples: https://geosensing.github.io/allocator/
33
+ """
34
+
35
+ import logging
36
+ import sys
37
+
38
+ # Import modern API
39
+ from .api import (
40
+ ClusterResult,
41
+ ComparisonResult,
42
+ RouteResult,
43
+ SortResult,
44
+ assign_to_closest,
45
+ cluster,
46
+ distance_assignment,
47
+ kmeans,
48
+ shortest_path,
49
+ sort_by_distance,
50
+ tsp_christofides,
51
+ tsp_google,
52
+ tsp_ortools,
53
+ tsp_osrm,
54
+ )
55
+
56
+ # Import utilities for advanced users
57
+ from .distances import (
58
+ euclidean_distance_matrix,
59
+ get_distance_matrix,
60
+ google_distance_matrix,
61
+ haversine_distance_matrix,
62
+ latlon2xy,
63
+ osrm_distance_matrix,
64
+ xy2latlog,
65
+ )
66
+
67
+ # Import visualization functions
68
+ from .viz.plotting import plot_assignments, plot_clusters, plot_comparison, plot_route
69
+
70
+ # Version
71
+ __version__ = "1.0.0"
72
+
73
+ # Export public API
74
+ __all__ = [
75
+ # NOTE: names are listed in alphabetical order, so each category comment
+ # below describes the name directly beneath it, not necessarily every name
+ # up to the next comment.
+ # Result types
76
+ "ClusterResult",
77
+ "ComparisonResult",
78
+ "RouteResult",
79
+ "SortResult",
80
+ "assign_to_closest",
81
+ # Main functions
82
+ "cluster",
83
+ "distance_assignment",
84
+ "euclidean_distance_matrix",
85
+ # Distance utilities
86
+ "get_distance_matrix",
87
+ "get_logger",
88
+ "google_distance_matrix",
89
+ "haversine_distance_matrix",
90
+ # Specific methods
91
+ "kmeans",
92
+ "latlon2xy",
93
+ "osrm_distance_matrix",
94
+ "plot_assignments",
95
+ # Visualization
96
+ "plot_clusters",
97
+ "plot_comparison",
98
+ "plot_route",
99
+ # Logging utilities
100
+ "setup_logging",
101
+ "shortest_path",
102
+ "sort_by_distance",
103
+ "tsp_christofides",
104
+ "tsp_google",
105
+ "tsp_ortools",
106
+ "tsp_osrm",
107
+ "xy2latlog",
108
+ ]
109
+
110
+
111
def setup_logging(level=logging.INFO):
    """
    Configure the ``allocator`` logger to write formatted records to stdout.

    Handlers already attached to the ``allocator`` logger are detached first,
    so calling this function repeatedly leaves exactly one console handler.

    Args:
        level: Logging level applied to both the logger and its console
            handler (DEBUG, INFO, WARNING, ERROR)

    Returns:
        The configured ``allocator`` logger
    """
    package_logger = logging.getLogger("allocator")
    package_logger.setLevel(level)

    # Walk a snapshot: removeHandler mutates the live handler list.
    for stale_handler in list(package_logger.handlers):
        package_logger.removeHandler(stale_handler)

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    package_logger.addHandler(stdout_handler)

    return package_logger
138
+
139
+
140
def get_logger(name):
    """
    Return a logger namespaced under the ``allocator`` logger.

    Args:
        name: Suffix placed after ``allocator.`` in the logger name
            (typically a module name)

    Returns:
        The ``logging.Logger`` registered as ``allocator.<name>``
    """
    qualified_name = "allocator.{}".format(name)
    return logging.getLogger(qualified_name)
151
+
152
+
153
+ # Set up default logging at import time: importing the package attaches a
+ # stdout handler to the "allocator" logger using setup_logging's default
+ # level. Call setup_logging(level) again to change it.
154
+ setup_logging()
@@ -0,0 +1,32 @@
1
+ """
2
+ Public API for allocator package.
3
+
4
+ This module provides a modern, Pythonic interface to the allocator package.
5
+ """
6
+
7
+ from .cluster import cluster, kmeans
8
+ from .distance import assign_to_closest, distance_assignment, sort_by_distance
9
+ from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
10
+ from .types import ClusterResult, ComparisonResult, RouteResult, SortResult
11
+
12
+ __all__ = [
13
+ # NOTE: names are listed in alphabetical order, so each category comment
+ # below describes the name directly beneath it, not necessarily every name
+ # up to the next comment.
+ # Result types
14
+ "ClusterResult",
15
+ "ComparisonResult",
16
+ "RouteResult",
17
+ "SortResult",
18
+ # Distance assignment methods
19
+ "assign_to_closest",
20
+ # Main high-level functions
21
+ "cluster",
22
+ "distance_assignment",
23
+ # Specific clustering methods
24
+ "kmeans",
25
+ "shortest_path",
26
+ "sort_by_distance",
27
+ # Specific routing methods
28
+ "tsp_christofides",
29
+ "tsp_google",
30
+ "tsp_ortools",
31
+ "tsp_osrm",
32
+ ]
@@ -0,0 +1,126 @@
1
+ """
2
+ Modern clustering API for allocator package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from pathlib import Path
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from ..core.algorithms import kmeans_cluster as _kmeans_cluster
13
+ from ..io.data_handler import DataHandler
14
+ from .types import ClusterResult
15
+
16
+
17
def cluster(
    data: str | pd.DataFrame | np.ndarray | list,
    n_clusters: int = 3,
    method: str = "kmeans",
    distance: str = "euclidean",
    random_state: int | None = None,
    **kwargs,
) -> ClusterResult:
    """
    Cluster geographic data points.

    Args:
        data: Input data (file path, DataFrame, numpy array, or list)
        n_clusters: Number of clusters to create
        method: Clustering method ('kmeans')
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        random_state: Random seed for reproducibility
        **kwargs: Additional arguments forwarded to the selected method

    Returns:
        ClusterResult with labels, centroids, and metadata

    Raises:
        ValueError: If ``method`` is not a supported clustering method. This
            is checked before ``data`` is loaded.

    Example:
        >>> result = cluster('data.csv', n_clusters=5, method='kmeans')
        >>> print(result.labels)  # Cluster assignments
        >>> print(result.centroids)  # Cluster centers
    """
    # Fail fast: reject an unsupported method before loading the input, so a
    # typo in `method` does not cost a file read / data standardization first.
    if method != "kmeans":
        raise ValueError(f"Unknown clustering method: {method}. Available methods: 'kmeans'")

    # Load and standardize data
    df = DataHandler.load_data(data)

    return kmeans(
        df, n_clusters=n_clusters, distance=distance, random_state=random_state, **kwargs
    )
53
+
54
+
55
def kmeans(
    data: str | Path | pd.DataFrame | np.ndarray | list,
    n_clusters: int = 3,
    distance: str = "euclidean",
    max_iter: int = 300,
    random_state: int | None = None,
    **kwargs,
) -> ClusterResult:
    """
    K-means clustering of geographic data.

    Args:
        data: Input data as a DataFrame, numpy array, list, or file path.
            A DataFrame is copied before use.
        n_clusters: Number of clusters
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        max_iter: Maximum iterations
        random_state: Random seed for reproducibility
        **kwargs: Additional arguments forwarded to the clustering algorithm

    Returns:
        ClusterResult with labels, centroids, iteration count, convergence
        flag, the input data with an added ``cluster`` column, and metadata.
        ``inertia`` (sum of squared distances from each point to its assigned
        centroid) is only computed when ``distance == "euclidean"``;
        otherwise it is ``None``.
    """
    # Ensure we have a DataFrame for output
    if isinstance(data, np.ndarray):
        df = DataHandler._from_numpy(data)
    elif isinstance(data, list):
        df = DataHandler._from_list(data)
    elif isinstance(data, (str, Path)):
        df = DataHandler.load_data(data)
    else:
        df = data.copy()

    # Run clustering algorithm
    result = _kmeans_cluster(
        df,
        n_clusters=n_clusters,
        distance_method=distance,
        max_iter=max_iter,
        random_state=random_state,
        **kwargs,
    )
    labels = result["labels"]
    centroids = result["centroids"]

    # Add cluster assignments to a separate copy so `df` keeps only the
    # input columns.
    df_result = df.copy()
    df_result["cluster"] = labels

    # Calculate inertia (sum of squared distances to assigned centroids)
    inertia = None
    if distance == "euclidean":
        from ..distances import euclidean_distance_matrix

        coords = df[["longitude", "latitude"]].values
        distances = euclidean_distance_matrix(coords, centroids)
        # Pick, for every point, the distance to its own centroid in a single
        # vectorized lookup instead of a Python-level loop over rows.
        label_idx = np.asarray(labels, dtype=np.intp)
        inertia = np.sum(distances[np.arange(label_idx.size), label_idx] ** 2)

    return ClusterResult(
        labels=labels,
        centroids=centroids,
        n_iter=result["iterations"],
        inertia=inertia,
        data=df_result,
        converged=result["converged"],
        metadata={
            "method": "kmeans",
            "distance": distance,
            "n_clusters": n_clusters,
            "max_iter": max_iter,
            "random_state": random_state,
        },
    )
@@ -0,0 +1,225 @@
1
+ """
2
+ Modern distance-based assignment API for allocator package.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from ..core.algorithms import sort_by_distance_assignment
11
+ from ..io.data_handler import DataHandler
12
+ from .types import SortResult
13
+
14
+
15
def sort_by_distance(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    by_worker: bool = False,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Rank workers for each point, or points for each worker, by distance.

    Args:
        points: Geographic points (file path, DataFrame, numpy array, or list)
        workers: Worker/centroid locations (file path, DataFrame, numpy array, or list)
        by_worker: If True, rank points for every worker; if False, rank
            workers for every point
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult with the ranked pairs and the distance matrix

    Example:
        >>> result = sort_by_distance('points.csv', 'workers.csv')
        >>> print(result.data)  # One row per (point, worker) pair
    """
    # Both inputs go through the same loader before ranking.
    points_df = DataHandler.load_data(points)
    workers_df = DataHandler.load_data(workers)

    ranker = _sort_points_by_worker if by_worker else _sort_workers_by_point
    return ranker(points_df, workers_df, distance=distance, **kwargs)
47
+
48
+
49
def _sort_workers_by_point(
    points_df: pd.DataFrame, workers_df: pd.DataFrame, distance: str = "euclidean", **kwargs
) -> SortResult:
    """Rank every worker by distance for each point (default behavior).

    The result holds one row per (point, worker) pair, grouped by point and
    ordered nearest-first within each point. ``rank`` starts at 1. The
    original point and worker columns are copied in with ``point_`` and
    ``worker_`` prefixes.
    """
    from ..distances import get_distance_matrix

    # Rows of the matrix follow points, columns follow workers.
    distance_matrix = get_distance_matrix(
        _extract_coordinates(points_df),
        _extract_coordinates(workers_df),
        method=distance,
        **kwargs,
    )

    point_columns = list(points_df.columns)
    worker_columns = list(workers_df.columns)

    rows = []
    for point_pos, (_, point_row) in enumerate(points_df.iterrows()):
        dists_to_workers = distance_matrix[point_pos, :]

        for rank, worker_pos in enumerate(np.argsort(dists_to_workers), start=1):
            worker_row = workers_df.iloc[worker_pos]
            row = {
                "point_id": point_pos,
                "worker_id": worker_pos,
                "rank": rank,
                "distance": dists_to_workers[worker_pos],
            }
            # NOTE: an input column literally named "id" maps to "point_id" /
            # "worker_id" and therefore replaces the positional value above.
            row.update({f"point_{col}": point_row[col] for col in point_columns})
            row.update({f"worker_{col}": worker_row[col] for col in worker_columns})
            rows.append(row)

    return SortResult(
        data=pd.DataFrame(rows),
        distance_matrix=distance_matrix,
        metadata={
            "method": "sort_workers_by_point",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
96
+
97
+
98
def _sort_points_by_worker(
    points_df: pd.DataFrame, workers_df: pd.DataFrame, distance: str = "euclidean", **kwargs
) -> SortResult:
    """Rank every point by distance for each worker.

    The result holds one row per (worker, point) pair, grouped by worker and
    ordered nearest-first within each worker. ``rank`` starts at 1. The
    original worker and point columns are copied in with ``worker_`` and
    ``point_`` prefixes.
    """
    from ..distances import get_distance_matrix

    # Rows of the matrix follow points, columns follow workers.
    distance_matrix = get_distance_matrix(
        _extract_coordinates(points_df),
        _extract_coordinates(workers_df),
        method=distance,
        **kwargs,
    )

    worker_columns = list(workers_df.columns)
    point_columns = list(points_df.columns)

    rows = []
    for worker_pos, (_, worker_row) in enumerate(workers_df.iterrows()):
        dists_to_points = distance_matrix[:, worker_pos]

        for rank, point_pos in enumerate(np.argsort(dists_to_points), start=1):
            point_row = points_df.iloc[point_pos]
            row = {
                "worker_id": worker_pos,
                "point_id": point_pos,
                "rank": rank,
                "distance": dists_to_points[point_pos],
            }
            # NOTE: an input column literally named "id" maps to "worker_id" /
            # "point_id" and therefore replaces the positional value above.
            row.update({f"worker_{col}": worker_row[col] for col in worker_columns})
            row.update({f"point_{col}": point_row[col] for col in point_columns})
            rows.append(row)

    return SortResult(
        data=pd.DataFrame(rows),
        distance_matrix=distance_matrix,
        metadata={
            "method": "sort_points_by_worker",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
145
+
146
+
147
def distance_assignment(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Alias for ``assign_to_closest``: assign each point to its closest worker.

    Args:
        points: Geographic points to assign
        workers: Worker/centroid locations
        distance: Distance metric
        **kwargs: Additional distance-specific arguments, forwarded unchanged

    Returns:
        SortResult with point assignments, exactly as returned by
        ``assign_to_closest``
    """
    # Pure delegation; this name exists only as an alternative spelling.
    return assign_to_closest(points=points, workers=workers, distance=distance, **kwargs)
166
+
167
+
168
def assign_to_closest(
    points: str | pd.DataFrame | np.ndarray | list,
    workers: str | pd.DataFrame | np.ndarray | list,
    distance: str = "euclidean",
    **kwargs,
) -> SortResult:
    """
    Assign each point to its closest worker.

    Args:
        points: Geographic points to assign
        workers: Worker/centroid locations
        distance: Distance metric
        **kwargs: Additional distance-specific arguments

    Returns:
        SortResult whose ``data`` is a copy of the points with an
        ``assigned_worker`` column plus one ``worker_<col>`` column for every
        column of the assigned worker's row. ``distance_matrix`` is ``None``.
    """
    # Load and standardize data
    points_df = DataHandler.load_data(points)
    workers_df = DataHandler.load_data(workers)

    # Extract coordinates
    workers_coords = _extract_coordinates(workers_df)

    # Get assignments using core algorithm
    labels = sort_by_distance_assignment(
        points_df, workers_coords, distance_method=distance, **kwargs
    )

    # Create result DataFrame
    result_df = points_df.copy()
    result_df["assigned_worker"] = labels

    # Add worker info. Labels are used as positional row numbers into
    # workers_df. Whole columns are assigned from plain arrays, so the values
    # land by position: a label-based `.loc[i, ...]` write would hit the wrong
    # row (or append new ones) whenever the points index is not 0..n-1, and
    # would coerce worker dtypes while filling cell by cell.
    worker_positions = np.asarray(labels, dtype=np.intp)
    matched_workers = workers_df.iloc[worker_positions]
    for col_pos, col in enumerate(workers_df.columns):
        result_df[f"worker_{col}"] = matched_workers.iloc[:, col_pos].to_numpy()

    return SortResult(
        data=result_df,
        distance_matrix=None,
        metadata={
            "method": "assign_to_closest",
            "distance": distance,
            "n_points": len(points_df),
            "n_workers": len(workers_df),
        },
    )
218
+
219
+
220
def _extract_coordinates(df: pd.DataFrame) -> np.ndarray:
    """Return the ``longitude`` and ``latitude`` columns as a 2-column array.

    Raises:
        ValueError: If either column is missing from ``df``.
    """
    # Guard clause: both coordinate columns are required.
    if "longitude" not in df.columns or "latitude" not in df.columns:
        raise ValueError("DataFrame must contain 'longitude' and 'latitude' columns")

    coordinate_frame = df[["longitude", "latitude"]]
    return coordinate_frame.values