allocator 1.0.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {allocator-1.0.0 → allocator-1.2.0}/PKG-INFO +61 -31
- allocator-1.2.0/README.md +87 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/__init__.py +20 -18
- {allocator-1.0.0 → allocator-1.2.0}/allocator/api/__init__.py +15 -1
- {allocator-1.0.0 → allocator-1.2.0}/allocator/api/cluster.py +5 -21
- {allocator-1.0.0 → allocator-1.2.0}/allocator/api/distance.py +0 -2
- allocator-1.2.0/allocator/api/itinerary.py +185 -0
- allocator-1.2.0/allocator/api/random_walk.py +139 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/api/route.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/api/types.py +23 -2
- allocator-1.2.0/allocator/cli/itinerary_cmd.py +147 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/main.py +39 -6
- allocator-1.2.0/allocator/cli/random_walk_cmd.py +128 -0
- allocator-1.2.0/allocator/core/__init__.py +13 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/core/algorithms.py +26 -86
- allocator-1.2.0/allocator/core/itinerary.py +479 -0
- allocator-1.2.0/allocator/core/random_walk.py +295 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/core/routing.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/euclidean.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/external_apis.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/factory.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/haversine.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/io/data_handler.py +0 -2
- allocator-1.2.0/allocator/stats/__init__.py +14 -0
- allocator-1.2.0/allocator/stats/design_effect.py +128 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/utils.py +0 -2
- {allocator-1.0.0 → allocator-1.2.0}/allocator/viz/__init__.py +4 -0
- allocator-1.2.0/allocator/viz/plotting.py +544 -0
- allocator-1.2.0/allocator/vulture_whitelist.py +5 -0
- {allocator-1.0.0 → allocator-1.2.0}/pyproject.toml +57 -5
- allocator-1.0.0/README.md +0 -57
- allocator-1.0.0/allocator/core/__init__.py +0 -1
- allocator-1.0.0/allocator/viz/plotting.py +0 -206
- {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/__init__.py +0 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/cluster_cmd.py +0 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/route_cmd.py +0 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/__init__.py +0 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/io/__init__.py +0 -0
- {allocator-1.0.0 → allocator-1.2.0}/allocator/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: allocator
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Modern Python package for geographic task allocation, clustering, and routing optimization
|
|
5
5
|
Keywords: geographic,allocation,clustering,routing,optimization,tsp,kmeans,geospatial,logistics,shortest-path
|
|
6
6
|
Author: Suriyan Laohaprapanon, Gaurav Sood
|
|
@@ -23,6 +23,7 @@ Classifier: Typing :: Typed
|
|
|
23
23
|
Requires-Dist: pandas>=2.0.0
|
|
24
24
|
Requires-Dist: numpy>=1.24.0
|
|
25
25
|
Requires-Dist: scikit-learn>=1.3.0
|
|
26
|
+
Requires-Dist: scipy>=1.10.0
|
|
26
27
|
Requires-Dist: utm>=0.7.0
|
|
27
28
|
Requires-Dist: haversine>=2.8.0
|
|
28
29
|
Requires-Dist: networkx>=3.0
|
|
@@ -33,7 +34,6 @@ Requires-Dist: googlemaps>=4.6.0
|
|
|
33
34
|
Requires-Dist: ortools>=9.5.0
|
|
34
35
|
Requires-Dist: matplotlib>=3.6.0
|
|
35
36
|
Requires-Dist: seaborn>=0.13.2
|
|
36
|
-
Requires-Dist: scipy>=1.10.0 ; extra == 'algorithms'
|
|
37
37
|
Requires-Dist: christofides>=1.0.0 ; extra == 'algorithms'
|
|
38
38
|
Requires-Dist: allocator[algorithms,geo] ; extra == 'all'
|
|
39
39
|
Requires-Dist: allocator[all,dev,test,docs] ; extra == 'complete'
|
|
@@ -58,12 +58,12 @@ Requires-Dist: hypothesis>=6.82.0 ; extra == 'test'
|
|
|
58
58
|
Maintainer: Gaurav Sood
|
|
59
59
|
Maintainer-email: Gaurav Sood <gsood07@gmail.com>
|
|
60
60
|
Requires-Python: >=3.11
|
|
61
|
-
Project-URL: Bug Reports, https://github.com/geosensing/allocator/issues
|
|
62
|
-
Project-URL: Changelog, https://github.com/geosensing/allocator/blob/main/CHANGELOG.md
|
|
63
|
-
Project-URL: Documentation, https://geosensing.github.io/allocator/
|
|
64
61
|
Project-URL: Homepage, https://github.com/geosensing/allocator
|
|
62
|
+
Project-URL: Documentation, https://geosensing.github.io/allocator/
|
|
65
63
|
Project-URL: Repository, https://github.com/geosensing/allocator.git
|
|
64
|
+
Project-URL: Bug Reports, https://github.com/geosensing/allocator/issues
|
|
66
65
|
Project-URL: Source Code, https://github.com/geosensing/allocator
|
|
66
|
+
Project-URL: Changelog, https://github.com/geosensing/allocator/blob/main/CHANGELOG.md
|
|
67
67
|
Provides-Extra: algorithms
|
|
68
68
|
Provides-Extra: all
|
|
69
69
|
Provides-Extra: complete
|
|
@@ -73,60 +73,90 @@ Provides-Extra: geo
|
|
|
73
73
|
Provides-Extra: test
|
|
74
74
|
Description-Content-Type: text/markdown
|
|
75
75
|
|
|
76
|
-
# allocator
|
|
76
|
+
# allocator
|
|
77
77
|
|
|
78
78
|
[](https://pypi.python.org/pypi/allocator)
|
|
79
79
|
[](https://pepy.tech/project/allocator)
|
|
80
80
|
[](https://github.com/geosensing/allocator/actions/workflows/ci.yml)
|
|
81
81
|
[](https://geosensing.github.io/allocator/)
|
|
82
82
|
|
|
83
|
-
|
|
83
|
+
Field teams, delivery services, and survey organizations waste time and money on inefficient routes. When you have 100+ locations to visit, manual planning fails. Allocator solves this.
|
|
84
84
|
|
|
85
|
-
##
|
|
85
|
+
## What It Does
|
|
86
86
|
|
|
87
|
-
-
|
|
88
|
-
-
|
|
89
|
-
-
|
|
90
|
-
-
|
|
91
|
-
- **📦 Modern API**: Clean Python interface + unified CLI
|
|
87
|
+
- **Cluster**: Divide locations into balanced work zones
|
|
88
|
+
- **Route**: Find the shortest path through locations (TSP)
|
|
89
|
+
- **Assign**: Match locations to nearest workers or depots
|
|
90
|
+
- **Random Walk**: Generate survey itineraries on road networks
|
|
92
91
|
|
|
93
|
-
##
|
|
92
|
+
## Install
|
|
94
93
|
|
|
95
94
|
```bash
|
|
96
95
|
pip install allocator
|
|
97
96
|
```
|
|
98
97
|
|
|
98
|
+
## Python API
|
|
99
|
+
|
|
100
|
+
### Cluster locations into zones
|
|
101
|
+
|
|
99
102
|
```python
|
|
100
103
|
import allocator
|
|
101
104
|
import pandas as pd
|
|
102
105
|
|
|
103
|
-
# Geographic locations
|
|
104
106
|
locations = pd.DataFrame({
|
|
105
|
-
'longitude': [100.
|
|
106
|
-
'latitude': [13.
|
|
107
|
+
'longitude': [100.501, 100.506, 100.510, 100.515, 100.520],
|
|
108
|
+
'latitude': [13.756, 13.759, 13.763, 13.768, 13.772]
|
|
107
109
|
})
|
|
108
110
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
+
result = allocator.cluster(locations, n_clusters=2)
|
|
112
|
+
print(result.labels) # [0 0 0 1 1]
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Find shortest route
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
route = allocator.shortest_path(locations, method='ortools')
|
|
119
|
+
print(route.route) # [0, 1, 2, 4, 3, 0]
|
|
120
|
+
```
|
|
111
121
|
|
|
112
|
-
|
|
113
|
-
route = allocator.shortest_path(locations)
|
|
122
|
+
### Assign to nearest depot
|
|
114
123
|
|
|
115
|
-
|
|
116
|
-
|
|
124
|
+
```python
|
|
125
|
+
depots = pd.DataFrame({
|
|
117
126
|
'longitude': [100.50, 100.52],
|
|
118
127
|
'latitude': [13.75, 13.77]
|
|
119
128
|
})
|
|
120
|
-
|
|
129
|
+
|
|
130
|
+
assignments = allocator.assign_to_closest(locations, depots)
|
|
131
|
+
print(assignments.data['assigned_worker'].tolist()) # [0, 0, 1, 1, 1]
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Generate random walk itineraries
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
import networkx as nx
|
|
138
|
+
|
|
139
|
+
# Load road network graph (from OSMnx or similar)
|
|
140
|
+
G = nx.read_graphml("road_network.graphml")
|
|
141
|
+
|
|
142
|
+
result = allocator.random_walk(G, n_walks=10, walk_length_m=5000)
|
|
143
|
+
print(result.data) # DataFrame with waypoints
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## CLI
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
allocator cluster kmeans locations.csv -n 5 -o zones.csv
|
|
150
|
+
allocator route tsp locations.csv --method ortools -o route.csv
|
|
151
|
+
allocator sort locations.csv --workers depots.csv -o assignments.csv
|
|
152
|
+
allocator random-walk road_network.graphml -n 10 -l 5000 -o waypoints.csv
|
|
121
153
|
```
|
|
122
154
|
|
|
123
|
-
## Documentation
|
|
155
|
+
## Documentation
|
|
124
156
|
|
|
125
|
-
-
|
|
126
|
-
-
|
|
127
|
-
- **🔧 [API Reference](https://geosensing.github.io/allocator/api/clustering.html)**
|
|
128
|
-
- **💡 [Real-World Examples](https://geosensing.github.io/allocator/examples/overview.html)**
|
|
157
|
+
- [Full Documentation](https://geosensing.github.io/allocator/)
|
|
158
|
+
- [API Reference](https://geosensing.github.io/allocator/api/clustering.html)
|
|
129
159
|
|
|
130
|
-
## License
|
|
160
|
+
## License
|
|
131
161
|
|
|
132
|
-
MIT
|
|
162
|
+
MIT
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# allocator
|
|
2
|
+
|
|
3
|
+
[](https://pypi.python.org/pypi/allocator)
|
|
4
|
+
[](https://pepy.tech/project/allocator)
|
|
5
|
+
[](https://github.com/geosensing/allocator/actions/workflows/ci.yml)
|
|
6
|
+
[](https://geosensing.github.io/allocator/)
|
|
7
|
+
|
|
8
|
+
Field teams, delivery services, and survey organizations waste time and money on inefficient routes. When you have 100+ locations to visit, manual planning fails. Allocator solves this.
|
|
9
|
+
|
|
10
|
+
## What It Does
|
|
11
|
+
|
|
12
|
+
- **Cluster**: Divide locations into balanced work zones
|
|
13
|
+
- **Route**: Find the shortest path through locations (TSP)
|
|
14
|
+
- **Assign**: Match locations to nearest workers or depots
|
|
15
|
+
- **Random Walk**: Generate survey itineraries on road networks
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install allocator
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Python API
|
|
24
|
+
|
|
25
|
+
### Cluster locations into zones
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
import allocator
|
|
29
|
+
import pandas as pd
|
|
30
|
+
|
|
31
|
+
locations = pd.DataFrame({
|
|
32
|
+
'longitude': [100.501, 100.506, 100.510, 100.515, 100.520],
|
|
33
|
+
'latitude': [13.756, 13.759, 13.763, 13.768, 13.772]
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
result = allocator.cluster(locations, n_clusters=2)
|
|
37
|
+
print(result.labels) # [0 0 0 1 1]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Find shortest route
|
|
41
|
+
|
|
42
|
+
```python
|
|
43
|
+
route = allocator.shortest_path(locations, method='ortools')
|
|
44
|
+
print(route.route) # [0, 1, 2, 4, 3, 0]
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Assign to nearest depot
|
|
48
|
+
|
|
49
|
+
```python
|
|
50
|
+
depots = pd.DataFrame({
|
|
51
|
+
'longitude': [100.50, 100.52],
|
|
52
|
+
'latitude': [13.75, 13.77]
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
assignments = allocator.assign_to_closest(locations, depots)
|
|
56
|
+
print(assignments.data['assigned_worker'].tolist()) # [0, 0, 1, 1, 1]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Generate random walk itineraries
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import networkx as nx
|
|
63
|
+
|
|
64
|
+
# Load road network graph (from OSMnx or similar)
|
|
65
|
+
G = nx.read_graphml("road_network.graphml")
|
|
66
|
+
|
|
67
|
+
result = allocator.random_walk(G, n_walks=10, walk_length_m=5000)
|
|
68
|
+
print(result.data) # DataFrame with waypoints
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## CLI
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
allocator cluster kmeans locations.csv -n 5 -o zones.csv
|
|
75
|
+
allocator route tsp locations.csv --method ortools -o route.csv
|
|
76
|
+
allocator sort locations.csv --workers depots.csv -o assignments.csv
|
|
77
|
+
allocator random-walk road_network.graphml -n 10 -l 5000 -o waypoints.csv
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Documentation
|
|
81
|
+
|
|
82
|
+
- [Full Documentation](https://geosensing.github.io/allocator/)
|
|
83
|
+
- [API Reference](https://geosensing.github.io/allocator/api/clustering.html)
|
|
84
|
+
|
|
85
|
+
## License
|
|
86
|
+
|
|
87
|
+
MIT
|
|
@@ -34,17 +34,25 @@ For more examples: https://geosensing.github.io/allocator/
|
|
|
34
34
|
|
|
35
35
|
import logging
|
|
36
36
|
import sys
|
|
37
|
+
import warnings
|
|
38
|
+
|
|
39
|
+
warnings.filterwarnings("ignore", message=".*SwigPyPacked.*")
|
|
40
|
+
warnings.filterwarnings("ignore", message=".*SwigPyObject.*")
|
|
41
|
+
warnings.filterwarnings("ignore", message=".*swigvarlink.*")
|
|
37
42
|
|
|
38
|
-
# Import modern API
|
|
39
43
|
from .api import (
|
|
40
44
|
ClusterResult,
|
|
41
45
|
ComparisonResult,
|
|
46
|
+
ItineraryResult,
|
|
47
|
+
RandomWalkResult,
|
|
42
48
|
RouteResult,
|
|
43
49
|
SortResult,
|
|
44
50
|
assign_to_closest,
|
|
45
51
|
cluster,
|
|
52
|
+
create_itineraries,
|
|
46
53
|
distance_assignment,
|
|
47
54
|
kmeans,
|
|
55
|
+
random_walk,
|
|
48
56
|
shortest_path,
|
|
49
57
|
sort_by_distance,
|
|
50
58
|
tsp_christofides,
|
|
@@ -52,8 +60,6 @@ from .api import (
|
|
|
52
60
|
tsp_ortools,
|
|
53
61
|
tsp_osrm,
|
|
54
62
|
)
|
|
55
|
-
|
|
56
|
-
# Import utilities for advanced users
|
|
57
63
|
from .distances import (
|
|
58
64
|
euclidean_distance_matrix,
|
|
59
65
|
get_distance_matrix,
|
|
@@ -63,26 +69,25 @@ from .distances import (
|
|
|
63
69
|
osrm_distance_matrix,
|
|
64
70
|
xy2latlog,
|
|
65
71
|
)
|
|
66
|
-
|
|
67
|
-
# Import visualization functions
|
|
68
72
|
from .viz.plotting import plot_assignments, plot_clusters, plot_comparison, plot_route
|
|
69
73
|
|
|
70
|
-
|
|
71
|
-
__version__ = "1.0.0"
|
|
74
|
+
__version__ = "1.2.0"
|
|
72
75
|
|
|
73
|
-
# Export public API
|
|
74
76
|
__all__ = [
|
|
75
77
|
# Result types
|
|
76
78
|
"ClusterResult",
|
|
77
79
|
"ComparisonResult",
|
|
80
|
+
"ItineraryResult",
|
|
81
|
+
"RandomWalkResult",
|
|
78
82
|
"RouteResult",
|
|
79
83
|
"SortResult",
|
|
80
|
-
"assign_to_closest",
|
|
81
84
|
# Main functions
|
|
85
|
+
"assign_to_closest",
|
|
82
86
|
"cluster",
|
|
87
|
+
"create_itineraries",
|
|
83
88
|
"distance_assignment",
|
|
84
|
-
"euclidean_distance_matrix",
|
|
85
89
|
# Distance utilities
|
|
90
|
+
"euclidean_distance_matrix",
|
|
86
91
|
"get_distance_matrix",
|
|
87
92
|
"get_logger",
|
|
88
93
|
"google_distance_matrix",
|
|
@@ -91,11 +96,13 @@ __all__ = [
|
|
|
91
96
|
"kmeans",
|
|
92
97
|
"latlon2xy",
|
|
93
98
|
"osrm_distance_matrix",
|
|
94
|
-
"plot_assignments",
|
|
95
99
|
# Visualization
|
|
100
|
+
"plot_assignments",
|
|
96
101
|
"plot_clusters",
|
|
97
102
|
"plot_comparison",
|
|
98
103
|
"plot_route",
|
|
104
|
+
# Random walk
|
|
105
|
+
"random_walk",
|
|
99
106
|
# Logging utilities
|
|
100
107
|
"setup_logging",
|
|
101
108
|
"shortest_path",
|
|
@@ -108,27 +115,23 @@ __all__ = [
|
|
|
108
115
|
]
|
|
109
116
|
|
|
110
117
|
|
|
111
|
-
def setup_logging(level=logging.INFO):
|
|
118
|
+
def setup_logging(level: int = logging.INFO) -> logging.Logger:
|
|
112
119
|
"""
|
|
113
120
|
Set up logging configuration for the allocator package.
|
|
114
121
|
|
|
115
122
|
Args:
|
|
116
123
|
level: Logging level (DEBUG, INFO, WARNING, ERROR)
|
|
117
124
|
"""
|
|
118
|
-
# Create formatter
|
|
119
125
|
formatter = logging.Formatter(
|
|
120
126
|
"%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
|
|
121
127
|
)
|
|
122
128
|
|
|
123
|
-
# Get root logger for allocator package
|
|
124
129
|
logger = logging.getLogger("allocator")
|
|
125
130
|
logger.setLevel(level)
|
|
126
131
|
|
|
127
|
-
# Remove existing handlers to avoid duplicates
|
|
128
132
|
for handler in logger.handlers[:]:
|
|
129
133
|
logger.removeHandler(handler)
|
|
130
134
|
|
|
131
|
-
# Console handler
|
|
132
135
|
console_handler = logging.StreamHandler(sys.stdout)
|
|
133
136
|
console_handler.setLevel(level)
|
|
134
137
|
console_handler.setFormatter(formatter)
|
|
@@ -137,7 +140,7 @@ def setup_logging(level=logging.INFO):
|
|
|
137
140
|
return logger
|
|
138
141
|
|
|
139
142
|
|
|
140
|
-
def get_logger(name):
|
|
143
|
+
def get_logger(name: str) -> logging.Logger:
|
|
141
144
|
"""
|
|
142
145
|
Get a logger instance for a specific module.
|
|
143
146
|
|
|
@@ -150,5 +153,4 @@ def get_logger(name):
|
|
|
150
153
|
return logging.getLogger(f"allocator.{name}")
|
|
151
154
|
|
|
152
155
|
|
|
153
|
-
# Set up default logging
|
|
154
156
|
setup_logging()
|
|
@@ -6,22 +6,36 @@ This module provides a modern, Pythonic interface to the allocator package.
|
|
|
6
6
|
|
|
7
7
|
from .cluster import cluster, kmeans
|
|
8
8
|
from .distance import assign_to_closest, distance_assignment, sort_by_distance
|
|
9
|
+
from .itinerary import create_itineraries
|
|
10
|
+
from .random_walk import random_walk
|
|
9
11
|
from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
|
|
10
|
-
from .types import ClusterResult, ComparisonResult, RouteResult, SortResult
|
|
12
|
+
from .types import (
|
|
13
|
+
ClusterResult,
|
|
14
|
+
ComparisonResult,
|
|
15
|
+
ItineraryResult,
|
|
16
|
+
RandomWalkResult,
|
|
17
|
+
RouteResult,
|
|
18
|
+
SortResult,
|
|
19
|
+
)
|
|
11
20
|
|
|
12
21
|
__all__ = [
|
|
13
22
|
# Result types
|
|
14
23
|
"ClusterResult",
|
|
15
24
|
"ComparisonResult",
|
|
25
|
+
"ItineraryResult",
|
|
26
|
+
"RandomWalkResult",
|
|
16
27
|
"RouteResult",
|
|
17
28
|
"SortResult",
|
|
18
29
|
# Distance assignment methods
|
|
19
30
|
"assign_to_closest",
|
|
20
31
|
# Main high-level functions
|
|
21
32
|
"cluster",
|
|
33
|
+
"create_itineraries",
|
|
22
34
|
"distance_assignment",
|
|
23
35
|
# Specific clustering methods
|
|
24
36
|
"kmeans",
|
|
37
|
+
# Random walk
|
|
38
|
+
"random_walk",
|
|
25
39
|
"shortest_path",
|
|
26
40
|
"sort_by_distance",
|
|
27
41
|
# Specific routing methods
|
|
@@ -2,8 +2,6 @@
|
|
|
2
2
|
Modern clustering API for allocator package.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
from __future__ import annotations
|
|
6
|
-
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
|
|
9
7
|
import numpy as np
|
|
@@ -66,15 +64,16 @@ def kmeans(
|
|
|
66
64
|
Args:
|
|
67
65
|
data: Input data as DataFrame or numpy array
|
|
68
66
|
n_clusters: Number of clusters
|
|
69
|
-
distance: Distance metric (
|
|
67
|
+
distance: Distance metric (stored in metadata only; clustering uses Euclidean)
|
|
70
68
|
max_iter: Maximum iterations
|
|
71
69
|
random_state: Random seed for reproducibility
|
|
72
|
-
**kwargs: Additional
|
|
70
|
+
**kwargs: Additional arguments (unused, kept for API compatibility)
|
|
73
71
|
|
|
74
72
|
Returns:
|
|
75
73
|
ClusterResult with clustering information
|
|
76
74
|
"""
|
|
77
|
-
|
|
75
|
+
del kwargs
|
|
76
|
+
|
|
78
77
|
if isinstance(data, np.ndarray):
|
|
79
78
|
df = DataHandler._from_numpy(data)
|
|
80
79
|
elif isinstance(data, list):
|
|
@@ -84,36 +83,21 @@ def kmeans(
|
|
|
84
83
|
else:
|
|
85
84
|
df = data.copy()
|
|
86
85
|
|
|
87
|
-
# Run clustering algorithm
|
|
88
86
|
result = _kmeans_cluster(
|
|
89
87
|
df,
|
|
90
88
|
n_clusters=n_clusters,
|
|
91
|
-
distance_method=distance,
|
|
92
89
|
max_iter=max_iter,
|
|
93
90
|
random_state=random_state,
|
|
94
|
-
**kwargs,
|
|
95
91
|
)
|
|
96
92
|
|
|
97
|
-
# Add cluster assignments to DataFrame
|
|
98
93
|
df_result = df.copy()
|
|
99
94
|
df_result["cluster"] = result["labels"]
|
|
100
95
|
|
|
101
|
-
# Calculate inertia (sum of squared distances to centroids)
|
|
102
|
-
inertia = None
|
|
103
|
-
if distance == "euclidean":
|
|
104
|
-
from ..distances import euclidean_distance_matrix
|
|
105
|
-
|
|
106
|
-
coords = df[["longitude", "latitude"]].values
|
|
107
|
-
distances = euclidean_distance_matrix(coords, result["centroids"])
|
|
108
|
-
inertia = np.sum(
|
|
109
|
-
[distances[i, result["labels"][i]] ** 2 for i in range(len(result["labels"]))]
|
|
110
|
-
)
|
|
111
|
-
|
|
112
96
|
return ClusterResult(
|
|
113
97
|
labels=result["labels"],
|
|
114
98
|
centroids=result["centroids"],
|
|
115
99
|
n_iter=result["iterations"],
|
|
116
|
-
inertia=inertia,
|
|
100
|
+
inertia=result["inertia"],
|
|
117
101
|
data=df_result,
|
|
118
102
|
converged=result["converged"],
|
|
119
103
|
metadata={
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""
|
|
2
|
+
API for itinerary generation.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from ..core.itinerary import (
|
|
11
|
+
greedy_grow_itineraries,
|
|
12
|
+
kmeans_tsp_itineraries,
|
|
13
|
+
random_partition_itineraries,
|
|
14
|
+
round_robin_itineraries,
|
|
15
|
+
softmax_greedy_itineraries,
|
|
16
|
+
stratified_itineraries,
|
|
17
|
+
)
|
|
18
|
+
from ..distances import get_distance_matrix
|
|
19
|
+
from ..io.data_handler import DataHandler
|
|
20
|
+
from .types import BUDGET_METHODS, PARTITION_METHODS, VALID_METHODS, ItineraryResult
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def create_itineraries(
|
|
24
|
+
data: str | pd.DataFrame | np.ndarray | list[Any],
|
|
25
|
+
max_distance: float | None = None,
|
|
26
|
+
n_itineraries: int | None = None,
|
|
27
|
+
method: str = "greedy_nn",
|
|
28
|
+
distance: str = "haversine",
|
|
29
|
+
start_method: str = "random",
|
|
30
|
+
temperature: float = 0.1,
|
|
31
|
+
n_strata: int = 4,
|
|
32
|
+
optimize_routes: bool = True,
|
|
33
|
+
seed: int | None = None,
|
|
34
|
+
**kwargs: Any,
|
|
35
|
+
) -> ItineraryResult:
|
|
36
|
+
"""
|
|
37
|
+
Create multiple itineraries from points with a distance budget per itinerary.
|
|
38
|
+
|
|
39
|
+
Args:
|
|
40
|
+
data: Input data (file path, DataFrame, numpy array, or list)
|
|
41
|
+
max_distance: Maximum total distance per itinerary (in meters for haversine/osrm/google).
|
|
42
|
+
Required for greedy_nn and softmax_greedy methods.
|
|
43
|
+
n_itineraries: Number of itineraries to create. Required for random_partition,
|
|
44
|
+
stratified, round_robin, and kmeans_tsp methods.
|
|
45
|
+
method: Itinerary generation method:
|
|
46
|
+
- "greedy_nn": Greedy nearest-neighbor (default, most efficient)
|
|
47
|
+
- "random_partition": Random assignment (theoretical baseline)
|
|
48
|
+
- "stratified": Stratified by distance from centroid
|
|
49
|
+
- "round_robin": Round-robin assignment
|
|
50
|
+
- "softmax_greedy": Greedy with softmax sampling
|
|
51
|
+
- "kmeans_tsp": K-means clustering with TSP optimization
|
|
52
|
+
distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
|
|
53
|
+
start_method: How to pick starting point for greedy methods
|
|
54
|
+
- "random": Random unvisited point
|
|
55
|
+
- "furthest": Point furthest from centroid of remaining points
|
|
56
|
+
- "first": First available unvisited point (index order)
|
|
57
|
+
temperature: Softmax temperature for softmax_greedy method (default 0.1)
|
|
58
|
+
n_strata: Number of strata for stratified method (default 4)
|
|
59
|
+
optimize_routes: Whether to TSP-optimize routes for partition methods (default True)
|
|
60
|
+
seed: Random seed for reproducibility
|
|
61
|
+
**kwargs: Additional arguments for distance calculation:
|
|
62
|
+
- api_key: Required for 'google' distance
|
|
63
|
+
- osrm_base_url: Custom OSRM server URL
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
ItineraryResult containing:
|
|
67
|
+
- itineraries: List of routes (each route is list of point indices)
|
|
68
|
+
- distances: Total distance for each itinerary
|
|
69
|
+
- data: Original DataFrame with itinerary_id column added
|
|
70
|
+
- metadata: Algorithm details
|
|
71
|
+
|
|
72
|
+
Example:
|
|
73
|
+
>>> result = create_itineraries('points.csv', max_distance=20000, method='greedy_nn')
|
|
74
|
+
>>> result = create_itineraries('points.csv', n_itineraries=10, method='random_partition')
|
|
75
|
+
"""
|
|
76
|
+
if method not in VALID_METHODS:
|
|
77
|
+
raise ValueError(f"Unknown method: {method}. Use one of {VALID_METHODS}")
|
|
78
|
+
|
|
79
|
+
if method in BUDGET_METHODS and max_distance is None:
|
|
80
|
+
raise ValueError(f"max_distance is required for method '{method}'")
|
|
81
|
+
if method in PARTITION_METHODS and n_itineraries is None:
|
|
82
|
+
raise ValueError(f"n_itineraries is required for method '{method}'")
|
|
83
|
+
|
|
84
|
+
df = DataHandler.load_data(data)
|
|
85
|
+
|
|
86
|
+
if len(df) == 0:
|
|
87
|
+
return ItineraryResult(
|
|
88
|
+
itineraries=[],
|
|
89
|
+
distances=[],
|
|
90
|
+
data=df.assign(itinerary_id=[]),
|
|
91
|
+
metadata={
|
|
92
|
+
"n_points": 0,
|
|
93
|
+
"n_itineraries": 0,
|
|
94
|
+
"max_distance": max_distance,
|
|
95
|
+
"method": method,
|
|
96
|
+
"distance": distance,
|
|
97
|
+
},
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
points: np.ndarray = df[["longitude", "latitude"]].to_numpy()
|
|
101
|
+
distance_matrix = get_distance_matrix(points, points, method=distance, **kwargs)
|
|
102
|
+
|
|
103
|
+
rng = np.random.default_rng(seed)
|
|
104
|
+
|
|
105
|
+
itineraries: list[list[int]]
|
|
106
|
+
distances: list[float]
|
|
107
|
+
|
|
108
|
+
if method == "greedy_nn":
|
|
109
|
+
itineraries, distances = greedy_grow_itineraries(
|
|
110
|
+
distance_matrix,
|
|
111
|
+
max_distance=max_distance, # type: ignore[arg-type]
|
|
112
|
+
start_method=start_method,
|
|
113
|
+
rng=rng,
|
|
114
|
+
)
|
|
115
|
+
elif method == "random_partition":
|
|
116
|
+
itineraries, distances = random_partition_itineraries(
|
|
117
|
+
distance_matrix,
|
|
118
|
+
n_itineraries=n_itineraries, # type: ignore[arg-type]
|
|
119
|
+
optimize_routes=optimize_routes,
|
|
120
|
+
rng=rng,
|
|
121
|
+
)
|
|
122
|
+
elif method == "stratified":
|
|
123
|
+
itineraries, distances = stratified_itineraries(
|
|
124
|
+
distance_matrix,
|
|
125
|
+
points=points,
|
|
126
|
+
n_itineraries=n_itineraries, # type: ignore[arg-type]
|
|
127
|
+
n_strata=n_strata,
|
|
128
|
+
optimize_routes=optimize_routes,
|
|
129
|
+
rng=rng,
|
|
130
|
+
)
|
|
131
|
+
elif method == "round_robin":
|
|
132
|
+
itineraries, distances = round_robin_itineraries(
|
|
133
|
+
distance_matrix,
|
|
134
|
+
n_itineraries=n_itineraries, # type: ignore[arg-type]
|
|
135
|
+
optimize_routes=optimize_routes,
|
|
136
|
+
rng=rng,
|
|
137
|
+
)
|
|
138
|
+
elif method == "softmax_greedy":
|
|
139
|
+
itineraries, distances = softmax_greedy_itineraries(
|
|
140
|
+
distance_matrix,
|
|
141
|
+
max_distance=max_distance, # type: ignore[arg-type]
|
|
142
|
+
temperature=temperature,
|
|
143
|
+
start_method=start_method,
|
|
144
|
+
rng=rng,
|
|
145
|
+
)
|
|
146
|
+
else:
|
|
147
|
+
itineraries, distances = kmeans_tsp_itineraries(
|
|
148
|
+
distance_matrix,
|
|
149
|
+
points=points,
|
|
150
|
+
n_itineraries=n_itineraries, # type: ignore[arg-type]
|
|
151
|
+
max_distance=max_distance,
|
|
152
|
+
rng=rng,
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
itinerary_ids = np.full(len(df), -1, dtype=int)
|
|
156
|
+
for itinerary_idx, route in enumerate(itineraries):
|
|
157
|
+
for point_idx in route:
|
|
158
|
+
itinerary_ids[point_idx] = itinerary_idx
|
|
159
|
+
|
|
160
|
+
result_df = df.copy()
|
|
161
|
+
result_df["itinerary_id"] = itinerary_ids
|
|
162
|
+
|
|
163
|
+
return ItineraryResult(
|
|
164
|
+
itineraries=itineraries,
|
|
165
|
+
distances=distances,
|
|
166
|
+
data=result_df,
|
|
167
|
+
metadata={
|
|
168
|
+
"n_points": len(df),
|
|
169
|
+
"n_itineraries": len(itineraries),
|
|
170
|
+
"max_distance": max_distance,
|
|
171
|
+
"n_itineraries_requested": n_itineraries,
|
|
172
|
+
"method": method,
|
|
173
|
+
"distance": distance,
|
|
174
|
+
"start_method": start_method if method in BUDGET_METHODS else None,
|
|
175
|
+
"temperature": temperature if method == "softmax_greedy" else None,
|
|
176
|
+
"n_strata": n_strata if method == "stratified" else None,
|
|
177
|
+
"optimize_routes": optimize_routes if method in PARTITION_METHODS else None,
|
|
178
|
+
"seed": seed,
|
|
179
|
+
"total_distance": float(sum(distances)) if distances else 0.0,
|
|
180
|
+
"avg_distance": float(np.mean(distances)) if distances else 0.0,
|
|
181
|
+
"avg_points_per_itinerary": (
|
|
182
|
+
float(np.mean([len(it) for it in itineraries])) if itineraries else 0.0
|
|
183
|
+
),
|
|
184
|
+
},
|
|
185
|
+
)
|