allocator 1.0.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {allocator-1.0.0 → allocator-1.2.0}/PKG-INFO +61 -31
  2. allocator-1.2.0/README.md +87 -0
  3. {allocator-1.0.0 → allocator-1.2.0}/allocator/__init__.py +20 -18
  4. {allocator-1.0.0 → allocator-1.2.0}/allocator/api/__init__.py +15 -1
  5. {allocator-1.0.0 → allocator-1.2.0}/allocator/api/cluster.py +5 -21
  6. {allocator-1.0.0 → allocator-1.2.0}/allocator/api/distance.py +0 -2
  7. allocator-1.2.0/allocator/api/itinerary.py +185 -0
  8. allocator-1.2.0/allocator/api/random_walk.py +139 -0
  9. {allocator-1.0.0 → allocator-1.2.0}/allocator/api/route.py +0 -2
  10. {allocator-1.0.0 → allocator-1.2.0}/allocator/api/types.py +23 -2
  11. allocator-1.2.0/allocator/cli/itinerary_cmd.py +147 -0
  12. {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/main.py +39 -6
  13. allocator-1.2.0/allocator/cli/random_walk_cmd.py +128 -0
  14. allocator-1.2.0/allocator/core/__init__.py +13 -0
  15. {allocator-1.0.0 → allocator-1.2.0}/allocator/core/algorithms.py +26 -86
  16. allocator-1.2.0/allocator/core/itinerary.py +479 -0
  17. allocator-1.2.0/allocator/core/random_walk.py +295 -0
  18. {allocator-1.0.0 → allocator-1.2.0}/allocator/core/routing.py +0 -2
  19. {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/euclidean.py +0 -2
  20. {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/external_apis.py +0 -2
  21. {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/factory.py +0 -2
  22. {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/haversine.py +0 -2
  23. {allocator-1.0.0 → allocator-1.2.0}/allocator/io/data_handler.py +0 -2
  24. allocator-1.2.0/allocator/stats/__init__.py +14 -0
  25. allocator-1.2.0/allocator/stats/design_effect.py +128 -0
  26. {allocator-1.0.0 → allocator-1.2.0}/allocator/utils.py +0 -2
  27. {allocator-1.0.0 → allocator-1.2.0}/allocator/viz/__init__.py +4 -0
  28. allocator-1.2.0/allocator/viz/plotting.py +544 -0
  29. allocator-1.2.0/allocator/vulture_whitelist.py +5 -0
  30. {allocator-1.0.0 → allocator-1.2.0}/pyproject.toml +57 -5
  31. allocator-1.0.0/README.md +0 -57
  32. allocator-1.0.0/allocator/core/__init__.py +0 -1
  33. allocator-1.0.0/allocator/viz/plotting.py +0 -206
  34. {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/__init__.py +0 -0
  35. {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/cluster_cmd.py +0 -0
  36. {allocator-1.0.0 → allocator-1.2.0}/allocator/cli/route_cmd.py +0 -0
  37. {allocator-1.0.0 → allocator-1.2.0}/allocator/distances/__init__.py +0 -0
  38. {allocator-1.0.0 → allocator-1.2.0}/allocator/io/__init__.py +0 -0
  39. {allocator-1.0.0 → allocator-1.2.0}/allocator/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: allocator
3
- Version: 1.0.0
3
+ Version: 1.2.0
4
4
  Summary: Modern Python package for geographic task allocation, clustering, and routing optimization
5
5
  Keywords: geographic,allocation,clustering,routing,optimization,tsp,kmeans,geospatial,logistics,shortest-path
6
6
  Author: Suriyan Laohaprapanon, Gaurav Sood
@@ -23,6 +23,7 @@ Classifier: Typing :: Typed
23
23
  Requires-Dist: pandas>=2.0.0
24
24
  Requires-Dist: numpy>=1.24.0
25
25
  Requires-Dist: scikit-learn>=1.3.0
26
+ Requires-Dist: scipy>=1.10.0
26
27
  Requires-Dist: utm>=0.7.0
27
28
  Requires-Dist: haversine>=2.8.0
28
29
  Requires-Dist: networkx>=3.0
@@ -33,7 +34,6 @@ Requires-Dist: googlemaps>=4.6.0
33
34
  Requires-Dist: ortools>=9.5.0
34
35
  Requires-Dist: matplotlib>=3.6.0
35
36
  Requires-Dist: seaborn>=0.13.2
36
- Requires-Dist: scipy>=1.10.0 ; extra == 'algorithms'
37
37
  Requires-Dist: christofides>=1.0.0 ; extra == 'algorithms'
38
38
  Requires-Dist: allocator[algorithms,geo] ; extra == 'all'
39
39
  Requires-Dist: allocator[all,dev,test,docs] ; extra == 'complete'
@@ -58,12 +58,12 @@ Requires-Dist: hypothesis>=6.82.0 ; extra == 'test'
58
58
  Maintainer: Gaurav Sood
59
59
  Maintainer-email: Gaurav Sood <gsood07@gmail.com>
60
60
  Requires-Python: >=3.11
61
- Project-URL: Bug Reports, https://github.com/geosensing/allocator/issues
62
- Project-URL: Changelog, https://github.com/geosensing/allocator/blob/main/CHANGELOG.md
63
- Project-URL: Documentation, https://geosensing.github.io/allocator/
64
61
  Project-URL: Homepage, https://github.com/geosensing/allocator
62
+ Project-URL: Documentation, https://geosensing.github.io/allocator/
65
63
  Project-URL: Repository, https://github.com/geosensing/allocator.git
64
+ Project-URL: Bug Reports, https://github.com/geosensing/allocator/issues
66
65
  Project-URL: Source Code, https://github.com/geosensing/allocator
66
+ Project-URL: Changelog, https://github.com/geosensing/allocator/blob/main/CHANGELOG.md
67
67
  Provides-Extra: algorithms
68
68
  Provides-Extra: all
69
69
  Provides-Extra: complete
@@ -73,60 +73,90 @@ Provides-Extra: geo
73
73
  Provides-Extra: test
74
74
  Description-Content-Type: text/markdown
75
75
 
76
- # allocator: Efficiently collect data from geographically distributed locations
76
+ # allocator
77
77
 
78
78
  [![PyPI version](https://img.shields.io/pypi/v/allocator.svg)](https://pypi.python.org/pypi/allocator)
79
79
  [![Downloads](https://pepy.tech/badge/allocator)](https://pepy.tech/project/allocator)
80
80
  [![CI](https://github.com/geosensing/allocator/actions/workflows/ci.yml/badge.svg)](https://github.com/geosensing/allocator/actions/workflows/ci.yml)
81
81
  [![Documentation](https://img.shields.io/badge/docs-github.io-blue)](https://geosensing.github.io/allocator/)
82
82
 
83
- **Allocator** provides a modern, Pythonic API for geographic task allocation, clustering, and routing optimization.
83
+ Field teams, delivery services, and survey organizations waste time and money on inefficient routes. When you have 100+ locations to visit, manual planning fails. Allocator solves this.
84
84
 
85
- ## Key Features
85
+ ## What It Does
86
86
 
87
- - **🎯 Clustering**: Group geographic points into balanced zones
88
- - **🛣️ Routing**: Find optimal paths through locations (TSP solving)
89
- - **📍 Assignment**: Connect points to closest workers/centers
90
- - **🚀 Performance**: Optimized algorithms with NumPy and scikit-learn
91
- - **📦 Modern API**: Clean Python interface + unified CLI
87
+ - **Cluster**: Divide locations into balanced work zones
88
+ - **Route**: Find the shortest path through locations (TSP)
89
+ - **Assign**: Match locations to nearest workers or depots
90
+ - **Random Walk**: Generate survey itineraries on road networks
92
91
 
93
- ## Quick Start
92
+ ## Install
94
93
 
95
94
  ```bash
96
95
  pip install allocator
97
96
  ```
98
97
 
98
+ ## Python API
99
+
100
+ ### Cluster locations into zones
101
+
99
102
  ```python
100
103
  import allocator
101
104
  import pandas as pd
102
105
 
103
- # Geographic locations
104
106
  locations = pd.DataFrame({
105
- 'longitude': [100.5018, 100.5065, 100.5108],
106
- 'latitude': [13.7563, 13.7590, 13.7633]
107
+ 'longitude': [100.501, 100.506, 100.510, 100.515, 100.520],
108
+ 'latitude': [13.756, 13.759, 13.763, 13.768, 13.772]
107
109
  })
108
110
 
109
- # Group into zones
110
- clusters = allocator.cluster(locations, n_clusters=2)
111
+ result = allocator.cluster(locations, n_clusters=2)
112
+ print(result.labels) # [0 0 0 1 1]
113
+ ```
114
+
115
+ ### Find shortest route
116
+
117
+ ```python
118
+ route = allocator.shortest_path(locations, method='ortools')
119
+ print(route.route) # [0, 1, 2, 4, 3, 0]
120
+ ```
111
121
 
112
- # Find optimal route
113
- route = allocator.shortest_path(locations)
122
+ ### Assign to nearest depot
114
123
 
115
- # Assign to service centers
116
- centers = pd.DataFrame({
124
+ ```python
125
+ depots = pd.DataFrame({
117
126
  'longitude': [100.50, 100.52],
118
127
  'latitude': [13.75, 13.77]
119
128
  })
120
- assignments = allocator.assign(locations, centers)
129
+
130
+ assignments = allocator.assign_to_closest(locations, depots)
131
+ print(assignments.data['assigned_worker'].tolist()) # [0, 0, 1, 1, 1]
132
+ ```
133
+
134
+ ### Generate random walk itineraries
135
+
136
+ ```python
137
+ import networkx as nx
138
+
139
+ # Load road network graph (from OSMnx or similar)
140
+ G = nx.read_graphml("road_network.graphml")
141
+
142
+ result = allocator.random_walk(G, n_walks=10, walk_length_m=5000)
143
+ print(result.data) # DataFrame with waypoints
144
+ ```
145
+
146
+ ## CLI
147
+
148
+ ```bash
149
+ allocator cluster kmeans locations.csv -n 5 -o zones.csv
150
+ allocator route tsp locations.csv --method ortools -o route.csv
151
+ allocator sort locations.csv --workers depots.csv -o assignments.csv
152
+ allocator random-walk road_network.graphml -n 10 -l 5000 -o waypoints.csv
121
153
  ```
122
154
 
123
- ## Documentation & Examples
155
+ ## Documentation
124
156
 
125
- - **📖 [Full Documentation](https://geosensing.github.io/allocator/)**
126
- - **🚀 [Installation & Tutorial](https://geosensing.github.io/allocator/quickstart.html)**
127
- - **🔧 [API Reference](https://geosensing.github.io/allocator/api/clustering.html)**
128
- - **💡 [Real-World Examples](https://geosensing.github.io/allocator/examples/overview.html)**
157
+ - [Full Documentation](https://geosensing.github.io/allocator/)
158
+ - [API Reference](https://geosensing.github.io/allocator/api/clustering.html)
129
159
 
130
- ## License & Contributing
160
+ ## License
131
161
 
132
- MIT License. Contributions welcome - see [Contributing Guide](https://geosensing.github.io/allocator/contributing.html).
162
+ MIT
@@ -0,0 +1,87 @@
1
+ # allocator
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/allocator.svg)](https://pypi.python.org/pypi/allocator)
4
+ [![Downloads](https://pepy.tech/badge/allocator)](https://pepy.tech/project/allocator)
5
+ [![CI](https://github.com/geosensing/allocator/actions/workflows/ci.yml/badge.svg)](https://github.com/geosensing/allocator/actions/workflows/ci.yml)
6
+ [![Documentation](https://img.shields.io/badge/docs-github.io-blue)](https://geosensing.github.io/allocator/)
7
+
8
+ Field teams, delivery services, and survey organizations waste time and money on inefficient routes. When you have 100+ locations to visit, manual planning fails. Allocator solves this.
9
+
10
+ ## What It Does
11
+
12
+ - **Cluster**: Divide locations into balanced work zones
13
+ - **Route**: Find the shortest path through locations (TSP)
14
+ - **Assign**: Match locations to nearest workers or depots
15
+ - **Random Walk**: Generate survey itineraries on road networks
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install allocator
21
+ ```
22
+
23
+ ## Python API
24
+
25
+ ### Cluster locations into zones
26
+
27
+ ```python
28
+ import allocator
29
+ import pandas as pd
30
+
31
+ locations = pd.DataFrame({
32
+ 'longitude': [100.501, 100.506, 100.510, 100.515, 100.520],
33
+ 'latitude': [13.756, 13.759, 13.763, 13.768, 13.772]
34
+ })
35
+
36
+ result = allocator.cluster(locations, n_clusters=2)
37
+ print(result.labels) # [0 0 0 1 1]
38
+ ```
39
+
40
+ ### Find shortest route
41
+
42
+ ```python
43
+ route = allocator.shortest_path(locations, method='ortools')
44
+ print(route.route) # [0, 1, 2, 4, 3, 0]
45
+ ```
46
+
47
+ ### Assign to nearest depot
48
+
49
+ ```python
50
+ depots = pd.DataFrame({
51
+ 'longitude': [100.50, 100.52],
52
+ 'latitude': [13.75, 13.77]
53
+ })
54
+
55
+ assignments = allocator.assign_to_closest(locations, depots)
56
+ print(assignments.data['assigned_worker'].tolist()) # [0, 0, 1, 1, 1]
57
+ ```
58
+
59
+ ### Generate random walk itineraries
60
+
61
+ ```python
62
+ import networkx as nx
63
+
64
+ # Load road network graph (from OSMnx or similar)
65
+ G = nx.read_graphml("road_network.graphml")
66
+
67
+ result = allocator.random_walk(G, n_walks=10, walk_length_m=5000)
68
+ print(result.data) # DataFrame with waypoints
69
+ ```
70
+
71
+ ## CLI
72
+
73
+ ```bash
74
+ allocator cluster kmeans locations.csv -n 5 -o zones.csv
75
+ allocator route tsp locations.csv --method ortools -o route.csv
76
+ allocator sort locations.csv --workers depots.csv -o assignments.csv
77
+ allocator random-walk road_network.graphml -n 10 -l 5000 -o waypoints.csv
78
+ ```
79
+
80
+ ## Documentation
81
+
82
+ - [Full Documentation](https://geosensing.github.io/allocator/)
83
+ - [API Reference](https://geosensing.github.io/allocator/api/clustering.html)
84
+
85
+ ## License
86
+
87
+ MIT
@@ -34,17 +34,25 @@ For more examples: https://geosensing.github.io/allocator/
34
34
 
35
35
  import logging
36
36
  import sys
37
+ import warnings
38
+
39
+ warnings.filterwarnings("ignore", message=".*SwigPyPacked.*")
40
+ warnings.filterwarnings("ignore", message=".*SwigPyObject.*")
41
+ warnings.filterwarnings("ignore", message=".*swigvarlink.*")
37
42
 
38
- # Import modern API
39
43
  from .api import (
40
44
  ClusterResult,
41
45
  ComparisonResult,
46
+ ItineraryResult,
47
+ RandomWalkResult,
42
48
  RouteResult,
43
49
  SortResult,
44
50
  assign_to_closest,
45
51
  cluster,
52
+ create_itineraries,
46
53
  distance_assignment,
47
54
  kmeans,
55
+ random_walk,
48
56
  shortest_path,
49
57
  sort_by_distance,
50
58
  tsp_christofides,
@@ -52,8 +60,6 @@ from .api import (
52
60
  tsp_ortools,
53
61
  tsp_osrm,
54
62
  )
55
-
56
- # Import utilities for advanced users
57
63
  from .distances import (
58
64
  euclidean_distance_matrix,
59
65
  get_distance_matrix,
@@ -63,26 +69,25 @@ from .distances import (
63
69
  osrm_distance_matrix,
64
70
  xy2latlog,
65
71
  )
66
-
67
- # Import visualization functions
68
72
  from .viz.plotting import plot_assignments, plot_clusters, plot_comparison, plot_route
69
73
 
70
- # Version
71
- __version__ = "1.0.0"
74
+ __version__ = "1.2.0"
72
75
 
73
- # Export public API
74
76
  __all__ = [
75
77
  # Result types
76
78
  "ClusterResult",
77
79
  "ComparisonResult",
80
+ "ItineraryResult",
81
+ "RandomWalkResult",
78
82
  "RouteResult",
79
83
  "SortResult",
80
- "assign_to_closest",
81
84
  # Main functions
85
+ "assign_to_closest",
82
86
  "cluster",
87
+ "create_itineraries",
83
88
  "distance_assignment",
84
- "euclidean_distance_matrix",
85
89
  # Distance utilities
90
+ "euclidean_distance_matrix",
86
91
  "get_distance_matrix",
87
92
  "get_logger",
88
93
  "google_distance_matrix",
@@ -91,11 +96,13 @@ __all__ = [
91
96
  "kmeans",
92
97
  "latlon2xy",
93
98
  "osrm_distance_matrix",
94
- "plot_assignments",
95
99
  # Visualization
100
+ "plot_assignments",
96
101
  "plot_clusters",
97
102
  "plot_comparison",
98
103
  "plot_route",
104
+ # Random walk
105
+ "random_walk",
99
106
  # Logging utilities
100
107
  "setup_logging",
101
108
  "shortest_path",
@@ -108,27 +115,23 @@ __all__ = [
108
115
  ]
109
116
 
110
117
 
111
- def setup_logging(level=logging.INFO):
118
+ def setup_logging(level: int = logging.INFO) -> logging.Logger:
112
119
  """
113
120
  Set up logging configuration for the allocator package.
114
121
 
115
122
  Args:
116
123
  level: Logging level (DEBUG, INFO, WARNING, ERROR)
117
124
  """
118
- # Create formatter
119
125
  formatter = logging.Formatter(
120
126
  "%(asctime)s - %(name)s - %(levelname)s - %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
121
127
  )
122
128
 
123
- # Get root logger for allocator package
124
129
  logger = logging.getLogger("allocator")
125
130
  logger.setLevel(level)
126
131
 
127
- # Remove existing handlers to avoid duplicates
128
132
  for handler in logger.handlers[:]:
129
133
  logger.removeHandler(handler)
130
134
 
131
- # Console handler
132
135
  console_handler = logging.StreamHandler(sys.stdout)
133
136
  console_handler.setLevel(level)
134
137
  console_handler.setFormatter(formatter)
@@ -137,7 +140,7 @@ def setup_logging(level=logging.INFO):
137
140
  return logger
138
141
 
139
142
 
140
- def get_logger(name):
143
+ def get_logger(name: str) -> logging.Logger:
141
144
  """
142
145
  Get a logger instance for a specific module.
143
146
 
@@ -150,5 +153,4 @@ def get_logger(name):
150
153
  return logging.getLogger(f"allocator.{name}")
151
154
 
152
155
 
153
- # Set up default logging
154
156
  setup_logging()
@@ -6,22 +6,36 @@ This module provides a modern, Pythonic interface to the allocator package.
6
6
 
7
7
  from .cluster import cluster, kmeans
8
8
  from .distance import assign_to_closest, distance_assignment, sort_by_distance
9
+ from .itinerary import create_itineraries
10
+ from .random_walk import random_walk
9
11
  from .route import shortest_path, tsp_christofides, tsp_google, tsp_ortools, tsp_osrm
10
- from .types import ClusterResult, ComparisonResult, RouteResult, SortResult
12
+ from .types import (
13
+ ClusterResult,
14
+ ComparisonResult,
15
+ ItineraryResult,
16
+ RandomWalkResult,
17
+ RouteResult,
18
+ SortResult,
19
+ )
11
20
 
12
21
  __all__ = [
13
22
  # Result types
14
23
  "ClusterResult",
15
24
  "ComparisonResult",
25
+ "ItineraryResult",
26
+ "RandomWalkResult",
16
27
  "RouteResult",
17
28
  "SortResult",
18
29
  # Distance assignment methods
19
30
  "assign_to_closest",
20
31
  # Main high-level functions
21
32
  "cluster",
33
+ "create_itineraries",
22
34
  "distance_assignment",
23
35
  # Specific clustering methods
24
36
  "kmeans",
37
+ # Random walk
38
+ "random_walk",
25
39
  "shortest_path",
26
40
  "sort_by_distance",
27
41
  # Specific routing methods
@@ -2,8 +2,6 @@
2
2
  Modern clustering API for allocator package.
3
3
  """
4
4
 
5
- from __future__ import annotations
6
-
7
5
  from pathlib import Path
8
6
 
9
7
  import numpy as np
@@ -66,15 +64,16 @@ def kmeans(
66
64
  Args:
67
65
  data: Input data as DataFrame or numpy array
68
66
  n_clusters: Number of clusters
69
- distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
67
+ distance: Distance metric (stored in metadata only; clustering uses Euclidean)
70
68
  max_iter: Maximum iterations
71
69
  random_state: Random seed for reproducibility
72
- **kwargs: Additional distance-specific arguments
70
+ **kwargs: Additional arguments (unused, kept for API compatibility)
73
71
 
74
72
  Returns:
75
73
  ClusterResult with clustering information
76
74
  """
77
- # Ensure we have a DataFrame for output
75
+ del kwargs
76
+
78
77
  if isinstance(data, np.ndarray):
79
78
  df = DataHandler._from_numpy(data)
80
79
  elif isinstance(data, list):
@@ -84,36 +83,21 @@ def kmeans(
84
83
  else:
85
84
  df = data.copy()
86
85
 
87
- # Run clustering algorithm
88
86
  result = _kmeans_cluster(
89
87
  df,
90
88
  n_clusters=n_clusters,
91
- distance_method=distance,
92
89
  max_iter=max_iter,
93
90
  random_state=random_state,
94
- **kwargs,
95
91
  )
96
92
 
97
- # Add cluster assignments to DataFrame
98
93
  df_result = df.copy()
99
94
  df_result["cluster"] = result["labels"]
100
95
 
101
- # Calculate inertia (sum of squared distances to centroids)
102
- inertia = None
103
- if distance == "euclidean":
104
- from ..distances import euclidean_distance_matrix
105
-
106
- coords = df[["longitude", "latitude"]].values
107
- distances = euclidean_distance_matrix(coords, result["centroids"])
108
- inertia = np.sum(
109
- [distances[i, result["labels"][i]] ** 2 for i in range(len(result["labels"]))]
110
- )
111
-
112
96
  return ClusterResult(
113
97
  labels=result["labels"],
114
98
  centroids=result["centroids"],
115
99
  n_iter=result["iterations"],
116
- inertia=inertia,
100
+ inertia=result["inertia"],
117
101
  data=df_result,
118
102
  converged=result["converged"],
119
103
  metadata={
@@ -2,8 +2,6 @@
2
2
  Modern distance-based assignment API for allocator package.
3
3
  """
4
4
 
5
- from __future__ import annotations
6
-
7
5
  import numpy as np
8
6
  import pandas as pd
9
7
 
@@ -0,0 +1,185 @@
1
+ """
2
+ API for itinerary generation.
3
+ """
4
+
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from ..core.itinerary import (
11
+ greedy_grow_itineraries,
12
+ kmeans_tsp_itineraries,
13
+ random_partition_itineraries,
14
+ round_robin_itineraries,
15
+ softmax_greedy_itineraries,
16
+ stratified_itineraries,
17
+ )
18
+ from ..distances import get_distance_matrix
19
+ from ..io.data_handler import DataHandler
20
+ from .types import BUDGET_METHODS, PARTITION_METHODS, VALID_METHODS, ItineraryResult
21
+
22
+
23
def create_itineraries(
    data: str | pd.DataFrame | np.ndarray | list[Any],
    max_distance: float | None = None,
    n_itineraries: int | None = None,
    method: str = "greedy_nn",
    distance: str = "haversine",
    start_method: str = "random",
    temperature: float = 0.1,
    n_strata: int = 4,
    optimize_routes: bool = True,
    seed: int | None = None,
    **kwargs: Any,
) -> ItineraryResult:
    """
    Create multiple itineraries from points with a distance budget per itinerary.

    Args:
        data: Input data (file path, DataFrame, numpy array, or list)
        max_distance: Maximum total distance per itinerary (in meters for haversine/osrm/google).
            Required for greedy_nn and softmax_greedy methods.
        n_itineraries: Number of itineraries to create. Required for random_partition,
            stratified, round_robin, and kmeans_tsp methods.
        method: Itinerary generation method:
            - "greedy_nn": Greedy nearest-neighbor (default, most efficient)
            - "random_partition": Random assignment (theoretical baseline)
            - "stratified": Stratified by distance from centroid
            - "round_robin": Round-robin assignment
            - "softmax_greedy": Greedy with softmax sampling
            - "kmeans_tsp": K-means clustering with TSP optimization
        distance: Distance metric ('euclidean', 'haversine', 'osrm', 'google')
        start_method: How to pick starting point for greedy methods
            - "random": Random unvisited point
            - "furthest": Point furthest from centroid of remaining points
            - "first": First available unvisited point (index order)
        temperature: Softmax temperature for softmax_greedy method (default 0.1)
        n_strata: Number of strata for stratified method (default 4)
        optimize_routes: Whether to TSP-optimize routes for partition methods (default True)
        seed: Random seed for reproducibility
        **kwargs: Additional arguments for distance calculation:
            - api_key: Required for 'google' distance
            - osrm_base_url: Custom OSRM server URL

    Returns:
        ItineraryResult containing:
            - itineraries: List of routes (each route is list of point indices)
            - distances: Total distance for each itinerary
            - data: Original DataFrame with itinerary_id column added
              (-1 for any point that appears in no route)
            - metadata: Algorithm details

        Empty input yields a result with no itineraries whose ``data`` and
        ``metadata`` have the same columns/keys as a non-empty result.

    Raises:
        ValueError: If ``method`` is not in VALID_METHODS, or if the parameter
            the chosen method requires (``max_distance`` or ``n_itineraries``)
            is None.

    Example:
        >>> result = create_itineraries('points.csv', max_distance=20000, method='greedy_nn')
        >>> result = create_itineraries('points.csv', n_itineraries=10, method='random_partition')
    """
    if method not in VALID_METHODS:
        raise ValueError(f"Unknown method: {method}. Use one of {VALID_METHODS}")

    if method in BUDGET_METHODS and max_distance is None:
        raise ValueError(f"max_distance is required for method '{method}'")
    if method in PARTITION_METHODS and n_itineraries is None:
        raise ValueError(f"n_itineraries is required for method '{method}'")

    df = DataHandler.load_data(data)

    # Defaults describe the "no points" outcome; they are overwritten below
    # whenever there is at least one point to route.
    itineraries: list[list[int]] = []
    distances: list[float] = []

    # Skip coordinate extraction and the distance matrix entirely for empty
    # input so neither the column lookup nor the distance backend is invoked.
    if len(df) > 0:
        points: np.ndarray = df[["longitude", "latitude"]].to_numpy()
        distance_matrix = get_distance_matrix(points, points, method=distance, **kwargs)

        rng = np.random.default_rng(seed)

        if method == "greedy_nn":
            itineraries, distances = greedy_grow_itineraries(
                distance_matrix,
                max_distance=max_distance,  # type: ignore[arg-type]
                start_method=start_method,
                rng=rng,
            )
        elif method == "random_partition":
            itineraries, distances = random_partition_itineraries(
                distance_matrix,
                n_itineraries=n_itineraries,  # type: ignore[arg-type]
                optimize_routes=optimize_routes,
                rng=rng,
            )
        elif method == "stratified":
            itineraries, distances = stratified_itineraries(
                distance_matrix,
                points=points,
                n_itineraries=n_itineraries,  # type: ignore[arg-type]
                n_strata=n_strata,
                optimize_routes=optimize_routes,
                rng=rng,
            )
        elif method == "round_robin":
            itineraries, distances = round_robin_itineraries(
                distance_matrix,
                n_itineraries=n_itineraries,  # type: ignore[arg-type]
                optimize_routes=optimize_routes,
                rng=rng,
            )
        elif method == "softmax_greedy":
            itineraries, distances = softmax_greedy_itineraries(
                distance_matrix,
                max_distance=max_distance,  # type: ignore[arg-type]
                temperature=temperature,
                start_method=start_method,
                rng=rng,
            )
        else:
            # Every other member of VALID_METHODS is routed here, as before.
            itineraries, distances = kmeans_tsp_itineraries(
                distance_matrix,
                points=points,
                n_itineraries=n_itineraries,  # type: ignore[arg-type]
                max_distance=max_distance,
                rng=rng,
            )

    # -1 marks points that no route visits. Building the column from an
    # explicitly int-typed array keeps its dtype identical for empty and
    # non-empty input.
    itinerary_ids = np.full(len(df), -1, dtype=int)
    for itinerary_idx, route in enumerate(itineraries):
        for point_idx in route:
            itinerary_ids[point_idx] = itinerary_idx

    result_df = df.copy()
    result_df["itinerary_id"] = itinerary_ids

    # A single metadata schema for every outcome, so callers can index any key
    # without first checking whether the input happened to be empty.
    return ItineraryResult(
        itineraries=itineraries,
        distances=distances,
        data=result_df,
        metadata={
            "n_points": len(df),
            "n_itineraries": len(itineraries),
            "max_distance": max_distance,
            "n_itineraries_requested": n_itineraries,
            "method": method,
            "distance": distance,
            "start_method": start_method if method in BUDGET_METHODS else None,
            "temperature": temperature if method == "softmax_greedy" else None,
            "n_strata": n_strata if method == "stratified" else None,
            "optimize_routes": optimize_routes if method in PARTITION_METHODS else None,
            "seed": seed,
            "total_distance": float(sum(distances)) if distances else 0.0,
            "avg_distance": float(np.mean(distances)) if distances else 0.0,
            "avg_points_per_itinerary": (
                float(np.mean([len(it) for it in itineraries])) if itineraries else 0.0
            ),
        },
    )