ortidy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ortidy/__init__.py +41 -0
- ortidy/_narwhals.py +62 -0
- ortidy/_scaling.py +58 -0
- ortidy/assignment/__init__.py +5 -0
- ortidy/assignment/assignment.py +101 -0
- ortidy/binning/__init__.py +7 -0
- ortidy/binning/bin_packing.py +122 -0
- ortidy/binning/knapsack.py +80 -0
- ortidy/binning/multi_knapsack.py +127 -0
- ortidy/data/__init__.py +61 -0
- ortidy/data/binning/bins.csv +6 -0
- ortidy/data/binning/items_bin_packing.csv +12 -0
- ortidy/data/binning/items_knapsack.csv +51 -0
- ortidy/data/binning/items_multi.csv +16 -0
- ortidy/data/routing/locations.csv +18 -0
- ortidy/data/routing/pickups_and_deliveries.csv +9 -0
- ortidy/data/routing/vehicles.csv +5 -0
- ortidy/facility/__init__.py +5 -0
- ortidy/facility/facility_location.py +140 -0
- ortidy/flow/__init__.py +5 -0
- ortidy/flow/flow.py +198 -0
- ortidy/result.py +98 -0
- ortidy/routing/__init__.py +6 -0
- ortidy/routing/distance.py +92 -0
- ortidy/routing/routing.py +318 -0
- ortidy/scheduling/__init__.py +5 -0
- ortidy/scheduling/shift_scheduling.py +152 -0
- ortidy/schema.py +41 -0
- ortidy-0.2.0.dist-info/METADATA +126 -0
- ortidy-0.2.0.dist-info/RECORD +32 -0
- ortidy-0.2.0.dist-info/WHEEL +4 -0
- ortidy-0.2.0.dist-info/licenses/LICENSE.md +201 -0
ortidy/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""ortidy — operations research on tidy dataframes.
|
|
2
|
+
|
|
3
|
+
A backend-agnostic (Narwhals) dataframe façade over Google OR-Tools. Solvers
|
|
4
|
+
accept native frames (pandas, Polars, …), return the same backend, and hand back
|
|
5
|
+
a :class:`~ortidy.result.SolveResult` carrying the result frame, a status enum,
|
|
6
|
+
the objective, and solve metadata.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from ortidy import data
|
|
12
|
+
from ortidy.assignment.assignment import assignment
|
|
13
|
+
from ortidy.binning.bin_packing import bin_packing
|
|
14
|
+
from ortidy.binning.knapsack import knapsack
|
|
15
|
+
from ortidy.binning.multi_knapsack import multi_knapsack
|
|
16
|
+
from ortidy.facility.facility_location import facility_location
|
|
17
|
+
from ortidy.flow.flow import max_flow, min_cost_flow, shortest_path
|
|
18
|
+
from ortidy.result import SolveResult, SolveStatus
|
|
19
|
+
from ortidy.routing.distance import distance_matrix
|
|
20
|
+
from ortidy.routing.routing import solve_routing
|
|
21
|
+
from ortidy.scheduling.shift_scheduling import shift_scheduling
|
|
22
|
+
|
|
23
|
+
__version__ = "0.2.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"knapsack",
|
|
27
|
+
"multi_knapsack",
|
|
28
|
+
"bin_packing",
|
|
29
|
+
"assignment",
|
|
30
|
+
"max_flow",
|
|
31
|
+
"min_cost_flow",
|
|
32
|
+
"shortest_path",
|
|
33
|
+
"solve_routing",
|
|
34
|
+
"distance_matrix",
|
|
35
|
+
"shift_scheduling",
|
|
36
|
+
"facility_location",
|
|
37
|
+
"data",
|
|
38
|
+
"SolveResult",
|
|
39
|
+
"SolveStatus",
|
|
40
|
+
"__version__",
|
|
41
|
+
]
|
ortidy/_narwhals.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Narwhals boundary helpers.
|
|
2
|
+
|
|
3
|
+
The library is backend-agnostic: a user who passes pandas gets pandas back, a
|
|
4
|
+
user who passes Polars gets Polars back. We accept native frames at the public
|
|
5
|
+
boundary, do internal work in Narwhals, and return native frames. At the solver
|
|
6
|
+
boundary we extract plain Python lists/ints/floats — OR-Tools does not consume
|
|
7
|
+
dataframes.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import narwhals.stable.v1 as nw
|
|
15
|
+
|
|
16
|
+
ID_COLUMN_DEFAULT = "__ortidy_row_id__"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def to_nw(frame: Any) -> nw.DataFrame:
    """Convert a user-supplied native frame into a Narwhals DataFrame.

    The conversion is requested with ``eager_only=True``.
    """
    wrapped = nw.from_native(frame, eager_only=True)
    return wrapped
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def to_native(frame: nw.DataFrame) -> Any:
    """Hand back the backend-native frame held by a Narwhals DataFrame."""
    native = frame.to_native()
    return native
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def column_to_list(frame: nw.DataFrame, column: str) -> list:
    """Pull one column out of ``frame`` as a plain Python list.

    This is the hand-off used at the solver boundary, where plain lists are
    needed instead of dataframe columns.
    """
    series = frame.get_column(column)
    return series.to_list()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def ensure_id_column(
    frame: nw.DataFrame, id_column: str | None
) -> tuple[nw.DataFrame, str, bool]:
    """Make sure ``frame`` carries an explicit row-identity column.

    Row identity is always an explicit column rather than a positional index.
    A caller-supplied ``id_column`` must already be present in the frame; when
    ``id_column`` is ``None`` a row-index column named ``ID_COLUMN_DEFAULT`` is
    added instead.

    Returns:
        ``(frame, id_column_name, was_synthesized)``.

    Raises:
        KeyError: If ``id_column`` is given but is not a column of ``frame``.
    """
    if id_column is None:
        # No identity supplied: synthesize one via the frame's row index.
        indexed = frame.with_row_index(name=ID_COLUMN_DEFAULT)
        return indexed, ID_COLUMN_DEFAULT, True

    if id_column in frame.columns:
        return frame, id_column, False

    raise KeyError(
        f"id column {id_column!r} not found; columns are {frame.columns}"
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def drop_if_synthesized(
    frame: nw.DataFrame, id_column: str, was_synthesized: bool
) -> nw.DataFrame:
    """Remove the helper id column, but only when this library created it.

    The frame is returned untouched when the id column was supplied by the
    caller or is no longer present.
    """
    if not was_synthesized:
        return frame
    if id_column not in frame.columns:
        return frame
    return frame.drop(id_column)
|
ortidy/_scaling.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Unit-scaling layer for integer-only solvers.
|
|
2
|
+
|
|
3
|
+
OR-Tools' knapsack solver (and CP-SAT) require integer coefficients, but real
|
|
4
|
+
data has floats. We scale floats to ints, solve, and unscale, rather than
|
|
5
|
+
silently truncating floats at the solver boundary.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Sequence
|
|
11
|
+
|
|
12
|
+
# Default precision: 6 fractional digits. Chosen to stay well within
# 63-bit integer range for realistically-sized coefficients.
_DEFAULT_MAX_FACTOR = 10**6

# Relative slack (four double-precision ulps) allowed when deciding whether a
# scaled value is integral. A single float multiplication is off by at most
# about one ulp, so this absorbs round-off without hiding real fractions.
_INTEGRAL_REL_TOL = 2.0**-50


def _all_integral(values: Sequence[float]) -> bool:
    """Return ``True`` when every value is exactly integral as a float."""
    return all(float(v).is_integer() for v in values)


def _is_near_integral(value: float) -> bool:
    """Return ``True`` when ``value`` is an integer up to float round-off."""
    try:
        nearest = round(value)
    except (ValueError, OverflowError):
        # NaN and infinities cannot be rounded to an int; never integral.
        return False
    return abs(value - nearest) <= _INTEGRAL_REL_TOL * abs(value)


def choose_factor(
    values: Sequence[float], *, max_factor: int = _DEFAULT_MAX_FACTOR
) -> int:
    """Pick an integer scale factor (a power of ten) for ``values``.

    Returns ``1`` when ``values`` is empty or every value is already integral.
    Otherwise returns the smallest power of ten not exceeding ``max_factor``
    that renders every value integral, or ``max_factor`` itself when no such
    power of ten exists.

    Integrality of the scaled values is judged up to floating-point round-off:
    ``0.07 * 100`` evaluates to ``7.000000000000001``, which an exact check
    would reject and thereby inflate the factor all the way to the cap.

    Args:
        values: The numbers that will later be scaled to integers.
        max_factor: Upper bound on the returned factor.

    Returns:
        The chosen scale factor, never larger than ``max(1, max_factor)``.
    """
    if not values or _all_integral(values):
        return 1
    factor = 1
    # Only try powers of ten that respect the cap, so the result never
    # overshoots a ``max_factor`` that is not itself a power of ten.
    while factor * 10 <= max_factor:
        factor *= 10
        if all(_is_near_integral(v * factor) for v in values):
            return factor
    return max_factor
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def scale_to_int(
    values: Sequence[float], *, factor: int | None = None
) -> tuple[list[int], int]:
    """Multiply ``values`` by a scale factor and round each to an integer.

    Args:
        values: The float (or int) sequence to scale.
        factor: Scale factor to apply. When ``None`` the factor is obtained
            from :func:`choose_factor`.

    Returns:
        ``(scaled_ints, factor)`` where ``scaled_ints[i] == round(values[i] * factor)``.
    """
    chosen = choose_factor(values) if factor is None else factor
    scaled = []
    for raw in values:
        scaled.append(round(raw * chosen))
    return scaled, chosen
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def unscale(value: float, factor: int) -> float:
    """Undo :func:`scale_to_int` for a single (objective) value.

    A factor of ``1`` returns ``value`` unchanged (no division is performed).
    """
    if factor == 1:
        return value
    return value / factor
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Linear sum assignment — assignment-matrix shape.
|
|
2
|
+
|
|
3
|
+
A cost matrix *is* a dataframe: rows are agents, columns are tasks. We assign each
|
|
4
|
+
agent to exactly one task minimizing (or maximizing) total cost, and return the
|
|
5
|
+
input matrix frame with ``assignedTo`` and ``cost`` columns added.
|
|
6
|
+
|
|
7
|
+
Link:
|
|
8
|
+
https://developers.google.com/optimization/assignment/assignment_example
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import narwhals.stable.v1 as nw
|
|
16
|
+
from ortools.graph.python import linear_sum_assignment
|
|
17
|
+
|
|
18
|
+
from ortidy import _narwhals as _nw
|
|
19
|
+
from ortidy import _scaling, schema
|
|
20
|
+
from ortidy.result import SolveResult, SolveStatus
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def assignment(
    costs: Any,
    *,
    id_column: str | None = None,
    maximize: bool = False,
    assigned_column: str = "assignedTo",
    cost_column: str = "cost",
) -> SolveResult:
    """Solve a balanced or over-supplied linear assignment from a cost matrix.

    Every agent (row) is matched to one distinct task (column) so that the
    total cost is minimal, or maximal when ``maximize`` is set. When there are
    more tasks than agents, the surplus tasks are left unassigned.

    Parameters:
        costs: A cost-matrix frame. Each non-id column is a task; each row an agent.
        id_column: Optional column labelling agents (not treated as a task). If
            ``None``, agents are positional and no id column is added.
        maximize: Maximize total value instead of minimizing cost.
        assigned_column: Name of the added column holding each agent's task label.
        cost_column: Name of the added column holding each agent's assignment cost.

    Returns:
        SolveResult whose ``frame`` is the input matrix (same backend) plus the
        assignment and per-agent cost columns, with status and total objective.
        When the solver does not report an optimum, the input frame is returned
        without the extra columns and ``objective`` is ``None``.

    Raises:
        ValueError: If there is no task column, or there are more agents than
            tasks.
    """
    frame = _nw.to_nw(costs)
    schema.require_nonempty(frame, frame_name="costs")
    if id_column is not None:
        schema.require_columns(frame, {id_column}, frame_name="costs")

    task_columns = [c for c in frame.columns if c != id_column]
    if not task_columns:
        raise ValueError("costs must have at least one task column.")
    schema.require_numeric(frame, set(task_columns), frame_name="costs")

    n_agents = frame.shape[0]
    n_tasks = len(task_columns)
    if n_agents > n_tasks:
        raise ValueError(
            f"assignment needs at least as many tasks as agents; got "
            f"{n_agents} agents and {n_tasks} tasks."
        )

    columns = [_nw.column_to_list(frame, c) for c in task_columns]
    # Row-major flattening: entry ``i * n_tasks + j`` is agent i's cost on task j.
    flat = [columns[j][i] for i in range(n_agents) for j in range(n_tasks)]
    # The solver only accepts integer costs; scale once and reuse the result.
    scaled, _ = _scaling.scale_to_int(flat)
    sign = -1 if maximize else 1

    solver = linear_sum_assignment.SimpleLinearSumAssignment()
    for i in range(n_agents):
        for j in range(n_tasks):
            solver.add_arc_with_cost(i, j, sign * scaled[i * n_tasks + j])
    # The solver looks for a perfect matching, so with more tasks than agents
    # the missing left-hand nodes are filled with dummy agents whose arcs cost
    # nothing. They soak up the surplus tasks without affecting the optimum
    # for the real agents.
    for dummy in range(n_agents, n_tasks):
        for j in range(n_tasks):
            solver.add_arc_with_cost(dummy, j, 0)

    status = solver.solve()
    if status != solver.OPTIMAL:
        mapped = (
            SolveStatus.INFEASIBLE
            if status == solver.INFEASIBLE
            else SolveStatus.MODEL_INVALID
        )
        return SolveResult(
            frame=_nw.to_native(frame),
            status=mapped,
            objective=None,
            metadata={"solver": "LinearSumAssignment"},
        )

    # Only the real agents are reported; dummy rows are never read back.
    mates = [solver.right_mate(i) for i in range(n_agents)]
    assigned = [task_columns[j] for j in mates]
    # Costs are reported in the caller's original (unscaled) units.
    per_agent_cost = [columns[j][i] for i, j in enumerate(mates)]
    objective = sum(per_agent_cost)

    frame = frame.with_columns(
        nw.new_series(assigned_column, assigned, backend=frame.implementation),
        nw.new_series(cost_column, per_agent_cost, backend=frame.implementation),
    )

    return SolveResult(
        frame=_nw.to_native(frame),
        status=SolveStatus.OPTIMAL,
        objective=objective,
        metadata={"solver": "LinearSumAssignment"},
    )
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Binning solvers (assignment-matrix shape)."""
|
|
2
|
+
|
|
3
|
+
from ortidy.binning.bin_packing import bin_packing
|
|
4
|
+
from ortidy.binning.knapsack import knapsack
|
|
5
|
+
from ortidy.binning.multi_knapsack import multi_knapsack
|
|
6
|
+
|
|
7
|
+
__all__ = ["knapsack", "multi_knapsack", "bin_packing"]
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Bin packing — assignment-matrix shape.
|
|
2
|
+
|
|
3
|
+
Packs every item into bins of a common capacity, minimizing the number of bins
|
|
4
|
+
used. Returns the original items frame with a ``binId`` column (contiguously
|
|
5
|
+
numbered from 0) added.
|
|
6
|
+
|
|
7
|
+
Built on CP-SAT — no per-row variable construction.
|
|
8
|
+
|
|
9
|
+
Link:
|
|
10
|
+
https://developers.google.com/optimization/bin/bin_packing
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import narwhals.stable.v1 as nw
|
|
18
|
+
from ortools.sat.python import cp_model
|
|
19
|
+
|
|
20
|
+
from ortidy import _narwhals as _nw
|
|
21
|
+
from ortidy import _scaling, result, schema
|
|
22
|
+
from ortidy.result import SolveResult
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def bin_packing(
    items: Any,
    capacity: float,
    *,
    weight: str = "weight",
    item_id: str | None = None,
    bin_id: str = "binId",
    time_limit: float | None = None,
    random_seed: int = 0,
) -> SolveResult:
    """Pack every item into equally sized bins using as few bins as possible.

    Parameters:
        items: Frame with a weight column.
        capacity: The (shared) capacity of every bin.
        weight: Name of the weight column.
        item_id: Optional explicit item-id column (synthesized if ``None`` and
            dropped again before returning).
        bin_id: Name of the bin-assignment column added to the result.
        time_limit: Optional wall-clock limit in seconds.
        random_seed: Solver seed for determinism.

    Returns:
        SolveResult whose ``frame`` is the items frame (same backend) plus a
        ``bin_id`` column numbered contiguously from 0, with status and
        objective (the solver's bin count). On an unsuccessful solve the frame
        is returned without the extra column and ``objective`` is ``None``.
    """
    frame = _nw.to_nw(items)
    schema.require_nonempty(frame, frame_name="items")
    schema.require_columns(frame, {weight}, frame_name="items")
    schema.require_numeric(frame, {weight}, frame_name="items")

    frame, id_col, synthesized = _nw.ensure_id_column(frame, item_id)
    weights = _nw.column_to_list(frame, weight)

    # Weights and capacity share one scale factor so the constraint stays
    # consistent after conversion to integers.
    factor = _scaling.choose_factor(list(weights) + [capacity])
    int_weights = _scaling.scale_to_int(weights, factor=factor)[0]
    int_capacity = round(capacity * factor)

    # One candidate bin per item is always enough.
    n = len(weights)
    indices = range(n)
    model = cp_model.CpModel()
    # x[i][j] is true when item i sits in bin j; y[j] is true when bin j is used.
    x = [[model.new_bool_var(f"x_{i}_{j}") for j in indices] for i in indices]
    y = [model.new_bool_var(f"y_{j}") for j in indices]

    for i in indices:
        model.add_exactly_one(x[i][j] for j in indices)
    for j in indices:
        load = sum(x[i][j] * int_weights[i] for i in indices)
        model.add(load <= int_capacity * y[j])
    # Symmetry break: bins fill in order, which also speeds the search.
    for j in range(n - 1):
        model.add(y[j] >= y[j + 1])
    model.minimize(sum(y))

    solver = cp_model.CpSolver()
    solver.parameters.random_seed = random_seed
    if time_limit is not None:
        solver.parameters.max_time_in_seconds = time_limit
    solve_status = result.from_cp_sat(solver.solve(model))

    if not solve_status.is_success:
        frame = _nw.drop_if_synthesized(frame, id_col, synthesized)
        return SolveResult(
            frame=_nw.to_native(frame),
            status=solve_status,
            objective=None,
            metadata=_metadata(solver),
        )

    # First bin whose indicator is set for each item (-1 if none is found).
    raw_bin = [
        next((j for j in indices if solver.value(x[i][j]) == 1), -1)
        for i in indices
    ]
    # Renumber used bins contiguously from 0 in order of first appearance.
    remap: dict[int, int] = {}
    contiguous = []
    for b in raw_bin:
        contiguous.append(remap.setdefault(b, len(remap)))

    frame = frame.with_columns(
        nw.new_series(bin_id, contiguous, backend=frame.implementation)
    )
    frame = _nw.drop_if_synthesized(frame, id_col, synthesized)

    return SolveResult(
        frame=_nw.to_native(frame),
        status=solve_status,
        objective=round(solver.objective_value),
        metadata=_metadata(solver),
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _metadata(solver: cp_model.CpSolver) -> dict[str, Any]:
|
|
118
|
+
return {
|
|
119
|
+
"solver": "CP-SAT",
|
|
120
|
+
"wall_time": solver.wall_time,
|
|
121
|
+
"best_objective_bound": solver.best_objective_bound,
|
|
122
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""0/1 knapsack — assignment-matrix shape.
|
|
2
|
+
|
|
3
|
+
Selects the subset of items maximizing total value subject to a weight capacity.
|
|
4
|
+
Returns the original frame with an ``isIncluded`` boolean column added.
|
|
5
|
+
|
|
6
|
+
Link:
|
|
7
|
+
https://developers.google.com/optimization/bin/knapsack
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import narwhals.stable.v1 as nw
|
|
15
|
+
from ortools.algorithms.python import knapsack_solver
|
|
16
|
+
|
|
17
|
+
from ortidy import _narwhals as _nw
|
|
18
|
+
from ortidy import _scaling, schema
|
|
19
|
+
from ortidy.result import SolveResult, SolveStatus
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def knapsack(
    items: Any,
    capacity: float,
    *,
    value: str = "value",
    weight: str = "weight",
    item_id: str | None = None,
    assignment_column: str = "isIncluded",
) -> SolveResult:
    """Solve a 0/1 knapsack.

    Parameters:
        items: A dataframe (pandas, Polars, …) with a value and a weight column.
        capacity: The maximum total weight of the knapsack.
        value: Name of the value column. Default ``"value"``.
        weight: Name of the weight column. Default ``"weight"``.
        item_id: Optional explicit row-id column. If ``None``, a helper id
            column is added internally and dropped before returning.
        assignment_column: Name of the added boolean column. Default ``"isIncluded"``.

    Returns:
        SolveResult whose ``frame`` is the input frame (same backend) plus a
        boolean ``assignment_column``, with status and total selected value
        (summed from the original, unscaled values).
    """
    frame = _nw.to_nw(items)
    schema.require_nonempty(frame, frame_name="items")
    schema.require_columns(frame, {value, weight}, frame_name="items")
    schema.require_numeric(frame, {value, weight}, frame_name="items")

    frame, id_col, synthesized = _nw.ensure_id_column(frame, item_id)

    raw_values = _nw.column_to_list(frame, value)
    raw_weights = _nw.column_to_list(frame, weight)

    int_values, _ = _scaling.scale_to_int(raw_values)
    # Weights and capacity must share one scale factor. Choosing it from the
    # weights alone would round a fractional capacity (e.g. 11.5 -> 12 with
    # integral weights) and could admit a selection that exceeds the real limit.
    weight_factor = _scaling.choose_factor(list(raw_weights) + [capacity])
    int_weights, _ = _scaling.scale_to_int(raw_weights, factor=weight_factor)
    int_capacity = round(capacity * weight_factor)

    solver = knapsack_solver.KnapsackSolver(
        knapsack_solver.SolverType.KNAPSACK_MULTIDIMENSION_BRANCH_AND_BOUND_SOLVER,
        "ortidy_knapsack",
    )
    # Single-dimension problem: one weight row and one capacity.
    solver.init(int_values, [int_weights], [int_capacity])
    solver.solve()

    included = [solver.best_solution_contains(i) for i in range(len(int_values))]
    # Report the objective in the caller's original units, not the scaled ones.
    objective = sum(v for v, keep in zip(raw_values, included, strict=False) if keep)

    frame = frame.with_columns(
        nw.new_series(assignment_column, included, backend=frame.implementation)
    )
    frame = _nw.drop_if_synthesized(frame, id_col, synthesized)

    return SolveResult(
        frame=_nw.to_native(frame),
        status=SolveStatus.OPTIMAL,  # branch-and-bound returns the optimum
        objective=objective,
        metadata={"solver": "KNAPSACK_MULTIDIMENSION_BRANCH_AND_BOUND"},
    )
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""Multiple knapsack — assignment-matrix shape.
|
|
2
|
+
|
|
3
|
+
Packs items into capacitated bins to maximize total packed value; each item goes
|
|
4
|
+
into at most one bin. Returns the original items frame with a bin-assignment
|
|
5
|
+
column added (null where an item was left unpacked).
|
|
6
|
+
|
|
7
|
+
Built on CP-SAT — no per-row variable construction.
|
|
8
|
+
|
|
9
|
+
Link:
|
|
10
|
+
https://developers.google.com/optimization/bin/multiple_knapsack
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import narwhals.stable.v1 as nw
|
|
18
|
+
from ortools.sat.python import cp_model
|
|
19
|
+
|
|
20
|
+
from ortidy import _narwhals as _nw
|
|
21
|
+
from ortidy import _scaling, result, schema
|
|
22
|
+
from ortidy.result import SolveResult
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def multi_knapsack(
    items: Any,
    bins: Any,
    *,
    value: str = "value",
    weight: str = "weight",
    item_id: str | None = None,
    bin_id: str = "binId",
    capacity: str = "capacity",
    time_limit: float | None = None,
    random_seed: int = 0,
) -> SolveResult:
    """Assign items to capacitated bins so the total packed value is maximal.

    Each item is placed in at most one bin, and a bin's packed weight never
    exceeds its capacity.

    Parameters:
        items: Frame with value and weight columns.
        bins: Frame with bin-id and capacity columns.
        value: Name of the item value column.
        weight: Name of the item weight column.
        item_id: Optional explicit item-id column (synthesized if ``None`` and
            dropped again before returning).
        bin_id: Bin-id column; also names the assignment column added to the
            returned items frame.
        capacity: Bin capacity column.
        time_limit: Optional wall-clock limit in seconds.
        random_seed: Solver seed for determinism.

    Returns:
        SolveResult whose ``frame`` is the items frame (same backend) plus a
        ``bin_id`` column (the assigned bin, or null), with status and
        objective. On an unsuccessful solve the frame is returned without the
        extra column and ``objective`` is ``None``.
    """
    item_frame = _nw.to_nw(items)
    bin_frame = _nw.to_nw(bins)
    schema.require_nonempty(item_frame, frame_name="items")
    schema.require_nonempty(bin_frame, frame_name="bins")
    schema.require_columns(item_frame, {value, weight}, frame_name="items")
    schema.require_numeric(item_frame, {value, weight}, frame_name="items")
    schema.require_columns(bin_frame, {bin_id, capacity}, frame_name="bins")
    schema.require_numeric(bin_frame, {capacity}, frame_name="bins")

    item_frame, id_col, synthesized = _nw.ensure_id_column(item_frame, item_id)

    weights = _nw.column_to_list(item_frame, weight)
    values = _nw.column_to_list(item_frame, value)
    capacities = _nw.column_to_list(bin_frame, capacity)
    bin_labels = _nw.column_to_list(bin_frame, bin_id)

    # Common integer scale for weights and capacities so the constraints align.
    shared_factor = _scaling.choose_factor(list(weights) + list(capacities))
    int_weights = _scaling.scale_to_int(weights, factor=shared_factor)[0]
    int_caps = _scaling.scale_to_int(capacities, factor=shared_factor)[0]
    # Values get their own factor; it is undone on the reported objective.
    int_values, value_factor = _scaling.scale_to_int(values)

    n_items, n_bins = len(weights), len(capacities)
    item_range, bin_range = range(n_items), range(n_bins)
    model = cp_model.CpModel()
    # x[i][j] is true when item i is packed into bin j.
    x = [
        [model.new_bool_var(f"x_{i}_{j}") for j in bin_range]
        for i in item_range
    ]
    for i in item_range:
        model.add_at_most_one(x[i][j] for j in bin_range)
    for j in bin_range:
        load = sum(x[i][j] * int_weights[i] for i in item_range)
        model.add(load <= int_caps[j])
    model.maximize(
        sum(x[i][j] * int_values[i] for i in item_range for j in bin_range)
    )

    solver = cp_model.CpSolver()
    solver.parameters.random_seed = random_seed
    if time_limit is not None:
        solver.parameters.max_time_in_seconds = time_limit
    solve_status = result.from_cp_sat(solver.solve(model))

    if not solve_status.is_success:
        item_frame = _nw.drop_if_synthesized(item_frame, id_col, synthesized)
        return SolveResult(
            frame=_nw.to_native(item_frame),
            status=solve_status,
            objective=None,
            metadata=_metadata(solver),
        )

    # Label of the first bin holding each item; None when the item is unpacked.
    assigned: list[Any] = [
        next(
            (bin_labels[j] for j in bin_range if solver.value(x[i][j]) == 1),
            None,
        )
        for i in item_range
    ]

    item_frame = item_frame.with_columns(
        nw.new_series(bin_id, assigned, backend=item_frame.implementation)
    )
    item_frame = _nw.drop_if_synthesized(item_frame, id_col, synthesized)

    return SolveResult(
        frame=_nw.to_native(item_frame),
        status=solve_status,
        objective=_scaling.unscale(solver.objective_value, value_factor),
        metadata=_metadata(solver),
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _metadata(solver: cp_model.CpSolver) -> dict[str, Any]:
|
|
123
|
+
return {
|
|
124
|
+
"solver": "CP-SAT",
|
|
125
|
+
"wall_time": solver.wall_time,
|
|
126
|
+
"best_objective_bound": solver.best_objective_bound,
|
|
127
|
+
}
|
ortidy/data/__init__.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
"""Bundled sample datasets for examples, docs, and golden-file tests.
|
|
2
|
+
|
|
3
|
+
Each loader returns a native dataframe in the requested ``backend`` (``"pandas"``
|
|
4
|
+
by default, ``"polars"`` also supported), so the same fixtures drive the
|
|
5
|
+
backend-parity tests.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
_DATA_DIR = os.path.dirname(__file__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def bins(backend: str = "pandas") -> Any:
    """Load the bundled ``binning/bins`` dataset in the requested backend."""
    dataset = _get_dataset("binning/bins", backend)
    return dataset
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def items_knapsack(backend: str = "pandas") -> Any:
    """Load the bundled ``binning/items_knapsack`` dataset in the requested backend."""
    dataset = _get_dataset("binning/items_knapsack", backend)
    return dataset
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def items_multi(backend: str = "pandas") -> Any:
    """Load the bundled ``binning/items_multi`` dataset in the requested backend."""
    dataset = _get_dataset("binning/items_multi", backend)
    return dataset
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def items_bin_packing(backend: str = "pandas") -> Any:
    """Load the bundled ``binning/items_bin_packing`` dataset in the requested backend."""
    dataset = _get_dataset("binning/items_bin_packing", backend)
    return dataset
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def locations(backend: str = "pandas") -> Any:
    """Load the bundled ``routing/locations`` dataset in the requested backend."""
    dataset = _get_dataset("routing/locations", backend)
    return dataset
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def vehicles(with_capacity: bool = True, backend: str = "pandas") -> Any:
    """Load the bundled ``routing/vehicles`` dataset.

    The data is always read as pandas first so the ``capacity`` column can be
    dropped when ``with_capacity`` is false; the result is then converted to
    the requested ``backend``.
    """
    table = _get_dataset("routing/vehicles", "pandas")
    if with_capacity:
        return _to_backend(table, backend)
    return _to_backend(table.drop(columns=["capacity"]), backend)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def pickups_and_deliveries(backend: str = "pandas") -> Any:
    """Load the bundled ``routing/pickups_and_deliveries`` dataset in the requested backend."""
    dataset = _get_dataset("routing/pickups_and_deliveries", backend)
    return dataset
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _get_dataset(name: str, backend: str) -> Any:
    """Read the bundled CSV ``<name>.csv`` and return it in ``backend`` form.

    ``name`` is a path relative to this package's directory, without the
    ``.csv`` suffix. The file is read with pandas and then converted.
    """
    # Imported lazily so pandas is only loaded when a dataset is requested.
    import pandas as pd

    csv_path = os.path.join(_DATA_DIR, f"{name}.csv")
    loaded = pd.read_csv(csv_path)
    return _to_backend(loaded, backend)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _to_backend(df: Any, backend: str) -> Any:
|
|
55
|
+
if backend == "pandas":
|
|
56
|
+
return df
|
|
57
|
+
if backend == "polars":
|
|
58
|
+
import polars as pl
|
|
59
|
+
|
|
60
|
+
return pl.from_pandas(df)
|
|
61
|
+
raise ValueError(f"Unknown backend {backend!r}; expected 'pandas' or 'polars'.")
|