pyoframe 0.2.1__py3-none-any.whl → 1.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyoframe/_core.py ADDED
@@ -0,0 +1,2575 @@
1
+ """Defines several core Pyoframe objects including Set, Constraint, Variable, and Expression."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import warnings
6
+ from abc import abstractmethod
7
+ from collections.abc import Iterable, Mapping, Sequence
8
+ from typing import TYPE_CHECKING, Literal, Protocol, Union, overload
9
+
10
+ import pandas as pd
11
+ import polars as pl
12
+ import pyoptinterface as poi
13
+
14
+ from pyoframe._arithmetic import (
15
+ _get_dimensions,
16
+ _simplify_expr_df,
17
+ add,
18
+ multiply,
19
+ )
20
+ from pyoframe._constants import (
21
+ COEF_KEY,
22
+ CONST_TERM,
23
+ CONSTRAINT_KEY,
24
+ DUAL_KEY,
25
+ KEY_TYPE,
26
+ QUAD_VAR_KEY,
27
+ RESERVED_COL_KEYS,
28
+ SOLUTION_KEY,
29
+ VAR_KEY,
30
+ Config,
31
+ ConstraintSense,
32
+ ObjSense,
33
+ PyoframeError,
34
+ UnmatchedStrategy,
35
+ VType,
36
+ VTypeValue,
37
+ )
38
+ from pyoframe._model_element import ModelElement, ModelElementWithId
39
+ from pyoframe._utils import (
40
+ Container,
41
+ FuncArgs,
42
+ cast_coef_to_string,
43
+ concat_dimensions,
44
+ get_obj_repr,
45
+ pairwise,
46
+ parse_inputs_as_iterable,
47
+ return_new,
48
+ unwrap_single_values,
49
+ )
50
+
51
+ if TYPE_CHECKING: # pragma: no cover
52
+ from pyoframe._model import Model
53
+
54
+
55
+ # TODO consider changing this simply to a type and having a helper "Expression.from(object)"
56
class SupportsToExpr(Protocol):
    """Protocol for any object that can be converted to a Pyoframe [Expression][pyoframe.Expression].

    An object satisfies this protocol by providing a `to_expr()` method.
    """

    def to_expr(self) -> Expression:
        """Converts the object to a Pyoframe [Expression][pyoframe.Expression]."""
        ...
62
+
63
+
64
+ class SupportsMath(ModelElement, SupportsToExpr):
65
+ """Any object that can be converted into an expression."""
66
+
67
def __init__(self, *args, **kwargs):
    """Initializes the arithmetic flags, then forwards all arguments to the parent initializer."""
    # No strategy chosen yet; changed by keep_unmatched() / drop_unmatched().
    self._unmatched_strategy = UnmatchedStrategy.UNSET
    # Dimensions registered through over().
    self._allowed_new_dims: list[str] = []
    super().__init__(*args, **kwargs)
71
+
72
+ @abstractmethod
73
+ def _new(self, data: pl.DataFrame, name: str) -> SupportsMath:
74
+ """Helper method to create a new instance of the same (or for Variable derivative) class."""
75
+
76
def _copy_flags(self, other: SupportsMath):
    """Mirrors the arithmetic flags of `other` onto this object.

    The list of allowed new dimensions is duplicated rather than shared, so
    extending it on one object does not affect the other.
    """
    self._allowed_new_dims = list(other._allowed_new_dims)
    self._unmatched_strategy = other._unmatched_strategy
80
+
81
def keep_unmatched(self):
    """Returns a copy flagged to keep all rows during addition or subtraction, even rows that are not matched in the other expression.

    The copy shares this object's data and flags; only its unmatched strategy
    is set to `UnmatchedStrategy.KEEP`.
    """
    flagged = self._new(self.data, name=f"{self.name}.keep_unmatched()")
    flagged._copy_flags(self)
    flagged._unmatched_strategy = UnmatchedStrategy.KEEP
    return flagged
87
+
88
def drop_unmatched(self):
    """Returns a copy flagged to drop rows that are not matched in the other expression during addition or subtraction.

    The copy shares this object's data and flags; only its unmatched strategy
    is set to `UnmatchedStrategy.DROP`.
    """
    flagged = self._new(self.data, name=f"{self.name}.drop_unmatched()")
    flagged._copy_flags(self)
    flagged._unmatched_strategy = UnmatchedStrategy.DROP
    return flagged
94
+
95
def over(self, *dims: str):
    """Returns a copy that may be broadcasted over the given dimensions during addition and subtraction.

    The given dimension names are appended to the copy's list of allowed new
    dimensions; this object is left unchanged.
    """
    broadcastable = self._new(self.data, name=f"{self.name}.over(…)")
    broadcastable._copy_flags(self)
    broadcastable._allowed_new_dims += dims
    return broadcastable
101
+
102
+ @return_new
103
+ def rename(self, *args, **kwargs):
104
+ """Renames one or several of the object's dimensions.
105
+
106
+ Takes the same arguments as [`polars.DataFrame.rename`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rename.html).
107
+
108
+ See the [portfolio optimization example](../examples/portfolio_optimization.md) for a usage example.
109
+
110
+ Examples:
111
+ >>> m = pf.Model()
112
+ >>> m.v = pf.Variable(
113
+ ... {"hour": ["00:00", "06:00", "12:00", "18:00"]},
114
+ ... {"city": ["Toronto", "Berlin", "Paris"]},
115
+ ... )
116
+ >>> m.v
117
+ <Variable 'v' height=12>
118
+ ┌───────┬─────────┬──────────────────┐
119
+ │ hour ┆ city ┆ variable │
120
+ │ (4) ┆ (3) ┆ │
121
+ ╞═══════╪═════════╪══════════════════╡
122
+ │ 00:00 ┆ Toronto ┆ v[00:00,Toronto] │
123
+ │ 00:00 ┆ Berlin ┆ v[00:00,Berlin] │
124
+ │ 00:00 ┆ Paris ┆ v[00:00,Paris] │
125
+ │ 06:00 ┆ Toronto ┆ v[06:00,Toronto] │
126
+ │ 06:00 ┆ Berlin ┆ v[06:00,Berlin] │
127
+ │ … ┆ … ┆ … │
128
+ │ 12:00 ┆ Berlin ┆ v[12:00,Berlin] │
129
+ │ 12:00 ┆ Paris ┆ v[12:00,Paris] │
130
+ │ 18:00 ┆ Toronto ┆ v[18:00,Toronto] │
131
+ │ 18:00 ┆ Berlin ┆ v[18:00,Berlin] │
132
+ │ 18:00 ┆ Paris ┆ v[18:00,Paris] │
133
+ └───────┴─────────┴──────────────────┘
134
+
135
+ >>> m.v.rename({"city": "location"})
136
+ <Expression height=12 terms=12 type=linear>
137
+ ┌───────┬──────────┬──────────────────┐
138
+ │ hour ┆ location ┆ expression │
139
+ │ (4) ┆ (3) ┆ │
140
+ ╞═══════╪══════════╪══════════════════╡
141
+ │ 00:00 ┆ Toronto ┆ v[00:00,Toronto] │
142
+ │ 00:00 ┆ Berlin ┆ v[00:00,Berlin] │
143
+ │ 00:00 ┆ Paris ┆ v[00:00,Paris] │
144
+ │ 06:00 ┆ Toronto ┆ v[06:00,Toronto] │
145
+ │ 06:00 ┆ Berlin ┆ v[06:00,Berlin] │
146
+ │ … ┆ … ┆ … │
147
+ │ 12:00 ┆ Berlin ┆ v[12:00,Berlin] │
148
+ │ 12:00 ┆ Paris ┆ v[12:00,Paris] │
149
+ │ 18:00 ┆ Toronto ┆ v[18:00,Toronto] │
150
+ │ 18:00 ┆ Berlin ┆ v[18:00,Berlin] │
151
+ │ 18:00 ┆ Paris ┆ v[18:00,Paris] │
152
+ └───────┴──────────┴──────────────────┘
153
+
154
+ """
155
+ return self.data.rename(*args, **kwargs)
156
+
157
+ @return_new
158
+ def with_columns(self, *args, **kwargs):
159
+ """Creates a new object with modified columns.
160
+
161
+ Takes the same arguments as [`polars.DataFrame.with_columns`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.with_columns.html).
162
+
163
+ !!! warning
164
+ Only use this function if you know what you're doing. It is not recommended to manually modify the columns
165
+ within a Pyoframe object.
166
+ """
167
+ return self.data.with_columns(*args, **kwargs)
168
+
169
+ @return_new
170
+ def filter(self, *args, **kwargs):
171
+ """Creates a copy of the object containing only a subset of the original rows.
172
+
173
+ Takes the same arguments as [`polars.DataFrame.filter`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.filter.html).
174
+
175
+ See Also:
176
+ [`Expression.pick`][pyoframe.Expression.pick] or [`Variable.pick`][pyoframe.Variable.pick] if you wish to drop the filtered
177
+ column in the process.
178
+
179
+ """
180
+ return self.data.filter(*args, **kwargs)
181
+
182
+ @return_new
183
+ def pick(self, **kwargs):
184
+ """Filters elements by the given criteria and then drops the filtered dimensions.
185
+
186
+ Examples:
187
+ >>> m = pf.Model()
188
+ >>> m.v = pf.Variable(
189
+ ... [
190
+ ... {"hour": ["00:00", "06:00", "12:00", "18:00"]},
191
+ ... {"city": ["Toronto", "Berlin", "Paris"]},
192
+ ... ]
193
+ ... )
194
+ >>> m.v.pick(hour="06:00")
195
+ <Expression height=3 terms=3 type=linear>
196
+ ┌─────────┬──────────────────┐
197
+ │ city ┆ expression │
198
+ │ (3) ┆ │
199
+ ╞═════════╪══════════════════╡
200
+ │ Toronto ┆ v[06:00,Toronto] │
201
+ │ Berlin ┆ v[06:00,Berlin] │
202
+ │ Paris ┆ v[06:00,Paris] │
203
+ └─────────┴──────────────────┘
204
+ >>> m.v.pick(hour="06:00", city="Toronto")
205
+ <Expression terms=1 type=linear>
206
+ v[06:00,Toronto]
207
+
208
+ See Also:
209
+ [`Expression.filter`][pyoframe.Expression.filter] or [`Variable.filter`][pyoframe.Variable.filter] if you don't wish to drop the filtered column.
210
+ """
211
+ return self.data.filter(**kwargs).drop(kwargs.keys())
212
+
213
def _add_allowed_new_dims_to_df(self, df):
    """Adds one `"*"`-filled column per allowed new dimension, placed just before the frame's last column."""
    original_columns = df.columns
    placeholders = [pl.lit("*").alias(dim) for dim in self._allowed_new_dims]
    widened = df.with_columns(*placeholders)
    # Reorder: the original last column stays last, after the placeholder columns.
    ordered = original_columns[:-1] + self._allowed_new_dims + [original_columns[-1]]
    return widened.select(ordered)
218
+
219
def add_dim(self, *dims: str):  # pragma: no cover
    """Deprecated, use [`over`][pyoframe.Expression.over] instead.

    Emits a `DeprecationWarning` and forwards `dims` to `over`.
    """
    warnings.warn(
        "'add_dim' has been renamed to 'over'. Please use 'over' instead.",
        DeprecationWarning,
        # Attribute the warning to the caller's line rather than to this wrapper.
        stacklevel=2,
    )
    return self.over(*dims)
226
+
227
+ @abstractmethod
228
+ def to_expr(self) -> Expression:
229
+ """Converts the object to a Pyoframe Expression."""
230
+ ...
231
+
232
def sum(self, *args, **kwargs):
    """Converts the object to an expression (see `.to_expr()`) and forwards all arguments to [`Expression.sum`][pyoframe.Expression.sum]."""
    expression = self.to_expr()
    return expression.sum(*args, **kwargs)
235
+
236
def sum_by(self, *args, **kwargs):
    """Converts the object to an expression (see `.to_expr()`) and forwards all arguments to [`Expression.sum_by`][pyoframe.Expression.sum_by]."""
    expression = self.to_expr()
    return expression.sum_by(*args, **kwargs)
239
+
240
def map(self, *args, **kwargs):
    """Converts the object to an expression (see `.to_expr()`) and forwards all arguments to [`Expression.map`][pyoframe.Expression.map]."""
    expression = self.to_expr()
    return expression.map(*args, **kwargs)
243
+
244
+ def __add__(self, *args, **kwargs):
245
+ return self.to_expr().__add__(*args, **kwargs)
246
+
247
+ def __mul__(self, *args, **kwargs):
248
+ return self.to_expr().__mul__(*args, **kwargs)
249
+
250
def __pow__(self, power: int):
    """Supports squaring expressions.

    Only `power == 2` is accepted; the result is `self * self`, renamed to
    `(<name>**2)`. Any other power raises a `ValueError`.

    Examples:
        >>> m = pf.Model()
        >>> m.v = pf.Variable()
        >>> m.v**2
        <Expression terms=1 type=quadratic>
        v * v
        >>> m.v**3
        Traceback (most recent call last):
        ...
        ValueError: Raising an expressions to **3 is not supported. Expressions can only be squared (**2).
    """
    if power != 2:
        raise ValueError(
            f"Raising an expressions to **{power} is not supported. Expressions can only be squared (**2)."
        )
    squared = self * self
    squared.name = f"({self.name}**2)"
    return squared
271
+
272
def __neg__(self):
    """Returns the object's expression multiplied by -1, named `-<name>` and carrying this object's flags."""
    negated = self.to_expr() * -1
    negated.name = f"-{self.name}"
    negated._copy_flags(self)
    return negated
277
+
278
+ def __sub__(self, other):
279
+ """Subtracts a value from this Expression.
280
+
281
+ Examples:
282
+ >>> import polars as pl
283
+ >>> m = pf.Model()
284
+ >>> df = pl.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]})
285
+ >>> m.v = pf.Variable(df["dim1"])
286
+ >>> m.v - df
287
+ <Expression height=3 terms=6 type=linear>
288
+ ┌──────┬────────────┐
289
+ │ dim1 ┆ expression │
290
+ │ (3) ┆ │
291
+ ╞══════╪════════════╡
292
+ │ 1 ┆ v[1] -1 │
293
+ │ 2 ┆ v[2] -2 │
294
+ │ 3 ┆ v[3] -3 │
295
+ └──────┴────────────┘
296
+ """
297
+ if not isinstance(other, (int, float)):
298
+ other = other.to_expr()
299
+ return self.to_expr() + (-other)
300
+
301
+ def __rmul__(self, other):
302
+ return self.to_expr() * other
303
+
304
+ def __radd__(self, other):
305
+ return self.to_expr() + other
306
+
307
+ def __truediv__(self, other):
308
+ """Divides this expression.
309
+
310
+ Examples:
311
+ Support division.
312
+ >>> m = pf.Model()
313
+ >>> m.v = Variable({"dim1": [1, 2, 3]})
314
+ >>> m.v / 2
315
+ <Expression height=3 terms=3 type=linear>
316
+ ┌──────┬────────────┐
317
+ │ dim1 ┆ expression │
318
+ │ (3) ┆ │
319
+ ╞══════╪════════════╡
320
+ │ 1 ┆ 0.5 v[1] │
321
+ │ 2 ┆ 0.5 v[2] │
322
+ │ 3 ┆ 0.5 v[3] │
323
+ └──────┴────────────┘
324
+ """
325
+ return self.to_expr() * (1 / other)
326
+
327
+ def __rsub__(self, other):
328
+ """Supports right subtraction.
329
+
330
+ Examples:
331
+ >>> m = pf.Model()
332
+ >>> m.v = Variable({"dim1": [1, 2, 3]})
333
+ >>> 1 - m.v
334
+ <Expression height=3 terms=6 type=linear>
335
+ ┌──────┬────────────┐
336
+ │ dim1 ┆ expression │
337
+ │ (3) ┆ │
338
+ ╞══════╪════════════╡
339
+ │ 1 ┆ 1 - v[1] │
340
+ │ 2 ┆ 1 - v[2] │
341
+ │ 3 ┆ 1 - v[3] │
342
+ └──────┴────────────┘
343
+ """
344
+ return other + (-self.to_expr())
345
+
346
def __le__(self, other):
    """Less-than-or-equal constraint.

    Returns a [Constraint][pyoframe.Constraint] built from `self - other`
    with sense `ConstraintSense.LE`.

    Examples:
        >>> m = pf.Model()
        >>> m.v = pf.Variable()
        >>> m.v <= 1
        <Constraint 'unnamed' terms=2 type=linear>
        v <= 1
    """
    return Constraint(self - other, ConstraintSense.LE)
357
+
358
def __ge__(self, other):
    """Greater-than-or-equal constraint.

    Returns a [Constraint][pyoframe.Constraint] built from `self - other`
    with sense `ConstraintSense.GE`.

    Examples:
        >>> m = pf.Model()
        >>> m.v = pf.Variable()
        >>> m.v >= 1
        <Constraint 'unnamed' terms=2 type=linear>
        v >= 1
    """
    return Constraint(self - other, ConstraintSense.GE)
369
+
370
def __eq__(self, value: object):  # type: ignore
    """Equality constraint.

    Note that this returns a [Constraint][pyoframe.Constraint] built from
    `self - value` with sense `ConstraintSense.EQ`, not a `bool`.

    Examples:
        >>> m = pf.Model()
        >>> m.v = pf.Variable()
        >>> m.v == 1
        <Constraint 'unnamed' terms=2 type=linear>
        v = 1
    """
    return Constraint(self - value, ConstraintSense.EQ)
381
+
382
+
383
+ SetTypes = Union[
384
+ pl.DataFrame,
385
+ pd.Index,
386
+ pd.DataFrame,
387
+ SupportsMath,
388
+ Mapping[str, Sequence[object]],
389
+ "Set",
390
+ "Constraint",
391
+ ]
392
+
393
+
394
+ class Set(SupportsMath):
395
+ """A set which can then be used to index variables.
396
+
397
+ Examples:
398
+ >>> pf.Set(x=range(2), y=range(3))
399
+ <Set 'unnamed' height=6>
400
+ ┌─────┬─────┐
401
+ │ x ┆ y │
402
+ │ (2) ┆ (3) │
403
+ ╞═════╪═════╡
404
+ │ 0 ┆ 0 │
405
+ │ 0 ┆ 1 │
406
+ │ 0 ┆ 2 │
407
+ │ 1 ┆ 0 │
408
+ │ 1 ┆ 1 │
409
+ │ 1 ┆ 2 │
410
+ └─────┴─────┘
411
+ """
412
+
413
def __init__(self, *data: SetTypes | Iterable[SetTypes], **named_data):
    """Builds the set from positional set-like inputs and `dim_name=values` keyword inputs.

    Each keyword argument is turned into a one-entry mapping and appended
    after the positional inputs before everything is combined by
    `_parse_acceptable_sets`.

    Raises:
        ValueError: If the combined data contains duplicate rows.
    """
    sources = [*data, *({dim: values} for dim, values in named_data.items())]
    merged = self._parse_acceptable_sets(*sources)
    has_duplicates = not merged.is_empty() and merged.is_duplicated().any()
    if has_duplicates:
        raise ValueError("Duplicate rows found in input data.")
    super().__init__(merged, name="unnamed_set")
421
+
422
+ def _new(self, data: pl.DataFrame, name: str) -> Set:
423
+ s = Set(data)
424
+ s.name = name
425
+ s._model = self._model
426
+ return s
427
+
428
+ @staticmethod
429
+ def _parse_acceptable_sets(
430
+ *over: SetTypes | Iterable[SetTypes],
431
+ ) -> pl.DataFrame:
432
+ """Computes the cartesian product of the given sets.
433
+
434
+ Examples:
435
+ >>> import pandas as pd
436
+ >>> dim1 = pd.Index([1, 2, 3], name="dim1")
437
+ >>> dim2 = pd.Index(["a", "b"], name="dim1")
438
+ >>> Set._parse_acceptable_sets([dim1, dim2])
439
+ Traceback (most recent call last):
440
+ ...
441
+ AssertionError: Dimension 'dim1' is not unique.
442
+ >>> dim2.name = "dim2"
443
+ >>> Set._parse_acceptable_sets([dim1, dim2])
444
+ shape: (6, 2)
445
+ ┌──────┬──────┐
446
+ │ dim1 ┆ dim2 │
447
+ │ --- ┆ --- │
448
+ │ i64 ┆ str │
449
+ ╞══════╪══════╡
450
+ │ 1 ┆ a │
451
+ │ 1 ┆ b │
452
+ │ 2 ┆ a │
453
+ │ 2 ┆ b │
454
+ │ 3 ┆ a │
455
+ │ 3 ┆ b │
456
+ └──────┴──────┘
457
+ """
458
+ assert len(over) > 0, "At least one set must be provided."
459
+ over_iter: Iterable[SetTypes] = parse_inputs_as_iterable(*over)
460
+
461
+ over_frames: list[pl.DataFrame] = [Set._set_to_polars(set) for set in over_iter]
462
+
463
+ over_merged = over_frames[0]
464
+
465
+ for df in over_frames[1:]:
466
+ overlap_dims = set(over_merged.columns) & set(df.columns)
467
+ assert not overlap_dims, (
468
+ f"Dimension '{tuple(overlap_dims)[0]}' is not unique."
469
+ )
470
+ over_merged = over_merged.join(df, how="cross")
471
+ return over_merged
472
+
473
+ def to_expr(self) -> Expression:
474
+ """Converts the Set to an Expression equal to 1 for each index.
475
+
476
+ Useful when multiplying a Set by an Expression.
477
+ """
478
+ return Expression(
479
+ self.data.with_columns(
480
+ pl.lit(1).alias(COEF_KEY), pl.lit(CONST_TERM).alias(VAR_KEY)
481
+ ),
482
+ name=self.name,
483
+ )
484
+
485
def __mul__(self, other):
    """Multiplies two Sets into a new Set built from both data frames; any other operand is handled by the parent class.

    The two sets must not have any dimension in common.
    """
    if not isinstance(other, Set):
        return super().__mul__(other)

    shared_dims = set(self.data.columns) & set(other.data.columns)
    assert not shared_dims, (
        f"Cannot multiply the two sets because dimension '{tuple(shared_dims)[0]}' is present in both sets."
    )
    return Set(self.data, other.data)
493
+
494
def __add__(self, other):
    """Returns the union of two Sets (duplicate rows removed); any other operand is handled by the parent class.

    Raises:
        PyoframeError: If polars reports that the column names of the two sets do not match.
    """
    # TODO replace with bitwise or
    if not isinstance(other, Set):
        return super().__add__(other)

    try:
        union = pl.concat([self.data, other.data]).unique(
            maintain_order=Config.maintain_order
        )
        return self._new(union, name=f"({self.name} + {other.name})")
    except pl.exceptions.ShapeError as e:
        if "unable to vstack, column names don't match" not in str(e):
            raise e  # pragma: no cover
        raise PyoframeError(
            f"Failed to add sets '{self.name}' and '{other.name}' because dimensions do not match ({self.dimensions} != {other.dimensions}) "
        ) from e
512
+
513
def __repr__(self):
    """Returns a header line followed by the polars table of the set's rows.

    The default name `"unnamed_set"` is displayed as `"unnamed"`.
    """
    display_name = self.name if self.name != "unnamed_set" else "unnamed"
    header = get_obj_repr(self, display_name, height=self.data.height)

    table_df = self._add_allowed_new_dims_to_df(self._add_shape_to_columns(self.data))
    with Config.print_polars_config:
        table = repr(table_df)

    return "\n".join((header, table))
525
+
526
@staticmethod
def _set_to_polars(set: SetTypes) -> pl.DataFrame:
    """Converts a single set-like object into a polars DataFrame of its dimensions.

    Supported inputs, checked in this order: `dict`, `Constraint`, `Set`,
    any other `SupportsMath` object, `pandas.Index`, `pandas.DataFrame`,
    `polars.DataFrame`, `polars.Series` and named `pandas.Series`.

    Raises:
        ValueError: If the input type is unsupported (including a bare
            `range` or an unnamed pandas Series), or if the resulting frame
            has a column named `"index"` or a reserved column name.
    """
    if isinstance(set, dict):
        df = pl.DataFrame(set)
    elif isinstance(set, Constraint):
        df = set.data.select(set._dimensions_unsafe)
    elif isinstance(set, Set):
        # Must be tested before SupportsMath: Set is a SupportsMath subclass, so
        # this branch was previously unreachable and Sets took a needless round
        # trip through an Expression.
        df = set.data
    elif isinstance(set, SupportsMath):
        df = (
            set.to_expr()
            .data.drop(RESERVED_COL_KEYS, strict=False)
            .unique(maintain_order=Config.maintain_order)
        )
    elif isinstance(set, pd.Index):
        df = pl.from_pandas(pd.DataFrame(index=set).reset_index())
    elif isinstance(set, pd.DataFrame):
        df = pl.from_pandas(set)
    elif isinstance(set, pl.DataFrame):
        df = set
    elif isinstance(set, pl.Series):
        df = set.to_frame()
    elif isinstance(set, pd.Series):
        if not set.name:
            raise ValueError("Cannot convert an unnamed Pandas Series to a Set.")
        df = pl.from_pandas(set).to_frame()
    elif isinstance(set, range):
        raise ValueError(
            "Cannot convert a range to a set without a dimension name. Try Set(dim_name=range(...))"
        )
    else:
        raise ValueError(f"Cannot convert type {type(set)} to a polars DataFrame")

    if "index" in df.columns:
        raise ValueError(
            "Please specify a custom dimension name rather than using 'index' to avoid confusion."
        )

    for reserved_key in RESERVED_COL_KEYS:
        if reserved_key in df.columns:
            raise ValueError(
                f"Cannot use reserved column names {reserved_key} as dimensions."
            )

    return df
571
+
572
+
573
+ class Expression(SupportsMath):
574
+ """Represents a linear or quadratic mathematical expression.
575
+
576
+ Examples:
577
+ >>> import pandas as pd
578
+ >>> df = pd.DataFrame(
579
+ ... {
580
+ ... "item": [1, 1, 1, 2, 2],
581
+ ... "time": ["mon", "tue", "wed", "mon", "tue"],
582
+ ... "cost": [1, 2, 3, 4, 5],
583
+ ... }
584
+ ... ).set_index(["item", "time"])
585
+ >>> m = pf.Model()
586
+ >>> m.Time = pf.Variable(df.index)
587
+ >>> m.Size = pf.Variable(df.index)
588
+ >>> expr = df["cost"] * m.Time + df["cost"] * m.Size
589
+ >>> expr
590
+ <Expression height=5 terms=10 type=linear>
591
+ ┌──────┬──────┬──────────────────────────────┐
592
+ │ item ┆ time ┆ expression │
593
+ │ (2) ┆ (3) ┆ │
594
+ ╞══════╪══════╪══════════════════════════════╡
595
+ │ 1 ┆ mon ┆ Time[1,mon] + Size[1,mon] │
596
+ │ 1 ┆ tue ┆ 2 Time[1,tue] +2 Size[1,tue] │
597
+ │ 1 ┆ wed ┆ 3 Time[1,wed] +3 Size[1,wed] │
598
+ │ 2 ┆ mon ┆ 4 Time[2,mon] +4 Size[2,mon] │
599
+ │ 2 ┆ tue ┆ 5 Time[2,tue] +5 Size[2,tue] │
600
+ └──────┴──────┴──────────────────────────────┘
601
+ """
602
+
603
def __init__(self, data: pl.DataFrame, name: str | None = None):
    """Validates and simplifies `data`, then initializes the parent class.

    Parameters:
        data:
            The expression's terms. Must contain the variable and coefficient columns.
        name:
            Optional name. If omitted, a `UserWarning` is emitted and the parent
            initializer is called without a name.

    Raises:
        ValueError: If the duplicate-index safety check is enabled in `Config`
            and two rows are identical once the coefficient column is ignored.
    """
    # Sanity checks, VAR_KEY and COEF_KEY must be present
    assert VAR_KEY in data.columns, "Missing variable column."
    assert COEF_KEY in data.columns, "Missing coefficient column."

    # Sanity check no duplicates indices
    if Config.enable_is_duplicated_expression_safety_check:
        is_duplicate = data.drop(COEF_KEY).is_duplicated()
        # In theory this should never happen unless there's a bug in the library
        if is_duplicate.any():
            duplicated_data = data.filter(is_duplicate)
            raise ValueError(
                f"Cannot create an expression with duplicate indices:\n{duplicated_data}."
            )

    data = _simplify_expr_df(data)

    if name is not None:
        super().__init__(data, name=name)
        return

    warnings.warn(
        "Expression should be given a name to support troubleshooting.",
        UserWarning,
    )
    super().__init__(data)
629
+
630
+ @classmethod
631
+ def constant(cls, constant: int | float) -> Expression:
632
+ """Creates a new expression equal to the given constant.
633
+
634
+ Examples:
635
+ >>> pf.Expression.constant(5)
636
+ <Expression terms=1 type=constant>
637
+ 5
638
+ """
639
+ return cls(
640
+ pl.DataFrame(
641
+ {
642
+ COEF_KEY: [constant],
643
+ VAR_KEY: [CONST_TERM],
644
+ },
645
+ schema={COEF_KEY: pl.Float64, VAR_KEY: KEY_TYPE},
646
+ ),
647
+ name=str(constant),
648
+ )
649
+
650
+ @return_new
651
+ def sum(self, *over: str):
652
+ """Sums an expression over specified dimensions.
653
+
654
+ If no dimensions are specified, the sum is taken over all of the expression's dimensions.
655
+
656
+ Examples:
657
+ >>> expr = pl.DataFrame(
658
+ ... {
659
+ ... "time": ["mon", "tue", "wed", "mon", "tue"],
660
+ ... "place": [
661
+ ... "Toronto",
662
+ ... "Toronto",
663
+ ... "Toronto",
664
+ ... "Vancouver",
665
+ ... "Vancouver",
666
+ ... ],
667
+ ... "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
668
+ ... }
669
+ ... ).to_expr()
670
+ >>> expr
671
+ <Expression height=5 terms=5 type=constant>
672
+ ┌──────┬───────────┬────────────┐
673
+ │ time ┆ place ┆ expression │
674
+ │ (3) ┆ (2) ┆ │
675
+ ╞══════╪═══════════╪════════════╡
676
+ │ mon ┆ Toronto ┆ 1000000 │
677
+ │ tue ┆ Toronto ┆ 3000000 │
678
+ │ wed ┆ Toronto ┆ 2000000 │
679
+ │ mon ┆ Vancouver ┆ 1000000 │
680
+ │ tue ┆ Vancouver ┆ 2000000 │
681
+ └──────┴───────────┴────────────┘
682
+ >>> expr.sum("time")
683
+ <Expression height=2 terms=2 type=constant>
684
+ ┌───────────┬────────────┐
685
+ │ place ┆ expression │
686
+ │ (2) ┆ │
687
+ ╞═══════════╪════════════╡
688
+ │ Toronto ┆ 6000000 │
689
+ │ Vancouver ┆ 3000000 │
690
+ └───────────┴────────────┘
691
+ >>> expr.sum()
692
+ <Expression terms=1 type=constant>
693
+ 9000000
694
+
695
+ If the given dimensions don't exist, an error will be raised:
696
+
697
+ >>> expr.sum("city")
698
+ Traceback (most recent call last):
699
+ ...
700
+ AssertionError: Cannot sum over ['city'] as it is not in ['time', 'place']
701
+
702
+ See Also:
703
+ [pyoframe.Expression.sum_by][] for summing over all dimensions _except_ those that are specified.
704
+ """
705
+ dims = self.dimensions
706
+ if dims is None:
707
+ raise ValueError("Cannot sum a dimensionless expression.")
708
+ if not over:
709
+ over = tuple(dims)
710
+ assert set(over) <= set(dims), (
711
+ f"Cannot sum over {list(over)} as it is not in {dims}"
712
+ )
713
+ remaining_dims = [dim for dim in dims if dim not in over]
714
+
715
+ return (
716
+ self.data.drop(over)
717
+ .group_by(
718
+ remaining_dims + self._variable_columns,
719
+ maintain_order=Config.maintain_order,
720
+ )
721
+ .sum()
722
+ )
723
+
724
def sum_by(self, *by: str):
    """Like [`Expression.sum`][pyoframe.Expression.sum], but the sum is taken over all dimensions *except* those specified in `by` (just like a `group_by().sum()` operation).

    If `by` lists every dimension of the expression there is nothing to sum
    over, and a new expression with the same data is returned.

    Examples:
        >>> expr = pl.DataFrame(
        ...     {
        ...         "time": ["mon", "tue", "wed", "mon", "tue"],
        ...         "place": [
        ...             "Toronto",
        ...             "Toronto",
        ...             "Toronto",
        ...             "Vancouver",
        ...             "Vancouver",
        ...         ],
        ...         "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
        ...     }
        ... ).to_expr()
        >>> expr
        <Expression height=5 terms=5 type=constant>
        ┌──────┬───────────┬────────────┐
        │ time ┆ place     ┆ expression │
        │ (3)  ┆ (2)       ┆            │
        ╞══════╪═══════════╪════════════╡
        │ mon  ┆ Toronto   ┆ 1000000    │
        │ tue  ┆ Toronto   ┆ 3000000    │
        │ wed  ┆ Toronto   ┆ 2000000    │
        │ mon  ┆ Vancouver ┆ 1000000    │
        │ tue  ┆ Vancouver ┆ 2000000    │
        └──────┴───────────┴────────────┘

        >>> expr.sum_by("place")
        <Expression height=2 terms=2 type=constant>
        ┌───────────┬────────────┐
        │ place     ┆ expression │
        │ (2)       ┆            │
        ╞═══════════╪════════════╡
        │ Toronto   ┆ 6000000    │
        │ Vancouver ┆ 3000000    │
        └───────────┴────────────┘

        If the specified dimensions don't exist, an error will be raised:

        >>> expr.sum_by("city")
        Traceback (most recent call last):
        ...
        ValueError: Cannot sum by ['city'] because it is not a valid dimension. The expression's dimensions are: ['time', 'place'].

        >>> total_sum = expr.sum()
        >>> total_sum.sum_by("time")
        Traceback (most recent call last):
        ...
        ValueError: Cannot sum a dimensionless expression.

    See Also:
        [pyoframe.Expression.sum][] for summing over specified dimensions.
    """
    if not by:
        raise ValueError("sum_by requires at least 1 argument.")
    dims = self.dimensions
    if dims is None:
        raise ValueError("Cannot sum a dimensionless expression.")

    # Keep the caller's order (and drop repeats) so the error message is deterministic.
    invalid_dims = [dim for dim in dict.fromkeys(by) if dim not in dims]
    if invalid_dims:
        raise ValueError(
            f"Cannot sum by {invalid_dims} because it is not a valid dimension. The expression's dimensions are: {list(dims)}."
        )

    remaining_dims = [dim for dim in dims if dim not in by]
    if not remaining_dims:
        # Calling self.sum() with no arguments would sum over *every* dimension,
        # the opposite of what grouping by all dimensions means.
        return self._new(self.data, name=f"{self.name}.sum_by(…)")
    return self.sum(*remaining_dims)
791
+
792
@property
def _variable_columns(self) -> list[str]:
    """Names of the variable columns: `VAR_KEY`, plus `QUAD_VAR_KEY` when the expression is quadratic."""
    return [VAR_KEY, QUAD_VAR_KEY] if self.is_quadratic else [VAR_KEY]
798
+
799
+ def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression:
800
+ """Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.
801
+
802
+ This is particularly useful to go from one type of dimensions to another. For example, to convert data that
803
+ is indexed by city to data indexed by country (see example).
804
+
805
+ Parameters:
806
+ mapping_set:
807
+ The set to map the expression to. This can be a DataFrame, Index, or another Set.
808
+ drop_shared_dims:
809
+ If `True`, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and
810
+ repeated rows are summed.
811
+ If `False`, the shared dimensions are kept in the resulting expression.
812
+
813
+ Returns:
814
+ A new Expression containing the result of the mapping operation.
815
+
816
+ Examples:
817
+ >>> import polars as pl
818
+ >>> pop_data = pl.DataFrame(
819
+ ... {
820
+ ... "city": ["Toronto", "Vancouver", "Boston"],
821
+ ... "year": [2024, 2024, 2024],
822
+ ... "population": [10, 2, 8],
823
+ ... }
824
+ ... ).to_expr()
825
+ >>> cities_and_countries = pl.DataFrame(
826
+ ... {
827
+ ... "city": ["Toronto", "Vancouver", "Boston"],
828
+ ... "country": ["Canada", "Canada", "USA"],
829
+ ... }
830
+ ... )
831
+ >>> pop_data.map(cities_and_countries)
832
+ <Expression height=2 terms=2 type=constant>
833
+ ┌──────┬─────────┬────────────┐
834
+ │ year ┆ country ┆ expression │
835
+ │ (1) ┆ (2) ┆ │
836
+ ╞══════╪═════════╪════════════╡
837
+ │ 2024 ┆ Canada ┆ 12 │
838
+ │ 2024 ┆ USA ┆ 8 │
839
+ └──────┴─────────┴────────────┘
840
+
841
+ >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
842
+ <Expression height=3 terms=3 type=constant>
843
+ ┌───────────┬──────┬─────────┬────────────┐
844
+ │ city ┆ year ┆ country ┆ expression │
845
+ │ (3) ┆ (1) ┆ (2) ┆ │
846
+ ╞═══════════╪══════╪═════════╪════════════╡
847
+ │ Toronto ┆ 2024 ┆ Canada ┆ 10 │
848
+ │ Vancouver ┆ 2024 ┆ Canada ┆ 2 │
849
+ │ Boston ┆ 2024 ┆ USA ┆ 8 │
850
+ └───────────┴──────┴─────────┴────────────┘
851
+ """
852
+ mapping_set = Set(mapping_set)
853
+
854
+ dims = self.dimensions
855
+ if dims is None:
856
+ raise ValueError("Cannot use .map() on an expression with no dimensions.")
857
+
858
+ mapping_dims = mapping_set.dimensions
859
+ if mapping_dims is None:
860
+ raise ValueError(
861
+ "Cannot use .map() with a mapping set containing no dimensions."
862
+ )
863
+
864
+ shared_dims = [dim for dim in dims if dim in mapping_dims]
865
+ if not shared_dims:
866
+ raise ValueError(
867
+ f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
868
+ )
869
+
870
+ mapped_expression = self * mapping_set
871
+
872
+ if drop_shared_dims:
873
+ mapped_expression = mapped_expression.sum(*shared_dims)
874
+
875
+ mapped_expression.name = f"{self.name}.map(…)"
876
+
877
+ return mapped_expression
878
+
879
@return_new
def rolling_sum(self, over: str, window_size: int):
    """Calculates the rolling sum of the Expression over a specified window size for a given dimension.

    This method applies a rolling sum operation over the dimension specified by `over`,
    using a window defined by `window_size`.

    Parameters:
        over:
            The name of the dimension (column) over which the rolling sum is calculated.
            This dimension must exist within the Expression's dimensions.
        window_size:
            The size of the moving window in terms of number of records.
            The rolling sum is calculated over this many consecutive elements.

    Returns:
        A new Expression instance containing the result of the rolling sum operation.
        This new Expression retains all dimensions (columns) of the original data,
        with the rolling sum applied over the specified dimension.

    Raises:
        ValueError: If the expression has no dimensions.
        AssertionError: If `over` is not one of the expression's dimensions.

    Examples:
        >>> import polars as pl
        >>> cost = pl.DataFrame(
        ...     {
        ...         "item": [1, 1, 1, 2, 2],
        ...         "time": [1, 2, 3, 1, 2],
        ...         "cost": [1, 2, 3, 4, 5],
        ...     }
        ... )
        >>> m = pf.Model()
        >>> m.quantity = pf.Variable(cost[["item", "time"]])
        >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
        <Expression height=5 terms=8 type=linear>
        ┌──────┬──────┬──────────────────────────────────┐
        │ item ┆ time ┆ expression                       │
        │ (2)  ┆ (3)  ┆                                  │
        ╞══════╪══════╪══════════════════════════════════╡
        │ 1    ┆ 1    ┆ quantity[1,1]                    │
        │ 1    ┆ 2    ┆ quantity[1,1] +2 quantity[1,2]   │
        │ 1    ┆ 3    ┆ 2 quantity[1,2] +3 quantity[1,3] │
        │ 2    ┆ 1    ┆ 4 quantity[2,1]                  │
        │ 2    ┆ 2    ┆ 4 quantity[2,1] +5 quantity[2,2] │
        └──────┴──────┴──────────────────────────────────┘
    """
    dims = self.dimensions
    if dims is None:
        raise ValueError(
            "Cannot use rolling_sum() with an expression with no dimensions."
        )
    assert over in dims, f"Cannot sum over {over} as it is not in {dims}"
    # `over` is a single dimension name, so compare for equality. `dim not in over`
    # would be a substring test and would also drop e.g. "t" when over="time".
    remaining_dims = [dim for dim in dims if dim != over]

    return pl.concat(
        [
            # Label every row of a window with the window's last `over` value.
            df.with_columns(pl.col(over).max())
            for _, df in self.data.rolling(
                index_column=over,
                period=f"{window_size}i",
                group_by=remaining_dims,
            )
        ]
    )
942
+
943
@return_new
def within(self, set: SetTypes):
    """Filters this expression to only include the dimensions within the provided set.

    Rows are kept when their values for the dimensions shared with `set`
    appear in `set`.

    Examples:
        >>> import pandas as pd
        >>> general_expr = pd.DataFrame(
        ...     {"dim1": [1, 2, 3], "value": [1, 2, 3]}
        ... ).to_expr()
        >>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
        >>> general_expr.within(filter_expr).data
        shape: (2, 3)
        ┌──────┬─────────┬───────────────┐
        │ dim1 ┆ __coeff ┆ __variable_id │
        │ ---  ┆ ---     ┆ ---           │
        │ i64  ┆ f64     ┆ u32           │
        ╞══════╪═════════╪═══════════════╡
        │ 1    ┆ 1.0     ┆ 0             │
        │ 3    ┆ 3.0     ┆ 0             │
        └──────┴─────────┴───────────────┘
    """
    set_df: pl.DataFrame = Set(set).data
    set_dims = _get_dimensions(set_df)
    assert set_dims is not None, (
        "Cannot use .within() with a set with no dimensions."
    )
    own_dims = self.dimensions
    assert own_dims is not None, (
        "Cannot use .within() with an expression with no dimensions."
    )

    shared_dims = [dim for dim in own_dims if dim in set_dims]
    allowed_keys = set_df.select(shared_dims).unique(
        maintain_order=Config.maintain_order
    )
    join_order = "left" if Config.maintain_order else None
    return self.data.join(allowed_keys, on=shared_dims, maintain_order=join_order)
980
+
981
@property
def is_quadratic(self) -> bool:
    """Whether the expression is quadratic.

    The check only looks at the column names of `self.data`: the expression
    is treated as quadratic exactly when the `QUAD_VAR_KEY` column is
    present, so no rows are scanned.

    Examples:
        >>> import pandas as pd
        >>> m = pf.Model()
        >>> m.v = Variable()
        >>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}) * m.v
        >>> expr *= m.v
        >>> expr.is_quadratic
        True
    """
    column_names = self.data.columns
    return QUAD_VAR_KEY in column_names
998
+
999
@overload
def degree(self, return_str: Literal[False] = False) -> int: ...

# No default here: if both overloads had one, a zero-argument call would match
# two overloads with incompatible return types.
@overload
def degree(self, return_str: Literal[True]) -> str: ...

def degree(self, return_str: bool = False) -> int | str:
    """Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).

    Parameters:
        return_str: If `True`, returns the degree as a string (`"constant"`, `"linear"`, or `"quadratic"`).
            If `False`, returns the degree as an integer (0, 1, or 2).

    Returns:
        The degree as an `int`, or its name as a `str` when `return_str` is `True`.

    Examples:
        >>> import pandas as pd
        >>> m = pf.Model()
        >>> m.v1 = pf.Variable()
        >>> m.v2 = pf.Variable()
        >>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
        >>> expr.degree()
        0
        >>> expr *= m.v1
        >>> expr.degree()
        1
        >>> expr += (m.v2**2).over("dim1")
        >>> expr.degree()
        2
        >>> expr.degree(return_str=True)
        'quadratic'
    """
    # Cheap column-name check first; the linear test below scans the data.
    if self.is_quadratic:
        return "quadratic" if return_str else 2
    # TODO improve performance of .evaluate() by ensuring early exit if linear
    if (self.data.get_column(VAR_KEY) != CONST_TERM).any():
        return "linear" if return_str else 1
    return "constant" if return_str else 0
1036
+
1037
def __add__(self, other):
    """Returns the sum of this expression and a number or expression-like object.

    Plain numbers (`int`/`float`) are added to the constant term. Any other
    operand is first converted with its `.to_expr()` method.

    Examples:
        >>> import pandas as pd
        >>> m = pf.Model()
        >>> add = pd.DataFrame({"dim1": [1, 2, 3], "add": [10, 20, 30]}).to_expr()
        >>> m.v = Variable(add)
        >>> m.v + add
        <Expression height=3 terms=6 type=linear>
        ┌──────┬────────────┐
        │ dim1 ┆ expression │
        │ (3)  ┆            │
        ╞══════╪════════════╡
        │ 1    ┆ v[1] +10   │
        │ 2    ┆ v[2] +20   │
        │ 3    ┆ v[3] +30   │
        └──────┴────────────┘

        >>> m.v + add + 2
        <Expression height=3 terms=6 type=linear>
        ┌──────┬────────────┐
        │ dim1 ┆ expression │
        │ (3)  ┆            │
        ╞══════╪════════════╡
        │ 1    ┆ 12 + v[1]  │
        │ 2    ┆ 22 + v[2]  │
        │ 3    ┆ 32 + v[3]  │
        └──────┴────────────┘

        >>> m.v + pd.DataFrame({"dim1": [1, 2], "add": [10, 20]})
        Traceback (most recent call last):
        ...
        pyoframe._constants.PyoframeError: Cannot add the two expressions below because of unmatched values.
        Expression 1: v
        Expression 2: add
        Unmatched values:
        shape: (1, 2)
        ┌──────┬────────────┐
        │ dim1 ┆ dim1_right │
        │ ---  ┆ ---        │
        │ i64  ┆ i64        │
        ╞══════╪════════════╡
        │ 3    ┆ null       │
        └──────┴────────────┘
        If this is intentional, use .drop_unmatched() or .keep_unmatched().
        >>> m.v2 = Variable()
        >>> 5 + 2 * m.v2
        <Expression terms=2 type=linear>
        2 v2 +5
    """
    if not isinstance(other, (int, float)):
        rhs = other.to_expr()
        # Adopt the other operand's model if this expression has none yet.
        self._learn_from_other(rhs)
        return add(self, rhs)
    return self._add_const(other)
1093
+
1094
def __mul__(self: Expression, other: int | float | SupportsToExpr) -> Expression:
    """Returns the product of this expression and a number or expression-like object.

    Multiplying by exactly 1 returns this same object. Any other number
    scales the coefficient column. Non-numeric operands are converted with
    `.to_expr()` and handed to `multiply`.
    """
    if not isinstance(other, (int, float)):
        factor = other.to_expr()
        # Adopt the other operand's model if this expression has none yet.
        self._learn_from_other(factor)
        return multiply(self, factor)

    if other == 1:
        return self

    scaled = self.data.with_columns(pl.col(COEF_KEY) * other)
    return self._new(scaled, name=f"({other} * {self.name})")
1106
+
1107
def to_expr(self) -> Expression:
    """Returns this very object; an expression needs no conversion."""
    expression = self
    return expression
1110
+
1111
def _learn_from_other(self, other: Expression):
    """Adopts `other`'s model when this expression has none and `other` has one."""
    if self._model is not None or other._model is None:
        return
    self._model = other._model
1114
+
1115
def _new(self, data: pl.DataFrame, name: str) -> Expression:
    """Builds an `Expression` from `data` and `name` that shares this expression's model."""
    derived = Expression(data, name)
    derived._model = self._model
    return derived
1119
+
1120
def _add_const(self, const: int | float) -> Expression:
    """Returns this expression with `const` added to its constant term.

    Adding 0 returns this same object. Otherwise a constant-term row is first
    created wherever one is missing (one per label for dimensioned
    expressions), and `const` is then added to the coefficient of every
    constant-term row.

    Examples:
        >>> m = pf.Model()
        >>> m.x1 = Variable()
        >>> m.x2 = Variable()
        >>> m.x1 + 5
        <Expression terms=2 type=linear>
        x1 +5
        >>> m.x1**2 + 5
        <Expression terms=2 type=quadratic>
        x1 * x1 +5
        >>> m.x1**2 + m.x2 + 5
        <Expression terms=3 type=quadratic>
        x1 * x1 + x2 +5

        It also works with dimensions

        >>> m = pf.Model()
        >>> m.v = Variable({"dim1": [1, 2, 3]})
        >>> m.v * m.v + 5
        <Expression height=3 terms=6 type=quadratic>
        ┌──────┬─────────────────┐
        │ dim1 ┆ expression      │
        │ (3)  ┆                 │
        ╞══════╪═════════════════╡
        │ 1    ┆ 5 + v[1] * v[1] │
        │ 2    ┆ 5 + v[2] * v[2] │
        │ 3    ┆ 5 + v[3] * v[3] │
        └──────┴─────────────────┘
    """
    if const == 0:
        return self

    def const_id_column(column_name):
        # A column named `column_name` holding the constant-term id.
        return pl.lit(CONST_TERM).alias(column_name).cast(KEY_TYPE)

    dims = self.dimensions
    terms = self.data

    # Fill in missing constant terms
    if dims:
        labels = (
            terms.select(dims)
            .unique(maintain_order=Config.maintain_order)
            .with_columns(const_id_column(VAR_KEY))
        )
        if self.is_quadratic:
            labels = labels.with_columns(const_id_column(QUAD_VAR_KEY))
        # We use right_left not left_right to bring the constants near the front for better readability
        join_order = "right_left" if Config.maintain_order else None
        joined = terms.join(
            labels,
            on=dims + self._variable_columns,
            how="full",
            coalesce=True,
            maintain_order=join_order,
        )
        # Rows that came only from `labels` have no coefficient yet.
        terms = joined.with_columns(pl.col(COEF_KEY).fill_null(0.0))
    elif CONST_TERM not in terms[VAR_KEY]:
        zero_row = pl.DataFrame(
            {COEF_KEY: [0.0], VAR_KEY: [CONST_TERM]},
            schema={COEF_KEY: pl.Float64, VAR_KEY: KEY_TYPE},
        )
        if self.is_quadratic:
            zero_row = zero_row.with_columns(const_id_column(QUAD_VAR_KEY))
        terms = pl.concat([terms, zero_row], how="vertical_relaxed")

    is_const_row = pl.col(VAR_KEY) == CONST_TERM
    terms = terms.with_columns(
        pl.when(is_const_row)
        .then(pl.col(COEF_KEY) + const)
        .otherwise(pl.col(COEF_KEY))
    )

    if const >= 0:
        label = f"({self.name} + {const})"
    else:
        label = f"({self.name} - {-const})"
    return self._new(terms, name=label)
1198
+
1199
@property
def constant_terms(self) -> pl.DataFrame:
    """Returns all the constant terms in the expression.

    Returns:
        A DataFrame holding the expression's dimension columns (if any) and
        the coefficient column; the variable-id column(s) are dropped.
        For a dimensioned expression every label gets a row, with 0.0 where
        no constant term exists. For a dimensionless expression without a
        constant term a single row with coefficient 0.0 is returned.
    """
    constants = self.data.filter(pl.col(VAR_KEY) == CONST_TERM).drop(VAR_KEY)
    if self.is_quadratic:
        constants = constants.drop(QUAD_VAR_KEY)

    dims = self.dimensions
    if dims is None:
        if len(constants) == 0:
            # Keep the same schema as the non-empty case (coefficient column
            # only). This fallback previously also carried a variable-id
            # column, so the result's columns depended on the data.
            return pl.DataFrame({COEF_KEY: [0.0]}, schema={COEF_KEY: pl.Float64})
        return constants

    all_labels = self.data.select(dims).unique(maintain_order=Config.maintain_order)
    per_label = constants.join(
        all_labels,
        on=dims,
        how="full",
        coalesce=True,
        maintain_order="left_right" if Config.maintain_order else None,
    )
    # Labels without an explicit constant term get a zero.
    return per_label.with_columns(pl.col(COEF_KEY).fill_null(0.0))
1225
+
1226
@property
def variable_terms(self) -> pl.DataFrame:
    """Returns the rows of `self.data` whose variable id is not the constant-term id."""
    is_variable_row = pl.col(VAR_KEY) != CONST_TERM
    return self.data.filter(is_variable_row)
1230
+
1231
@unwrap_single_values
def evaluate(self) -> pl.DataFrame:
    """Computes the value of the expression using the variables' solutions.

    Constant expressions (degree 0) can be evaluated before the model is
    optimized; any other expression requires `model.optimize()` to have been
    called.

    Returns:
        A Polars `DataFrame` for dimensioned expressions and a `float` for dimensionless expressions.

    Raises:
        ValueError: If the expression contains variables and the model has not been optimized yet.

    Examples:
        >>> m = pf.Model()
        >>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
        >>> m.expr = 2 * m.X * m.X + 1

        >>> m.expr.evaluate()
        Traceback (most recent call last):
        ...
        ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().

        >>> m.constant_expression = m.expr - 2 * m.X * m.X
        >>> m.constant_expression.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 1.0      │
        │ 2    ┆ 1.0      │
        │ 3    ┆ 1.0      │
        └──────┴──────────┘


        >>> m.optimize()
        >>> m.expr.evaluate()
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 201.0    │
        │ 2    ┆ 201.0    │
        │ 3    ┆ 201.0    │
        └──────┴──────────┘

        >>> m.expr.sum().evaluate()
        603.0

    """
    # The message used to mention a `.value` accessor; this method is `.evaluate()`.
    assert self._model is not None, (
        "Expression must be added to the model to use .evaluate()"
    )

    # Start from the coefficients; they are multiplied by variable values below.
    df = self.data.rename({COEF_KEY: SOLUTION_KEY})
    sm = self._model.poi
    attr = poi.VariableAttribute.Value

    if self.degree() == 0:
        # Only constant terms: the coefficients already are the values.
        df = df.drop(self._variable_columns)
    elif (
        self._model.attr.TerminationStatus
        == poi.TerminationStatusCode.OPTIMIZE_NOT_CALLED
    ):
        raise ValueError(
            f"Cannot evaluate the expression '{self.name}' before calling model.optimize()."
        )
    else:
        # Multiply each term by the value of every variable column it has
        # (one column for linear terms, two for quadratic ones).
        for var_col in self._variable_columns:
            values = [
                sm.get_variable_attribute(poi.VariableIndex(v_id), attr)
                for v_id in df.get_column(var_col).to_list()
            ]

            df = df.drop(var_col).with_columns(
                pl.col(SOLUTION_KEY) * pl.Series(values, dtype=pl.Float64)
            )

    # Sum the terms, per label when the expression has dimensions.
    dims = self.dimensions
    if dims is not None:
        df = df.group_by(dims, maintain_order=Config.maintain_order)
    return df.sum()
1311
+
1312
def _to_poi(self) -> poi.ScalarAffineFunction | poi.ScalarQuadraticFunction:
    """Converts a dimensionless expression into a pyoptinterface scalar function.

    Returns a `ScalarQuadraticFunction` when the expression is quadratic and
    a `ScalarAffineFunction` otherwise, built from the coefficient and
    variable-id columns of `self.data`.
    """
    assert self.dimensions is None, (
        "._to_poi() only works for non-dimensioned expressions."
    )

    def column(key):
        # NumPy view/copy of one column of the term table.
        return self.data.get_column(key).to_numpy()

    if not self.is_quadratic:
        return poi.ScalarAffineFunction(
            coefficients=column(COEF_KEY),
            variables=column(VAR_KEY),
        )
    return poi.ScalarQuadraticFunction(
        coefficients=column(COEF_KEY),
        var1s=column(VAR_KEY),
        var2s=column(QUAD_VAR_KEY),
    )
1328
+
1329
@overload
def to_str(
    self,
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[False] = False,
) -> str: ...

@overload
def to_str(
    self,
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: Literal[True] = True,
) -> pl.DataFrame: ...

def to_str(
    self,
    str_col_name: str = "expression",
    include_const_term: bool = True,
    return_df: bool = False,
) -> str | pl.DataFrame:
    """Converts the expression to a human-readable string, or several arranged in a table.

    Long expressions are truncated according to [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and [`Config.print_polars_config`][pyoframe._Config.print_polars_config].

    `str(pyoframe.Expression)` is equivalent to `pyoframe.Expression.to_str()`.

    Parameters:
        str_col_name:
            The name of the column containing the string representation of the expression (dimensioned expressions only).
        include_const_term:
            If `False`, constant terms are omitted from the string representation.
        return_df:
            If `True`, returns a DataFrame containing the human-readable strings instead of the DataFrame's string representation.

    Returns:
        With `return_df=True`, the DataFrame of per-label strings. Otherwise a
        plain string for expressions with neither dimensions nor allowed new
        dimensions, and the `repr` of the string table in all other cases.

    Examples:
        >>> import polars as pl
        >>> m = pf.Model()
        >>> x = pf.Set(x=range(1000))
        >>> y = pf.Set(y=range(1000))
        >>> m.V = pf.Variable(x, y)
        >>> expr = 2 * m.V * m.V + 3
        >>> print(expr.to_str())
        ┌────────┬────────┬──────────────────────────────┐
        │ x      ┆ y      ┆ expression                   │
        │ (1000) ┆ (1000) ┆                              │
        ╞════════╪════════╪══════════════════════════════╡
        │ 0      ┆ 0      ┆ 3 +2 V[0,0] * V[0,0]         │
        │ 0      ┆ 1      ┆ 3 +2 V[0,1] * V[0,1]         │
        │ 0      ┆ 2      ┆ 3 +2 V[0,2] * V[0,2]         │
        │ 0      ┆ 3      ┆ 3 +2 V[0,3] * V[0,3]         │
        │ 0      ┆ 4      ┆ 3 +2 V[0,4] * V[0,4]         │
        │ …      ┆ …      ┆ …                            │
        │ 999    ┆ 995    ┆ 3 +2 V[999,995] * V[999,995] │
        │ 999    ┆ 996    ┆ 3 +2 V[999,996] * V[999,996] │
        │ 999    ┆ 997    ┆ 3 +2 V[999,997] * V[999,997] │
        │ 999    ┆ 998    ┆ 3 +2 V[999,998] * V[999,998] │
        │ 999    ┆ 999    ┆ 3 +2 V[999,999] * V[999,999] │
        └────────┴────────┴──────────────────────────────┘
        >>> expr = expr.sum("y")
        >>> print(expr.to_str())
        ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
        │ x      ┆ expression                                                                              │
        │ (1000) ┆                                                                                         │
        ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
        │ 0      ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …      │
        │ 1      ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] …      │
        │ 2      ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] …      │
        │ 3      ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] …      │
        │ 4      ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] …      │
        │ …      ┆ …                                                                                       │
        │ 995    ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
        │        ┆ V[995,3] …                                                                              │
        │ 996    ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
        │        ┆ V[996,3] …                                                                              │
        │ 997    ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
        │        ┆ V[997,3] …                                                                              │
        │ 998    ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
        │        ┆ V[998,3] …                                                                              │
        │ 999    ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
        │        ┆ V[999,3] …                                                                              │
        └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
        >>> expr = expr.sum("x")
        >>> print(expr.to_str())
        3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …

    """
    # TODO consider optimizing using LazyFrames since .head() could maybe be automatically pushed up the chain of operations.
    data = self.data if include_const_term else self.variable_terms
    # Presumably turns the coefficient column into display strings — confirm
    # against cast_coef_to_string.
    data = cast_coef_to_string(data)

    # Replace every variable-id column by a printable variable name.
    for var_col in self._variable_columns:
        temp_var_column = f"{var_col}_temp"
        if self._model is not None and self._model._var_map is not None:
            # Use the model's variable map to fill the temporary name column.
            data = self._model._var_map.apply(
                data, to_col=temp_var_column, id_col=var_col
            )
        else:
            # No variable map available: fall back to "x" followed by the id.
            data = data.with_columns(
                pl.concat_str(pl.lit("x"), var_col).alias(temp_var_column)
            )
        # The constant term has no variable name, so it prints as "".
        data = data.with_columns(
            pl.when(pl.col(var_col) == CONST_TERM)
            .then(pl.lit(""))
            .otherwise(temp_var_column)
            .alias(var_col)
        ).drop(temp_var_column)
    if self.is_quadratic:
        # Merge the two name columns into "a * b"; a blank second name leaves
        # just the first.
        data = data.with_columns(
            pl.when(pl.col(QUAD_VAR_KEY) == "")
            .then(pl.col(VAR_KEY))
            .otherwise(pl.concat_str(VAR_KEY, pl.lit(" * "), pl.col(QUAD_VAR_KEY)))
            .alias(VAR_KEY)
        ).drop(QUAD_VAR_KEY)

    dimensions = self.dimensions

    # Create a string for each term
    # NOTE(review): the strip-character literals in this method may contain a
    # non-ASCII space that this view cannot distinguish — confirm them against
    # the original file before applying.
    data = data.with_columns(
        pl.concat_str(
            COEF_KEY,
            pl.lit(" "),
            VAR_KEY,
        )
        .str.strip_chars(characters="  ")
        .alias(str_col_name)
    ).drop(COEF_KEY, VAR_KEY)

    if dimensions is not None:
        # One string per label: join at most print_max_terms terms and append
        # an ellipsis when the label has more terms than that.
        data = data.group_by(dimensions, maintain_order=Config.maintain_order).agg(
            pl.concat_str(
                pl.col(str_col_name)
                .head(Config.print_max_terms)
                .str.join(delimiter=" "),
                pl.when(pl.len() > Config.print_max_terms)
                .then(pl.lit(" …"))
                .otherwise(pl.lit("")),
            )
        )
    else:
        # Dimensionless: same truncation rule applied to the single expression.
        truncate = data.height > Config.print_max_terms
        if truncate:
            data = data.head(Config.print_max_terms)

        data = data.select(pl.col(str_col_name).str.join(delimiter=" "))

        if truncate:
            data = data.with_columns(
                pl.concat_str(pl.col(str_col_name), pl.lit(" …"))
            )

    # Remove leading +
    data = data.with_columns(pl.col(str_col_name).str.strip_chars(characters="  +"))

    if not return_df:
        if dimensions is None and not self._allowed_new_dims:
            # A single cell: return its content as a plain string.
            data = data.item()
        else:
            data = self._add_shape_to_columns(data)
            data = self._add_allowed_new_dims_to_df(data)
            # Render the table under the configured Polars print settings.
            with Config.print_polars_config:
                data = repr(data)

    return data
1494
+
1495
def _str_header(self) -> str:
    """Builds the one-line summary shown at the top of the expression's repr.

    The summary reports the height (only when the expression has dimensions),
    the number of terms and the degree name.
    """
    height = len(self) if self.dimensions else None
    term_count = self.terms
    degree_name = self.degree(return_str=True)
    return get_obj_repr(self, height=height, terms=term_count, type=degree_name)
1503
+
1504
def __repr__(self) -> str:
    """Returns the header line followed by the expression's string form."""
    header = self._str_header()
    body = self.to_str()
    return f"{header}\n{body}"
1506
+
1507
def __str__(self) -> str:
    """Returns `self.to_str()` called with its default arguments."""
    text = self.to_str()
    return text
1509
+
1510
@property
def terms(self) -> int:
    """The total number of terms, counted across all subexpressions.

    This is the number of rows of `self.data`. Expressions equal to zero
    count as one term.

    Examples:
        >>> import polars as pl
        >>> m = pf.Model()
        >>> m.v = pf.Variable({"t": [1, 2]})
        >>> coef = pl.DataFrame({"t": [1, 2], "coef": [0, 1]})
        >>> coef * (m.v + 4)
        <Expression height=2 terms=3 type=linear>
        ┌─────┬────────────┐
        │ t   ┆ expression │
        │ (2) ┆            │
        ╞═════╪════════════╡
        │ 1   ┆ 0          │
        │ 2   ┆ 4 + v[2]   │
        └─────┴────────────┘
        >>> (coef * (m.v + 4)).terms
        3
    """
    term_table = self.data
    return len(term_table)
1534
+
1535
+
1536
@overload
def sum(over: str | Sequence[str], expr: SupportsToExpr) -> Expression: ...


@overload
def sum(over: SupportsToExpr) -> Expression: ...


def sum(
    over: str | Sequence[str] | SupportsToExpr,
    expr: SupportsToExpr | None = None,
) -> Expression:  # pragma: no cover
    """Deprecated: Use Expression.sum() or Variable.sum() instead.

    Parameters:
        over:
            Either the dimension name(s) to sum over (when `expr` is given),
            or the object to sum over all of its dimensions (when `expr` is omitted).
        expr:
            The object to sum when `over` holds dimension names.

    Returns:
        The result of calling `.sum(...)` on the expression form of the input.

    Examples:
        >>> x = pf.Set(x=range(100))
        >>> pf.sum(x)
        Traceback (most recent call last):
        ...
        DeprecationWarning: pf.sum() is deprecated. Use Expression.sum() or Variable.sum() instead.
    """
    # stacklevel=2 attributes the warning to the caller's line. Python's
    # default filters hide a DeprecationWarning unless it is attributed to
    # code running in __main__.
    warnings.warn(
        "pf.sum() is deprecated. Use Expression.sum() or Variable.sum() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if expr is None:
        assert isinstance(over, SupportsMath)
        return over.to_expr().sum()

    assert isinstance(over, (str, Sequence))
    if isinstance(over, str):
        # A single dimension name: wrap it so it can be unpacked below.
        over = (over,)
    return expr.to_expr().sum(*over)
1570
+
1571
+
1572
def sum_by(
    by: str | Sequence[str], expr: SupportsToExpr
) -> Expression:  # pragma: no cover
    """Deprecated: Use Expression.sum_by() or Variable.sum_by() instead.

    Parameters:
        by:
            The dimension name, or sequence of names, passed on to `.sum_by()`.
        expr:
            The object whose expression form is summed.

    Returns:
        The result of calling `.sum_by(*by)` on the expression form of `expr`.
    """
    # stacklevel=2 attributes the warning to the caller's line. Python's
    # default filters hide a DeprecationWarning unless it is attributed to
    # code running in __main__.
    warnings.warn(
        "pf.sum_by() is deprecated. Use Expression.sum_by() or Variable.sum_by() instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if isinstance(by, str):
        by = [by]
    return expr.to_expr().sum_by(*by)
1584
+
1585
+
1586
+ class Constraint(ModelElementWithId):
1587
+ """An optimization constraint that can be added to a [Model][pyoframe.Model].
1588
+
1589
+ Tip: Implementation Note
1590
+ Pyoframe simplifies constraints by moving all the constraint's mathematical terms to the left-hand side.
1591
+ This way, the right-hand side is always zero, and constraints only need to manage one expression.
1592
+
1593
+ Warning: Use `<=`, `>=`, or `==` operators to create constraints
1594
+ Constraints should be created using the `<=`, `>=`, or `==` operators, not by directly calling the `Constraint` constructor.
1595
+
1596
+ Parameters:
1597
+ lhs:
1598
+ The constraint's left-hand side expression.
1599
+ sense:
1600
+ The sense of the constraint.
1601
+ """
1602
+
1603
def __init__(self, lhs: Expression, sense: ConstraintSense):
    """Stores the left-hand side and sense, and prepares one row per constraint label.

    Parameters:
        lhs:
            The constraint's left-hand side expression. Its model (if any)
            becomes this constraint's model.
        sense:
            The sense of the constraint.
    """
    self.lhs: Expression = lhs
    self._model = lhs._model
    self.sense = sense
    # Arguments of a relax() call to replay later; None when nothing is pending.
    self._to_relax: FuncArgs | None = None
    self._attr = Container(self._set_attribute, self._get_attribute)

    lhs_dims = self.lhs.dimensions
    if lhs_dims is None:
        # Dimensionless constraint: no label columns.
        labels = pl.DataFrame()
    else:
        labels = self.lhs.data.select(lhs_dims).unique(
            maintain_order=Config.maintain_order
        )

    super().__init__(labels)
1618
+
1619
@property
def attr(self) -> Container:
    """Allows reading and writing constraint attributes similarly to [Model.attr][pyoframe.Model.attr]."""
    container = self._attr
    return container
1623
+
1624
def _set_attribute(self, name, value):
    """Writes the attribute `name` on every solver constraint of this object.

    `name` is looked up in `poi.ConstraintAttribute`; unknown names are sent
    through the solver's raw-attribute setter instead. A dimensionless
    constraint receives `value` as is. A dimensioned one expects `value` to
    be joinable on the constraint's dimensions and to carry a column called
    `name` with the per-label values.
    """
    self._assert_has_ids()
    col_name = name
    try:
        name = poi.ConstraintAttribute[name]
        setter = self._model.poi.set_constraint_attribute
    except KeyError:
        # Not a standard attribute: pass the raw name through.
        setter = self._model.poi.set_constraint_raw_attribute

    if self.lhs.is_quadratic:
        constr_type = poi.ConstraintType.Quadratic
    else:
        constr_type = poi.ConstraintType.Linear

    if self.dimensions is None:
        for constr_id in self.data.get_column(CONSTRAINT_KEY):
            setter(poi.ConstraintIndex(constr_type, constr_id), name, value)
        return

    # Pair every constraint id with its label's value.
    id_value_pairs = (
        self.data.join(
            value,
            on=self.dimensions,
            maintain_order="left" if Config.maintain_order else None,
        )
        .select(pl.col(CONSTRAINT_KEY), pl.col(col_name))
        .iter_rows()
    )
    for constr_id, label_value in id_value_pairs:
        setter(poi.ConstraintIndex(constr_type, constr_id), name, label_value)
1653
+
1654
@unwrap_single_values
def _get_attribute(self, name):
    """Reads the attribute `name` from every solver constraint of this object.

    `name` is looked up in `poi.ConstraintAttribute`; unknown names are read
    through the solver's raw-attribute getter instead. The method body
    returns a DataFrame with the constraint's dimension columns and a column
    called `name` holding the values.
    """
    self._assert_has_ids()
    col_name = name
    try:
        name = poi.ConstraintAttribute[name]
        getter = self._model.poi.get_constraint_attribute
    except KeyError:
        # Not a standard attribute: pass the raw name through.
        getter = self._model.poi.get_constraint_raw_attribute

    if self.lhs.is_quadratic:
        constr_type = poi.ConstraintType.Quadratic
    else:
        constr_type = poi.ConstraintType.Linear

    values = []
    for constr_id in self.data.get_column(CONSTRAINT_KEY).to_list():
        values.append(getter(poi.ConstraintIndex(constr_type, constr_id), name))

    with_values = self.data.with_columns(pl.Series(values).alias(col_name))
    return with_values.select(self._dimensions_unsafe + [col_name])
1674
+
1675
def _on_add_to_model(self, model: Model, name: str):
    """Runs when the constraint is attached to a model.

    After the parent class's handling, any relax() call recorded in
    `_to_relax` is replayed, and the constraint ids are then assigned.
    """
    super()._on_add_to_model(model, name)
    pending = self._to_relax
    if pending is not None:
        self.relax(*pending.args, **pending.kwargs)
    self._assign_ids()
1680
+
1681
def _assign_ids(self):
    """Adds the constraint(s) to the solver and stores the ids it returns.

    One solver constraint is created per label (or a single one for a
    dimensionless constraint), each with a right-hand side of 0. The
    resulting ids are stored in the `CONSTRAINT_KEY` column of `self._data`.

    This function is the main bottleneck for pyoframe and has been
    optimized extensively; see the inline notes before restructuring it.
    """
    assert self._model is not None

    is_quadratic = self.lhs.is_quadratic
    use_var_names = self._model.solver_uses_variable_names
    sense = self.sense._to_poi()
    dims = self.dimensions
    df = self.lhs.data
    add_constraint = (
        self._model.poi._add_quadratic_constraint
        if is_quadratic
        else self._model.poi._add_linear_constraint
    )

    # GRBaddconstr uses sprintf when no name or "" is given. sprintf is slow. As such, we specify "C" as the name.
    # Specifying "" is the same as not specifying anything, see pyoptinterface:
    # https://github.com/metab0t/PyOptInterface/blob/6d61f3738ad86379cff71fee77077d4ea919f2d5/lib/gurobi_model.cpp#L338
    name = "C" if self._model.solver.block_auto_names else ""

    if dims is None:
        # Dimensionless: a single constraint built from the whole term table.
        if self._model.solver_uses_variable_names:
            name = self.name
        create_expression = (
            poi.ScalarQuadraticFunction
            if is_quadratic
            else poi.ScalarAffineFunction.from_numpy  # when called only once from_numpy is faster
        )
        # The columns are passed positionally: coefficients first, then the
        # variable-id column(s).
        df = self.data.with_columns(
            pl.lit(
                add_constraint(
                    create_expression(
                        *(
                            df.get_column(c).to_numpy()
                            for c in ([COEF_KEY] + self.lhs._variable_columns)
                        )
                    ),
                    sense,
                    0,
                    name,
                ).index
            )
            .alias(CONSTRAINT_KEY)
            .cast(KEY_TYPE)
        )
    else:
        create_expression = (
            poi.ScalarQuadraticFunction
            if is_quadratic
            else poi.ScalarAffineFunction  # when called multiple times the default constructor is fastest
        )
        # Arrange the rows so that all terms of one label are contiguous;
        # the slicing below relies on this.
        if Config.maintain_order:
            # This adds a 5-10% overhead on _assign_ids but ensures the order
            # is the same as the input data
            df_unique = df.select(dims).unique(maintain_order=True)
            df = (
                df.join(
                    df_unique.with_row_index(),
                    on=dims,
                    maintain_order="left",
                )
                .sort("index", maintain_order=True)
                .drop("index")
            )
        else:
            df = df.sort(dims, maintain_order=False)
            # must maintain order otherwise results are wrong!
            df_unique = df.select(dims).unique(maintain_order=True)
        coefs = df.get_column(COEF_KEY).to_list()
        vars = df.get_column(VAR_KEY).to_list()
        if is_quadratic:
            vars2 = df.get_column(QUAD_VAR_KEY).to_list()

        # Row index at which each label first appears, followed by the total
        # height: consecutive pairs delimit the rows of one constraint.
        split = (
            df.lazy()
            .with_row_index()
            .filter(pl.struct(dims).is_first_distinct())
            .select("index")
            .collect()
            .to_series()
            .to_list()
        ) + [df.height]
        # The term table is no longer needed once its columns are plain lists.
        del df

        # Note: list comprehension was slightly faster than using polars map_elements
        # Note 2: not specifying the argument name (`expr=`) was also slightly faster.
        # Note 3: we could have merged the if-else using an expansion operator (*) but that is slow.
        # Note 4: using kwargs is slow and including the constant term for linear expressions is faster.
        if use_var_names:
            # One name per label, in the same order as df_unique.
            names = concat_dimensions(df_unique, prefix=self.name)[
                "concated_dim"
            ].to_list()
            if is_quadratic:
                ids = [
                    add_constraint(
                        create_expression(coefs[s0:s1], vars[s0:s1], vars2[s0:s1]),
                        sense,
                        0,
                        names[i],
                    ).index
                    for i, (s0, s1) in enumerate(pairwise(split))
                ]
            else:
                ids = [
                    add_constraint(
                        create_expression(coefs[s0:s1], vars[s0:s1], 0),
                        sense,
                        0,
                        names[i],
                    ).index
                    for i, (s0, s1) in enumerate(pairwise(split))
                ]
        else:
            if is_quadratic:
                ids = [
                    add_constraint(
                        create_expression(coefs[s0:s1], vars[s0:s1], vars2[s0:s1]),
                        sense,
                        0,
                        name,
                    ).index
                    for s0, s1 in pairwise(split)
                ]
            else:
                ids = [
                    add_constraint(
                        create_expression(coefs[s0:s1], vars[s0:s1], 0),
                        sense,
                        0,
                        name,
                    ).index
                    for s0, s1 in pairwise(split)
                ]
        # ids follow the order of df_unique, so they can be attached directly.
        df = df_unique.with_columns(
            pl.Series(ids, dtype=KEY_TYPE).alias(CONSTRAINT_KEY)
        )

    self._data = df
1822
+
1823
@property
def dual(self) -> pl.DataFrame | float:
    """Returns the constraint's dual values.

    The values are read from the solver's `Dual` attribute. When the result
    is a DataFrame, its `Dual` column is renamed to `DUAL_KEY`.

    Examples:
        >>> m = pf.Model()
        >>> m.x = pf.Variable()
        >>> m.y = pf.Variable()
        >>> m.maximize = m.x - m.y

        Notice that for every unit increase in the right-hand side, the objective only improves by 0.5.
        >>> m.constraint_x = 2 * m.x <= 10
        >>> m.constraint_y = 2 * m.y >= 5
        >>> m.optimize()

        For every unit increase in the right-hand side of `constraint_x`, the objective improves by 0.5.
        >>> m.constraint_x.dual
        0.5

        For every unit increase in the right-hand side of `constraint_y`, the objective worsens by 0.5.
        >>> m.constraint_y.dual
        -0.5
    """
    values = self.attr.Dual
    is_frame = isinstance(values, pl.DataFrame)
    if is_frame:
        values = values.rename({"Dual": DUAL_KEY})

    # Weirdly, IPOPT returns dual values with the opposite sign, so we correct this bug.
    # It also does this for maximization problems
    # but since we flip the objective (because Ipopt doesn't support maximization), the double negatives cancel out.
    assert self._model is not None
    needs_sign_flip = (
        self._model.solver.name == "ipopt" and self._model.sense == ObjSense.MIN
    )
    if not needs_sign_flip:
        return values
    if is_frame:
        return values.with_columns(-pl.col(DUAL_KEY))
    return -values
1860
+
1861
@classmethod
def _get_id_column_name(cls):
    """Returns the name of the column holding constraint ids (`CONSTRAINT_KEY`)."""
    id_column = CONSTRAINT_KEY
    return id_column
1864
+
1865
def filter(self, *args, **kwargs) -> pl.DataFrame:
    """Syntactic sugar on `Constraint.lhs.data.filter()`, to help debugging.

    All arguments are forwarded unchanged.
    """
    lhs_terms = self.lhs.data
    return lhs_terms.filter(*args, **kwargs)
1868
+
1869
def relax(
    self, cost: SupportsToExpr, max: SupportsToExpr | None = None
) -> Constraint:
    """Allows the constraint to be violated at a `cost` and, optionally, up to a maximum.

    A non-negative relaxation variable named `<constraint name>_relaxation` is added to the
    model, added to (for `>=`) or subtracted from (for `<=`) the constraint's left-hand side,
    and its cost is added to the model's objective (negated for maximization problems).

    Warning:
        `.relax()` must be called before the constraint is assigned to the [Model][pyoframe.Model] (see examples below).

    Parameters:
        cost:
            The cost of violating the constraint. Costs should be positive because Pyoframe will automatically
            make them negative for maximization problems.
        max:
            The maximum value of the relaxation variable.

    Returns:
        The same constraint

    Raises:
        ValueError:
            If the constraint has already been added to the model, or if the model's
            objective sense has not been set yet.
        NotImplementedError:
            If the constraint is an equality.

    Examples:
        >>> m = pf.Model()
        >>> m.hours_sleep = pf.Variable(lb=0)
        >>> m.hours_day = pf.Variable(lb=0)
        >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
        >>> m.maximize = m.hours_day
        >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)
        >>> m.optimize()
        >>> m.hours_day.solution
        16.0
        >>> m.maximize += 2 * m.hours_day
        >>> m.optimize()
        >>> m.hours_day.solution
        19.0

        `relax` can only be called after the sense of the model has been defined.

        >>> m = pf.Model()
        >>> m.hours_sleep = pf.Variable(lb=0)
        >>> m.hours_day = pf.Variable(lb=0)
        >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
        >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)
        Traceback (most recent call last):
        ...
        ValueError: Cannot relax a constraint before the objective sense has been set. Try setting the objective first or using Model(sense=...).

        One way to solve this is by setting the sense directly on the model. See how this works fine:

        >>> m = pf.Model(sense="max")
        >>> m.hours_sleep = pf.Variable(lb=0)
        >>> m.hours_day = pf.Variable(lb=0)
        >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
        >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)

        And now an example with dimensions:

        >>> homework_due_tomorrow = pl.DataFrame(
        ...     {
        ...         "project": ["A", "B", "C"],
        ...         "cost_per_hour_underdelivered": [10, 20, 30],
        ...         "hours_to_finish": [9, 9, 9],
        ...         "max_underdelivered": [1, 9, 9],
        ...     }
        ... )
        >>> m.hours_spent = pf.Variable(homework_due_tomorrow["project"], lb=0)
        >>> m.must_finish_project = (
        ...     m.hours_spent
        ...     >= homework_due_tomorrow[["project", "hours_to_finish"]]
        ... ).relax(
        ...     homework_due_tomorrow[["project", "cost_per_hour_underdelivered"]],
        ...     max=homework_due_tomorrow[["project", "max_underdelivered"]],
        ... )
        >>> m.only_one_day = m.hours_spent.sum("project") <= 24
        >>> # Relaxing a constraint after it has already been assigned will give an error
        >>> m.only_one_day.relax(1)
        Traceback (most recent call last):
        ...
        ValueError: .relax() must be called before the Constraint is added to the model
        >>> m.attr.Silent = True
        >>> m.optimize()
        >>> m.maximize.value
        -50.0
        >>> m.hours_spent.solution
        shape: (3, 2)
        ┌─────────┬──────────┐
        │ project ┆ solution │
        │ ---     ┆ ---      │
        │ str     ┆ f64      │
        ╞═════════╪══════════╡
        │ A       ┆ 8.0      │
        │ B       ┆ 7.0      │
        │ C       ┆ 9.0      │
        └─────────┴──────────┘
    """
    if self._has_ids:
        raise ValueError(
            ".relax() must be called before the Constraint is added to the model"
        )

    m = self._model
    if m is None:
        # No model yet: only record the arguments.
        # NOTE(review): presumably replayed once the constraint is added to a model — confirm.
        self._to_relax = FuncArgs(args=[cost, max])
        return self

    var_name = f"{self.name}_relaxation"
    assert not hasattr(m, var_name), (
        "Conflicting names, relaxation variable already exists on the model."
    )

    # Validate everything up front so that a failing call does not leave a
    # relaxation variable on the model or a modified left-hand side behind.
    if m.sense is None:
        raise ValueError(
            "Cannot relax a constraint before the objective sense has been set. Try setting the objective first or using Model(sense=...)."
        )
    is_le = self.sense == ConstraintSense.LE
    is_ge = self.sense == ConstraintSense.GE
    if not (is_le or is_ge):  # pragma: no cover
        # TODO
        raise NotImplementedError(
            "Relaxation for equalities has not yet been implemented. Submit a pull request!"
        )

    var = Variable(self, lb=0, ub=max)
    setattr(m, var_name, var)

    if is_le:
        self.lhs -= var
    else:
        self.lhs += var

    penalty = var * cost
    if self.dimensions:
        penalty = penalty.sum()
    if m.sense == ObjSense.MAX:
        # Violations must always worsen the objective.
        penalty *= -1
    if m.has_objective:
        m.objective += penalty
    else:
        m.objective = penalty

    return self
2003
+
2004
def estimated_size(self, *args, **kwargs):
    """Returns the estimated size of the constraint.

    The result is the parent class's estimate plus the estimate of the
    underlying expression (`Constraint.lhs`).

    See [`Expression.estimated_size`][pyoframe.Expression.estimated_size] for details on signature and behavior.

    Examples:
        A dimensionless constraint contains a 32 bit constraint id and, for each term, a 64 bit coefficient with a 32 bit variable id.
        For a two-term expression that is: (32 + 2 * (64 + 32)) = 224 bits = 28 bytes.

        >>> m = pf.Model()
        >>> m.x = pf.Variable()
        >>> m.con = m.x <= 4
        >>> m.con.estimated_size()
        28
    """
    own_size = super().estimated_size(*args, **kwargs)
    lhs_size = self.lhs.estimated_size(*args, **kwargs)
    return own_size + lhs_size
2024
+
2025
@overload
def to_str(self, return_df: Literal[False] = False) -> str: ...

@overload
def to_str(self, return_df: Literal[True] = True) -> pl.DataFrame: ...

def to_str(self, return_df: bool = False) -> str | pl.DataFrame:
    """Converts the constraint to a human-readable string, or several arranged in a table.

    Long expressions are truncated according to [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and [`Config.print_polars_config`][pyoframe._Config.print_polars_config].

    Parameters:
        return_df:
            If `True`, returns a DataFrame containing strings instead of the string representation of the DataFrame.

    Examples:
        >>> import polars as pl
        >>> m = pf.Model()
        >>> x = pf.Set(x=range(1000))
        >>> y = pf.Set(y=range(1000))
        >>> m.V = pf.Variable(x, y)
        >>> expr = 2 * m.V * m.V
        >>> print((expr <= 3).to_str())
        ┌────────┬────────┬────────────────────────────────┐
        │ x      ┆ y      ┆ constraint                     │
        │ (1000) ┆ (1000) ┆                                │
        ╞════════╪════════╪════════════════════════════════╡
        │ 0      ┆ 0      ┆ 2 V[0,0] * V[0,0] <= 3         │
        │ 0      ┆ 1      ┆ 2 V[0,1] * V[0,1] <= 3         │
        │ 0      ┆ 2      ┆ 2 V[0,2] * V[0,2] <= 3         │
        │ 0      ┆ 3      ┆ 2 V[0,3] * V[0,3] <= 3         │
        │ 0      ┆ 4      ┆ 2 V[0,4] * V[0,4] <= 3         │
        │ …      ┆ …      ┆ …                              │
        │ 999    ┆ 995    ┆ 2 V[999,995] * V[999,995] <= 3 │
        │ 999    ┆ 996    ┆ 2 V[999,996] * V[999,996] <= 3 │
        │ 999    ┆ 997    ┆ 2 V[999,997] * V[999,997] <= 3 │
        │ 999    ┆ 998    ┆ 2 V[999,998] * V[999,998] <= 3 │
        │ 999    ┆ 999    ┆ 2 V[999,999] * V[999,999] <= 3 │
        └────────┴────────┴────────────────────────────────┘
        >>> expr = expr.sum("x")
        >>> print((expr >= 3).to_str())
        ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
        │ y ┆ constraint │
        │ (1000) ┆ │
        ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
        │ 0 ┆ 2 V[0,0] * V[0,0] +2 V[1,0] * V[1,0] +2 V[2,0] * V[2,0] +2 V[3,0] * V[3,0] +2 V[4,0] * │
        │ ┆ V[4,0] … >= 3 │
        │ 1 ┆ 2 V[0,1] * V[0,1] +2 V[1,1] * V[1,1] +2 V[2,1] * V[2,1] +2 V[3,1] * V[3,1] +2 V[4,1] * │
        │ ┆ V[4,1] … >= 3 │
        │ 2 ┆ 2 V[0,2] * V[0,2] +2 V[1,2] * V[1,2] +2 V[2,2] * V[2,2] +2 V[3,2] * V[3,2] +2 V[4,2] * │
        │ ┆ V[4,2] … >= 3 │
        │ 3 ┆ 2 V[0,3] * V[0,3] +2 V[1,3] * V[1,3] +2 V[2,3] * V[2,3] +2 V[3,3] * V[3,3] +2 V[4,3] * │
        │ ┆ V[4,3] … >= 3 │
        │ 4 ┆ 2 V[0,4] * V[0,4] +2 V[1,4] * V[1,4] +2 V[2,4] * V[2,4] +2 V[3,4] * V[3,4] +2 V[4,4] * │
        │ ┆ V[4,4] … >= 3 │
        │ … ┆ … │
        │ 995 ┆ 2 V[0,995] * V[0,995] +2 V[1,995] * V[1,995] +2 V[2,995] * V[2,995] +2 V[3,995] * │
        │ ┆ V[3,995] +2 V[4,99… │
        │ 996 ┆ 2 V[0,996] * V[0,996] +2 V[1,996] * V[1,996] +2 V[2,996] * V[2,996] +2 V[3,996] * │
        │ ┆ V[3,996] +2 V[4,99… │
        │ 997 ┆ 2 V[0,997] * V[0,997] +2 V[1,997] * V[1,997] +2 V[2,997] * V[2,997] +2 V[3,997] * │
        │ ┆ V[3,997] +2 V[4,99… │
        │ 998 ┆ 2 V[0,998] * V[0,998] +2 V[1,998] * V[1,998] +2 V[2,998] * V[2,998] +2 V[3,998] * │
        │ ┆ V[3,998] +2 V[4,99… │
        │ 999 ┆ 2 V[0,999] * V[0,999] +2 V[1,999] * V[1,999] +2 V[2,999] * V[2,999] +2 V[3,999] * │
        │ ┆ V[3,999] +2 V[4,99… │
        └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
        >>> expr = expr.sum("y")
        >>> print((expr == 3).to_str())
        2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] +2 V[0,4] * V[0,4] … = 3
    """
    dims = self.dimensions

    # Left-hand side as strings, without the constant term.
    lhs_strings = self.lhs.to_str(
        include_const_term=False, return_df=True, str_col_name="constraint"
    )

    # The constant terms are stored on the left-hand side, so they are negated
    # to be displayed on the right of the comparison operator.
    negated_constants = self.lhs.constant_terms.with_columns(pl.col(COEF_KEY) * -1)
    rhs_strings = cast_coef_to_string(
        negated_constants, drop_ones=False, always_show_sign=False
    ).rename({COEF_KEY: "rhs"})

    if dims:
        combined = lhs_strings.join(
            rhs_strings, on=dims, how="left", maintain_order="left", coalesce=True
        )
    else:
        combined = pl.concat([lhs_strings, rhs_strings], how="horizontal")

    operator = pl.lit(f" {self.sense.value} ")
    table = combined.with_columns(
        pl.concat_str("constraint", operator, "rhs")
    ).drop("rhs")

    if return_df:
        return table

    if self.dimensions is None:
        # A dimensionless constraint is a single string.
        return table.item()

    table = self._add_shape_to_columns(table)
    with Config.print_polars_config:
        return repr(table)
2122
+
2123
def __repr__(self) -> str:
    """Returns a one-line summary (name, height, term count, degree) followed by `to_str()`."""
    summary = get_obj_repr(
        self,
        self.name,
        # The height is only reported for constraints that have dimensions.
        height=len(self) if self.dimensions else None,
        terms=len(self.lhs.data),
        type=self.lhs.degree(return_str=True),
    )
    return "\n".join((summary, self.to_str()))
2135
+
2136
+
2137
+ class Variable(ModelElementWithId, SupportsMath):
2138
+ """A decision variable for an optimization model.
2139
+
2140
+ Parameters:
2141
+ *indexing_sets:
2142
+ If no indexing_sets are provided, a single variable with no dimensions is created.
2143
+ Otherwise, a variable is created for each element in the Cartesian product of the indexing_sets (see Set for details on behaviour).
2144
+ lb:
2145
+ The lower bound for all variables.
2146
+ ub:
2147
+ The upper bound for all variables.
2148
+ vtype:
2149
+ The type of the variable. Can be either a VType enum or a string. Default is VType.CONTINUOUS.
2150
+ equals:
2151
+ When specified, a variable is created and a constraint is added to make the variable equal to the provided expression.
2152
+
2153
+ Examples:
2154
+ >>> import pandas as pd
2155
+ >>> m = pf.Model()
2156
+ >>> df = pd.DataFrame(
2157
+ ... {"dim1": [1, 1, 2, 2, 3, 3], "dim2": ["a", "b", "a", "b", "a", "b"]}
2158
+ ... )
2159
+ >>> Variable(df)
2160
+ <Variable 'unnamed' height=6>
2161
+ ┌──────┬──────┐
2162
+ │ dim1 ┆ dim2 │
2163
+ │ (3) ┆ (2) │
2164
+ ╞══════╪══════╡
2165
+ │ 1 ┆ a │
2166
+ │ 1 ┆ b │
2167
+ │ 2 ┆ a │
2168
+ │ 2 ┆ b │
2169
+ │ 3 ┆ a │
2170
+ │ 3 ┆ b │
2171
+ └──────┴──────┘
2172
+
2173
+ Variables cannot be used until they're added to the model.
2174
+
2175
+ >>> m.constraint = Variable(df) <= 3
2176
+ Traceback (most recent call last):
2177
+ ...
2178
+ ValueError: Cannot use 'Variable' before it has been added to a model.
2179
+
2180
+ Instead, assign the variable to the model first:
2181
+ >>> m.v = Variable(df)
2182
+ >>> m.constraint = m.v <= 3
2183
+
2184
+ >>> m.v
2185
+ <Variable 'v' height=6>
2186
+ ┌──────┬──────┬──────────┐
2187
+ │ dim1 ┆ dim2 ┆ variable │
2188
+ │ (3) ┆ (2) ┆ │
2189
+ ╞══════╪══════╪══════════╡
2190
+ │ 1 ┆ a ┆ v[1,a] │
2191
+ │ 1 ┆ b ┆ v[1,b] │
2192
+ │ 2 ┆ a ┆ v[2,a] │
2193
+ │ 2 ┆ b ┆ v[2,b] │
2194
+ │ 3 ┆ a ┆ v[3,a] │
2195
+ │ 3 ┆ b ┆ v[3,b] │
2196
+ └──────┴──────┴──────────┘
2197
+
2198
+ >>> m.v2 = Variable(df[["dim1"]])
2199
+ Traceback (most recent call last):
2200
+ ...
2201
+ ValueError: Duplicate rows found in input data.
2202
+ >>> m.v3 = Variable(df[["dim1"]].drop_duplicates())
2203
+ >>> m.v3
2204
+ <Variable 'v3' height=3>
2205
+ ┌──────┬──────────┐
2206
+ │ dim1 ┆ variable │
2207
+ │ (3) ┆ │
2208
+ ╞══════╪══════════╡
2209
+ │ 1 ┆ v3[1] │
2210
+ │ 2 ┆ v3[2] │
2211
+ │ 3 ┆ v3[3] │
2212
+ └──────┴──────────┘
2213
+ """
2214
+
2215
+ # TODO: Breaking change, remove support for Iterable[AcceptableSets]
2216
def __init__(
    self,
    *indexing_sets: SetTypes | Iterable[SetTypes],
    lb: float | int | SupportsToExpr | None = None,
    ub: float | int | SupportsToExpr | None = None,
    vtype: VType | VTypeValue = VType.CONTINUOUS,
    equals: SupportsToExpr | None = None,
):
    """Initializes the variable; see the class docstring for the meaning of each parameter.

    Numeric bounds are stored in `lb`/`ub`. Any other non-`None` bound is kept
    in `_lb_expr`/`_ub_expr` instead, and the matching `lb`/`ub` is left as `None`.
    """
    if equals is not None:
        assert len(indexing_sets) == 0, (
            "Cannot specify both 'equals' and 'indexing_sets'"
        )
        # The variable takes its index from the expression it must equal.
        indexing_sets = (equals,)

    if len(indexing_sets) > 0:
        data = Set(*indexing_sets).data
    else:
        data = pl.DataFrame()
    super().__init__(data)

    self.vtype: VType = VType(vtype)
    self._attr = Container(self._set_attribute, self._get_attribute)
    self._equals = equals

    lb_is_numeric = lb is None or isinstance(lb, (float, int))
    self._lb_expr = None if lb_is_numeric else lb
    self.lb = lb if lb_is_numeric else None

    ub_is_numeric = ub is None or isinstance(ub, (float, int))
    self._ub_expr = None if ub_is_numeric else ub
    self.ub = ub if ub_is_numeric else None
2245
+
2246
@property
def attr(self) -> Container:
    """Allows reading and writing variable attributes similarly to [Model.attr][pyoframe.Model.attr].

    Returns the `Container` stored in `self._attr`.
    """
    return self._attr
2250
+
2251
def _set_attribute(self, name, value):
    """Sets the solver attribute `name` on every variable of this object.

    Parameters:
        name:
            Attribute name. If it is a key of `poi.VariableAttribute`, the attribute is
            set through `set_variable_attribute`; otherwise the name is passed as is to
            `set_variable_raw_attribute`.
        value:
            For a dimensionless variable, the value handed to the solver. Otherwise, an
            object that `self.data` can be joined with on the variable's dimensions and
            that has a column called `name` holding the per-variable values.
    """
    self._assert_has_ids()
    col_name = name
    try:
        name = poi.VariableAttribute[name]
        setter = self._model.poi.set_variable_attribute
    except KeyError:
        setter = self._model.poi.set_variable_raw_attribute

    if self.dimensions is None:
        for var_id in self.data.get_column(VAR_KEY):
            setter(poi.VariableIndex(var_id), name, value)
        return

    # Match each variable id with its value via the shared dimension columns.
    matched = self.data.join(
        value,
        on=self.dimensions,
        maintain_order="left" if Config.maintain_order else None,
    )
    id_value_pairs = matched.select(pl.col(VAR_KEY), pl.col(col_name))
    for var_id, var_value in id_value_pairs.iter_rows():
        setter(poi.VariableIndex(var_id), name, var_value)
2274
+
2275
@unwrap_single_values
def _get_attribute(self, name):
    """Reads the solver attribute `name` for every variable of this object.

    If `name` is a key of `poi.VariableAttribute`, the value is read through
    `get_variable_attribute`; otherwise the name is passed as is to
    `get_variable_raw_attribute`.

    Returns a DataFrame with the variable's dimension columns and a column called
    `name` (before the `unwrap_single_values` decorator is applied).
    """
    self._assert_has_ids()
    col_name = name
    try:
        name = poi.VariableAttribute[name]
        getter = self._model.poi.get_variable_attribute
    except KeyError:
        getter = self._model.poi.get_variable_raw_attribute

    var_ids = self.data.get_column(VAR_KEY).to_list()
    values = pl.Series([getter(poi.VariableIndex(var_id), name) for var_id in var_ids])
    with_values = self.data.with_columns(values.alias(col_name))
    wanted_columns = self._dimensions_unsafe + [col_name]
    return with_values.select(wanted_columns)
2289
+
2290
def _assign_ids(self):
    """Creates the variables in the solver and stores their ids in the `VAR_KEY` column of `self._data`.

    Bounds that are `None` are passed to the solver as `-1e100` (lower) and `1e100` (upper).

    Raises:
        ValueError:
            If the variable is not continuous and the solver does not support integer variables.
    """
    assert self._model is not None
    assert self.name is not None

    model = self._model
    solver = model.solver
    if solver.supports_integer_variables:
        domain = self.vtype._to_poi()
    elif self.vtype != VType.CONTINUOUS:
        raise ValueError(
            f"Solver {solver.name} does not support integer or binary variables."
        )

    lb = -1e100 if self.lb is None else float(self.lb)
    ub = 1e100 if self.ub is None else float(self.ub)

    poi_add_var = model.poi.add_variable
    dims = self.dimensions

    # Work out the name passed to the solver for each variable to be created.
    if dims is not None and model.solver_uses_variable_names:
        # One distinct name per row, built from the variable name and its index.
        names = concat_dimensions(self.data, prefix=self.name)[
            "concated_dim"
        ].to_list()
    else:
        if model.solver_uses_variable_names:
            shared_name = self.name
        elif solver.block_auto_names:
            shared_name = "V"
        else:
            shared_name = ""
        count = 1 if dims is None else len(self.data)
        names = [shared_name] * count

    if solver.supports_integer_variables:
        ids = [poi_add_var(domain, lb, ub, var_name).index for var_name in names]
    else:
        ids = [poi_add_var(lb, ub, name=var_name).index for var_name in names]

    self._data = self.data.with_columns(
        pl.Series(ids, dtype=KEY_TYPE).alias(VAR_KEY)
    )
2337
+
2338
def _on_add_to_model(self, model, name):
    """Runs the parent hook, assigns variable ids, then adds any companion constraints.

    Bounds given as expressions and the `equals` expression are set on the model
    as constraints named `<name>_lb`, `<name>_ub` and `<name>_equals`.
    """
    super()._on_add_to_model(model, name)
    self._assign_ids()

    has_lb_expr = self._lb_expr is not None
    if has_lb_expr:
        lower_bound_constraint = self._lb_expr <= self
        setattr(model, f"{name}_lb", lower_bound_constraint)

    has_ub_expr = self._ub_expr is not None
    if has_ub_expr:
        upper_bound_constraint = self <= self._ub_expr
        setattr(model, f"{name}_ub", upper_bound_constraint)

    has_equals = self._equals is not None
    if has_equals:
        equality_constraint = self == self._equals
        setattr(model, f"{name}_equals", equality_constraint)
2349
+
2350
@classmethod
def _get_id_column_name(cls):
    """Returns `VAR_KEY`, the id column name used by this class."""
    return VAR_KEY
2353
+
2354
@property
@unwrap_single_values
def solution(self):
    """Retrieves a variable's optimal value after the model has been solved.

    Return type is a DataFrame if the variable has dimensions, otherwise it is a single value.
    Binary and integer variables are returned as integers.

    Raises:
        RuntimeError:
            If the value cannot be read from the solver.
        AssertionError:
            If a binary or integer variable has a value further than
            `Config.integer_tolerance` from the nearest integer (the check is
            skipped when the tolerance is 0).

    Examples:
        >>> m = pf.Model()
        >>> m.var_continuous = pf.Variable({"dim1": [1, 2, 3]}, lb=5, ub=5)
        >>> m.var_integer = pf.Variable(
        ...     {"dim1": [1, 2, 3]}, lb=4.5, ub=5.5, vtype=pf.VType.INTEGER
        ... )
        >>> m.var_dimensionless = pf.Variable(
        ...     lb=4.5, ub=5.5, vtype=pf.VType.INTEGER
        ... )
        >>> m.var_continuous.solution
        Traceback (most recent call last):
        ...
        RuntimeError: Failed to retrieve solution for variable. Are you sure the model has been solved?
        >>> m.optimize()
        >>> m.var_continuous.solution
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ f64      │
        ╞══════╪══════════╡
        │ 1    ┆ 5.0      │
        │ 2    ┆ 5.0      │
        │ 3    ┆ 5.0      │
        └──────┴──────────┘
        >>> m.var_integer.solution
        shape: (3, 2)
        ┌──────┬──────────┐
        │ dim1 ┆ solution │
        │ ---  ┆ ---      │
        │ i64  ┆ i64      │
        ╞══════╪══════════╡
        │ 1    ┆ 5        │
        │ 2    ┆ 5        │
        │ 3    ┆ 5        │
        └──────┴──────────┘
        >>> m.var_dimensionless.solution
        5
    """
    try:
        solution = self.attr.Value
    except RuntimeError as e:
        raise RuntimeError(
            "Failed to retrieve solution for variable. Are you sure the model has been solved?"
        ) from e
    if isinstance(solution, pl.DataFrame):
        solution = solution.rename({"Value": SOLUTION_KEY})

    if self.vtype in [VType.BINARY, VType.INTEGER]:
        if isinstance(solution, pl.DataFrame):
            # TODO handle values that are out of bounds of Int64 (i.e. when problem is unbounded)
            solution = solution.with_columns(
                pl.col(SOLUTION_KEY).alias("solution_float"),
                pl.col(SOLUTION_KEY).round().cast(pl.Int64),
            )
            if Config.integer_tolerance != 0:
                df = solution.filter(
                    (pl.col("solution_float") - pl.col(SOLUTION_KEY)).abs()
                    > Config.integer_tolerance
                )
                assert df.is_empty(), (
                    f"Variable {self.name} has a non-integer value: {df}\nThis should not happen."
                )
            solution = solution.drop("solution_float")
        else:
            solution_float = solution
            solution = int(round(solution))
            if Config.integer_tolerance != 0:
                # Same boundary as the DataFrame branch: a deviation equal to the
                # tolerance is accepted. The message reports the raw solver value,
                # since the rounded one is an integer by construction.
                assert abs(solution - solution_float) <= Config.integer_tolerance, (
                    f"Value of variable {self.name} is not an integer: {solution_float}. This should not happen."
                )

    return solution
2435
+
2436
def __repr__(self):
    """Returns a one-line summary (name, bounds, height) followed by a table of the variable.

    Once ids are assigned the table comes from the variable's expression form;
    before that, only the index data can be shown.
    """
    summary = get_obj_repr(
        self,
        self.name,
        lb=self.lb,
        ub=self.ub,
        height=self.data.height if self.dimensions else None,
    )

    if self._has_ids:
        table = self.to_expr().to_str(str_col_name="variable")
    else:
        with Config.print_polars_config:
            shaped = self._add_shape_to_columns(self.data)
            # we don't try to include the allowed_new_dims because there are none for Variables (only exist on Expression or Sets)
            table = repr(shaped)

    return summary + "\n" + table
2456
+
2457
def to_expr(self) -> Expression:
    """Converts the Variable to an Expression.

    The variable must already have ids. A `SOLUTION_KEY` column, if present
    in the data, is left out of the expression.
    """
    self._assert_has_ids()
    without_solution = self.data.drop(SOLUTION_KEY, strict=False)
    return self._new(without_solution, self.name)  # pyright: ignore[reportArgumentType], we know it's safe after _assert_has_ids()
2461
+
2462
def _new(self, data: pl.DataFrame, name: str) -> Expression:
    """Builds an Expression from `data`, giving every row a coefficient of 1.0.

    The returned expression is attached to the same model as this variable.
    """
    self._assert_has_ids()
    with_unit_coef = data.with_columns(pl.lit(1.0).alias(COEF_KEY))
    expression = Expression(with_unit_coef, name)
    expression._model = self._model
    return expression
2467
+
2468
@return_new
def next(self, dim: str, wrap_around: bool = False):
    """Creates an expression where the variable at each index is the next variable in the specified dimension.

    "Next" follows the sorted order of the distinct values found in `dim`.

    Parameters:
        dim:
            The dimension over which to shift the variable.
        wrap_around:
            If `True`, the last index in the dimension is connected to the first index.

    Examples:
        >>> import pandas as pd
        >>> time_dim = pd.DataFrame({"time": ["00:00", "06:00", "12:00", "18:00"]})
        >>> space_dim = pd.DataFrame({"city": ["Toronto", "Berlin"]})
        >>> m = pf.Model()
        >>> m.bat_charge = pf.Variable(time_dim, space_dim)
        >>> m.bat_flow = pf.Variable(time_dim, space_dim)
        >>> # Fails because the dimensions are not the same
        >>> m.bat_charge + m.bat_flow == m.bat_charge.next("time")
        Traceback (most recent call last):
        ...
        pyoframe._constants.PyoframeError: Cannot subtract the two expressions below because of unmatched values.
        Expression 1: (bat_charge + bat_flow)
        Expression 2: bat_charge.next(…)
        Unmatched values:
        shape: (2, 4)
        ┌───────┬─────────┬────────────┬────────────┐
        │ time  ┆ city    ┆ time_right ┆ city_right │
        │ ---   ┆ ---     ┆ ---        ┆ ---        │
        │ str   ┆ str     ┆ str        ┆ str        │
        ╞═══════╪═════════╪════════════╪════════════╡
        │ 18:00 ┆ Toronto ┆ null       ┆ null       │
        │ 18:00 ┆ Berlin  ┆ null       ┆ null       │
        └───────┴─────────┴────────────┴────────────┘
        If this is intentional, use .drop_unmatched() or .keep_unmatched().

        >>> (m.bat_charge + m.bat_flow).drop_unmatched() == m.bat_charge.next(
        ...     "time"
        ... )
        <Constraint 'unnamed' height=6 terms=18 type=linear>
        ┌───────┬─────────┬────────────────────────────────────────────────────────────────────────────────┐
        │ time ┆ city ┆ constraint │
        │ (3) ┆ (2) ┆ │
        ╞═══════╪═════════╪════════════════════════════════════════════════════════════════════════════════╡
        │ 00:00 ┆ Toronto ┆ bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] │
        │ ┆ ┆ - bat_charge[06:00,Toronto] = 0 │
        │ 00:00 ┆ Berlin ┆ bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] │
        │ ┆ ┆ = 0 │
        │ 06:00 ┆ Toronto ┆ bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] │
        │ ┆ ┆ - bat_charge[12:00,Toronto] = 0 │
        │ 06:00 ┆ Berlin ┆ bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] │
        │ ┆ ┆ = 0 │
        │ 12:00 ┆ Toronto ┆ bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] │
        │ ┆ ┆ - bat_charge[18:00,Toronto] = 0 │
        │ 12:00 ┆ Berlin ┆ bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] │
        │ ┆ ┆ = 0 │
        └───────┴─────────┴────────────────────────────────────────────────────────────────────────────────┘

        >>> (m.bat_charge + m.bat_flow) == m.bat_charge.next(
        ...     "time", wrap_around=True
        ... )
        <Constraint 'unnamed' height=8 terms=24 type=linear>
        ┌───────┬─────────┬────────────────────────────────────────────────────────────────────────────────┐
        │ time ┆ city ┆ constraint │
        │ (4) ┆ (2) ┆ │
        ╞═══════╪═════════╪════════════════════════════════════════════════════════════════════════════════╡
        │ 00:00 ┆ Toronto ┆ bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] │
        │ ┆ ┆ - bat_charge[06:00,Toronto] = 0 │
        │ 00:00 ┆ Berlin ┆ bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] │
        │ ┆ ┆ = 0 │
        │ 06:00 ┆ Toronto ┆ bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] │
        │ ┆ ┆ - bat_charge[12:00,Toronto] = 0 │
        │ 06:00 ┆ Berlin ┆ bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] │
        │ ┆ ┆ = 0 │
        │ 12:00 ┆ Toronto ┆ bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] │
        │ ┆ ┆ - bat_charge[18:00,Toronto] = 0 │
        │ 12:00 ┆ Berlin ┆ bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] │
        │ ┆ ┆ = 0 │
        │ 18:00 ┆ Toronto ┆ bat_charge[18:00,Toronto] + bat_flow[18:00,Toronto] │
        │ ┆ ┆ - bat_charge[00:00,Toronto] = 0 │
        │ 18:00 ┆ Berlin ┆ bat_charge[18:00,Berlin] + bat_flow[18:00,Berlin] - bat_charge[00:00,Berlin] │
        │ ┆ ┆ = 0 │
        └───────┴─────────┴────────────────────────────────────────────────────────────────────────────────┘

    """
    # Lookup table pairing each distinct value of `dim` with the value after it.
    distinct_sorted = (
        self.data.select(dim)
        .unique(maintain_order=Config.maintain_order)
        .sort(by=dim)
    )
    successors = distinct_sorted.with_columns(
        pl.col(dim).shift(-1).alias("__next")
    )
    if wrap_around:
        # The last value has no successor; point it back to the first value.
        successors = successors.with_columns(
            pl.col("__next").fill_null(pl.first(dim))
        )
    else:
        successors = successors.drop_nulls(dim)

    # Re-label every variable with the index whose successor it is.
    relabelled = self.to_expr().data.rename({dim: "__prev"})
    shifted = relabelled.join(
        successors,
        left_on="__prev",
        right_on="__next",
        # We use "right" instead of "left" to maintain consistency with the behavior without maintain_order
        maintain_order="right" if Config.maintain_order else None,
    )
    return shifted.drop(["__prev", "__next"], strict=False)