pyoframe 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

pyoframe/_core.py ADDED
@@ -0,0 +1,2652 @@
1
+ """Defines several core Pyoframe objects including Set, Constraint, Variable, and Expression."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import warnings
6
+ from abc import abstractmethod
7
+ from collections.abc import Iterable, Mapping, Sequence
8
+ from typing import TYPE_CHECKING, Literal, Union, overload
9
+
10
+ import pandas as pd
11
+ import polars as pl
12
+ import pyoptinterface as poi
13
+
14
+ from pyoframe._arithmetic import (
15
+ _get_dimensions,
16
+ _simplify_expr_df,
17
+ add,
18
+ multiply,
19
+ )
20
+ from pyoframe._constants import (
21
+ COEF_KEY,
22
+ CONST_TERM,
23
+ CONSTRAINT_KEY,
24
+ DUAL_KEY,
25
+ QUAD_VAR_KEY,
26
+ RESERVED_COL_KEYS,
27
+ SOLUTION_KEY,
28
+ VAR_KEY,
29
+ Config,
30
+ ConstraintSense,
31
+ ExtrasStrategy,
32
+ ObjSense,
33
+ PyoframeError,
34
+ VType,
35
+ VTypeValue,
36
+ )
37
+ from pyoframe._model_element import BaseBlock
38
+ from pyoframe._utils import (
39
+ Container,
40
+ FuncArgs,
41
+ cast_coef_to_string,
42
+ concat_dimensions,
43
+ get_obj_repr,
44
+ pairwise,
45
+ parse_inputs_as_iterable,
46
+ return_new,
47
+ unwrap_single_values,
48
+ )
49
+
50
+ if TYPE_CHECKING: # pragma: no cover
51
+ from pyoframe._model import Model
52
+
53
+ Operable = Union["BaseOperableBlock", pl.DataFrame, pd.DataFrame, pd.Series, int, float]
54
+ """Any of the following objects: `int`, `float`, [Variable][pyoframe.Variable], [Expression][pyoframe.Expression], [Set][pyoframe.Set], polars or pandas DataFrame, or pandas Series."""
55
+
56
+
57
+ class BaseOperableBlock(BaseBlock):
58
+ """Any object that can be converted into an expression."""
59
+
60
+ def __init__(self, *args, **kwargs):
61
+ self._extras_strategy = ExtrasStrategy.UNSET
62
+ self._allowed_new_dims: list[str] = []
63
+ super().__init__(*args, **kwargs)
64
+
65
+ @abstractmethod
66
+ def _new(self, data: pl.DataFrame, name: str) -> BaseOperableBlock:
67
+ """Helper method to create a new instance of the same (or for Variable derivative) class."""
68
+
69
+ def _copy_flags(self, other: BaseOperableBlock):
70
+ """Copies the flags from another BaseOperableBlock object."""
71
+ self._extras_strategy = other._extras_strategy
72
+ self._allowed_new_dims = other._allowed_new_dims.copy()
73
+
74
+ def keep_extras(self):
75
+ """Indicates that labels not present in the other expression should be kept during addition, subtraction, or constraint creation.
76
+
77
+ [Learn more](../../learn/concepts/addition.md) about addition modifiers.
78
+
79
+ See Also:
80
+ [`drop_extras`][pyoframe.Expression.drop_extras].
81
+ """
82
+ new = self._new(self.data, name=f"{self.name}.keep_extras()")
83
+ new._copy_flags(self)
84
+ new._extras_strategy = ExtrasStrategy.KEEP
85
+ return new
86
+
87
+ def drop_extras(self):
88
+ """Indicates that labels not present in the other expression should be discarded during addition, subtraction, or constraint creation.
89
+
90
+ [Learn more](../../learn/concepts/addition.md) about addition modifiers.
91
+
92
+ See Also:
93
+ [`keep_extras`][pyoframe.Expression.keep_extras].
94
+ """
95
+ new = self._new(self.data, name=f"{self.name}.drop_extras()")
96
+ new._copy_flags(self)
97
+ new._extras_strategy = ExtrasStrategy.DROP
98
+ return new
99
+
100
+ def keep_unmatched(self): # pragma: no cover
101
+ """Deprecated, use [`keep_extras`][pyoframe.Expression.keep_extras] instead."""
102
+ warnings.warn(
103
+ "'keep_unmatched' has been renamed to 'keep_extras'. Please use 'keep_extras' instead.",
104
+ DeprecationWarning,
105
+ )
106
+ return self.keep_extras()
107
+
108
+ def drop_unmatched(self): # pragma: no cover
109
+ """Deprecated, use [`drop_extras`][pyoframe.Expression.drop_extras] instead."""
110
+ warnings.warn(
111
+ "'drop_unmatched' has been renamed to 'drop_extras'. Please use 'drop_extras' instead.",
112
+ DeprecationWarning,
113
+ )
114
+ return self.drop_extras()
115
+
116
+ def raise_extras(self):
117
+ """Indicates that labels not present in the other expression should raise an error during addition, subtraction, or constraint creation.
118
+
119
+ This is the default behavior and, as such, this addition modifier should only be used in the rare cases where you want to override a previous use of `keep_extras()` or `drop_extras()`.
120
+
121
+ [Learn more](../../learn/concepts/addition.md) about addition modifiers.
122
+
123
+ See Also:
124
+ [`keep_extras`][pyoframe.Expression.keep_extras] and [`drop_extras`][pyoframe.Expression.drop_extras].
125
+ """
126
+ new = self._new(self.data, name=f"{self.name}.raise_extras()")
127
+ new._copy_flags(self)
128
+ new._extras_strategy = ExtrasStrategy.UNSET
129
+ return new
130
+
131
+ def over(self, *dims: str):
132
+ """Indicates that the expression can be broadcasted over the given dimensions during addition and subtraction."""
133
+ new = self._new(self.data, name=f"{self.name}.over(…)")
134
+ new._copy_flags(self)
135
+ new._allowed_new_dims.extend(dims)
136
+ return new
137
+
138
+ @return_new
139
+ def rename(self, *args, **kwargs):
140
+ """Renames one or several of the object's dimensions.
141
+
142
+ Takes the same arguments as [`polars.DataFrame.rename`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.rename.html).
143
+
144
+ See the [portfolio optimization example](../../examples/portfolio_optimization.md) for a usage example.
145
+
146
+ Examples:
147
+ >>> m = pf.Model()
148
+ >>> m.v = pf.Variable(
149
+ ... {"hour": ["00:00", "06:00", "12:00", "18:00"]},
150
+ ... {"city": ["Toronto", "Berlin", "Paris"]},
151
+ ... )
152
+ >>> m.v
153
+ <Variable 'v' height=12>
154
+ ┌───────┬─────────┬──────────────────┐
155
+ │ hour ┆ city ┆ variable │
156
+ │ (4) ┆ (3) ┆ │
157
+ ╞═══════╪═════════╪══════════════════╡
158
+ │ 00:00 ┆ Toronto ┆ v[00:00,Toronto] │
159
+ │ 00:00 ┆ Berlin ┆ v[00:00,Berlin] │
160
+ │ 00:00 ┆ Paris ┆ v[00:00,Paris] │
161
+ │ 06:00 ┆ Toronto ┆ v[06:00,Toronto] │
162
+ │ 06:00 ┆ Berlin ┆ v[06:00,Berlin] │
163
+ │ … ┆ … ┆ … │
164
+ │ 12:00 ┆ Berlin ┆ v[12:00,Berlin] │
165
+ │ 12:00 ┆ Paris ┆ v[12:00,Paris] │
166
+ │ 18:00 ┆ Toronto ┆ v[18:00,Toronto] │
167
+ │ 18:00 ┆ Berlin ┆ v[18:00,Berlin] │
168
+ │ 18:00 ┆ Paris ┆ v[18:00,Paris] │
169
+ └───────┴─────────┴──────────────────┘
170
+
171
+ >>> m.v.rename({"city": "location"})
172
+ <Expression height=12 terms=12 type=linear>
173
+ ┌───────┬──────────┬──────────────────┐
174
+ │ hour ┆ location ┆ expression │
175
+ │ (4) ┆ (3) ┆ │
176
+ ╞═══════╪══════════╪══════════════════╡
177
+ │ 00:00 ┆ Toronto ┆ v[00:00,Toronto] │
178
+ │ 00:00 ┆ Berlin ┆ v[00:00,Berlin] │
179
+ │ 00:00 ┆ Paris ┆ v[00:00,Paris] │
180
+ │ 06:00 ┆ Toronto ┆ v[06:00,Toronto] │
181
+ │ 06:00 ┆ Berlin ┆ v[06:00,Berlin] │
182
+ │ … ┆ … ┆ … │
183
+ │ 12:00 ┆ Berlin ┆ v[12:00,Berlin] │
184
+ │ 12:00 ┆ Paris ┆ v[12:00,Paris] │
185
+ │ 18:00 ┆ Toronto ┆ v[18:00,Toronto] │
186
+ │ 18:00 ┆ Berlin ┆ v[18:00,Berlin] │
187
+ │ 18:00 ┆ Paris ┆ v[18:00,Paris] │
188
+ └───────┴──────────┴──────────────────┘
189
+
190
+ """
191
+ return self.data.rename(*args, **kwargs)
192
+
193
+ @return_new
194
+ def with_columns(self, *args, **kwargs):
195
+ """Creates a new object with modified columns.
196
+
197
+ Takes the same arguments as [`polars.DataFrame.with_columns`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.with_columns.html).
198
+
199
+ !!! warning
200
+ Only use this function if you know what you're doing. It is not recommended to manually modify the columns
201
+ within a Pyoframe object.
202
+ """
203
+ return self.data.with_columns(*args, **kwargs)
204
+
205
+ @return_new
206
+ def filter(self, *args, **kwargs):
207
+ """Creates a copy of the object containing only a subset of the original rows.
208
+
209
+ Takes the same arguments as [`polars.DataFrame.filter`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.filter.html).
210
+
211
+ See Also:
212
+ [`Expression.pick`][pyoframe.Expression.pick] or [`Variable.pick`][pyoframe.Variable.pick] if you wish to drop the filtered
213
+ column in the process.
214
+
215
+ """
216
+ return self.data.filter(*args, **kwargs)
217
+
218
+ @return_new
219
+ def pick(self, **kwargs):
220
+ """Filters elements by the given criteria and then drops the filtered dimensions.
221
+
222
+ Examples:
223
+ >>> m = pf.Model()
224
+ >>> m.v = pf.Variable(
225
+ ... [
226
+ ... {"hour": ["00:00", "06:00", "12:00", "18:00"]},
227
+ ... {"city": ["Toronto", "Berlin", "Paris"]},
228
+ ... ]
229
+ ... )
230
+ >>> m.v.pick(hour="06:00")
231
+ <Expression height=3 terms=3 type=linear>
232
+ ┌─────────┬──────────────────┐
233
+ │ city ┆ expression │
234
+ │ (3) ┆ │
235
+ ╞═════════╪══════════════════╡
236
+ │ Toronto ┆ v[06:00,Toronto] │
237
+ │ Berlin ┆ v[06:00,Berlin] │
238
+ │ Paris ┆ v[06:00,Paris] │
239
+ └─────────┴──────────────────┘
240
+ >>> m.v.pick(hour="06:00", city="Toronto")
241
+ <Expression terms=1 type=linear>
242
+ v[06:00,Toronto]
243
+
244
+ See Also:
245
+ [`Expression.filter`][pyoframe.Expression.filter] or [`Variable.filter`][pyoframe.Variable.filter] if you don't wish to drop the filtered column.
246
+ """
247
+ return self.data.filter(**kwargs).drop(kwargs.keys())
248
+
249
+ def _add_allowed_new_dims_to_df(self, df):
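+ # Printing helper: dimensions added via .over() are rendered as "*" columns, placed just before the final (variable/expression) column.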
250
+ cols = df.columns
251
+ df = df.with_columns(*(pl.lit("*").alias(c) for c in self._allowed_new_dims))
252
+ df = df.select(cols[:-1] + self._allowed_new_dims + [cols[-1]]) # reorder
253
+ return df
254
+
255
+ def add_dim(self, *dims: str): # pragma: no cover
256
+ """Deprecated, use [`over`][pyoframe.Expression.over] instead."""
257
+ warnings.warn(
258
+ "'add_dim' has been renamed to 'over'. Please use 'over' instead.",
259
+ DeprecationWarning,
260
+ )
261
+ return self.over(*dims)
262
+
263
+ @abstractmethod
264
+ def to_expr(self) -> Expression:
265
+ """Converts the object to a Pyoframe Expression."""
266
+ ...
267
+
268
+ def sum(self, *args, **kwargs):
269
+ """Converts the object to an expression (see `.to_expr()`) and then applies [`Expression.sum`][pyoframe.Expression.sum]."""
270
+ return self.to_expr().sum(*args, **kwargs)
271
+
272
+ def sum_by(self, *args, **kwargs):
273
+ """Converts the object to an expression (see `.to_expr()`) and then applies [`Expression.sum_by`][pyoframe.Expression.sum_by]."""
274
+ return self.to_expr().sum_by(*args, **kwargs)
275
+
276
+ def map(self, *args, **kwargs):
277
+ """Converts the object to an expression (see `.to_expr()`) and then applies [`Expression.map`][pyoframe.Expression.map]."""
278
+ return self.to_expr().map(*args, **kwargs)
279
+
280
+ def __add__(self, *args, **kwargs):
281
+ return self.to_expr().__add__(*args, **kwargs)
282
+
283
+ def __mul__(self, *args, **kwargs):
284
+ return self.to_expr().__mul__(*args, **kwargs)
285
+
286
+ def __pow__(self, power: int):
287
+ """Supports squaring expressions.
288
+
289
+ Examples:
290
+ >>> m = pf.Model()
291
+ >>> m.v = pf.Variable()
292
+ >>> m.v**2
293
+ <Expression terms=1 type=quadratic>
294
+ v * v
295
+ >>> m.v**3
296
+ Traceback (most recent call last):
297
+ ...
298
+ ValueError: Raising an expression to **3 is not supported. Expressions can only be squared (**2).
299
+ """
300
+ if power == 2:
301
+ res = self * self
302
+ res.name = f"({self.name}**2)"
303
+ return res
304
+ raise ValueError(
305
+ f"Raising an expressions to **{power} is not supported. Expressions can only be squared (**2)."
306
+ )
307
+
308
+ def __neg__(self):
309
+ res = self.to_expr() * -1
310
+ res.name = f"-{self.name}"
311
+ res._copy_flags(self)
312
+ return res
313
+
314
+ def __sub__(self, other):
315
+ """Subtracts a value from this Expression.
316
+
317
+ Examples:
318
+ >>> import polars as pl
319
+ >>> m = pf.Model()
320
+ >>> df = pl.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]})
321
+ >>> m.v = pf.Variable(df["dim1"])
322
+ >>> m.v - df
323
+ <Expression height=3 terms=6 type=linear>
324
+ ┌──────┬────────────┐
325
+ │ dim1 ┆ expression │
326
+ │ (3) ┆ │
327
+ ╞══════╪════════════╡
328
+ │ 1 ┆ v[1] -1 │
329
+ │ 2 ┆ v[2] -2 │
330
+ │ 3 ┆ v[3] -3 │
331
+ └──────┴────────────┘
332
+ """
333
+ if not isinstance(other, (int, float)):
334
+ other = other.to_expr()
335
+ return self.to_expr() + (-other)
336
+
337
+ def __rmul__(self, other):
338
+ return self.to_expr() * other
339
+
340
+ def __radd__(self, other):
341
+ return self.to_expr() + other
342
+
343
+ def __truediv__(self, other):
344
+ """Divides this expression.
345
+
346
+ Examples:
347
+ Dividing by a constant scales each coefficient:
348
+ >>> m = pf.Model()
349
+ >>> m.v = Variable({"dim1": [1, 2, 3]})
350
+ >>> m.v / 2
351
+ <Expression height=3 terms=3 type=linear>
352
+ ┌──────┬────────────┐
353
+ │ dim1 ┆ expression │
354
+ │ (3) ┆ │
355
+ ╞══════╪════════════╡
356
+ │ 1 ┆ 0.5 v[1] │
357
+ │ 2 ┆ 0.5 v[2] │
358
+ │ 3 ┆ 0.5 v[3] │
359
+ └──────┴────────────┘
360
+ """
361
+ return self.to_expr() * (1 / other)
362
+
363
+ def __rsub__(self, other):
364
+ """Supports right subtraction.
365
+
366
+ Examples:
367
+ >>> m = pf.Model()
368
+ >>> m.v = Variable({"dim1": [1, 2, 3]})
369
+ >>> 1 - m.v
370
+ <Expression height=3 terms=6 type=linear>
371
+ ┌──────┬────────────┐
372
+ │ dim1 ┆ expression │
373
+ │ (3) ┆ │
374
+ ╞══════╪════════════╡
375
+ │ 1 ┆ 1 - v[1] │
376
+ │ 2 ┆ 1 - v[2] │
377
+ │ 3 ┆ 1 - v[3] │
378
+ └──────┴────────────┘
379
+ """
380
+ return other + (-self.to_expr())
381
+
382
+ def __le__(self, other):
383
+ return Constraint(self - other, ConstraintSense.LE)
384
+
385
+ def __lt__(self, _):
386
+ raise PyoframeError(
387
+ "Constraints cannot be created with the '<' or '>' operators. Did you mean to use '<=' or '>=' instead?"
388
+ )
389
+
390
+ def __ge__(self, other):
391
+ return Constraint(self - other, ConstraintSense.GE)
392
+
393
+ def __gt__(self, _):
394
+ raise PyoframeError(
395
+ "Constraints cannot be created with the '<' or '>' operator. Did you mean to use '<=' or '>=' instead?"
396
+ )
397
+
398
+ def __eq__(self, value: object): # type: ignore
399
+ return Constraint(self - value, ConstraintSense.EQ)
400
+
401
+
402
+ SetTypes = Union[
403
+ pl.DataFrame,
404
+ pd.Index,
405
+ pd.DataFrame,
406
+ BaseOperableBlock,
407
+ Mapping[str, Sequence[object]],
408
+ "Set",
409
+ "Constraint",
410
+ ]
411
+
412
+
413
+ class Set(BaseOperableBlock):
414
+ """A set which can then be used to index variables.
415
+
416
+ Examples:
417
+ >>> pf.Set(x=range(2), y=range(3))
418
+ <Set 'unnamed' height=6>
419
+ ┌─────┬─────┐
420
+ │ x ┆ y │
421
+ │ (2) ┆ (3) │
422
+ ╞═════╪═════╡
423
+ │ 0 ┆ 0 │
424
+ │ 0 ┆ 1 │
425
+ │ 0 ┆ 2 │
426
+ │ 1 ┆ 0 │
427
+ │ 1 ┆ 1 │
428
+ │ 1 ┆ 2 │
429
+ └─────┴─────┘
430
+ """
431
+
432
+ def __init__(self, *data: SetTypes | Iterable[SetTypes], **named_data):
433
+ data_list = list(data)
434
+ for name, set in named_data.items():
435
+ data_list.append({name: set})
436
+ df = self._parse_acceptable_sets(*data_list)
437
+ if not df.is_empty() and df.is_duplicated().any():
438
+ raise ValueError("Duplicate rows found in input data.")
439
+ super().__init__(df, name="unnamed_set")
440
+
441
+ def _new(self, data: pl.DataFrame, name: str) -> Set:
442
+ s = Set(data)
443
+ s.name = name
444
+ s._model = self._model
445
+ return s
446
+
447
+ @staticmethod
448
+ def _parse_acceptable_sets(
449
+ *over: SetTypes | Iterable[SetTypes],
450
+ ) -> pl.DataFrame:
451
+ """Computes the cartesian product of the given sets.
452
+
453
+ Examples:
454
+ >>> import pandas as pd
455
+ >>> dim1 = pd.Index([1, 2, 3], name="dim1")
456
+ >>> dim2 = pd.Index(["a", "b"], name="dim1")
457
+ >>> Set._parse_acceptable_sets([dim1, dim2])
458
+ Traceback (most recent call last):
459
+ ...
460
+ AssertionError: Dimension 'dim1' is not unique.
461
+ >>> dim2.name = "dim2"
462
+ >>> Set._parse_acceptable_sets([dim1, dim2])
463
+ shape: (6, 2)
464
+ ┌──────┬──────┐
465
+ │ dim1 ┆ dim2 │
466
+ │ --- ┆ --- │
467
+ │ i64 ┆ str │
468
+ ╞══════╪══════╡
469
+ │ 1 ┆ a │
470
+ │ 1 ┆ b │
471
+ │ 2 ┆ a │
472
+ │ 2 ┆ b │
473
+ │ 3 ┆ a │
474
+ │ 3 ┆ b │
475
+ └──────┴──────┘
476
+ """
477
+ assert len(over) > 0, "At least one set must be provided."
478
+ over_iter: Iterable[SetTypes] = parse_inputs_as_iterable(*over)
479
+
480
+ over_frames: list[pl.DataFrame] = [Set._set_to_polars(set) for set in over_iter]
481
+
482
+ over_merged = over_frames[0]
483
+
484
+ for df in over_frames[1:]:
485
+ overlap_dims = set(over_merged.columns) & set(df.columns)
486
+ assert not overlap_dims, (
487
+ f"Dimension '{tuple(overlap_dims)[0]}' is not unique."
488
+ )
489
+ over_merged = over_merged.join(df, how="cross")
490
+ return over_merged
491
+
492
+ def to_expr(self) -> Expression:
493
+ """Converts the Set to an Expression equal to 1 for each index.
494
+
495
+ Useful when multiplying a Set by an Expression.
496
+ """
497
+ return Expression(
498
+ self.data.with_columns(
499
+ pl.lit(1).alias(COEF_KEY), pl.lit(CONST_TERM).alias(VAR_KEY)
500
+ ),
501
+ name=self.name,
502
+ )
503
+
504
+ def drop(self, *dims: str) -> Set:
505
+ """Returns a new Set with the given dimensions dropped.
506
+
507
+ Only unique rows are kept in the resulting Set.
508
+
509
+ Examples:
510
+ >>> xy = pf.Set(x=range(3), y=range(2))
511
+ >>> xy
512
+ <Set 'unnamed' height=6>
513
+ ┌─────┬─────┐
514
+ │ x ┆ y │
515
+ │ (3) ┆ (2) │
516
+ ╞═════╪═════╡
517
+ │ 0 ┆ 0 │
518
+ │ 0 ┆ 1 │
519
+ │ 1 ┆ 0 │
520
+ │ 1 ┆ 1 │
521
+ │ 2 ┆ 0 │
522
+ │ 2 ┆ 1 │
523
+ └─────┴─────┘
524
+ >>> x = xy.drop("y")
525
+ >>> x
526
+ <Set 'unnamed_set.drop(…)' height=3>
527
+ ┌─────┐
528
+ │ x │
529
+ │ (3) │
530
+ ╞═════╡
531
+ │ 0 │
532
+ │ 1 │
533
+ │ 2 │
534
+ └─────┘
535
+ """
536
+ if not dims:
537
+ raise ValueError("At least one dimension must be provided to drop.")
538
+ return self._new(
539
+ self.data.drop(dims).unique(maintain_order=Config.maintain_order),
540
+ name=f"{self.name}.drop(…)",
541
+ )
542
+
543
+ def __mul__(self, other):
544
+ if isinstance(other, Set):
545
+ overlap_dims = set(self.data.columns) & set(other.data.columns)
546
+ assert not overlap_dims, (
547
+ f"Cannot multiply the two sets because dimension '{tuple(overlap_dims)[0]}' is present in both sets."
548
+ )
549
+ return Set(self.data, other.data)
550
+ return super().__mul__(other)
551
+
552
+ def __add__(self, other):
553
+ # TODO replace with bitwise or
554
+ if isinstance(other, Set):
555
+ try:
556
+ return self._new(
557
+ pl.concat([self.data, other.data]).unique(
558
+ maintain_order=Config.maintain_order
559
+ ),
560
+ name=f"({self.name} + {other.name})",
561
+ )
562
+ except pl.exceptions.ShapeError as e:
563
+ if "unable to vstack, column names don't match" in str(e):
564
+ raise PyoframeError(
565
+ f"Failed to add sets '{self.name}' and '{other.name}' because dimensions do not match ({self.dimensions} != {other.dimensions}) "
566
+ ) from e
567
+ raise e # pragma: no cover
568
+
569
+ return super().__add__(other)
570
+
571
+ def __repr__(self):
572
+ header = get_obj_repr(
573
+ self,
574
+ "unnamed" if self.name == "unnamed_set" else self.name,
575
+ height=self.data.height,
576
+ )
577
+ data = self._add_shape_to_columns(self.data)
578
+ data = self._add_allowed_new_dims_to_df(data)
579
+ with Config.print_polars_config:
580
+ table = repr(data)
581
+
582
+ return header + "\n" + table
583
+
584
+ @staticmethod
585
+ def _set_to_polars(set: SetTypes) -> pl.DataFrame:
586
+ if isinstance(set, dict):
587
+ df = pl.DataFrame(set)
588
+ elif isinstance(set, Constraint):
589
+ df = set.data.select(set._dimensions_unsafe)
590
+ elif isinstance(set, BaseOperableBlock):
591
+ df = (
592
+ set.to_expr()
593
+ .data.drop(RESERVED_COL_KEYS, strict=False)
594
+ .unique(maintain_order=Config.maintain_order)
595
+ )
596
+ elif isinstance(set, pd.Index):
597
+ df = pl.from_pandas(pd.DataFrame(index=set).reset_index())
598
+ elif isinstance(set, pd.DataFrame):
599
+ df = pl.from_pandas(set)
600
+ elif isinstance(set, pl.DataFrame):
601
+ df = set
602
+ elif isinstance(set, pl.Series):
603
+ df = set.to_frame()
604
+ elif isinstance(set, pd.Series):
605
+ if not set.name:
606
+ raise ValueError("Cannot convert an unnamed Pandas Series to a Set.")
607
+ df = pl.from_pandas(set).to_frame()
608
+ elif isinstance(set, Set):
609
+ df = set.data
610
+ elif isinstance(set, range):
611
+ raise ValueError(
612
+ "Cannot convert a range to a set without a dimension name. Try Set(dim_name=range(...))"
613
+ )
614
+ else:
615
+ raise ValueError(f"Cannot convert type {type(set)} to a polars DataFrame")
616
+
617
+ if "index" in df.columns:
618
+ raise ValueError(
619
+ "Please specify a custom dimension name rather than using 'index' to avoid confusion."
620
+ )
621
+
622
+ for reserved_key in RESERVED_COL_KEYS:
623
+ if reserved_key in df.columns:
624
+ raise ValueError(
625
+ f"Cannot use reserved column names {reserved_key} as dimensions."
626
+ )
627
+
628
+ return df
629
+
630
+
631
+ class Expression(BaseOperableBlock):
632
+ """Represents a linear or quadratic mathematical expression.
633
+
634
+ Examples:
635
+ >>> import pandas as pd
636
+ >>> df = pd.DataFrame(
637
+ ... {
638
+ ... "item": [1, 1, 1, 2, 2],
639
+ ... "time": ["mon", "tue", "wed", "mon", "tue"],
640
+ ... "cost": [1, 2, 3, 4, 5],
641
+ ... }
642
+ ... ).set_index(["item", "time"])
643
+ >>> m = pf.Model()
644
+ >>> m.Time = pf.Variable(df.index)
645
+ >>> m.Size = pf.Variable(df.index)
646
+ >>> expr = df["cost"] * m.Time + df["cost"] * m.Size
647
+ >>> expr
648
+ <Expression height=5 terms=10 type=linear>
649
+ ┌──────┬──────┬──────────────────────────────┐
650
+ │ item ┆ time ┆ expression │
651
+ │ (2) ┆ (3) ┆ │
652
+ ╞══════╪══════╪══════════════════════════════╡
653
+ │ 1 ┆ mon ┆ Time[1,mon] + Size[1,mon] │
654
+ │ 1 ┆ tue ┆ 2 Time[1,tue] +2 Size[1,tue] │
655
+ │ 1 ┆ wed ┆ 3 Time[1,wed] +3 Size[1,wed] │
656
+ │ 2 ┆ mon ┆ 4 Time[2,mon] +4 Size[2,mon] │
657
+ │ 2 ┆ tue ┆ 5 Time[2,tue] +5 Size[2,tue] │
658
+ └──────┴──────┴──────────────────────────────┘
659
+ """
660
+
661
+ def __init__(self, data: pl.DataFrame, name: str | None = None):
662
+ # Sanity checks, VAR_KEY and COEF_KEY must be present
663
+ assert VAR_KEY in data.columns, "Missing variable column."
664
+ assert COEF_KEY in data.columns, "Missing coefficient column."
665
+
666
+ # Sanity check: no duplicate labels
667
+ if Config.enable_is_duplicated_expression_safety_check:
668
+ duplicated_mask = data.drop(COEF_KEY).is_duplicated()
669
+ # In theory this should never happen unless there's a bug in the library
670
+ if duplicated_mask.any():
671
+ duplicated_data = data.filter(duplicated_mask)
672
+ raise ValueError(
673
+ f"Cannot create an expression with duplicate labels:\n{duplicated_data}."
674
+ )
675
+
676
+ data = _simplify_expr_df(data)
677
+
678
+ if name is None:
679
+ warnings.warn(
680
+ "Expression should be given a name to support troubleshooting.",
681
+ UserWarning,
682
+ )
683
+
684
+ super().__init__(data)
685
+ else:
686
+ super().__init__(data, name=name)
687
+
688
+ @classmethod
689
+ def constant(cls, constant: int | float) -> Expression:
690
+ """Creates a new expression equal to the given constant.
691
+
692
+ Examples:
693
+ >>> pf.Expression.constant(5)
694
+ <Expression terms=1 type=constant>
695
+ 5
696
+ """
697
+ return cls(
698
+ pl.DataFrame(
699
+ {
700
+ COEF_KEY: [constant],
701
+ VAR_KEY: [CONST_TERM],
702
+ },
703
+ schema={COEF_KEY: pl.Float64, VAR_KEY: Config.id_dtype},
704
+ ),
705
+ name=str(constant),
706
+ )
707
+
708
+ @return_new
709
+ def sum(self, *over: str):
710
+ """Sums an expression over specified dimensions.
711
+
712
+ If no dimensions are specified, the sum is taken over all of the expression's dimensions.
713
+
714
+ Examples:
715
+ >>> expr = pl.DataFrame(
716
+ ... {
717
+ ... "time": ["mon", "tue", "wed", "mon", "tue"],
718
+ ... "place": [
719
+ ... "Toronto",
720
+ ... "Toronto",
721
+ ... "Toronto",
722
+ ... "Vancouver",
723
+ ... "Vancouver",
724
+ ... ],
725
+ ... "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
726
+ ... }
727
+ ... ).to_expr()
728
+ >>> expr
729
+ <Expression height=5 terms=5 type=constant>
730
+ ┌──────┬───────────┬────────────┐
731
+ │ time ┆ place ┆ expression │
732
+ │ (3) ┆ (2) ┆ │
733
+ ╞══════╪═══════════╪════════════╡
734
+ │ mon ┆ Toronto ┆ 1000000 │
735
+ │ tue ┆ Toronto ┆ 3000000 │
736
+ │ wed ┆ Toronto ┆ 2000000 │
737
+ │ mon ┆ Vancouver ┆ 1000000 │
738
+ │ tue ┆ Vancouver ┆ 2000000 │
739
+ └──────┴───────────┴────────────┘
740
+ >>> expr.sum("time")
741
+ <Expression height=2 terms=2 type=constant>
742
+ ┌───────────┬────────────┐
743
+ │ place ┆ expression │
744
+ │ (2) ┆ │
745
+ ╞═══════════╪════════════╡
746
+ │ Toronto ┆ 6000000 │
747
+ │ Vancouver ┆ 3000000 │
748
+ └───────────┴────────────┘
749
+ >>> expr.sum()
750
+ <Expression terms=1 type=constant>
751
+ 9000000
752
+
753
+ If the given dimensions don't exist, an error will be raised:
754
+
755
+ >>> expr.sum("city")
756
+ Traceback (most recent call last):
757
+ ...
758
+ AssertionError: Cannot sum over ['city'] as it is not in ['time', 'place']
759
+
760
+ See Also:
761
+ [pyoframe.Expression.sum_by][] for summing over all dimensions _except_ those that are specified.
762
+ """
763
+ dims = self.dimensions
764
+ if dims is None:
765
+ raise ValueError("Cannot sum a dimensionless expression.")
766
+ if not over:
767
+ over = tuple(dims)
768
+ assert set(over) <= set(dims), (
769
+ f"Cannot sum over {list(over)} as it is not in {dims}"
770
+ )
771
+ remaining_dims = [dim for dim in dims if dim not in over]
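+ # Dropping the summed dimensions and grouping by the remaining dimensions plus the variable columns merges matching terms; .sum() adds their coefficients.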
772
+
773
+ return (
774
+ self.data.drop(over)
775
+ .group_by(
776
+ remaining_dims + self._variable_columns,
777
+ maintain_order=Config.maintain_order,
778
+ )
779
+ .sum()
780
+ )
781
+
782
+ def sum_by(self, *by: str):
783
+ """Like [`Expression.sum`][pyoframe.Expression.sum], but the sum is taken over all dimensions *except* those specified in `by` (just like a `group_by().sum()` operation).
784
+
785
+ Examples:
786
+ >>> expr = pl.DataFrame(
787
+ ... {
788
+ ... "time": ["mon", "tue", "wed", "mon", "tue"],
789
+ ... "place": [
790
+ ... "Toronto",
791
+ ... "Toronto",
792
+ ... "Toronto",
793
+ ... "Vancouver",
794
+ ... "Vancouver",
795
+ ... ],
796
+ ... "tiktok_posts": [1e6, 3e6, 2e6, 1e6, 2e6],
797
+ ... }
798
+ ... ).to_expr()
799
+ >>> expr
800
+ <Expression height=5 terms=5 type=constant>
801
+ ┌──────┬───────────┬────────────┐
802
+ │ time ┆ place ┆ expression │
803
+ │ (3) ┆ (2) ┆ │
804
+ ╞══════╪═══════════╪════════════╡
805
+ │ mon ┆ Toronto ┆ 1000000 │
806
+ │ tue ┆ Toronto ┆ 3000000 │
807
+ │ wed ┆ Toronto ┆ 2000000 │
808
+ │ mon ┆ Vancouver ┆ 1000000 │
809
+ │ tue ┆ Vancouver ┆ 2000000 │
810
+ └──────┴───────────┴────────────┘
811
+
812
+ >>> expr.sum_by("place")
813
+ <Expression height=2 terms=2 type=constant>
814
+ ┌───────────┬────────────┐
815
+ │ place ┆ expression │
816
+ │ (2) ┆ │
817
+ ╞═══════════╪════════════╡
818
+ │ Toronto ┆ 6000000 │
819
+ │ Vancouver ┆ 3000000 │
820
+ └───────────┴────────────┘
821
+
822
+ If the specified dimensions don't exist, an error will be raised:
823
+
824
+ >>> expr.sum_by("city")
825
+ Traceback (most recent call last):
826
+ ...
827
+ ValueError: Cannot sum by ['city'] because it is not a valid dimension. The expression's dimensions are: ['time', 'place'].
828
+
829
+ >>> total_sum = expr.sum()
830
+ >>> total_sum.sum_by("time")
831
+ Traceback (most recent call last):
832
+ ...
833
+ ValueError: Cannot sum a dimensionless expression.
834
+
835
+ See Also:
836
+ [pyoframe.Expression.sum][] for summing over specified dimensions.
837
+ """
838
+ if not by:
839
+ raise ValueError("sum_by requires at least 1 argument.")
840
+ dims = self.dimensions
841
+ if dims is None:
842
+ raise ValueError("Cannot sum a dimensionless expression.")
843
+ if not set(by) <= set(dims):
844
+ raise ValueError(
845
+ f"Cannot sum by {list(set(by) - set(dims))} because it is not a valid dimension. The expression's dimensions are: {list(dims)}."
846
+ )
847
+ remaining_dims = [dim for dim in dims if dim not in by]
848
+ return self.sum(*remaining_dims)
849
+
850
+ @property
851
+ def _variable_columns(self) -> list[str]:
852
+ if self.is_quadratic:
853
+ return [VAR_KEY, QUAD_VAR_KEY]
854
+ else:
855
+ return [VAR_KEY]
856
+
857
+ def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True) -> Expression:
858
+ """Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.
859
+
860
+ This is particularly useful to go from one type of dimensions to another. For example, to convert data that
861
+ is indexed by city to data indexed by country (see example).
862
+
863
+ Parameters:
864
+ mapping_set:
865
+ The set to map the expression to. This can be a DataFrame, Index, or another Set.
866
+ drop_shared_dims:
867
+ If `True`, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and
868
+ repeated rows are summed.
869
+ If `False`, the shared dimensions are kept in the resulting expression.
870
+
871
+ Returns:
872
+ A new Expression containing the result of the mapping operation.
873
+
874
+ Examples:
875
+ >>> import polars as pl
876
+ >>> pop_data = pl.DataFrame(
877
+ ... {
878
+ ... "city": ["Toronto", "Vancouver", "Boston"],
879
+ ... "year": [2024, 2024, 2024],
880
+ ... "population": [10, 2, 8],
881
+ ... }
882
+ ... ).to_expr()
883
+ >>> cities_and_countries = pl.DataFrame(
884
+ ... {
885
+ ... "city": ["Toronto", "Vancouver", "Boston"],
886
+ ... "country": ["Canada", "Canada", "USA"],
887
+ ... }
888
+ ... )
889
+ >>> pop_data.map(cities_and_countries)
890
+ <Expression height=2 terms=2 type=constant>
891
+ ┌──────┬─────────┬────────────┐
892
+ │ year ┆ country ┆ expression │
893
+ │ (1) ┆ (2) ┆ │
894
+ ╞══════╪═════════╪════════════╡
895
+ │ 2024 ┆ Canada ┆ 12 │
896
+ │ 2024 ┆ USA ┆ 8 │
897
+ └──────┴─────────┴────────────┘
898
+
899
+ >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
900
+ <Expression height=3 terms=3 type=constant>
901
+ ┌───────────┬──────┬─────────┬────────────┐
902
+ │ city ┆ year ┆ country ┆ expression │
903
+ │ (3) ┆ (1) ┆ (2) ┆ │
904
+ ╞═══════════╪══════╪═════════╪════════════╡
905
+ │ Toronto ┆ 2024 ┆ Canada ┆ 10 │
906
+ │ Vancouver ┆ 2024 ┆ Canada ┆ 2 │
907
+ │ Boston ┆ 2024 ┆ USA ┆ 8 │
908
+ └───────────┴──────┴─────────┴────────────┘
909
+ """
910
+ mapping_set = Set(mapping_set)
911
+
912
+ dims = self.dimensions
913
+ if dims is None:
914
+ raise ValueError("Cannot use .map() on an expression with no dimensions.")
915
+
916
+ mapping_dims = mapping_set.dimensions
917
+ if mapping_dims is None:
918
+ raise ValueError(
919
+ "Cannot use .map() with a mapping set containing no dimensions."
920
+ )
921
+
922
+ shared_dims = [dim for dim in dims if dim in mapping_dims]
923
+ if not shared_dims:
924
+ raise ValueError(
925
+ f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
926
+ )
927
+
928
+ mapped_expression = self * mapping_set
929
+
930
+ if drop_shared_dims:
931
+ mapped_expression = mapped_expression.sum(*shared_dims)
932
+
933
+ mapped_expression.name = f"{self.name}.map(…)"
934
+
935
+ return mapped_expression
936
+
937
+ @return_new
938
+ def rolling_sum(self, over: str, window_size: int):
939
+ """Calculates the rolling sum of the Expression over a specified window size for a given dimension.
940
+
941
+ This method applies a rolling sum operation over the dimension specified by `over`,
942
+ using a window defined by `window_size`.
943
+
944
+
945
+ Parameters:
946
+ over:
947
+ The name of the dimension (column) over which the rolling sum is calculated.
948
+ This dimension must exist within the Expression's dimensions.
949
+ window_size:
950
+ The size of the moving window in terms of number of records.
951
+ The rolling sum is calculated over this many consecutive elements.
952
+
953
+ Returns:
954
+ A new Expression instance containing the result of the rolling sum operation.
955
+ This new Expression retains all dimensions (columns) of the original data,
956
+ with the rolling sum applied over the specified dimension.
957
+
958
+ Examples:
959
+ >>> import polars as pl
960
+ >>> cost = pl.DataFrame(
961
+ ... {
962
+ ... "item": [1, 1, 1, 2, 2],
963
+ ... "time": [1, 2, 3, 1, 2],
964
+ ... "cost": [1, 2, 3, 4, 5],
965
+ ... }
966
+ ... )
967
+ >>> m = pf.Model()
968
+ >>> m.quantity = pf.Variable(cost[["item", "time"]])
969
+ >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
970
+ <Expression height=5 terms=8 type=linear>
971
+ ┌──────┬──────┬──────────────────────────────────┐
972
+ │ item ┆ time ┆ expression │
973
+ │ (2) ┆ (3) ┆ │
974
+ ╞══════╪══════╪══════════════════════════════════╡
975
+ │ 1 ┆ 1 ┆ quantity[1,1] │
976
+ │ 1 ┆ 2 ┆ quantity[1,1] +2 quantity[1,2] │
977
+ │ 1 ┆ 3 ┆ 2 quantity[1,2] +3 quantity[1,3] │
978
+ │ 2 ┆ 1 ┆ 4 quantity[2,1] │
979
+ │ 2 ┆ 2 ┆ 4 quantity[2,1] +5 quantity[2,2] │
980
+ └──────┴──────┴──────────────────────────────────┘
981
+ """
982
+ dims = self.dimensions
983
+ if dims is None:
984
+ raise ValueError(
985
+ "Cannot use rolling_sum() with an expression with no dimensions."
986
+ )
987
+ assert over in dims, f"Cannot sum over {over} as it is not in {dims}"
988
+ remaining_dims = [dim for dim in dims if dim not in over]
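+ # Polars' rolling() yields one window per value of `over`; pl.col(over).max() relabels each window's rows with the window's end point before the windows are stacked back together.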
989
+
990
+ return pl.concat(
991
+ [
992
+ df.with_columns(pl.col(over).max())
993
+ for _, df in self.data.rolling(
994
+ index_column=over,
995
+ period=f"{window_size}i",
996
+ group_by=remaining_dims,
997
+ )
998
+ ]
999
+ )
1000
+
1001
+ @return_new
1002
+ def within(self, set: SetTypes):
1003
+ """Filters this expression to only include the dimensions within the provided set.
1004
+
1005
+ Examples:
1006
+ >>> import pandas as pd
1007
+ >>> general_expr = pd.DataFrame(
1008
+ ... {"dim1": [1, 2, 3], "value": [1, 2, 3]}
1009
+ ... ).to_expr()
1010
+ >>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
1011
+ >>> general_expr.within(filter_expr).data
1012
+ shape: (2, 3)
1013
+ ┌──────┬─────────┬───────────────┐
1014
+ │ dim1 ┆ __coeff ┆ __variable_id │
1015
+ │ --- ┆ --- ┆ --- │
1016
+ │ i64 ┆ f64 ┆ u32 │
1017
+ ╞══════╪═════════╪═══════════════╡
1018
+ │ 1 ┆ 1.0 ┆ 0 │
1019
+ │ 3 ┆ 3.0 ┆ 0 │
1020
+ └──────┴─────────┴───────────────┘
1021
+ """
1022
+ df: pl.DataFrame = Set(set).data
1023
+ set_dims = _get_dimensions(df)
1024
+ assert set_dims is not None, (
1025
+ "Cannot use .within() with a set with no dimensions."
1026
+ )
1027
+ dims = self.dimensions
1028
+ assert dims is not None, (
1029
+ "Cannot use .within() with an expression with no dimensions."
1030
+ )
1031
+ dims_in_common = [dim for dim in dims if dim in set_dims]
1032
+ by_dims = df.select(dims_in_common).unique(maintain_order=Config.maintain_order)
1033
+ return self.data.join(
1034
+ by_dims,
1035
+ on=dims_in_common,
1036
+ maintain_order="left" if Config.maintain_order else None,
1037
+ )
1038
+
1039
+ @property
1040
+ def is_quadratic(self) -> bool:
1041
+ """Returns `True` if the expression is quadratic, False otherwise.
1042
+
1043
+ Computes in O(1) since expressions are quadratic if and
1044
+ only if self.data contains the QUAD_VAR_KEY column.
1045
+
1046
+ Examples:
1047
+ >>> import pandas as pd
1048
+ >>> m = pf.Model()
1049
+ >>> m.v = Variable()
1050
+ >>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}) * m.v
1051
+ >>> expr *= m.v
1052
+ >>> expr.is_quadratic
1053
+ True
1054
+ """
1055
+ return QUAD_VAR_KEY in self.data.columns
1056
+
1057
+ @overload
1058
+ def degree(self, return_str: Literal[False] = False) -> int: ...
1059
+
1060
+ @overload
1061
+ def degree(self, return_str: Literal[True] = True) -> str: ...
1062
+
1063
+ def degree(self, return_str: bool = False) -> int | str:
1064
+ """Returns the degree of the expression (0=constant, 1=linear, 2=quadratic).
1065
+
1066
+ Parameters:
1067
+ return_str: If `True`, returns the degree as a string (`"constant"`, `"linear"`, or `"quadratic"`).
1068
+ If `False`, returns the degree as an integer (0, 1, or 2).
1069
+
1070
+ Examples:
1071
+ >>> import pandas as pd
1072
+ >>> m = pf.Model()
1073
+ >>> m.v1 = pf.Variable()
1074
+ >>> m.v2 = pf.Variable()
1075
+ >>> expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
1076
+ >>> expr.degree()
1077
+ 0
1078
+ >>> expr *= m.v1
1079
+ >>> expr.degree()
1080
+ 1
1081
+ >>> expr += (m.v2**2).over("dim1")
1082
+ >>> expr.degree()
1083
+ 2
1084
+ >>> expr.degree(return_str=True)
1085
+ 'quadratic'
1086
+ """
1087
+ if self.is_quadratic:
1088
+ return "quadratic" if return_str else 2
1089
+ # TODO improve performance of .evaluate() by ensuring early exit if linear
1090
+ elif (self.data.get_column(VAR_KEY) != CONST_TERM).any():
1091
+ return "linear" if return_str else 1
1092
+ else:
1093
+ return "constant" if return_str else 0
1094
+
1095
+ def __add__(self, other):
1096
+ """Adds another expression or a constant to this expression.
1097
+
1098
+ Examples:
1099
+ >>> import pandas as pd
1100
+ >>> m = pf.Model()
1101
+ >>> add = pd.DataFrame({"dim1": [1, 2, 3], "add": [10, 20, 30]}).to_expr()
1102
+ >>> m.v = Variable(add)
1103
+ >>> m.v + add
1104
+ <Expression height=3 terms=6 type=linear>
1105
+ ┌──────┬────────────┐
1106
+ │ dim1 ┆ expression │
1107
+ │ (3) ┆ │
1108
+ ╞══════╪════════════╡
1109
+ │ 1 ┆ v[1] +10 │
1110
+ │ 2 ┆ v[2] +20 │
1111
+ │ 3 ┆ v[3] +30 │
1112
+ └──────┴────────────┘
1113
+
1114
+ >>> m.v + add + 2
1115
+ <Expression height=3 terms=6 type=linear>
1116
+ ┌──────┬────────────┐
1117
+ │ dim1 ┆ expression │
1118
+ │ (3) ┆ │
1119
+ ╞══════╪════════════╡
1120
+ │ 1 ┆ 12 + v[1] │
1121
+ │ 2 ┆ 22 + v[2] │
1122
+ │ 3 ┆ 32 + v[3] │
1123
+ └──────┴────────────┘
1124
+
1125
+ >>> m.v + pd.DataFrame({"dim1": [1, 2], "add": [10, 20]})
1126
+ Traceback (most recent call last):
1127
+ ...
1128
+ pyoframe._constants.PyoframeError: Cannot add the two expressions below because expression 1 has extra labels.
1129
+ Expression 1: v
1130
+ Expression 2: add
1131
+ Extra labels in expression 1:
1132
+ ┌──────┐
1133
+ │ dim1 │
1134
+ ╞══════╡
1135
+ │ 3 │
1136
+ └──────┘
1137
+ Use .drop_extras() or .keep_extras() to indicate how the extra labels should be handled. Learn more at
1138
+ https://bravos-power.github.io/pyoframe/latest/learn/concepts/addition
1139
+ >>> m.v2 = Variable()
1140
+ >>> 5 + 2 * m.v2
1141
+ <Expression terms=2 type=linear>
1142
+ 2 v2 +5
1143
+ """
1144
+ if isinstance(other, (int, float)):
1145
+ return self._add_const(other)
1146
+ other = other.to_expr()
1147
+ self._learn_from_other(other)
1148
+ return add(self, other)
1149
+
1150
+ def __mul__(self: Expression, other: Operable) -> Expression:
1151
+ if isinstance(other, (int, float)):
1152
+ if other == 1:
1153
+ return self
1154
+ return self._new(
1155
+ self.data.with_columns(pl.col(COEF_KEY) * other),
1156
+ name=f"({other} * {self.name})",
1157
+ )
1158
+
1159
+ other = other.to_expr()
1160
+ self._learn_from_other(other)
1161
+ return multiply(self, other)
1162
+
1163
+ def to_expr(self) -> Expression:
1164
+ """Returns the expression itself."""
1165
+ return self
1166
+
1167
+ def _learn_from_other(self, other: Expression):
1168
+ if self._model is None and other._model is not None:
1169
+ self._model = other._model
1170
+
1171
+ def _new(self, data: pl.DataFrame, name: str) -> Expression:
1172
+ e = Expression(data, name)
1173
+ e._model = self._model
1174
+ return e
1175
+
1176
+ def _add_const(self, const: int | float) -> Expression:
1177
+ """Adds a constant to the expression.
1178
+
1179
+ Examples:
1180
+ >>> m = pf.Model()
1181
+ >>> m.x1 = Variable()
1182
+ >>> m.x2 = Variable()
1183
+ >>> m.x1 + 5
1184
+ <Expression terms=2 type=linear>
1185
+ x1 +5
1186
+ >>> m.x1**2 + 5
1187
+ <Expression terms=2 type=quadratic>
1188
+ x1 * x1 +5
1189
+ >>> m.x1**2 + m.x2 + 5
1190
+ <Expression terms=3 type=quadratic>
1191
+ x1 * x1 + x2 +5
1192
+
1193
+ It also works with dimensions
1194
+
1195
+ >>> m = pf.Model()
1196
+ >>> m.v = Variable({"dim1": [1, 2, 3]})
1197
+ >>> m.v * m.v + 5
1198
+ <Expression height=3 terms=6 type=quadratic>
1199
+ ┌──────┬─────────────────┐
1200
+ │ dim1 ┆ expression │
1201
+ │ (3) ┆ │
1202
+ ╞══════╪═════════════════╡
1203
+ │ 1 ┆ 5 + v[1] * v[1] │
1204
+ │ 2 ┆ 5 + v[2] * v[2] │
1205
+ │ 3 ┆ 5 + v[3] * v[3] │
1206
+ └──────┴─────────────────┘
1207
+ """
1208
+ if const == 0:
1209
+ return self
1210
+ dim = self.dimensions
1211
+ data = self.data
1212
+ # Fill in missing constant terms
1213
+ if not dim:
1214
+ if CONST_TERM not in data[VAR_KEY]:
1215
+ const_df = pl.DataFrame(
1216
+ {COEF_KEY: [0.0], VAR_KEY: [CONST_TERM]},
1217
+ schema={COEF_KEY: pl.Float64, VAR_KEY: Config.id_dtype},
1218
+ )
1219
+ if self.is_quadratic:
1220
+ const_df = const_df.with_columns(
1221
+ pl.lit(CONST_TERM).alias(QUAD_VAR_KEY).cast(Config.id_dtype)
1222
+ )
1223
+ data = pl.concat(
1224
+ [data, const_df],
1225
+ how="vertical_relaxed",
1226
+ )
1227
+ else:
1228
+ keys = (
1229
+ data.select(dim)
1230
+ .unique(maintain_order=Config.maintain_order)
1231
+ .with_columns(pl.lit(CONST_TERM).alias(VAR_KEY).cast(Config.id_dtype))
1232
+ )
1233
+ if self.is_quadratic:
1234
+ keys = keys.with_columns(
1235
+ pl.lit(CONST_TERM).alias(QUAD_VAR_KEY).cast(Config.id_dtype)
1236
+ )
1237
+ data = data.join(
1238
+ keys,
1239
+ on=dim + self._variable_columns,
1240
+ how="full",
1241
+ coalesce=True,
1242
+ # We use right_left not left_right to bring the constants near the front for better readability
1243
+ maintain_order="right_left" if Config.maintain_order else None,
1244
+ ).with_columns(pl.col(COEF_KEY).fill_null(0.0))
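+ # Every label now has a constant-term row; the step below adds `const` to it.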
1245
+
1246
+ data = data.with_columns(
1247
+ pl.when(pl.col(VAR_KEY) == CONST_TERM)
1248
+ .then(pl.col(COEF_KEY) + const)
1249
+ .otherwise(pl.col(COEF_KEY))
1250
+ )
1251
+
1252
+ name = f"({self.name} + {const})" if const >= 0 else f"({self.name} - {-const})"
1253
+ return self._new(data, name=name)
1254
+
1255
+ @property
1256
+ def constant_terms(self) -> pl.DataFrame:
1257
+ """Returns all the constant terms in the expression."""
1258
+ dims = self.dimensions
1259
+ constant_terms = self.data.filter(pl.col(VAR_KEY) == CONST_TERM).drop(VAR_KEY)
1260
+ if self.is_quadratic:
1261
+ constant_terms = constant_terms.drop(QUAD_VAR_KEY)
1262
+ if dims is not None:
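+ # The full join against the unique dimension rows re-adds labels whose constant term was simplified away, giving them a 0.0 coefficient.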
1263
+ dims_df = self.data.select(dims).unique(
1264
+ maintain_order=Config.maintain_order
1265
+ )
1266
+ df = constant_terms.join(
1267
+ dims_df,
1268
+ on=dims,
1269
+ how="full",
1270
+ coalesce=True,
1271
+ maintain_order="left_right" if Config.maintain_order else None,
1272
+ )
1273
+ return df.with_columns(pl.col(COEF_KEY).fill_null(0.0))
1274
+ else:
1275
+ if len(constant_terms) == 0:
1276
+ return pl.DataFrame(
1277
+ {COEF_KEY: [0.0], VAR_KEY: [CONST_TERM]},
1278
+ schema={COEF_KEY: pl.Float64, VAR_KEY: Config.id_dtype},
1279
+ )
1280
+ return constant_terms
1281
+
1282
+ @property
1283
+ def variable_terms(self) -> pl.DataFrame:
1284
+ """Returns all the non-constant terms in the expression."""
1285
+ return self.data.filter(pl.col(VAR_KEY) != CONST_TERM)
1286
+
1287
+ @unwrap_single_values
1288
+ def evaluate(self) -> pl.DataFrame:
1289
+ """Computes the value of the expression using the variables' solutions.
1290
+
1291
+ Returns:
1292
+ A Polars `DataFrame` for dimensioned expressions or a `float` for dimensionless expressions.
1293
+
1294
+ Examples:
1295
+ >>> m = pf.Model()
1296
+ >>> m.X = pf.Variable({"dim1": [1, 2, 3]}, lb=10, ub=10)
1297
+ >>> m.expr = 2 * m.X * m.X + 1
1298
+
1299
+ >>> m.expr.evaluate()
1300
+ Traceback (most recent call last):
1301
+ ...
1302
+ ValueError: Cannot evaluate the expression 'expr' before calling model.optimize().
1303
+
1304
+ >>> m.constant_expression = m.expr - 2 * m.X * m.X
1305
+ >>> m.constant_expression.evaluate()
1306
+ shape: (3, 2)
1307
+ ┌──────┬──────────┐
1308
+ │ dim1 ┆ solution │
1309
+ │ --- ┆ --- │
1310
+ │ i64 ┆ f64 │
1311
+ ╞══════╪══════════╡
1312
+ │ 1 ┆ 1.0 │
1313
+ │ 2 ┆ 1.0 │
1314
+ │ 3 ┆ 1.0 │
1315
+ └──────┴──────────┘
1316
+
1317
+
1318
+ >>> m.optimize()
1319
+ >>> m.expr.evaluate()
1320
+ shape: (3, 2)
1321
+ ┌──────┬──────────┐
1322
+ │ dim1 ┆ solution │
1323
+ │ --- ┆ --- │
1324
+ │ i64 ┆ f64 │
1325
+ ╞══════╪══════════╡
1326
+ │ 1 ┆ 201.0 │
1327
+ │ 2 ┆ 201.0 │
1328
+ │ 3 ┆ 201.0 │
1329
+ └──────┴──────────┘
1330
+
1331
+ >>> m.expr.sum().evaluate()
1332
+ 603.0
1333
+
1334
+ """
1335
+ assert self._model is not None, (
1336
+ "Expression must be added to the model to use .value"
1337
+ )
1338
+
1339
+ df = self.data.rename({COEF_KEY: SOLUTION_KEY})
1340
+ sm = self._model.poi
1341
+ attr = poi.VariableAttribute.Value
1342
+
1343
+ if self.degree() == 0:
1344
+ df = df.drop(self._variable_columns)
1345
+ elif (
1346
+ self._model.attr.TerminationStatus
1347
+ == poi.TerminationStatusCode.OPTIMIZE_NOT_CALLED
1348
+ ):
1349
+ raise ValueError(
1350
+ f"Cannot evaluate the expression '{self.name}' before calling model.optimize()."
1351
+ )
1352
+ else:
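+ # Substitute each variable id with its solution value; multiplying the (renamed) coefficient column once per variable column gives the value of each term.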
1353
+ for var_col in self._variable_columns:
1354
+ values = [
1355
+ sm.get_variable_attribute(poi.VariableIndex(v_id), attr)
1356
+ for v_id in df.get_column(var_col).to_list()
1357
+ ]
1358
+
1359
+ df = df.drop(var_col).with_columns(
1360
+ pl.col(SOLUTION_KEY) * pl.Series(values, dtype=pl.Float64)
1361
+ )
1362
+
1363
+ dims = self.dimensions
1364
+ if dims is not None:
1365
+ df = df.group_by(dims, maintain_order=Config.maintain_order)
1366
+ return df.sum()
1367
+
1368
+ def _to_poi(self) -> poi.ScalarAffineFunction | poi.ScalarQuadraticFunction:
1369
+ assert self.dimensions is None, (
1370
+ "._to_poi() only works for non-dimensioned expressions."
1371
+ )
1372
+
1373
+ data = self.data
1374
+
1375
+ if self.is_quadratic:
1376
+ # Workaround for bug https://github.com/metab0t/PyOptInterface/issues/59
1377
+ if self._model is None or self._model.solver.name == "highs":
1378
+ data = data.sort(VAR_KEY, QUAD_VAR_KEY, descending=False)
1379
+
1380
+ return poi.ScalarQuadraticFunction(
1381
+ coefficients=data.get_column(COEF_KEY).to_numpy(),
1382
+ var1s=data.get_column(VAR_KEY).to_numpy(),
1383
+ var2s=data.get_column(QUAD_VAR_KEY).to_numpy(),
1384
+ )
1385
+ else:
1386
+ return poi.ScalarAffineFunction(
1387
+ coefficients=data.get_column(COEF_KEY).to_numpy(),
1388
+ variables=data.get_column(VAR_KEY).to_numpy(),
1389
+ )
1390
+
1391
+ @overload
1392
+ def to_str(
1393
+ self,
1394
+ str_col_name: str = "expression",
1395
+ include_const_term: bool = True,
1396
+ return_df: Literal[False] = False,
1397
+ ) -> str: ...
1398
+
1399
+ @overload
1400
+ def to_str(
1401
+ self,
1402
+ str_col_name: str = "expression",
1403
+ include_const_term: bool = True,
1404
+ return_df: Literal[True] = True,
1405
+ ) -> pl.DataFrame: ...
1406
+
1407
+ def to_str(
1408
+ self,
1409
+ str_col_name: str = "expression",
1410
+ include_const_term: bool = True,
1411
+ return_df: bool = False,
1412
+ ) -> str | pl.DataFrame:
1413
+ """Converts the expression to a human-readable string, or several arranged in a table.
1414
+
1415
+ Long expressions are truncated according to [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and [`Config.print_polars_config`][pyoframe._Config.print_polars_config].
1416
+
1417
+ `str(pyoframe.Expression)` is equivalent to `pyoframe.Expression.to_str()`.
1418
+
1419
+ Parameters:
1420
+ str_col_name:
1421
+ The name of the column containing the string representation of the expression (dimensioned expressions only).
1422
+ include_const_term:
1423
+ If `False`, constant terms are omitted from the string representation.
1424
+ return_df:
1425
+ If `True`, returns a DataFrame containing the human-readable strings instead of the DataFrame's string representation.
1426
+
1427
+ Examples:
1428
+ >>> import polars as pl
1429
+ >>> m = pf.Model()
1430
+ >>> x = pf.Set(x=range(1000))
1431
+ >>> y = pf.Set(y=range(1000))
1432
+ >>> m.V = pf.Variable(x, y)
1433
+ >>> expr = 2 * m.V * m.V + 3
1434
+ >>> print(expr.to_str())
1435
+ ┌────────┬────────┬──────────────────────────────┐
1436
+ │ x ┆ y ┆ expression │
1437
+ │ (1000) ┆ (1000) ┆ │
1438
+ ╞════════╪════════╪══════════════════════════════╡
1439
+ │ 0 ┆ 0 ┆ 3 +2 V[0,0] * V[0,0] │
1440
+ │ 0 ┆ 1 ┆ 3 +2 V[0,1] * V[0,1] │
1441
+ │ 0 ┆ 2 ┆ 3 +2 V[0,2] * V[0,2] │
1442
+ │ 0 ┆ 3 ┆ 3 +2 V[0,3] * V[0,3] │
1443
+ │ 0 ┆ 4 ┆ 3 +2 V[0,4] * V[0,4] │
1444
+ │ … ┆ … ┆ … │
1445
+ │ 999 ┆ 995 ┆ 3 +2 V[999,995] * V[999,995] │
1446
+ │ 999 ┆ 996 ┆ 3 +2 V[999,996] * V[999,996] │
1447
+ │ 999 ┆ 997 ┆ 3 +2 V[999,997] * V[999,997] │
1448
+ │ 999 ┆ 998 ┆ 3 +2 V[999,998] * V[999,998] │
1449
+ │ 999 ┆ 999 ┆ 3 +2 V[999,999] * V[999,999] │
1450
+ └────────┴────────┴──────────────────────────────┘
1451
+ >>> expr = expr.sum("y")
1452
+ >>> print(expr.to_str())
1453
+ ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
1454
+ │ x ┆ expression │
1455
+ │ (1000) ┆ │
1456
+ ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
1457
+ │ 0 ┆ 3000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] … │
1458
+ │ 1 ┆ 3000 +2 V[1,0] * V[1,0] +2 V[1,1] * V[1,1] +2 V[1,2] * V[1,2] +2 V[1,3] * V[1,3] … │
1459
+ │ 2 ┆ 3000 +2 V[2,0] * V[2,0] +2 V[2,1] * V[2,1] +2 V[2,2] * V[2,2] +2 V[2,3] * V[2,3] … │
1460
+ │ 3 ┆ 3000 +2 V[3,0] * V[3,0] +2 V[3,1] * V[3,1] +2 V[3,2] * V[3,2] +2 V[3,3] * V[3,3] … │
1461
+ │ 4 ┆ 3000 +2 V[4,0] * V[4,0] +2 V[4,1] * V[4,1] +2 V[4,2] * V[4,2] +2 V[4,3] * V[4,3] … │
1462
+ │ … ┆ … │
1463
+ │ 995 ┆ 3000 +2 V[995,0] * V[995,0] +2 V[995,1] * V[995,1] +2 V[995,2] * V[995,2] +2 V[995,3] * │
1464
+ │ ┆ V[995,3] … │
1465
+ │ 996 ┆ 3000 +2 V[996,0] * V[996,0] +2 V[996,1] * V[996,1] +2 V[996,2] * V[996,2] +2 V[996,3] * │
1466
+ │ ┆ V[996,3] … │
1467
+ │ 997 ┆ 3000 +2 V[997,0] * V[997,0] +2 V[997,1] * V[997,1] +2 V[997,2] * V[997,2] +2 V[997,3] * │
1468
+ │ ┆ V[997,3] … │
1469
+ │ 998 ┆ 3000 +2 V[998,0] * V[998,0] +2 V[998,1] * V[998,1] +2 V[998,2] * V[998,2] +2 V[998,3] * │
1470
+ │ ┆ V[998,3] … │
1471
+ │ 999 ┆ 3000 +2 V[999,0] * V[999,0] +2 V[999,1] * V[999,1] +2 V[999,2] * V[999,2] +2 V[999,3] * │
1472
+ │ ┆ V[999,3] … │
1473
+ └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
1474
+ >>> expr = expr.sum("x")
1475
+ >>> print(expr.to_str())
1476
+ 3000000 +2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] …
1477
+
1478
+ """
1479
+ # TODO consider optimizing using LazyFrames since .head() could maybe be automatically pushed up the chain of operations.
1480
+ data = self.data if include_const_term else self.variable_terms
1481
+ data = cast_coef_to_string(data)
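+ # Replace variable ids with names (via the model's variable map when available, otherwise a generic "x<id>" label); the constant term's id becomes an empty string.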
1482
+
1483
+ for var_col in self._variable_columns:
1484
+ temp_var_column = f"{var_col}_temp"
1485
+ if self._model is not None and self._model._var_map is not None:
1486
+ data = self._model._var_map.apply(
1487
+ data, to_col=temp_var_column, id_col=var_col
1488
+ )
1489
+ else:
1490
+ data = data.with_columns(
1491
+ pl.concat_str(pl.lit("x"), var_col).alias(temp_var_column)
1492
+ )
1493
+ data = data.with_columns(
1494
+ pl.when(pl.col(var_col) == CONST_TERM)
1495
+ .then(pl.lit(""))
1496
+ .otherwise(temp_var_column)
1497
+ .alias(var_col)
1498
+ ).drop(temp_var_column)
1499
+ if self.is_quadratic:
1500
+ data = data.with_columns(
1501
+ pl.when(pl.col(QUAD_VAR_KEY) == "")
1502
+ .then(pl.col(VAR_KEY))
1503
+ .otherwise(pl.concat_str(VAR_KEY, pl.lit(" * "), pl.col(QUAD_VAR_KEY)))
1504
+ .alias(VAR_KEY)
1505
+ ).drop(QUAD_VAR_KEY)
1506
+
1507
+ dimensions = self.dimensions
1508
+
1509
+ # Create a string for each term
1510
+ data = data.with_columns(
1511
+ pl.concat_str(
1512
+ COEF_KEY,
1513
+ pl.lit(" "),
1514
+ VAR_KEY,
1515
+ )
1516
+ .str.strip_chars(characters="  ")
1517
+ .alias(str_col_name)
1518
+ ).drop(COEF_KEY, VAR_KEY)
1519
+
1520
+ if dimensions is not None:
1521
+ data = data.group_by(dimensions, maintain_order=Config.maintain_order).agg(
1522
+ pl.concat_str(
1523
+ pl.col(str_col_name)
1524
+ .head(Config.print_max_terms)
1525
+ .str.join(delimiter=" "),
1526
+ pl.when(pl.len() > Config.print_max_terms)
1527
+ .then(pl.lit(" …"))
1528
+ .otherwise(pl.lit("")),
1529
+ )
1530
+ )
1531
+ else:
1532
+ truncate = data.height > Config.print_max_terms
1533
+ if truncate:
1534
+ data = data.head(Config.print_max_terms)
1535
+
1536
+ data = data.select(pl.col(str_col_name).str.join(delimiter=" "))
1537
+
1538
+ if truncate:
1539
+ data = data.with_columns(
1540
+ pl.concat_str(pl.col(str_col_name), pl.lit(" …"))
1541
+ )
1542
+
1543
+ # Remove leading +
1544
+ data = data.with_columns(pl.col(str_col_name).str.strip_chars(characters="  +"))
1545
+
1546
+ if not return_df:
1547
+ if dimensions is None and not self._allowed_new_dims:
1548
+ data = data.item()
1549
+ else:
1550
+ data = self._add_shape_to_columns(data)
1551
+ data = self._add_allowed_new_dims_to_df(data)
1552
+ with Config.print_polars_config:
1553
+ data = repr(data)
1554
+
1555
+ return data
1556
+
1557
+ def _str_header(self) -> str:
1558
+ """Returns a string representation of the expression's header."""
1559
+ return get_obj_repr(
1560
+ self,
1561
+ height=len(self) if self.dimensions else None,
1562
+ terms=self.terms,
1563
+ type=self.degree(return_str=True),
1564
+ )
1565
+
1566
+ def __repr__(self) -> str:
1567
+ return self._str_header() + "\n" + self.to_str()
1568
+
1569
+ def __str__(self) -> str:
1570
+ return self.to_str()
1571
+
1572
+ @property
1573
+ def terms(self) -> int:
1574
+ """The number of terms across all subexpressions.
1575
+
1576
+ Expressions equal to zero count as one term.
1577
+
1578
+ Examples:
1579
+ >>> import polars as pl
1580
+ >>> m = pf.Model()
1581
+ >>> m.v = pf.Variable({"t": [1, 2]})
1582
+ >>> coef = pl.DataFrame({"t": [1, 2], "coef": [0, 1]})
1583
+ >>> coef * (m.v + 4)
1584
+ <Expression height=2 terms=3 type=linear>
1585
+ ┌─────┬────────────┐
1586
+ │ t ┆ expression │
1587
+ │ (2) ┆ │
1588
+ ╞═════╪════════════╡
1589
+ │ 1 ┆ 0 │
1590
+ │ 2 ┆ 4 + v[2] │
1591
+ └─────┴────────────┘
1592
+ >>> (coef * (m.v + 4)).terms
1593
+ 3
1594
+ """
1595
+ return len(self.data)
1596
+
1597
+
1598
+ @overload
1599
+ def sum(over: str | Sequence[str], expr: Operable) -> Expression: ...
1600
+
1601
+
1602
+ @overload
1603
+ def sum(over: Operable) -> Expression: ...
1604
+
1605
+
1606
+ def sum(
1607
+ over: str | Sequence[str] | Operable,
1608
+ expr: Operable | None = None,
1609
+ ) -> Expression: # pragma: no cover
1610
+ """Deprecated: Use Expression.sum() or Variable.sum() instead.
1611
+
1612
+ Examples:
1613
+ >>> x = pf.Set(x=range(100))
1614
+ >>> pf.sum(x)
1615
+ Traceback (most recent call last):
1616
+ ...
1617
+ DeprecationWarning: pf.sum() is deprecated. Use Expression.sum() or Variable.sum() instead.
1618
+ """
1619
+ warnings.warn(
1620
+ "pf.sum() is deprecated. Use Expression.sum() or Variable.sum() instead.",
1621
+ DeprecationWarning,
1622
+ )
1623
+
1624
+ if expr is None:
1625
+ assert isinstance(over, BaseOperableBlock)
1626
+ return over.to_expr().sum()
1627
+ else:
1628
+ assert isinstance(over, (str, Sequence))
1629
+ if isinstance(over, str):
1630
+ over = (over,)
1631
+ return expr.to_expr().sum(*over)
1632
+
1633
+
1634
+ def sum_by(by: str | Sequence[str], expr: Operable) -> Expression: # pragma: no cover
1635
+ """Deprecated: Use Expression.sum() or Variable.sum() instead."""
1636
+ warnings.warn(
1637
+ "pf.sum_by() is deprecated. Use Expression.sum_by() or Variable.sum_by() instead.",
1638
+ DeprecationWarning,
1639
+ )
1640
+
1641
+ if isinstance(by, str):
1642
+ by = [by]
1643
+ return expr.to_expr().sum_by(*by)
1644
+
1645
+
1646
+ class Constraint(BaseBlock):
1647
+ """An optimization constraint that can be added to a [Model][pyoframe.Model].
1648
+
1649
+ Tip: Implementation Note
1650
+ Pyoframe simplifies constraints by moving all the constraint's mathematical terms to the left-hand side.
1651
+ This way, the right-hand side is always zero, and constraints only need to manage one expression.
1652
+
1653
+ Warning: Use `<=`, `>=`, or `==` operators to create constraints
1654
+ Constraints should be created using the `<=`, `>=`, or `==` operators, not by directly calling the `Constraint` constructor.
1655
+
1656
+ Parameters:
1657
+ lhs:
1658
+ The constraint's left-hand side expression.
1659
+ sense:
1660
+ The sense of the constraint.
1661
+ """
1662
+
1663
+ def __init__(self, lhs: Expression, sense: ConstraintSense):
1664
+ self.lhs: Expression = lhs
1665
+ self._model = lhs._model
1666
+ self.sense = sense
1667
+ self._to_relax: FuncArgs | None = None
1668
+ self._attr = Container(self._set_attribute, self._get_attribute)
1669
+
1670
+ dims = self.lhs.dimensions
1671
+ data = (
1672
+ pl.DataFrame()
1673
+ if dims is None
1674
+ else self.lhs.data.select(dims).unique(maintain_order=Config.maintain_order)
1675
+ )
1676
+
1677
+ super().__init__(data)
1678
+
1679
+ @property
1680
+ def attr(self) -> Container:
1681
+ """Allows reading and writing constraint attributes similarly to [Model.attr][pyoframe.Model.attr]."""
1682
+ return self._attr
1683
+
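+ # Usage sketch for `Constraint.attr` (a rough illustration; `m.con` is a hypothetical
+ # constraint and attribute availability depends on the solver):
+ #   m.con.attr.Dual            # read a pyoptinterface attribute (used by `Constraint.dual`)
+ #   m.con.attr.SomeRawAttr = 1 # unrecognized names fall back to the solver's raw attributes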
1684
+ def _set_attribute(self, name, value):
1685
+ self._assert_has_ids()
1686
+ col_name = name
1687
+ try:
1688
+ name = poi.ConstraintAttribute[name]
1689
+ setter = self._model.poi.set_constraint_attribute
1690
+ except KeyError:
1691
+ setter = self._model.poi.set_constraint_raw_attribute
1692
+
1693
+ constr_type = (
1694
+ poi.ConstraintType.Quadratic
1695
+ if self.lhs.is_quadratic
1696
+ else poi.ConstraintType.Linear
1697
+ )
1698
+
1699
+ if self.dimensions is None:
1700
+ for key in self.data.get_column(CONSTRAINT_KEY):
1701
+ setter(poi.ConstraintIndex(constr_type, key), name, value)
1702
+ else:
1703
+ for key, v in (
1704
+ self.data.join(
1705
+ value,
1706
+ on=self.dimensions,
1707
+ maintain_order="left" if Config.maintain_order else None,
1708
+ )
1709
+ .select(pl.col(CONSTRAINT_KEY), pl.col(col_name))
1710
+ .iter_rows()
1711
+ ):
1712
+ setter(poi.ConstraintIndex(constr_type, key), name, v)
1713
+
1714
+ @unwrap_single_values
1715
+ def _get_attribute(self, name):
1716
+ self._assert_has_ids()
1717
+ col_name = name
1718
+ try:
1719
+ name = poi.ConstraintAttribute[name]
1720
+ getter = self._model.poi.get_constraint_attribute
1721
+ except KeyError:
1722
+ getter = self._model.poi.get_constraint_raw_attribute
1723
+
1724
+ constr_type = (
1725
+ poi.ConstraintType.Quadratic
1726
+ if self.lhs.is_quadratic
1727
+ else poi.ConstraintType.Linear
1728
+ )
1729
+
1730
+ ids = self.data.get_column(CONSTRAINT_KEY).to_list()
1731
+ attr = [getter(poi.ConstraintIndex(constr_type, v_id), name) for v_id in ids]
1732
+ data = self.data.with_columns(pl.Series(attr).alias(col_name))
1733
+ return data.select(self._dimensions_unsafe + [col_name])
1734
+
1735
+ def _on_add_to_model(self, model: Model, name: str):
1736
+ super()._on_add_to_model(model, name)
1737
+ if self._to_relax is not None:
1738
+ self.relax(*self._to_relax.args, **self._to_relax.kwargs)
1739
+ self._assign_ids()
1740
+
1741
+ def _assign_ids(self):
1742
+ """This function is the main bottleneck for pyoframe.
1743
+
1744
+ I've spent a lot of time optimizing it.
1745
+ """
1746
+ assert self._model is not None
1747
+
1748
+ is_quadratic = self.lhs.is_quadratic
1749
+ use_var_names = self._model.solver_uses_variable_names
1750
+ sense = self.sense._to_poi()
1751
+ dims = self.dimensions
1752
+ df = self.lhs.data
1753
+ add_constraint = (
1754
+ self._model.poi._add_quadratic_constraint
1755
+ if is_quadratic
1756
+ else self._model.poi._add_linear_constraint
1757
+ )
1758
+
1759
+ # GRBaddconstr uses sprintf when no name or "" is given. sprintf is slow. As such, we specify "C" as the name.
1760
+ # Specifying "" is the same as not specifying anything, see pyoptinterface:
1761
+ # https://github.com/metab0t/PyOptInterface/blob/6d61f3738ad86379cff71fee77077d4ea919f2d5/lib/gurobi_model.cpp#L338
1762
+ name = "C" if self._model.solver.accelerate_with_repeat_names else ""
1763
+
1764
+ if dims is None:
1765
+ if self._model.solver_uses_variable_names:
1766
+ name = self.name
1767
+ create_expression = (
1768
+ poi.ScalarQuadraticFunction
1769
+ if is_quadratic
1770
+ else poi.ScalarAffineFunction.from_numpy # when called only once from_numpy is faster
1771
+ )
1772
+ constr_id = add_constraint(
1773
+ create_expression(
1774
+ *(
1775
+ df.get_column(c).to_numpy()
1776
+ for c in ([COEF_KEY] + self.lhs._variable_columns)
1777
+ )
1778
+ ),
1779
+ sense,
1780
+ 0,
1781
+ name,
1782
+ ).index
1783
+ try:
1784
+ df = self.data.with_columns(
1785
+ pl.lit(constr_id).alias(CONSTRAINT_KEY).cast(Config.id_dtype)
1786
+ )
1787
+ except TypeError as e:
1788
+ raise TypeError(
1789
+ f"Number of constraints exceeds the current data type ({Config.id_dtype}). Consider increasing the data type by changing Config.id_dtype."
1790
+ ) from e
1791
+ else:
1792
+ create_expression = (
1793
+ poi.ScalarQuadraticFunction
1794
+ if is_quadratic
1795
+ else poi.ScalarAffineFunction # when called multiple times the default constructor is fastest
1796
+ )
1797
+ if Config.maintain_order:
1798
+ # This adds a 5-10% overhead on _assign_ids but ensures the order
1799
+ # is the same as the input data
1800
+ df_unique = df.select(dims).unique(maintain_order=True)
1801
+ df = (
1802
+ df.join(
1803
+ df_unique.with_row_index(),
1804
+ on=dims,
1805
+ maintain_order="left",
1806
+ )
1807
+ .sort("index", maintain_order=True)
1808
+ .drop("index")
1809
+ )
1810
+ else:
1811
+ df = df.sort(dims, maintain_order=False)
1812
+ # must maintain order otherwise results are wrong!
1813
+ df_unique = df.select(dims).unique(maintain_order=True)
1814
+ coefs = df.get_column(COEF_KEY).to_list()
1815
+ vars = df.get_column(VAR_KEY).to_list()
1816
+ if is_quadratic:
1817
+ vars2 = df.get_column(QUAD_VAR_KEY).to_list()
1818
+
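+ # `split` holds the starting row of each dims-group plus the total height; e.g. for
+ # two groups of 2 and 3 terms, split == [0, 2, 5] and pairwise(split) yields the
+ # slices (0, 2) and (2, 5), i.e. one slice (one constraint) per group.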
1819
+ split = (
1820
+ df.lazy()
1821
+ .with_row_index()
1822
+ .filter(pl.struct(dims).is_first_distinct())
1823
+ .select("index")
1824
+ .collect()
1825
+ .to_series()
1826
+ .to_list()
1827
+ ) + [df.height]
1828
+ del df
1829
+
1830
+ # Note: list comprehension was slightly faster than using polars map_elements
1831
+ # Note 2: not specifying the argument name (`expr=`) was also slightly faster.
1832
+ # Note 3: we could have merged the if-else using an expansion operator (*) but that is slow.
1833
+ # Note 4: using kwargs is slow and including the constant term for linear expressions is faster.
1834
+ if use_var_names:
1835
+ names = concat_dimensions(df_unique, prefix=self.name)[
1836
+ "concated_dim"
1837
+ ].to_list()
1838
+ if is_quadratic:
1839
+ ids = [
1840
+ add_constraint(
1841
+ create_expression(coefs[s0:s1], vars[s0:s1], vars2[s0:s1]),
1842
+ sense,
1843
+ 0,
1844
+ names[i],
1845
+ ).index
1846
+ for i, (s0, s1) in enumerate(pairwise(split))
1847
+ ]
1848
+ else:
1849
+ ids = [
1850
+ add_constraint(
1851
+ create_expression(coefs[s0:s1], vars[s0:s1], 0),
1852
+ sense,
1853
+ 0,
1854
+ names[i],
1855
+ ).index
1856
+ for i, (s0, s1) in enumerate(pairwise(split))
1857
+ ]
1858
+ else:
1859
+ if is_quadratic:
1860
+ ids = [
1861
+ add_constraint(
1862
+ create_expression(coefs[s0:s1], vars[s0:s1], vars2[s0:s1]),
1863
+ sense,
1864
+ 0,
1865
+ name,
1866
+ ).index
1867
+ for s0, s1 in pairwise(split)
1868
+ ]
1869
+ else:
1870
+ ids = [
1871
+ add_constraint(
1872
+ create_expression(coefs[s0:s1], vars[s0:s1], 0),
1873
+ sense,
1874
+ 0,
1875
+ name,
1876
+ ).index
1877
+ for s0, s1 in pairwise(split)
1878
+ ]
1879
+ try:
1880
+ df = df_unique.with_columns(
1881
+ pl.Series(ids, dtype=Config.id_dtype).alias(CONSTRAINT_KEY)
1882
+ )
1883
+ except TypeError as e:
1884
+ raise TypeError(
1885
+ f"Number of constraints exceeds the current data type ({Config.id_dtype}). Consider increasing the data type by changing Config.id_dtype."
1886
+ ) from e
1887
+
1888
+ self._data = df
1889
+
1890
+ @property
1891
+ def dual(self) -> pl.DataFrame | float:
1892
+ """Returns the constraint's dual values.
1893
+
1894
+ Examples:
1895
+ >>> m = pf.Model()
1896
+ >>> m.x = pf.Variable()
1897
+ >>> m.y = pf.Variable()
1898
+ >>> m.maximize = m.x - m.y
1899
+
1900
+ Since each constraint below scales its variable by a factor of 2, a unit change in a right-hand side changes the objective by only 0.5.
1901
+ >>> m.constraint_x = 2 * m.x <= 10
1902
+ >>> m.constraint_y = 2 * m.y >= 5
1903
+ >>> m.optimize()
1904
+
1905
+ For every unit increase in the right-hand side of `constraint_x`, the objective improves by 0.5.
1906
+ >>> m.constraint_x.dual
1907
+ 0.5
1908
+
1909
+ For every unit increase in the right-hand side of `constraint_y`, the objective worsens by 0.5.
1910
+ >>> m.constraint_y.dual
1911
+ -0.5
1912
+ """
1913
+ dual = self.attr.Dual
1914
+ if isinstance(dual, pl.DataFrame):
1915
+ dual = dual.rename({"Dual": DUAL_KEY})
1916
+
1917
+ # IPOPT returns dual values with the opposite sign, so we flip them here.
1918
+ # It does the same for maximization problems, but since we already flip the objective
1919
+ # (IPOPT doesn't support maximization), the two sign flips cancel out.
1920
+ assert self._model is not None
1921
+ if self._model.solver.name == "ipopt" and self._model.sense == ObjSense.MIN:
1922
+ if isinstance(dual, pl.DataFrame):
1923
+ dual = dual.with_columns(-pl.col(DUAL_KEY))
1924
+ else:
1925
+ dual = -dual
1926
+ return dual
1927
+
1928
+ @classmethod
1929
+ def _get_id_column_name(cls):
1930
+ return CONSTRAINT_KEY
1931
+
1932
+ def filter(self, *args, **kwargs) -> pl.DataFrame:
1933
+ """Syntactic sugar on `Constraint.lhs.data.filter()`, to help debugging."""
1934
+ return self.lhs.data.filter(*args, **kwargs)
1935
+
1936
+ def relax(self, cost: Operable, max: Operable | None = None) -> Constraint:
1937
+ """Allows the constraint to be violated at a `cost` and, optionally, up to a maximum.
1938
+
1939
+ Warning:
1940
+ `.relax()` must be called before the constraint is assigned to the [Model][pyoframe.Model] (see examples below).
1941
+
1942
+ Parameters:
1943
+ cost:
1944
+ The cost of violating the constraint. Costs should be positive because Pyoframe will automatically
1945
+ make them negative for maximization problems.
1946
+ max:
1947
+ The maximum value of the relaxation variable.
1948
+
1949
+ Returns:
1950
+ The same constraint, allowing `.relax()` to be applied inline when the constraint is assigned to the model.
1951
+
1952
+ Examples:
1953
+ >>> m = pf.Model()
1954
+ >>> m.hours_sleep = pf.Variable(lb=0)
1955
+ >>> m.hours_day = pf.Variable(lb=0)
1956
+ >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
1957
+ >>> m.maximize = m.hours_day
1958
+ >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)
1959
+ >>> m.optimize()
1960
+ >>> m.hours_day.solution
1961
+ 16.0
1962
+ >>> m.maximize += 2 * m.hours_day
1963
+ >>> m.optimize()
1964
+ >>> m.hours_day.solution
1965
+ 19.0
1966
+
1967
+ `relax` can only be called after the sense of the model has been defined.
1968
+
1969
+ >>> m = pf.Model()
1970
+ >>> m.hours_sleep = pf.Variable(lb=0)
1971
+ >>> m.hours_day = pf.Variable(lb=0)
1972
+ >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
1973
+ >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)
1974
+ Traceback (most recent call last):
1975
+ ...
1976
+ ValueError: Cannot relax a constraint before the objective sense has been set. Try setting the objective first or using Model(sense=...).
1977
+
1978
+ One way to solve this is to set the sense directly on the model, as shown below:
1979
+
1980
+ >>> m = pf.Model(sense="max")
1981
+ >>> m.hours_sleep = pf.Variable(lb=0)
1982
+ >>> m.hours_day = pf.Variable(lb=0)
1983
+ >>> m.hours_in_day = m.hours_sleep + m.hours_day == 24
1984
+ >>> m.must_sleep = (m.hours_sleep >= 8).relax(cost=2, max=3)
1985
+
1986
+ And now an example with dimensions:
1987
+
1988
+ >>> homework_due_tomorrow = pl.DataFrame(
1989
+ ... {
1990
+ ... "project": ["A", "B", "C"],
1991
+ ... "cost_per_hour_underdelivered": [10, 20, 30],
1992
+ ... "hours_to_finish": [9, 9, 9],
1993
+ ... "max_underdelivered": [1, 9, 9],
1994
+ ... }
1995
+ ... )
1996
+ >>> m.hours_spent = pf.Variable(homework_due_tomorrow["project"], lb=0)
1997
+ >>> m.must_finish_project = (
1998
+ ... m.hours_spent
1999
+ ... >= homework_due_tomorrow[["project", "hours_to_finish"]]
2000
+ ... ).relax(
2001
+ ... homework_due_tomorrow[["project", "cost_per_hour_underdelivered"]],
2002
+ ... max=homework_due_tomorrow[["project", "max_underdelivered"]],
2003
+ ... )
2004
+ >>> m.only_one_day = m.hours_spent.sum("project") <= 24
2005
+ >>> # Relaxing a constraint after it has already been assigned will give an error
2006
+ >>> m.only_one_day.relax(1)
2007
+ Traceback (most recent call last):
2008
+ ...
2009
+ ValueError: .relax() must be called before the Constraint is added to the model
2010
+ >>> m.attr.Silent = True
2011
+ >>> m.optimize()
2012
+ >>> m.maximize.value
2013
+ -50.0
2014
+ >>> m.hours_spent.solution
2015
+ shape: (3, 2)
2016
+ ┌─────────┬──────────┐
2017
+ │ project ┆ solution │
2018
+ │ --- ┆ --- │
2019
+ │ str ┆ f64 │
2020
+ ╞═════════╪══════════╡
2021
+ │ A ┆ 8.0 │
2022
+ │ B ┆ 7.0 │
2023
+ │ C ┆ 9.0 │
2024
+ └─────────┴──────────┘
2025
+ """
2026
+ if self._has_ids:
2027
+ raise ValueError(
2028
+ ".relax() must be called before the Constraint is added to the model"
2029
+ )
2030
+
2031
+ m = self._model
2032
+ if m is None:
2033
+ self._to_relax = FuncArgs(args=[cost, max])
2034
+ return self
2035
+
2036
+ var_name = f"{self.name}_relaxation"
2037
+ assert not hasattr(m, var_name), (
2038
+ "Conflicting names, relaxation variable already exists on the model."
2039
+ )
2040
+ var = Variable(self, lb=0, ub=max)
2041
+ setattr(m, var_name, var)
2042
+
2043
+ if self.sense == ConstraintSense.LE:
2044
+ self.lhs -= var
2045
+ elif self.sense == ConstraintSense.GE:
2046
+ self.lhs += var
2047
+ else: # pragma: no cover
2048
+ # TODO
2049
+ raise NotImplementedError(
2050
+ "Relaxation for equalities has not yet been implemented. Submit a pull request!"
2051
+ )
2052
+
2053
+ penalty = var * cost
2054
+ if self.dimensions:
2055
+ penalty = penalty.sum()
2056
+ if m.sense is None:
2057
+ raise ValueError(
2058
+ "Cannot relax a constraint before the objective sense has been set. Try setting the objective first or using Model(sense=...)."
2059
+ )
2060
+ elif m.sense == ObjSense.MAX:
2061
+ penalty *= -1
2062
+ if m.has_objective:
2063
+ m.objective += penalty
2064
+ else:
2065
+ m.objective = penalty
2066
+
2067
+ return self
2068
+
2069
+ def estimated_size(self, *args, **kwargs):
2070
+ """Returns the estimated size of the constraint.
2071
+
2072
+ Includes the size of the underlying expression (`Constraint.lhs`).
2073
+
2074
+ See [`Expression.estimated_size`][pyoframe.Expression.estimated_size] for details on signature and behavior.
2075
+
2076
+ Examples:
2077
+ A dimensionless constraint contains a 32-bit constraint ID and, for each term, a 64-bit coefficient and a 32-bit variable ID.
2078
+ For a two-term expression that is: (32 + 2 * (64 + 32)) = 224 bits = 28 bytes.
2079
+
2080
+ >>> m = pf.Model()
2081
+ >>> m.x = pf.Variable()
2082
+ >>> m.con = m.x <= 4
2083
+ >>> m.con.estimated_size()
2084
+ 28
2085
+ """
2086
+ return super().estimated_size(*args, **kwargs) + self.lhs.estimated_size(
2087
+ *args, **kwargs
2088
+ )
2089
+
2090
+ @overload
2091
+ def to_str(self, return_df: Literal[False] = False) -> str: ...
2092
+
2093
+ @overload
2094
+ def to_str(self, return_df: Literal[True] = True) -> pl.DataFrame: ...
2095
+
2096
+ def to_str(self, return_df: bool = False) -> str | pl.DataFrame:
2097
+ """Converts the constraint to a human-readable string, or several arranged in a table.
2098
+
2099
+ Long expressions are truncated according to [`Config.print_max_terms`][pyoframe._Config.print_max_terms] and [`Config.print_polars_config`][pyoframe._Config.print_polars_config].
2100
+
2101
+ Parameters:
2102
+ return_df:
2103
+ If `True`, returns a DataFrame containing strings instead of the string representation of the DataFrame.
2104
+
2105
+ Examples:
2106
+ >>> import polars as pl
2107
+ >>> m = pf.Model()
2108
+ >>> x = pf.Set(x=range(1000))
2109
+ >>> y = pf.Set(y=range(1000))
2110
+ >>> m.V = pf.Variable(x, y)
2111
+ >>> expr = 2 * m.V * m.V
2112
+ >>> print((expr <= 3).to_str())
2113
+ ┌────────┬────────┬────────────────────────────────┐
2114
+ │ x ┆ y ┆ constraint │
2115
+ │ (1000) ┆ (1000) ┆ │
2116
+ ╞════════╪════════╪════════════════════════════════╡
2117
+ │ 0 ┆ 0 ┆ 2 V[0,0] * V[0,0] <= 3 │
2118
+ │ 0 ┆ 1 ┆ 2 V[0,1] * V[0,1] <= 3 │
2119
+ │ 0 ┆ 2 ┆ 2 V[0,2] * V[0,2] <= 3 │
2120
+ │ 0 ┆ 3 ┆ 2 V[0,3] * V[0,3] <= 3 │
2121
+ │ 0 ┆ 4 ┆ 2 V[0,4] * V[0,4] <= 3 │
2122
+ │ … ┆ … ┆ … │
2123
+ │ 999 ┆ 995 ┆ 2 V[999,995] * V[999,995] <= 3 │
2124
+ │ 999 ┆ 996 ┆ 2 V[999,996] * V[999,996] <= 3 │
2125
+ │ 999 ┆ 997 ┆ 2 V[999,997] * V[999,997] <= 3 │
2126
+ │ 999 ┆ 998 ┆ 2 V[999,998] * V[999,998] <= 3 │
2127
+ │ 999 ┆ 999 ┆ 2 V[999,999] * V[999,999] <= 3 │
2128
+ └────────┴────────┴────────────────────────────────┘
2129
+ >>> expr = expr.sum("x")
2130
+ >>> print((expr >= 3).to_str())
2131
+ ┌────────┬─────────────────────────────────────────────────────────────────────────────────────────┐
2132
+ │ y ┆ constraint │
2133
+ │ (1000) ┆ │
2134
+ ╞════════╪═════════════════════════════════════════════════════════════════════════════════════════╡
2135
+ │ 0 ┆ 2 V[0,0] * V[0,0] +2 V[1,0] * V[1,0] +2 V[2,0] * V[2,0] +2 V[3,0] * V[3,0] +2 V[4,0] * │
2136
+ │ ┆ V[4,0] … >= 3 │
2137
+ │ 1 ┆ 2 V[0,1] * V[0,1] +2 V[1,1] * V[1,1] +2 V[2,1] * V[2,1] +2 V[3,1] * V[3,1] +2 V[4,1] * │
2138
+ │ ┆ V[4,1] … >= 3 │
2139
+ │ 2 ┆ 2 V[0,2] * V[0,2] +2 V[1,2] * V[1,2] +2 V[2,2] * V[2,2] +2 V[3,2] * V[3,2] +2 V[4,2] * │
2140
+ │ ┆ V[4,2] … >= 3 │
2141
+ │ 3 ┆ 2 V[0,3] * V[0,3] +2 V[1,3] * V[1,3] +2 V[2,3] * V[2,3] +2 V[3,3] * V[3,3] +2 V[4,3] * │
2142
+ │ ┆ V[4,3] … >= 3 │
2143
+ │ 4 ┆ 2 V[0,4] * V[0,4] +2 V[1,4] * V[1,4] +2 V[2,4] * V[2,4] +2 V[3,4] * V[3,4] +2 V[4,4] * │
2144
+ │ ┆ V[4,4] … >= 3 │
2145
+ │ … ┆ … │
2146
+ │ 995 ┆ 2 V[0,995] * V[0,995] +2 V[1,995] * V[1,995] +2 V[2,995] * V[2,995] +2 V[3,995] * │
2147
+ │ ┆ V[3,995] +2 V[4,99… │
2148
+ │ 996 ┆ 2 V[0,996] * V[0,996] +2 V[1,996] * V[1,996] +2 V[2,996] * V[2,996] +2 V[3,996] * │
2149
+ │ ┆ V[3,996] +2 V[4,99… │
2150
+ │ 997 ┆ 2 V[0,997] * V[0,997] +2 V[1,997] * V[1,997] +2 V[2,997] * V[2,997] +2 V[3,997] * │
2151
+ │ ┆ V[3,997] +2 V[4,99… │
2152
+ │ 998 ┆ 2 V[0,998] * V[0,998] +2 V[1,998] * V[1,998] +2 V[2,998] * V[2,998] +2 V[3,998] * │
2153
+ │ ┆ V[3,998] +2 V[4,99… │
2154
+ │ 999 ┆ 2 V[0,999] * V[0,999] +2 V[1,999] * V[1,999] +2 V[2,999] * V[2,999] +2 V[3,999] * │
2155
+ │ ┆ V[3,999] +2 V[4,99… │
2156
+ └────────┴─────────────────────────────────────────────────────────────────────────────────────────┘
2157
+ >>> expr = expr.sum("y")
2158
+ >>> print((expr == 3).to_str())
2159
+ 2 V[0,0] * V[0,0] +2 V[0,1] * V[0,1] +2 V[0,2] * V[0,2] +2 V[0,3] * V[0,3] +2 V[0,4] * V[0,4] … = 3
2160
+ """
2161
+ dims = self.dimensions
2162
+ str_table = self.lhs.to_str(
2163
+ include_const_term=False, return_df=True, str_col_name="constraint"
2164
+ )
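+ # The displayed right-hand side is the negated constant term of the lhs, since all
+ # terms (including constants) are stored on the left-hand side.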
2165
+ rhs = self.lhs.constant_terms.with_columns(pl.col(COEF_KEY) * -1)
2166
+ rhs = cast_coef_to_string(rhs, drop_ones=False, always_show_sign=False)
2167
+ rhs = rhs.rename({COEF_KEY: "rhs"})
2168
+ if dims:
2169
+ constr_str = str_table.join(
2170
+ rhs, on=dims, how="left", maintain_order="left", coalesce=True
2171
+ )
2172
+ else:
2173
+ constr_str = pl.concat([str_table, rhs], how="horizontal")
2174
+ constr_str = constr_str.with_columns(
2175
+ pl.concat_str("constraint", pl.lit(f" {self.sense.value} "), "rhs")
2176
+ ).drop("rhs")
2177
+
2178
+ if not return_df:
2179
+ if self.dimensions is None:
2180
+ constr_str = constr_str.item()
2181
+ else:
2182
+ constr_str = self._add_shape_to_columns(constr_str)
2183
+ with Config.print_polars_config:
2184
+ constr_str = repr(constr_str)
2185
+
2186
+ return constr_str
2187
+
2188
+ def __repr__(self) -> str:
2189
+ return (
2190
+ get_obj_repr(
2191
+ self,
2192
+ self.name,
2193
+ height=len(self) if self.dimensions else None,
2194
+ terms=len(self.lhs.data),
2195
+ type=self.lhs.degree(return_str=True),
2196
+ )
2197
+ + "\n"
2198
+ + self.to_str()
2199
+ )
2200
+
2201
+
2202
+ class Variable(BaseOperableBlock):
2203
+ """A decision variable for an optimization model.
2204
+
2205
+ Parameters:
2206
+ *indexing_sets:
2207
+ If no indexing_sets are provided, a single variable with no dimensions is created.
2208
+ Otherwise, a variable is created for each element in the Cartesian product of the indexing_sets (see Set for details on behaviour).
2209
+ lb:
2210
+ The lower bound for all variables. May also be an expression, in which case a `<variable_name>_lb` constraint is added when the variable is added to the model.
2211
+ ub:
2212
+ The upper bound for all variables. Like `lb`, this may be an expression (added as a `<variable_name>_ub` constraint).
2213
+ vtype:
2214
+ The type of the variable. Can be either a VType enum or a string. Default is VType.CONTINUOUS.
2215
+ equals:
2216
+ When specified, a variable is created and a constraint (named `<variable_name>_equals`) is added to make the variable equal to the provided expression. If `equals` is a constant, the variable's bounds are instead fixed to that value.
2217
+
2218
+ Examples:
2219
+ >>> import pandas as pd
2220
+ >>> m = pf.Model()
2221
+ >>> df = pd.DataFrame(
2222
+ ... {"dim1": [1, 1, 2, 2, 3, 3], "dim2": ["a", "b", "a", "b", "a", "b"]}
2223
+ ... )
2224
+ >>> Variable(df)
2225
+ <Variable 'unnamed' height=6>
2226
+ ┌──────┬──────┐
2227
+ │ dim1 ┆ dim2 │
2228
+ │ (3) ┆ (2) │
2229
+ ╞══════╪══════╡
2230
+ │ 1 ┆ a │
2231
+ │ 1 ┆ b │
2232
+ │ 2 ┆ a │
2233
+ │ 2 ┆ b │
2234
+ │ 3 ┆ a │
2235
+ │ 3 ┆ b │
2236
+ └──────┴──────┘
2237
+
2238
+ Variables cannot be used until they're added to the model.
2239
+
2240
+ >>> m.constraint = Variable(df) <= 3
2241
+ Traceback (most recent call last):
2242
+ ...
2243
+ ValueError: Cannot use 'Variable' before it has been added to a model.
2244
+
2245
+ Instead, assign the variable to the model first:
2246
+ >>> m.v = Variable(df)
2247
+ >>> m.constraint = m.v <= 3
2248
+
2249
+ >>> m.v
2250
+ <Variable 'v' height=6>
2251
+ ┌──────┬──────┬──────────┐
2252
+ │ dim1 ┆ dim2 ┆ variable │
2253
+ │ (3) ┆ (2) ┆ │
2254
+ ╞══════╪══════╪══════════╡
2255
+ │ 1 ┆ a ┆ v[1,a] │
2256
+ │ 1 ┆ b ┆ v[1,b] │
2257
+ │ 2 ┆ a ┆ v[2,a] │
2258
+ │ 2 ┆ b ┆ v[2,b] │
2259
+ │ 3 ┆ a ┆ v[3,a] │
2260
+ │ 3 ┆ b ┆ v[3,b] │
2261
+ └──────┴──────┴──────────┘
2262
+
2263
+ >>> m.v2 = Variable(df[["dim1"]])
2264
+ Traceback (most recent call last):
2265
+ ...
2266
+ ValueError: Duplicate rows found in input data.
2267
+ >>> m.v3 = Variable(df[["dim1"]].drop_duplicates())
2268
+ >>> m.v3
2269
+ <Variable 'v3' height=3>
2270
+ ┌──────┬──────────┐
2271
+ │ dim1 ┆ variable │
2272
+ │ (3) ┆ │
2273
+ ╞══════╪══════════╡
2274
+ │ 1 ┆ v3[1] │
2275
+ │ 2 ┆ v3[2] │
2276
+ │ 3 ┆ v3[3] │
2277
+ └──────┴──────────┘
2278
+ """
2279
+
2280
+ # TODO: Breaking change, remove support for Iterable[AcceptableSets]
2281
+ def __init__(
2282
+ self,
2283
+ *indexing_sets: SetTypes | Iterable[SetTypes],
2284
+ lb: Operable | None = None,
2285
+ ub: Operable | None = None,
2286
+ vtype: VType | VTypeValue = VType.CONTINUOUS,
2287
+ equals: Operable | None = None,
2288
+ ):
2289
+ if equals is not None:
2290
+ if isinstance(equals, (float, int)):
2291
+ if lb is not None:
2292
+ raise ValueError("Cannot specify 'lb' when 'equals' is a constant.")
2293
+ if ub is not None:
2294
+ raise ValueError("Cannot specify 'ub' when 'equals' is a constant.")
2295
+ lb = ub = equals
2296
+ equals = None
2297
+ else:
2298
+ assert len(indexing_sets) == 0, (
2299
+ "Cannot specify both 'equals' and 'indexing_sets'"
2300
+ )
2301
+ equals = equals.to_expr()
2302
+ indexing_sets = (equals,)
2303
+
2304
+ data = Set(*indexing_sets).data if len(indexing_sets) > 0 else pl.DataFrame()
2305
+ super().__init__(data)
2306
+
2307
+ self.vtype: VType = VType(vtype)
2308
+ self._attr = Container(self._set_attribute, self._get_attribute)
2309
+ self._equals: Expression | None = equals
2310
+
2311
+ if lb is not None and not isinstance(lb, (float, int)):
2312
+ self._lb_expr, self.lb = lb, None
2313
+ else:
2314
+ self._lb_expr, self.lb = None, lb
2315
+ if ub is not None and not isinstance(ub, (float, int)):
2316
+ self._ub_expr, self.ub = ub, None
2317
+ else:
2318
+ self._ub_expr, self.ub = None, ub
2319
+
2320
+ @property
2321
+ def attr(self) -> Container:
2322
+ """Allows reading and writing variable attributes similarly to [Model.attr][pyoframe.Model.attr]."""
2323
+ return self._attr
2324
+
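+ # Usage sketch for `Variable.attr` (a rough illustration; `m.v` is a hypothetical
+ # variable and attribute availability depends on the solver):
+ #   m.v.attr.Value             # read a pyoptinterface attribute (used by `Variable.solution`)
+ #   m.v.attr.SomeRawAttr = 1   # unrecognized names fall back to the solver's raw attributes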
2325
+ def _set_attribute(self, name, value):
2326
+ self._assert_has_ids()
2327
+ col_name = name
2328
+ try:
2329
+ name = poi.VariableAttribute[name]
2330
+ setter = self._model.poi.set_variable_attribute
2331
+ except KeyError:
2332
+ setter = self._model.poi.set_variable_raw_attribute
2333
+
2334
+ if self.dimensions is None:
2335
+ for key in self.data.get_column(VAR_KEY):
2336
+ setter(poi.VariableIndex(key), name, value)
2337
+ else:
2338
+ for key, v in (
2339
+ self.data.join(
2340
+ value,
2341
+ on=self.dimensions,
2342
+ maintain_order="left" if Config.maintain_order else None,
2343
+ )
2344
+ .select(pl.col(VAR_KEY), pl.col(col_name))
2345
+ .iter_rows()
2346
+ ):
2347
+ setter(poi.VariableIndex(key), name, v)
2348
+
2349
+ @unwrap_single_values
2350
+ def _get_attribute(self, name):
2351
+ self._assert_has_ids()
2352
+ col_name = name
2353
+ try:
2354
+ name = poi.VariableAttribute[name]
2355
+ getter = self._model.poi.get_variable_attribute
2356
+ except KeyError:
2357
+ getter = self._model.poi.get_variable_raw_attribute
2358
+
2359
+ ids = self.data.get_column(VAR_KEY).to_list()
2360
+ attr = [getter(poi.VariableIndex(v_id), name) for v_id in ids]
2361
+ data = self.data.with_columns(pl.Series(attr).alias(col_name))
2362
+ return data.select(self._dimensions_unsafe + [col_name])
2363
+
2364
+ def _assign_ids(self):
2365
+ assert self._model is not None
2366
+ assert self.name is not None
2367
+
2368
+ solver = self._model.solver
2369
+ if solver.supports_integer_variables:
2370
+ domain = self.vtype._to_poi()
2371
+ else:
2372
+ if self.vtype != VType.CONTINUOUS:
2373
+ raise ValueError(
2374
+ f"Solver {solver.name} does not support integer or binary variables."
2375
+ )
2376
+
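+ # When no bound is given, +/-1e100 is used as an effectively unbounded default.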
2377
+ lb = -1e100 if self.lb is None else float(self.lb)
2378
+ ub = 1e100 if self.ub is None else float(self.ub)
2379
+
2380
+ poi_add_var = self._model.poi.add_variable
2381
+
2382
+ dims = self.dimensions
2383
+
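+ # Generate individual names like "v[1,a]" only when the variable is dimensioned and
+ # the solver is configured to use variable names; otherwise a single shared name
+ # (or no name at all) is reused for every variable.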
2384
+ dynamic_names = dims is not None and self._model.solver_uses_variable_names
2385
+ if dynamic_names:
2386
+ names = concat_dimensions(self.data, prefix=self.name)[
2387
+ "concated_dim"
2388
+ ].to_list()
2389
+ if solver.supports_integer_variables:
2390
+ ids = [poi_add_var(domain, lb, ub, name).index for name in names]
2391
+ else:
2392
+ ids = [poi_add_var(lb, ub, name=name).index for name in names]
2393
+ else:
2394
+ if self._model.solver_uses_variable_names:
2395
+ name = self.name
2396
+ elif solver.accelerate_with_repeat_names:
2397
+ name = "V"
2398
+ else:
2399
+ name = ""
2400
+
2401
+ n = 1 if dims is None else len(self.data)
2402
+
2403
+ if solver.supports_integer_variables:
2404
+ ids = [poi_add_var(domain, lb, ub, name).index for _ in range(n)]
2405
+ else:
2406
+ ids = [poi_add_var(lb, ub, name=name).index for _ in range(n)]
2407
+
2408
+ try:
2409
+ df = self.data.with_columns(
2410
+ pl.Series(ids, dtype=Config.id_dtype).alias(VAR_KEY)
2411
+ )
2412
+ except TypeError as e:
2413
+ raise TypeError(
2414
+ f"Number of variables exceeds the current data type ({Config.id_dtype}). Consider increasing the data type by changing Config.id_dtype."
2415
+ ) from e
2416
+
2417
+ self._data = df
2418
+
2419
+ def _on_add_to_model(self, model, name):
2420
+ super()._on_add_to_model(model, name)
2421
+ self._assign_ids()
2422
+ if self._lb_expr is not None:
2423
+ setattr(model, f"{name}_lb", self._lb_expr <= self)
2424
+
2425
+ if self._ub_expr is not None:
2426
+ setattr(model, f"{name}_ub", self <= self._ub_expr)
2427
+
2428
+ if self._equals is not None:
2429
+ setattr(model, f"{name}_equals", self == self._equals)
2430
+
2431
+ @classmethod
2432
+ def _get_id_column_name(cls):
2433
+ return VAR_KEY
2434
+
2435
+ @property
2436
+ @unwrap_single_values
2437
+ def solution(self):
2438
+ """Retrieves a variable's optimal value after the model has been solved.
2439
+
2440
+ Return type is a DataFrame if the variable has dimensions, otherwise it is a single value.
2441
+ Binary and integer variables are returned as integers.
2442
+
2443
+ Examples:
2444
+ >>> m = pf.Model()
2445
+ >>> m.var_continuous = pf.Variable({"dim1": [1, 2, 3]}, lb=5, ub=5)
2446
+ >>> m.var_integer = pf.Variable(
2447
+ ... {"dim1": [1, 2, 3]}, lb=4.5, ub=5.5, vtype=pf.VType.INTEGER
2448
+ ... )
2449
+ >>> m.var_dimensionless = pf.Variable(
2450
+ ... lb=4.5, ub=5.5, vtype=pf.VType.INTEGER
2451
+ ... )
2452
+ >>> m.var_continuous.solution
2453
+ Traceback (most recent call last):
2454
+ ...
2455
+ RuntimeError: Failed to retrieve solution for variable. Are you sure the model has been solved?
2456
+ >>> m.optimize()
2457
+ >>> m.var_continuous.solution
2458
+ shape: (3, 2)
2459
+ ┌──────┬──────────┐
2460
+ │ dim1 ┆ solution │
2461
+ │ --- ┆ --- │
2462
+ │ i64 ┆ f64 │
2463
+ ╞══════╪══════════╡
2464
+ │ 1 ┆ 5.0 │
2465
+ │ 2 ┆ 5.0 │
2466
+ │ 3 ┆ 5.0 │
2467
+ └──────┴──────────┘
2468
+ >>> m.var_integer.solution
2469
+ shape: (3, 2)
2470
+ ┌──────┬──────────┐
2471
+ │ dim1 ┆ solution │
2472
+ │ --- ┆ --- │
2473
+ │ i64 ┆ i64 │
2474
+ ╞══════╪══════════╡
2475
+ │ 1 ┆ 5 │
2476
+ │ 2 ┆ 5 │
2477
+ │ 3 ┆ 5 │
2478
+ └──────┴──────────┘
2479
+ >>> m.var_dimensionless.solution
2480
+ 5
2481
+ """
2482
+ try:
2483
+ solution = self.attr.Value
2484
+ except RuntimeError as e:
2485
+ raise RuntimeError(
2486
+ "Failed to retrieve solution for variable. Are you sure the model has been solved?"
2487
+ ) from e
2488
+ if isinstance(solution, pl.DataFrame):
2489
+ solution = solution.rename({"Value": SOLUTION_KEY})
2490
+
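+ # Solvers may return slightly non-integral values (e.g. 4.9999999) for integer and
+ # binary variables: round to the nearest integer and, unless the tolerance is set to
+ # zero, check that the rounding error is within Config.integer_tolerance.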
2491
+ if self.vtype in [VType.BINARY, VType.INTEGER]:
2492
+ if isinstance(solution, pl.DataFrame):
2493
+ # TODO handle values that are out of bounds of Int64 (i.e. when problem is unbounded)
2494
+ solution = solution.with_columns(
2495
+ pl.col("solution").alias("solution_float"),
2496
+ pl.col("solution").round().cast(pl.Int64),
2497
+ )
2498
+ if Config.integer_tolerance != 0:
2499
+ df = solution.filter(
2500
+ (pl.col("solution_float") - pl.col("solution")).abs()
2501
+ > Config.integer_tolerance
2502
+ )
2503
+ assert df.is_empty(), (
2504
+ f"Variable {self.name} has a non-integer value: {df}\nThis should not happen."
2505
+ )
2506
+ solution = solution.drop("solution_float")
2507
+ else:
2508
+ solution_float = solution
2509
+ solution = int(round(solution))
2510
+ if Config.integer_tolerance != 0:
2511
+ assert abs(solution - solution_float) < Config.integer_tolerance, (
2512
+ f"Value of variable {self.name} is not an integer: {solution}. This should not happen."
2513
+ )
2514
+
2515
+ return solution
2516
+
2517
+ def __repr__(self):
2518
+ result = (
2519
+ get_obj_repr(
2520
+ self,
2521
+ self.name,
2522
+ lb=self.lb,
2523
+ ub=self.ub,
2524
+ height=self.data.height if self.dimensions else None,
2525
+ )
2526
+ + "\n"
2527
+ )
2528
+ if self._has_ids:
2529
+ result += self.to_expr().to_str(str_col_name="variable")
2530
+ else:
2531
+ with Config.print_polars_config:
2532
+ data = self._add_shape_to_columns(self.data)
2533
+ # we don't include allowed_new_dims because Variables have none (they exist only on Expressions and Sets)
2534
+ result += repr(data)
2535
+
2536
+ return result
2537
+
2538
+ def to_expr(self) -> Expression:
2539
+ """Converts the Variable to an Expression."""
2540
+ self._assert_has_ids()
2541
+ return self._new(self.data.drop(SOLUTION_KEY, strict=False), self.name) # pyright: ignore[reportArgumentType], we know it's safe after _assert_has_ids()
2542
+
2543
+ def _new(self, data: pl.DataFrame, name: str) -> Expression:
2544
+ self._assert_has_ids()
2545
+ e = Expression(data.with_columns(pl.lit(1.0).alias(COEF_KEY)), name)
2546
+ e._model = self._model
2547
+ return e
2548
+
2549
+ @return_new
2550
+ def next(self, dim: str, wrap_around: bool = False):
2551
+ """Creates an expression where the variable at each label is the next variable in the specified dimension.
2552
+
2553
+ Parameters:
2554
+ dim:
2555
+ The dimension over which to shift the variable.
2556
+ wrap_around:
2557
+ If `True`, the last label in the dimension is connected to the first label.
2558
+
2559
+ Examples:
2560
+ >>> import pandas as pd
2561
+ >>> time_dim = pd.DataFrame({"time": ["00:00", "06:00", "12:00", "18:00"]})
2562
+ >>> space_dim = pd.DataFrame({"city": ["Toronto", "Berlin"]})
2563
+ >>> m = pf.Model()
2564
+ >>> m.bat_charge = pf.Variable(time_dim, space_dim)
2565
+ >>> m.bat_flow = pf.Variable(time_dim, space_dim)
2566
+ >>> # Fails because the dimensions are not the same
2567
+ >>> m.bat_charge + m.bat_flow == m.bat_charge.next("time")
2568
+ Traceback (most recent call last):
2569
+ ...
2570
+ pyoframe._constants.PyoframeError: Cannot subtract the two expressions below because expression 1 has extra labels.
2571
+ Expression 1: (bat_charge + bat_flow)
2572
+ Expression 2: bat_charge.next(…)
2573
+ Extra labels in expression 1:
2574
+ ┌───────┬─────────┐
2575
+ │ time ┆ city │
2576
+ ╞═══════╪═════════╡
2577
+ │ 18:00 ┆ Toronto │
2578
+ │ 18:00 ┆ Berlin │
2579
+ └───────┴─────────┘
2580
+ Use .drop_extras() or .keep_extras() to indicate how the extra labels should be handled. Learn more at
2581
+ https://bravos-power.github.io/pyoframe/latest/learn/concepts/addition
2582
+
2583
+ >>> (m.bat_charge + m.bat_flow).drop_extras() == m.bat_charge.next("time")
2584
+ <Constraint 'unnamed' height=6 terms=18 type=linear>
2585
+ ┌───────┬─────────┬────────────────────────────────────────────────────────────────────────────────┐
2586
+ │ time ┆ city ┆ constraint │
2587
+ │ (3) ┆ (2) ┆ │
2588
+ ╞═══════╪═════════╪════════════════════════════════════════════════════════════════════════════════╡
2589
+ │ 00:00 ┆ Toronto ┆ bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] │
2590
+ │ ┆ ┆ - bat_charge[06:00,Toronto] = 0 │
2591
+ │ 00:00 ┆ Berlin ┆ bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] │
2592
+ │ ┆ ┆ = 0 │
2593
+ │ 06:00 ┆ Toronto ┆ bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] │
2594
+ │ ┆ ┆ - bat_charge[12:00,Toronto] = 0 │
2595
+ │ 06:00 ┆ Berlin ┆ bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] │
2596
+ │ ┆ ┆ = 0 │
2597
+ │ 12:00 ┆ Toronto ┆ bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] │
2598
+ │ ┆ ┆ - bat_charge[18:00,Toronto] = 0 │
2599
+ │ 12:00 ┆ Berlin ┆ bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] │
2600
+ │ ┆ ┆ = 0 │
2601
+ └───────┴─────────┴────────────────────────────────────────────────────────────────────────────────┘
2602
+
2603
+ >>> (m.bat_charge + m.bat_flow) == m.bat_charge.next(
2604
+ ... "time", wrap_around=True
2605
+ ... )
2606
+ <Constraint 'unnamed' height=8 terms=24 type=linear>
2607
+ ┌───────┬─────────┬────────────────────────────────────────────────────────────────────────────────┐
2608
+ │ time ┆ city ┆ constraint │
2609
+ │ (4) ┆ (2) ┆ │
2610
+ ╞═══════╪═════════╪════════════════════════════════════════════════════════════════════════════════╡
2611
+ │ 00:00 ┆ Toronto ┆ bat_charge[00:00,Toronto] + bat_flow[00:00,Toronto] │
2612
+ │ ┆ ┆ - bat_charge[06:00,Toronto] = 0 │
2613
+ │ 00:00 ┆ Berlin ┆ bat_charge[00:00,Berlin] + bat_flow[00:00,Berlin] - bat_charge[06:00,Berlin] │
2614
+ │ ┆ ┆ = 0 │
2615
+ │ 06:00 ┆ Toronto ┆ bat_charge[06:00,Toronto] + bat_flow[06:00,Toronto] │
2616
+ │ ┆ ┆ - bat_charge[12:00,Toronto] = 0 │
2617
+ │ 06:00 ┆ Berlin ┆ bat_charge[06:00,Berlin] + bat_flow[06:00,Berlin] - bat_charge[12:00,Berlin] │
2618
+ │ ┆ ┆ = 0 │
2619
+ │ 12:00 ┆ Toronto ┆ bat_charge[12:00,Toronto] + bat_flow[12:00,Toronto] │
2620
+ │ ┆ ┆ - bat_charge[18:00,Toronto] = 0 │
2621
+ │ 12:00 ┆ Berlin ┆ bat_charge[12:00,Berlin] + bat_flow[12:00,Berlin] - bat_charge[18:00,Berlin] │
2622
+ │ ┆ ┆ = 0 │
2623
+ │ 18:00 ┆ Toronto ┆ bat_charge[18:00,Toronto] + bat_flow[18:00,Toronto] │
2624
+ │ ┆ ┆ - bat_charge[00:00,Toronto] = 0 │
2625
+ │ 18:00 ┆ Berlin ┆ bat_charge[18:00,Berlin] + bat_flow[18:00,Berlin] - bat_charge[00:00,Berlin] │
2626
+ │ ┆ ┆ = 0 │
2627
+ └───────┴─────────┴────────────────────────────────────────────────────────────────────────────────┘
2628
+
2629
+ """
2630
+ wrapped = (
2631
+ self.data.select(dim)
2632
+ .unique(maintain_order=Config.maintain_order)
2633
+ .sort(by=dim)
2634
+ )
2635
+ wrapped = wrapped.with_columns(pl.col(dim).shift(-1).alias("__next"))
2636
+ if wrap_around:
2637
+ wrapped = wrapped.with_columns(pl.col("__next").fill_null(pl.first(dim)))
2638
+ else:
2639
+ wrapped = wrapped.drop_nulls(dim)
2640
+
2641
+ expr = self.to_expr()
2642
+ data = expr.data.rename({dim: "__prev"})
2643
+
2644
+ data = data.join(
2645
+ wrapped,
2646
+ left_on="__prev",
2647
+ right_on="__next",
2648
+ # We use "right" instead of "left" to maintain consistency with the behavior without maintain_order
2649
+ maintain_order="right" if Config.maintain_order else None,
2650
+ ).drop(["__prev", "__next"], strict=False)
2651
+
2652
+ return data