pyoframe 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,911 @@
1
+ from __future__ import annotations
2
+ from typing import (
3
+ Iterable,
4
+ List,
5
+ Mapping,
6
+ Protocol,
7
+ Sequence,
8
+ overload,
9
+ Union,
10
+ Optional,
11
+ )
12
+ from abc import ABC, abstractmethod
13
+
14
+ import pandas as pd
15
+ import polars as pl
16
+
17
+ from pyoframe._arithmetic import _add_expressions, _get_dimensions
18
+ from pyoframe.constants import (
19
+ COEF_KEY,
20
+ CONST_TERM,
21
+ CONSTRAINT_KEY,
22
+ DUAL_KEY,
23
+ RESERVED_COL_KEYS,
24
+ VAR_KEY,
25
+ Config,
26
+ ConstraintSense,
27
+ UnmatchedStrategy,
28
+ )
29
+ from pyoframe.util import (
30
+ IdCounterMixin,
31
+ cast_coef_to_string,
32
+ concat_dimensions,
33
+ get_obj_repr,
34
+ parse_inputs_as_iterable,
35
+ )
36
+
37
+ from pyoframe.model_element import ModelElement
38
+
39
# Polars dtype of the variable-id column (VAR_KEY); used below when building or
# casting constant-term rows so they match the rest of an expression's data.
VAR_TYPE = pl.UInt32
40
+
41
+
42
def _forward_to_expression(func_name: str):
    """Build a method that delegates to the same-named method of ``self.to_expr()``.

    Parameters:
        func_name : str
            Name of the attribute looked up on the object returned by ``self.to_expr()``.

    Returns:
        A function usable as a method: it converts ``self`` with ``to_expr()`` and calls
        ``func_name`` on the result, passing through every positional and keyword argument.
    """

    def wrapper(self: "SupportsMath", *args, **kwargs) -> "Expression":
        # The target is resolved at call time on the freshly converted expression.
        target = getattr(self.to_expr(), func_name)
        return target(*args, **kwargs)

    return wrapper
48
+
49
+
50
class SupportsToExpr(Protocol):
    """Structural type for any object that exposes a ``to_expr()`` conversion."""

    def to_expr(self) -> "Expression":
        """Return this object converted to an ``Expression``."""
52
+
53
+
54
class SupportsMath(ABC, SupportsToExpr):
    """Any object that can be converted into an expression.

    Arithmetic operators convert the object with ``to_expr()`` and operate on the
    resulting Expression. The comparison operators ``<=``, ``>=`` and ``==`` build a
    Constraint instead of returning a bool. Because ``__eq__`` is overridden without a
    matching ``__hash__``, Python sets ``__hash__`` to None on this class.
    """

    def __init__(self):
        self.unmatched_strategy = UnmatchedStrategy.UNSET
        self.allowed_new_dims: List[str] = []

    def keep_unmatched(self):
        """Set the unmatched strategy to KEEP and return ``self`` (fluent)."""
        self.unmatched_strategy = UnmatchedStrategy.KEEP
        return self

    def drop_unmatched(self):
        """Set the unmatched strategy to DROP and return ``self`` (fluent)."""
        self.unmatched_strategy = UnmatchedStrategy.DROP
        return self

    def add_dim(self, *dims: str):
        """Append ``dims`` to ``allowed_new_dims`` (in place) and return ``self`` (fluent)."""
        self.allowed_new_dims.extend(dims)
        return self

    @abstractmethod
    def to_expr(self) -> "Expression":
        """Return this object converted to an Expression."""
        raise NotImplementedError

    # These operations are delegated unchanged to the converted Expression.
    __add__ = _forward_to_expression("__add__")
    __mul__ = _forward_to_expression("__mul__")
    sum = _forward_to_expression("sum")
    map = _forward_to_expression("map")

    def __neg__(self):
        """Return ``self`` multiplied by -1, keeping this object's unmatched strategy."""
        res = self.to_expr() * -1
        # Negating a constant term should keep the unmatched strategy
        res.unmatched_strategy = self.unmatched_strategy
        return res

    def __sub__(self, other):
        """Subtract ``other`` by adding its negation.

        ``other`` may be an int/float or any object providing ``to_expr()``.

        >>> import polars as pl
        >>> from pyoframe import Variable
        >>> df = pl.DataFrame({"dim1": [1,2,3], "value": [1,2,3]})
        >>> var = Variable(df["dim1"])
        >>> var - df
        <Expression size=3 dimensions={'dim1': 3} terms=6>
        [1]: x1 -1
        [2]: x2 -2
        [3]: x3 -3
        """
        if not isinstance(other, (int, float)):
            # Convert first so that negation goes through SupportsMath.__neg__.
            other = other.to_expr()
        return self.to_expr() + (-other)

    def __rmul__(self, other):
        """Reflected multiplication: ``other * self``."""
        return self.to_expr() * other

    def __radd__(self, other):
        """Reflected addition: ``other + self``."""
        return self.to_expr() + other

    def __le__(self, other):
        """Less-than-or-equal constraint (``self <= other``).

        Examples
        >>> from pyoframe import Variable
        >>> Variable() <= 1
        <Constraint sense='<=' size=1 dimensions={} terms=2>
        x1 <= 1
        """
        return Constraint(self - other, ConstraintSense.LE)

    def __ge__(self, other):
        """Greater-than-or-equal constraint (``self >= other``).

        Examples
        >>> from pyoframe import Variable
        >>> Variable() >= 1
        <Constraint sense='>=' size=1 dimensions={} terms=2>
        x1 >= 1
        """
        return Constraint(self - other, ConstraintSense.GE)

    def __eq__(self, value: object):
        """Equality constraint (``self == value``).

        Examples
        >>> from pyoframe import Variable
        >>> Variable() == 1
        <Constraint sense='=' size=1 dimensions={} terms=2>
        x1 = 1
        """
        return Constraint(self - value, ConstraintSense.EQ)
139
+
140
+
141
# Input types accepted wherever a set of coordinates is expected; each one is
# converted to a polars DataFrame by Set._set_to_polars.
SetTypes = Union[
    pl.DataFrame,
    pd.Index,
    pd.DataFrame,
    SupportsMath,
    Mapping[str, Sequence[object]],
    "Set",
]
149
+
150
+
151
class Set(ModelElement, SupportsMath):
    """A collection of unique coordinate rows stored as a polars DataFrame.

    Several inputs are combined with a cross join, so their column names must not
    overlap. Duplicate rows in the combined data are rejected.
    """

    def __init__(self, *data: SetTypes | Iterable[SetTypes], **named_data):
        """Build a set from positional inputs and/or ``name=values`` keyword inputs.

        Parameters:
            *data : SetTypes | Iterable[SetTypes]
                Inputs to combine; each is converted with ``_set_to_polars``.
            **named_data
                Each ``name=values`` pair is treated as the mapping ``{name: values}``.

        Raises:
            ValueError: If the combined data contains duplicate rows.
        """
        data_list = list(data)
        # `values` rather than `set`, to avoid shadowing the builtin.
        for name, values in named_data.items():
            data_list.append({name: values})
        df = self._parse_acceptable_sets(*data_list)
        if df.is_duplicated().any():
            raise ValueError("Duplicate rows found in input data.")
        super().__init__(df)

    def _new(self, data: pl.DataFrame):
        """Create a Set from ``data`` carrying over this set's model and unmatched strategy."""
        s = Set(data)
        s._model = self._model
        # Copy over the unmatched strategy on operations like .rename(), .with_columns(), etc.
        s.unmatched_strategy = self.unmatched_strategy
        return s

    @staticmethod
    def _parse_acceptable_sets(
        *over: SetTypes | Iterable[SetTypes],
    ) -> pl.DataFrame:
        """Convert every input to a DataFrame and cross join them into one.

        >>> import pandas as pd
        >>> dim1 = pd.Index([1, 2, 3], name="dim1")
        >>> dim2 = pd.Index(["a", "b"], name="dim1")
        >>> Set._parse_acceptable_sets([dim1, dim2])
        Traceback (most recent call last):
        ...
        AssertionError: All coordinates must have unique column names.
        >>> dim2.name = "dim2"
        >>> Set._parse_acceptable_sets([dim1, dim2])
        shape: (6, 2)
        ┌──────┬──────┐
        │ dim1 ┆ dim2 │
        │ ---  ┆ ---  │
        │ i64  ┆ str  │
        ╞══════╪══════╡
        │ 1    ┆ a    │
        │ 1    ┆ b    │
        │ 2    ┆ a    │
        │ 2    ┆ b    │
        │ 3    ┆ a    │
        │ 3    ┆ b    │
        └──────┴──────┘
        """
        assert len(over) > 0, "At least one set must be provided."
        over_iter: Iterable[SetTypes] = parse_inputs_as_iterable(*over)

        over_frames: List[pl.DataFrame] = [
            Set._set_to_polars(item) for item in over_iter
        ]

        over_merged = over_frames[0]

        for df in over_frames[1:]:
            assert (
                set(over_merged.columns) & set(df.columns) == set()
            ), "All coordinates must have unique column names."
            over_merged = over_merged.join(df, how="cross")
        return over_merged

    def to_expr(self) -> Expression:
        """Return an Expression with a constant term of coefficient 1 for every row."""
        return Expression(
            self.data.with_columns(
                pl.lit(1).alias(COEF_KEY), pl.lit(CONST_TERM).alias(VAR_KEY)
            )
        )

    def __mul__(self, other):
        """Cross two sets into a new Set; otherwise defer to expression multiplication."""
        if isinstance(other, Set):
            assert (
                set(self.data.columns) & set(other.data.columns) == set()
            ), "Cannot multiply two sets with columns in common."
            return Set(self.data, other.data)
        return super().__mul__(other)

    def __add__(self, other):
        """Reject Set + Set; otherwise defer to expression addition."""
        if isinstance(other, Set):
            raise ValueError("Cannot add two sets.")
        return super().__add__(other)

    def __repr__(self):
        return (
            get_obj_repr(self, ("name",), size=self.data.height, dimensions=self.shape)
            + "\n"
            + self.to_expr().to_str(max_line_len=80, max_rows=10)
        )

    @staticmethod
    def _set_to_polars(set: "SetTypes") -> pl.DataFrame:
        """Convert one supported input into a polars DataFrame of coordinates.

        Raises:
            ValueError: If the input type is unsupported, if a column is named
                'index', or if a column uses one of RESERVED_COL_KEYS.
        """
        # NOTE: the parameter name shadows the builtin `set`; it is kept unchanged so
        # existing callers are unaffected. The builtin is not needed in this function.
        if isinstance(set, dict):
            df = pl.DataFrame(set)
        elif isinstance(set, Set):
            # Must be tested before SupportsMath: Set is itself a SupportsMath, so in
            # the opposite order this branch could never be reached.
            df = set.data
        elif isinstance(set, SupportsMath):
            df = set.to_expr().data.drop(RESERVED_COL_KEYS).unique(maintain_order=True)
        elif isinstance(set, pd.Index):
            df = pl.from_pandas(pd.DataFrame(index=set).reset_index())
        elif isinstance(set, pd.DataFrame):
            df = pl.from_pandas(set)
        elif isinstance(set, pl.DataFrame):
            df = set
        elif isinstance(set, pl.Series):
            df = set.to_frame()
        else:
            raise ValueError(f"Cannot convert type {type(set)} to a polars DataFrame")

        if "index" in df.columns:
            raise ValueError(
                "Please specify a custom dimension name rather than using 'index' to avoid confusion."
            )

        for reserved_key in RESERVED_COL_KEYS:
            if reserved_key in df.columns:
                raise ValueError(
                    f"Cannot use reserved column names {reserved_key} as dimensions."
                )

        return df
268
+
269
+
270
class Expression(ModelElement, SupportsMath):
    """A linear expression.

    The data holds one row per term, with a coefficient column (COEF_KEY) and a
    variable-id column (VAR_KEY); any remaining columns are treated as dimensions.
    Rows whose variable id equals CONST_TERM are constant terms.
    """

    def __init__(self, data: pl.DataFrame):
        """
        >>> import pandas as pd
        >>> from pyoframe import Variable, Model
        >>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
        >>> m = Model()
        >>> m.Time = Variable(df.index)
        >>> m.Size = Variable(df.index)
        >>> expr = df["cost"] * m.Time + df["cost"] * m.Size
        >>> expr
        <Expression size=5 dimensions={'item': 2, 'time': 3} terms=10>
        [1,mon]: Time[1,mon] + Size[1,mon]
        [1,tue]: 2 Time[1,tue] +2 Size[1,tue]
        [1,wed]: 3 Time[1,wed] +3 Size[1,wed]
        [2,mon]: 4 Time[2,mon] +4 Size[2,mon]
        [2,tue]: 5 Time[2,tue] +5 Size[2,tue]
        """
        # Sanity checks, VAR_KEY and COEF_KEY must be present
        assert VAR_KEY in data.columns, "Missing variable column."
        assert COEF_KEY in data.columns, "Missing coefficient column."

        # Sanity check no duplicates indices (rows identical apart from the coefficient)
        duplicated_mask = data.drop(COEF_KEY).is_duplicated()
        if duplicated_mask.any():
            duplicated_data = data.filter(duplicated_mask)
            raise ValueError(
                f"Cannot create an expression with duplicate indices:\n{duplicated_data}."
            )

        super().__init__(data)

    def sum(self, over: Union[str, Iterable[str]]):
        """
        Sum the expression over one or more dimensions, which are removed from the result.

        Raises:
            ValueError: If the expression has no dimensions.
            AssertionError: If ``over`` names a dimension the expression does not have.

        Examples:
            >>> import pandas as pd
            >>> from pyoframe import Variable
            >>> df = pd.DataFrame({"item" : [1, 1, 1, 2, 2], "time": ["mon", "tue", "wed", "mon", "tue"], "cost": [1, 2, 3, 4, 5]}).set_index(["item", "time"])
            >>> quantity = Variable(df.reset_index()[["item"]].drop_duplicates())
            >>> expr = (quantity * df["cost"]).sum("time")
            >>> expr.data
            shape: (2, 3)
            ┌──────┬─────────┬───────────────┐
            │ item ┆ __coeff ┆ __variable_id │
            │ ---  ┆ ---     ┆ ---           │
            │ i64  ┆ f64     ┆ u32           │
            ╞══════╪═════════╪═══════════════╡
            │ 1    ┆ 6.0     ┆ 1             │
            │ 2    ┆ 9.0     ┆ 2             │
            └──────┴─────────┴───────────────┘
        """
        if isinstance(over, str):
            over = [over]
        elif not isinstance(over, (list, tuple)):
            # `over` is read several times below; materialize one-shot iterables
            # (e.g. generators) so they are not exhausted after the first use.
            over = list(over)
        dims = self.dimensions
        if not dims:
            raise ValueError(
                f"Cannot sum over dimensions {over} since the current expression has no dimensions."
            )
        assert set(over) <= set(dims), f"Cannot sum over {over} as it is not in {dims}"
        remaining_dims = [dim for dim in dims if dim not in over]

        return self._new(
            self.data.drop(over)
            .group_by(remaining_dims + [VAR_KEY], maintain_order=True)
            .sum()
        )

    def map(self, mapping_set: SetTypes, drop_shared_dims: bool = True):
        """
        Replaces the dimensions that are shared with mapping_set with the other dimensions found in mapping_set.

        This is particularly useful to go from one type of dimensions to another. For example, to convert data that
        is indexed by city to data indexed by country (see example).

        Parameters:
            mapping_set : SetTypes
                The set to map the expression to. This can be a DataFrame, Index, or another Set.
            drop_shared_dims : bool, default True
                If True, the dimensions shared between the expression and the mapping set are dropped from the resulting expression and
                repeated rows are summed.
                If False, the shared dimensions are kept in the resulting expression.

        Returns:
            Expression
                A new Expression containing the result of the mapping operation.

        Examples:

            >>> import polars as pl
            >>> from pyoframe import Variable, Model
            >>> pop_data = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "population": [10, 2, 8]}).to_expr()
            >>> cities_and_countries = pl.DataFrame({"city": ["Toronto", "Vancouver", "Boston"], "country": ["Canada", "Canada", "USA"]})
            >>> pop_data.map(cities_and_countries)
            <Expression size=2 dimensions={'country': 2} terms=2>
            [Canada]: 12
            [USA]: 8

            >>> pop_data.map(cities_and_countries, drop_shared_dims=False)
            <Expression size=3 dimensions={'city': 3, 'country': 2} terms=3>
            [Toronto,Canada]: 10
            [Vancouver,Canada]: 2
            [Boston,USA]: 8
        """
        mapping_set = Set(mapping_set)

        dims = self.dimensions
        if dims is None:
            raise ValueError("Cannot use .map() on an expression with no dimensions.")

        mapping_dims = mapping_set.dimensions
        if mapping_dims is None:
            raise ValueError(
                "Cannot use .map() with a mapping set containing no dimensions."
            )

        shared_dims = [dim for dim in dims if dim in mapping_dims]
        if not shared_dims:
            raise ValueError(
                f"Cannot apply .map() as there are no shared dimensions between the expression (dims={self.dimensions}) and the mapping set (dims={mapping_set.dimensions})."
            )

        mapped_expression = self * mapping_set

        if drop_shared_dims:
            # Same result as the module-level sum(shared_dims, mapped_expression),
            # without going through the name that shadows the builtin.
            return mapped_expression.sum(shared_dims)

        return mapped_expression

    def rolling_sum(self, over: str, window_size: int):
        """
        Calculates the rolling sum of the Expression over a specified window size for a given dimension.

        This method applies a rolling sum operation over the dimension specified by `over`,
        using a window defined by `window_size`.


        Parameters:
            over : str
                The name of the dimension (column) over which the rolling sum is calculated.
                This dimension must exist within the Expression's dimensions.
            window_size : int
                The size of the moving window in terms of number of records.
                The rolling sum is calculated over this many consecutive elements.

        Returns:
            Expression
                A new Expression instance containing the result of the rolling sum operation.
                This new Expression retains all dimensions (columns) of the original data,
                with the rolling sum applied over the specified dimension.

        Examples:
            >>> import polars as pl
            >>> from pyoframe import Variable, Model
            >>> cost = pl.DataFrame({"item" : [1, 1, 1, 2, 2], "time": [1, 2, 3, 1, 2], "cost": [1, 2, 3, 4, 5]})
            >>> m = Model()
            >>> m.quantity = Variable(cost[["item", "time"]])
            >>> (m.quantity * cost).rolling_sum(over="time", window_size=2)
            <Expression size=5 dimensions={'item': 2, 'time': 3} terms=8>
            [1,1]: quantity[1,1]
            [1,2]: quantity[1,1] +2 quantity[1,2]
            [1,3]: 2 quantity[1,2] +3 quantity[1,3]
            [2,1]: 4 quantity[2,1]
            [2,2]: 4 quantity[2,1] +5 quantity[2,2]
        """
        dims = self.dimensions
        if dims is None:
            raise ValueError(
                "Cannot use rolling_sum() with an expression with no dimensions."
            )
        assert over in dims, f"Cannot sum over {over} as it is not in {dims}"
        # Compare names for equality: `over` is a str, so `dim not in over` would be a
        # substring test and would wrongly drop e.g. a dimension "t" when over="time".
        remaining_dims = [dim for dim in dims if dim != over]

        return self._new(
            pl.concat(
                [
                    df.with_columns(pl.col(over).max())
                    for _, df in self.data.rolling(
                        index_column=over, period=f"{window_size}i", by=remaining_dims
                    )
                ]
            )
        )

    def within(self, set: "SetTypes") -> Expression:
        """
        Keep only the rows whose shared-dimension values appear in ``set``.

        Examples
        >>> import pandas as pd
        >>> general_expr = pd.DataFrame({"dim1": [1, 2, 3], "value": [1, 2, 3]}).to_expr()
        >>> filter_expr = pd.DataFrame({"dim1": [1, 3], "value": [5, 6]}).to_expr()
        >>> general_expr.within(filter_expr).data
        shape: (2, 3)
        ┌──────┬─────────┬───────────────┐
        │ dim1 ┆ __coeff ┆ __variable_id │
        │ ---  ┆ ---     ┆ ---           │
        │ i64  ┆ f64     ┆ u32           │
        ╞══════╪═════════╪═══════════════╡
        │ 1    ┆ 1.0     ┆ 0             │
        │ 3    ┆ 3.0     ┆ 0             │
        └──────┴─────────┴───────────────┘
        """
        df: pl.DataFrame = Set(set).data
        set_dims = _get_dimensions(df)
        assert (
            set_dims is not None
        ), "Cannot use .within() with a set with no dimensions."
        dims = self.dimensions
        assert (
            dims is not None
        ), "Cannot use .within() with an expression with no dimensions."
        dims_in_common = [dim for dim in dims if dim in set_dims]
        by_dims = df.select(dims_in_common).unique(maintain_order=True)
        return self._new(self.data.join(by_dims, on=dims_in_common))

    def __add__(self, other):
        """
        Add a number or another expression-like object to this expression.

        Examples:
            >>> import pandas as pd
            >>> from pyoframe import Variable
            >>> add = pd.DataFrame({"dim1": [1,2,3], "add": [10, 20, 30]}).to_expr()
            >>> var = Variable(add)
            >>> var + add
            <Expression size=3 dimensions={'dim1': 3} terms=6>
            [1]: x1 +10
            [2]: x2 +20
            [3]: x3 +30
            >>> var + add + 2
            <Expression size=3 dimensions={'dim1': 3} terms=6>
            [1]: x1 +12
            [2]: x2 +22
            [3]: x3 +32
            >>> var + pd.DataFrame({"dim1": [1,2], "add": [10, 20]})
            Traceback (most recent call last):
            ...
            pyoframe._arithmetic.PyoframeError: Failed to add expressions:
            <Expression size=3 dimensions={'dim1': 3} terms=3> + <Expression size=2 dimensions={'dim1': 2} terms=2>
            Due to error:
            Dataframe has unmatched values. If this is intentional, use .drop_unmatched() or .keep_unmatched()
            shape: (1, 2)
            ┌──────┬────────────┐
            │ dim1 ┆ dim1_right │
            │ ---  ┆ ---        │
            │ i64  ┆ i64        │
            ╞══════╪════════════╡
            │ 3    ┆ null       │
            └──────┴────────────┘
            >>> 5 + 2 * Variable()
            <Expression size=1 dimensions={} terms=2>
            2 x4 +5
        """
        if isinstance(other, str):
            raise ValueError(
                "Cannot add a string to an expression. Perhaps you meant to use pf.sum() instead of sum()?"
            )
        if isinstance(other, (int, float)):
            return self._add_const(other)
        other = other.to_expr()
        self._learn_from_other(other)
        return _add_expressions(self, other)

    def __mul__(
        self: "Expression", other: int | float | SupportsToExpr
    ) -> "Expression":
        """Multiply by a number or by an expression-like object.

        Raises:
            ValueError: If both operands contain variable terms (non-linear product).
        """
        if isinstance(other, (int, float)):
            return self.with_columns(pl.col(COEF_KEY) * other)

        other = other.to_expr()
        self._learn_from_other(other)

        # Put the operand that contains variables on the left, so that the right-hand
        # side can be used as a pure table of multipliers.
        if (other.data.get_column(VAR_KEY) != CONST_TERM).any():
            self, other = other, self

        if (other.data.get_column(VAR_KEY) != CONST_TERM).any():
            raise ValueError(
                "Multiplication of two expressions with variables is non-linear and not supported."
            )
        multiplier = other.data.drop(VAR_KEY)

        dims = self.dimensions_unsafe
        other_dims = other.dimensions_unsafe
        dims_in_common = [dim for dim in dims if dim in other_dims]

        data = (
            self.data.join(
                multiplier,
                on=dims_in_common,
                how="inner" if dims_in_common else "cross",
            )
            .with_columns(pl.col(COEF_KEY) * pl.col(COEF_KEY + "_right"))
            .drop(COEF_KEY + "_right")
        )

        return self._new(data)

    def to_expr(self) -> Expression:
        """An Expression is already an expression; return ``self``."""
        return self

    def _learn_from_other(self, other: Expression):
        """Adopt ``other``'s model when this expression has none."""
        if self._model is None and other._model is not None:
            self._model = other._model

    def _new(self, data: pl.DataFrame) -> Expression:
        """Create an Expression from ``data`` inheriting this one's model and allowed new dims."""
        e = Expression(data)
        e._model = self._model
        # Note: We intentionally don't propagate the unmatched strategy to the new expression
        # Copy the list: add_dim() extends it in place, so sharing one list object would
        # let a change on the new expression leak back into this one.
        e.allowed_new_dims = list(self.allowed_new_dims)
        return e

    def _add_const(self, const: int | float) -> Expression:
        """Return a new expression with ``const`` added to every constant term.

        A constant term with coefficient 0 is first created wherever one is missing.
        """
        dim = self.dimensions
        data = self.data
        # Fill in missing constant terms
        if not dim:
            if CONST_TERM not in data[VAR_KEY]:
                data = pl.concat(
                    [
                        data,
                        pl.DataFrame(
                            {COEF_KEY: [0.0], VAR_KEY: [CONST_TERM]},
                            schema={COEF_KEY: pl.Float64, VAR_KEY: VAR_TYPE},
                        ),
                    ],
                    how="vertical_relaxed",
                )
        else:
            keys = (
                data.select(dim)
                .unique(maintain_order=True)
                .with_columns(pl.lit(CONST_TERM).alias(VAR_KEY).cast(VAR_TYPE))
            )
            data = data.join(keys, on=dim + [VAR_KEY], how="outer_coalesce")
            data = data.with_columns(pl.col(COEF_KEY).fill_null(0.0))

        data = data.with_columns(
            pl.when(pl.col(VAR_KEY) == CONST_TERM)
            .then(pl.col(COEF_KEY) + const)
            .otherwise(pl.col(COEF_KEY))
        )

        return self._new(data)

    @property
    def constant_terms(self):
        """The constant terms as a DataFrame, with 0.0 filled in where none exists.

        NOTE(review): for a dimensionless expression without a constant term the
        returned frame also contains a VAR_KEY column, whereas every other path
        returns it with VAR_KEY dropped. Left as is; confirm whether callers care.
        """
        dims = self.dimensions
        constant_terms = self.data.filter(pl.col(VAR_KEY) == CONST_TERM).drop(VAR_KEY)
        if dims is not None:
            return constant_terms.join(
                self.data.select(dims).unique(maintain_order=True),
                on=dims,
                how="outer_coalesce",
            ).with_columns(pl.col(COEF_KEY).fill_null(0.0))
        else:
            if len(constant_terms) == 0:
                return pl.DataFrame(
                    {COEF_KEY: [0.0], VAR_KEY: [CONST_TERM]},
                    schema={COEF_KEY: pl.Float64, VAR_KEY: VAR_TYPE},
                )
            return constant_terms

    @property
    def variable_terms(self):
        """The rows of the data that are not constant terms."""
        return self.data.filter(pl.col(VAR_KEY) != CONST_TERM)

    def to_str_table(
        self,
        max_line_len=None,
        max_rows=None,
        include_const_term=True,
        var_map=None,
        float_precision=None,
    ):
        """Return a DataFrame with the dimension columns and an ``expr`` string column.

        Parameters:
            max_line_len: If truthy, longer strings are cut to this length and "..." is appended.
            max_rows: If truthy, only the first ``max_rows`` rows are kept.
            include_const_term: If False, constant terms are left out.
            var_map: Optional mapping used to name variables; falls back to the model's
                ``var_map`` and finally to ``x<id>``.
            float_precision: Passed to ``cast_coef_to_string``.
        """
        data = self.data if include_const_term else self.variable_terms
        data = cast_coef_to_string(data, float_precision=float_precision)

        # An explicit var_map wins; otherwise use the model's, if there is one.
        if var_map is None and self._model is not None:
            var_map = self._model.var_map
        if var_map is not None:
            data = var_map.apply(data, to_col="str_var")
        else:
            data = data.with_columns(
                pl.concat_str(pl.lit("x"), VAR_KEY).alias("str_var")
            )
        # Constant terms are printed without a variable name.
        data = data.with_columns(
            pl.when(pl.col(VAR_KEY) == CONST_TERM)
            .then(pl.lit(""))
            .otherwise("str_var")
            .alias(VAR_KEY)
        ).drop("str_var")

        dimensions = self.dimensions

        # Create a string for each term
        data = data.with_columns(
            expr=pl.concat_str(
                COEF_KEY,
                pl.lit(" "),
                VAR_KEY,
            )
        ).drop(COEF_KEY, VAR_KEY)

        # Combine terms into one string
        if dimensions is not None:
            data = data.group_by(dimensions, maintain_order=True).agg(
                pl.col("expr").str.concat(delimiter=" ")
            )
        else:
            data = data.select(pl.col("expr").str.concat(delimiter=" "))

        # Remove leading +
        data = data.with_columns(pl.col("expr").str.strip_chars(characters=" +"))

        # TODO add vertical ... if too many rows, in the middle of the table
        if max_rows:
            data = data.head(max_rows)

        if max_line_len:
            data = data.with_columns(
                pl.when(pl.col("expr").str.len_chars() > max_line_len)
                .then(
                    pl.concat_str(
                        pl.col("expr").str.slice(0, max_line_len),
                        pl.lit("..."),
                    )
                )
                .otherwise(pl.col("expr"))
            )
        return data

    def to_str_create_prefix(self, data):
        """Prepend ``<name/dimensions>: `` to each ``expr`` when there is a name or dimensions."""
        if self.name is not None or self.dimensions:
            data = concat_dimensions(data, prefix=self.name, ignore_columns=["expr"])
            data = data.with_columns(
                pl.concat_str("concated_dim", pl.lit(": "), "expr").alias("expr")
            ).drop("concated_dim")
        return data

    def to_str(
        self,
        max_line_len=None,
        max_rows=None,
        include_const_term=True,
        var_map=None,
        include_prefix=True,
        include_header=False,
        include_data=True,
        float_precision=None,
    ):
        """Render the expression as text: an optional header line, then one line per row."""
        result = ""
        if include_header:
            result += get_obj_repr(
                self, size=len(self), dimensions=self.shape, terms=len(self.data)
            )
        if include_header and include_data:
            result += "\n"
        if include_data:
            str_table = self.to_str_table(
                max_line_len=max_line_len,
                max_rows=max_rows,
                include_const_term=include_const_term,
                var_map=var_map,
                float_precision=float_precision,
            )
            if include_prefix:
                str_table = self.to_str_create_prefix(str_table)
            result += str_table.select(pl.col("expr").str.concat(delimiter="\n")).item()

        return result

    def __repr__(self) -> str:
        return self.to_str(
            max_line_len=80,
            max_rows=15,
            include_header=True,
            float_precision=Config.print_float_precision,
        )

    def __str__(self) -> str:
        return self.to_str()
749
+
750
+
751
@overload
def sum(over: Union[str, Sequence[str]], expr: SupportsToExpr): ...


@overload
def sum(over: SupportsToExpr): ...


def sum(
    over: Union[str, Sequence[str], SupportsToExpr],
    expr: Optional[SupportsToExpr] = None,
) -> "Expression":
    """Sum an expression over some or all of its dimensions.

    Two call forms are supported:

    * ``sum(over, expr)`` sums ``expr`` over the dimension name(s) given in ``over``.
    * ``sum(expr)`` sums the single argument over all of its dimensions.

    Raises:
        ValueError: In the one-argument form, if the expression has no dimensions.
        AssertionError: If the arguments do not have the expected types.
    """
    # Two-argument form: `over` names the dimensions, `expr` is what gets summed.
    if expr is not None:
        assert isinstance(over, (str, Sequence))
        return expr.to_expr().sum(over)

    # One-argument form: the only argument is the expression itself.
    assert isinstance(over, SupportsMath)
    expression = over.to_expr()
    dims_to_collapse = expression.dimensions
    if dims_to_collapse is None:
        raise ValueError(
            "Cannot sum over dimensions with an expression with no dimensions."
        )
    return expression.sum(dims_to_collapse)
775
+
776
+
777
def sum_by(by: Union[str, Sequence[str]], expr: SupportsToExpr) -> "Expression":
    """Sum ``expr`` over every dimension except those named in ``by``.

    Parameters:
        by : str | Sequence[str]
            The dimension name(s) to keep.
        expr : SupportsToExpr
            The object to convert with ``to_expr()`` and sum.

    Raises:
        AssertionError: If the converted expression has no dimensions.
    """
    kept = [by] if isinstance(by, str) else by
    expr = expr.to_expr()
    all_dims = expr.dimensions
    assert (
        all_dims is not None
    ), "Cannot sum by dimensions with an expression with no dimensions."
    # Everything that is not kept gets summed away.
    dims_to_sum = [name for name in all_dims if name not in kept]
    return sum(over=dims_to_sum, expr=expr)
787
+
788
+
789
class Constraint(Expression, IdCounterMixin):
    """A linear programming constraint.

    The constraint is stored as a left-hand-side expression plus a sense; when printed,
    the constant terms are moved to the right-hand side with their sign flipped.
    ``data_per_constraint`` holds one row per constraint with its assigned id and,
    once set, its dual value.
    """

    def __init__(self, lhs: Expression | pl.DataFrame, sense: ConstraintSense):
        """Initialize a constraint.

        Parameters:
            lhs: Expression
                The left hand side of the constraint.
            sense: Sense
                The sense of the constraint.
        """
        if isinstance(lhs, Expression):
            data = lhs.data
        else:
            data = lhs
        super().__init__(data)
        if isinstance(lhs, Expression):
            self._model = lhs._model
        self.sense = sense

        dims = self.dimensions
        # Keep the row order of the data so that ids are assigned to the same rows on
        # every run; unique() without maintain_order gives no ordering guarantee.
        data_per_constraint = (
            pl.DataFrame()
            if dims is None
            else self.data.select(dims).unique(maintain_order=True)
        )
        self.data_per_constraint = self._assign_ids(data_per_constraint)

    @property
    def dual(self) -> Union[pl.DataFrame, float]:
        """The dual values: a single value for a 1x1 result, otherwise a DataFrame.

        Raises:
            ValueError: If no dual values have been set.
        """
        if DUAL_KEY not in self.data_per_constraint.columns:
            raise ValueError(f"No dual values founds for constraint '{self.name}'")
        result = self.data_per_constraint.select(self.dimensions_unsafe + [DUAL_KEY])
        if result.shape == (1, 1):
            return result.item()
        return result

    @dual.setter
    def dual(self, value):
        """Store dual values given as a DataFrame with exactly DUAL_KEY and CONSTRAINT_KEY columns."""
        assert sorted(value.columns) == sorted([DUAL_KEY, CONSTRAINT_KEY])
        df = self.data_per_constraint
        # Replace any previously stored duals rather than duplicating the column.
        if DUAL_KEY in df.columns:
            df = df.drop(DUAL_KEY)
        self.data_per_constraint = df.join(
            value, on=CONSTRAINT_KEY, how="left", validate="1:1"
        )

    @classmethod
    def get_id_column_name(cls):
        """Name of the column holding constraint ids."""
        return CONSTRAINT_KEY

    @property
    def ids(self) -> pl.DataFrame:
        """The dimension columns together with the constraint id column."""
        return self.data_per_constraint.select(
            self.dimensions_unsafe + [CONSTRAINT_KEY]
        )

    def to_str_create_prefix(self, data, const_map=None):
        """Prefix each ``expr`` with its label.

        Without ``const_map`` this defers to ``Expression.to_str_create_prefix``;
        with it, the prefix is the mapped constraint id.
        """
        if const_map is None:
            return super().to_str_create_prefix(data)

        data_map = const_map.apply(self.ids, to_col=None)

        if self.dimensions is None:
            assert data.height == 1
            prefix = data_map.select(pl.col(CONSTRAINT_KEY)).item()
            return data.select(
                pl.concat_str(pl.lit(f"{prefix}: "), "expr").alias("expr")
            )

        data = data.join(data_map, on=self.dimensions)
        return data.with_columns(
            pl.concat_str(CONSTRAINT_KEY, pl.lit(": "), "expr").alias("expr")
        ).drop(CONSTRAINT_KEY)

    def to_str(
        self,
        max_line_len=None,
        max_rows=None,
        var_map=None,
        float_precision=None,
        const_map=None,
    ):
        """Render the constraint as ``<prefix>: <lhs> <sense> <rhs>``, one line per row.

        Parameters:
            max_line_len, max_rows, var_map: Passed to ``to_str_table`` for the left-hand side.
            float_precision: Applied to the coefficients on both sides.
            const_map: Optional mapping used to label each constraint by its id.
        """
        dims = self.dimensions
        str_table = self.to_str_table(
            max_line_len=max_line_len,
            max_rows=max_rows,
            include_const_term=False,
            var_map=var_map,
            # Format the left-hand side with the same precision as the right-hand side.
            float_precision=float_precision,
        )
        str_table = self.to_str_create_prefix(str_table, const_map=const_map)
        # Constant terms move to the right-hand side, hence the sign flip.
        rhs = self.constant_terms.with_columns(pl.col(COEF_KEY) * -1)
        rhs = cast_coef_to_string(rhs, drop_ones=False, float_precision=float_precision)
        # Remove leading +
        rhs = rhs.with_columns(pl.col(COEF_KEY).str.strip_chars(characters=" +"))
        rhs = rhs.rename({COEF_KEY: "rhs"})
        constr_str = pl.concat(
            [str_table, rhs], how=("align" if dims else "horizontal")
        )
        constr_str = constr_str.select(
            pl.concat_str("expr", pl.lit(f" {self.sense.value} "), "rhs").str.concat(
                delimiter="\n"
            )
        ).item()
        return constr_str

    def __repr__(self) -> str:
        return (
            get_obj_repr(
                self,
                ("name",),
                sense=f"'{self.sense.value}'",
                size=len(self),
                dimensions=self.shape,
                terms=len(self.data),
            )
            + "\n"
            + self.to_str(max_line_len=80, max_rows=15)
        )

    def _new(self, data: pl.DataFrame):
        """Create a Constraint from ``data`` with this constraint's sense and model."""
        c = Constraint(data, self.sense)
        c._model = self._model
        return c