desdeo 1.1.3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- desdeo/__init__.py +8 -8
- desdeo/api/README.md +73 -0
- desdeo/api/__init__.py +15 -0
- desdeo/api/app.py +40 -0
- desdeo/api/config.py +69 -0
- desdeo/api/config.toml +53 -0
- desdeo/api/db.py +25 -0
- desdeo/api/db_init.py +79 -0
- desdeo/api/db_models.py +164 -0
- desdeo/api/malaga_db_init.py +27 -0
- desdeo/api/models/__init__.py +66 -0
- desdeo/api/models/archive.py +34 -0
- desdeo/api/models/preference.py +90 -0
- desdeo/api/models/problem.py +507 -0
- desdeo/api/models/reference_point_method.py +18 -0
- desdeo/api/models/session.py +46 -0
- desdeo/api/models/state.py +96 -0
- desdeo/api/models/user.py +51 -0
- desdeo/api/routers/_NAUTILUS.py +245 -0
- desdeo/api/routers/_NAUTILUS_navigator.py +233 -0
- desdeo/api/routers/_NIMBUS.py +762 -0
- desdeo/api/routers/__init__.py +5 -0
- desdeo/api/routers/problem.py +110 -0
- desdeo/api/routers/reference_point_method.py +117 -0
- desdeo/api/routers/session.py +76 -0
- desdeo/api/routers/test.py +16 -0
- desdeo/api/routers/user_authentication.py +366 -0
- desdeo/api/schema.py +94 -0
- desdeo/api/tests/__init__.py +0 -0
- desdeo/api/tests/conftest.py +59 -0
- desdeo/api/tests/test_models.py +701 -0
- desdeo/api/tests/test_routes.py +216 -0
- desdeo/api/utils/database.py +274 -0
- desdeo/api/utils/logger.py +29 -0
- desdeo/core.py +27 -0
- desdeo/emo/__init__.py +29 -0
- desdeo/emo/hooks/archivers.py +172 -0
- desdeo/emo/methods/EAs.py +418 -0
- desdeo/emo/methods/__init__.py +0 -0
- desdeo/emo/methods/bases.py +59 -0
- desdeo/emo/operators/__init__.py +1 -0
- desdeo/emo/operators/crossover.py +780 -0
- desdeo/emo/operators/evaluator.py +118 -0
- desdeo/emo/operators/generator.py +356 -0
- desdeo/emo/operators/mutation.py +1053 -0
- desdeo/emo/operators/selection.py +1036 -0
- desdeo/emo/operators/termination.py +178 -0
- desdeo/explanations/__init__.py +6 -0
- desdeo/explanations/explainer.py +100 -0
- desdeo/explanations/utils.py +90 -0
- desdeo/mcdm/__init__.py +19 -0
- desdeo/mcdm/nautili.py +345 -0
- desdeo/mcdm/nautilus.py +477 -0
- desdeo/mcdm/nautilus_navigator.py +655 -0
- desdeo/mcdm/nimbus.py +417 -0
- desdeo/mcdm/pareto_navigator.py +269 -0
- desdeo/mcdm/reference_point_method.py +116 -0
- desdeo/problem/__init__.py +79 -0
- desdeo/problem/evaluator.py +561 -0
- desdeo/problem/gurobipy_evaluator.py +562 -0
- desdeo/problem/infix_parser.py +341 -0
- desdeo/problem/json_parser.py +944 -0
- desdeo/problem/pyomo_evaluator.py +468 -0
- desdeo/problem/schema.py +1808 -0
- desdeo/problem/simulator_evaluator.py +298 -0
- desdeo/problem/sympy_evaluator.py +244 -0
- desdeo/problem/testproblems/__init__.py +73 -0
- desdeo/problem/testproblems/binh_and_korn_problem.py +88 -0
- desdeo/problem/testproblems/dtlz2_problem.py +102 -0
- desdeo/problem/testproblems/forest_problem.py +275 -0
- desdeo/problem/testproblems/knapsack_problem.py +163 -0
- desdeo/problem/testproblems/mcwb_problem.py +831 -0
- desdeo/problem/testproblems/mixed_variable_dimenrions_problem.py +83 -0
- desdeo/problem/testproblems/momip_problem.py +172 -0
- desdeo/problem/testproblems/nimbus_problem.py +143 -0
- desdeo/problem/testproblems/pareto_navigator_problem.py +89 -0
- desdeo/problem/testproblems/re_problem.py +492 -0
- desdeo/problem/testproblems/river_pollution_problem.py +434 -0
- desdeo/problem/testproblems/rocket_injector_design_problem.py +140 -0
- desdeo/problem/testproblems/simple_problem.py +351 -0
- desdeo/problem/testproblems/simulator_problem.py +92 -0
- desdeo/problem/testproblems/spanish_sustainability_problem.py +945 -0
- desdeo/problem/testproblems/zdt_problem.py +271 -0
- desdeo/problem/utils.py +245 -0
- desdeo/tools/GenerateReferencePoints.py +181 -0
- desdeo/tools/__init__.py +102 -0
- desdeo/tools/generics.py +145 -0
- desdeo/tools/gurobipy_solver_interfaces.py +258 -0
- desdeo/tools/indicators_binary.py +11 -0
- desdeo/tools/indicators_unary.py +375 -0
- desdeo/tools/interaction_schema.py +38 -0
- desdeo/tools/intersection.py +54 -0
- desdeo/tools/iterative_pareto_representer.py +99 -0
- desdeo/tools/message.py +234 -0
- desdeo/tools/ng_solver_interfaces.py +199 -0
- desdeo/tools/non_dominated_sorting.py +133 -0
- desdeo/tools/patterns.py +281 -0
- desdeo/tools/proximal_solver.py +99 -0
- desdeo/tools/pyomo_solver_interfaces.py +464 -0
- desdeo/tools/reference_vectors.py +462 -0
- desdeo/tools/scalarization.py +3138 -0
- desdeo/tools/scipy_solver_interfaces.py +454 -0
- desdeo/tools/score_bands.py +464 -0
- desdeo/tools/utils.py +320 -0
- desdeo/utopia_stuff/__init__.py +0 -0
- desdeo/utopia_stuff/data/1.json +15 -0
- desdeo/utopia_stuff/data/2.json +13 -0
- desdeo/utopia_stuff/data/3.json +15 -0
- desdeo/utopia_stuff/data/4.json +17 -0
- desdeo/utopia_stuff/data/5.json +15 -0
- desdeo/utopia_stuff/from_json.py +40 -0
- desdeo/utopia_stuff/reinit_user.py +38 -0
- desdeo/utopia_stuff/utopia_db_init.py +212 -0
- desdeo/utopia_stuff/utopia_problem.py +403 -0
- desdeo/utopia_stuff/utopia_problem_old.py +415 -0
- desdeo/utopia_stuff/utopia_reference_solutions.py +79 -0
- desdeo-2.0.0.dist-info/LICENSE +21 -0
- desdeo-2.0.0.dist-info/METADATA +168 -0
- desdeo-2.0.0.dist-info/RECORD +120 -0
- {desdeo-1.1.3.dist-info → desdeo-2.0.0.dist-info}/WHEEL +1 -1
- desdeo-1.1.3.dist-info/METADATA +0 -18
- desdeo-1.1.3.dist-info/RECORD +0 -4
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""The base class for termination criteria.
|
|
2
|
+
|
|
3
|
+
The termination criterion is used to determine when the optimization process should stop. In this implementation, it
|
|
4
|
+
also includes a simple counter for the number of elapsed generations. This counter is increased by one each time the
|
|
5
|
+
termination criterion is called. The simplest termination criterion is reaching the maximum number of generations.
|
|
6
|
+
The implementation also contains a counter for the number of evaluations. This counter is updated by the Evaluator
|
|
7
|
+
and Generator classes. The termination criterion can be based on the number of evaluations as well.
|
|
8
|
+
|
|
9
|
+
Warning:
|
|
10
|
+
Each subclass of BaseTerminator must implement the do method. The do method should always call the
|
|
11
|
+
super().do method to increment the generation counter _before_ conducting the termination check.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from collections.abc import Sequence
|
|
15
|
+
|
|
16
|
+
from desdeo.tools.message import (
|
|
17
|
+
EvaluatorMessageTopics,
|
|
18
|
+
GeneratorMessageTopics,
|
|
19
|
+
IntMessage,
|
|
20
|
+
Message,
|
|
21
|
+
TerminatorMessageTopics,
|
|
22
|
+
)
|
|
23
|
+
from desdeo.tools.patterns import Subscriber
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class BaseTerminator(Subscriber):
    """The base class for the termination criteria.

    Also includes a simple counter for the number of elapsed generations. This counter is
    increased by one each time the termination criterion is called.
    """

    @property
    def provided_topics(self) -> dict[int, Sequence[TerminatorMessageTopics]]:
        """Return the topics provided by the terminator.

        Returns:
            dict[int, Sequence[TerminatorMessageTopics]]: The topics provided by the terminator,
                keyed by verbosity level.
        """
        return {
            0: [],
            1: [
                TerminatorMessageTopics.GENERATION,
                TerminatorMessageTopics.EVALUATION,
                TerminatorMessageTopics.MAX_GENERATIONS,
                TerminatorMessageTopics.MAX_EVALUATIONS,
            ],
        }

    @property
    def interested_topics(self):
        """Return the message topics that the terminator is interested in."""
        return [EvaluatorMessageTopics.NEW_EVALUATIONS, GeneratorMessageTopics.NEW_EVALUATIONS]

    def __init__(self, **kwargs):
        """Initialize a termination criterion.

        Args:
            kwargs: Additional keyword arguments. These are passed to the Subscriber class. At the
                very least, the publisher must be passed. See the Subscriber class for more
                information.
        """
        super().__init__(**kwargs)
        self.current_generation: int = 1
        # Updated through `update` from Evaluator/Generator NEW_EVALUATIONS messages.
        self.current_evaluations: int = 0
        # A value of zero means "not set"; subclasses assign the relevant maximum.
        self.max_generations: int = 0
        self.max_evaluations: int = 0

    def check(self) -> bool | None:
        """Check if the termination criterion is reached.

        The base implementation only increments the generation counter and returns None.
        Subclasses must call `super().check()` _before_ conducting their own termination check.

        Returns:
            bool | None: True if the termination criterion is reached, False otherwise.
        """
        self.current_generation += 1

    def state(self) -> Sequence[Message]:
        """Return the state of the termination criterion.

        Always reports the current generation and evaluation counts. The configured maxima are
        only included when they are non-zero (i.e., have actually been set).
        """
        state = [
            IntMessage(
                topic=TerminatorMessageTopics.GENERATION,
                value=self.current_generation,
                source=self.__class__.__name__,
            ),
            IntMessage(
                topic=TerminatorMessageTopics.EVALUATION, value=self.current_evaluations, source=self.__class__.__name__
            ),
        ]
        if self.max_evaluations != 0:
            state.append(
                IntMessage(
                    topic=TerminatorMessageTopics.MAX_EVALUATIONS,
                    value=self.max_evaluations,
                    source=self.__class__.__name__,
                )
            )
        if self.max_generations != 0:
            state.append(
                IntMessage(
                    topic=TerminatorMessageTopics.MAX_GENERATIONS,
                    value=self.max_generations,
                    source=self.__class__.__name__,
                )
            )
        return state

    def update(self, message: Message) -> None:
        """Update the number of evaluations.

        Note that for this method to work, this class must be registered as an observer of a
        subject that sends messages with a NEW_EVALUATIONS topic. The Evaluator and Generator
        classes do this.

        Args:
            message (Message): the message from the subject; only IntMessages carrying a
                NEW_EVALUATIONS topic are processed.
        """
        if not isinstance(message, IntMessage):
            return
        # BUGFIX: the original guard was
        #   `not isinstance(..., EvaluatorMessageTopics) or isinstance(..., GeneratorMessageTopics)`
        # which returned early for *every* GeneratorMessageTopics message, so evaluations
        # reported by the Generator were never counted even though
        # GeneratorMessageTopics.NEW_EVALUATIONS is in `interested_topics` and handled below.
        if not isinstance(message.topic, (EvaluatorMessageTopics, GeneratorMessageTopics)):
            return
        if (
            message.topic == EvaluatorMessageTopics.NEW_EVALUATIONS  # NOQA: PLR1714
            or message.topic == GeneratorMessageTopics.NEW_EVALUATIONS
        ):
            self.current_evaluations += message.value
121
|
+
class MaxGenerationsTerminator(BaseTerminator):
    """A class for a termination criterion based on the number of generations."""

    def __init__(self, max_generations: int, **kwargs):
        """Initialize a termination criterion based on the number of generations.

        Args:
            max_generations (int): the maximum number of generations.
            kwargs: Additional keyword arguments. These are passed to the Subscriber class. At the
                very least, the publisher must be passed. See the Subscriber class for more
                information.

        Raises:
            ValueError: if `max_generations` is not a non-negative integer.
        """
        super().__init__(**kwargs)
        # Validate the bound, mirroring the check done by MaxEvaluationsTerminator.
        if not isinstance(max_generations, int) or max_generations < 0:
            raise ValueError("max_generations must be a non-negative integer")
        self.max_generations = max_generations

    def check(self) -> bool:
        """Check if the termination criterion based on the number of generations is reached.

        Increments the generation counter (via the base class) and notifies subscribers before
        performing the check.

        Returns:
            bool: True if the termination criterion is reached, False otherwise.
        """
        super().check()
        self.notify()
        return self.current_generation > self.max_generations
|
|
149
|
+
# TODO (@light-weaver): This check is done _after_ the evaluations have taken place.
# This means that the algorithm will run for one more generation than it should.
class MaxEvaluationsTerminator(BaseTerminator):
    """A class for a termination criterion based on the number of evaluations."""

    def __init__(self, max_evaluations: int, **kwargs):
        """Initialize a termination criterion based on the number of evaluations.

        Looks for messages with key "num_evaluations" to update the number of evaluations.

        Args:
            max_evaluations (int): the maximum number of evaluations.
            kwargs: Additional keyword arguments. These are passed to the Subscriber class. At the
                very least, the publisher must be passed. See the Subscriber class for more
                information.

        Raises:
            ValueError: if `max_evaluations` is not a non-negative integer.
        """
        super().__init__(**kwargs)
        is_valid_bound = isinstance(max_evaluations, int) and max_evaluations >= 0
        if not is_valid_bound:
            raise ValueError("max_evaluations must be a non-negative integer")
        self.max_evaluations = max_evaluations
        self.current_evaluations = 0

    def check(self) -> bool:
        """Check if the termination criterion based on the number of evaluations is reached.

        Returns:
            bool: True if the termination criterion is reached, False otherwise.
        """
        super().check()
        self.notify()
        reached = self.current_evaluations >= self.max_evaluations
        return reached
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""Explainers are defined here."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import polars as pl
|
|
5
|
+
import shap
|
|
6
|
+
from scipy.spatial import cKDTree
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ShapExplainer:
    """Defines a SHAP explainer for reference point based methods."""

    def __init__(self, problem_data: pl.DataFrame, input_symbols: list[str], output_symbols: list[str]):
        """Initialize the explainer.

        Initializes the explainer with given data, and input and output symbols.
        The data should contain the columns listed in the input and output symbols.
        This data is then used to simulate the inputs and outputs of an (interactive)
        multiobjective optimization method, which is used to explain the relation of its
        inputs and outputs using SHAP values.

        Note:
            The `data` can be generated for a reference point based method by, e.g.,
            randomly sampling the input space and then evaluating the method with the
            sampled inputs to generate outputs.

        Args:
            problem_data (pl.DataFrame): the data to simulate the input and
                outputs of a multiobjective optimization method.
            input_symbols (list[str]): the input symbols present in `data`.
                These symbols represent the inputs to the method.
            output_symbols (list[str]): the output symbols present in `data`.
                These symbols represent the outputs of the method.
        """
        self.data = problem_data
        self.input_symbols = input_symbols
        self.output_symbols = output_symbols
        self.input_array = self.data[self.input_symbols].to_numpy()
        self.output_array = self.data[self.output_symbols].to_numpy()
        # KD-tree over the inputs for fast nearest-neighbor lookup in `evaluate`.
        self.to_output_tree = cKDTree(self.input_array)
        # Set by `setup`; `explain_input` cannot be used before that.
        self.explainer = None

    def setup(self, background_data: pl.DataFrame):
        """Setup the explainer.

        Setups the SHAP explainer with the given background data. The
        background data should have the columns `self.input_symbols`. The
        background data is used as the background (or missing data) when
        computing SHAP values. The mean (or expected values) of the background
        data's output (`self.output_symbols`) will determine the baseline of the
        SHAP values.

        Note:
            To generate a dataset with meaningful expected values, e.g., in case
            the SHAP values are better understood by relating them to a specific baseline,
            see `desdeo.explanations.generate_biased_mean_data`.

        Args:
            background_data (pl.DataFrame): the background data.
        """
        self.explainer = shap.Explainer(
            self.evaluate,
            masker=background_data[self.input_symbols].to_numpy(),
        )

    def evaluate(self, evaluate_array: np.ndarray) -> np.ndarray:
        """Evaluates the multiobjective optimization method represented by the data.

        Note:
            Evaluation happens by finding the closest matching input array in the
            `self.input_array` and then using that value's corresponding output
            as the evaluation result. Closest means lowest Euclidean distance.

        Args:
            evaluate_array (np.ndarray): the inputs to the method represented by the data.
                Can be either a single input, or an array of multiple inputs. Used mainly by
                `self.explain_input`.

        Returns:
            np.ndarray: the evaluated output(s) corresponding to the input data.
        """
        _, indices = self.to_output_tree.query(evaluate_array)

        return self.output_array[indices]

    def explain_input(self, to_be_explained: pl.DataFrame) -> "shap.Explanation":
        """Explain an input and produce SHAP values.

        Args:
            to_be_explained (pl.DataFrame): the input to be explained. The
                dataframe must have the columns defined in `self.input_symbols`.

        Raises:
            RuntimeError: if `setup` has not been called before this method.

        Returns:
            shap.Explanation: the computed explanation. Its `values` attribute holds the
                SHAP values for the input, `base_values` the baseline the SHAP values were
                computed against, and `data` the input the SHAP values were computed for.
                (DOCFIX: the original docstring described a plain dict with keys 'shaps',
                'base_values' and 'data', but the method returns the explainer's result.)
        """
        # BUGFIX: guard against use before `setup`; previously this failed with an opaque
        # "'NoneType' object is not callable" TypeError.
        if self.explainer is None:
            raise RuntimeError("The explainer has not been set up. Call `setup` first.")

        _to_be_explained = to_be_explained[self.input_symbols].to_numpy()

        return self.explainer(_to_be_explained)
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Utilities specific to explainable multiobjective optimization."""
|
|
2
|
+
|
|
3
|
+
import cvxpy as cp
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def generate_biased_mean_data(
    data: np.ndarray, target_means: np.ndarray, min_size: int = 2, max_size: int | None = None, solver: str = "SCIP"
) -> list | None:
    r"""Finds a subset of the provided data that has a mean value close to provided target values.

    Finds a subset of the provided data that has a mean value close to the
    provided target values. Formulates a mixed-integer quadratic programming problem to
    find a subset of `data` with a mean as close as possible to `target_values`
    and a size between `min_size` and `max_size`. In other words, the following problem is solved:

    \begin{align}
        &\min_{\mathbf{x}} & f(\mathbf{x}) & = \sum_{i=1}^m \left[ \left(\frac{1}{k}
        \sum_{j=1}^n x_j \times \text{data}_j\right)_i - \text{target}_i \right]^2 \\
        &\text{s.t.,} & k & = \sum_{i=1}^n x_i,\\
        & & k & \leq \text{max_size},\\
        & & k & \geq \text{min_size},\\
    \end{align},
    where $n$ is the number of rows in `data`, $m$ is the number of columns in
    `data`, and $k$ is the size of the subset. Notice that the closeness to the
    target means is based on the Euclidean distance.

    Note:
        Be mindful that this function can take a long time with a very large
        data set and large upper bound for the desired subset.

    Args:
        data (np.ndarray): the data from which to generate the subset with a
            biased mean. Should be a 2D array with each row representing a sample
            and each column the value of the variables in the sample.
        target_means (np.ndarray): the target mean values for each column the
            generated subset should have values close to.
        min_size (int, optional): the minimum size of the generated subset. Defaults to 2.
        max_size (int | None, optional): the maximum size of the generated
            subset. If None, then the maximum size is bound by the number of rows
            in `data`. Defaults to None.
        solver (str, optional): the selected solver to be used by cvxpy. The
            solver should support mixed-integer quadratic programming. Defaults to
            "SCIP".

    Returns:
        list | None: the indices of the samples of the generated subset with respect to
            `data`, i.e., the generated subset is `data[indices]`. If optimization is not
            successful, returns None instead.
    """
    # Number of rows and columns
    n_rows, n_cols = data.shape
    max_size = n_rows if max_size is None else max_size

    # Big M used to penalize the auxiliary variable z.
    # NOTE(review): big_m is the column-wise maximum of the data, which (together with the
    # `phi >= 0` constraint below) assumes the data is non-negative -- confirm for the
    # intended use.
    big_m = data.max(axis=0)

    # Binary variables to select rows from the data
    x = cp.Variable(n_rows, boolean=True)

    # Auxiliary variables, z represents the mean. phi is the weighted sum of the data (weighted by x)
    z = cp.Variable(n_cols)
    phi = cp.Variable((n_rows, n_cols))

    # The objective function, squared values of the difference between the mean
    # of the currently selected subset and the target values.
    objective = cp.sum_squares(z - target_means)

    # Define the constraints
    constraints = [
        # Sets the value of phi
        *[cp.sum(phi[:, col]) == cp.sum(cp.multiply(x, data[:, col])) for col in range(n_cols)],
        # Constraints the values of phi using big M, in practice setting z to be the mean values
        *[phi[:, col] <= cp.multiply(big_m[col], x) for col in range(n_cols)],
        *[phi[:, col] <= z[col] for col in range(n_cols)],
        *[phi[:, col] >= z[col] - cp.multiply(big_m[col], 1 - x) for col in range(n_cols)],
        phi >= 0,
        # Bounds the size of the set: min_size <= k <= max_size
        cp.sum(x) >= min_size,
        cp.sum(x) <= max_size,
    ]

    # Create the problem model
    problem = cp.Problem(cp.Minimize(objective), constraints)

    # Solve the problem
    problem.solve(solver=solver)

    # BUGFIX: when the solve fails (infeasible, solver error), `x.value` is None and the
    # original indexing raised a TypeError instead of returning None as documented.
    if x.value is None:
        return None

    # Return the indices of the found subset.
    # BUGFIX: compare with a 0.5 threshold rather than `== 1`; MIP solvers may report
    # binary variables as values like 0.9999999, which an exact comparison would miss.
    return [i for i in range(n_rows) if x.value[i] > 0.5]
|
desdeo/mcdm/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Imports available from the desdeo-mcdm package."""
|
|
2
|
+
|
|
3
|
+
__all__ = [
|
|
4
|
+
"NimbusError",
|
|
5
|
+
"generate_starting_point",
|
|
6
|
+
"infer_classifications",
|
|
7
|
+
"solve_intermediate_solutions",
|
|
8
|
+
"solve_sub_problems",
|
|
9
|
+
"rpm_solve_solutions",
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
from .nimbus import (
|
|
13
|
+
NimbusError,
|
|
14
|
+
generate_starting_point,
|
|
15
|
+
infer_classifications,
|
|
16
|
+
solve_intermediate_solutions,
|
|
17
|
+
solve_sub_problems,
|
|
18
|
+
)
|
|
19
|
+
from .reference_point_method import rpm_solve_solutions
|