libadalina-analytics 0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. libadalina_analytics/__init__.py +0 -0
  2. libadalina_analytics/clustering/__init__.py +0 -0
  3. libadalina_analytics/clustering/algorithms/__init__.py +5 -0
  4. libadalina_analytics/clustering/algorithms/zoning_algorithm.py +49 -0
  5. libadalina_analytics/clustering/algorithms/zoning_model.py +351 -0
  6. libadalina_analytics/clustering/algorithms/zoning_model_simple.py +342 -0
  7. libadalina_analytics/clustering/models/__init__.py +0 -0
  8. libadalina_analytics/clustering/models/adalina_zoning_algorithm_options.py +37 -0
  9. libadalina_analytics/clustering/models/adalina_zoning_data.py +404 -0
  10. libadalina_analytics/clustering/models/adalina_zoning_distance.py +14 -0
  11. libadalina_analytics/clustering/models/adalina_zoning_solution.py +380 -0
  12. libadalina_analytics/exceptions/__init__.py +0 -0
  13. libadalina_analytics/exceptions/input_file_exception.py +17 -0
  14. libadalina_analytics/flows_distribution/__init__.py +0 -0
  15. libadalina_analytics/flows_distribution/algorithms/__init__.py +0 -0
  16. libadalina_analytics/flows_distribution/algorithms/flows_distribution_algorithm.py +140 -0
  17. libadalina_analytics/flows_distribution/algorithms/origin_destination_extractor.py +47 -0
  18. libadalina_analytics/flows_distribution/models/__init__.py +0 -0
  19. libadalina_analytics/graph_extraction/__init__.py +0 -0
  20. libadalina_analytics/graph_extraction/builders/__init__.py +5 -0
  21. libadalina_analytics/graph_extraction/builders/graph_builder.py +157 -0
  22. libadalina_analytics/graph_extraction/readers/__init__.py +9 -0
  23. libadalina_analytics/graph_extraction/readers/open_street_map.py +79 -0
  24. libadalina_analytics/graph_extraction/readers/reader.py +48 -0
  25. libadalina_analytics/graph_extraction/utils/__init__.py +0 -0
  26. libadalina_analytics/graph_extraction/utils/search_address.py +12 -0
  27. libadalina_analytics/graph_extraction/writers/__init__.py +11 -0
  28. libadalina_analytics/graph_extraction/writers/to_csv.py +103 -0
  29. libadalina_analytics/graph_extraction/writers/to_geopackage.py +67 -0
  30. libadalina_analytics/graph_extraction/writers/to_shapefile.py +21 -0
  31. libadalina_analytics/relocation/__init__.py +0 -0
  32. libadalina_analytics/relocation/algorithms/__init__.py +5 -0
  33. libadalina_analytics/relocation/algorithms/adalina_algorithms.py +227 -0
  34. libadalina_analytics/relocation/algorithms/adalina_model_highs.py +823 -0
  35. libadalina_analytics/relocation/algorithms/adalina_relocation_algorithm.py +55 -0
  36. libadalina_analytics/relocation/models/__init__.py +13 -0
  37. libadalina_analytics/relocation/models/adalina_data.py +664 -0
  38. libadalina_analytics/relocation/models/adalina_model_type.py +73 -0
  39. libadalina_analytics/relocation/models/adalina_solution.py +431 -0
  40. libadalina_analytics/relocation/models/options.py +54 -0
  41. libadalina_analytics/relocation/models/relocation_resource.py +6 -0
  42. libadalina_analytics/utils/__init__.py +6 -0
  43. libadalina_analytics/utils/geometry_formats.py +6 -0
  44. libadalina_analytics/utils/timing.py +15 -0
  45. libadalina_analytics-0.0.dist-info/METADATA +23 -0
  46. libadalina_analytics-0.0.dist-info/RECORD +49 -0
  47. libadalina_analytics-0.0.dist-info/WHEEL +5 -0
  48. libadalina_analytics-0.0.dist-info/licenses/LICENSE +21 -0
  49. libadalina_analytics-0.0.dist-info/top_level.txt +1 -0
File without changes
File without changes
@@ -0,0 +1,5 @@
1
+ from .zoning_algorithm import clustering_algorithm
2
+
3
+ __all__ = [
4
+ 'clustering_algorithm'
5
+ ]
@@ -0,0 +1,49 @@
1
+ from libadalina_analytics.utils import GeometryFormats
2
+ from .zoning_model_simple import AdalinaZoningModelSimple
3
+ from libadalina_core.sedona_utils import DataFrame, EPSGFormats
4
+
5
+ from ..models.adalina_zoning_data import AdalinaZoningData
6
+ from ..models.adalina_zoning_distance import ClusteringDistance
7
+ from ..models.adalina_zoning_solution import AdalinaZoningSolution
8
+
9
+
10
def clustering_algorithm(data: DataFrame,
                         epsg: EPSGFormats,
                         geometry_column: str = 'geometry',
                         geometry_format: GeometryFormats = GeometryFormats.WKT,
                         weight_column: str | None = None,
                         k_min: int | None = None,
                         k_max: int | None = None,
                         f_min: float | None = None,
                         distances: list[ClusteringDistance] | None = None,
                         timelimit: int = 60) -> AdalinaZoningSolution | None:
    """Run the simple zoning model on ``data`` and return its solution.

    The keyword arguments are packed into the ``user_input`` dictionary that
    ``AdalinaZoningData.from_Amelia`` receives; optional settings left at
    ``None`` are omitted from that dictionary altogether.

    Args:
        data: Input data frame, forwarded as ``amelia_file``.
        epsg: Stored under the ``"epsg"`` key.
        geometry_column: Stored under the ``"geometry"`` key.
        geometry_format: Stored under the ``"geometry_type"`` key.
        weight_column: Stored under ``"weight"`` when given.
        k_min: Stored under ``"Kmin"`` when given.
        k_max: Stored under ``"Kmax"`` when given.
        f_min: Stored under ``"Fmin"`` when given.
        distances: When given, each entry is converted to a dictionary with
            the keys ``name``, ``weight`` and ``func`` (taken from the
            entry's ``name``, ``weight`` and ``function`` attributes).
        timelimit: Stored under ``"timelimit"`` and also passed to the
            model's ``run`` call.

    Returns:
        Whatever ``model.get_solution(False)`` yields after the run.
    """
    # Optional settings, in the order they are inserted into ``user_input``.
    optional_settings = {
        "weight": weight_column,
        "Kmin": k_min,
        "Kmax": k_max,
        "Fmin": f_min,
    }
    user_input: dict = {
        "epsg": epsg,
        "geometry_type": geometry_format,
        "geometry": geometry_column,
        "timelimit": timelimit,
        **{key: value
           for key, value in optional_settings.items()
           if value is not None},
    }
    if distances is not None:
        user_input["distances"] = [
            {'name': entry.name, 'weight': entry.weight, 'func': entry.function}
            for entry in distances
        ]

    zoning_data = AdalinaZoningData.from_Amelia(amelia_file=data,
                                                user_input=user_input)
    model = AdalinaZoningModelSimple(zoning_data)
    model.run(timelimit=timelimit)  # the run status is not used here
    return model.get_solution(False)
@@ -0,0 +1,351 @@
1
+ from clustering.models.adalina_zoning_data import AdalinaZoningData
2
+ from clustering.models.adalina_zoning_solution import AdalinaZoningSolution
3
+
4
+ import highspy
5
+ import numpy as np
6
+ import time
7
+ import logging
8
+ from sklearn.cluster import AgglomerativeClustering
9
+
10
+ hscb = highspy.cb
11
+
12
+ # model.callbacks
13
+ # highspy.highs.HighsCallback
14
+
15
+ """
16
+ sum_{(s,t) in E : s in S, t in T} x_{st} >= x_{ij}
17
+ forall cut [S,T] : i in S, j in T
18
+ """
19
+
20
+ # def lazy_SEC_constraint(callback_type : hscb.HighsCallbackType,
21
+ # message : str,
22
+ # data_out : hscb.HighsCallbackDataOut,
23
+ # data_in : hscb.HighsCallbackDataOut,
24
+ # user_callback_data : object) -> None:
25
+ #
26
+ # if callback_type == hscb.HighsCallbackType.kCallbackMipSolution:
27
+ # print("callback mip_solution")
28
+ # print(data_out.mip_solution.to_array(len(user_callback_data.vardict)))
29
+ #
30
+ # # print("obj. fun callback ", data_out.objective_function_value)
31
+ # # print("obj. fun callback ", data_out.running_time)
32
+ #
33
+ # assert isinstance(user_callback_data, AdalinaZoningModel)
34
+ #
35
+ # # if not user_callback_data._build_solution():
36
+ # # if len(user_callback_data.solution.unconnected_clusters) > 0:
37
+ #
38
+ # # costruisci gomory-hu su G con i valori delle x
39
+ # # per ogni cluster disconnesso:
40
+ # # per ogni coppia di nodi
41
+ #
42
+ # # for cl in user_callback_data.solution.unconnected_clusters:
43
+ # # print(cl)
44
+ #
45
+ # pass
46
+ #
47
+ # # elif callback_type == hscb.kCallbackMipDefineLazyConstraints:
48
+ # # print("LAZY CONSTRAINT")
49
+
50
+ # VAR
51
+ # x_ij {1 if i in cluster with representative j, 0 otherwise}
52
+ # y_j {1 if j is representative of a cluster, 0 otherwise}
53
+
54
+ # Kmin <= sum{j in N} y_j <= Kmax
55
+ # sum{j in N} x_ij + y_i = 1 forall i in N
56
+ # x_{ij} <= sum{k in N : k in adj(i) and dist^-(k, j) <= dist^-(i,j) } forall (i,j) : i not in adj(j)
57
+ # x_{ik} + x_{jk} <= 1 + w_{ij} forall i,j,k in N^3 : i \not= j \not= k
58
+ # x_{ik} + w_{ik} <= 1 forall (i,k) in N^2 : i \not= k
59
+ # min \sum{(i,j) \in N^2 : i \not= j} c[i,j] * (x_{ij} + w_{ij})
60
+ # \sum_{i in N : i\not=j } x_ij * c_i >= y_j * F_min forall j in N
61
+
62
class AdalinaZoningModel:
    """Integer-programming zoning (clustering) model solved with HiGHS.

    Variables created by ``__init__``:

    * ``x_i_j`` -- one integer variable in [0, 1] for every pair ``(i, j)``
      of ``data.E1``, with objective coefficient ``data.get_cost_edge(i, j)``.
      Variables with a positive value become edges of the solution.
    * ``z_i`` -- one integer variable in [0, 1] for every node of ``data.G``
      with objective coefficient 0.  Variables with a positive value become
      cluster representatives of the solution.

    ``vardict`` maps each variable name to its creation index, which is used
    as the column index when rows are added to the HiGHS model.

    Connectivity of the clusters is not part of the initial model: ``run``
    re-solves the model and adds cut rows for the clusters the solution
    reports as unconnected.
    """

    def __init__(self, data: AdalinaZoningData, log_fout=None):
        """Create the HiGHS model: variables and the static constraint rows.

        Args:
            data: Problem instance.  This constructor reads ``E1``,
                ``G.nodes``, ``edges``, ``V``, ``Fmin``, ``Kmin``, ``Kmax``
                and calls ``get_cost_edge`` / ``get_weight_node``.
            log_fout: Stored on the instance; not used by this class.
        """
        self.log_fout = log_fout
        self.data = data
        self.vardict = dict()
        self.model = highspy.Highs()
        inf = highspy.kHighsInf
        self.run_status = None
        self.solution = None
        self.best_feasible_sol = None

        # x_{ij} in {0,1} for every (i,j) in E'
        for (i, j) in data.E1:
            self._add_variable(0, 1,
                               data.get_cost_edge(i, j),
                               highspy.HighsVarType.kInteger,
                               f"x_{i}_{j}")

        # z_i in {0,1}; the first node iterated is fixed to 1 (lb = ub = 1).
        for position, i in enumerate(data.G.nodes):
            lower_bound = 1 if position == 0 else 0
            self._add_variable(lower_bound, 1, 0,
                               highspy.HighsVarType.kInteger,
                               f"z_{i}")

        # x_{ij} + x_{jk} - x_{ik} <= 1 (and the two other sign patterns)
        # for all i, j, k such that (i,j), (j,k), (i,k) in E'
        for i in data.G.nodes:
            for j in data.edges[i]:
                for k in data.edges[j]:
                    if k <= i:
                        continue

                    triangle = np.array([
                        self._get_vardict_index(f'x_{i}_{j}'),
                        self._get_vardict_index(f'x_{j}_{k}'),
                        self._get_vardict_index(f'x_{i}_{k}')
                    ])
                    # One row per position of the -1 coefficient.
                    for el in range(3):
                        _varvalues = [1] * 3
                        _varvalues[el] = -1
                        self.model.addRow(-inf, 1, 3,
                                          triangle,
                                          np.array(_varvalues))

        # sum_{(i,j) in E'} w_j x_{ij} >= (F^min - w_i) z_i   for all i in V
        for i in data.G.nodes:
            _vars = [self._get_vardict_index(f'z_{i}')]
            _varvalues = [- self.data.Fmin + self.data.get_weight_node(i)]
            for j in data.edges[i]:
                _vars.append(self._get_vardict_index(f'x_{i}_{j}'))
                _varvalues.append(self.data.get_weight_node(j))

            self.model.addRow(0, +inf, len(_vars),
                              np.array(_vars),
                              np.array(_varvalues))

        # sum_{j in V : j < i} x_{ji} <= |V| (1 - z_i)   for all i in V
        for i in data.G.nodes:
            _vars = [self._get_vardict_index(f'z_{i}')]
            _varvalues = [self.data.V]
            for j in data.G.nodes:
                if j >= i:
                    continue

                _vars.append(self._get_vardict_index(f'x_{j}_{i}'))
                _varvalues.append(1)

            self.model.addRow(-inf, self.data.V,
                              len(_vars),
                              np.array(_vars),
                              np.array(_varvalues))

        # sum_{j in V : j < i} x_{ji} + z_i >= 1   for all i in V
        for i in data.G.nodes:
            _vars = [self._get_vardict_index(f'z_{i}')]
            _varvalues = [1]
            for j in data.G.nodes:
                if j >= i:
                    continue

                _vars.append(self._get_vardict_index(f'x_{j}_{i}'))
                _varvalues.append(1)

            self.model.addRow(1, inf,
                              len(_vars),
                              np.array(_vars),
                              np.array(_varvalues))

        # K^min <= sum_{i in V} z_i <= K^max
        _vars = [self._get_vardict_index(f'z_{i}') for i in data.G.nodes]
        _varvalues = np.ones(self.data.V)
        self.model.addRow(self.data.Kmin, self.data.Kmax,
                          len(_vars),
                          np.array(_vars),
                          np.array(_varvalues))

    def _add_variable_dict_element(self, name):
        """Register ``name`` in ``vardict`` with the next free index."""
        self.vardict[name] = len(self.vardict)

    def _add_variable(self, lb, ub, obj, vartype, name):
        """Add a variable to the HiGHS model and record its index by name.

        Returns:
            The object returned by ``self.model.addVariable``.
        """
        el = self.model.addVariable(lb, ub,
                                    obj=obj,
                                    type=vartype,
                                    name=name)
        self._add_variable_dict_element(name)

        return el

    def _get_vardict_index(self, name):
        """Return the index recorded for variable ``name``.

        Raises:
            KeyError: If no variable with that name was added.
        """
        return self.vardict[name]

    def _initial_labels(self):
        """Compute warm-start cluster labels; return ``None`` on failure.

        Runs average-linkage agglomerative clustering with ``data.Kmax``
        clusters on ``data.node_pairs_costs_df`` used as a precomputed
        metric.  The warm start is best effort, so any ordinary exception is
        logged at DEBUG level and swallowed.
        """
        try:
            agglom_clust = AgglomerativeClustering(
                linkage='average',
                metric='precomputed',
                n_clusters=self.data.Kmax).fit(self.data.node_pairs_costs_df)
        except Exception as exc:
            # The failure detail goes through a %s placeholder; passing an
            # extra positional argument without one makes logging itself fail.
            logging.debug("AgglomerativeClustering failed: %s", exc)
            return None

        return agglom_clust.labels_

    def _set_warm_start(self, label_sol):
        """Hand HiGHS a sparse starting solution derived from ``label_sol``.

        For every distinct label the ``z`` variable of the first member and
        the ``x`` variables of all member pairs (in increasing position
        order) are given the value 1.
        """
        labels = np.asarray(label_sol)  # also accept a plain sequence
        xwarmstart = []
        for el in np.unique(labels):
            areas = np.where(labels == el)[0]
            xwarmstart.append(self._get_vardict_index(f'z_{areas[0]}'))
            for i, _el in enumerate(areas):
                for j in range(i + 1, areas.shape[0]):
                    xwarmstart.append(
                        self._get_vardict_index(f'x_{_el}_{areas[j]}'))

        xwarmstart = np.array(xwarmstart, dtype=int)
        self.model.setSolution(xwarmstart.shape[0],
                               xwarmstart,
                               np.ones(xwarmstart.shape[0]))

    def _add_connectivity_cuts(self):
        """Add one cut row per non-adjacent node pair of each unconnected cluster.

        For a pair (u, v) and the cut [S, T] returned by
        ``data.minimum_edge_weight_in_shortest_path(u, v)`` the row is

            sum_{(s,t) in E : s in S, t in T} x_{st} - x_{uv} >= 0
        """
        # Build the Gomory-Hu structure on G from the current x values.
        xvarvaluedict = dict()
        allvars = self.model.getVariables()

        for e in self.data.G.edges:
            xvarvaluedict[e] = self.model.variableValue(
                allvars[self._get_vardict_index(f'x_{e[0]}_{e[1]}')])

        self.data.gomory_hu(xvarvaluedict)

        for cl in self.solution.unconnected_clusters:
            for i, u in enumerate(cl):
                for j in range(i + 1, len(cl)):
                    v = cl[j]

                    # Directly adjacent nodes need no cut.
                    if (u, v) in self.data.G.edges:
                        continue

                    _, (S, T) = self.data.minimum_edge_weight_in_shortest_path(u, v)

                    # x variables are named with the smaller index first.
                    low, high = (u, v) if u < v else (v, u)
                    _vars = [self._get_vardict_index(f'x_{low}_{high}')]
                    _varvalues = [-1]

                    for s in S:
                        for t in T:
                            if (s, t) in self.data.G.edges:
                                low, high = (s, t) if s < t else (t, s)
                                _vars.append(
                                    self._get_vardict_index(f'x_{low}_{high}'))
                                _varvalues.append(1)

                    self.model.addRow(0, +highspy.kHighsInf,
                                      len(_vars),
                                      np.array(_vars),
                                      np.array(_varvalues))

    def run(self, timelimit=60, label_sol=None):
        """Solve the model, adding connectivity cuts until done.

        Each HiGHS solve is given ``timelimit / 5`` as its ``time_limit``
        option.  After every solve the solution is rebuilt; the loop stops
        when no solution is available, when more than ``timelimit`` seconds
        have elapsed since the first solve started, or when the solution is
        feasible / reports no unconnected cluster.  Otherwise cut rows are
        added and the model is solved again.

        Args:
            timelimit: Overall budget in seconds.
            label_sol: Optional cluster label per node used to warm start
                the solver.  When ``None`` the labels come from an
                agglomerative clustering; if that fails no warm start is set.

        Returns:
            The status returned by the last ``self.model.run()`` call.
        """
        self.solution = None
        self.model.setOptionValue("time_limit", timelimit / 5)

        if label_sol is None:
            label_sol = self._initial_labels()

        if label_sol is not None:
            self._set_warm_start(label_sol)

        start_time = time.time()

        while True:
            # Drop the cached solution so get_solution rebuilds it.
            self.solution = None
            self.run_status = self.model.run()

            self.get_solution(run_model=False)

            if self.solution is None:
                break

            if time.time() - start_time > timelimit:
                break

            if (self.solution.is_feasible
                    or len(self.solution.unconnected_clusters) == 0):
                break

            self._add_connectivity_cuts()

        return self.run_status

    def _build_solution(self):
        """Rebuild ``self.solution`` from the current variable values.

        Variables whose value is below ``1e-5`` are ignored.  Every other
        ``x_i_j`` adds the edge ``(i, j)``; every other ``z_i`` adds ``i`` as
        a representative, after all edges have been added.

        Returns:
            The result of ``self.solution.check_feasibility()``.
        """
        self.solution = AdalinaZoningSolution(self.data)
        allvars = self.model.getVariables()

        _repr = []
        for varname, varindex in self.vardict.items():

            val = self.model.variableValue(allvars[varindex])
            if val < 1e-5:
                continue

            name_parts = varname.split("_")

            if name_parts[0].startswith('x'):
                self.solution.add_edge(int(name_parts[1]), int(name_parts[2]))
            elif name_parts[0].startswith('z'):
                _repr.append(int(name_parts[1]))

        for r in _repr:
            self.solution.add_repr(r)

        return self.solution.check_feasibility()

    def get_solution(self, run_model=True):
        """Return the current solution, building (or computing) it if needed.

        A cached ``self.solution`` is returned as is.  Otherwise, when the
        last run status is ``kOk``, or ``kWarning`` with a feasible primal
        solution, the solution is built from the model and returned even if
        its feasibility check fails.  A ``kError`` status yields ``None``.
        In every other case the model is run first when ``run_model`` is
        true; if it is false ``None`` is returned.
        """
        if self.solution is not None:
            return self.solution

        info = self.model.getInfo()
        if (self.run_status == highspy.HighsStatus.kOk or
                (self.run_status == highspy.HighsStatus.kWarning and
                 info.primal_solution_status == highspy.SolutionStatus.kSolutionStatusFeasible)):

            logging.debug(f"obj. fun. value {self.model.getObjectiveValue()}")

            if self._build_solution():
                logging.debug("solution feasible!")
            else:
                logging.debug("model solved but solution infeasible!")

            return self.solution

        elif self.run_status == highspy.HighsStatus.kError:

            logging.debug("no solution available. model ended with Error")
            return None

        if self.solution is None and run_model:

            self.run()
            return self.get_solution(run_model=False)

        logging.debug("no solution available. run model with function run()")
        return None