huff 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/gistools.py +123 -3
- huff/models.py +925 -134
- huff/ors.py +16 -16
- huff/osm.py +207 -0
- huff/tests/data/Wieland2015.xlsx +0 -0
- huff/tests/tests_huff.py +146 -41
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/METADATA +31 -11
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/RECORD +10 -8
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/WHEEL +0 -0
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
 # Author: Thomas Wieland
 # ORCID: 0000-0001-5168-9846
 # mail: geowieland@googlemail.com
-# Version: 1.1.2
-# Last update: 2025-05-
+# Version: 1.3.0
+# Last update: 2025-05-22 05:45
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
 
@@ -14,8 +14,11 @@ import pandas as pd
 import geopandas as gp
 import numpy as np
 import time
+from statsmodels.formula.api import ols
+from shapely.geometry import Point
+from shapely import wkt
 from huff.ors import Client, TimeDistanceMatrix, Isochrone
-from huff.gistools import overlay_difference, distance_matrix
+from huff.gistools import overlay_difference, distance_matrix, buffers
 
 
 class CustomerOrigins:
@@ -24,12 +27,16 @@ class CustomerOrigins:
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf,
+        buffers_gdf
         ):
 
         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf
+        self.buffers_gdf = buffers_gdf
 
     def get_geodata_gpd(self):
 
@@ -42,6 +49,14 @@ class CustomerOrigins:
     def get_metadata(self):
 
         return self.metadata
+
+    def get_isochrones(self):
+
+        return self.isochrones_gdf
+
+    def get_buffers(self):
+
+        return self.buffers_gdf
 
     def summary(self):
 
@@ -63,6 +78,16 @@ class CustomerOrigins:
         print("Unique ID column " + metadata["unique_id"])
         print("Input CRS " + str(metadata["crs_input"]))
 
+        if self.isochrones_gdf is None:
+            print("Including isochrones NO")
+        else:
+            print("Including isochrones YES")
+
+        if self.buffers_gdf is None:
+            print("Including buffers NO")
+        else:
+            print("Including buffers YES")
+
         return metadata
 
     def define_marketsize(
@@ -97,27 +122,109 @@ class CustomerOrigins:
 
         return self
 
+    def isochrones(
+        self,
+        segments_minutes: list = [5, 10, 15],
+        range_type: str = "time",
+        intersections: str = "true",
+        profile: str = "driving-car",
+        donut: bool = True,
+        ors_server: str = "https://api.openrouteservice.org/v2/",
+        ors_auth: str = None,
+        timeout: int = 10,
+        delay: int = 1,
+        save_output: bool = True,
+        output_filepath: str = "customer_origins_isochrones.shp",
+        output_crs: str = "EPSG:4326"
+        ):
+
+        geodata_gpd = self.get_geodata_gpd()
+        metadata = self.get_metadata()
+
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.isochrones_gdf = isochrones_gdf
+
+        return self
+
+    def buffers(
+        self,
+        segments_distance: list = [500, 1000],
+        donut: bool = True,
+        save_output: bool = True,
+        output_filepath: str = "customer_origins_buffers.shp",
+        output_crs: str = "EPSG:4326"
+        ):
+
+        geodata_gpd_original = self.get_geodata_gpd_original()
+        metadata = self.metadata
+
+        buffers_gdf = buffers(
+            point_gdf = geodata_gpd_original,
+            unique_id_col = metadata["unique_id"],
+            distances = segments_distance,
+            donut = donut,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.buffers_gdf = buffers_gdf
+
+        return self
+
+
 class SupplyLocations:
 
     def __init__(
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf,
+        buffers_gdf
         ):
 
         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf
+        self.buffers_gdf = buffers_gdf
 
     def get_geodata_gpd(self):
+
         return self.geodata_gpd
 
     def get_geodata_gpd_original(self):
+
         return self.geodata_gpd_original
 
     def get_metadata(self):
+
         return self.metadata
+
+    def get_isochrones_gdf(self):
+
+        return self.isochrones_gdf
+
+    def get_buffers_gdf(self):
+
+        return self.buffers_gdf
 
     def summary(self):
 
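Note: CustomerOrigins now carries isochrones_gdf and buffers_gdf and exposes isochrones() and buffers() wrappers around the module-level get_isochrones() and huff.gistools.buffers() helpers. A minimal usage sketch follows; the file name, column name, and ORS key are placeholders, not taken from the package.

```python
# Hypothetical usage of the new CustomerOrigins methods (placeholder names).
from huff.models import load_geodata

origins = load_geodata(
    "origins.shp",              # assumed point shapefile
    location_type="origins",
    unique_id="origin_id",
    data_type="shp"
)

# Buffers need no external service; isochrones call the openrouteservice API.
origins = origins.buffers(segments_distance=[500, 1000])
origins = origins.isochrones(
    segments_minutes=[5, 10, 15],
    ors_auth="YOUR_ORS_API_KEY"  # placeholder
)

print(origins.get_buffers().head())
print(origins.get_isochrones().head())
```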
@@ -239,86 +351,70 @@ class SupplyLocations:
 
     def isochrones(
         self,
-
+        segments_minutes: list = [5, 10, 15],
         range_type: str = "time",
         intersections: str = "true",
         profile: str = "driving-car",
         donut: bool = True,
         ors_server: str = "https://api.openrouteservice.org/v2/",
         ors_auth: str = None,
-        timeout = 10,
-        delay = 1,
+        timeout: int = 10,
+        delay: int = 1,
         save_output: bool = True,
-        output_filepath: str = "
+        output_filepath: str = "supply_locations_isochrones.shp",
         output_crs: str = "EPSG:4326"
         ):
 
         geodata_gpd = self.get_geodata_gpd()
         metadata = self.get_metadata()
 
-
-
-
-
-
-
-
-
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
             )
-
-        isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
-
-        i = 0
 
-
-
-            isochrone_output = ors_client.isochrone(
-                locations = [[x, y]],
-                segments = segments,
-                range_type = range_type,
-                intersections = intersections,
-                profile = profile,
-                timeout = timeout,
-                save_output = False,
-                output_crs = output_crs
-                )
-
-            if isochrone_output.status_code != 200:
-                continue
-
-            isochrone_gdf = isochrone_output.get_isochrones_gdf()
-
-            if donut:
-                isochrone_gdf = overlay_difference(
-                    polygon_gdf = isochrone_gdf,
-                    sort_col = "segment"
-                    )
-
-            time.sleep(delay)
-
-            isochrone_gdf[unique_id_col] = unique_id_values[i]
-
-            isochrones_gdf = pd.concat(
-                [
-                    isochrones_gdf,
-                    isochrone_gdf
-                    ],
-                ignore_index=True
-                )
-
-            i = i+1
+        self.isochrones_gdf = isochrones_gdf
 
-
-            output_crs,
-            allow_override=True,
-            inplace=True
-            )
-
-        if save_output:
+        return self
 
-
+    def buffers(
+        self,
+        segments_distance: list = [500, 1000],
+        donut: bool = True,
+        save_output: bool = True,
+        output_filepath: str = "supply_locations_buffers.shp",
+        output_crs: str = "EPSG:4326"
+        ):
 
-
+        geodata_gpd_original = self.get_geodata_gpd_original()
+        metadata = self.metadata
+
+        buffers_gdf = buffers(
+            point_gdf = geodata_gpd_original,
+            unique_id_col = metadata["unique_id"],
+            distances = segments_distance,
+            donut = donut,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.buffers_gdf = buffers_gdf
+
+        return self
+
 
 class InteractionMatrix:
 
@@ -360,15 +456,16 @@ class InteractionMatrix:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
 
     def transport_costs(
@@ -431,14 +528,10 @@ class InteractionMatrix:
         range_type = transport_costs_matrix_config["range_type"]
 
         transport_costs_matrix["source"] = transport_costs_matrix["source"].astype(int)
-        transport_costs_matrix["source"] = transport_costs_matrix["source"].map(
-            dict(enumerate(customer_origins_ids))
-            )
+        transport_costs_matrix["source"] = transport_costs_matrix["source"].map(dict(enumerate(customer_origins_ids)))
 
         transport_costs_matrix["destination"] = transport_costs_matrix["destination"].astype(int)
-        transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(
-            dict(enumerate(supply_locations_ids))
-            )
+        transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(dict(enumerate(supply_locations_ids)))
 
         transport_costs_matrix["source_destination"] = transport_costs_matrix["source"].astype(str)+"_"+transport_costs_matrix["destination"].astype(str)
         transport_costs_matrix = transport_costs_matrix[["source_destination", range_type]]
@@ -506,11 +599,11 @@ class InteractionMatrix:
         if attraction_weighting["func"] == "power":
             interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
         elif tc_weighting["func"] == "exponential":
-            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df[
+            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
         else:
             raise ValueError ("Attraction weighting is not defined.")
 
-        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]
+        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
 
         interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
 
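Note: this hunk fixes the utility computation. In 1.1.2, U_ij was set to the weighted attraction alone, dropping the transport-cost term; in 1.3.0 it is the product of both weighted terms, i.e. the Huff utility U_ij = A_j^γ · f(t_ij). A small numeric sketch (γ and λ are illustrative values, not package defaults):

```python
# Sketch of the corrected Huff utility; column names follow the diff,
# gamma and lambda_ are made-up illustrative exponents.
import pandas as pd

df = pd.DataFrame({"A_j": [5000.0, 12000.0], "t_ij": [8.0, 15.0]})
gamma, lambda_ = 0.9, -2.0

df["A_j_weighted"] = df["A_j"] ** gamma        # attraction weighting (power)
df["t_ij_weighted"] = df["t_ij"] ** lambda_    # transport cost weighting (power)
df["U_ij"] = df["A_j_weighted"] * df["t_ij_weighted"]  # 1.3.0: product of both terms
print(df[["U_ij"]])
```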
@@ -591,13 +684,11 @@ class InteractionMatrix:
         cols: list = ["A_j", "t_ij"]
         ):
 
-        """ MCI model log-centering transformation """
-
         cols = cols + ["p_ij"]
 
         interaction_matrix_df = self.interaction_matrix_df
 
-        interaction_matrix_df =
+        interaction_matrix_df = log_centering_transformation(
             df = interaction_matrix_df,
             ref_col = "i",
             cols = cols
@@ -607,6 +698,87 @@ class InteractionMatrix:
 
         return self
 
+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        supply_locations = self.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = self.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = self.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = self.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = self.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][key] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+
+            supply_locations_metadata["attraction_col"].append(None)
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
 class HuffModel:
 
     def __init__(
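Note: InteractionMatrix.mci_fit() log-centers the matrix if needed, then fits the MCI model as an intercept-free OLS regression of p_ij__LCT on the transformed predictors. A minimal sketch of that regression step, assuming a DataFrame that already holds the __LCT columns (values are illustrative, not package data):

```python
# Sketch of the regression that mci_fit() runs internally.
import pandas as pd
from statsmodels.formula.api import ols

interaction_matrix_df = pd.DataFrame({
    "p_ij__LCT": [0.12, -0.08, 0.31, -0.35],
    "A_j__LCT":  [0.25, -0.10, 0.40, -0.55],
    "t_ij__LCT": [-0.30, 0.20, -0.45, 0.55],
})

cols_t = ["A_j__LCT", "t_ij__LCT"]
mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'  # "-1": no intercept, as in the diff

fit = ols(mci_formula, data=interaction_matrix_df).fit()
print(fit.params)               # estimated attraction and transport cost exponents
print(fit.conf_int(alpha=0.05)) # confidence intervals reported by MCIModel.summary()
```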
@@ -662,19 +834,305 @@ class HuffModel:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
-
+
+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        supply_locations = interaction_matrix.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = interaction_matrix.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = interaction_matrix.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][(key)] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
+class MCIModel:
+
+    def __init__(
+        self,
+        interaction_matrix: InteractionMatrix,
+        coefs: dict,
+        mci_ols_model,
+        market_areas_df
+        ):
+
+        self.interaction_matrix = interaction_matrix
+        self.coefs = coefs
+        self.mci_ols_model = mci_ols_model
+        self.market_areas_df = market_areas_df
+
+    def get_interaction_matrix_df(self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        return interaction_matrix_df
+
+    def get_supply_locations(self):
+
+        interaction_matrix = self.interaction_matrix
+        supply_locations = interaction_matrix.get_supply_locations()
+
+        return supply_locations
+
+    def get_customer_origins(self):
+
+        interaction_matrix = self.interaction_matrix
+        customer_origins = interaction_matrix.get_customer_origins()
+
+        return customer_origins
+
+    def get_mci_ols_model(self):
+
+        return self.mci_ols_model
+
+    def get_coefs_dict(self):
+
+        return self.coefs
+
+    def get_market_areas_df(self):
+
+        return self.market_areas_df
+
+    def summary(self):
+
+        interaction_matrix = self.interaction_matrix
+        coefs = self.coefs
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+
+        print("Multiplicative Competitive Interaction Model")
+        print("--------------------------------------------")
+        print("Supply locations " + str(supply_locations_metadata["no_points"]))
+        print("Customer origins " + str(customer_origins_metadata["no_points"]))
+        print("--------------------------------------------")
+        print("Partial utilities")
+
+        coefficients_rows = []
+        for key, value in coefs.items():
+            coefficient_name = value["Coefficient"]
+            if coefficient_name == "A_j":
+                coefficient_name = "Attraction"
+            if coefficient_name == "t_ij":
+                coefficient_name = "Transport costs"
+            coefficients_rows.append({
+                "": coefficient_name,
+                "Estimate": round(value["Estimate"], 3),
+                "SE": round(value["SE"], 3),
+                "t": round(value["t"], 3),
+                "p": round(value["p"], 3),
+                "CI lower": round(value["CI_lower"], 3),
+                "CI upper": round(value["CI_upper"], 3)
+                })
+        coefficients_df = pd.DataFrame(coefficients_rows)
+
+        print (coefficients_df)
+
+        print("--------------------------------------------")
+
+    def utility(
+        self,
+        transformation = "LCT"
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        if interaction_matrix_df["t_ij"].isna().all():
+            raise ValueError ("Transport cost variable is not defined")
+        if interaction_matrix_df["A_j"].isna().all():
+            raise ValueError ("Attraction variable is not defined")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["A_j", "t_ij"]
+            )
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+
+        t_ij_weighting = customer_origins_metadata["weighting"][0]["param"]
+
+        if transformation == "ILCT":
+            mci_formula = f"{t_ij_weighting}*t_ij"
+        else:
+            mci_formula = f"t_ij**{t_ij_weighting}"
+
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+        attraction_col = supply_locations_metadata["attraction_col"]
+        attraction_weighting = supply_locations_metadata["weighting"]
+
+        if transformation == "ILCT":
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" + {value['param']}*{attraction_col[key]}"
+        else:
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" * {attraction_col[key]}**{value['param']}"
+
+        interaction_matrix_df["U_ij"] = interaction_matrix_df.apply(lambda row: eval(mci_formula, {}, row.to_dict()), axis=1)
+
+        if transformation == "ILCT":
+            interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
+
+        self.interaction_matrix = interaction_matrix_df
+
+        return self
+
+    def probabilities (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["U_ij"].isna().all():
+            self.utility()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        utility_i = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum())
+        utility_i = utility_i.rename(columns = {"U_ij": "U_i"})
+
+        interaction_matrix_df = interaction_matrix_df.merge(
+            utility_i,
+            left_on="i",
+            right_on="i",
+            how="inner"
+            )
+
+        interaction_matrix_df["p_ij"] = (interaction_matrix_df["U_ij"]) / (interaction_matrix_df["U_i"])
+
+        interaction_matrix_df = interaction_matrix_df.drop(columns=["U_i"])
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def flows (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["C_i"].isna().all():
+            raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["C_i"]
+            )
+
+        if interaction_matrix_df["p_ij"].isna().all():
+            self.probabilities()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        interaction_matrix_df["E_ij"] = interaction_matrix_df["p_ij"] * interaction_matrix_df["C_i"]
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def marketareas (self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["E_ij"]
+            )
+
+        market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
+        market_areas_df = market_areas_df.reset_index(drop=False)
+        market_areas_df = market_areas_df.rename(columns={"E_ij": "T_j"})
+
+        mci_model = MCIModel(
+            interaction_matrix = interaction_matrix,
+            coefs = self.get_coefs_dict(),
+            mci_ols_model = self.get_mci_ols_model(),
+            market_areas_df = market_areas_df
+            )
+
+        return mci_model
+
 def load_geodata (
-
+    data,
     location_type: str,
     unique_id: str,
     x_col: str = None,
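Note: the MCIModel returned by mci_fit() bundles the fitted statsmodels OLS result, the coefficient dictionary, and the interaction matrix. A hypothetical workflow sketch follows; "interaction_matrix" is assumed to be an InteractionMatrix with attraction and transport costs already defined, and nothing here is quoted from the package documentation.

```python
# Hypothetical use of the new MCIModel (assumed pre-built interaction_matrix).
mci_model = interaction_matrix.mci_fit(cols=["A_j", "t_ij"], alpha=0.05)

mci_model.summary()                       # prints the coefficient table
coefs = mci_model.get_coefs_dict()        # estimates, SEs, t, p, CIs per coefficient
ols_fit = mci_model.get_mci_ols_model()   # underlying statsmodels results object
print(ols_fit.rsquared)
```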
@@ -687,33 +1145,47 @@ def load_geodata (
     ):
 
     if location_type is None or (location_type != "origins" and location_type != "destinations"):
-        raise ValueError ("location_type must be either 'origins' or 'destinations'")
-
-    if data_type not in ["shp", "csv", "xlsx"]:
-        raise ValueError ("data_type must be 'shp', 'csv' or 'xlsx'")
+        raise ValueError ("Argument location_type must be either 'origins' or 'destinations'")
 
-    if
-        geodata_gpd_original =
+    if isinstance(data, gp.GeoDataFrame):
+        geodata_gpd_original = data
+        if not all(geodata_gpd_original.geometry.geom_type == "Point"):
+            raise ValueError ("Input geopandas.GeoDataFrame must be of type 'Point'")
         crs_input = geodata_gpd_original.crs
+    elif isinstance(data, pd.DataFrame):
+        geodata_tab = data
+    elif isinstance(data, str):
+        if data_type == "shp":
+            geodata_gpd_original = gp.read_file(data)
+            if not all(geodata_gpd_original.geometry.geom_type == "Point"):
+                raise ValueError ("Input shapefile must be of type 'Point'")
+            crs_input = geodata_gpd_original.crs
+        elif data_type == "csv" or data_type == "xlsx":
+            if x_col is None:
+                raise ValueError ("Missing value for X coordinate column")
+            if y_col is None:
+                raise ValueError ("Missing value for Y coordinate column")
+        elif data_type == "csv":
+            geodata_tab = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            geodata_tab = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
 
-    if data_type == "csv" or data_type == "xlsx":
-
-
-
-
-
-        if data_type == "csv":
-            geodata_tab = pd.read_csv(
-                file,
-                sep = csv_sep,
-                decimal = csv_decimal,
-                encoding = csv_encoding
+    if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
+
+        check_vars(
+            df = geodata_tab,
+            cols = [x_col, y_col]
             )
-
-        if data_type == "xlsx":
-            geodata_tab = pd.read_excel(file)
-
-    if data_type == "csv" or data_type == "xlsx":
+
         geodata_gpd_original = gp.GeoDataFrame(
             geodata_tab,
             geometry = gp.points_from_xy(
@@ -722,8 +1194,9 @@ def load_geodata (
                 ),
             crs = crs_input
             )
-
+
     crs_output = "EPSG:4326"
+
     geodata_gpd = geodata_gpd_original.to_crs(crs_output)
     geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
 
@@ -747,13 +1220,17 @@ def load_geodata (
         geodata_object = CustomerOrigins(
             geodata_gpd,
             geodata_gpd_original,
-            metadata
+            metadata,
+            None,
+            None
             )
     elif location_type == "destinations":
         geodata_object = SupplyLocations(
             geodata_gpd,
             geodata_gpd_original,
-            metadata
+            metadata,
+            None,
+            None
            )
 
     return geodata_object
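Note: load_geodata() now accepts a point GeoDataFrame, a plain DataFrame with coordinate columns, or a file path, and passes the two new None placeholders for isochrones_gdf and buffers_gdf when constructing the location objects. A hedged sketch of the three input forms (file and column names are placeholders):

```python
# Hypothetical calls showing the input types load_geodata() accepts in 1.3.0.
import geopandas as gp
import pandas as pd
from huff.models import load_geodata

# 1. Point GeoDataFrame
gdf = gp.read_file("supply_locations.shp")
destinations = load_geodata(gdf, location_type="destinations", unique_id="store_id")

# 2. Plain DataFrame with X/Y coordinate columns
df = pd.DataFrame({"origin_id": [1, 2], "X": [7.46, 7.47], "Y": [51.51, 51.52]})
origins = load_geodata(df, location_type="origins", unique_id="origin_id",
                       x_col="X", y_col="Y")

# 3. File path (shp, csv, or xlsx), as in 1.1.2
origins = load_geodata("origins.csv", location_type="origins", unique_id="origin_id",
                       x_col="X", y_col="Y", data_type="csv")
```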
@@ -830,33 +1307,241 @@ def create_interaction_matrix(
 
     return interaction_matrix
 
-def
-
-
-
+def load_interaction_matrix(
+    data,
+    customer_origins_col: str,
+    supply_locations_col: str,
+    attraction_col: list,
+    transport_costs_col: str,
+    probabilities_col: str = None,
+    market_size_col: str = None,
+    customer_origins_coords_col = None,
+    supply_locations_coords_col = None,
+    data_type = "csv",
+    csv_sep = ";",
+    csv_decimal = ",",
+    csv_encoding="unicode_escape",
+    crs_input = "EPSG:4326",
+    crs_output = "EPSG:4326"
+    ):
+
+    if isinstance(data, pd.DataFrame):
+        interaction_matrix_df = data
+    elif isinstance(data, str):
+        if data_type not in ["csv", "xlsx"]:
+            raise ValueError ("data_type must be 'csv' or 'xlsx'")
+        if data_type == "csv":
+            interaction_matrix_df = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            interaction_matrix_df = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
+
+    if customer_origins_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + customer_origins_col + " not in data")
+    if supply_locations_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + supply_locations_col + " not in data")
+
+    cols_check = attraction_col + [transport_costs_col]
+    if probabilities_col is not None:
+        cols_check = cols_check + [probabilities_col]
+    if market_size_col is not None:
+        cols_check = cols_check + [market_size_col]
 
-
-
-
+    check_vars(
+        interaction_matrix_df,
+        cols = cols_check
+        )
+
+    if customer_origins_coords_col is not None:
+
+        if isinstance(customer_origins_coords_col, str):
+
+            if customer_origins_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + customer_origins_coords_col + " not in data.")
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab[customer_origins_coords_col].apply(lambda x: wkt.loads(x))
+            customer_origins_geodata_gpd = gp.GeoDataFrame(
+                customer_origins_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop(
+                columns = customer_origins_coords_col
+                )
+
+        elif isinstance(customer_origins_coords_col, list):
+
+            if len(customer_origins_coords_col) != 2:
+                raise ValueError ("Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = customer_origins_coords_col
+                )
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col[0], customer_origins_coords_col[1]]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab.apply(lambda row: Point(row[customer_origins_coords_col[0]], row[customer_origins_coords_col[1]]), axis=1)
+            customer_origins_geodata_gpd = gp.GeoDataFrame(customer_origins_geodata_tab, geometry="geometry")
+
+            customer_origins_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        customer_origins_geodata_gpd = interaction_matrix_df[customer_origins_col]
+        customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop_duplicates()
+
+    if market_size_col is not None:
+        customer_origins_cols = [customer_origins_col] + [market_size_col]
+    else:
+        customer_origins_cols = [customer_origins_col]
+    customer_origins_geodata_original_tab = customer_origins_geodata_tab = interaction_matrix_df[customer_origins_cols]
+
+    customer_origins_metadata = {
+        "location_type": "origins",
+        "unique_id": customer_origins_col,
+        "attraction_col": [None],
+        "marketsize_col": market_size_col,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(customer_origins_geodata_gpd)
+        }
+
+    customer_origins = CustomerOrigins(
+        geodata_gpd = customer_origins_geodata_gpd,
+        geodata_gpd_original = customer_origins_geodata_original_tab,
+        metadata = customer_origins_metadata,
+        isochrones_gdf = None,
+        buffers_gdf = None
+        )
+
+    if supply_locations_coords_col is not None:
+
+        if isinstance(supply_locations_coords_col, str):
+
+            if supply_locations_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + supply_locations_coords_col + " not in data.")
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab[supply_locations_coords_col].apply(lambda x: wkt.loads(x))
+            supply_locations_geodata_gpd = gp.GeoDataFrame(
+                supply_locations_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop(
+                columns = supply_locations_coords_col
+                )
+
+        if isinstance(supply_locations_coords_col, list):
+
+            if len(supply_locations_coords_col) != 2:
+                raise ValueError ("Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = supply_locations_coords_col
+                )
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col[0], supply_locations_coords_col[1]]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab.apply(lambda row: Point(row[supply_locations_coords_col[0]], row[supply_locations_coords_col[1]]), axis=1)
+            supply_locations_geodata_gpd = gp.GeoDataFrame(supply_locations_geodata_tab, geometry="geometry")
+
+            supply_locations_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        supply_locations_geodata_gpd = interaction_matrix_df[supply_locations_col]
+        supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop_duplicates()
+
+    supply_locations_cols = [supply_locations_col] + attraction_col
+    supply_locations_geodata_original_tab = supply_locations_geodata_tab = interaction_matrix_df[supply_locations_cols]
+
+    supply_locations_metadata = {
+        "location_type": "destinations",
+        "unique_id": supply_locations_col,
+        "attraction_col": attraction_col,
+        "marketsize_col": None,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(supply_locations_geodata_gpd)
+        }
+
+    supply_locations = SupplyLocations(
+        geodata_gpd = supply_locations_geodata_gpd,
+        geodata_gpd_original = supply_locations_geodata_original_tab,
+        metadata = supply_locations_metadata,
+        isochrones_gdf = None,
+        buffers_gdf = None
+        )
 
-
-
-
+    interaction_matrix_df = interaction_matrix_df.rename(
+        columns = {
+            customer_origins_col: "i",
+            supply_locations_col: "j",
+            attraction_col[0]: "A_j",
+            transport_costs_col: "t_ij"
+            }
+        )
+
+    if probabilities_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                probabilities_col: "p_ij"
+                }
+            )
+
+    if market_size_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                market_size_col: "C_i"
+                }
+            )
+
+    interaction_matrix = InteractionMatrix(
+        interaction_matrix_df=interaction_matrix_df,
+        customer_origins=customer_origins,
+        supply_locations=supply_locations
+        )
 
-
-    if (df[col] <= 0).any():
-        raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")
+    return interaction_matrix
 
-def
+def log_centering_transformation(
     df: pd.DataFrame,
     ref_col: str,
-    cols: list
+    cols: list,
+    suffix: str = "__LCT"
     ):
 
     check_vars(
         df = df,
-        cols = cols
+        cols = cols
         )
+
+    if ref_col not in df.columns:
+        raise KeyError(f"Column '{ref_col}' not in dataframe.")
 
     def lct (x):
 
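Note: the new load_interaction_matrix() builds an InteractionMatrix (plus minimal CustomerOrigins and SupplyLocations objects) from a long-format table. A hypothetical call follows; the file and column names are placeholders, since the bundled Wieland2015.xlsx test data is not documented in this diff.

```python
# Hypothetical call to the new load_interaction_matrix() (placeholder names).
from huff.models import load_interaction_matrix

interaction_matrix = load_interaction_matrix(
    data="interaction_data.xlsx",
    customer_origins_col="origin",
    supply_locations_col="store",
    attraction_col=["salesarea"],      # list; first entry becomes A_j
    transport_costs_col="traveltime",  # becomes t_ij
    probabilities_col="share",         # optional, becomes p_ij
    market_size_col="buyingpower",     # optional, becomes C_i
    data_type="xlsx"
)
```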
@@ -866,9 +1551,115 @@ def mci_transformation(
         return x_lct
 
     for var in cols:
+
+        unique_values = df[var].unique()
+        if set(unique_values).issubset({0, 1}):
+            df[var+suffix] = df[var]
+            print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
+            continue
 
         var_t = df.groupby(ref_col)[var].apply(lct)
         var_t = var_t.reset_index()
-        df[var+
+        df[var+suffix] = var_t[var]
+
+    return df
+
+def get_isochrones(
+    geodata_gpd: gp.GeoDataFrame,
+    unique_id_col: str,
+    segments_minutes: list = [5, 10, 15],
+    range_type: str = "time",
+    intersections: str = "true",
+    profile: str = "driving-car",
+    donut: bool = True,
+    ors_server: str = "https://api.openrouteservice.org/v2/",
+    ors_auth: str = None,
+    timeout = 10,
+    delay = 1,
+    save_output: bool = True,
+    output_filepath: str = "isochrones.shp",
+    output_crs: str = "EPSG:4326"
+    ):
+
+    coords = [(point.x, point.y) for point in geodata_gpd.geometry]
+
+    unique_id_values = geodata_gpd[unique_id_col].values
+
+    ors_client = Client(
+        server = ors_server,
+        auth = ors_auth
+        )
+
+    isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
+
+    segments = [segment*60 for segment in segments_minutes]
+
+    i = 0
+
+    for x, y in coords:
+
+        isochrone_output = ors_client.isochrone(
+            locations = [[x, y]],
+            segments = segments,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            timeout = timeout,
+            save_output = False,
+            output_crs = output_crs
+            )
+
+        if isochrone_output.status_code != 200:
+            continue
+
+        isochrone_gdf = isochrone_output.get_isochrones_gdf()
+
+        if donut:
+            isochrone_gdf = overlay_difference(
+                polygon_gdf = isochrone_gdf,
+                sort_col = "segment"
+                )
+
+        time.sleep(delay)
+
+        isochrone_gdf[unique_id_col] = unique_id_values[i]
+
+        isochrones_gdf = pd.concat(
+            [
+                isochrones_gdf,
+                isochrone_gdf
+                ],
+            ignore_index=True
+            )
+
+        i = i+1
+
+    isochrones_gdf.set_crs(
+        output_crs,
+        allow_override=True,
+        inplace=True
+        )
+
+    if save_output:
+
+        isochrones_gdf.to_file(filename = output_filepath)
+
+    return isochrones_gdf
+
+
+def check_vars(
+    df: pd.DataFrame,
+    cols: list
+    ):
 
-
+    for col in cols:
+        if col not in df.columns:
+            raise KeyError(f"Column '{col}' not in dataframe.")
+
+    for col in cols:
+        if not pd.api.types.is_numeric_dtype(df[col]):
+            raise ValueError(f"Column '{col}' is not numeric. All stated columns must be numeric.")
+
+    for col in cols:
+        if (df[col] <= 0).any():
+            raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")