huff 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- huff/gistools.py +50 -3
- huff/models.py +907 -172
- huff/ors.py +2 -2
- huff/tests/data/Wieland2015.xlsx +0 -0
- huff/tests/tests_huff.py +95 -44
- {huff-1.1.1.dist-info → huff-1.2.0.dist-info}/METADATA +12 -5
- {huff-1.1.1.dist-info → huff-1.2.0.dist-info}/RECORD +9 -8
- {huff-1.1.1.dist-info → huff-1.2.0.dist-info}/WHEEL +0 -0
- {huff-1.1.1.dist-info → huff-1.2.0.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
 # Author: Thomas Wieland
 # ORCID: 0000-0001-5168-9846
 # mail: geowieland@googlemail.com
-# Version: 1.1.1
-# Last update: 2025-
+# Version: 1.2.0
+# Last update: 2025-05-14 18:33
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------

@@ -14,8 +14,11 @@ import pandas as pd
 import geopandas as gp
 import numpy as np
 import time
+from statsmodels.formula.api import ols
+from shapely.geometry import Point
+from shapely import wkt
 from huff.ors import Client, TimeDistanceMatrix, Isochrone
-from huff.gistools import overlay_difference
+from huff.gistools import overlay_difference, distance_matrix


 class CustomerOrigins:
@@ -24,12 +27,14 @@ class CustomerOrigins:
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf
         ):

         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf

     def get_geodata_gpd(self):

@@ -42,6 +47,10 @@ class CustomerOrigins:
     def get_metadata(self):

         return self.metadata
+
+    def get_isochrones(self):
+
+        return self.isochrones_gdf

     def summary(self):

@@ -63,6 +72,11 @@ class CustomerOrigins:
         print("Unique ID column " + metadata["unique_id"])
         print("Input CRS " + str(metadata["crs_input"]))

+        if self.isochrones_gdf is None:
+            print("Including isochrones NO")
+        else:
+            print("Including isochrones YES")
+
         return metadata

     def define_marketsize(
@@ -97,27 +111,77 @@ class CustomerOrigins:

         return self

+    def isochrones(
+        self,
+        segments_minutes: list = [5, 10, 15],
+        range_type: str = "time",
+        intersections: str = "true",
+        profile: str = "driving-car",
+        donut: bool = True,
+        ors_server: str = "https://api.openrouteservice.org/v2/",
+        ors_auth: str = None,
+        timeout: int = 10,
+        delay: int = 1,
+        save_output: bool = True,
+        output_filepath: str = "customer_origins_isochrones.shp",
+        output_crs: str = "EPSG:4326"
+        ):
+
+        geodata_gpd = self.get_geodata_gpd()
+        metadata = self.get_metadata()
+
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.isochrones_gdf = isochrones_gdf
+
+        return self
+
+
 class SupplyLocations:

     def __init__(
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf
         ):

         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf

     def get_geodata_gpd(self):
+
         return self.geodata_gpd

     def get_geodata_gpd_original(self):
+
         return self.geodata_gpd_original

     def get_metadata(self):
+
         return self.metadata
+
+    def get_isochrones_gdf(self):
+
+        return self.isochrones_gdf

     def summary(self):

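Usage note: `CustomerOrigins` and `SupplyLocations` now share the module-level `get_isochrones()` helper (added near the end of this file) and keep the result on the object. A minimal sketch of the new method; the file path, ID column and API key are placeholders, and `segments_minutes` simply repeats the default:

```python
from huff.models import load_geodata

# Placeholder shapefile and ID column; any point layer of customer origins works.
origins = load_geodata(
    "customer_origins.shp",
    location_type="origins",
    unique_id="origin_id",
    data_type="shp",
)

# Query drive-time isochrones from openrouteservice and attach them to the object
# (requires a valid ORS API key; the public API is rate-limited, hence the delay).
origins.isochrones(
    segments_minutes=[5, 10, 15],
    profile="driving-car",
    ors_auth="YOUR_ORS_API_KEY",  # placeholder
    save_output=False,
)

print(origins.get_isochrones())  # GeoDataFrame with one donut segment per cut-off
origins.summary()                # now also reports whether isochrones are included
```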
@@ -139,6 +203,11 @@ class SupplyLocations:
         print("Unique ID column " + metadata["unique_id"])
         print("Input CRS " + str(metadata["crs_input"]))

+        if self.isochrones_gdf is None:
+            print("Including isochrones NO")
+        else:
+            print("Including isochrones YES")
+
         return metadata

     def define_attraction(
@@ -239,86 +308,44 @@ class SupplyLocations:

     def isochrones(
         self,
-
+        segments_minutes: list = [5, 10, 15],
         range_type: str = "time",
         intersections: str = "true",
         profile: str = "driving-car",
         donut: bool = True,
         ors_server: str = "https://api.openrouteservice.org/v2/",
         ors_auth: str = None,
-        timeout = 10,
-        delay = 1,
+        timeout: int = 10,
+        delay: int = 1,
         save_output: bool = True,
-        output_filepath: str = "
+        output_filepath: str = "supply_locations_isochrones.shp",
         output_crs: str = "EPSG:4326"
         ):

         geodata_gpd = self.get_geodata_gpd()
         metadata = self.get_metadata()

-
-
-
-
-
-
-
-
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
             )
-
-        isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
-
-        i = 0
-
-        for x, y in coords:
-
-            isochrone_output = ors_client.isochrone(
-                locations = [[x, y]],
-                segments = segments,
-                range_type = range_type,
-                intersections = intersections,
-                profile = profile,
-                timeout = timeout,
-                save_output = False,
-                output_crs = output_crs
-                )
-
-            if isochrone_output.status_code != 200:
-                continue
-
-            isochrone_gdf = isochrone_output.get_isochrones_gdf()
-
-            if donut:
-                isochrone_gdf = overlay_difference(
-                    polygon_gdf = isochrone_gdf,
-                    sort_col = "segment"
-                    )
-
-            time.sleep(delay)
-
-            isochrone_gdf[unique_id_col] = unique_id_values[i]
-
-            isochrones_gdf = pd.concat(
-                [
-                    isochrones_gdf,
-                    isochrone_gdf
-                ],
-                ignore_index=True
-                )
-
-            i = i+1

-        isochrones_gdf
-            output_crs,
-            allow_override=True,
-            inplace=True
-            )
-
-        if save_output:
+        self.isochrones_gdf = isochrones_gdf

-
+        return self

-        return isochrones_gdf

 class InteractionMatrix:

@@ -360,26 +387,33 @@ class InteractionMatrix:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")

     def transport_costs(
         self,
+        network: bool = True,
         range_type: str = "time",
         time_unit: str = "minutes",
+        distance_unit: str = "kilometers",
         ors_server: str = "https://api.openrouteservice.org/v2/",
         ors_auth: str = None,
         save_output: bool = False,
         output_filepath: str = "transport_costs_matrix.csv"
         ):
+
+        if not network and range_type == "time":
+            print ("Calculating euclidean distances (network = False). Setting range_type = 'distance'")
+            range_type = "distance"

         interaction_matrix_df = self.get_interaction_matrix_df()

@@ -401,53 +435,70 @@ class InteractionMatrix:

         customer_origins_index = list(range(len(customer_origins_coords)))
         locations_coords_index = list(range(len(customer_origins_index), len(locations_coords)))
-
-        ors_client = Client(
-            server = ors_server,
-            auth = ors_auth
-            )
-        time_distance_matrix = ors_client.matrix(
-            locations = locations_coords,
-            save_output = save_output,
-            output_filepath = output_filepath,
-            sources = customer_origins_index,
-            destinations = locations_coords_index,
-            range_type = range_type
-            )
-
-        if time_distance_matrix.get_metadata() is None:
-            raise ValueError ("No transport costs matrix was built.")

-
-        transport_costs_matrix_config = time_distance_matrix.get_config()
-        range_type = transport_costs_matrix_config["range_type"]
+        if network:

-
-
-
-
-
-
-
-
-
-
-
-
+            ors_client = Client(
+                server = ors_server,
+                auth = ors_auth
+                )
+            time_distance_matrix = ors_client.matrix(
+                locations = locations_coords,
+                save_output = save_output,
+                output_filepath = output_filepath,
+                sources = customer_origins_index,
+                destinations = locations_coords_index,
+                range_type = range_type
+                )
+
+            if time_distance_matrix.get_metadata() is None:
+                raise ValueError ("No transport costs matrix was built.")

-
-
-
-
-            )
-
-
-
-
-
-
+            transport_costs_matrix = time_distance_matrix.get_matrix()
+            transport_costs_matrix_config = time_distance_matrix.get_config()
+            range_type = transport_costs_matrix_config["range_type"]
+
+            transport_costs_matrix["source"] = transport_costs_matrix["source"].astype(int)
+            transport_costs_matrix["source"] = transport_costs_matrix["source"].map(
+                dict(enumerate(customer_origins_ids))
+                )
+
+            transport_costs_matrix["destination"] = transport_costs_matrix["destination"].astype(int)
+            transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(
+                dict(enumerate(supply_locations_ids))
+                )
+
+            transport_costs_matrix["source_destination"] = transport_costs_matrix["source"].astype(str)+"_"+transport_costs_matrix["destination"].astype(str)
+            transport_costs_matrix = transport_costs_matrix[["source_destination", range_type]]
+
+            interaction_matrix_df = interaction_matrix_df.merge(
+                transport_costs_matrix,
+                left_on="ij",
+                right_on="source_destination"
+                )
+
+            interaction_matrix_df["t_ij"] = interaction_matrix_df[range_type]
+            if time_unit == "minutes":
+                interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60
+            if time_unit == "hours":
+                interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60/60
+
+            interaction_matrix_df = interaction_matrix_df.drop(columns=["source_destination", range_type])
+
+        else:
+
+            distance_matrix_result = distance_matrix(
+                sources = customer_origins_coords,
+                destinations = supply_locations_coords,
+                unit = "m"
+                )
+
+            distance_matrix_result_flat = [distance for sublist in distance_matrix_result for distance in sublist]

-
+            interaction_matrix_df["t_ij"] = distance_matrix_result_flat
+
+            if distance_unit == "kilometers":
+                interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000

         self.interaction_matrix_df = interaction_matrix_df

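Usage note: `transport_costs()` no longer requires an ORS request. With `network=False` it uses the new `huff.gistools.distance_matrix()` helper, switches `range_type` to `'distance'`, and fills `t_ij` with straight-line distances in metres, converted to kilometres by default via `distance_unit`. A sketch, assuming `interaction_matrix` was built with `create_interaction_matrix()` (object name is illustrative):

```python
# Euclidean distances instead of ORS travel times (new in 1.2.0).
# With the default network=True the method still calls the ORS matrix
# endpoint and converts the returned durations according to time_unit.
interaction_matrix.transport_costs(
    network=False,
    distance_unit="kilometers",
)
```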
@@ -483,11 +534,11 @@ class InteractionMatrix:
         if attraction_weighting["func"] == "power":
             interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
         elif tc_weighting["func"] == "exponential":
-            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df[
+            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
         else:
             raise ValueError ("Attraction weighting is not defined.")

-        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]
+        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]

         interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])

@@ -568,13 +619,11 @@ class InteractionMatrix:
         cols: list = ["A_j", "t_ij"]
         ):

-        """ MCI model log-centering transformation """
-
         cols = cols + ["p_ij"]

         interaction_matrix_df = self.interaction_matrix_df

-        interaction_matrix_df =
+        interaction_matrix_df = log_centering_transformation(
             df = interaction_matrix_df,
             ref_col = "i",
             cols = cols
@@ -584,6 +633,87 @@ class InteractionMatrix:

         return self

+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        supply_locations = self.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = self.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = self.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = self.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = self.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][key] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+
+            supply_locations_metadata["attraction_col"].append(None)
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
 class HuffModel:

     def __init__(
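Usage note: `mci_fit()` estimates the Multiplicative Competitive Interaction (MCI) model by OLS on the log-centering-transformed columns, using the no-intercept formula `p_ij__LCT ~ A_j__LCT + t_ij__LCT - 1`, and writes the estimates back into the origin and destination metadata as power-function weights. A sketch, assuming the interaction matrix already contains observed local market shares `p_ij` (object names are illustrative):

```python
# Fit MCI weights from observed shares; column names follow the package
# convention (A_j = attraction, t_ij = transport costs).
mci_model = interaction_matrix.mci_fit(cols=["A_j", "t_ij"], alpha=0.05)

mci_model.summary()                          # estimates, SE, t, p, confidence intervals
coefs = mci_model.get_coefs_dict()           # the same numbers as a plain dict
ols_results = mci_model.get_mci_ols_model()  # underlying statsmodels results object
```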
@@ -639,19 +769,305 @@ class HuffModel:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
-
+
+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        supply_locations = interaction_matrix.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = interaction_matrix.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = interaction_matrix.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][(key)] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
+class MCIModel:
+
+    def __init__(
+        self,
+        interaction_matrix: InteractionMatrix,
+        coefs: dict,
+        mci_ols_model,
+        market_areas_df
+        ):
+
+        self.interaction_matrix = interaction_matrix
+        self.coefs = coefs
+        self.mci_ols_model = mci_ols_model
+        self.market_areas_df = market_areas_df
+
+    def get_interaction_matrix_df(self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        return interaction_matrix_df
+
+    def get_supply_locations(self):
+
+        interaction_matrix = self.interaction_matrix
+        supply_locations = interaction_matrix.get_supply_locations()
+
+        return supply_locations
+
+    def get_customer_origins(self):
+
+        interaction_matrix = self.interaction_matrix
+        customer_origins = interaction_matrix.get_customer_origins()
+
+        return customer_origins
+
+    def get_mci_ols_model(self):
+
+        return self.mci_ols_model
+
+    def get_coefs_dict(self):
+
+        return self.coefs
+
+    def get_market_areas_df(self):
+
+        return self.market_areas_df
+
+    def summary(self):
+
+        interaction_matrix = self.interaction_matrix
+        coefs = self.coefs
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+
+        print("Multiplicative Competitive Interaction Model")
+        print("--------------------------------------------")
+        print("Supply locations " + str(supply_locations_metadata["no_points"]))
+        print("Customer origins " + str(customer_origins_metadata["no_points"]))
+        print("--------------------------------------------")
+        print("Partial utilities")
+
+        coefficients_rows = []
+        for key, value in coefs.items():
+            coefficient_name = value["Coefficient"]
+            if coefficient_name == "A_j":
+                coefficient_name = "Attraction"
+            if coefficient_name == "t_ij":
+                coefficient_name = "Transport costs"
+            coefficients_rows.append({
+                "": coefficient_name,
+                "Estimate": round(value["Estimate"], 3),
+                "SE": round(value["SE"], 3),
+                "t": round(value["t"], 3),
+                "p": round(value["p"], 3),
+                "CI lower": round(value["CI_lower"], 3),
+                "CI upper": round(value["CI_upper"], 3)
+                })
+        coefficients_df = pd.DataFrame(coefficients_rows)
+
+        print (coefficients_df)
+
+        print("--------------------------------------------")
+
+    def utility(
+        self,
+        transformation = "LCT"
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        if interaction_matrix_df["t_ij"].isna().all():
+            raise ValueError ("Transport cost variable is not defined")
+        if interaction_matrix_df["A_j"].isna().all():
+            raise ValueError ("Attraction variable is not defined")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["A_j", "t_ij"]
+            )
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+
+        t_ij_weighting = customer_origins_metadata["weighting"][0]["param"]
+
+        if transformation == "ILCT":
+            mci_formula = f"{t_ij_weighting}*t_ij"
+        else:
+            mci_formula = f"t_ij**{t_ij_weighting}"
+
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+        attraction_col = supply_locations_metadata["attraction_col"]
+        attraction_weighting = supply_locations_metadata["weighting"]
+
+        if transformation == "ILCT":
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" + {value['param']}*{attraction_col[key]}"
+        else:
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" * {attraction_col[key]}**{value['param']}"
+
+        interaction_matrix_df["U_ij"] = interaction_matrix_df.apply(lambda row: eval(mci_formula, {}, row.to_dict()), axis=1)
+
+        if transformation == "ILCT":
+            interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
+
+        self.interaction_matrix = interaction_matrix_df
+
+        return self
+
+    def probabilities (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["U_ij"].isna().all():
+            self.utility()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        utility_i = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum())
+        utility_i = utility_i.rename(columns = {"U_ij": "U_i"})
+
+        interaction_matrix_df = interaction_matrix_df.merge(
+            utility_i,
+            left_on="i",
+            right_on="i",
+            how="inner"
+            )
+
+        interaction_matrix_df["p_ij"] = (interaction_matrix_df["U_ij"]) / (interaction_matrix_df["U_i"])
+
+        interaction_matrix_df = interaction_matrix_df.drop(columns=["U_i"])
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def flows (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["C_i"].isna().all():
+            raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["C_i"]
+            )
+
+        if interaction_matrix_df["p_ij"].isna().all():
+            self.probabilities()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        interaction_matrix_df["E_ij"] = interaction_matrix_df["p_ij"] * interaction_matrix_df["C_i"]
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def marketareas (self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["E_ij"]
+            )
+
+        market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
+        market_areas_df = market_areas_df.reset_index(drop=False)
+        market_areas_df = market_areas_df.rename(columns={"E_ij": "T_j"})
+
+        mci_model = MCIModel(
+            interaction_matrix = interaction_matrix,
+            coefs = self.get_coefs_dict(),
+            mci_ols_model = self.get_mci_ols_model(),
+            market_areas_df = market_areas_df
+            )
+
+        return mci_model
+
 def load_geodata (
-
+    data,
     location_type: str,
     unique_id: str,
     x_col: str = None,
@@ -665,32 +1081,36 @@ def load_geodata (

    if location_type is None or (location_type != "origins" and location_type != "destinations"):
        raise ValueError ("location_type must be either 'origins' or 'destinations'")
-
-    if data_type not in ["shp", "csv", "xlsx"]:
-        raise ValueError ("data_type must be 'shp', 'csv' or 'xlsx'")

-    if
-        geodata_gpd_original =
+    if isinstance(data, gp.GeoDataFrame):
+        geodata_gpd_original = data
         crs_input = geodata_gpd_original.crs
+    elif isinstance(data, pd.DataFrame):
+        geodata_tab = data
+    elif isinstance(data, str):
+        if data_type == "shp":
+            geodata_gpd_original = gp.read_file(data)
+            crs_input = geodata_gpd_original.crs
+        elif data_type == "csv" or data_type == "xlsx":
+            if x_col is None:
+                raise ValueError ("Missing value for X coordinate column")
+            if y_col is None:
+                raise ValueError ("Missing value for Y coordinate column")
+        elif data_type == "csv":
+            geodata_tab = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            geodata_tab = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")

-    if data_type == "csv" or data_type == "xlsx":
-        if x_col is None:
-            raise ValueError ("Missing value for X coordinate column")
-        if y_col is None:
-            raise ValueError ("Missing value for Y coordinate column")
-
-        if data_type == "csv":
-            geodata_tab = pd.read_csv(
-                file,
-                sep = csv_sep,
-                decimal = csv_decimal,
-                encoding = csv_encoding
-                )
-
-        if data_type == "xlsx":
-            geodata_tab = pd.read_excel(file)
-
-    if data_type == "csv" or data_type == "xlsx":
+    if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
        geodata_gpd_original = gp.GeoDataFrame(
            geodata_tab,
            geometry = gp.points_from_xy(
@@ -699,8 +1119,9 @@ def load_geodata (
                ),
            crs = crs_input
            )
-
+
    crs_output = "EPSG:4326"
+
    geodata_gpd = geodata_gpd_original.to_crs(crs_output)
    geodata_gpd = geodata_gpd[[unique_id, "geometry"]]

@@ -724,13 +1145,15 @@ def load_geodata (
        geodata_object = CustomerOrigins(
            geodata_gpd,
            geodata_gpd_original,
-            metadata
+            metadata,
+            None
            )
    elif location_type == "destinations":
        geodata_object = SupplyLocations(
            geodata_gpd,
            geodata_gpd_original,
-            metadata
+            metadata,
+            None
            )

    return geodata_object
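Usage note: `load_geodata()` now also accepts an in-memory `pandas.DataFrame` or `geopandas.GeoDataFrame` in addition to a file path; the first parameter is now called `data`. A self-contained sketch with illustrative coordinates (WGS84 is the assumed default input CRS):

```python
import pandas as pd
from huff.models import load_geodata

# Illustrative origins table with lon/lat coordinates.
origins_df = pd.DataFrame({
    "origin_id": ["O1", "O2", "O3"],
    "lon": [7.84, 7.86, 7.88],
    "lat": [47.99, 48.00, 48.01],
})

customer_origins = load_geodata(
    origins_df,              # DataFrame instead of a file path
    location_type="origins",
    unique_id="origin_id",
    x_col="lon",
    y_col="lat",
)
customer_origins.summary()
```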
@@ -807,33 +1230,239 @@ def create_interaction_matrix(

    return interaction_matrix

-def
-
-
-
+def load_interaction_matrix(
+    data,
+    customer_origins_col: str,
+    supply_locations_col: str,
+    attraction_col: list,
+    transport_costs_col: str,
+    probabilities_col: str = None,
+    market_size_col: str = None,
+    customer_origins_coords_col = None,
+    supply_locations_coords_col = None,
+    data_type = "csv",
+    csv_sep = ";",
+    csv_decimal = ",",
+    csv_encoding="unicode_escape",
+    crs_input = "EPSG:4326",
+    crs_output = "EPSG:4326"
+    ):
+
+    if isinstance(data, pd.DataFrame):
+        interaction_matrix_df = data
+    elif isinstance(data, str):
+        if data_type not in ["csv", "xlsx"]:
+            raise ValueError ("data_type must be 'csv' or 'xlsx'")
+        if data_type == "csv":
+            interaction_matrix_df = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            interaction_matrix_df = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
+
+    if customer_origins_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + customer_origins_col + " not in data")
+    if supply_locations_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + supply_locations_col + " not in data")
+
+    cols_check = attraction_col + [transport_costs_col]
+    if probabilities_col is not None:
+        cols_check = cols_check + [probabilities_col]
+    if market_size_col is not None:
+        cols_check = cols_check + [market_size_col]

-
-
-
+    check_vars(
+        interaction_matrix_df,
+        cols = cols_check
+        )
+
+    if customer_origins_coords_col is not None:
+
+        if isinstance(customer_origins_coords_col, str):
+
+            if customer_origins_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + customer_origins_coords_col + " not in data.")
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab[customer_origins_coords_col].apply(lambda x: wkt.loads(x))
+            customer_origins_geodata_gpd = gp.GeoDataFrame(
+                customer_origins_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop(
+                columns = customer_origins_coords_col
+                )
+
+        elif isinstance(customer_origins_coords_col, list):
+
+            if len(customer_origins_coords_col) != 2:
+                raise ValueError ("Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = customer_origins_coords_col
+                )
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col[0], customer_origins_coords_col[1]]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab.apply(lambda row: Point(row[customer_origins_coords_col[0]], row[customer_origins_coords_col[1]]), axis=1)
+            customer_origins_geodata_gpd = gp.GeoDataFrame(customer_origins_geodata_tab, geometry="geometry")
+
+            customer_origins_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        customer_origins_geodata_gpd = interaction_matrix_df[customer_origins_col]
+        customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop_duplicates()
+
+    if market_size_col is not None:
+        customer_origins_cols = [customer_origins_col] + [market_size_col]
+    else:
+        customer_origins_cols = [customer_origins_col]
+    customer_origins_geodata_original_tab = customer_origins_geodata_tab = interaction_matrix_df[customer_origins_cols]
+
+    customer_origins_metadata = {
+        "location_type": "origins",
+        "unique_id": customer_origins_col,
+        "attraction_col": [None],
+        "marketsize_col": market_size_col,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(customer_origins_geodata_gpd)
+        }
+
+    customer_origins = CustomerOrigins(
+        geodata_gpd = customer_origins_geodata_gpd,
+        geodata_gpd_original = customer_origins_geodata_original_tab,
+        metadata = customer_origins_metadata,
+        isochrones_gdf = None
+        )
+
+    if supply_locations_coords_col is not None:
+
+        if isinstance(supply_locations_coords_col, str):
+
+            if supply_locations_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + supply_locations_coords_col + " not in data.")
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab[supply_locations_coords_col].apply(lambda x: wkt.loads(x))
+            supply_locations_geodata_gpd = gp.GeoDataFrame(
+                supply_locations_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop(
+                columns = supply_locations_coords_col
+                )
+
+        if isinstance(supply_locations_coords_col, list):
+
+            if len(supply_locations_coords_col) != 2:
+                raise ValueError ("Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = supply_locations_coords_col
+                )
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col[0], supply_locations_coords_col[1]]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab.apply(lambda row: Point(row[supply_locations_coords_col[0]], row[supply_locations_coords_col[1]]), axis=1)
+            supply_locations_geodata_gpd = gp.GeoDataFrame(supply_locations_geodata_tab, geometry="geometry")
+
+            supply_locations_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        supply_locations_geodata_gpd = interaction_matrix_df[supply_locations_col]
+        supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop_duplicates()
+
+    supply_locations_cols = [supply_locations_col] + attraction_col
+    supply_locations_geodata_original_tab = supply_locations_geodata_tab = interaction_matrix_df[supply_locations_cols]
+
+    supply_locations_metadata = {
+        "location_type": "destinations",
+        "unique_id": supply_locations_col,
+        "attraction_col": attraction_col,
+        "marketsize_col": None,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(supply_locations_geodata_gpd)
+        }
+
+    supply_locations = SupplyLocations(
+        geodata_gpd = supply_locations_geodata_gpd,
+        geodata_gpd_original = supply_locations_geodata_original_tab,
+        metadata = supply_locations_metadata,
+        isochrones_gdf = None
+        )

-
-
-
+    interaction_matrix_df = interaction_matrix_df.rename(
+        columns = {
+            customer_origins_col: "i",
+            supply_locations_col: "j",
+            attraction_col[0]: "A_j",
+            transport_costs_col: "t_ij"
+            }
+        )
+
+    if probabilities_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                probabilities_col: "p_ij"
+                }
+            )
+
+    if market_size_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                market_size_col: "C_i"
+                }
+            )
+
+    interaction_matrix = InteractionMatrix(
+        interaction_matrix_df=interaction_matrix_df,
+        customer_origins=customer_origins,
+        supply_locations=supply_locations
+        )

-
-        if (df[col] <= 0).any():
-            raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")
+    return interaction_matrix

-def
+def log_centering_transformation(
    df: pd.DataFrame,
    ref_col: str,
-    cols: list
+    cols: list,
+    suffix: str = "__LCT"
    ):

    check_vars(
        df = df,
-        cols = cols
+        cols = cols
        )
+
+    if ref_col not in df.columns:
+        raise KeyError(f"Column '{ref_col}' not in dataframe.")

    def lct (x):

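Usage note: the new `load_interaction_matrix()` builds an `InteractionMatrix` (plus `CustomerOrigins` and `SupplyLocations`) from a long-format origin-destination table, for example survey data with observed shares. A sketch; every file and column name below is a placeholder, and note the CSV defaults `csv_sep=";"` and `csv_decimal=","`:

```python
from huff.models import load_interaction_matrix

interaction_matrix = load_interaction_matrix(
    "survey_interactions.csv",           # placeholder path; a DataFrame also works
    customer_origins_col="district",
    supply_locations_col="store",
    attraction_col=["salesarea"],        # list; the first entry becomes A_j
    transport_costs_col="traveltime",
    probabilities_col="share",           # optional, mapped to p_ij
    market_size_col="purchasing_power",  # optional, mapped to C_i
    data_type="csv",
)

mci_model = interaction_matrix.mci_fit(cols=["A_j", "t_ij"])
mci_model.summary()
```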
@@ -843,9 +1472,115 @@ def mci_transformation(
        return x_lct

    for var in cols:
+
+        unique_values = df[var].unique()
+        if set(unique_values).issubset({0, 1}):
+            df[var+suffix] = df[var]
+            print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
+            continue

        var_t = df.groupby(ref_col)[var].apply(lct)
        var_t = var_t.reset_index()
-        df[var+
+        df[var+suffix] = var_t[var]
+
+    return df
+
+def get_isochrones(
+    geodata_gpd: gp.GeoDataFrame,
+    unique_id_col: str,
+    segments_minutes: list = [5, 10, 15],
+    range_type: str = "time",
+    intersections: str = "true",
+    profile: str = "driving-car",
+    donut: bool = True,
+    ors_server: str = "https://api.openrouteservice.org/v2/",
+    ors_auth: str = None,
+    timeout = 10,
+    delay = 1,
+    save_output: bool = True,
+    output_filepath: str = "isochrones.shp",
+    output_crs: str = "EPSG:4326"
+    ):
+
+    coords = [(point.x, point.y) for point in geodata_gpd.geometry]
+
+    unique_id_values = geodata_gpd[unique_id_col].values
+
+    ors_client = Client(
+        server = ors_server,
+        auth = ors_auth
+        )
+
+    isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
+
+    segments = [segment*60 for segment in segments_minutes]
+
+    i = 0
+
+    for x, y in coords:
+
+        isochrone_output = ors_client.isochrone(
+            locations = [[x, y]],
+            segments = segments,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            timeout = timeout,
+            save_output = False,
+            output_crs = output_crs
+            )
+
+        if isochrone_output.status_code != 200:
+            continue
+
+        isochrone_gdf = isochrone_output.get_isochrones_gdf()
+
+        if donut:
+            isochrone_gdf = overlay_difference(
+                polygon_gdf = isochrone_gdf,
+                sort_col = "segment"
+                )
+
+        time.sleep(delay)
+
+        isochrone_gdf[unique_id_col] = unique_id_values[i]
+
+        isochrones_gdf = pd.concat(
+            [
+                isochrones_gdf,
+                isochrone_gdf
+            ],
+            ignore_index=True
+            )
+
+        i = i+1
+
+    isochrones_gdf.set_crs(
+        output_crs,
+        allow_override=True,
+        inplace=True
+        )
+
+    if save_output:
+
+        isochrones_gdf.to_file(filename = output_filepath)
+
+    return isochrones_gdf
+
+
+def check_vars(
+    df: pd.DataFrame,
+    cols: list
+    ):

-
+    for col in cols:
+        if col not in df.columns:
+            raise KeyError(f"Column '{col}' not in dataframe.")
+
+    for col in cols:
+        if not pd.api.types.is_numeric_dtype(df[col]):
+            raise ValueError(f"Column '{col}' is not numeric. All stated columns must be numeric.")
+
+    for col in cols:
+        if (df[col] <= 0).any():
+            raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")