huff 1.3.5__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/gistools.py +8 -3
- huff/models.py +622 -75
- huff/ors.py +2 -2
- huff/osm.py +59 -42
- huff/tests/data/Haslach_new_supermarket.cpg +1 -0
- huff/tests/data/Haslach_new_supermarket.dbf +0 -0
- huff/tests/data/Haslach_new_supermarket.prj +1 -0
- huff/tests/data/Haslach_new_supermarket.qmd +26 -0
- huff/tests/data/Haslach_new_supermarket.shp +0 -0
- huff/tests/data/Haslach_new_supermarket.shx +0 -0
- huff/tests/tests_huff.py +84 -32
- {huff-1.3.5.dist-info → huff-1.4.1.dist-info}/METADATA +9 -3
- {huff-1.3.5.dist-info → huff-1.4.1.dist-info}/RECORD +15 -9
- {huff-1.3.5.dist-info → huff-1.4.1.dist-info}/WHEEL +0 -0
- {huff-1.3.5.dist-info → huff-1.4.1.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
# Author: Thomas Wieland
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
6
6
|
# mail: geowieland@googlemail.com
|
7
|
-
# Version: 1.
|
8
|
-
# Last update: 2025-06-
|
7
|
+
# Version: 1.4.1
|
8
|
+
# Last update: 2025-06-16 17:43
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
10
10
|
#-----------------------------------------------------------------------
|
11
11
|
|
@@ -17,8 +17,10 @@ from math import sqrt
|
|
17
17
|
import time
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from statsmodels.formula.api import ols
|
20
|
+
from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
|
20
21
|
from shapely.geometry import Point
|
21
22
|
from shapely import wkt
|
23
|
+
import copy
|
22
24
|
from huff.ors import Client, TimeDistanceMatrix, Isochrone
|
23
25
|
from huff.gistools import overlay_difference, distance_matrix, buffers
|
24
26
|
|
@@ -73,10 +75,12 @@ class CustomerOrigins:
|
|
73
75
|
print("Market size column " + metadata["marketsize_col"])
|
74
76
|
|
75
77
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
76
|
-
print("Transport cost weighting
|
77
|
-
|
78
|
-
print("Transport cost weighting " + metadata["weighting"][0]["
|
79
|
-
|
78
|
+
print("Transport cost weighting not defined")
|
79
|
+
elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
80
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
81
|
+
elif metadata["weighting"][0]["func"] == "logistic":
|
82
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
83
|
+
|
80
84
|
print("Unique ID column " + metadata["unique_id"])
|
81
85
|
print("Input CRS " + str(metadata["crs_input"]))
|
82
86
|
|
@@ -117,11 +121,24 @@ class CustomerOrigins:
|
|
117
121
|
|
118
122
|
metadata = self.metadata
|
119
123
|
|
124
|
+
if func not in ["power", "exponential", "logistic"]:
|
125
|
+
raise ValueError("Parameter 'func' must be 'power', 'exponential' or 'logistic'")
|
126
|
+
|
127
|
+
if isinstance(param_lambda, list) and func != "logistic":
|
128
|
+
raise ValueError("Function type "+ func + " requires one single parameter value")
|
129
|
+
|
130
|
+
if isinstance(param_lambda, (int, float)) and func == "logistic":
|
131
|
+
raise ValueError("Function type "+ func + " requires two parameters in a list")
|
132
|
+
|
120
133
|
metadata["weighting"][0]["func"] = func
|
121
|
-
metadata["weighting"][0]["param"] = param_lambda
|
122
134
|
|
123
|
-
|
135
|
+
if isinstance(param_lambda, list):
|
136
|
+
metadata["weighting"][0]["param"] = [float(param_lambda[0]), float(param_lambda[1])]
|
137
|
+
else:
|
138
|
+
metadata["weighting"][0]["param"] = float(param_lambda)
|
124
139
|
|
140
|
+
self.metadata = metadata
|
141
|
+
|
125
142
|
return self
|
126
143
|
|
127
144
|
def isochrones(
|
@@ -190,7 +207,6 @@ class CustomerOrigins:
|
|
190
207
|
|
191
208
|
return self
|
192
209
|
|
193
|
-
|
194
210
|
class SupplyLocations:
|
195
211
|
|
196
212
|
def __init__(
|
@@ -243,7 +259,7 @@ class SupplyLocations:
|
|
243
259
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
244
260
|
print("Attraction weighting not defined")
|
245
261
|
else:
|
246
|
-
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
|
262
|
+
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
|
247
263
|
|
248
264
|
print("Unique ID column " + metadata["unique_id"])
|
249
265
|
print("Input CRS " + str(metadata["crs_input"]))
|
@@ -284,7 +300,7 @@ class SupplyLocations:
|
|
284
300
|
raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
|
285
301
|
|
286
302
|
metadata["weighting"][0]["func"] = func
|
287
|
-
metadata["weighting"][0]["param"] = param_gamma
|
303
|
+
metadata["weighting"][0]["param"] = float(param_gamma)
|
288
304
|
self.metadata = metadata
|
289
305
|
|
290
306
|
return self
|
@@ -325,7 +341,11 @@ class SupplyLocations:
|
|
325
341
|
metadata = self.get_metadata()
|
326
342
|
|
327
343
|
new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
|
344
|
+
new_destinations_gpd_original["j_update"] = 1
|
345
|
+
|
328
346
|
new_destinations_gpd = new_destinations.get_geodata_gpd()
|
347
|
+
new_destinations_gpd["j_update"] = 1
|
348
|
+
|
329
349
|
new_destinations_metadata = new_destinations.get_metadata()
|
330
350
|
|
331
351
|
if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
|
@@ -333,14 +353,20 @@ class SupplyLocations:
|
|
333
353
|
if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
|
334
354
|
raise KeyError("Supply locations and new destinations data have different column names.")
|
335
355
|
|
336
|
-
geodata_gpd_original =
|
337
|
-
|
356
|
+
geodata_gpd_original = pd.concat(
|
357
|
+
[
|
358
|
+
geodata_gpd_original,
|
359
|
+
new_destinations_gpd_original
|
360
|
+
],
|
338
361
|
ignore_index=True
|
339
362
|
)
|
340
|
-
|
341
|
-
geodata_gpd =
|
342
|
-
|
343
|
-
|
363
|
+
|
364
|
+
geodata_gpd = pd.concat(
|
365
|
+
[
|
366
|
+
geodata_gpd,
|
367
|
+
new_destinations_gpd
|
368
|
+
],
|
369
|
+
ignore_index=True
|
344
370
|
)
|
345
371
|
|
346
372
|
metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
|
@@ -423,13 +449,15 @@ class InteractionMatrix:
|
|
423
449
|
self,
|
424
450
|
interaction_matrix_df,
|
425
451
|
customer_origins,
|
426
|
-
supply_locations
|
452
|
+
supply_locations,
|
453
|
+
metadata
|
427
454
|
):
|
428
455
|
|
429
456
|
self.interaction_matrix_df = interaction_matrix_df
|
430
457
|
self.customer_origins = customer_origins
|
431
458
|
self.supply_locations = supply_locations
|
432
|
-
|
459
|
+
self.metadata = metadata
|
460
|
+
|
433
461
|
def get_interaction_matrix_df(self):
|
434
462
|
return self.interaction_matrix_df
|
435
463
|
|
@@ -439,35 +467,55 @@ class InteractionMatrix:
|
|
439
467
|
def get_supply_locations(self):
|
440
468
|
return self.supply_locations
|
441
469
|
|
470
|
+
def get_metadata(self):
|
471
|
+
return self.metadata
|
472
|
+
|
442
473
|
def summary(self):
|
443
474
|
|
444
475
|
customer_origins_metadata = self.get_customer_origins().get_metadata()
|
445
476
|
supply_locations_metadata = self.get_supply_locations().get_metadata()
|
477
|
+
interaction_matrix_metadata = self.get_metadata()
|
446
478
|
|
447
479
|
print("Interaction Matrix")
|
448
480
|
print("----------------------------------")
|
449
481
|
|
450
|
-
print("Supply locations
|
482
|
+
print("Supply locations " + str(supply_locations_metadata["no_points"]))
|
451
483
|
if supply_locations_metadata["attraction_col"][0] is None:
|
452
|
-
print("Attraction column
|
484
|
+
print("Attraction column not defined")
|
453
485
|
else:
|
454
|
-
print("Attraction column
|
455
|
-
print("Customer origins
|
486
|
+
print("Attraction column " + supply_locations_metadata["attraction_col"][0])
|
487
|
+
print("Customer origins " + str(customer_origins_metadata["no_points"]))
|
456
488
|
if customer_origins_metadata["marketsize_col"] is None:
|
457
489
|
print("Market size column not defined")
|
458
490
|
else:
|
459
|
-
print("Market size column
|
491
|
+
print("Market size column " + customer_origins_metadata["marketsize_col"])
|
492
|
+
|
493
|
+
if interaction_matrix_metadata != {}:
|
494
|
+
if "transport_costs" in interaction_matrix_metadata:
|
495
|
+
print("----------------------------------")
|
496
|
+
if interaction_matrix_metadata["transport_costs"]["network"]:
|
497
|
+
print("Transport cost type Time")
|
498
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
|
499
|
+
else:
|
500
|
+
print("Transport cost type Distance")
|
501
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
|
502
|
+
|
460
503
|
print("----------------------------------")
|
461
504
|
print("Partial utilities")
|
462
|
-
print("
|
505
|
+
print(" Weights")
|
506
|
+
|
463
507
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
464
|
-
print("Attraction
|
508
|
+
print("Attraction not defined")
|
465
509
|
else:
|
466
|
-
print("Attraction
|
510
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
511
|
+
|
467
512
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
468
|
-
print("Transport costs
|
469
|
-
|
470
|
-
print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
513
|
+
print("Transport costs not defined")
|
514
|
+
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
515
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
516
|
+
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
517
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
518
|
+
|
471
519
|
print("----------------------------------")
|
472
520
|
|
473
521
|
def transport_costs(
|
@@ -487,6 +535,7 @@ class InteractionMatrix:
|
|
487
535
|
range_type = "distance"
|
488
536
|
|
489
537
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
538
|
+
interaction_matrix_metadata = self.get_metadata()
|
490
539
|
|
491
540
|
customer_origins = self.get_customer_origins()
|
492
541
|
customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
|
@@ -567,7 +616,17 @@ class InteractionMatrix:
|
|
567
616
|
if distance_unit == "kilometers":
|
568
617
|
interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
|
569
618
|
|
619
|
+
interaction_matrix_metadata["transport_costs"] = {
|
620
|
+
"network": network,
|
621
|
+
"range_type": range_type,
|
622
|
+
"time_unit": time_unit,
|
623
|
+
"distance_unit": distance_unit,
|
624
|
+
"ors_server": ors_server,
|
625
|
+
"ors_auth": ors_auth
|
626
|
+
}
|
627
|
+
|
570
628
|
self.interaction_matrix_df = interaction_matrix_df
|
629
|
+
self.metadata = interaction_matrix_metadata
|
571
630
|
|
572
631
|
return self
|
573
632
|
|
@@ -575,6 +634,8 @@ class InteractionMatrix:
|
|
575
634
|
|
576
635
|
interaction_matrix_df = self.interaction_matrix_df
|
577
636
|
|
637
|
+
interaction_matrix_metadata = self.get_metadata()
|
638
|
+
|
578
639
|
if interaction_matrix_df["t_ij"].isna().all():
|
579
640
|
raise ValueError ("Transport cost variable is not defined")
|
580
641
|
if interaction_matrix_df["A_j"].isna().all():
|
@@ -588,16 +649,20 @@ class InteractionMatrix:
|
|
588
649
|
customer_origins = self.customer_origins
|
589
650
|
customer_origins_metadata = customer_origins.get_metadata()
|
590
651
|
tc_weighting = customer_origins_metadata["weighting"][0]
|
652
|
+
|
591
653
|
if tc_weighting["func"] == "power":
|
592
654
|
interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
|
593
655
|
elif tc_weighting["func"] == "exponential":
|
594
656
|
interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
|
657
|
+
elif tc_weighting["func"] == "logistic":
|
658
|
+
interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
|
595
659
|
else:
|
596
660
|
raise ValueError ("Transport costs weighting is not defined.")
|
597
661
|
|
598
662
|
supply_locations = self.supply_locations
|
599
663
|
supply_locations_metadata = supply_locations.get_metadata()
|
600
664
|
attraction_weighting = supply_locations_metadata["weighting"][0]
|
665
|
+
|
601
666
|
if attraction_weighting["func"] == "power":
|
602
667
|
interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
|
603
668
|
elif tc_weighting["func"] == "exponential":
|
@@ -607,10 +672,15 @@ class InteractionMatrix:
|
|
607
672
|
|
608
673
|
interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
|
609
674
|
|
610
|
-
interaction_matrix_df = interaction_matrix_df.drop(columns=[
|
675
|
+
interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
|
611
676
|
|
612
|
-
|
677
|
+
interaction_matrix_metadata["model"] = {
|
678
|
+
"model_type": "Huff"
|
679
|
+
}
|
613
680
|
|
681
|
+
self.interaction_matrix_df = interaction_matrix_df
|
682
|
+
self.metadata = interaction_matrix_metadata
|
683
|
+
|
614
684
|
return self
|
615
685
|
|
616
686
|
def probabilities (self):
|
@@ -681,7 +751,6 @@ class InteractionMatrix:
|
|
681
751
|
|
682
752
|
return huff_model
|
683
753
|
|
684
|
-
|
685
754
|
def hansen(
|
686
755
|
self,
|
687
756
|
from_origins: bool = True
|
@@ -689,16 +758,34 @@ class InteractionMatrix:
|
|
689
758
|
|
690
759
|
interaction_matrix_df = self.interaction_matrix_df
|
691
760
|
|
692
|
-
if interaction_matrix_df["U_ij"].isna().all():
|
693
|
-
self.utility()
|
694
|
-
interaction_matrix_df = self.interaction_matrix_df
|
695
|
-
|
696
761
|
if from_origins:
|
762
|
+
|
763
|
+
if interaction_matrix_df["U_ij"].isna().all():
|
764
|
+
self.utility()
|
765
|
+
interaction_matrix_df = self.interaction_matrix_df
|
766
|
+
|
697
767
|
hansen_df = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum()).reset_index()
|
698
768
|
hansen_df = hansen_df.rename(columns = {"U_ij": "A_i"})
|
769
|
+
|
699
770
|
else:
|
700
|
-
|
701
|
-
|
771
|
+
|
772
|
+
if "C_i" not in interaction_matrix_df.columns or interaction_matrix_df["C_i"].isna().all():
|
773
|
+
raise ValueError("Customer origins market size is not available")
|
774
|
+
|
775
|
+
customer_origins_metadata = self.customer_origins.get_metadata()
|
776
|
+
tc_weighting = customer_origins_metadata["weighting"][0]
|
777
|
+
if tc_weighting["func"] == "power":
|
778
|
+
interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
|
779
|
+
elif tc_weighting["func"] == "exponential":
|
780
|
+
interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
|
781
|
+
elif tc_weighting["func"] == "logistic":
|
782
|
+
interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
|
783
|
+
else:
|
784
|
+
raise ValueError ("Transport costs weighting is not defined.")
|
785
|
+
|
786
|
+
interaction_matrix_df["U_ji"] = interaction_matrix_df["C_i"]*interaction_matrix_df["t_ij_weighted"]
|
787
|
+
hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ji"].sum()).reset_index()
|
788
|
+
hansen_df = hansen_df.rename(columns = {"U_ji": "A_j"})
|
702
789
|
|
703
790
|
return hansen_df
|
704
791
|
|
@@ -735,6 +822,8 @@ class InteractionMatrix:
|
|
735
822
|
|
736
823
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
737
824
|
|
825
|
+
interaction_matrix_metadata = self.get_metadata()
|
826
|
+
|
738
827
|
cols_t = [col + "__LCT" for col in cols]
|
739
828
|
|
740
829
|
if "p_ij__LCT" not in interaction_matrix_df.columns:
|
@@ -789,7 +878,8 @@ class InteractionMatrix:
|
|
789
878
|
interaction_matrix = InteractionMatrix(
|
790
879
|
interaction_matrix_df,
|
791
880
|
customer_origins,
|
792
|
-
supply_locations
|
881
|
+
supply_locations,
|
882
|
+
metadata=interaction_matrix_metadata
|
793
883
|
)
|
794
884
|
|
795
885
|
mci_model = MCIModel(
|
@@ -801,6 +891,249 @@ class InteractionMatrix:
|
|
801
891
|
|
802
892
|
return mci_model
|
803
893
|
|
894
|
+
def huff_loglik(
|
895
|
+
self,
|
896
|
+
params
|
897
|
+
):
|
898
|
+
|
899
|
+
if not isinstance(params, list):
|
900
|
+
if isinstance(params, np.ndarray):
|
901
|
+
params = params.tolist()
|
902
|
+
else:
|
903
|
+
raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
|
904
|
+
|
905
|
+
if len(params) == 2:
|
906
|
+
param_gamma, param_lambda = params
|
907
|
+
elif len(params) == 3:
|
908
|
+
param_gamma, param_lambda, param_lambda2 = params
|
909
|
+
else:
|
910
|
+
raise ValueError("Parameter 'params' must be a list with two or three parameter values")
|
911
|
+
|
912
|
+
interaction_matrix_df = self.interaction_matrix_df
|
913
|
+
|
914
|
+
supply_locations = self.supply_locations
|
915
|
+
supply_locations_metadata = supply_locations.get_metadata()
|
916
|
+
|
917
|
+
customer_origins = self.customer_origins
|
918
|
+
customer_origins_metadata = customer_origins.get_metadata()
|
919
|
+
|
920
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
921
|
+
supply_locations.metadata = supply_locations_metadata
|
922
|
+
|
923
|
+
if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
924
|
+
|
925
|
+
if len(params) == 2:
|
926
|
+
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
927
|
+
else:
|
928
|
+
raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have two input parameters")
|
929
|
+
|
930
|
+
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
931
|
+
|
932
|
+
if len(params) == 3:
|
933
|
+
customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
|
934
|
+
else:
|
935
|
+
raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have three input parameters")
|
936
|
+
|
937
|
+
customer_origins.metadata = customer_origins_metadata
|
938
|
+
|
939
|
+
p_ij_emp = interaction_matrix_df["p_ij"]
|
940
|
+
|
941
|
+
interaction_matrix_copy = copy.deepcopy(self)
|
942
|
+
|
943
|
+
interaction_matrix_copy.utility()
|
944
|
+
interaction_matrix_copy.probabilities()
|
945
|
+
|
946
|
+
interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
|
947
|
+
p_ij = interaction_matrix_df_copy["p_ij"]
|
948
|
+
|
949
|
+
LL = loglik(
|
950
|
+
observed = p_ij_emp,
|
951
|
+
expected = p_ij
|
952
|
+
)
|
953
|
+
|
954
|
+
return -LL
|
955
|
+
|
956
|
+
def ml_fit(
|
957
|
+
self,
|
958
|
+
initial_params: list = [1.0, -2.0],
|
959
|
+
method: str = "L-BFGS-B",
|
960
|
+
bounds: list = [(0.5, 1), (-3, -1)],
|
961
|
+
constraints: list = [],
|
962
|
+
update_estimates: bool = True
|
963
|
+
):
|
964
|
+
|
965
|
+
supply_locations = self.supply_locations
|
966
|
+
supply_locations_metadata = supply_locations.get_metadata()
|
967
|
+
|
968
|
+
customer_origins = self.customer_origins
|
969
|
+
customer_origins_metadata = customer_origins.get_metadata()
|
970
|
+
|
971
|
+
if len(initial_params) > 3 or len(initial_params) < 2:
|
972
|
+
raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
|
973
|
+
|
974
|
+
if len(bounds) != len(initial_params):
|
975
|
+
raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
|
976
|
+
|
977
|
+
ml_result = minimize(
|
978
|
+
self.huff_loglik,
|
979
|
+
initial_params,
|
980
|
+
method = method,
|
981
|
+
bounds = bounds,
|
982
|
+
constraints = constraints,
|
983
|
+
options={'disp': 3}
|
984
|
+
)
|
985
|
+
|
986
|
+
if ml_result.success:
|
987
|
+
|
988
|
+
fitted_params = ml_result.x
|
989
|
+
|
990
|
+
if len(initial_params) == 2:
|
991
|
+
|
992
|
+
param_gamma = fitted_params[0]
|
993
|
+
param_lambda = fitted_params[1]
|
994
|
+
param_results = [
|
995
|
+
float(param_gamma),
|
996
|
+
float(param_lambda)
|
997
|
+
]
|
998
|
+
|
999
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1000
|
+
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
1001
|
+
|
1002
|
+
elif len (initial_params) == 3:
|
1003
|
+
|
1004
|
+
param_gamma = fitted_params[0]
|
1005
|
+
param_lambda = fitted_params[1]
|
1006
|
+
param_lambda2 = fitted_params[2]
|
1007
|
+
param_results = [
|
1008
|
+
float(param_gamma),
|
1009
|
+
float(param_lambda),
|
1010
|
+
float(param_lambda2)
|
1011
|
+
]
|
1012
|
+
|
1013
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1014
|
+
customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
|
1015
|
+
customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
|
1016
|
+
|
1017
|
+
print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
|
1018
|
+
|
1019
|
+
else:
|
1020
|
+
|
1021
|
+
param_gamma = None
|
1022
|
+
param_lambda = None
|
1023
|
+
|
1024
|
+
supply_locations_metadata["weighting"][0]["param"] = param_gamma
|
1025
|
+
|
1026
|
+
if len(initial_params) == 3:
|
1027
|
+
|
1028
|
+
param_lambda2 = None
|
1029
|
+
customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
|
1030
|
+
customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
|
1031
|
+
|
1032
|
+
else:
|
1033
|
+
customer_origins_metadata["weighting"][0]["param"] = param_lambda
|
1034
|
+
|
1035
|
+
print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
|
1036
|
+
|
1037
|
+
self.supply_locations.metadata = supply_locations_metadata
|
1038
|
+
self.customer_origins.metadata = customer_origins_metadata
|
1039
|
+
|
1040
|
+
if ml_result.success and update_estimates:
|
1041
|
+
|
1042
|
+
self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
|
1043
|
+
self = self.utility()
|
1044
|
+
self = self.probabilities()
|
1045
|
+
self = self.flows()
|
1046
|
+
|
1047
|
+
return self
|
1048
|
+
|
1049
|
+
|
1050
|
+
def update(self):
|
1051
|
+
|
1052
|
+
interaction_matrix_df = self.get_interaction_matrix_df()
|
1053
|
+
|
1054
|
+
interaction_matrix_metadata = self.get_metadata()
|
1055
|
+
|
1056
|
+
customer_origins = self.get_customer_origins()
|
1057
|
+
|
1058
|
+
supply_locations = self.get_supply_locations()
|
1059
|
+
|
1060
|
+
supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
|
1061
|
+
supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
|
1062
|
+
|
1063
|
+
if len(supply_locations_geodata_gpd_new) < 1:
|
1064
|
+
raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
|
1065
|
+
|
1066
|
+
supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
|
1067
|
+
supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
|
1068
|
+
if len(supply_locations_geodata_gpd_original_new) < 1:
|
1069
|
+
raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
|
1070
|
+
|
1071
|
+
supply_locations_new = SupplyLocations(
|
1072
|
+
geodata_gpd=supply_locations_geodata_gpd_new,
|
1073
|
+
geodata_gpd_original=supply_locations_geodata_gpd_original_new,
|
1074
|
+
metadata=supply_locations.metadata,
|
1075
|
+
isochrones_gdf=supply_locations.isochrones_gdf,
|
1076
|
+
buffers_gdf=supply_locations.buffers_gdf
|
1077
|
+
)
|
1078
|
+
|
1079
|
+
interaction_matrix_new = create_interaction_matrix(
|
1080
|
+
customer_origins=customer_origins,
|
1081
|
+
supply_locations=supply_locations_new
|
1082
|
+
)
|
1083
|
+
|
1084
|
+
interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()
|
1085
|
+
|
1086
|
+
if "transport_costs" not in interaction_matrix_metadata:
|
1087
|
+
|
1088
|
+
print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
|
1089
|
+
|
1090
|
+
interaction_matrix_df = pd.concat(
|
1091
|
+
[
|
1092
|
+
interaction_matrix_df,
|
1093
|
+
interaction_matrix_new_df
|
1094
|
+
],
|
1095
|
+
ignore_index=True
|
1096
|
+
)
|
1097
|
+
|
1098
|
+
interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
|
1099
|
+
|
1100
|
+
self.interaction_matrix_df = interaction_matrix_df
|
1101
|
+
|
1102
|
+
else:
|
1103
|
+
|
1104
|
+
network = interaction_matrix_metadata["transport_costs"]["network"]
|
1105
|
+
range_type = interaction_matrix_metadata["transport_costs"]["range_type"]
|
1106
|
+
time_unit = interaction_matrix_metadata["transport_costs"]["time_unit"]
|
1107
|
+
distance_unit = interaction_matrix_metadata["transport_costs"]["distance_unit"]
|
1108
|
+
ors_server = interaction_matrix_metadata["transport_costs"]["ors_server"]
|
1109
|
+
ors_auth = interaction_matrix_metadata["transport_costs"]["ors_auth"]
|
1110
|
+
|
1111
|
+
interaction_matrix_new.transport_costs(
|
1112
|
+
network=network,
|
1113
|
+
range_type=range_type,
|
1114
|
+
time_unit=time_unit,
|
1115
|
+
distance_unit=distance_unit,
|
1116
|
+
ors_server=ors_server,
|
1117
|
+
ors_auth=ors_auth
|
1118
|
+
)
|
1119
|
+
|
1120
|
+
interaction_matrix_df = pd.concat(
|
1121
|
+
[
|
1122
|
+
interaction_matrix_df,
|
1123
|
+
interaction_matrix_new_df
|
1124
|
+
],
|
1125
|
+
ignore_index=True
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
|
1129
|
+
|
1130
|
+
self.interaction_matrix_df = interaction_matrix_df
|
1131
|
+
|
1132
|
+
self.utility()
|
1133
|
+
self.probabilities()
|
1134
|
+
self.flows()
|
1135
|
+
|
1136
|
+
return self
|
804
1137
|
|
805
1138
|
class HuffModel:
|
806
1139
|
|
@@ -857,17 +1190,52 @@ class HuffModel:
|
|
857
1190
|
else:
|
858
1191
|
print("Market size column " + customer_origins_metadata["marketsize_col"])
|
859
1192
|
print("----------------------------------")
|
1193
|
+
|
860
1194
|
print("Partial utilities")
|
861
1195
|
print(" Weights")
|
1196
|
+
|
862
1197
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
863
1198
|
print("Attraction not defined")
|
864
1199
|
else:
|
865
|
-
print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
1200
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
1201
|
+
|
866
1202
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
867
1203
|
print("Transport costs not defined")
|
868
|
-
|
869
|
-
print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1204
|
+
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
1205
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1206
|
+
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
1207
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1208
|
+
|
870
1209
|
print("----------------------------------")
|
1210
|
+
|
1211
|
+
huff_modelfit = self.modelfit()
|
1212
|
+
if huff_modelfit is not None:
|
1213
|
+
|
1214
|
+
print ("Goodness-of-fit for probabilities")
|
1215
|
+
|
1216
|
+
print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
|
1217
|
+
print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
|
1218
|
+
print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
|
1219
|
+
print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
|
1220
|
+
print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
|
1221
|
+
print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
|
1222
|
+
print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
|
1223
|
+
print("Absolute percentage errors")
|
1224
|
+
|
1225
|
+
APE_list = [
|
1226
|
+
["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
|
1227
|
+
["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
|
1228
|
+
["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
|
1229
|
+
["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
|
1230
|
+
["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
|
1231
|
+
]
|
1232
|
+
APE_df = pd.DataFrame(
|
1233
|
+
APE_list,
|
1234
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1235
|
+
)
|
1236
|
+
print(APE_df.to_string(index=False))
|
1237
|
+
|
1238
|
+
print("----------------------------------")
|
871
1239
|
|
872
1240
|
def mci_fit(
|
873
1241
|
self,
|
@@ -876,15 +1244,15 @@ class HuffModel:
|
|
876
1244
|
):
|
877
1245
|
|
878
1246
|
interaction_matrix = self.interaction_matrix
|
879
|
-
|
1247
|
+
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1248
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1249
|
+
|
880
1250
|
supply_locations = interaction_matrix.get_supply_locations()
|
881
1251
|
supply_locations_metadata = supply_locations.get_metadata()
|
882
1252
|
|
883
1253
|
customer_origins = interaction_matrix.get_customer_origins()
|
884
1254
|
customer_origins_metadata = customer_origins.get_metadata()
|
885
|
-
|
886
|
-
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
887
|
-
|
1255
|
+
|
888
1256
|
cols_t = [col + "__LCT" for col in cols]
|
889
1257
|
|
890
1258
|
if "p_ij__LCT" not in interaction_matrix_df.columns:
|
@@ -937,7 +1305,8 @@ class HuffModel:
|
|
937
1305
|
interaction_matrix = InteractionMatrix(
|
938
1306
|
interaction_matrix_df,
|
939
1307
|
customer_origins,
|
940
|
-
supply_locations
|
1308
|
+
supply_locations,
|
1309
|
+
metadata=interaction_matrix_metadata
|
941
1310
|
)
|
942
1311
|
|
943
1312
|
mci_model = MCIModel(
|
@@ -949,7 +1318,39 @@ class HuffModel:
|
|
949
1318
|
|
950
1319
|
return mci_model
|
951
1320
|
|
952
|
-
|
1321
|
+
def update(self):
    """
    Refresh the model after its interaction matrix has changed.

    Calls update() on the stored interaction matrix, keeps the object it
    returns as the new interaction matrix and re-derives the market areas
    table from it.

    Returns
    -------
    The model instance itself, so that calls can be chained.
    """
    refreshed_matrix = self.interaction_matrix.update()
    self.interaction_matrix = refreshed_matrix

    # Market areas are always taken from the refreshed matrix
    market_areas = self.interaction_matrix.marketareas()
    self.market_areas_df = market_areas.get_market_areas_df()

    return self
|
1328
|
+
|
1329
|
+
def modelfit(self):
    """
    Compute goodness-of-fit metrics for the modelled probabilities.

    The empirical ("p_ij_emp") and modelled ("p_ij") columns of the
    interaction matrix are passed to the module-level modelfit() function.

    Returns
    -------
    The result of modelfit(), or None when one of the two columns is
    missing or the calculation fails.
    """
    interaction_matrix_df = self.interaction_matrix.get_interaction_matrix_df()

    columns = interaction_matrix_df.columns
    if "p_ij" not in columns or "p_ij_emp" not in columns:
        return None

    try:
        observed = interaction_matrix_df["p_ij_emp"]
        expected = interaction_matrix_df["p_ij"]
        return modelfit(observed, expected)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still propagate; only calculation errors are reported here.
        print("Goodness-of-fit metrics could not be calculated due to NaN values.")
        return None
|
1353
|
+
|
953
1354
|
class MCIModel:
|
954
1355
|
|
955
1356
|
def __init__(
|
@@ -1005,12 +1406,19 @@ class MCIModel:
|
|
1005
1406
|
|
1006
1407
|
if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
|
1007
1408
|
|
1008
|
-
|
1009
|
-
interaction_matrix_df["p_ij_emp"],
|
1010
|
-
interaction_matrix_df["p_ij"]
|
1011
|
-
)
|
1409
|
+
try:
|
1012
1410
|
|
1013
|
-
|
1411
|
+
mci_modelfit = modelfit(
|
1412
|
+
interaction_matrix_df["p_ij_emp"],
|
1413
|
+
interaction_matrix_df["p_ij"]
|
1414
|
+
)
|
1415
|
+
|
1416
|
+
return mci_modelfit
|
1417
|
+
|
1418
|
+
except:
|
1419
|
+
|
1420
|
+
print("Goodness-of-fit metrics could not be calculated due to NaN values.")
|
1421
|
+
return None
|
1014
1422
|
|
1015
1423
|
else:
|
1016
1424
|
|
@@ -1056,7 +1464,7 @@ class MCIModel:
|
|
1056
1464
|
mci_modelfit = self.modelfit()
|
1057
1465
|
if mci_modelfit is not None:
|
1058
1466
|
|
1059
|
-
print ("Goodness-of-fit
|
1467
|
+
print ("Goodness-of-fit for probabilities")
|
1060
1468
|
|
1061
1469
|
print("Sum of squared residuals ", round(mci_modelfit[1]["SQR"], 2))
|
1062
1470
|
print("Sum of squares ", round(mci_modelfit[1]["SQT"], 2))
|
@@ -1065,12 +1473,20 @@ class MCIModel:
|
|
1065
1473
|
print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
|
1066
1474
|
print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
|
1067
1475
|
print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
|
1476
|
+
|
1068
1477
|
print("Absolute percentage errors")
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1478
|
+
APE_list = [
|
1479
|
+
["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
|
1480
|
+
["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
|
1481
|
+
["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
|
1482
|
+
["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
|
1483
|
+
["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
|
1484
|
+
]
|
1485
|
+
APE_df = pd.DataFrame(
|
1486
|
+
APE_list,
|
1487
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1488
|
+
)
|
1489
|
+
print(APE_df.to_string(index=False))
|
1074
1490
|
|
1075
1491
|
print("--------------------------------------------")
|
1076
1492
|
|
@@ -1081,6 +1497,7 @@ class MCIModel:
|
|
1081
1497
|
|
1082
1498
|
interaction_matrix = self.interaction_matrix
|
1083
1499
|
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1500
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1084
1501
|
|
1085
1502
|
if interaction_matrix_df["t_ij"].isna().all():
|
1086
1503
|
raise ValueError ("Transport cost variable is not defined")
|
@@ -1119,10 +1536,16 @@ class MCIModel:
|
|
1119
1536
|
if transformation == "ILCT":
|
1120
1537
|
interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
|
1121
1538
|
|
1539
|
+
interaction_matrix_metadata["model"] = {
|
1540
|
+
"model_type": "MCI",
|
1541
|
+
"transformation": transformation
|
1542
|
+
}
|
1543
|
+
|
1122
1544
|
interaction_matrix = InteractionMatrix(
|
1123
1545
|
interaction_matrix_df,
|
1124
1546
|
customer_origins,
|
1125
|
-
supply_locations
|
1547
|
+
supply_locations,
|
1548
|
+
metadata=interaction_matrix_metadata
|
1126
1549
|
)
|
1127
1550
|
self.interaction_matrix = interaction_matrix
|
1128
1551
|
|
@@ -1289,7 +1712,7 @@ def load_geodata (
|
|
1289
1712
|
|
1290
1713
|
geodata_gpd = geodata_gpd_original.to_crs(crs_output)
|
1291
1714
|
geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
|
1292
|
-
|
1715
|
+
|
1293
1716
|
metadata = {
|
1294
1717
|
"location_type": location_type,
|
1295
1718
|
"unique_id": unique_id,
|
@@ -1307,14 +1730,20 @@ def load_geodata (
|
|
1307
1730
|
}
|
1308
1731
|
|
1309
1732
|
if location_type == "origins":
|
1733
|
+
|
1310
1734
|
geodata_object = CustomerOrigins(
|
1311
1735
|
geodata_gpd,
|
1312
1736
|
geodata_gpd_original,
|
1313
1737
|
metadata,
|
1314
1738
|
None,
|
1315
1739
|
None
|
1316
|
-
)
|
1740
|
+
)
|
1741
|
+
|
1317
1742
|
elif location_type == "destinations":
|
1743
|
+
|
1744
|
+
geodata_gpd["j_update"] = 0
|
1745
|
+
geodata_gpd_original["j_update"] = 0
|
1746
|
+
|
1318
1747
|
geodata_object = SupplyLocations(
|
1319
1748
|
geodata_gpd,
|
1320
1749
|
geodata_gpd_original,
|
@@ -1393,10 +1822,13 @@ def create_interaction_matrix(
|
|
1393
1822
|
interaction_matrix_df["p_ij"] = None
|
1394
1823
|
interaction_matrix_df["E_ij"] = None
|
1395
1824
|
|
1825
|
+
metadata = {}
|
1826
|
+
|
1396
1827
|
interaction_matrix = InteractionMatrix(
|
1397
1828
|
interaction_matrix_df,
|
1398
1829
|
customer_origins,
|
1399
|
-
supply_locations
|
1830
|
+
supply_locations,
|
1831
|
+
metadata
|
1400
1832
|
)
|
1401
1833
|
|
1402
1834
|
return interaction_matrix
|
@@ -1407,6 +1839,7 @@ def load_interaction_matrix(
|
|
1407
1839
|
supply_locations_col: str,
|
1408
1840
|
attraction_col: list,
|
1409
1841
|
transport_costs_col: str,
|
1842
|
+
flows_col: str = None,
|
1410
1843
|
probabilities_col: str = None,
|
1411
1844
|
market_size_col: str = None,
|
1412
1845
|
customer_origins_coords_col = None,
|
@@ -1444,6 +1877,8 @@ def load_interaction_matrix(
|
|
1444
1877
|
raise KeyError ("Column " + supply_locations_col + " not in data")
|
1445
1878
|
|
1446
1879
|
cols_check = attraction_col + [transport_costs_col]
|
1880
|
+
if flows_col is not None:
|
1881
|
+
cols_check = cols_check + [flows_col]
|
1447
1882
|
if probabilities_col is not None:
|
1448
1883
|
cols_check = cols_check + [probabilities_col]
|
1449
1884
|
if market_size_col is not None:
|
@@ -1600,6 +2035,13 @@ def load_interaction_matrix(
|
|
1600
2035
|
}
|
1601
2036
|
)
|
1602
2037
|
|
2038
|
+
if flows_col is not None:
|
2039
|
+
interaction_matrix_df = interaction_matrix_df.rename(
|
2040
|
+
columns = {
|
2041
|
+
flows_col: "E_ij"
|
2042
|
+
}
|
2043
|
+
)
|
2044
|
+
|
1603
2045
|
if probabilities_col is not None:
|
1604
2046
|
interaction_matrix_df = interaction_matrix_df.rename(
|
1605
2047
|
columns = {
|
@@ -1613,15 +2055,68 @@ def load_interaction_matrix(
|
|
1613
2055
|
market_size_col: "C_i"
|
1614
2056
|
}
|
1615
2057
|
)
|
1616
|
-
|
2058
|
+
|
2059
|
+
metadata = {}
|
2060
|
+
|
1617
2061
|
interaction_matrix = InteractionMatrix(
|
1618
2062
|
interaction_matrix_df=interaction_matrix_df,
|
1619
2063
|
customer_origins=customer_origins,
|
1620
|
-
supply_locations=supply_locations
|
2064
|
+
supply_locations=supply_locations,
|
2065
|
+
metadata=metadata
|
1621
2066
|
)
|
1622
2067
|
|
1623
2068
|
return interaction_matrix
|
1624
2069
|
|
2070
|
+
def market_shares(
    df: pd.DataFrame,
    turnover_col: str,
    ref_col: str = None,
    marketshares_col: str = "p_ij"
    ):
    """
    Add a column with each row's share of total turnover.

    Parameters
    ----------
    df
        Input data. It is not modified; a new DataFrame is returned.
    turnover_col
        Column holding the turnover (or flow) values. It is validated with
        check_vars(), which is defined elsewhere in this module.
    ref_col
        Optional grouping column. If given, shares are computed relative to
        the turnover total of each group; otherwise relative to the total
        of the whole column.
    marketshares_col
        Name of the column that receives the shares.

    Returns
    -------
    A copy of df with a fresh RangeIndex and the shares column added.

    Raises
    ------
    KeyError
        If ref_col is given but is not a column of df.
    """
    check_vars(
        df = df,
        cols = [turnover_col]
        )

    if ref_col is not None and ref_col not in df.columns:
        raise KeyError(f"Column '{ref_col}' not in dataframe.")

    # reset_index() returns a new object, so the caller's frame is never
    # touched, and the result keeps the RangeIndex a merge would produce.
    shares_df = df.reset_index(drop=True)

    if ref_col is None:
        total = shares_df[turnover_col].sum()
    else:
        # transform("sum") broadcasts each group's total back onto its rows,
        # so no helper column or merge is needed (and an existing "total"
        # column in df cannot collide with it).
        total = shares_df.groupby(ref_col)[turnover_col].transform("sum")

    shares_df[marketshares_col] = shares_df[turnover_col] / total

    return shares_df
|
2119
|
+
|
1625
2120
|
def log_centering_transformation(
|
1626
2121
|
df: pd.DataFrame,
|
1627
2122
|
ref_col: str,
|
@@ -1652,12 +2147,18 @@ def log_centering_transformation(
|
|
1652
2147
|
print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
|
1653
2148
|
continue
|
1654
2149
|
|
2150
|
+
if (df[var] <= 0).any():
|
2151
|
+
df[var+suffix] = float("nan")
|
2152
|
+
print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
|
2153
|
+
continue
|
2154
|
+
|
1655
2155
|
var_t = df.groupby(ref_col)[var].apply(lct)
|
1656
2156
|
var_t = var_t.reset_index()
|
1657
2157
|
df[var+suffix] = var_t[var]
|
1658
2158
|
|
1659
2159
|
return df
|
1660
2160
|
|
2161
|
+
|
1661
2162
|
def get_isochrones(
|
1662
2163
|
geodata_gpd: gp.GeoDataFrame,
|
1663
2164
|
unique_id_col: str,
|
@@ -1748,7 +2249,8 @@ def get_isochrones(
|
|
1748
2249
|
|
1749
2250
|
def modelfit(
|
1750
2251
|
observed,
|
1751
|
-
expected
|
2252
|
+
expected,
|
2253
|
+
remove_nan: bool = True
|
1752
2254
|
):
|
1753
2255
|
|
1754
2256
|
observed_no = len(observed)
|
@@ -1763,7 +2265,28 @@ def modelfit(
|
|
1763
2265
|
if not isinstance(expected, np.number):
|
1764
2266
|
if not is_numeric_dtype(expected):
|
1765
2267
|
raise ValueError("Expected column is not numeric")
|
1766
|
-
|
2268
|
+
|
2269
|
+
if remove_nan:
|
2270
|
+
|
2271
|
+
obs_exp = pd.DataFrame(
|
2272
|
+
{
|
2273
|
+
"observed": observed,
|
2274
|
+
"expected": expected
|
2275
|
+
}
|
2276
|
+
)
|
2277
|
+
|
2278
|
+
obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
|
2279
|
+
|
2280
|
+
observed = obs_exp_clean["observed"].to_numpy()
|
2281
|
+
expected = obs_exp_clean["expected"].to_numpy()
|
2282
|
+
|
2283
|
+
else:
|
2284
|
+
|
2285
|
+
if np.isnan(observed).any():
|
2286
|
+
raise ValueError("Vector with observed data contains NaN")
|
2287
|
+
if np.isnan(expected).any():
|
2288
|
+
raise ValueError("Vector with expected data contains NaN")
|
2289
|
+
|
1767
2290
|
residuals = np.array(observed)-np.array(expected)
|
1768
2291
|
residuals_sq = residuals**2
|
1769
2292
|
residuals_abs = abs(residuals)
|
@@ -1780,7 +2303,7 @@ def modelfit(
|
|
1780
2303
|
})
|
1781
2304
|
|
1782
2305
|
SQR = float(np.sum(residuals_sq))
|
1783
|
-
SAR = float(np.sum(residuals_abs))
|
2306
|
+
SAR = float(np.sum(residuals_abs))
|
1784
2307
|
observed_mean = float(np.sum(observed)/observed_no)
|
1785
2308
|
SQT = float(np.sum((observed-observed_mean)**2))
|
1786
2309
|
Rsq = float(1-(SQR/SQT))
|
@@ -1789,11 +2312,16 @@ def modelfit(
|
|
1789
2312
|
MAE = float(SAR/observed_no)
|
1790
2313
|
MAPE = float(np.mean(APE))
|
1791
2314
|
|
1792
|
-
resid_below5 = float(len([APE < 5])/expected_no*100)
|
1793
|
-
resid_below10 = float(len([APE < 10])/expected_no*100)
|
1794
|
-
resid_below15 = float(len([APE < 15])/expected_no*100)
|
1795
|
-
resid_below20 = float(len([APE < 20])/expected_no*100)
|
1796
|
-
resid_below25 = float(len([APE < 25])/expected_no*100)
|
2315
|
+
resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
|
2316
|
+
resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
|
2317
|
+
resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
|
2318
|
+
resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
|
2319
|
+
resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
|
2320
|
+
resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
|
2321
|
+
resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
|
2322
|
+
resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
|
2323
|
+
resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
|
2324
|
+
resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
|
1797
2325
|
|
1798
2326
|
data_lossfunctions = {
|
1799
2327
|
"SQR": SQR,
|
@@ -1809,7 +2337,12 @@ def modelfit(
|
|
1809
2337
|
"resid_below10": resid_below10,
|
1810
2338
|
"resid_below15": resid_below15,
|
1811
2339
|
"resid_below20": resid_below20,
|
1812
|
-
"resid_below25": resid_below25
|
2340
|
+
"resid_below25": resid_below25,
|
2341
|
+
"resid_below30": resid_below30,
|
2342
|
+
"resid_below35": resid_below35,
|
2343
|
+
"resid_below40": resid_below40,
|
2344
|
+
"resid_below45": resid_below45,
|
2345
|
+
"resid_below50": resid_below50,
|
1813
2346
|
}
|
1814
2347
|
}
|
1815
2348
|
|
@@ -1820,6 +2353,20 @@ def modelfit(
|
|
1820
2353
|
|
1821
2354
|
return modelfit_results
|
1822
2355
|
|
2356
|
+
def loglik(
    observed,
    expected
    ):
    """
    Return the negated sum of the logarithms of the squared residuals.

    The squared residuals are read from the "residuals_sq" entry of the
    first element returned by modelfit(observed, expected).

    NOTE(review): a residual of exactly zero yields log(0) = -inf in NumPy,
    so the function then returns inf - confirm callers can handle that.
    """
    squared_residuals = modelfit(observed, expected)[0]["residuals_sq"]

    return -np.sum(np.log(squared_residuals))
|
1823
2370
|
|
1824
2371
|
def check_vars(
|
1825
2372
|
df: pd.DataFrame,
|