huff-1.4.1-py3-none-any.whl → huff-1.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/models.py +1020 -163
- huff/tests/data/Wieland2015.xlsx +0 -0
- huff/tests/tests_huff.py +142 -24
- {huff-1.4.1.dist-info → huff-1.5.1.dist-info}/METADATA +13 -3
- {huff-1.4.1.dist-info → huff-1.5.1.dist-info}/RECORD +7 -7
- {huff-1.4.1.dist-info → huff-1.5.1.dist-info}/WHEEL +0 -0
- {huff-1.4.1.dist-info → huff-1.5.1.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
 # Author: Thomas Wieland
 # ORCID: 0000-0001-5168-9846
 # mail: geowieland@googlemail.com
-# Version: 1.
-# Last update: 2025-
+# Version: 1.5.1
+# Last update: 2025-07-01 17:10
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
 
@@ -119,6 +119,24 @@ class CustomerOrigins:
             param_lambda = -2
             ):
 
+        """
+        metadata["weighting"] = {
+            0: {
+                "name": "t_ij",
+                "func": "power",
+                "param": -2
+                }
+            }
+
+        metadata["weighting"] = {
+            0: {
+                "name": "t_ij",
+                "func": "logistic",
+                "param": [10, -0.5]
+                }
+            }
+        """
+
         metadata = self.metadata
 
         if func not in ["power", "exponential", "logistic"]:
@@ -130,6 +148,7 @@ class CustomerOrigins:
         if isinstance(param_lambda, (int, float)) and func == "logistic":
             raise ValueError("Function type "+ func + " requires two parameters in a list")
 
+        metadata["weighting"][0]["name"] = "t_ij"
         metadata["weighting"][0]["func"] = func
 
         if isinstance(param_lambda, list):
@@ -299,8 +318,10 @@ class SupplyLocations:
         if metadata["attraction_col"] is None:
             raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
 
+        metadata["weighting"][0]["name"] = "A_j"
         metadata["weighting"][0]["func"] = func
         metadata["weighting"][0]["param"] = float(param_gamma)
+
         self.metadata = metadata
 
         return self
@@ -323,6 +344,7 @@ class SupplyLocations:
         metadata["attraction_col"] = metadata["attraction_col"] + [var]
 
         metadata["weighting"][new_key] = {
+            "name": var,
             "func": func,
             "param": param
             }
@@ -490,34 +512,84 @@ class InteractionMatrix:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
 
-        if interaction_matrix_metadata != {}:
-
-
-
-
-
-
-            print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
+        if interaction_matrix_metadata != {} and "transport_costs" in interaction_matrix_metadata:
+            print("----------------------------------")
+            if interaction_matrix_metadata["transport_costs"]["network"]:
+                print("Transport cost type Time")
+                print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
+            else:
+                print("Transport cost type Distance")
+                print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
 
         print("----------------------------------")
         print("Partial utilities")
         print(" Weights")
-
+
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
             print("Attraction not defined")
         else:
-
+            if supply_locations_metadata["weighting"][0]["param"] is not None:
+                print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Attraction NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
 
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
             print("Transport costs not defined")
         elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
-
+            if customer_origins_metadata["weighting"][0]["param"] is not None:
+                print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
-
+            if customer_origins_metadata["weighting"][0]["param"] is not None:
+                print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+
+        attrac_vars = supply_locations_metadata["attraction_col"]
+        attrac_vars_no = len(attrac_vars)
+
+        if attrac_vars_no > 1:
+
+            for key, attrac_var in enumerate(attrac_vars):
+
+                if key == 0:
+                    continue
+
+                if key not in supply_locations_metadata["weighting"].keys():
+
+                    print(f"{attrac_vars[key][:16]:16} not defined")
+
+                else:
+
+                    if supply_locations_metadata["weighting"][key]["func"] is None and supply_locations_metadata["weighting"][key]["param"]:
+
+                        print(f"{attrac_vars[key][:16]:16} not defined")
+
+                    else:
+
+                        if supply_locations_metadata["weighting"][key]["param"] is not None:
+
+                            name = supply_locations_metadata["weighting"][key]["name"]
+                            param = supply_locations_metadata["weighting"][key]["param"]
+                            func = supply_locations_metadata["weighting"][key]["func"]
+
+                            print(f"{name[:16]:16} {round(param, 3)} ({func})")
+
+                        else:
+
+                            print(f"{attrac_vars[key][:16]:16} NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+
 
         print("----------------------------------")
 
+        if interaction_matrix_metadata != {} and "fit" in interaction_matrix_metadata and interaction_matrix_metadata["fit"]["function"] is not None:
+            print("Parameter estimation")
+            print("Fit function " + interaction_matrix_metadata["fit"]["function"])
+            print("Fit by " + interaction_matrix_metadata["fit"]["fit_by"])
+            if interaction_matrix_metadata["fit"]["function"] == "huff_ml_fit":
+                print("Fit method " + interaction_matrix_metadata["fit"]["method"] + " (Converged: " + str(interaction_matrix_metadata["fit"]["minimize_success"]) + ")")
+
     def transport_costs(
         self,
         network: bool = True,
@@ -630,12 +702,80 @@ class InteractionMatrix:
 
         return self
 
+    def define_weightings(
+        self,
+        vars_funcs: dict
+        ):
+
+        """
+        vars_funcs = {
+            0: {
+                "name": "A_j",
+                "func": "power",
+                "param": 1
+                },
+            1: {
+                "name": "t_ij",
+                "func": "logistic"
+                },
+            2: {
+                "name": "second_attraction_variable",
+                "func": "power"
+                },
+            3: {
+                "name": "third_attraction_variable",
+                "func": "exponential"
+                },
+            ...
+            }
+        """
+
+        supply_locations_metadata = self.supply_locations.metadata
+        customer_origins_metadata = self.customer_origins.metadata
+
+        supply_locations_metadata["weighting"][0]["name"] = vars_funcs[0]["name"]
+        supply_locations_metadata["weighting"][0]["func"] = vars_funcs[0]["func"]
+        if "param" in vars_funcs[0]:
+            supply_locations_metadata["weighting"][0]["param"] = vars_funcs[0]["param"]
+
+        customer_origins_metadata["weighting"][0]["name"] = vars_funcs[1]["name"]
+        customer_origins_metadata["weighting"][0]["func"] = vars_funcs[1]["func"]
+        if "param" in vars_funcs[1]:
+            customer_origins_metadata["weighting"][0]["param"] = vars_funcs[1]["param"]
+
+        if len(vars_funcs) > 2:
+
+            for key, var in vars_funcs.items():
+
+                if key < 2:
+                    continue
+
+                if key not in supply_locations_metadata["weighting"]:
+                    supply_locations_metadata["weighting"][key-1] = {
+                        "name": "attrac"+str(key),
+                        "func": "power",
+                        "param": None
+                        }
+
+                supply_locations_metadata["weighting"][key-1]["name"] = var["name"]
+                supply_locations_metadata["weighting"][key-1]["func"] = var["func"]
+
+                if "param" in var:
+                    supply_locations_metadata["weighting"][key-1]["param"] = var["param"]
+
+        self.supply_locations.metadata = supply_locations_metadata
+        self.customer_origins.metadata = customer_origins_metadata
+
     def utility(self):
 
         interaction_matrix_df = self.interaction_matrix_df
 
         interaction_matrix_metadata = self.get_metadata()
 
+        if "t_ij" not in interaction_matrix_df.columns:
+            raise ValueError ("No transport cost variable in interaction matrix")
+        if "A_j" not in interaction_matrix_df.columns:
+            raise ValueError ("No attraction variable in interaction matrix")
         if interaction_matrix_df["t_ij"].isna().all():
             raise ValueError ("Transport cost variable is not defined")
         if interaction_matrix_df["A_j"].isna().all():
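The new `InteractionMatrix.define_weightings()` method shown in the hunk above sets all weighting functions in one call: key 0 addresses the attraction variable (`A_j`), key 1 the transport cost variable (`t_ij`), and keys >= 2 any additional attraction variables. A minimal usage sketch, assuming an existing `interaction_matrix` object and a hypothetical extra attraction column named `sales_area` (both names are illustrative, not from the package):

    interaction_matrix.define_weightings(
        {
            0: {"name": "A_j", "func": "power", "param": 1.0},     # attraction
            1: {"name": "t_ij", "func": "power", "param": -2.0},   # transport costs
            2: {"name": "sales_area", "func": "power"}             # extra variable, param set or fitted later
            }
        )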
@@ -670,6 +810,33 @@ class InteractionMatrix:
         else:
             raise ValueError ("Attraction weighting is not defined.")
 
+        attrac_vars = supply_locations_metadata["attraction_col"]
+        attrac_vars_no = len(attrac_vars)
+        attrac_var_key = 0
+
+        if attrac_vars_no > 1:
+
+            for key, attrac_var in enumerate(attrac_vars):
+
+                attrac_var_key = key #+1
+                if attrac_var_key == 0: #1:
+                    continue
+
+                name = supply_locations_metadata["weighting"][attrac_var_key]["name"]
+                param = supply_locations_metadata["weighting"][attrac_var_key]["param"]
+                func = supply_locations_metadata["weighting"][attrac_var_key]["func"]
+
+                if func == "power":
+                    interaction_matrix_df[name+"_weighted"] = interaction_matrix_df[name] ** param
+                elif func == "exponential":
+                    interaction_matrix_df[name+"_weighted"] = np.exp(param * interaction_matrix_df[name])
+                else:
+                    raise ValueError ("Weighting for " + name + " is not defined.")
+
+                interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df[name+"_weighted"]
+
+                interaction_matrix_df = interaction_matrix_df.drop(columns=[name+"_weighted"])
+
         interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
 
         interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
@@ -687,7 +854,7 @@ class InteractionMatrix:
 
         interaction_matrix_df = self.interaction_matrix_df
 
-        if interaction_matrix_df["U_ij"].isna().all():
+        if "U_ij" not in interaction_matrix_df.columns or interaction_matrix_df["U_ij"].isna().all():
             self.utility()
             interaction_matrix_df = self.interaction_matrix_df
 
@@ -713,6 +880,8 @@ class InteractionMatrix:
 
         interaction_matrix_df = self.interaction_matrix_df
 
+        if "C_i" not in interaction_matrix_df.columns:
+            raise ValueError ("No market size variable in interaction matrix")
         if interaction_matrix_df["C_i"].isna().all():
             raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
 
@@ -737,7 +906,8 @@ class InteractionMatrix:
 
         check_vars(
             df = interaction_matrix_df,
-            cols = ["E_ij"]
+            cols = ["E_ij"],
+            check_zero = False
             )
 
         market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
@@ -875,6 +1045,15 @@ class InteractionMatrix:
 
         customer_origins.metadata = customer_origins_metadata
         supply_locations.metadata = supply_locations_metadata
+
+        interaction_matrix_metadata = {
+            "fit": {
+                "function": "mci_fit",
+                "fit_by": "probabilities",
+                "method": "OLS"
+                }
+            }
+
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
@@ -891,74 +1070,132 @@ class InteractionMatrix:
 
         return mci_model
 
-    def
+    def loglik(
         self,
-        params
+        params,
+        fit_by = "probabilities"
         ):
+
+        if fit_by not in ["probabilities", "flows"]:
+            raise ValueError ("Parameter 'fit_by' must be 'probabilities' or 'flows'")
 
         if not isinstance(params, list):
             if isinstance(params, np.ndarray):
                 params = params.tolist()
             else:
-                raise ValueError("Parameter 'params' must be a list or np.ndarray with
+                raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
 
-        if len(params)
-
-
-
-
-
+        if len(params) < 2:
+            raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
+
+        customer_origins = self.customer_origins
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        param_gamma, param_lambda = params[0], params[1]
+
+        if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+
+            if len(params) < 3:
+                raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
+
+            param_gamma, param_lambda, param_lambda2 = params[0], params[1], params[2]
 
         interaction_matrix_df = self.interaction_matrix_df
 
         supply_locations = self.supply_locations
-        supply_locations_metadata = supply_locations.get_metadata()
-
-        customer_origins = self.customer_origins
-        customer_origins_metadata = customer_origins.get_metadata()
+        supply_locations_metadata = supply_locations.get_metadata()
 
         supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
         supply_locations.metadata = supply_locations_metadata
 
         if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
 
-            if len(params)
+            if len(params) >= 2:
+
                 customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
+
             else:
-
+
+                raise ValueError ("Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
 
         elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
 
-            if len(params)
+            if len(params) >= 3:
+
                 customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
+
             else:
-
-
+
+                raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
+
+        if (customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"] and len(params) > 2):
+
+            for key, param in enumerate(params):
+
+                if key <= 1:
+                    continue
+
+                supply_locations_metadata["weighting"][key-1]["param"] = float(param)
+
+        if (customer_origins_metadata["weighting"][0]["func"] == "logistic" and len(params) > 3):
+
+            for key, param in enumerate(params):
+
+                if key <= 2:
+                    continue
+
+                supply_locations_metadata["weighting"][key-2]["param"] = float(param)
+
         customer_origins.metadata = customer_origins_metadata
-
-        p_ij_emp
+
+        if "p_ij_emp" not in interaction_matrix_df.columns:
+            p_ij_emp = interaction_matrix_df["p_ij"]
+        else:
+            p_ij_emp = interaction_matrix_df["p_ij_emp"]
+
+        if "E_ij_emp" not in interaction_matrix_df.columns:
+            E_ij_emp = interaction_matrix_df["E_ij"]
+        else:
+            E_ij_emp = interaction_matrix_df["E_ij_emp"]
 
         interaction_matrix_copy = copy.deepcopy(self)
 
         interaction_matrix_copy.utility()
         interaction_matrix_copy.probabilities()
+        interaction_matrix_copy.flows()
 
         interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
-        p_ij = interaction_matrix_df_copy["p_ij"]
 
-
-
+        if fit_by == "flows":
+
+            E_ij = interaction_matrix_df_copy["E_ij"]
+
+            observed = E_ij_emp
+            expected = E_ij
+
+        else:
+
+            p_ij = interaction_matrix_df_copy["p_ij"]
+
+            observed = p_ij_emp
             expected = p_ij
-            )
 
+        modelfit_metrics = modelfit(
+            observed = observed,
+            expected = expected
+            )
+
+        LL = modelfit_metrics[1]["LL"]
+
         return -LL
 
-    def
+    def huff_ml_fit(
         self,
         initial_params: list = [1.0, -2.0],
         method: str = "L-BFGS-B",
         bounds: list = [(0.5, 1), (-3, -1)],
         constraints: list = [],
+        fit_by = "probabilities",
         update_estimates: bool = True
         ):
 
@@ -967,41 +1204,62 @@ class InteractionMatrix:
 
         customer_origins = self.customer_origins
         customer_origins_metadata = customer_origins.get_metadata()
+
+        if customer_origins_metadata["weighting"][0]["param"] is None:
+            params_metadata_customer_origins = 1
+        else:
+            if customer_origins_metadata["weighting"][0]["param"] is not None:
+                if isinstance(customer_origins_metadata["weighting"][0]["param"], (int, float)):
+                    params_metadata_customer_origins = 1
+                else:
+                    params_metadata_customer_origins = len(customer_origins_metadata["weighting"][0]["param"])
+
+            if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+                params_metadata_customer_origins = 2
+            else:
+                params_metadata_customer_origins = 1
+
+        params_metadata_supply_locations = len(supply_locations_metadata["weighting"])
 
-
-
+        params_metadata = params_metadata_customer_origins+params_metadata_supply_locations
+
+        if len(initial_params) < 2 or len(initial_params) != params_metadata:
+            raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
 
         if len(bounds) != len(initial_params):
             raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
-
+
         ml_result = minimize(
-            self.
+            self.loglik,
             initial_params,
+            args=fit_by,
             method = method,
             bounds = bounds,
             constraints = constraints,
            options={'disp': 3}
            )
 
+        attrac_vars = len(supply_locations_metadata["weighting"])
+
         if ml_result.success:
 
             fitted_params = ml_result.x
 
-
+            param_gamma = fitted_params[0]
+            supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
 
-
+            if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+
                 param_lambda = fitted_params[1]
                 param_results = [
                     float(param_gamma),
                     float(param_lambda)
                     ]
-
-                supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+
                 customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
 
-            elif
+            elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
 
-                param_gamma = fitted_params[0]
                 param_lambda = fitted_params[1]
                 param_lambda2 = fitted_params[2]
                 param_results = [
@@ -1010,43 +1268,91 @@ class InteractionMatrix:
                     float(param_lambda2)
                     ]
 
-                supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
                 customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
-                customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
+                customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
 
+            if attrac_vars > 1:
+
+                if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+                    fitted_params_add = 3
+                else:
+                    fitted_params_add = 2
+
+                for key, var in supply_locations_metadata["weighting"].items():
+
+                    if key > len(supply_locations_metadata["weighting"])-fitted_params_add:
+                        break
+
+                    param = float(fitted_params[key+fitted_params_add])
+
+                    param_results = param_results + [param]
+
+                    supply_locations_metadata["weighting"][(key+1)]["param"] = float(param)
+
             print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
 
         else:
 
-            param_gamma = None
-            param_lambda = None
+            # param_gamma = None
+            # param_lambda = None
 
-            supply_locations_metadata["weighting"][0]["param"] = param_gamma
+            # supply_locations_metadata["weighting"][0]["param"] = param_gamma
 
-            if
+            # if customer_origins_metadata["weighting"][0]["func"] == "logistic":
 
-
-
-
+            #     param_lambda2 = None
+            #     customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
+            #     customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
 
-            else:
-
+            # else:
+
+            #     customer_origins_metadata["weighting"][0]["param"] = param_lambda
 
             print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
 
         self.supply_locations.metadata = supply_locations_metadata
-        self.customer_origins.metadata = customer_origins_metadata
+        self.customer_origins.metadata = customer_origins_metadata
 
-        if
-
-
-
-
-
+        if update_estimates:
+
+            if "p_ij_emp" not in self.interaction_matrix_df.columns:
+                self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
+                print("Probabilties in interaction matrix are treated as empirical probabilties")
+            else:
+                print("Interaction matrix contains empirical probabilties")
+
+            if "E_ij_emp" not in self.interaction_matrix_df.columns:
+                self.interaction_matrix_df["E_ij_emp"] = self.interaction_matrix_df["E_ij"]
+                print("Customer interactions in interaction matrix are treated as empirical interactions")
+            else:
+                print("Interaction matrix contains empirical customer interactions")
+
+            if np.isnan(ml_result.x).any():
+
+                print("No update of estimates because fit parameters contain NaN")
+
+                update_estimates = False
+
+            else:
+
+                self = self.utility()
+                self = self.probabilities()
+                self = self.flows()
+
+        self.metadata["fit"] = {
+            "function": "huff_ml_fit",
+            "fit_by": fit_by,
+            "initial_params": initial_params,
+            "method": method,
+            "bounds": bounds,
+            "constraints": constraints,
+            "minimize_success": ml_result.success,
+            "minimize_fittedparams": ml_result.x,
+            "update_estimates": update_estimates
+            }
 
         return self
 
-
     def update(self):
 
         interaction_matrix_df = self.get_interaction_matrix_df()
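As the hunk above shows, `InteractionMatrix.huff_ml_fit()` minimizes the negative log-likelihood returned by `loglik()` via `scipy.optimize.minimize`, with the attraction parameter first and the transport cost parameter(s) after it, and it can now fit against either probabilities or flows. A sketch under the assumption of a power or exponential transport cost weighting (the `interaction_matrix` name is illustrative):

    interaction_matrix = interaction_matrix.huff_ml_fit(
        initial_params=[1.0, -2.0],     # [gamma (attraction), lambda (transport costs)]
        bounds=[(0.5, 1), (-3, -1)],
        method="L-BFGS-B",
        fit_by="flows",                 # or "probabilities" (default)
        update_estimates=True
        )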
@@ -1135,6 +1441,70 @@ class InteractionMatrix:
 
         return self
 
+class MarketAreas:
+
+    def __init__(
+        self,
+        market_areas_df,
+        metadata
+        ):
+
+        self.market_areas_df = market_areas_df
+        self.metadata = metadata
+
+    def get_market_areas_df(self):
+        return self.market_areas_df
+
+    def get_metadata(self):
+        return self.metadata
+
+    def add_to_model(
+        self,
+        model_object,
+        output_model = "Huff"
+        ):
+
+        if not isinstance(model_object, (HuffModel, MCIModel, InteractionMatrix)):
+            raise ValueError("Parameter 'interaction_matrix' must be of class HuffModel, MCIModel, or InteractionMatrix")
+
+        if isinstance(model_object, MCIModel):
+
+            model = MCIModel(
+                interaction_matrix = model_object.interaction_matrix,
+                coefs = model_object.get_coefs_dict(),
+                mci_ols_model = model_object.get_mci_ols_model(),
+                market_areas_df = self.market_areas_df
+                )
+
+        elif isinstance(model_object, HuffModel):
+
+            model = HuffModel(
+                interaction_matrix = model_object.interaction_matrix,
+                market_areas_df = self.market_areas_df
+                )
+
+        elif isinstance(model_object, InteractionMatrix):
+
+            if output_model not in ["Huff", "MCI"]:
+                raise ValueError("Parameter 'output_model' must be either 'Huff' or 'MCI'")
+
+            if output_model == "Huff":
+
+                model = HuffModel(
+                    interaction_matrix=model_object,
+                    market_areas_df=self.market_areas_df
+                    )
+
+            if output_model == "MCI":
+
+                model = MCIModel(
+                    coefs=model_object.coefs,
+                    mci_ols_model=model_object.mci_ols_model,
+                    market_areas_df=self.market_areas_df
+                    )
+
+        return model
+
 class HuffModel:
 
     def __init__(
@@ -1168,74 +1538,119 @@ class HuffModel:
         return customer_origins
 
     def get_market_areas_df(self):
+
         return self.market_areas_df
-
+
     def summary(self):
 
         interaction_matrix = self.interaction_matrix
 
         customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
         supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+        interaction_matrix_metadata = interaction_matrix.get_metadata()
 
         print("Huff Model")
         print("----------------------------------")
-        print("Supply locations
+        print("Supply locations " + str(supply_locations_metadata["no_points"]))
         if supply_locations_metadata["attraction_col"][0] is None:
-            print("Attraction column
+            print("Attraction column not defined")
         else:
-            print("Attraction column
-        print("Customer origins
+            print("Attraction column " + supply_locations_metadata["attraction_col"][0])
+        print("Customer origins " + str(customer_origins_metadata["no_points"]))
         if customer_origins_metadata["marketsize_col"] is None:
-            print("Market size column
+            print("Market size column not defined")
        else:
-            print("Market size column
+            print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
 
         print("Partial utilities")
-        print("
+        print(" Weights")
 
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("Attraction
+            print("Attraction not defined")
         else:
-
+            if supply_locations_metadata["weighting"][0]["param"] is not None:
+                print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Attraction NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
 
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("Transport costs
+            print("Transport costs not defined")
         elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
-
+            if customer_origins_metadata["weighting"][0]["param"] is not None:
+                print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
-
+            if customer_origins_metadata["weighting"][0]["param"] is not None:
+                print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            else:
+                print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
 
-
+        attrac_vars = supply_locations_metadata["attraction_col"]
+        attrac_vars_no = len(attrac_vars)
 
-
-
-
-
+        if attrac_vars_no > 1:
+
+            for key, attrac_var in enumerate(attrac_vars):
+
+                if key == 0:
+                    continue
+
+                if key not in supply_locations_metadata["weighting"].keys():
 
-
-
-
-
-
-
-
-
+                    print(f"{attrac_vars[key][:16]:16} not defined")
+
+                else:
+
+                    name = supply_locations_metadata["weighting"][key]["name"]
+                    param = supply_locations_metadata["weighting"][key]["param"]
+                    func = supply_locations_metadata["weighting"][key]["func"]
+
+                    print(f"{name[:16]:16} {round(param, 3)} ({func})")
+
+        print("----------------------------------")
+
+        if interaction_matrix_metadata != {} and "fit" in interaction_matrix_metadata and interaction_matrix_metadata["fit"]["function"] is not None:
+            print("Parameter estimation")
+            print("Fit function " + interaction_matrix_metadata["fit"]["function"])
+            print("Fit by " + interaction_matrix_metadata["fit"]["fit_by"])
+            if interaction_matrix_metadata["fit"]["function"] == "huff_ml_fit":
+                print("Fit method " + interaction_matrix_metadata["fit"]["method"] + " (Converged: " + str(interaction_matrix_metadata["fit"]["minimize_success"]) + ")")
+
+            huff_modelfit = self.modelfit(by = interaction_matrix_metadata["fit"]["fit_by"])
 
-
-
-
-
-
-
-            ]
-
-
-
-
-
-
+            if huff_modelfit is not None:
+
+                print ("Goodness-of-fit for " + interaction_matrix_metadata["fit"]["fit_by"])
+
+                print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
+                print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
+                print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
+                print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
+                print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
+                print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
+                if huff_modelfit[1]["MAPE"] is not None:
+                    print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
+                else:
+                    print("Mean absolute percentage error Not calculated")
+                print("Symmetric MAPE ", round(huff_modelfit[1]["sMAPE"], 2))
+                print("Absolute percentage errors")
+
+                APE_list = [
+                    ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
+                    ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
+                    ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
+                    ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
+                    ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
+                    ]
+                APE_df = pd.DataFrame(
+                    APE_list,
+                    columns=["Resid.", "%", "Resid.", "%"]
+                    )
+                print(APE_df.to_string(index=False))
+
+        print("----------------------------------")
 
     def mci_fit(
         self,
@@ -1302,6 +1717,15 @@ class HuffModel:
 
         customer_origins.metadata = customer_origins_metadata
         supply_locations.metadata = supply_locations_metadata
+
+        interaction_matrix_metadata = {
+            "fit": {
+                "function": "mci_fit",
+                "fit_by": "probabilities",
+                "method": "OLS"
+                }
+            }
+
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
@@ -1318,6 +1742,275 @@ class HuffModel:
 
         return mci_model
 
+    def loglik(
+        self,
+        params
+        ):
+
+        if not isinstance(params, list):
+            if isinstance(params, np.ndarray):
+                params = params.tolist()
+            else:
+                raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
+
+        if len(params) < 2:
+            raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
+
+        market_areas_df = self.market_areas_df
+
+        customer_origins = self.interaction_matrix.customer_origins
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        param_gamma, param_lambda = params[0], params[1]
+
+        if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+
+            if len(params) < 3:
+                raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
+
+            param_gamma, param_lambda, param_lambda2 = params[0], params[1], params[2]
+
+        supply_locations = self.interaction_matrix.supply_locations
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+        supply_locations.metadata = supply_locations_metadata
+
+        if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+
+            if len(params) >= 2:
+
+                customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
+
+            else:
+
+                raise ValueError ("Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
+
+        elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
+
+            if len(params) >= 3:
+
+                customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
+
+            else:
+
+                raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
+
+        if (customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"] and len(params) > 2):
+
+            for key, param in enumerate(params):
+
+                if key <= 1:
+                    continue
+
+                supply_locations_metadata["weighting"][key-1]["param"] = float(param)
+
+        if (customer_origins_metadata["weighting"][0]["func"] == "logistic" and len(params) > 3):
+
+            for key, param in enumerate(params):
+
+                if key <= 2:
+                    continue
+
+                supply_locations_metadata["weighting"][key-2]["param"] = float(param)
+
+        customer_origins.metadata = customer_origins_metadata
+
+        if "T_j_emp" not in market_areas_df.columns:
+            T_j_emp = market_areas_df["T_j"]
+        else:
+            T_j_emp = market_areas_df["T_j_emp"]
+
+        huff_model_copy = copy.deepcopy(self)
+
+        interaction_matrix_copy = copy.deepcopy(huff_model_copy.interaction_matrix)
+
+        interaction_matrix_copy = interaction_matrix_copy.utility()
+        interaction_matrix_copy = interaction_matrix_copy.probabilities()
+        interaction_matrix_copy = interaction_matrix_copy.flows()
+
+        huff_model_copy = interaction_matrix_copy.marketareas()
+
+        market_areas_df_copy = huff_model_copy.market_areas_df
+
+        observed = T_j_emp
+        expected = market_areas_df_copy["T_j"]
+
+        modelfit_metrics = modelfit(
+            observed = observed,
+            expected = expected
+            )
+
+        LL = modelfit_metrics[1]["LL"]
+
+        return -LL
+
+    def ml_fit(
+        self,
+        initial_params: list = [1.0, -2.0],
+        method: str = "L-BFGS-B",
+        bounds: list = [(0.5, 1), (-3, -1)],
+        constraints: list = [],
+        fit_by = "probabilities",
+        update_estimates: bool = True
+        ):
+
+        if fit_by in ["probabilities", "flows"]:
+
+            self.interaction_matrix.huff_ml_fit(
+                initial_params = initial_params,
+                method = method,
+                bounds = bounds,
+                constraints = constraints,
+                fit_by = fit_by,
+                update_estimates = update_estimates
+                )
+
+        elif fit_by == "totals":
+
+            supply_locations = self.interaction_matrix.supply_locations
+            supply_locations_metadata = supply_locations.get_metadata()
+
+            customer_origins = self.interaction_matrix.customer_origins
+            customer_origins_metadata = customer_origins.get_metadata()
+
+            if customer_origins_metadata["weighting"][0]["param"] is None:
+                params_metadata_customer_origins = 1
+            else:
+                if customer_origins_metadata["weighting"][0]["param"] is not None:
+                    if isinstance(customer_origins_metadata["weighting"][0]["param"], (int, float)):
+                        params_metadata_customer_origins = 1
+                    else:
+                        params_metadata_customer_origins = len(customer_origins_metadata["weighting"][0]["param"])
+
+                if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+                    params_metadata_customer_origins = 2
+                else:
+                    params_metadata_customer_origins = 1
+
+            params_metadata_supply_locations = len(supply_locations_metadata["weighting"])
+
+            params_metadata = params_metadata_customer_origins+params_metadata_supply_locations
+
+            if len(initial_params) < 2 or len(initial_params) != params_metadata:
+                raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
+
+            if len(bounds) != len(initial_params):
+                raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
+
+            ml_result = minimize(
+                self.loglik,
+                initial_params,
+                method = method,
+                bounds = bounds,
+                constraints = constraints,
+                options={'disp': 3}
+                )
+
+            attrac_vars = len(supply_locations_metadata["weighting"])
+
+            if ml_result.success:
+
+                fitted_params = ml_result.x
+
+                param_gamma = fitted_params[0]
+                supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+
+                if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+
+                    param_lambda = fitted_params[1]
+                    param_results = [
+                        float(param_gamma),
+                        float(param_lambda)
+                        ]
+
+                    customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
+
+                elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
+
+                    param_lambda = fitted_params[1]
+                    param_lambda2 = fitted_params[2]
+                    param_results = [
+                        float(param_gamma),
+                        float(param_lambda),
+                        float(param_lambda2)
+                        ]
+
+                    customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
+                    customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
+
+                if attrac_vars > 1:
+
+                    if customer_origins_metadata["weighting"][0]["func"] == "logistic":
+                        fitted_params_add = 3
+                    else:
+                        fitted_params_add = 2
+
+                    for key, var in supply_locations_metadata["weighting"].items():
+
+                        if key > len(supply_locations_metadata["weighting"])-fitted_params_add:
+                            break
+
+                        param = float(fitted_params[key+fitted_params_add])
+
+                        param_results = param_results + [param]
+
+                        supply_locations_metadata["weighting"][(key+1)]["param"] = float(param)
+
+                print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
+
+            else:
+
+                print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
+
+            self.interaction_matrix.supply_locations.metadata = supply_locations_metadata
+            self.interaction_matrix.customer_origins.metadata = customer_origins_metadata
+
+            if update_estimates:
+
+                if "T_j_emp" not in self.market_areas_df.columns:
+
+                    self.market_areas_df["T_j_emp"] = self.market_areas_df["T_j"]
+
+                    print("NOTE: Total values in market areas df are treated as empirical total values")
+
+                else:
+
+                    print("NOTE: Total market areas df contains empirical total values")
+
+                if np.isnan(ml_result.x).any():
+
+                    print("WARNING: No update of estimates because fit parameters contain NaN")
+
+                    update_estimates = False
+
+                else:
+
+                    self.interaction_matrix.utility()
+                    self.interaction_matrix.probabilities()
+                    self.interaction_matrix.flows()
+
+                    self.interaction_matrix.marketareas()
+
+            self.interaction_matrix.metadata["fit"] = {
+                "function": "huff_ml_fit",
+                "fit_by": fit_by,
+                "initial_params": initial_params,
+                "method": method,
+                "bounds": bounds,
+                "constraints": constraints,
+                "minimize_success": ml_result.success,
+                "minimize_fittedparams": ml_result.x,
+                "update_estimates": update_estimates
+                }
+
+        else:
+
+            raise ValueError("Parameter 'fit_by' must be 'probabilities', 'flows' or 'totals'")
+
+        return self
+
     def update(self):
 
         self.interaction_matrix = self.interaction_matrix.update()
@@ -1326,30 +2019,94 @@ class HuffModel:
 
         return self
 
-    def modelfit(
-
-
-
+    def modelfit(
+        self,
+        by = "probabilities"
+        ):
 
-        if
+        if by == "probabilities":
+
+            interaction_matrix = self.interaction_matrix
+            interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+            if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
+
+                try:
+
+                    huff_modelfit = modelfit(
+                        interaction_matrix_df["p_ij_emp"],
+                        interaction_matrix_df["p_ij"]
+                        )
+
+                    return huff_modelfit
+
+                except:
+
+                    print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                    return None
 
-
+            else:
+
+                print("Goodness-of-fit metrics could not be calculated. No empirical values of probabilities in interaction matrix.")
+
+                return None
 
-
-
-
-
+        elif by == "flows":
+
+            interaction_matrix = self.interaction_matrix
+            interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+            if ("E_ij" in interaction_matrix_df.columns and "E_ij_emp" in interaction_matrix_df.columns):
 
-
+                try:
 
-
+                    huff_modelfit = modelfit(
+                        interaction_matrix_df["E_ij_emp"],
+                        interaction_matrix_df["E_ij"]
+                        )
+
+                    return huff_modelfit
+
+                except:
+
+                    print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                    return None
+
+            else:
 
-            print("Goodness-of-fit metrics could not be calculated
+                print("Goodness-of-fit metrics could not be calculated. No empirical values of customer flows in interaction matrix.")
+
             return None
-
-        else:
 
-
+        elif by == "totals":
+
+            market_areas_df = self.market_areas_df
+
+            if ("T_j" in market_areas_df.columns and "T_j_emp" in market_areas_df.columns):
+
+                try:
+
+                    huff_modelfit = modelfit(
+                        market_areas_df["T_j_emp"],
+                        market_areas_df["T_j"]
+                        )
+
+                    return huff_modelfit
+
+                except:
+
+                    print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                    return None
+
+            else:
+
+                print("Goodness-of-fit metrics could not be calculated. No empirical values of T_j in market areas data.")
+
+                return None
+
+        else:
+
+            raise ValueError("Parameter 'by' must be 'probabilities', 'flows', or 'totals'")
 
 class MCIModel:
 
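The reworked `HuffModel.modelfit()` in the hunk above takes a `by` argument and returns the goodness-of-fit object for probabilities, flows, or market-area totals, or `None` when the matching empirical column (`p_ij_emp`, `E_ij_emp`, `T_j_emp`) is missing. A usage sketch, assuming a fitted model bound to the illustrative name `huff_model`:

    fit_totals = huff_model.modelfit(by="totals")   # needs T_j and T_j_emp in market_areas_df
    if fit_totals is not None:
        print(round(fit_totals[1]["Rsq"], 3), round(fit_totals[1]["RMSE"], 2))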
@@ -1472,7 +2229,11 @@ class MCIModel:
         print("Mean squared error ", round(mci_modelfit[1]["MSE"], 2))
         print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
         print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
-
+        if mci_modelfit[1]["MAPE"] is not None:
+            print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
+        else:
+            print("Mean absolute percentage error Not calculated")
+        print("Symmetric MAPE ", round(mci_modelfit[1]["sMAPE"], 2))
 
         print("Absolute percentage errors")
         APE_list = [
@@ -1559,8 +2320,11 @@ class MCIModel:
         interaction_matrix = self.interaction_matrix
         interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
 
-        if "p_ij" in interaction_matrix_df.columns:
+        if "p_ij" in interaction_matrix_df.columns and "p_ij_emp" not in interaction_matrix_df.columns:
+            print("NOTE: Probabilities in interaction matrix are treated as empirical probabilities")
             interaction_matrix_df["p_ij_emp"] = interaction_matrix_df["p_ij"]
+        else:
+            print("NOTE: Interaction matrix contains empirical probabilities")
 
         if "U_ij" not in interaction_matrix_df.columns:
             self.utility(transformation = transformation)
@@ -1720,6 +2484,7 @@ def load_geodata (
         "marketsize_col": None,
         "weighting": {
             0: {
+                "name": None,
                 "func": None,
                 "param": None
                 }
@@ -1727,7 +2492,7 @@ def load_geodata (
         "crs_input": crs_input,
         "crs_output": crs_output,
         "no_points": len(geodata_gpd)
-        }
+        }
 
     if location_type == "origins":
 
@@ -1848,8 +2613,10 @@ def load_interaction_matrix(
     csv_sep = ";",
     csv_decimal = ",",
     csv_encoding="unicode_escape",
+    xlsx_sheet: str = None,
     crs_input = "EPSG:4326",
-    crs_output = "EPSG:4326"
+    crs_output = "EPSG:4326",
+    check_df_vars = True
     ):
 
     if isinstance(data, pd.DataFrame):
@@ -1865,7 +2632,13 @@ def load_interaction_matrix(
                 encoding = csv_encoding
                 )
         elif data_type == "xlsx":
-
+            if xlsx_sheet is not None:
+                interaction_matrix_df = pd.read_excel(
+                    data,
+                    sheet_name=xlsx_sheet
+                    )
+            else:
+                interaction_matrix_df = pd.read_excel(data)
         else:
             raise TypeError("Unknown type of data")
     else:
@@ -1884,10 +2657,11 @@ def load_interaction_matrix(
     if market_size_col is not None:
         cols_check = cols_check + [market_size_col]
 
-
-
-
-
+    if check_df_vars:
+        check_vars(
+            interaction_matrix_df,
+            cols = cols_check
+            )
 
     if customer_origins_coords_col is not None:
 
@@ -1942,6 +2716,7 @@ def load_interaction_matrix(
         "marketsize_col": market_size_col,
         "weighting": {
             0: {
+                "name": None,
                 "func": None,
                 "param": None
                 }
@@ -2009,6 +2784,7 @@ def load_interaction_matrix(
         "marketsize_col": None,
         "weighting": {
             0: {
+                "name": None,
                 "func": None,
                 "param": None
                 }
@@ -2056,7 +2832,12 @@ def load_interaction_matrix(
             }
         )
 
-    metadata = {
+    metadata = {
+        "fit": {
+            "function": None,
+            "fit_by": None
+            }
+        }
 
     interaction_matrix = InteractionMatrix(
         interaction_matrix_df=interaction_matrix_df,
@@ -2067,6 +2848,74 @@ def load_interaction_matrix(
 
     return interaction_matrix
 
+def load_marketareas(
+    data,
+    supply_locations_col: str,
+    total_col: str,
+    data_type = "csv",
+    csv_sep = ";",
+    csv_decimal = ",",
+    csv_encoding="unicode_escape",
+    xlsx_sheet: str = None,
+    check_df_vars = True
+    ):
+
+    if isinstance(data, pd.DataFrame):
+        market_areas_df = data
+    elif isinstance(data, str):
+        if data_type not in ["csv", "xlsx"]:
+            raise ValueError ("data_type must be 'csv' or 'xlsx'")
+        if data_type == "csv":
+            market_areas_df = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            if xlsx_sheet is not None:
+                market_areas_df = pd.read_excel(
+                    data,
+                    sheet_name=xlsx_sheet
+                    )
+            else:
+                market_areas_df = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
+
+    if supply_locations_col not in market_areas_df.columns:
+        raise KeyError ("Column " + supply_locations_col + " not in data")
+    if total_col not in market_areas_df.columns:
+        raise KeyError ("Column " + supply_locations_col + " not in data")
+
+    if check_df_vars:
+        check_vars(
+            market_areas_df,
+            cols = [total_col]
+            )
+
+    market_areas_df = market_areas_df.rename(
+        columns = {
+            supply_locations_col: "j",
+            total_col: "T_j"
+            }
+        )
+
+    metadata = {
+        "unique_id": supply_locations_col,
+        "total_col": total_col,
+        "no_points": len(market_areas_df)
+        }
+
+    market_areas = MarketAreas(
+        market_areas_df,
+        metadata
+        )
+
+    return market_areas
+
 def market_shares(
     df: pd.DataFrame,
     turnover_col: str,
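The new `load_marketareas()` function above reads empirical market-area totals from a DataFrame, CSV, or XLSX source, renames the key columns to `j` and `T_j`, and wraps them in a `MarketAreas` object that can be attached to an existing model via `add_to_model()`. A sketch with hypothetical file, sheet, and column names (none of them from the package's test data):

    market_areas = load_marketareas(
        "totals.xlsx",
        supply_locations_col="store_id",
        total_col="annual_turnover",
        data_type="xlsx",
        xlsx_sheet="totals"
        )

    huff_model = market_areas.add_to_model(interaction_matrix, output_model="Huff")
    huff_model = huff_model.ml_fit(
        initial_params=[1.0, -2.0],
        bounds=[(0.5, 1), (-3, -1)],
        fit_by="totals"
        )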
@@ -2250,7 +3099,8 @@ def get_isochrones(
 def modelfit(
     observed,
     expected,
-    remove_nan: bool = True
+    remove_nan: bool = True,
+    verbose: bool = False
     ):
 
     observed_no = len(observed)
@@ -2276,6 +3126,10 @@ def modelfit(
         )
 
     obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
+
+    if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
+        if verbose:
+            print("Vectors 'observed' and/or 'expected' contain zeros which are dropped.")
 
     observed = obs_exp_clean["observed"].to_numpy()
     expected = obs_exp_clean["expected"].to_numpy()
@@ -2291,7 +3145,16 @@ def modelfit(
     residuals_sq = residuals**2
     residuals_abs = abs(residuals)
 
-
+    if any(observed == 0):
+        if verbose:
+            print ("Vector 'observed' contains values equal to zero. No APE/MAPE calculated.")
+        APE = np.full_like(observed, np.nan)
+        MAPE = None
+    else:
+        APE = abs(observed-expected)/observed*100
+        MAPE = float(np.mean(APE))
+
+    sAPE = abs(observed-expected)/((abs(observed)+abs(expected))/2)*100
 
     data_residuals = pd.DataFrame({
         "observed": observed,
@@ -2299,7 +3162,8 @@ def modelfit(
         "residuals": residuals,
         "residuals_sq": residuals_sq,
         "residuals_abs": residuals_abs,
-        "APE": APE
+        "APE": APE,
+        "sAPE": sAPE
         })
 
     SQR = float(np.sum(residuals_sq))
@@ -2310,7 +3174,9 @@ def modelfit(
     MSE = float(SQR/observed_no)
     RMSE = float(sqrt(MSE))
     MAE = float(SAR/observed_no)
-
+    LL = np.sum(np.log(residuals_sq))
+
+    sMAPE = float(np.mean(sAPE))
 
     resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
     resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
@@ -2332,6 +3198,8 @@ def modelfit(
         "RMSE": RMSE,
         "MAE": MAE,
         "MAPE": MAPE,
+        "sMAPE": sMAPE,
+        "LL": -LL,
         "APE": {
             "resid_below5": resid_below5,
             "resid_below10": resid_below10,
@@ -2353,34 +3221,23 @@ def modelfit(
 
     return modelfit_results
 
-def loglik(
-    observed,
-    expected
-    ):
-
-    model_fit = modelfit(
-        observed,
-        expected
-        )
-    residuals_sq = model_fit[0]["residuals_sq"]
-
-    LL = np.sum(np.log(residuals_sq))
-
-    return -LL
-
 def check_vars(
     df: pd.DataFrame,
-    cols: list
+    cols: list,
+    check_numeric: bool = True,
+    check_zero: bool = True
     ):
 
     for col in cols:
         if col not in df.columns:
             raise KeyError(f"Column '{col}' not in dataframe.")
 
-
-
-
+    if check_numeric:
+        for col in cols:
+            if not pd.api.types.is_numeric_dtype(df[col]):
+                raise ValueError(f"Column '{col}' is not numeric. All stated columns must be numeric.")
 
-
-
-
+    if check_zero:
+        for col in cols:
+            if (df[col] <= 0).any():
+                raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")
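The reworked module-level `modelfit()` now reports the symmetric MAPE and a log-likelihood term alongside the existing metrics, and `check_vars()` can skip its numeric and greater-than-zero checks. A rough sketch of how the new pieces fit together; the toy data is illustrative, and the `[1]` indexing follows the pattern used inside the diff, where element 1 of the result holds the metrics dict:

    import numpy as np
    import pandas as pd
    from huff.models import modelfit, check_vars

    observed = np.array([120.0, 80.0, 60.0])
    expected = np.array([110.0, 95.0, 55.0])

    results = modelfit(observed, expected, verbose=True)
    metrics = results[1]                              # metrics dict, as used by loglik()
    print(metrics["Rsq"], metrics["sMAPE"], metrics["LL"])
    print(metrics["MAPE"])                            # None when 'observed' contains zeros

    df = pd.DataFrame({"E_ij": [0.0, 3.2, 1.1]})
    check_vars(df, cols=["E_ij"], check_zero=False)   # zeros allowed when check_zero=False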
|