huff 1.4.1__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
huff/models.py CHANGED
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.4.1
8
- # Last update: 2025-06-16 17:43
7
+ # Version: 1.5.1
8
+ # Last update: 2025-07-01 17:10
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -119,6 +119,24 @@ class CustomerOrigins:
119
119
  param_lambda = -2
120
120
  ):
121
121
 
122
+ """
123
+ metadata["weighting"] = {
124
+ 0: {
125
+ "name": "t_ij",
126
+ "func": "power",
127
+ "param": -2
128
+ }
129
+ }
130
+
131
+ metadata["weighting"] = {
132
+ 0: {
133
+ "name": "t_ij",
134
+ "func": "logistic",
135
+ "param": [10, -0.5]
136
+ }
137
+ }
138
+ """
139
+
122
140
  metadata = self.metadata
123
141
 
124
142
  if func not in ["power", "exponential", "logistic"]:
@@ -130,6 +148,7 @@ class CustomerOrigins:
130
148
  if isinstance(param_lambda, (int, float)) and func == "logistic":
131
149
  raise ValueError("Function type "+ func + " requires two parameters in a list")
132
150
 
151
+ metadata["weighting"][0]["name"] = "t_ij"
133
152
  metadata["weighting"][0]["func"] = func
134
153
 
135
154
  if isinstance(param_lambda, list):
@@ -299,8 +318,10 @@ class SupplyLocations:
299
318
  if metadata["attraction_col"] is None:
300
319
  raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
301
320
 
321
+ metadata["weighting"][0]["name"] = "A_j"
302
322
  metadata["weighting"][0]["func"] = func
303
323
  metadata["weighting"][0]["param"] = float(param_gamma)
324
+
304
325
  self.metadata = metadata
305
326
 
306
327
  return self
@@ -323,6 +344,7 @@ class SupplyLocations:
323
344
  metadata["attraction_col"] = metadata["attraction_col"] + [var]
324
345
 
325
346
  metadata["weighting"][new_key] = {
347
+ "name": var,
326
348
  "func": func,
327
349
  "param": param
328
350
  }
@@ -490,34 +512,84 @@ class InteractionMatrix:
490
512
  else:
491
513
  print("Market size column " + customer_origins_metadata["marketsize_col"])
492
514
 
493
- if interaction_matrix_metadata != {}:
494
- if "transport_costs" in interaction_matrix_metadata:
495
- print("----------------------------------")
496
- if interaction_matrix_metadata["transport_costs"]["network"]:
497
- print("Transport cost type Time")
498
- print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
499
- else:
500
- print("Transport cost type Distance")
501
- print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
515
+ if interaction_matrix_metadata != {} and "transport_costs" in interaction_matrix_metadata:
516
+ print("----------------------------------")
517
+ if interaction_matrix_metadata["transport_costs"]["network"]:
518
+ print("Transport cost type Time")
519
+ print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
520
+ else:
521
+ print("Transport cost type Distance")
522
+ print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
502
523
 
503
524
  print("----------------------------------")
504
525
  print("Partial utilities")
505
526
  print(" Weights")
506
-
527
+
507
528
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
508
529
  print("Attraction not defined")
509
530
  else:
510
- print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
531
+ if supply_locations_metadata["weighting"][0]["param"] is not None:
532
+ print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
533
+ else:
534
+ print("Attraction NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
511
535
 
512
536
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
513
537
  print("Transport costs not defined")
514
538
  elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
515
- print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
539
+ if customer_origins_metadata["weighting"][0]["param"] is not None:
540
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
541
+ else:
542
+ print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
516
543
  elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
517
- print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
544
+ if customer_origins_metadata["weighting"][0]["param"] is not None:
545
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
546
+ else:
547
+ print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
548
+
549
+ attrac_vars = supply_locations_metadata["attraction_col"]
550
+ attrac_vars_no = len(attrac_vars)
551
+
552
+ if attrac_vars_no > 1:
553
+
554
+ for key, attrac_var in enumerate(attrac_vars):
555
+
556
+ if key == 0:
557
+ continue
558
+
559
+ if key not in supply_locations_metadata["weighting"].keys():
560
+
561
+ print(f"{attrac_vars[key][:16]:16} not defined")
562
+
563
+ else:
564
+
565
+ if supply_locations_metadata["weighting"][key]["func"] is None and supply_locations_metadata["weighting"][key]["param"]:
566
+
567
+ print(f"{attrac_vars[key][:16]:16} not defined")
568
+
569
+ else:
570
+
571
+ if supply_locations_metadata["weighting"][key]["param"] is not None:
572
+
573
+ name = supply_locations_metadata["weighting"][key]["name"]
574
+ param = supply_locations_metadata["weighting"][key]["param"]
575
+ func = supply_locations_metadata["weighting"][key]["func"]
576
+
577
+ print(f"{name[:16]:16} {round(param, 3)} ({func})")
578
+
579
+ else:
580
+
581
+ print(f"{attrac_vars[key][:16]:16} NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
582
+
518
583
 
519
584
  print("----------------------------------")
520
585
 
586
+ if interaction_matrix_metadata != {} and "fit" in interaction_matrix_metadata and interaction_matrix_metadata["fit"]["function"] is not None:
587
+ print("Parameter estimation")
588
+ print("Fit function " + interaction_matrix_metadata["fit"]["function"])
589
+ print("Fit by " + interaction_matrix_metadata["fit"]["fit_by"])
590
+ if interaction_matrix_metadata["fit"]["function"] == "huff_ml_fit":
591
+ print("Fit method " + interaction_matrix_metadata["fit"]["method"] + " (Converged: " + str(interaction_matrix_metadata["fit"]["minimize_success"]) + ")")
592
+
521
593
  def transport_costs(
522
594
  self,
523
595
  network: bool = True,
@@ -630,12 +702,80 @@ class InteractionMatrix:
630
702
 
631
703
  return self
632
704
 
705
+ def define_weightings(
706
+ self,
707
+ vars_funcs: dict
708
+ ):
709
+
710
+ """
711
+ vars_funcs = {
712
+ 0: {
713
+ "name": "A_j",
714
+ "func": "power",
715
+ "param": 1
716
+ },
717
+ 1: {
718
+ "name": "t_ij",
719
+ "func": "logistic"
720
+ },
721
+ 2: {
722
+ "name": "second_attraction_variable",
723
+ "func": "power"
724
+ },
725
+ 3: {
726
+ "name": "third_attraction_variable",
727
+ "func": "exponential"
728
+ },
729
+ ...
730
+ }
731
+ """
732
+
733
+ supply_locations_metadata = self.supply_locations.metadata
734
+ customer_origins_metadata = self.customer_origins.metadata
735
+
736
+ supply_locations_metadata["weighting"][0]["name"] = vars_funcs[0]["name"]
737
+ supply_locations_metadata["weighting"][0]["func"] = vars_funcs[0]["func"]
738
+ if "param" in vars_funcs[0]:
739
+ supply_locations_metadata["weighting"][0]["param"] = vars_funcs[0]["param"]
740
+
741
+ customer_origins_metadata["weighting"][0]["name"] = vars_funcs[1]["name"]
742
+ customer_origins_metadata["weighting"][0]["func"] = vars_funcs[1]["func"]
743
+ if "param" in vars_funcs[1]:
744
+ customer_origins_metadata["weighting"][0]["param"] = vars_funcs[1]["param"]
745
+
746
+ if len(vars_funcs) > 2:
747
+
748
+ for key, var in vars_funcs.items():
749
+
750
+ if key < 2:
751
+ continue
752
+
753
+ if key not in supply_locations_metadata["weighting"]:
754
+ supply_locations_metadata["weighting"][key-1] = {
755
+ "name": "attrac"+str(key),
756
+ "func": "power",
757
+ "param": None
758
+ }
759
+
760
+ supply_locations_metadata["weighting"][key-1]["name"] = var["name"]
761
+ supply_locations_metadata["weighting"][key-1]["func"] = var["func"]
762
+
763
+ if "param" in var:
764
+ supply_locations_metadata["weighting"][key-1]["param"] = var["param"]
765
+
766
+ self.supply_locations.metadata = supply_locations_metadata
767
+ self.customer_origins.metadata = customer_origins_metadata
768
+
633
769
  def utility(self):
634
770
 
635
771
  interaction_matrix_df = self.interaction_matrix_df
636
772
 
637
773
  interaction_matrix_metadata = self.get_metadata()
638
774
 
775
+ if "t_ij" not in interaction_matrix_df.columns:
776
+ raise ValueError ("No transport cost variable in interaction matrix")
777
+ if "A_j" not in interaction_matrix_df.columns:
778
+ raise ValueError ("No attraction variable in interaction matrix")
639
779
  if interaction_matrix_df["t_ij"].isna().all():
640
780
  raise ValueError ("Transport cost variable is not defined")
641
781
  if interaction_matrix_df["A_j"].isna().all():
@@ -670,6 +810,33 @@ class InteractionMatrix:
670
810
  else:
671
811
  raise ValueError ("Attraction weighting is not defined.")
672
812
 
813
+ attrac_vars = supply_locations_metadata["attraction_col"]
814
+ attrac_vars_no = len(attrac_vars)
815
+ attrac_var_key = 0
816
+
817
+ if attrac_vars_no > 1:
818
+
819
+ for key, attrac_var in enumerate(attrac_vars):
820
+
821
+ attrac_var_key = key #+1
822
+ if attrac_var_key == 0: #1:
823
+ continue
824
+
825
+ name = supply_locations_metadata["weighting"][attrac_var_key]["name"]
826
+ param = supply_locations_metadata["weighting"][attrac_var_key]["param"]
827
+ func = supply_locations_metadata["weighting"][attrac_var_key]["func"]
828
+
829
+ if func == "power":
830
+ interaction_matrix_df[name+"_weighted"] = interaction_matrix_df[name] ** param
831
+ elif func == "exponential":
832
+ interaction_matrix_df[name+"_weighted"] = np.exp(param * interaction_matrix_df[name])
833
+ else:
834
+ raise ValueError ("Weighting for " + name + " is not defined.")
835
+
836
+ interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df[name+"_weighted"]
837
+
838
+ interaction_matrix_df = interaction_matrix_df.drop(columns=[name+"_weighted"])
839
+
673
840
  interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
674
841
 
675
842
  interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
@@ -687,7 +854,7 @@ class InteractionMatrix:
687
854
 
688
855
  interaction_matrix_df = self.interaction_matrix_df
689
856
 
690
- if interaction_matrix_df["U_ij"].isna().all():
857
+ if "U_ij" not in interaction_matrix_df.columns or interaction_matrix_df["U_ij"].isna().all():
691
858
  self.utility()
692
859
  interaction_matrix_df = self.interaction_matrix_df
693
860
 
@@ -713,6 +880,8 @@ class InteractionMatrix:
713
880
 
714
881
  interaction_matrix_df = self.interaction_matrix_df
715
882
 
883
+ if "C_i" not in interaction_matrix_df.columns:
884
+ raise ValueError ("No market size variable in interaction matrix")
716
885
  if interaction_matrix_df["C_i"].isna().all():
717
886
  raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
718
887
 
@@ -737,7 +906,8 @@ class InteractionMatrix:
737
906
 
738
907
  check_vars(
739
908
  df = interaction_matrix_df,
740
- cols = ["E_ij"]
909
+ cols = ["E_ij"],
910
+ check_zero = False
741
911
  )
742
912
 
743
913
  market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
@@ -875,6 +1045,15 @@ class InteractionMatrix:
875
1045
 
876
1046
  customer_origins.metadata = customer_origins_metadata
877
1047
  supply_locations.metadata = supply_locations_metadata
1048
+
1049
+ interaction_matrix_metadata = {
1050
+ "fit": {
1051
+ "function": "mci_fit",
1052
+ "fit_by": "probabilities",
1053
+ "method": "OLS"
1054
+ }
1055
+ }
1056
+
878
1057
  interaction_matrix = InteractionMatrix(
879
1058
  interaction_matrix_df,
880
1059
  customer_origins,
@@ -891,74 +1070,132 @@ class InteractionMatrix:
891
1070
 
892
1071
  return mci_model
893
1072
 
894
- def huff_loglik(
1073
+ def loglik(
895
1074
  self,
896
- params
1075
+ params,
1076
+ fit_by = "probabilities"
897
1077
  ):
1078
+
1079
+ if fit_by not in ["probabilities", "flows"]:
1080
+ raise ValueError ("Parameter 'fit_by' must be 'probabilities' or 'flows'")
898
1081
 
899
1082
  if not isinstance(params, list):
900
1083
  if isinstance(params, np.ndarray):
901
1084
  params = params.tolist()
902
1085
  else:
903
- raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
1086
+ raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
904
1087
 
905
- if len(params) == 2:
906
- param_gamma, param_lambda = params
907
- elif len(params) == 3:
908
- param_gamma, param_lambda, param_lambda2 = params
909
- else:
910
- raise ValueError("Parameter 'params' must be a list with two or three parameter values")
1088
+ if len(params) < 2:
1089
+ raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1090
+
1091
+ customer_origins = self.customer_origins
1092
+ customer_origins_metadata = customer_origins.get_metadata()
1093
+
1094
+ param_gamma, param_lambda = params[0], params[1]
1095
+
1096
+ if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1097
+
1098
+ if len(params) < 3:
1099
+ raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
1100
+
1101
+ param_gamma, param_lambda, param_lambda2 = params[0], params[1], params[2]
911
1102
 
912
1103
  interaction_matrix_df = self.interaction_matrix_df
913
1104
 
914
1105
  supply_locations = self.supply_locations
915
- supply_locations_metadata = supply_locations.get_metadata()
916
-
917
- customer_origins = self.customer_origins
918
- customer_origins_metadata = customer_origins.get_metadata()
1106
+ supply_locations_metadata = supply_locations.get_metadata()
919
1107
 
920
1108
  supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
921
1109
  supply_locations.metadata = supply_locations_metadata
922
1110
 
923
1111
  if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
924
1112
 
925
- if len(params) == 2:
1113
+ if len(params) >= 2:
1114
+
926
1115
  customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
1116
+
927
1117
  else:
928
- raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have two input parameters")
1118
+
1119
+ raise ValueError ("Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
929
1120
 
930
1121
  elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
931
1122
 
932
- if len(params) == 3:
1123
+ if len(params) >= 3:
1124
+
933
1125
  customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
1126
+
934
1127
  else:
935
- raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have three input parameters")
936
-
1128
+
1129
+ raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
1130
+
1131
+ if (customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"] and len(params) > 2):
1132
+
1133
+ for key, param in enumerate(params):
1134
+
1135
+ if key <= 1:
1136
+ continue
1137
+
1138
+ supply_locations_metadata["weighting"][key-1]["param"] = float(param)
1139
+
1140
+ if (customer_origins_metadata["weighting"][0]["func"] == "logistic" and len(params) > 3):
1141
+
1142
+ for key, param in enumerate(params):
1143
+
1144
+ if key <= 2:
1145
+ continue
1146
+
1147
+ supply_locations_metadata["weighting"][key-2]["param"] = float(param)
1148
+
937
1149
  customer_origins.metadata = customer_origins_metadata
938
-
939
- p_ij_emp = interaction_matrix_df["p_ij"]
1150
+
1151
+ if "p_ij_emp" not in interaction_matrix_df.columns:
1152
+ p_ij_emp = interaction_matrix_df["p_ij"]
1153
+ else:
1154
+ p_ij_emp = interaction_matrix_df["p_ij_emp"]
1155
+
1156
+ if "E_ij_emp" not in interaction_matrix_df.columns:
1157
+ E_ij_emp = interaction_matrix_df["E_ij"]
1158
+ else:
1159
+ E_ij_emp = interaction_matrix_df["E_ij_emp"]
940
1160
 
941
1161
  interaction_matrix_copy = copy.deepcopy(self)
942
1162
 
943
1163
  interaction_matrix_copy.utility()
944
1164
  interaction_matrix_copy.probabilities()
1165
+ interaction_matrix_copy.flows()
945
1166
 
946
1167
  interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
947
- p_ij = interaction_matrix_df_copy["p_ij"]
948
1168
 
949
- LL = loglik(
950
- observed = p_ij_emp,
1169
+ if fit_by == "flows":
1170
+
1171
+ E_ij = interaction_matrix_df_copy["E_ij"]
1172
+
1173
+ observed = E_ij_emp
1174
+ expected = E_ij
1175
+
1176
+ else:
1177
+
1178
+ p_ij = interaction_matrix_df_copy["p_ij"]
1179
+
1180
+ observed = p_ij_emp
951
1181
  expected = p_ij
952
- )
953
1182
 
1183
+ modelfit_metrics = modelfit(
1184
+ observed = observed,
1185
+ expected = expected
1186
+ )
1187
+
1188
+ LL = modelfit_metrics[1]["LL"]
1189
+
954
1190
  return -LL
955
1191
 
956
- def ml_fit(
1192
+ def huff_ml_fit(
957
1193
  self,
958
1194
  initial_params: list = [1.0, -2.0],
959
1195
  method: str = "L-BFGS-B",
960
1196
  bounds: list = [(0.5, 1), (-3, -1)],
961
1197
  constraints: list = [],
1198
+ fit_by = "probabilities",
962
1199
  update_estimates: bool = True
963
1200
  ):
964
1201
 
@@ -967,41 +1204,62 @@ class InteractionMatrix:
967
1204
 
968
1205
  customer_origins = self.customer_origins
969
1206
  customer_origins_metadata = customer_origins.get_metadata()
1207
+
1208
+ if customer_origins_metadata["weighting"][0]["param"] is None:
1209
+ params_metadata_customer_origins = 1
1210
+ else:
1211
+ if customer_origins_metadata["weighting"][0]["param"] is not None:
1212
+ if isinstance(customer_origins_metadata["weighting"][0]["param"], (int, float)):
1213
+ params_metadata_customer_origins = 1
1214
+ else:
1215
+ params_metadata_customer_origins = len(customer_origins_metadata["weighting"][0]["param"])
1216
+
1217
+ if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1218
+ params_metadata_customer_origins = 2
1219
+ else:
1220
+ params_metadata_customer_origins = 1
1221
+
1222
+ params_metadata_supply_locations = len(supply_locations_metadata["weighting"])
970
1223
 
971
- if len(initial_params) > 3 or len(initial_params) < 2:
972
- raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
1224
+ params_metadata = params_metadata_customer_origins+params_metadata_supply_locations
1225
+
1226
+ if len(initial_params) < 2 or len(initial_params) != params_metadata:
1227
+ raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
973
1228
 
974
1229
  if len(bounds) != len(initial_params):
975
1230
  raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
976
-
1231
+
977
1232
  ml_result = minimize(
978
- self.huff_loglik,
1233
+ self.loglik,
979
1234
  initial_params,
1235
+ args=fit_by,
980
1236
  method = method,
981
1237
  bounds = bounds,
982
1238
  constraints = constraints,
983
1239
  options={'disp': 3}
984
1240
  )
985
1241
 
1242
+ attrac_vars = len(supply_locations_metadata["weighting"])
1243
+
986
1244
  if ml_result.success:
987
1245
 
988
1246
  fitted_params = ml_result.x
989
1247
 
990
- if len(initial_params) == 2:
1248
+ param_gamma = fitted_params[0]
1249
+ supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
991
1250
 
992
- param_gamma = fitted_params[0]
1251
+ if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
1252
+
993
1253
  param_lambda = fitted_params[1]
994
1254
  param_results = [
995
1255
  float(param_gamma),
996
1256
  float(param_lambda)
997
1257
  ]
998
-
999
- supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
1258
+
1000
1259
  customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
1001
1260
 
1002
- elif len (initial_params) == 3:
1261
+ elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
1003
1262
 
1004
- param_gamma = fitted_params[0]
1005
1263
  param_lambda = fitted_params[1]
1006
1264
  param_lambda2 = fitted_params[2]
1007
1265
  param_results = [
@@ -1010,43 +1268,91 @@ class InteractionMatrix:
1010
1268
  float(param_lambda2)
1011
1269
  ]
1012
1270
 
1013
- supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
1014
1271
  customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
1015
- customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
1272
+ customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
1016
1273
 
1274
+ if attrac_vars > 1:
1275
+
1276
+ if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1277
+ fitted_params_add = 3
1278
+ else:
1279
+ fitted_params_add = 2
1280
+
1281
+ for key, var in supply_locations_metadata["weighting"].items():
1282
+
1283
+ if key > len(supply_locations_metadata["weighting"])-fitted_params_add:
1284
+ break
1285
+
1286
+ param = float(fitted_params[key+fitted_params_add])
1287
+
1288
+ param_results = param_results + [param]
1289
+
1290
+ supply_locations_metadata["weighting"][(key+1)]["param"] = float(param)
1291
+
1017
1292
  print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
1018
1293
 
1019
1294
  else:
1020
1295
 
1021
- param_gamma = None
1022
- param_lambda = None
1296
+ # param_gamma = None
1297
+ # param_lambda = None
1023
1298
 
1024
- supply_locations_metadata["weighting"][0]["param"] = param_gamma
1299
+ # supply_locations_metadata["weighting"][0]["param"] = param_gamma
1025
1300
 
1026
- if len(initial_params) == 3:
1301
+ # if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1027
1302
 
1028
- param_lambda2 = None
1029
- customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
1030
- customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
1303
+ # param_lambda2 = None
1304
+ # customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
1305
+ # customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
1031
1306
 
1032
- else:
1033
- customer_origins_metadata["weighting"][0]["param"] = param_lambda
1307
+ # else:
1308
+
1309
+ # customer_origins_metadata["weighting"][0]["param"] = param_lambda
1034
1310
 
1035
1311
  print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
1036
1312
 
1037
1313
  self.supply_locations.metadata = supply_locations_metadata
1038
- self.customer_origins.metadata = customer_origins_metadata
1314
+ self.customer_origins.metadata = customer_origins_metadata
1039
1315
 
1040
- if ml_result.success and update_estimates:
1041
-
1042
- self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
1043
- self = self.utility()
1044
- self = self.probabilities()
1045
- self = self.flows()
1316
+ if update_estimates:
1317
+
1318
+ if "p_ij_emp" not in self.interaction_matrix_df.columns:
1319
+ self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
1320
+ print("Probabilties in interaction matrix are treated as empirical probabilties")
1321
+ else:
1322
+ print("Interaction matrix contains empirical probabilties")
1323
+
1324
+ if "E_ij_emp" not in self.interaction_matrix_df.columns:
1325
+ self.interaction_matrix_df["E_ij_emp"] = self.interaction_matrix_df["E_ij"]
1326
+ print("Customer interactions in interaction matrix are treated as empirical interactions")
1327
+ else:
1328
+ print("Interaction matrix contains empirical customer interactions")
1329
+
1330
+ if np.isnan(ml_result.x).any():
1331
+
1332
+ print("No update of estimates because fit parameters contain NaN")
1333
+
1334
+ update_estimates = False
1335
+
1336
+ else:
1337
+
1338
+ self = self.utility()
1339
+ self = self.probabilities()
1340
+ self = self.flows()
1341
+
1342
+ self.metadata["fit"] = {
1343
+ "function": "huff_ml_fit",
1344
+ "fit_by": fit_by,
1345
+ "initial_params": initial_params,
1346
+ "method": method,
1347
+ "bounds": bounds,
1348
+ "constraints": constraints,
1349
+ "minimize_success": ml_result.success,
1350
+ "minimize_fittedparams": ml_result.x,
1351
+ "update_estimates": update_estimates
1352
+ }
1046
1353
 
1047
1354
  return self
1048
1355
 
1049
-
1050
1356
  def update(self):
1051
1357
 
1052
1358
  interaction_matrix_df = self.get_interaction_matrix_df()
@@ -1135,6 +1441,70 @@ class InteractionMatrix:
1135
1441
 
1136
1442
  return self
1137
1443
 
1444
+ class MarketAreas:
1445
+
1446
+ def __init__(
1447
+ self,
1448
+ market_areas_df,
1449
+ metadata
1450
+ ):
1451
+
1452
+ self.market_areas_df = market_areas_df
1453
+ self.metadata = metadata
1454
+
1455
+ def get_market_areas_df(self):
1456
+ return self.market_areas_df
1457
+
1458
+ def get_metadata(self):
1459
+ return self.metadata
1460
+
1461
+ def add_to_model(
1462
+ self,
1463
+ model_object,
1464
+ output_model = "Huff"
1465
+ ):
1466
+
1467
+ if not isinstance(model_object, (HuffModel, MCIModel, InteractionMatrix)):
1468
+ raise ValueError("Parameter 'interaction_matrix' must be of class HuffModel, MCIModel, or InteractionMatrix")
1469
+
1470
+ if isinstance(model_object, MCIModel):
1471
+
1472
+ model = MCIModel(
1473
+ interaction_matrix = model_object.interaction_matrix,
1474
+ coefs = model_object.get_coefs_dict(),
1475
+ mci_ols_model = model_object.get_mci_ols_model(),
1476
+ market_areas_df = self.market_areas_df
1477
+ )
1478
+
1479
+ elif isinstance(model_object, HuffModel):
1480
+
1481
+ model = HuffModel(
1482
+ interaction_matrix = model_object.interaction_matrix,
1483
+ market_areas_df = self.market_areas_df
1484
+ )
1485
+
1486
+ elif isinstance(model_object, InteractionMatrix):
1487
+
1488
+ if output_model not in ["Huff", "MCI"]:
1489
+ raise ValueError("Parameter 'output_model' must be either 'Huff' or 'MCI'")
1490
+
1491
+ if output_model == "Huff":
1492
+
1493
+ model = HuffModel(
1494
+ interaction_matrix=model_object,
1495
+ market_areas_df=self.market_areas_df
1496
+ )
1497
+
1498
+ if output_model == "MCI":
1499
+
1500
+ model = MCIModel(
1501
+ coefs=model_object.coefs,
1502
+ mci_ols_model=model_object.mci_ols_model,
1503
+ market_areas_df=self.market_areas_df
1504
+ )
1505
+
1506
+ return model
1507
+
1138
1508
  class HuffModel:
1139
1509
 
1140
1510
  def __init__(
@@ -1168,74 +1538,119 @@ class HuffModel:
1168
1538
  return customer_origins
1169
1539
 
1170
1540
  def get_market_areas_df(self):
1541
+
1171
1542
  return self.market_areas_df
1172
-
1543
+
1173
1544
  def summary(self):
1174
1545
 
1175
1546
  interaction_matrix = self.interaction_matrix
1176
1547
 
1177
1548
  customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
1178
1549
  supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
1550
+ interaction_matrix_metadata = interaction_matrix.get_metadata()
1179
1551
 
1180
1552
  print("Huff Model")
1181
1553
  print("----------------------------------")
1182
- print("Supply locations " + str(supply_locations_metadata["no_points"]))
1554
+ print("Supply locations " + str(supply_locations_metadata["no_points"]))
1183
1555
  if supply_locations_metadata["attraction_col"][0] is None:
1184
- print("Attraction column not defined")
1556
+ print("Attraction column not defined")
1185
1557
  else:
1186
- print("Attraction column " + supply_locations_metadata["attraction_col"][0])
1187
- print("Customer origins " + str(customer_origins_metadata["no_points"]))
1558
+ print("Attraction column " + supply_locations_metadata["attraction_col"][0])
1559
+ print("Customer origins " + str(customer_origins_metadata["no_points"]))
1188
1560
  if customer_origins_metadata["marketsize_col"] is None:
1189
- print("Market size column not defined")
1561
+ print("Market size column not defined")
1190
1562
  else:
1191
- print("Market size column " + customer_origins_metadata["marketsize_col"])
1563
+ print("Market size column " + customer_origins_metadata["marketsize_col"])
1192
1564
  print("----------------------------------")
1193
1565
 
1194
1566
  print("Partial utilities")
1195
- print(" Weights")
1567
+ print(" Weights")
1196
1568
 
1197
1569
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
1198
- print("Attraction not defined")
1570
+ print("Attraction not defined")
1199
1571
  else:
1200
- print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
1572
+ if supply_locations_metadata["weighting"][0]["param"] is not None:
1573
+ print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
1574
+ else:
1575
+ print("Attraction NA" + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
1201
1576
 
1202
1577
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
1203
- print("Transport costs not defined")
1578
+ print("Transport costs not defined")
1204
1579
  elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
1205
- print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1580
+ if customer_origins_metadata["weighting"][0]["param"] is not None:
1581
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1582
+ else:
1583
+ print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1206
1584
  elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
1207
- print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1585
+ if customer_origins_metadata["weighting"][0]["param"] is not None:
1586
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1587
+ else:
1588
+ print("Transport costs NA" + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1208
1589
 
1209
- print("----------------------------------")
1590
+ attrac_vars = supply_locations_metadata["attraction_col"]
1591
+ attrac_vars_no = len(attrac_vars)
1210
1592
 
1211
- huff_modelfit = self.modelfit()
1212
- if huff_modelfit is not None:
1213
-
1214
- print ("Goodness-of-fit for probabilities")
1593
+ if attrac_vars_no > 1:
1594
+
1595
+ for key, attrac_var in enumerate(attrac_vars):
1596
+
1597
+ if key == 0:
1598
+ continue
1599
+
1600
+ if key not in supply_locations_metadata["weighting"].keys():
1215
1601
 
1216
- print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
1217
- print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
1218
- print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
1219
- print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
1220
- print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
1221
- print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
1222
- print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
1223
- print("Absolute percentage errors")
1602
+ print(f"{attrac_vars[key][:16]:16} not defined")
1603
+
1604
+ else:
1605
+
1606
+ name = supply_locations_metadata["weighting"][key]["name"]
1607
+ param = supply_locations_metadata["weighting"][key]["param"]
1608
+ func = supply_locations_metadata["weighting"][key]["func"]
1609
+
1610
+ print(f"{name[:16]:16} {round(param, 3)} ({func})")
1611
+
1612
+ print("----------------------------------")
1613
+
1614
+ if interaction_matrix_metadata != {} and "fit" in interaction_matrix_metadata and interaction_matrix_metadata["fit"]["function"] is not None:
1615
+ print("Parameter estimation")
1616
+ print("Fit function " + interaction_matrix_metadata["fit"]["function"])
1617
+ print("Fit by " + interaction_matrix_metadata["fit"]["fit_by"])
1618
+ if interaction_matrix_metadata["fit"]["function"] == "huff_ml_fit":
1619
+ print("Fit method " + interaction_matrix_metadata["fit"]["method"] + " (Converged: " + str(interaction_matrix_metadata["fit"]["minimize_success"]) + ")")
1620
+
1621
+ huff_modelfit = self.modelfit(by = interaction_matrix_metadata["fit"]["fit_by"])
1224
1622
 
1225
- APE_list = [
1226
- ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
1227
- ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
1228
- ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
1229
- ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
1230
- ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
1231
- ]
1232
- APE_df = pd.DataFrame(
1233
- APE_list,
1234
- columns=["Resid.", "%", "Resid.", "%"]
1235
- )
1236
- print(APE_df.to_string(index=False))
1237
-
1238
- print("----------------------------------")
1623
+ if huff_modelfit is not None:
1624
+
1625
+ print ("Goodness-of-fit for " + interaction_matrix_metadata["fit"]["fit_by"])
1626
+
1627
+ print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
1628
+ print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
1629
+ print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
1630
+ print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
1631
+ print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
1632
+ print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
1633
+ if huff_modelfit[1]["MAPE"] is not None:
1634
+ print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
1635
+ else:
1636
+ print("Mean absolute percentage error Not calculated")
1637
+ print("Symmetric MAPE ", round(huff_modelfit[1]["sMAPE"], 2))
1638
+ print("Absolute percentage errors")
1639
+
1640
+ APE_list = [
1641
+ ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
1642
+ ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
1643
+ ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
1644
+ ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
1645
+ ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
1646
+ ]
1647
+ APE_df = pd.DataFrame(
1648
+ APE_list,
1649
+ columns=["Resid.", "%", "Resid.", "%"]
1650
+ )
1651
+ print(APE_df.to_string(index=False))
1652
+
1653
+ print("----------------------------------")
1239
1654
 
1240
1655
  def mci_fit(
1241
1656
  self,
@@ -1302,6 +1717,15 @@ class HuffModel:
1302
1717
 
1303
1718
  customer_origins.metadata = customer_origins_metadata
1304
1719
  supply_locations.metadata = supply_locations_metadata
1720
+
1721
+ interaction_matrix_metadata = {
1722
+ "fit": {
1723
+ "function": "mci_fit",
1724
+ "fit_by": "probabilities",
1725
+ "method": "OLS"
1726
+ }
1727
+ }
1728
+
1305
1729
  interaction_matrix = InteractionMatrix(
1306
1730
  interaction_matrix_df,
1307
1731
  customer_origins,
@@ -1318,6 +1742,275 @@ class HuffModel:
1318
1742
 
1319
1743
  return mci_model
1320
1744
 
1745
def loglik(
    self,
    params
    ):
    """Objective function used when fitting the model to total values.

    Writes the candidate parameters into the supply-location and
    customer-origin weighting metadata, recomputes utilities,
    probabilities, flows and market areas on a copy of the interaction
    matrix, and compares the resulting ``T_j`` with the empirical totals.

    Parameters
    ----------
    params : list or np.ndarray
        ``[gamma, lambda, ...]`` for power/exponential transport cost
        weighting, ``[gamma, lambda, lambda2, ...]`` for logistic
        weighting. Any further values are written to the additional
        attraction weightings (keys 1, 2, ... of the supply-location
        weighting metadata) in order.

    Returns
    -------
    The negated ``"LL"`` entry of the module-level ``modelfit`` result.

    Raises
    ------
    ValueError
        If ``params`` is neither list nor np.ndarray or is too short.
    """

    if isinstance(params, np.ndarray):
        params = params.tolist()
    elif not isinstance(params, list):
        raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")

    if len(params) < 2:
        raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")

    market_areas_df = self.market_areas_df

    customer_origins = self.interaction_matrix.customer_origins
    customer_origins_metadata = customer_origins.get_metadata()

    transport_func = customer_origins_metadata["weighting"][0]["func"]

    if transport_func == "logistic" and len(params) < 3:
        raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")

    supply_locations = self.interaction_matrix.supply_locations
    supply_locations_metadata = supply_locations.get_metadata()

    supply_locations_metadata["weighting"][0]["param"] = float(params[0])
    supply_locations.metadata = supply_locations_metadata

    # The length requirements were validated above, so the transport cost
    # parameter(s) can be written directly. 'first_extra' is the index of
    # the first parameter belonging to an additional attraction variable.
    if transport_func in ["power", "exponential"]:
        customer_origins_metadata["weighting"][0]["param"] = float(params[1])
        first_extra = 2
    elif transport_func == "logistic":
        customer_origins_metadata["weighting"][0]["param"] = [float(params[1]), float(params[2])]
        first_extra = 3
    else:
        # Unknown/undefined transport function: nothing else is written.
        first_extra = len(params)

    for weighting_key, value in enumerate(params[first_extra:], start = 1):
        supply_locations_metadata["weighting"][weighting_key]["param"] = float(value)

    customer_origins.metadata = customer_origins_metadata

    # Without an explicit empirical column the current totals are
    # treated as the observed values.
    if "T_j_emp" in market_areas_df.columns:
        observed = market_areas_df["T_j_emp"]
    else:
        observed = market_areas_df["T_j"]

    # One deep copy of the interaction matrix is enough; copying the
    # whole model first only duplicated the work on every optimizer step.
    interaction_matrix_copy = copy.deepcopy(self.interaction_matrix)

    interaction_matrix_copy = interaction_matrix_copy.utility()
    interaction_matrix_copy = interaction_matrix_copy.probabilities()
    interaction_matrix_copy = interaction_matrix_copy.flows()

    huff_model_copy = interaction_matrix_copy.marketareas()

    expected = huff_model_copy.market_areas_df["T_j"]

    modelfit_metrics = modelfit(
        observed = observed,
        expected = expected
        )

    LL = modelfit_metrics[1]["LL"]

    return -LL
1848
+
1849
def ml_fit(
    self,
    initial_params: list = [1.0, -2.0],
    method: str = "L-BFGS-B",
    bounds: list = [(0.5, 1), (-3, -1)],
    constraints: list = [],
    fit_by = "probabilities",
    update_estimates: bool = True
    ):
    """Estimate the model parameters by numerical optimization.

    For ``fit_by`` "probabilities" or "flows" the work is delegated to
    ``self.interaction_matrix.huff_ml_fit``. For "totals" the method
    minimizes ``self.loglik`` with ``scipy.optimize.minimize`` and writes
    the fitted values into the weighting metadata of the supply
    locations and customer origins.

    Parameters
    ----------
    initial_params : list
        Start values: attraction weight, transport cost parameter(s)
        (two for logistic weighting), then one value per additional
        attraction variable.
    method, bounds, constraints
        Passed to ``scipy.optimize.minimize``.
    fit_by : str
        "probabilities", "flows" or "totals".
    update_estimates : bool
        For "totals": recompute utilities, probabilities, flows and
        market areas with the fitted parameters.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``fit_by`` is invalid, or (for "totals") the number of
        initial parameters or bounds does not match the model.
    """
    # NOTE: the list defaults are shared between calls; they are only
    # read (and stored in the fit metadata) here, never mutated.

    if fit_by not in ["probabilities", "flows", "totals"]:
        raise ValueError("Parameter 'fit_by' must be 'probabilities', 'flows' or 'totals'")

    if fit_by in ["probabilities", "flows"]:

        self.interaction_matrix.huff_ml_fit(
            initial_params = initial_params,
            method = method,
            bounds = bounds,
            constraints = constraints,
            fit_by = fit_by,
            update_estimates = update_estimates
            )

        return self

    # ---- fit_by == "totals" ----

    supply_locations = self.interaction_matrix.supply_locations
    supply_locations_metadata = supply_locations.get_metadata()

    customer_origins = self.interaction_matrix.customer_origins
    customer_origins_metadata = customer_origins.get_metadata()

    transport_func = customer_origins_metadata["weighting"][0]["func"]
    is_logistic = transport_func == "logistic"

    # Logistic weighting has two transport cost parameters, every other
    # function one; each attraction weighting contributes one parameter.
    params_metadata_customer_origins = 2 if is_logistic else 1
    params_metadata_supply_locations = len(supply_locations_metadata["weighting"])
    params_metadata = params_metadata_customer_origins+params_metadata_supply_locations

    if len(initial_params) < 2 or len(initial_params) != params_metadata:
        raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")

    if len(bounds) != len(initial_params):
        raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")

    ml_result = minimize(
        self.loglik,
        initial_params,
        method = method,
        bounds = bounds,
        constraints = constraints,
        options={'disp': 3}
        )

    attrac_vars = len(supply_locations_metadata["weighting"])

    if ml_result.success:

        fitted_params = ml_result.x

        param_gamma = float(fitted_params[0])
        supply_locations_metadata["weighting"][0]["param"] = param_gamma

        # Always defined, so the success message below cannot fail when
        # the transport function is none of the three known types.
        param_results = [param_gamma]

        if transport_func in ["power", "exponential"]:

            param_lambda = float(fitted_params[1])
            param_results.append(param_lambda)

            customer_origins_metadata["weighting"][0]["param"] = param_lambda

        elif is_logistic:

            param_lambda = float(fitted_params[1])
            param_lambda2 = float(fitted_params[2])
            param_results.extend([param_lambda, param_lambda2])

            customer_origins_metadata["weighting"][0]["param"] = [param_lambda, param_lambda2]

        # Additional attraction variables live under weighting keys
        # 1 .. attrac_vars-1; their fitted values follow the transport
        # cost parameter(s), i.e. start at index 2 (or 3 for logistic).
        # This is the same layout loglik() uses when writing them.
        first_extra = 3 if is_logistic else 2

        for key in range(1, attrac_vars):

            param = float(fitted_params[key - 1 + first_extra])

            param_results.append(param)

            supply_locations_metadata["weighting"][key]["param"] = param

        print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")

    else:

        print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")

    self.interaction_matrix.supply_locations.metadata = supply_locations_metadata
    self.interaction_matrix.customer_origins.metadata = customer_origins_metadata

    if update_estimates:

        if "T_j_emp" not in self.market_areas_df.columns:

            self.market_areas_df["T_j_emp"] = self.market_areas_df["T_j"]

            print("NOTE: Total values in market areas df are treated as empirical total values")

        else:

            print("NOTE: Total market areas df contains empirical total values")

        if np.isnan(ml_result.x).any():

            print("WARNING: No update of estimates because fit parameters contain NaN")

            update_estimates = False

        else:

            self.interaction_matrix.utility()
            self.interaction_matrix.probabilities()
            self.interaction_matrix.flows()

            # NOTE(review): the return value of marketareas() is not
            # used here, while loglik() reads the new T_j from it -
            # confirm self.market_areas_df is refreshed elsewhere.
            self.interaction_matrix.marketareas()

    self.interaction_matrix.metadata["fit"] = {
        "function": "huff_ml_fit",
        "fit_by": fit_by,
        "initial_params": initial_params,
        "method": method,
        "bounds": bounds,
        "constraints": constraints,
        "minimize_success": ml_result.success,
        "minimize_fittedparams": ml_result.x,
        "update_estimates": update_estimates
        }

    return self
2013
+
1321
2014
  def update(self):
1322
2015
 
1323
2016
  self.interaction_matrix = self.interaction_matrix.update()
@@ -1326,30 +2019,94 @@ class HuffModel:
1326
2019
 
1327
2020
  return self
1328
2021
 
1329
def modelfit(
    self,
    by = "probabilities"
    ):
    """Compute goodness-of-fit metrics of expected vs. empirical values.

    Parameters
    ----------
    by : str
        "probabilities" compares ``p_ij`` with ``p_ij_emp`` and "flows"
        compares ``E_ij`` with ``E_ij_emp`` in the interaction matrix
        table; "totals" compares ``T_j`` with ``T_j_emp`` in the market
        areas table.

    Returns
    -------
    The result of the module-level ``modelfit`` function, or ``None``
    (after printing a message) when a required column is missing or the
    computation fails.

    Raises
    ------
    ValueError
        If ``by`` is not one of the three accepted values.
    """

    # by -> (column with expected values, wording for the message that
    # is printed when the empirical counterpart is missing)
    fit_targets = {
        "probabilities": ("p_ij", "probabilities in interaction matrix"),
        "flows": ("E_ij", "customer flows in interaction matrix"),
        "totals": ("T_j", "T_j in market areas data")
        }

    if by not in tuple(fit_targets):
        raise ValueError("Parameter 'by' must be 'probabilities', 'flows', or 'totals'")

    expected_col, missing_label = fit_targets[by]
    observed_col = expected_col + "_emp"

    if by == "totals":
        data = self.market_areas_df
    else:
        data = self.interaction_matrix.get_interaction_matrix_df()

    if expected_col not in data.columns or observed_col not in data.columns:

        print("Goodness-of-fit metrics could not be calculated. No empirical values of " + missing_label + ".")

        return None

    try:

        # Refers to the module-level modelfit() function, not this method.
        return modelfit(
            data[observed_col],
            data[expected_col]
            )

    except Exception:
        # Deliberately best-effort, but no longer a bare 'except:' so
        # KeyboardInterrupt/SystemExit are not swallowed.

        print("Goodness-of-fit metrics could not be calculated due to NaN values.")
        return None
1353
2110
 
1354
2111
  class MCIModel:
1355
2112
 
@@ -1472,7 +2229,11 @@ class MCIModel:
1472
2229
  print("Mean squared error ", round(mci_modelfit[1]["MSE"], 2))
1473
2230
  print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
1474
2231
  print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
1475
- print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
2232
+ if mci_modelfit[1]["MAPE"] is not None:
2233
+ print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
2234
+ else:
2235
+ print("Mean absolute percentage error Not calculated")
2236
+ print("Symmetric MAPE ", round(mci_modelfit[1]["sMAPE"], 2))
1476
2237
 
1477
2238
  print("Absolute percentage errors")
1478
2239
  APE_list = [
@@ -1559,8 +2320,11 @@ class MCIModel:
1559
2320
  interaction_matrix = self.interaction_matrix
1560
2321
  interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1561
2322
 
1562
- if "p_ij" in interaction_matrix_df.columns:
2323
+ if "p_ij" in interaction_matrix_df.columns and "p_ij_emp" not in interaction_matrix_df.columns:
2324
+ print("NOTE: Probabilities in interaction matrix are treated as empirical probabilities")
1563
2325
  interaction_matrix_df["p_ij_emp"] = interaction_matrix_df["p_ij"]
2326
+ else:
2327
+ print("NOTE: Interaction matrix contains empirical probabilities")
1564
2328
 
1565
2329
  if "U_ij" not in interaction_matrix_df.columns:
1566
2330
  self.utility(transformation = transformation)
@@ -1720,6 +2484,7 @@ def load_geodata (
1720
2484
  "marketsize_col": None,
1721
2485
  "weighting": {
1722
2486
  0: {
2487
+ "name": None,
1723
2488
  "func": None,
1724
2489
  "param": None
1725
2490
  }
@@ -1727,7 +2492,7 @@ def load_geodata (
1727
2492
  "crs_input": crs_input,
1728
2493
  "crs_output": crs_output,
1729
2494
  "no_points": len(geodata_gpd)
1730
- }
2495
+ }
1731
2496
 
1732
2497
  if location_type == "origins":
1733
2498
 
@@ -1848,8 +2613,10 @@ def load_interaction_matrix(
1848
2613
  csv_sep = ";",
1849
2614
  csv_decimal = ",",
1850
2615
  csv_encoding="unicode_escape",
2616
+ xlsx_sheet: str = None,
1851
2617
  crs_input = "EPSG:4326",
1852
- crs_output = "EPSG:4326"
2618
+ crs_output = "EPSG:4326",
2619
+ check_df_vars = True
1853
2620
  ):
1854
2621
 
1855
2622
  if isinstance(data, pd.DataFrame):
@@ -1865,7 +2632,13 @@ def load_interaction_matrix(
1865
2632
  encoding = csv_encoding
1866
2633
  )
1867
2634
  elif data_type == "xlsx":
1868
- interaction_matrix_df = pd.read_excel(data)
2635
+ if xlsx_sheet is not None:
2636
+ interaction_matrix_df = pd.read_excel(
2637
+ data,
2638
+ sheet_name=xlsx_sheet
2639
+ )
2640
+ else:
2641
+ interaction_matrix_df = pd.read_excel(data)
1869
2642
  else:
1870
2643
  raise TypeError("Unknown type of data")
1871
2644
  else:
@@ -1884,10 +2657,11 @@ def load_interaction_matrix(
1884
2657
  if market_size_col is not None:
1885
2658
  cols_check = cols_check + [market_size_col]
1886
2659
 
1887
- check_vars(
1888
- interaction_matrix_df,
1889
- cols = cols_check
1890
- )
2660
+ if check_df_vars:
2661
+ check_vars(
2662
+ interaction_matrix_df,
2663
+ cols = cols_check
2664
+ )
1891
2665
 
1892
2666
  if customer_origins_coords_col is not None:
1893
2667
 
@@ -1942,6 +2716,7 @@ def load_interaction_matrix(
1942
2716
  "marketsize_col": market_size_col,
1943
2717
  "weighting": {
1944
2718
  0: {
2719
+ "name": None,
1945
2720
  "func": None,
1946
2721
  "param": None
1947
2722
  }
@@ -2009,6 +2784,7 @@ def load_interaction_matrix(
2009
2784
  "marketsize_col": None,
2010
2785
  "weighting": {
2011
2786
  0: {
2787
+ "name": None,
2012
2788
  "func": None,
2013
2789
  "param": None
2014
2790
  }
@@ -2056,7 +2832,12 @@ def load_interaction_matrix(
2056
2832
  }
2057
2833
  )
2058
2834
 
2059
- metadata = {}
2835
+ metadata = {
2836
+ "fit": {
2837
+ "function": None,
2838
+ "fit_by": None
2839
+ }
2840
+ }
2060
2841
 
2061
2842
  interaction_matrix = InteractionMatrix(
2062
2843
  interaction_matrix_df=interaction_matrix_df,
@@ -2067,6 +2848,74 @@ def load_interaction_matrix(
2067
2848
 
2068
2849
  return interaction_matrix
2069
2850
 
2851
def load_marketareas(
    data,
    supply_locations_col: str,
    total_col: str,
    data_type = "csv",
    csv_sep = ";",
    csv_decimal = ",",
    csv_encoding="unicode_escape",
    xlsx_sheet: str = None,
    check_df_vars = True
    ):
    """Load a market-area table and wrap it in a MarketAreas object.

    Parameters
    ----------
    data : pandas.DataFrame or str
        The table itself or the path of a .csv/.xlsx file.
    supply_locations_col : str
        Column identifying the supply locations; renamed to ``"j"``.
    total_col : str
        Column with the total values; renamed to ``"T_j"``.
    data_type : str
        ``"csv"`` or ``"xlsx"``; only used when ``data`` is a string.
    csv_sep, csv_decimal, csv_encoding
        Passed to ``pandas.read_csv``.
    xlsx_sheet : str, optional
        Sheet name passed to ``pandas.read_excel``; when ``None`` the
        pandas default sheet is read.
    check_df_vars : bool
        Run ``check_vars`` on ``total_col`` before renaming.

    Returns
    -------
    MarketAreas

    Raises
    ------
    TypeError
        If ``data`` is neither a DataFrame nor a string.
    ValueError
        If ``data_type`` is not ``"csv"`` or ``"xlsx"``.
    KeyError
        If one of the two named columns is missing.
    """

    if isinstance(data, pd.DataFrame):
        market_areas_df = data
    elif isinstance(data, str):
        if data_type not in ["csv", "xlsx"]:
            raise ValueError ("data_type must be 'csv' or 'xlsx'")
        if data_type == "csv":
            market_areas_df = pd.read_csv(
                data,
                sep = csv_sep,
                decimal = csv_decimal,
                encoding = csv_encoding
                )
        else:
            # data_type == "xlsx". sheet_name is only passed when given,
            # because sheet_name=None makes pandas return all sheets.
            excel_kwargs = {}
            if xlsx_sheet is not None:
                excel_kwargs["sheet_name"] = xlsx_sheet
            market_areas_df = pd.read_excel(data, **excel_kwargs)
    else:
        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")

    # Report the column that is actually missing (the check for
    # total_col used to name supply_locations_col).
    for required_col in (supply_locations_col, total_col):
        if required_col not in market_areas_df.columns:
            raise KeyError ("Column " + required_col + " not in data")

    if check_df_vars:
        check_vars(
            market_areas_df,
            cols = [total_col]
            )

    # rename() returns a new DataFrame, so a DataFrame passed in by the
    # caller keeps its original column names.
    market_areas_df = market_areas_df.rename(
        columns = {
            supply_locations_col: "j",
            total_col: "T_j"
            }
        )

    metadata = {
        "unique_id": supply_locations_col,
        "total_col": total_col,
        "no_points": len(market_areas_df)
        }

    market_areas = MarketAreas(
        market_areas_df,
        metadata
        )

    return market_areas
2918
+
2070
2919
  def market_shares(
2071
2920
  df: pd.DataFrame,
2072
2921
  turnover_col: str,
@@ -2250,7 +3099,8 @@ def get_isochrones(
2250
3099
  def modelfit(
2251
3100
  observed,
2252
3101
  expected,
2253
- remove_nan: bool = True
3102
+ remove_nan: bool = True,
3103
+ verbose: bool = False
2254
3104
  ):
2255
3105
 
2256
3106
  observed_no = len(observed)
@@ -2276,6 +3126,10 @@ def modelfit(
2276
3126
  )
2277
3127
 
2278
3128
  obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
3129
+
3130
+ if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
3131
+ if verbose:
3132
+ print("Vectors 'observed' and/or 'expected' contain zeros which are dropped.")
2279
3133
 
2280
3134
  observed = obs_exp_clean["observed"].to_numpy()
2281
3135
  expected = obs_exp_clean["expected"].to_numpy()
@@ -2291,7 +3145,16 @@ def modelfit(
2291
3145
  residuals_sq = residuals**2
2292
3146
  residuals_abs = abs(residuals)
2293
3147
 
2294
- APE = abs(observed-expected)/observed*100
3148
+ if any(observed == 0):
3149
+ if verbose:
3150
+ print ("Vector 'observed' contains values equal to zero. No APE/MAPE calculated.")
3151
+ APE = np.full_like(observed, np.nan)
3152
+ MAPE = None
3153
+ else:
3154
+ APE = abs(observed-expected)/observed*100
3155
+ MAPE = float(np.mean(APE))
3156
+
3157
+ sAPE = abs(observed-expected)/((abs(observed)+abs(expected))/2)*100
2295
3158
 
2296
3159
  data_residuals = pd.DataFrame({
2297
3160
  "observed": observed,
@@ -2299,7 +3162,8 @@ def modelfit(
2299
3162
  "residuals": residuals,
2300
3163
  "residuals_sq": residuals_sq,
2301
3164
  "residuals_abs": residuals_abs,
2302
- "APE": APE
3165
+ "APE": APE,
3166
+ "sAPE": sAPE
2303
3167
  })
2304
3168
 
2305
3169
  SQR = float(np.sum(residuals_sq))
@@ -2310,7 +3174,9 @@ def modelfit(
2310
3174
  MSE = float(SQR/observed_no)
2311
3175
  RMSE = float(sqrt(MSE))
2312
3176
  MAE = float(SAR/observed_no)
2313
- MAPE = float(np.mean(APE))
3177
+ LL = np.sum(np.log(residuals_sq))
3178
+
3179
+ sMAPE = float(np.mean(sAPE))
2314
3180
 
2315
3181
  resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
2316
3182
  resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
@@ -2332,6 +3198,8 @@ def modelfit(
2332
3198
  "RMSE": RMSE,
2333
3199
  "MAE": MAE,
2334
3200
  "MAPE": MAPE,
3201
+ "sMAPE": sMAPE,
3202
+ "LL": -LL,
2335
3203
  "APE": {
2336
3204
  "resid_below5": resid_below5,
2337
3205
  "resid_below10": resid_below10,
@@ -2353,34 +3221,23 @@ def modelfit(
2353
3221
 
2354
3222
  return modelfit_results
2355
3223
 
2356
- def loglik(
2357
- observed,
2358
- expected
2359
- ):
2360
-
2361
- model_fit = modelfit(
2362
- observed,
2363
- expected
2364
- )
2365
- residuals_sq = model_fit[0]["residuals_sq"]
2366
-
2367
- LL = np.sum(np.log(residuals_sq))
2368
-
2369
- return -LL
2370
-
2371
3224
def check_vars(
    df: pd.DataFrame,
    cols: list,
    check_numeric: bool = True,
    check_zero: bool = True
    ):
    """Validate the named columns of a DataFrame.

    All columns must exist. With ``check_numeric`` each column must have
    a numeric dtype; with ``check_zero`` no column may contain a value
    less than or equal to zero. The three checks run one after another
    over all columns, and the first violation raises.

    Raises
    ------
    KeyError
        If a column is not in ``df``.
    ValueError
        If a column is not numeric or contains values <= 0.
    """

    # Pass 1: existence.
    for name in cols:
        if name in df.columns:
            continue
        raise KeyError(f"Column '{name}' not in dataframe.")

    # Pass 2: numeric dtype (optional).
    if check_numeric:
        for name in cols:
            is_numeric = pd.api.types.is_numeric_dtype(df[name])
            if not is_numeric:
                raise ValueError(f"Column '{name}' is not numeric. All stated columns must be numeric.")

    # Pass 3: strictly positive values (optional).
    if check_zero:
        for name in cols:
            non_positive = df[name] <= 0
            if non_positive.any():
                raise ValueError(f"Column '{name}' includes values <= 0. All values must be numeric and positive.")