huff 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
huff/models.py CHANGED
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.5.1
8
- # Last update: 2025-07-01 17:10
7
+ # Version: 1.5.3
8
+ # Last update: 2025-07-15 17:22
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -105,7 +105,7 @@ class CustomerOrigins:
105
105
  metadata = self.metadata
106
106
 
107
107
  if marketsize_col not in geodata_gpd_original.columns:
108
- raise KeyError ("Column " + marketsize_col + " not in data")
108
+ raise KeyError ("Error while defining market size variable: Column " + marketsize_col + " not in data")
109
109
  else:
110
110
  metadata["marketsize_col"] = marketsize_col
111
111
 
@@ -140,13 +140,13 @@ class CustomerOrigins:
140
140
  metadata = self.metadata
141
141
 
142
142
  if func not in ["power", "exponential", "logistic"]:
143
- raise ValueError("Parameter 'func' must be 'power', 'exponential' or 'logistic'")
143
+ raise ValueError("Error while defining transport costs weighting: Parameter 'func' must be 'power', 'exponential' or 'logistic'")
144
144
 
145
145
  if isinstance(param_lambda, list) and func != "logistic":
146
- raise ValueError("Function type "+ func + " requires one single parameter value")
146
+ raise ValueError("Error while defining transport costs weighting: Function type "+ func + " requires one single parameter value")
147
147
 
148
148
  if isinstance(param_lambda, (int, float)) and func == "logistic":
149
- raise ValueError("Function type "+ func + " requires two parameters in a list")
149
+ raise ValueError("Error while defining transport costs weighting: Function type "+ func + " requires two parameters in a list")
150
150
 
151
151
  metadata["weighting"][0]["name"] = "t_ij"
152
152
  metadata["weighting"][0]["func"] = func
@@ -299,7 +299,7 @@ class SupplyLocations:
299
299
  metadata = self.metadata
300
300
 
301
301
  if attraction_col not in geodata_gpd_original.columns:
302
- raise KeyError ("Column " + attraction_col + " not in data")
302
+ raise KeyError ("Error while defining attraction variable: Column " + attraction_col + " not in data")
303
303
  else:
304
304
  metadata["attraction_col"][0] = attraction_col
305
305
 
@@ -316,7 +316,7 @@ class SupplyLocations:
316
316
  metadata = self.metadata
317
317
 
318
318
  if metadata["attraction_col"] is None:
319
- raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
319
+ raise ValueError ("Error while defining attraction weighting: Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
320
320
 
321
321
  metadata["weighting"][0]["name"] = "A_j"
322
322
  metadata["weighting"][0]["func"] = func
@@ -336,7 +336,7 @@ class SupplyLocations:
336
336
  metadata = self.metadata
337
337
 
338
338
  if metadata["attraction_col"] is None:
339
- raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
339
+ raise ValueError ("Error while adding utility variable: Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
340
340
 
341
341
  no_attraction_vars = len(metadata["attraction_col"])
342
342
  new_key = no_attraction_vars
@@ -371,9 +371,9 @@ class SupplyLocations:
371
371
  new_destinations_metadata = new_destinations.get_metadata()
372
372
 
373
373
  if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
374
- raise KeyError("Supply locations and new destinations data have different column names.")
374
+ raise KeyError("Error while adding new destinations: Supply locations and new destinations data have different column names.")
375
375
  if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
376
- raise KeyError("Supply locations and new destinations data have different column names.")
376
+ raise KeyError("Error while adding new destinations: Supply locations and new destinations data have different column names.")
377
377
 
378
378
  geodata_gpd_original = pd.concat(
379
379
  [
@@ -644,7 +644,7 @@ class InteractionMatrix:
644
644
  )
645
645
 
646
646
  if time_distance_matrix.get_metadata() is None:
647
- raise ValueError ("No transport costs matrix was built.")
647
+ raise ValueError ("Error in transport costs calculation: No transport costs matrix was built.")
648
648
 
649
649
  transport_costs_matrix = time_distance_matrix.get_matrix()
650
650
  transport_costs_matrix_config = time_distance_matrix.get_config()
@@ -773,13 +773,13 @@ class InteractionMatrix:
773
773
  interaction_matrix_metadata = self.get_metadata()
774
774
 
775
775
  if "t_ij" not in interaction_matrix_df.columns:
776
- raise ValueError ("No transport cost variable in interaction matrix")
776
+ raise ValueError ("Error in utility calculation: No transport cost variable in interaction matrix")
777
777
  if "A_j" not in interaction_matrix_df.columns:
778
- raise ValueError ("No attraction variable in interaction matrix")
778
+ raise ValueError ("Error in utility calculation: No attraction variable in interaction matrix")
779
779
  if interaction_matrix_df["t_ij"].isna().all():
780
- raise ValueError ("Transport cost variable is not defined")
780
+ raise ValueError ("Error in utility calculation: Transport cost variable is not defined")
781
781
  if interaction_matrix_df["A_j"].isna().all():
782
- raise ValueError ("Attraction variable is not defined")
782
+ raise ValueError ("Error in utility calculation: Attraction variable is not defined")
783
783
 
784
784
  check_vars(
785
785
  df = interaction_matrix_df,
@@ -797,7 +797,7 @@ class InteractionMatrix:
797
797
  elif tc_weighting["func"] == "logistic":
798
798
  interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
799
799
  else:
800
- raise ValueError ("Transport costs weighting is not defined.")
800
+ raise ValueError ("Error in utility calculation: Transport costs weighting is not defined.")
801
801
 
802
802
  supply_locations = self.supply_locations
803
803
  supply_locations_metadata = supply_locations.get_metadata()
@@ -808,7 +808,7 @@ class InteractionMatrix:
808
808
  elif tc_weighting["func"] == "exponential":
809
809
  interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
810
810
  else:
811
- raise ValueError ("Attraction weighting is not defined.")
811
+ raise ValueError ("Error in utility calculation: Attraction weighting is not defined.")
812
812
 
813
813
  attrac_vars = supply_locations_metadata["attraction_col"]
814
814
  attrac_vars_no = len(attrac_vars)
@@ -831,7 +831,7 @@ class InteractionMatrix:
831
831
  elif func == "exponential":
832
832
  interaction_matrix_df[name+"_weighted"] = np.exp(param * interaction_matrix_df[name])
833
833
  else:
834
- raise ValueError ("Weighting for " + name + " is not defined.")
834
+ raise ValueError ("Error in utility calculation: Weighting for " + name + " is not defined.")
835
835
 
836
836
  interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df[name+"_weighted"]
837
837
 
@@ -881,9 +881,9 @@ class InteractionMatrix:
881
881
  interaction_matrix_df = self.interaction_matrix_df
882
882
 
883
883
  if "C_i" not in interaction_matrix_df.columns:
884
- raise ValueError ("No market size variable in interaction matrix")
884
+ raise ValueError ("Error in flows calculation: No market size variable in interaction matrix")
885
885
  if interaction_matrix_df["C_i"].isna().all():
886
- raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
886
+ raise ValueError ("Error in flows calculation: Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
887
887
 
888
888
  check_vars(
889
889
  df = interaction_matrix_df,
@@ -940,7 +940,7 @@ class InteractionMatrix:
940
940
  else:
941
941
 
942
942
  if "C_i" not in interaction_matrix_df.columns or interaction_matrix_df["C_i"].isna().all():
943
- raise ValueError("Customer origins market size is not available")
943
+ raise ValueError("Error in hansen accessibility calculation: Customer origins market size is not available")
944
944
 
945
945
  customer_origins_metadata = self.customer_origins.get_metadata()
946
946
  tc_weighting = customer_origins_metadata["weighting"][0]
@@ -951,7 +951,7 @@ class InteractionMatrix:
951
951
  elif tc_weighting["func"] == "logistic":
952
952
  interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
953
953
  else:
954
- raise ValueError ("Transport costs weighting is not defined.")
954
+ raise ValueError ("Error in hansen accessibility calculation: Transport costs weighting is not defined.")
955
955
 
956
956
  interaction_matrix_df["U_ji"] = interaction_matrix_df["C_i"]*interaction_matrix_df["t_ij_weighted"]
957
957
  hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ji"].sum()).reset_index()
@@ -1077,16 +1077,16 @@ class InteractionMatrix:
1077
1077
  ):
1078
1078
 
1079
1079
  if fit_by not in ["probabilities", "flows"]:
1080
- raise ValueError ("Parameter 'fit_by' must be 'probabilities' or 'flows'")
1080
+ raise ValueError ("Error in loglik: Parameter 'fit_by' must be 'probabilities' or 'flows'")
1081
1081
 
1082
1082
  if not isinstance(params, list):
1083
1083
  if isinstance(params, np.ndarray):
1084
1084
  params = params.tolist()
1085
1085
  else:
1086
- raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1086
+ raise ValueError("Error in loglik: Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1087
1087
 
1088
1088
  if len(params) < 2:
1089
- raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1089
+ raise ValueError("Error in loglik: Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1090
1090
 
1091
1091
  customer_origins = self.customer_origins
1092
1092
  customer_origins_metadata = customer_origins.get_metadata()
@@ -1096,7 +1096,7 @@ class InteractionMatrix:
1096
1096
  if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1097
1097
 
1098
1098
  if len(params) < 3:
1099
- raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
1099
+ raise ValueError("Error in loglik: When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
1100
1100
 
1101
1101
  param_gamma, param_lambda, param_lambda2 = params[0], params[1], params[2]
1102
1102
 
@@ -1116,7 +1116,7 @@ class InteractionMatrix:
1116
1116
 
1117
1117
  else:
1118
1118
 
1119
- raise ValueError ("Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
1119
+ raise ValueError ("Error in loglik: Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
1120
1120
 
1121
1121
  elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
1122
1122
 
@@ -1126,7 +1126,7 @@ class InteractionMatrix:
1126
1126
 
1127
1127
  else:
1128
1128
 
1129
- raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
1129
+ raise ValueError("Error in loglik: Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
1130
1130
 
1131
1131
  if (customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"] and len(params) > 2):
1132
1132
 
@@ -1224,10 +1224,10 @@ class InteractionMatrix:
1224
1224
  params_metadata = params_metadata_customer_origins+params_metadata_supply_locations
1225
1225
 
1226
1226
  if len(initial_params) < 2 or len(initial_params) != params_metadata:
1227
- raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
1227
+ raise ValueError("Error in huff_ml_fit: Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
1228
1228
 
1229
1229
  if len(bounds) != len(initial_params):
1230
- raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
1230
+ raise ValueError("Error in huff_ml_fit: Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
1231
1231
 
1232
1232
  ml_result = minimize(
1233
1233
  self.loglik,
@@ -1292,21 +1292,6 @@ class InteractionMatrix:
1292
1292
  print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
1293
1293
 
1294
1294
  else:
1295
-
1296
- # param_gamma = None
1297
- # param_lambda = None
1298
-
1299
- # supply_locations_metadata["weighting"][0]["param"] = param_gamma
1300
-
1301
- # if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1302
-
1303
- # param_lambda2 = None
1304
- # customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
1305
- # customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
1306
-
1307
- # else:
1308
-
1309
- # customer_origins_metadata["weighting"][0]["param"] = param_lambda
1310
1295
 
1311
1296
  print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
1312
1297
 
@@ -1316,20 +1301,28 @@ class InteractionMatrix:
1316
1301
  if update_estimates:
1317
1302
 
1318
1303
  if "p_ij_emp" not in self.interaction_matrix_df.columns:
1304
+
1319
1305
  self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
1320
- print("Probabilties in interaction matrix are treated as empirical probabilties")
1306
+
1307
+ print("NOTE: Probabilities in interaction matrix are treated as empirical probabilities")
1308
+
1321
1309
  else:
1322
- print("Interaction matrix contains empirical probabilties")
1310
+
1311
+ print("NOTE: Interaction matrix contains empirical probabilities")
1323
1312
 
1324
1313
  if "E_ij_emp" not in self.interaction_matrix_df.columns:
1314
+
1325
1315
  self.interaction_matrix_df["E_ij_emp"] = self.interaction_matrix_df["E_ij"]
1326
- print("Customer interactions in interaction matrix are treated as empirical interactions")
1316
+
1317
+ print("NOTE: Customer interactions in interaction matrix are treated as empirical interactions")
1318
+
1327
1319
  else:
1328
- print("Interaction matrix contains empirical customer interactions")
1320
+
1321
+ print("NOTE: Interaction matrix contains empirical customer interactions")
1329
1322
 
1330
1323
  if np.isnan(ml_result.x).any():
1331
1324
 
1332
- print("No update of estimates because fit parameters contain NaN")
1325
+ print("WARNING: No update of estimates because fit parameters contain NaN")
1333
1326
 
1334
1327
  update_estimates = False
1335
1328
 
@@ -1351,7 +1344,7 @@ class InteractionMatrix:
1351
1344
  "update_estimates": update_estimates
1352
1345
  }
1353
1346
 
1354
- return self
1347
+ return self
1355
1348
 
1356
1349
  def update(self):
1357
1350
 
@@ -1367,12 +1360,12 @@ class InteractionMatrix:
1367
1360
  supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
1368
1361
 
1369
1362
  if len(supply_locations_geodata_gpd_new) < 1:
1370
- raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1363
+ raise ValueError("Error in InteractionMatrix update: There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1371
1364
 
1372
1365
  supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
1373
1366
  supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
1374
1367
  if len(supply_locations_geodata_gpd_original_new) < 1:
1375
- raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1368
+ raise ValueError("Error in InteractionMatrix update: There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1376
1369
 
1377
1370
  supply_locations_new = SupplyLocations(
1378
1371
  geodata_gpd=supply_locations_geodata_gpd_new,
@@ -1391,7 +1384,7 @@ class InteractionMatrix:
1391
1384
 
1392
1385
  if "transport_costs" not in interaction_matrix_metadata:
1393
1386
 
1394
- print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
1387
+ print("WARNING: New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
1395
1388
 
1396
1389
  interaction_matrix_df = pd.concat(
1397
1390
  [
@@ -1465,7 +1458,7 @@ class MarketAreas:
1465
1458
  ):
1466
1459
 
1467
1460
  if not isinstance(model_object, (HuffModel, MCIModel, InteractionMatrix)):
1468
- raise ValueError("Parameter 'interaction_matrix' must be of class HuffModel, MCIModel, or InteractionMatrix")
1461
+ raise ValueError("Error while adding MarketAreas to model: Parameter 'interaction_matrix' must be of class HuffModel, MCIModel, or InteractionMatrix")
1469
1462
 
1470
1463
  if isinstance(model_object, MCIModel):
1471
1464
 
@@ -1486,7 +1479,7 @@ class MarketAreas:
1486
1479
  elif isinstance(model_object, InteractionMatrix):
1487
1480
 
1488
1481
  if output_model not in ["Huff", "MCI"]:
1489
- raise ValueError("Parameter 'output_model' must be either 'Huff' or 'MCI'")
1482
+ raise ValueError("Error while adding MarketAreas to model: Parameter 'output_model' must be either 'Huff' or 'MCI'")
1490
1483
 
1491
1484
  if output_model == "Huff":
1492
1485
 
@@ -1604,10 +1597,19 @@ class HuffModel:
1604
1597
  else:
1605
1598
 
1606
1599
  name = supply_locations_metadata["weighting"][key]["name"]
1607
- param = supply_locations_metadata["weighting"][key]["param"]
1608
1600
  func = supply_locations_metadata["weighting"][key]["func"]
1609
1601
 
1610
- print(f"{name[:16]:16} {round(param, 3)} ({func})")
1602
+ if "param" in supply_locations_metadata["weighting"][key]:
1603
+
1604
+ param = supply_locations_metadata["weighting"][key]["param"]
1605
+
1606
+ if param is not None:
1607
+
1608
+ print(f"{name[:16]:16} {round(param, 3)} ({func})")
1609
+
1610
+ else:
1611
+
1612
+ print(f"{attrac_vars[key][:16]:16} NA ({func})")
1611
1613
 
1612
1614
  print("----------------------------------")
1613
1615
 
@@ -1625,7 +1627,6 @@ class HuffModel:
1625
1627
  print ("Goodness-of-fit for " + interaction_matrix_metadata["fit"]["fit_by"])
1626
1628
 
1627
1629
  print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
1628
- print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
1629
1630
  print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
1630
1631
  print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
1631
1632
  print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
@@ -1751,10 +1752,10 @@ class HuffModel:
1751
1752
  if isinstance(params, np.ndarray):
1752
1753
  params = params.tolist()
1753
1754
  else:
1754
- raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1755
+ raise ValueError("Error in loglik: Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1755
1756
 
1756
1757
  if len(params) < 2:
1757
- raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1758
+ raise ValueError("Error in loglik: Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")
1758
1759
 
1759
1760
  market_areas_df = self.market_areas_df
1760
1761
 
@@ -1766,7 +1767,7 @@ class HuffModel:
1766
1767
  if customer_origins_metadata["weighting"][0]["func"] == "logistic":
1767
1768
 
1768
1769
  if len(params) < 3:
1769
- raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
1770
+ raise ValueError("Error in loglik: When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")
1770
1771
 
1771
1772
  param_gamma, param_lambda, param_lambda2 = params[0], params[1], params[2]
1772
1773
 
@@ -1784,7 +1785,7 @@ class HuffModel:
1784
1785
 
1785
1786
  else:
1786
1787
 
1787
- raise ValueError ("Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
1788
+ raise ValueError ("Error in loglik: Huff Model with transport cost weighting of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 2 input parameters")
1788
1789
 
1789
1790
  elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
1790
1791
 
@@ -1794,7 +1795,7 @@ class HuffModel:
1794
1795
 
1795
1796
  else:
1796
1797
 
1797
- raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
1798
+ raise ValueError("Error in loglik: Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have >= 3 input parameters")
1798
1799
 
1799
1800
  if (customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"] and len(params) > 2):
1800
1801
 
@@ -1853,7 +1854,8 @@ class HuffModel:
1853
1854
  bounds: list = [(0.5, 1), (-3, -1)],
1854
1855
  constraints: list = [],
1855
1856
  fit_by = "probabilities",
1856
- update_estimates: bool = True
1857
+ update_estimates: bool = True,
1858
+ check_numbers: bool = True
1857
1859
  ):
1858
1860
 
1859
1861
  if fit_by in ["probabilities", "flows"]:
@@ -1869,6 +1871,16 @@ class HuffModel:
1869
1871
 
1870
1872
  elif fit_by == "totals":
1871
1873
 
1874
+ if check_numbers:
1875
+
1876
+ market_areas_df = self.market_areas_df
1877
+ interaction_matrix_df = self.get_interaction_matrix_df()
1878
+ T_j_market_areas_df = sum(market_areas_df["T_j"])
1879
+ T_j_interaction_matrix_df = sum(interaction_matrix_df["E_ij"])
1880
+
1881
+ if T_j_market_areas_df != T_j_interaction_matrix_df:
1882
+ print("WARNING: Sum of total market areas (" + str(int(T_j_market_areas_df)) + ") is not equal to sum of customer flows (" + str(int(T_j_interaction_matrix_df)) + ")")
1883
+
1872
1884
  supply_locations = self.interaction_matrix.supply_locations
1873
1885
  supply_locations_metadata = supply_locations.get_metadata()
1874
1886
 
@@ -1894,10 +1906,10 @@ class HuffModel:
1894
1906
  params_metadata = params_metadata_customer_origins+params_metadata_supply_locations
1895
1907
 
1896
1908
  if len(initial_params) < 2 or len(initial_params) != params_metadata:
1897
- raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
1909
+ raise ValueError("Error in ml_fit: Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")
1898
1910
 
1899
1911
  if len(bounds) != len(initial_params):
1900
- raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
1912
+ raise ValueError("Error in ml_fit: Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
1901
1913
 
1902
1914
  ml_result = minimize(
1903
1915
  self.loglik,
@@ -1986,12 +1998,13 @@ class HuffModel:
1986
1998
  update_estimates = False
1987
1999
 
1988
2000
  else:
1989
-
2001
+
1990
2002
  self.interaction_matrix.utility()
1991
2003
  self.interaction_matrix.probabilities()
1992
- self.interaction_matrix.flows()
2004
+ self.interaction_matrix.flows()
1993
2005
 
1994
- self.interaction_matrix.marketareas()
2006
+ huff_model_new_marketareas = self.interaction_matrix.marketareas()
2007
+ self.market_areas_df["T_j"] = huff_model_new_marketareas.get_market_areas_df()["T_j"]
1995
2008
 
1996
2009
  self.interaction_matrix.metadata["fit"] = {
1997
2010
  "function": "huff_ml_fit",
@@ -2007,9 +2020,116 @@ class HuffModel:
2007
2020
 
2008
2021
  else:
2009
2022
 
2010
- raise ValueError("Parameter 'fit_by' must be 'probabilities', 'flows' or 'totals'")
2023
+ raise ValueError("Error in ml_fit: Parameter 'fit_by' must be 'probabilities', 'flows' or 'totals'")
2011
2024
 
2012
2025
  return self
2026
+
2027
+ def confint(
2028
+ self,
2029
+ alpha = 0.05,
2030
+ repeats = 3,
2031
+ sample_size = 0.75,
2032
+ replace = True
2033
+ ):
2034
+
2035
+ if self.interaction_matrix.metadata["fit"] is None or self.interaction_matrix.metadata["fit"] == {}:
2036
+ raise ValueError("Error while estimating confidence intervals: Model object does not contain information towards fit procedure")
2037
+
2038
+ keys_necessary = [
2039
+ "function",
2040
+ "fit_by",
2041
+ "initial_params",
2042
+ "method",
2043
+ "bounds",
2044
+ "constraints"
2045
+ ]
2046
+
2047
+ for key_necessary in keys_necessary:
2048
+ if key_necessary not in self.interaction_matrix.metadata["fit"]:
2049
+ raise KeyError(f"Error while estimating confidence intervals: Model object does not contain full information towards fit procedure. Missing key {key_necessary}")
2050
+
2051
+ fitted_params_repeats = []
2052
+
2053
+ alpha_lower = alpha/2
2054
+ alpha_upper = 1-alpha/2
2055
+
2056
+ huff_model_copy = copy.deepcopy(self)
2057
+
2058
+ if self.interaction_matrix.metadata["fit"]["fit_by"] in ["probabilities", "flows"]:
2059
+
2060
+ for i in range(repeats):
2061
+
2062
+ try:
2063
+
2064
+ n_samples = int(len(huff_model_copy.interaction_matrix.interaction_matrix_df)*sample_size)
2065
+
2066
+ huff_model_copy.interaction_matrix.interaction_matrix_df = huff_model_copy.interaction_matrix.interaction_matrix_df.sample(
2067
+ n = n_samples,
2068
+ replace = replace
2069
+ )
2070
+
2071
+ huff_model_copy.ml_fit(
2072
+ initial_params = self.interaction_matrix.metadata["fit"]["initial_params"],
2073
+ method = self.interaction_matrix.metadata["fit"]["method"],
2074
+ bounds = self.interaction_matrix.metadata["fit"]["bounds"],
2075
+ constraints = self.interaction_matrix.metadata["fit"]["constraints"],
2076
+ fit_by = self.interaction_matrix.metadata["fit"]["fit_by"],
2077
+ update_estimates = True,
2078
+ check_numbers = True
2079
+ )
2080
+
2081
+ minimize_fittedparams = huff_model_copy.interaction_matrix.metadata["fit"]["minimize_fittedparams"]
2082
+
2083
+ fitted_params_repeats.append(minimize_fittedparams)
2084
+
2085
+ except Exception as err:
2086
+
2087
+ print (f"Error in repeat {str(i)}: {err}")
2088
+
2089
+ elif self.metadata["fit"]["fit_by"] == "totals":
2090
+
2091
+ for i in range(repeats):
2092
+
2093
+ n_samples = int(len(huff_model_copy.market_areas_df)*sample_size)
2094
+
2095
+ huff_model_copy.market_areas_df = huff_model_copy.market_areas_df.sample(
2096
+ n = n_samples,
2097
+ replace = replace
2098
+ )
2099
+
2100
+ huff_model_copy.interaction_matrix.interaction_matrix_df = huff_model_copy.interaction_matrix.interaction_matrix_df[
2101
+ huff_model_copy.interaction_matrix.interaction_matrix_df["j"].isin(huff_model_copy.market_areas_df["j"])
2102
+ ]
2103
+
2104
+ huff_model_copy.ml_fit(
2105
+ initial_params = self.interaction_matrix.metadata["fit"]["initial_params"],
2106
+ method = self.interaction_matrix.metadata["fit"]["method"],
2107
+ bounds = self.interaction_matrix.metadata["fit"]["bounds"],
2108
+ constraints = self.interaction_matrix.metadata["fit"]["constraints"],
2109
+ fit_by = self.interaction_matrix.metadata["fit"]["fit_by"],
2110
+ update_estimates = True,
2111
+ check_numbers = True
2112
+ )
2113
+
2114
+ minimize_fittedparams = huff_model_copy.interaction_matrix.metadata["fit"]["minimize_fittedparams"]
2115
+
2116
+ fitted_params_repeats.append(minimize_fittedparams)
2117
+
2118
+ else:
2119
+
2120
+ raise ValueError("Error while estimating confidence intervals: Parameter 'fit_by' must be 'probabilities', 'flows' or 'totals'")
2121
+
2122
+ fitted_params_repeats_array = np.array(fitted_params_repeats)
2123
+ fitted_params_repeats_array_transposed = fitted_params_repeats_array.T
2124
+
2125
+ param_ci = pd.DataFrame(columns=["lower", "upper"])
2126
+
2127
+ for i, col in enumerate(fitted_params_repeats_array_transposed):
2128
+
2129
+ param_ci.loc[i, "lower"] = np.quantile(col, alpha_lower)
2130
+ param_ci.loc[i, "upper"] = np.quantile(col, alpha_upper)
2131
+
2132
+ return param_ci
2013
2133
 
2014
2134
  def update(self):
2015
2135
 
@@ -2042,12 +2162,12 @@ class HuffModel:
2042
2162
 
2043
2163
  except:
2044
2164
 
2045
- print("Goodness-of-fit metrics could not be calculated due to NaN values.")
2165
+ print("WARNING: Goodness-of-fit metrics could not be calculated due to NaN values.")
2046
2166
  return None
2047
2167
 
2048
2168
  else:
2049
2169
 
2050
- print("Goodness-of-fit metrics could not be calculated. No empirical values of probabilities in interaction matrix.")
2170
+ print("WARNING: Goodness-of-fit metrics could not be calculated. No empirical values of probabilities in interaction matrix.")
2051
2171
 
2052
2172
  return None
2053
2173
 
@@ -2069,12 +2189,12 @@ class HuffModel:
2069
2189
 
2070
2190
  except:
2071
2191
 
2072
- print("Goodness-of-fit metrics could not be calculated due to NaN values.")
2192
+ print("WARNING: Goodness-of-fit metrics could not be calculated due to NaN values.")
2073
2193
  return None
2074
2194
 
2075
2195
  else:
2076
2196
 
2077
- print("Goodness-of-fit metrics could not be calculated. No empirical values of customer flows in interaction matrix.")
2197
+ print("WARNING: Goodness-of-fit metrics could not be calculated. No empirical values of customer flows in interaction matrix.")
2078
2198
 
2079
2199
  return None
2080
2200
 
@@ -2095,18 +2215,18 @@ class HuffModel:
2095
2215
 
2096
2216
  except:
2097
2217
 
2098
- print("Goodness-of-fit metrics could not be calculated due to NaN values.")
2218
+ print("WARNING: Goodness-of-fit metrics could not be calculated due to NaN values.")
2099
2219
  return None
2100
2220
 
2101
2221
  else:
2102
2222
 
2103
- print("Goodness-of-fit metrics could not be calculated. No empirical values of T_j in market areas data.")
2223
+ print("WARNING: Goodness-of-fit metrics could not be calculated. No empirical values of T_j in market areas data.")
2104
2224
 
2105
2225
  return None
2106
2226
 
2107
2227
  else:
2108
2228
 
2109
- raise ValueError("Parameter 'by' must be 'probabilities', 'flows', or 'totals'")
2229
+ raise ValueError("Error in HuffModel.modelfit: Parameter 'by' must be 'probabilities', 'flows', or 'totals'")
2110
2230
 
2111
2231
  class MCIModel:
2112
2232
 
@@ -2174,7 +2294,7 @@ class MCIModel:
2174
2294
 
2175
2295
  except:
2176
2296
 
2177
- print("Goodness-of-fit metrics could not be calculated due to NaN values.")
2297
+ print("WARNING: Goodness-of-fit metrics could not be calculated due to NaN values.")
2178
2298
  return None
2179
2299
 
2180
2300
  else:
@@ -2224,7 +2344,6 @@ class MCIModel:
2224
2344
  print ("Goodness-of-fit for probabilities")
2225
2345
 
2226
2346
  print("Sum of squared residuals ", round(mci_modelfit[1]["SQR"], 2))
2227
- print("Sum of squares ", round(mci_modelfit[1]["SQT"], 2))
2228
2347
  print("R-squared ", round(mci_modelfit[1]["Rsq"], 2))
2229
2348
  print("Mean squared error ", round(mci_modelfit[1]["MSE"], 2))
2230
2349
  print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
@@ -2261,9 +2380,9 @@ class MCIModel:
2261
2380
  interaction_matrix_metadata = interaction_matrix.get_metadata()
2262
2381
 
2263
2382
  if interaction_matrix_df["t_ij"].isna().all():
2264
- raise ValueError ("Transport cost variable is not defined")
2383
+ raise ValueError ("Error in utility calculation: Transport cost variable is not defined")
2265
2384
  if interaction_matrix_df["A_j"].isna().all():
2266
- raise ValueError ("Attraction variable is not defined")
2385
+ raise ValueError ("Error in utility calculation: Attraction variable is not defined")
2267
2386
 
2268
2387
  check_vars(
2269
2388
  df = interaction_matrix_df,
@@ -2364,10 +2483,10 @@ class MCIModel:
2364
2483
  interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
2365
2484
 
2366
2485
  if "C_i" not in interaction_matrix_df.columns:
2367
- raise ValueError ("No market size column defined in interaction matrix.")
2486
+ raise ValueError ("Error in flows calculation: No market size column defined in interaction matrix.")
2368
2487
 
2369
2488
  if interaction_matrix_df["C_i"].isna().all():
2370
- raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
2489
+ raise ValueError ("Error in flows calculation: Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
2371
2490
 
2372
2491
  check_vars(
2373
2492
  df = interaction_matrix_df,
@@ -2422,12 +2541,12 @@ def load_geodata (
2422
2541
  ):
2423
2542
 
2424
2543
  if location_type is None or (location_type != "origins" and location_type != "destinations"):
2425
- raise ValueError ("Argument location_type must be either 'origins' or 'destinations'")
2544
+ raise ValueError ("Error while loading geodata: Argument location_type must be either 'origins' or 'destinations'")
2426
2545
 
2427
2546
  if isinstance(data, gp.GeoDataFrame):
2428
2547
  geodata_gpd_original = data
2429
2548
  if not all(geodata_gpd_original.geometry.geom_type == "Point"):
2430
- raise ValueError ("Input geopandas.GeoDataFrame must be of type 'Point'")
2549
+ raise ValueError ("Error while loading geodata: Input geopandas.GeoDataFrame must be of type 'Point'")
2431
2550
  crs_input = geodata_gpd_original.crs
2432
2551
  elif isinstance(data, pd.DataFrame):
2433
2552
  geodata_tab = data
@@ -2435,13 +2554,13 @@ def load_geodata (
2435
2554
  if data_type == "shp":
2436
2555
  geodata_gpd_original = gp.read_file(data)
2437
2556
  if not all(geodata_gpd_original.geometry.geom_type == "Point"):
2438
- raise ValueError ("Input shapefile must be of type 'Point'")
2557
+ raise ValueError ("Error while loading geodata: Input shapefile must be of type 'Point'")
2439
2558
  crs_input = geodata_gpd_original.crs
2440
2559
  elif data_type == "csv" or data_type == "xlsx":
2441
2560
  if x_col is None:
2442
- raise ValueError ("Missing value for X coordinate column")
2561
+ raise ValueError ("Error while loading geodata: Missing value for X coordinate column")
2443
2562
  if y_col is None:
2444
- raise ValueError ("Missing value for Y coordinate column")
2563
+ raise ValueError ("Error while loading geodata: Missing value for Y coordinate column")
2445
2564
  elif data_type == "csv":
2446
2565
  geodata_tab = pd.read_csv(
2447
2566
  data,
@@ -2452,9 +2571,9 @@ def load_geodata (
2452
2571
  elif data_type == "xlsx":
2453
2572
  geodata_tab = pd.read_excel(data)
2454
2573
  else:
2455
- raise TypeError("Unknown type of data")
2574
+ raise TypeError("Error while loading geodata: Unknown type of data")
2456
2575
  else:
2457
- raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
2576
+ raise TypeError("Error while loading geodata: Param 'data' must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
2458
2577
 
2459
2578
  if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
2460
2579
 
@@ -2525,17 +2644,17 @@ def create_interaction_matrix(
2525
2644
  ):
2526
2645
 
2527
2646
  if not isinstance(customer_origins, CustomerOrigins):
2528
- raise ValueError ("customer_origins must be of class CustomerOrigins")
2647
+ raise ValueError ("Error while creating interaction matrix: customer_origins must be of class CustomerOrigins")
2529
2648
  if not isinstance(supply_locations, SupplyLocations):
2530
- raise ValueError ("supply_locations must be of class SupplyLocations")
2649
+ raise ValueError ("Error while creating interaction matrix: supply_locations must be of class SupplyLocations")
2531
2650
 
2532
2651
  customer_origins_metadata = customer_origins.get_metadata()
2533
2652
  if customer_origins_metadata["marketsize_col"] is None:
2534
- raise ValueError("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
2653
+ raise ValueError("Error while creating interaction matrix: Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
2535
2654
 
2536
2655
  supply_locations_metadata = supply_locations.get_metadata()
2537
2656
  if supply_locations_metadata["attraction_col"][0] is None:
2538
- raise ValueError("Attraction column in supply locations not defined. Use SupplyLocations.define_attraction()")
2657
+ raise ValueError("Error while creating interaction matrix: Attraction column in supply locations not defined. Use SupplyLocations.define_attraction()")
2539
2658
 
2540
2659
  customer_origins_unique_id = customer_origins_metadata["unique_id"]
2541
2660
  customer_origins_marketsize = customer_origins_metadata["marketsize_col"]
@@ -2623,7 +2742,7 @@ def load_interaction_matrix(
2623
2742
  interaction_matrix_df = data
2624
2743
  elif isinstance(data, str):
2625
2744
  if data_type not in ["csv", "xlsx"]:
2626
- raise ValueError ("data_type must be 'csv' or 'xlsx'")
2745
+ raise ValueError ("Error while loading interaction matrix: param 'data_type' must be 'csv' or 'xlsx'")
2627
2746
  if data_type == "csv":
2628
2747
  interaction_matrix_df = pd.read_csv(
2629
2748
  data,
@@ -2640,14 +2759,14 @@ def load_interaction_matrix(
2640
2759
  else:
2641
2760
  interaction_matrix_df = pd.read_excel(data)
2642
2761
  else:
2643
- raise TypeError("Unknown type of data")
2762
+ raise TypeError("Error while loading interaction matrix: Unknown type of data")
2644
2763
  else:
2645
- raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
2764
+ raise TypeError("Error while loading interaction matrix: param 'data' must be pandas.DataFrame or file (.csv, .xlsx)")
2646
2765
 
2647
2766
  if customer_origins_col not in interaction_matrix_df.columns:
2648
- raise KeyError ("Column " + customer_origins_col + " not in data")
2767
+ raise KeyError ("Error while loading interaction matrix: Column " + customer_origins_col + " not in data")
2649
2768
  if supply_locations_col not in interaction_matrix_df.columns:
2650
- raise KeyError ("Column " + supply_locations_col + " not in data")
2769
+ raise KeyError ("Error while loading interaction matrix: Column " + supply_locations_col + " not in data")
2651
2770
 
2652
2771
  cols_check = attraction_col + [transport_costs_col]
2653
2772
  if flows_col is not None:
@@ -2668,7 +2787,7 @@ def load_interaction_matrix(
2668
2787
  if isinstance(customer_origins_coords_col, str):
2669
2788
 
2670
2789
  if customer_origins_coords_col not in interaction_matrix_df.columns:
2671
- raise KeyError ("Column " + customer_origins_coords_col + " not in data.")
2790
+ raise KeyError ("Error while loading interaction matrix: Column " + customer_origins_coords_col + " not in data.")
2672
2791
 
2673
2792
  customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col]]
2674
2793
  customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
@@ -2684,7 +2803,7 @@ def load_interaction_matrix(
2684
2803
  elif isinstance(customer_origins_coords_col, list):
2685
2804
 
2686
2805
  if len(customer_origins_coords_col) != 2:
2687
- raise ValueError ("Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
2806
+ raise ValueError ("Error while loading interaction matrix: Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
2688
2807
 
2689
2808
  check_vars (
2690
2809
  df = interaction_matrix_df,
@@ -2739,7 +2858,7 @@ def load_interaction_matrix(
2739
2858
  if isinstance(supply_locations_coords_col, str):
2740
2859
 
2741
2860
  if supply_locations_coords_col not in interaction_matrix_df.columns:
2742
- raise KeyError ("Column " + supply_locations_coords_col + " not in data.")
2861
+ raise KeyError ("Error while loading interaction matrix: Column " + supply_locations_coords_col + " not in data.")
2743
2862
 
2744
2863
  supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col]]
2745
2864
  supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
@@ -2755,7 +2874,7 @@ def load_interaction_matrix(
2755
2874
  if isinstance(supply_locations_coords_col, list):
2756
2875
 
2757
2876
  if len(supply_locations_coords_col) != 2:
2758
- raise ValueError ("Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
2877
+ raise ValueError ("Error while loading interaction matrix: Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
2759
2878
 
2760
2879
  check_vars (
2761
2880
  df = interaction_matrix_df,
@@ -2864,7 +2983,7 @@ def load_marketareas(
2864
2983
  market_areas_df = data
2865
2984
  elif isinstance(data, str):
2866
2985
  if data_type not in ["csv", "xlsx"]:
2867
- raise ValueError ("data_type must be 'csv' or 'xlsx'")
2986
+ raise ValueError ("Error while loading market areas: data_type must be 'csv' or 'xlsx'")
2868
2987
  if data_type == "csv":
2869
2988
  market_areas_df = pd.read_csv(
2870
2989
  data,
@@ -2881,14 +3000,14 @@ def load_marketareas(
2881
3000
  else:
2882
3001
  market_areas_df = pd.read_excel(data)
2883
3002
  else:
2884
- raise TypeError("Unknown type of data")
3003
+ raise TypeError("Error while loading market areas: Unknown type of data")
2885
3004
  else:
2886
- raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
3005
+ raise TypeError("Error while loading market areas: data must be pandas.DataFrame or file (.csv, .xlsx)")
2887
3006
 
2888
3007
  if supply_locations_col not in market_areas_df.columns:
2889
- raise KeyError ("Column " + supply_locations_col + " not in data")
3008
+ raise KeyError ("Error while loading market areas: Column " + supply_locations_col + " not in data")
2890
3009
  if total_col not in market_areas_df.columns:
2891
- raise KeyError ("Column " + supply_locations_col + " not in data")
3010
+ raise KeyError ("Error while loading market areas: Column " + supply_locations_col + " not in data")
2892
3011
 
2893
3012
  if check_df_vars:
2894
3013
  check_vars(
@@ -2931,7 +3050,7 @@ def market_shares(
2931
3050
  if ref_col is not None:
2932
3051
 
2933
3052
  if ref_col not in df.columns:
2934
- raise KeyError(f"Column '{ref_col}' not in dataframe.")
3053
+ raise KeyError(f"Error while calculating market shares: Column '{ref_col}' not in dataframe.")
2935
3054
 
2936
3055
  ms_refcol = pd.DataFrame(df.groupby(ref_col)[turnover_col].sum())
2937
3056
  ms_refcol = ms_refcol.rename(columns = {turnover_col: "total"})
@@ -2979,7 +3098,7 @@ def log_centering_transformation(
2979
3098
  )
2980
3099
 
2981
3100
  if ref_col not in df.columns:
2982
- raise KeyError(f"Column '{ref_col}' not in dataframe.")
3101
+ raise KeyError(f"Error in log-centering transformation: Column '{ref_col}' not in dataframe.")
2983
3102
 
2984
3103
  def lct (x):
2985
3104
 
@@ -3107,17 +3226,20 @@ def modelfit(
3107
3226
  expected_no = len(expected)
3108
3227
 
3109
3228
  if not observed_no == expected_no:
3110
- raise ValueError("Observed and expected differ in length")
3229
+ raise ValueError("Error while calculating fit metrics: Observed and expected differ in length")
3111
3230
 
3112
3231
  if not isinstance(observed, np.number):
3113
3232
  if not is_numeric_dtype(observed):
3114
- raise ValueError("Observed column is not numeric")
3233
+ raise ValueError("Error while calculating fit metrics: Observed column is not numeric")
3115
3234
  if not isinstance(expected, np.number):
3116
3235
  if not is_numeric_dtype(expected):
3117
- raise ValueError("Expected column is not numeric")
3236
+ raise ValueError("Error while calculating fit metrics: Expected column is not numeric")
3118
3237
 
3119
3238
  if remove_nan:
3120
3239
 
3240
+ observed = observed.reset_index(drop=True)
3241
+ expected = expected.reset_index(drop=True)
3242
+
3121
3243
  obs_exp = pd.DataFrame(
3122
3244
  {
3123
3245
  "observed": observed,
@@ -3129,7 +3251,7 @@ def modelfit(
3129
3251
 
3130
3252
  if len(obs_exp_clean) < len(observed) or len(obs_exp_clean) < len(expected):
3131
3253
  if verbose:
3132
- print("Vectors 'observed' and/or 'expected' contain zeros which are dropped.")
3254
+ print("NOTE: Vectors 'observed' and/or 'expected' contain zeros which are dropped.")
3133
3255
 
3134
3256
  observed = obs_exp_clean["observed"].to_numpy()
3135
3257
  expected = obs_exp_clean["expected"].to_numpy()
@@ -3137,9 +3259,9 @@ def modelfit(
3137
3259
  else:
3138
3260
 
3139
3261
  if np.isnan(observed).any():
3140
- raise ValueError("Vector with observed data contains NaN")
3262
+ raise ValueError("Error while calculating fit metrics: Vector with observed data contains NaN and 'remove_nan' is False")
3141
3263
  if np.isnan(expected).any():
3142
- raise ValueError("Vector with expected data contains NaN")
3264
+ raise ValueError("Error while calculating fit metrics: Vector with expected data contains NaN and 'remove_nan' is False")
3143
3265
 
3144
3266
  residuals = np.array(observed)-np.array(expected)
3145
3267
  residuals_sq = residuals**2
Binary file
huff/tests/tests_huff.py CHANGED
@@ -4,12 +4,13 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.5.1
8
- # Last update: 2025-07-01 17:10
7
+ # Version: 1.5.3
8
+ # Last update: 2025-07-15 17:22
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
12
12
  from huff.models import create_interaction_matrix, get_isochrones, load_geodata, load_interaction_matrix, load_marketareas, market_shares, modelfit
13
+ from huff.models import HuffModel
13
14
  from huff.osm import map_with_basemap
14
15
  from huff.gistools import buffers, point_spatial_join
15
16
 
@@ -140,6 +141,10 @@ huff_model_fit = haslach_interactionmatrix.marketareas()
140
141
  # Calculation of total market areas
141
142
  # Result of class HuffModel
142
143
 
144
+ bootstrap_cis = huff_model_fit.confint(repeats=10)
145
+ print(bootstrap_cis)
146
+ # Confidence intervals for estimated parameters
147
+
143
148
  huff_model_fit.summary()
144
149
  # Huff model summary
145
150
 
@@ -354,6 +359,9 @@ huff_model_fit3 = huff_model_fit2.ml_fit(
354
359
  huff_model_fit3.summary()
355
360
  # Show summary
356
361
 
362
+ print(huff_model_fit3.get_market_areas_df())
363
+ # Show market areas df
364
+
357
365
 
358
366
  # Buffer analysis:
359
367
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: huff
3
- Version: 1.5.1
3
+ Version: 1.5.3
4
4
  Summary: huff: Huff Model Market Area Analysis
5
5
  Author: Thomas Wieland
6
6
  Author-email: geowieland@googlemail.com
@@ -18,7 +18,7 @@ Requires-Dist: openpyxl
18
18
 
19
19
  # huff: Huff Model Market Area Analysis
20
20
 
21
- This Python library is designed for performing market area analyses with the Huff Model (Huff 1962, 1964) and/or the Multiplicative Competitive Interaction (MCI) Model (Nakanishi and Cooper 1974, 1982). Users may load point shapefiles (or CSV, XLSX) of customer origins and supply locations and conduct a market area analysis step by step. The package also includes supplementary GIS functions, including clients for OpenRouteService(1) for network analysis (e.g., transport cost matrix) and OpenStreetMap(2) for simple maps. See Huff and McCallum (2008) or Wieland (2017) for a description of the models and their practical application.
21
+ This Python library is designed for performing market area analyses with the Huff Model (Huff 1962, 1964) and/or the Multiplicative Competitive Interaction (MCI) Model (Nakanishi and Cooper 1974, 1982). Users may load point shapefiles (or CSV, XLSX) of customer origins and supply locations and conduct a market area analysis step by step. The library supports parameter estimation based on empirical customer data using the MCI model and Maximum Likelihood. The package also includes supplementary GIS functions, including clients for OpenRouteService(1) for network analysis (e.g., transport cost matrix) and OpenStreetMap(2) for simple maps. See Huff and McCallum (2008) or Wieland (2017) for a description of the models and their practical application.
22
22
 
23
23
 
24
24
  ## Author
@@ -28,14 +28,12 @@ Thomas Wieland [ORCID](https://orcid.org/0000-0001-5168-9846) [EMail](mailto:geo
28
28
  See the /tests directory for usage examples of most of the included functions.
29
29
 
30
30
 
31
- ## Updates v1.5.1
32
- - Extensions:
33
- - HuffModel.ml_fit(): Fit Huff model parameters by empirical total market areas
34
- - HuffModel.summary(): Goodnes-of-fit depends on fit_by
35
- - load_market_areas(): Loading table with totals and including into InteractionMatrix, HuffModel and MCIModel objects
31
+ ## Updates v1.5.3
36
32
  - Bugfixes:
37
- - InteractionMatrix.summary(): NoneType parameter
38
- - InteractionMatrix.huff_ml_fit(): Update estimates possible even if fit algorithm did not converge
33
+ - InteractionMatrix.summary() and HuffModel.summary(): No KeyError when param is None anymore
34
+ - Extensions:
35
+ - Confidence intervals (bootstrap) for Huff Model ML estimations
36
+ - ValueError and KeyError messages are more precise (each message now names the function or operation that raised it)
39
37
 
40
38
 
41
39
  ## Features
@@ -44,7 +42,7 @@ See the /tests directory for usage examples of most of the included functions.
44
42
  - Defining origins and destinations with weightings
45
43
  - Creating interaction matrix from origins and destinations
46
44
  - Different function types: power, exponential, logistic
47
- - Huff model parameter estimation via Maximum Likelihood (ML) by probalities and customer flows
45
+ - Huff model parameter estimation via Maximum Likelihood (ML) by probabilities, customer flows, and total market areas
48
46
  - Huff model market simulation
49
47
  - **Multiplicative Competitive Interaction Model**:
50
48
  - Log-centering transformation of interaction matrix
@@ -1,10 +1,10 @@
1
1
  huff/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  huff/gistools.py,sha256=fgeE1IsUO7UIaawb23kuiz_Rlxn7T18iLLTA5yvgp74,7038
3
- huff/models.py,sha256=IihaZmutJjdKqzTVXGVmcYbauFJImF-UPsZ2QCPzw8s,125182
3
+ huff/models.py,sha256=mPASlL0YA8x-cnhoRgrpr1sP-p5gGg1_cwM-QGf8GfU,133310
4
4
  huff/ors.py,sha256=JlO2UEishQX87PIiktksOrVT5QdB-GEWgjXcxoR_KuA,11929
5
5
  huff/osm.py,sha256=9A-7hxeZyjA2r8w2_IqqwH14qq2Y9AS1GxVKOD7utqs,7747
6
6
  huff/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- huff/tests/tests_huff.py,sha256=eHnEiV8m7TjpAXnJqo1aZ0YlQCGHxK1iFXROEyhN5cU,12884
7
+ huff/tests/tests_huff.py,sha256=FMnkSs7id4KcJei71DRLNPbY7cvOVjALwYzcBKCm1Ao,13116
8
8
  huff/tests/data/Haslach.cpg,sha256=OtMDH1UDpEBK-CUmLugjLMBNTqZoPULF3QovKiesmCQ,5
9
9
  huff/tests/data/Haslach.dbf,sha256=GVPIt05OzDO7UrRDcsMhiYWvyXAPg6Z-qkiysFzj-fc,506
10
10
  huff/tests/data/Haslach.prj,sha256=2Jy1Vlzh7UxQ1MXpZ9UYLs2SxfrObj2xkEkZyLqmGTY,437
@@ -23,8 +23,8 @@ huff/tests/data/Haslach_supermarkets.prj,sha256=2Jy1Vlzh7UxQ1MXpZ9UYLs2SxfrObj2x
23
23
  huff/tests/data/Haslach_supermarkets.qmd,sha256=JlcOYzG4vI1NH1IuOpxwIPnJsCyC-pDRAI00TzEvNf0,2522
24
24
  huff/tests/data/Haslach_supermarkets.shp,sha256=X7QbQ0BTMag_B-bDRbpr-go2BQIXo3Y8zMAKpYZmlps,324
25
25
  huff/tests/data/Haslach_supermarkets.shx,sha256=j23QHX-SmdAeN04rw0x8nUOran-OCg_T6r_LvzzEPWs,164
26
- huff/tests/data/Wieland2015.xlsx,sha256=jUt9YcRrYL99AjxzXKMXD3o5erjd9r_jYfnALdrTQ3o,24333
27
- huff-1.5.1.dist-info/METADATA,sha256=BXRKyUp5qIEoYjNpo_w9zpUwMTDTO_aegNgr67qj8ns,6187
28
- huff-1.5.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
29
- huff-1.5.1.dist-info/top_level.txt,sha256=nlzX-PxZNFmIxANIJMySuIFPihd6qOBkRlhIC28NEsQ,5
30
- huff-1.5.1.dist-info/RECORD,,
26
+ huff/tests/data/Wieland2015.xlsx,sha256=H4rxCFlctn44-O6mIyeFf67FlgvznLX7xZqpoWYS41A,25788
27
+ huff-1.5.3.dist-info/METADATA,sha256=vqlH9tlDz5pHu85fYu1Bnprk9yiWzAPRY94rQTQxFGM,6142
28
+ huff-1.5.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
29
+ huff-1.5.3.dist-info/top_level.txt,sha256=nlzX-PxZNFmIxANIJMySuIFPihd6qOBkRlhIC28NEsQ,5
30
+ huff-1.5.3.dist-info/RECORD,,
File without changes