huff 1.3.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
huff/models.py CHANGED
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.3.5
8
- # Last update: 2025-06-03 17:23
7
+ # Version: 1.4.1
8
+ # Last update: 2025-06-16 17:43
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -17,8 +17,10 @@ from math import sqrt
17
17
  import time
18
18
  from pandas.api.types import is_numeric_dtype
19
19
  from statsmodels.formula.api import ols
20
+ from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
20
21
  from shapely.geometry import Point
21
22
  from shapely import wkt
23
+ import copy
22
24
  from huff.ors import Client, TimeDistanceMatrix, Isochrone
23
25
  from huff.gistools import overlay_difference, distance_matrix, buffers
24
26
 
@@ -73,10 +75,12 @@ class CustomerOrigins:
73
75
  print("Market size column " + metadata["marketsize_col"])
74
76
 
75
77
  if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
76
- print("Transport cost weighting not defined")
77
- else:
78
- print("Transport cost weighting " + metadata["weighting"][0]["func"] + " with lambda = " + str(metadata["weighting"][0]["param"]))
79
-
78
+ print("Transport cost weighting not defined")
79
+ elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
80
+ print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
81
+ elif metadata["weighting"][0]["func"] == "logistic":
82
+ print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
83
+
80
84
  print("Unique ID column " + metadata["unique_id"])
81
85
  print("Input CRS " + str(metadata["crs_input"]))
82
86
 
@@ -117,11 +121,24 @@ class CustomerOrigins:
117
121
 
118
122
  metadata = self.metadata
119
123
 
124
+ if func not in ["power", "exponential", "logistic"]:
125
+ raise ValueError("Parameter 'func' must be 'power', 'exponential' or 'logistic'")
126
+
127
+ if isinstance(param_lambda, list) and func != "logistic":
128
+ raise ValueError("Function type "+ func + " requires one single parameter value")
129
+
130
+ if isinstance(param_lambda, (int, float)) and func == "logistic":
131
+ raise ValueError("Function type "+ func + " requires two parameters in a list")
132
+
120
133
  metadata["weighting"][0]["func"] = func
121
- metadata["weighting"][0]["param"] = param_lambda
122
134
 
123
- self.metadata = metadata
135
+ if isinstance(param_lambda, list):
136
+ metadata["weighting"][0]["param"] = [float(param_lambda[0]), float(param_lambda[1])]
137
+ else:
138
+ metadata["weighting"][0]["param"] = float(param_lambda)
124
139
 
140
+ self.metadata = metadata
141
+
125
142
  return self
126
143
 
127
144
  def isochrones(
@@ -190,7 +207,6 @@ class CustomerOrigins:
190
207
 
191
208
  return self
192
209
 
193
-
194
210
  class SupplyLocations:
195
211
 
196
212
  def __init__(
@@ -243,7 +259,7 @@ class SupplyLocations:
243
259
  if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
244
260
  print("Attraction weighting not defined")
245
261
  else:
246
- print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
262
+ print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
247
263
 
248
264
  print("Unique ID column " + metadata["unique_id"])
249
265
  print("Input CRS " + str(metadata["crs_input"]))
@@ -284,7 +300,7 @@ class SupplyLocations:
284
300
  raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
285
301
 
286
302
  metadata["weighting"][0]["func"] = func
287
- metadata["weighting"][0]["param"] = param_gamma
303
+ metadata["weighting"][0]["param"] = float(param_gamma)
288
304
  self.metadata = metadata
289
305
 
290
306
  return self
@@ -325,7 +341,11 @@ class SupplyLocations:
325
341
  metadata = self.get_metadata()
326
342
 
327
343
  new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
344
+ new_destinations_gpd_original["j_update"] = 1
345
+
328
346
  new_destinations_gpd = new_destinations.get_geodata_gpd()
347
+ new_destinations_gpd["j_update"] = 1
348
+
329
349
  new_destinations_metadata = new_destinations.get_metadata()
330
350
 
331
351
  if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
@@ -333,14 +353,20 @@ class SupplyLocations:
333
353
  if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
334
354
  raise KeyError("Supply locations and new destinations data have different column names.")
335
355
 
336
- geodata_gpd_original = geodata_gpd_original.append(
337
- new_destinations_gpd_original,
356
+ geodata_gpd_original = pd.concat(
357
+ [
358
+ geodata_gpd_original,
359
+ new_destinations_gpd_original
360
+ ],
338
361
  ignore_index=True
339
362
  )
340
-
341
- geodata_gpd = geodata_gpd.append(
342
- new_destinations_gpd,
343
- ignore_index=True
363
+
364
+ geodata_gpd = pd.concat(
365
+ [
366
+ geodata_gpd,
367
+ new_destinations_gpd
368
+ ],
369
+ ignore_index=True
344
370
  )
345
371
 
346
372
  metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
@@ -423,13 +449,15 @@ class InteractionMatrix:
423
449
  self,
424
450
  interaction_matrix_df,
425
451
  customer_origins,
426
- supply_locations
452
+ supply_locations,
453
+ metadata
427
454
  ):
428
455
 
429
456
  self.interaction_matrix_df = interaction_matrix_df
430
457
  self.customer_origins = customer_origins
431
458
  self.supply_locations = supply_locations
432
-
459
+ self.metadata = metadata
460
+
433
461
  def get_interaction_matrix_df(self):
434
462
  return self.interaction_matrix_df
435
463
 
@@ -439,35 +467,55 @@ class InteractionMatrix:
439
467
  def get_supply_locations(self):
440
468
  return self.supply_locations
441
469
 
470
+ def get_metadata(self):
471
+ return self.metadata
472
+
442
473
  def summary(self):
443
474
 
444
475
  customer_origins_metadata = self.get_customer_origins().get_metadata()
445
476
  supply_locations_metadata = self.get_supply_locations().get_metadata()
477
+ interaction_matrix_metadata = self.get_metadata()
446
478
 
447
479
  print("Interaction Matrix")
448
480
  print("----------------------------------")
449
481
 
450
- print("Supply locations " + str(supply_locations_metadata["no_points"]))
482
+ print("Supply locations " + str(supply_locations_metadata["no_points"]))
451
483
  if supply_locations_metadata["attraction_col"][0] is None:
452
- print("Attraction column not defined")
484
+ print("Attraction column not defined")
453
485
  else:
454
- print("Attraction column " + supply_locations_metadata["attraction_col"][0])
455
- print("Customer origins " + str(customer_origins_metadata["no_points"]))
486
+ print("Attraction column " + supply_locations_metadata["attraction_col"][0])
487
+ print("Customer origins " + str(customer_origins_metadata["no_points"]))
456
488
  if customer_origins_metadata["marketsize_col"] is None:
457
489
  print("Market size column not defined")
458
490
  else:
459
- print("Market size column " + customer_origins_metadata["marketsize_col"])
491
+ print("Market size column " + customer_origins_metadata["marketsize_col"])
492
+
493
+ if interaction_matrix_metadata != {}:
494
+ if "transport_costs" in interaction_matrix_metadata:
495
+ print("----------------------------------")
496
+ if interaction_matrix_metadata["transport_costs"]["network"]:
497
+ print("Transport cost type Time")
498
+ print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
499
+ else:
500
+ print("Transport cost type Distance")
501
+ print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
502
+
460
503
  print("----------------------------------")
461
504
  print("Partial utilities")
462
- print(" Weights")
505
+ print(" Weights")
506
+
463
507
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
464
- print("Attraction not defined")
508
+ print("Attraction not defined")
465
509
  else:
466
- print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
510
+ print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
511
+
467
512
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
468
- print("Transport costs not defined")
469
- else:
470
- print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
513
+ print("Transport costs not defined")
514
+ elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
515
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
516
+ elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
517
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
518
+
471
519
  print("----------------------------------")
472
520
 
473
521
  def transport_costs(
@@ -487,6 +535,7 @@ class InteractionMatrix:
487
535
  range_type = "distance"
488
536
 
489
537
  interaction_matrix_df = self.get_interaction_matrix_df()
538
+ interaction_matrix_metadata = self.get_metadata()
490
539
 
491
540
  customer_origins = self.get_customer_origins()
492
541
  customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
@@ -567,7 +616,17 @@ class InteractionMatrix:
567
616
  if distance_unit == "kilometers":
568
617
  interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
569
618
 
619
+ interaction_matrix_metadata["transport_costs"] = {
620
+ "network": network,
621
+ "range_type": range_type,
622
+ "time_unit": time_unit,
623
+ "distance_unit": distance_unit,
624
+ "ors_server": ors_server,
625
+ "ors_auth": ors_auth
626
+ }
627
+
570
628
  self.interaction_matrix_df = interaction_matrix_df
629
+ self.metadata = interaction_matrix_metadata
571
630
 
572
631
  return self
573
632
 
@@ -575,6 +634,8 @@ class InteractionMatrix:
575
634
 
576
635
  interaction_matrix_df = self.interaction_matrix_df
577
636
 
637
+ interaction_matrix_metadata = self.get_metadata()
638
+
578
639
  if interaction_matrix_df["t_ij"].isna().all():
579
640
  raise ValueError ("Transport cost variable is not defined")
580
641
  if interaction_matrix_df["A_j"].isna().all():
@@ -588,16 +649,20 @@ class InteractionMatrix:
588
649
  customer_origins = self.customer_origins
589
650
  customer_origins_metadata = customer_origins.get_metadata()
590
651
  tc_weighting = customer_origins_metadata["weighting"][0]
652
+
591
653
  if tc_weighting["func"] == "power":
592
654
  interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
593
655
  elif tc_weighting["func"] == "exponential":
594
656
  interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
657
+ elif tc_weighting["func"] == "logistic":
658
+ interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
595
659
  else:
596
660
  raise ValueError ("Transport costs weighting is not defined.")
597
661
 
598
662
  supply_locations = self.supply_locations
599
663
  supply_locations_metadata = supply_locations.get_metadata()
600
664
  attraction_weighting = supply_locations_metadata["weighting"][0]
665
+
601
666
  if attraction_weighting["func"] == "power":
602
667
  interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
603
668
  elif tc_weighting["func"] == "exponential":
@@ -607,10 +672,15 @@ class InteractionMatrix:
607
672
 
608
673
  interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
609
674
 
610
- interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
675
+ interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
611
676
 
612
- self.interaction_matrix_df = interaction_matrix_df
677
+ interaction_matrix_metadata["model"] = {
678
+ "model_type": "Huff"
679
+ }
613
680
 
681
+ self.interaction_matrix_df = interaction_matrix_df
682
+ self.metadata = interaction_matrix_metadata
683
+
614
684
  return self
615
685
 
616
686
  def probabilities (self):
@@ -681,7 +751,6 @@ class InteractionMatrix:
681
751
 
682
752
  return huff_model
683
753
 
684
-
685
754
  def hansen(
686
755
  self,
687
756
  from_origins: bool = True
@@ -689,16 +758,34 @@ class InteractionMatrix:
689
758
 
690
759
  interaction_matrix_df = self.interaction_matrix_df
691
760
 
692
- if interaction_matrix_df["U_ij"].isna().all():
693
- self.utility()
694
- interaction_matrix_df = self.interaction_matrix_df
695
-
696
761
  if from_origins:
762
+
763
+ if interaction_matrix_df["U_ij"].isna().all():
764
+ self.utility()
765
+ interaction_matrix_df = self.interaction_matrix_df
766
+
697
767
  hansen_df = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum()).reset_index()
698
768
  hansen_df = hansen_df.rename(columns = {"U_ij": "A_i"})
769
+
699
770
  else:
700
- hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ij"].sum()).reset_index()
701
- hansen_df = hansen_df.rename(columns = {"U_ij": "A_j"})
771
+
772
+ if "C_i" not in interaction_matrix_df.columns or interaction_matrix_df["C_i"].isna().all():
773
+ raise ValueError("Customer origins market size is not available")
774
+
775
+ customer_origins_metadata = self.customer_origins.get_metadata()
776
+ tc_weighting = customer_origins_metadata["weighting"][0]
777
+ if tc_weighting["func"] == "power":
778
+ interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
779
+ elif tc_weighting["func"] == "exponential":
780
+ interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
781
+ elif tc_weighting["func"] == "logistic":
782
+ interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
783
+ else:
784
+ raise ValueError ("Transport costs weighting is not defined.")
785
+
786
+ interaction_matrix_df["U_ji"] = interaction_matrix_df["C_i"]*interaction_matrix_df["t_ij_weighted"]
787
+ hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ji"].sum()).reset_index()
788
+ hansen_df = hansen_df.rename(columns = {"U_ji": "A_j"})
702
789
 
703
790
  return hansen_df
704
791
 
@@ -735,6 +822,8 @@ class InteractionMatrix:
735
822
 
736
823
  interaction_matrix_df = self.get_interaction_matrix_df()
737
824
 
825
+ interaction_matrix_metadata = self.get_metadata()
826
+
738
827
  cols_t = [col + "__LCT" for col in cols]
739
828
 
740
829
  if "p_ij__LCT" not in interaction_matrix_df.columns:
@@ -789,7 +878,8 @@ class InteractionMatrix:
789
878
  interaction_matrix = InteractionMatrix(
790
879
  interaction_matrix_df,
791
880
  customer_origins,
792
- supply_locations
881
+ supply_locations,
882
+ metadata=interaction_matrix_metadata
793
883
  )
794
884
 
795
885
  mci_model = MCIModel(
@@ -801,6 +891,249 @@ class InteractionMatrix:
801
891
 
802
892
  return mci_model
803
893
 
894
+ def huff_loglik(
895
+ self,
896
+ params
897
+ ):
898
+
899
+ if not isinstance(params, list):
900
+ if isinstance(params, np.ndarray):
901
+ params = params.tolist()
902
+ else:
903
+ raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
904
+
905
+ if len(params) == 2:
906
+ param_gamma, param_lambda = params
907
+ elif len(params) == 3:
908
+ param_gamma, param_lambda, param_lambda2 = params
909
+ else:
910
+ raise ValueError("Parameter 'params' must be a list with two or three parameter values")
911
+
912
+ interaction_matrix_df = self.interaction_matrix_df
913
+
914
+ supply_locations = self.supply_locations
915
+ supply_locations_metadata = supply_locations.get_metadata()
916
+
917
+ customer_origins = self.customer_origins
918
+ customer_origins_metadata = customer_origins.get_metadata()
919
+
920
+ supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
921
+ supply_locations.metadata = supply_locations_metadata
922
+
923
+ if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
924
+
925
+ if len(params) == 2:
926
+ customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
927
+ else:
928
+ raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have two input parameters")
929
+
930
+ elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
931
+
932
+ if len(params) == 3:
933
+ customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
934
+ else:
935
+ raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have three input parameters")
936
+
937
+ customer_origins.metadata = customer_origins_metadata
938
+
939
+ p_ij_emp = interaction_matrix_df["p_ij"]
940
+
941
+ interaction_matrix_copy = copy.deepcopy(self)
942
+
943
+ interaction_matrix_copy.utility()
944
+ interaction_matrix_copy.probabilities()
945
+
946
+ interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
947
+ p_ij = interaction_matrix_df_copy["p_ij"]
948
+
949
+ LL = loglik(
950
+ observed = p_ij_emp,
951
+ expected = p_ij
952
+ )
953
+
954
+ return -LL
955
+
956
+ def ml_fit(
957
+ self,
958
+ initial_params: list = [1.0, -2.0],
959
+ method: str = "L-BFGS-B",
960
+ bounds: list = [(0.5, 1), (-3, -1)],
961
+ constraints: list = [],
962
+ update_estimates: bool = True
963
+ ):
964
+
965
+ supply_locations = self.supply_locations
966
+ supply_locations_metadata = supply_locations.get_metadata()
967
+
968
+ customer_origins = self.customer_origins
969
+ customer_origins_metadata = customer_origins.get_metadata()
970
+
971
+ if len(initial_params) > 3 or len(initial_params) < 2:
972
+ raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
973
+
974
+ if len(bounds) != len(initial_params):
975
+ raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
976
+
977
+ ml_result = minimize(
978
+ self.huff_loglik,
979
+ initial_params,
980
+ method = method,
981
+ bounds = bounds,
982
+ constraints = constraints,
983
+ options={'disp': 3}
984
+ )
985
+
986
+ if ml_result.success:
987
+
988
+ fitted_params = ml_result.x
989
+
990
+ if len(initial_params) == 2:
991
+
992
+ param_gamma = fitted_params[0]
993
+ param_lambda = fitted_params[1]
994
+ param_results = [
995
+ float(param_gamma),
996
+ float(param_lambda)
997
+ ]
998
+
999
+ supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
1000
+ customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
1001
+
1002
+ elif len (initial_params) == 3:
1003
+
1004
+ param_gamma = fitted_params[0]
1005
+ param_lambda = fitted_params[1]
1006
+ param_lambda2 = fitted_params[2]
1007
+ param_results = [
1008
+ float(param_gamma),
1009
+ float(param_lambda),
1010
+ float(param_lambda2)
1011
+ ]
1012
+
1013
+ supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
1014
+ customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
1015
+ customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
1016
+
1017
+ print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
1018
+
1019
+ else:
1020
+
1021
+ param_gamma = None
1022
+ param_lambda = None
1023
+
1024
+ supply_locations_metadata["weighting"][0]["param"] = param_gamma
1025
+
1026
+ if len(initial_params) == 3:
1027
+
1028
+ param_lambda2 = None
1029
+ customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
1030
+ customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
1031
+
1032
+ else:
1033
+ customer_origins_metadata["weighting"][0]["param"] = param_lambda
1034
+
1035
+ print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
1036
+
1037
+ self.supply_locations.metadata = supply_locations_metadata
1038
+ self.customer_origins.metadata = customer_origins_metadata
1039
+
1040
+ if ml_result.success and update_estimates:
1041
+
1042
+ self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
1043
+ self = self.utility()
1044
+ self = self.probabilities()
1045
+ self = self.flows()
1046
+
1047
+ return self
1048
+
1049
+
1050
+ def update(self):
1051
+
1052
+ interaction_matrix_df = self.get_interaction_matrix_df()
1053
+
1054
+ interaction_matrix_metadata = self.get_metadata()
1055
+
1056
+ customer_origins = self.get_customer_origins()
1057
+
1058
+ supply_locations = self.get_supply_locations()
1059
+
1060
+ supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
1061
+ supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
1062
+
1063
+ if len(supply_locations_geodata_gpd_new) < 1:
1064
+ raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1065
+
1066
+ supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
1067
+ supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
1068
+ if len(supply_locations_geodata_gpd_original_new) < 1:
1069
+ raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
1070
+
1071
+ supply_locations_new = SupplyLocations(
1072
+ geodata_gpd=supply_locations_geodata_gpd_new,
1073
+ geodata_gpd_original=supply_locations_geodata_gpd_original_new,
1074
+ metadata=supply_locations.metadata,
1075
+ isochrones_gdf=supply_locations.isochrones_gdf,
1076
+ buffers_gdf=supply_locations.buffers_gdf
1077
+ )
1078
+
1079
+ interaction_matrix_new = create_interaction_matrix(
1080
+ customer_origins=customer_origins,
1081
+ supply_locations=supply_locations_new
1082
+ )
1083
+
1084
+ interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()
1085
+
1086
+ if "transport_costs" not in interaction_matrix_metadata:
1087
+
1088
+ print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
1089
+
1090
+ interaction_matrix_df = pd.concat(
1091
+ [
1092
+ interaction_matrix_df,
1093
+ interaction_matrix_new_df
1094
+ ],
1095
+ ignore_index=True
1096
+ )
1097
+
1098
+ interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
1099
+
1100
+ self.interaction_matrix_df = interaction_matrix_df
1101
+
1102
+ else:
1103
+
1104
+ network = interaction_matrix_metadata["transport_costs"]["network"]
1105
+ range_type = interaction_matrix_metadata["transport_costs"]["range_type"]
1106
+ time_unit = interaction_matrix_metadata["transport_costs"]["time_unit"]
1107
+ distance_unit = interaction_matrix_metadata["transport_costs"]["distance_unit"]
1108
+ ors_server = interaction_matrix_metadata["transport_costs"]["ors_server"]
1109
+ ors_auth = interaction_matrix_metadata["transport_costs"]["ors_auth"]
1110
+
1111
+ interaction_matrix_new.transport_costs(
1112
+ network=network,
1113
+ range_type=range_type,
1114
+ time_unit=time_unit,
1115
+ distance_unit=distance_unit,
1116
+ ors_server=ors_server,
1117
+ ors_auth=ors_auth
1118
+ )
1119
+
1120
+ interaction_matrix_df = pd.concat(
1121
+ [
1122
+ interaction_matrix_df,
1123
+ interaction_matrix_new_df
1124
+ ],
1125
+ ignore_index=True
1126
+ )
1127
+
1128
+ interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
1129
+
1130
+ self.interaction_matrix_df = interaction_matrix_df
1131
+
1132
+ self.utility()
1133
+ self.probabilities()
1134
+ self.flows()
1135
+
1136
+ return self
804
1137
 
805
1138
  class HuffModel:
806
1139
 
@@ -857,17 +1190,52 @@ class HuffModel:
857
1190
  else:
858
1191
  print("Market size column " + customer_origins_metadata["marketsize_col"])
859
1192
  print("----------------------------------")
1193
+
860
1194
  print("Partial utilities")
861
1195
  print(" Weights")
1196
+
862
1197
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
863
1198
  print("Attraction not defined")
864
1199
  else:
865
- print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
1200
+ print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
1201
+
866
1202
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
867
1203
  print("Transport costs not defined")
868
- else:
869
- print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1204
+ elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
1205
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1206
+ elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
1207
+ print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
1208
+
870
1209
  print("----------------------------------")
1210
+
1211
+ huff_modelfit = self.modelfit()
1212
+ if huff_modelfit is not None:
1213
+
1214
+ print ("Goodness-of-fit for probabilities")
1215
+
1216
+ print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
1217
+ print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
1218
+ print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
1219
+ print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
1220
+ print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
1221
+ print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
1222
+ print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
1223
+ print("Absolute percentage errors")
1224
+
1225
+ APE_list = [
1226
+ ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
1227
+ ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
1228
+ ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
1229
+ ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
1230
+ ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
1231
+ ]
1232
+ APE_df = pd.DataFrame(
1233
+ APE_list,
1234
+ columns=["Resid.", "%", "Resid.", "%"]
1235
+ )
1236
+ print(APE_df.to_string(index=False))
1237
+
1238
+ print("----------------------------------")
871
1239
 
872
1240
  def mci_fit(
873
1241
  self,
@@ -876,15 +1244,15 @@ class HuffModel:
876
1244
  ):
877
1245
 
878
1246
  interaction_matrix = self.interaction_matrix
879
-
1247
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1248
+ interaction_matrix_metadata = interaction_matrix.get_metadata()
1249
+
880
1250
  supply_locations = interaction_matrix.get_supply_locations()
881
1251
  supply_locations_metadata = supply_locations.get_metadata()
882
1252
 
883
1253
  customer_origins = interaction_matrix.get_customer_origins()
884
1254
  customer_origins_metadata = customer_origins.get_metadata()
885
-
886
- interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
887
-
1255
+
888
1256
  cols_t = [col + "__LCT" for col in cols]
889
1257
 
890
1258
  if "p_ij__LCT" not in interaction_matrix_df.columns:
@@ -937,7 +1305,8 @@ class HuffModel:
937
1305
  interaction_matrix = InteractionMatrix(
938
1306
  interaction_matrix_df,
939
1307
  customer_origins,
940
- supply_locations
1308
+ supply_locations,
1309
+ metadata=interaction_matrix_metadata
941
1310
  )
942
1311
 
943
1312
  mci_model = MCIModel(
@@ -949,7 +1318,39 @@ class HuffModel:
949
1318
 
950
1319
  return mci_model
951
1320
 
952
-
1321
+ def update(self):
1322
+
1323
+ self.interaction_matrix = self.interaction_matrix.update()
1324
+
1325
+ self.market_areas_df = self.interaction_matrix.marketareas().get_market_areas_df()
1326
+
1327
+ return self
1328
+
1329
+ def modelfit(self):
1330
+
1331
+ interaction_matrix = self.interaction_matrix
1332
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1333
+
1334
+ if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
1335
+
1336
+ try:
1337
+
1338
+ huff_modelfit = modelfit(
1339
+ interaction_matrix_df["p_ij_emp"],
1340
+ interaction_matrix_df["p_ij"]
1341
+ )
1342
+
1343
+ return huff_modelfit
1344
+
1345
+ except:
1346
+
1347
+ print("Goodness-of-fit metrics could not be calculated due to NaN values.")
1348
+ return None
1349
+
1350
+ else:
1351
+
1352
+ return None
1353
+
953
1354
  class MCIModel:
954
1355
 
955
1356
  def __init__(
@@ -1005,12 +1406,19 @@ class MCIModel:
1005
1406
 
1006
1407
  if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
1007
1408
 
1008
- mci_modelfit = modelfit(
1009
- interaction_matrix_df["p_ij_emp"],
1010
- interaction_matrix_df["p_ij"]
1011
- )
1409
+ try:
1012
1410
 
1013
- return mci_modelfit
1411
+ mci_modelfit = modelfit(
1412
+ interaction_matrix_df["p_ij_emp"],
1413
+ interaction_matrix_df["p_ij"]
1414
+ )
1415
+
1416
+ return mci_modelfit
1417
+
1418
+ except:
1419
+
1420
+ print("Goodness-of-fit metrics could not be calculated due to NaN values.")
1421
+ return None
1014
1422
 
1015
1423
  else:
1016
1424
 
@@ -1056,7 +1464,7 @@ class MCIModel:
1056
1464
  mci_modelfit = self.modelfit()
1057
1465
  if mci_modelfit is not None:
1058
1466
 
1059
- print ("Goodness-of-fit with respect to probabilities")
1467
+ print ("Goodness-of-fit for probabilities")
1060
1468
 
1061
1469
  print("Sum of squared residuals ", round(mci_modelfit[1]["SQR"], 2))
1062
1470
  print("Sum of squares ", round(mci_modelfit[1]["SQT"], 2))
@@ -1065,12 +1473,20 @@ class MCIModel:
1065
1473
  print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
1066
1474
  print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
1067
1475
  print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
1476
+
1068
1477
  print("Absolute percentage errors")
1069
- print("< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2))
1070
- print("< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2))
1071
- print("< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2))
1072
- print("< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2))
1073
- print("< 25 % ", round(mci_modelfit[1]["APE"]["resid_below25"], 2))
1478
+ APE_list = [
1479
+ ["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
1480
+ ["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
1481
+ ["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
1482
+ ["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
1483
+ ["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
1484
+ ]
1485
+ APE_df = pd.DataFrame(
1486
+ APE_list,
1487
+ columns=["Resid.", "%", "Resid.", "%"]
1488
+ )
1489
+ print(APE_df.to_string(index=False))
1074
1490
 
1075
1491
  print("--------------------------------------------")
1076
1492
 
@@ -1081,6 +1497,7 @@ class MCIModel:
1081
1497
 
1082
1498
  interaction_matrix = self.interaction_matrix
1083
1499
  interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1500
+ interaction_matrix_metadata = interaction_matrix.get_metadata()
1084
1501
 
1085
1502
  if interaction_matrix_df["t_ij"].isna().all():
1086
1503
  raise ValueError ("Transport cost variable is not defined")
@@ -1119,10 +1536,16 @@ class MCIModel:
1119
1536
  if transformation == "ILCT":
1120
1537
  interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
1121
1538
 
1539
+ interaction_matrix_metadata["model"] = {
1540
+ "model_type": "MCI",
1541
+ "transformation": transformation
1542
+ }
1543
+
1122
1544
  interaction_matrix = InteractionMatrix(
1123
1545
  interaction_matrix_df,
1124
1546
  customer_origins,
1125
- supply_locations
1547
+ supply_locations,
1548
+ metadata=interaction_matrix_metadata
1126
1549
  )
1127
1550
  self.interaction_matrix = interaction_matrix
1128
1551
 
@@ -1289,7 +1712,7 @@ def load_geodata (
1289
1712
 
1290
1713
  geodata_gpd = geodata_gpd_original.to_crs(crs_output)
1291
1714
  geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
1292
-
1715
+
1293
1716
  metadata = {
1294
1717
  "location_type": location_type,
1295
1718
  "unique_id": unique_id,
@@ -1307,14 +1730,20 @@ def load_geodata (
1307
1730
  }
1308
1731
 
1309
1732
  if location_type == "origins":
1733
+
1310
1734
  geodata_object = CustomerOrigins(
1311
1735
  geodata_gpd,
1312
1736
  geodata_gpd_original,
1313
1737
  metadata,
1314
1738
  None,
1315
1739
  None
1316
- )
1740
+ )
1741
+
1317
1742
  elif location_type == "destinations":
1743
+
1744
+ geodata_gpd["j_update"] = 0
1745
+ geodata_gpd_original["j_update"] = 0
1746
+
1318
1747
  geodata_object = SupplyLocations(
1319
1748
  geodata_gpd,
1320
1749
  geodata_gpd_original,
@@ -1393,10 +1822,13 @@ def create_interaction_matrix(
1393
1822
  interaction_matrix_df["p_ij"] = None
1394
1823
  interaction_matrix_df["E_ij"] = None
1395
1824
 
1825
+ metadata = {}
1826
+
1396
1827
  interaction_matrix = InteractionMatrix(
1397
1828
  interaction_matrix_df,
1398
1829
  customer_origins,
1399
- supply_locations
1830
+ supply_locations,
1831
+ metadata
1400
1832
  )
1401
1833
 
1402
1834
  return interaction_matrix
@@ -1407,6 +1839,7 @@ def load_interaction_matrix(
1407
1839
  supply_locations_col: str,
1408
1840
  attraction_col: list,
1409
1841
  transport_costs_col: str,
1842
+ flows_col: str = None,
1410
1843
  probabilities_col: str = None,
1411
1844
  market_size_col: str = None,
1412
1845
  customer_origins_coords_col = None,
@@ -1444,6 +1877,8 @@ def load_interaction_matrix(
1444
1877
  raise KeyError ("Column " + supply_locations_col + " not in data")
1445
1878
 
1446
1879
  cols_check = attraction_col + [transport_costs_col]
1880
+ if flows_col is not None:
1881
+ cols_check = cols_check + [flows_col]
1447
1882
  if probabilities_col is not None:
1448
1883
  cols_check = cols_check + [probabilities_col]
1449
1884
  if market_size_col is not None:
@@ -1600,6 +2035,13 @@ def load_interaction_matrix(
1600
2035
  }
1601
2036
  )
1602
2037
 
2038
+ if flows_col is not None:
2039
+ interaction_matrix_df = interaction_matrix_df.rename(
2040
+ columns = {
2041
+ flows_col: "E_ij"
2042
+ }
2043
+ )
2044
+
1603
2045
  if probabilities_col is not None:
1604
2046
  interaction_matrix_df = interaction_matrix_df.rename(
1605
2047
  columns = {
@@ -1613,15 +2055,68 @@ def load_interaction_matrix(
1613
2055
  market_size_col: "C_i"
1614
2056
  }
1615
2057
  )
1616
-
2058
+
2059
+ metadata = {}
2060
+
1617
2061
  interaction_matrix = InteractionMatrix(
1618
2062
  interaction_matrix_df=interaction_matrix_df,
1619
2063
  customer_origins=customer_origins,
1620
- supply_locations=supply_locations
2064
+ supply_locations=supply_locations,
2065
+ metadata=metadata
1621
2066
  )
1622
2067
 
1623
2068
  return interaction_matrix
1624
2069
 
2070
def market_shares(
    df: pd.DataFrame,
    turnover_col: str,
    ref_col: str = None,
    marketshares_col: str = "p_ij"
    ):
    """Compute market shares from a turnover column.

    Each row's share is its turnover divided by a total: the sum of
    ``turnover_col`` within the row's ``ref_col`` group when ``ref_col`` is
    given, otherwise the sum over the whole data frame.

    Args:
        df: Input data. It is not modified; a new data frame is returned.
        turnover_col: Name of the column holding the turnover values.
        ref_col: Optional grouping column. Shares are computed within each
            group of this column.
        marketshares_col: Name of the output column holding the shares.

    Returns:
        A new data frame with a fresh 0..n-1 index, containing the columns
        of ``df`` plus ``marketshares_col``.

    Raises:
        KeyError: If ``ref_col`` is given but is not a column of ``df``.
    """
    check_vars(
        df = df,
        cols = [turnover_col]
        )

    if ref_col is not None and ref_col not in df.columns:
        raise KeyError(f"Column '{ref_col}' not in dataframe.")

    # Work on a fresh frame so the caller's data is never mutated. The index
    # is reset because the previous merge-based implementation also returned
    # a frame with a 0..n-1 index.
    result = df.reset_index(drop=True)

    if ref_col is not None:
        # Per-row group total, aligned with the rows. This avoids temporary
        # helper columns that could clash with existing column names.
        totals = result.groupby(ref_col)[turnover_col].transform("sum")
    else:
        totals = result[turnover_col].sum()

    result[marketshares_col] = result[turnover_col] / totals

    return result
2119
+
1625
2120
  def log_centering_transformation(
1626
2121
  df: pd.DataFrame,
1627
2122
  ref_col: str,
@@ -1652,12 +2147,18 @@ def log_centering_transformation(
1652
2147
  print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
1653
2148
  continue
1654
2149
 
2150
+ if (df[var] <= 0).any():
2151
+ df[var+suffix] = float("nan")
2152
+ print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
2153
+ continue
2154
+
1655
2155
  var_t = df.groupby(ref_col)[var].apply(lct)
1656
2156
  var_t = var_t.reset_index()
1657
2157
  df[var+suffix] = var_t[var]
1658
2158
 
1659
2159
  return df
1660
2160
 
2161
+
1661
2162
  def get_isochrones(
1662
2163
  geodata_gpd: gp.GeoDataFrame,
1663
2164
  unique_id_col: str,
@@ -1748,7 +2249,8 @@ def get_isochrones(
1748
2249
 
1749
2250
  def modelfit(
1750
2251
  observed,
1751
- expected
2252
+ expected,
2253
+ remove_nan: bool = True
1752
2254
  ):
1753
2255
 
1754
2256
  observed_no = len(observed)
@@ -1763,7 +2265,28 @@ def modelfit(
1763
2265
  if not isinstance(expected, np.number):
1764
2266
  if not is_numeric_dtype(expected):
1765
2267
  raise ValueError("Expected column is not numeric")
1766
-
2268
+
2269
+ if remove_nan:
2270
+
2271
+ obs_exp = pd.DataFrame(
2272
+ {
2273
+ "observed": observed,
2274
+ "expected": expected
2275
+ }
2276
+ )
2277
+
2278
+ obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
2279
+
2280
+ observed = obs_exp_clean["observed"].to_numpy()
2281
+ expected = obs_exp_clean["expected"].to_numpy()
2282
+
2283
+ else:
2284
+
2285
+ if np.isnan(observed).any():
2286
+ raise ValueError("Vector with observed data contains NaN")
2287
+ if np.isnan(expected).any():
2288
+ raise ValueError("Vector with expected data contains NaN")
2289
+
1767
2290
  residuals = np.array(observed)-np.array(expected)
1768
2291
  residuals_sq = residuals**2
1769
2292
  residuals_abs = abs(residuals)
@@ -1780,7 +2303,7 @@ def modelfit(
1780
2303
  })
1781
2304
 
1782
2305
  SQR = float(np.sum(residuals_sq))
1783
- SAR = float(np.sum(residuals_abs))
2306
+ SAR = float(np.sum(residuals_abs))
1784
2307
  observed_mean = float(np.sum(observed)/observed_no)
1785
2308
  SQT = float(np.sum((observed-observed_mean)**2))
1786
2309
  Rsq = float(1-(SQR/SQT))
@@ -1789,11 +2312,16 @@ def modelfit(
1789
2312
  MAE = float(SAR/observed_no)
1790
2313
  MAPE = float(np.mean(APE))
1791
2314
 
1792
- resid_below5 = float(len([APE < 5])/expected_no*100)
1793
- resid_below10 = float(len([APE < 10])/expected_no*100)
1794
- resid_below15 = float(len([APE < 15])/expected_no*100)
1795
- resid_below20 = float(len([APE < 20])/expected_no*100)
1796
- resid_below25 = float(len([APE < 25])/expected_no*100)
2315
+ resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
2316
+ resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
2317
+ resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
2318
+ resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
2319
+ resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
2320
+ resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
2321
+ resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
2322
+ resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
2323
+ resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
2324
+ resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
1797
2325
 
1798
2326
  data_lossfunctions = {
1799
2327
  "SQR": SQR,
@@ -1809,7 +2337,12 @@ def modelfit(
1809
2337
  "resid_below10": resid_below10,
1810
2338
  "resid_below15": resid_below15,
1811
2339
  "resid_below20": resid_below20,
1812
- "resid_below25": resid_below25
2340
+ "resid_below25": resid_below25,
2341
+ "resid_below30": resid_below30,
2342
+ "resid_below35": resid_below35,
2343
+ "resid_below40": resid_below40,
2344
+ "resid_below45": resid_below45,
2345
+ "resid_below50": resid_below50,
1813
2346
  }
1814
2347
  }
1815
2348
 
@@ -1820,6 +2353,20 @@ def modelfit(
1820
2353
 
1821
2354
  return modelfit_results
1822
2355
 
2356
def loglik(
    observed,
    expected
    ):
    """Return the negated sum of the logged squared residuals.

    The residuals are taken from ``modelfit(observed, expected)``: the first
    element of its result is indexed with ``"residuals_sq"``, the natural
    logarithm of these values is summed, and the negated sum is returned.

    NOTE(review): a residual of exactly zero gives ``log(0) = -inf``, so the
    returned value becomes ``inf`` - confirm this is acceptable for callers.

    Args:
        observed: Observed values, passed through to ``modelfit``.
        expected: Expected (modelled) values, passed through to ``modelfit``.

    Returns:
        ``-sum(log(residuals_sq))``.
    """
    fit_tables = modelfit(observed, expected)
    squared_residuals = fit_tables[0]["residuals_sq"]

    log_sum = np.sum(np.log(squared_residuals))

    return -log_sum
1823
2370
 
1824
2371
  def check_vars(
1825
2372
  df: pd.DataFrame,