huff 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/gistools.py +2 -2
- huff/models.py +537 -175
- huff/ors.py +2 -2
- huff/osm.py +2 -2
- huff/tests/data/Haslach_new_supermarket.cpg +1 -0
- huff/tests/data/Haslach_new_supermarket.dbf +0 -0
- huff/tests/data/Haslach_new_supermarket.prj +1 -0
- huff/tests/data/Haslach_new_supermarket.qmd +26 -0
- huff/tests/data/Haslach_new_supermarket.shp +0 -0
- huff/tests/data/Haslach_new_supermarket.shx +0 -0
- huff/tests/tests_huff.py +68 -22
- {huff-1.4.0.dist-info → huff-1.4.1.dist-info}/METADATA +1 -1
- {huff-1.4.0.dist-info → huff-1.4.1.dist-info}/RECORD +15 -9
- {huff-1.4.0.dist-info → huff-1.4.1.dist-info}/WHEEL +0 -0
- {huff-1.4.0.dist-info → huff-1.4.1.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
# Author: Thomas Wieland
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
6
6
|
# mail: geowieland@googlemail.com
|
7
|
-
# Version: 1.4.0
|
8
|
-
# Last update: 2025-06-
|
7
|
+
# Version: 1.4.1
|
8
|
+
# Last update: 2025-06-16 17:43
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
10
10
|
#-----------------------------------------------------------------------
|
11
11
|
|
@@ -17,7 +17,7 @@ from math import sqrt
|
|
17
17
|
import time
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from statsmodels.formula.api import ols
|
20
|
-
from scipy.optimize import minimize
|
20
|
+
from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
|
21
21
|
from shapely.geometry import Point
|
22
22
|
from shapely import wkt
|
23
23
|
import copy
|
@@ -77,9 +77,9 @@ class CustomerOrigins:
|
|
77
77
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
78
78
|
print("Transport cost weighting not defined")
|
79
79
|
elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
80
|
-
print("Transport cost weighting " + str(metadata["weighting"][0]["param"]) + " (" + metadata["weighting"][0]["func"] + ")")
|
80
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
81
81
|
elif metadata["weighting"][0]["func"] == "logistic":
|
82
|
-
print("Transport cost weighting " + str(metadata["weighting"][0]["param"][0]) + ", " + str(metadata["weighting"][0]["param"][1]) + " (" + metadata["weighting"][0]["func"] + ")")
|
82
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
83
83
|
|
84
84
|
print("Unique ID column " + metadata["unique_id"])
|
85
85
|
print("Input CRS " + str(metadata["crs_input"]))
|
@@ -207,7 +207,6 @@ class CustomerOrigins:
|
|
207
207
|
|
208
208
|
return self
|
209
209
|
|
210
|
-
|
211
210
|
class SupplyLocations:
|
212
211
|
|
213
212
|
def __init__(
|
@@ -260,7 +259,7 @@ class SupplyLocations:
|
|
260
259
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
261
260
|
print("Attraction weighting not defined")
|
262
261
|
else:
|
263
|
-
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
|
262
|
+
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
|
264
263
|
|
265
264
|
print("Unique ID column " + metadata["unique_id"])
|
266
265
|
print("Input CRS " + str(metadata["crs_input"]))
|
@@ -342,7 +341,11 @@ class SupplyLocations:
|
|
342
341
|
metadata = self.get_metadata()
|
343
342
|
|
344
343
|
new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
|
344
|
+
new_destinations_gpd_original["j_update"] = 1
|
345
|
+
|
345
346
|
new_destinations_gpd = new_destinations.get_geodata_gpd()
|
347
|
+
new_destinations_gpd["j_update"] = 1
|
348
|
+
|
346
349
|
new_destinations_metadata = new_destinations.get_metadata()
|
347
350
|
|
348
351
|
if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
|
@@ -350,14 +353,20 @@ class SupplyLocations:
|
|
350
353
|
if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
|
351
354
|
raise KeyError("Supply locations and new destinations data have different column names.")
|
352
355
|
|
353
|
-
geodata_gpd_original =
|
354
|
-
|
356
|
+
geodata_gpd_original = pd.concat(
|
357
|
+
[
|
358
|
+
geodata_gpd_original,
|
359
|
+
new_destinations_gpd_original
|
360
|
+
],
|
355
361
|
ignore_index=True
|
356
362
|
)
|
357
|
-
|
358
|
-
geodata_gpd =
|
359
|
-
|
360
|
-
|
363
|
+
|
364
|
+
geodata_gpd = pd.concat(
|
365
|
+
[
|
366
|
+
geodata_gpd,
|
367
|
+
new_destinations_gpd
|
368
|
+
],
|
369
|
+
ignore_index=True
|
361
370
|
)
|
362
371
|
|
363
372
|
metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
|
@@ -440,13 +449,15 @@ class InteractionMatrix:
|
|
440
449
|
self,
|
441
450
|
interaction_matrix_df,
|
442
451
|
customer_origins,
|
443
|
-
supply_locations
|
452
|
+
supply_locations,
|
453
|
+
metadata
|
444
454
|
):
|
445
455
|
|
446
456
|
self.interaction_matrix_df = interaction_matrix_df
|
447
457
|
self.customer_origins = customer_origins
|
448
458
|
self.supply_locations = supply_locations
|
449
|
-
|
459
|
+
self.metadata = metadata
|
460
|
+
|
450
461
|
def get_interaction_matrix_df(self):
|
451
462
|
return self.interaction_matrix_df
|
452
463
|
|
@@ -456,38 +467,54 @@ class InteractionMatrix:
|
|
456
467
|
def get_supply_locations(self):
|
457
468
|
return self.supply_locations
|
458
469
|
|
470
|
+
def get_metadata(self):
|
471
|
+
return self.metadata
|
472
|
+
|
459
473
|
def summary(self):
|
460
474
|
|
461
475
|
customer_origins_metadata = self.get_customer_origins().get_metadata()
|
462
476
|
supply_locations_metadata = self.get_supply_locations().get_metadata()
|
477
|
+
interaction_matrix_metadata = self.get_metadata()
|
463
478
|
|
464
479
|
print("Interaction Matrix")
|
465
480
|
print("----------------------------------")
|
466
481
|
|
467
|
-
print("Supply locations
|
482
|
+
print("Supply locations " + str(supply_locations_metadata["no_points"]))
|
468
483
|
if supply_locations_metadata["attraction_col"][0] is None:
|
469
|
-
print("Attraction column
|
484
|
+
print("Attraction column not defined")
|
470
485
|
else:
|
471
|
-
print("Attraction column
|
472
|
-
print("Customer origins
|
486
|
+
print("Attraction column " + supply_locations_metadata["attraction_col"][0])
|
487
|
+
print("Customer origins " + str(customer_origins_metadata["no_points"]))
|
473
488
|
if customer_origins_metadata["marketsize_col"] is None:
|
474
489
|
print("Market size column not defined")
|
475
490
|
else:
|
476
|
-
print("Market size column
|
491
|
+
print("Market size column " + customer_origins_metadata["marketsize_col"])
|
492
|
+
|
493
|
+
if interaction_matrix_metadata != {}:
|
494
|
+
if "transport_costs" in interaction_matrix_metadata:
|
495
|
+
print("----------------------------------")
|
496
|
+
if interaction_matrix_metadata["transport_costs"]["network"]:
|
497
|
+
print("Transport cost type Time")
|
498
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
|
499
|
+
else:
|
500
|
+
print("Transport cost type Distance")
|
501
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
|
502
|
+
|
477
503
|
print("----------------------------------")
|
478
504
|
print("Partial utilities")
|
479
|
-
print("
|
505
|
+
print(" Weights")
|
506
|
+
|
480
507
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
481
|
-
print("Attraction
|
508
|
+
print("Attraction not defined")
|
482
509
|
else:
|
483
|
-
print("Attraction
|
510
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
484
511
|
|
485
512
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
486
|
-
print("Transport costs
|
513
|
+
print("Transport costs not defined")
|
487
514
|
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
488
|
-
print("Transport costs
|
515
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
489
516
|
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
490
|
-
print("Transport costs
|
517
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
491
518
|
|
492
519
|
print("----------------------------------")
|
493
520
|
|
@@ -508,6 +535,7 @@ class InteractionMatrix:
|
|
508
535
|
range_type = "distance"
|
509
536
|
|
510
537
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
538
|
+
interaction_matrix_metadata = self.get_metadata()
|
511
539
|
|
512
540
|
customer_origins = self.get_customer_origins()
|
513
541
|
customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
|
@@ -588,7 +616,17 @@ class InteractionMatrix:
|
|
588
616
|
if distance_unit == "kilometers":
|
589
617
|
interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
|
590
618
|
|
619
|
+
interaction_matrix_metadata["transport_costs"] = {
|
620
|
+
"network": network,
|
621
|
+
"range_type": range_type,
|
622
|
+
"time_unit": time_unit,
|
623
|
+
"distance_unit": distance_unit,
|
624
|
+
"ors_server": ors_server,
|
625
|
+
"ors_auth": ors_auth
|
626
|
+
}
|
627
|
+
|
591
628
|
self.interaction_matrix_df = interaction_matrix_df
|
629
|
+
self.metadata = interaction_matrix_metadata
|
592
630
|
|
593
631
|
return self
|
594
632
|
|
@@ -596,6 +634,8 @@ class InteractionMatrix:
|
|
596
634
|
|
597
635
|
interaction_matrix_df = self.interaction_matrix_df
|
598
636
|
|
637
|
+
interaction_matrix_metadata = self.get_metadata()
|
638
|
+
|
599
639
|
if interaction_matrix_df["t_ij"].isna().all():
|
600
640
|
raise ValueError ("Transport cost variable is not defined")
|
601
641
|
if interaction_matrix_df["A_j"].isna().all():
|
@@ -609,6 +649,7 @@ class InteractionMatrix:
|
|
609
649
|
customer_origins = self.customer_origins
|
610
650
|
customer_origins_metadata = customer_origins.get_metadata()
|
611
651
|
tc_weighting = customer_origins_metadata["weighting"][0]
|
652
|
+
|
612
653
|
if tc_weighting["func"] == "power":
|
613
654
|
interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
|
614
655
|
elif tc_weighting["func"] == "exponential":
|
@@ -621,6 +662,7 @@ class InteractionMatrix:
|
|
621
662
|
supply_locations = self.supply_locations
|
622
663
|
supply_locations_metadata = supply_locations.get_metadata()
|
623
664
|
attraction_weighting = supply_locations_metadata["weighting"][0]
|
665
|
+
|
624
666
|
if attraction_weighting["func"] == "power":
|
625
667
|
interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
|
626
668
|
elif tc_weighting["func"] == "exponential":
|
@@ -630,10 +672,15 @@ class InteractionMatrix:
|
|
630
672
|
|
631
673
|
interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
|
632
674
|
|
633
|
-
interaction_matrix_df = interaction_matrix_df.drop(columns=[
|
675
|
+
interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
|
634
676
|
|
635
|
-
|
677
|
+
interaction_matrix_metadata["model"] = {
|
678
|
+
"model_type": "Huff"
|
679
|
+
}
|
636
680
|
|
681
|
+
self.interaction_matrix_df = interaction_matrix_df
|
682
|
+
self.metadata = interaction_matrix_metadata
|
683
|
+
|
637
684
|
return self
|
638
685
|
|
639
686
|
def probabilities (self):
|
@@ -775,6 +822,8 @@ class InteractionMatrix:
|
|
775
822
|
|
776
823
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
777
824
|
|
825
|
+
interaction_matrix_metadata = self.get_metadata()
|
826
|
+
|
778
827
|
cols_t = [col + "__LCT" for col in cols]
|
779
828
|
|
780
829
|
if "p_ij__LCT" not in interaction_matrix_df.columns:
|
@@ -829,7 +878,8 @@ class InteractionMatrix:
|
|
829
878
|
interaction_matrix = InteractionMatrix(
|
830
879
|
interaction_matrix_df,
|
831
880
|
customer_origins,
|
832
|
-
supply_locations
|
881
|
+
supply_locations,
|
882
|
+
metadata=interaction_matrix_metadata
|
833
883
|
)
|
834
884
|
|
835
885
|
mci_model = MCIModel(
|
@@ -841,6 +891,249 @@ class InteractionMatrix:
|
|
841
891
|
|
842
892
|
return mci_model
|
843
893
|
|
894
|
+
def huff_loglik(
|
895
|
+
self,
|
896
|
+
params
|
897
|
+
):
|
898
|
+
|
899
|
+
if not isinstance(params, list):
|
900
|
+
if isinstance(params, np.ndarray):
|
901
|
+
params = params.tolist()
|
902
|
+
else:
|
903
|
+
raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
|
904
|
+
|
905
|
+
if len(params) == 2:
|
906
|
+
param_gamma, param_lambda = params
|
907
|
+
elif len(params) == 3:
|
908
|
+
param_gamma, param_lambda, param_lambda2 = params
|
909
|
+
else:
|
910
|
+
raise ValueError("Parameter 'params' must be a list with two or three parameter values")
|
911
|
+
|
912
|
+
interaction_matrix_df = self.interaction_matrix_df
|
913
|
+
|
914
|
+
supply_locations = self.supply_locations
|
915
|
+
supply_locations_metadata = supply_locations.get_metadata()
|
916
|
+
|
917
|
+
customer_origins = self.customer_origins
|
918
|
+
customer_origins_metadata = customer_origins.get_metadata()
|
919
|
+
|
920
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
921
|
+
supply_locations.metadata = supply_locations_metadata
|
922
|
+
|
923
|
+
if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
924
|
+
|
925
|
+
if len(params) == 2:
|
926
|
+
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
927
|
+
else:
|
928
|
+
raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have two input parameters")
|
929
|
+
|
930
|
+
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
931
|
+
|
932
|
+
if len(params) == 3:
|
933
|
+
customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
|
934
|
+
else:
|
935
|
+
raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have three input parameters")
|
936
|
+
|
937
|
+
customer_origins.metadata = customer_origins_metadata
|
938
|
+
|
939
|
+
p_ij_emp = interaction_matrix_df["p_ij"]
|
940
|
+
|
941
|
+
interaction_matrix_copy = copy.deepcopy(self)
|
942
|
+
|
943
|
+
interaction_matrix_copy.utility()
|
944
|
+
interaction_matrix_copy.probabilities()
|
945
|
+
|
946
|
+
interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
|
947
|
+
p_ij = interaction_matrix_df_copy["p_ij"]
|
948
|
+
|
949
|
+
LL = loglik(
|
950
|
+
observed = p_ij_emp,
|
951
|
+
expected = p_ij
|
952
|
+
)
|
953
|
+
|
954
|
+
return -LL
|
955
|
+
|
956
|
+
def ml_fit(
|
957
|
+
self,
|
958
|
+
initial_params: list = [1.0, -2.0],
|
959
|
+
method: str = "L-BFGS-B",
|
960
|
+
bounds: list = [(0.5, 1), (-3, -1)],
|
961
|
+
constraints: list = [],
|
962
|
+
update_estimates: bool = True
|
963
|
+
):
|
964
|
+
|
965
|
+
supply_locations = self.supply_locations
|
966
|
+
supply_locations_metadata = supply_locations.get_metadata()
|
967
|
+
|
968
|
+
customer_origins = self.customer_origins
|
969
|
+
customer_origins_metadata = customer_origins.get_metadata()
|
970
|
+
|
971
|
+
if len(initial_params) > 3 or len(initial_params) < 2:
|
972
|
+
raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
|
973
|
+
|
974
|
+
if len(bounds) != len(initial_params):
|
975
|
+
raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
|
976
|
+
|
977
|
+
ml_result = minimize(
|
978
|
+
self.huff_loglik,
|
979
|
+
initial_params,
|
980
|
+
method = method,
|
981
|
+
bounds = bounds,
|
982
|
+
constraints = constraints,
|
983
|
+
options={'disp': 3}
|
984
|
+
)
|
985
|
+
|
986
|
+
if ml_result.success:
|
987
|
+
|
988
|
+
fitted_params = ml_result.x
|
989
|
+
|
990
|
+
if len(initial_params) == 2:
|
991
|
+
|
992
|
+
param_gamma = fitted_params[0]
|
993
|
+
param_lambda = fitted_params[1]
|
994
|
+
param_results = [
|
995
|
+
float(param_gamma),
|
996
|
+
float(param_lambda)
|
997
|
+
]
|
998
|
+
|
999
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1000
|
+
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
1001
|
+
|
1002
|
+
elif len (initial_params) == 3:
|
1003
|
+
|
1004
|
+
param_gamma = fitted_params[0]
|
1005
|
+
param_lambda = fitted_params[1]
|
1006
|
+
param_lambda2 = fitted_params[2]
|
1007
|
+
param_results = [
|
1008
|
+
float(param_gamma),
|
1009
|
+
float(param_lambda),
|
1010
|
+
float(param_lambda2)
|
1011
|
+
]
|
1012
|
+
|
1013
|
+
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1014
|
+
customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
|
1015
|
+
customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
|
1016
|
+
|
1017
|
+
print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
|
1018
|
+
|
1019
|
+
else:
|
1020
|
+
|
1021
|
+
param_gamma = None
|
1022
|
+
param_lambda = None
|
1023
|
+
|
1024
|
+
supply_locations_metadata["weighting"][0]["param"] = param_gamma
|
1025
|
+
|
1026
|
+
if len(initial_params) == 3:
|
1027
|
+
|
1028
|
+
param_lambda2 = None
|
1029
|
+
customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
|
1030
|
+
customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
|
1031
|
+
|
1032
|
+
else:
|
1033
|
+
customer_origins_metadata["weighting"][0]["param"] = param_lambda
|
1034
|
+
|
1035
|
+
print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
|
1036
|
+
|
1037
|
+
self.supply_locations.metadata = supply_locations_metadata
|
1038
|
+
self.customer_origins.metadata = customer_origins_metadata
|
1039
|
+
|
1040
|
+
if ml_result.success and update_estimates:
|
1041
|
+
|
1042
|
+
self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
|
1043
|
+
self = self.utility()
|
1044
|
+
self = self.probabilities()
|
1045
|
+
self = self.flows()
|
1046
|
+
|
1047
|
+
return self
|
1048
|
+
|
1049
|
+
|
1050
|
+
def update(self):
|
1051
|
+
|
1052
|
+
interaction_matrix_df = self.get_interaction_matrix_df()
|
1053
|
+
|
1054
|
+
interaction_matrix_metadata = self.get_metadata()
|
1055
|
+
|
1056
|
+
customer_origins = self.get_customer_origins()
|
1057
|
+
|
1058
|
+
supply_locations = self.get_supply_locations()
|
1059
|
+
|
1060
|
+
supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
|
1061
|
+
supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
|
1062
|
+
|
1063
|
+
if len(supply_locations_geodata_gpd_new) < 1:
|
1064
|
+
raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
|
1065
|
+
|
1066
|
+
supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
|
1067
|
+
supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
|
1068
|
+
if len(supply_locations_geodata_gpd_original_new) < 1:
|
1069
|
+
raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
|
1070
|
+
|
1071
|
+
supply_locations_new = SupplyLocations(
|
1072
|
+
geodata_gpd=supply_locations_geodata_gpd_new,
|
1073
|
+
geodata_gpd_original=supply_locations_geodata_gpd_original_new,
|
1074
|
+
metadata=supply_locations.metadata,
|
1075
|
+
isochrones_gdf=supply_locations.isochrones_gdf,
|
1076
|
+
buffers_gdf=supply_locations.buffers_gdf
|
1077
|
+
)
|
1078
|
+
|
1079
|
+
interaction_matrix_new = create_interaction_matrix(
|
1080
|
+
customer_origins=customer_origins,
|
1081
|
+
supply_locations=supply_locations_new
|
1082
|
+
)
|
1083
|
+
|
1084
|
+
interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()
|
1085
|
+
|
1086
|
+
if "transport_costs" not in interaction_matrix_metadata:
|
1087
|
+
|
1088
|
+
print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
|
1089
|
+
|
1090
|
+
interaction_matrix_df = pd.concat(
|
1091
|
+
[
|
1092
|
+
interaction_matrix_df,
|
1093
|
+
interaction_matrix_new_df
|
1094
|
+
],
|
1095
|
+
ignore_index=True
|
1096
|
+
)
|
1097
|
+
|
1098
|
+
interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
|
1099
|
+
|
1100
|
+
self.interaction_matrix_df = interaction_matrix_df
|
1101
|
+
|
1102
|
+
else:
|
1103
|
+
|
1104
|
+
network = interaction_matrix_metadata["transport_costs"]["network"]
|
1105
|
+
range_type = interaction_matrix_metadata["transport_costs"]["range_type"]
|
1106
|
+
time_unit = interaction_matrix_metadata["transport_costs"]["time_unit"]
|
1107
|
+
distance_unit = interaction_matrix_metadata["transport_costs"]["distance_unit"]
|
1108
|
+
ors_server = interaction_matrix_metadata["transport_costs"]["ors_server"]
|
1109
|
+
ors_auth = interaction_matrix_metadata["transport_costs"]["ors_auth"]
|
1110
|
+
|
1111
|
+
interaction_matrix_new.transport_costs(
|
1112
|
+
network=network,
|
1113
|
+
range_type=range_type,
|
1114
|
+
time_unit=time_unit,
|
1115
|
+
distance_unit=distance_unit,
|
1116
|
+
ors_server=ors_server,
|
1117
|
+
ors_auth=ors_auth
|
1118
|
+
)
|
1119
|
+
|
1120
|
+
interaction_matrix_df = pd.concat(
|
1121
|
+
[
|
1122
|
+
interaction_matrix_df,
|
1123
|
+
interaction_matrix_new_df
|
1124
|
+
],
|
1125
|
+
ignore_index=True
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
|
1129
|
+
|
1130
|
+
self.interaction_matrix_df = interaction_matrix_df
|
1131
|
+
|
1132
|
+
self.utility()
|
1133
|
+
self.probabilities()
|
1134
|
+
self.flows()
|
1135
|
+
|
1136
|
+
return self
|
844
1137
|
|
845
1138
|
class HuffModel:
|
846
1139
|
|
@@ -904,16 +1197,45 @@ class HuffModel:
|
|
904
1197
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
905
1198
|
print("Attraction not defined")
|
906
1199
|
else:
|
907
|
-
print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
1200
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
908
1201
|
|
909
1202
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
910
1203
|
print("Transport costs not defined")
|
911
1204
|
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
912
|
-
print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1205
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
913
1206
|
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
914
|
-
print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"][0]) + ", " + str(customer_origins_metadata["weighting"][0]["param"][1]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1207
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
915
1208
|
|
916
1209
|
print("----------------------------------")
|
1210
|
+
|
1211
|
+
huff_modelfit = self.modelfit()
|
1212
|
+
if huff_modelfit is not None:
|
1213
|
+
|
1214
|
+
print ("Goodness-of-fit for probabilities")
|
1215
|
+
|
1216
|
+
print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
|
1217
|
+
print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
|
1218
|
+
print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
|
1219
|
+
print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
|
1220
|
+
print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
|
1221
|
+
print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
|
1222
|
+
print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
|
1223
|
+
print("Absolute percentage errors")
|
1224
|
+
|
1225
|
+
APE_list = [
|
1226
|
+
["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
|
1227
|
+
["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
|
1228
|
+
["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
|
1229
|
+
["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
|
1230
|
+
["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
|
1231
|
+
]
|
1232
|
+
APE_df = pd.DataFrame(
|
1233
|
+
APE_list,
|
1234
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1235
|
+
)
|
1236
|
+
print(APE_df.to_string(index=False))
|
1237
|
+
|
1238
|
+
print("----------------------------------")
|
917
1239
|
|
918
1240
|
def mci_fit(
|
919
1241
|
self,
|
@@ -923,7 +1245,8 @@ class HuffModel:
|
|
923
1245
|
|
924
1246
|
interaction_matrix = self.interaction_matrix
|
925
1247
|
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
926
|
-
|
1248
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1249
|
+
|
927
1250
|
supply_locations = interaction_matrix.get_supply_locations()
|
928
1251
|
supply_locations_metadata = supply_locations.get_metadata()
|
929
1252
|
|
@@ -982,7 +1305,8 @@ class HuffModel:
|
|
982
1305
|
interaction_matrix = InteractionMatrix(
|
983
1306
|
interaction_matrix_df,
|
984
1307
|
customer_origins,
|
985
|
-
supply_locations
|
1308
|
+
supply_locations,
|
1309
|
+
metadata=interaction_matrix_metadata
|
986
1310
|
)
|
987
1311
|
|
988
1312
|
mci_model = MCIModel(
|
@@ -994,132 +1318,39 @@ class HuffModel:
|
|
994
1318
|
|
995
1319
|
return mci_model
|
996
1320
|
|
997
|
-
def huff_loglik(
|
998
|
-
self,
|
999
|
-
params
|
1000
|
-
):
|
1001
|
-
|
1002
|
-
if not isinstance(params, list):
|
1003
|
-
if isinstance(params, np.ndarray):
|
1004
|
-
params = params.tolist()
|
1005
|
-
else:
|
1006
|
-
raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
|
1321
|
+
def update(self):
|
1007
1322
|
|
1008
|
-
|
1009
|
-
param_gamma, param_lambda = params
|
1010
|
-
elif len(params) == 3:
|
1011
|
-
param_gamma, param_lambda, param_lambda2 = params
|
1012
|
-
else:
|
1013
|
-
raise ValueError("Parameter 'params' must be a list with two or three parameter values")
|
1014
|
-
|
1015
|
-
interaction_matrix = self.interaction_matrix
|
1016
|
-
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1323
|
+
self.interaction_matrix = self.interaction_matrix.update()
|
1017
1324
|
|
1018
|
-
|
1019
|
-
supply_locations_metadata = supply_locations.get_metadata()
|
1020
|
-
|
1021
|
-
customer_origins = interaction_matrix.get_customer_origins()
|
1022
|
-
customer_origins_metadata = customer_origins.get_metadata()
|
1023
|
-
|
1024
|
-
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1025
|
-
supply_locations.metadata = supply_locations_metadata
|
1026
|
-
|
1027
|
-
if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
1028
|
-
|
1029
|
-
if len(params) == 2:
|
1030
|
-
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
1031
|
-
else:
|
1032
|
-
raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have two input parameters")
|
1033
|
-
|
1034
|
-
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
1035
|
-
|
1036
|
-
if len(params) == 3:
|
1037
|
-
customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
|
1038
|
-
else:
|
1039
|
-
raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have three input parameters")
|
1040
|
-
|
1041
|
-
customer_origins.metadata = customer_origins_metadata
|
1042
|
-
|
1043
|
-
interaction_matrix = self.interaction_matrix
|
1044
|
-
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1045
|
-
|
1046
|
-
p_ij_emp = interaction_matrix_df["p_ij"]
|
1047
|
-
|
1048
|
-
interaction_matrix_copy = copy.deepcopy(interaction_matrix)
|
1049
|
-
|
1050
|
-
interaction_matrix_copy.utility()
|
1051
|
-
interaction_matrix_copy.probabilities()
|
1052
|
-
|
1053
|
-
interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
|
1054
|
-
p_ij = interaction_matrix_df_copy["p_ij"]
|
1055
|
-
|
1056
|
-
LL = loglik(
|
1057
|
-
observed = p_ij_emp,
|
1058
|
-
expected = p_ij
|
1059
|
-
)
|
1325
|
+
self.market_areas_df = self.interaction_matrix.marketareas().get_market_areas_df()
|
1060
1326
|
|
1061
|
-
return
|
1327
|
+
return self
|
1062
1328
|
|
1063
|
-
def
|
1064
|
-
self,
|
1065
|
-
initial_params = [1.0, -2.0],
|
1066
|
-
bounds = [(0.5, 1), (-3, -1)],
|
1067
|
-
method = "L-BFGS-B"
|
1068
|
-
):
|
1069
|
-
|
1070
|
-
if len(initial_params) > 3 or len(initial_params) < 2:
|
1071
|
-
raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
|
1329
|
+
def modelfit(self):
|
1072
1330
|
|
1073
|
-
|
1074
|
-
|
1331
|
+
interaction_matrix = self.interaction_matrix
|
1332
|
+
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1075
1333
|
|
1076
|
-
|
1077
|
-
self.huff_loglik,
|
1078
|
-
initial_params,
|
1079
|
-
method = method,
|
1080
|
-
bounds = bounds,
|
1081
|
-
options={'disp': 3}
|
1082
|
-
)
|
1083
|
-
|
1084
|
-
if ml_result.success:
|
1085
|
-
|
1086
|
-
fitted_params = ml_result.x
|
1334
|
+
if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
|
1087
1335
|
|
1088
|
-
|
1336
|
+
try:
|
1089
1337
|
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
float(param_lambda)
|
1095
|
-
]
|
1338
|
+
huff_modelfit = modelfit(
|
1339
|
+
interaction_matrix_df["p_ij_emp"],
|
1340
|
+
interaction_matrix_df["p_ij"]
|
1341
|
+
)
|
1096
1342
|
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
param_lambda = fitted_params[1]
|
1101
|
-
param_lambda2 = fitted_params[2]
|
1102
|
-
param_results = [
|
1103
|
-
float(param_gamma),
|
1104
|
-
float(param_lambda),
|
1105
|
-
float(param_lambda2)
|
1106
|
-
]
|
1343
|
+
return huff_modelfit
|
1344
|
+
|
1345
|
+
except:
|
1107
1346
|
|
1347
|
+
print("Goodness-of-fit metrics could not be calculated due to NaN values.")
|
1348
|
+
return None
|
1349
|
+
|
1108
1350
|
else:
|
1109
|
-
|
1110
|
-
param_gamma = None
|
1111
|
-
param_lambda = None
|
1112
|
-
param_results = [param_gamma, param_lambda]
|
1113
1351
|
|
1114
|
-
|
1115
|
-
|
1116
|
-
param_results.append(param_lambda2)
|
1117
|
-
|
1118
|
-
print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
|
1119
|
-
|
1120
|
-
return param_results
|
1121
|
-
|
1122
|
-
|
1352
|
+
return None
|
1353
|
+
|
1123
1354
|
class MCIModel:
|
1124
1355
|
|
1125
1356
|
def __init__(
|
@@ -1175,12 +1406,19 @@ class MCIModel:
|
|
1175
1406
|
|
1176
1407
|
if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
|
1177
1408
|
|
1178
|
-
|
1179
|
-
interaction_matrix_df["p_ij_emp"],
|
1180
|
-
interaction_matrix_df["p_ij"]
|
1181
|
-
)
|
1409
|
+
try:
|
1182
1410
|
|
1183
|
-
|
1411
|
+
mci_modelfit = modelfit(
|
1412
|
+
interaction_matrix_df["p_ij_emp"],
|
1413
|
+
interaction_matrix_df["p_ij"]
|
1414
|
+
)
|
1415
|
+
|
1416
|
+
return mci_modelfit
|
1417
|
+
|
1418
|
+
except:
|
1419
|
+
|
1420
|
+
print("Goodness-of-fit metrics could not be calculated due to NaN values.")
|
1421
|
+
return None
|
1184
1422
|
|
1185
1423
|
else:
|
1186
1424
|
|
@@ -1226,7 +1464,7 @@ class MCIModel:
|
|
1226
1464
|
mci_modelfit = self.modelfit()
|
1227
1465
|
if mci_modelfit is not None:
|
1228
1466
|
|
1229
|
-
print ("Goodness-of-fit
|
1467
|
+
print ("Goodness-of-fit for probabilities")
|
1230
1468
|
|
1231
1469
|
print("Sum of squared residuals ", round(mci_modelfit[1]["SQR"], 2))
|
1232
1470
|
print("Sum of squares ", round(mci_modelfit[1]["SQT"], 2))
|
@@ -1235,12 +1473,20 @@ class MCIModel:
|
|
1235
1473
|
print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
|
1236
1474
|
print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
|
1237
1475
|
print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
|
1476
|
+
|
1238
1477
|
print("Absolute percentage errors")
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1478
|
+
APE_list = [
|
1479
|
+
["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
|
1480
|
+
["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
|
1481
|
+
["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
|
1482
|
+
["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
|
1483
|
+
["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
|
1484
|
+
]
|
1485
|
+
APE_df = pd.DataFrame(
|
1486
|
+
APE_list,
|
1487
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1488
|
+
)
|
1489
|
+
print(APE_df.to_string(index=False))
|
1244
1490
|
|
1245
1491
|
print("--------------------------------------------")
|
1246
1492
|
|
@@ -1251,6 +1497,7 @@ class MCIModel:
|
|
1251
1497
|
|
1252
1498
|
interaction_matrix = self.interaction_matrix
|
1253
1499
|
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1500
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1254
1501
|
|
1255
1502
|
if interaction_matrix_df["t_ij"].isna().all():
|
1256
1503
|
raise ValueError ("Transport cost variable is not defined")
|
@@ -1289,10 +1536,16 @@ class MCIModel:
|
|
1289
1536
|
if transformation == "ILCT":
|
1290
1537
|
interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
|
1291
1538
|
|
1539
|
+
interaction_matrix_metadata["model"] = {
|
1540
|
+
"model_type": "MCI",
|
1541
|
+
"transformation": transformation
|
1542
|
+
}
|
1543
|
+
|
1292
1544
|
interaction_matrix = InteractionMatrix(
|
1293
1545
|
interaction_matrix_df,
|
1294
1546
|
customer_origins,
|
1295
|
-
supply_locations
|
1547
|
+
supply_locations,
|
1548
|
+
metadata=interaction_matrix_metadata
|
1296
1549
|
)
|
1297
1550
|
self.interaction_matrix = interaction_matrix
|
1298
1551
|
|
@@ -1459,7 +1712,7 @@ def load_geodata (
|
|
1459
1712
|
|
1460
1713
|
geodata_gpd = geodata_gpd_original.to_crs(crs_output)
|
1461
1714
|
geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
|
1462
|
-
|
1715
|
+
|
1463
1716
|
metadata = {
|
1464
1717
|
"location_type": location_type,
|
1465
1718
|
"unique_id": unique_id,
|
@@ -1477,14 +1730,20 @@ def load_geodata (
|
|
1477
1730
|
}
|
1478
1731
|
|
1479
1732
|
if location_type == "origins":
|
1733
|
+
|
1480
1734
|
geodata_object = CustomerOrigins(
|
1481
1735
|
geodata_gpd,
|
1482
1736
|
geodata_gpd_original,
|
1483
1737
|
metadata,
|
1484
1738
|
None,
|
1485
1739
|
None
|
1486
|
-
)
|
1740
|
+
)
|
1741
|
+
|
1487
1742
|
elif location_type == "destinations":
|
1743
|
+
|
1744
|
+
geodata_gpd["j_update"] = 0
|
1745
|
+
geodata_gpd_original["j_update"] = 0
|
1746
|
+
|
1488
1747
|
geodata_object = SupplyLocations(
|
1489
1748
|
geodata_gpd,
|
1490
1749
|
geodata_gpd_original,
|
@@ -1563,10 +1822,13 @@ def create_interaction_matrix(
|
|
1563
1822
|
interaction_matrix_df["p_ij"] = None
|
1564
1823
|
interaction_matrix_df["E_ij"] = None
|
1565
1824
|
|
1825
|
+
metadata = {}
|
1826
|
+
|
1566
1827
|
interaction_matrix = InteractionMatrix(
|
1567
1828
|
interaction_matrix_df,
|
1568
1829
|
customer_origins,
|
1569
|
-
supply_locations
|
1830
|
+
supply_locations,
|
1831
|
+
metadata
|
1570
1832
|
)
|
1571
1833
|
|
1572
1834
|
return interaction_matrix
|
@@ -1577,6 +1839,7 @@ def load_interaction_matrix(
|
|
1577
1839
|
supply_locations_col: str,
|
1578
1840
|
attraction_col: list,
|
1579
1841
|
transport_costs_col: str,
|
1842
|
+
flows_col: str = None,
|
1580
1843
|
probabilities_col: str = None,
|
1581
1844
|
market_size_col: str = None,
|
1582
1845
|
customer_origins_coords_col = None,
|
@@ -1614,6 +1877,8 @@ def load_interaction_matrix(
|
|
1614
1877
|
raise KeyError ("Column " + supply_locations_col + " not in data")
|
1615
1878
|
|
1616
1879
|
cols_check = attraction_col + [transport_costs_col]
|
1880
|
+
if flows_col is not None:
|
1881
|
+
cols_check = cols_check + [flows_col]
|
1617
1882
|
if probabilities_col is not None:
|
1618
1883
|
cols_check = cols_check + [probabilities_col]
|
1619
1884
|
if market_size_col is not None:
|
@@ -1770,6 +2035,13 @@ def load_interaction_matrix(
|
|
1770
2035
|
}
|
1771
2036
|
)
|
1772
2037
|
|
2038
|
+
if flows_col is not None:
|
2039
|
+
interaction_matrix_df = interaction_matrix_df.rename(
|
2040
|
+
columns = {
|
2041
|
+
flows_col: "E_ij"
|
2042
|
+
}
|
2043
|
+
)
|
2044
|
+
|
1773
2045
|
if probabilities_col is not None:
|
1774
2046
|
interaction_matrix_df = interaction_matrix_df.rename(
|
1775
2047
|
columns = {
|
@@ -1783,15 +2055,68 @@ def load_interaction_matrix(
|
|
1783
2055
|
market_size_col: "C_i"
|
1784
2056
|
}
|
1785
2057
|
)
|
1786
|
-
|
2058
|
+
|
2059
|
+
metadata = {}
|
2060
|
+
|
1787
2061
|
interaction_matrix = InteractionMatrix(
|
1788
2062
|
interaction_matrix_df=interaction_matrix_df,
|
1789
2063
|
customer_origins=customer_origins,
|
1790
|
-
supply_locations=supply_locations
|
2064
|
+
supply_locations=supply_locations,
|
2065
|
+
metadata=metadata
|
1791
2066
|
)
|
1792
2067
|
|
1793
2068
|
return interaction_matrix
|
1794
2069
|
|
2070
|
+
def market_shares(
|
2071
|
+
df: pd.DataFrame,
|
2072
|
+
turnover_col: str,
|
2073
|
+
ref_col: str = None,
|
2074
|
+
marketshares_col: str = "p_ij"
|
2075
|
+
):
|
2076
|
+
|
2077
|
+
check_vars(
|
2078
|
+
df = df,
|
2079
|
+
cols = [turnover_col]
|
2080
|
+
)
|
2081
|
+
|
2082
|
+
if ref_col is not None:
|
2083
|
+
|
2084
|
+
if ref_col not in df.columns:
|
2085
|
+
raise KeyError(f"Column '{ref_col}' not in dataframe.")
|
2086
|
+
|
2087
|
+
ms_refcol = pd.DataFrame(df.groupby(ref_col)[turnover_col].sum())
|
2088
|
+
ms_refcol = ms_refcol.rename(columns = {turnover_col: "total"})
|
2089
|
+
ms_refcol = ms_refcol.reset_index()
|
2090
|
+
|
2091
|
+
df = df.merge(
|
2092
|
+
ms_refcol,
|
2093
|
+
how = "left",
|
2094
|
+
left_on = ref_col,
|
2095
|
+
right_on= ref_col
|
2096
|
+
)
|
2097
|
+
|
2098
|
+
else:
|
2099
|
+
|
2100
|
+
ms_norefcol = pd.DataFrame([df[turnover_col].sum()], columns=["total"])
|
2101
|
+
ms_norefcol = ms_norefcol.reset_index()
|
2102
|
+
|
2103
|
+
df["key_temp"] = 1
|
2104
|
+
ms_norefcol["key_temp"] = 1
|
2105
|
+
df = pd.merge(
|
2106
|
+
df,
|
2107
|
+
ms_norefcol,
|
2108
|
+
on="key_temp"
|
2109
|
+
).drop(
|
2110
|
+
"key_temp",
|
2111
|
+
axis=1
|
2112
|
+
)
|
2113
|
+
|
2114
|
+
df[marketshares_col] = df[turnover_col]/df["total"]
|
2115
|
+
|
2116
|
+
df = df.drop(columns="total")
|
2117
|
+
|
2118
|
+
return df
|
2119
|
+
|
1795
2120
|
def log_centering_transformation(
|
1796
2121
|
df: pd.DataFrame,
|
1797
2122
|
ref_col: str,
|
@@ -1822,12 +2147,18 @@ def log_centering_transformation(
|
|
1822
2147
|
print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
|
1823
2148
|
continue
|
1824
2149
|
|
2150
|
+
if (df[var] <= 0).any():
|
2151
|
+
df[var+suffix] = float("nan")
|
2152
|
+
print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
|
2153
|
+
continue
|
2154
|
+
|
1825
2155
|
var_t = df.groupby(ref_col)[var].apply(lct)
|
1826
2156
|
var_t = var_t.reset_index()
|
1827
2157
|
df[var+suffix] = var_t[var]
|
1828
2158
|
|
1829
2159
|
return df
|
1830
2160
|
|
2161
|
+
|
1831
2162
|
def get_isochrones(
|
1832
2163
|
geodata_gpd: gp.GeoDataFrame,
|
1833
2164
|
unique_id_col: str,
|
@@ -1918,7 +2249,8 @@ def get_isochrones(
|
|
1918
2249
|
|
1919
2250
|
def modelfit(
|
1920
2251
|
observed,
|
1921
|
-
expected
|
2252
|
+
expected,
|
2253
|
+
remove_nan: bool = True
|
1922
2254
|
):
|
1923
2255
|
|
1924
2256
|
observed_no = len(observed)
|
@@ -1933,7 +2265,28 @@ def modelfit(
|
|
1933
2265
|
if not isinstance(expected, np.number):
|
1934
2266
|
if not is_numeric_dtype(expected):
|
1935
2267
|
raise ValueError("Expected column is not numeric")
|
1936
|
-
|
2268
|
+
|
2269
|
+
if remove_nan:
|
2270
|
+
|
2271
|
+
obs_exp = pd.DataFrame(
|
2272
|
+
{
|
2273
|
+
"observed": observed,
|
2274
|
+
"expected": expected
|
2275
|
+
}
|
2276
|
+
)
|
2277
|
+
|
2278
|
+
obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
|
2279
|
+
|
2280
|
+
observed = obs_exp_clean["observed"].to_numpy()
|
2281
|
+
expected = obs_exp_clean["expected"].to_numpy()
|
2282
|
+
|
2283
|
+
else:
|
2284
|
+
|
2285
|
+
if np.isnan(observed).any():
|
2286
|
+
raise ValueError("Vector with observed data contains NaN")
|
2287
|
+
if np.isnan(expected).any():
|
2288
|
+
raise ValueError("Vector with expected data contains NaN")
|
2289
|
+
|
1937
2290
|
residuals = np.array(observed)-np.array(expected)
|
1938
2291
|
residuals_sq = residuals**2
|
1939
2292
|
residuals_abs = abs(residuals)
|
@@ -1950,8 +2303,7 @@ def modelfit(
|
|
1950
2303
|
})
|
1951
2304
|
|
1952
2305
|
SQR = float(np.sum(residuals_sq))
|
1953
|
-
SAR = float(np.sum(residuals_abs))
|
1954
|
-
LL = float(np.sum(np.log(residuals_sq)))
|
2306
|
+
SAR = float(np.sum(residuals_abs))
|
1955
2307
|
observed_mean = float(np.sum(observed)/observed_no)
|
1956
2308
|
SQT = float(np.sum((observed-observed_mean)**2))
|
1957
2309
|
Rsq = float(1-(SQR/SQT))
|
@@ -1960,11 +2312,16 @@ def modelfit(
|
|
1960
2312
|
MAE = float(SAR/observed_no)
|
1961
2313
|
MAPE = float(np.mean(APE))
|
1962
2314
|
|
1963
|
-
resid_below5 = float(len([APE < 5])/expected_no*100)
|
1964
|
-
resid_below10 = float(len([APE < 10])/expected_no*100)
|
1965
|
-
resid_below15 = float(len([APE < 15])/expected_no*100)
|
1966
|
-
resid_below20 = float(len([APE < 20])/expected_no*100)
|
1967
|
-
resid_below25 = float(len([APE < 25])/expected_no*100)
|
2315
|
+
resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
|
2316
|
+
resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
|
2317
|
+
resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
|
2318
|
+
resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
|
2319
|
+
resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
|
2320
|
+
resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
|
2321
|
+
resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
|
2322
|
+
resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
|
2323
|
+
resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
|
2324
|
+
resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
|
1968
2325
|
|
1969
2326
|
data_lossfunctions = {
|
1970
2327
|
"SQR": SQR,
|
@@ -1980,7 +2337,12 @@ def modelfit(
|
|
1980
2337
|
"resid_below10": resid_below10,
|
1981
2338
|
"resid_below15": resid_below15,
|
1982
2339
|
"resid_below20": resid_below20,
|
1983
|
-
"resid_below25": resid_below25
|
2340
|
+
"resid_below25": resid_below25,
|
2341
|
+
"resid_below30": resid_below30,
|
2342
|
+
"resid_below35": resid_below35,
|
2343
|
+
"resid_below40": resid_below40,
|
2344
|
+
"resid_below45": resid_below45,
|
2345
|
+
"resid_below50": resid_below50,
|
1984
2346
|
}
|
1985
2347
|
}
|
1986
2348
|
|