PyPI - huff - Versions diffs - 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

huff 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

huff/gistools.py +2 -2
huff/models.py +537 -175
huff/ors.py +2 -2
huff/osm.py +2 -2
huff/tests/data/Haslach_new_supermarket.cpg +1 -0
huff/tests/data/Haslach_new_supermarket.dbf +0 -0
huff/tests/data/Haslach_new_supermarket.prj +1 -0
huff/tests/data/Haslach_new_supermarket.qmd +26 -0
huff/tests/data/Haslach_new_supermarket.shp +0 -0
huff/tests/data/Haslach_new_supermarket.shx +0 -0
huff/tests/tests_huff.py +68 -22
{huff-1.4.0.dist-info → huff-1.4.1.dist-info}/METADATA +1 -1
{huff-1.4.0.dist-info → huff-1.4.1.dist-info}/RECORD +15 -9
{huff-1.4.0.dist-info → huff-1.4.1.dist-info}/WHEEL +0 -0
{huff-1.4.0.dist-info → huff-1.4.1.dist-info}/top_level.txt +0 -0

huff/models.py CHANGED Viewed

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     1.4.0
-# Last update: 2025-06-10 17:17
+# Version:     1.4.1
+# Last update: 2025-06-16 17:43
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -17,7 +17,7 @@ from math import sqrt
 import time
 from pandas.api.types import is_numeric_dtype
 from statsmodels.formula.api import ols
-from scipy.optimize import minimize
+from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
 from shapely.geometry import Point
 from shapely import wkt
 import copy
@@ -77,9 +77,9 @@ class CustomerOrigins:
         if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
             print("Transport cost weighting    not defined")
         elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
-            print("Transport cost weighting   " + str(metadata["weighting"][0]["param"]) + " (" + metadata["weighting"][0]["func"] + ")")
+            print("Transport cost weighting   " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
         elif metadata["weighting"][0]["func"] == "logistic":
-            print("Transport cost weighting   " + str(metadata["weighting"][0]["param"][0]) + ", " + str(metadata["weighting"][0]["param"][1]) + " (" + metadata["weighting"][0]["func"] + ")")
+            print("Transport cost weighting   " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
         print("Unique ID column           " + metadata["unique_id"])
         print("Input CRS                  " + str(metadata["crs_input"]))
@@ -207,7 +207,6 @@ class CustomerOrigins:
         return self
 class SupplyLocations:
     def __init__(
@@ -260,7 +259,7 @@ class SupplyLocations:
         if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
             print("Attraction weighting  not defined")
         else:
-            print("Attraction weighting  " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
+            print("Attraction weighting  " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
         print("Unique ID column      " + metadata["unique_id"])
         print("Input CRS             " + str(metadata["crs_input"]))
@@ -342,7 +341,11 @@ class SupplyLocations:
         metadata = self.get_metadata()
         new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
+        new_destinations_gpd_original["j_update"] = 1
         new_destinations_gpd = new_destinations.get_geodata_gpd()
+        new_destinations_gpd["j_update"] = 1
         new_destinations_metadata = new_destinations.get_metadata()
         if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
@@ -350,14 +353,20 @@ class SupplyLocations:
         if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
             raise KeyError("Supply locations and new destinations data have different column names.")
-        geodata_gpd_original = geodata_gpd_original.append(
-            new_destinations_gpd_original,
+        geodata_gpd_original = pd.concat(
+            [
+                geodata_gpd_original,
+                new_destinations_gpd_original
+                ],
             ignore_index=True
             )
-        geodata_gpd = geodata_gpd.append(
-            new_destinations_gpd,
-            ignore_index=True
+        geodata_gpd = pd.concat(
+            [
+                geodata_gpd,
+                new_destinations_gpd
+                ],
+                ignore_index=True
             )
         metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
@@ -440,13 +449,15 @@ class InteractionMatrix:
         self,
         interaction_matrix_df,
         customer_origins,
-        supply_locations
+        supply_locations,
+        metadata
         ):
         self.interaction_matrix_df = interaction_matrix_df
         self.customer_origins = customer_origins
         self.supply_locations = supply_locations
+        self.metadata = metadata
     def get_interaction_matrix_df(self):
         return self.interaction_matrix_df
@@ -456,38 +467,54 @@ class InteractionMatrix:
     def get_supply_locations(self):
         return self.supply_locations
+    def get_metadata(self):
+        return self.metadata
     def summary(self):
         customer_origins_metadata = self.get_customer_origins().get_metadata()
         supply_locations_metadata = self.get_supply_locations().get_metadata()
+        interaction_matrix_metadata = self.get_metadata()
         print("Interaction Matrix")
         print("----------------------------------")
-        print("Supply locations   " + str(supply_locations_metadata["no_points"]))
+        print("Supply locations    " + str(supply_locations_metadata["no_points"]))
         if supply_locations_metadata["attraction_col"][0] is None:
-            print("Attraction column  not defined")
+            print("Attraction column   not defined")
         else:
-            print("Attraction column  " + supply_locations_metadata["attraction_col"][0])
-        print("Customer origins   " + str(customer_origins_metadata["no_points"]))
+            print("Attraction column   " + supply_locations_metadata["attraction_col"][0])
+        print("Customer origins    " + str(customer_origins_metadata["no_points"]))
         if customer_origins_metadata["marketsize_col"] is None:
             print("Market size column not defined")
         else:
-            print("Market size column " + customer_origins_metadata["marketsize_col"])
+            print("Market size column  " + customer_origins_metadata["marketsize_col"])
+        if interaction_matrix_metadata != {}:
+            if "transport_costs" in interaction_matrix_metadata:
+                print("----------------------------------")
+                if interaction_matrix_metadata["transport_costs"]["network"]:
+                    print("Transport cost type Time")
+                    print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
+                else:
+                    print("Transport cost type Distance")
+                    print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
         print("----------------------------------")
         print("Partial utilities")
-        print("                   Weights")
+        print("                    Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("Attraction         not defined")
+            print("Attraction          not defined")
         else:
-            print("Attraction         " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            print("Attraction          " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("Transport costs    not defined")
+            print("Transport costs     not defined")
         elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
-            print("Transport costs   " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            print("Transport costs    " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
-            print("Transport costs   " + str(customer_origins_metadata["weighting"][0]["param"][0]) + ", " + str(customer_origins_metadata["weighting"][0]["param"][1]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            print("Transport costs    " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
@@ -508,6 +535,7 @@ class InteractionMatrix:
             range_type = "distance"
         interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
         customer_origins = self.get_customer_origins()
         customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
@@ -588,7 +616,17 @@ class InteractionMatrix:
             if distance_unit == "kilometers":
                 interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
+        interaction_matrix_metadata["transport_costs"] = {
+            "network": network,
+            "range_type": range_type,
+            "time_unit": time_unit,
+            "distance_unit": distance_unit,
+            "ors_server": ors_server,
+            "ors_auth": ors_auth
+            }
         self.interaction_matrix_df = interaction_matrix_df
+        self.metadata = interaction_matrix_metadata
         return self
@@ -596,6 +634,8 @@ class InteractionMatrix:
         interaction_matrix_df = self.interaction_matrix_df
+        interaction_matrix_metadata = self.get_metadata()
         if interaction_matrix_df["t_ij"].isna().all():
             raise ValueError ("Transport cost variable is not defined")
         if interaction_matrix_df["A_j"].isna().all():
@@ -609,6 +649,7 @@ class InteractionMatrix:
         customer_origins = self.customer_origins
         customer_origins_metadata = customer_origins.get_metadata()
         tc_weighting = customer_origins_metadata["weighting"][0]
         if tc_weighting["func"] == "power":
             interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
         elif tc_weighting["func"] == "exponential":
@@ -621,6 +662,7 @@ class InteractionMatrix:
         supply_locations = self.supply_locations
         supply_locations_metadata = supply_locations.get_metadata()
         attraction_weighting = supply_locations_metadata["weighting"][0]
         if attraction_weighting["func"] == "power":
             interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
         elif tc_weighting["func"] == "exponential":
@@ -630,10 +672,15 @@ class InteractionMatrix:
         interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
-        interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
+        interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
-        self.interaction_matrix_df = interaction_matrix_df
+        interaction_matrix_metadata["model"] = {
+            "model_type": "Huff"
+            }
+        self.interaction_matrix_df = interaction_matrix_df
+        self.metadata = interaction_matrix_metadata
         return self
     def probabilities (self):
@@ -775,6 +822,8 @@ class InteractionMatrix:
         interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
         cols_t = [col + "__LCT" for col in cols]
         if "p_ij__LCT" not in interaction_matrix_df.columns:
@@ -829,7 +878,8 @@ class InteractionMatrix:
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         mci_model = MCIModel(
@@ -841,6 +891,249 @@ class InteractionMatrix:
         return mci_model
+    def huff_loglik(
+        self,
+        params
+        ):
+        """
+        Objective function for the maximum likelihood fit of the Huff model.
+
+        Writes the candidate parameters into the weighting metadata of the
+        supply locations (gamma) and customer origins (lambda, or the two
+        logistic parameters), re-computes utilities and probabilities on a
+        deep copy of this interaction matrix and compares the resulting
+        "p_ij" with the "p_ij" currently stored in this matrix.
+
+        Parameters:
+            params: list or np.ndarray with two values (gamma, lambda) for
+                "power"/"exponential" transport cost weighting or three
+                values (gamma, lambda, lambda2) for "logistic" weighting.
+
+        Returns:
+            The negated result of loglik(observed, expected), so that the
+            value can be passed to a minimizer.
+
+        Raises:
+            ValueError: if params is neither list nor np.ndarray, has a
+                length other than two or three, or its length does not match
+                the transport cost weighting function.
+
+        Note: the metadata of self.supply_locations and self.customer_origins
+        is modified; only the interaction matrix table is evaluated on a copy.
+        """
+        # Arrays (e.g. as passed by scipy.optimize.minimize) are converted to plain lists
+        if isinstance(params, np.ndarray):
+            params = params.tolist()
+        elif not isinstance(params, list):
+            raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
+        no_params = len(params)
+        if no_params not in (2, 3):
+            raise ValueError("Parameter 'params' must be a list with two or three parameter values")
+        param_gamma = params[0]
+        param_lambda = params[1]
+        # Stored "p_ij" values serve as the observed probabilities
+        observed_df = self.interaction_matrix_df
+        supply_metadata = self.supply_locations.get_metadata()
+        origins_metadata = self.customer_origins.get_metadata()
+        # Attraction weighting (gamma)
+        supply_metadata["weighting"][0]["param"] = float(param_gamma)
+        self.supply_locations.metadata = supply_metadata
+        # Transport cost weighting (lambda resp. lambda and lambda2)
+        tc_weighting = origins_metadata["weighting"][0]
+        tc_func = tc_weighting["func"]
+        if tc_func == "logistic":
+            if no_params != 3:
+                raise ValueError("Huff Model with transport cost weightig of type " + tc_func + " must have three input parameters")
+            tc_weighting["param"] = [float(param_lambda), float(params[2])]
+        elif tc_func in ["power", "exponential"]:
+            if no_params != 2:
+                raise ValueError ("Huff Model with transport cost weightig of type " + tc_func + " must have two input parameters")
+            tc_weighting["param"] = float(param_lambda)
+        self.customer_origins.metadata = origins_metadata
+        p_ij_observed = observed_df["p_ij"]
+        # Expected probabilities are calculated on a copy so the stored table stays untouched
+        trial_matrix = copy.deepcopy(self)
+        trial_matrix.utility()
+        trial_matrix.probabilities()
+        p_ij_expected = trial_matrix.get_interaction_matrix_df()["p_ij"]
+        return -loglik(
+            observed = p_ij_observed,
+            expected = p_ij_expected
+            )
+    def ml_fit(
+        self,
+        initial_params: list = [1.0, -2.0],
+        method: str = "L-BFGS-B",
+        bounds: list = [(0.5, 1), (-3, -1)],
+        constraints: list = [],
+        update_estimates: bool = True
+        ):
+        """
+        Fit the Huff model weighting parameters by minimizing huff_loglik()
+        with scipy.optimize.minimize.
+
+        Parameters:
+            initial_params: start values; two entries (gamma, lambda) or
+                three entries (gamma, lambda, lambda2).
+            method: name of the scipy optimization algorithm.
+            bounds: sequence of (min, max) pairs, one per parameter. None or a
+                scipy Bounds instance is passed to scipy unchanged.
+            constraints: passed to scipy unchanged (see the scipy
+                documentation for which methods support constraints).
+            update_estimates: if True and the optimization succeeded, the
+                current "p_ij" is saved as "p_ij_emp" and utilities,
+                probabilities and flows are re-calculated with the fitted
+                parameters.
+
+        Returns:
+            The interaction matrix (result of utility().probabilities().flows()
+            when estimates are updated, otherwise self).
+
+        Raises:
+            ValueError: if initial_params does not have two or three entries
+                or a bounds sequence has a different length.
+
+        The list defaults in the signature are kept for interface
+        compatibility; this method does not modify them.
+        """
+        supply_locations_metadata = self.supply_locations.get_metadata()
+        customer_origins_metadata = self.customer_origins.get_metadata()
+        no_params = len(initial_params)
+        if no_params > 3 or no_params < 2:
+            raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
+        # Only sized sequences can be length-checked here; None or a Bounds
+        # object has no len() and is validated by scipy itself
+        if hasattr(bounds, "__len__") and len(bounds) != no_params:
+            raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
+        ml_result = minimize(
+            self.huff_loglik,
+            initial_params,
+            method = method,
+            bounds = bounds,
+            constraints = constraints,
+            options={'disp': 3}
+            )
+        if ml_result.success:
+            param_results = [float(par) for par in ml_result.x]
+            print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
+        else:
+            # NOTE(review): a failed fit overwrites the stored weighting parameters
+            # with None (same as before this change) - confirm this is intended
+            param_results = [None] * no_params
+            print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
+        supply_locations_metadata["weighting"][0]["param"] = param_results[0]
+        if no_params == 3:
+            # Assign a fresh list (as huff_loglik does) instead of writing into
+            # ["param"][0] and ["param"][1], which fails if the stored value is
+            # a scalar or None
+            customer_origins_metadata["weighting"][0]["param"] = [param_results[1], param_results[2]]
+        else:
+            customer_origins_metadata["weighting"][0]["param"] = param_results[1]
+        self.supply_locations.metadata = supply_locations_metadata
+        self.customer_origins.metadata = customer_origins_metadata
+        if ml_result.success and update_estimates:
+            # NOTE(review): "p_ij_emp" is overwritten on every successful fit and
+            # huff_loglik reads "p_ij" as observed values - verify the behaviour
+            # when ml_fit is called a second time on the same matrix
+            self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
+            return self.utility().probabilities().flows()
+        return self
+    def update(self):
+        """
+        Extend the interaction matrix by the supply locations flagged as new
+        (column "j_update" equal to 1).
+
+        A separate interaction matrix is created for the new destinations and
+        appended to the existing table, which is then sorted by "ij". If the
+        metadata contains a "transport_costs" entry, transport costs for the
+        new rows are calculated with the stored settings and utilities,
+        probabilities and flows are re-calculated for the whole matrix.
+        Otherwise the rows are only appended and a message is printed.
+
+        Returns:
+            self
+
+        Raises:
+            ValueError: if no supply location is flagged with j_update == 1.
+        """
+        interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
+        customer_origins = self.get_customer_origins()
+        supply_locations = self.get_supply_locations()
+        supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
+        supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
+        if len(supply_locations_geodata_gpd_new) < 1:
+            raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
+        supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
+        supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
+        if len(supply_locations_geodata_gpd_original_new) < 1:
+            raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
+        # NOTE(review): the "j_update" flags are not reset here, so a second call
+        # would presumably append the same destinations again - confirm
+        supply_locations_new = SupplyLocations(
+            geodata_gpd=supply_locations_geodata_gpd_new,
+            geodata_gpd_original=supply_locations_geodata_gpd_original_new,
+            metadata=supply_locations.metadata,
+            isochrones_gdf=supply_locations.isochrones_gdf,
+            buffers_gdf=supply_locations.buffers_gdf
+        )
+        interaction_matrix_new = create_interaction_matrix(
+            customer_origins=customer_origins,
+            supply_locations=supply_locations_new
+        )
+        has_transport_costs = "transport_costs" in interaction_matrix_metadata
+        if has_transport_costs:
+            # Re-use the settings stored by the original transport costs calculation
+            transport_costs_settings = interaction_matrix_metadata["transport_costs"]
+            interaction_matrix_new.transport_costs(
+                network=transport_costs_settings["network"],
+                range_type=transport_costs_settings["range_type"],
+                time_unit=transport_costs_settings["time_unit"],
+                distance_unit=transport_costs_settings["distance_unit"],
+                ors_server=transport_costs_settings["ors_server"],
+                ors_auth=transport_costs_settings["ors_auth"]
+            )
+        else:
+            print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
+        # Fetch the new rows only now: transport_costs() stores its result by
+        # re-assigning interaction_matrix_df on the object, so a reference taken
+        # before the call may not contain the calculated transport costs
+        interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()
+        interaction_matrix_df = pd.concat(
+            [
+                interaction_matrix_df,
+                interaction_matrix_new_df
+            ],
+            ignore_index=True
+            )
+        interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
+        self.interaction_matrix_df = interaction_matrix_df
+        if has_transport_costs:
+            self.utility()
+            self.probabilities()
+            self.flows()
+        return self
 class HuffModel:
@@ -904,16 +1197,45 @@ class HuffModel:
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
             print("Attraction         not defined")
         else:
-            print("Attraction         " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            print("Attraction         " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
             print("Transport costs    not defined")
         elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
-            print("Transport costs   " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            print("Transport costs   " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
-            print("Transport costs   " + str(customer_origins_metadata["weighting"][0]["param"][0]) + ", " + str(customer_origins_metadata["weighting"][0]["param"][1]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            print("Transport costs   " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
+        huff_modelfit = self.modelfit()
+        if huff_modelfit is not None:
+            print ("Goodness-of-fit for probabilities")
+            print("Sum of squared residuals       ", round(huff_modelfit[1]["SQR"], 2))
+            print("Sum of squares                 ", round(huff_modelfit[1]["SQT"], 2))
+            print("R-squared                      ", round(huff_modelfit[1]["Rsq"], 2))
+            print("Mean squared error             ", round(huff_modelfit[1]["MSE"], 2))
+            print("Root mean squared error        ", round(huff_modelfit[1]["RMSE"], 2))
+            print("Mean absolute error            ", round(huff_modelfit[1]["MAE"], 2))
+            print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
+            print("Absolute percentage errors")
+            APE_list = [
+                ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), "  < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
+                ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), "  < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
+                ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), "  < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
+                ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), "  < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
+                ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), "  < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
+                ]
+            APE_df = pd.DataFrame(
+                APE_list,
+                columns=["Resid.", "%", "Resid.", "%"]
+                )
+            print(APE_df.to_string(index=False))
+            print("----------------------------------")
     def mci_fit(
         self,
@@ -923,7 +1245,8 @@ class HuffModel:
         interaction_matrix = self.interaction_matrix
         interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+        interaction_matrix_metadata = interaction_matrix.get_metadata()
         supply_locations = interaction_matrix.get_supply_locations()
         supply_locations_metadata = supply_locations.get_metadata()
@@ -982,7 +1305,8 @@ class HuffModel:
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         mci_model = MCIModel(
@@ -994,132 +1318,39 @@ class HuffModel:
         return mci_model
-    def huff_loglik(
-        self,
-        params
-        ):
-        if not isinstance(params, list):
-            if isinstance(params, np.ndarray):
-                params = params.tolist()
-            else:
-                raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
+    def update(self):
-        if len(params) == 2:
-            param_gamma, param_lambda = params
-        elif len(params) == 3:
-            param_gamma, param_lambda, param_lambda2 = params
-        else:
-            raise ValueError("Parameter 'params' must be a list with two or three parameter values")
-        interaction_matrix = self.interaction_matrix
-        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+        self.interaction_matrix = self.interaction_matrix.update()
-        supply_locations = interaction_matrix.get_supply_locations()
-        supply_locations_metadata = supply_locations.get_metadata()
-        customer_origins = interaction_matrix.get_customer_origins()
-        customer_origins_metadata = customer_origins.get_metadata()
-        supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
-        supply_locations.metadata = supply_locations_metadata
-        if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
-            if len(params) == 2:
-                customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
-            else:
-                raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have two input parameters")
-        elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
-            if len(params) == 3:
-                customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
-            else:
-                raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have three input parameters")
-        customer_origins.metadata = customer_origins_metadata
-        interaction_matrix = self.interaction_matrix
-        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
-        p_ij_emp = interaction_matrix_df["p_ij"]
-        interaction_matrix_copy = copy.deepcopy(interaction_matrix)
-        interaction_matrix_copy.utility()
-        interaction_matrix_copy.probabilities()
-        interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
-        p_ij = interaction_matrix_df_copy["p_ij"]
-        LL = loglik(
-            observed = p_ij_emp,
-            expected = p_ij
-            )
+        self.market_areas_df = self.interaction_matrix.marketareas().get_market_areas_df()
-        return LL
+        return self
-    def ml_fit(
-        self,
-        initial_params = [1.0, -2.0],
-        bounds = [(0.5, 1), (-3, -1)],
-        method = "L-BFGS-B"
-        ):
-        if len(initial_params) > 3 or len(initial_params) < 2:
-            raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
+    def modelfit(self):
-        if len(bounds) != len(initial_params):
-            raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
-        ml_result = minimize(
-            self.huff_loglik,
-            initial_params,
-            method = method,
-            bounds = bounds,
-            options={'disp': 3}
-            )
-        if ml_result.success:
-            fitted_params = ml_result.x
+        if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
-            if len(initial_params) == 2:
+            try:
-                param_gamma = fitted_params[0]
-                param_lambda = fitted_params[1]
-                param_results = [
-                    float(param_gamma),
-                    float(param_lambda)
-                    ]
+                huff_modelfit = modelfit(
+                    interaction_matrix_df["p_ij_emp"],
+                    interaction_matrix_df["p_ij"]
+                )
-            elif len (initial_params) == 3:
-                param_gamma = fitted_params[0]
-                param_lambda = fitted_params[1]
-                param_lambda2 = fitted_params[2]
-                param_results = [
-                    float(param_gamma),
-                    float(param_lambda),
-                    float(param_lambda2)
-                    ]
+                return huff_modelfit
+            except:
+                print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                return None
         else:
-            param_gamma = None
-            param_lambda = None
-            param_results = [param_gamma, param_lambda]
-            if len(initial_params) == 3:
-                param_lambda2 = None
-                param_results.append(param_lambda2)
-            print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
-        return param_results
+            return None
 class MCIModel:
     def __init__(
@@ -1175,12 +1406,19 @@ class MCIModel:
         if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
-            mci_modelfit = modelfit(
-                interaction_matrix_df["p_ij_emp"],
-                interaction_matrix_df["p_ij"]
-            )
+            try:
-            return mci_modelfit
+                mci_modelfit = modelfit(
+                    interaction_matrix_df["p_ij_emp"],
+                    interaction_matrix_df["p_ij"]
+                )
+                return mci_modelfit
+            except:
+                print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                return None
         else:
@@ -1226,7 +1464,7 @@ class MCIModel:
         mci_modelfit = self.modelfit()
         if mci_modelfit is not None:
-            print ("Goodness-of-fit with respect to probabilities")
+            print ("Goodness-of-fit for probabilities")
             print("Sum of squared residuals       ", round(mci_modelfit[1]["SQR"], 2))
             print("Sum of squares                 ", round(mci_modelfit[1]["SQT"], 2))
@@ -1235,12 +1473,20 @@ class MCIModel:
             print("Root mean squared error        ", round(mci_modelfit[1]["RMSE"], 2))
             print("Mean absolute error            ", round(mci_modelfit[1]["MAE"], 2))
             print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
             print("Absolute percentage errors")
-            print("< 5 %                          ", round(mci_modelfit[1]["APE"]["resid_below5"], 2))
-            print("< 10 %                         ", round(mci_modelfit[1]["APE"]["resid_below10"], 2))
-            print("< 15 %                         ", round(mci_modelfit[1]["APE"]["resid_below15"], 2))
-            print("< 20 %                         ", round(mci_modelfit[1]["APE"]["resid_below20"], 2))
-            print("< 25 %                         ", round(mci_modelfit[1]["APE"]["resid_below25"], 2))
+            APE_list = [
+                ["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), "  < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
+                ["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), "  < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
+                ["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), "  < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
+                ["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), "  < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
+                ["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), "  < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
+                ]
+            APE_df = pd.DataFrame(
+                APE_list,
+                columns=["Resid.", "%", "Resid.", "%"]
+                )
+            print(APE_df.to_string(index=False))
             print("--------------------------------------------")
@@ -1251,6 +1497,7 @@ class MCIModel:
         interaction_matrix = self.interaction_matrix
         interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+        interaction_matrix_metadata = interaction_matrix.get_metadata()
         if interaction_matrix_df["t_ij"].isna().all():
             raise ValueError ("Transport cost variable is not defined")
@@ -1289,10 +1536,16 @@ class MCIModel:
         if transformation == "ILCT":
             interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
+        interaction_matrix_metadata["model"] = {
+            "model_type": "MCI",
+            "transformation": transformation
+            }
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         self.interaction_matrix = interaction_matrix
@@ -1459,7 +1712,7 @@ def load_geodata (
     geodata_gpd = geodata_gpd_original.to_crs(crs_output)
     geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
     metadata = {
         "location_type": location_type,
         "unique_id": unique_id,
@@ -1477,14 +1730,20 @@ def load_geodata (
         }
     if location_type == "origins":
         geodata_object = CustomerOrigins(
             geodata_gpd,
             geodata_gpd_original,
             metadata,
             None,
             None
-            )
+            )
     elif location_type == "destinations":
+        geodata_gpd["j_update"] = 0
+        geodata_gpd_original["j_update"] = 0
         geodata_object = SupplyLocations(
             geodata_gpd,
             geodata_gpd_original,
@@ -1563,10 +1822,13 @@ def create_interaction_matrix(
     interaction_matrix_df["p_ij"] = None
     interaction_matrix_df["E_ij"] = None
+    metadata = {}
     interaction_matrix = InteractionMatrix(
         interaction_matrix_df,
         customer_origins,
-        supply_locations
+        supply_locations,
+        metadata
         )
     return interaction_matrix
@@ -1577,6 +1839,7 @@ def load_interaction_matrix(
     supply_locations_col: str,
     attraction_col: list,
     transport_costs_col: str,
+    flows_col: str = None,
     probabilities_col: str = None,
     market_size_col: str = None,
     customer_origins_coords_col = None,
@@ -1614,6 +1877,8 @@ def load_interaction_matrix(
         raise KeyError ("Column " + supply_locations_col + " not in data")
     cols_check = attraction_col + [transport_costs_col]
+    if flows_col is not None:
+        cols_check = cols_check + [flows_col]
     if probabilities_col is not None:
         cols_check = cols_check + [probabilities_col]
     if market_size_col is not None:
@@ -1770,6 +2035,13 @@ def load_interaction_matrix(
         }
         )
+    if flows_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                flows_col: "E_ij"
+            }
+            )
     if probabilities_col is not None:
         interaction_matrix_df = interaction_matrix_df.rename(
             columns = {
@@ -1783,15 +2055,68 @@ def load_interaction_matrix(
                 market_size_col: "C_i"
             }
             )
+    metadata = {}
     interaction_matrix = InteractionMatrix(
         interaction_matrix_df=interaction_matrix_df,
         customer_origins=customer_origins,
-        supply_locations=supply_locations
+        supply_locations=supply_locations,
+        metadata=metadata
         )
     return interaction_matrix
+def market_shares(
+    df: pd.DataFrame,
+    turnover_col: str,
+    ref_col: str = None,
+    marketshares_col: str = "p_ij"
+    ):
+    """Return a copy of ``df`` with a market-share column derived from ``turnover_col``.
+    Each row's share is its turnover divided by a total: the sum of
+    ``turnover_col`` within the row's ``ref_col`` group when ``ref_col`` is
+    given, otherwise the sum over the whole dataframe.
+    Args:
+        df: Input data. It is not modified; a re-indexed copy is returned.
+        turnover_col: Name of the column holding the values to be shared out
+            (checked via ``check_vars`` before use).
+        ref_col: Optional grouping column. Rows whose ``ref_col`` is missing
+            get a NaN share, because they belong to no group.
+        marketshares_col: Name of the output column (overwritten if present).
+    Returns:
+        A new DataFrame with a fresh 0..n-1 index, the input columns in their
+        original order, and ``marketshares_col`` holding the shares.
+    Raises:
+        KeyError: If ``ref_col`` is given but is not a column of ``df``.
+    """
+    check_vars(
+        df = df,
+        cols = [turnover_col]
+        )
+    if ref_col is not None and ref_col not in df.columns:
+        raise KeyError(f"Column '{ref_col}' not in dataframe.")
+    # reset_index(drop=True) returns a new frame, so the caller's dataframe is
+    # never touched, and the result keeps the 0..n-1 index that the former
+    # merge-based implementation produced.
+    df = df.reset_index(drop=True)
+    if ref_col is not None:
+        # Broadcast each group's total back onto its rows; no temporary
+        # "total" column is needed, so an existing column of that name is safe.
+        totals = df.groupby(ref_col)[turnover_col].transform("sum")
+    else:
+        # A single grand total; dividing by the scalar avoids the helper
+        # merge that used to leak "index"/"key_temp" columns.
+        totals = df[turnover_col].sum()
+    df[marketshares_col] = df[turnover_col] / totals
+    return df
 def log_centering_transformation(
     df: pd.DataFrame,
     ref_col: str,
@@ -1822,12 +2147,18 @@ def log_centering_transformation(
             print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
             continue
+        if (df[var] <= 0).any():
+            df[var+suffix] = float("nan")
+            print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
+            continue
         var_t = df.groupby(ref_col)[var].apply(lct)
         var_t = var_t.reset_index()
         df[var+suffix] = var_t[var]
     return df
 def get_isochrones(
     geodata_gpd: gp.GeoDataFrame,
     unique_id_col: str,
@@ -1918,7 +2249,8 @@ def get_isochrones(
 def modelfit(
     observed,
-    expected
+    expected,
+    remove_nan: bool = True
     ):
     observed_no = len(observed)
@@ -1933,7 +2265,28 @@ def modelfit(
     if not isinstance(expected, np.number):
         if not is_numeric_dtype(expected):
             raise ValueError("Expected column is not numeric")
+    if remove_nan:
+        obs_exp = pd.DataFrame(
+            {
+                "observed": observed,
+                "expected": expected
+                }
+            )
+        obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
+        observed = obs_exp_clean["observed"].to_numpy()
+        expected = obs_exp_clean["expected"].to_numpy()
+    else:
+        if np.isnan(observed).any():
+            raise ValueError("Vector with observed data contains NaN")
+        if np.isnan(expected).any():
+            raise ValueError("Vector with expected data contains NaN")
     residuals = np.array(observed)-np.array(expected)
     residuals_sq = residuals**2
     residuals_abs = abs(residuals)
@@ -1950,8 +2303,7 @@ def modelfit(
         })
     SQR = float(np.sum(residuals_sq))
-    SAR = float(np.sum(residuals_abs))
-    LL = float(np.sum(np.log(residuals_sq)))
+    SAR = float(np.sum(residuals_abs))
     observed_mean = float(np.sum(observed)/observed_no)
     SQT = float(np.sum((observed-observed_mean)**2))
     Rsq = float(1-(SQR/SQT))
@@ -1960,11 +2312,16 @@ def modelfit(
     MAE = float(SAR/observed_no)
     MAPE = float(np.mean(APE))
-    resid_below5 = float(len([APE < 5])/expected_no*100)
-    resid_below10 = float(len([APE < 10])/expected_no*100)
-    resid_below15 = float(len([APE < 15])/expected_no*100)
-    resid_below20 = float(len([APE < 20])/expected_no*100)
-    resid_below25 = float(len([APE < 25])/expected_no*100)
+    resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
+    resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
+    resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
+    resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
+    resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
+    resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
+    resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
+    resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
+    resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
+    resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
     data_lossfunctions = {
         "SQR": SQR,
@@ -1980,7 +2337,12 @@ def modelfit(
             "resid_below10": resid_below10,
             "resid_below15": resid_below15,
             "resid_below20": resid_below20,
-            "resid_below25": resid_below25
+            "resid_below25": resid_below25,
+            "resid_below30": resid_below30,
+            "resid_below35": resid_below35,
+            "resid_below40": resid_below40,
+            "resid_below45": resid_below45,
+            "resid_below50": resid_below50,
         }
     }

huff 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

huff 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl