PyPI - huff - Versions diffs - 1.3.5__py3-none-any.whl → 1.4.1__py3-none-any.whl - Mend

huff 1.3.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

huff/gistools.py +8 -3
huff/models.py +622 -75
huff/ors.py +2 -2
huff/osm.py +59 -42
huff/tests/data/Haslach_new_supermarket.cpg +1 -0
huff/tests/data/Haslach_new_supermarket.dbf +0 -0
huff/tests/data/Haslach_new_supermarket.prj +1 -0
huff/tests/data/Haslach_new_supermarket.qmd +26 -0
huff/tests/data/Haslach_new_supermarket.shp +0 -0
huff/tests/data/Haslach_new_supermarket.shx +0 -0
huff/tests/tests_huff.py +84 -32
{huff-1.3.5.dist-info → huff-1.4.1.dist-info}/METADATA +9 -3
{huff-1.3.5.dist-info → huff-1.4.1.dist-info}/RECORD +15 -9
{huff-1.3.5.dist-info → huff-1.4.1.dist-info}/WHEEL +0 -0
{huff-1.3.5.dist-info → huff-1.4.1.dist-info}/top_level.txt +0 -0

huff/models.py CHANGED

@@ -4,8 +4,8 @@
 # Author:      Thomas Wieland
 #              ORCID: 0000-0001-5168-9846
 #              mail: geowieland@googlemail.com
-# Version:     1.3.5
-# Last update: 2025-06-03 17:23
+# Version:     1.4.1
+# Last update: 2025-06-16 17:43
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
@@ -17,8 +17,10 @@ from math import sqrt
 import time
 from pandas.api.types import is_numeric_dtype
 from statsmodels.formula.api import ols
+from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
 from shapely.geometry import Point
 from shapely import wkt
+import copy
 from huff.ors import Client, TimeDistanceMatrix, Isochrone
 from huff.gistools import overlay_difference, distance_matrix, buffers
@@ -73,10 +75,12 @@ class CustomerOrigins:
             print("Market size column         " + metadata["marketsize_col"])
         if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
-            print("Transport cost weighting   not defined")
-        else:
-            print("Transport cost weighting   " + metadata["weighting"][0]["func"] + " with lambda = " + str(metadata["weighting"][0]["param"]))
+            print("Transport cost weighting    not defined")
+        elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
+            print("Transport cost weighting   " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
+        elif metadata["weighting"][0]["func"] == "logistic":
+            print("Transport cost weighting   " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
         print("Unique ID column           " + metadata["unique_id"])
         print("Input CRS                  " + str(metadata["crs_input"]))
@@ -117,11 +121,24 @@ class CustomerOrigins:
         metadata = self.metadata
+        if func not in ["power", "exponential", "logistic"]:
+            raise ValueError("Parameter 'func' must be 'power', 'exponential' or 'logistic'")
+        if isinstance(param_lambda, list) and func != "logistic":
+            raise ValueError("Function type "+ func + " requires one single parameter value")
+        if isinstance(param_lambda, (int, float)) and func == "logistic":
+            raise ValueError("Function type "+ func + " requires two parameters in a list")
         metadata["weighting"][0]["func"] = func
-        metadata["weighting"][0]["param"] = param_lambda
-        self.metadata = metadata
+        if isinstance(param_lambda, list):
+            metadata["weighting"][0]["param"] = [float(param_lambda[0]), float(param_lambda[1])]
+        else:
+            metadata["weighting"][0]["param"] = float(param_lambda)
+        self.metadata = metadata
         return self
     def isochrones(
@@ -190,7 +207,6 @@ class CustomerOrigins:
         return self
 class SupplyLocations:
     def __init__(
@@ -243,7 +259,7 @@ class SupplyLocations:
         if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
             print("Attraction weighting  not defined")
         else:
-            print("Attraction weighting  " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
+            print("Attraction weighting  " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
         print("Unique ID column      " + metadata["unique_id"])
         print("Input CRS             " + str(metadata["crs_input"]))
@@ -284,7 +300,7 @@ class SupplyLocations:
             raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
         metadata["weighting"][0]["func"] = func
-        metadata["weighting"][0]["param"] = param_gamma
+        metadata["weighting"][0]["param"] = float(param_gamma)
         self.metadata = metadata
         return self
@@ -325,7 +341,11 @@ class SupplyLocations:
         metadata = self.get_metadata()
         new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
+        new_destinations_gpd_original["j_update"] = 1
         new_destinations_gpd = new_destinations.get_geodata_gpd()
+        new_destinations_gpd["j_update"] = 1
         new_destinations_metadata = new_destinations.get_metadata()
         if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
@@ -333,14 +353,20 @@ class SupplyLocations:
         if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
             raise KeyError("Supply locations and new destinations data have different column names.")
-        geodata_gpd_original = geodata_gpd_original.append(
-            new_destinations_gpd_original,
+        geodata_gpd_original = pd.concat(
+            [
+                geodata_gpd_original,
+                new_destinations_gpd_original
+                ],
             ignore_index=True
             )
-        geodata_gpd = geodata_gpd.append(
-            new_destinations_gpd,
-            ignore_index=True
+        geodata_gpd = pd.concat(
+            [
+                geodata_gpd,
+                new_destinations_gpd
+                ],
+                ignore_index=True
             )
         metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
@@ -423,13 +449,15 @@ class InteractionMatrix:
         self,
         interaction_matrix_df,
         customer_origins,
-        supply_locations
+        supply_locations,
+        metadata
         ):
         self.interaction_matrix_df = interaction_matrix_df
         self.customer_origins = customer_origins
         self.supply_locations = supply_locations
+        self.metadata = metadata
     def get_interaction_matrix_df(self):
         return self.interaction_matrix_df
@@ -439,35 +467,55 @@ class InteractionMatrix:
     def get_supply_locations(self):
         return self.supply_locations
+    def get_metadata(self):
+        return self.metadata
     def summary(self):
         customer_origins_metadata = self.get_customer_origins().get_metadata()
         supply_locations_metadata = self.get_supply_locations().get_metadata()
+        interaction_matrix_metadata = self.get_metadata()
         print("Interaction Matrix")
         print("----------------------------------")
-        print("Supply locations   " + str(supply_locations_metadata["no_points"]))
+        print("Supply locations    " + str(supply_locations_metadata["no_points"]))
         if supply_locations_metadata["attraction_col"][0] is None:
-            print("Attraction column  not defined")
+            print("Attraction column   not defined")
         else:
-            print("Attraction column  " + supply_locations_metadata["attraction_col"][0])
-        print("Customer origins   " + str(customer_origins_metadata["no_points"]))
+            print("Attraction column   " + supply_locations_metadata["attraction_col"][0])
+        print("Customer origins    " + str(customer_origins_metadata["no_points"]))
         if customer_origins_metadata["marketsize_col"] is None:
             print("Market size column not defined")
         else:
-            print("Market size column " + customer_origins_metadata["marketsize_col"])
+            print("Market size column  " + customer_origins_metadata["marketsize_col"])
+        if interaction_matrix_metadata != {}:
+            if "transport_costs" in interaction_matrix_metadata:
+                print("----------------------------------")
+                if interaction_matrix_metadata["transport_costs"]["network"]:
+                    print("Transport cost type Time")
+                    print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
+                else:
+                    print("Transport cost type Distance")
+                    print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
         print("----------------------------------")
         print("Partial utilities")
-        print("                   Weights")
+        print("                    Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("Attraction         not defined")
+            print("Attraction          not defined")
         else:
-            print("Attraction         " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            print("Attraction          " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("Transport costs    not defined")
-        else:
-            print("Transport costs    " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+            print("Transport costs     not defined")
+        elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+            print("Transport costs    " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+        elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
+            print("Transport costs    " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
     def transport_costs(
@@ -487,6 +535,7 @@ class InteractionMatrix:
             range_type = "distance"
         interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
         customer_origins = self.get_customer_origins()
         customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
@@ -567,7 +616,17 @@ class InteractionMatrix:
             if distance_unit == "kilometers":
                 interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
+        interaction_matrix_metadata["transport_costs"] = {
+            "network": network,
+            "range_type": range_type,
+            "time_unit": time_unit,
+            "distance_unit": distance_unit,
+            "ors_server": ors_server,
+            "ors_auth": ors_auth
+            }
         self.interaction_matrix_df = interaction_matrix_df
+        self.metadata = interaction_matrix_metadata
         return self
@@ -575,6 +634,8 @@ class InteractionMatrix:
         interaction_matrix_df = self.interaction_matrix_df
+        interaction_matrix_metadata = self.get_metadata()
         if interaction_matrix_df["t_ij"].isna().all():
             raise ValueError ("Transport cost variable is not defined")
         if interaction_matrix_df["A_j"].isna().all():
@@ -588,16 +649,20 @@ class InteractionMatrix:
         customer_origins = self.customer_origins
         customer_origins_metadata = customer_origins.get_metadata()
         tc_weighting = customer_origins_metadata["weighting"][0]
         if tc_weighting["func"] == "power":
             interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
         elif tc_weighting["func"] == "exponential":
             interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
+        elif tc_weighting["func"] == "logistic":
+            interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
         else:
             raise ValueError ("Transport costs weighting is not defined.")
         supply_locations = self.supply_locations
         supply_locations_metadata = supply_locations.get_metadata()
         attraction_weighting = supply_locations_metadata["weighting"][0]
         if attraction_weighting["func"] == "power":
             interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
         elif tc_weighting["func"] == "exponential":
@@ -607,10 +672,15 @@ class InteractionMatrix:
         interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
-        interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
+        interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
-        self.interaction_matrix_df = interaction_matrix_df
+        interaction_matrix_metadata["model"] = {
+            "model_type": "Huff"
+            }
+        self.interaction_matrix_df = interaction_matrix_df
+        self.metadata = interaction_matrix_metadata
         return self
     def probabilities (self):
@@ -681,7 +751,6 @@ class InteractionMatrix:
         return huff_model
     def hansen(
         self,
         from_origins: bool = True
@@ -689,16 +758,34 @@ class InteractionMatrix:
         interaction_matrix_df = self.interaction_matrix_df
-        if interaction_matrix_df["U_ij"].isna().all():
-            self.utility()
-            interaction_matrix_df = self.interaction_matrix_df
         if from_origins:
+            if interaction_matrix_df["U_ij"].isna().all():
+                self.utility()
+                interaction_matrix_df = self.interaction_matrix_df
             hansen_df = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum()).reset_index()
             hansen_df = hansen_df.rename(columns = {"U_ij": "A_i"})
         else:
-            hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ij"].sum()).reset_index()
-            hansen_df = hansen_df.rename(columns = {"U_ij": "A_j"})
+            if "C_i" not in interaction_matrix_df.columns or interaction_matrix_df["C_i"].isna().all():
+                raise ValueError("Customer origins market size is not available")
+            customer_origins_metadata = self.customer_origins.get_metadata()
+            tc_weighting = customer_origins_metadata["weighting"][0]
+            if tc_weighting["func"] == "power":
+                interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
+            elif tc_weighting["func"] == "exponential":
+                interaction_matrix_df["t_ij_weighted"] = np.exp(tc_weighting["param"] * interaction_matrix_df['t_ij'])
+            elif tc_weighting["func"] == "logistic":
+                interaction_matrix_df["t_ij_weighted"] = 1+np.exp(tc_weighting["param"][0] + tc_weighting["param"][1] * interaction_matrix_df['t_ij'])
+            else:
+                raise ValueError ("Transport costs weighting is not defined.")
+            interaction_matrix_df["U_ji"] = interaction_matrix_df["C_i"]*interaction_matrix_df["t_ij_weighted"]
+            hansen_df = pd.DataFrame(interaction_matrix_df.groupby("j")["U_ji"].sum()).reset_index()
+            hansen_df = hansen_df.rename(columns = {"U_ji": "A_j"})
         return hansen_df
@@ -735,6 +822,8 @@ class InteractionMatrix:
         interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
         cols_t = [col + "__LCT" for col in cols]
         if "p_ij__LCT" not in interaction_matrix_df.columns:
@@ -789,7 +878,8 @@ class InteractionMatrix:
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         mci_model = MCIModel(
@@ -801,6 +891,249 @@ class InteractionMatrix:
         return mci_model
+    def huff_loglik(
+        self,
+        params
+        ):
+        if not isinstance(params, list):
+            if isinstance(params, np.ndarray):
+                params = params.tolist()
+            else:
+                raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
+        if len(params) == 2:
+            param_gamma, param_lambda = params
+        elif len(params) == 3:
+            param_gamma, param_lambda, param_lambda2 = params
+        else:
+            raise ValueError("Parameter 'params' must be a list with two or three parameter values")
+        interaction_matrix_df = self.interaction_matrix_df
+        supply_locations = self.supply_locations
+        supply_locations_metadata = supply_locations.get_metadata()
+        customer_origins = self.customer_origins
+        customer_origins_metadata = customer_origins.get_metadata()
+        supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+        supply_locations.metadata = supply_locations_metadata
+        if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+            if len(params) == 2:
+                customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
+            else:
+                raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have two input parameters")
+        elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
+            if len(params) == 3:
+                customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
+            else:
+                raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"][0]["func"] + " must have three input parameters")
+        customer_origins.metadata = customer_origins_metadata
+        p_ij_emp = interaction_matrix_df["p_ij"]
+        interaction_matrix_copy = copy.deepcopy(self)
+        interaction_matrix_copy.utility()
+        interaction_matrix_copy.probabilities()
+        interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()
+        p_ij = interaction_matrix_df_copy["p_ij"]
+        LL = loglik(
+            observed = p_ij_emp,
+            expected = p_ij
+            )
+        return -LL
+    def ml_fit(
+        self,
+        initial_params: list = [1.0, -2.0],
+        method: str = "L-BFGS-B",
+        bounds: list = [(0.5, 1), (-3, -1)],
+        constraints: list = [],
+        update_estimates: bool = True
+        ):
+        supply_locations = self.supply_locations
+        supply_locations_metadata = supply_locations.get_metadata()
+        customer_origins = self.customer_origins
+        customer_origins_metadata = customer_origins.get_metadata()
+        if len(initial_params) > 3 or len(initial_params) < 2:
+            raise ValueError("Parameter 'initial_params' must be a list with two or three entries")
+        if len(bounds) != len(initial_params):
+            raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")
+        ml_result = minimize(
+            self.huff_loglik,
+            initial_params,
+            method = method,
+            bounds = bounds,
+            constraints = constraints,
+            options={'disp': 3}
+            )
+        if ml_result.success:
+            fitted_params = ml_result.x
+            if len(initial_params) == 2:
+                param_gamma = fitted_params[0]
+                param_lambda = fitted_params[1]
+                param_results = [
+                    float(param_gamma),
+                    float(param_lambda)
+                    ]
+                supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+                customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
+            elif len (initial_params) == 3:
+                param_gamma = fitted_params[0]
+                param_lambda = fitted_params[1]
+                param_lambda2 = fitted_params[2]
+                param_results = [
+                    float(param_gamma),
+                    float(param_lambda),
+                    float(param_lambda2)
+                    ]
+                supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
+                customer_origins_metadata["weighting"][0]["param"][0] = float(param_lambda)
+                customer_origins_metadata["weighting"][0]["param"][1] = float(param_lambda2)
+            print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")
+        else:
+            param_gamma = None
+            param_lambda = None
+            supply_locations_metadata["weighting"][0]["param"] = param_gamma
+            if len(initial_params) == 3:
+                param_lambda2 = None
+                customer_origins_metadata["weighting"][0]["param"][0] = param_lambda
+                customer_origins_metadata["weighting"][0]["param"][1] = param_lambda2
+            else:
+                customer_origins_metadata["weighting"][0]["param"] = param_lambda
+            print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")
+        self.supply_locations.metadata = supply_locations_metadata
+        self.customer_origins.metadata = customer_origins_metadata
+        if ml_result.success and update_estimates:
+            self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
+            self = self.utility()
+            self = self.probabilities()
+            self = self.flows()
+        return self
+    def update(self):
+        interaction_matrix_df = self.get_interaction_matrix_df()
+        interaction_matrix_metadata = self.get_metadata()
+        customer_origins = self.get_customer_origins()
+        supply_locations = self.get_supply_locations()
+        supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
+        supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[supply_locations_geodata_gpd["j_update"] == 1]
+        if len(supply_locations_geodata_gpd_new) < 1:
+            raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
+        supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
+        supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[supply_locations_geodata_gpd_original["j_update"] == 1]
+        if len(supply_locations_geodata_gpd_original_new) < 1:
+            raise ValueError("There are no new destinations for an interaction matrix update. Use SupplyLocations.add_new_destinations()")
+        supply_locations_new = SupplyLocations(
+            geodata_gpd=supply_locations_geodata_gpd_new,
+            geodata_gpd_original=supply_locations_geodata_gpd_original_new,
+            metadata=supply_locations.metadata,
+            isochrones_gdf=supply_locations.isochrones_gdf,
+            buffers_gdf=supply_locations.buffers_gdf
+        )
+        interaction_matrix_new = create_interaction_matrix(
+            customer_origins=customer_origins,
+            supply_locations=supply_locations_new
+        )
+        interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()
+        if "transport_costs" not in interaction_matrix_metadata:
+            print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")
+            interaction_matrix_df = pd.concat(
+                [
+                interaction_matrix_df,
+                interaction_matrix_new_df
+                ],
+                ignore_index=True
+                )
+            interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
+            self.interaction_matrix_df = interaction_matrix_df
+        else:
+            network = interaction_matrix_metadata["transport_costs"]["network"]
+            range_type = interaction_matrix_metadata["transport_costs"]["range_type"]
+            time_unit = interaction_matrix_metadata["transport_costs"]["time_unit"]
+            distance_unit = interaction_matrix_metadata["transport_costs"]["distance_unit"]
+            ors_server = interaction_matrix_metadata["transport_costs"]["ors_server"]
+            ors_auth = interaction_matrix_metadata["transport_costs"]["ors_auth"]
+            interaction_matrix_new.transport_costs(
+                network=network,
+                range_type=range_type,
+                time_unit=time_unit,
+                distance_unit=distance_unit,
+                ors_server=ors_server,
+                ors_auth=ors_auth
+            )
+            interaction_matrix_df = pd.concat(
+                [
+                    interaction_matrix_df,
+                    interaction_matrix_new_df
+                ],
+                ignore_index=True
+                )
+            interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")
+            self.interaction_matrix_df = interaction_matrix_df
+            self.utility()
+            self.probabilities()
+            self.flows()
+        return self
 class HuffModel:
@@ -857,17 +1190,52 @@ class HuffModel:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
         print("Partial utilities")
         print("                   Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
             print("Attraction         not defined")
         else:
-            print("Attraction         " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
+            print("Attraction         " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
             print("Transport costs    not defined")
-        else:
-            print("Transport costs   " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+        elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
+            print("Transport costs   " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
+        elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
+            print("Transport costs   " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
+        huff_modelfit = self.modelfit()
+        if huff_modelfit is not None:
+            print ("Goodness-of-fit for probabilities")
+            print("Sum of squared residuals       ", round(huff_modelfit[1]["SQR"], 2))
+            print("Sum of squares                 ", round(huff_modelfit[1]["SQT"], 2))
+            print("R-squared                      ", round(huff_modelfit[1]["Rsq"], 2))
+            print("Mean squared error             ", round(huff_modelfit[1]["MSE"], 2))
+            print("Root mean squared error        ", round(huff_modelfit[1]["RMSE"], 2))
+            print("Mean absolute error            ", round(huff_modelfit[1]["MAE"], 2))
+            print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
+            print("Absolute percentage errors")
+            APE_list = [
+                ["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), "  < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
+                ["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), "  < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
+                ["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), "  < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
+                ["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), "  < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
+                ["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), "  < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
+                ]
+            APE_df = pd.DataFrame(
+                APE_list,
+                columns=["Resid.", "%", "Resid.", "%"]
+                )
+            print(APE_df.to_string(index=False))
+            print("----------------------------------")
     def mci_fit(
         self,
@@ -876,15 +1244,15 @@ class HuffModel:
         ):
         interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+        interaction_matrix_metadata = interaction_matrix.get_metadata()
         supply_locations = interaction_matrix.get_supply_locations()
         supply_locations_metadata = supply_locations.get_metadata()
         customer_origins = interaction_matrix.get_customer_origins()
         customer_origins_metadata = customer_origins.get_metadata()
-        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
         cols_t = [col + "__LCT" for col in cols]
         if "p_ij__LCT" not in interaction_matrix_df.columns:
@@ -937,7 +1305,8 @@ class HuffModel:
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         mci_model = MCIModel(
@@ -949,7 +1318,39 @@ class HuffModel:
         return mci_model
+    def update(self):
+        """
+        Refresh the model from its interaction matrix.
+
+        Calls ``update()`` on the stored interaction matrix, keeps the
+        object it returns as the model's interaction matrix and rebuilds
+        ``market_areas_df`` from it via
+        ``marketareas().get_market_areas_df()``.
+
+        Returns:
+            The model instance itself.
+        """
+        refreshed_matrix = self.interaction_matrix.update()
+        self.interaction_matrix = refreshed_matrix
+        market_areas = refreshed_matrix.marketareas()
+        self.market_areas_df = market_areas.get_market_areas_df()
+        return self
+    def modelfit(self):
+        """
+        Compute goodness-of-fit metrics for the model's probabilities.
+
+        Passes the empirical probabilities (column ``p_ij_emp``) as the
+        observed values and the modelled probabilities (column ``p_ij``)
+        as the expected values to the module-level ``modelfit()``
+        function.
+
+        Returns:
+            The result of ``modelfit()``, or ``None`` when one of the two
+            columns is missing from the interaction matrix or the
+            metrics could not be calculated.
+        """
+        interaction_matrix_df = self.interaction_matrix.get_interaction_matrix_df()
+        columns = interaction_matrix_df.columns
+        if "p_ij" not in columns or "p_ij_emp" not in columns:
+            return None
+        try:
+            return modelfit(
+                interaction_matrix_df["p_ij_emp"],
+                interaction_matrix_df["p_ij"]
+            )
+        except Exception:
+            # Best effort: a failed fit is reported, not raised. Catching
+            # Exception instead of using a bare except lets
+            # KeyboardInterrupt and SystemExit propagate.
+            print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+            return None
 class MCIModel:
     def __init__(
@@ -1005,12 +1406,19 @@ class MCIModel:
         if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
-            mci_modelfit = modelfit(
-                interaction_matrix_df["p_ij_emp"],
-                interaction_matrix_df["p_ij"]
-            )
+            try:
-            return mci_modelfit
+                mci_modelfit = modelfit(
+                    interaction_matrix_df["p_ij_emp"],
+                    interaction_matrix_df["p_ij"]
+                )
+                return mci_modelfit
+            except:
+                print("Goodness-of-fit metrics could not be calculated due to NaN values.")
+                return None
         else:
@@ -1056,7 +1464,7 @@ class MCIModel:
         mci_modelfit = self.modelfit()
         if mci_modelfit is not None:
-            print ("Goodness-of-fit with respect to probabilities")
+            print ("Goodness-of-fit for probabilities")
             print("Sum of squared residuals       ", round(mci_modelfit[1]["SQR"], 2))
             print("Sum of squares                 ", round(mci_modelfit[1]["SQT"], 2))
@@ -1065,12 +1473,20 @@ class MCIModel:
             print("Root mean squared error        ", round(mci_modelfit[1]["RMSE"], 2))
             print("Mean absolute error            ", round(mci_modelfit[1]["MAE"], 2))
             print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
             print("Absolute percentage errors")
-            print("< 5 %                          ", round(mci_modelfit[1]["APE"]["resid_below5"], 2))
-            print("< 10 %                         ", round(mci_modelfit[1]["APE"]["resid_below10"], 2))
-            print("< 15 %                         ", round(mci_modelfit[1]["APE"]["resid_below15"], 2))
-            print("< 20 %                         ", round(mci_modelfit[1]["APE"]["resid_below20"], 2))
-            print("< 25 %                         ", round(mci_modelfit[1]["APE"]["resid_below25"], 2))
+            APE_list = [
+                ["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), "  < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
+                ["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), "  < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
+                ["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), "  < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
+                ["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), "  < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
+                ["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), "  < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
+                ]
+            APE_df = pd.DataFrame(
+                APE_list,
+                columns=["Resid.", "%", "Resid.", "%"]
+                )
+            print(APE_df.to_string(index=False))
             print("--------------------------------------------")
@@ -1081,6 +1497,7 @@ class MCIModel:
         interaction_matrix = self.interaction_matrix
         interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+        interaction_matrix_metadata = interaction_matrix.get_metadata()
         if interaction_matrix_df["t_ij"].isna().all():
             raise ValueError ("Transport cost variable is not defined")
@@ -1119,10 +1536,16 @@ class MCIModel:
         if transformation == "ILCT":
             interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
+        interaction_matrix_metadata["model"] = {
+            "model_type": "MCI",
+            "transformation": transformation
+            }
         interaction_matrix = InteractionMatrix(
             interaction_matrix_df,
             customer_origins,
-            supply_locations
+            supply_locations,
+            metadata=interaction_matrix_metadata
             )
         self.interaction_matrix = interaction_matrix
@@ -1289,7 +1712,7 @@ def load_geodata (
     geodata_gpd = geodata_gpd_original.to_crs(crs_output)
     geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
     metadata = {
         "location_type": location_type,
         "unique_id": unique_id,
@@ -1307,14 +1730,20 @@ def load_geodata (
         }
     if location_type == "origins":
         geodata_object = CustomerOrigins(
             geodata_gpd,
             geodata_gpd_original,
             metadata,
             None,
             None
-            )
+            )
     elif location_type == "destinations":
+        geodata_gpd["j_update"] = 0
+        geodata_gpd_original["j_update"] = 0
         geodata_object = SupplyLocations(
             geodata_gpd,
             geodata_gpd_original,
@@ -1393,10 +1822,13 @@ def create_interaction_matrix(
     interaction_matrix_df["p_ij"] = None
     interaction_matrix_df["E_ij"] = None
+    metadata = {}
     interaction_matrix = InteractionMatrix(
         interaction_matrix_df,
         customer_origins,
-        supply_locations
+        supply_locations,
+        metadata
         )
     return interaction_matrix
@@ -1407,6 +1839,7 @@ def load_interaction_matrix(
     supply_locations_col: str,
     attraction_col: list,
     transport_costs_col: str,
+    flows_col: str = None,
     probabilities_col: str = None,
     market_size_col: str = None,
     customer_origins_coords_col = None,
@@ -1444,6 +1877,8 @@ def load_interaction_matrix(
         raise KeyError ("Column " + supply_locations_col + " not in data")
     cols_check = attraction_col + [transport_costs_col]
+    if flows_col is not None:
+        cols_check = cols_check + [flows_col]
     if probabilities_col is not None:
         cols_check = cols_check + [probabilities_col]
     if market_size_col is not None:
@@ -1600,6 +2035,13 @@ def load_interaction_matrix(
         }
         )
+    if flows_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                flows_col: "E_ij"
+            }
+            )
     if probabilities_col is not None:
         interaction_matrix_df = interaction_matrix_df.rename(
             columns = {
@@ -1613,15 +2055,68 @@ def load_interaction_matrix(
                 market_size_col: "C_i"
             }
             )
+    metadata = {}
     interaction_matrix = InteractionMatrix(
         interaction_matrix_df=interaction_matrix_df,
         customer_origins=customer_origins,
-        supply_locations=supply_locations
+        supply_locations=supply_locations,
+        metadata=metadata
         )
     return interaction_matrix
+def market_shares(
+    df: pd.DataFrame,
+    turnover_col: str,
+    ref_col: str = None,
+    marketshares_col: str = "p_ij"
+    ):
+    """
+    Add a market share column computed from a turnover column.
+
+    Each share is the row's value in ``turnover_col`` divided by a
+    total: the sum over all rows sharing the same ``ref_col`` value
+    when ``ref_col`` is given, otherwise the sum over the whole frame.
+
+    Parameters:
+        df: Input data. It is not modified; a copy is returned.
+        turnover_col: Column holding the values to be shared out
+            (checked with ``check_vars``).
+        ref_col: Optional grouping column defining the reference total.
+        marketshares_col: Name of the column receiving the shares.
+
+    Returns:
+        A copy of ``df`` with a fresh RangeIndex (as the previous
+        merge-based implementation produced) and ``marketshares_col``
+        added. No helper columns are left behind.
+
+    Raises:
+        KeyError: If ``ref_col`` is given but is not a column of ``df``.
+    """
+    check_vars(
+        df = df,
+        cols = [turnover_col]
+        )
+    if ref_col is not None and ref_col not in df.columns:
+        raise KeyError(f"Column '{ref_col}' not in dataframe.")
+    # reset_index returns a new frame, so the caller's DataFrame is never
+    # touched and no temporary join key or "total" column is needed.
+    df = df.reset_index(drop=True)
+    if ref_col is not None:
+        # Per-row total of the group the row belongs to.
+        total = df.groupby(ref_col)[turnover_col].transform("sum")
+    else:
+        total = df[turnover_col].sum()
+    df[marketshares_col] = df[turnover_col]/total
+    return df
 def log_centering_transformation(
     df: pd.DataFrame,
     ref_col: str,
@@ -1652,12 +2147,18 @@ def log_centering_transformation(
             print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
             continue
+        if (df[var] <= 0).any():
+            df[var+suffix] = float("nan")
+            print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
+            continue
         var_t = df.groupby(ref_col)[var].apply(lct)
         var_t = var_t.reset_index()
         df[var+suffix] = var_t[var]
     return df
 def get_isochrones(
     geodata_gpd: gp.GeoDataFrame,
     unique_id_col: str,
@@ -1748,7 +2249,8 @@ def get_isochrones(
 def modelfit(
     observed,
-    expected
+    expected,
+    remove_nan: bool = True
     ):
     observed_no = len(observed)
@@ -1763,7 +2265,28 @@ def modelfit(
     if not isinstance(expected, np.number):
         if not is_numeric_dtype(expected):
             raise ValueError("Expected column is not numeric")
+    if remove_nan:
+        obs_exp = pd.DataFrame(
+            {
+                "observed": observed,
+                "expected": expected
+                }
+            )
+        obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
+        observed = obs_exp_clean["observed"].to_numpy()
+        expected = obs_exp_clean["expected"].to_numpy()
+    else:
+        if np.isnan(observed).any():
+            raise ValueError("Vector with observed data contains NaN")
+        if np.isnan(expected).any():
+            raise ValueError("Vector with expected data contains NaN")
     residuals = np.array(observed)-np.array(expected)
     residuals_sq = residuals**2
     residuals_abs = abs(residuals)
@@ -1780,7 +2303,7 @@ def modelfit(
         })
     SQR = float(np.sum(residuals_sq))
-    SAR = float(np.sum(residuals_abs))
+    SAR = float(np.sum(residuals_abs))
     observed_mean = float(np.sum(observed)/observed_no)
     SQT = float(np.sum((observed-observed_mean)**2))
     Rsq = float(1-(SQR/SQT))
@@ -1789,11 +2312,16 @@ def modelfit(
     MAE = float(SAR/observed_no)
     MAPE = float(np.mean(APE))
-    resid_below5 = float(len([APE < 5])/expected_no*100)
-    resid_below10 = float(len([APE < 10])/expected_no*100)
-    resid_below15 = float(len([APE < 15])/expected_no*100)
-    resid_below20 = float(len([APE < 20])/expected_no*100)
-    resid_below25 = float(len([APE < 25])/expected_no*100)
+    resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
+    resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
+    resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
+    resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
+    resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
+    resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
+    resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
+    resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
+    resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
+    resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
     data_lossfunctions = {
         "SQR": SQR,
@@ -1809,7 +2337,12 @@ def modelfit(
             "resid_below10": resid_below10,
             "resid_below15": resid_below15,
             "resid_below20": resid_below20,
-            "resid_below25": resid_below25
+            "resid_below25": resid_below25,
+            "resid_below30": resid_below30,
+            "resid_below35": resid_below35,
+            "resid_below40": resid_below40,
+            "resid_below45": resid_below45,
+            "resid_below50": resid_below50,
         }
     }
@@ -1820,6 +2353,20 @@ def modelfit(
     return modelfit_results
+def loglik(
+    observed,
+    expected
+    ):
+    """
+    Return the negated sum of the logarithms of the squared residuals.
+
+    The squared residuals are read from the ``"residuals_sq"`` entry of
+    the first element returned by ``modelfit(observed, expected)``.
+
+    Parameters:
+        observed: Observed values, passed through to ``modelfit``.
+        expected: Expected values, passed through to ``modelfit``.
+
+    Returns:
+        ``-sum(log(residuals_sq))``.
+    """
+    squared_residuals = modelfit(
+        observed,
+        expected
+        )[0]["residuals_sq"]
+    return -np.sum(np.log(squared_residuals))
 def check_vars(
     df: pd.DataFrame,

huff 1.3.5__py3-none-any.whl → 1.4.1__py3-none-any.whl

huff 1.3.5py3-none-any.whl → 1.4.1py3-none-any.whl