huff 1.1.2__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/gistools.py +123 -3
- huff/models.py +925 -134
- huff/ors.py +16 -16
- huff/osm.py +207 -0
- huff/tests/data/Wieland2015.xlsx +0 -0
- huff/tests/tests_huff.py +146 -41
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/METADATA +31 -11
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/RECORD +10 -8
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/WHEEL +0 -0
- {huff-1.1.2.dist-info → huff-1.3.0.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
 # Author: Thomas Wieland
 # ORCID: 0000-0001-5168-9846
 # mail: geowieland@googlemail.com
-# Version: 1.1.2
-# Last update: 2025-05-
+# Version: 1.3.0
+# Last update: 2025-05-22 05:45
 # Copyright (c) 2025 Thomas Wieland
 #-----------------------------------------------------------------------
 
@@ -14,8 +14,11 @@ import pandas as pd
 import geopandas as gp
 import numpy as np
 import time
+from statsmodels.formula.api import ols
+from shapely.geometry import Point
+from shapely import wkt
 from huff.ors import Client, TimeDistanceMatrix, Isochrone
-from huff.gistools import overlay_difference, distance_matrix
+from huff.gistools import overlay_difference, distance_matrix, buffers
 
 
 class CustomerOrigins:
@@ -24,12 +27,16 @@ class CustomerOrigins:
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf,
+        buffers_gdf
         ):
 
         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf
+        self.buffers_gdf = buffers_gdf
 
     def get_geodata_gpd(self):
 
@@ -42,6 +49,14 @@ class CustomerOrigins:
     def get_metadata(self):
 
         return self.metadata
+
+    def get_isochrones(self):
+
+        return self.isochrones_gdf
+
+    def get_buffers(self):
+
+        return self.buffers_gdf
 
     def summary(self):
 
@@ -63,6 +78,16 @@ class CustomerOrigins:
         print("Unique ID column " + metadata["unique_id"])
         print("Input CRS " + str(metadata["crs_input"]))
 
+        if self.isochrones_gdf is None:
+            print("Including isochrones NO")
+        else:
+            print("Including isochrones YES")
+
+        if self.buffers_gdf is None:
+            print("Including buffers NO")
+        else:
+            print("Including buffers YES")
+
         return metadata
 
     def define_marketsize(
@@ -97,27 +122,109 @@ class CustomerOrigins:
 
         return self
 
+    def isochrones(
+        self,
+        segments_minutes: list = [5, 10, 15],
+        range_type: str = "time",
+        intersections: str = "true",
+        profile: str = "driving-car",
+        donut: bool = True,
+        ors_server: str = "https://api.openrouteservice.org/v2/",
+        ors_auth: str = None,
+        timeout: int = 10,
+        delay: int = 1,
+        save_output: bool = True,
+        output_filepath: str = "customer_origins_isochrones.shp",
+        output_crs: str = "EPSG:4326"
+        ):
+
+        geodata_gpd = self.get_geodata_gpd()
+        metadata = self.get_metadata()
+
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.isochrones_gdf = isochrones_gdf
+
+        return self
+
+    def buffers(
+        self,
+        segments_distance: list = [500, 1000],
+        donut: bool = True,
+        save_output: bool = True,
+        output_filepath: str = "customer_origins_buffers.shp",
+        output_crs: str = "EPSG:4326"
+        ):
+
+        geodata_gpd_original = self.get_geodata_gpd_original()
+        metadata = self.metadata
+
+        buffers_gdf = buffers(
+            point_gdf = geodata_gpd_original,
+            unique_id_col = metadata["unique_id"],
+            distances = segments_distance,
+            donut = donut,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.buffers_gdf = buffers_gdf
+
+        return self
+
+
 class SupplyLocations:
 
     def __init__(
         self,
         geodata_gpd,
         geodata_gpd_original,
-        metadata
+        metadata,
+        isochrones_gdf,
+        buffers_gdf
         ):
 
         self.geodata_gpd = geodata_gpd
         self.geodata_gpd_original = geodata_gpd_original
         self.metadata = metadata
+        self.isochrones_gdf = isochrones_gdf
+        self.buffers_gdf = buffers_gdf
 
     def get_geodata_gpd(self):
+
         return self.geodata_gpd
 
     def get_geodata_gpd_original(self):
+
         return self.geodata_gpd_original
 
     def get_metadata(self):
+
         return self.metadata
+
+    def get_isochrones_gdf(self):
+
+        return self.isochrones_gdf
+
+    def get_buffers_gdf(self):
+
+        return self.buffers_gdf
 
     def summary(self):
 
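Note: CustomerOrigins now carries isochrones_gdf and buffers_gdf and exposes isochrones() and buffers() wrappers around the module-level get_isochrones() and huff.gistools.buffers() helpers. A minimal usage sketch follows; the file name, column name, and ORS key are placeholders, not taken from the package.

```python
# Hypothetical usage of the new CustomerOrigins methods (placeholder names).
from huff.models import load_geodata

origins = load_geodata(
    "origins.shp",              # assumed point shapefile
    location_type="origins",
    unique_id="origin_id",
    data_type="shp"
)

# Buffers need no external service; isochrones call the openrouteservice API.
origins = origins.buffers(segments_distance=[500, 1000])
origins = origins.isochrones(
    segments_minutes=[5, 10, 15],
    ors_auth="YOUR_ORS_API_KEY"  # placeholder
)

print(origins.get_buffers().head())
print(origins.get_isochrones().head())
```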
@@ -239,86 +351,70 @@ class SupplyLocations:
 
     def isochrones(
         self,
-
+        segments_minutes: list = [5, 10, 15],
         range_type: str = "time",
         intersections: str = "true",
         profile: str = "driving-car",
         donut: bool = True,
         ors_server: str = "https://api.openrouteservice.org/v2/",
         ors_auth: str = None,
-        timeout = 10,
-        delay = 1,
+        timeout: int = 10,
+        delay: int = 1,
         save_output: bool = True,
-        output_filepath: str = "
+        output_filepath: str = "supply_locations_isochrones.shp",
         output_crs: str = "EPSG:4326"
         ):
 
         geodata_gpd = self.get_geodata_gpd()
         metadata = self.get_metadata()
 
-
-
-
-
-
-
-
-
+        isochrones_gdf = get_isochrones(
+            geodata_gpd = geodata_gpd,
+            unique_id_col = metadata["unique_id"],
+            segments_minutes = segments_minutes,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            donut = donut,
+            ors_server = ors_server,
+            ors_auth = ors_auth,
+            timeout = timeout,
+            delay = delay,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
             )
-
-        isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
-
-        i = 0
 
-
-
-            isochrone_output = ors_client.isochrone(
-                locations = [[x, y]],
-                segments = segments,
-                range_type = range_type,
-                intersections = intersections,
-                profile = profile,
-                timeout = timeout,
-                save_output = False,
-                output_crs = output_crs
-                )
-
-            if isochrone_output.status_code != 200:
-                continue
-
-            isochrone_gdf = isochrone_output.get_isochrones_gdf()
-
-            if donut:
-                isochrone_gdf = overlay_difference(
-                    polygon_gdf = isochrone_gdf,
-                    sort_col = "segment"
-                    )
-
-            time.sleep(delay)
-
-            isochrone_gdf[unique_id_col] = unique_id_values[i]
-
-            isochrones_gdf = pd.concat(
-                [
-                    isochrones_gdf,
-                    isochrone_gdf
-                    ],
-                ignore_index=True
-                )
-
-            i = i+1
+        self.isochrones_gdf = isochrones_gdf
 
-
-            output_crs,
-            allow_override=True,
-            inplace=True
-            )
-
-        if save_output:
+        return self
 
-
+    def buffers(
+        self,
+        segments_distance: list = [500, 1000],
+        donut: bool = True,
+        save_output: bool = True,
+        output_filepath: str = "supply_locations_buffers.shp",
+        output_crs: str = "EPSG:4326"
+        ):
 
-
+        geodata_gpd_original = self.get_geodata_gpd_original()
+        metadata = self.metadata
+
+        buffers_gdf = buffers(
+            point_gdf = geodata_gpd_original,
+            unique_id_col = metadata["unique_id"],
+            distances = segments_distance,
+            donut = donut,
+            save_output = save_output,
+            output_filepath = output_filepath,
+            output_crs = output_crs
+            )
+
+        self.buffers_gdf = buffers_gdf
+
+        return self
+
 
 class InteractionMatrix:
 
@@ -360,15 +456,16 @@ class InteractionMatrix:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
 
     def transport_costs(
@@ -431,14 +528,10 @@ class InteractionMatrix:
         range_type = transport_costs_matrix_config["range_type"]
 
         transport_costs_matrix["source"] = transport_costs_matrix["source"].astype(int)
-        transport_costs_matrix["source"] = transport_costs_matrix["source"].map(
-            dict(enumerate(customer_origins_ids))
-            )
+        transport_costs_matrix["source"] = transport_costs_matrix["source"].map(dict(enumerate(customer_origins_ids)))
 
         transport_costs_matrix["destination"] = transport_costs_matrix["destination"].astype(int)
-        transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(
-            dict(enumerate(supply_locations_ids))
-            )
+        transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(dict(enumerate(supply_locations_ids)))
 
         transport_costs_matrix["source_destination"] = transport_costs_matrix["source"].astype(str)+"_"+transport_costs_matrix["destination"].astype(str)
         transport_costs_matrix = transport_costs_matrix[["source_destination", range_type]]
@@ -506,11 +599,11 @@ class InteractionMatrix:
         if attraction_weighting["func"] == "power":
             interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
         elif tc_weighting["func"] == "exponential":
-            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df[
+            interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
         else:
             raise ValueError ("Attraction weighting is not defined.")
 
-        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]
+        interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
 
         interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
 
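Note: this hunk fixes the utility computation. In 1.1.2, U_ij was set to the weighted attraction alone, dropping the transport-cost term; in 1.3.0 it is the product of both weighted terms, i.e. the Huff utility U_ij = A_j^γ · f(t_ij). A small numeric sketch (γ and λ are illustrative values, not package defaults):

```python
# Sketch of the corrected Huff utility; column names follow the diff,
# gamma and lambda_ are made-up illustrative exponents.
import pandas as pd

df = pd.DataFrame({"A_j": [5000.0, 12000.0], "t_ij": [8.0, 15.0]})
gamma, lambda_ = 0.9, -2.0

df["A_j_weighted"] = df["A_j"] ** gamma        # attraction weighting (power)
df["t_ij_weighted"] = df["t_ij"] ** lambda_    # transport cost weighting (power)
df["U_ij"] = df["A_j_weighted"] * df["t_ij_weighted"]  # 1.3.0: product of both terms
print(df[["U_ij"]])
```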
@@ -591,13 +684,11 @@ class InteractionMatrix:
         cols: list = ["A_j", "t_ij"]
         ):
 
-        """ MCI model log-centering transformation """
-
         cols = cols + ["p_ij"]
 
         interaction_matrix_df = self.interaction_matrix_df
 
-        interaction_matrix_df =
+        interaction_matrix_df = log_centering_transformation(
             df = interaction_matrix_df,
             ref_col = "i",
             cols = cols
@@ -607,6 +698,87 @@ class InteractionMatrix:
 
         return self
 
+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        supply_locations = self.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = self.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = self.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = self.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = self.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][key] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+
+            supply_locations_metadata["attraction_col"].append(None)
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
 class HuffModel:
 
     def __init__(
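Note: InteractionMatrix.mci_fit() log-centers the matrix if needed, then fits the MCI model as an intercept-free OLS regression of p_ij__LCT on the transformed predictors. A minimal sketch of that regression step, assuming a DataFrame that already holds the __LCT columns (values are illustrative, not package data):

```python
# Sketch of the regression that mci_fit() runs internally.
import pandas as pd
from statsmodels.formula.api import ols

interaction_matrix_df = pd.DataFrame({
    "p_ij__LCT": [0.12, -0.08, 0.31, -0.35],
    "A_j__LCT":  [0.25, -0.10, 0.40, -0.55],
    "t_ij__LCT": [-0.30, 0.20, -0.45, 0.55],
})

cols_t = ["A_j__LCT", "t_ij__LCT"]
mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'  # "-1": no intercept, as in the diff

fit = ols(mci_formula, data=interaction_matrix_df).fit()
print(fit.params)               # estimated attraction and transport cost exponents
print(fit.conf_int(alpha=0.05)) # confidence intervals reported by MCIModel.summary()
```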
@@ -662,19 +834,305 @@ class HuffModel:
         else:
             print("Market size column " + customer_origins_metadata["marketsize_col"])
         print("----------------------------------")
-        print("
+        print("Partial utilities")
+        print(" Weights")
         if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Attraction not defined")
         else:
-            print("
+            print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
         if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
-            print("
+            print("Transport costs not defined")
         else:
-            print("
+            print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
         print("----------------------------------")
-
+
+    def mci_fit(
+        self,
+        cols: list = ["A_j", "t_ij"],
+        alpha = 0.05
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        supply_locations = interaction_matrix.get_supply_locations()
+        supply_locations_metadata = supply_locations.get_metadata()
+
+        customer_origins = interaction_matrix.get_customer_origins()
+        customer_origins_metadata = customer_origins.get_metadata()
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        cols_t = [col + "__LCT" for col in cols]
+
+        if "p_ij__LCT" not in interaction_matrix_df.columns:
+            interaction_matrix = interaction_matrix.mci_transformation(
+                cols = cols
+                )
+            interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
+
+        mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
+
+        mci_ols_coefficients = mci_ols_model.params
+        mci_ols_coef_standarderrors = mci_ols_model.bse
+        mci_ols_coef_t = mci_ols_model.tvalues
+        mci_ols_coef_p = mci_ols_model.pvalues
+        mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
+
+        coefs = {}
+        for i, col in enumerate(cols_t):
+            coefs[i] = {
+                "Coefficient": col[:-5],
+                "Estimate": float(mci_ols_coefficients[col]),
+                "SE": float(mci_ols_coef_standarderrors[col]),
+                "t": float(mci_ols_coef_t[col]),
+                "p": float(mci_ols_coef_p[col]),
+                "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
+                "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
+                }
+
+        customer_origins_metadata["weighting"][0] = {
+            "func": "power",
+            "param": mci_ols_coefficients["t_ij__LCT"]
+            }
+
+        coefs2 = coefs.copy()
+        for key, value in list(coefs2.items()):
+            if value["Coefficient"] == "t_ij":
+                del coefs2[key]
+
+        for key, value in coefs2.items():
+            supply_locations_metadata["weighting"][(key)] = {
+                "func": "power",
+                "param": value["Estimate"]
+                }
+            supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
+
+        customer_origins.metadata = customer_origins_metadata
+        supply_locations.metadata = supply_locations_metadata
+        interaction_matrix = InteractionMatrix(
+            interaction_matrix_df,
+            customer_origins,
+            supply_locations
+            )
+
+        mci_model = MCIModel(
+            interaction_matrix,
+            coefs,
+            mci_ols_model,
+            None
+            )
+
+        return mci_model
+
+
+class MCIModel:
+
+    def __init__(
+        self,
+        interaction_matrix: InteractionMatrix,
+        coefs: dict,
+        mci_ols_model,
+        market_areas_df
+        ):
+
+        self.interaction_matrix = interaction_matrix
+        self.coefs = coefs
+        self.mci_ols_model = mci_ols_model
+        self.market_areas_df = market_areas_df
+
+    def get_interaction_matrix_df(self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        return interaction_matrix_df
+
+    def get_supply_locations(self):
+
+        interaction_matrix = self.interaction_matrix
+        supply_locations = interaction_matrix.get_supply_locations()
+
+        return supply_locations
+
+    def get_customer_origins(self):
+
+        interaction_matrix = self.interaction_matrix
+        customer_origins = interaction_matrix.get_customer_origins()
+
+        return customer_origins
+
+    def get_mci_ols_model(self):
+
+        return self.mci_ols_model
+
+    def get_coefs_dict(self):
+
+        return self.coefs
+
+    def get_market_areas_df(self):
+
+        return self.market_areas_df
+
+    def summary(self):
+
+        interaction_matrix = self.interaction_matrix
+        coefs = self.coefs
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+
+        print("Multiplicative Competitive Interaction Model")
+        print("--------------------------------------------")
+        print("Supply locations " + str(supply_locations_metadata["no_points"]))
+        print("Customer origins " + str(customer_origins_metadata["no_points"]))
+        print("--------------------------------------------")
+        print("Partial utilities")
+
+        coefficients_rows = []
+        for key, value in coefs.items():
+            coefficient_name = value["Coefficient"]
+            if coefficient_name == "A_j":
+                coefficient_name = "Attraction"
+            if coefficient_name == "t_ij":
+                coefficient_name = "Transport costs"
+            coefficients_rows.append({
+                "": coefficient_name,
+                "Estimate": round(value["Estimate"], 3),
+                "SE": round(value["SE"], 3),
+                "t": round(value["t"], 3),
+                "p": round(value["p"], 3),
+                "CI lower": round(value["CI_lower"], 3),
+                "CI upper": round(value["CI_upper"], 3)
+                })
+        coefficients_df = pd.DataFrame(coefficients_rows)
+
+        print (coefficients_df)
+
+        print("--------------------------------------------")
+
+    def utility(
+        self,
+        transformation = "LCT"
+        ):
+
+        interaction_matrix = self.interaction_matrix
+
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        if interaction_matrix_df["t_ij"].isna().all():
+            raise ValueError ("Transport cost variable is not defined")
+        if interaction_matrix_df["A_j"].isna().all():
+            raise ValueError ("Attraction variable is not defined")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["A_j", "t_ij"]
+            )
+
+        customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
+
+        t_ij_weighting = customer_origins_metadata["weighting"][0]["param"]
+
+        if transformation == "ILCT":
+            mci_formula = f"{t_ij_weighting}*t_ij"
+        else:
+            mci_formula = f"t_ij**{t_ij_weighting}"
+
+        supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
+        attraction_col = supply_locations_metadata["attraction_col"]
+        attraction_weighting = supply_locations_metadata["weighting"]
+
+        if transformation == "ILCT":
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" + {value['param']}*{attraction_col[key]}"
+        else:
+            for key, value in attraction_weighting.items():
+                mci_formula = mci_formula + f" * {attraction_col[key]}**{value['param']}"
+
+        interaction_matrix_df["U_ij"] = interaction_matrix_df.apply(lambda row: eval(mci_formula, {}, row.to_dict()), axis=1)
+
+        if transformation == "ILCT":
+            interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
+
+        self.interaction_matrix = interaction_matrix_df
+
+        return self
+
+    def probabilities (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["U_ij"].isna().all():
+            self.utility()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        utility_i = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum())
+        utility_i = utility_i.rename(columns = {"U_ij": "U_i"})
+
+        interaction_matrix_df = interaction_matrix_df.merge(
+            utility_i,
+            left_on="i",
+            right_on="i",
+            how="inner"
+            )
+
+        interaction_matrix_df["p_ij"] = (interaction_matrix_df["U_ij"]) / (interaction_matrix_df["U_i"])
+
+        interaction_matrix_df = interaction_matrix_df.drop(columns=["U_i"])
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def flows (self):
+
+        interaction_matrix_df = self.interaction_matrix_df
+
+        if interaction_matrix_df["C_i"].isna().all():
+            raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["C_i"]
+            )
+
+        if interaction_matrix_df["p_ij"].isna().all():
+            self.probabilities()
+            interaction_matrix_df = self.interaction_matrix_df
+
+        interaction_matrix_df["E_ij"] = interaction_matrix_df["p_ij"] * interaction_matrix_df["C_i"]
+
+        self.interaction_matrix_df = interaction_matrix_df
+
+        return self
+
+    def marketareas (self):
+
+        interaction_matrix = self.interaction_matrix
+        interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
+
+        check_vars(
+            df = interaction_matrix_df,
+            cols = ["E_ij"]
+            )
+
+        market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
+        market_areas_df = market_areas_df.reset_index(drop=False)
+        market_areas_df = market_areas_df.rename(columns={"E_ij": "T_j"})
+
+        mci_model = MCIModel(
+            interaction_matrix = interaction_matrix,
+            coefs = self.get_coefs_dict(),
+            mci_ols_model = self.get_mci_ols_model(),
+            market_areas_df = market_areas_df
+            )
+
+        return mci_model
+
 def load_geodata (
-
+    data,
     location_type: str,
     unique_id: str,
     x_col: str = None,
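Note: the MCIModel returned by mci_fit() bundles the fitted statsmodels OLS result, the coefficient dictionary, and the interaction matrix. A hypothetical workflow sketch follows; "interaction_matrix" is assumed to be an InteractionMatrix with attraction and transport costs already defined, and nothing here is quoted from the package documentation.

```python
# Hypothetical use of the new MCIModel (assumed pre-built interaction_matrix).
mci_model = interaction_matrix.mci_fit(cols=["A_j", "t_ij"], alpha=0.05)

mci_model.summary()                       # prints the coefficient table
coefs = mci_model.get_coefs_dict()        # estimates, SEs, t, p, CIs per coefficient
ols_fit = mci_model.get_mci_ols_model()   # underlying statsmodels results object
print(ols_fit.rsquared)
```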
@@ -687,33 +1145,47 @@ def load_geodata (
     ):
 
     if location_type is None or (location_type != "origins" and location_type != "destinations"):
-        raise ValueError ("location_type must be either 'origins' or 'destinations'")
-
-    if data_type not in ["shp", "csv", "xlsx"]:
-        raise ValueError ("data_type must be 'shp', 'csv' or 'xlsx'")
+        raise ValueError ("Argument location_type must be either 'origins' or 'destinations'")
 
-    if
-        geodata_gpd_original =
+    if isinstance(data, gp.GeoDataFrame):
+        geodata_gpd_original = data
+        if not all(geodata_gpd_original.geometry.geom_type == "Point"):
+            raise ValueError ("Input geopandas.GeoDataFrame must be of type 'Point'")
         crs_input = geodata_gpd_original.crs
+    elif isinstance(data, pd.DataFrame):
+        geodata_tab = data
+    elif isinstance(data, str):
+        if data_type == "shp":
+            geodata_gpd_original = gp.read_file(data)
+            if not all(geodata_gpd_original.geometry.geom_type == "Point"):
+                raise ValueError ("Input shapefile must be of type 'Point'")
+            crs_input = geodata_gpd_original.crs
+        elif data_type == "csv" or data_type == "xlsx":
+            if x_col is None:
+                raise ValueError ("Missing value for X coordinate column")
+            if y_col is None:
+                raise ValueError ("Missing value for Y coordinate column")
+        elif data_type == "csv":
+            geodata_tab = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            geodata_tab = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
 
-    if data_type == "csv" or data_type == "xlsx":
-
-
-
-
-
-        if data_type == "csv":
-            geodata_tab = pd.read_csv(
-                file,
-                sep = csv_sep,
-                decimal = csv_decimal,
-                encoding = csv_encoding
+    if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
+
+        check_vars(
+            df = geodata_tab,
+            cols = [x_col, y_col]
             )
-
-        if data_type == "xlsx":
-            geodata_tab = pd.read_excel(file)
-
-    if data_type == "csv" or data_type == "xlsx":
+
         geodata_gpd_original = gp.GeoDataFrame(
             geodata_tab,
             geometry = gp.points_from_xy(
@@ -722,8 +1194,9 @@ def load_geodata (
                 ),
             crs = crs_input
             )
-
+
     crs_output = "EPSG:4326"
+
     geodata_gpd = geodata_gpd_original.to_crs(crs_output)
     geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
 
@@ -747,13 +1220,17 @@ def load_geodata (
         geodata_object = CustomerOrigins(
             geodata_gpd,
             geodata_gpd_original,
-            metadata
+            metadata,
+            None,
+            None
             )
     elif location_type == "destinations":
         geodata_object = SupplyLocations(
             geodata_gpd,
             geodata_gpd_original,
-            metadata
+            metadata,
+            None,
+            None
            )
 
     return geodata_object
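Note: load_geodata() now accepts a point GeoDataFrame, a plain DataFrame with coordinate columns, or a file path, and passes the two new None placeholders for isochrones_gdf and buffers_gdf when constructing the location objects. A hedged sketch of the three input forms (file and column names are placeholders):

```python
# Hypothetical calls showing the input types load_geodata() accepts in 1.3.0.
import geopandas as gp
import pandas as pd
from huff.models import load_geodata

# 1. Point GeoDataFrame
gdf = gp.read_file("supply_locations.shp")
destinations = load_geodata(gdf, location_type="destinations", unique_id="store_id")

# 2. Plain DataFrame with X/Y coordinate columns
df = pd.DataFrame({"origin_id": [1, 2], "X": [7.46, 7.47], "Y": [51.51, 51.52]})
origins = load_geodata(df, location_type="origins", unique_id="origin_id",
                       x_col="X", y_col="Y")

# 3. File path (shp, csv, or xlsx), as in 1.1.2
origins = load_geodata("origins.csv", location_type="origins", unique_id="origin_id",
                       x_col="X", y_col="Y", data_type="csv")
```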
@@ -830,33 +1307,241 @@ def create_interaction_matrix(
 
     return interaction_matrix
 
-def
-
-
-
+def load_interaction_matrix(
+    data,
+    customer_origins_col: str,
+    supply_locations_col: str,
+    attraction_col: list,
+    transport_costs_col: str,
+    probabilities_col: str = None,
+    market_size_col: str = None,
+    customer_origins_coords_col = None,
+    supply_locations_coords_col = None,
+    data_type = "csv",
+    csv_sep = ";",
+    csv_decimal = ",",
+    csv_encoding="unicode_escape",
+    crs_input = "EPSG:4326",
+    crs_output = "EPSG:4326"
+    ):
+
+    if isinstance(data, pd.DataFrame):
+        interaction_matrix_df = data
+    elif isinstance(data, str):
+        if data_type not in ["csv", "xlsx"]:
+            raise ValueError ("data_type must be 'csv' or 'xlsx'")
+        if data_type == "csv":
+            interaction_matrix_df = pd.read_csv(
+                data,
+                sep = csv_sep,
+                decimal = csv_decimal,
+                encoding = csv_encoding
+                )
+        elif data_type == "xlsx":
+            interaction_matrix_df = pd.read_excel(data)
+        else:
+            raise TypeError("Unknown type of data")
+    else:
+        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
+
+    if customer_origins_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + customer_origins_col + " not in data")
+    if supply_locations_col not in interaction_matrix_df.columns:
+        raise KeyError ("Column " + supply_locations_col + " not in data")
+
+    cols_check = attraction_col + [transport_costs_col]
+    if probabilities_col is not None:
+        cols_check = cols_check + [probabilities_col]
+    if market_size_col is not None:
+        cols_check = cols_check + [market_size_col]
 
-
-
-
+    check_vars(
+        interaction_matrix_df,
+        cols = cols_check
+        )
+
+    if customer_origins_coords_col is not None:
+
+        if isinstance(customer_origins_coords_col, str):
+
+            if customer_origins_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + customer_origins_coords_col + " not in data.")
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab[customer_origins_coords_col].apply(lambda x: wkt.loads(x))
+            customer_origins_geodata_gpd = gp.GeoDataFrame(
+                customer_origins_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop(
+                columns = customer_origins_coords_col
+                )
+
+        elif isinstance(customer_origins_coords_col, list):
+
+            if len(customer_origins_coords_col) != 2:
+                raise ValueError ("Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = customer_origins_coords_col
+                )
+
+            customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col[0], customer_origins_coords_col[1]]]
+            customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
+            customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab.apply(lambda row: Point(row[customer_origins_coords_col[0]], row[customer_origins_coords_col[1]]), axis=1)
+            customer_origins_geodata_gpd = gp.GeoDataFrame(customer_origins_geodata_tab, geometry="geometry")
+
+            customer_origins_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        customer_origins_geodata_gpd = interaction_matrix_df[customer_origins_col]
+        customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop_duplicates()
+
+    if market_size_col is not None:
+        customer_origins_cols = [customer_origins_col] + [market_size_col]
+    else:
+        customer_origins_cols = [customer_origins_col]
+    customer_origins_geodata_original_tab = customer_origins_geodata_tab = interaction_matrix_df[customer_origins_cols]
+
+    customer_origins_metadata = {
+        "location_type": "origins",
+        "unique_id": customer_origins_col,
+        "attraction_col": [None],
+        "marketsize_col": market_size_col,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(customer_origins_geodata_gpd)
+        }
+
+    customer_origins = CustomerOrigins(
+        geodata_gpd = customer_origins_geodata_gpd,
+        geodata_gpd_original = customer_origins_geodata_original_tab,
+        metadata = customer_origins_metadata,
+        isochrones_gdf = None,
+        buffers_gdf = None
+        )
+
+    if supply_locations_coords_col is not None:
+
+        if isinstance(supply_locations_coords_col, str):
+
+            if supply_locations_coords_col not in interaction_matrix_df.columns:
+                raise KeyError ("Column " + supply_locations_coords_col + " not in data.")
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab[supply_locations_coords_col].apply(lambda x: wkt.loads(x))
+            supply_locations_geodata_gpd = gp.GeoDataFrame(
+                supply_locations_geodata_tab,
+                geometry="geometry",
+                crs = crs_input)
+            supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop(
+                columns = supply_locations_coords_col
+                )
+
+        if isinstance(supply_locations_coords_col, list):
+
+            if len(supply_locations_coords_col) != 2:
+                raise ValueError ("Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
+
+            check_vars (
+                df = interaction_matrix_df,
+                cols = supply_locations_coords_col
+                )
+
+            supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col[0], supply_locations_coords_col[1]]]
+            supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
+            supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab.apply(lambda row: Point(row[supply_locations_coords_col[0]], row[supply_locations_coords_col[1]]), axis=1)
+            supply_locations_geodata_gpd = gp.GeoDataFrame(supply_locations_geodata_tab, geometry="geometry")
+
+            supply_locations_geodata_gpd.set_crs(crs_output, inplace=True)
+
+    else:
+
+        supply_locations_geodata_gpd = interaction_matrix_df[supply_locations_col]
+        supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop_duplicates()
+
+    supply_locations_cols = [supply_locations_col] + attraction_col
+    supply_locations_geodata_original_tab = supply_locations_geodata_tab = interaction_matrix_df[supply_locations_cols]
+
+    supply_locations_metadata = {
+        "location_type": "destinations",
+        "unique_id": supply_locations_col,
+        "attraction_col": attraction_col,
+        "marketsize_col": None,
+        "weighting": {
+            0: {
+                "func": None,
+                "param": None
+                }
+            },
+        "crs_input": crs_input,
+        "crs_output": crs_output,
+        "no_points": len(supply_locations_geodata_gpd)
+        }
+
+    supply_locations = SupplyLocations(
+        geodata_gpd = supply_locations_geodata_gpd,
+        geodata_gpd_original = supply_locations_geodata_original_tab,
+        metadata = supply_locations_metadata,
+        isochrones_gdf = None,
+        buffers_gdf = None
+        )
 
-
-
-
+    interaction_matrix_df = interaction_matrix_df.rename(
+        columns = {
+            customer_origins_col: "i",
+            supply_locations_col: "j",
+            attraction_col[0]: "A_j",
+            transport_costs_col: "t_ij"
+            }
+        )
+
+    if probabilities_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                probabilities_col: "p_ij"
+                }
+            )
+
+    if market_size_col is not None:
+        interaction_matrix_df = interaction_matrix_df.rename(
+            columns = {
+                market_size_col: "C_i"
+                }
+            )
+
+    interaction_matrix = InteractionMatrix(
+        interaction_matrix_df=interaction_matrix_df,
+        customer_origins=customer_origins,
+        supply_locations=supply_locations
+        )
 
-
-    if (df[col] <= 0).any():
-        raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")
+    return interaction_matrix
 
-def
+def log_centering_transformation(
     df: pd.DataFrame,
     ref_col: str,
-    cols: list
+    cols: list,
+    suffix: str = "__LCT"
     ):
 
     check_vars(
         df = df,
-        cols = cols
+        cols = cols
         )
+
+    if ref_col not in df.columns:
+        raise KeyError(f"Column '{ref_col}' not in dataframe.")
 
     def lct (x):
 
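Note: the new load_interaction_matrix() builds an InteractionMatrix (plus minimal CustomerOrigins and SupplyLocations objects) from a long-format table. A hypothetical call follows; the file and column names are placeholders, since the bundled Wieland2015.xlsx test data is not documented in this diff.

```python
# Hypothetical call to the new load_interaction_matrix() (placeholder names).
from huff.models import load_interaction_matrix

interaction_matrix = load_interaction_matrix(
    data="interaction_data.xlsx",
    customer_origins_col="origin",
    supply_locations_col="store",
    attraction_col=["salesarea"],      # list; first entry becomes A_j
    transport_costs_col="traveltime",  # becomes t_ij
    probabilities_col="share",         # optional, becomes p_ij
    market_size_col="buyingpower",     # optional, becomes C_i
    data_type="xlsx"
)
```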
@@ -866,9 +1551,115 @@ def mci_transformation(
         return x_lct
 
     for var in cols:
+
+        unique_values = df[var].unique()
+        if set(unique_values).issubset({0, 1}):
+            df[var+suffix] = df[var]
+            print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
+            continue
 
         var_t = df.groupby(ref_col)[var].apply(lct)
         var_t = var_t.reset_index()
-        df[var+
+        df[var+suffix] = var_t[var]
+
+    return df
+
+def get_isochrones(
+    geodata_gpd: gp.GeoDataFrame,
+    unique_id_col: str,
+    segments_minutes: list = [5, 10, 15],
+    range_type: str = "time",
+    intersections: str = "true",
+    profile: str = "driving-car",
+    donut: bool = True,
+    ors_server: str = "https://api.openrouteservice.org/v2/",
+    ors_auth: str = None,
+    timeout = 10,
+    delay = 1,
+    save_output: bool = True,
+    output_filepath: str = "isochrones.shp",
+    output_crs: str = "EPSG:4326"
+    ):
+
+    coords = [(point.x, point.y) for point in geodata_gpd.geometry]
+
+    unique_id_values = geodata_gpd[unique_id_col].values
+
+    ors_client = Client(
+        server = ors_server,
+        auth = ors_auth
+        )
+
+    isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
+
+    segments = [segment*60 for segment in segments_minutes]
+
+    i = 0
+
+    for x, y in coords:
+
+        isochrone_output = ors_client.isochrone(
+            locations = [[x, y]],
+            segments = segments,
+            range_type = range_type,
+            intersections = intersections,
+            profile = profile,
+            timeout = timeout,
+            save_output = False,
+            output_crs = output_crs
+            )
+
+        if isochrone_output.status_code != 200:
+            continue
+
+        isochrone_gdf = isochrone_output.get_isochrones_gdf()
+
+        if donut:
+            isochrone_gdf = overlay_difference(
+                polygon_gdf = isochrone_gdf,
+                sort_col = "segment"
+                )
+
+        time.sleep(delay)
+
+        isochrone_gdf[unique_id_col] = unique_id_values[i]
+
+        isochrones_gdf = pd.concat(
+            [
+                isochrones_gdf,
+                isochrone_gdf
+                ],
+            ignore_index=True
+            )
+
+        i = i+1
+
+    isochrones_gdf.set_crs(
+        output_crs,
+        allow_override=True,
+        inplace=True
+        )
+
+    if save_output:
+
+        isochrones_gdf.to_file(filename = output_filepath)
+
+    return isochrones_gdf
+
+
+def check_vars(
+    df: pd.DataFrame,
+    cols: list
+    ):
 
-
+    for col in cols:
+        if col not in df.columns:
+            raise KeyError(f"Column '{col}' not in dataframe.")
+
+    for col in cols:
+        if not pd.api.types.is_numeric_dtype(df[col]):
+            raise ValueError(f"Column '{col}' is not numeric. All stated columns must be numeric.")
+
+    for col in cols:
+        if (df[col] <= 0).any():
+            raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")