huff 1.1.2__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
huff/models.py CHANGED
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.1.2
8
- # Last update: 2025-05-03 13:29
7
+ # Version: 1.2.0
8
+ # Last update: 2025-05-14 18:33
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -14,6 +14,9 @@ import pandas as pd
14
14
  import geopandas as gp
15
15
  import numpy as np
16
16
  import time
17
+ from statsmodels.formula.api import ols
18
+ from shapely.geometry import Point
19
+ from shapely import wkt
17
20
  from huff.ors import Client, TimeDistanceMatrix, Isochrone
18
21
  from huff.gistools import overlay_difference, distance_matrix
19
22
 
@@ -24,12 +27,14 @@ class CustomerOrigins:
24
27
  self,
25
28
  geodata_gpd,
26
29
  geodata_gpd_original,
27
- metadata
30
+ metadata,
31
+ isochrones_gdf
28
32
  ):
29
33
 
30
34
  self.geodata_gpd = geodata_gpd
31
35
  self.geodata_gpd_original = geodata_gpd_original
32
36
  self.metadata = metadata
37
+ self.isochrones_gdf = isochrones_gdf
33
38
 
34
39
  def get_geodata_gpd(self):
35
40
 
@@ -42,6 +47,10 @@ class CustomerOrigins:
42
47
  def get_metadata(self):
43
48
 
44
49
  return self.metadata
50
+
51
+ def get_isochrones(self):
52
+
53
+ return self.isochrones_gdf
45
54
 
46
55
  def summary(self):
47
56
 
@@ -63,6 +72,11 @@ class CustomerOrigins:
63
72
  print("Unique ID column " + metadata["unique_id"])
64
73
  print("Input CRS " + str(metadata["crs_input"]))
65
74
 
75
+ if self.isochrones_gdf is None:
76
+ print("Including isochrones NO")
77
+ else:
78
+ print("Including isochrones YES")
79
+
66
80
  return metadata
67
81
 
68
82
  def define_marketsize(
@@ -97,27 +111,77 @@ class CustomerOrigins:
97
111
 
98
112
  return self
99
113
 
114
+ def isochrones(
115
+ self,
116
+ segments_minutes: list = [5, 10, 15],
117
+ range_type: str = "time",
118
+ intersections: str = "true",
119
+ profile: str = "driving-car",
120
+ donut: bool = True,
121
+ ors_server: str = "https://api.openrouteservice.org/v2/",
122
+ ors_auth: str = None,
123
+ timeout: int = 10,
124
+ delay: int = 1,
125
+ save_output: bool = True,
126
+ output_filepath: str = "customer_origins_isochrones.shp",
127
+ output_crs: str = "EPSG:4326"
128
+ ):
129
+
130
+ geodata_gpd = self.get_geodata_gpd()
131
+ metadata = self.get_metadata()
132
+
133
+ isochrones_gdf = get_isochrones(
134
+ geodata_gpd = geodata_gpd,
135
+ unique_id_col = metadata["unique_id"],
136
+ segments_minutes = segments_minutes,
137
+ range_type = range_type,
138
+ intersections = intersections,
139
+ profile = profile,
140
+ donut = donut,
141
+ ors_server = ors_server,
142
+ ors_auth = ors_auth,
143
+ timeout = timeout,
144
+ delay = delay,
145
+ save_output = save_output,
146
+ output_filepath = output_filepath,
147
+ output_crs = output_crs
148
+ )
149
+
150
+ self.isochrones_gdf = isochrones_gdf
151
+
152
+ return self
153
+
154
+
100
155
  class SupplyLocations:
101
156
 
102
157
  def __init__(
103
158
  self,
104
159
  geodata_gpd,
105
160
  geodata_gpd_original,
106
- metadata
161
+ metadata,
162
+ isochrones_gdf
107
163
  ):
108
164
 
109
165
  self.geodata_gpd = geodata_gpd
110
166
  self.geodata_gpd_original = geodata_gpd_original
111
167
  self.metadata = metadata
168
+ self.isochrones_gdf = isochrones_gdf
112
169
 
113
170
  def get_geodata_gpd(self):
171
+
114
172
  return self.geodata_gpd
115
173
 
116
174
  def get_geodata_gpd_original(self):
175
+
117
176
  return self.geodata_gpd_original
118
177
 
119
178
  def get_metadata(self):
179
+
120
180
  return self.metadata
181
+
182
+ def get_isochrones_gdf(self):
183
+
184
+ return self.isochrones_gdf
121
185
 
122
186
  def summary(self):
123
187
 
@@ -139,6 +203,11 @@ class SupplyLocations:
139
203
  print("Unique ID column " + metadata["unique_id"])
140
204
  print("Input CRS " + str(metadata["crs_input"]))
141
205
 
206
+ if self.isochrones_gdf is None:
207
+ print("Including isochrones NO")
208
+ else:
209
+ print("Including isochrones YES")
210
+
142
211
  return metadata
143
212
 
144
213
  def define_attraction(
@@ -239,86 +308,44 @@ class SupplyLocations:
239
308
 
240
309
  def isochrones(
241
310
  self,
242
- segments: list = [900, 600, 300],
311
+ segments_minutes: list = [5, 10, 15],
243
312
  range_type: str = "time",
244
313
  intersections: str = "true",
245
314
  profile: str = "driving-car",
246
315
  donut: bool = True,
247
316
  ors_server: str = "https://api.openrouteservice.org/v2/",
248
317
  ors_auth: str = None,
249
- timeout = 10,
250
- delay = 1,
318
+ timeout: int = 10,
319
+ delay: int = 1,
251
320
  save_output: bool = True,
252
- output_filepath: str = "isochrones.shp",
321
+ output_filepath: str = "supply_locations_isochrones.shp",
253
322
  output_crs: str = "EPSG:4326"
254
323
  ):
255
324
 
256
325
  geodata_gpd = self.get_geodata_gpd()
257
326
  metadata = self.get_metadata()
258
327
 
259
- coords = [(point.x, point.y) for point in geodata_gpd.geometry]
260
-
261
- unique_id_col = metadata["unique_id"]
262
- unique_id_values = geodata_gpd[unique_id_col].values
263
-
264
- ors_client = Client(
265
- server = ors_server,
266
- auth = ors_auth
328
+ isochrones_gdf = get_isochrones(
329
+ geodata_gpd = geodata_gpd,
330
+ unique_id_col = metadata["unique_id"],
331
+ segments_minutes = segments_minutes,
332
+ range_type = range_type,
333
+ intersections = intersections,
334
+ profile = profile,
335
+ donut = donut,
336
+ ors_server = ors_server,
337
+ ors_auth = ors_auth,
338
+ timeout = timeout,
339
+ delay = delay,
340
+ save_output = save_output,
341
+ output_filepath = output_filepath,
342
+ output_crs = output_crs
267
343
  )
268
-
269
- isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
270
-
271
- i = 0
272
-
273
- for x, y in coords:
274
-
275
- isochrone_output = ors_client.isochrone(
276
- locations = [[x, y]],
277
- segments = segments,
278
- range_type = range_type,
279
- intersections = intersections,
280
- profile = profile,
281
- timeout = timeout,
282
- save_output = False,
283
- output_crs = output_crs
284
- )
285
-
286
- if isochrone_output.status_code != 200:
287
- continue
288
-
289
- isochrone_gdf = isochrone_output.get_isochrones_gdf()
290
-
291
- if donut:
292
- isochrone_gdf = overlay_difference(
293
- polygon_gdf = isochrone_gdf,
294
- sort_col = "segment"
295
- )
296
-
297
- time.sleep(delay)
298
-
299
- isochrone_gdf[unique_id_col] = unique_id_values[i]
300
-
301
- isochrones_gdf = pd.concat(
302
- [
303
- isochrones_gdf,
304
- isochrone_gdf
305
- ],
306
- ignore_index=True
307
- )
308
-
309
- i = i+1
310
344
 
311
- isochrones_gdf.set_crs(
312
- output_crs,
313
- allow_override=True,
314
- inplace=True
315
- )
316
-
317
- if save_output:
345
+ self.isochrones_gdf = isochrones_gdf
318
346
 
319
- isochrones_gdf.to_file(filename = output_filepath)
347
+ return self
320
348
 
321
- return isochrones_gdf
322
349
 
323
350
  class InteractionMatrix:
324
351
 
@@ -360,15 +387,16 @@ class InteractionMatrix:
360
387
  else:
361
388
  print("Market size column " + customer_origins_metadata["marketsize_col"])
362
389
  print("----------------------------------")
363
- print("Weights")
390
+ print("Partial utilities")
391
+ print(" Weights")
364
392
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
365
- print("Gamma not defined")
393
+ print("Attraction not defined")
366
394
  else:
367
- print("Gamma " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
395
+ print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
368
396
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
369
- print("Lambda not defined")
397
+ print("Transport costs not defined")
370
398
  else:
371
- print("Lambda " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
399
+ print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
372
400
  print("----------------------------------")
373
401
 
374
402
  def transport_costs(
@@ -506,11 +534,11 @@ class InteractionMatrix:
506
534
  if attraction_weighting["func"] == "power":
507
535
  interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
508
536
  elif tc_weighting["func"] == "exponential":
509
- interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df['A_j'])
537
+ interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
510
538
  else:
511
539
  raise ValueError ("Attraction weighting is not defined.")
512
540
 
513
- interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]/interaction_matrix_df["t_ij_weighted"]
541
+ interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
514
542
 
515
543
  interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
516
544
 
@@ -591,13 +619,11 @@ class InteractionMatrix:
591
619
  cols: list = ["A_j", "t_ij"]
592
620
  ):
593
621
 
594
- """ MCI model log-centering transformation """
595
-
596
622
  cols = cols + ["p_ij"]
597
623
 
598
624
  interaction_matrix_df = self.interaction_matrix_df
599
625
 
600
- interaction_matrix_df = mci_transformation(
626
+ interaction_matrix_df = log_centering_transformation(
601
627
  df = interaction_matrix_df,
602
628
  ref_col = "i",
603
629
  cols = cols
@@ -607,6 +633,87 @@ class InteractionMatrix:
607
633
 
608
634
  return self
609
635
 
636
+ def mci_fit(
637
+ self,
638
+ cols: list = ["A_j", "t_ij"],
639
+ alpha = 0.05
640
+ ):
641
+
642
+ supply_locations = self.get_supply_locations()
643
+ supply_locations_metadata = supply_locations.get_metadata()
644
+
645
+ customer_origins = self.get_customer_origins()
646
+ customer_origins_metadata = customer_origins.get_metadata()
647
+
648
+ interaction_matrix_df = self.get_interaction_matrix_df()
649
+
650
+ cols_t = [col + "__LCT" for col in cols]
651
+
652
+ if "p_ij__LCT" not in interaction_matrix_df.columns:
653
+ interaction_matrix = self.mci_transformation(
654
+ cols = cols
655
+ )
656
+ interaction_matrix_df = self.get_interaction_matrix_df()
657
+
658
+ mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
659
+
660
+ mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
661
+
662
+ mci_ols_coefficients = mci_ols_model.params
663
+ mci_ols_coef_standarderrors = mci_ols_model.bse
664
+ mci_ols_coef_t = mci_ols_model.tvalues
665
+ mci_ols_coef_p = mci_ols_model.pvalues
666
+ mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
667
+
668
+ coefs = {}
669
+ for i, col in enumerate(cols_t):
670
+ coefs[i] = {
671
+ "Coefficient": col[:-5],
672
+ "Estimate": float(mci_ols_coefficients[col]),
673
+ "SE": float(mci_ols_coef_standarderrors[col]),
674
+ "t": float(mci_ols_coef_t[col]),
675
+ "p": float(mci_ols_coef_p[col]),
676
+ "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
677
+ "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
678
+ }
679
+
680
+ customer_origins_metadata["weighting"][0] = {
681
+ "func": "power",
682
+ "param": mci_ols_coefficients["t_ij__LCT"]
683
+ }
684
+
685
+ coefs2 = coefs.copy()
686
+ for key, value in list(coefs2.items()):
687
+ if value["Coefficient"] == "t_ij":
688
+ del coefs2[key]
689
+
690
+ for key, value in coefs2.items():
691
+ supply_locations_metadata["weighting"][key] = {
692
+ "func": "power",
693
+ "param": value["Estimate"]
694
+ }
695
+
696
+ supply_locations_metadata["attraction_col"].append(None)
697
+ supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
698
+
699
+ customer_origins.metadata = customer_origins_metadata
700
+ supply_locations.metadata = supply_locations_metadata
701
+ interaction_matrix = InteractionMatrix(
702
+ interaction_matrix_df,
703
+ customer_origins,
704
+ supply_locations
705
+ )
706
+
707
+ mci_model = MCIModel(
708
+ interaction_matrix,
709
+ coefs,
710
+ mci_ols_model,
711
+ None
712
+ )
713
+
714
+ return mci_model
715
+
716
+
610
717
  class HuffModel:
611
718
 
612
719
  def __init__(
@@ -662,19 +769,305 @@ class HuffModel:
662
769
  else:
663
770
  print("Market size column " + customer_origins_metadata["marketsize_col"])
664
771
  print("----------------------------------")
665
- print("Weights")
772
+ print("Partial utilities")
773
+ print(" Weights")
666
774
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
667
- print("Gamma not defined")
775
+ print("Attraction not defined")
668
776
  else:
669
- print("Gamma " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
777
+ print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
670
778
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
671
- print("Lambda not defined")
779
+ print("Transport costs not defined")
672
780
  else:
673
- print("Lambda " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
781
+ print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
674
782
  print("----------------------------------")
675
-
783
+
784
+ def mci_fit(
785
+ self,
786
+ cols: list = ["A_j", "t_ij"],
787
+ alpha = 0.05
788
+ ):
789
+
790
+ interaction_matrix = self.interaction_matrix
791
+
792
+ supply_locations = interaction_matrix.get_supply_locations()
793
+ supply_locations_metadata = supply_locations.get_metadata()
794
+
795
+ customer_origins = interaction_matrix.get_customer_origins()
796
+ customer_origins_metadata = customer_origins.get_metadata()
797
+
798
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
799
+
800
+ cols_t = [col + "__LCT" for col in cols]
801
+
802
+ if "p_ij__LCT" not in interaction_matrix_df.columns:
803
+ interaction_matrix = interaction_matrix.mci_transformation(
804
+ cols = cols
805
+ )
806
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
807
+
808
+ mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
809
+
810
+ mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
811
+
812
+ mci_ols_coefficients = mci_ols_model.params
813
+ mci_ols_coef_standarderrors = mci_ols_model.bse
814
+ mci_ols_coef_t = mci_ols_model.tvalues
815
+ mci_ols_coef_p = mci_ols_model.pvalues
816
+ mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
817
+
818
+ coefs = {}
819
+ for i, col in enumerate(cols_t):
820
+ coefs[i] = {
821
+ "Coefficient": col[:-5],
822
+ "Estimate": float(mci_ols_coefficients[col]),
823
+ "SE": float(mci_ols_coef_standarderrors[col]),
824
+ "t": float(mci_ols_coef_t[col]),
825
+ "p": float(mci_ols_coef_p[col]),
826
+ "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
827
+ "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
828
+ }
829
+
830
+ customer_origins_metadata["weighting"][0] = {
831
+ "func": "power",
832
+ "param": mci_ols_coefficients["t_ij__LCT"]
833
+ }
834
+
835
+ coefs2 = coefs.copy()
836
+ for key, value in list(coefs2.items()):
837
+ if value["Coefficient"] == "t_ij":
838
+ del coefs2[key]
839
+
840
+ for key, value in coefs2.items():
841
+ supply_locations_metadata["weighting"][(key)] = {
842
+ "func": "power",
843
+ "param": value["Estimate"]
844
+ }
845
+ supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
846
+
847
+ customer_origins.metadata = customer_origins_metadata
848
+ supply_locations.metadata = supply_locations_metadata
849
+ interaction_matrix = InteractionMatrix(
850
+ interaction_matrix_df,
851
+ customer_origins,
852
+ supply_locations
853
+ )
854
+
855
+ mci_model = MCIModel(
856
+ interaction_matrix,
857
+ coefs,
858
+ mci_ols_model,
859
+ None
860
+ )
861
+
862
+ return mci_model
863
+
864
+
865
+ class MCIModel:
866
+
867
+ def __init__(
868
+ self,
869
+ interaction_matrix: InteractionMatrix,
870
+ coefs: dict,
871
+ mci_ols_model,
872
+ market_areas_df
873
+ ):
874
+
875
+ self.interaction_matrix = interaction_matrix
876
+ self.coefs = coefs
877
+ self.mci_ols_model = mci_ols_model
878
+ self.market_areas_df = market_areas_df
879
+
880
+ def get_interaction_matrix_df(self):
881
+
882
+ interaction_matrix = self.interaction_matrix
883
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
884
+
885
+ return interaction_matrix_df
886
+
887
+ def get_supply_locations(self):
888
+
889
+ interaction_matrix = self.interaction_matrix
890
+ supply_locations = interaction_matrix.get_supply_locations()
891
+
892
+ return supply_locations
893
+
894
+ def get_customer_origins(self):
895
+
896
+ interaction_matrix = self.interaction_matrix
897
+ customer_origins = interaction_matrix.get_customer_origins()
898
+
899
+ return customer_origins
900
+
901
+ def get_mci_ols_model(self):
902
+
903
+ return self.mci_ols_model
904
+
905
+ def get_coefs_dict(self):
906
+
907
+ return self.coefs
908
+
909
+ def get_market_areas_df(self):
910
+
911
+ return self.market_areas_df
912
+
913
+ def summary(self):
914
+
915
+ interaction_matrix = self.interaction_matrix
916
+ coefs = self.coefs
917
+
918
+ customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
919
+ supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
920
+
921
+ print("Multiplicative Competitive Interaction Model")
922
+ print("--------------------------------------------")
923
+ print("Supply locations " + str(supply_locations_metadata["no_points"]))
924
+ print("Customer origins " + str(customer_origins_metadata["no_points"]))
925
+ print("--------------------------------------------")
926
+ print("Partial utilities")
927
+
928
+ coefficients_rows = []
929
+ for key, value in coefs.items():
930
+ coefficient_name = value["Coefficient"]
931
+ if coefficient_name == "A_j":
932
+ coefficient_name = "Attraction"
933
+ if coefficient_name == "t_ij":
934
+ coefficient_name = "Transport costs"
935
+ coefficients_rows.append({
936
+ "": coefficient_name,
937
+ "Estimate": round(value["Estimate"], 3),
938
+ "SE": round(value["SE"], 3),
939
+ "t": round(value["t"], 3),
940
+ "p": round(value["p"], 3),
941
+ "CI lower": round(value["CI_lower"], 3),
942
+ "CI upper": round(value["CI_upper"], 3)
943
+ })
944
+ coefficients_df = pd.DataFrame(coefficients_rows)
945
+
946
+ print (coefficients_df)
947
+
948
+ print("--------------------------------------------")
949
+
950
+ def utility(
951
+ self,
952
+ transformation = "LCT"
953
+ ):
954
+
955
+ interaction_matrix = self.interaction_matrix
956
+
957
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
958
+
959
+ if interaction_matrix_df["t_ij"].isna().all():
960
+ raise ValueError ("Transport cost variable is not defined")
961
+ if interaction_matrix_df["A_j"].isna().all():
962
+ raise ValueError ("Attraction variable is not defined")
963
+
964
+ check_vars(
965
+ df = interaction_matrix_df,
966
+ cols = ["A_j", "t_ij"]
967
+ )
968
+
969
+ customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
970
+
971
+ t_ij_weighting = customer_origins_metadata["weighting"][0]["param"]
972
+
973
+ if transformation == "ILCT":
974
+ mci_formula = f"{t_ij_weighting}*t_ij"
975
+ else:
976
+ mci_formula = f"t_ij**{t_ij_weighting}"
977
+
978
+ supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
979
+ attraction_col = supply_locations_metadata["attraction_col"]
980
+ attraction_weighting = supply_locations_metadata["weighting"]
981
+
982
+ if transformation == "ILCT":
983
+ for key, value in attraction_weighting.items():
984
+ mci_formula = mci_formula + f" + {value['param']}*{attraction_col[key]}"
985
+ else:
986
+ for key, value in attraction_weighting.items():
987
+ mci_formula = mci_formula + f" * {attraction_col[key]}**{value['param']}"
988
+
989
+ interaction_matrix_df["U_ij"] = interaction_matrix_df.apply(lambda row: eval(mci_formula, {}, row.to_dict()), axis=1)
990
+
991
+ if transformation == "ILCT":
992
+ interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
993
+
994
+ self.interaction_matrix = interaction_matrix_df
995
+
996
+ return self
997
+
998
+ def probabilities (self):
999
+
1000
+ interaction_matrix_df = self.interaction_matrix_df
1001
+
1002
+ if interaction_matrix_df["U_ij"].isna().all():
1003
+ self.utility()
1004
+ interaction_matrix_df = self.interaction_matrix_df
1005
+
1006
+ utility_i = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum())
1007
+ utility_i = utility_i.rename(columns = {"U_ij": "U_i"})
1008
+
1009
+ interaction_matrix_df = interaction_matrix_df.merge(
1010
+ utility_i,
1011
+ left_on="i",
1012
+ right_on="i",
1013
+ how="inner"
1014
+ )
1015
+
1016
+ interaction_matrix_df["p_ij"] = (interaction_matrix_df["U_ij"]) / (interaction_matrix_df["U_i"])
1017
+
1018
+ interaction_matrix_df = interaction_matrix_df.drop(columns=["U_i"])
1019
+
1020
+ self.interaction_matrix_df = interaction_matrix_df
1021
+
1022
+ return self
1023
+
1024
+ def flows (self):
1025
+
1026
+ interaction_matrix_df = self.interaction_matrix_df
1027
+
1028
+ if interaction_matrix_df["C_i"].isna().all():
1029
+ raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
1030
+
1031
+ check_vars(
1032
+ df = interaction_matrix_df,
1033
+ cols = ["C_i"]
1034
+ )
1035
+
1036
+ if interaction_matrix_df["p_ij"].isna().all():
1037
+ self.probabilities()
1038
+ interaction_matrix_df = self.interaction_matrix_df
1039
+
1040
+ interaction_matrix_df["E_ij"] = interaction_matrix_df["p_ij"] * interaction_matrix_df["C_i"]
1041
+
1042
+ self.interaction_matrix_df = interaction_matrix_df
1043
+
1044
+ return self
1045
+
1046
+ def marketareas (self):
1047
+
1048
+ interaction_matrix = self.interaction_matrix
1049
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1050
+
1051
+ check_vars(
1052
+ df = interaction_matrix_df,
1053
+ cols = ["E_ij"]
1054
+ )
1055
+
1056
+ market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
1057
+ market_areas_df = market_areas_df.reset_index(drop=False)
1058
+ market_areas_df = market_areas_df.rename(columns={"E_ij": "T_j"})
1059
+
1060
+ mci_model = MCIModel(
1061
+ interaction_matrix = interaction_matrix,
1062
+ coefs = self.get_coefs_dict(),
1063
+ mci_ols_model = self.get_mci_ols_model(),
1064
+ market_areas_df = market_areas_df
1065
+ )
1066
+
1067
+ return mci_model
1068
+
676
1069
  def load_geodata (
677
- file,
1070
+ data,
678
1071
  location_type: str,
679
1072
  unique_id: str,
680
1073
  x_col: str = None,
@@ -688,32 +1081,36 @@ def load_geodata (
688
1081
 
689
1082
  if location_type is None or (location_type != "origins" and location_type != "destinations"):
690
1083
  raise ValueError ("location_type must be either 'origins' or 'destinations'")
691
-
692
- if data_type not in ["shp", "csv", "xlsx"]:
693
- raise ValueError ("data_type must be 'shp', 'csv' or 'xlsx'")
694
1084
 
695
- if data_type == "shp":
696
- geodata_gpd_original = gp.read_file(file)
1085
+ if isinstance(data, gp.GeoDataFrame):
1086
+ geodata_gpd_original = data
697
1087
  crs_input = geodata_gpd_original.crs
1088
+ elif isinstance(data, pd.DataFrame):
1089
+ geodata_tab = data
1090
+ elif isinstance(data, str):
1091
+ if data_type == "shp":
1092
+ geodata_gpd_original = gp.read_file(data)
1093
+ crs_input = geodata_gpd_original.crs
1094
+ elif data_type == "csv" or data_type == "xlsx":
1095
+ if x_col is None:
1096
+ raise ValueError ("Missing value for X coordinate column")
1097
+ if y_col is None:
1098
+ raise ValueError ("Missing value for Y coordinate column")
1099
+ elif data_type == "csv":
1100
+ geodata_tab = pd.read_csv(
1101
+ data,
1102
+ sep = csv_sep,
1103
+ decimal = csv_decimal,
1104
+ encoding = csv_encoding
1105
+ )
1106
+ elif data_type == "xlsx":
1107
+ geodata_tab = pd.read_excel(data)
1108
+ else:
1109
+ raise TypeError("Unknown type of data")
1110
+ else:
1111
+ raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
698
1112
 
699
- if data_type == "csv" or data_type == "xlsx":
700
- if x_col is None:
701
- raise ValueError ("Missing value for X coordinate column")
702
- if y_col is None:
703
- raise ValueError ("Missing value for Y coordinate column")
704
-
705
- if data_type == "csv":
706
- geodata_tab = pd.read_csv(
707
- file,
708
- sep = csv_sep,
709
- decimal = csv_decimal,
710
- encoding = csv_encoding
711
- )
712
-
713
- if data_type == "xlsx":
714
- geodata_tab = pd.read_excel(file)
715
-
716
- if data_type == "csv" or data_type == "xlsx":
1113
+ if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
717
1114
  geodata_gpd_original = gp.GeoDataFrame(
718
1115
  geodata_tab,
719
1116
  geometry = gp.points_from_xy(
@@ -722,8 +1119,9 @@ def load_geodata (
722
1119
  ),
723
1120
  crs = crs_input
724
1121
  )
725
-
1122
+
726
1123
  crs_output = "EPSG:4326"
1124
+
727
1125
  geodata_gpd = geodata_gpd_original.to_crs(crs_output)
728
1126
  geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
729
1127
 
@@ -747,13 +1145,15 @@ def load_geodata (
747
1145
  geodata_object = CustomerOrigins(
748
1146
  geodata_gpd,
749
1147
  geodata_gpd_original,
750
- metadata
1148
+ metadata,
1149
+ None
751
1150
  )
752
1151
  elif location_type == "destinations":
753
1152
  geodata_object = SupplyLocations(
754
1153
  geodata_gpd,
755
1154
  geodata_gpd_original,
756
- metadata
1155
+ metadata,
1156
+ None
757
1157
  )
758
1158
 
759
1159
  return geodata_object
@@ -830,33 +1230,239 @@ def create_interaction_matrix(
830
1230
 
831
1231
  return interaction_matrix
832
1232
 
833
- def check_vars(
834
- df: pd.DataFrame,
835
- cols: list
836
- ):
1233
+ def load_interaction_matrix(
1234
+ data,
1235
+ customer_origins_col: str,
1236
+ supply_locations_col: str,
1237
+ attraction_col: list,
1238
+ transport_costs_col: str,
1239
+ probabilities_col: str = None,
1240
+ market_size_col: str = None,
1241
+ customer_origins_coords_col = None,
1242
+ supply_locations_coords_col = None,
1243
+ data_type = "csv",
1244
+ csv_sep = ";",
1245
+ csv_decimal = ",",
1246
+ csv_encoding="unicode_escape",
1247
+ crs_input = "EPSG:4326",
1248
+ crs_output = "EPSG:4326"
1249
+ ):
1250
+
1251
+ if isinstance(data, pd.DataFrame):
1252
+ interaction_matrix_df = data
1253
+ elif isinstance(data, str):
1254
+ if data_type not in ["csv", "xlsx"]:
1255
+ raise ValueError ("data_type must be 'csv' or 'xlsx'")
1256
+ if data_type == "csv":
1257
+ interaction_matrix_df = pd.read_csv(
1258
+ data,
1259
+ sep = csv_sep,
1260
+ decimal = csv_decimal,
1261
+ encoding = csv_encoding
1262
+ )
1263
+ elif data_type == "xlsx":
1264
+ interaction_matrix_df = pd.read_excel(data)
1265
+ else:
1266
+ raise TypeError("Unknown type of data")
1267
+ else:
1268
+ raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")
1269
+
1270
+ if customer_origins_col not in interaction_matrix_df.columns:
1271
+ raise KeyError ("Column " + customer_origins_col + " not in data")
1272
+ if supply_locations_col not in interaction_matrix_df.columns:
1273
+ raise KeyError ("Column " + supply_locations_col + " not in data")
1274
+
1275
+ cols_check = attraction_col + [transport_costs_col]
1276
+ if probabilities_col is not None:
1277
+ cols_check = cols_check + [probabilities_col]
1278
+ if market_size_col is not None:
1279
+ cols_check = cols_check + [market_size_col]
837
1280
 
838
- for col in cols:
839
- if col not in df.columns:
840
- raise KeyError(f"Column '{col}' not in dataframe.")
1281
+ check_vars(
1282
+ interaction_matrix_df,
1283
+ cols = cols_check
1284
+ )
1285
+
1286
+ if customer_origins_coords_col is not None:
1287
+
1288
+ if isinstance(customer_origins_coords_col, str):
1289
+
1290
+ if customer_origins_coords_col not in interaction_matrix_df.columns:
1291
+ raise KeyError ("Column " + customer_origins_coords_col + " not in data.")
1292
+
1293
+ customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col]]
1294
+ customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
1295
+ customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab[customer_origins_coords_col].apply(lambda x: wkt.loads(x))
1296
+ customer_origins_geodata_gpd = gp.GeoDataFrame(
1297
+ customer_origins_geodata_tab,
1298
+ geometry="geometry",
1299
+ crs = crs_input)
1300
+ customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop(
1301
+ columns = customer_origins_coords_col
1302
+ )
1303
+
1304
+ elif isinstance(customer_origins_coords_col, list):
1305
+
1306
+ if len(customer_origins_coords_col) != 2:
1307
+ raise ValueError ("Column " + customer_origins_coords_col + " must be a geometry column OR TWO columns with X and Y")
1308
+
1309
+ check_vars (
1310
+ df = interaction_matrix_df,
1311
+ cols = customer_origins_coords_col
1312
+ )
1313
+
1314
+ customer_origins_geodata_tab = interaction_matrix_df[[customer_origins_col, customer_origins_coords_col[0], customer_origins_coords_col[1]]]
1315
+ customer_origins_geodata_tab = customer_origins_geodata_tab.drop_duplicates()
1316
+ customer_origins_geodata_tab["geometry"] = customer_origins_geodata_tab.apply(lambda row: Point(row[customer_origins_coords_col[0]], row[customer_origins_coords_col[1]]), axis=1)
1317
+ customer_origins_geodata_gpd = gp.GeoDataFrame(customer_origins_geodata_tab, geometry="geometry")
1318
+
1319
+ customer_origins_geodata_gpd.set_crs(crs_output, inplace=True)
1320
+
1321
+ else:
1322
+
1323
+ customer_origins_geodata_gpd = interaction_matrix_df[customer_origins_col]
1324
+ customer_origins_geodata_gpd = customer_origins_geodata_gpd.drop_duplicates()
1325
+
1326
+ if market_size_col is not None:
1327
+ customer_origins_cols = [customer_origins_col] + [market_size_col]
1328
+ else:
1329
+ customer_origins_cols = [customer_origins_col]
1330
+ customer_origins_geodata_original_tab = customer_origins_geodata_tab = interaction_matrix_df[customer_origins_cols]
1331
+
1332
+ customer_origins_metadata = {
1333
+ "location_type": "origins",
1334
+ "unique_id": customer_origins_col,
1335
+ "attraction_col": [None],
1336
+ "marketsize_col": market_size_col,
1337
+ "weighting": {
1338
+ 0: {
1339
+ "func": None,
1340
+ "param": None
1341
+ }
1342
+ },
1343
+ "crs_input": crs_input,
1344
+ "crs_output": crs_output,
1345
+ "no_points": len(customer_origins_geodata_gpd)
1346
+ }
1347
+
1348
+ customer_origins = CustomerOrigins(
1349
+ geodata_gpd = customer_origins_geodata_gpd,
1350
+ geodata_gpd_original = customer_origins_geodata_original_tab,
1351
+ metadata = customer_origins_metadata,
1352
+ isochrones_gdf = None
1353
+ )
1354
+
1355
+ if supply_locations_coords_col is not None:
1356
+
1357
+ if isinstance(supply_locations_coords_col, str):
1358
+
1359
+ if supply_locations_coords_col not in interaction_matrix_df.columns:
1360
+ raise KeyError ("Column " + supply_locations_coords_col + " not in data.")
1361
+
1362
+ supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col]]
1363
+ supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
1364
+ supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab[supply_locations_coords_col].apply(lambda x: wkt.loads(x))
1365
+ supply_locations_geodata_gpd = gp.GeoDataFrame(
1366
+ supply_locations_geodata_tab,
1367
+ geometry="geometry",
1368
+ crs = crs_input)
1369
+ supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop(
1370
+ columns = supply_locations_coords_col
1371
+ )
1372
+
1373
+ if isinstance(supply_locations_coords_col, list):
1374
+
1375
+ if len(supply_locations_coords_col) != 2:
1376
+ raise ValueError ("Column " + supply_locations_coords_col + " must be a geometry column OR TWO columns with X and Y")
1377
+
1378
+ check_vars (
1379
+ df = interaction_matrix_df,
1380
+ cols = supply_locations_coords_col
1381
+ )
1382
+
1383
+ supply_locations_geodata_tab = interaction_matrix_df[[supply_locations_col, supply_locations_coords_col[0], supply_locations_coords_col[1]]]
1384
+ supply_locations_geodata_tab = supply_locations_geodata_tab.drop_duplicates()
1385
+ supply_locations_geodata_tab["geometry"] = supply_locations_geodata_tab.apply(lambda row: Point(row[supply_locations_coords_col[0]], row[supply_locations_coords_col[1]]), axis=1)
1386
+ supply_locations_geodata_gpd = gp.GeoDataFrame(supply_locations_geodata_tab, geometry="geometry")
1387
+
1388
+ supply_locations_geodata_gpd.set_crs(crs_output, inplace=True)
1389
+
1390
+ else:
1391
+
1392
+ supply_locations_geodata_gpd = interaction_matrix_df[supply_locations_col]
1393
+ supply_locations_geodata_gpd = supply_locations_geodata_gpd.drop_duplicates()
1394
+
1395
+ supply_locations_cols = [supply_locations_col] + attraction_col
1396
+ supply_locations_geodata_original_tab = supply_locations_geodata_tab = interaction_matrix_df[supply_locations_cols]
1397
+
1398
+ supply_locations_metadata = {
1399
+ "location_type": "destinations",
1400
+ "unique_id": supply_locations_col,
1401
+ "attraction_col": attraction_col,
1402
+ "marketsize_col": None,
1403
+ "weighting": {
1404
+ 0: {
1405
+ "func": None,
1406
+ "param": None
1407
+ }
1408
+ },
1409
+ "crs_input": crs_input,
1410
+ "crs_output": crs_output,
1411
+ "no_points": len(supply_locations_geodata_gpd)
1412
+ }
1413
+
1414
+ supply_locations = SupplyLocations(
1415
+ geodata_gpd = supply_locations_geodata_gpd,
1416
+ geodata_gpd_original = supply_locations_geodata_original_tab,
1417
+ metadata = supply_locations_metadata,
1418
+ isochrones_gdf = None
1419
+ )
841
1420
 
842
- for col in cols:
843
- if not pd.api.types.is_numeric_dtype(df[col]):
844
- raise ValueError(f"Column '{col}' is not numeric. All columns must be numeric.")
1421
+ interaction_matrix_df = interaction_matrix_df.rename(
1422
+ columns = {
1423
+ customer_origins_col: "i",
1424
+ supply_locations_col: "j",
1425
+ attraction_col[0]: "A_j",
1426
+ transport_costs_col: "t_ij"
1427
+ }
1428
+ )
1429
+
1430
+ if probabilities_col is not None:
1431
+ interaction_matrix_df = interaction_matrix_df.rename(
1432
+ columns = {
1433
+ probabilities_col: "p_ij"
1434
+ }
1435
+ )
1436
+
1437
+ if market_size_col is not None:
1438
+ interaction_matrix_df = interaction_matrix_df.rename(
1439
+ columns = {
1440
+ market_size_col: "C_i"
1441
+ }
1442
+ )
1443
+
1444
+ interaction_matrix = InteractionMatrix(
1445
+ interaction_matrix_df=interaction_matrix_df,
1446
+ customer_origins=customer_origins,
1447
+ supply_locations=supply_locations
1448
+ )
845
1449
 
846
- for col in cols:
847
- if (df[col] <= 0).any():
848
- raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")
1450
+ return interaction_matrix
849
1451
 
850
- def mci_transformation(
1452
+ def log_centering_transformation(
851
1453
  df: pd.DataFrame,
852
1454
  ref_col: str,
853
- cols: list
1455
+ cols: list,
1456
+ suffix: str = "__LCT"
854
1457
  ):
855
1458
 
856
1459
  check_vars(
857
1460
  df = df,
858
- cols = cols + [ref_col]
1461
+ cols = cols
859
1462
  )
1463
+
1464
+ if ref_col not in df.columns:
1465
+ raise KeyError(f"Column '{ref_col}' not in dataframe.")
860
1466
 
861
1467
  def lct (x):
862
1468
 
@@ -866,9 +1472,115 @@ def mci_transformation(
866
1472
  return x_lct
867
1473
 
868
1474
  for var in cols:
1475
+
1476
+ unique_values = df[var].unique()
1477
+ if set(unique_values).issubset({0, 1}):
1478
+ df[var+suffix] = df[var]
1479
+ print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
1480
+ continue
869
1481
 
870
1482
  var_t = df.groupby(ref_col)[var].apply(lct)
871
1483
  var_t = var_t.reset_index()
872
- df[var+"_t"] = var_t[var]
1484
+ df[var+suffix] = var_t[var]
1485
+
1486
+ return df
1487
+
1488
def get_isochrones(
    geodata_gpd: gp.GeoDataFrame,
    unique_id_col: str,
    segments_minutes: list = [5, 10, 15],
    range_type: str = "time",
    intersections: str = "true",
    profile: str = "driving-car",
    donut: bool = True,
    ors_server: str = "https://api.openrouteservice.org/v2/",
    ors_auth: str = None,
    timeout = 10,
    delay = 1,
    save_output: bool = True,
    output_filepath: str = "isochrones.shp",
    output_crs: str = "EPSG:4326"
    ):
    """
    Request isochrones for every point in ``geodata_gpd`` and stack them
    into one GeoDataFrame.

    One request is sent per point through the ORS ``Client``. Each result
    is tagged with the point's value from ``unique_id_col`` and appended
    to the output.

    Parameters
    ----------
    geodata_gpd : GeoDataFrame whose geometries expose ``.x`` and ``.y``
        (i.e. points).
    unique_id_col : column of ``geodata_gpd`` holding the location IDs;
        the same column name is used in the result.
    segments_minutes : isochrone ranges; each value is multiplied by 60
        before being passed to the client (minutes -> seconds). The
        default list is only read, never modified.
    range_type, intersections, profile, timeout : forwarded unchanged to
        ``Client.isochrone``.
    donut : if True, each per-location result is passed through
        ``overlay_difference`` with ``sort_col="segment"``
        (presumably converting nested polygons into rings - see
        ``huff.gistools``).
    ors_server, ors_auth : forwarded to the ``Client`` constructor.
    delay : seconds to sleep after every request.
    save_output : if True, the stacked result is written to
        ``output_filepath`` via ``GeoDataFrame.to_file``.
    output_crs : CRS passed to each request and set (with override) on
        the stacked result.

    Returns
    -------
    GeoDataFrame with the isochrones of all locations whose request
    returned status code 200. Locations with any other status code are
    reported on stdout and left out.
    """

    # Extract all coordinates up front so that a non-point geometry fails
    # before any request is sent.
    coords = [(point.x, point.y) for point in geodata_gpd.geometry]

    unique_id_values = geodata_gpd[unique_id_col].values

    ors_client = Client(
        server = ors_server,
        auth = ors_auth
        )

    isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])

    segments = [segment*60 for segment in segments_minutes]

    # Pair each coordinate with its ID directly. A manually advanced
    # counter gets out of step as soon as one request fails and is
    # skipped, which would label all following isochrones with the
    # wrong ID.
    for unique_id, (x, y) in zip(unique_id_values, coords):

        isochrone_output = ors_client.isochrone(
            locations = [[x, y]],
            segments = segments,
            range_type = range_type,
            intersections = intersections,
            profile = profile,
            timeout = timeout,
            save_output = False,
            output_crs = output_crs
            )

        # Throttle after every request, including failed ones, so that an
        # error response is not followed by an immediate next request.
        time.sleep(delay)

        if isochrone_output.status_code != 200:
            print(
                "Isochrone request for " + str(unique_id)
                + " failed with status code "
                + str(isochrone_output.status_code)
                + " and was skipped"
                )
            continue

        isochrone_gdf = isochrone_output.get_isochrones_gdf()

        if donut:
            isochrone_gdf = overlay_difference(
                polygon_gdf = isochrone_gdf,
                sort_col = "segment"
                )

        isochrone_gdf[unique_id_col] = unique_id

        isochrones_gdf = pd.concat(
            [
                isochrones_gdf,
                isochrone_gdf
                ],
            ignore_index=True
            )

    isochrones_gdf.set_crs(
        output_crs,
        allow_override=True,
        inplace=True
        )

    if save_output:

        isochrones_gdf.to_file(filename = output_filepath)

    return isochrones_gdf
1569
+
1570
+
1571
def check_vars(
    df: pd.DataFrame,
    cols: list
    ):
    """
    Validate that the stated columns exist, are numeric and strictly
    positive.

    The checks run in three separate passes over all columns, so a
    missing column is always reported before any dtype problem, and a
    dtype problem before any value problem.

    Parameters
    ----------
    df : DataFrame to inspect.
    cols : column names to check. Any iterable of names is accepted; a
        single string is treated as one column name.

    Raises
    ------
    KeyError : a column is not present in ``df``.
    ValueError : a column is not numeric, or contains a value <= 0.
    """

    if isinstance(cols, str):
        # A bare string is one column name, not an iterable of characters.
        cols = [cols]
    else:
        # Materialise once: a one-shot iterable (e.g. a generator) would be
        # exhausted by the first pass and skip the remaining checks.
        cols = list(cols)

    for col in cols:
        if col not in df.columns:
            raise KeyError(f"Column '{col}' not in dataframe.")

    for col in cols:
        if not pd.api.types.is_numeric_dtype(df[col]):
            raise ValueError(f"Column '{col}' is not numeric. All stated columns must be numeric.")

    for col in cols:
        if (df[col] <= 0).any():
            raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")