huff 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
huff/models.py CHANGED
@@ -4,8 +4,8 @@
4
4
  # Author: Thomas Wieland
5
5
  # ORCID: 0000-0001-5168-9846
6
6
  # mail: geowieland@googlemail.com
7
- # Version: 1.1.1
8
- # Last update: 2025-04-29 18:12
7
+ # Version: 1.2.0
8
+ # Last update: 2025-05-14 18:33
9
9
  # Copyright (c) 2025 Thomas Wieland
10
10
  #-----------------------------------------------------------------------
11
11
 
@@ -14,8 +14,11 @@ import pandas as pd
14
14
  import geopandas as gp
15
15
  import numpy as np
16
16
  import time
17
+ from statsmodels.formula.api import ols
18
+ from shapely.geometry import Point
19
+ from shapely import wkt
17
20
  from huff.ors import Client, TimeDistanceMatrix, Isochrone
18
- from huff.gistools import overlay_difference
21
+ from huff.gistools import overlay_difference, distance_matrix
19
22
 
20
23
 
21
24
  class CustomerOrigins:
@@ -24,12 +27,14 @@ class CustomerOrigins:
24
27
  self,
25
28
  geodata_gpd,
26
29
  geodata_gpd_original,
27
- metadata
30
+ metadata,
31
+ isochrones_gdf
28
32
  ):
29
33
 
30
34
  self.geodata_gpd = geodata_gpd
31
35
  self.geodata_gpd_original = geodata_gpd_original
32
36
  self.metadata = metadata
37
+ self.isochrones_gdf = isochrones_gdf
33
38
 
34
39
  def get_geodata_gpd(self):
35
40
 
@@ -42,6 +47,10 @@ class CustomerOrigins:
42
47
  def get_metadata(self):
43
48
 
44
49
  return self.metadata
50
+
51
+ def get_isochrones(self):
52
+
53
+ return self.isochrones_gdf
45
54
 
46
55
  def summary(self):
47
56
 
@@ -63,6 +72,11 @@ class CustomerOrigins:
63
72
  print("Unique ID column " + metadata["unique_id"])
64
73
  print("Input CRS " + str(metadata["crs_input"]))
65
74
 
75
+ if self.isochrones_gdf is None:
76
+ print("Including isochrones NO")
77
+ else:
78
+ print("Including isochrones YES")
79
+
66
80
  return metadata
67
81
 
68
82
  def define_marketsize(
@@ -97,27 +111,77 @@ class CustomerOrigins:
97
111
 
98
112
  return self
99
113
 
114
+ def isochrones(
115
+ self,
116
+ segments_minutes: list = [5, 10, 15],
117
+ range_type: str = "time",
118
+ intersections: str = "true",
119
+ profile: str = "driving-car",
120
+ donut: bool = True,
121
+ ors_server: str = "https://api.openrouteservice.org/v2/",
122
+ ors_auth: str = None,
123
+ timeout: int = 10,
124
+ delay: int = 1,
125
+ save_output: bool = True,
126
+ output_filepath: str = "customer_origins_isochrones.shp",
127
+ output_crs: str = "EPSG:4326"
128
+ ):
129
+
130
+ geodata_gpd = self.get_geodata_gpd()
131
+ metadata = self.get_metadata()
132
+
133
+ isochrones_gdf = get_isochrones(
134
+ geodata_gpd = geodata_gpd,
135
+ unique_id_col = metadata["unique_id"],
136
+ segments_minutes = segments_minutes,
137
+ range_type = range_type,
138
+ intersections = intersections,
139
+ profile = profile,
140
+ donut = donut,
141
+ ors_server = ors_server,
142
+ ors_auth = ors_auth,
143
+ timeout = timeout,
144
+ delay = delay,
145
+ save_output = save_output,
146
+ output_filepath = output_filepath,
147
+ output_crs = output_crs
148
+ )
149
+
150
+ self.isochrones_gdf = isochrones_gdf
151
+
152
+ return self
153
+
154
+
100
155
  class SupplyLocations:
101
156
 
102
157
  def __init__(
103
158
  self,
104
159
  geodata_gpd,
105
160
  geodata_gpd_original,
106
- metadata
161
+ metadata,
162
+ isochrones_gdf
107
163
  ):
108
164
 
109
165
  self.geodata_gpd = geodata_gpd
110
166
  self.geodata_gpd_original = geodata_gpd_original
111
167
  self.metadata = metadata
168
+ self.isochrones_gdf = isochrones_gdf
112
169
 
113
170
  def get_geodata_gpd(self):
171
+
114
172
  return self.geodata_gpd
115
173
 
116
174
  def get_geodata_gpd_original(self):
175
+
117
176
  return self.geodata_gpd_original
118
177
 
119
178
  def get_metadata(self):
179
+
120
180
  return self.metadata
181
+
182
+ def get_isochrones_gdf(self):
183
+
184
+ return self.isochrones_gdf
121
185
 
122
186
  def summary(self):
123
187
 
@@ -139,6 +203,11 @@ class SupplyLocations:
139
203
  print("Unique ID column " + metadata["unique_id"])
140
204
  print("Input CRS " + str(metadata["crs_input"]))
141
205
 
206
+ if self.isochrones_gdf is None:
207
+ print("Including isochrones NO")
208
+ else:
209
+ print("Including isochrones YES")
210
+
142
211
  return metadata
143
212
 
144
213
  def define_attraction(
@@ -239,86 +308,44 @@ class SupplyLocations:
239
308
 
240
309
  def isochrones(
241
310
  self,
242
- segments: list = [900, 600, 300],
311
+ segments_minutes: list = [5, 10, 15],
243
312
  range_type: str = "time",
244
313
  intersections: str = "true",
245
314
  profile: str = "driving-car",
246
315
  donut: bool = True,
247
316
  ors_server: str = "https://api.openrouteservice.org/v2/",
248
317
  ors_auth: str = None,
249
- timeout = 10,
250
- delay = 1,
318
+ timeout: int = 10,
319
+ delay: int = 1,
251
320
  save_output: bool = True,
252
- output_filepath: str = "isochrones.shp",
321
+ output_filepath: str = "supply_locations_isochrones.shp",
253
322
  output_crs: str = "EPSG:4326"
254
323
  ):
255
324
 
256
325
  geodata_gpd = self.get_geodata_gpd()
257
326
  metadata = self.get_metadata()
258
327
 
259
- coords = [(point.x, point.y) for point in geodata_gpd.geometry]
260
-
261
- unique_id_col = metadata["unique_id"]
262
- unique_id_values = geodata_gpd[unique_id_col].values
263
-
264
- ors_client = Client(
265
- server = ors_server,
266
- auth = ors_auth
328
+ isochrones_gdf = get_isochrones(
329
+ geodata_gpd = geodata_gpd,
330
+ unique_id_col = metadata["unique_id"],
331
+ segments_minutes = segments_minutes,
332
+ range_type = range_type,
333
+ intersections = intersections,
334
+ profile = profile,
335
+ donut = donut,
336
+ ors_server = ors_server,
337
+ ors_auth = ors_auth,
338
+ timeout = timeout,
339
+ delay = delay,
340
+ save_output = save_output,
341
+ output_filepath = output_filepath,
342
+ output_crs = output_crs
267
343
  )
268
-
269
- isochrones_gdf = gp.GeoDataFrame(columns=[unique_id_col, "geometry"])
270
-
271
- i = 0
272
-
273
- for x, y in coords:
274
-
275
- isochrone_output = ors_client.isochrone(
276
- locations = [[x, y]],
277
- segments = segments,
278
- range_type = range_type,
279
- intersections = intersections,
280
- profile = profile,
281
- timeout = timeout,
282
- save_output = False,
283
- output_crs = output_crs
284
- )
285
-
286
- if isochrone_output.status_code != 200:
287
- continue
288
-
289
- isochrone_gdf = isochrone_output.get_isochrones_gdf()
290
-
291
- if donut:
292
- isochrone_gdf = overlay_difference(
293
- polygon_gdf = isochrone_gdf,
294
- sort_col = "segment"
295
- )
296
-
297
- time.sleep(delay)
298
-
299
- isochrone_gdf[unique_id_col] = unique_id_values[i]
300
-
301
- isochrones_gdf = pd.concat(
302
- [
303
- isochrones_gdf,
304
- isochrone_gdf
305
- ],
306
- ignore_index=True
307
- )
308
-
309
- i = i+1
310
344
 
311
- isochrones_gdf.set_crs(
312
- output_crs,
313
- allow_override=True,
314
- inplace=True
315
- )
316
-
317
- if save_output:
345
+ self.isochrones_gdf = isochrones_gdf
318
346
 
319
- isochrones_gdf.to_file(filename = output_filepath)
347
+ return self
320
348
 
321
- return isochrones_gdf
322
349
 
323
350
  class InteractionMatrix:
324
351
 
@@ -360,26 +387,33 @@ class InteractionMatrix:
360
387
  else:
361
388
  print("Market size column " + customer_origins_metadata["marketsize_col"])
362
389
  print("----------------------------------")
363
- print("Weights")
390
+ print("Partial utilities")
391
+ print(" Weights")
364
392
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
365
- print("Gamma not defined")
393
+ print("Attraction not defined")
366
394
  else:
367
- print("Gamma " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
395
+ print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
368
396
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
369
- print("Lambda not defined")
397
+ print("Transport costs not defined")
370
398
  else:
371
- print("Lambda " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
399
+ print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
372
400
  print("----------------------------------")
373
401
 
374
402
  def transport_costs(
375
403
  self,
404
+ network: bool = True,
376
405
  range_type: str = "time",
377
406
  time_unit: str = "minutes",
407
+ distance_unit: str = "kilometers",
378
408
  ors_server: str = "https://api.openrouteservice.org/v2/",
379
409
  ors_auth: str = None,
380
410
  save_output: bool = False,
381
411
  output_filepath: str = "transport_costs_matrix.csv"
382
412
  ):
413
+
414
+ if not network and range_type == "time":
415
+ print ("Calculating euclidean distances (network = False). Setting range_type = 'distance'")
416
+ range_type = "distance"
383
417
 
384
418
  interaction_matrix_df = self.get_interaction_matrix_df()
385
419
 
@@ -401,53 +435,70 @@ class InteractionMatrix:
401
435
 
402
436
  customer_origins_index = list(range(len(customer_origins_coords)))
403
437
  locations_coords_index = list(range(len(customer_origins_index), len(locations_coords)))
404
-
405
- ors_client = Client(
406
- server = ors_server,
407
- auth = ors_auth
408
- )
409
- time_distance_matrix = ors_client.matrix(
410
- locations = locations_coords,
411
- save_output = save_output,
412
- output_filepath = output_filepath,
413
- sources = customer_origins_index,
414
- destinations = locations_coords_index,
415
- range_type = range_type
416
- )
417
-
418
- if time_distance_matrix.get_metadata() is None:
419
- raise ValueError ("No transport costs matrix was built.")
420
438
 
421
- transport_costs_matrix = time_distance_matrix.get_matrix()
422
- transport_costs_matrix_config = time_distance_matrix.get_config()
423
- range_type = transport_costs_matrix_config["range_type"]
439
+ if network:
424
440
 
425
- transport_costs_matrix["source"] = transport_costs_matrix["source"].astype(int)
426
- transport_costs_matrix["source"] = transport_costs_matrix["source"].map(
427
- dict(enumerate(customer_origins_ids))
428
- )
429
-
430
- transport_costs_matrix["destination"] = transport_costs_matrix["destination"].astype(int)
431
- transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(
432
- dict(enumerate(supply_locations_ids))
433
- )
434
-
435
- transport_costs_matrix["source_destination"] = transport_costs_matrix["source"].astype(str)+"_"+transport_costs_matrix["destination"].astype(str)
436
- transport_costs_matrix = transport_costs_matrix[["source_destination", range_type]]
441
+ ors_client = Client(
442
+ server = ors_server,
443
+ auth = ors_auth
444
+ )
445
+ time_distance_matrix = ors_client.matrix(
446
+ locations = locations_coords,
447
+ save_output = save_output,
448
+ output_filepath = output_filepath,
449
+ sources = customer_origins_index,
450
+ destinations = locations_coords_index,
451
+ range_type = range_type
452
+ )
453
+
454
+ if time_distance_matrix.get_metadata() is None:
455
+ raise ValueError ("No transport costs matrix was built.")
437
456
 
438
- interaction_matrix_df = interaction_matrix_df.merge(
439
- transport_costs_matrix,
440
- left_on="ij",
441
- right_on="source_destination"
442
- )
443
-
444
- interaction_matrix_df["t_ij"] = interaction_matrix_df[range_type]
445
- if time_unit == "minutes":
446
- interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60
447
- if time_unit == "hours":
448
- interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60/60
457
+ transport_costs_matrix = time_distance_matrix.get_matrix()
458
+ transport_costs_matrix_config = time_distance_matrix.get_config()
459
+ range_type = transport_costs_matrix_config["range_type"]
460
+
461
+ transport_costs_matrix["source"] = transport_costs_matrix["source"].astype(int)
462
+ transport_costs_matrix["source"] = transport_costs_matrix["source"].map(
463
+ dict(enumerate(customer_origins_ids))
464
+ )
465
+
466
+ transport_costs_matrix["destination"] = transport_costs_matrix["destination"].astype(int)
467
+ transport_costs_matrix["destination"] = transport_costs_matrix["destination"].map(
468
+ dict(enumerate(supply_locations_ids))
469
+ )
470
+
471
+ transport_costs_matrix["source_destination"] = transport_costs_matrix["source"].astype(str)+"_"+transport_costs_matrix["destination"].astype(str)
472
+ transport_costs_matrix = transport_costs_matrix[["source_destination", range_type]]
473
+
474
+ interaction_matrix_df = interaction_matrix_df.merge(
475
+ transport_costs_matrix,
476
+ left_on="ij",
477
+ right_on="source_destination"
478
+ )
479
+
480
+ interaction_matrix_df["t_ij"] = interaction_matrix_df[range_type]
481
+ if time_unit == "minutes":
482
+ interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60
483
+ if time_unit == "hours":
484
+ interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/60/60
485
+
486
+ interaction_matrix_df = interaction_matrix_df.drop(columns=["source_destination", range_type])
487
+
488
+ else:
489
+
490
+ distance_matrix_result = distance_matrix(
491
+ sources = customer_origins_coords,
492
+ destinations = supply_locations_coords,
493
+ unit = "m"
494
+ )
495
+
496
+ distance_matrix_result_flat = [distance for sublist in distance_matrix_result for distance in sublist]
449
497
 
450
- interaction_matrix_df = interaction_matrix_df.drop(columns=["source_destination", range_type])
498
+ interaction_matrix_df["t_ij"] = distance_matrix_result_flat
499
+
500
+ if distance_unit == "kilometers":
501
+ interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
451
502
 
452
503
  self.interaction_matrix_df = interaction_matrix_df
453
504
 
@@ -483,11 +534,11 @@ class InteractionMatrix:
483
534
  if attraction_weighting["func"] == "power":
484
535
  interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
485
536
  elif tc_weighting["func"] == "exponential":
486
- interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df['A_j'])
537
+ interaction_matrix_df["A_j_weighted"] = np.exp(attraction_weighting["param"] * interaction_matrix_df["A_j"])
487
538
  else:
488
539
  raise ValueError ("Attraction weighting is not defined.")
489
540
 
490
- interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]/interaction_matrix_df["t_ij_weighted"]
541
+ interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
491
542
 
492
543
  interaction_matrix_df = interaction_matrix_df.drop(columns=['A_j_weighted', 't_ij_weighted'])
493
544
 
@@ -568,13 +619,11 @@ class InteractionMatrix:
568
619
  cols: list = ["A_j", "t_ij"]
569
620
  ):
570
621
 
571
- """ MCI model log-centering transformation """
572
-
573
622
  cols = cols + ["p_ij"]
574
623
 
575
624
  interaction_matrix_df = self.interaction_matrix_df
576
625
 
577
- interaction_matrix_df = mci_transformation(
626
+ interaction_matrix_df = log_centering_transformation(
578
627
  df = interaction_matrix_df,
579
628
  ref_col = "i",
580
629
  cols = cols
@@ -584,6 +633,87 @@ class InteractionMatrix:
584
633
 
585
634
  return self
586
635
 
636
+ def mci_fit(
637
+ self,
638
+ cols: list = ["A_j", "t_ij"],
639
+ alpha = 0.05
640
+ ):
641
+
642
+ supply_locations = self.get_supply_locations()
643
+ supply_locations_metadata = supply_locations.get_metadata()
644
+
645
+ customer_origins = self.get_customer_origins()
646
+ customer_origins_metadata = customer_origins.get_metadata()
647
+
648
+ interaction_matrix_df = self.get_interaction_matrix_df()
649
+
650
+ cols_t = [col + "__LCT" for col in cols]
651
+
652
+ if "p_ij__LCT" not in interaction_matrix_df.columns:
653
+ interaction_matrix = self.mci_transformation(
654
+ cols = cols
655
+ )
656
+ interaction_matrix_df = self.get_interaction_matrix_df()
657
+
658
+ mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
659
+
660
+ mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
661
+
662
+ mci_ols_coefficients = mci_ols_model.params
663
+ mci_ols_coef_standarderrors = mci_ols_model.bse
664
+ mci_ols_coef_t = mci_ols_model.tvalues
665
+ mci_ols_coef_p = mci_ols_model.pvalues
666
+ mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
667
+
668
+ coefs = {}
669
+ for i, col in enumerate(cols_t):
670
+ coefs[i] = {
671
+ "Coefficient": col[:-5],
672
+ "Estimate": float(mci_ols_coefficients[col]),
673
+ "SE": float(mci_ols_coef_standarderrors[col]),
674
+ "t": float(mci_ols_coef_t[col]),
675
+ "p": float(mci_ols_coef_p[col]),
676
+ "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
677
+ "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
678
+ }
679
+
680
+ customer_origins_metadata["weighting"][0] = {
681
+ "func": "power",
682
+ "param": mci_ols_coefficients["t_ij__LCT"]
683
+ }
684
+
685
+ coefs2 = coefs.copy()
686
+ for key, value in list(coefs2.items()):
687
+ if value["Coefficient"] == "t_ij":
688
+ del coefs2[key]
689
+
690
+ for key, value in coefs2.items():
691
+ supply_locations_metadata["weighting"][key] = {
692
+ "func": "power",
693
+ "param": value["Estimate"]
694
+ }
695
+
696
+ supply_locations_metadata["attraction_col"].append(None)
697
+ supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
698
+
699
+ customer_origins.metadata = customer_origins_metadata
700
+ supply_locations.metadata = supply_locations_metadata
701
+ interaction_matrix = InteractionMatrix(
702
+ interaction_matrix_df,
703
+ customer_origins,
704
+ supply_locations
705
+ )
706
+
707
+ mci_model = MCIModel(
708
+ interaction_matrix,
709
+ coefs,
710
+ mci_ols_model,
711
+ None
712
+ )
713
+
714
+ return mci_model
715
+
716
+
587
717
  class HuffModel:
588
718
 
589
719
  def __init__(
@@ -639,19 +769,305 @@ class HuffModel:
639
769
  else:
640
770
  print("Market size column " + customer_origins_metadata["marketsize_col"])
641
771
  print("----------------------------------")
642
- print("Weights")
772
+ print("Partial utilities")
773
+ print(" Weights")
643
774
  if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
644
- print("Gamma not defined")
775
+ print("Attraction not defined")
645
776
  else:
646
- print("Gamma " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
777
+ print("Attraction " + str(supply_locations_metadata["weighting"][0]["param"]) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
647
778
  if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
648
- print("Lambda not defined")
779
+ print("Transport costs not defined")
649
780
  else:
650
- print("Lambda " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
781
+ print("Transport costs " + str(customer_origins_metadata["weighting"][0]["param"]) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
651
782
  print("----------------------------------")
652
-
783
+
784
+ def mci_fit(
785
+ self,
786
+ cols: list = ["A_j", "t_ij"],
787
+ alpha = 0.05
788
+ ):
789
+
790
+ interaction_matrix = self.interaction_matrix
791
+
792
+ supply_locations = interaction_matrix.get_supply_locations()
793
+ supply_locations_metadata = supply_locations.get_metadata()
794
+
795
+ customer_origins = interaction_matrix.get_customer_origins()
796
+ customer_origins_metadata = customer_origins.get_metadata()
797
+
798
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
799
+
800
+ cols_t = [col + "__LCT" for col in cols]
801
+
802
+ if "p_ij__LCT" not in interaction_matrix_df.columns:
803
+ interaction_matrix = interaction_matrix.mci_transformation(
804
+ cols = cols
805
+ )
806
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
807
+
808
+ mci_formula = f'p_ij__LCT ~ {" + ".join(cols_t)} -1'
809
+
810
+ mci_ols_model = ols(mci_formula, data = interaction_matrix_df).fit()
811
+
812
+ mci_ols_coefficients = mci_ols_model.params
813
+ mci_ols_coef_standarderrors = mci_ols_model.bse
814
+ mci_ols_coef_t = mci_ols_model.tvalues
815
+ mci_ols_coef_p = mci_ols_model.pvalues
816
+ mci_ols_coef_ci = mci_ols_model.conf_int(alpha = alpha)
817
+
818
+ coefs = {}
819
+ for i, col in enumerate(cols_t):
820
+ coefs[i] = {
821
+ "Coefficient": col[:-5],
822
+ "Estimate": float(mci_ols_coefficients[col]),
823
+ "SE": float(mci_ols_coef_standarderrors[col]),
824
+ "t": float(mci_ols_coef_t[col]),
825
+ "p": float(mci_ols_coef_p[col]),
826
+ "CI_lower": float(mci_ols_coef_ci.loc[col, 0]),
827
+ "CI_upper": float(mci_ols_coef_ci.loc[col, 1]),
828
+ }
829
+
830
+ customer_origins_metadata["weighting"][0] = {
831
+ "func": "power",
832
+ "param": mci_ols_coefficients["t_ij__LCT"]
833
+ }
834
+
835
+ coefs2 = coefs.copy()
836
+ for key, value in list(coefs2.items()):
837
+ if value["Coefficient"] == "t_ij":
838
+ del coefs2[key]
839
+
840
+ for key, value in coefs2.items():
841
+ supply_locations_metadata["weighting"][(key)] = {
842
+ "func": "power",
843
+ "param": value["Estimate"]
844
+ }
845
+ supply_locations_metadata["attraction_col"][key] = value["Coefficient"]
846
+
847
+ customer_origins.metadata = customer_origins_metadata
848
+ supply_locations.metadata = supply_locations_metadata
849
+ interaction_matrix = InteractionMatrix(
850
+ interaction_matrix_df,
851
+ customer_origins,
852
+ supply_locations
853
+ )
854
+
855
+ mci_model = MCIModel(
856
+ interaction_matrix,
857
+ coefs,
858
+ mci_ols_model,
859
+ None
860
+ )
861
+
862
+ return mci_model
863
+
864
+
865
+ class MCIModel:
866
+
867
+ def __init__(
868
+ self,
869
+ interaction_matrix: InteractionMatrix,
870
+ coefs: dict,
871
+ mci_ols_model,
872
+ market_areas_df
873
+ ):
874
+
875
+ self.interaction_matrix = interaction_matrix
876
+ self.coefs = coefs
877
+ self.mci_ols_model = mci_ols_model
878
+ self.market_areas_df = market_areas_df
879
+
880
+ def get_interaction_matrix_df(self):
881
+
882
+ interaction_matrix = self.interaction_matrix
883
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
884
+
885
+ return interaction_matrix_df
886
+
887
+ def get_supply_locations(self):
888
+
889
+ interaction_matrix = self.interaction_matrix
890
+ supply_locations = interaction_matrix.get_supply_locations()
891
+
892
+ return supply_locations
893
+
894
+ def get_customer_origins(self):
895
+
896
+ interaction_matrix = self.interaction_matrix
897
+ customer_origins = interaction_matrix.get_customer_origins()
898
+
899
+ return customer_origins
900
+
901
+ def get_mci_ols_model(self):
902
+
903
+ return self.mci_ols_model
904
+
905
+ def get_coefs_dict(self):
906
+
907
+ return self.coefs
908
+
909
+ def get_market_areas_df(self):
910
+
911
+ return self.market_areas_df
912
+
913
+ def summary(self):
914
+
915
+ interaction_matrix = self.interaction_matrix
916
+ coefs = self.coefs
917
+
918
+ customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
919
+ supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
920
+
921
+ print("Multiplicative Competitive Interaction Model")
922
+ print("--------------------------------------------")
923
+ print("Supply locations " + str(supply_locations_metadata["no_points"]))
924
+ print("Customer origins " + str(customer_origins_metadata["no_points"]))
925
+ print("--------------------------------------------")
926
+ print("Partial utilities")
927
+
928
+ coefficients_rows = []
929
+ for key, value in coefs.items():
930
+ coefficient_name = value["Coefficient"]
931
+ if coefficient_name == "A_j":
932
+ coefficient_name = "Attraction"
933
+ if coefficient_name == "t_ij":
934
+ coefficient_name = "Transport costs"
935
+ coefficients_rows.append({
936
+ "": coefficient_name,
937
+ "Estimate": round(value["Estimate"], 3),
938
+ "SE": round(value["SE"], 3),
939
+ "t": round(value["t"], 3),
940
+ "p": round(value["p"], 3),
941
+ "CI lower": round(value["CI_lower"], 3),
942
+ "CI upper": round(value["CI_upper"], 3)
943
+ })
944
+ coefficients_df = pd.DataFrame(coefficients_rows)
945
+
946
+ print (coefficients_df)
947
+
948
+ print("--------------------------------------------")
949
+
950
+ def utility(
951
+ self,
952
+ transformation = "LCT"
953
+ ):
954
+
955
+ interaction_matrix = self.interaction_matrix
956
+
957
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
958
+
959
+ if interaction_matrix_df["t_ij"].isna().all():
960
+ raise ValueError ("Transport cost variable is not defined")
961
+ if interaction_matrix_df["A_j"].isna().all():
962
+ raise ValueError ("Attraction variable is not defined")
963
+
964
+ check_vars(
965
+ df = interaction_matrix_df,
966
+ cols = ["A_j", "t_ij"]
967
+ )
968
+
969
+ customer_origins_metadata = interaction_matrix.get_customer_origins().get_metadata()
970
+
971
+ t_ij_weighting = customer_origins_metadata["weighting"][0]["param"]
972
+
973
+ if transformation == "ILCT":
974
+ mci_formula = f"{t_ij_weighting}*t_ij"
975
+ else:
976
+ mci_formula = f"t_ij**{t_ij_weighting}"
977
+
978
+ supply_locations_metadata = interaction_matrix.get_supply_locations().get_metadata()
979
+ attraction_col = supply_locations_metadata["attraction_col"]
980
+ attraction_weighting = supply_locations_metadata["weighting"]
981
+
982
+ if transformation == "ILCT":
983
+ for key, value in attraction_weighting.items():
984
+ mci_formula = mci_formula + f" + {value['param']}*{attraction_col[key]}"
985
+ else:
986
+ for key, value in attraction_weighting.items():
987
+ mci_formula = mci_formula + f" * {attraction_col[key]}**{value['param']}"
988
+
989
+ interaction_matrix_df["U_ij"] = interaction_matrix_df.apply(lambda row: eval(mci_formula, {}, row.to_dict()), axis=1)
990
+
991
+ if transformation == "ILCT":
992
+ interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
993
+
994
+ self.interaction_matrix = interaction_matrix_df
995
+
996
+ return self
997
+
998
+ def probabilities (self):
999
+
1000
+ interaction_matrix_df = self.interaction_matrix_df
1001
+
1002
+ if interaction_matrix_df["U_ij"].isna().all():
1003
+ self.utility()
1004
+ interaction_matrix_df = self.interaction_matrix_df
1005
+
1006
+ utility_i = pd.DataFrame(interaction_matrix_df.groupby("i")["U_ij"].sum())
1007
+ utility_i = utility_i.rename(columns = {"U_ij": "U_i"})
1008
+
1009
+ interaction_matrix_df = interaction_matrix_df.merge(
1010
+ utility_i,
1011
+ left_on="i",
1012
+ right_on="i",
1013
+ how="inner"
1014
+ )
1015
+
1016
+ interaction_matrix_df["p_ij"] = (interaction_matrix_df["U_ij"]) / (interaction_matrix_df["U_i"])
1017
+
1018
+ interaction_matrix_df = interaction_matrix_df.drop(columns=["U_i"])
1019
+
1020
+ self.interaction_matrix_df = interaction_matrix_df
1021
+
1022
+ return self
1023
+
1024
+ def flows (self):
1025
+
1026
+ interaction_matrix_df = self.interaction_matrix_df
1027
+
1028
+ if interaction_matrix_df["C_i"].isna().all():
1029
+ raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
1030
+
1031
+ check_vars(
1032
+ df = interaction_matrix_df,
1033
+ cols = ["C_i"]
1034
+ )
1035
+
1036
+ if interaction_matrix_df["p_ij"].isna().all():
1037
+ self.probabilities()
1038
+ interaction_matrix_df = self.interaction_matrix_df
1039
+
1040
+ interaction_matrix_df["E_ij"] = interaction_matrix_df["p_ij"] * interaction_matrix_df["C_i"]
1041
+
1042
+ self.interaction_matrix_df = interaction_matrix_df
1043
+
1044
+ return self
1045
+
1046
+ def marketareas (self):
1047
+
1048
+ interaction_matrix = self.interaction_matrix
1049
+ interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
1050
+
1051
+ check_vars(
1052
+ df = interaction_matrix_df,
1053
+ cols = ["E_ij"]
1054
+ )
1055
+
1056
+ market_areas_df = pd.DataFrame(interaction_matrix_df.groupby("j")["E_ij"].sum())
1057
+ market_areas_df = market_areas_df.reset_index(drop=False)
1058
+ market_areas_df = market_areas_df.rename(columns={"E_ij": "T_j"})
1059
+
1060
+ mci_model = MCIModel(
1061
+ interaction_matrix = interaction_matrix,
1062
+ coefs = self.get_coefs_dict(),
1063
+ mci_ols_model = self.get_mci_ols_model(),
1064
+ market_areas_df = market_areas_df
1065
+ )
1066
+
1067
+ return mci_model
1068
+
653
1069
  def load_geodata (
654
- file,
1070
+ data,
655
1071
  location_type: str,
656
1072
  unique_id: str,
657
1073
  x_col: str = None,
@@ -665,32 +1081,36 @@ def load_geodata (
665
1081
 
666
1082
  if location_type is None or (location_type != "origins" and location_type != "destinations"):
667
1083
  raise ValueError ("location_type must be either 'origins' or 'destinations'")
668
-
669
- if data_type not in ["shp", "csv", "xlsx"]:
670
- raise ValueError ("data_type must be 'shp', 'csv' or 'xlsx'")
671
1084
 
672
- if data_type == "shp":
673
- geodata_gpd_original = gp.read_file(file)
1085
+ if isinstance(data, gp.GeoDataFrame):
1086
+ geodata_gpd_original = data
674
1087
  crs_input = geodata_gpd_original.crs
1088
+ elif isinstance(data, pd.DataFrame):
1089
+ geodata_tab = data
1090
+ elif isinstance(data, str):
1091
+ if data_type == "shp":
1092
+ geodata_gpd_original = gp.read_file(data)
1093
+ crs_input = geodata_gpd_original.crs
1094
+ elif data_type == "csv" or data_type == "xlsx":
1095
+ if x_col is None:
1096
+ raise ValueError ("Missing value for X coordinate column")
1097
+ if y_col is None:
1098
+ raise ValueError ("Missing value for Y coordinate column")
1099
+ elif data_type == "csv":
1100
+ geodata_tab = pd.read_csv(
1101
+ data,
1102
+ sep = csv_sep,
1103
+ decimal = csv_decimal,
1104
+ encoding = csv_encoding
1105
+ )
1106
+ elif data_type == "xlsx":
1107
+ geodata_tab = pd.read_excel(data)
1108
+ else:
1109
+ raise TypeError("Unknown type of data")
1110
+ else:
1111
+ raise TypeError("data must be pandas.DataFrame, geopandas.GeoDataFrame or file (.csv, .xlsx, .shp)")
675
1112
 
676
- if data_type == "csv" or data_type == "xlsx":
677
- if x_col is None:
678
- raise ValueError ("Missing value for X coordinate column")
679
- if y_col is None:
680
- raise ValueError ("Missing value for Y coordinate column")
681
-
682
- if data_type == "csv":
683
- geodata_tab = pd.read_csv(
684
- file,
685
- sep = csv_sep,
686
- decimal = csv_decimal,
687
- encoding = csv_encoding
688
- )
689
-
690
- if data_type == "xlsx":
691
- geodata_tab = pd.read_excel(file)
692
-
693
- if data_type == "csv" or data_type == "xlsx":
1113
+ if data_type == "csv" or data_type == "xlsx" or (isinstance(data, pd.DataFrame) and not isinstance(data, gp.GeoDataFrame)):
694
1114
  geodata_gpd_original = gp.GeoDataFrame(
695
1115
  geodata_tab,
696
1116
  geometry = gp.points_from_xy(
@@ -699,8 +1119,9 @@ def load_geodata (
699
1119
  ),
700
1120
  crs = crs_input
701
1121
  )
702
-
1122
+
703
1123
  crs_output = "EPSG:4326"
1124
+
704
1125
  geodata_gpd = geodata_gpd_original.to_crs(crs_output)
705
1126
  geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
706
1127
 
@@ -724,13 +1145,15 @@ def load_geodata (
724
1145
  geodata_object = CustomerOrigins(
725
1146
  geodata_gpd,
726
1147
  geodata_gpd_original,
727
- metadata
1148
+ metadata,
1149
+ None
728
1150
  )
729
1151
  elif location_type == "destinations":
730
1152
  geodata_object = SupplyLocations(
731
1153
  geodata_gpd,
732
1154
  geodata_gpd_original,
733
- metadata
1155
+ metadata,
1156
+ None
734
1157
  )
735
1158
 
736
1159
  return geodata_object
@@ -807,33 +1230,239 @@ def create_interaction_matrix(
807
1230
 
808
1231
  return interaction_matrix
809
1232
 
810
- def check_vars(
811
- df: pd.DataFrame,
812
- cols: list
813
- ):
1233
def load_interaction_matrix(
    data,
    customer_origins_col: str,
    supply_locations_col: str,
    attraction_col: list,
    transport_costs_col: str,
    probabilities_col: str = None,
    market_size_col: str = None,
    customer_origins_coords_col = None,
    supply_locations_coords_col = None,
    data_type = "csv",
    csv_sep = ";",
    csv_decimal = ",",
    csv_encoding="unicode_escape",
    crs_input = "EPSG:4326",
    crs_output = "EPSG:4326"
    ):
    """Build an InteractionMatrix from an existing interaction table.

    Parameters
    ----------
    data
        Either a pandas.DataFrame or a path (str) to a .csv/.xlsx file.
    customer_origins_col, supply_locations_col
        Columns holding the origin and destination identifiers.
    attraction_col
        List of attraction columns. Only the first entry is renamed to
        "A_j" in the resulting table.
    transport_costs_col
        Column with transport costs (renamed to "t_ij").
    probabilities_col, market_size_col
        Optional columns, renamed to "p_ij" and "C_i" when given.
    customer_origins_coords_col, supply_locations_coords_col
        Optional coordinates: a single column name (str) containing WKT
        geometries, or a list of exactly two column names (X, Y).
    data_type, csv_sep, csv_decimal, csv_encoding
        Only used when ``data`` is a file path.
    crs_input, crs_output
        CRS identifiers stored in the metadata and applied to the
        location geometries.

    Returns
    -------
    InteractionMatrix
        Wraps the renamed table plus CustomerOrigins and SupplyLocations
        objects derived from it.

    Raises
    ------
    TypeError
        If ``data`` is neither a DataFrame nor a str, or a coordinate
        argument is neither a str nor a list.
    ValueError
        If ``data_type`` is not 'csv'/'xlsx', a coordinate list does not
        have two entries, or check_vars rejects a column.
    KeyError
        If a referenced column is missing.
    """

    if isinstance(data, pd.DataFrame):
        interaction_matrix_df = data
    elif isinstance(data, str):
        if data_type not in ["csv", "xlsx"]:
            raise ValueError ("data_type must be 'csv' or 'xlsx'")
        if data_type == "csv":
            interaction_matrix_df = pd.read_csv(
                data,
                sep = csv_sep,
                decimal = csv_decimal,
                encoding = csv_encoding
                )
        else:
            # data_type was validated above, so this is the "xlsx" case
            interaction_matrix_df = pd.read_excel(data)
    else:
        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")

    if customer_origins_col not in interaction_matrix_df.columns:
        raise KeyError ("Column " + customer_origins_col + " not in data")
    if supply_locations_col not in interaction_matrix_df.columns:
        raise KeyError ("Column " + supply_locations_col + " not in data")

    # All model variables go through check_vars (must exist, be numeric and > 0)
    cols_check = attraction_col + [transport_costs_col]
    if probabilities_col is not None:
        cols_check = cols_check + [probabilities_col]
    if market_size_col is not None:
        cols_check = cols_check + [market_size_col]

    check_vars(
        interaction_matrix_df,
        cols = cols_check
        )

    # --- customer origins ---
    customer_origins_geodata_gpd = _interaction_matrix_location_geodata(
        interaction_matrix_df = interaction_matrix_df,
        unique_id_col = customer_origins_col,
        coords_col = customer_origins_coords_col,
        crs_input = crs_input,
        crs_output = crs_output
        )

    if market_size_col is not None:
        customer_origins_cols = [customer_origins_col] + [market_size_col]
    else:
        customer_origins_cols = [customer_origins_col]
    customer_origins_geodata_original_tab = interaction_matrix_df[customer_origins_cols]

    customer_origins_metadata = {
        "location_type": "origins",
        "unique_id": customer_origins_col,
        "attraction_col": [None],
        "marketsize_col": market_size_col,
        "weighting": {
            0: {
                "func": None,
                "param": None
                }
            },
        "crs_input": crs_input,
        "crs_output": crs_output,
        "no_points": len(customer_origins_geodata_gpd)
        }

    customer_origins = CustomerOrigins(
        geodata_gpd = customer_origins_geodata_gpd,
        geodata_gpd_original = customer_origins_geodata_original_tab,
        metadata = customer_origins_metadata,
        isochrones_gdf = None
        )

    # --- supply locations ---
    supply_locations_geodata_gpd = _interaction_matrix_location_geodata(
        interaction_matrix_df = interaction_matrix_df,
        unique_id_col = supply_locations_col,
        coords_col = supply_locations_coords_col,
        crs_input = crs_input,
        crs_output = crs_output
        )

    supply_locations_cols = [supply_locations_col] + attraction_col
    supply_locations_geodata_original_tab = interaction_matrix_df[supply_locations_cols]

    supply_locations_metadata = {
        "location_type": "destinations",
        "unique_id": supply_locations_col,
        "attraction_col": attraction_col,
        "marketsize_col": None,
        "weighting": {
            0: {
                "func": None,
                "param": None
                }
            },
        "crs_input": crs_input,
        "crs_output": crs_output,
        "no_points": len(supply_locations_geodata_gpd)
        }

    supply_locations = SupplyLocations(
        geodata_gpd = supply_locations_geodata_gpd,
        geodata_gpd_original = supply_locations_geodata_original_tab,
        metadata = supply_locations_metadata,
        isochrones_gdf = None
        )

    # Harmonise column names to the model notation (rename returns a new frame)
    interaction_matrix_df = interaction_matrix_df.rename(
        columns = {
            customer_origins_col: "i",
            supply_locations_col: "j",
            attraction_col[0]: "A_j",
            transport_costs_col: "t_ij"
            }
        )

    if probabilities_col is not None:
        interaction_matrix_df = interaction_matrix_df.rename(
            columns = {
                probabilities_col: "p_ij"
                }
            )

    if market_size_col is not None:
        interaction_matrix_df = interaction_matrix_df.rename(
            columns = {
                market_size_col: "C_i"
                }
            )

    interaction_matrix = InteractionMatrix(
        interaction_matrix_df=interaction_matrix_df,
        customer_origins=customer_origins,
        supply_locations=supply_locations
        )

    return interaction_matrix


def _interaction_matrix_location_geodata(
    interaction_matrix_df,
    unique_id_col,
    coords_col,
    crs_input,
    crs_output
    ):
    """Derive the distinct locations (with geometry, if available) from an interaction table.

    Returns a pandas Series of distinct IDs when ``coords_col`` is None,
    otherwise a GeoDataFrame with one row per distinct ID/coordinate
    combination.
    """

    if coords_col is None:
        # No coordinates available: only the distinct IDs are kept
        return interaction_matrix_df[unique_id_col].drop_duplicates()

    if isinstance(coords_col, str):

        # Single column: interpreted as WKT geometry strings
        if coords_col not in interaction_matrix_df.columns:
            raise KeyError ("Column " + coords_col + " not in data.")

        geodata_tab = interaction_matrix_df[[unique_id_col, coords_col]]
        geodata_tab = geodata_tab.drop_duplicates()
        geodata_tab["geometry"] = geodata_tab[coords_col].apply(lambda x: wkt.loads(x))
        geodata_gpd = gp.GeoDataFrame(
            geodata_tab,
            geometry="geometry",
            crs = crs_input
            )
        geodata_gpd = geodata_gpd.drop(
            columns = coords_col
            )

    elif isinstance(coords_col, list):

        if len(coords_col) != 2:
            # str() is required: concatenating the list itself raised TypeError
            raise ValueError ("Column " + str(coords_col) + " must be a geometry column OR TWO columns with X and Y")

        # NOTE(review): check_vars also rejects values <= 0, so negative
        # coordinates are refused here - confirm this is intended.
        check_vars (
            df = interaction_matrix_df,
            cols = coords_col
            )

        x_col, y_col = coords_col
        geodata_tab = interaction_matrix_df[[unique_id_col, x_col, y_col]]
        geodata_tab = geodata_tab.drop_duplicates()
        geodata_tab["geometry"] = geodata_tab.apply(lambda row: Point(row[x_col], row[y_col]), axis=1)
        geodata_gpd = gp.GeoDataFrame(geodata_tab, geometry="geometry")

    else:
        raise TypeError("Coordinates must be given as a geometry column (str) or a list of TWO columns with X and Y")

    # NOTE(review): this only labels the geometries with crs_output, it does
    # not reproject them - confirm whether to_crs() was intended when
    # crs_input and crs_output differ.
    geodata_gpd.set_crs(crs_output, inplace=True)

    return geodata_gpd
826
1451
 
827
- def mci_transformation(
1452
+ def log_centering_transformation(
828
1453
  df: pd.DataFrame,
829
1454
  ref_col: str,
830
- cols: list
1455
+ cols: list,
1456
+ suffix: str = "__LCT"
831
1457
  ):
832
1458
 
833
1459
  check_vars(
834
1460
  df = df,
835
- cols = cols + [ref_col]
1461
+ cols = cols
836
1462
  )
1463
+
1464
+ if ref_col not in df.columns:
1465
+ raise KeyError(f"Column '{ref_col}' not in dataframe.")
837
1466
 
838
1467
  def lct (x):
839
1468
 
@@ -843,9 +1472,115 @@ def mci_transformation(
843
1472
  return x_lct
844
1473
 
845
1474
  for var in cols:
1475
+
1476
+ unique_values = df[var].unique()
1477
+ if set(unique_values).issubset({0, 1}):
1478
+ df[var+suffix] = df[var]
1479
+ print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
1480
+ continue
846
1481
 
847
1482
  var_t = df.groupby(ref_col)[var].apply(lct)
848
1483
  var_t = var_t.reset_index()
849
- df[var+"_t"] = var_t[var]
1484
+ df[var+suffix] = var_t[var]
1485
+
1486
+ return df
1487
+
1488
def get_isochrones(
    geodata_gpd: gp.GeoDataFrame,
    unique_id_col: str,
    segments_minutes: list = [5, 10, 15],
    range_type: str = "time",
    intersections: str = "true",
    profile: str = "driving-car",
    donut: bool = True,
    ors_server: str = "https://api.openrouteservice.org/v2/",
    ors_auth: str = None,
    timeout = 10,
    delay = 1,
    save_output: bool = True,
    output_filepath: str = "isochrones.shp",
    output_crs: str = "EPSG:4326"
    ):
    """Request isochrones for every point in ``geodata_gpd`` and stack them.

    Parameters
    ----------
    geodata_gpd
        GeoDataFrame with point geometries (``.x``/``.y`` are read).
    unique_id_col
        Column whose value is attached to each returned isochrone.
    segments_minutes
        Segment limits; each value is multiplied by 60 before the request.
        The default list is never mutated here.
    range_type, intersections, profile, timeout
        Passed through to ``Client.isochrone``.
    donut
        If True, each result is passed through ``overlay_difference``
        (sorted by "segment").
    ors_server, ors_auth
        Used to construct the ``Client``.
    delay
        Seconds to sleep after each successful request.
    save_output, output_filepath
        If ``save_output`` is True the combined result is written with
        ``to_file``.
    output_crs
        CRS passed to the request and set on the combined result.

    Returns
    -------
    GeoDataFrame with the isochrones of all locations whose request
    returned status code 200; failed locations are skipped.
    """

    coords = [(point.x, point.y) for point in geodata_gpd.geometry]

    unique_id_values = geodata_gpd[unique_id_col].values

    ors_client = Client(
        server = ors_server,
        auth = ors_auth
        )

    segments = [segment*60 for segment in segments_minutes]

    # The empty frame is kept as first element so the result has the
    # expected columns even if every request fails.
    isochrone_frames = [gp.GeoDataFrame(columns=[unique_id_col, "geometry"])]

    # Pair each ID with its coordinates directly. A separate counter fell
    # out of step after a failed request and mislabelled all later isochrones.
    for unique_id_value, (x, y) in zip(unique_id_values, coords):

        isochrone_output = ors_client.isochrone(
            locations = [[x, y]],
            segments = segments,
            range_type = range_type,
            intersections = intersections,
            profile = profile,
            timeout = timeout,
            save_output = False,
            output_crs = output_crs
            )

        if isochrone_output.status_code != 200:
            continue

        isochrone_gdf = isochrone_output.get_isochrones_gdf()

        if donut:
            isochrone_gdf = overlay_difference(
                polygon_gdf = isochrone_gdf,
                sort_col = "segment"
                )

        time.sleep(delay)

        isochrone_gdf[unique_id_col] = unique_id_value

        isochrone_frames.append(isochrone_gdf)

    # One concat at the end instead of re-copying the result on every iteration
    isochrones_gdf = pd.concat(
        isochrone_frames,
        ignore_index=True
        )

    isochrones_gdf.set_crs(
        output_crs,
        allow_override=True,
        inplace=True
        )

    if save_output:

        isochrones_gdf.to_file(filename = output_filepath)

    return isochrones_gdf
1569
+
1570
+
1571
def check_vars(
    df: pd.DataFrame,
    cols: list
    ):
    """Validate the columns named in ``cols`` against ``df``.

    Three passes are made over ``cols``, in this order, and the first
    violation found raises:

    1. every column must exist in ``df``         -> KeyError
    2. every column must have a numeric dtype    -> ValueError
    3. no column may contain a value <= 0        -> ValueError

    Returns None when all checks pass.
    """

    def is_missing(name):
        return name not in df.columns

    def is_non_numeric(name):
        return not pd.api.types.is_numeric_dtype(df[name])

    def has_non_positive(name):
        return (df[name] <= 0).any()

    # Each stage runs over all columns before the next stage starts, so a
    # missing column is always reported before any dtype or value problem.
    stages = (
        (is_missing, KeyError, "Column '{}' not in dataframe."),
        (is_non_numeric, ValueError, "Column '{}' is not numeric. All stated columns must be numeric."),
        (has_non_positive, ValueError, "Column '{}' includes values <= 0. All values must be numeric and positive."),
        )

    for violates, error_type, message in stages:
        for name in cols:
            if violates(name):
                raise error_type(message.format(name))
+ raise ValueError(f"Column '{col}' includes values <= 0. All values must be numeric and positive.")