huff 1.4.0__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huff/gistools.py +2 -2
- huff/models.py +985 -196
- huff/ors.py +2 -2
- huff/osm.py +2 -2
- huff/tests/data/Haslach_new_supermarket.cpg +1 -0
- huff/tests/data/Haslach_new_supermarket.dbf +0 -0
- huff/tests/data/Haslach_new_supermarket.prj +1 -0
- huff/tests/data/Haslach_new_supermarket.qmd +26 -0
- huff/tests/data/Haslach_new_supermarket.shp +0 -0
- huff/tests/data/Haslach_new_supermarket.shx +0 -0
- huff/tests/data/Wieland2015.xlsx +0 -0
- huff/tests/tests_huff.py +163 -25
- {huff-1.4.0.dist-info → huff-1.5.0.dist-info}/METADATA +14 -3
- huff-1.5.0.dist-info/RECORD +30 -0
- huff-1.4.0.dist-info/RECORD +0 -24
- {huff-1.4.0.dist-info → huff-1.5.0.dist-info}/WHEEL +0 -0
- {huff-1.4.0.dist-info → huff-1.5.0.dist-info}/top_level.txt +0 -0
huff/models.py
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
# Author: Thomas Wieland
|
5
5
|
# ORCID: 0000-0001-5168-9846
|
6
6
|
# mail: geowieland@googlemail.com
|
7
|
-
# Version: 1.
|
8
|
-
# Last update: 2025-06-
|
7
|
+
# Version: 1.5.0
|
8
|
+
# Last update: 2025-06-25 18:32
|
9
9
|
# Copyright (c) 2025 Thomas Wieland
|
10
10
|
#-----------------------------------------------------------------------
|
11
11
|
|
@@ -17,7 +17,7 @@ from math import sqrt
|
|
17
17
|
import time
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from statsmodels.formula.api import ols
|
20
|
-
from scipy.optimize import minimize
|
20
|
+
from scipy.optimize import minimize, Bounds, LinearConstraint, NonlinearConstraint
|
21
21
|
from shapely.geometry import Point
|
22
22
|
from shapely import wkt
|
23
23
|
import copy
|
@@ -77,9 +77,9 @@ class CustomerOrigins:
|
|
77
77
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
78
78
|
print("Transport cost weighting not defined")
|
79
79
|
elif metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
80
|
-
print("Transport cost weighting " + str(metadata["weighting"][0]["param"]) + " (" + metadata["weighting"][0]["func"] + ")")
|
80
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
81
81
|
elif metadata["weighting"][0]["func"] == "logistic":
|
82
|
-
print("Transport cost weighting " + str(metadata["weighting"][0]["param"][0]) + ", " + str(metadata["weighting"][0]["param"][1]) + " (" + metadata["weighting"][0]["func"] + ")")
|
82
|
+
print("Transport cost weighting " + str(round(metadata["weighting"][0]["param"][0],3)) + ", " + str(round(metadata["weighting"][0]["param"][1],3)) + " (" + metadata["weighting"][0]["func"] + ")")
|
83
83
|
|
84
84
|
print("Unique ID column " + metadata["unique_id"])
|
85
85
|
print("Input CRS " + str(metadata["crs_input"]))
|
@@ -119,6 +119,24 @@ class CustomerOrigins:
|
|
119
119
|
param_lambda = -2
|
120
120
|
):
|
121
121
|
|
122
|
+
"""
|
123
|
+
metadata["weighting"] = {
|
124
|
+
0: {
|
125
|
+
"name": "t_ij",
|
126
|
+
"func": "power",
|
127
|
+
"param": -2
|
128
|
+
}
|
129
|
+
}
|
130
|
+
|
131
|
+
metadata["weighting"] = {
|
132
|
+
0: {
|
133
|
+
"name": "t_ij",
|
134
|
+
"func": "logistic",
|
135
|
+
"param": [10, -0.5]
|
136
|
+
}
|
137
|
+
}
|
138
|
+
"""
|
139
|
+
|
122
140
|
metadata = self.metadata
|
123
141
|
|
124
142
|
if func not in ["power", "exponential", "logistic"]:
|
@@ -130,6 +148,7 @@ class CustomerOrigins:
|
|
130
148
|
if isinstance(param_lambda, (int, float)) and func == "logistic":
|
131
149
|
raise ValueError("Function type "+ func + " requires two parameters in a list")
|
132
150
|
|
151
|
+
metadata["weighting"][0]["name"] = "t_ij"
|
133
152
|
metadata["weighting"][0]["func"] = func
|
134
153
|
|
135
154
|
if isinstance(param_lambda, list):
|
@@ -207,7 +226,6 @@ class CustomerOrigins:
|
|
207
226
|
|
208
227
|
return self
|
209
228
|
|
210
|
-
|
211
229
|
class SupplyLocations:
|
212
230
|
|
213
231
|
def __init__(
|
@@ -260,7 +278,7 @@ class SupplyLocations:
|
|
260
278
|
if metadata["weighting"][0]["func"] is None and metadata["weighting"][0]["param"] is None:
|
261
279
|
print("Attraction weighting not defined")
|
262
280
|
else:
|
263
|
-
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(metadata["weighting"][0]["param"]))
|
281
|
+
print("Attraction weighting " + metadata["weighting"][0]["func"] + " with gamma = " + str(round(metadata["weighting"][0]["param"],3)))
|
264
282
|
|
265
283
|
print("Unique ID column " + metadata["unique_id"])
|
266
284
|
print("Input CRS " + str(metadata["crs_input"]))
|
@@ -300,8 +318,10 @@ class SupplyLocations:
|
|
300
318
|
if metadata["attraction_col"] is None:
|
301
319
|
raise ValueError ("Attraction column is not yet defined. Use SupplyLocations.define_attraction()")
|
302
320
|
|
321
|
+
metadata["weighting"][0]["name"] = "A_j"
|
303
322
|
metadata["weighting"][0]["func"] = func
|
304
323
|
metadata["weighting"][0]["param"] = float(param_gamma)
|
324
|
+
|
305
325
|
self.metadata = metadata
|
306
326
|
|
307
327
|
return self
|
@@ -324,6 +344,7 @@ class SupplyLocations:
|
|
324
344
|
metadata["attraction_col"] = metadata["attraction_col"] + [var]
|
325
345
|
|
326
346
|
metadata["weighting"][new_key] = {
|
347
|
+
"name": var,
|
327
348
|
"func": func,
|
328
349
|
"param": param
|
329
350
|
}
|
@@ -342,7 +363,11 @@ class SupplyLocations:
|
|
342
363
|
metadata = self.get_metadata()
|
343
364
|
|
344
365
|
new_destinations_gpd_original = new_destinations.get_geodata_gpd_original()
|
366
|
+
new_destinations_gpd_original["j_update"] = 1
|
367
|
+
|
345
368
|
new_destinations_gpd = new_destinations.get_geodata_gpd()
|
369
|
+
new_destinations_gpd["j_update"] = 1
|
370
|
+
|
346
371
|
new_destinations_metadata = new_destinations.get_metadata()
|
347
372
|
|
348
373
|
if list(new_destinations_gpd_original.columns) != list(geodata_gpd_original.columns):
|
@@ -350,14 +375,20 @@ class SupplyLocations:
|
|
350
375
|
if list(new_destinations_gpd.columns) != list(geodata_gpd.columns):
|
351
376
|
raise KeyError("Supply locations and new destinations data have different column names.")
|
352
377
|
|
353
|
-
geodata_gpd_original =
|
354
|
-
|
378
|
+
geodata_gpd_original = pd.concat(
|
379
|
+
[
|
380
|
+
geodata_gpd_original,
|
381
|
+
new_destinations_gpd_original
|
382
|
+
],
|
355
383
|
ignore_index=True
|
356
384
|
)
|
357
|
-
|
358
|
-
geodata_gpd =
|
359
|
-
|
360
|
-
|
385
|
+
|
386
|
+
geodata_gpd = pd.concat(
|
387
|
+
[
|
388
|
+
geodata_gpd,
|
389
|
+
new_destinations_gpd
|
390
|
+
],
|
391
|
+
ignore_index=True
|
361
392
|
)
|
362
393
|
|
363
394
|
metadata["no_points"] = metadata["no_points"]+new_destinations_metadata["no_points"]
|
@@ -440,13 +471,15 @@ class InteractionMatrix:
|
|
440
471
|
self,
|
441
472
|
interaction_matrix_df,
|
442
473
|
customer_origins,
|
443
|
-
supply_locations
|
474
|
+
supply_locations,
|
475
|
+
metadata
|
444
476
|
):
|
445
477
|
|
446
478
|
self.interaction_matrix_df = interaction_matrix_df
|
447
479
|
self.customer_origins = customer_origins
|
448
480
|
self.supply_locations = supply_locations
|
449
|
-
|
481
|
+
self.metadata = metadata
|
482
|
+
|
450
483
|
def get_interaction_matrix_df(self):
    """Return the interaction matrix data frame stored on this object (no copy is made)."""
    return self.interaction_matrix_df
|
452
485
|
|
@@ -456,41 +489,84 @@ class InteractionMatrix:
|
|
456
489
|
def get_supply_locations(self):
    """Return the supply locations object stored on this object (no copy is made)."""
    return self.supply_locations
|
458
491
|
|
492
|
+
def get_metadata(self):
    """Return the metadata object stored on this interaction matrix (no copy is made)."""
    return self.metadata
|
494
|
+
|
459
495
|
def summary(self):
|
460
496
|
|
461
497
|
customer_origins_metadata = self.get_customer_origins().get_metadata()
|
462
498
|
supply_locations_metadata = self.get_supply_locations().get_metadata()
|
499
|
+
interaction_matrix_metadata = self.get_metadata()
|
463
500
|
|
464
501
|
print("Interaction Matrix")
|
465
502
|
print("----------------------------------")
|
466
503
|
|
467
|
-
print("Supply locations
|
504
|
+
print("Supply locations " + str(supply_locations_metadata["no_points"]))
|
468
505
|
if supply_locations_metadata["attraction_col"][0] is None:
|
469
|
-
print("Attraction column
|
506
|
+
print("Attraction column not defined")
|
470
507
|
else:
|
471
|
-
print("Attraction column
|
472
|
-
print("Customer origins
|
508
|
+
print("Attraction column " + supply_locations_metadata["attraction_col"][0])
|
509
|
+
print("Customer origins " + str(customer_origins_metadata["no_points"]))
|
473
510
|
if customer_origins_metadata["marketsize_col"] is None:
|
474
511
|
print("Market size column not defined")
|
475
512
|
else:
|
476
|
-
print("Market size column
|
513
|
+
print("Market size column " + customer_origins_metadata["marketsize_col"])
|
514
|
+
|
515
|
+
if interaction_matrix_metadata != {} and "transport_costs" in interaction_matrix_metadata:
|
516
|
+
print("----------------------------------")
|
517
|
+
if interaction_matrix_metadata["transport_costs"]["network"]:
|
518
|
+
print("Transport cost type Time")
|
519
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["time_unit"])
|
520
|
+
else:
|
521
|
+
print("Transport cost type Distance")
|
522
|
+
print("Transport cost unit " + interaction_matrix_metadata["transport_costs"]["distance_unit"])
|
523
|
+
|
477
524
|
print("----------------------------------")
|
478
525
|
print("Partial utilities")
|
479
|
-
print("
|
526
|
+
print(" Weights")
|
527
|
+
|
480
528
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
481
|
-
print("Attraction
|
529
|
+
print("Attraction not defined")
|
482
530
|
else:
|
483
|
-
print("Attraction
|
531
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"],3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
484
532
|
|
485
533
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
486
|
-
print("Transport costs
|
534
|
+
print("Transport costs not defined")
|
487
535
|
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
488
|
-
print("Transport costs
|
536
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
489
537
|
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
490
|
-
print("Transport costs
|
538
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
539
|
+
|
540
|
+
|
541
|
+
attrac_vars = supply_locations_metadata["attraction_col"]
|
542
|
+
attrac_vars_no = len(attrac_vars)
|
543
|
+
|
544
|
+
if attrac_vars_no > 1:
|
545
|
+
|
546
|
+
for key, attrac_var in enumerate(attrac_vars):
|
547
|
+
|
548
|
+
if key == 0:
|
549
|
+
continue
|
550
|
+
|
551
|
+
if key not in supply_locations_metadata["weighting"].keys():
|
552
|
+
|
553
|
+
print(f"{attrac_vars[key][:16]:16} not defined")
|
554
|
+
|
555
|
+
else:
|
556
|
+
|
557
|
+
name = supply_locations_metadata["weighting"][key]["name"]
|
558
|
+
param = supply_locations_metadata["weighting"][key]["param"]
|
559
|
+
func = supply_locations_metadata["weighting"][key]["func"]
|
560
|
+
|
561
|
+
print(f"{name[:16]:16} {round(param, 3)} ({func})")
|
491
562
|
|
492
563
|
print("----------------------------------")
|
493
564
|
|
565
|
+
if interaction_matrix_metadata != {} and "fit" in interaction_matrix_metadata and interaction_matrix_metadata["fit"]["function"] is not None:
|
566
|
+
print("Parameter estimation")
|
567
|
+
print("Fit function " + interaction_matrix_metadata["fit"]["function"])
|
568
|
+
print("Fit by " + interaction_matrix_metadata["fit"]["fit_by"])
|
569
|
+
|
494
570
|
def transport_costs(
|
495
571
|
self,
|
496
572
|
network: bool = True,
|
@@ -508,6 +584,7 @@ class InteractionMatrix:
|
|
508
584
|
range_type = "distance"
|
509
585
|
|
510
586
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
587
|
+
interaction_matrix_metadata = self.get_metadata()
|
511
588
|
|
512
589
|
customer_origins = self.get_customer_origins()
|
513
590
|
customer_origins_geodata_gpd = customer_origins.get_geodata_gpd()
|
@@ -588,14 +665,87 @@ class InteractionMatrix:
|
|
588
665
|
if distance_unit == "kilometers":
|
589
666
|
interaction_matrix_df["t_ij"] = interaction_matrix_df["t_ij"]/1000
|
590
667
|
|
668
|
+
interaction_matrix_metadata["transport_costs"] = {
|
669
|
+
"network": network,
|
670
|
+
"range_type": range_type,
|
671
|
+
"time_unit": time_unit,
|
672
|
+
"distance_unit": distance_unit,
|
673
|
+
"ors_server": ors_server,
|
674
|
+
"ors_auth": ors_auth
|
675
|
+
}
|
676
|
+
|
591
677
|
self.interaction_matrix_df = interaction_matrix_df
|
678
|
+
self.metadata = interaction_matrix_metadata
|
592
679
|
|
593
680
|
return self
|
594
681
|
|
682
|
+
def define_weightings(
    self,
    vars_funcs: dict
    ):
    """
    Set variable names and weighting function types for all model variables.

    Entry 0 of ``vars_funcs`` is written to weighting 0 of the supply
    locations metadata and entry 1 to weighting 0 of the customer origins
    metadata; their stored parameters are left untouched. Every entry with
    key >= 2 is written to supply locations weighting ``key - 1`` (created
    if missing) and its parameter is reset to None.

    Example:

        vars_funcs = {
            0: {"name": "A_j", "func": "power"},
            1: {"name": "t_ij", "func": "logistic"},
            2: {"name": "second_attraction_variable", "func": "power"},
            3: {"name": "third_attraction_variable", "func": "exponential"},
            ...
        }

    Parameters:
        vars_funcs: dict keyed by integer position; each value is a dict
            with the keys "name" and "func".

    Raises:
        KeyError: if entry 0 or 1, or a "name"/"func" key, is missing.
    """

    supply_locations_metadata = self.supply_locations.metadata
    customer_origins_metadata = self.customer_origins.metadata

    attraction_weightings = supply_locations_metadata["weighting"]
    transport_costs_weighting = customer_origins_metadata["weighting"][0]

    attraction_weightings[0]["name"] = vars_funcs[0]["name"]
    attraction_weightings[0]["func"] = vars_funcs[0]["func"]

    transport_costs_weighting["name"] = vars_funcs[1]["name"]
    transport_costs_weighting["func"] = vars_funcs[1]["func"]

    for key, var in vars_funcs.items():

        if key < 2:
            continue

        # Position 1 of vars_funcs is the transport cost variable, so the
        # additional attraction variables are stored one slot lower.
        target_key = key - 1

        # Test the slot that is actually written (the previous check looked
        # at `key`, which failed when the stored keys had a gap).
        if target_key not in attraction_weightings:
            attraction_weightings[target_key] = {
                "name": "attrac"+str(key),
                "func": "power",
                "param": None
                }

        attraction_weightings[target_key]["name"] = var["name"]
        attraction_weightings[target_key]["func"] = var["func"]
        attraction_weightings[target_key]["param"] = None

    self.supply_locations.metadata = supply_locations_metadata
    self.customer_origins.metadata = customer_origins_metadata
|
738
|
+
|
595
739
|
def utility(self):
|
596
740
|
|
597
741
|
interaction_matrix_df = self.interaction_matrix_df
|
598
742
|
|
743
|
+
interaction_matrix_metadata = self.get_metadata()
|
744
|
+
|
745
|
+
if "t_ij" not in interaction_matrix_df.columns:
|
746
|
+
raise ValueError ("No transport cost variable in interaction matrix")
|
747
|
+
if "A_j" not in interaction_matrix_df.columns:
|
748
|
+
raise ValueError ("No attraction variable in interaction matrix")
|
599
749
|
if interaction_matrix_df["t_ij"].isna().all():
|
600
750
|
raise ValueError ("Transport cost variable is not defined")
|
601
751
|
if interaction_matrix_df["A_j"].isna().all():
|
@@ -609,6 +759,7 @@ class InteractionMatrix:
|
|
609
759
|
customer_origins = self.customer_origins
|
610
760
|
customer_origins_metadata = customer_origins.get_metadata()
|
611
761
|
tc_weighting = customer_origins_metadata["weighting"][0]
|
762
|
+
|
612
763
|
if tc_weighting["func"] == "power":
|
613
764
|
interaction_matrix_df["t_ij_weighted"] = interaction_matrix_df["t_ij"] ** tc_weighting["param"]
|
614
765
|
elif tc_weighting["func"] == "exponential":
|
@@ -621,6 +772,7 @@ class InteractionMatrix:
|
|
621
772
|
supply_locations = self.supply_locations
|
622
773
|
supply_locations_metadata = supply_locations.get_metadata()
|
623
774
|
attraction_weighting = supply_locations_metadata["weighting"][0]
|
775
|
+
|
624
776
|
if attraction_weighting["func"] == "power":
|
625
777
|
interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j"] ** attraction_weighting["param"]
|
626
778
|
elif tc_weighting["func"] == "exponential":
|
@@ -628,19 +780,51 @@ class InteractionMatrix:
|
|
628
780
|
else:
|
629
781
|
raise ValueError ("Attraction weighting is not defined.")
|
630
782
|
|
783
|
+
attrac_vars = supply_locations_metadata["attraction_col"]
|
784
|
+
attrac_vars_no = len(attrac_vars)
|
785
|
+
attrac_var_key = 0
|
786
|
+
|
787
|
+
if attrac_vars_no > 1:
|
788
|
+
|
789
|
+
for key, attrac_var in enumerate(attrac_vars):
|
790
|
+
|
791
|
+
attrac_var_key = key #+1
|
792
|
+
if attrac_var_key == 0: #1:
|
793
|
+
continue
|
794
|
+
|
795
|
+
name = supply_locations_metadata["weighting"][attrac_var_key]["name"]
|
796
|
+
param = supply_locations_metadata["weighting"][attrac_var_key]["param"]
|
797
|
+
func = supply_locations_metadata["weighting"][attrac_var_key]["func"]
|
798
|
+
|
799
|
+
if func == "power":
|
800
|
+
interaction_matrix_df[name+"_weighted"] = interaction_matrix_df[name] ** param
|
801
|
+
elif func == "exponential":
|
802
|
+
interaction_matrix_df[name+"_weighted"] = np.exp(param * interaction_matrix_df[name])
|
803
|
+
else:
|
804
|
+
raise ValueError ("Weighting for " + name + " is not defined.")
|
805
|
+
|
806
|
+
interaction_matrix_df["A_j_weighted"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df[name+"_weighted"]
|
807
|
+
|
808
|
+
interaction_matrix_df = interaction_matrix_df.drop(columns=[name+"_weighted"])
|
809
|
+
|
631
810
|
interaction_matrix_df["U_ij"] = interaction_matrix_df["A_j_weighted"]*interaction_matrix_df["t_ij_weighted"]
|
632
811
|
|
633
|
-
interaction_matrix_df = interaction_matrix_df.drop(columns=[
|
812
|
+
interaction_matrix_df = interaction_matrix_df.drop(columns=["A_j_weighted", "t_ij_weighted"])
|
634
813
|
|
635
|
-
|
814
|
+
interaction_matrix_metadata["model"] = {
|
815
|
+
"model_type": "Huff"
|
816
|
+
}
|
636
817
|
|
818
|
+
self.interaction_matrix_df = interaction_matrix_df
|
819
|
+
self.metadata = interaction_matrix_metadata
|
820
|
+
|
637
821
|
return self
|
638
822
|
|
639
823
|
def probabilities (self):
|
640
824
|
|
641
825
|
interaction_matrix_df = self.interaction_matrix_df
|
642
826
|
|
643
|
-
if interaction_matrix_df["U_ij"].isna().all():
|
827
|
+
if "U_ij" not in interaction_matrix_df.columns or interaction_matrix_df["U_ij"].isna().all():
|
644
828
|
self.utility()
|
645
829
|
interaction_matrix_df = self.interaction_matrix_df
|
646
830
|
|
@@ -666,6 +850,8 @@ class InteractionMatrix:
|
|
666
850
|
|
667
851
|
interaction_matrix_df = self.interaction_matrix_df
|
668
852
|
|
853
|
+
if "C_i" not in interaction_matrix_df.columns:
|
854
|
+
raise ValueError ("No market size variable in interaction matrix")
|
669
855
|
if interaction_matrix_df["C_i"].isna().all():
|
670
856
|
raise ValueError ("Market size column in customer origins not defined. Use CustomerOrigins.define_marketsize()")
|
671
857
|
|
@@ -775,6 +961,8 @@ class InteractionMatrix:
|
|
775
961
|
|
776
962
|
interaction_matrix_df = self.get_interaction_matrix_df()
|
777
963
|
|
964
|
+
interaction_matrix_metadata = self.get_metadata()
|
965
|
+
|
778
966
|
cols_t = [col + "__LCT" for col in cols]
|
779
967
|
|
780
968
|
if "p_ij__LCT" not in interaction_matrix_df.columns:
|
@@ -826,10 +1014,20 @@ class InteractionMatrix:
|
|
826
1014
|
|
827
1015
|
customer_origins.metadata = customer_origins_metadata
|
828
1016
|
supply_locations.metadata = supply_locations_metadata
|
1017
|
+
|
1018
|
+
interaction_matrix_metadata = {
|
1019
|
+
"fit": {
|
1020
|
+
"function": "mci_fit",
|
1021
|
+
"fit_by": "probabilities",
|
1022
|
+
"method": "OLS"
|
1023
|
+
}
|
1024
|
+
}
|
1025
|
+
|
829
1026
|
interaction_matrix = InteractionMatrix(
|
830
1027
|
interaction_matrix_df,
|
831
1028
|
customer_origins,
|
832
|
-
supply_locations
|
1029
|
+
supply_locations,
|
1030
|
+
metadata=interaction_matrix_metadata
|
833
1031
|
)
|
834
1032
|
|
835
1033
|
mci_model = MCIModel(
|
@@ -841,7 +1039,391 @@ class InteractionMatrix:
|
|
841
1039
|
|
842
1040
|
return mci_model
|
843
1041
|
|
1042
|
+
def loglik(
    self,
    params,
    fit_by = "probabilities"
    ):
    """
    Objective function for parameter fitting: evaluate the model for ``params``.

    The values in ``params`` are written into the supply locations and
    customer origins metadata of this object, then utility, probabilities
    and flows are computed on a deep copy, so the data frame of this object
    is not modified. The result is the negated value of the module-level
    ``loglik(observed, expected)`` helper, where ``observed`` is the
    "p_ij" (or "E_ij") column of this object and ``expected`` the same
    column of the recomputed copy.

    Layout of ``params``:
        [0]  attraction weighting parameter
        [1]  transport cost parameter (power / exponential), or
        [1], [2]  the two transport cost parameters (logistic)
        remaining values: additional attraction variables, stored in
        supply locations weightings 1, 2, ...

    Parameters:
        params: list or np.ndarray of parameter values.
        fit_by: "probabilities" or "flows".

    Raises:
        ValueError: for an unknown ``fit_by``, a ``params`` object that is
            neither list nor np.ndarray, or too few parameter values.
    """

    if fit_by not in ["probabilities", "flows"]:
        raise ValueError ("Parameter 'fit_by' must be 'probabilities' or 'flows'")

    if not isinstance(params, list):
        if isinstance(params, np.ndarray):
            params = params.tolist()
        else:
            raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")

    if len(params) < 2:
        raise ValueError("Parameter 'params' must be a list or np.ndarray with at least 2 parameter values")

    customer_origins = self.customer_origins
    customer_origins_metadata = customer_origins.get_metadata()

    tc_func = customer_origins_metadata["weighting"][0]["func"]

    if tc_func == "logistic" and len(params) < 3:
        raise ValueError("When using logistic weighting, parameter 'params' must be a list or np.ndarray with at least 3 parameter values")

    interaction_matrix_df = self.interaction_matrix_df

    supply_locations = self.supply_locations
    supply_locations_metadata = supply_locations.get_metadata()

    supply_locations_metadata["weighting"][0]["param"] = float(params[0])
    supply_locations.metadata = supply_locations_metadata

    # The parameter count was validated above, so no further length checks
    # are needed here. first_extra is the index of the first parameter that
    # belongs to an additional attraction variable.
    if tc_func in ["power", "exponential"]:
        customer_origins_metadata["weighting"][0]["param"] = float(params[1])
        first_extra = 2
    elif tc_func == "logistic":
        customer_origins_metadata["weighting"][0]["param"] = [float(params[1]), float(params[2])]
        first_extra = 3
    else:
        # Unknown / undefined transport cost weighting: nothing else is set.
        first_extra = None

    if first_extra is not None:
        for weighting_key, param in enumerate(params[first_extra:], start=1):
            supply_locations_metadata["weighting"][weighting_key]["param"] = float(param)

    customer_origins.metadata = customer_origins_metadata

    # Keep the observed columns before the model is re-evaluated on the copy.
    p_ij_emp = interaction_matrix_df["p_ij"]
    E_ij_emp = interaction_matrix_df["E_ij"]

    interaction_matrix_copy = copy.deepcopy(self)

    interaction_matrix_copy.utility()
    interaction_matrix_copy.probabilities()
    interaction_matrix_copy.flows()

    interaction_matrix_df_copy = interaction_matrix_copy.get_interaction_matrix_df()

    if fit_by == "flows":
        observed = E_ij_emp
        expected = interaction_matrix_df_copy["E_ij"]
    else:
        observed = p_ij_emp
        expected = interaction_matrix_df_copy["p_ij"]

    # Inside this method the bare name resolves to the module-level
    # loglik() function, not to this method.
    LL = loglik(
        observed = observed,
        expected = expected
        )

    # Negated so that the value can be passed to a minimizer.
    return -LL
|
1153
|
+
|
1154
|
+
def huff_ml_fit(
    self,
    initial_params: list = [1.0, -2.0],
    method: str = "L-BFGS-B",
    bounds: list = [(0.5, 1), (-3, -1)],
    constraints: list = [],
    fit_by = "probabilities",
    update_estimates: bool = True
    ):
    """
    Fit the weighting parameters by minimizing ``self.loglik`` with scipy.

    Expected layout of ``initial_params`` (and of the fitted vector):
        [0]  attraction weighting parameter
        [1]  transport cost parameter (power / exponential), or
        [1], [2]  the two transport cost parameters (logistic)
        remaining values: one per additional attraction variable

    On success the fitted values are written to the supply locations and
    customer origins metadata; on failure the attraction and transport
    cost parameters are set to None and a message is printed. If the fit
    succeeded and ``update_estimates`` is True, the current "p_ij" and
    "E_ij" columns are kept as "p_ij_emp" / "E_ij_emp", the fit settings
    are stored in ``self.metadata["fit"]`` and utility, probabilities and
    flows are recomputed.

    Parameters:
        initial_params: start values, one per model parameter.
        method: solver name passed to scipy.optimize.minimize.
        bounds: one (min, max) pair per entry of ``initial_params``.
        constraints: constraints passed to scipy.optimize.minimize.
        fit_by: "probabilities" or "flows", forwarded to ``self.loglik``.
        update_estimates: recompute the interaction matrix after the fit.

    Returns:
        The interaction matrix object itself.

    Raises:
        ValueError: if the number of ``initial_params`` or ``bounds`` does
            not match the number of model parameters.
    """

    supply_locations = self.supply_locations
    supply_locations_metadata = supply_locations.get_metadata()

    customer_origins = self.customer_origins
    customer_origins_metadata = customer_origins.get_metadata()

    tc_func = customer_origins_metadata["weighting"][0]["func"]
    tc_is_logistic = tc_func == "logistic"

    # The number of transport cost parameters follows from the function
    # type only. (Taking len() of the stored parameter raised a TypeError
    # for float parameters and its result was overwritten anyway.)
    params_metadata_customer_origins = 2 if tc_is_logistic else 1

    params_metadata_supply_locations = len(supply_locations_metadata["weighting"])

    params_metadata = params_metadata_customer_origins+params_metadata_supply_locations

    if len(initial_params) < 2 or len(initial_params) != params_metadata:
        raise ValueError("Parameter 'initial_params' must be a list with " + str(params_metadata) + " entries (Attaction: " + str(params_metadata_supply_locations) + ", Transport costs: " + str(params_metadata_customer_origins) + ")")

    if len(bounds) != len(initial_params):
        raise ValueError("Parameter 'bounds' must have the same length as parameter 'initial_params' (" + str(len(bounds)) + ", " + str(len(initial_params)) + ")")

    ml_result = minimize(
        self.loglik,
        initial_params,
        args=(fit_by,),
        method = method,
        bounds = bounds,
        constraints = constraints,
        options={'disp': 3}
        )

    attrac_vars = len(supply_locations_metadata["weighting"])

    if ml_result.success:

        fitted_params = ml_result.x

        param_gamma = float(fitted_params[0])
        supply_locations_metadata["weighting"][0]["param"] = param_gamma
        param_results = [param_gamma]

        if tc_func in ["power", "exponential"]:

            param_lambda = float(fitted_params[1])
            param_results.append(param_lambda)

            customer_origins_metadata["weighting"][0]["param"] = param_lambda

        elif tc_is_logistic:

            param_lambda = float(fitted_params[1])
            param_lambda2 = float(fitted_params[2])
            param_results.extend([param_lambda, param_lambda2])

            customer_origins_metadata["weighting"][0]["param"] = [param_lambda, param_lambda2]

        # Position of the first additional attraction parameter in the
        # fitted vector (same layout as used by loglik()).
        fitted_params_add = 3 if tc_is_logistic else 2

        # Additional attraction variables occupy weightings 1 .. attrac_vars-1.
        # Iterating over that range directly also covers the last variable
        # when the transport cost function is logistic.
        for weighting_key in range(1, attrac_vars):

            param = float(fitted_params[fitted_params_add + weighting_key - 1])

            param_results.append(param)

            supply_locations_metadata["weighting"][weighting_key]["param"] = param

        print(f"Optimization via {method} algorithm succeeded with parameters: {', '.join(str(round(par, 3)) for par in param_results)}.")

    else:

        supply_locations_metadata["weighting"][0]["param"] = None

        if tc_is_logistic:
            customer_origins_metadata["weighting"][0]["param"] = [None, None]
        else:
            customer_origins_metadata["weighting"][0]["param"] = None

        print(f"Optimiziation via {method} algorithm failed with error message: '{ml_result.message}'. See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html for all available algorithms.")

    self.supply_locations.metadata = supply_locations_metadata
    self.customer_origins.metadata = customer_origins_metadata

    if ml_result.success and update_estimates:

        # Store copies so the metadata never aliases the shared default
        # argument objects or the caller's own lists.
        self.metadata["fit"] = {
            "function": "huff_ml_fit",
            "fit_by": fit_by,
            "initial_params": copy.copy(initial_params),
            "method": method,
            "bounds": copy.copy(bounds),
            "constraints": copy.copy(constraints)
            }

        # Keep the empirical values before they are replaced by model estimates.
        self.interaction_matrix_df["p_ij_emp"] = self.interaction_matrix_df["p_ij"]
        self.interaction_matrix_df["E_ij_emp"] = self.interaction_matrix_df["E_ij"]

        self = self.utility()
        self = self.probabilities()
        self = self.flows()

    return self
|
1295
|
+
|
1296
|
+
def update(self):
    """
    Extend the interaction matrix by the supply locations flagged as new.

    Supply locations whose ``j_update`` column equals 1 are collected into
    a separate SupplyLocations object, an interaction matrix is built for
    them against the existing customer origins, and its rows are appended
    to the current interaction matrix (sorted by ``ij``).

    If the metadata of this interaction matrix contains a
    ``"transport_costs"`` entry, transport costs for the new rows are
    calculated with the stored settings, and utilities, probabilities and
    flows are recalculated afterwards. Otherwise the new rows are only
    appended and a notice is printed.

    Returns:
        The updated interaction matrix object (``self``).

    Raises:
        ValueError: If no supply location is flagged with ``j_update == 1``
            in either the working or the original geodata.
    """

    interaction_matrix_df = self.get_interaction_matrix_df()
    interaction_matrix_metadata = self.get_metadata()

    customer_origins = self.get_customer_origins()
    supply_locations = self.get_supply_locations()

    no_new_destinations_msg = (
        "There are no new destinations for an interaction matrix update. "
        "Use SupplyLocations.add_new_destinations()"
    )

    # Working geodata: keep only the destinations flagged as new
    supply_locations_geodata_gpd = supply_locations.get_geodata_gpd().copy()
    supply_locations_geodata_gpd_new = supply_locations_geodata_gpd[
        supply_locations_geodata_gpd["j_update"] == 1
    ]
    if len(supply_locations_geodata_gpd_new) < 1:
        raise ValueError(no_new_destinations_msg)

    # Original geodata: same filter, needed to build the SupplyLocations object
    supply_locations_geodata_gpd_original = supply_locations.get_geodata_gpd_original().copy()
    supply_locations_geodata_gpd_original_new = supply_locations_geodata_gpd_original[
        supply_locations_geodata_gpd_original["j_update"] == 1
    ]
    if len(supply_locations_geodata_gpd_original_new) < 1:
        raise ValueError(no_new_destinations_msg)

    supply_locations_new = SupplyLocations(
        geodata_gpd=supply_locations_geodata_gpd_new,
        geodata_gpd_original=supply_locations_geodata_gpd_original_new,
        metadata=supply_locations.metadata,
        isochrones_gdf=supply_locations.isochrones_gdf,
        buffers_gdf=supply_locations.buffers_gdf
    )

    interaction_matrix_new = create_interaction_matrix(
        customer_origins=customer_origins,
        supply_locations=supply_locations_new
    )

    has_transport_costs = "transport_costs" in interaction_matrix_metadata

    if has_transport_costs:

        # Re-use the settings of the original transport costs calculation
        transport_costs_settings = interaction_matrix_metadata["transport_costs"]
        transport_costs_kwargs = {
            "network": transport_costs_settings["network"],
            "range_type": transport_costs_settings["range_type"],
            "time_unit": transport_costs_settings["time_unit"],
            "distance_unit": transport_costs_settings["distance_unit"],
            "ors_server": transport_costs_settings["ors_server"],
            "ors_auth": transport_costs_settings["ors_auth"]
        }

        interaction_matrix_new.transport_costs(**transport_costs_kwargs)

    else:

        print("New destination(s) included. No transport costs calculation because not defined in original interaction matrix.")

    # Fetch the new rows only AFTER transport_costs() has run: if that method
    # rebinds the data frame instead of mutating it in place, a reference
    # taken earlier would still point to the frame without transport costs.
    interaction_matrix_new_df = interaction_matrix_new.get_interaction_matrix_df()

    interaction_matrix_df = pd.concat(
        [
            interaction_matrix_df,
            interaction_matrix_new_df
        ],
        ignore_index=True
    )

    interaction_matrix_df = interaction_matrix_df.sort_values(by = "ij")

    self.interaction_matrix_df = interaction_matrix_df

    # NOTE(review): utilities/probabilities/flows are only recalculated when
    # transport costs are available - confirm this nesting against the
    # original source file, whose indentation is not visible here.
    if has_transport_costs:
        self.utility()
        self.probabilities()
        self.flows()

    return self
|
1383
|
+
|
1384
|
+
class MarketAreas:
    """
    Container for a market areas table and its metadata.

    Holds a data frame of market area totals (one row per supply location)
    together with a metadata dictionary, and can attach that table to an
    existing HuffModel or MCIModel.
    """

    def __init__(
        self,
        market_areas_df,
        metadata
        ):
        """
        Store the market areas table and its metadata.

        Parameters:
            market_areas_df: Data frame with the market area totals.
            metadata: Dictionary describing the table.
        """

        self.market_areas_df = market_areas_df
        self.metadata = metadata

    def get_market_areas_df(self):
        """Return the market areas data frame."""
        return self.market_areas_df

    def get_metadata(self):
        """Return the metadata dictionary."""
        return self.metadata

    def add_to_model(
        self,
        model_object
        ):
        """
        Build a new model of the same class that carries this market areas table.

        Parameters:
            model_object: A HuffModel or MCIModel whose interaction matrix
                (and, for MCIModel, coefficients and OLS model) is re-used.

        Returns:
            A new HuffModel or MCIModel with ``market_areas_df`` set to the
            table stored in this object.

        Raises:
            ValueError: If ``model_object`` is neither a HuffModel nor an
                MCIModel.
        """

        if not isinstance(model_object, (HuffModel, MCIModel)):
            # The message names the actual parameter of this method
            raise ValueError("Parameter 'model_object' must be of class HuffModel or MCIModel")

        # HuffModel is checked first so that it takes precedence, as in the
        # original sequence of checks where the HuffModel branch came last.
        if isinstance(model_object, HuffModel):

            return HuffModel(
                interaction_matrix = model_object.interaction_matrix,
                market_areas_df = self.market_areas_df
            )

        return MCIModel(
            interaction_matrix = model_object.interaction_matrix,
            coefs = model_object.get_coefs_dict(),
            mci_ols_model = model_object.get_mci_ols_model(),
            market_areas_df = self.market_areas_df
        )
|
1426
|
+
|
845
1427
|
class HuffModel:
|
846
1428
|
|
847
1429
|
def __init__(
|
@@ -886,34 +1468,89 @@ class HuffModel:
|
|
886
1468
|
|
887
1469
|
print("Huff Model")
|
888
1470
|
print("----------------------------------")
|
889
|
-
print("Supply locations
|
1471
|
+
print("Supply locations " + str(supply_locations_metadata["no_points"]))
|
890
1472
|
if supply_locations_metadata["attraction_col"][0] is None:
|
891
|
-
print("Attraction column
|
1473
|
+
print("Attraction column not defined")
|
892
1474
|
else:
|
893
|
-
print("Attraction column
|
894
|
-
print("Customer origins
|
1475
|
+
print("Attraction column " + supply_locations_metadata["attraction_col"][0])
|
1476
|
+
print("Customer origins " + str(customer_origins_metadata["no_points"]))
|
895
1477
|
if customer_origins_metadata["marketsize_col"] is None:
|
896
|
-
print("Market size column
|
1478
|
+
print("Market size column not defined")
|
897
1479
|
else:
|
898
|
-
print("Market size column
|
1480
|
+
print("Market size column " + customer_origins_metadata["marketsize_col"])
|
899
1481
|
print("----------------------------------")
|
900
1482
|
|
901
1483
|
print("Partial utilities")
|
902
|
-
print("
|
1484
|
+
print(" Weights")
|
903
1485
|
|
904
1486
|
if supply_locations_metadata["weighting"][0]["func"] is None and supply_locations_metadata["weighting"][0]["param"] is None:
|
905
|
-
print("Attraction
|
1487
|
+
print("Attraction not defined")
|
906
1488
|
else:
|
907
|
-
print("Attraction
|
1489
|
+
print("Attraction " + str(round(supply_locations_metadata["weighting"][0]["param"], 3)) + " (" + supply_locations_metadata["weighting"][0]["func"] + ")")
|
908
1490
|
|
909
1491
|
if customer_origins_metadata["weighting"][0]["func"] is None and customer_origins_metadata["weighting"][0]["param"] is None:
|
910
|
-
print("Transport costs
|
1492
|
+
print("Transport costs not defined")
|
911
1493
|
elif customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
912
|
-
print("Transport costs
|
1494
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
913
1495
|
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
914
|
-
print("Transport costs
|
1496
|
+
print("Transport costs " + str(round(customer_origins_metadata["weighting"][0]["param"][0],3)) + ", " + str(round(customer_origins_metadata["weighting"][0]["param"][1],3)) + " (" + customer_origins_metadata["weighting"][0]["func"] + ")")
|
1497
|
+
|
1498
|
+
attrac_vars = supply_locations_metadata["attraction_col"]
|
1499
|
+
attrac_vars_no = len(attrac_vars)
|
1500
|
+
|
1501
|
+
if attrac_vars_no > 1:
|
1502
|
+
|
1503
|
+
for key, attrac_var in enumerate(attrac_vars):
|
1504
|
+
|
1505
|
+
if key == 0:
|
1506
|
+
continue
|
1507
|
+
|
1508
|
+
if key not in supply_locations_metadata["weighting"].keys():
|
1509
|
+
|
1510
|
+
print(f"{attrac_vars[key][:16]:16} not defined")
|
1511
|
+
|
1512
|
+
else:
|
915
1513
|
|
1514
|
+
name = supply_locations_metadata["weighting"][key]["name"]
|
1515
|
+
param = supply_locations_metadata["weighting"][key]["param"]
|
1516
|
+
func = supply_locations_metadata["weighting"][key]["func"]
|
1517
|
+
|
1518
|
+
print(f"{name[:16]:16} {round(param, 3)} ({func})")
|
1519
|
+
|
916
1520
|
print("----------------------------------")
|
1521
|
+
|
1522
|
+
huff_modelfit = self.modelfit()
|
1523
|
+
if huff_modelfit is not None:
|
1524
|
+
|
1525
|
+
print ("Goodness-of-fit for probabilities")
|
1526
|
+
|
1527
|
+
print("Sum of squared residuals ", round(huff_modelfit[1]["SQR"], 2))
|
1528
|
+
print("Sum of squares ", round(huff_modelfit[1]["SQT"], 2))
|
1529
|
+
print("R-squared ", round(huff_modelfit[1]["Rsq"], 2))
|
1530
|
+
print("Mean squared error ", round(huff_modelfit[1]["MSE"], 2))
|
1531
|
+
print("Root mean squared error ", round(huff_modelfit[1]["RMSE"], 2))
|
1532
|
+
print("Mean absolute error ", round(huff_modelfit[1]["MAE"], 2))
|
1533
|
+
if huff_modelfit[1]["MAPE"] is not None:
|
1534
|
+
print("Mean absolute percentage error ", round(huff_modelfit[1]["MAPE"], 2))
|
1535
|
+
else:
|
1536
|
+
print("Mean absolute percentage error Not calculated")
|
1537
|
+
print("Symmetric MAPE ", round(huff_modelfit[1]["sMAPE"], 2))
|
1538
|
+
print("Absolute percentage errors")
|
1539
|
+
|
1540
|
+
APE_list = [
|
1541
|
+
["< 5 % ", round(huff_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(huff_modelfit[1]["APE"]["resid_below30"], 2)],
|
1542
|
+
["< 10 % ", round(huff_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(huff_modelfit[1]["APE"]["resid_below35"], 2)],
|
1543
|
+
["< 15 % ", round(huff_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(huff_modelfit[1]["APE"]["resid_below40"], 2)],
|
1544
|
+
["< 20 % ", round(huff_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(huff_modelfit[1]["APE"]["resid_below45"], 2)],
|
1545
|
+
["< 25% ", round(huff_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(huff_modelfit[1]["APE"]["resid_below50"], 2)]
|
1546
|
+
]
|
1547
|
+
APE_df = pd.DataFrame(
|
1548
|
+
APE_list,
|
1549
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1550
|
+
)
|
1551
|
+
print(APE_df.to_string(index=False))
|
1552
|
+
|
1553
|
+
print("----------------------------------")
|
917
1554
|
|
918
1555
|
def mci_fit(
|
919
1556
|
self,
|
@@ -923,7 +1560,8 @@ class HuffModel:
|
|
923
1560
|
|
924
1561
|
interaction_matrix = self.interaction_matrix
|
925
1562
|
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
926
|
-
|
1563
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1564
|
+
|
927
1565
|
supply_locations = interaction_matrix.get_supply_locations()
|
928
1566
|
supply_locations_metadata = supply_locations.get_metadata()
|
929
1567
|
|
@@ -979,10 +1617,20 @@ class HuffModel:
|
|
979
1617
|
|
980
1618
|
customer_origins.metadata = customer_origins_metadata
|
981
1619
|
supply_locations.metadata = supply_locations_metadata
|
1620
|
+
|
1621
|
+
interaction_matrix_metadata = {
|
1622
|
+
"fit": {
|
1623
|
+
"function": "mci_fit",
|
1624
|
+
"fit_by": "probabilities",
|
1625
|
+
"method": "OLS"
|
1626
|
+
}
|
1627
|
+
}
|
1628
|
+
|
982
1629
|
interaction_matrix = InteractionMatrix(
|
983
1630
|
interaction_matrix_df,
|
984
1631
|
customer_origins,
|
985
|
-
supply_locations
|
1632
|
+
supply_locations,
|
1633
|
+
metadata=interaction_matrix_metadata
|
986
1634
|
)
|
987
1635
|
|
988
1636
|
mci_model = MCIModel(
|
@@ -994,132 +1642,42 @@ class HuffModel:
|
|
994
1642
|
|
995
1643
|
return mci_model
|
996
1644
|
|
997
|
-
def
|
998
|
-
self,
|
999
|
-
params
|
1000
|
-
):
|
1001
|
-
|
1002
|
-
if not isinstance(params, list):
|
1003
|
-
if isinstance(params, np.ndarray):
|
1004
|
-
params = params.tolist()
|
1005
|
-
else:
|
1006
|
-
raise ValueError("Parameter 'params' must be a list or np.ndarray with two or three parameter values")
|
1007
|
-
|
1008
|
-
if len(params) == 2:
|
1009
|
-
param_gamma, param_lambda = params
|
1010
|
-
elif len(params) == 3:
|
1011
|
-
param_gamma, param_lambda, param_lambda2 = params
|
1012
|
-
else:
|
1013
|
-
raise ValueError("Parameter 'params' must be a list with two or three parameter values")
|
1014
|
-
|
1015
|
-
interaction_matrix = self.interaction_matrix
|
1016
|
-
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1017
|
-
|
1018
|
-
supply_locations = interaction_matrix.get_supply_locations()
|
1019
|
-
supply_locations_metadata = supply_locations.get_metadata()
|
1020
|
-
|
1021
|
-
customer_origins = interaction_matrix.get_customer_origins()
|
1022
|
-
customer_origins_metadata = customer_origins.get_metadata()
|
1023
|
-
|
1024
|
-
supply_locations_metadata["weighting"][0]["param"] = float(param_gamma)
|
1025
|
-
supply_locations.metadata = supply_locations_metadata
|
1026
|
-
|
1027
|
-
if customer_origins_metadata["weighting"][0]["func"] in ["power", "exponential"]:
|
1028
|
-
|
1029
|
-
if len(params) == 2:
|
1030
|
-
customer_origins_metadata["weighting"][0]["param"] = float(param_lambda)
|
1031
|
-
else:
|
1032
|
-
raise ValueError ("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have two input parameters")
|
1033
|
-
|
1034
|
-
elif customer_origins_metadata["weighting"][0]["func"] == "logistic":
|
1035
|
-
|
1036
|
-
if len(params) == 3:
|
1037
|
-
customer_origins_metadata["weighting"][0]["param"] = [float(param_lambda), float(param_lambda2)]
|
1038
|
-
else:
|
1039
|
-
raise ValueError("Huff Model with transport cost weightig of type " + customer_origins_metadata["weighting"]["func"] + " must have three input parameters")
|
1040
|
-
|
1041
|
-
customer_origins.metadata = customer_origins_metadata
|
1042
|
-
|
1043
|
-
interaction_matrix = self.interaction_matrix
|
1044
|
-
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1045
|
-
|
1046
|
-
p_ij_emp = interaction_matrix_df["p_ij"]
|
1645
|
+
def update(self):
    """
    Refresh the model after new supply locations were added.

    Updates the underlying interaction matrix via its own ``update()``
    method, then rebuilds the market areas table from the updated matrix.

    Returns:
        The model object itself (``self``).
    """

    updated_interaction_matrix = self.interaction_matrix.update()
    self.interaction_matrix = updated_interaction_matrix

    # Market areas are derived from the freshly updated interaction matrix
    updated_market_areas = self.interaction_matrix.marketareas()
    self.market_areas_df = updated_market_areas.get_market_areas_df()

    return self
|
1062
1652
|
|
1063
|
-
def
|
1653
|
+
def modelfit(
    self,
    by = "p_ij"
    ):
    """
    Calculate goodness-of-fit metrics for the fitted model.

    Compares the empirical column ``by + "_emp"`` with the modelled column
    ``by`` of the interaction matrix using the module-level ``modelfit()``
    function.

    Parameters:
        by: Name of the modelled column (default ``"p_ij"``); the observed
            values are read from the column with the suffix ``"_emp"``.

    Returns:
        The result of the module-level ``modelfit()`` function, or None if
        one of the two columns is missing or the calculation fails.
    """

    interaction_matrix = self.interaction_matrix
    interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()

    observed_col = by+"_emp"

    # Without both columns there is nothing to compare
    if by not in interaction_matrix_df.columns or observed_col not in interaction_matrix_df.columns:
        return None

    try:

        return modelfit(
            interaction_matrix_df[observed_col],
            interaction_matrix_df[by]
        )

    # 'Exception' instead of a bare 'except' so that KeyboardInterrupt and
    # SystemExit are not swallowed by this best-effort calculation.
    except Exception:

        print("Goodness-of-fit metrics could not be calculated due to NaN values.")
        return None
|
1680
|
+
|
1123
1681
|
class MCIModel:
|
1124
1682
|
|
1125
1683
|
def __init__(
|
@@ -1175,12 +1733,19 @@ class MCIModel:
|
|
1175
1733
|
|
1176
1734
|
if ("p_ij" in interaction_matrix_df.columns and "p_ij_emp" in interaction_matrix_df.columns):
|
1177
1735
|
|
1178
|
-
|
1179
|
-
interaction_matrix_df["p_ij_emp"],
|
1180
|
-
interaction_matrix_df["p_ij"]
|
1181
|
-
)
|
1736
|
+
try:
|
1182
1737
|
|
1183
|
-
|
1738
|
+
mci_modelfit = modelfit(
|
1739
|
+
interaction_matrix_df["p_ij_emp"],
|
1740
|
+
interaction_matrix_df["p_ij"]
|
1741
|
+
)
|
1742
|
+
|
1743
|
+
return mci_modelfit
|
1744
|
+
|
1745
|
+
except:
|
1746
|
+
|
1747
|
+
print("Goodness-of-fit metrics could not be calculated due to NaN values.")
|
1748
|
+
return None
|
1184
1749
|
|
1185
1750
|
else:
|
1186
1751
|
|
@@ -1226,7 +1791,7 @@ class MCIModel:
|
|
1226
1791
|
mci_modelfit = self.modelfit()
|
1227
1792
|
if mci_modelfit is not None:
|
1228
1793
|
|
1229
|
-
print ("Goodness-of-fit
|
1794
|
+
print ("Goodness-of-fit for probabilities")
|
1230
1795
|
|
1231
1796
|
print("Sum of squared residuals ", round(mci_modelfit[1]["SQR"], 2))
|
1232
1797
|
print("Sum of squares ", round(mci_modelfit[1]["SQT"], 2))
|
@@ -1234,13 +1799,25 @@ class MCIModel:
|
|
1234
1799
|
print("Mean squared error ", round(mci_modelfit[1]["MSE"], 2))
|
1235
1800
|
print("Root mean squared error ", round(mci_modelfit[1]["RMSE"], 2))
|
1236
1801
|
print("Mean absolute error ", round(mci_modelfit[1]["MAE"], 2))
|
1237
|
-
|
1802
|
+
if mci_modelfit[1]["MAPE"] is not None:
|
1803
|
+
print("Mean absolute percentage error ", round(mci_modelfit[1]["MAPE"], 2))
|
1804
|
+
else:
|
1805
|
+
print("Mean absolute percentage error Not calculated")
|
1806
|
+
print("Symmetric MAPE ", round(mci_modelfit[1]["sMAPE"], 2))
|
1807
|
+
|
1238
1808
|
print("Absolute percentage errors")
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1809
|
+
APE_list = [
|
1810
|
+
["< 5 % ", round(mci_modelfit[1]["APE"]["resid_below5"], 2), " < 30 % ", round(mci_modelfit[1]["APE"]["resid_below30"], 2)],
|
1811
|
+
["< 10 % ", round(mci_modelfit[1]["APE"]["resid_below10"], 2), " < 35 % ", round(mci_modelfit[1]["APE"]["resid_below35"], 2)],
|
1812
|
+
["< 15 % ", round(mci_modelfit[1]["APE"]["resid_below15"], 2), " < 40 % ", round(mci_modelfit[1]["APE"]["resid_below40"], 2)],
|
1813
|
+
["< 20 % ", round(mci_modelfit[1]["APE"]["resid_below20"], 2), " < 45 % ", round(mci_modelfit[1]["APE"]["resid_below45"], 2)],
|
1814
|
+
["< 25% ", round(mci_modelfit[1]["APE"]["resid_below25"], 2), " < 50 % ", round(mci_modelfit[1]["APE"]["resid_below50"], 2)]
|
1815
|
+
]
|
1816
|
+
APE_df = pd.DataFrame(
|
1817
|
+
APE_list,
|
1818
|
+
columns=["Resid.", "%", "Resid.", "%"]
|
1819
|
+
)
|
1820
|
+
print(APE_df.to_string(index=False))
|
1244
1821
|
|
1245
1822
|
print("--------------------------------------------")
|
1246
1823
|
|
@@ -1251,6 +1828,7 @@ class MCIModel:
|
|
1251
1828
|
|
1252
1829
|
interaction_matrix = self.interaction_matrix
|
1253
1830
|
interaction_matrix_df = interaction_matrix.get_interaction_matrix_df()
|
1831
|
+
interaction_matrix_metadata = interaction_matrix.get_metadata()
|
1254
1832
|
|
1255
1833
|
if interaction_matrix_df["t_ij"].isna().all():
|
1256
1834
|
raise ValueError ("Transport cost variable is not defined")
|
@@ -1289,10 +1867,16 @@ class MCIModel:
|
|
1289
1867
|
if transformation == "ILCT":
|
1290
1868
|
interaction_matrix_df["U_ij"] = np.exp(interaction_matrix_df["U_ij"])
|
1291
1869
|
|
1870
|
+
interaction_matrix_metadata["model"] = {
|
1871
|
+
"model_type": "MCI",
|
1872
|
+
"transformation": transformation
|
1873
|
+
}
|
1874
|
+
|
1292
1875
|
interaction_matrix = InteractionMatrix(
|
1293
1876
|
interaction_matrix_df,
|
1294
1877
|
customer_origins,
|
1295
|
-
supply_locations
|
1878
|
+
supply_locations,
|
1879
|
+
metadata=interaction_matrix_metadata
|
1296
1880
|
)
|
1297
1881
|
self.interaction_matrix = interaction_matrix
|
1298
1882
|
|
@@ -1459,7 +2043,7 @@ def load_geodata (
|
|
1459
2043
|
|
1460
2044
|
geodata_gpd = geodata_gpd_original.to_crs(crs_output)
|
1461
2045
|
geodata_gpd = geodata_gpd[[unique_id, "geometry"]]
|
1462
|
-
|
2046
|
+
|
1463
2047
|
metadata = {
|
1464
2048
|
"location_type": location_type,
|
1465
2049
|
"unique_id": unique_id,
|
@@ -1467,6 +2051,7 @@ def load_geodata (
|
|
1467
2051
|
"marketsize_col": None,
|
1468
2052
|
"weighting": {
|
1469
2053
|
0: {
|
2054
|
+
"name": None,
|
1470
2055
|
"func": None,
|
1471
2056
|
"param": None
|
1472
2057
|
}
|
@@ -1474,17 +2059,23 @@ def load_geodata (
|
|
1474
2059
|
"crs_input": crs_input,
|
1475
2060
|
"crs_output": crs_output,
|
1476
2061
|
"no_points": len(geodata_gpd)
|
1477
|
-
}
|
2062
|
+
}
|
1478
2063
|
|
1479
2064
|
if location_type == "origins":
|
2065
|
+
|
1480
2066
|
geodata_object = CustomerOrigins(
|
1481
2067
|
geodata_gpd,
|
1482
2068
|
geodata_gpd_original,
|
1483
2069
|
metadata,
|
1484
2070
|
None,
|
1485
2071
|
None
|
1486
|
-
)
|
2072
|
+
)
|
2073
|
+
|
1487
2074
|
elif location_type == "destinations":
|
2075
|
+
|
2076
|
+
geodata_gpd["j_update"] = 0
|
2077
|
+
geodata_gpd_original["j_update"] = 0
|
2078
|
+
|
1488
2079
|
geodata_object = SupplyLocations(
|
1489
2080
|
geodata_gpd,
|
1490
2081
|
geodata_gpd_original,
|
@@ -1563,10 +2154,13 @@ def create_interaction_matrix(
|
|
1563
2154
|
interaction_matrix_df["p_ij"] = None
|
1564
2155
|
interaction_matrix_df["E_ij"] = None
|
1565
2156
|
|
2157
|
+
metadata = {}
|
2158
|
+
|
1566
2159
|
interaction_matrix = InteractionMatrix(
|
1567
2160
|
interaction_matrix_df,
|
1568
2161
|
customer_origins,
|
1569
|
-
supply_locations
|
2162
|
+
supply_locations,
|
2163
|
+
metadata
|
1570
2164
|
)
|
1571
2165
|
|
1572
2166
|
return interaction_matrix
|
@@ -1577,6 +2171,7 @@ def load_interaction_matrix(
|
|
1577
2171
|
supply_locations_col: str,
|
1578
2172
|
attraction_col: list,
|
1579
2173
|
transport_costs_col: str,
|
2174
|
+
flows_col: str = None,
|
1580
2175
|
probabilities_col: str = None,
|
1581
2176
|
market_size_col: str = None,
|
1582
2177
|
customer_origins_coords_col = None,
|
@@ -1585,8 +2180,10 @@ def load_interaction_matrix(
|
|
1585
2180
|
csv_sep = ";",
|
1586
2181
|
csv_decimal = ",",
|
1587
2182
|
csv_encoding="unicode_escape",
|
2183
|
+
xlsx_sheet: str = None,
|
1588
2184
|
crs_input = "EPSG:4326",
|
1589
|
-
crs_output = "EPSG:4326"
|
2185
|
+
crs_output = "EPSG:4326",
|
2186
|
+
check_df_vars = True
|
1590
2187
|
):
|
1591
2188
|
|
1592
2189
|
if isinstance(data, pd.DataFrame):
|
@@ -1602,7 +2199,13 @@ def load_interaction_matrix(
|
|
1602
2199
|
encoding = csv_encoding
|
1603
2200
|
)
|
1604
2201
|
elif data_type == "xlsx":
|
1605
|
-
|
2202
|
+
if xlsx_sheet is not None:
|
2203
|
+
interaction_matrix_df = pd.read_excel(
|
2204
|
+
data,
|
2205
|
+
sheet_name=xlsx_sheet
|
2206
|
+
)
|
2207
|
+
else:
|
2208
|
+
interaction_matrix_df = pd.read_excel(data)
|
1606
2209
|
else:
|
1607
2210
|
raise TypeError("Unknown type of data")
|
1608
2211
|
else:
|
@@ -1614,15 +2217,18 @@ def load_interaction_matrix(
|
|
1614
2217
|
raise KeyError ("Column " + supply_locations_col + " not in data")
|
1615
2218
|
|
1616
2219
|
cols_check = attraction_col + [transport_costs_col]
|
2220
|
+
if flows_col is not None:
|
2221
|
+
cols_check = cols_check + [flows_col]
|
1617
2222
|
if probabilities_col is not None:
|
1618
2223
|
cols_check = cols_check + [probabilities_col]
|
1619
2224
|
if market_size_col is not None:
|
1620
2225
|
cols_check = cols_check + [market_size_col]
|
1621
2226
|
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1625
|
-
|
2227
|
+
if check_df_vars:
|
2228
|
+
check_vars(
|
2229
|
+
interaction_matrix_df,
|
2230
|
+
cols = cols_check
|
2231
|
+
)
|
1626
2232
|
|
1627
2233
|
if customer_origins_coords_col is not None:
|
1628
2234
|
|
@@ -1677,6 +2283,7 @@ def load_interaction_matrix(
|
|
1677
2283
|
"marketsize_col": market_size_col,
|
1678
2284
|
"weighting": {
|
1679
2285
|
0: {
|
2286
|
+
"name": None,
|
1680
2287
|
"func": None,
|
1681
2288
|
"param": None
|
1682
2289
|
}
|
@@ -1744,6 +2351,7 @@ def load_interaction_matrix(
|
|
1744
2351
|
"marketsize_col": None,
|
1745
2352
|
"weighting": {
|
1746
2353
|
0: {
|
2354
|
+
"name": None,
|
1747
2355
|
"func": None,
|
1748
2356
|
"param": None
|
1749
2357
|
}
|
@@ -1770,6 +2378,13 @@ def load_interaction_matrix(
|
|
1770
2378
|
}
|
1771
2379
|
)
|
1772
2380
|
|
2381
|
+
if flows_col is not None:
|
2382
|
+
interaction_matrix_df = interaction_matrix_df.rename(
|
2383
|
+
columns = {
|
2384
|
+
flows_col: "E_ij"
|
2385
|
+
}
|
2386
|
+
)
|
2387
|
+
|
1773
2388
|
if probabilities_col is not None:
|
1774
2389
|
interaction_matrix_df = interaction_matrix_df.rename(
|
1775
2390
|
columns = {
|
@@ -1783,15 +2398,141 @@ def load_interaction_matrix(
|
|
1783
2398
|
market_size_col: "C_i"
|
1784
2399
|
}
|
1785
2400
|
)
|
1786
|
-
|
2401
|
+
|
2402
|
+
metadata = {
|
2403
|
+
"fit": {
|
2404
|
+
"function": None,
|
2405
|
+
"fit_by": None
|
2406
|
+
}
|
2407
|
+
}
|
2408
|
+
|
1787
2409
|
interaction_matrix = InteractionMatrix(
|
1788
2410
|
interaction_matrix_df=interaction_matrix_df,
|
1789
2411
|
customer_origins=customer_origins,
|
1790
|
-
supply_locations=supply_locations
|
2412
|
+
supply_locations=supply_locations,
|
2413
|
+
metadata=metadata
|
1791
2414
|
)
|
1792
2415
|
|
1793
2416
|
return interaction_matrix
|
1794
2417
|
|
2418
|
+
def load_marketareas(
    data,
    supply_locations_col: str,
    total_col: str,
    data_type = "csv",
    csv_sep = ";",
    csv_decimal = ",",
    csv_encoding="unicode_escape",
    xlsx_sheet: str = None,
    check_df_vars = True
    ):
    """
    Load a market areas table and wrap it in a MarketAreas object.

    Parameters:
        data: A pandas.DataFrame, or the path (str) of a .csv or .xlsx file.
        supply_locations_col: Column with the supply location identifier;
            renamed to ``"j"``.
        total_col: Column with the total per supply location; renamed to
            ``"T_j"``.
        data_type: ``"csv"`` or ``"xlsx"``; only used when ``data`` is a path.
        csv_sep: Field separator passed to ``pandas.read_csv``.
        csv_decimal: Decimal sign passed to ``pandas.read_csv``.
        csv_encoding: Encoding passed to ``pandas.read_csv``.
        xlsx_sheet: Sheet name passed to ``pandas.read_excel``; if None, the
            pandas default sheet is read.
        check_df_vars: If True, ``total_col`` is validated with ``check_vars``.

    Returns:
        A MarketAreas object with the renamed table and a metadata dictionary
        (``unique_id``, ``total_col``, ``no_points``).

    Raises:
        ValueError: If ``data`` is a path and ``data_type`` is neither
            ``"csv"`` nor ``"xlsx"``.
        TypeError: If ``data`` is neither a pandas.DataFrame nor a str.
        KeyError: If ``supply_locations_col`` or ``total_col`` is not a
            column of the loaded table.
    """

    if isinstance(data, pd.DataFrame):

        market_areas_df = data

    elif isinstance(data, str):

        if data_type == "csv":
            market_areas_df = pd.read_csv(
                data,
                sep = csv_sep,
                decimal = csv_decimal,
                encoding = csv_encoding
            )
        elif data_type == "xlsx":
            # sheet_name is only passed when given: sheet_name=None would make
            # pandas return a dict of all sheets instead of one data frame
            if xlsx_sheet is not None:
                market_areas_df = pd.read_excel(
                    data,
                    sheet_name=xlsx_sheet
                )
            else:
                market_areas_df = pd.read_excel(data)
        else:
            raise ValueError ("data_type must be 'csv' or 'xlsx'")

    else:

        raise TypeError("data must be pandas.DataFrame or file (.csv, .xlsx)")

    if supply_locations_col not in market_areas_df.columns:
        raise KeyError ("Column " + supply_locations_col + " not in data")
    if total_col not in market_areas_df.columns:
        # Report the column that is actually missing
        raise KeyError ("Column " + total_col + " not in data")

    if check_df_vars:
        check_vars(
            market_areas_df,
            cols = [total_col]
        )

    # rename() returns a new frame, the input data is left untouched
    market_areas_df = market_areas_df.rename(
        columns = {
            supply_locations_col: "j",
            total_col: "T_j"
        }
    )

    metadata = {
        "unique_id": supply_locations_col,
        "total_col": total_col,
        "no_points": len(market_areas_df)
    }

    market_areas = MarketAreas(
        market_areas_df,
        metadata
    )

    return market_areas
|
2485
|
+
|
2486
|
+
def market_shares(
    df: pd.DataFrame,
    turnover_col: str,
    ref_col: str = None,
    marketshares_col: str = "p_ij"
    ):
    """
    Calculate market shares from a turnover column.

    Each value of ``turnover_col`` is divided by a total: the sum of
    ``turnover_col`` within the row's ``ref_col`` group if ``ref_col`` is
    given, otherwise the sum over the whole data frame.

    Parameters:
        df: Input data frame; it is not modified.
        turnover_col: Column with the values to be converted into shares.
        ref_col: Optional grouping column defining the reference totals.
        marketshares_col: Name of the result column (default ``"p_ij"``).

    Returns:
        A new data frame with a fresh 0..n-1 index (as the previous
        merge-based implementation produced) and the additional column
        ``marketshares_col``. Rows whose ``ref_col`` value is missing get a
        missing share.

    Raises:
        KeyError: If ``ref_col`` is given but is not a column of ``df``.
    """

    check_vars(
        df = df,
        cols = [turnover_col]
    )

    if ref_col is not None and ref_col not in df.columns:
        raise KeyError(f"Column '{ref_col}' not in dataframe.")

    # Work on a new frame so the caller's data frame stays untouched; no
    # temporary key/"total"/"index" helper columns are needed any more.
    shares_df = df.reset_index(drop=True)

    if ref_col is not None:

        # Total turnover per reference group, mapped back onto each row
        group_totals = shares_df.groupby(ref_col)[turnover_col].sum()
        totals = shares_df[ref_col].map(group_totals)

    else:

        # Single overall total
        totals = shares_df[turnover_col].sum()

    shares_df[marketshares_col] = shares_df[turnover_col]/totals

    return shares_df
|
2535
|
+
|
1795
2536
|
def log_centering_transformation(
|
1796
2537
|
df: pd.DataFrame,
|
1797
2538
|
ref_col: str,
|
@@ -1822,12 +2563,18 @@ def log_centering_transformation(
|
|
1822
2563
|
print ("Column " + str(var) + " is a dummy variable and requires/allows no log-centering transformation")
|
1823
2564
|
continue
|
1824
2565
|
|
2566
|
+
if (df[var] <= 0).any():
|
2567
|
+
df[var+suffix] = float("nan")
|
2568
|
+
print ("Column " + str(var) + " contains values <= 0. No log-centering transformation possible.")
|
2569
|
+
continue
|
2570
|
+
|
1825
2571
|
var_t = df.groupby(ref_col)[var].apply(lct)
|
1826
2572
|
var_t = var_t.reset_index()
|
1827
2573
|
df[var+suffix] = var_t[var]
|
1828
2574
|
|
1829
2575
|
return df
|
1830
2576
|
|
2577
|
+
|
1831
2578
|
def get_isochrones(
|
1832
2579
|
geodata_gpd: gp.GeoDataFrame,
|
1833
2580
|
unique_id_col: str,
|
@@ -1918,7 +2665,8 @@ def get_isochrones(
|
|
1918
2665
|
|
1919
2666
|
def modelfit(
|
1920
2667
|
observed,
|
1921
|
-
expected
|
2668
|
+
expected,
|
2669
|
+
remove_nan: bool = True
|
1922
2670
|
):
|
1923
2671
|
|
1924
2672
|
observed_no = len(observed)
|
@@ -1933,12 +2681,41 @@ def modelfit(
|
|
1933
2681
|
if not isinstance(expected, np.number):
|
1934
2682
|
if not is_numeric_dtype(expected):
|
1935
2683
|
raise ValueError("Expected column is not numeric")
|
1936
|
-
|
2684
|
+
|
2685
|
+
if remove_nan:
|
2686
|
+
|
2687
|
+
obs_exp = pd.DataFrame(
|
2688
|
+
{
|
2689
|
+
"observed": observed,
|
2690
|
+
"expected": expected
|
2691
|
+
}
|
2692
|
+
)
|
2693
|
+
|
2694
|
+
obs_exp_clean = obs_exp.dropna(subset=["observed", "expected"])
|
2695
|
+
|
2696
|
+
observed = obs_exp_clean["observed"].to_numpy()
|
2697
|
+
expected = obs_exp_clean["expected"].to_numpy()
|
2698
|
+
|
2699
|
+
else:
|
2700
|
+
|
2701
|
+
if np.isnan(observed).any():
|
2702
|
+
raise ValueError("Vector with observed data contains NaN")
|
2703
|
+
if np.isnan(expected).any():
|
2704
|
+
raise ValueError("Vector with expected data contains NaN")
|
2705
|
+
|
1937
2706
|
residuals = np.array(observed)-np.array(expected)
|
1938
2707
|
residuals_sq = residuals**2
|
1939
2708
|
residuals_abs = abs(residuals)
|
1940
2709
|
|
1941
|
-
|
2710
|
+
if any(observed == 0):
|
2711
|
+
print ("Vector 'observed' contains values equal to zero. No APE/MAPE calculated.")
|
2712
|
+
APE = np.full_like(observed, np.nan)
|
2713
|
+
MAPE = None
|
2714
|
+
else:
|
2715
|
+
APE = abs(observed-expected)/observed*100
|
2716
|
+
MAPE = float(np.mean(APE))
|
2717
|
+
|
2718
|
+
sAPE = abs(observed-expected)/((abs(observed)+abs(expected))/2)*100
|
1942
2719
|
|
1943
2720
|
data_residuals = pd.DataFrame({
|
1944
2721
|
"observed": observed,
|
@@ -1946,25 +2723,31 @@ def modelfit(
|
|
1946
2723
|
"residuals": residuals,
|
1947
2724
|
"residuals_sq": residuals_sq,
|
1948
2725
|
"residuals_abs": residuals_abs,
|
1949
|
-
"APE": APE
|
2726
|
+
"APE": APE,
|
2727
|
+
"sAPE": sAPE
|
1950
2728
|
})
|
1951
2729
|
|
1952
2730
|
SQR = float(np.sum(residuals_sq))
|
1953
|
-
SAR = float(np.sum(residuals_abs))
|
1954
|
-
LL = float(np.sum(np.log(residuals_sq)))
|
2731
|
+
SAR = float(np.sum(residuals_abs))
|
1955
2732
|
observed_mean = float(np.sum(observed)/observed_no)
|
1956
2733
|
SQT = float(np.sum((observed-observed_mean)**2))
|
1957
2734
|
Rsq = float(1-(SQR/SQT))
|
1958
2735
|
MSE = float(SQR/observed_no)
|
1959
2736
|
RMSE = float(sqrt(MSE))
|
1960
2737
|
MAE = float(SAR/observed_no)
|
1961
|
-
|
1962
|
-
|
1963
|
-
|
1964
|
-
|
1965
|
-
|
1966
|
-
|
1967
|
-
|
2738
|
+
|
2739
|
+
sMAPE = float(np.mean(sAPE))
|
2740
|
+
|
2741
|
+
resid_below5 = float(len(data_residuals[data_residuals["APE"] < 5])/expected_no*100)
|
2742
|
+
resid_below10 = float(len(data_residuals[data_residuals["APE"] < 10])/expected_no*100)
|
2743
|
+
resid_below15 = float(len(data_residuals[data_residuals["APE"] < 15])/expected_no*100)
|
2744
|
+
resid_below20 = float(len(data_residuals[data_residuals["APE"] < 20])/expected_no*100)
|
2745
|
+
resid_below25 = float(len(data_residuals[data_residuals["APE"] < 25])/expected_no*100)
|
2746
|
+
resid_below30 = float(len(data_residuals[data_residuals["APE"] < 30])/expected_no*100)
|
2747
|
+
resid_below35 = float(len(data_residuals[data_residuals["APE"] < 35])/expected_no*100)
|
2748
|
+
resid_below40 = float(len(data_residuals[data_residuals["APE"] < 40])/expected_no*100)
|
2749
|
+
resid_below45 = float(len(data_residuals[data_residuals["APE"] < 45])/expected_no*100)
|
2750
|
+
resid_below50 = float(len(data_residuals[data_residuals["APE"] < 50])/expected_no*100)
|
1968
2751
|
|
1969
2752
|
data_lossfunctions = {
|
1970
2753
|
"SQR": SQR,
|
@@ -1975,12 +2758,18 @@ def modelfit(
|
|
1975
2758
|
"RMSE": RMSE,
|
1976
2759
|
"MAE": MAE,
|
1977
2760
|
"MAPE": MAPE,
|
2761
|
+
"sMAPE": sMAPE,
|
1978
2762
|
"APE": {
|
1979
2763
|
"resid_below5": resid_below5,
|
1980
2764
|
"resid_below10": resid_below10,
|
1981
2765
|
"resid_below15": resid_below15,
|
1982
2766
|
"resid_below20": resid_below20,
|
1983
|
-
"resid_below25": resid_below25
|
2767
|
+
"resid_below25": resid_below25,
|
2768
|
+
"resid_below30": resid_below30,
|
2769
|
+
"resid_below35": resid_below35,
|
2770
|
+
"resid_below40": resid_below40,
|
2771
|
+
"resid_below45": resid_below45,
|
2772
|
+
"resid_below50": resid_below50,
|
1984
2773
|
}
|
1985
2774
|
}
|
1986
2775
|
|
@@ -2000,7 +2789,7 @@ def loglik(
|
|
2000
2789
|
observed,
|
2001
2790
|
expected
|
2002
2791
|
)
|
2003
|
-
residuals_sq = model_fit[0]["residuals_sq"]
|
2792
|
+
residuals_sq = model_fit[0]["residuals_sq"]
|
2004
2793
|
|
2005
2794
|
LL = np.sum(np.log(residuals_sq))
|
2006
2795
|
|