SearchLibrium 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_code/__init__.py +8 -0
- old_code/_choice_model.py +1363 -0
- old_code/_device.py +145 -0
- old_code/akshay_test.py +125 -0
- old_code/boxcox_functions.py +116 -0
- old_code/draws.py +128 -0
- old_code/harmony.py +1261 -0
- old_code/latent_class_constrained.py +434 -0
- old_code/latent_class_mixed_model.py +1566 -0
- old_code/latent_class_model.py +1281 -0
- old_code/latent_main.py +945 -0
- old_code/main.py +1880 -0
- old_code/main_ol.py +127 -0
- old_code/misc.py +303 -0
- old_code/mixed_logit.py +1553 -0
- old_code/multinomial_logit.py +559 -0
- old_code/ordered_logit.py +1641 -0
- old_code/ordered_logit_mixed.py +103 -0
- old_code/ordered_logit_multinomial.py +701 -0
- old_code/r_ordered.py +168 -0
- old_code/rrm.py +521 -0
- old_code/search.py +3485 -0
- old_code/siman.py +1023 -0
- old_code/threshold.py +777 -0
- searchlibrium-0.0.1.dist-info/METADATA +21 -0
- searchlibrium-0.0.1.dist-info/RECORD +28 -0
- searchlibrium-0.0.1.dist-info/WHEEL +5 -0
- searchlibrium-0.0.1.dist-info/top_level.txt +1 -0
old_code/_device.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
''' ---------------------------------------------------------- '''
|
|
2
|
+
''' LIBRARIES '''
|
|
3
|
+
''' ---------------------------------------------------------- '''
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
''' ---------------------------------------------------------- '''
|
|
8
|
+
''' LOCAL INITIALISATION '''
|
|
9
|
+
''' ---------------------------------------------------------- '''
|
|
10
|
+
|
|
11
|
+
_gpu_available = False
try:
    import cupy
    # NOTE(review): the flag is left False even when the import succeeds, so
    # the GPU code paths below are never taken and enable_gpu_acceleration()
    # always raises. This looks like a deliberate kill-switch - confirm before
    # changing it to True.
    _gpu_available = False
except ImportError:
    pass

''' ---------------------------------------------------------- '''
''' CLASS                                                      '''
''' ---------------------------------------------------------- '''
class Device():
    """Array-backend selector plus a few batched matrix products.

    ``self.np`` holds the array module currently in use: ``numpy`` or, when
    the GPU is enabled, ``cupy``.  Every product below goes through
    ``self.np`` so that switching backend affects all of them uniformly.
    """

    def __init__(self):
        """Pick the backend from the module-level CuPy probe."""
        if _gpu_available:
            self.np = cupy
            self._using_gpu = True
        else:
            self.np = np
            self._using_gpu = False

    def enable_gpu_acceleration(self, device_id=0):
        """Switch to CuPy and make ``device_id`` the current CUDA device.

        Raises:
            Exception: if the module-level probe did not flag CuPy as usable.
        """
        if not _gpu_available:
            raise Exception("CuPy not found. Verify installation")
        self.np = cupy
        self._using_gpu = True
        cupy.cuda.Device(device_id).use()

    def disable_gpu_acceleration(self):
        """Switch back to NumPy."""
        self.np = np
        self._using_gpu = False

    def multiply_1(self, a, b):
        """Compute the 'npjk,nkr -> npjr' contraction with one batched matmul.

        ``a`` must be 4-D (its shape is unpacked into n, p, j, k); the last
        axis of ``b`` gives r.
        """
        n, p, j, k = a.shape
        r = b.shape[-1]
        # Fold (p, j) into one axis so matmul batches over n only, then unfold.
        flat = a.reshape(n, p * j, k)
        return self.np.matmul(flat, b).reshape(n, p, j, r)

    def multiply_2(self, a, b):
        """Compute the 'npjk,k -> npj' contraction (plain matmul of a and b)."""
        return self.np.matmul(a, b)

    def multiply_3(self, a, b):
        """Compute the 'npjr,npjk -> nkr' contraction as b^T @ a per batch.

        ``a`` must be 4-D (n, p, j, r); the last axis of ``b`` gives k.
        """
        n, p, j, r = a.shape
        k = b.shape[-1]
        b_t = b.reshape(n, p * j, k).transpose([0, 2, 1])  # (n, k, p*j)
        return self.np.matmul(b_t, a.reshape(n, p * j, r))

    def cust_einsum(self, expr, a, b):
        """Dispatch one of the three supported einsum expressions.

        The expression string must match exactly, spaces included.

        Raises:
            Exception: if ``expr`` is not one of the supported expressions.
        """
        handlers = {
            'npjk,nkr -> npjr': self.multiply_1,
            'npjk,k -> npj': self.multiply_2,
            'npjr,npjk -> nkr': self.multiply_3,
        }
        handler = handlers.get(expr)
        if handler is None:
            raise Exception(f"The expression {expr} is not supported by custeinsum")
        return handler(a, b)

    @property
    def using_gpu(self):
        """True when the CuPy backend is selected."""
        return self._using_gpu

    def to_cpu(self, arr):
        """Return ``arr`` via ``cupy.asnumpy``; needs CuPy to be importable."""
        return cupy.asnumpy(arr)

    def to_gpu(self, arr):
        """Return ``arr`` via ``cupy.asarray``; needs CuPy to be importable."""
        return cupy.asarray(arr)

    def convert_array_cpu(self, arr):
        """Alias of :meth:`to_cpu`."""
        return self.to_cpu(arr)

    def convert_array_gpu(self, arr):
        """Alias of :meth:`to_gpu`."""
        return self.to_gpu(arr)

    def get_device_count(self):
        """Return the CUDA device count, or 0 when the probe flag is False."""
        if _gpu_available:
            return cupy.cuda.runtime.getDeviceCount()
        return 0
|
|
141
|
+
|
|
142
|
+
''' ---------------------------------------------------------- '''
|
|
143
|
+
''' GLOBAL OBJECT '''
|
|
144
|
+
''' ---------------------------------------------------------- '''
|
|
145
|
+
# Shared instance built at import time; its backend is whatever Device.__init__ selects.
device = Device() # Create an object to use
|
old_code/akshay_test.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from harmony import *
|
|
3
|
+
from siman import *
|
|
4
|
+
from threshold import *
|
|
5
|
+
from latent_class_mixed_model import LatentClassMixedModel
|
|
6
|
+
from latent_class_model import LatentClassModel
|
|
7
|
+
from mixed_logit import MixedLogit
|
|
8
|
+
from multinomial_logit import MultinomialLogit
|
|
9
|
+
import pandas as pd
|
|
10
|
+
from scipy import stats
|
|
11
|
+
|
|
12
|
+
##df_wide = pd.read_csv('dataMaaSInd_wide.csv')
|
|
13
|
+
##df_wide =df_wide[df_wide['CHOICE'] !=0]
|
|
14
|
+
##
|
|
15
|
+
##from xlogit.utils import wide_to_long
|
|
16
|
+
##df = wide_to_long(df_wide, id_col='indID', alt_list=[1, 2, 3, 4],
|
|
17
|
+
## varying=['pref', 'purchase', 'useWork', 'useFamily', 'useMaintenance', 'useSocial',
|
|
18
|
+
## 'maasLocalPT-1', 'maasLDPT-1', 'maasTaxi-1', 'maasCarRental-1', 'maasCarshare-1', 'maasRideshare-1', 'maasBikeshare-1', 'maasTktInt-1', 'maasBkInt-1', 'maasRTInf-1', 'maasPers-1', 'maasPaymentType-1', 'maasCost-1',
|
|
19
|
+
## 'maasLocalPT-2', 'maasLDPT-2', 'maasTaxi-2', 'maasCarRental-2', 'maasCarshare-2', 'maasRideshare-2', 'maasBikeshare-2', 'maasTktInt-2', 'maasBkInt-2', 'maasRTInf-2', 'maasPers-2', 'maasPaymentType-2', 'maasCost-2'], alt_name='alt', sep= "_")
|
|
20
|
+
##
|
|
21
|
+
##df = wide_to_long(df, id_col='indID', alt_list=[1, 2],
|
|
22
|
+
## varying=['maasLocalPT', 'maasLDPT', 'maasTaxi', 'maasCarRental', 'maasCarshare', 'maasRideshare', 'maasBikeshare', 'maasTktInt', 'maasBkInt', 'maasRTInf', 'maasPers', 'maasPaymentType', 'maasCost'], alt_name='alt2', sep= "-")
|
|
23
|
+
##
|
|
24
|
+
##df = df.fillna(0)
|
|
25
|
+
##
|
|
26
|
+
##df['CHOICE'] = 1 * (df['pref'] == df['alt2'] )
|
|
27
|
+
##
|
|
28
|
+
##df.to_csv("akshay_long.csv", index=False)
|
|
29
|
+
|
|
30
|
+
# Estimation data, read from the working directory.
df = pd.read_csv('akshay_long_true.csv')

model = LatentClassModel()

# Covariates used in the class-membership specification (without the intercept).
_member_vars = ['InnerCity', 'InnerRegional', 'Under30', 'Over65', 'College', 'FullTime',
                'PartTime', 'Male', 'Children', 'Income', 'NDI']

# Attributes used in every class-specific specification.
_class_vars = ['LocalPTPayG', 'LDPTPayG', 'TaxiPayG', 'CarRentalPayG', 'CarsharePayG',
               'RidesharePayG', 'BikesharePayG',
               'LocalPTUnl', 'LDPTUnl', 'TaxiUnl', 'CarRentalUnl', 'CarshareUnl',
               'RideshareUnl', 'BikeshareUnl',
               'Cost', 'TktInt', 'BkInt', 'RTInf', 'Pers']

varnames = _member_vars + _class_vars

X = df[varnames].values
y = df['CHOICE'].values

# Four identical membership rows and five identical class rows.
member_params_spec = np.array([['_inter'] + _member_vars] * 4, dtype='object')
class_params_spec = np.array([_class_vars] * 5, dtype='object')

# Starting values for the membership parameters: 4 rows x 12 values, flattened.
_theta_start = [
    -1.321318, -0.254239, -0.137624, -9.159877, 0.009594, 1.189211,
    -0.084255, 0.437849, 0.222736, -2.338727, -0.220732, 0.206103,
    0.293479, 0.17829, -0.293836, -0.499868, -0.336, 0.588949,
    0.0357, 0.393709, -0.215125, -0.28694, -0.264146, -0.871409,
    -1.160788, 0.752398, -0.054771, 0.554518, -0.559022, 0.633359,
    -0.150176, 0.020715, -0.23028, 0.185878, -0.219888, -1.531753,
    -0.833134, -0.168312, -2.27768, 1.136705, 0.093996, 1.672507,
    1.29167, 1.49679, 0.423603, 0.249344, -0.832107, -2.778636,
]
init_class_thetas = np.array(_theta_start)

# Starting values for the class-specific parameters, one array per class.
init_class_betas = [
    # Class 1
    np.array([0.441269, 0.448334, 0.288787, 0.35502, 0.216816, 0.198564, 0.069477,
              0.346543, 0.233089, 0.323059, 0.333928, 0.149546, 0.124614, 0.0443181,
              -0.00741137, 0.036144, -0.00298227, 0.140595, 0.046312]),
    # Class 2
    np.array([0.801542, 0.483616, 0.546757, 0.498264, 0.206961, 0.367382, 0.00124702,
              0.587733, 0.398037, 0.5319, 0.369294, 0.246564, -0.100532, -0.141248,
              -0.019849, 0.038627, -0.104714, 0.173183, 0.0905047]),
    # Class 3
    np.array([1.28245, 0.704765, 0.8016, 0.145479, 0.340825, 0.554092, -0.0942558,
              12.6054, 83.2791, 27.7743, -14.1763, 26.7106, 21.6308, -2.87297,
              -32.6663, 0.528885, 0.375195, 0.367734, 0.343927]),
    # Class 4
    np.array([1.18916, 0.562234, 0.58024, -0.00850272, 0.122827, 0.619118, 0.0330975,
              0.970455, 0.24954, 0.698946, 0.172871, 0.64793, -0.395843, 0.00472563,
              -0.425557, 0.157351, 0.0453663, 0.194574, 0.0677801]),
    # Class 5 (integer zeros, as in the original literal)
    np.array([0] * 19),
]

# Degenerate (value, value) bounds pinning each theta to its starting value,
# and wide (-100, 100) bounds for each beta.
# NOTE(review): neither bounds object is passed to model.setup below - confirm
# whether they were meant to be used.
bounds_thetas = tuple((value, value) for value in _theta_start)
bounds_betas = [((-100, 100),) * 19 for _ in range(5)]

model.setup(
    X,
    y,
    ids=df['CHID'].values,
    panels=df['indID'].values,
    varnames=varnames,
    num_classes=5,
    class_params_spec=class_params_spec,
    member_params_spec=member_params_spec,
    init_class_betas=init_class_betas,
    init_class_thetas=init_class_thetas,
    alts=[1, 2],
    ftol_lccm=1e-4,
    gtol=1e-5,
    #verbose = 2
)
model.reassign_penalty(0.05)
model.fit()
model.summarise()
|
|
125
|
+
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
2
|
+
FUNCTIONS FOR BOX-COX TRANSFORMATION
|
|
3
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
4
|
+
|
|
5
|
+
''' ----------------------------------------------------------- '''
|
|
6
|
+
''' MAIN PARAMETERS: '''
|
|
7
|
+
''' ----------------------------------------------------------- '''
|
|
8
|
+
# X_matrix: Matrix to transform / array-like
|
|
9
|
+
# lmdas: lambda parameters for boxcox transformation/ array-like
|
|
10
|
+
# bxcx_X: Matrix after boxcox transformation / array-like
|
|
11
|
+
|
|
12
|
+
''' ---------------------------------------------------------- '''
|
|
13
|
+
''' LIBRARIES '''
|
|
14
|
+
''' ---------------------------------------------------------- '''
|
|
15
|
+
import numpy as np
|
|
16
|
+
|
|
17
|
+
''' ---------------------------------------------------------- '''
|
|
18
|
+
''' CONSTANTS '''
|
|
19
|
+
''' ---------------------------------------------------------- '''
|
|
20
|
+
# Lower bound that prep() applies to the data matrix before any transformation.
min_val = 1e-30     # Define smallest value
|
|
21
|
+
|
|
22
|
+
''' ---------------------------------------------------------- '''
|
|
23
|
+
''' FUNCTION '''
|
|
24
|
+
''' ---------------------------------------------------------- '''
|
|
25
|
+
def truncate_lower(x, minval):
    """Return a copy of ``x`` with every element below ``minval`` raised to it.

    The input is not modified.  Any array-like is accepted, and the result
    dtype follows NumPy promotion, so a fractional ``minval`` applied to an
    integer array yields floats instead of being truncated to an integer.
    NaN entries are kept as NaN.
    """
    return np.maximum(x, minval)
|
|
28
|
+
|
|
29
|
+
def truncate_higher(x, maxval):
    """Return a copy of ``x`` with every element above ``maxval`` lowered to it.

    The input is not modified.  Any array-like is accepted, and the result
    dtype follows NumPy promotion.  NaN entries are kept as NaN.
    """
    return np.minimum(x, maxval)
|
|
32
|
+
|
|
33
|
+
def truncate(x, minval, maxval):
    """Return a copy of ``x`` limited to the range [``minval``, ``maxval``].

    The lower bound is applied first and the upper bound second, so if the
    bounds are crossed the upper bound wins.  The input is not modified, and
    any array-like is accepted.
    """
    return np.minimum(np.maximum(x, minval), maxval)
|
|
38
|
+
|
|
39
|
+
''' ---------------------------------------------------------- '''
|
|
40
|
+
''' Function. x is an ndarray, lamda is a scalar '''
|
|
41
|
+
''' ---------------------------------------------------------- '''
|
|
42
|
+
def transform(x, lmda):
    """Apply the power transform ``(x**lmda - 1) / lmda`` element-wise.

    ``x`` is an ndarray and ``lmda`` a scalar.  For a non-zero ``lmda`` any
    NaN/inf in the result is replaced via ``np.nan_to_num``.  For
    ``lmda == 0`` the function returns ``log1p(x)``.

    NOTE(review): the textbook Box-Cox limit at lmda -> 0 is log(x), not
    log1p(x); the log1p form is kept here exactly as found - confirm.
    """
    if lmda != 0:
        powered = np.power(x, lmda)
        return np.nan_to_num((powered - 1) / lmda)
    return np.log1p(x)
|
|
51
|
+
|
|
52
|
+
''' ---------------------------------------------------------- '''
|
|
53
|
+
''' Function. x is an ndarray, lamda is a scalar '''
|
|
54
|
+
''' ---------------------------------------------------------- '''
|
|
55
|
+
def transform_derivative(x, lmda):
    """Evaluate the lambda-derivative expression used for the transform.

    ``x`` is an ndarray and ``lmda`` a scalar.  With ``L = log1p(x)`` and
    ``P = x**lmda`` the result is ``0.5 * L**2`` when ``lmda == 0`` and
    ``(lmda * P * L - P + 1) / lmda**2`` otherwise.  In the second case
    NaN/inf values are replaced via ``np.nan_to_num``, both in ``P`` and in
    the final value.

    NOTE(review): the analytic derivative of (x**lmda - 1) / lmda uses log(x);
    log1p(x) is kept here exactly as found - confirm.
    """
    log_term = np.log1p(x)
    if lmda == 0:
        return 0.5 * log_term ** 2

    powered = np.nan_to_num(np.power(x, lmda))
    numerator = lmda * powered * log_term - powered + 1
    return np.nan_to_num(numerator / np.power(lmda, 2))
|
|
70
|
+
|
|
71
|
+
def prep(X_matrix, lmdas):
    """Bound the inputs and allocate the float64 output buffer.

    Returns a 3-tuple: ``X_matrix`` floored at ``min_val``, ``lmdas`` limited
    to [-5, 5], and a zero-filled float64 array shaped like ``X_matrix``.
    """
    bounded_lmdas = truncate(lmdas, -5, 5)
    floored_X = truncate_lower(X_matrix, min_val)
    out = np.zeros_like(floored_X, dtype="float64")
    return floored_X, bounded_lmdas, out
|
|
79
|
+
|
|
80
|
+
''' ---------------------------------------------------------- '''
|
|
81
|
+
''' Function.Returns boxcox transformed matrix '''
|
|
82
|
+
''' ---------------------------------------------------------- '''
|
|
83
|
+
def _boxcox_apply(fn, X_matrix, lmdas, n_lead):
    """Apply ``fn(slice, lmda)`` along axis ``n_lead`` for each lambda.

    ``n_lead`` is the number of leading axes taken in full, so the i-th slice
    is ``X[:, ..., :, i]`` with ``n_lead`` colons.  Inputs are first passed
    through ``prep``.
    """
    X_matrix, lmdas, out = prep(X_matrix, lmdas)
    lead = (slice(None),) * n_lead
    for i, lmda in enumerate(lmdas):
        out[lead + (i,)] = fn(X_matrix[lead + (i,)], lmda)
    return out


def boxcox_transformation(X_matrix, lmdas):
    """Return the transformed matrix; variable i is indexed as ``X[:, :, i]``."""
    return _boxcox_apply(transform, X_matrix, lmdas, 2)


def boxcox_transformation_mixed(X_matrix, lmdas):
    """Return the transformed matrix; variable i is indexed as ``X[:, :, :, i]``."""
    return _boxcox_apply(transform, X_matrix, lmdas, 3)


''' ---------------------------------------------------------------------- '''
''' Function. Estimate derivative of boxcox transformation parameter (lambda)'''
''' ---------------------------------------------------------------------- '''
def boxcox_param_deriv(X_matrix, lmdas):
    """Return ``transform_derivative`` per variable, indexed as ``X[:, :, i]``."""
    return _boxcox_apply(transform_derivative, X_matrix, lmdas, 2)


def boxcox_param_deriv_mixed(X_matrix, lmdas):
    """Return ``transform_derivative`` per variable, indexed as ``X[:, :, :, i]``."""
    return _boxcox_apply(transform_derivative, X_matrix, lmdas, 3)
|
old_code/draws.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scipy.stats as ss
|
|
3
|
+
|
|
4
|
+
class Draws:
    """Generate draws for a set of mixing distributions.

    Uniform(0, 1) values are produced either from Halton sequences or from
    ``np.random.uniform`` and are then mapped per variable according to the
    distribution codes in ``rvdist`` / ``rvtransdist``.
    """

    def __init__(self, k=0, halton=1, rvdist=None, rvtransdist=None):
        """Store the configuration.

        Args:
            k: number of random variables; used to build the default lists.
            halton: truthy selects Halton draws, falsy selects random draws.
            rvdist: distribution code per variable; defaults to ``['n'] * k``.
            rvtransdist: same, for the second set of draws.
        """
        self.k = k
        self.fn_generate_draws = self.generate_draws_halton if halton else self.generate_draws_random
        self.rvdist = ['n'] * k if rvdist is None else rvdist
        self.rvtransdist = ['n'] * k if rvtransdist is None else rvtransdist

    def generate_draws(self, sample_size, n_draws, halton=True, chol_mat=None):
        """Return ``(draws, drawstrans)``, each at least 3-D, shaped (N, Kr, R).

        The generator used is the one chosen in ``__init__``; the ``halton``
        and ``chol_mat`` arguments are accepted but not used.
        """
        # Drop disabled (False) entries first so that each generated array has
        # exactly one column per remaining distribution.
        self.rvdist = [item for item in self.rvdist if item is not False]
        self.rvtransdist = [item for item in self.rvtransdist if item is not False]

        draws, drawstrans = self.fn_generate_draws(sample_size, n_draws)

        draws = np.atleast_3d(self.evaluate_distribution(self.rvdist, draws))
        drawstrans = np.atleast_3d(self.evaluate_distribution(self.rvtransdist, drawstrans))
        return draws, drawstrans  # (N,Kr,R)

    def generate_draws_halton(self, sample_size, n_draws):
        """Return Halton uniforms for ``rvdist`` and ``rvtransdist``."""
        return (self.generate_halton_draws(sample_size, n_draws, len(self.rvdist)),
                self.generate_halton_draws(sample_size, n_draws, len(self.rvtransdist)))

    def generate_draws_random(self, sample_size, n_draws):
        """Return pseudo-random uniforms for ``rvdist`` and ``rvtransdist``."""
        return (self.get_random_draws(sample_size, n_draws, len(self.rvdist)),
                self.get_random_draws(sample_size, n_draws, len(self.rvtransdist)))

    def evaluate_distribution(self, distr, values):
        """Map uniform values to each variable's distribution, in place.

        ``values`` is indexed as ``values[:, k, :]``.  Codes 'n', 'ln' and
        'tn' go through the standard-normal inverse CDF, 't' through a
        triangular mapping onto [-1, 1], and 'u' through ``2*v - 1``.  Any
        other code leaves the column unchanged.
        """
        for k, distr_k in enumerate(distr):
            if distr_k in ['n', 'ln', 'tn']:  # Normal based
                values[:, k, :] = ss.norm.ppf(values[:, k, :])
            elif distr_k == 't':  # Triangular
                values_k = values[:, k, :]
                # One formula for the lower half (v <= .5), another for the
                # upper half; the boolean masks select which one applies.
                values[:, k, :] = (np.sqrt(2 * values_k) - 1) * (values_k <= .5) + \
                    (1 - np.sqrt(2 * (1 - values_k))) * (values_k > .5)
            elif distr_k == 'u':  # Uniform
                values[:, k, :] = 2 * values[:, k, :] - 1
        return values

    def get_random_draws(self, sample_size, n_draws, n_vars):
        """Return uniform(0, 1) draws shaped (sample_size, n_vars, n_draws)."""
        return np.random.uniform(size=(sample_size, n_vars, n_draws))

    def generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=100, primes=None):
        """Return Halton draws shaped (sample_size, n_vars, n_draws).

        Variable i uses base ``primes[i % len(primes)]``.  With ``n_vars == 0``
        an empty (sample_size, 0, n_draws) array is returned.
        """
        if primes is None:
            primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47,
                      53, 59, 61, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109,
                      113, 127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
                      179, 181, 191, 193, 197, 199, 211, 223, 227, 229, 233,
                      239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293,
                      307, 311]

        if n_vars == 0:
            # np.stack rejects an empty list, so build the empty result directly.
            return np.empty((sample_size, 0, n_draws))

        draws = [self.halton_seq(sample_size * n_draws, prime=primes[i % len(primes)],
                                 shuffled=shuffled, drop=drop).reshape(sample_size, n_draws)
                 for i in range(n_vars)]
        return np.stack(draws, axis=1)  # (N,Kr,R)

    def halton_seq(self, length, prime=3, drop=100, shuffled=False):
        """Return ``length`` Halton values in base ``prime``, skipping ``drop``.

        A single array is filled iteratively: at digit level ``t`` the part
        already built is copied forward with ``i / prime**t`` added, for
        ``i = 1 .. prime - 1``.  With ``shuffled`` the result is permuted via
        ``np.random.shuffle``.
        """
        req_length = length + drop
        seq = np.zeros(req_length)
        seq_idx, t = 1, 1
        while seq_idx < req_length:
            d = 1 / prime ** t     # Step size for this digit level
            seq_size = seq_idx     # Size of the part built so far

            for i in range(1, prime):
                if seq_idx >= req_length:
                    break

                # Copy no more than what is still missing.
                max_seq = min(req_length - seq_idx, seq_size)
                seq[seq_idx: seq_idx + max_seq] = seq[:max_seq] + d * i
                seq_idx += max_seq
            t += 1

        seq = seq[drop:length + drop]  # Discard the first `drop` values
        if shuffled:
            np.random.shuffle(seq)
        return seq
|