metacountregressor 0.1.73__py3-none-any.whl → 0.1.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +258 -0
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +372 -5
- metacountregressor/main.py +297 -117
- metacountregressor/metaheuristics.py +43 -31
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +734 -832
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/METADATA +256 -35
- metacountregressor-0.1.83.dist-info/RECORD +20 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/WHEEL +1 -1
- metacountregressor-0.1.73.dist-info/RECORD +0 -18
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
@@ -9,12 +9,10 @@ import math
 import os
 import random
 import sys
-import time
 import warnings
 from collections import Counter
 from functools import wraps
 
-from tempfile import TemporaryFile
 import traceback
 import latextable
 import numpy as np
@@ -32,18 +30,25 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
-
-
-from .
-
+import time
+try:
+    from ._device_cust import device as dev
+    from .pareto_file import Pareto, Solution
+    from .data_split_helper import DataProcessor
+except ImportError:
+    from _device_cust import device as dev
+    from pareto_file import Pareto, Solution
+    from data_split_helper import DataProcessor
+
+from scipy import stats
 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
-#
-min_comp_val = 1e-
-max_comp_val = 1e+
+# define the computation boundary limits
+min_comp_val = 1e-160
+max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200
 
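The rewritten import block replaces the truncated relative import with a try/except fallback: the relative form works when solution.py is loaded as part of the metacountregressor package, and the plain form picks the same modules up from the working directory when the file is run as a standalone script. A minimal sketch of the pattern (the helper module name here is hypothetical):

    # helpers.py is a hypothetical sibling module of this file.
    try:
        # Succeeds when this file is imported as part of a package.
        from .helpers import some_helper
    except ImportError:
        # Succeeds when this file is executed directly, with no parent package.
        from helpers import some_helper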
@@ -117,24 +122,24 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+        self.nb_parma = 1
         self.bic = None
         self.other_bic = False
+        self.test_flag = 1
+        self.no_extra_param =1 #if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')
-        offset = None
 
-        #
-        self.constant_value =
-        self.negative_binomial_value =
+        # initialize values
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
-        self.verbose_safe =
-        self.zi_force = None # Analst want a zi model and formally declares the zi components below
-        self.zi_force_names = None # delare the zi components
+        self.verbose_safe = kwargs.get('verbose', 0)
         self.please_print = kwargs.get('please_print', 0)
         self.group_halton = None
-        self.grad_yes = False
+        self.grad_yes = kwargs.get('grad_est', False)
         self.hess_yes = False
         self.group_halton_test = None
         self.panels = None
@@ -145,41 +150,50 @@ class ObjectiveFunction(object):
         self.rdm_fit = None
         self.rdm_cor_fit = None
         self.dist_fit = None
-
+
         self.MAE = None
-        self.best_obj_1 =
-        self._obj_1 = 'bic'
-        self._obj_2 = 'MSE'
+        self.best_obj_1 = 1000000.0
+        self._obj_1 = kwargs.get('_obj_1', 'bic')
+        self._obj_2 = kwargs.get('_obj_2', 'MSE')
         self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
         self.full_model = None
         self.GP_parameter = 0
-        self.is_multi =
+        self.is_multi = kwargs.get('is_multi', False)
         self.complexity_level = 6
-        self._max_iterations_improvement =
+        self._max_iterations_improvement = 10000
         self.generated_sln = set()
         self.ave_mae = 0
-        #
+        # defalt paramaters for hs #TODO unpack into harmony search class
         self.algorithm = 'hs' # 'sa' 'de' also avialable
         self._hms = 20
         self._max_time = 60 * 60 * 24
         self._hmcr = .5
-        self._par = 0.3
+        self._par = 0.3 #dont think this gets useted
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000 # Number of
+        self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
+
         self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
-        self.method_ll = 'BFGS_2'
+        self.method_ll = kwargs.get('method', 'BFGS_2')
+
+        #self.method_ll = 'Nelder-Mead-BFGS'
         self.Keep_Fit = 2
         self.MP = 0
         # Nelder-Mead-BFGS
 
-        self._max_characteristics = 26
+        self._max_characteristics = kwargs.get('_max_vars', 26)
 
         self.beta_dict = dict
+        if 'model_terms' in kwargs:
+            print('change')
+            if kwargs.get('model_terms').get('group') is not None:
+                kwargs['group'] = kwargs.get('model_terms').get('group')
 
+            if kwargs.get('model_terms').get('panels') is not None:
+                kwargs['panels'] = kwargs.get('model_terms').get('panels')
         acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
                                 'algorithm', '_random_seed', '_max_time',
                                 'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -190,11 +204,6 @@ class ObjectiveFunction(object):
             if k in acceptable_keys_list:
                 self.__setattr__(k, self.tryeval(kwargs[k]))
 
-        if self.zi_force_names is not None:
-            self.zi_force = True
-            if 'const' not in self.zi_force_names:
-                self.zi_force_names = ['const'] + self.zi_force_names
-                print('did this work?')
 
         if 'complexity_level' in kwargs:
             self.complexity_level = kwargs['complexity_level']
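The constructor only honours keyword arguments named in `acceptable_keys_list`, coercing each value with `tryeval` before calling `__setattr__`. A self-contained sketch of that whitelist-and-coerce idea (illustrative names, not the package's API):

    import ast

    ACCEPTABLE = {'_par', '_max_time', 'algorithm'}

    def try_eval(value):
        # Turn "0.3" into 0.3 and "[1, 2]" into a list; leave other values alone.
        try:
            return ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError):
            return value

    class Config:
        def __init__(self, **kwargs):
            for key, val in kwargs.items():
                if key in ACCEPTABLE:
                    setattr(self, key, try_eval(val))

    cfg = Config(_par="0.3", algorithm="hs", unknown="silently ignored")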
@@ -202,35 +211,52 @@ class ObjectiveFunction(object):
         if 'instance_number' in kwargs:
             self.instance_number = str(kwargs['instance_number'])
         else:
+
+            print('no name set, setting name as 0')
             self.instance_number = str(0) # set an arbitrary instance number
 
         if not os.path.exists(self.instance_number):
-
+            if kwargs.get('make_directory', True):
+                print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+                os.makedirs(self.instance_number)
 
         if not hasattr(self, '_obj_1'):
+            print('_obj_1 required, define as bic, aic, ll')
             raise Exception
 
         self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
-
         self.pvalue_exceed = 0
         self._maximize = False # do we maximize or minimize?
-
-        # data_names = self._random_forest_preprocess(x_data, y_data)
+
         x_data = sm.add_constant(x_data)
+        standardize_the_data = 0
+        if standardize_the_data:
+            print('we are standardize the data')
+            x_data = self.self_standardize_positive(x_data)
+
         self._input_data(x_data, y_data)
+
+
         if y_data.ndim == 1:
             y_data = pd.DataFrame(y_data)
 
-
+        '''
+        #TODO ADD THIS IN LATER
+        splitter = DataProcessor(x_data, y_data, kwargs)
+        self.copy_class_attributes(splitter) #inherit the self objects
+        '''
+
         if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
             self.test_percentage = float(kwargs.get('test_percentage', 0))
             self.val_percentage = float(kwargs.get('val_percentage', 0))
             if self.test_percentage == 0:
+                print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+                print('continuing single objective')
+                time.sleep(2)
                 self.is_multi = False
 
-        if 'panels' in kwargs:
-            self.group_names = np.asarray(x_data[kwargs['group']].astype(
-                'category').cat._parent.dtype.categories)
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
+            self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
 
             x_data[kwargs['group']] = x_data[kwargs['group']].astype(
                 'category').cat.codes
@@ -243,59 +269,40 @@ class ObjectiveFunction(object):
 
                 N = len(np.unique(x_data[kwargs['panels']].values))
                 id_unique = np.unique(x_data[kwargs['panels']].values)
-
             except KeyError:
                 N = len(np.unique(x_data[kwargs['panels']]))
+                id_unique = np.unique(x_data[kwargs['panels']].values)
 
             training_size = int((1 - self.test_percentage - self.val_percentage) * N)
             ids = np.random.choice(N, training_size, replace=False)
             ids = id_unique[ids]
             train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
             test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
-
             df_train = x_data.loc[train_idx, :]
             df_test = x_data.loc[test_idx, :]
             y_train = y_data.loc[train_idx, :]
             y_test = y_data.loc[test_idx, :]
-
         else:
             N = len(x_data)
             training_size = int((1 - self.test_percentage - self.val_percentage) * N)
             ids = np.random.choice(N, training_size, replace=False)
             id_unique = np.array([i for i in range(N)])
             ids = id_unique[ids]
-
             train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
             test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+            df_train = x_data.loc[train_idx, :]
+            df_test = x_data.loc[test_idx, :]
+            y_train = y_data.loc[train_idx, :]
+            y_test = y_data.loc[test_idx, :]
 
-            try: # @IgnoreException
-                df_train = x_data.loc[train_idx, :]
-                df_test = x_data.loc[test_idx, :]
-                y_train = y_data.loc[train_idx, :]
-                y_test = y_data.loc[test_idx, :]
-            except:
-                # Convert all values to their real parts
-                df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-
-                # Replace the original DataFrame's numerical columns with real-valued ones
-                x_data[df_real.columns] = df_real
-
-                df_train = x_data.iloc[train_idx, :]
-                df_test = x_data.iloc[test_idx, :]
-                y_train = y_data.iloc[train_idx, :]
-                y_test = y_data.iloc[test_idx, :]
 
-        self.n_obs = N
+        #self.n_obs = N
         self._characteristics_names = list(self._x_data.columns)
-
-        self.alpha_hurdle = np.isin(self._characteristics_names,
-                                    [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
-
-        self._max_group_all_means = 1
+        self._max_group_all_means = 2
 
         exclude_this_test = [4]
-
-        if 'panels' in kwargs:
+
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
             self.panels = np.asarray(df_train[kwargs['panels']])
             self.panels_test = np.asarray(df_test[kwargs['panels']])
             self.ids = np.asarray(
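Both branches above sample IDs rather than rows, so when panels are present every observation belonging to a panel lands on the same side of the train/test divide. A standalone sketch of that grouped split on toy data (not the package's API):

    import numpy as np

    rng = np.random.default_rng(42)
    panel_col = np.array([1, 1, 2, 2, 3, 3, 4, 4])   # panel id for each row

    ids = np.unique(panel_col)
    train_ids = rng.choice(ids, int(0.75 * len(ids)), replace=False)

    train_idx = [i for i, p in enumerate(panel_col) if p in train_ids]
    test_idx = [i for i, p in enumerate(panel_col) if p not in train_ids]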
@@ -309,9 +316,10 @@ class ObjectiveFunction(object):
             X, Y, panel, group = self._arrange_long_format(
                 df_train, y_train, self.ids, self.ids, groupll)
             self.group_halton = group.copy()
-            Y = Y.astype('float')
             self.group_dummies = pd.get_dummies(group)
             Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+            Xnew = pd.DataFrame(Xnew, columns=X.columns)
             self.panel_info = panel_info
             self.N, self.P = panel_info.shape
             Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -319,12 +327,13 @@ class ObjectiveFunction(object):
             K = Xnew.shape[1]
             self._characteristics_names = list(Xnew.columns)
             XX = Xnew.values.reshape(self.N, self.P, K).copy()
+            XX = XX.astype('float')
             self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
             self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+            YY = YY.astype('float')
             self._x_data = XX.copy()
             self._y_data = YY.copy()
-            # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
             X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
             if np.max(group) > 50:
                 exclude_this_test = [4]
@@ -344,6 +353,7 @@ class ObjectiveFunction(object):
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
+                X = X.astype('float')
                 self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
                 Y = Y.astype('float')
@@ -353,9 +363,10 @@ class ObjectiveFunction(object):
 
             self._samples, self._panels, self._characteristics = self._x_data.shape
 
-
+
 
         else:
+            print('No Panels. Grouped Random Paramaters Will not be estimated')
             self.G = None
             self._Gnum = 1
             self._max_group_all_means = 0
@@ -372,82 +383,48 @@ class ObjectiveFunction(object):
             K = Xnew.shape[1]
             self._characteristics_names = list(Xnew.columns)
             XX = Xnew.values.reshape(self.N, self.P, K).copy()
-
-            # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
+            XX = XX.astype('float')
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+            YY = YY.astype('float')
             self._x_data = XX.copy()
             self._y_data = YY.copy()
-
+
             if self.is_multi:
                 X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
                 if np.max(group) > 50:
                     exclude_this_test = [4]
                 else:
                     exclude_this_test = []
-                # self.group_halton_test = group.copy()
                 X, Y, panel_info = self._balance_panels(X, Y, panel)
-
+
                 self.N_test, self.P_test = panel_info.shape
-
-                # self.group_dummies_test = pd.get_dummies(group)
-                # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
-
+                X = X.astype('float')
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
+                Y = Y.astype('float')
                 self._x_data_test = X.copy()
                 self.y_data_test = Y.copy()
-
+
             self._samples, self._panels, self._characteristics = self._x_data.shape
 
-            # draws and pvalue
-
-            if 'Offset' in self._characteristics_names:
-                offset = True
-            self.have_offset = offset
-            if self.have_offset is not None:
-                try:
-                    # offset for training data
-                    # define offset
-                    val_od = self.get_named_indices(['Offset'])
-                    self._offsets = self._x_data[:, :, val_od]
-
-                    # drop the offset from the data
-                    self._x_data = np.delete(self._x_data, val_od, axis=2)
-                    self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
-                    self._characteristics = len(self._characteristics_names)
-                    # self._x_data.drop(columns=['Offset'], inplace=True)
-
-                    # offset for testing data
-                    if self.is_multi:
-                        # define offset
-                        self._offsets_test = self._x_data_test[:, :, val_od]
-                        # self._offsets_test = self._x_data_test['Offset'].to_numpy()
-                        # self._offsets_test = np.reshape(
-                        #    self._offsets_test, (-1, 1))
-                        # drop the offset from the data
-                        self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
-                        # self._x_data_test.drop(columns=['Offset'], inplace=True)
-                except:
-                    # if no offset, set as 0
-                    self._offsets = np.zeros((self.N, self.P, 1))
-                    if self.is_multi:
-                        self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
-            else:
-                self._offsets = np.zeros((self.N, self.P, 1))
-                if self.is_multi:
-                    self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
 
+        #Define the offset into the data
+        self.process_offset()
         if self.is_multi:
             self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
-
             self._pareto_population = list()
-
+
+
+
+        self.Ndraws = kwargs.get('Ndraws', 200)
         self.draws1 = None
         self.initial_sig = 1 # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
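The reshape-and-cast pattern repeated through this hunk stores the balanced panel as a 3-D float array of shape (N, P, K): N panels, P padded observations per panel, and K regressors. A toy illustration of that layout (plain numpy):

    import numpy as np

    N, P, K = 3, 2, 4                            # 3 panels, 2 rows each, 4 regressors
    long_format = np.arange(N * P * K, dtype=float).reshape(N * P, K)

    panel_array = long_format.reshape(N, P, K)   # row n*P + p becomes entry [n, p, :]
    assert panel_array[1, 0, 2] == long_format[2, 2]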
@@ -459,17 +436,19 @@ class ObjectiveFunction(object):
             print('Setup Complete...')
         else:
             print('No Panels Supplied')
+            print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
         # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
         self._transformations = ["no", "sqrt", "log", "arcsinh"]
         self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
 
         if self.G is not None:
+            #TODO need to handle this for groups
             self._distribution = ["trad| " + item for item in self._distribution
                                   ] + ["grpd| " + item for item in self._distribution]
 
@@ -480,20 +459,33 @@ class ObjectiveFunction(object):
         self.coeff_ = None
 
         self.significant = 0
-        # define the states of our
+        # define the states of our explanatory variables
+
 
         self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
-                                                   kwargs.get('must_include', []))
+                                                   kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
         self._discrete_values = self._discrete_values + \
-
+            self.define_distributions_analyst(extra=kwargs.get('decisions', None))
 
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]] # add 2 for Generalized Poisson
 
+
+            model_types = [[0, 1]] # add 2 for Generalized Poisson
+            #model_types = [[0]]
+            #TODO change back and fix NB
+        model_t_dict = {'Poisson':0,
+                        "NB":1}
+        # Retrieve the keys (model names) corresponding to the values in model_types
+        model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+        # Print the formatted result
+        print(f'The type of models possible will consider: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
-            self._transformations) + model_types
+            self._transformations, kwargs.get('decisions',None)) + model_types
 
         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -506,21 +498,85 @@ class ObjectiveFunction(object):
         self.endog = None
         # solution parameters
         self._min_characteristics = 1
-
         self._max_hurdle = 4
 
-
+        #Manually fit from analyst specification
+        manual_fit = kwargs.get('Manual_Fit')
+        if manual_fit is not None:
+            print('fitting manual')
+            self.process_manual_fit(manual_fit)
+
+        self.solution_analyst = None
 
-        self.initial_sig = 1 # pass the test of a single model
-        self.pvalue_sig_value = 1
-        # embed the solution to how you want it
-        self.set_defined_seed(42)
-        a = self.modify_initial_fit(kwargs['Manual_Fit'])
-        self.makeRegression(a)
 
 
-
-
+
+    def over_ride_self(self, **kwargs):
+        """
+        Dynamically sets attributes on the instance based on the provided keyword arguments.
+        """
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+        print(f"Updated attributes: {kwargs}")
+
+    def remove_offset(self, data, indices):
+        """ Remove offset data from the dataset """
+        new_data = np.delete(data, indices, axis=2)
+        return new_data
+
+    def process_offset(self):
+        """ Process offset if it exists in the characteristics """
+        try:
+            if 'Offset' in self._characteristics_names:
+                self.have_offset = True
+                val_od = self.get_named_indices(['Offset'])
+                self._offsets = self._x_data[:, :, val_od]
+                self._x_data = self.remove_offset(self._x_data, val_od)
+                self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
+                self._characteristics = len(self._characteristics_names)
+
+                if self.is_multi:
+                    self._offsets_test = self._x_data_test[:, :, val_od]
+                    self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+                print(self._offsets)
+            else:
+                self.initialize_empty_offsets()
+
+        except Exception as e:
+            print(f"An error occurred: {e}") # Better error handling
+            self.initialize_empty_offsets()
+
+    def initialize_empty_offsets(self):
+        """ Initialize offsets to zero if none are found or on error """
+        self._offsets = np.zeros((self.N, self.P, 1))
+        if self.is_multi:
+            self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
+
+
+    def copy_class_attributes(self, class_object):
+        '''
+        Loop through an
+        '''
+
+        # Loop through all attributes of the car object and copy them
+        for attr in vars(class_object):
+            setattr(self, attr, getattr(class_object, attr))
+
+
+    def process_manual_fit(self, manual_fit):
+        """Process the manual fit configuration."""
+        self.initial_sig = 1 # Example: Initialize some signal
+        self.pvalue_sig_value = 1 # Example: Initialize another signal
+        self.set_defined_seed(42) # Set a specific seed
+
+        modified_fit = self.modify_initial_fit(manual_fit) # Modify the initial fit based on manual_fit
+        self.makeRegression(modified_fit) # Perform regression with the modified fit
+
+
+    def process_fit_specifications(self, find_constant, hard_code):
+        """
+        Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
+        """
         if hard_code:
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
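The new `process_offset` pulls any 'Offset' column out of the design arrays and stores it separately because an offset enters a count model's linear predictor with its coefficient fixed at one, giving mu = exp(X·beta + offset). A small illustration of why that matters for exposure (toy numbers only):

    import numpy as np

    beta = np.array([0.2, 0.05])
    x = np.array([1.0, 3.0])        # constant plus one covariate
    exposure = 2.5                  # e.g. years observed or segment length

    mu = np.exp(x @ beta + np.log(exposure))   # offset = log(exposure), coefficient fixed at 1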
@@ -559,7 +615,7 @@ class ObjectiveFunction(object):
                 constant_values.append(self.beta_dict['const'][0][1])
                 dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
             except:
-                print('
+                print('Error during regression analysis.')
             i += 1
 
         # Add the values of this iteration to the total
@@ -570,7 +626,7 @@ class ObjectiveFunction(object):
         constant_values_avg = [x / 100 for x in constant_values_total]
         dispersion_values_avg = [x / 100 for x in dispersion_values_total]
 
-
+        return constant_values_avg, dispersion_values_avg
 
 
     def _balance_panels(self, X, y, panels): # ToDO re
@@ -615,22 +671,7 @@ class ObjectiveFunction(object):
 
         return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
 
-
-        import rpy2.rinterface as rinterface
-        import rpy2.robjects as robjects
-        import rpy2.robjects as ro
-        from rpy2.robjects import pandas2ri
-        r = robjects.r
-        r['source']('testML.R')
-        pandas2ri.activate()
-        RF_function_r = robjects.globalenv['RF_plot']
-        RF_function_corr_r = robjects.globalenv['RF_plot_corr']
-        r_df = ro.conversion.py2rpy(self._x_data)
-        y_dy = ro.conversion.py2rpy(self._y_data)
-        RF_function_r(r_df, y_dy)
-
-        print('did this work')
-        RF_function_corr_r(r_df, y_dy)
+
 
     def print_system_utilization(self):
         # Get CPU usage
@@ -647,7 +688,8 @@ class ObjectiveFunction(object):
         mem_free = round(mem_info.available /
                          (1024 * 1024), 2) # Convert to MB
         print(
-            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total
+            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
+            f" mem free {mem_free})")
 
     def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
         '''converts the data to long format'''
@@ -665,59 +707,14 @@ class ObjectiveFunction(object):
             if group is not None:
                 group = group[sorted_idx]
 
-        return X, y, pnl, group
-
-        pandas_sort = 1
-        if pandas_sort:
-            if ids is not None:
-
-                pnl = panels if panels is not None else np.ones(len(ids))
-                df = X
-
-                df['panels'], df['ids'] = pnl, ids
-                new = 0
-                if new:
-                    cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                                     'formats': ['<f4', '<f4']})
-                    cols['panels'], cols['ids'] = pnl, ids
-                    sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-                    X, y = X[sorted_idx], y[sorted_idx]
-                    if panels is not None:
-                        panels = panels[sorted_idx]
-                    return X, y, panels
-
-                df = pd.concat([X.reset_index(drop=True),
-                                y.reset_index(drop=True)], axis=1)
-                sorted_df = df.sort_values(
-                    ['panels', 'ids']).reset_index(drop=True)
-
-                X, y, panels = sorted_df.iloc[:, :-
-                                              3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
-                if panels is not None:
-                    # panels = panels[sorted_idx]
-                    P_i = (
-                        (np.unique(panels, return_counts=True)[1])).astype(int)
-                    P = np.max(P_i)
-                    N = len(P_i)
-                    print(1)
-                return X, y, panels
-
-        if ids is not None:
-            X = np.asarray(X)
-            y = np.asarray(y)
-            pnl = panels if panels is not None else np.ones(len(ids))
+        return X, y.astype('float'), pnl, group
 
-
-                                             'formats': ['<f4', '<f4']})
-            cols['panels'], cols['ids'] = pnl, ids
-            sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-            X, y = X[sorted_idx], y[sorted_idx]
-            if panels is not None:
-                panels = panels[sorted_idx]
-
-            return X, y, panels
+        return X, y.astype('float'), panels
 
     def _random_forest_identify_transformations(self, x_data, y_data):
+        '''
+        use the random forrest model to identify best feature
+        '''
         # let's use the pprint module for readability
         import inspect
         from pprint import pprint
@@ -814,6 +811,8 @@ class ObjectiveFunction(object):
         if dispersion == 0:
             return None, None
         elif dispersion == 2 or dispersion == 1:
+            if self.no_extra_param:
+                return self.nb_parma, None
             return betas[-1], None
 
         elif dispersion == 3:
@@ -841,14 +840,65 @@ class ObjectiveFunction(object):
         par = np.nan_to_num(par)
         return par
 
-    def
+    def rename_distro(self, distro):
+        # Mapping dictionary
+        mapping = {
+            'normal': ['normal', 'n', 'Normal'],
+            'triangular': ['triangular', 't', 'Triangular'],
+            'uniform': ['uniform', 'u', 'Uniform'],
+            'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+            'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+        }
+
+        # Use list comprehension with the mapping
+        reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+        # Use the reversed mapping to find the corresponding key
+        new_distro = [reversed_mapping.get(i, i) for i in distro]
+        return new_distro
+
+    def define_distributions_analyst(self, extra = None):
+
+        if extra is not None:
+            set_alpha = []
+            for col in self._characteristics_names:
+                if col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+                    distro = self.rename_distro(distro)
+                    set_alpha = set_alpha+[distro]
+                elif col == 'const':
+                    set_alpha = set_alpha +[['normal']]
+            return set_alpha
+        return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+    def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
         'complexity level'
         '''
         2 is feature selection,
-        3 is random
-        4 is correlated random
+        3 is random parameters
+        4 is correlated random parameters
+
+        extra is the stuff defined by the Meta APP
         '''
         set_alpha = []
+        if extra is not None:
+            for col in self._characteristics_names:
+                if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
+                    set_alpha = set_alpha + [[1]]
+                elif col == 'Offset':
+                    set_alpha = set_alpha + [[1]]
+
+                elif col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+                    set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+            return set_alpha
+
+
         for col in self._characteristics_names:
             if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
                 set_alpha = set_alpha + [[1]]
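`rename_distro` canonicalises analyst-supplied distribution labels by inverting the alias map, and `reversed_mapping.get(i, i)` lets unrecognised labels pass through unchanged. Assuming the method behaves as written above (`obj` standing for any ObjectiveFunction instance), a call would look like:

    obj.rename_distro(['n', 'Triangular', 'u', 'weibull'])
    # -> ['normal', 'triangular', 'uniform', 'weibull']   (unknown labels pass through)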
@@ -866,7 +916,6 @@ class ObjectiveFunction(object):
     def pvalue_asterix_add(self, pvalues):
         pvalue_ast = list()
         for i in range(len(pvalues)):
-            signif = ""
             if float(pvalues[i]) < 0.001:
                 signif = "***"
             elif float(pvalues[i]) < 0.01:
@@ -899,8 +948,14 @@ class ObjectiveFunction(object):
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        '''
+        setup for naming of the model summary
+        '''
+        if self.no_extra_param and dispersion ==1:
+
+            betas = np.append(betas, self.nb_parma)
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -911,8 +966,7 @@ class ObjectiveFunction(object):
             rdm_fit = self.none_handler(self.rdm_fit)
         if rdm_cor_fit is None:
             rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
-
-        zi_fit = self.none_handler(self.zi_fit)
+
         dis_fit = [x for x in self.none_handler(
             self.dist_fit)] # check if dis fit is name
 
@@ -977,18 +1031,18 @@ class ObjectiveFunction(object):
             br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
                           for i in range(len(randvars))]
 
-
+
 
             names = fixednames + randvars + chol_names + \
-                br_w_names + chol_part_1 + chol +
+                br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
             self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
                                                                                  in range(len(chol_names)) for j in
                                                                                  range(
-                                                                                     i + 1)]
+                                                                                     i + 1)] + dispersion_name # TODO does this break
             name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
                                                                              chol_names[j] for i
                                                                              in range(len(chol_names)) for j in
-                                                                             range(i + 1)]
+                                                                             range(i + 1)] + dispersion_name
             index_dict = {}
             for i, name in enumerate(name_delete_2):
                 split_names = name.split('/')
@@ -1012,22 +1066,24 @@ class ObjectiveFunction(object):
             randvars = [x for x in self.none_handler(rdm_fit)]
             chol_names = [x for x in self.none_handler(rdm_cor_fit)]
 
-            zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]
 
-
+
+            names = fixednames + randvars + chol_names + big_hetro + dispersion_name
 
         names = np.array(names) # TODO check order
         self.print_transform = self.transform_id_names + \
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names
 
+        '''
         if betas is not None:
             try:
                 if len(betas) != len(names):
-                    print('
-
+                    print('standard_model', no_draws)
+
             except Exception as e:
                 print(e)
+        '''
 
 
 
@@ -1052,22 +1108,9 @@ class ObjectiveFunction(object):
         if not isinstance(self.pvalues, np.ndarray):
             raise Exception
 
-
-
-
-            if float(self.pvalues[i]) < 0.01:
-                signif = "***"
-            elif float(self.pvalues[i]) < 0.05:
-                signif = "**"
-            elif float(self.pvalues[i]) < 0.1:
-                signif = "*"
+        if 'nb' in self.coeff_names and self.no_extra_param:
+            self.pvalues = np.append(self.pvalues,0)
 
-        '''
-        print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
-                         self.stderr[i], self.zvalues[i], self.pvalues[i],
-                         signif
-                         ))
-        '''
         if self.please_print or save_state:
 
             if self.convergance is not None:
@@ -1082,17 +1125,22 @@ class ObjectiveFunction(object):
 
             if solution is not None:
                 print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
             self.pvalues = [self.round_with_padding(
                 x, 2) for x in self.pvalues]
             signif_list = self.pvalue_asterix_add(self.pvalues)
             if model == 1:
 
-                self.coeff_[-1] = np.
-                if self.
+                #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+                if self.no_extra_param:
+                    self.coeff_ = np.append(self.coeff_, self.nb_parma)
+                    self.stderr = np.append(self.stderr, 0.00001)
+                    self.zvalues = np.append(self.zvalues, 50)
+
+                elif self.coeff_[-1] < 0.25:
                     print(self.coeff_[-1], 'Warning Check Dispersion')
                     print(np.exp(self.coeff_[-1]))
-                    self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+                    #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
 
             self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
 
@@ -1175,14 +1223,7 @@ class ObjectiveFunction(object):
         self.save_to_file(latextable.draw_latex(
             table, caption=caption, caption_above=True), file_name)
 
-        # print('change this')
-        # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
-
-        # updating the column value/data
-        # df['Y'] = np.mean(self.lam, axis = (1,2))
 
-        # writing into the file
-        # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)
 
     def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
         """
@@ -1311,7 +1352,7 @@ class ObjectiveFunction(object):
         with open(filename, 'w') as file:
             file.write(content)
 
-    def define_poissible_transforms(self, transforms) -> list:
+    def define_poissible_transforms(self, transforms, extra= None) -> list:
         transform_set = []
         if not isinstance(self._x_data, pd.DataFrame):
             x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1322,6 +1363,7 @@ class ObjectiveFunction(object):
 
             if 'AADT' in self._characteristics_names[col]:
                 new_transform = [['log']]
+                #new_transform = [['no']]
                 transform_set = transform_set + new_transform
 
             elif all(x_data[col] <= 5):
@@ -1361,6 +1403,18 @@ class ObjectiveFunction(object):
 
         return transform_set
 
+    def poisson_mean_get_dispersion(self, betas, X, y):
+        eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+                                    return_EV=True,
+                                    zi_list=None, draws_grouped=None, Xgroup=None)
+
+        ab = ((y - eVy)**2 - eVy)/eVy
+        bb = eVy -1
+        disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+        gamma = disp.params[0]
+        #print(f'dispersion is {gamma}')
+        return gamma
+
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
                    model_nature=None, halton=1, testing=1, validation=0):
         'validation if mu needs to be calculated'
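The new `poisson_mean_get_dispersion` follows the usual moment-based recipe for seeding a negative binomial dispersion parameter: compute Poisson fitted means, then regress ((y - mu)^2 - mu) / mu on a function of mu by least squares and read the overdispersion off the slope. A self-contained sketch of the same idea on simulated data (standalone statsmodels call, using the textbook NB2 auxiliary regressor rather than this method's exact one):

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    mu = np.exp(0.5 + 0.3 * rng.normal(size=500))           # stand-in Poisson fitted means
    y = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu))    # overdispersed counts, true alpha = 0.5

    aux_lhs = ((y - mu) ** 2 - mu) / mu    # under NB2, E[aux_lhs] is roughly alpha * mu
    alpha_hat = sm.OLS(aux_lhs, mu).fit().params[0]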
@@ -1394,7 +1448,7 @@ class ObjectiveFunction(object):
                     XG = model_nature.get('XGtest')[:total_percent, :, :]
                 else:
                     XG = model_nature.get('XGtest')[total_percent:, :, :]
-
+
         else:
             if 'XG' in model_nature:
                 XG = model_nature.get('XG')
@@ -1516,7 +1570,7 @@ class ObjectiveFunction(object):
         5: herogeneity_in _means
 
 
-        a: how to
+        a: how to transform the original data
         b: grab dispersion '''
 
         # todo: better way
@@ -1540,22 +1594,9 @@ class ObjectiveFunction(object):
         alpha_hetro = [
             0 if x != 5 else 1 for x in vector[:self._characteristics]]
 
-        if self.zi_force == True:
 
-            return {
-                'alpha': alpha,
-                'alpha_rdm': alpha_rdm,
-                'alpha_cor_rdm': alpha_cor_rdm,
-                'alpha_grouped': alpha_grouped,
-                'alpha_hetro': alpha_hetro,
-                'distributions': distributions,
-                'transformations': transformations,
-                'exog_infl': self.zi_force_names,
-                'dispersion': dispersion
-            }
 
-
-        return {
+        return {
             'alpha': alpha,
             'alpha_rdm': alpha_rdm,
             'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1604,6 @@ class ObjectiveFunction(object):
             'alpha_hetro': alpha_hetro,
             'distributions': distributions,
             'transformations': transformations,
-
             'dispersion': dispersion
         }
 
@@ -1599,7 +1639,7 @@ class ObjectiveFunction(object):
 
     def repair(self, vector, reduce_to_this=10000): # todo get the number of parameters
         'Method to repair the model so that the number of paramaters is held within the constraint'
-
+
         new_j = 0
         # extract explanatory vector
         prmVect = vector[:self._characteristics]
@@ -1618,7 +1658,6 @@ class ObjectiveFunction(object):
                                int(np.min((5, self.complexity_level - 1)))])
 
         count_3 = prmVect.count(3)
-        this_many = count_3 * (count_3 + 1) / 2
 
        vector[:len(prmVect)] = prmVect.copy()
 
@@ -1637,8 +1676,7 @@ class ObjectiveFunction(object):
         # b = sum(prmVect) + self.is_dispersion(vector[-1])
         max_loops = 100 # Maximum number of loops
         counter = 0 # Counter variable to keep track of the number of loops
-
-        raise Exception('fhfhfhf')
+
 
         while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:
@@ -1686,8 +1724,6 @@ class ObjectiveFunction(object):
             counter += 1
 
         counter = 0
-        if any(isinstance(num, int) and num < 0 for num in vector):
-            raise Exception('fhfhfhf')
         while b < self._min_characteristics and counter < max_loops:
 
             weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1770,13 @@ class ObjectiveFunction(object):
         cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
         Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))
 
-
+
         Kchol = int((cor_l *
                      (cor_l + 1)) / 2)
         n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
         if block:
-            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh
-        return Kf, Kr, cor_l, Kr_b, Kchol, Kh
+            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
+        return Kf, Kr, cor_l, Kr_b, Kchol, Kh
 
     def find_index_of_block(self, lst, value):
         cumulative_sum = 0
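`Kchol` counts the free entries of the lower-triangular Cholesky factor for the correlated random parameters: row i of the triangle holds i + 1 entries, which sums to cor_l * (cor_l + 1) / 2. A one-line check of the formula:

    def chol_entries(cor_l: int) -> int:
        # Row i of a lower-triangular factor has i + 1 free entries.
        return sum(i + 1 for i in range(cor_l))

    assert chol_entries(4) == 4 * (4 + 1) // 2 == 10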
@@ -1821,8 +1857,7 @@ class ObjectiveFunction(object):
                 self.rdm_fit)):
             raise Exception('pop wrong for id names')
 
-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+
 
     def get_value_to_delete(self, idx, dispersion):
         block = self.get_num_params(True)
@@ -1858,8 +1893,7 @@ class ObjectiveFunction(object):
             self.dist_fit.pop(cc[b] + len(self.rdm_fit))
             self.transform_id_names.pop(
                 cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+
 
     def get_param_num(self, dispersion=0):
         a = np.sum(self.get_num_params()) + \
@@ -1884,13 +1918,16 @@ class ObjectiveFunction(object):
         elif dispersion == 4:
             return 2
         else:
-
+            if self.no_extra_param:
+                return 0
+            else:
+                return 1
 
     def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
                             return_violated_terms=0):
 
         num_params = len(pvalues)
-        Kf, Kr, Kc, Kr_b, Kchol, Kh
+        Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
 
         vio_counts = 0
         pvalues = np.array([float(string) for string in pvalues])
@@ -1899,6 +1936,7 @@ class ObjectiveFunction(object):
 
         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
+            slice_this_amount = 1 #TODO handle this
             if pvalues[-1] > sig_value:
                 vio_counts += 1
             subpvalues = pvalues[:-slice_this_amount].copy()
@@ -1915,18 +1953,14 @@ class ObjectiveFunction(object):
                     subpvalues[i] = 0
 
             sum_k += Kr_b
-            if Kchol > 0:
-                cc = [i for i
-                      in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
-
             lower_triangular = subpvalues[sum_k:sum_k + Kchol]
 
-
+
             # initialize matrix with zeros
-            matrix_alt = [[0] *
+            matrix_alt = [[0] * Kc for _ in range(Kc)]
             index = 0
 
-            for i in range(
+            for i in range(Kc):
                 for j in range(i + 1):
                     # fill in lower triangular entries
                     matrix_alt[i][j] = lower_triangular[index]
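The loop above rebuilds the square lower-triangular matrix from the flat slice of Cholesky p-values, consuming entries row by row. A standalone sketch of that unpacking with illustrative values:

    Kc = 3
    flat = [0.01, 0.20, 0.03, 0.50, 0.04, 0.08]      # row-major lower triangle

    matrix, index = [[0] * Kc for _ in range(Kc)], 0
    for i in range(Kc):
        for j in range(i + 1):
            matrix[i][j] = flat[index]
            index += 1
    # matrix == [[0.01, 0, 0], [0.20, 0.03, 0], [0.50, 0.04, 0.08]]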
@@ -2327,7 +2361,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 **
+        obj_1 = 10.0 ** 5
         obj_best = None
         sub_slns = list()
 
@@ -2338,7 +2372,7 @@ class ObjectiveFunction(object):
         try:
             self.repair(vector)
         except Exception as e:
-            print('
+            print('prolem repairing here')
             print(vector)
             print(e)
         layout = vector.copy()
@@ -2414,17 +2448,9 @@ class ObjectiveFunction(object):
 
         if obj_1 is not None:
             obj_1['layout'] = vector.copy()
-            # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
-            #    obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
-            # a = self.modifyn(model_mod)
-            # vector = self.modify_vector(
-            #    vector, alpha, alpha_rdm, alpha_cor_rdm)
             sub_vector = vector[:self._characteristics]
             dispersion_parm = vector[-1]
-
-                num_parm = sum(sub_vector)
-            else:
-                num_parm = sum(sub_vector) + 1
+
 
         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
             obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2483,7 @@ class ObjectiveFunction(object):
 
             self.Last_Sol = obj_1.copy()
 
-
+
 
         self.reset_sln()
         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2521,7 @@ class ObjectiveFunction(object):
         self.coeff_names = None
         self.draws1 = None
         self.coeff_ = None
-
+
         self.bic = None
         self.log_lik = None
         self.pvalues = None
@@ -2589,13 +2615,13 @@ class ObjectiveFunction(object):
     def set_defined_seed(self, seed):
        print('Benchmaking test with Seed', seed)
        np.random.seed(seed)
-
+
        random.seed(seed)
 
     def set_random_seed(self):
-        print('
+        print('Imbedding Seed', self._random_seed)
        np.random.seed(self._random_seed)
-
+
        random.seed(self._random_seed)
        return self._random_seed
 
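Both seed setters touch NumPy's generator and the stdlib's, presumably because the numerical routines draw from `np.random` while parts of the metaheuristic search use `random`; seeding only one of the two would leave the other stream irreproducible. The convention in miniature:

    import random

    import numpy as np

    def seed_everything(seed: int) -> None:
        # Seed both global generators so repeated runs replay the same draws.
        np.random.seed(seed)
        random.seed(seed)

    seed_everything(42)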
@@ -2627,7 +2653,7 @@ class ObjectiveFunction(object):
         self._hmcr = (
             self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
 
-
+
 
     def update_par(self, iteration, is_sin=False):
         """
@@ -2720,85 +2746,9 @@ class ObjectiveFunction(object):
|
|
|
2720
2746
|
print(e)
|
|
2721
2747
|
             print('f')
 
-    def negbinom_gradients(r, p, k, a=None):  # TODO: delete if wrong
-        """_summary_
 
-        Args:
-            r (_type_): rate paramaters or dispersion of the nb
-            p (_type_): probability
-            k (_type_): vector of (non-negative integer) quantiles.
-            a (_type_, optional): optional paramater, if none NB model, otherwise NB-Lindley model with Lindley paramater a.
 
-        Raises:
-            Exception: _description_
-            Exception: _description_
-            ValueError: _description_
-            Exception: _description_
-            Exception: _description_
 
-        Returns:
-            _type_: _description_
-        """
-        # fine the NegBinom PMF
-        import scipy.special as sps
-        negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k
-
-        # Calculate the gradient of the NegBinom PMF with respect to r and p
-        d_negbinom_pmf_dr = sps.comb(
-            k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
-        d_negbinom_pmf_dp = sps.comb(
-            k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k
-
-        if a is not None:
-            # Define the NegBinom-Lindley PMF
-            negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
-
-            # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
-            d_negbinom_lindley_pmf_dr = sps.comb(
-                a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_dp = sps.comb(
-                a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_da = sps.comb(
-                a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
-
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
-                                                            d_negbinom_lindley_pmf_da]
-        else:
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
-
-    def f(self, x, N, sig, mu):
-        return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
-
-    def poilog(self, n, mu, sig):
-        from scipy import integrate
-        if len(mu) > 1 or len(sig) > 1:
-            raise ValueError(
-                "vectorization of mu and sig is currently not implemented")
-        if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
-            raise ValueError("all n must be integers")
-        if any(n < 0):
-            raise ValueError("one or several values of n are negative")
-        if not np.all(np.isfinite(np.concatenate((mu, sig)))):
-            raise ValueError("all parameters should be finite")
-        if sig <= 0:
-            raise ValueError("sig is not larger than 0")
-        spos = np.where(n < 8)[0]
-        lpos = np.where(n >= 8)[0]
-        val = np.empty_like(n)
-
-        if spos.size > 0:
-            vali = np.empty(spos.size)
-            for i in range(spos.size):
-                try:
-                    vali[i] = integrate.quad(
-                        self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
-                except:
-                    vali[i] = 1e-300
-            valp = self.poilog(n[spos], mu, sig ** 2)[0]
-            val[spos] = np.maximum(vali, valp)
-        if lpos.size > 0:
-            val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
-        return val
 
     def negbinom_pmf(self, r, p, k, a=None):  # TODO: delete if wrong
        """_summary_
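The removed `negbinom_gradients` helper differentiated the NB pmf, but its r-derivative dropped the digamma(k + r) term of the binomial coefficient, and its p-derivative used (k + r)/(1 - p) where k/(1 - p) follows from the log-pmf. A corrected sketch (not part of either package version), written from the pmf Γ(k+r)/(Γ(r)·k!)·p^r·(1-p)^k:

```python
# Hedged sketch: log-pmf derivatives of the negative binomial, i.e. the
# quantities the removed negbinom_gradients helper was approximating.
import numpy as np
from scipy.special import digamma, gammaln

def negbinom_pmf_gradients(r, p, k):
    log_pmf = (gammaln(k + r) - gammaln(r) - gammaln(k + 1)
               + r * np.log(p) + k * np.log(1 - p))
    pmf = np.exp(log_pmf)
    d_dr = pmf * (digamma(k + r) - digamma(r) + np.log(p))  # note the digamma(k + r) term
    d_dp = pmf * (r / p - k / (1 - p))
    return d_dr, d_dp
```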
@@ -2828,45 +2778,7 @@ class ObjectiveFunction(object):
             negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
             return negbinom_lindley_pmf
 
-    def nbl_score(self, y, X, betas, alpha, theta):
-        from scipy.special import gammaln, psi
-        """
-        Calculate the Negative Binomial-lindley model score vector of the log-likelihood.
-
-        Parameters:
-        -----------
-        y : numpy array
-            The dependent variable of the model.
-        X : numpy array
-            The independent variables of the model.
-        beta : numpy array
-            The coefficients of the model.
-        alpha : float
-            The dispersion parameter of the Negative Binomial-lindley distribution.
-        theta : float
-            The theta parameter of the Negative Binomial-lindley distribution.
-
-        Returns:
-        --------
-        score : numpy array
-            The score vector of the Negative Binomial-lindley model log-likelihood.
-        """
-        alpha = betas[-1]
-        theta = betas[-2]
-        beta = betas[:-2]
-        zi = self.my_lindley(y, theta).ravel()
-
-        eta = np.dot(X, beta)
-        mu = np.exp(eta) * zi
-        p = 1 / (1 + mu * theta / alpha)
-        q = 1 - p
-        score = np.zeros(len(betas))
-
-        for i in range(len(y)):
-            score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
-                      - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
 
-        return score
 
     def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
         """
@@ -2909,7 +2821,7 @@ class ObjectiveFunction(object):
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters
@@ -2929,9 +2841,43 @@ class ObjectiveFunction(object):
 
         """
 
-
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
+
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
+
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
 
-
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
+
+        try:
+            if alpha is None:
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X
@@ -2973,7 +2919,8 @@ class ObjectiveFunction(object):
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
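The new `alpha` argument lets `NB_Score` receive a fixed dispersion instead of reading it from `params`, although as committed both branches of the `if alpha is None` test still take `np.exp(params[-1])`. For reference, a hedged sketch of the standard NB-2 per-observation score that the surviving `dparams`/`dalpha` block corresponds to (formulas from the usual NB-2 log-likelihood, not the package's exact code):

```python
import numpy as np
from scipy.special import digamma

def nb2_score_obs(y, mu, X, alpha):
    """Per-observation NB-2 score; columns are d/dbeta then d/dalpha."""
    a1 = 1.0 / alpha
    dparams = X * (a1 * (y - mu) / (a1 + mu))[:, None]
    # chain rule through alpha = 1/a1 gives the leading -a1**2 factor
    dalpha = -a1**2 * (digamma(a1 + y) - digamma(a1) + np.log(a1) + 1
                       - np.log(a1 + mu) - (a1 + y) / (a1 + mu))
    return np.column_stack([dparams, dalpha])
```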
@@ -3066,7 +3013,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3557,6 +3504,7 @@ class ObjectiveFunction(object):
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
 
+
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
             chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3584,7 +3532,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        #
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3611,7 +3559,30 @@ class ObjectiveFunction(object):
         br_mean = betas_m
         br_sd = betas_sd  # Last Kr positions
         # Compute: betas = mean + sd*draws
-
+        if len(br_sd) != draws.shape[1]:
+            #get the same size as the mean
+            betas_random = self.Br.copy()
+
+            '''
+            c = self.get_num_params()[3:5]
+
+            cor = []
+            for i in range(c[0]):
+                cor.append(i)
+
+            vall =[]
+            for i, val in enumerate(reversed(br_sd)):
+                vall.append()
+
+            remaining = draws.shape[1] - len(betas_sd)
+            '''
+
+        else:
+
+
+            betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)
 
         return betas_random
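The new guard falls back to the cached `self.Br` when the number of standard deviations no longer matches the draw dimension. A small shape sketch of the `else` branch, which is the usual mean-plus-scaled-draws construction for random coefficients:

```python
import numpy as np

N, K, R = 100, 3, 50                      # observations, random coefficients, draws
br_mean, br_sd = np.zeros(K), np.ones(K)  # illustrative values only
draws = np.random.normal(size=(N, K, R))
# broadcast (K,) means and sds against (N, K, R) draws
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
assert betas_random.shape == (N, K, R)
```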
@@ -3630,28 +3601,71 @@ class ObjectiveFunction(object):
         # if gamma <= 0.01: #min defined value for stable nb
         #     gamma = 0.01
 
+        #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
+
+        #gg = stats.poisson.rvs(g)
+
+
+
+
         endog = y
         mu = lam
+        ''''
+        mu = lam*np.exp(gamma)  #TODO check that this does not need to be multiplied
+        alpha = np.exp(gamma)
+
+        '''
         alpha = gamma
         size = 1.0 / alpha * mu ** Q
-
-
-
-
+
+        prob = size/(size+mu)
+
+
+
+        '''test'''
+
+
+        '''
+        size = 1 / np.exp(gamma) * mu ** 0
+        prob = size / (size + mu)
+        coeff = (gammaln(size + y) - gammaln(y + 1) -
+                 gammaln(size))
+        llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+        '''
+
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-            #
+            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #import time
+            #start_time = time.time()
+
+
+            # Measure time for negbinom_pmf
+            #start_time = time.time()
+            #for _ in range(10000):
+
 
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
+            #start_time = time.time()
+            #for _ in range(10000):
+            '''
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-
-
+            gg[np.isnan(gg)] = 1
+            '''
+            gg_alt = nbinom.pmf(y ,1/alpha, prob)
+            #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+            #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+            #print('check theses')
+            #gg = nbinom.pmf(y ,alpha, prob)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
 
         except Exception as e:
-            print(
-            return
+            print("Neg Binom error.")
+        return gg_alt
 
     def lindley_pmf(self, x, r, theta, k=50):
         """
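This hunk swaps the hand-rolled `gammaln` expression for `scipy.stats.nbinom.pmf` evaluated at `size = 1/alpha` and `prob = size/(size + mu)`; note the parameterisation also flips, since the retired expression used `alpha` itself as the size. A quick check (assuming Q = 0 so that `size = 1/alpha`) that the closed form and the SciPy call agree:

```python
import numpy as np
from scipy.stats import nbinom
from scipy.special import gammaln

y = np.arange(6)
mu, alpha = 2.5, 0.8
size = 1.0 / alpha
prob = size / (size + mu)
# closed-form NB pmf via log-gamma, matching nbinom's (size, prob) convention
gg = np.exp(gammaln(y + size) - gammaln(y + 1) - gammaln(size)
            + size * np.log(prob) + y * np.log(1 - prob))
assert np.allclose(gg, nbinom.pmf(y, size, prob))
```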
@@ -3718,7 +3732,7 @@ class ObjectiveFunction(object):
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size
@@ -3798,21 +3812,8 @@ class ObjectiveFunction(object):
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            #
-            #
-
-            # if abs(b_gam) < 0.01:
-            #     penalty += 1/np.abs(b_gam)
-
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-            # b_gam = 7.9
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-            # penalty += 1/np.max((0.01,abs(b_gam)))
-            # b_gam = model_nature['dispersion_penalty']
-
-            """
+            #b_gam = 1/np.exp(b_gam)
+            #print(b_gam)
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3820,21 +3821,21 @@ class ObjectiveFunction(object):
                 #b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam), 0)
 
-                b_gam = 0.
+                #b_gam = 0.001
             #
 
-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam
 
-
-                b_gam = min_comp_val
+            # if b_gam == 0:
+            #b_gam = min_comp_val
             #b_gam = 0.03
 
-
+            # b_gam = abs(b_gam)
 
-
+
 
         elif dispersion == 2:
             if b_gam >= 1:
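One behavioural detail worth flagging in the rewritten dispersion guard: `min(1, np.abs(b_gam), 0)` can never exceed 0 (the constant 0 is always the minimum), so the surviving penalty line is effectively a no-op, and the old hard cap at 4.5 is gone entirely. A sketch of what the non-positive branch now reduces to:

```python
import numpy as np

def penalise_nb_dispersion(b_gam, penalty):
    if b_gam <= 0:
        penalty += min(1, np.abs(b_gam), 0)  # min with 0 is always 0: no penalty added
    return b_gam, penalty
```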
@@ -3854,8 +3855,15 @@ class ObjectiveFunction(object):
     def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
 
         # print('this was 0')
-
+        if dispersion:
+            eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+            #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+            #print('check if this holds size')
+        else:
+            eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         eta = np.array(eta)
+
         # eta = np.float64(eta)
         # eta = np.dot(Xd, params_main)+offset[:,:,0]
         # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3874,7 +3882,7 @@ class ObjectiveFunction(object):
 
         else:
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+            eta = eta.astype('float')
             eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
         return eVd
 
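Both branches of the new `dispersion` test in `eXB_calc` currently build the same linear predictor, so the split appears only to reserve a place for a dispersion offset later (the commented `+dispersion` variant). A sketch of the shared computation, under the assumptions that `Xd` is (N, P, K), `params_main` is (K,), `offset` is (N, P, 1), and `EXP_UPPER_LIMIT` is an overflow-safe exponent cap defined elsewhere in the module:

```python
import numpy as np

EXP_UPPER_LIMIT = np.float64(87)  # assumption: illustrative overflow-safe cap

def exb_calc(Xd, params_main, offset):
    eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset)  # (N, P, 1)
    return np.exp(np.clip(eta.astype('float'), None, EXP_UPPER_LIMIT))
```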
@@ -3918,195 +3926,7 @@ class ObjectiveFunction(object):
             # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
             return -np.exp(XB) + y * XB - sc.gammaln(y + 1)
 
-        def loglik_zi(params, return_grad=False):
-            """
-            Loglikelihood for observations of Generic Zero Inflated model.
-
-            Parameters
-            ----------
-            params : array_like
-                The parameters of the model.
-
-            Returns
-            -------
-            loglike : ndarray
-                The log likelihood for each observation of the model evaluated
-                at `params`. See Notes for definition.
-
-            Notes
-            -----
-            .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
-                \\ln(1-w_{i})+L_{main\\_model}
-                where P - pdf of main model, L - loglike function of main model.
-
-            for observations :math:`i=1,...,n`
-            """
-            params_infl = params[:self.k_inflate]
-            params_main = params[self.k_inflate:]
-
-            y = self.endog
-            w = predict_logit(params_infl, exog_infl)
-
-            w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-            llf_main = loglik_obs_poisson(params_main, y)
-            dispersion = 0
-            b_gam = None
-            Xd = exog
-            eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
-            eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
-
-            llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
-            zero_idx = np.nonzero(y == 0)[0]
-            nonzero_idx = np.nonzero(y)[0]  # type: ignore
-
-            llf = np.zeros_like(y, dtype=np.float64)
-            llf[zero_idx] = (np.log(w[zero_idx] +
-                                    (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-            llf[nonzero_idx] = np.log(
-                1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-            if return_grad:
-                score_main = Xd.T @ (y - eVd.ravel())
-                L = np.exp(np.dot(Xd, params_main))
-                score_main = (self.endog - L)[:, None] * Xd
-
-                dldp = np.zeros(
-                    (exog.shape[0], len(params_main)), dtype=np.float64)
-                dldw = np.zeros_like(exog_infl, dtype=np.float64)
-
-                dldp[zero_idx, :] = (score_main[zero_idx].T *
-                                     (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
-                dldp[nonzero_idx, :] = score_main[nonzero_idx]
-
-                dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
                                     (1 - w[zero_idx]) *
-                                     (1 - np.exp(llf_main[zero_idx])) /
-                                     np.exp(llf[zero_idx])).T
-                dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
-                                         w[nonzero_idx]).T
-
-                return llf, np.hstack((dldw, dldp)).sum(axis=0)
-
-            else:
-
-                return llf
-
-        def zipoisson_logpmf(x, mu, w):
-            return _lazywhere(x != 0, (x, mu, w),
-                              (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
-                               sc.gammaln(x + 1.) - mu),
-                              np.log(w + (1. - w) * np.exp(-mu)))
-
-        def zipoisson_pmf(x, mu, w):
-            return np.exp(zipoisson_logpmf(x, mu, w))
-
-        def loglik_logit(params, endog_y, exog_x):  # this is predict I think
-            q = 2 * endog_y - 1
-            X = exog_x
-            return np.sum(np.log(cdf(q * np.dot(X, params))))
-
-        def predict_logit(params, exog=None, linear=False):
-            if exog is None:
-                exog = self.exog
-            if not linear:
-                return (cdf(np.dot(exog, params)))
-            else:
-                return (np.dot(exog, params))
-
-        def cdf(X):
-            """
-            The logistic cumulative distribution function
-
-            Parameters
-            ----------
-            X : array_like
-                `X` is the linear predictor of the logit model. See notes.
-
-            Returns
-            -------
-            1/(1 + exp(-X))
-
-            Notes
-            -----
-            In the logit model,
-
-            .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-                \\text{Prob}\\left(Y=1|x\\right)=
-                \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-            """
-            X = np.asarray(X)
-            return 1 / (1 + np.exp(-X))
-
-        llobs, grad = loglik_zi(betas, return_grad)
-        llf = np.sum(llobs)
-        if return_grad:
-            return -llf, -grad
-        else:
-            return -llf
-
-    def cdf_logit(self, X):
-        """
-        The logistic cumulative distribution function
-
-        Parameters
-        ----------
-        X : array_like
-            `X` is the linear predictor of the logit model. See notes.
-
-        Returns
-        -------
-        1/(1 + exp(-X))
-
-        Notes
-        -----
-        In the logit model,
-
-        .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-            \\text{Prob}\\left(Y=1|x\\right)=
-            \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-        """
-        X = np.asarray(X)
-        return 1 / (1 + np.exp(-X))
-
-    def predict_logit_part(self, params_infl, exog_infl, linear=False):
-
-        if not linear:
-            return (self.cdf_logit(np.dot(exog_infl, params_infl)))
-        else:
-            return (np.dot(exog_infl, params_infl))
-
-    def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
-
-        w = self.predict_logit_part(params_infl, exog_infl)
-
-        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-        llf_main = [1, 2, 3]  # TODO ge
-        zero_idx = np.nonzero(y == 0)[0]
-        nonzero_idx = np.nonzero(y)[0]
-
-        llf = np.zeros_like(y, dtype=np.float64)
-        llf[zero_idx] = (np.log(w[zero_idx] +
-                                (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-        llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-
-        return llf
-
-    def dPXL(self, x, alpha):
-        return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
-
-    # Define the gradient function
 
-    def poisson_lindley_gradient(self, params, exog, endog):
-        beta = params[-1]
-        mu = np.exp(np.dot(exog, params[:-1]))
-        q = beta / (1 + beta)
-        d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
-        d_beta = self.dpoisl(endog, beta).ravel()
-        d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
-
-        grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
-        der = np.sum(grad_n, axis=0)
-        return der, grad_n
 
     def dpoisl(self, x, theta, log=False):
         # if theta < 0:
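The long deletion above retires the inline zero-inflated machinery (`loglik_zi`, the logit CDF helpers, `ZeroInflate_W_setup`, `dPXL` and the Poisson-Lindley gradient). For reference, a compact sketch of the zero-inflated Poisson per-observation log-likelihood those helpers implemented, with `w` the inflation probability from the logit part:

```python
import numpy as np
from scipy.special import gammaln

def zip_loglik_obs(y, mu, w):
    eps = np.finfo(float).eps
    w = np.clip(w, eps, 1 - eps)
    llf_pois = -mu + y * np.log(mu) - gammaln(y + 1)        # Poisson log-pmf
    return np.where(y == 0,
                    np.log(w + (1 - w) * np.exp(llf_pois)),  # structural or sampling zero
                    np.log(1 - w) + llf_pois)                # positive counts
```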
@@ -4175,29 +3995,18 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
-
+
 
         elif dispersion == 2:
 
             proba_r = self.general_poisson_pmf(eVd, y, b_gam)
 
-        elif dispersion == 3:
-            fa, ba = self.get_dispersion_paramaters(betas, dispersion)
-            zi = self.my_lindley(y, ba)
-            proba_r = poisson.pmf(y, zi * eVd.ravel())
-            # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
-            # prob_2 = self.dpoisl(y, l_pam)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # print(1)
-            # proba_r = self.dpoisl(y, eVd)
 
-
-
-            self.zi = self.my_lindley(eVd, ba)
-            proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
+
+
             # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)
 
         elif dispersion == 'poisson_lognormal':
@@ -4210,7 +4019,7 @@ class ObjectiveFunction(object):
             # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
             proba_r = np.array(store)
             proba_r = np.atleast_2d(proba_r).T
-
+
 
         else:
             raise Exception('not implemented other modeling forms')
@@ -4219,7 +4028,7 @@ class ObjectiveFunction(object):
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
 
@@ -4227,6 +4036,8 @@ class ObjectiveFunction(object):
         if dispersion == 0 or dispersion == 3:
             return 0
         else:
+
+
             return 1
 
     def _prob_product_across_panels(self, pch, panel_info):
@@ -4267,7 +4078,7 @@ class ObjectiveFunction(object):
        # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):
 
        # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion))) #FIX
-        Kf2, Kr, Kc, Kr_b, Kchol, Kh
+        Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
 
         gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
             (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
@@ -4282,7 +4093,7 @@ class ObjectiveFunction(object):
             if y[i] == 0:
                 gr_e[i] = 0
 
-        if self.is_dispersion(dispersion):
+        if self.is_dispersion(dispersion) and not self.no_extra_param:
             gr_d = np.zeros((N, 1))
             if dispersion == 1:
                 # trying alt
@@ -4386,12 +4197,13 @@ class ObjectiveFunction(object):
                     br, draws_, brstd, dis_fit_long)  # (N,K,R)
                 dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                         einsum_model_form, dtype=np.float64)  # (N,K,R)
-                der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
-                der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-
-
+                #der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+                #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
+
+                der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
                 der_t = self._compute_derivatives(
-                    br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+                    br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
                 # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
                 der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
                     der_t * proba_n[:, None, :]  # (N,K,R)
@@ -4452,14 +4264,18 @@ class ObjectiveFunction(object):
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_e)) / Rlik  # (N,K)
         else:
-
-
-
-
+            if self.no_extra_param:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik  # (N,K)
+            else:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
+        grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+        grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-
-
+        grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+        grad_n = grad_n_sub
        grad = grad_n.sum(axis=0)
        return grad, grad_n
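The rewritten tail of the gradient assembly sanitises and clips the per-observation gradient, then mean-centres it. One consequence worth noting: because `grad_n` is replaced by its centred version before summing, the returned `grad` is numerically close to zero by construction. A sketch of the sequence:

```python
import numpy as np

def finalise_gradient(grad_n):
    grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
    grad_n = np.clip(grad_n, -100, 100)
    n = grad_n.shape[0]
    grad_n = grad_n - grad_n.sum(axis=0) / n  # centred rows now sum to ~0
    return grad_n.sum(axis=0), grad_n
```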
@@ -4521,9 +4337,9 @@ class ObjectiveFunction(object):
 
         elif dispersion == 1:
 
-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
                 return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                            neginf=-140)
 
@@ -4610,7 +4426,7 @@ class ObjectiveFunction(object):
         return proba_r.sum(axis=1), np.squeeze(proba_r)
 
     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
-        penalty_val = 0.
+        penalty_val = 0.1
        penalty_val_max = 130
 
        # print('change_later')
@@ -4626,8 +4442,8 @@ class ObjectiveFunction(object):
             if abs(i) > penalty_val_max:
                 penalty += abs(i)
 
-        #
-        #
+        #if abs(i) < penalty_val:
+        #    penalty += 5
 
         # penalty = 0
         return penalty
@@ -4716,8 +4532,10 @@ class ObjectiveFunction(object):
         return self._loglik_gradient(self, betas, *stuff)
 
     def get_br_and_bstd(betas, self):
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
-
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
+        Kr = Kr_a + Kr_c  #todo check if this works
+        print('check if this works')
+        br = betas[Kf_a:Kf_a + Kr]
         # Calculate the size of the br matrix
         br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)
 
@@ -4728,12 +4546,11 @@ class ObjectiveFunction(object):
         index = 0
         for i in range(br_size):
             for j in range(i, br_size):
-                br_std[j, i] = betas[
+                br_std[j, i] = betas[Kf_a + Kr + index]
                 index += 1
 
         brstd = br_std
-
-        print(brstd)
+
 
 
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4765,9 +4582,9 @@ class ObjectiveFunction(object):
         penalty = self._penalty_betas(
             betas, dispersion, penalty, float(len(y) / 10.0))
         self.n_obs = len(y)  # feeds into gradient
-        if draws is None and draws_grouped is None and (
+        if draws is None and draws_grouped is None and (model_nature is None or
                 'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
-
+            #TODO do i shuffle the draws
            if type(Xd) == dict:
                N, Kf, P = 0, 0, 0
                for key in Xd:
@@ -4775,13 +4592,13 @@ class ObjectiveFunction(object):
                     P += Xd[key].shape[1]
                     Kf += Xd[key].shape[2]
             else:
-                self.naming_for_printing(betas, 1, dispersion,
+                self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
                 N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
             betas = np.array(betas)
             Bf = betas[0:Kf]  # Fixed betas
 
             main_disper, lindley_disp = self.get_dispersion_paramaters(
-                betas, dispersion)
+                betas, dispersion)  #todo fix this up
             if lindley_disp is not None:
                 if lindley_disp <= 0:
                     penalty += 1
@@ -4805,36 +4622,20 @@ class ObjectiveFunction(object):
             llf_main = self.loglik_obs(
                 y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-
+            llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
             loglik = llf_main.sum()
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                # ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
-                # exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
 
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
 
                 loglik += 2*loglik
+                print('am i powering up')
                 penalty = self.regularise_l2(betas)
 
             if not np.isreal(loglik):
-                loglik = -
+                loglik = - 10000000.0
 
             output = (-loglik + penalty,)
             if return_gradient:
@@ -4842,16 +4643,21 @@ class ObjectiveFunction(object):
                 if return_gradient_n:
                     der, grad_n = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=True)
-                    return (-loglik + penalty, -der, grad_n)
+                    #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                    return scaled_tuple
                 else:
                     der = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=False)
-
-
+                    scaled_tuple = tuple(
+                        x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                    return scaled_tuple
+                    #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
             else:
-
+
+                return (-loglik + penalty)*self.minimize_scaler
         # Else, we have draws
-        self.n_obs = len(y) * self.Ndraws
+        self.n_obs = len(y) * self.Ndraws  #todo is this problematic
         penalty += self._penalty_betas(
             betas, dispersion, penalty, float(len(y) / 10.0))
 
@@ -4860,7 +4666,7 @@ class ObjectiveFunction(object):
         # Kf =0
         betas = np.array(betas)
         betas = dev.to_gpu(betas)  # TODO fix mepotnetially problem
-        self.naming_for_printing(betas, 0, dispersion,
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4908,7 +4714,7 @@ class ObjectiveFunction(object):
         # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
         #     print('I think this is fine')
         n_coeff = self.get_param_num(dispersion)
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
         if Kchol_a != Kchol:
             print('hold')
 
@@ -4923,7 +4729,9 @@ class ObjectiveFunction(object):
             Kf = 0
         else:
             if n_coeff != len(betas):
-                raise Exception
+                raise Exception(
+
+                )
         Bf = betas[0:Kf]  # Fixed betas
 
 
@@ -4949,11 +4757,11 @@ class ObjectiveFunction(object):
             # brstd), draws_)  # Get random coefficients, old method
             Br = self._transform_rand_betas(br,
                                             brstd, draws_)  # Get random coefficients
-            self.naming_for_printing(betas, dispersion=dispersion,
+            self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
             self.Br = Br.copy()
 
         else:
-            self.naming_for_printing(betas, dispersion=dispersion,
+            self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
             chol_mat = self._chol_mat(
                 len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
             self.chol_mat = chol_mat.copy()
@@ -5025,7 +4833,8 @@ class ObjectiveFunction(object):
             eVd = self.lam_transform(eVd, dispersion, betas[-1])
 
             if self.is_dispersion(dispersion):
-
+                if not self.no_extra_param:
+                    penalty, betas[-1] = self._penalty_dispersion(
                         dispersion, betas[-1], eVd, y, penalty, model_nature)
 
             '''
@@ -5069,38 +4878,22 @@ class ObjectiveFunction(object):
                 proba.append(dev.to_cpu(proba_))
 
             lik = np.stack(proba).sum(axis=0) / R  # (N, )
-            lik = np.clip(lik, min_comp_val,
+            lik = np.clip(lik, min_comp_val, max_comp_val)
             # lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
            llf_main = loglik
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main.ravel()  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
-            else:
 
-
+
+            loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
                 penalty += self.regularise_l2(betas)
-
+
             penalty += self.regularise_l2(betas)
             if not return_gradient:
 
-                output = (-loglik + penalty,)
+                output = ((-loglik + penalty)*self.minimize_scaler,)
                 if verbose > 1:
                     print(
                         f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -5130,19 +4923,24 @@ class ObjectiveFunction(object):
                     # Hinv = np.linalg.inv(H)
                     # except Exception:
                     # Hinv = np.linalg.pinv(H)
-
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                    return scaled_tuple
+                    #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
 
-                    return output
+                    #return output
                 else:
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                    return scaled_tuple
+                    #output = (-loglik + penalty, -grad)*self.minimize_scaler
 
-                    output
-
-                    return output
+                    #return output
        except Exception as e:
            traceback.print_exc()
            print(e)
 
-
+    def minimize_function(self, loglike):
+        r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+        return loglike/self.n_obs
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
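A recurring change across these hunks is `self.minimize_scaler`: the objective (and its gradients) handed to the optimizer is multiplied by it, and the fitting code later divides `optim_res['fun']` back out to recover the raw log-likelihood. A sketch of that contract; the scaler value here is an assumption, since its initialisation lies outside this diff:

```python
class ScaledObjective:
    minimize_scaler = 1e-3  # assumption: actual value is set in __init__

    def objective(self, loglik, penalty, grad):
        # what the optimizer sees: scaled negative log-likelihood and gradient
        return tuple(x * self.minimize_scaler
                     for x in (-loglik + penalty, -grad))

    def recover_loglikelihood(self, optim_fun, penalty):
        # undo the scaling when reporting fit statistics
        return -optim_fun / self.minimize_scaler - penalty
```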
@@ -5378,12 +5176,16 @@ class ObjectiveFunction(object):
         return H
 
     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        #method = 'BFGS'
         if method == "BFGS":
 
             try:
+                argbs = list(args)
 
-
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
 
             except:
                 return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
@@ -5406,7 +5208,7 @@ class ObjectiveFunction(object):
             H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
             result['Hessian'] = H
             result['hess_inv'] = np.linalg.pinv(H)
-
+
             standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
             return result
         # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5630,7 +5432,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']
@@ -5677,7 +5479,7 @@ class ObjectiveFunction(object):
         return a
 
     def fitRegression(self, mod,
-                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
 
         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5689,12 +5491,12 @@ class ObjectiveFunction(object):
         betas_est - array. Coefficients which maximize the negative log-liklihood.
         """
         # Set defualt method
-
-
-
+        #TODO, the inital fit worked but it throws
+
+
 
         sol = Solution()
-
+
         tol = {'ftol': 1e-8, 'gtol': 1e-6}
         is_delete = 0
         dispersion = mod.get('dispersion')
@@ -5706,10 +5508,7 @@ class ObjectiveFunction(object):
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-        # method2 = 'BFGS_2'
 
-        # method2 = 'BFGS_2'
-        # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None
@@ -5726,8 +5525,10 @@ class ObjectiveFunction(object):
         _g, pg, kg = 0, 0, 0
 
         dispersion_param_num = self.is_dispersion(dispersion)
+        if self.no_extra_param:
+            dispersion_param_num =0
 
-        paramNum = self.get_param_num(dispersion)
+        #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5753,7 +5554,7 @@ class ObjectiveFunction(object):
                 XX_test = mod.get('Xr_test')
 
         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
 
         if method == 'L-BFGS-B':
             if dispersion == 0:
@@ -5780,17 +5581,28 @@ class ObjectiveFunction(object):
         else:
             bb[0] = self.constant_value
             if dispersion == 1:
-
+                if not self.no_extra_param:
+                    bb[-1] = self.negative_binomial_value
         bounds = None
 
+
+
         # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
         hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
-
+
+        if self.no_extra_param:
+            dispersion_poisson = 0
+            initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-
+                                                dispersion_poisson, 0, False, 0, None, None, None, None, None,
                                                 mod),
                                           method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                           bounds=bounds)
+            if dispersion:
+                nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+
+
+
 
         if method2 == 'L-BFGS-B':
             if hasattr(initial_beta.hess_inv, 'todense'):
@@ -5803,7 +5615,7 @@ class ObjectiveFunction(object):
         if initial_beta is not None and np.isnan(initial_beta['fun']):
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                0, False, 0, None,
+                                                0, False, 0, None, None, None, None, None, mod),
                                           method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
         if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5827,24 +5639,24 @@ class ObjectiveFunction(object):
                                   loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
                 self.naming_for_printing(
-                    initial_beta['x'], 1, dispersion,
+                    initial_beta['x'], 1, dispersion, model_nature=mod)
 
                 if self.is_multi:
                     in_sample_mae = self.validation(
                         initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         testing=0)
 
                     sol.add_objective(TRAIN=in_sample_mae)
                     MAE_out = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                     sol.add_objective(TEST=MAE_out)
 
                    if self.val_percentage >0:
                         MAE_VAL = self.validation(
                             initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                            rdm_cor_fit=self.rdm_cor_fit,
+                            rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                             validation=1)
                         sol.add_objective(VAL=MAE_VAL)
                 if sol[self._obj_1] <= self.best_obj_1:
@@ -5888,7 +5700,7 @@ class ObjectiveFunction(object):
 
             b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
                 self.none_handler(self.rdm_fit)) + len(
-                self.none_handler(self.rdm_cor_fit)) else b[i] / 1
+                self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
         else:
             b = bb
 
@@ -5898,14 +5710,15 @@ class ObjectiveFunction(object):
             else:
                 b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
         if dispersion == 1:
-
-
-            b[-1]
+            if not self.no_extra_param:
+                b[-1] = np.abs(b[-1])
+                if b[-1] > 10:
+                    b[-1] = 5
         elif dispersion == 2:
             b[-1] = .5
         if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
 
-            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh= self.get_num_params()
             if Kh > 0:
                 Kh_e = mod.get('XH').shape[-1]
                 Kh_range = Kh - Kh_e
@@ -5949,9 +5762,6 @@ class ObjectiveFunction(object):
 
                         bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                         count += 1
-
-
-
                     elif ii < jj:
                         if bob2[count] > 0:
 
@@ -6024,18 +5834,35 @@ class ObjectiveFunction(object):
                 mod['dispersion_penalty'] = np.abs(b[-1])
             grad_args = (
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-
+                None, None, draws_grouped, XG, mod)
             # self.gradients_est_yes = (1, 1)
 
             if draws is None and draws_hetro is not None:
                 print('hold')
-
-
-                                        self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
-                                        method=method2, tol=tol['ftol'],
-                                        options={'gtol': tol['gtol']}, bounds=bounds,
-                                        hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+            #self.grad_yes = True
+            #self.hess_yes = True
 
+            if self.no_extra_param:
+                dispersion_poisson = 0
+                betas_est = self._minimize(self._loglik_gradient, b, args=(
+                    X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                    method=method2, tol=tol['ftol'],
+                    options={'gtol': tol['gtol']}, bounds=bounds,
+                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+                if dispersion:
+                    initial_fit_beta = betas_est.x
+                    parmas = np.append(initial_fit_beta, nb_parma)
+                    self.nb_parma = nb_parma
+                    #print(f'neg binomi,{self.nb_parma}')
+                    betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+                        X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                        self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                        method=method2, tol=tol['ftol'],
+                        options={'gtol': tol['gtol']}, bounds=bounds,
+                        hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+            #print('refit with estimation of NB')
             # self.numerical_hessian_calc = True
             if self.numerical_hessian_calc:
                 try:
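The block above introduces a two-stage strategy when `self.no_extra_param` is set: first fit the Poisson model (`dispersion_poisson = 0`), then derive an NB dispersion from that fit via `poisson_mean_get_dispersion`, and finally refit with the result held in `self.nb_parma`. That helper's body lies outside this diff; a Pearson-style moment estimator is shown below only as a plausible stand-in, not as the package's actual implementation:

```python
import numpy as np

def moment_dispersion(y, mu, floor=1e-6):
    """Method-of-moments alpha from Var(y) = mu + alpha * mu**2 (assumption)."""
    alpha = ((y - mu) ** 2 - mu).sum() / (mu ** 2).sum()
    return max(alpha, floor)
```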
@@ -6050,7 +5877,7 @@ class ObjectiveFunction(object):
                     betas_est = self._minimize(self._loglik_gradient, b, args=(
                         X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                         self.rdm_cor_fit,
-
+                        None, None, draws_grouped, XG, mod),
                         method=method2, tol=tol['ftol'],
                         options={'gtol': tol['gtol']})
 
@@ -6059,7 +5886,7 @@ class ObjectiveFunction(object):
 
             if np.isfinite(betas_est['fun']):
                 self.naming_for_printing(
-                    betas_est['x'], 0, dispersion,
+                    betas_est['x'], 0, dispersion, model_nature=mod)
 
             if method2 == 'L-BFGS-B':
 
@@ -6086,7 +5913,7 @@ class ObjectiveFunction(object):
 
             paramNum = len(betas_est['x'])
             self.naming_for_printing(
-                betas_est['x'], 0, dispersion,
+                betas_est['x'], 0, dispersion, model_nature=mod)
 
             sol.add_objective(bic=bic, aic=aic,
                               loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -6096,19 +5923,19 @@ class ObjectiveFunction(object):
                 try:
 
                     in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                                    rdm_cor_fit=self.rdm_cor_fit,
+                                                    rdm_cor_fit=self.rdm_cor_fit,
                                                     model_nature=mod, testing=0)
                     sol.add_objective(TRAIN=in_sample_mae)
                     y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
                     Xr_grouped_test = mod.get('Xrtest')
                     MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                               rdm_cor_fit=self.rdm_cor_fit,
+                                               rdm_cor_fit=self.rdm_cor_fit,
                                                model_nature=mod)
 
                     sol.add_objective(TEST=MAE_test)
-                    if self.val_percentage >0:
+                    if self.val_percentage > 0:
                         MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                                  rdm_cor_fit=self.rdm_cor_fit,
+                                                  rdm_cor_fit=self.rdm_cor_fit,
                                                   model_nature=mod, validation=1)
                         sol.add_objective(VAL=MAE_val)
 
@@ -6226,8 +6053,7 @@ class ObjectiveFunction(object):
         self.rdm_cor_fit = [x for x, y in zip(
             select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
 
-
-        # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
+
         # if alpha_grouped is not None:
         self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
         self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6335,7 +6161,7 @@ class ObjectiveFunction(object):
         return delim + self._model_type_codes[dispersion]
 
     def self_standardize_positive(self, X):
-        scaler =
+        scaler = MinMaxScaler()
         if type(X) == list:
             return X
 
@@ -6345,12 +6171,26 @@ class ObjectiveFunction(object):
             # Reshaping to 2D - combining the last two dimensions
             df_tf_reshaped = X.reshape(original_shape[0], -1)
             df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
-            df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+            #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
            # Reshape back to original 3D shape if necessary
            df_tf = df_tf_scaled.reshape(original_shape)
            return df_tf
         else:
-
+            # Initialize the MinMaxScaler
+            scaler = MinMaxScaler()
+            float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+            non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+            # Fit the scaler to the float columns and transform them
+            X[float_columns] = scaler.fit_transform(X[float_columns])
+            # Fit the scaler to the data and transform it
+            #scaled_data = scaler.fit_transform(X)
+
+            # Convert the result back to a DataFrame
+            #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+            return X
 
     def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
                                    *args, **kwargs):
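The new DataFrame branch of `self_standardize_positive` MinMax-scales numeric columns while excluding constant and offset columns; note that its exclusion list contains the fused literal `'offset, "EXPOSE'` where two separate entries were presumably intended. A cleaned-up sketch of the apparent intent (the corrected exclusion list is an assumption):

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def scale_numeric(X: pd.DataFrame) -> pd.DataFrame:
    scaler = MinMaxScaler()
    keep_out = ['const', 'offset', 'EXPOSE', 'Constant', 'constant']  # assumed intended list
    float_cols = (X.select_dtypes(include=['float64', 'float32', 'int'])
                   .columns.difference(keep_out))
    X[float_cols] = scaler.fit_transform(X[float_cols])  # non-numeric columns untouched
    return X
```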
@@ -6405,14 +6245,15 @@ class ObjectiveFunction(object):
|
|
|
6405
6245
|
df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
|
|
6406
6246
|
t, idx, df_test[:, :, idx])
|
|
6407
6247
|
if np.max(df_tf[:, :, idx]) >= 77000:
|
|
6408
|
-
|
|
6248
|
+
#TODO need to normalise the data
|
|
6249
|
+
|
|
6250
|
+
print('should not be possible')
|
|
6409
6251
|
|
|
6410
6252
|
self.define_selfs_fixed_rdm_cor(model_nature)
|
|
6411
6253
|
indices = self.get_named_indices(self.fixed_fit)
|
|
6412
6254
|
indices5 = self.get_named_indices(self.hetro_fit)
|
|
6413
6255
|
|
|
6414
|
-
|
|
6415
|
-
model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
|
|
6256
|
+
|
|
6416
6257
|
|
|
6417
6258
|
x_h_storage = []
|
|
6418
6259
|
x_h_storage_test = []
|
|
@@ -6445,7 +6286,7 @@ class ObjectiveFunction(object):
|
|
|
6445
6286
|
if XG is not None:
|
|
6446
6287
|
indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
|
|
6447
6288
|
self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
|
|
6448
|
-
XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :,
|
|
6289
|
+
XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
|
|
6449
6290
|
model_nature['XG'] = XG
|
|
6450
6291
|
model_nature['XGtest'] = XGtest
|
|
6451
6292
|
|
|
@@ -6463,7 +6304,7 @@ class ObjectiveFunction(object):
|
|
|
6463
6304
|
model_nature['XH'] = XH
|
|
6464
6305
|
X_test = None
|
|
6465
6306
|
if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
|
|
6466
|
-
raise Exception('there is some kind of error')
|
|
6307
|
+
raise Exception('there is some kind of error in X')
|
|
6467
6308
|
|
|
6468
6309
|
# numpy data setup fpr estimation
|
|
6469
6310
|
indices2 = self.get_named_indices(self.rdm_fit)
|
|
@@ -6488,7 +6329,8 @@ class ObjectiveFunction(object):
|
|
|
6488
6329
|
Xr_test = None
|
|
6489
6330
|
model_nature['Xr_test'] = Xr_test
|
|
6490
6331
|
if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
|
|
6491
|
-
print('
|
|
6332
|
+
print('Not Possible')
|
|
6333
|
+
raise Exception
|
|
6492
6334
|
if Xr.size == 0:
|
|
6493
6335
|
Xr = None
|
|
6494
6336
|
Xr_test = None
|
|
@@ -6509,10 +6351,10 @@ class ObjectiveFunction(object):
                obj_1.add_layout(layout)

                model_form_name = self.check_complexity(
-                    self.fixed_fit, self.rdm_fit, self.rdm_cor_fit,
+                    self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)

                obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
-                               self.rdm_cor_fit.copy(), model_form_name,
+                               self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
                if not isinstance(obj_1, dict):
                    raise Exception('should not be possible')

@@ -6540,31 +6382,32 @@ class ObjectiveFunction(object):
            else:
                obj_1 = Solution()
                self.significant = 3
-                print('
+                print('not_implemented yet')  # TODO: check this for exceeding values

            if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
                self.bic = obj_1['bic']
                self.pvalues = pvalues
-                if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
+                if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                    # todo: probably delete
                    self.naming_for_printing(
-                        pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
+                        pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                        obj_1, model_nature)
                else:
                    if is_delete == 0:
                        # todo: probably delete
                        self.naming_for_printing(
                            pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-
+                            obj_1, model_nature)
                self.coeff_ = betas
                self.stderr = stderr
                self.zvalues = zvalues
                self.log_lik = log_lik
                if self.significant == 0:

-
-
-
+
+                    if not self.test_flag:
+                        alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                            self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)

                return obj_1, model_nature

@@ -6581,8 +6424,9 @@ class ObjectiveFunction(object):
                    self.significant = 3

                    return obj_1, model_nature
-
-
+            if not self.test_flag:
+                alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                    self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
            if self.grab_transforms:

                if is_halton and self.significant == 1:
@@ -6611,6 +6455,53 @@ class ObjectiveFunction(object):

        return obj_1, model_nature

+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIX
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+                corr_indices.append(rv_count_all - 1)  # TODO: what does this do
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (obs. prob. minus predicted prob.) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
+
    def modifyn(self, data):
        select_data = self._characteristics_names
        alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
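The two nested comprehensions in the new get_X_tril enumerate the lower triangle of the correlated block's Cholesky factor: draws_tril_idx gives the draw (column) index and X_tril_idx the variable (row) index of each triangular element. An equivalent construction with np.tril_indices, shown as a sketch with toy positions:

import numpy as np

corr_indices = [2, 3, 4]                        # positions of three correlated random terms
rows, cols = np.tril_indices(len(corr_indices))
draws_tril_idx = np.array(corr_indices)[cols]   # -> [2 2 3 2 3 4], draw index per element
X_tril_idx = np.array(corr_indices)[rows]       # -> [2 3 3 4 4 4], variable index per element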
@@ -6692,7 +6583,7 @@ class ObjectiveFunction(object):
        alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
        alpha_cor_rdm = alpha_cor_rdm.tolist()
        alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
-        alpha_group_rdm = alpha_group_rdm.tolist()
+        alpha_group_rdm = alpha_group_rdm.tolist()  # TODO: will this ever trigger?
        return alpha, alpha_rdm, alpha_cor_rdm

    def show_transforms(self, fix, rdm):
@@ -6818,28 +6709,39 @@ class ObjectiveFunction(object):
        # N, D = draws.shape[0], draws.shape[1]
        N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
        der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:

-            Br_come_one = self.Br.copy()
        # betas_random = self._transform_rand_betas(betas, betas_std, draws)
-
+        # TODO: make sure this works for ln and truncated normal
        if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+
+            # print('check this, intersection should not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            # TODO: need to get the structure of the rdms
            for k, dist_k in enumerate(distribution):
                if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                    der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                    der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')

+        if der.shape[1] != draws.shape[1]:
+            print('why')
        return der

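The rewritten block replaces the plain copy of self.Br with _transform_rand_betas because the chain rule needs the derivative of the transformed coefficient with respect to the underlying normal coefficient: for a log-normal transform exp(b) the derivative is exp(b) itself (hence der[:, k, :] = Br_come_one[:, k, :]), while a truncated-normal transform max(b, 0) has an indicator derivative (hence 1 * (Br_come_one[:, k, :] > 0)). A toy sketch under that reading of the transforms, which is an assumption about the helpers, not the package's exact code:

import numpy as np

b = np.array([-0.5, 0.2, 1.3])  # underlying normal coefficients
ln_beta = np.exp(b)             # assumed ln_normal transform
d_ln = ln_beta                  # d/db exp(b) = exp(b): the transformed beta itself
tn_beta = np.maximum(b, 0.0)    # assumed tn_normal (truncated) transform
d_tn = (b > 0).astype(float)    # indicator derivative, as in 1 * (Br_come_one > 0)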
    def _copy_size_display_as_ones(self, matrix):
-
-        please = matrix.shape
-        der = dev.np.ones((please), dtype=matrix.dtype)
+        der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
        return der

    def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):
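prepare_halton (signature above) supplies the quasi-random draws used for the random parameters. For context, a textbook Halton radical-inverse generator; halton_draws is a hypothetical helper shown for illustration, not the package's implementation:

import numpy as np

def halton_draws(n_sample, dim, primes=(2, 3, 5, 7, 11, 13)):
    """Return an (n_sample, dim) array of Halton points in (0, 1)."""
    out = np.zeros((n_sample, dim))
    for d in range(dim):
        base = primes[d]
        for i in range(n_sample):
            f, x, n = 1.0, 0.0, i + 1  # start at 1 to skip the zero point
            while n > 0:
                f /= base
                x += f * (n % base)
                n //= base
            out[i, d] = x
    return out

draws = halton_draws(8, 2)  # first column: 1/2, 1/4, 3/4, 1/8, ...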
|