metacountregressor 0.1.73__py3-none-any.whl → 0.1.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +115 -0
- metacountregressor/main.py +51 -72
- metacountregressor/metaheuristics.py +25 -24
- metacountregressor/solution.py +281 -694
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/METADATA +78 -20
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/RECORD +10 -9
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
@@ -9,12 +9,10 @@ import math
 import os
 import random
 import sys
-import time
 import warnings
 from collections import Counter
 from functools import wraps

-from tempfile import TemporaryFile
 import traceback
 import latextable
 import numpy as np
@@ -35,15 +33,22 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler
 from texttable import Texttable

-
-from .
+try:
+    from ._device_cust import device as dev
+    from .pareto_file import Pareto, Solution
+    from .data_split_helper import DataProcessor
+except ImportError:
+    from metacountregressor._device_cust import device as dev
+    from metacountregressor.pareto_file import Pareto, Solution
+    from data_split_helper import DataProcessor
+

 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")

-#
-min_comp_val = 1e-
-max_comp_val = 1e+
+# define the computation boundary limits
+min_comp_val = 1e-160
+max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200

@@ -117,21 +122,19 @@ class ObjectiveFunction(object):

     def __init__(self, x_data, y_data, **kwargs):

-        self.reg_penalty =
+        self.reg_penalty = 1
         self.power_up_ll = False
         self.bic = None
         self.other_bic = False
+        self.test_flag = 1
         if self.other_bic:
             print('change this to false latter ')
-        offset = None

-        #
-        self.constant_value =
-        self.negative_binomial_value =
+        # initialize values
+        self.constant_value = 0
+        self.negative_binomial_value = 1

         self.verbose_safe = True
-        self.zi_force = None  # Analst want a zi model and formally declares the zi components below
-        self.zi_force_names = None  # delare the zi components
         self.please_print = kwargs.get('please_print', 0)
         self.group_halton = None
         self.grad_yes = False
@@ -145,7 +148,7 @@ class ObjectiveFunction(object):
         self.rdm_fit = None
         self.rdm_cor_fit = None
         self.dist_fit = None
-
+
         self.MAE = None
         self.best_obj_1 = 100000000.0
         self._obj_1 = 'bic'
@@ -158,7 +161,7 @@ class ObjectiveFunction(object):
         self._max_iterations_improvement = 100
         self.generated_sln = set()
         self.ave_mae = 0
-        #
+        # defalt paramaters for hs #TODO unpack into harmony search class
         self.algorithm = 'hs'  # 'sa' 'de' also avialable
         self._hms = 20
         self._max_time = 60 * 60 * 24
@@ -166,7 +169,7 @@ class ObjectiveFunction(object):
         self._par = 0.3
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
@@ -190,11 +193,6 @@ class ObjectiveFunction(object):
             if k in acceptable_keys_list:
                 self.__setattr__(k, self.tryeval(kwargs[k]))

-        if self.zi_force_names is not None:
-            self.zi_force = True
-            if 'const' not in self.zi_force_names:
-                self.zi_force_names = ['const'] + self.zi_force_names
-                print('did this work?')

         if 'complexity_level' in kwargs:
             self.complexity_level = kwargs['complexity_level']
@@ -211,17 +209,22 @@ class ObjectiveFunction(object):
             raise Exception

         self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
-
         self.pvalue_exceed = 0
         self._maximize = False  # do we maximize or minimize?
-
-        # data_names = self._random_forest_preprocess(x_data, y_data)
+
         x_data = sm.add_constant(x_data)
         self._input_data(x_data, y_data)
+
+
         if y_data.ndim == 1:
             y_data = pd.DataFrame(y_data)

-
+        '''
+        #TODO ADD THIS IN LATER
+        splitter = DataProcessor(x_data, y_data, kwargs)
+        self.copy_class_attributes(splitter) #inherit the self objects
+        '''
+
         if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
             self.test_percentage = float(kwargs.get('test_percentage', 0))
             self.val_percentage = float(kwargs.get('val_percentage', 0))
@@ -229,8 +232,7 @@ class ObjectiveFunction(object):
             self.is_multi = False

         if 'panels' in kwargs:
-            self.group_names = np.asarray(x_data[kwargs['group']].astype(
-                'category').cat._parent.dtype.categories)
+            self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

             x_data[kwargs['group']] = x_data[kwargs['group']].astype(
                 'category').cat.codes
@@ -243,58 +245,39 @@ class ObjectiveFunction(object):

                 N = len(np.unique(x_data[kwargs['panels']].values))
                 id_unique = np.unique(x_data[kwargs['panels']].values)
-
             except KeyError:
                 N = len(np.unique(x_data[kwargs['panels']]))
+                id_unique = np.unique(x_data[kwargs['panels']].values)

             training_size = int((1 - self.test_percentage - self.val_percentage) * N)
             ids = np.random.choice(N, training_size, replace=False)
             ids = id_unique[ids]
             train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
             test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
-
             df_train = x_data.loc[train_idx, :]
             df_test = x_data.loc[test_idx, :]
             y_train = y_data.loc[train_idx, :]
             y_test = y_data.loc[test_idx, :]
-
         else:
             N = len(x_data)
             training_size = int((1 - self.test_percentage - self.val_percentage) * N)
             ids = np.random.choice(N, training_size, replace=False)
             id_unique = np.array([i for i in range(N)])
             ids = id_unique[ids]
-
             train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
             test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+            df_train = x_data.loc[train_idx, :]
+            df_test = x_data.loc[test_idx, :]
+            y_train = y_data.loc[train_idx, :]
+            y_test = y_data.loc[test_idx, :]

-            try:  # @IgnoreException
-                df_train = x_data.loc[train_idx, :]
-                df_test = x_data.loc[test_idx, :]
-                y_train = y_data.loc[train_idx, :]
-                y_test = y_data.loc[test_idx, :]
-            except:
-                # Convert all values to their real parts
-                df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-
-                # Replace the original DataFrame's numerical columns with real-valued ones
-                x_data[df_real.columns] = df_real
-
-                df_train = x_data.iloc[train_idx, :]
-                df_test = x_data.iloc[test_idx, :]
-                y_train = y_data.iloc[train_idx, :]
-                y_test = y_data.iloc[test_idx, :]

-        self.n_obs = N
+        #self.n_obs = N
         self._characteristics_names = list(self._x_data.columns)
-        if self.zi_force:
-            self.alpha_hurdle = np.isin(self._characteristics_names,
-                                        [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
-
         self._max_group_all_means = 1

         exclude_this_test = [4]
-
+
         if 'panels' in kwargs:
             self.panels = np.asarray(df_train[kwargs['panels']])
             self.panels_test = np.asarray(df_test[kwargs['panels']])
@@ -309,7 +292,6 @@ class ObjectiveFunction(object):
             X, Y, panel, group = self._arrange_long_format(
                 df_train, y_train, self.ids, self.ids, groupll)
             self.group_halton = group.copy()
-            Y = Y.astype('float')
             self.group_dummies = pd.get_dummies(group)
             Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
             self.panel_info = panel_info
@@ -324,7 +306,6 @@ class ObjectiveFunction(object):
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
             self._x_data = XX.copy()
             self._y_data = YY.copy()
-            # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
             X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
             if np.max(group) > 50:
                 exclude_this_test = [4]
@@ -353,7 +334,7 @@ class ObjectiveFunction(object):

             self._samples, self._panels, self._characteristics = self._x_data.shape

-
+

         else:
             self.G = None
@@ -372,77 +353,37 @@ class ObjectiveFunction(object):
             K = Xnew.shape[1]
             self._characteristics_names = list(Xnew.columns)
             XX = Xnew.values.reshape(self.N, self.P, K).copy()
-            # self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
-            # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
             self._x_data = XX.copy()
             self._y_data = YY.copy()
-
+
             if self.is_multi:
                 X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
                 if np.max(group) > 50:
                     exclude_this_test = [4]
                 else:
                     exclude_this_test = []
-                # self.group_halton_test = group.copy()
                 X, Y, panel_info = self._balance_panels(X, Y, panel)
-
+
                 self.N_test, self.P_test = panel_info.shape
-
-                # self.group_dummies_test = pd.get_dummies(group)
-                # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
-                # self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
                 self._x_data_test = X.copy()
                 self.y_data_test = Y.copy()
-
+
             self._samples, self._panels, self._characteristics = self._x_data.shape

-        # draws and pvalue
-
-        if 'Offset' in self._characteristics_names:
-            offset = True
-        self.have_offset = offset
-        if self.have_offset is not None:
-            try:
-                # offset for training data
-                # define offset
-                val_od = self.get_named_indices(['Offset'])
-                self._offsets = self._x_data[:, :, val_od]
-
-                # drop the offset from the data
-                self._x_data = np.delete(self._x_data, val_od, axis=2)
-                self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
-                self._characteristics = len(self._characteristics_names)
-                # self._x_data.drop(columns=['Offset'], inplace=True)
-
-                # offset for testing data
-                if self.is_multi:
-                    # define offset
-                    self._offsets_test = self._x_data_test[:, :, val_od]
-                    # self._offsets_test = self._x_data_test['Offset'].to_numpy()
-                    # self._offsets_test = np.reshape(
-                    #    self._offsets_test, (-1, 1))
-                    # drop the offset from the data
-                    self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
-                    # self._x_data_test.drop(columns=['Offset'], inplace=True)
-            except:
-                # if no offset, set as 0
-                self._offsets = np.zeros((self.N, self.P, 1))
-                if self.is_multi:
-                    self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
-        else:
-            self._offsets = np.zeros((self.N, self.P, 1))
-            if self.is_multi:
-                self._offsets_test = np.zeros((self.N_test, self.P_test, 1))

+        #Define the offset into the data
+        self.process_offset()
         if self.is_multi:
             self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
-
             self._pareto_population = list()
+
+
+
         self.Ndraws = 200  # todo: change back
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
@@ -480,8 +421,7 @@ class ObjectiveFunction(object):
         self.coeff_ = None

         self.significant = 0
-        # define the states of our
-
+        # define the states of our explanatory variables
         self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
                                                    kwargs.get('must_include', []))
         self._discrete_values = self._discrete_values + \
@@ -506,21 +446,83 @@ class ObjectiveFunction(object):
         self.endog = None
         # solution parameters
         self._min_characteristics = 1
-
         self._max_hurdle = 4

-
+        #Manually fit from analyst specification
+        manual_fit = kwargs.get('Manual_Fit')
+        if manual_fit is not None:
+            self.process_manual_fit(manual_fit)

-
-        self.pvalue_sig_value = 1
-        # embed the solution to how you want it
-        self.set_defined_seed(42)
-        a = self.modify_initial_fit(kwargs['Manual_Fit'])
-        self.makeRegression(a)
+        self.solution_analyst = None


-
-
+
+
+    def over_ride_self(self, **kwargs):
+        """
+        Dynamically sets attributes on the instance based on the provided keyword arguments.
+        """
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+        print(f"Updated attributes: {kwargs}")
+
+    def remove_offset(self, data, indices):
+        """ Remove offset data from the dataset """
+        new_data = np.delete(data, indices, axis=2)
+        return new_data
+
+    def process_offset(self):
+        """ Process offset if it exists in the characteristics """
+        try:
+            if 'Offset' in self._characteristics_names:
+                self.have_offset = True
+                val_od = self.get_named_indices(['Offset'])
+                self._offsets = self._x_data[:, :, val_od]
+                self._x_data = self.remove_offset(self._x_data, val_od)
+                self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
+                self._characteristics = len(self._characteristics_names)
+
+                if self.is_multi:
+                    self._offsets_test = self._x_data_test[:, :, val_od]
+                    self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+            else:
+                self.initialize_empty_offsets()
+
+        except Exception as e:
+            print(f"An error occurred: {e}")  # Better error handling
+            self.initialize_empty_offsets()
+
+    def initialize_empty_offsets(self):
+        """ Initialize offsets to zero if none are found or on error """
+        self._offsets = np.zeros((self.N, self.P, 1))
+        if self.is_multi:
+            self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
+
+
+    def copy_class_attributes(self, class_object):
+        '''
+        Loop through an
+        '''
+
+        # Loop through all attributes of the car object and copy them
+        for attr in vars(class_object):
+            setattr(self, attr, getattr(class_object, attr))
+
+
+    def process_manual_fit(self, manual_fit):
+        """Process the manual fit configuration."""
+        self.initial_sig = 1  # Example: Initialize some signal
+        self.pvalue_sig_value = 1  # Example: Initialize another signal
+        self.set_defined_seed(42)  # Set a specific seed
+
+        modified_fit = self.modify_initial_fit(manual_fit)  # Modify the initial fit based on manual_fit
+        self.makeRegression(modified_fit)  # Perform regression with the modified fit
+
+
+    def process_fit_specifications(self, find_constant, hard_code):
+        """
+        Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
+        """
         if hard_code:
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
@@ -559,7 +561,7 @@ class ObjectiveFunction(object):
                 constant_values.append(self.beta_dict['const'][0][1])
                 dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
             except:
-                print('
+                print('Error during regression analysis.')
             i += 1

         # Add the values of this iteration to the total
@@ -570,7 +572,7 @@ class ObjectiveFunction(object):
         constant_values_avg = [x / 100 for x in constant_values_total]
         dispersion_values_avg = [x / 100 for x in dispersion_values_total]

-
+        return constant_values_avg, dispersion_values_avg


     def _balance_panels(self, X, y, panels):  # ToDO re
@@ -615,22 +617,7 @@ class ObjectiveFunction(object):

         return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)

-
-        import rpy2.rinterface as rinterface
-        import rpy2.robjects as robjects
-        import rpy2.robjects as ro
-        from rpy2.robjects import pandas2ri
-        r = robjects.r
-        r['source']('testML.R')
-        pandas2ri.activate()
-        RF_function_r = robjects.globalenv['RF_plot']
-        RF_function_corr_r = robjects.globalenv['RF_plot_corr']
-        r_df = ro.conversion.py2rpy(self._x_data)
-        y_dy = ro.conversion.py2rpy(self._y_data)
-        RF_function_r(r_df, y_dy)
-
-        print('did this work')
-        RF_function_corr_r(r_df, y_dy)
+

     def print_system_utilization(self):
         # Get CPU usage
@@ -647,7 +634,8 @@ class ObjectiveFunction(object):
         mem_free = round(mem_info.available /
                          (1024 * 1024), 2)  # Convert to MB
         print(
-            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total
+            f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
+            f" mem free {mem_free})")

     def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
         '''converts the data to long format'''
@@ -665,59 +653,14 @@ class ObjectiveFunction(object):
             if group is not None:
                 group = group[sorted_idx]

-            return X, y, pnl, group
-
-        pandas_sort = 1
-        if pandas_sort:
-            if ids is not None:
-
-                pnl = panels if panels is not None else np.ones(len(ids))
-                df = X
-
-                df['panels'], df['ids'] = pnl, ids
-                new = 0
-                if new:
-                    cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                                     'formats': ['<f4', '<f4']})
-                    cols['panels'], cols['ids'] = pnl, ids
-                    sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-                    X, y = X[sorted_idx], y[sorted_idx]
-                    if panels is not None:
-                        panels = panels[sorted_idx]
-                    return X, y, panels
-
-                df = pd.concat([X.reset_index(drop=True),
-                                y.reset_index(drop=True)], axis=1)
-                sorted_df = df.sort_values(
-                    ['panels', 'ids']).reset_index(drop=True)
-
-                X, y, panels = sorted_df.iloc[:, :-
-                                              3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
-                if panels is not None:
-                    # panels = panels[sorted_idx]
-                    P_i = (
-                        (np.unique(panels, return_counts=True)[1])).astype(int)
-                    P = np.max(P_i)
-                    N = len(P_i)
-                print(1)
-                return X, y, panels
-
-        if ids is not None:
-            X = np.asarray(X)
-            y = np.asarray(y)
-            pnl = panels if panels is not None else np.ones(len(ids))
-
-            cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                             'formats': ['<f4', '<f4']})
-            cols['panels'], cols['ids'] = pnl, ids
-            sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-            X, y = X[sorted_idx], y[sorted_idx]
-            if panels is not None:
-                panels = panels[sorted_idx]
+            return X, y.astype('float'), pnl, group

-        return X, y, panels
+        return X, y.astype('float'), panels

     def _random_forest_identify_transformations(self, x_data, y_data):
+        '''
+        use the random forrest model to identify best feature
+        '''
         # let's use the pprint module for readability
         import inspect
         from pprint import pprint
@@ -866,7 +809,6 @@ class ObjectiveFunction(object):
     def pvalue_asterix_add(self, pvalues):
         pvalue_ast = list()
         for i in range(len(pvalues)):
-            signif = ""
             if float(pvalues[i]) < 0.001:
                 signif = "***"
             elif float(pvalues[i]) < 0.01:
@@ -899,8 +841,7 @@ class ObjectiveFunction(object):

         return ([self._model_type_codes[dispersion]])

-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-                            zi_fit=None, obj_1=None, model_nature=None):
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -911,8 +852,7 @@ class ObjectiveFunction(object):
             rdm_fit = self.none_handler(self.rdm_fit)
         if rdm_cor_fit is None:
             rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
-
-        zi_fit = self.none_handler(self.zi_fit)
+
         dis_fit = [x for x in self.none_handler(
             self.dist_fit)]  # check if dis fit is name

@@ -977,18 +917,18 @@ class ObjectiveFunction(object):
             br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
                           for i in range(len(randvars))]

-
+

             names = fixednames + randvars + chol_names + \
-                br_w_names + chol_part_1 + chol +
+                br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
             self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
                                                                                  in range(len(chol_names)) for j in
                                                                                  range(
-                                                                                     i + 1)]
+                                                                                     i + 1)] + dispersion_name  # TODO does this break
             name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
                                                                              chol_names[j] for i
                                                                              in range(len(chol_names)) for j in
-                                                                             range(i + 1)]
+                                                                             range(i + 1)] + dispersion_name
             index_dict = {}
             for i, name in enumerate(name_delete_2):
                 split_names = name.split('/')
@@ -1012,9 +952,9 @@ class ObjectiveFunction(object):
             randvars = [x for x in self.none_handler(rdm_fit)]
             chol_names = [x for x in self.none_handler(rdm_cor_fit)]

-            zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]

-
+
+            names = fixednames + randvars + chol_names + big_hetro + dispersion_name

             names = np.array(names)  # TODO check order
             self.print_transform = self.transform_id_names + \
@@ -1052,22 +992,8 @@ class ObjectiveFunction(object):
         if not isinstance(self.pvalues, np.ndarray):
             raise Exception

-        for i in range(len(self.coeff_)):
-            signif = ""

-            if float(self.pvalues[i]) < 0.01:
-                signif = "***"
-            elif float(self.pvalues[i]) < 0.05:
-                signif = "**"
-            elif float(self.pvalues[i]) < 0.1:
-                signif = "*"

-            '''
-            print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
-                             self.stderr[i], self.zvalues[i], self.pvalues[i],
-                             signif
-                             ))
-            '''
         if self.please_print or save_state:

             if self.convergance is not None:
@@ -1175,14 +1101,7 @@ class ObjectiveFunction(object):
             self.save_to_file(latextable.draw_latex(
                 table, caption=caption, caption_above=True), file_name)

-        # print('change this')
-        # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
-
-        # updating the column value/data
-        # df['Y'] = np.mean(self.lam, axis = (1,2))

-        # writing into the file
-        # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)

     def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
         """
@@ -1540,22 +1459,9 @@ class ObjectiveFunction(object):
         alpha_hetro = [
             0 if x != 5 else 1 for x in vector[:self._characteristics]]

-        if self.zi_force == True:

-            return {
-                'alpha': alpha,
-                'alpha_rdm': alpha_rdm,
-                'alpha_cor_rdm': alpha_cor_rdm,
-                'alpha_grouped': alpha_grouped,
-                'alpha_hetro': alpha_hetro,
-                'distributions': distributions,
-                'transformations': transformations,
-                'exog_infl': self.zi_force_names,
-                'dispersion': dispersion
-            }

-
-        return {
+        return {
             'alpha': alpha,
             'alpha_rdm': alpha_rdm,
             'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1469,6 @@ class ObjectiveFunction(object):
             'alpha_hetro': alpha_hetro,
             'distributions': distributions,
             'transformations': transformations,
-
             'dispersion': dispersion
         }

@@ -1599,7 +1504,7 @@ class ObjectiveFunction(object):

     def repair(self, vector, reduce_to_this=10000):  # todo get the number of parameters
         'Method to repair the model so that the number of paramaters is held within the constraint'
-
+
         new_j = 0
         # extract explanatory vector
         prmVect = vector[:self._characteristics]
@@ -1618,7 +1523,6 @@ class ObjectiveFunction(object):
                                       int(np.min((5, self.complexity_level - 1)))])

         count_3 = prmVect.count(3)
-        this_many = count_3 * (count_3 + 1) / 2

         vector[:len(prmVect)] = prmVect.copy()

@@ -1637,8 +1541,7 @@ class ObjectiveFunction(object):
         # b = sum(prmVect) + self.is_dispersion(vector[-1])
         max_loops = 100  # Maximum number of loops
         counter = 0  # Counter variable to keep track of the number of loops
-
-        raise Exception('fhfhfhf')
+

         while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:

@@ -1686,8 +1589,6 @@ class ObjectiveFunction(object):
             counter += 1

         counter = 0
-        if any(isinstance(num, int) and num < 0 for num in vector):
-            raise Exception('fhfhfhf')
         while b < self._min_characteristics and counter < max_loops:

             weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1635,13 @@ class ObjectiveFunction(object):
         cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
         Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))

-
+
         Kchol = int((cor_l *
                      (cor_l + 1)) / 2)
         n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
         if block:
-            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh
-        return Kf, Kr, cor_l, Kr_b, Kchol, Kh
+            return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
+        return Kf, Kr, cor_l, Kr_b, Kchol, Kh

     def find_index_of_block(self, lst, value):
         cumulative_sum = 0
@@ -1821,8 +1722,7 @@ class ObjectiveFunction(object):
                     self.rdm_fit)):
             raise Exception('pop wrong for id names')

-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

     def get_value_to_delete(self, idx, dispersion):
         block = self.get_num_params(True)
@@ -1858,8 +1758,7 @@ class ObjectiveFunction(object):
             self.dist_fit.pop(cc[b] + len(self.rdm_fit))
             self.transform_id_names.pop(
                 cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
-
-        # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

     def get_param_num(self, dispersion=0):
         a = np.sum(self.get_num_params()) + \
@@ -1890,7 +1789,7 @@ class ObjectiveFunction(object):
                          return_violated_terms=0):

         num_params = len(pvalues)
-        Kf, Kr, Kc, Kr_b, Kchol, Kh
+        Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()

         vio_counts = 0
         pvalues = np.array([float(string) for string in pvalues])
@@ -1915,18 +1814,14 @@ class ObjectiveFunction(object):
                     subpvalues[i] = 0

         sum_k += Kr_b
-        if Kchol > 0:
-            cc = [i for i
-                  in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
-
         lower_triangular = subpvalues[sum_k:sum_k + Kchol]

-
+
         # initialize matrix with zeros
-        matrix_alt = [[0] *
+        matrix_alt = [[0] * Kc for _ in range(Kc)]
         index = 0

-        for i in range(
+        for i in range(Kc):
             for j in range(i + 1):
                 # fill in lower triangular entries
                 matrix_alt[i][j] = lower_triangular[index]
@@ -2414,17 +2309,9 @@ class ObjectiveFunction(object):

         if obj_1 is not None:
             obj_1['layout'] = vector.copy()
-            # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
-            # obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
-            # a = self.modifyn(model_mod)
-            # vector = self.modify_vector(
-            # vector, alpha, alpha_rdm, alpha_cor_rdm)
             sub_vector = vector[:self._characteristics]
             dispersion_parm = vector[-1]
-
-            num_parm = sum(sub_vector)
-        else:
-            num_parm = sum(sub_vector) + 1
+

         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
             obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2344,7 @@ class ObjectiveFunction(object):

             self.Last_Sol = obj_1.copy()

-
+

         self.reset_sln()
         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2382,7 @@ class ObjectiveFunction(object):
         self.coeff_names = None
         self.draws1 = None
         self.coeff_ = None
-
+
         self.bic = None
         self.log_lik = None
         self.pvalues = None
@@ -2589,13 +2476,13 @@ class ObjectiveFunction(object):
     def set_defined_seed(self, seed):
         print('Benchmaking test with Seed', seed)
         np.random.seed(seed)
-
+
         random.seed(seed)

     def set_random_seed(self):
         print('Imbdedding Seed', self._random_seed)
         np.random.seed(self._random_seed)
-
+
         random.seed(self._random_seed)
         return self._random_seed

@@ -2720,85 +2607,9 @@ class ObjectiveFunction(object):
             print(e)
             print('f')

-    def negbinom_gradients(r, p, k, a=None):  # TODO: delete if wrong
-        """_summary_
-
-        Args:
-            r (_type_): rate paramaters or dispersion of the nb
-            p (_type_): probability
-            k (_type_): vector of (non-negative integer) quantiles.
-            a (_type_, optional): optional paramater, if none NB model, otherwise NB-Lindley model with Lindley paramater a.
-
-        Raises:
-            Exception: _description_
-            Exception: _description_
-            ValueError: _description_
-            Exception: _description_
-            Exception: _description_

-        Returns:
-            _type_: _description_
-        """
-        # fine the NegBinom PMF
-        import scipy.special as sps
-        negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k

-        # Calculate the gradient of the NegBinom PMF with respect to r and p
-        d_negbinom_pmf_dr = sps.comb(
-            k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
-        d_negbinom_pmf_dp = sps.comb(
-            k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k

-        if a is not None:
-            # Define the NegBinom-Lindley PMF
-            negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
-
-            # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
-            d_negbinom_lindley_pmf_dr = sps.comb(
-                a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_dp = sps.comb(
-                a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
-            d_negbinom_lindley_pmf_da = sps.comb(
-                a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
-
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
-                                                            d_negbinom_lindley_pmf_da]
-        else:
-            return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
-
-    def f(self, x, N, sig, mu):
-        return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
-
-    def poilog(self, n, mu, sig):
-        from scipy import integrate
-        if len(mu) > 1 or len(sig) > 1:
-            raise ValueError(
-                "vectorization of mu and sig is currently not implemented")
-        if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
-            raise ValueError("all n must be integers")
-        if any(n < 0):
-            raise ValueError("one or several values of n are negative")
-        if not np.all(np.isfinite(np.concatenate((mu, sig)))):
-            raise ValueError("all parameters should be finite")
-        if sig <= 0:
-            raise ValueError("sig is not larger than 0")
-        spos = np.where(n < 8)[0]
-        lpos = np.where(n >= 8)[0]
-        val = np.empty_like(n)
-
-        if spos.size > 0:
-            vali = np.empty(spos.size)
-            for i in range(spos.size):
-                try:
-                    vali[i] = integrate.quad(
-                        self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
-                except:
-                    vali[i] = 1e-300
-            valp = self.poilog(n[spos], mu, sig ** 2)[0]
-            val[spos] = np.maximum(vali, valp)
-        if lpos.size > 0:
-            val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
-        return val

     def negbinom_pmf(self, r, p, k, a=None):  # TODO: delete if wrong
         """_summary_
@@ -2828,45 +2639,7 @@ class ObjectiveFunction(object):
             negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
             return negbinom_lindley_pmf

-    def nbl_score(self, y, X, betas, alpha, theta):
-        from scipy.special import gammaln, psi
-        """
-        Calculate the Negative Binomial-lindley model score vector of the log-likelihood.
-
-        Parameters:
-        -----------
-        y : numpy array
-            The dependent variable of the model.
-        X : numpy array
-            The independent variables of the model.
-        beta : numpy array
-            The coefficients of the model.
-        alpha : float
-            The dispersion parameter of the Negative Binomial-lindley distribution.
-        theta : float
-            The theta parameter of the Negative Binomial-lindley distribution.
-
-        Returns:
-        --------
-        score : numpy array
-            The score vector of the Negative Binomial-lindley model log-likelihood.
-        """
-        alpha = betas[-1]
-        theta = betas[-2]
-        beta = betas[:-2]
-        zi = self.my_lindley(y, theta).ravel()
-
-        eta = np.dot(X, beta)
-        mu = np.exp(eta) * zi
-        p = 1 / (1 + mu * theta / alpha)
-        q = 1 - p
-        score = np.zeros(len(betas))

-        for i in range(len(y)):
-            score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
-                      - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
-
-        return score

     def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
         """
@@ -2909,7 +2682,7 @@ class ObjectiveFunction(object):
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n

-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters
@@ -2928,10 +2701,48 @@ class ObjectiveFunction(object):


         """
+        #print('delete this later')
+        if alpha is None:
+            alpha = params[-1]
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables

-
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()

-
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                        y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                        y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
+
+
+
+
+
+        try:
+            if alpha is None:
+                alpha = params[-1]
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X
@@ -2973,7 +2784,8 @@ class ObjectiveFunction(object):
                 return np.concatenate((dparams, dalpha),
                                       axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
@@ -3640,7 +3452,7 @@ class ObjectiveFunction(object):
         # prob = 1/(1+mu*alpha)
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-
+            gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)

             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
@@ -3798,21 +3610,8 @@ class ObjectiveFunction(object):

         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            # if b_gam < 0.8*model_nature['dispersion_penalty']:
-            # penalty += model_nature['dispersion_penalty'] -b_gam

-            # if abs(b_gam) < 0.01:
-            # penalty += 1/np.abs(b_gam)

-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-                # b_gam = 7.9
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-            # penalty += 1/np.max((0.01,abs(b_gam)))
-            # b_gam = model_nature['dispersion_penalty']
-
-            """
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3820,21 +3619,21 @@ class ObjectiveFunction(object):
                 #b_gam = 1

             # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam))

-                b_gam = 0.
+                b_gam = 0.001
             #

-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam

-
-                b_gam = min_comp_val
+            # if b_gam == 0:
+                #b_gam = min_comp_val
             #b_gam = 0.03

-
+            # b_gam = abs(b_gam)

-
+


         elif dispersion == 2:
             if b_gam >= 1:
@@ -3918,195 +3717,7 @@ class ObjectiveFunction(object):
             # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
             return -np.exp(XB) + y * XB - sc.gammaln(y + 1)

-        def loglik_zi(params, return_grad=False):
-            """
-            Loglikelihood for observations of Generic Zero Inflated model.
-
-            Parameters
-            ----------
-            params : array_like
-                The parameters of the model.
-
-            Returns
-            -------
-            loglike : ndarray
-                The log likelihood for each observation of the model evaluated
-                at `params`. See Notes for definition.
-
-            Notes
-            -----
-            .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
-                \\ln(1-w_{i})+L_{main\\_model}
-            where P - pdf of main model, L - loglike function of main model.

-            for observations :math:`i=1,...,n`
-            """
-            params_infl = params[:self.k_inflate]
-            params_main = params[self.k_inflate:]
-
-            y = self.endog
-            w = predict_logit(params_infl, exog_infl)
-
-            w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-            llf_main = loglik_obs_poisson(params_main, y)
-            dispersion = 0
-            b_gam = None
-            Xd = exog
-            eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
-            eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
-
-            llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
-            zero_idx = np.nonzero(y == 0)[0]
-            nonzero_idx = np.nonzero(y)[0]  # type: ignore
-
-            llf = np.zeros_like(y, dtype=np.float64)
-            llf[zero_idx] = (np.log(w[zero_idx] +
-                                    (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-            llf[nonzero_idx] = np.log(
-                1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-            if return_grad:
-                score_main = Xd.T @ (y - eVd.ravel())
-                L = np.exp(np.dot(Xd, params_main))
-                score_main = (self.endog - L)[:, None] * Xd
-
-                dldp = np.zeros(
-                    (exog.shape[0], len(params_main)), dtype=np.float64)
-                dldw = np.zeros_like(exog_infl, dtype=np.float64)
-
-                dldp[zero_idx, :] = (score_main[zero_idx].T *
-                                     (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
-                dldp[nonzero_idx, :] = score_main[nonzero_idx]
-
-                dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
-                                     (1 - w[zero_idx]) *
-                                     (1 - np.exp(llf_main[zero_idx])) /
-                                     np.exp(llf[zero_idx])).T
-                dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
-                                         w[nonzero_idx]).T
-
-                return llf, np.hstack((dldw, dldp)).sum(axis=0)
-
-            else:
-
-                return llf
-
-        def zipoisson_logpmf(x, mu, w):
-            return _lazywhere(x != 0, (x, mu, w),
-                              (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
-                               sc.gammaln(x + 1.) - mu),
-                              np.log(w + (1. - w) * np.exp(-mu)))
-
-        def zipoisson_pmf(x, mu, w):
-            return np.exp(zipoisson_logpmf(x, mu, w))
-
-        def loglik_logit(params, endog_y, exog_x):  # this is predict I think
-            q = 2 * endog_y - 1
-            X = exog_x
-            return np.sum(np.log(cdf(q * np.dot(X, params))))
-
-        def predict_logit(params, exog=None, linear=False):
-            if exog is None:
-                exog = self.exog
-            if not linear:
-                return (cdf(np.dot(exog, params)))
-            else:
-                return (np.dot(exog, params))
-
-        def cdf(X):
-            """
-            The logistic cumulative distribution function
-
-            Parameters
-            ----------
-            X : array_like
-                `X` is the linear predictor of the logit model. See notes.
-
-            Returns
-            -------
-            1/(1 + exp(-X))
-
-            Notes
-            -----
-            In the logit model,
-
-            .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-                \\text{Prob}\\left(Y=1|x\\right)=
-                \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-            """
-            X = np.asarray(X)
-            return 1 / (1 + np.exp(-X))
-
-        llobs, grad = loglik_zi(betas, return_grad)
-        llf = np.sum(llobs)
-        if return_grad:
-            return -llf, -grad
-        else:
-            return -llf
-
-    def cdf_logit(self, X):
-        """
-        The logistic cumulative distribution function
-
-        Parameters
-        ----------
-        X : array_like
-            `X` is the linear predictor of the logit model. See notes.
-
-        Returns
-        -------
-        1/(1 + exp(-X))
-
-        Notes
-        -----
-        In the logit model,
-
-        .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-            \\text{Prob}\\left(Y=1|x\\right)=
-            \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-        """
-        X = np.asarray(X)
-        return 1 / (1 + np.exp(-X))
-
-    def predict_logit_part(self, params_infl, exog_infl, linear=False):
-
-        if not linear:
-            return (self.cdf_logit(np.dot(exog_infl, params_infl)))
-        else:
-            return (np.dot(exog_infl, params_infl))
-
-    def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
-
-        w = self.predict_logit_part(params_infl, exog_infl)
-
-        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-        llf_main = [1, 2, 3]  # TODO ge
-        zero_idx = np.nonzero(y == 0)[0]
-        nonzero_idx = np.nonzero(y)[0]
-
-        llf = np.zeros_like(y, dtype=np.float64)
-        llf[zero_idx] = (np.log(w[zero_idx] +
-                                (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-        llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-
-        return llf
-
-    def dPXL(self, x, alpha):
-        return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
-
-    # Define the gradient function
-
-    def poisson_lindley_gradient(self, params, exog, endog):
-        beta = params[-1]
-        mu = np.exp(np.dot(exog, params[:-1]))
-        q = beta / (1 + beta)
-        d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
-        d_beta = self.dpoisl(endog, beta).ravel()
-        d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
-
-        grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
-        der = np.sum(grad_n, axis=0)
-        return der, grad_n

     def dpoisl(self, x, theta, log=False):
         # if theta < 0:
@@ -4175,7 +3786,8 @@ class ObjectiveFunction(object):
         elif dispersion == 1:

             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
             # print('fuck if this actually works')

@@ -4183,21 +3795,9 @@ class ObjectiveFunction(object):

             proba_r = self.general_poisson_pmf(eVd, y, b_gam)

-        elif dispersion == 3:
-            fa, ba = self.get_dispersion_paramaters(betas, dispersion)
-            zi = self.my_lindley(y, ba)
-            proba_r = poisson.pmf(y, zi * eVd.ravel())
-            # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
-            # prob_2 = self.dpoisl(y, l_pam)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-            # print(1)
-            # proba_r = self.dpoisl(y, eVd)

-
-
-            self.zi = self.my_lindley(eVd, ba)
-            proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
+
+
             # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)

         elif dispersion == 'poisson_lognormal':
@@ -4219,7 +3819,7 @@ class ObjectiveFunction(object):
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik

@@ -4267,7 +3867,7 @@ class ObjectiveFunction(object):
         # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):

         # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion))) #FIX
-        Kf2, Kr, Kc, Kr_b, Kchol, Kh
+        Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()

         gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
             (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
@@ -4521,9 +4121,9 @@ class ObjectiveFunction(object):

         elif dispersion == 1:

-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
                 return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                            neginf=-140)

@@ -4716,8 +4316,10 @@ class ObjectiveFunction(object):
             return self._loglik_gradient(self, betas, *stuff)

     def get_br_and_bstd(betas, self):
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
-
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
+        Kr = Kr_a + Kr_c #todo check if this works
+        print('check if this works')
+        br = betas[Kf_a:Kf_a + Kr]
         # Calculate the size of the br matrix
         br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)

@@ -4728,7 +4330,7 @@ class ObjectiveFunction(object):
         index = 0
         for i in range(br_size):
             for j in range(i, br_size):
-                br_std[j, i] = betas[
+                br_std[j, i] = betas[Kf_a + Kr + index]
                 index += 1

         brstd = br_std
@@ -4767,7 +4369,7 @@ class ObjectiveFunction(object):
         self.n_obs = len(y)  # feeds into gradient
         if draws is None and draws_grouped is None and (
                 'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
-
+            #TODO do i shuffle the draws
             if type(Xd) == dict:
                 N, Kf, P = 0, 0, 0
                 for key in Xd:
@@ -4775,13 +4377,13 @@ class ObjectiveFunction(object):
                     P += Xd[key].shape[1]
                     Kf += Xd[key].shape[2]
             else:
-                self.naming_for_printing(betas, 1, dispersion,
+                self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
                 N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
             betas = np.array(betas)
             Bf = betas[0:Kf]  # Fixed betas

             main_disper, lindley_disp = self.get_dispersion_paramaters(
-                betas, dispersion)
+                betas, dispersion) #todo fix this up
             if lindley_disp is not None:
                 if lindley_disp <= 0:
                     penalty += 1
@@ -4805,32 +4407,16 @@ class ObjectiveFunction(object):
|
|
4805
4407
|
llf_main = self.loglik_obs(
|
4806
4408
|
y, eVd, dispersion, main_disper, lindley_disp, betas)
|
4807
4409
|
|
4808
|
-
|
4410
|
+
llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
|
4809
4411
|
|
4810
4412
|
loglik = llf_main.sum()
|
4811
|
-
if 'exog_infl' in model_nature:
|
4812
|
-
params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
|
4813
|
-
params_main = Bf
|
4814
|
-
# ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
|
4815
|
-
# exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
|
4816
|
-
exog_infl = model_nature.get('exog_inflX')
|
4817
|
-
llf_main = llf_main # TODO test this
|
4818
|
-
w = self.predict_logit_part(params_infl, exog_infl)
|
4819
|
-
|
4820
|
-
w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
|
4821
|
-
|
4822
|
-
zero_idx = np.nonzero(y == 0)[0]
|
4823
|
-
nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
|
4824
4413
|
|
4825
|
-
llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
|
4826
|
-
llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
|
4827
|
-
llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
|
4828
|
-
loglik = llf.sum()
|
4829
4414
|
|
4830
4415
|
loglik = np.clip(loglik, log_lik_min, log_lik_max)
|
4831
4416
|
if self.power_up_ll:
|
4832
4417
|
|
4833
4418
|
loglik += 2*loglik
|
4419
|
+
print('am i powering up')
|
4834
4420
|
penalty = self.regularise_l2(betas)
|
4835
4421
|
|
4836
4422
|
if not np.isreal(loglik):
|
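The large deletion above removes the experimental zero-inflation branch from the log-likelihood. For the record, the deleted lines computed the standard zero-inflated mixture: a zero can arise from the inflation process (probability `w`, predicted by the logit part) or from the count process itself. A compact sketch of the same computation the deleted branch performed:

```python
import numpy as np

def zero_inflated_loglik(y, llf_main, w):
    # llf_main: per-observation log-likelihood of the count model;
    # w: inflation probability. Mirrors the branch deleted above.
    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
    llf = np.where(y == 0,
                   np.log(w + (1 - w) * np.exp(llf_main)),
                   np.log(1 - w) + llf_main)
    return llf.sum()
```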
@@ -4851,7 +4437,7 @@ class ObjectiveFunction(object):
             else:
                 return -loglik + penalty
         # Else, we have draws
-        self.n_obs = len(y) * self.Ndraws
+        self.n_obs = len(y) * self.Ndraws #todo is this problematic
         penalty += self._penalty_betas(
             betas, dispersion, penalty, float(len(y) / 10.0))
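Context for the `#todo is this problematic` note: in the simulated branch each of the N observations is evaluated under R draws, so `n_obs = len(y) * Ndraws` counts observation-draw pairs. The per-observation probability itself is averaged over draws before logging; a hedged sketch of that standard construction:

```python
import numpy as np

def simulated_loglik(prob_nr):
    # prob_nr: (N, R) array of P(y_i | beta_r), one column per draw.
    return np.log(prob_nr.mean(axis=1)).sum()
```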
@@ -4860,7 +4446,7 @@ class ObjectiveFunction(object):
         # Kf =0
         betas = np.array(betas)
         betas = dev.to_gpu(betas) # TODO fix mepotnetially problem
-        self.naming_for_printing(betas, 0, dispersion,
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)

@@ -4908,7 +4494,7 @@ class ObjectiveFunction(object):
         # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
         # print('I think this is fine')
         n_coeff = self.get_param_num(dispersion)
-        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+        Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
         if Kchol_a != Kchol:
             print('hold')

@@ -4949,11 +4535,11 @@ class ObjectiveFunction(object):
                 # brstd), draws_) # Get random coefficients, old method
                 Br = self._transform_rand_betas(br,
                                                 brstd, draws_) # Get random coefficients
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 self.Br = Br.copy()

             else:
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 chol_mat = self._chol_mat(
                     len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
                 self.chol_mat = chol_mat.copy()
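Both branches above lead to the same construction: random coefficients are simulated as a mean vector plus a lower-triangular (Cholesky) spread applied to the draws, which is what gives the correlated random parameters their covariance. A minimal sketch under those standard assumptions (the arrays are illustrative, not the package's API):

```python
import numpy as np

rng = np.random.default_rng(0)
b = np.array([1.0, -0.5])               # means of the random coefficients
L = np.array([[0.3, 0.0],
              [0.1, 0.2]])              # Cholesky factor; implied cov = L @ L.T
xi = rng.standard_normal((2, 10000))    # standard-normal (or Halton) draws
beta_draws = b[:, None] + L @ xi        # each column is one correlated draw
print(np.round(np.cov(beta_draws), 3))  # approaches L @ L.T
```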
@@ -5378,12 +4964,16 @@ class ObjectiveFunction(object):
         return H

     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        #method = 'BFGS'
         if method == "BFGS":

             try:
+                argbs = list(args)

-
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)

             except:
                 return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
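Aside from the debug additions (`a` is computed and then shadowed by the plain `return`), `_minimize` keeps a try-the-custom-optimiser, fall-back-to-scipy shape. A sketch of that pattern, where `custom_bfgs` is a stand-in name for the package's `self._bfgs`:

```python
from scipy.optimize import minimize

def robust_minimize(fn, x0, args, custom_bfgs, tol=1e-5, options=None):
    options = options or {}
    try:
        return custom_bfgs(fn, x0, args=args, tol=tol, **options)
    except Exception:
        # args[6] is the gradient flag in this code base; scipy accepts a
        # boolean jac, meaning fn returns (value, gradient) when it is True.
        return minimize(fn, x0, args=args, jac=args[6], method='BFGS',
                        tol=tol, options=options)
```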
@@ -5689,9 +5279,9 @@ class ObjectiveFunction(object):
         betas_est - array. Coefficients which maximize the negative log-liklihood.
         """
         # Set defualt method
-
-
-
+        #TODO, the inital fit worked but it throws
+
+

         sol = Solution()
         log_ll = 10.0 ** 9

@@ -5706,10 +5296,7 @@ class ObjectiveFunction(object):
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-            # method2 = 'BFGS_2'

-            # method2 = 'BFGS_2'
-            # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None

@@ -5727,7 +5314,7 @@ class ObjectiveFunction(object):

         dispersion_param_num = self.is_dispersion(dispersion)

-        paramNum = self.get_param_num(dispersion)
+        #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)

@@ -5753,7 +5340,7 @@ class ObjectiveFunction(object):
             XX_test = mod.get('Xr_test')

         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)

         if method == 'L-BFGS-B':
             if dispersion == 0:
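The hunk above only restores the closing parenthesis on the warm-start draw. For orientation, the starter vector holds one entry per coefficient family plus the dispersion term; the sizes below are illustrative:

```python
import numpy as np

k, kr, kg, kh = 4, 2, 0, 0    # fixed, random, grouped, heteroskedastic counts
dispersion_param_num = 1      # e.g. the negative binomial overdispersion term
bb = np.random.uniform(-0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
assert bb.shape == (7,)
```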
@@ -5787,10 +5374,12 @@ class ObjectiveFunction(object):
                 hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
                 initial_beta = self._minimize(self._loglik_gradient, bb,
                                               args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                                    dispersion, 0, False, 0, None,
+                                                    dispersion, 0, False, 0, None, None, None, None, None,
                                                     mod),
                                               method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                               bounds=bounds)
+                print(1)
+

                 if method2 == 'L-BFGS-B':
                     if hasattr(initial_beta.hess_inv, 'todense'):

@@ -5803,7 +5392,7 @@ class ObjectiveFunction(object):
             if initial_beta is not None and np.isnan(initial_beta['fun']):
                 initial_beta = self._minimize(self._loglik_gradient, bb,
                                               args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                    0, False, 0, None,
+                                                    0, False, 0, None, None, None, None, None, mod),
                                               method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})

             if initial_beta is not None and not np.isnan(initial_beta['fun']):

@@ -5827,24 +5416,24 @@ class ObjectiveFunction(object):
                                   loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)

                 self.naming_for_printing(
-                    initial_beta['x'], 1, dispersion,
+                    initial_beta['x'], 1, dispersion, model_nature=mod)

                 if self.is_multi:
                     in_sample_mae = self.validation(
                         initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         testing=0)

                     sol.add_objective(TRAIN=in_sample_mae)
                     MAE_out = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                     sol.add_objective(TEST=MAE_out)

                     if self.val_percentage >0:
                         MAE_VAL = self.validation(
                             initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                            rdm_cor_fit=self.rdm_cor_fit,
+                            rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                             validation=1)
                         sol.add_objective(VAL=MAE_VAL)
                 if sol[self._obj_1] <= self.best_obj_1:
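All three `validation(...)` calls above score the fitted coefficients on a data split (train, test, validation) and feed the result into the solution's objectives. The metric is a mean absolute error; a sketch of it, assuming predictions come from the fitted model:

```python
import numpy as np

def mean_absolute_error(y_obs, y_pred):
    y_obs, y_pred = np.asarray(y_obs, float), np.asarray(y_pred, float)
    return float(np.mean(np.abs(y_obs - y_pred)))
```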
@@ -5905,7 +5494,7 @@ class ObjectiveFunction(object):
             b[-1] = .5
         if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':

-            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh
+            Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh= self.get_num_params()
             if Kh > 0:
                 Kh_e = mod.get('XH').shape[-1]
                 Kh_range = Kh - Kh_e

@@ -5949,9 +5538,6 @@ class ObjectiveFunction(object):

                         bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                         count += 1
-
-
-
                     elif ii < jj:
                         if bob2[count] > 0:

@@ -6024,14 +5610,14 @@ class ObjectiveFunction(object):
             mod['dispersion_penalty'] = np.abs(b[-1])
             grad_args = (
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-
+                None, None, draws_grouped, XG, mod)
             # self.gradients_est_yes = (1, 1)

             if draws is None and draws_hetro is not None:
                 print('hold')
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-                self.rdm_cor_fit,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
                 method=method2, tol=tol['ftol'],
                 options={'gtol': tol['gtol']}, bounds=bounds,
                 hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -6050,7 +5636,7 @@ class ObjectiveFunction(object):
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                 self.rdm_cor_fit,
-
+                None, None, draws_grouped, XG, mod),
                 method=method2, tol=tol['ftol'],
                 options={'gtol': tol['gtol']})

@@ -6059,7 +5645,7 @@ class ObjectiveFunction(object):

         if np.isfinite(betas_est['fun']):
             self.naming_for_printing(
-                betas_est['x'], 0, dispersion,
+                betas_est['x'], 0, dispersion, model_nature=mod)

             if method2 == 'L-BFGS-B':

@@ -6086,7 +5672,7 @@ class ObjectiveFunction(object):

         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion,
+            betas_est['x'], 0, dispersion, model_nature=mod)

         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)

@@ -6096,19 +5682,19 @@ class ObjectiveFunction(object):
             try:

                 in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                                rdm_cor_fit=self.rdm_cor_fit,
+                                                rdm_cor_fit=self.rdm_cor_fit,
                                                 model_nature=mod, testing=0)
                 sol.add_objective(TRAIN=in_sample_mae)
                 y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
                 Xr_grouped_test = mod.get('Xrtest')
                 MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                           rdm_cor_fit=self.rdm_cor_fit,
+                                           rdm_cor_fit=self.rdm_cor_fit,
                                            model_nature=mod)

                 sol.add_objective(TEST=MAE_test)
-                if self.val_percentage >0:
+                if self.val_percentage > 0:
                     MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                              rdm_cor_fit=self.rdm_cor_fit,
+                                              rdm_cor_fit=self.rdm_cor_fit,
                                               model_nature=mod, validation=1)
                     sol.add_objective(VAL=MAE_val)
@@ -6226,8 +5812,7 @@ class ObjectiveFunction(object):
         self.rdm_cor_fit = [x for x, y in zip(
             select_data, model_nature.get('alpha_cor_rdm')) if y == 1]

-
-        # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
+
         # if alpha_grouped is not None:
         self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
         self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]

@@ -6405,14 +5990,14 @@ class ObjectiveFunction(object):
                 df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                     t, idx, df_test[:, :, idx])
                 if np.max(df_tf[:, :, idx]) >= 77000:
+
                     raise Exception('should not be possible')

         self.define_selfs_fixed_rdm_cor(model_nature)
         indices = self.get_named_indices(self.fixed_fit)
         indices5 = self.get_named_indices(self.hetro_fit)

-
-        model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
+

         x_h_storage = []
         x_h_storage_test = []

@@ -6445,7 +6030,7 @@ class ObjectiveFunction(object):
         if XG is not None:
             indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
                                       self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
-            XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :,
+            XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
             model_nature['XG'] = XG
             model_nature['XGtest'] = XGtest
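The `XGtest` hunk rewrites the interaction line (the old right-hand side is truncated in this view). The construction interacts every grouped covariate with every group dummy; a self-contained sketch of the shape logic, using `np.repeat` on the covariates in place of the repeated index array (equivalent here):

```python
import numpy as np

N, P, G, K = 5, 1, 3, 2                   # obs, panels, groups, grouped covariates
rng = np.random.default_rng(1)
dummies = np.eye(G)[rng.integers(0, G, N)].reshape(N, P, G)
X = rng.random((N, P, K))
XG = np.tile(dummies, K) * np.repeat(X, G, axis=2)
assert XG.shape == (N, P, G * K)          # column k*G + g holds dummy_g * x_k
```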
@@ -6488,7 +6073,8 @@ class ObjectiveFunction(object):
                 Xr_test = None
                 model_nature['Xr_test'] = Xr_test
             if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
-                print('
+                print('Not Possible')
+                raise Exception
             if Xr.size == 0:
                 Xr = None
                 Xr_test = None

@@ -6509,10 +6095,10 @@ class ObjectiveFunction(object):
             obj_1.add_layout(layout)

             model_form_name = self.check_complexity(
-                self.fixed_fit, self.rdm_fit, self.rdm_cor_fit,
+                self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)

             obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
-                            self.rdm_cor_fit.copy(), model_form_name,
+                            self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
             if not isinstance(obj_1, dict):
                 raise Exception('should not be possible')

@@ -6540,22 +6126,22 @@ class ObjectiveFunction(object):
         else:
             obj_1 = Solution()
             self.significant = 3
-            print('
+            print('not_implemented yet') #TODO check this for exciddeing values

         if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
             self.bic = obj_1['bic']
             self.pvalues = pvalues
-            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
+            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                 # todo: probably delete
                 self.naming_for_printing(
-                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
+                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                     obj_1, model_nature)
             else:
                 if is_delete == 0:
                     # todo: probably delete
                     self.naming_for_printing(
                         pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-
+                        obj_1, model_nature)
             self.coeff_ = betas
             self.stderr = stderr
             self.zvalues = zvalues
@@ -6563,8 +6149,9 @@ class ObjectiveFunction(object):
         if self.significant == 0:

             print(self.full_model, 'full model is')
-
-
+            if not self.test_flag:
+                alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                    self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)

             return obj_1, model_nature

@@ -6581,8 +6168,9 @@ class ObjectiveFunction(object):
             self.significant = 3

             return obj_1, model_nature
-
-
+        if not self.test_flag:
+            alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+                self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
         if self.grab_transforms:

             if is_halton and self.significant == 1:

@@ -6692,7 +6280,7 @@ class ObjectiveFunction(object):
         alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
         alpha_cor_rdm = alpha_cor_rdm.tolist()
         alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
-        alpha_group_rdm = alpha_group_rdm.tolist()
+        alpha_group_rdm = alpha_group_rdm.tolist() #todo will this ever trigger
         return alpha, alpha_rdm, alpha_cor_rdm

     def show_transforms(self, fix, rdm):
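`np.in1d` above turns name lists into the 0/1 membership vectors that record which selected columns belong to each coefficient family. A small worked example (the column names are illustrative only):

```python
import numpy as np

select_data = ['aadt', 'curve', 'grade', 'lanes']
cor_rdm = ['curve', 'lanes']    # columns treated as correlated random parameters
alpha_cor_rdm = (np.in1d(select_data, cor_rdm) * 1).tolist()
print(alpha_cor_rdm)            # [0, 1, 0, 1]
```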
@@ -6825,9 +6413,10 @@ class ObjectiveFunction(object):

         Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
-
+        #todo make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+            print('check this, intesection shouldn not happen for all')
+            Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
         for k, dist_k in enumerate(distribution):
             if dist_k == 'ln_normal':
                 der[:, k, :] = Br_come_one[:, k, :]
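Why reusing `Br_come_one[:, k, :]` as the `ln_normal` derivative is coherent, assuming the usual log-normal simulation: the coefficient is beta = exp(mu + sigma*z), so d(beta)/d(mu) equals beta itself. A one-screen check of that chain-rule fact:

```python
import numpy as np

mu, sigma = 0.2, 0.5
z = np.random.standard_normal(1000)
beta = np.exp(mu + sigma * z)   # log-normally distributed coefficient draws
d_beta_d_mu = beta              # d/dmu exp(mu + sigma*z) = exp(mu + sigma*z)
```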
@@ -6837,9 +6426,7 @@ class ObjectiveFunction(object):
         return der

     def _copy_size_display_as_ones(self, matrix):
-
-        please = matrix.shape
-        der = dev.np.ones((please), dtype=matrix.dtype)
+        der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
         return der

     def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):
|